import numpy as np
import math

import numm

# Number of sample rows gathered before each spectrum analysis.
WINDOW_SIZE = 4096
# Sample rate (Hz) used to map FFT bins to frequencies.
SAMPLE_RATE = 44100
# Bark-scale band edges in Hz; each consecutive pair bounds one band.
BARK = [20, 100, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500]
# Frequency of every bin of a WINDOW_SIZE-point FFT at SAMPLE_RATE.
FREQS = np.fft.fftfreq(WINDOW_SIZE, 1.0/SAMPLE_RATE)
# Index of the FFT bin closest to each band edge in BARK.
BARK_FREQ = [np.abs(FREQS - freq).argmin() for freq in BARK]
# Band value that is drawn as a full-height bar; larger values are clamped.
MAX_SPECT_VAL = 3000000.0

# Two-column int16 window that is filled in place as audio arrives.
window_buffer = np.zeros((WINDOW_SIZE, 2), np.int16)
# Next row of window_buffer to be written.
window_ptr = 0
# One value per band. The size is derived from the edge table (N edges give
# N - 1 bands) instead of a literal 24, so it cannot drift from BARK.
bark_spectrum = np.zeros(len(BARK_FREQ) - 1, np.int16)
# Index along the last axis of the video frame that the bars are painted on.
color = 1

def video_out(a):
    """Paint the current ``bark_spectrum`` into frame ``a`` as vertical bars.

    ``a`` is indexed as ``a[row, column, channel]``; axis 0 is treated as
    the frame height and axis 1 as its width.  The width is divided evenly
    between the bands, and each bar is filled with 255 on channel ``color``
    from its top row down to the bottom of the frame.  ``a`` is modified in
    place and nothing is returned.
    """
    height = a.shape[0]
    width = a.shape[1]
    rows_per_unit = height / MAX_SPECT_VAL
    bar_width = width / len(bark_spectrum)

    for band, level in enumerate(bark_spectrum):
        # Larger values start the bar closer to row 0; clamp to the frame.
        top = height - (level * rows_per_unit)
        top = int(max(min(top, height), 0))

        left = int(band * bar_width)
        # Stop one column short of the band's share of the width.
        right = int((band + 1) * bar_width - 1)

        a[top:height, left:right, color] = 255


def audio_input(new_buffer):
    """Append incoming audio to the analysis window, analysing when full.

    Rows of ``new_buffer`` are copied into the module-level
    ``window_buffer`` starting at ``window_ptr``.  Every time the window
    reaches ``WINDOW_SIZE`` rows it is handed to ``_audio_callback`` and
    filling restarts at row 0.  Rows left over at the end stay in the
    window, and the fill position is stored back in ``window_ptr`` for the
    next call.
    """
    global window_buffer
    global window_ptr

    # Work on locals; the globals are written back once, after the loop.
    buf = window_buffer
    fill = window_ptr
    total = len(new_buffer)
    consumed = 0

    while consumed < total:
        # Copy whichever is smaller: the free space in the window or the
        # rows still unread in new_buffer.
        step = min(WINDOW_SIZE - fill, total - consumed)
        buf[fill:fill + step] = new_buffer[consumed:consumed + step]
        fill += step
        consumed += step

        if fill >= WINDOW_SIZE:
            # Window complete: analyse it, then refill from the start.
            _audio_callback(buf)
            fill = 0

    window_buffer = buf
    window_ptr = fill


def _audio_callback(audio):
    """Recompute the global ``bark_spectrum`` from one window of audio.

    ``audio`` is averaged over axis 1 into a single signal and the
    magnitude of its FFT is taken.  The magnitudes lying between each pair
    of adjacent bin indices in ``BARK_FREQ`` are then summed, giving one
    value per band.  The result replaces ``bark_spectrum``.
    """
    global bark_spectrum

    # currently mono. ideally, compute metric for both channels and sum[?].
    mono = audio.mean(axis=1)
    magnitudes = np.abs(np.fft.fft(mono))

    # Pair every band edge with the next one: (edge0, edge1), (edge1, edge2)...
    band_sums = []
    for lo, hi in zip(BARK_FREQ[:-1], BARK_FREQ[1:]):
        band_sums.append(magnitudes[lo:hi].sum())

    bark_spectrum = np.array(band_sums)


if __name__ == '__main__':

    # Pass every module-level name (functions, constants, state) to numm as
    # keyword arguments.  Presumably numm selects the callbacks it supports
    # by name -- NOTE(review): confirm which names numm.run recognises.
    numm.run(**globals())
