Using multi-channel audio #6

Open
razor1179 opened this issue Jul 1, 2022 · 0 comments

razor1179 commented Jul 1, 2022

Hi there,

The current code works great with a single-channel audio input, but when the channel is set to 2 the code throws this error:

Exception ignored from cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x7f73d4faf0>:
Traceback (most recent call last):
  File "/home/pi/.local/lib/python3.9/site-packages/sounddevice.py", line 880, in callback_ptr
    return _wrap_callback(
  File "/home/pi/.local/lib/python3.9/site-packages/sounddevice.py", line 2681, in _wrap_callback
    callback(*args)
  File "/home/pi/PiDTLN/ns.py", line 128, in callback
    indata = indata[:, [args.channel]]
IndexError: index 2 is out of bounds for axis 1 with size 2
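
I assume the IndexError itself is just 0-based channel indexing: with a 2-channel stream the valid column indices are 0 and 1, so selecting channel 2 falls outside the array. A minimal illustration (plain NumPy; the block shape is my assumption of what sounddevice hands to the callback):

import numpy as np

# sounddevice passes blocks shaped (frames, channels); with 2 channels the last valid index is 1
block = np.zeros((128, 2), dtype='float32')
print(block[:, [1]].shape)   # (128, 1) -- works
# block[:, [2]]              # IndexError: index 2 is out of bounds for axis 1 with size 2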

I did modify the code to process the two channels separately, but now I get input overflow / output underflow errors because the extra per-channel work slows the callback down. Do you have a suggestion on how to solve this? The modified code is below.

# set some parameters
block_len_ms = 32
block_shift_ms = 8
fs_target = 16000
# create the interpreters
interpreter_1 = tflite.Interpreter(model_path='./models/dtln_ns_quant_1.tflite', num_threads=args.threads)
interpreter_1.allocate_tensors()
interpreter_2 = tflite.Interpreter(model_path='./models/dtln_ns_quant_2.tflite', num_threads=args.threads)
interpreter_2.allocate_tensors()
# Get input and output tensors.
input_details_1 = interpreter_1.get_input_details()
output_details_1 = interpreter_1.get_output_details()
input_details_2 = interpreter_2.get_input_details()
output_details_2 = interpreter_2.get_output_details()
# create states for the lstms
states_1_ch1 = np.zeros(input_details_1[1]['shape']).astype('float32')
states_2_ch1 = np.zeros(input_details_2[1]['shape']).astype('float32')
states_1_ch2 = np.zeros(input_details_1[1]['shape']).astype('float32')
states_2_ch2 = np.zeros(input_details_2[1]['shape']).astype('float32')
# calculate shift and length
block_shift = int(np.round(fs_target * (block_shift_ms / 1000)))
block_len = int(np.round(fs_target * (block_len_ms / 1000)))
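# at fs_target = 16000 this works out to block_shift = 128 samples (8 ms) and block_len = 512 samples (32 ms)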
# create buffer
in_buffer_ch1 = np.zeros((block_len)).astype('float32')
out_buffer_ch1 = np.zeros((block_len)).astype('float32')
in_buffer_ch2 = np.zeros((block_len)).astype('float32')
out_buffer_ch2 = np.zeros((block_len)).astype('float32')

if args.no_fftw:
    g_use_fftw = False
if g_use_fftw:
    fft_buf_ch1 = pyfftw.empty_aligned(512, dtype='float32')
    rfft_ch1 = pyfftw.builders.rfft(fft_buf_ch1, threads=args.threads)
    ifft_buf_ch1 = pyfftw.empty_aligned(257, dtype='complex64')
    irfft_ch1 = pyfftw.builders.irfft(ifft_buf_ch1, threads=args.threads)
    fft_buf_ch2 = pyfftw.empty_aligned(512, dtype='float32')
    rfft_ch2 = pyfftw.builders.rfft(fft_buf_ch2, threads=args.threads)
    ifft_buf_ch2 = pyfftw.empty_aligned(257, dtype='complex64')
    irfft_ch2 = pyfftw.builders.irfft(ifft_buf_ch2, threads=args.threads)

t_ring = collections.deque(maxlen=100)


def callback(indata, outdata, frames, buf_time, status):
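    # sounddevice delivers indata/outdata as (frames, channels) float32 arrays; frames equals the stream blocksize (block_shift)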
    # buffer and states to global
    global in_buffer_ch1, in_buffer_ch2, out_buffer_ch1, out_buffer_ch2, states_1_ch1, states_2_ch1, states_1_ch2,\
        states_2_ch2, t_ring, g_use_fftw
    if args.measure:
        start_time = time.time()
    if status:
        print(status)
    # both channels are consumed below via indata[:, 0] and indata[:, 1], so no channel selection is needed here
    if args.no_denoise:
        outdata[:] = indata
        if args.measure:
            t_ring.append(time.time() - start_time)
        return
    # write to buffer
    in_buffer_ch1[:-block_shift] = in_buffer_ch1[block_shift:]
    in_buffer_ch1[-block_shift:] = np.squeeze(indata[:, 0])
    in_buffer_ch2[:-block_shift] = in_buffer_ch2[block_shift:]
    in_buffer_ch2[-block_shift:] = np.squeeze(indata[:, 1])
    # calculate fft of input block
    if g_use_fftw:
        fft_buf_ch1[:] = in_buffer_ch1
        in_block_fft_ch1 = rfft_ch1()
        fft_buf_ch2[:] = in_buffer_ch2
        in_block_fft_ch2 = rfft_ch2()
    else:
        in_block_fft_ch1 = np.fft.rfft(in_buffer_ch1)
        in_block_fft_ch2 = np.fft.rfft(in_buffer_ch2)
    in_mag_ch1 = np.abs(in_block_fft_ch1)
    in_phase_ch1 = np.angle(in_block_fft_ch1)
    in_mag_ch2 = np.abs(in_block_fft_ch2)
    in_phase_ch2 = np.angle(in_block_fft_ch2)
    # reshape magnitude to input dimensions
    in_mag_ch1 = np.reshape(in_mag_ch1, (1, 1, -1)).astype('float32')
    in_mag_ch2 = np.reshape(in_mag_ch2, (1, 1, -1)).astype('float32')

    # set tensors to the first model (channel 1)
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1_ch1)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag_ch1)
    # run calculation
    interpreter_1.invoke()
    # get the output of the first block
    out_mask_ch1 = interpreter_1.get_tensor(output_details_1[0]['index'])
    states_1_ch1 = interpreter_1.get_tensor(output_details_1[1]['index'])
    # calculate the ifft
    estimated_complex_ch1 = in_mag_ch1 * out_mask_ch1 * np.exp(1j * in_phase_ch1)
    if g_use_fftw:
        ifft_buf_ch1[:] = estimated_complex_ch1
        estimated_block_ch1 = irfft_ch1()
    else:
        estimated_block_ch1 = np.fft.irfft(estimated_complex_ch1)
    # reshape the time domain block
    estimated_block_ch1 = np.reshape(estimated_block_ch1, (1, 1, -1)).astype('float32')

    # set tensors to the first model (channel 2)
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1_ch2)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag_ch2)
    # run calculation
    interpreter_1.invoke()
    # get the output of the first block
    out_mask_ch2 = interpreter_1.get_tensor(output_details_1[0]['index'])
    states_1_ch2 = interpreter_1.get_tensor(output_details_1[1]['index'])
    # calculate the ifft
    estimated_complex_ch2 = in_mag_ch2 * out_mask_ch2 * np.exp(1j * in_phase_ch2)
    if g_use_fftw:
        ifft_buf_ch2[:] = estimated_complex_ch2
        estimated_block_ch2 = irfft_ch2()
    else:
        estimated_block_ch2 = np.fft.irfft(estimated_complex_ch2)
    # reshape the time domain block
    estimated_block_ch2 = np.reshape(estimated_block_ch2, (1, 1, -1)).astype('float32')

    # set tensors to the second model (channel 1)
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2_ch1)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block_ch1)
    # run calculation
    interpreter_2.invoke()
    # get output tensors
    out_block_ch1 = interpreter_2.get_tensor(output_details_2[0]['index'])
    states_2_ch1 = interpreter_2.get_tensor(output_details_2[1]['index'])
    # write to buffer
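    # (overlap-add: shift the output buffer left by block_shift, zero the freed tail, then add the new output block)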
    out_buffer_ch1[:-block_shift] = out_buffer_ch1[block_shift:]
    out_buffer_ch1[-block_shift:] = np.zeros((block_shift))
    out_buffer_ch1 += np.squeeze(out_block_ch1)
    # output to soundcard
    # outdata[:, 0] = np.expand_dims(out_buffer_ch1[:block_shift], axis=-1)
    outdata[:, 0] = out_buffer_ch1[:block_shift]

    # set tensors to the second model (channel 2)
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2_ch2)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block_ch2)
    # run calculation
    interpreter_2.invoke()
    # get output tensors
    out_block_ch2 = interpreter_2.get_tensor(output_details_2[0]['index'])
    states_2_ch2 = interpreter_2.get_tensor(output_details_2[1]['index'])
    # write to buffer
    out_buffer_ch2[:-block_shift] = out_buffer_ch2[block_shift:]
    out_buffer_ch2[-block_shift:] = np.zeros((block_shift))
    out_buffer_ch2 += np.squeeze(out_block_ch2)
    # output to soundcard
    # outdata[:, 1] = np.expand_dims(out_buffer_ch2[:block_shift], axis=-1)
    outdata[:, 1] = out_buffer_ch2[:block_shift]
    # print(indata.shape)
    # print(outdata.shape)
    if args.measure:
        t_ring.append(time.time() - start_time)


def open_stream():
    with sd.Stream(device=(args.input_device, args.output_device), samplerate=fs_target, blocksize=block_shift,
                   dtype=np.float32, latency=args.latency, channels=args.channels, callback=callback):
        print('#' * 80)
        print('Ctrl-C to exit')
        print('#' * 80)
        if args.measure:
            while True:
                time.sleep(1)
                print('Processing time: {:.2f} ms'.format(1000 * np.average(t_ring)), end='\r')
        else:
            threading.Event().wait()


try:
    if args.daemonize:
        with daemon.DaemonContext():
            open_stream()
    else:
        open_stream()
except KeyboardInterrupt:
    parser.exit('')
except Exception as e:
    parser.exit(type(e).__name__ + ': ' + str(e))
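
To spell out the timing constraint as I understand it: with block_shift_ms = 8 the callback receives a new 128-sample block every 8 ms, so the FFTs plus four interpreter invokes (two models × two channels) all have to finish inside that window, otherwise the stream under-runs. The --measure path above already reports the average callback time from t_ring, which can be compared against that budget. A rough, self-contained sketch of the check (the per-channel work here is a stand-in, not the real model calls):

import time
import numpy as np

BUDGET_MS = 8.0                      # one block_shift at 16 kHz / 128 samples

def per_channel_work():
    # stand-in for one channel's rfft + two TFLite invokes + irfft
    buf = np.random.rand(512).astype('float32')
    spec = np.fft.rfft(buf)
    np.fft.irfft(spec)

t0 = time.perf_counter()
for _ in range(2):                   # two channels per callback in the modified script
    per_channel_work()
elapsed_ms = 1000 * (time.perf_counter() - t0)
print(f'per-callback work: {elapsed_ms:.2f} ms (budget: {BUDGET_MS:.1f} ms)')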