Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation for xcorr_kernel callback set #3

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ src/rnnoise_data.h
src/rnnoise_data.c
CMakeUserPresets.json
.vscode/
sample_audios/
.DS_Store
sample_audios/
evaluation_output/
rnnoise_env/
23 changes: 20 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@ project(rnnoise)

# Build-time switches.
# option() has the signature option(<variable> "<help text>" [value]); when the
# help text is left out, the ON/OFF token is consumed as the help string and
# the default silently becomes OFF. Each switch therefore spells out both.
option(RNNOISE_COMPILE_OPUS "Add the COMPILE_OPUS definition to the build" OFF)
option(RNNOISE_COMPILE_DEMO "Build the programs in the examples directory" ON)
option(RNNOISE_BAREMETAL "Bare-metal build: do not look for or link mimalloc" OFF)

# mimalloc is only required when not building for bare metal.
if(NOT RNNOISE_BAREMETAL)
  find_package(mimalloc REQUIRED)
endif()

# Clang (including AppleClang) builds get source-based coverage instrumentation
# on every C++ translation unit.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  string(APPEND CMAKE_CXX_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
endif()

if(RNNOISE_COMPILE_OPUS)
add_definitions(-DCOMPILE_OPUS)
Expand Down Expand Up @@ -68,7 +77,7 @@ set(RN_NOISE_SRC
src/opus_types.h
src/pitch.h
src/rnn.h
src/rnnoise_data.h
src/rnnoise_data_little.h
include/rnnoise.h
src/vec.h

Expand All @@ -79,7 +88,7 @@ set(RN_NOISE_SRC
src/denoise.c
src/kiss_fft.c
src/parse_lpcnet_weights.c
src/rnnoise_data.c
src/rnnoise_data_little.c
#write_weights.c
#dump_features.c
src/nnet.c
Expand All @@ -96,8 +105,16 @@ target_include_directories(rnnoise PUBLIC
$<INSTALL_INTERFACE:include>
PRIVATE src)


# Outside bare-metal builds, rnnoise and everything that links it get the
# USE_MIMALLOC_ALLOCATOR definition and the static mimalloc library.
if(NOT RNNOISE_BAREMETAL)
  target_compile_definitions(rnnoise PUBLIC USE_MIMALLOC_ALLOCATOR)
  target_link_libraries(rnnoise PUBLIC mimalloc-static)
endif()

# The math library is a separate link dependency on UNIX platforms.
if(UNIX)
  target_link_libraries(rnnoise PUBLIC m)
endif()

add_subdirectory(examples)
if(RNNOISE_COMPILE_DEMO)
add_subdirectory(examples)
endif()
5 changes: 5 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,8 @@ model, rename rnnoise_data_little.c to rnnoise_data.c. It is possible
to build both the regular and little binary weights and load any of them
at run time since the little model has the same size as the regular one
(except for the increased sparsity).

## Build with Vitis Xilinx toolchain:
```shell
cmake -G"Unix Makefiles" -DCMAKE_BUILD_TYPE="Release" -DCMAKE_TOOLCHAIN_FILE="${PWD}/cmake/vitis_toolchain.cmake" -S="${PWD}" -B="${PWD}/build" -DRNNOISE_COMPILE_DEMO=OFF -DRNNOISE_BAREMETAL=ON
```
2 changes: 1 addition & 1 deletion conanfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
libsndfile/1.2.2
cxxopts/3.2.0
fmt/10.2.1
spdlog/1.14.0
mimalloc/2.1.2

[generators]
CMakeDeps
Expand Down
11 changes: 7 additions & 4 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ target_link_libraries(rnnoise_demo PRIVATE rnnoise)
find_package(SndFile REQUIRED)
find_package(cxxopts REQUIRED)
find_package(fmt REQUIRED)
find_package(spdlog REQUIRED)
find_package(mimalloc REQUIRED)


add_executable(rnnoise_libsoundfile rnnoise_libsndfile.cpp)
add_executable(rnnoise_libsoundfile
rnnoise_libsndfile.cpp
profiling/xcorr_impl.cpp
profiling/xcorr_offload_kernel.cpp
)

if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
message(WARNING "Compiling RNNOISE for Windows...")
Expand All @@ -24,6 +27,6 @@ target_link_libraries(
SndFile::sndfile
cxxopts::cxxopts
fmt::fmt
spdlog::spdlog
mimalloc-static
)
target_include_directories(rnnoise_libsoundfile PRIVATE ${CMAKE_CURRENT_LIST_DIR})
11 changes: 4 additions & 7 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@ mkdir build

## For debug version of the app
conan install conanfile.txt --build=missing --settings=build_type=Debug
cmake -G"Unix Makefiles" -B=build --preset=conan-debug .

cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=./Debug/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Debug -B build
cmake --build build

## For Release version:
conan install conanfile.txt --build=missing
cd build
cmake -G"Unix Makefiles" -B=build --preset=conan-release .


cmake --build .
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=./Release/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -B build
cmake --build build
```
83 changes: 83 additions & 0 deletions examples/profiling/xcorr_impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include "xcorr_impl.h"
#include <cassert>
#include <fmt/core.h>
#include <chrono>

/* Multiply-accumulate: returns c plus the product of a and b. */
inline float MAC16_16(float c, float a, float b)
{
    return c + (a * b);
}

/*
 * Host-side cross-correlation kernel with built-in timing.
 *
 * For k = 0..3, adds x[j] * y[j + k] for every j in [0, len) onto sum[k].
 * The caller's initial values in sum[] are kept and accumulated onto.
 * Reads len values from x and len + 3 values from y.
 *
 * Prints the requested length before the computation and the elapsed time in
 * microseconds after it. Asserts len >= 3.
 */
void xcorr_native_impl(const float * x, const float * y, float sum[4], int len)
{
    assert(len>=3);
    fmt::print("Xcorr native impl called with len {} \n",len);
    const auto t_begin = std::chrono::high_resolution_clock::now();

    /* Adds sample * {w0, w1, w2, w3} onto the four running sums. */
    const auto accumulate = [sum](float sample, float w0, float w1, float w2, float w3) {
        sum[0] = MAC16_16(sum[0],sample,w0);
        sum[1] = MAC16_16(sum[1],sample,w1);
        sum[2] = MAC16_16(sum[2],sample,w2);
        sum[3] = MAC16_16(sum[3],sample,w3);
    };

    /* Sliding window over y: three values are preloaded, and each step
       overwrites the oldest slot with the next value. */
    const float *x_it = x;
    const float *y_it = y;
    float y_0 = *y_it++;
    float y_1 = *y_it++;
    float y_2 = *y_it++;
    float y_3 = 0; /* always written before it is read; initialised to silence the compiler */

    /* Main part: four x samples per pass, rotating the window one slot each time. */
    int j = 0;
    while (j < len-3)
    {
        y_3 = *y_it++;
        accumulate(*x_it++, y_0, y_1, y_2, y_3);
        y_0 = *y_it++;
        accumulate(*x_it++, y_1, y_2, y_3, y_0);
        y_1 = *y_it++;
        accumulate(*x_it++, y_2, y_3, y_0, y_1);
        y_2 = *y_it++;
        accumulate(*x_it++, y_3, y_0, y_1, y_2);
        j += 4;
    }

    /* Up to three leftover samples, continuing the same rotation. */
    if (j < len)
    {
        y_3 = *y_it++;
        accumulate(*x_it++, y_0, y_1, y_2, y_3);
    }
    if (j + 1 < len)
    {
        y_0 = *y_it++;
        accumulate(*x_it++, y_1, y_2, y_3, y_0);
    }
    if (j + 2 < len)
    {
        y_1 = *y_it++;
        accumulate(*x_it++, y_2, y_3, y_0, y_1);
    }

    const auto t_end = std::chrono::high_resolution_clock::now();
    const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(t_end - t_begin);

    fmt::print("Execution time is:{} us\n",elapsed.count());
}
10 changes: 10 additions & 0 deletions examples/profiling/xcorr_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef XCORR_IMPL_H
#define XCORR_IMPL_H

/*
 * The function below has C linkage. The linkage block is only emitted for
 * C++ translation units so that this header can also be included from C.
 */
#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Host-side cross-correlation kernel.
 *
 * For k = 0..3, accumulates x[j] * y[j + k] over j in [0, len) onto sum[k].
 * x must provide len values and y must provide len + 3 values.
 * The implementation asserts len >= 3 and prints the length and the elapsed
 * time for each call.
 */
void xcorr_native_impl(const float * x, const float * y, float sum[4], int len);

#ifdef __cplusplus
}
#endif

#endif
103 changes: 103 additions & 0 deletions examples/profiling/xcorr_offload_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#include <cassert>
#include <cstring>
#include "xcorr_offload_kernel.hpp"

#include <stdio.h>

/* Multiply-accumulate: returns c + a*b. */
inline float MAC16_16(float c, float a, float b){
    return c+a*b;
}

/* Largest `len` that xcorr_kernel can stage in its local buffers. */
#define MAX_PROCESSING_BLOCK_SIZE 1024

/*
 * Cross-correlation kernel written for HLS offload.
 *
 * For k = 0..3, accumulates x[j] * y[j + k] over j in [0, len) onto sum[k].
 * The four values already stored in sum[] are read first and accumulated onto.
 *
 *   x    len input samples.
 *   y    len + 3 input samples (the window slides three elements past x).
 *   sum  four running sums, updated in place.
 *   len  number of x samples; must satisfy 3 <= len <= MAX_PROCESSING_BLOCK_SIZE.
 *
 * Inputs are first copied into local arrays, the sums are computed on the
 * copies, and the result is written back to sum[] at the end.
 */
void xcorr_kernel(const float * x, const float * y, float* sum, int len)
{

#pragma HLS INTERFACE mode=s_axilite port=return bundle=CONTROL_BUS
#pragma HLS INTERFACE mode=s_axilite port=len bundle=CONTROL_BUS

#pragma HLS INTERFACE m_axi port=x offset=slave bundle=INPUT
#pragma HLS INTERFACE m_axi port=y offset=slave bundle=INPUT
#pragma HLS INTERFACE m_axi port=sum offset=slave bundle=OUTPUT

    int j;
    float y_0, y_1, y_2, y_3;
    assert(len>=3);
    /* The staging buffers below are fixed-size; a larger len would overrun them. */
    assert(len<=MAX_PROCESSING_BLOCK_SIZE);

    float x_copy[MAX_PROCESSING_BLOCK_SIZE];
    /* The loops consume len + 3 values of y, so the buffer needs three extra slots. */
    float y_copy[MAX_PROCESSING_BLOCK_SIZE + 3];

    float sum_copy[4];

    float* x_arr_ptr = x_copy;
    float* y_arr_ptr = y_copy;

    /* memcpy counts bytes, not elements: scale every count by sizeof(float). */
    memcpy(x_copy, x, len * sizeof(float));
    memcpy(y_copy, y, (len + 3) * sizeof(float));
    memcpy(sum_copy, sum, sizeof(sum_copy));

    y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
    y_0=*y_arr_ptr++;
    y_1=*y_arr_ptr++;
    y_2=*y_arr_ptr++;

    /* Main part: four x samples per pass, rotating the y window one slot each time. */
    xcoor_1st_loop:
    for (j=0;j<len-3;j+=4)
    {
        float tmp;
        tmp = *x_arr_ptr++;
        y_3=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_0);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_1);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_2);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_3);
        tmp=*x_arr_ptr++;
        y_0=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_1);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_2);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_3);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_0);
        tmp=*x_arr_ptr++;
        y_1=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_2);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_3);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_0);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_1);
        tmp=*x_arr_ptr++;
        y_2=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_3);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_0);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_1);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_2);
    }
    /* Up to three leftover samples, continuing the same rotation. */
    xcoor_2nd_loop:
    if (j++<len)
    {
        float tmp = *x_arr_ptr++;
        y_3=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_0);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_1);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_2);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_3);
    }
    xcoor_3rd_loop:
    if (j++<len)
    {
        float tmp=*x_arr_ptr++;
        y_0=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_1);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_2);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_3);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_0);
    }
    xcoor_4th_loop:
    if (j<len)
    {
        float tmp=*x_arr_ptr++;
        y_1=*y_arr_ptr++;
        sum_copy[0] = MAC16_16(sum_copy[0],tmp,y_2);
        sum_copy[1] = MAC16_16(sum_copy[1],tmp,y_3);
        sum_copy[2] = MAC16_16(sum_copy[2],tmp,y_0);
        sum_copy[3] = MAC16_16(sum_copy[3],tmp,y_1);
    }
    /* Write all four sums back, not just the first four bytes. */
    memcpy(sum,sum_copy,sizeof(sum_copy));
}
6 changes: 6 additions & 0 deletions examples/profiling/xcorr_offload_kernel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#ifndef XCORR_OFFLOAD_KERNEL_HPP
#define XCORR_OFFLOAD_KERNEL_HPP

/*
 * Cross-correlation kernel intended for HLS offload (the definition carries
 * HLS INTERFACE pragmas for every argument).
 *
 *   x    input samples; the kernel consumes len of them.
 *   y    input samples; the kernel consumes len + 3 of them.
 *   sum  four running sums, read on entry and written back on exit.
 *   len  number of x samples; the definition asserts len >= 3.
 *
 * NOTE(review): the definition stages its inputs in fixed-size local buffers,
 * so len presumably must not exceed that buffer size - confirm against the
 * .cpp before calling with large blocks.
 */
void xcorr_kernel(const float * x, const float * y, float* sum, int len);

#endif
1 change: 0 additions & 1 deletion examples/rnnoise_demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ int main(int argc, char **argv) {
if (!first) fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
first = 0;
}
rnnoise_destroy(st);
fclose(f1);
fclose(fout);
#ifdef USE_WEIGHTS_FILE
Expand Down
Loading