Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into tile_matmul_bf16
Browse files Browse the repository at this point in the history
  • Loading branch information
frengels committed Sep 8, 2021
2 parents 5ad06e0 + b78b205 commit f0f9f3e
Show file tree
Hide file tree
Showing 171 changed files with 3,675 additions and 1,637 deletions.
3 changes: 3 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Checks: >
-misc-unconventional-assign-operator,
-misc-unused-parameters,
modernize-deprecated-headers,
modernize-make-shared,
modernize-make-unique,
modernize-redundant-void-arg,
modernize-use-bool-literals,
modernize-use-default-member-init,
Expand All @@ -28,6 +30,7 @@ Checks: >
performance-*,
-performance-inefficient-string-concatenation,
-performance-inefficient-vector-operation,
-performance-no-int-to-ptr,
readability-avoid-const-params-in-decls,
readability-braces-around-statements,
readability-const-return-type,
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: DoozyX/clang-format-lint-action@v0.11
- uses: DoozyX/clang-format-lint-action@v0.12
with:
source: '.'
extensions: 'h,c,cpp'
clangFormatVersion: 11
clangFormatVersion: 12
check_clang_tidy:
name: Check clang-tidy
runs-on: ubuntu-20.04
Expand All @@ -28,12 +28,12 @@ jobs:
- name: Install clang-tidy
run: |
sudo apt-get update
sudo apt-get install llvm-11 clang-11 liblld-11-dev libclang-11-dev clang-tidy-11 ninja-build
sudo apt-get install llvm-12 clang-12 liblld-12-dev libclang-12-dev clang-tidy-12 ninja-build
- name: Run clang-tidy
run: |
export CC=clang-11
export CXX=clang++-11
export CLANG_TIDY_LLVM_INSTALL_DIR=/usr/lib/llvm-11
export CC=clang-12
export CXX=clang++-12
export CLANG_TIDY_LLVM_INSTALL_DIR=/usr/lib/llvm-12
./run-clang-tidy.sh
check_cmake_file_lists:
name: Check CMake file lists
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ python_bindings/bin/*
build-64/*
build-ios/*
build-osx/*
build_wasm/*
build_wasm*/*
cmake_build*/*
*/build/*
tmp/*
Expand Down
24 changes: 9 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1169,18 +1169,6 @@ GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_cleanup_on_error,$(GENER
# https://github.com/halide/Halide/issues/2084 (only if opencl enabled)
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_buffer_copy,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2071
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_user_context,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2071
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_argvcall,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2071
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_metadata_tester,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2071
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_cxx_mangling,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2075
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_msan,$(GENERATOR_AOTCPP_TESTS))

Expand Down Expand Up @@ -1471,6 +1459,8 @@ $(FILTERS_DIR)/metadata_tester_ucon.a: $(BIN_DIR)/metadata_tester.generator

$(BIN_DIR)/$(TARGET)/generator_aot_metadata_tester: $(FILTERS_DIR)/metadata_tester_ucon.a

$(BIN_DIR)/$(TARGET)/generator_aotcpp_metadata_tester: $(FILTERS_DIR)/metadata_tester_ucon.halide_generated.cpp

$(FILTERS_DIR)/multitarget.a: $(BIN_DIR)/multitarget.generator
@mkdir -p $(@D)
$(CURDIR)/$< -g multitarget -f "HalideTest::multitarget" $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
Expand Down Expand Up @@ -2131,6 +2121,10 @@ ifneq (,$(findstring clang version 13.0,$(CLANG_VERSION)))
CLANG_OK=yes
endif

ifneq (,$(findstring clang version 14.0,$(CLANG_VERSION)))
CLANG_OK=yes
endif

ifneq (,$(findstring Apple LLVM version 5.0,$(CLANG_VERSION)))
CLANG_OK=yes
endif
Expand All @@ -2151,7 +2145,7 @@ $(BUILD_DIR)/clang_ok:
@exit 1
endif

ifneq (,$(findstring $(LLVM_VERSION_TIMES_10), 110 111 120 130))
ifneq (,$(findstring $(LLVM_VERSION_TIMES_10), 110 111 120 130 140))
LLVM_OK=yes
endif

Expand Down Expand Up @@ -2345,15 +2339,15 @@ $(BIN_DIR)/HalideTraceDump: $(ROOT_DIR)/util/HalideTraceDump.cpp $(ROOT_DIR)/uti

# Note: you must have CLANG_FORMAT_LLVM_INSTALL_DIR set for this rule to work.
# Let's default to the Ubuntu install location.
CLANG_FORMAT_LLVM_INSTALL_DIR ?= /usr/lib/llvm-11
CLANG_FORMAT_LLVM_INSTALL_DIR ?= /usr/lib/llvm-12

.PHONY: format
format:
@CLANG_FORMAT_LLVM_INSTALL_DIR=$(CLANG_FORMAT_LLVM_INSTALL_DIR) ${ROOT_DIR}/run-clang-format.sh

# Note: you must have CLANG_TIDY_LLVM_INSTALL_DIR set for these rules to work.
# Let's default to the Ubuntu install location.
CLANG_TIDY_LLVM_INSTALL_DIR ?= /usr/lib/llvm-11
CLANG_TIDY_LLVM_INSTALL_DIR ?= /usr/lib/llvm-12

.PHONY: clang-tidy
clang-tidy:
Expand Down
2 changes: 1 addition & 1 deletion apps/blur/halide_blur_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class HalideBlur : public Halide::Generator<HalideBlur> {

blur_y.compute_root()
.hexagon()
.prefetch(input, y, 2)
.prefetch(input, y, y, 2)
.split(y, y, yi, 128)
.parallel(y)
.vectorize(x, vector_size * 2);
Expand Down
2 changes: 1 addition & 1 deletion apps/camera_pipe/camera_pipe_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ void CameraPipe::generate() {
denoised
.compute_at(processed, yi)
.store_at(processed, yo)
.prefetch(input, y, 2)
.prefetch(input, y, y, 2)
.fold_storage(y, 4)
.tile(x, y, x, y, xi, yi, 2 * vec, 2)
.vectorize(xi)
Expand Down
15 changes: 5 additions & 10 deletions apps/fft/fft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,7 @@ ComplexFunc fft2d_c2c(ComplexFunc x,
TwiddleFactorSet twiddle_cache;

// transpose the input to the FFT.
ComplexFunc xT, x_tiled;
std::tie(xT, x_tiled) = tiled_transpose(x, N1, target, prefix);
auto [xT, x_tiled] = tiled_transpose(x, N1, target, prefix);

// Compute the DFT of dimension 1 (originally dimension 0).
ComplexFunc dft1T = fft_dim1(xT,
Expand All @@ -516,8 +515,7 @@ ComplexFunc fft2d_c2c(ComplexFunc x,
&twiddle_cache);

// transpose back.
ComplexFunc dft1, dft1_tiled;
std::tie(dft1, dft1_tiled) = tiled_transpose(dft1T, N0, target, prefix);
auto [dft1, dft1_tiled] = tiled_transpose(dft1T, N0, target, prefix);

// Compute the DFT of dimension 1.
ComplexFunc dft = fft_dim1(dft1,
Expand Down Expand Up @@ -783,8 +781,7 @@ ComplexFunc fft2d_r2c(Func r,
int zipped_extent0 = std::min((N1 + 1) / 2, zip_width);

// transpose so we can FFT dimension 0 (by making it dimension 1).
ComplexFunc unzippedT, unzippedT_tiled;
std::tie(unzippedT, unzippedT_tiled) = tiled_transpose(zipped_0, zipped_extent0, target, prefix);
auto [unzippedT, unzippedT_tiled] = tiled_transpose(zipped_0, zipped_extent0, target, prefix);

// DFT down the columns again (the rows of the original).
ComplexFunc dftT = fft_dim1(unzippedT,
Expand Down Expand Up @@ -949,8 +946,7 @@ Func fft2d_c2r(ComplexFunc c,
}

// transpose the input.
ComplexFunc cT, cT_tiled;
std::tie(cT, cT_tiled) =
auto [cT, cT_tiled] =
tiled_transpose(c_zipped, zipped_extent0, target, prefix);

// Take the inverse DFT of the columns (rows in the final result).
Expand All @@ -971,8 +967,7 @@ Func fft2d_c2r(ComplexFunc c,
}

// transpose so we can take the DFT of the columns again.
ComplexFunc dft0, dft0_tiled;
std::tie(dft0, dft0_tiled) = tiled_transpose(dft0T, zip_width, target, prefix, true);
auto [dft0, dft0_tiled] = tiled_transpose(dft0T, zip_width, target, prefix, true);

// Unzip the DC and Nyquist DFTs.
ComplexFunc dft0_unzipped("dft0_unzipped");
Expand Down
6 changes: 5 additions & 1 deletion apps/hannk/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@ find_package(Halide REQUIRED)

# Set up the version of TFLite we expect
set(TFLITE_VERSION_MAJOR "2" CACHE STRING "Major version of TFLite to assume")
set(TFLITE_VERSION_MINOR "5" CACHE STRING "Minor version of TFLite to assume")
set(TFLITE_VERSION_MINOR "6" CACHE STRING "Minor version of TFLite to assume")
set(TFLITE_VERSION_PATCH "0" CACHE STRING "Patch version of TFLite to assume")

add_compile_definitions(TFLITE_VERSION_MAJOR=${TFLITE_VERSION_MAJOR})
add_compile_definitions(TFLITE_VERSION_MINOR=${TFLITE_VERSION_MINOR})
add_compile_definitions(TFLITE_VERSION_PATCH=${TFLITE_VERSION_PATCH})

set(TFLITE_VERSION "${TFLITE_VERSION_MAJOR}.${TFLITE_VERSION_MINOR}.${TFLITE_VERSION_PATCH}")

add_subdirectory(delegate)
Expand Down
55 changes: 30 additions & 25 deletions apps/hannk/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ clean:
# ---------------------- TFLite glue

TFLITE_VERSION_MAJOR ?= 2
TFLITE_VERSION_MINOR ?= 5
TFLITE_VERSION_MINOR ?= 6
TFLITE_VERSION_PATCH ?= 0

TFLITE_VERSION = $(TFLITE_VERSION_MAJOR).$(TFLITE_VERSION_MINOR).$(TFLITE_VERSION_PATCH)
Expand Down Expand Up @@ -136,29 +136,37 @@ $(BIN)/%/halide/average_pool_uint8.a: $(GENERATOR_BIN)/pool.generator
@mkdir -p $(@D)
$< -g AveragePool -f hannk::average_pool_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/conv_uint8.a: $(GENERATOR_BIN)/conv.generator
$(BIN)/%/halide/conv_u8_u8_u8.a: $(GENERATOR_BIN)/conv.generator
@mkdir -p $(@D)
$< -g Conv -f hannk::conv_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly
$< -g Conv output.type=uint8 -f hannk::conv_u8_u8_u8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/conv_r16_uint8.a: $(GENERATOR_BIN)/conv.generator
$(BIN)/%/halide/conv_u8_u8_i16.a: $(GENERATOR_BIN)/conv.generator
@mkdir -p $(@D)
$< -g Conv unroll_reduction=16 -f hannk::conv_r16_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly
$< -g Conv output.type=int16 -f hannk::conv_u8_u8_i16 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/copy_uint8_uint8.a: $(GENERATOR_BIN)/copy.generator
$(BIN)/%/halide/conv_r16_u8_u8_u8.a: $(GENERATOR_BIN)/conv.generator
@mkdir -p $(@D)
$< -g Copy input.type=uint8 output.type=uint8 -f hannk::copy_uint8_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-no_bounds_query-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly
$< -g Conv unroll_reduction=16 output.type=uint8 -f hannk::conv_r16_u8_u8_u8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/depthwise_conv_uint8.a: $(GENERATOR_BIN)/depthwise_conv.generator
$(BIN)/%/halide/conv_r16_u8_u8_i16.a: $(GENERATOR_BIN)/conv.generator
@mkdir -p $(@D)
$< -g Conv unroll_reduction=16 output.type=int16 -f hannk::conv_r16_u8_u8_i16 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/copy_uint8_uint8.a: $(GENERATOR_BIN)/copy.generator
@mkdir -p $(@D)
$< -g DepthwiseConv -f hannk::depthwise_conv_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly
$< -g Copy input.type=uint8 output.type=uint8 -f hannk::copy_uint8_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-no_bounds_query-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/depthwise_conv_broadcast_uint8.a: $(GENERATOR_BIN)/depthwise_conv.generator
@mkdir -p $(@D)
$< -g DepthwiseConv inv_depth_multiplier=0 -f hannk::depthwise_conv_broadcast_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/depthwise_conv_dm1_uint8.a: $(GENERATOR_BIN)/depthwise_conv.generator
$(BIN)/%/halide/depthwise_conv_uint8.a: $(GENERATOR_BIN)/depthwise_conv.generator
@mkdir -p $(@D)
$< -g DepthwiseConv inv_depth_multiplier=1 -f hannk::depthwise_conv_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/depthwise_conv_shallow_uint8.a: $(GENERATOR_BIN)/depthwise_conv.generator
@mkdir -p $(@D)
$< -g DepthwiseConv inv_depth_multiplier=1 -f hannk::depthwise_conv_dm1_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly
$< -g DepthwiseConv inv_depth_multiplier=1 shallow=true -f hannk::depthwise_conv_shallow_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/elementwise_5xuint8_1xuint8.a: $(GENERATOR_BIN)/elementwise.generator
@mkdir -p $(@D)
Expand All @@ -172,14 +180,6 @@ $(BIN)/%/halide/fill_uint8.a: $(GENERATOR_BIN)/fill.generator
@mkdir -p $(@D)
$< -g Fill -f hannk::fill_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-no_asserts-no_bounds_query-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/fully_connected_uint8_uint8.a: $(GENERATOR_BIN)/fully_connected.generator
@mkdir -p $(@D)
$< -g FullyConnected output.type=uint8 -f hannk::fully_connected_uint8_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/fully_connected_uint8_int16.a: $(GENERATOR_BIN)/fully_connected.generator
@mkdir -p $(@D)
$< -g FullyConnected output.type=int16 -f hannk::fully_connected_uint8_int16 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/l2_normalization_uint8.a: $(GENERATOR_BIN)/normalizations.generator
@mkdir -p $(@D)
$< -g L2Normalization -f hannk::l2_normalization_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-no_bounds_query-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly
Expand All @@ -204,35 +204,40 @@ $(BIN)/%/halide/tile_conv_filter_uint8.a: $(GENERATOR_BIN)/conv.generator
@mkdir -p $(@D)
$< -g TileConvFilter -f hannk::tile_conv_filter_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/upsample_channels_uint8.a: $(GENERATOR_BIN)/depthwise_conv.generator
@mkdir -p $(@D)
$< -g UpsampleChannels -f hannk::upsample_channels_uint8 -o $(BIN)/$*/halide target=$(HL_TARGET)-no_runtime-c_plus_plus_name_mangling -e static_library,assembly,stmt,c_header,llvm_assembly

$(BIN)/%/halide/runtime.a: $(GENERATOR_BIN)/fill.generator
@mkdir -p $(@D)
$< -r runtime -o $(BIN)/$*/halide target=$(HL_TARGET)

OPS_HALIDE = \
$(BIN)/%/halide/add_uint8_uint8.a \
$(BIN)/%/halide/average_pool_uint8.a \
$(BIN)/%/halide/conv_uint8.a \
$(BIN)/%/halide/conv_u8_u8_u8.a \
$(BIN)/%/halide/conv_u8_u8_i16.a \
$(BIN)/%/halide/copy_uint8_uint8.a \
$(BIN)/%/halide/depthwise_conv_uint8.a \
$(BIN)/%/halide/depthwise_conv_broadcast_uint8.a \
$(BIN)/%/halide/depthwise_conv_dm1_uint8.a \
$(BIN)/%/halide/depthwise_conv_shallow_uint8.a \
$(BIN)/%/halide/elementwise_5xuint8_1xuint8.a \
$(BIN)/%/halide/elementwise_5xint16_1xuint8int16.a \
$(BIN)/%/halide/fill_uint8.a \
$(BIN)/%/halide/fully_connected_uint8_uint8.a \
$(BIN)/%/halide/fully_connected_uint8_int16.a \
$(BIN)/%/halide/l2_normalization_uint8.a \
$(BIN)/%/halide/max_pool_uint8.a \
$(BIN)/%/halide/mean_uint8.a \
$(BIN)/%/halide/mul_uint8_uint8_uint8.a \
$(BIN)/%/halide/softmax_uint8.a \
$(BIN)/%/halide/tile_conv_filter_uint8.a \
$(BIN)/%/halide/upsample_channels_uint8.a \
$(BIN)/%/halide/runtime.a

OPS_CXXFLAGS = -I$(BIN)/$*

ifneq (,$(findstring arm_dot_prod,$(HL_TARGET)))
OPS_HALIDE += $(BIN)/%/halide/conv_r16_uint8.a
OPS_HALIDE += $(BIN)/%/halide/conv_r16_u8_u8_u8.a
OPS_HALIDE += $(BIN)/%/halide/conv_r16_u8_u8_i16.a
OPS_CXXFLAGS += -DCONV_R16
endif

Expand Down Expand Up @@ -377,7 +382,7 @@ HANNK_INTERNAL_DELEGATE_DEPS = \

$(BIN)/%/$(BENCHMARK_OUT): benchmark.cpp $(INTERPRETER_DEPS) $(TFLITE_PARSER_DEPS) $(UTIL_DEPS) util/file_util.h
@mkdir -p $(@D)
$(CXX-$*) $(CXXFLAGS-$*) $(BENCHMARK_HEXAGON_FLAGS) $(APP_CXXFLAGS) -Wl,--whole-archive $(filter %.cpp %.o %.a,$^) -Wl,--no-whole-archive -o $@ $(LDFLAGS-$*)
$(CXX-$*) $(CXXFLAGS-$*) $(BENCHMARK_HEXAGON_FLAGS) $(APP_CXXFLAGS) $(filter %.cpp %.o %.a,$^) -o $@ $(LDFLAGS-$*)


# To build for Android, use `HL_TARGET=arm-64-android make compare_vs_tflite`
Expand Down
14 changes: 7 additions & 7 deletions apps/hannk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ All of the [TensorFlow hosted models](https://www.tensorflow.org/lite/guide/host
are working and producing good performance.

### Benchmarks
The comparison data below was produced with TensorFlow v.2.4.0 (the latest release as of this writing):
The comparison data below was produced with TensorFlow v.2.5.0 (the latest release as of this writing):

x86 OSX laptop w/ AVX2:

Expand All @@ -28,14 +28,14 @@ Qualcomm Snapdragon 855 A76 core (Pixel 4):

| Network | TFLite (ms) | Halide (ms) | Speedup |
| ---- | ---- | ---- | ---- |
| inception_v1_224_quant | 24.7 | 25.9 | 0.95 |
| inception_v2_224_quant | 49.8 | 34.2 | 1.46 |
| inception_v3_quant | 97 | 89 | 1.09 |
| inception_v4_299_quant | 198 | 186.7 | 1.06 |
| mobilenet_v1_0.25_128_quant | 0.97 | 0.72 | 1.34 |
| inception_v1_224_quant | 24.7 | 25.0 | 0.99 |
| inception_v2_224_quant | 49.8 | 33.5 | 1.49 |
| inception_v3_quant | 97 | 87.6 | 1.11 |
| inception_v4_299_quant | 198 | 183.4 | 1.09 |
| mobilenet_v1_0.25_128_quant | 0.97 | 0.63 | 1.54 |
| mobilenet_v1_1.0_128_quant | 4.64 | 4.44 | 1.05 |
| mobilenet_v1_1.0_224_quant | 12.9 | 11.6 | 1.11 |
| mobilenet_v2_1.0_224_quant | 11.8 | 9.89 | 1.19 |
| mobilenet_v2_1.0_224_quant | 11.8 | 9.72 | 1.21 |

### Planned but still TODO
- More op support
Expand Down
6 changes: 3 additions & 3 deletions apps/hannk/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ void run_benchmark(const std::string &filename, const InterpreterOptions &option
std::vector<char> buffer = read_entire_file(filename);
std::unique_ptr<OpGroup> model = parse_tflite_model_from_buffer(buffer.data());

if (options.verbose) {
if (options.verbosity >= 1) {
model->dump(std::cout);
}

Expand Down Expand Up @@ -49,7 +49,7 @@ __attribute__((visibility("default"))) int main(int argc, char **argv) {

for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "--verbose")) {
options.verbose = true;
options.verbosity = 1;
continue;
}
if (!strcmp(argv[i], "--trace")) {
Expand All @@ -62,7 +62,7 @@ __attribute__((visibility("default"))) int main(int argc, char **argv) {
}
}

if (options.verbose && options.trace) {
if (options.verbosity > 0 && options.trace) {
HLOG(ERROR) << "You cannot specify --trace and --verbose at the same time.\n";
exit(-1);
}
Expand Down
3 changes: 0 additions & 3 deletions apps/hannk/compare_vs_tflite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ int main(int argc, char **argv) {
runner.run(f);
halide_profiler_report(nullptr);
halide_profiler_reset();
std::cout << "\n";
}

std::cout << "Done!\n";
return 0;
}
Loading

0 comments on commit f0f9f3e

Please sign in to comment.