diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml
index 6db4342b..17ab6fd1 100644
--- a/.github/workflows/test-macos.yml
+++ b/.github/workflows/test-macos.yml
@@ -17,7 +17,9 @@ jobs:
           - name: clang++ C++17
             cxx_compiler: clang++
             cxx_standard: 17
-            cmake_options: ""
+            cmake_options: "-DURL_BUILD_BENCH=ON"
+            after_test: |
+              build/bench-url test/wpt/urltestdata.json
 
     steps:
     - uses: actions/checkout@v4
@@ -32,4 +34,7 @@ jobs:
     - name: build
       run: cmake --build build --config Release
     - name: test
-      run: cd build ; ctest -C Release -V
+      run: ctest --test-dir build -C Release -V
+    - name: after test
+      if: ${{ matrix.after_test }}
+      run: ${{ matrix.after_test }}
diff --git a/.gitignore b/.gitignore
index 6695ed96..4533942e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 /build/
 
 # downloadable dependencies
+/deps/ankerl/
 /deps/doctest/
 /deps/picojson/
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b9e1d870..4cc5991f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,6 +45,7 @@ endif()
 
 # Options
 option(URL_BUILD_TESTS "Build the Upa URL tests." ${URL_MAIN_PROJECT})
+option(URL_BUILD_BENCH "Build the Upa URL benchmarks." OFF)
 option(URL_BUILD_FUZZER "Build the Upa URL fuzzer." OFF)
 option(URL_BUILD_EXAMPLES "Build the Upa URL examples." OFF)
 option(URL_BUILD_EXTRACTED "Build Upa URL examples extracted from the docs." OFF)
@@ -111,8 +112,8 @@ endif()
 include_directories(deps)
 
 # Are Upa URL and ICU libraries needed?
-if (URL_BUILD_TESTS OR URL_BUILD_FUZZER OR URL_BUILD_EXAMPLES OR URL_BUILD_EXTRACTED OR
-    URL_INSTALL OR NOT URL_BUILD_TOOLS)
+if (URL_BUILD_TESTS OR URL_BUILD_BENCH OR URL_BUILD_FUZZER OR URL_BUILD_EXAMPLES OR
+    URL_BUILD_EXTRACTED OR URL_INSTALL OR NOT URL_BUILD_TOOLS)
   # This library depends on ICU
   find_package(ICU REQUIRED COMPONENTS i18n uc)
 
@@ -187,6 +188,18 @@ if (URL_BUILD_TESTS)
   endforeach()
 endif()
 
+# Benchmark targets
+
+if (URL_BUILD_BENCH)
+  file(GLOB bench_files test/bench-*.cpp)
+
+  foreach(file ${bench_files})
+    get_filename_component(exe_name ${file} NAME_WE)
+    add_executable(${exe_name} ${file})
+    target_link_libraries(${exe_name} PRIVATE upa::url)
+  endforeach()
+endif()
+
 # Fuzzer targets
 
 if (URL_BUILD_FUZZER)
diff --git a/deps/ankerl/.gitkeep b/deps/ankerl/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/deps/download-deps.bat b/deps/download-deps.bat
--- a/deps/download-deps.bat
+++ b/deps/download-deps.bat
@@ -5,3 +5,4 @@ set p=%~dp0
 
 curl -fsS -o %p%\doctest\doctest.h https://raw.githubusercontent.com/doctest/doctest/v2.4.11/doctest/doctest.h
 curl -fsS -o %p%\picojson\picojson.h https://raw.githubusercontent.com/kazuho/picojson/111c9be5188f7350c2eac9ddaedd8cca3d7bf394/picojson.h
+curl -fsS -o %p%\ankerl\nanobench.h https://raw.githubusercontent.com/martinus/nanobench/v4.3.11/src/include/nanobench.h
diff --git a/deps/download-deps.sh b/deps/download-deps.sh
index 803f47ec..17643599 100755
--- a/deps/download-deps.sh
+++ b/deps/download-deps.sh
@@ -6,3 +6,4 @@ p="$(dirname "$0")"
 
 curl -fsS -o $p/doctest/doctest.h https://raw.githubusercontent.com/doctest/doctest/v2.4.11/doctest/doctest.h
 curl -fsS -o $p/picojson/picojson.h https://raw.githubusercontent.com/kazuho/picojson/111c9be5188f7350c2eac9ddaedd8cca3d7bf394/picojson.h
+curl -fsS -o $p/ankerl/nanobench.h https://raw.githubusercontent.com/martinus/nanobench/v4.3.11/src/include/nanobench.h
diff --git a/test/bench-url.cpp b/test/bench-url.cpp
new file mode 100644
--- /dev/null
+++ b/test/bench-url.cpp
@@ -0,0 +1,213 @@
+#include "upa/url.h"
+#include "picojson_fffd.h"
+
+#include <cerrno>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+#define ANKERL_NANOBENCH_IMPLEMENT
+#include "ankerl/nanobench.h"
+
+// -----------------------------------------------------------------------------
+// Read samples from text file (URL in each line) and benchmark
+
+// Loads one URL string per line from file_name and benchmarks parsing the
+// whole set without a base URL. min_iters is forwarded to nanobench's
+// minEpochIterations().
+// Returns 0 on success, 2 if the file cannot be opened.
+int benchmark_txt(const char* file_name, std::uint64_t min_iters) {
+    // Load URL samples
+    std::cout << "Load URL samples from: " << file_name << '\n';
+    std::ifstream finp(file_name);
+    if (!finp.is_open()) {
+        std::cerr << "Failed to open " << file_name << '\n';
+        return 2;
+    }
+
+    std::vector<std::string> url_strings;
+    for (std::string line; std::getline(finp, line);)
+        url_strings.push_back(line);
+
+    // Run benchmark: each measured run parses every loaded sample
+    ankerl::nanobench::Bench().minEpochIterations(min_iters).run("Upa URL", [&] {
+        upa::url url;
+
+        for (const auto& str_url : url_strings) {
+            url.parse(str_url, nullptr);
+
+            ankerl::nanobench::doNotOptimizeAway(url);
+        }
+    });
+
+    return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Read samples from urltestdata.json and benchmark
+
+// picojson parse context that accepts only an array as the root value.
+// Every array item is parsed into a picojson::value and passed to the
+// on_array_item callback, whose result is returned to the parser.
+template <class OnArrayItem>
+class root_array_context : public picojson::deny_parse_context {
+    OnArrayItem on_array_item_;
+public:
+    explicit root_array_context(OnArrayItem on_array_item)
+        : on_array_item_(std::move(on_array_item))
+    {}
+
+    // array as root
+    bool parse_array_start() { return true; }
+    bool parse_array_stop(std::size_t) { return true; }
+
+    template <typename Iter>
+    bool parse_array_item(picojson::input<Iter>& in, std::size_t) {
+        picojson::value item;
+
+        // parse the array item
+        picojson::default_parse_context ctx(&item);
+        if (!picojson::_parse(ctx, in))
+            return false;
+
+        // callback with array item
+        return on_array_item_(item);
+    }
+
+    // deny object as root
+    bool parse_object_start() { return false; }
+    bool parse_object_stop() { return false; }
+};
+
+// Parses the JSON file file_name using the parse context ctx.
+// Reports to std::cerr and returns false if the file cannot be opened or
+// the parser produces an error message; returns true otherwise.
+template <typename Context>
+bool load_tests(Context& ctx, const char* file_name) {
+    // Load URL samples
+    std::cout << "Load URL samples from: " << file_name << '\n';
+    std::ifstream file(file_name, std::ios_base::in | std::ios_base::binary);
+    if (!file.is_open()) {
+        std::cerr << "Can't open file: " << file_name << '\n';
+        return false;
+    }
+
+    std::string err;
+
+    // for unformatted reading use std::istreambuf_iterator
+    // http://stackoverflow.com/a/17776228/3908097
+    picojson::_parse(ctx, std::istreambuf_iterator<char>(file),
+        std::istreambuf_iterator<char>(), &err);
+
+    if (!err.empty()) {
+        std::cerr << err << '\n';
+        return false;
+    }
+    return true;
+}
+
+// Loads (input, base) string pairs from a urltestdata.json style file and
+// benchmarks parsing every input, against its base when one is given.
+// Returns 0 on success, 2 if the samples cannot be loaded.
+int benchmark_wpt(const char* file_name, std::uint64_t min_iters) {
+    // Load URL strings: first = input, second = base (empty when base is null)
+    std::vector<std::pair<std::string, std::string>> url_samples;
+
+    root_array_context context{ [&](const picojson::value& item) {
+        // only object items carry samples; any other item is skipped
+        if (item.is<picojson::object>()) {
+            try {
+                const picojson::object& obj = item.get<picojson::object>();
+                // bind by reference to avoid copying the JSON values
+                const auto& input_val = obj.at("input");
+                const auto& base_val = obj.at("base");
+
+                url_samples.emplace_back(
+                    input_val.get<std::string>(),
+                    base_val.is<picojson::null>() ? std::string{} : base_val.get<std::string>());
+            }
+            catch (const std::exception& ex) {
+                // e.g. std::out_of_range from at() when a key is missing;
+                // stop loading instead of letting the exception escape
+                std::cerr << "[ERR:invalid file]: " << ex.what() << '\n';
+                return false;
+            }
+        }
+        return true;
+    } };
+
+    if (!load_tests(context, file_name))
+        return 2;
+
+    // Run benchmark
+    ankerl::nanobench::Bench().minEpochIterations(min_iters).run("Upa URL", [&] {
+        upa::url url;
+        upa::url url_base;
+
+        for (const auto& [str_input, str_base] : url_samples) {
+            upa::url* ptr_base = nullptr;
+            if (!str_base.empty()) {
+                if (!upa::success(url_base.parse(str_base, nullptr)))
+                    continue; // invalid base
+                ptr_base = &url_base;
+            }
+            url.parse(str_input, ptr_base);
+
+            ankerl::nanobench::doNotOptimizeAway(url);
+        }
+    });
+
+    return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+// Parses str as a decimal integer; returns it when it is positive,
+// otherwise (zero, negative, non-numeric or out of range) returns def.
+std::uint64_t get_positive_or_default(const char* str, std::uint64_t def)
+{
+    // Parse as signed: std::strtoull() accepts a leading '-' and wraps the
+    // value around, so "-1" would become a huge iteration count.
+    errno = 0;
+    const long long res = std::strtoll(str, nullptr, 10);
+    if (res > 0 && errno != ERANGE)
+        return static_cast<std::uint64_t>(res);
+    return def;
+}
+
+// Usage: bench-url <file> [<min iterations>]
+// Runs the benchmark selected by the file extension (.json or .txt).
+// Returns 0 on success, 1 on invalid arguments, 2 if samples fail to load.
+int main(int argc, const char* argv[])
+{
+    constexpr std::uint64_t min_iters_def = 3;
+
+    if (argc < 2) {
+        std::cerr << "Usage: bench-url <file> [<min iterations>]\n";
+        return 1;
+    }
+
+    const std::filesystem::path file_name = argv[1];
+    const std::uint64_t min_iters = argc > 2
+        ? get_positive_or_default(argv[2], min_iters_def)
+        : min_iters_def;
+
+    // Propagate the benchmark status so that load failures are visible to
+    // the caller through the process exit code.
+    const auto ext = file_name.extension();
+    if (ext == ".json")
+        return benchmark_wpt(file_name.string().c_str(), min_iters);
+    if (ext == ".txt")
+        return benchmark_txt(file_name.string().c_str(), min_iters);
+
+    std::cerr << "File containing URLs should have .json or .txt extension.\n";
+    return 1;
+}