diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 566d22a45..2468014a2 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -2,9 +2,9 @@ name: C++ CI with Bazel on: push: - branches: [ master ] + branches: [master, bazel] pull_request: - branches: [ master ] + branches: [master] jobs: build: @@ -13,4 +13,4 @@ jobs: - uses: actions/checkout@v4 - uses: bazelbuild/setup-bazelisk@v3 - run: bazel build //:opencc - - run: bazel test //src/... + - run: bazel test --test_output=all //src/... //data/... //test/... diff --git a/BUILD.bazel b/BUILD.bazel index 1419e3147..6c61b49ba 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,5 +1,18 @@ +package(default_visibility = ["//visibility:public"]) + cc_library( name = "opencc", + hdrs = [ + "//src:Export.hpp", + "//src:SimpleConverter.hpp", + "//src:opencc.h", + ], + data = [ + "//data/config", + "//data/dictionary:binary_dictionaries", + "//data/dictionary:text_dictionaries", + ], + strip_include_prefix = "src", deps = [ "//src:opencc", ], diff --git a/MODULE.bazel b/MODULE.bazel index 517df6ca1..6a50ee8a3 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -9,3 +9,7 @@ module( bazel_dep(name = "darts-clone", version = "0.32") bazel_dep(name = "googletest", version = "1.14.0.bcr.1") bazel_dep(name = "marisa-trie", version = "0.2.6") +bazel_dep(name = "rapidjson", version = "1.1.0") +bazel_dep(name = "rules_cc", version = "0.0.9") +bazel_dep(name = "rules_python", version = "0.34.0") +bazel_dep(name = "tclap", version = "1.2.5") diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index f7ec958cd..768826959 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -5,17 +5,21 @@ "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/source.json": "14892cc698e02ffedf4967546e6bedb7245015906888d3465fcf27c90a26da10", "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16", - "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/source.json": "7949ba8f239b40b9b24a21ff7f5f9554cf08e8c1161fca37b7cc42fa595db5f0", "https://bcr.bazel.build/modules/apple_support/1.5.0/MODULE.bazel": "50341a62efbc483e8a2a6aec30994a58749bd7b885e18dd96aa8c33031e558ef", "https://bcr.bazel.build/modules/apple_support/1.5.0/source.json": "eb98a7627c0bc486b57f598ad8da50f6625d974c8f723e9ea71bd39f709c9862", "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", "https://bcr.bazel.build/modules/bazel_features/1.11.0/source.json": "c9320aa53cd1c441d24bd6b716da087ad7e4ff0d9742a9884587596edfe53015", + "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686", "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", + "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/source.json": "082ed5f9837901fada8c68c2f3ddc958bb22b6d654f71dd73f3df30d45d4b749", "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", @@ -23,42 +27,47 @@ "https://bcr.bazel.build/modules/darts-clone/0.32/MODULE.bazel": "bdd235e31dd7f2538ff8b3ab3ef09c831349b141afca587d32b487d75c502361", "https://bcr.bazel.build/modules/darts-clone/0.32/source.json": "c65158c152e276f3c59dc0fc0fa746f1ff601e23b0a09812e024fe563e4dc99c", "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", - "https://bcr.bazel.build/modules/google_benchmark/1.8.2/source.json": "c5ec7882c4122369f645139400fa6a1ce5092cec12893bf90bcc32914a38508b", "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/source.json": "41e9e129f80d8c8bf103a7acc337b76e54fad1214ac0a7084bf24f4cd924b8b4", "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", - "https://bcr.bazel.build/modules/libpfm/4.11.0/source.json": "caaffb3ac2b59b8aac456917a4ecf3167d40478ee79f15ab7a877ec9273937c9", "https://bcr.bazel.build/modules/marisa-trie/0.2.6/MODULE.bazel": "3a4e187ae58831081fe6b38d3f58f44e9d929164b2c1bc970821f076a023dcb6", "https://bcr.bazel.build/modules/marisa-trie/0.2.6/source.json": "a9670e7b0889be633edb31e9aa4ffffa6a562ead1c576cf8ff17f474e54d2c59", "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d", "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc", "https://bcr.bazel.build/modules/platforms/0.0.9/source.json": "cd74d854bf16a9e002fb2ca7b1a421f4403cda29f824a765acd3a8c56f8d43e6", "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", - "https://bcr.bazel.build/modules/protobuf/21.7/source.json": "bbe500720421e582ff2d18b0802464205138c06056f443184de39fbb8187b09b", + "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a", + "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12", + "https://bcr.bazel.build/modules/protobuf/24.4/source.json": "ace4b8c65d4cfe64efe544f09fc5e5df77faf3a67fbb29c5341e0d755d9b15d6", "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858", "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/source.json": "be4789e951dd5301282729fe3d4938995dc4c1a81c2ff150afc9f1b0504c6022", + "https://bcr.bazel.build/modules/rapidjson/1.1.0/MODULE.bazel": "0367b53ebffe290358729893e7c435da379397738e09ae45c845e1e4f59fa3fc", + "https://bcr.bazel.build/modules/rapidjson/1.1.0/source.json": "0e1c31420d28513742394cd6ab5c4ed004e097670fc85fcf111cdcab96f381bb", "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", "https://bcr.bazel.build/modules/re2/2023-09-01/source.json": "e044ce89c2883cd957a2969a43e79f7752f9656f6b20050b62f90ede21ec6eb4", "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.5/MODULE.bazel": "be41f87587998fe8890cd82ea4e848ed8eb799e053c224f78f3ff7fe1a1d9b74", "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f", "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", "https://bcr.bazel.build/modules/rules_cc/0.0.9/source.json": "1f1ba6fea244b616de4a554a0f4983c91a9301640c8fe0dd1d410254115c8430", "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", - "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/source.json": "8be72488e3139bdca3af856fecc3860d0c480ba52e67b4035d0741b19e6d96d7", "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64", "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", "https://bcr.bazel.build/modules/rules_java/7.6.1/source.json": "8f3f3076554e1558e8e468b2232991c510ecbcbed9e6f8c06ac31c93bcf38362", "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", - "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/source.json": "a075731e1b46bc8425098512d038d416e966ab19684a10a34f4741295642fc35", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/source.json": "5abb45cc9beb27b77aec6a65a11855ef2b55d95dfdc358e9f312b78ae0ba32d5", "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", "https://bcr.bazel.build/modules/rules_license/0.0.7/source.json": "355cc5737a0f294e560d52b1b7a6492d4fff2caf0bef1a315df5a298fca2d34a", @@ -66,16 +75,22 @@ "https://bcr.bazel.build/modules/rules_pkg/0.7.0/source.json": "c2557066e0c0342223ba592510ad3d812d4963b9024831f7f66fd0584dd8c66c", "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", - "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/source.json": "d57902c052424dfda0e71646cb12668d39c4620ee0544294d9d941e7d12bc3a9", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/source.json": "8d8448e71706df7450ced227ca6b3812407ff5e2ccad74a43a9fbe79c84e34e0", "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", "https://bcr.bazel.build/modules/rules_python/0.22.1/MODULE.bazel": "26114f0c0b5e93018c0c066d6673f1a2c3737c7e90af95eff30cfee38d0bbac7", "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382", - "https://bcr.bazel.build/modules/rules_python/0.25.0/source.json": "c45006984eeaa18ad14c006091b264bff620c41952e9184edfe225ea95c3f986", + "https://bcr.bazel.build/modules/rules_python/0.34.0/MODULE.bazel": "1d623d026e075b78c9fde483a889cda7996f5da4f36dffb24c246ab30f06513a", + "https://bcr.bazel.build/modules/rules_python/0.34.0/source.json": "113116e287eec64a7d005a9db44865d810499fdc4f621e352aff58214f5ea2d8", "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", - "https://bcr.bazel.build/modules/stardoc/0.5.1/source.json": "a96f95e02123320aa015b956f29c00cb818fa891ef823d55148e1a362caacf29", + "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", + "https://bcr.bazel.build/modules/stardoc/0.5.3/source.json": "cd53fe968dc8cd98197c052db3db6d82562960c87b61e7a90ee96f8e4e0dda97", + "https://bcr.bazel.build/modules/tclap/1.2.5/MODULE.bazel": "d91b779402516ce378283a867e5af24bcc37a8cf80934bf7f9679d082eaded53", + "https://bcr.bazel.build/modules/tclap/1.2.5/source.json": "8e519d780d8bb314bbe87af7aa50f0ba7fe68e2450e6df97f860ed105aecd41e", "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", - "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/source.json": "f1ef7d3f9e0e26d4b23d1c39b5f5de71f584dd7d1b4ef83d9bbba6ec7a6a6459", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/source.json": "b2150404947339e8b947c6b16baa39fa75657f4ddec5e37272c7b11c7ab533bc", "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", "https://bcr.bazel.build/modules/zlib/1.3/MODULE.bazel": "6a9c02f19a24dcedb05572b2381446e27c272cd383aed11d41d99da9e3167a72", @@ -127,458 +142,6 @@ }, "recordedRepoMappingEntries": [] } - }, - "@@rules_foreign_cc~//foreign_cc:extensions.bzl%ext": { - "general": { - "bzlTransitiveDigest": "lWyCSIOJXmfZWoeZWHLfkIAcjH1k5Y4uEnqPUow9afc=", - "usagesDigest": "ISoJ3lFTlj+YHDNZEpB9nHkEJAqhSjt4FHtQUfAuFR4=", - "recordedFileInputs": {}, - "recordedDirentsInputs": {}, - "envVariables": {}, - "generatedRepoSpecs": { - "cmake-3.23.2-linux-aarch64": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-linux-aarch64.tar.gz" - ], - "sha256": "f2654bf780b53f170bbbec44d8ac67d401d24788e590faa53036a89476efa91e", - "strip_prefix": "cmake-3.23.2-linux-aarch64", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"cmake_data\",\n srcs = glob(\n [\n \"**\",\n ],\n exclude = [\n \"WORKSPACE\",\n \"WORKSPACE.bazel\",\n \"BUILD\",\n \"BUILD.bazel\",\n ],\n ),\n)\n\nnative_tool_toolchain(\n name = \"cmake_tool\",\n path = \"bin/cmake\",\n target = \":cmake_data\",\n)\n" - } - }, - "rules_foreign_cc_framework_toolchain_macos": { - "bzlFile": "@@rules_foreign_cc~//foreign_cc/private/framework:toolchain.bzl", - "ruleClassName": "framework_toolchain_repository", - "attributes": { - "commands_src": "@rules_foreign_cc//foreign_cc/private/framework/toolchains:macos_commands.bzl", - "exec_compatible_with": [ - "@platforms//os:macos" - ], - "target_compatible_with": [] - } - }, - "ninja_1.11.0_linux": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/ninja-build/ninja/releases/download/v1.11.0/ninja-linux.zip" - ], - "sha256": "9726e730d5b8599f82654dc80265e64a10a8a817552c34153361ed0c017f9f02", - "strip_prefix": "", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"ninja_bin\",\n srcs = [\"ninja\"],\n)\n\nnative_tool_toolchain(\n name = \"ninja_tool\",\n env = {\"NINJA\": \"$(execpath :ninja_bin)\"},\n path = \"$(execpath :ninja_bin)\",\n target = \":ninja_bin\",\n)\n" - } - }, - "gnumake_src": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "build_file_content": "filegroup(\n name = \"all_srcs\",\n srcs = glob([\"**\"]),\n visibility = [\"//visibility:public\"],\n)\n", - "patches": [ - "@@rules_foreign_cc~//toolchains:make-reproducible-bootstrap.patch" - ], - "sha256": "e05fdde47c5f7ca45cb697e973894ff4f5d79e13b750ed57d7b66d8defc78e19", - "strip_prefix": "make-4.3", - "urls": [ - "https://mirror.bazel.build/ftpmirror.gnu.org/gnu/make/make-4.3.tar.gz", - "http://ftpmirror.gnu.org/gnu/make/make-4.3.tar.gz" - ] - } - }, - "ninja_1.11.0_win": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/ninja-build/ninja/releases/download/v1.11.0/ninja-win.zip" - ], - "sha256": "d0ee3da143211aa447e750085876c9b9d7bcdd637ab5b2c5b41349c617f22f3b", - "strip_prefix": "", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"ninja_bin\",\n srcs = [\"ninja.exe\"],\n)\n\nnative_tool_toolchain(\n name = \"ninja_tool\",\n env = {\"NINJA\": \"$(execpath :ninja_bin)\"},\n path = \"$(execpath :ninja_bin)\",\n target = \":ninja_bin\",\n)\n" - } - }, - "cmake_3.23.2_toolchains": { - "bzlFile": "@@rules_foreign_cc~//toolchains:prebuilt_toolchains_repository.bzl", - "ruleClassName": "prebuilt_toolchains_repository", - "attributes": { - "repos": { - "cmake-3.23.2-linux-aarch64": [ - "@platforms//cpu:aarch64", - "@platforms//os:linux" - ], - "cmake-3.23.2-linux-x86_64": [ - "@platforms//cpu:x86_64", - "@platforms//os:linux" - ], - "cmake-3.23.2-macos-universal": [ - "@platforms//os:macos" - ], - "cmake-3.23.2-windows-i386": [ - "@platforms//cpu:x86_32", - "@platforms//os:windows" - ], - "cmake-3.23.2-windows-x86_64": [ - "@platforms//cpu:x86_64", - "@platforms//os:windows" - ] - }, - "tool": "cmake" - } - }, - "cmake_src": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "build_file_content": "filegroup(\n name = \"all_srcs\",\n srcs = glob([\"**\"]),\n visibility = [\"//visibility:public\"],\n)\n", - "sha256": "f316b40053466f9a416adf981efda41b160ca859e97f6a484b447ea299ff26aa", - "strip_prefix": "cmake-3.23.2", - "urls": [ - "https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2.tar.gz" - ] - } - }, - "bazel_skylib": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", - "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz" - ], - "sha256": "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728" - } - }, - "cmake-3.23.2-macos-universal": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-macos-universal.tar.gz" - ], - "sha256": "853a0f9af148c5ef47282ffffee06c4c9f257be2635936755f39ca13c3286c88", - "strip_prefix": "cmake-3.23.2-macos-universal/CMake.app/Contents", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"cmake_data\",\n srcs = glob(\n [\n \"**\",\n ],\n exclude = [\n \"WORKSPACE\",\n \"WORKSPACE.bazel\",\n \"BUILD\",\n \"BUILD.bazel\",\n ],\n ),\n)\n\nnative_tool_toolchain(\n name = \"cmake_tool\",\n path = \"bin/cmake\",\n target = \":cmake_data\",\n)\n" - } - }, - "rules_foreign_cc_framework_toolchain_freebsd": { - "bzlFile": "@@rules_foreign_cc~//foreign_cc/private/framework:toolchain.bzl", - "ruleClassName": "framework_toolchain_repository", - "attributes": { - "commands_src": "@rules_foreign_cc//foreign_cc/private/framework/toolchains:freebsd_commands.bzl", - "exec_compatible_with": [ - "@platforms//os:freebsd" - ], - "target_compatible_with": [] - } - }, - "rules_foreign_cc_framework_toolchain_linux": { - "bzlFile": "@@rules_foreign_cc~//foreign_cc/private/framework:toolchain.bzl", - "ruleClassName": "framework_toolchain_repository", - "attributes": { - "commands_src": "@rules_foreign_cc//foreign_cc/private/framework/toolchains:linux_commands.bzl", - "exec_compatible_with": [ - "@platforms//os:linux" - ], - "target_compatible_with": [] - } - }, - "rules_python": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "sha256": "5fa3c738d33acca3b97622a13a741129f67ef43f5fdfcec63b29374cc0574c29", - "strip_prefix": "rules_python-0.9.0", - "url": "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.9.0.tar.gz" - } - }, - "ninja_1.11.0_mac": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/ninja-build/ninja/releases/download/v1.11.0/ninja-mac.zip" - ], - "sha256": "21915277db59756bfc61f6f281c1f5e3897760b63776fd3d360f77dd7364137f", - "strip_prefix": "", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"ninja_bin\",\n srcs = [\"ninja\"],\n)\n\nnative_tool_toolchain(\n name = \"ninja_tool\",\n env = {\"NINJA\": \"$(execpath :ninja_bin)\"},\n path = \"$(execpath :ninja_bin)\",\n target = \":ninja_bin\",\n)\n" - } - }, - "ninja_build_src": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "build_file_content": "filegroup(\n name = \"all_srcs\",\n srcs = glob([\"**\"]),\n visibility = [\"//visibility:public\"],\n)\n", - "sha256": "3c6ba2e66400fe3f1ae83deb4b235faf3137ec20bd5b08c29bfc368db143e4c6", - "strip_prefix": "ninja-1.11.0", - "urls": [ - "https://github.com/ninja-build/ninja/archive/v1.11.0.tar.gz" - ] - } - }, - "cmake-3.23.2-windows-i386": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-windows-i386.zip" - ], - "sha256": "6a4fcd6a2315b93cb23c93507efccacc30c449c2bf98f14d6032bb226c582e07", - "strip_prefix": "cmake-3.23.2-windows-i386", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"cmake_data\",\n srcs = glob(\n [\n \"**\",\n ],\n exclude = [\n \"WORKSPACE\",\n \"WORKSPACE.bazel\",\n \"BUILD\",\n \"BUILD.bazel\",\n ],\n ),\n)\n\nnative_tool_toolchain(\n name = \"cmake_tool\",\n path = \"bin/cmake.exe\",\n target = \":cmake_data\",\n)\n" - } - }, - "cmake-3.23.2-linux-x86_64": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-linux-x86_64.tar.gz" - ], - "sha256": "aaced6f745b86ce853661a595bdac6c5314a60f8181b6912a0a4920acfa32708", - "strip_prefix": "cmake-3.23.2-linux-x86_64", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"cmake_data\",\n srcs = glob(\n [\n \"**\",\n ],\n exclude = [\n \"WORKSPACE\",\n \"WORKSPACE.bazel\",\n \"BUILD\",\n \"BUILD.bazel\",\n ],\n ),\n)\n\nnative_tool_toolchain(\n name = \"cmake_tool\",\n path = \"bin/cmake\",\n target = \":cmake_data\",\n)\n" - } - }, - "cmake-3.23.2-windows-x86_64": { - "bzlFile": "@@bazel_tools//tools/build_defs/repo:http.bzl", - "ruleClassName": "http_archive", - "attributes": { - "urls": [ - "https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-windows-x86_64.zip" - ], - "sha256": "2329387f3166b84c25091c86389fb891193967740c9bcf01e7f6d3306f7ffda0", - "strip_prefix": "cmake-3.23.2-windows-x86_64", - "build_file_content": "load(\"@rules_foreign_cc//toolchains/native_tools:native_tools_toolchain.bzl\", \"native_tool_toolchain\")\n\npackage(default_visibility = [\"//visibility:public\"])\n\nfilegroup(\n name = \"cmake_data\",\n srcs = glob(\n [\n \"**\",\n ],\n exclude = [\n \"WORKSPACE\",\n \"WORKSPACE.bazel\",\n \"BUILD\",\n \"BUILD.bazel\",\n ],\n ),\n)\n\nnative_tool_toolchain(\n name = \"cmake_tool\",\n path = \"bin/cmake.exe\",\n target = \":cmake_data\",\n)\n" - } - }, - "rules_foreign_cc_framework_toolchain_windows": { - "bzlFile": "@@rules_foreign_cc~//foreign_cc/private/framework:toolchain.bzl", - "ruleClassName": "framework_toolchain_repository", - "attributes": { - "commands_src": "@rules_foreign_cc//foreign_cc/private/framework/toolchains:windows_commands.bzl", - "exec_compatible_with": [ - "@platforms//os:windows" - ], - "target_compatible_with": [] - } - }, - "ninja_1.11.0_toolchains": { - "bzlFile": "@@rules_foreign_cc~//toolchains:prebuilt_toolchains_repository.bzl", - "ruleClassName": "prebuilt_toolchains_repository", - "attributes": { - "repos": { - "ninja_1.11.0_linux": [ - "@platforms//cpu:x86_64", - "@platforms//os:linux" - ], - "ninja_1.11.0_mac": [ - "@platforms//cpu:x86_64", - "@platforms//os:macos" - ], - "ninja_1.11.0_win": [ - "@platforms//cpu:x86_64", - "@platforms//os:windows" - ] - }, - "tool": "ninja" - } - } - }, - "recordedRepoMappingEntries": [ - [ - "rules_foreign_cc~", - "bazel_tools", - "bazel_tools" - ], - [ - "rules_foreign_cc~", - "rules_foreign_cc", - "rules_foreign_cc~" - ] - ] - } - }, - "@@rules_python~//python/extensions:python.bzl%python": { - "general": { - "bzlTransitiveDigest": "Kf/7zZzswWR1HLTsIkowkR+hnYBf2occRd9uW0D1KME=", - "usagesDigest": "ZrD60LyZ9UaPARLmqCmHvbT6XpgCR15SzpV2MblMrYM=", - "recordedFileInputs": {}, - "recordedDirentsInputs": {}, - "envVariables": {}, - "generatedRepoSpecs": { - "python_3_11_s390x-unknown-linux-gnu": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "e477f0749161f9aa7887964f089d9460a539f6b4a8fdab5166f898210e1a87a4", - "patches": [], - "platform": "s390x-unknown-linux-gnu", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-s390x-unknown-linux-gnu-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-s390x-unknown-linux-gnu-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - }, - "python_3_11": { - "bzlFile": "@@rules_python~//python/private:toolchains_repo.bzl", - "ruleClassName": "toolchain_aliases", - "attributes": { - "python_version": "3.11.4", - "user_repository_name": "python_3_11" - } - }, - "python_3_11_aarch64-unknown-linux-gnu": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "2e84fc53f4e90e11963281c5c871f593abcb24fc796a50337fa516be99af02fb", - "patches": [], - "platform": "aarch64-unknown-linux-gnu", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-aarch64-unknown-linux-gnu-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-aarch64-unknown-linux-gnu-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - }, - "python_3_11_aarch64-apple-darwin": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "cb6d2948384a857321f2aa40fa67744cd9676a330f08b6dad7070bda0b6120a4", - "patches": [], - "platform": "aarch64-apple-darwin", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-aarch64-apple-darwin-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-aarch64-apple-darwin-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - }, - "python_3_11_ppc64le-unknown-linux-gnu": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "df7b92ed9cec96b3bb658fb586be947722ecd8e420fb23cee13d2e90abcfcf25", - "patches": [], - "platform": "ppc64le-unknown-linux-gnu", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-ppc64le-unknown-linux-gnu-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-ppc64le-unknown-linux-gnu-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - }, - "python_3_11_x86_64-apple-darwin": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "47e1557d93a42585972772e82661047ca5f608293158acb2778dccf120eabb00", - "patches": [], - "platform": "x86_64-apple-darwin", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-x86_64-apple-darwin-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-x86_64-apple-darwin-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - }, - "pythons_hub": { - "bzlFile": "@@rules_python~//python/extensions/private:pythons_hub.bzl", - "ruleClassName": "hub_repo", - "attributes": { - "default_python_version": "3.11", - "toolchain_prefixes": [ - "_0000_python_3_11_" - ], - "toolchain_python_versions": [ - "3.11" - ], - "toolchain_set_python_version_constraints": [ - "False" - ], - "toolchain_user_repository_names": [ - "python_3_11" - ] - } - }, - "python_versions": { - "bzlFile": "@@rules_python~//python/private:toolchains_repo.bzl", - "ruleClassName": "multi_toolchain_aliases", - "attributes": { - "python_versions": { - "3.11": "python_3_11" - } - } - }, - "python_3_11_x86_64-pc-windows-msvc": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "878614c03ea38538ae2f758e36c85d2c0eb1eaaca86cd400ff8c76693ee0b3e1", - "patches": [], - "platform": "x86_64-pc-windows-msvc", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-x86_64-pc-windows-msvc-shared-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-x86_64-pc-windows-msvc-shared-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - }, - "python_3_11_x86_64-unknown-linux-gnu": { - "bzlFile": "@@rules_python~//python:repositories.bzl", - "ruleClassName": "python_repository", - "attributes": { - "sha256": "e26247302bc8e9083a43ce9e8dd94905b40d464745b1603041f7bc9a93c65d05", - "patches": [], - "platform": "x86_64-unknown-linux-gnu", - "python_version": "3.11.4", - "release_filename": "20230726/cpython-3.11.4+20230726-x86_64-unknown-linux-gnu-install_only.tar.gz", - "urls": [ - "https://github.com/indygreg/python-build-standalone/releases/download/20230726/cpython-3.11.4+20230726-x86_64-unknown-linux-gnu-install_only.tar.gz" - ], - "distutils_content": "", - "strip_prefix": "python", - "coverage_tool": "", - "ignore_root_user_error": false - } - } - }, - "recordedRepoMappingEntries": [ - [ - "rules_python~", - "bazel_tools", - "bazel_tools" - ] - ] - } } } } diff --git a/data/config/BUILD.bazel b/data/config/BUILD.bazel new file mode 100644 index 000000000..ea3ee617d --- /dev/null +++ b/data/config/BUILD.bazel @@ -0,0 +1,6 @@ +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "config", + srcs = glob(["*.json"]), +) diff --git a/data/dictionary/BUILD.bazel b/data/dictionary/BUILD.bazel new file mode 100644 index 000000000..3a1f1aa0a --- /dev/null +++ b/data/dictionary/BUILD.bazel @@ -0,0 +1,79 @@ +package(default_visibility = ["//visibility:public"]) + +genrule( + name = "merge_TWPhrases", + srcs = [ + "TWPhrasesIT.txt", + "TWPhrasesName.txt", + "TWPhrasesOther.txt", + ], + outs = ["TWPhrases.txt"], + cmd = "$(location //data/scripts:merge) " + + "$(SRCS) $(OUTS)", + tools = ["//data/scripts:merge"], +) + +[ + genrule( + name = "reverse_" + txt, + srcs = [txt + ".txt"], + outs = [txt + "Rev.txt"], + cmd = "$(location //data/scripts:reverse) " + + "$(SRCS) $(OUTS)", + tools = ["//data/scripts:reverse"], + ) + for txt in [ + "TWVariants", + "TWPhrases", + "HKVariants", + "JPVariants", + ] +] + +TEXT_DICTS = glob(["*.txt"]) + [ + "TWPhrases.txt", + "TWVariantsRev.txt", + "TWPhrasesRev.txt", + "HKVariantsRev.txt", + "JPVariantsRev.txt", +] + +[ + genrule( + name = "generate_bin_" + txt[:-4], + srcs = [txt], + outs = [txt.replace(".txt", ".ocd2")], + cmd = "$(location //src/tools:dict_converter) " + + "--input $(location " + txt + ") " + + "--output $(OUTS) " + + "--from text " + + "--to ocd2", + tools = ["//src/tools:dict_converter"], + ) + for txt in TEXT_DICTS +] + +filegroup( + name = "text_dictionaries", + srcs = TEXT_DICTS, +) + +filegroup( + name = "binary_dictionaries", + srcs = [txt.replace(".txt", ".ocd2") for txt in TEXT_DICTS], +) + +cc_test( + name = "dictionary_test", + srcs = ["DictionaryTest.cpp"], + data = [ + ":binary_dictionaries", + ":text_dictionaries", + ], + deps = [ + "//src:lexicon", + "//src:marisa_dict", + "//src:utf8_util", + "@googletest//:gtest_main", + ], +) diff --git a/data/dictionary/DictionaryTest.cpp b/data/dictionary/DictionaryTest.cpp new file mode 100644 index 000000000..7b931c722 --- /dev/null +++ b/data/dictionary/DictionaryTest.cpp @@ -0,0 +1,90 @@ +/* + * Open Chinese Convert + * + * Copyright 2024-2024 Carbo Kuo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" + +#include "src/Lexicon.hpp" +#include "src/MarisaDict.hpp" +#include "src/UTF8Util.hpp" + +namespace opencc { + +const char* RUNFILE_SUFFIX = ".runfiles/_main"; + +class DictionaryTest : public ::testing::Test, + public ::testing::WithParamInterface { +protected: + static void SetUpTestSuite() { + + std::string program_filename = ::testing::internal::GetArgvs().front(); + size_t suffix_pos = program_filename.find(RUNFILE_SUFFIX); + ASSERT_NE(suffix_pos, std::string::npos); + + runfile_dir_ = + program_filename.substr(0, suffix_pos + strlen(RUNFILE_SUFFIX)); + } + + static std::string runfile_dir_; +}; + +std::string DictionaryTest::runfile_dir_; + +INSTANTIATE_TEST_SUITE_P( + , DictionaryTest, + ::testing::Values("HKVariants", "HKVariantsRevPhrases", + "JPShinjitaiCharacters", "JPShinjitaiPhrases", + "JPVariants", "STCharacters", "STPhrases", "TSCharacters", + "TSPhrases", "TWPhrasesIT", "TWPhrasesName", + "TWPhrasesOther", "TWVariants", "TWVariantsRevPhrases", + "TWPhrases", "TWVariantsRev", "TWPhrasesRev", + "HKVariantsRev", "JPVariantsRev"), + [](const testing::TestParamInfo& info) { + return info.param; + }); + +TEST_P(DictionaryTest, UniqueSortedTest) { + const std::string dictionaryFileName = + runfile_dir_ + "/data/dictionary/" + GetParam() + ".txt"; + FILE* fp = + fopen(UTF8Util::GetPlatformString(dictionaryFileName).c_str(), "rb"); + ASSERT_NE(fp, nullptr); + LexiconPtr lexicon = Lexicon::ParseLexiconFromFile(fp); + EXPECT_TRUE(lexicon->IsUnique()) << GetParam() << " has duplicated keys."; + EXPECT_TRUE(lexicon->IsSorted()) << GetParam() << " is not sorted."; +} + +TEST_P(DictionaryTest, BinaryTest) { + const std::string binaryDictionaryFileName = + runfile_dir_ + "/data/dictionary/" + GetParam() + ".ocd2"; + FILE* fp_bin = fopen( + UTF8Util::GetPlatformString(binaryDictionaryFileName).c_str(), "rb"); + ASSERT_NE(fp_bin, nullptr); + MarisaDictPtr dict = MarisaDict::NewFromFile(fp_bin); + ASSERT_NE(dict, nullptr); + + const std::string textDictionaryFileName = + runfile_dir_ + "/data/dictionary/" + GetParam() + ".txt"; + FILE* fp_txt = + fopen(UTF8Util::GetPlatformString(textDictionaryFileName).c_str(), "rb"); + ASSERT_NE(fp_txt, nullptr); + LexiconPtr txt_lexicon = Lexicon::ParseLexiconFromFile(fp_txt); + + EXPECT_EQ(dict->GetLexicon()->Length(), txt_lexicon->Length()); +} + +} // namespace opencc diff --git a/data/dictionary/STPhrases.txt b/data/dictionary/STPhrases.txt index 60b7fc230..7b1e2e1d5 100644 --- a/data/dictionary/STPhrases.txt +++ b/data/dictionary/STPhrases.txt @@ -4044,7 +4044,6 @@ 余事 餘事 余二 餘二 余五 餘五 -余慈高速 餘慈高速 余亩 餘畝 余人 餘人 余件 餘件 @@ -4114,6 +4113,7 @@ 余悸 餘悸 余情 餘情 余情未了 餘情未了 +余慈高速 餘慈高速 余户 餘戶 余政宪 余政憲 余数 餘數 @@ -14369,10 +14369,10 @@ 吞了 吞了 吞了下去 吞了下去 吞云吐雾 吞雲吐霧 -吞武里 吞武里 吞刀刮肠 吞刀刮腸 吞咽 吞嚥 吞并 吞併 +吞武里 吞武里 吞烟 吞煙 吞米桑布札 吞米桑布札 吟叹 吟歎 @@ -17589,6 +17589,7 @@ 太初历史 太初歷史 太卜 太卜 太原师范学院 太原師範學院 +太古里 太古里 太后 太后 太咸 太鹹 太好了 太好了 @@ -17598,7 +17599,6 @@ 太平御览 太平御覽 太平洋周边 太平洋周邊 太平洋联合铁路 太平洋聯合鐵路 -太古里 太古里 太扯了 太扯了 太松 太鬆 太极 太極 @@ -23571,8 +23571,8 @@ 愿谨 願謹 愿闻其详 願聞其詳 慈云 慈雲 -慈制 慈制 慈余高速 慈餘高速 +慈制 慈制 慈安太后 慈安太后 慈悲喜舍 慈悲喜捨 慈溪 慈谿 @@ -24544,9 +24544,9 @@ 扳回一城 扳回一城 扶了 扶了 扶余 扶餘 -扶余市 扶餘市 扶余县 扶餘縣 扶余国 扶餘國 +扶余市 扶餘市 扶出 扶出 扶出去 扶出去 扶出来 扶出來 @@ -26181,11 +26181,11 @@ 提拉米苏 提拉米蘇 提摩太后书 提摩太後書 提梁 提樑 -提纳里 提納里 提炼 提煉 提炼出 提煉出 提甕出汲 提甕出汲 提纯复壮 提純復壯 +提纳里 提納里 提舍尼 提舍尼 插于 插於 插回 插回 @@ -29405,16 +29405,16 @@ 束身修行 束身修行 束身自修 束身自修 杠一 杠一 -杠二 杠二 +杠七 杠七 杠三 杠三 -杠四 杠四 +杠上 槓上 +杠九 杠九 +杠二 杠二 杠五 杠五 -杠六 杠六 -杠七 杠七 杠八 杠八 -杠九 杠九 +杠六 杠六 +杠四 杠四 杠增一 杠增一 -杠上 槓上 杠头 槓頭 杠子 槓子 杠杆 槓桿 @@ -36487,7 +36487,6 @@ 神人鉴知 神人鑑知 神出鬼入 神出鬼入 神出鬼没 神出鬼沒 -神里 神里 神分志夺 神分志奪 神台 神臺 神圣同盟 神聖同盟 @@ -36528,6 +36527,7 @@ 神采飘逸 神采飄逸 神采飞扬 神采飛揚 神采骏发 神采駿發 +神里 神里 神雕 神鵰 神雕侠侣 神鵰俠侶 神雕像 神雕像 @@ -38291,13 +38291,13 @@ 细雨蒙蒙 細雨濛濛 细雨蒙蒙忆当年 細雨濛濛憶當年 织出 織出 -织里 織里 织布 織布 织布厂 織布廠 织布娘 織布娘 织布机 織布機 织席 織蓆 织当访婢 織當訪婢 +织里 織里 织锦回文 織錦回文 终了 終了 终于 終於 @@ -40336,12 +40336,12 @@ 艾维斯普里斯莱 艾維斯普里斯萊 艾赛克斯 艾賽克斯 艾达克 艾達克 -艾里西 艾里西 -艾里西湖 艾里西湖 -艾里西湖镇 艾里西湖鎮 艾里亚森 艾里亞森 艾里斯 艾里斯 艾里森 艾里森 +艾里西 艾里西 +艾里西湖 艾里西湖 +艾里西湖镇 艾里西湖鎮 艾里赛宫 艾里賽宮 节余 節餘 节制 節制 @@ -43279,8 +43279,6 @@ 谷关 谷關 谷口 谷口 谷口耕岩 谷口耕岩 -谷里 谷里 -谷里街道 谷里街道 谷圭 穀圭 谷地 谷地 谷场 穀場 @@ -43327,6 +43325,8 @@ 谷贵饿农谷贱伤农 穀貴餓農穀賤傷農 谷道 穀道 谷都 谷都 +谷里 谷里 +谷里街道 谷里街道 谷雨 穀雨 谷风 穀風 谷風 谷食 穀食 @@ -44906,7 +44906,6 @@ 通了 通了 通事舍人 通事舍人 通于 通於 -通车里程 通車里程 通人达才 通人達才 通便药 通便藥 通信技术 通信技術 @@ -44935,6 +44934,7 @@ 通联记录 通聯記錄 通讯录 通訊錄 通讯系统 通訊系統 +通车里程 通車里程 通过事后 通過事後 通鉴 通鑑 逛了 逛了 @@ -45594,7 +45594,6 @@ 里包恩 里包恩 里名 里名 里君 里君 -里甲 里甲 里咽 裏咽 里士满 里士滿 里外 裏外 @@ -45647,6 +45646,7 @@ 里特维宁科 里特維寧科 里瓦几亚条约 里瓦幾亞條約 里瓦尔多 裏瓦爾多 +里甲 里甲 里社 里社 里科 里科 里程 里程 @@ -46408,7 +46408,6 @@ 镰状细胞血症 鐮狀細胞血症 镶了 鑲了 镶板 鑲板 -长干里 長干里 长丰 長豐 长丰县 長豐縣 长了 長了 @@ -46431,6 +46430,7 @@ 长寿面 長壽麪 长干巷 長干巷 长干曲 長干曲 +长干里 長干里 长征 長征 长征军 長征軍 长恶不悛 長惡不悛 diff --git a/data/scripts/BUILD.bazel b/data/scripts/BUILD.bazel new file mode 100644 index 000000000..d9f718d4e --- /dev/null +++ b/data/scripts/BUILD.bazel @@ -0,0 +1,23 @@ +load("@rules_python//python:py_binary.bzl", "py_binary") +load("@rules_python//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "common", + srcs = ["common.py"], +) + +py_binary( + name = "merge", + srcs = ["merge.py"], + imports = ["."], + deps = [":common"], +) + +py_binary( + name = "reverse", + srcs = ["reverse.py"], + imports = ["."], + deps = [":common"], +) diff --git a/data/scripts/sort_all.py b/data/scripts/sort_all.py index d1ba06c07..bd57ae735 100755 --- a/data/scripts/sort_all.py +++ b/data/scripts/sort_all.py @@ -11,7 +11,7 @@ exit(1) directory = sys.argv[1] -files = glob.glob(directory + "/*") +files = glob.glob(directory + "/*.txt") for filename in files: print(filename) sort_items(filename, filename) diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 82a6712fe..acf0e98e0 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -1,10 +1,33 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +package(default_visibility = ["//visibility:public"]) + cc_library( name = "opencc", - visibility = ["//visibility:public"], deps = [ ":binary_dict", ":common", + ":config", + ":conversion", + ":conversion_chain", + ":converter", ":darts_dict", + ":dict", + ":dict_converter", + ":dict_entry", + ":dict_group", + ":exception", + ":lexicon", + ":marisa_dict", + ":max_match_segmentation", + ":phrase_extract", + ":segmentation", + ":segments", + ":serializable_dict", + ":serialized_values", + ":simple_converter", + ":utf8_string_slice", + ":utf8_util", ], ) @@ -29,6 +52,15 @@ cc_test( ], ) +cc_library( + name = "cmd_line_output", + hdrs = ["CmdLineOutput.hpp"], + visibility = ["//src/tools:__pkg__"], + deps = [ + "@tclap", + ], +) + cc_library( name = "common", hdrs = [ @@ -39,6 +71,102 @@ cc_library( ], ) +cc_library( + name = "config", + srcs = ["Config.cpp"], + hdrs = ["Config.hpp"], + deps = [ + ":common", + ":conversion_chain", + ":converter", + ":darts_dict", + ":dict_group", + ":marisa_dict", + ":max_match_segmentation", + ":text_dict", + "@rapidjson", + ], +) + +cc_test( + name = "config_test", + srcs = ["ConfigTest.cpp"], + deps = [ + ":common", + ":config", + ":config_test_base", + ":converter", + ":test_utils_utf8", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "config_test_base", + testonly = True, + hdrs = ["ConfigTestBase.hpp"], + data = ["//test/config_test"], + defines = ["BAZEL"], + deps = [ + ":test_utils", + "@bazel_tools//tools/cpp/runfiles", + ], +) + +cc_library( + name = "conversion", + srcs = ["Conversion.cpp"], + hdrs = ["Conversion.hpp"], + deps = [ + ":common", + ":dict", + ":segmentation", + ], +) + +cc_test( + name = "conversion_test", + srcs = ["ConversionTest.cpp"], + deps = [ + ":conversion", + ":dict_group_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "conversion_chain", + srcs = ["ConversionChain.cpp"], + hdrs = ["ConversionChain.hpp"], + deps = [ + ":common", + ":conversion", + ":segments", + ], +) + +cc_test( + name = "conversion_chain_test", + srcs = ["ConversionChainTest.cpp"], + deps = [ + ":conversion_chain", + ":dict_group_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "converter", + srcs = ["Converter.cpp"], + hdrs = ["Converter.hpp"], + deps = [ + ":common", + ":conversion_chain", + ":segmentation", + ":segments", + ], +) + cc_library( name = "darts_dict", srcs = ["DartsDict.cpp"], @@ -72,6 +200,18 @@ cc_library( ], ) +cc_library( + name = "dict_converter", + srcs = ["DictConverter.cpp"], + hdrs = ["DictConverter.hpp"], + deps = [ + ":common", + ":darts_dict", + ":marisa_dict", + ":text_dict", + ], +) + cc_library( name = "dict_entry", srcs = ["DictEntry.cpp"], @@ -83,6 +223,37 @@ cc_library( ], ) +cc_library( + name = "dict_group", + srcs = ["DictGroup.cpp"], + hdrs = ["DictGroup.hpp"], + deps = [ + ":common", + ":dict", + ":lexicon", + ":text_dict", + ], +) + +cc_test( + name = "dict_group_test", + srcs = ["DictGroupTest.cpp"], + deps = [ + ":dict_group_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "dict_group_test_base", + testonly = True, + hdrs = ["DictGroupTestBase.hpp"], + deps = [ + ":dict_group", + ":text_dict_test_base", + ], +) + cc_library( name = "exception", hdrs = [ @@ -124,10 +295,48 @@ cc_test( ) cc_library( - name = "segments", - hdrs = [ - "Segments.hpp", + name = "max_match_segmentation", + srcs = ["MaxMatchSegmentation.cpp"], + hdrs = ["MaxMatchSegmentation.hpp"], + deps = [ + ":common", + ":dict_group", + ":segmentation", + ], +) + +cc_test( + name = "max_match_segmentation_test", + srcs = ["MaxMatchSegmentationTest.cpp"], + deps = [ + ":dict_group_test_base", + ":max_match_segmentation", + "@googletest//:gtest_main", ], +) + +cc_library( + name = "phrase_extract", + srcs = ["PhraseExtract.cpp"], + hdrs = ["PhraseExtract.hpp"], + visibility = ["//src/tools:__pkg__"], + deps = [ + ":common", + ":marisa_dict", + ":utf8_string_slice", + ], +) + +cc_library( + name = "segmentation", + srcs = ["Segmentation.cpp"], + hdrs = ["Segmentation.hpp"], + deps = [":common"], +) + +cc_library( + name = "segments", + hdrs = ["Segments.hpp"], deps = [":common"], ) @@ -160,6 +369,34 @@ cc_test( ], ) +cc_library( + name = "simple_converter", + srcs = ["SimpleConverter.cpp"], + hdrs = [ + "SimpleConverter.hpp", + "opencc.h", + ], + defines = ["BAZEL"], + deps = [ + ":common", + ":config", + ":converter", + ":utf8_util", + "@bazel_tools//tools/cpp/runfiles", + ], +) + +cc_test( + name = "simple_converter_test", + srcs = ["SimpleConverterTest.cpp"], + deps = [ + ":config_test_base", + ":simple_converter", + ":test_utils_utf8", + "@googletest//:gtest_main", + ], +) + cc_library( name = "test_utils", testonly = True, @@ -208,6 +445,16 @@ cc_library( ], ) +cc_library( + name = "utf8_string_slice", + srcs = ["UTF8StringSlice.cpp"], + hdrs = ["UTF8StringSlice.hpp"], + deps = [ + ":common", + ":utf8_util", + ], +) + cc_library( name = "utf8_util", srcs = ["UTF8Util.cpp"], diff --git a/src/Config.cpp b/src/Config.cpp index a3a36b646..aaee40992 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -43,11 +43,7 @@ namespace { class ConfigInternal { public: - std::string configDirectory; - std::unordered_map< - std::string, - std::unordered_map>> - dictCache; + std::vector paths; const JSONValue& GetProperty(const JSONValue& doc, const char* name) { if (!doc.HasMember(name)) { @@ -88,16 +84,11 @@ class ConfigInternal { if (SerializableDict::TryLoadFromFile(fileName, &dict)) { return dict; } - // Configuration directory - if ((configDirectory != "") && SerializableDict::TryLoadFromFile( - configDirectory + fileName, &dict)) { - return dict; - } - // Package data directory - if ((PACKAGE_DATA_DIRECTORY != "") && - SerializableDict::TryLoadFromFile( - PACKAGE_DATA_DIRECTORY + fileName, &dict)) { - return dict; + for (const std::string& dirPath : paths) { + std::string path = dirPath + '/' + fileName; + if (SerializableDict::TryLoadFromFile(path, &dict)) { + return dict; + } } throw FileNotFound(fileName); } @@ -138,15 +129,7 @@ class ConfigInternal { return DictGroupPtr(new DictGroup(dicts)); } else { std::string fileName = GetStringProperty(doc, "file"); - // Read from cache - DictPtr& cache = dictCache[type][configDirectory][fileName]; - if (cache != nullptr) { - return cache; - } DictPtr dict = LoadDictFromFile(type, fileName); - - // Update Cache - cache = dict; return dict; } } @@ -188,7 +171,8 @@ class ConfigInternal { return chain; } - std::string FindConfigFile(std::string fileName) { + std::string FindConfigFile(std::string fileName, + const std::vector& paths) { std::ifstream ifs; // Working directory @@ -209,6 +193,15 @@ class ConfigInternal { return prefixedFileName; } } + + for (const std::string& dirPath : paths) { + std::string path = dirPath + '/' + fileName; + ifs.open(UTF8Util::GetPlatformString(path).c_str()); + if (ifs.is_open()) { + return path; + } + } + throw FileNotFound(fileName); } }; @@ -218,9 +211,10 @@ Config::Config() : internal(new ConfigInternal()) {} Config::~Config() { delete (ConfigInternal*)internal; } -ConverterPtr Config::NewFromFile(const std::string& fileName) { +ConverterPtr Config::NewFromFile(const std::string& fileName, + const std::vector& paths) { ConfigInternal* impl = (ConfigInternal*)internal; - std::string prefixedFileName = impl->FindConfigFile(fileName); + std::string prefixedFileName = impl->FindConfigFile(fileName, paths); std::ifstream ifs(UTF8Util::GetPlatformString(prefixedFileName)); std::string content(std::istreambuf_iterator(ifs), (std::istreambuf_iterator())); @@ -233,11 +227,27 @@ ConverterPtr Config::NewFromFile(const std::string& fileName) { if (slashPos != std::string::npos) { configDirectory = prefixedFileName.substr(0, slashPos) + "/"; } - return NewFromString(content, configDirectory); + std::vector dictPaths = paths; + if (!configDirectory.empty()) { + dictPaths.push_back(configDirectory); + } + return NewFromString(content, dictPaths); } ConverterPtr Config::NewFromString(const std::string& json, const std::string& configDirectory) { + std::vector paths; + if (!configDirectory.empty()) { + if (configDirectory.back() == '/' || configDirectory.back() == '\\') + paths.push_back(configDirectory); + else + paths.push_back(configDirectory + '/'); + } + return NewFromString(json, paths); +} + +ConverterPtr Config::NewFromString(const std::string& json, + const std::vector& paths) { rapidjson::Document doc; doc.ParseInsitu<0>(const_cast(json.c_str())); @@ -255,13 +265,9 @@ ConverterPtr Config::NewFromString(const std::string& json, } ConfigInternal* impl = (ConfigInternal*)internal; - if (!configDirectory.empty()) { - if (configDirectory.back() == '/' || configDirectory.back() == '\\') - impl->configDirectory = configDirectory; - else - impl->configDirectory = configDirectory + '/'; - } else { - impl->configDirectory.clear(); + impl->paths = paths; + if (PACKAGE_DATA_DIRECTORY != "") { + impl->paths.push_back(PACKAGE_DATA_DIRECTORY); } // Required: segmentation diff --git a/src/Config.hpp b/src/Config.hpp index 7a904cee6..2b66d1736 100644 --- a/src/Config.hpp +++ b/src/Config.hpp @@ -34,7 +34,11 @@ class OPENCC_EXPORT Config { ConverterPtr NewFromString(const std::string& json, const std::string& configDirectory); - ConverterPtr NewFromFile(const std::string& fileName); + ConverterPtr NewFromString(const std::string& json, + const std::vector& paths); + + ConverterPtr NewFromFile(const std::string& fileName, + const std::vector& paths = {}); private: void* internal; diff --git a/src/ConfigTest.cpp b/src/ConfigTest.cpp index 369c9dd36..50bf857b4 100644 --- a/src/ConfigTest.cpp +++ b/src/ConfigTest.cpp @@ -32,7 +32,9 @@ class ConfigTest : public ConfigTestBase { : input(utf8("燕燕于飞差池其羽之子于归远送于野")), expected(utf8("燕燕于飛差池其羽之子于歸遠送於野")) {} - virtual void SetUp() { converter = config.NewFromFile(CONFIG_TEST_PATH); } + virtual void SetUp() { + converter = config.NewFromFile(CONFIG_TEST_JSON_PATH); + } Config config; ConverterPtr converter; @@ -62,13 +64,11 @@ TEST_F(ConfigTest, NonexistingPath) { } TEST_F(ConfigTest, NewFromStringWitoutTrailingSlash) { - std::ifstream ifs(CONFIG_TEST_PATH); + std::ifstream ifs(CONFIG_TEST_JSON_PATH); std::string content(std::istreambuf_iterator(ifs), (std::istreambuf_iterator())); - std::string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test"; - const ConverterPtr _ = - config.NewFromString(content, pathWithoutTrailingSlash); + const ConverterPtr _ = config.NewFromString(content, CONFIG_TEST_DIR_PATH); } } // namespace opencc diff --git a/src/ConfigTestBase.hpp b/src/ConfigTestBase.hpp index fc8bb74d2..b3aa5da59 100644 --- a/src/ConfigTestBase.hpp +++ b/src/ConfigTestBase.hpp @@ -18,17 +18,41 @@ #pragma once +#ifdef BAZEL +#include "tools/cpp/runfiles/runfiles.h" +#endif + #include "TestUtils.hpp" namespace opencc { +#ifdef CMAKE_SOURCE_DIR +class ConfigTestBase : public ::testing::Test { +protected: + ConfigTestBase() + : CONFIG_TEST_JSON_PATH(CMAKE_SOURCE_DIR + "/test/config_test/config_test.json"), + CONFIG_TEST_DIR_PATH(CMAKE_SOURCE_DIR "/test/config_test") {} + + const std::string CONFIG_TEST_JSON_PATH; + const std::string CONFIG_TEST_DIR_PATH; +}; +#endif + +#ifdef BAZEL +using bazel::tools::cpp::runfiles::Runfiles; class ConfigTestBase : public ::testing::Test { protected: ConfigTestBase() - : CONFIG_TEST_PATH(CMAKE_SOURCE_DIR - "/test/config_test/config_test.json") {} + : runfiles_(Runfiles::CreateForTest()), + CONFIG_TEST_JSON_PATH( + runfiles_->Rlocation("_main/test/config_test/config_test.json")), + CONFIG_TEST_DIR_PATH(runfiles_->Rlocation("_main/test/config_test")) {} - const std::string CONFIG_TEST_PATH; + const std::unique_ptr runfiles_; + const std::string CONFIG_TEST_JSON_PATH; + const std::string CONFIG_TEST_DIR_PATH; }; +#endif } // namespace opencc diff --git a/src/Lexicon.cpp b/src/Lexicon.cpp index f5e514014..cfb215c43 100644 --- a/src/Lexicon.cpp +++ b/src/Lexicon.cpp @@ -19,8 +19,42 @@ #include #include "Lexicon.hpp" + namespace opencc { +namespace { + +DictEntry* ParseKeyValues(const char* buff, size_t lineNum) { + size_t length; + if (buff == nullptr || UTF8Util::IsLineEndingOrFileEnding(*buff)) { + return nullptr; + } + const char* pbuff = UTF8Util::FindNextInline(buff, '\t'); + if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { + throw InvalidTextDictionary("Tabular not found " + std::string(buff), + lineNum); + } + length = static_cast(pbuff - buff); + std::string key = UTF8Util::FromSubstr(buff, length); + std::vector values; + while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { + buff = pbuff = UTF8Util::NextChar(pbuff); + pbuff = UTF8Util::FindNextInline(buff, ' '); + length = static_cast(pbuff - buff); + const std::string& value = UTF8Util::FromSubstr(buff, length); + values.push_back(value); + } + if (values.size() == 0) { + throw InvalidTextDictionary("No value in an item", lineNum); + } else if (values.size() == 1) { + return DictEntryFactory::New(key, values.at(0)); + } else { + return DictEntryFactory::New(key, values); + } +} + +} // namespace + void Lexicon::Sort() { std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan); } @@ -42,4 +76,20 @@ bool Lexicon::IsUnique(std::string* dupkey) { return true; } +LexiconPtr Lexicon::ParseLexiconFromFile(FILE* fp) { + const int ENTRY_BUFF_SIZE = 4096; + char buff[ENTRY_BUFF_SIZE]; + LexiconPtr lexicon(new Lexicon); + UTF8Util::SkipUtf8Bom(fp); + size_t lineNum = 1; + while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { + DictEntry* entry = ParseKeyValues(buff, lineNum); + if (entry != nullptr) { + lexicon->Add(entry); + } + lineNum++; + } + return lexicon; +} + } // namespace opencc diff --git a/src/Lexicon.hpp b/src/Lexicon.hpp index 688912875..61dcc59ed 100644 --- a/src/Lexicon.hpp +++ b/src/Lexicon.hpp @@ -62,6 +62,8 @@ class OPENCC_EXPORT Lexicon { return entries.end(); } + static LexiconPtr ParseLexiconFromFile(FILE* fp); + private: std::vector> entries; }; diff --git a/src/SimpleConverter.cpp b/src/SimpleConverter.cpp index 417f7a506..24fe900bb 100644 --- a/src/SimpleConverter.cpp +++ b/src/SimpleConverter.cpp @@ -27,22 +27,50 @@ #include "UTF8Util.hpp" #include "opencc.h" +#ifdef BAZEL +#include "tools/cpp/runfiles/runfiles.h" +using bazel::tools::cpp::runfiles::Runfiles; +#endif + using namespace opencc; +namespace { + struct InternalData { const ConverterPtr converter; InternalData(const ConverterPtr& _converter) : converter(_converter) {} -}; -SimpleConverter::SimpleConverter(const std::string& configFileName) { - try { - Config config; - internalData = new InternalData(config.NewFromFile(configFileName)); - } catch (Exception& ex) { - throw std::runtime_error(ex.what()); + static InternalData* NewInternalData(const std::string& configFileName, + const std::vector& paths) { + try { + Config config; +#ifdef BAZEL + std::unique_ptr bazel_runfiles(Runfiles::Create("")); + std::vector paths_with_runfiles = paths; + paths_with_runfiles.push_back( + bazel_runfiles->Rlocation("_main/data/config")); + paths_with_runfiles.push_back( + bazel_runfiles->Rlocation("_main/data/dictionary")); + return new InternalData( + config.NewFromFile(configFileName, paths_with_runfiles)); +#else + return new InternalData(config.NewFromFile(configFileName, paths)); +#endif + } catch (Exception& ex) { + throw std::runtime_error(ex.what()); + } } -} +}; + +} // namespace + +SimpleConverter::SimpleConverter(const std::string& configFileName) + : SimpleConverter(configFileName, std::vector()) {} + +SimpleConverter::SimpleConverter(const std::string& configFileName, + const std::vector& paths) + : internalData(InternalData::NewInternalData(configFileName, paths)) {} SimpleConverter::~SimpleConverter() { delete (InternalData*)internalData; } diff --git a/src/SimpleConverter.hpp b/src/SimpleConverter.hpp index 56932b76c..57e88cd60 100644 --- a/src/SimpleConverter.hpp +++ b/src/SimpleConverter.hpp @@ -18,6 +18,7 @@ #include "Export.hpp" #include +#include #ifndef __OPENCC_SIMPLECONVERTER_HPP_ #define __OPENCC_SIMPLECONVERTER_HPP_ @@ -29,6 +30,7 @@ */ namespace opencc { + /** * A high level converter * This interface does not require C++11 to compile. @@ -40,7 +42,15 @@ class OPENCC_EXPORT SimpleConverter { * Constructor of SimpleConverter * @param configFileName File name of configuration. */ - SimpleConverter(const std::string& configFileName); + explicit SimpleConverter(const std::string& configFileName); + + /** + * Constructor of SimpleConverter + * @param configFileName File name of configuration. + * @param paths Additional paths to locate configuration and dictionary files. + */ + SimpleConverter(const std::string& configFileName, + const std::vector& paths); ~SimpleConverter(); diff --git a/src/SimpleConverterTest.cpp b/src/SimpleConverterTest.cpp index 9c645b784..199ba5e7a 100644 --- a/src/SimpleConverterTest.cpp +++ b/src/SimpleConverterTest.cpp @@ -37,15 +37,15 @@ class SimpleConverterTest : public ConfigTestBase { } }; -TEST_F(SimpleConverterTest, Convert) { TestConverter(CONFIG_TEST_PATH); } +TEST_F(SimpleConverterTest, Convert) { TestConverter(CONFIG_TEST_JSON_PATH); } TEST_F(SimpleConverterTest, Multithreading) { const auto& routine = [this](const std::string& config) { TestConverter(config); }; - std::thread thread1(routine, CONFIG_TEST_PATH); - std::thread thread2(routine, CONFIG_TEST_PATH); - routine(CONFIG_TEST_PATH); + std::thread thread1(routine, CONFIG_TEST_JSON_PATH); + std::thread thread2(routine, CONFIG_TEST_JSON_PATH); + routine(CONFIG_TEST_JSON_PATH); thread1.join(); thread2.join(); } @@ -54,7 +54,7 @@ TEST_F(SimpleConverterTest, CInterface) { const std::string& text = utf8("燕燕于飞差池其羽之子于归远送于野"); const std::string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野"); { - opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); + opencc_t od = opencc_open(CONFIG_TEST_JSON_PATH.c_str()); char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); EXPECT_EQ(expected, converted); opencc_convert_utf8_free(converted); @@ -62,7 +62,7 @@ TEST_F(SimpleConverterTest, CInterface) { } { char output[1024]; - opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); + opencc_t od = opencc_open(CONFIG_TEST_JSON_PATH.c_str()); size_t length = opencc_convert_utf8_to_buffer(od, text.c_str(), (size_t)-1, output); EXPECT_EQ(expected.length(), length); diff --git a/src/TextDict.cpp b/src/TextDict.cpp index c81b31199..34d024e71 100644 --- a/src/TextDict.cpp +++ b/src/TextDict.cpp @@ -33,51 +33,6 @@ static size_t GetKeyMaxLength(const LexiconPtr& lexicon) { return maxLength; } -static DictEntry* ParseKeyValues(const char* buff, size_t lineNum) { - size_t length; - if (buff == nullptr || UTF8Util::IsLineEndingOrFileEnding(*buff)) { - return nullptr; - } - const char* pbuff = UTF8Util::FindNextInline(buff, '\t'); - if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { - throw InvalidTextDictionary("Tabular not found " + std::string(buff), - lineNum); - } - length = static_cast(pbuff - buff); - std::string key = UTF8Util::FromSubstr(buff, length); - std::vector values; - while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { - buff = pbuff = UTF8Util::NextChar(pbuff); - pbuff = UTF8Util::FindNextInline(buff, ' '); - length = static_cast(pbuff - buff); - const std::string& value = UTF8Util::FromSubstr(buff, length); - values.push_back(value); - } - if (values.size() == 0) { - throw InvalidTextDictionary("No value in an item", lineNum); - } else if (values.size() == 1) { - return DictEntryFactory::New(key, values.at(0)); - } else { - return DictEntryFactory::New(key, values); - } -} - -static LexiconPtr ParseLexiconFromFile(FILE* fp) { - const int ENTRY_BUFF_SIZE = 4096; - char buff[ENTRY_BUFF_SIZE]; - LexiconPtr lexicon(new Lexicon); - UTF8Util::SkipUtf8Bom(fp); - size_t lineNum = 1; - while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { - DictEntry* entry = ParseKeyValues(buff, lineNum); - if (entry != nullptr) { - lexicon->Add(entry); - } - lineNum++; - } - return lexicon; -} - TextDict::TextDict(const LexiconPtr& _lexicon) : maxLength(GetKeyMaxLength(_lexicon)), lexicon(_lexicon) { assert(lexicon->IsSorted()); @@ -87,12 +42,12 @@ TextDict::TextDict(const LexiconPtr& _lexicon) TextDict::~TextDict() {} TextDictPtr TextDict::NewFromSortedFile(FILE* fp) { - const LexiconPtr& lexicon = ParseLexiconFromFile(fp); + const LexiconPtr& lexicon = Lexicon::ParseLexiconFromFile(fp); return TextDictPtr(new TextDict(lexicon)); } TextDictPtr TextDict::NewFromFile(FILE* fp) { - const LexiconPtr& lexicon = ParseLexiconFromFile(fp); + const LexiconPtr& lexicon = Lexicon::ParseLexiconFromFile(fp); lexicon->Sort(); std::string dupkey; if (!lexicon->IsUnique(&dupkey)) { diff --git a/src/tools/BUILD.bazel b/src/tools/BUILD.bazel new file mode 100644 index 000000000..ac4e69155 --- /dev/null +++ b/src/tools/BUILD.bazel @@ -0,0 +1,30 @@ +package(default_visibility = ["//visibility:public"]) + +cc_binary( + name = "command_line", + srcs = ["CommandLine.cpp"], + deps = [ + "//src:cmd_line_output", + "//src:config", + "//src:converter", + "//src:utf8_util", + ], +) + +cc_binary( + name = "dict_converter", + srcs = ["DictConverter.cpp"], + deps = [ + "//src:cmd_line_output", + "//src:dict_converter", + ], +) + +cc_binary( + name = "phrase_extract", + srcs = ["PhraseExtract.cpp"], + deps = [ + "//src:cmd_line_output", + "//src:phrase_extract", + ], +) diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index 373b9cc8e..bf619ea05 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -1,5 +1,7 @@ # Executables +include_directories("${PROJECT_SOURCE_DIR}") + ## opencc add_executable(opencc CommandLine.cpp) target_link_libraries(opencc libopencc) diff --git a/src/tools/CommandLine.cpp b/src/tools/CommandLine.cpp index b12ab5a3d..e029d3d36 100644 --- a/src/tools/CommandLine.cpp +++ b/src/tools/CommandLine.cpp @@ -18,10 +18,10 @@ #include -#include "CmdLineOutput.hpp" -#include "Config.hpp" -#include "Converter.hpp" -#include "UTF8Util.hpp" +#include "src/CmdLineOutput.hpp" +#include "src/Config.hpp" +#include "src/Converter.hpp" +#include "src/UTF8Util.hpp" using namespace opencc; @@ -184,6 +184,9 @@ int main(int argc, const char* argv[]) { TCLAP::ValueArg noFlushArg( "", "noflush", "Disable flush for every line", false /* required */, false /* default */, "bool" /* type */, cmd); + TCLAP::MultiArg pathArg( + "", "path", "Additional paths to locate config and dictionary files.", + false /* required */, "file" /* type */, cmd); cmd.parse(argc, argv); configFileName = configArg.getValue(); noFlush = noFlushArg.getValue(); @@ -194,7 +197,7 @@ int main(int argc, const char* argv[]) { outputFileName = Optional(outputArg.getValue()); noFlush = true; } - converter = config.NewFromFile(configFileName); + converter = config.NewFromFile(configFileName, pathArg.getValue()); bool lineByLine = inputFileName.IsNull(); if (lineByLine) { ConvertLineByLine(); diff --git a/src/tools/DictConverter.cpp b/src/tools/DictConverter.cpp index bb5ea8f3e..8389edebb 100644 --- a/src/tools/DictConverter.cpp +++ b/src/tools/DictConverter.cpp @@ -16,9 +16,9 @@ * limitations under the License. */ -#include "DictConverter.hpp" -#include "CmdLineOutput.hpp" -#include "Exception.hpp" +#include "src/DictConverter.hpp" +#include "src/CmdLineOutput.hpp" +#include "src/Exception.hpp" using namespace opencc; diff --git a/src/tools/PhraseExtract.cpp b/src/tools/PhraseExtract.cpp index eddb18d9f..08eb7d740 100644 --- a/src/tools/PhraseExtract.cpp +++ b/src/tools/PhraseExtract.cpp @@ -18,8 +18,8 @@ #include -#include "CmdLineOutput.hpp" -#include "PhraseExtract.hpp" +#include "src/CmdLineOutput.hpp" +#include "src/PhraseExtract.hpp" using opencc::Exception; using opencc::PhraseExtract; diff --git a/test/BUILD.bazel b/test/BUILD.bazel new file mode 100644 index 000000000..71a118a7a --- /dev/null +++ b/test/BUILD.bazel @@ -0,0 +1,26 @@ +cc_test( + name = "bazel_opencc_test", + srcs = ["BazelOpenccTest.cpp"], + deps = [ + "//:opencc", + "@googletest//:gtest_main", + ], +) + +cc_test( + name = "command_line_converter_test", + srcs = ["CommandLineConvertTest.cpp"], + data = [ + "//data/config", + "//data/dictionary:binary_dictionaries", + "//data/dictionary:text_dictionaries", + "//src/tools:command_line", + "//test/testcases", + ], + defines = ["BAZEL"], + deps = [ + "//src:common", + "@bazel_tools//tools/cpp/runfiles", + "@googletest//:gtest_main", + ], +) diff --git a/test/BazelOpenccTest.cpp b/test/BazelOpenccTest.cpp new file mode 100644 index 000000000..70d13e577 --- /dev/null +++ b/test/BazelOpenccTest.cpp @@ -0,0 +1,54 @@ +/* + * Open Chinese Convert + * + * Copyright 2024-2024 Carbo Kuo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "opencc.h" +#include "gtest/gtest.h" + +namespace opencc { + +class BazelOpenccTest : public ::testing::Test {}; + +TEST_F(BazelOpenccTest, SimpleConverter_s2t) { + SimpleConverter converter(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); + EXPECT_EQ(converter.Convert("简化字测试"), "簡化字測試"); +} + +TEST_F(BazelOpenccTest, SimpleConverter_t2s) { + SimpleConverter converter(OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP); + EXPECT_EQ(converter.Convert("簡化字測試"), "简化字测试"); +} + +TEST_F(BazelOpenccTest, CInterface_s2t) { + std::string text = "简化字测试"; + opencc_t od = opencc_open(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); + char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); + EXPECT_STREQ("簡化字測試", converted); + opencc_convert_utf8_free(converted); + EXPECT_EQ(0, opencc_close(od)); +} + +TEST_F(BazelOpenccTest, CInterface_t2s) { + std::string text = "簡化字測試"; + opencc_t od = opencc_open(OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP); + char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); + EXPECT_STREQ("简化字测试", converted); + opencc_convert_utf8_free(converted); + EXPECT_EQ(0, opencc_close(od)); +} + +} // namespace opencc diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e6dc3a765..91027a048 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,4 @@ +include_directories("${PROJECT_SOURCE_DIR}") include_directories("${PROJECT_BINARY_DIR}/src") include_directories("${PROJECT_SOURCE_DIR}/src") diff --git a/test/CommandLineConvertTest.cpp b/test/CommandLineConvertTest.cpp index 6bfd20a8a..7be8a398a 100644 --- a/test/CommandLineConvertTest.cpp +++ b/test/CommandLineConvertTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 Carbo Kuo + * Copyright 2015-2024 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,9 +19,14 @@ #include #include -#include "Common.hpp" +#include "src/Common.hpp" #include "gtest/gtest.h" +#ifdef BAZEL +#include "tools/cpp/runfiles/runfiles.h" +using bazel::tools::cpp::runfiles::Runfiles; +#endif + namespace opencc { class CommandLineConvertTest : public ::testing::Test { @@ -31,16 +36,20 @@ class CommandLineConvertTest : public ::testing::Test { virtual ~CommandLineConvertTest() { free(originalWorkingDirectory); } virtual void SetUp() { +#ifdef BAZEL + runfiles_.reset(Runfiles::CreateForTest()); +#else ASSERT_NE("", PROJECT_BINARY_DIR); ASSERT_NE("", CMAKE_SOURCE_DIR); ASSERT_EQ(0, chdir(PROJECT_BINARY_DIR "/data")); +#endif } virtual void TearDown() { ASSERT_EQ(0, chdir(originalWorkingDirectory)); } std::string GetFileContents(const std::string& fileName) const { std::ifstream fs(fileName); - EXPECT_TRUE(fs.is_open()); + EXPECT_TRUE(fs.is_open()) << fileName; const std::string content((std::istreambuf_iterator(fs)), (std::istreambuf_iterator())); fs.close(); @@ -51,7 +60,10 @@ class CommandLineConvertTest : public ::testing::Test { originalWorkingDirectory = getcwd(nullptr, 0); } - const char* OpenccCommand() const { + std::string OpenccCommand() const { +#ifdef BAZEL + return runfiles_->Rlocation("_main/src/tools/command_line"); +#else #ifndef _MSC_VER return PROJECT_BINARY_DIR "/src/tools/opencc"; #else @@ -60,42 +72,71 @@ class CommandLineConvertTest : public ::testing::Test { #else return PROJECT_BINARY_DIR "/src/tools/Debug/opencc.exe"; #endif +#endif #endif } - const char* InputDirectory() const { + std::string InputDirectory() const { +#ifdef BAZEL + return runfiles_->Rlocation("_main/test/testcases") + "/"; +#else return CMAKE_SOURCE_DIR "/test/testcases/"; +#endif } - const char* OutputDirectory() const { return PROJECT_BINARY_DIR "/test/"; } + std::string OutputDirectory() const { +#ifdef BAZEL + return ::testing::TempDir() + "/"; +#else + return PROJECT_BINARY_DIR "/test/"; +#endif + } - const char* AnswerDirectory() const { + std::string AnswerDirectory() const { +#ifdef BAZEL + return runfiles_->Rlocation("_main/test/testcases") + "/"; +#else return CMAKE_SOURCE_DIR "/test/testcases/"; +#endif } - const char* ConfigurationDirectory() const { + std::string ConfigurationDirectory() const { +#ifdef BAZEL + return ""; +#else return CMAKE_SOURCE_DIR "/data/config/"; +#endif } std::string InputFile(const char* config) const { - return std::string(InputDirectory()) + config + ".in"; + return InputDirectory() + config + ".in"; } std::string OutputFile(const char* config) const { - return std::string(OutputDirectory()) + config + ".out"; + return OutputDirectory() + config + ".out"; } std::string AnswerFile(const char* config) const { - return std::string(AnswerDirectory()) + config + ".ans"; + return AnswerDirectory() + config + ".ans"; } std::string TestCommand(const char* config, const std::string& inputFile, const std::string& outputFile) const { - return OpenccCommand() + std::string("") + " -i " + inputFile + " -o " + - outputFile + " -c " + ConfigurationDirectory() + config + ".json"; + std::string cmd = OpenccCommand() + " -i " + inputFile + " -o " + + outputFile + " -c " + ConfigurationDirectory() + config + + ".json"; +#ifdef BAZEL + cmd += " --path " + runfiles_->Rlocation("_main/data/dictionary") + "/" + + " --path " + runfiles_->Rlocation("_main/data/config") + "/"; +#endif + return cmd; } char* originalWorkingDirectory; + +#ifdef BAZEL + std::unique_ptr runfiles_; +#endif }; class ConfigurationTest : public CommandLineConvertTest, @@ -128,10 +169,12 @@ TEST_P(ConfigurationTest, InPlaceConvert) { ASSERT_EQ(answer, output); } -INSTANTIATE_TEST_SUITE_P(CommandLine, ConfigurationTest, - ::testing::Values("hk2s", "hk2t", "jp2t", "s2hk", - "s2t", "s2tw", "s2twp", "t2hk", - "t2jp", "t2s", "tw2s", "tw2sp", - "tw2t")); +INSTANTIATE_TEST_SUITE_P( + CommandLine, ConfigurationTest, + ::testing::Values("hk2s", "hk2t", "jp2t", "s2hk", "s2t", "s2tw", "s2twp", + "t2hk", "t2jp", "t2s", "tw2s", "tw2sp", "tw2t"), + [](const testing::TestParamInfo& info) { + return info.param; + }); } // namespace opencc diff --git a/test/config_test/BUILD.bazel b/test/config_test/BUILD.bazel new file mode 100644 index 000000000..b27f1cc42 --- /dev/null +++ b/test/config_test/BUILD.bazel @@ -0,0 +1,9 @@ +filegroup( + name = "config_test", + srcs = [ + "config_test.json", + "config_test_characters.txt", + "config_test_phrases.txt", + ], + visibility = ["//visibility:public"], +) diff --git a/test/testcases/BUILD.bazel b/test/testcases/BUILD.bazel new file mode 100644 index 000000000..4c9a63ab5 --- /dev/null +++ b/test/testcases/BUILD.bazel @@ -0,0 +1,6 @@ +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "testcases", + srcs = glob(["*.in"]) + glob(["*.ans"]), +)