Skip to content

Commit

Permalink
NU IS AL should have no break opportunity (#3548)
Browse files Browse the repository at this point in the history
* Fix ISxAL case.

* Run cargo make bakeddata

* Add comment to link this PR in test file.
  • Loading branch information
makotokato authored Jun 22, 2023
1 parent d22475d commit 004581f
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 20 deletions.

Large diffs are not rendered by default.

15 changes: 12 additions & 3 deletions components/segmenter/tests/spec_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,8 @@ impl Iterator for TestContentIterator {
}
}

#[test]
fn run_line_break_test() {
let test_iter = TestContentIterator::new("./tests/testdata/LineBreakTest.txt");
fn line_break_test(filename: &str) {
let test_iter = TestContentIterator::new(filename);
let segmenter =
LineSegmenter::try_new_dictionary_unstable(&icu_testdata::unstable()).expect("Data exists");
for mut test in test_iter {
Expand Down Expand Up @@ -140,6 +139,16 @@ fn run_line_break_test() {
}
}

/// Runs the shared `line_break_test` driver over the `LineBreakTest.txt`
/// test data file.
#[test]
fn run_line_break_test() {
    let path = "./tests/testdata/LineBreakTest.txt";
    line_break_test(path);
}

/// Runs the shared `line_break_test` driver over `LineBreakExtraTest.txt`,
/// the file holding additional line breaking cases that are not part of
/// `LineBreakTest.txt`.
#[test]
fn run_line_break_extra_test() {
    let path = "./tests/testdata/LineBreakExtraTest.txt";
    line_break_test(path);
}

#[test]
fn run_word_break_test() {
let test_iter = TestContentIterator::new("./tests/testdata/WordBreakTest.txt");
Expand Down
7 changes: 7 additions & 0 deletions components/segmenter/tests/testdata/LineBreakExtraTest.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Additional line breaking tests, not in LineBreakTest.txt
#
# https://github.com/unicode-org/icu4x/pull/3548
× 0031 × 003A × 0041 ÷ # × [NU] × [IS] × [AL] ÷
× 0031 × 003A × 05D0 ÷ # × [NU] × [IS] × [HL] ÷
× 0031 × 003A ÷ FFFC ÷ # × [NU] × [IS] ÷ [CB] ÷
× 0031 × 003A ÷ 1F3CA ÷ # × [NU] × [IS] ÷ [EB] ÷
10 changes: 2 additions & 8 deletions provider/datagen/data/segmenter/rules/line.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1139,12 +1139,6 @@ left = [ "LB25_NU_CL" ]
right = [ "Any" ]
break_state = true

[[rules]]
# LB25
left = [ "LB25_NU_IS" ]
right = [ "Any" ]
break_state = true

[[rules]]
# LB25
left = [ "LB25_NU_SY" ]
Expand Down Expand Up @@ -1192,7 +1186,7 @@ break_state = false
[[rules]]
# LB29
# (LB1 AL = AI, AL, SA or XX)
left = [ "IS" ]
left = [ "IS", "LB25_NU_IS" ]
right = [ "AL", "HL", "AI", "SA", "XX" ]
break_state = false

Expand Down Expand Up @@ -1231,7 +1225,7 @@ break_state = false

[[rules]]
# (LB25)
left = [ "LB25_NU_CP" ]
left = [ "LB25_NU_CP", "LB25_NU_IS" ]
right = [ "Any" ]
break_state = true

Expand Down
2 changes: 1 addition & 1 deletion provider/repodata/data/json/fingerprints.csv

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions provider/repodata/data/json/segmenter/line@1/und.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion provider/testdata/data/postcard/fingerprints.csv
Original file line number Diff line number Diff line change
Expand Up @@ -1559,7 +1559,7 @@ relativetime/short/year@1, und, 62B, 45e258268a1e1f9
segmenter/dictionary/w_auto@1, ja, 2007113B, f7552d2848b39b0d
segmenter/dictionary/wl_ext@1, th, 224981B, f2d574736bb1a754
segmenter/grapheme@1, und, 9078B, 521276f1d6a6e7fb
segmenter/line@1, und, 18811B, a5a3ac8178b6b335
segmenter/line@1, und, 18811B, 131b23adbe306490
segmenter/lstm/wl_auto@1, th, 72034B, c46e2e0c098c1fc1
segmenter/sentence@1, und, 14402B, 6adb54fd1dae7b09
segmenter/word@1, und, 14641B, d91c662e2d94f17f
Expand Down
Binary file modified provider/testdata/data/testdata.postcard
Binary file not shown.

0 comments on commit 004581f

Please sign in to comment.