Skip to content

Commit

Permalink
Regenerate data for GB18030-2022 (tests fail)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Oct 24, 2024
1 parent acae064 commit 2f1b14f
Show file tree
Hide file tree
Showing 29 changed files with 139 additions and 104 deletions.
5 changes: 2 additions & 3 deletions generate-encoding-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import os.path

if (not os.path.isfile("../encoding/encodings.json")) or (not os.path.isfile("../encoding/indexes.json")):
sys.stderr.write("This script needs a clone of https://github.com/whatwg/encoding/ (preferably at revision f381389) next to the encoding_rs directory.\n");
sys.stderr.write("This script needs a clone of https://github.com/whatwg/encoding/ next to the encoding_rs directory.\n");
sys.exit(-1)

if not os.path.isfile("../encoding_c/src/lib.rs"):
Expand Down Expand Up @@ -1612,8 +1612,7 @@ def write_variant_method(name, mut, arg_list, ret, variants, excludes, kind):

# Unit tests

TEST_HEADER = '''Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
TEST_HEADER = '''Generated from WHATWG indexes.json; see LICENSE-WHATWG.
This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
57 changes: 44 additions & 13 deletions src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,37 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// The above license applies to code in this file. The data in this
// file is generated from WHATWG's indexes.json, which came under
// the following license:

// Copyright © WHATWG (Apple, Google, Mozilla, Microsoft).
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
// Instead, please regenerate using generate-encoding-data.py

Expand Down Expand Up @@ -88061,13 +88092,13 @@ pub static GBK_OTHER_UNSORTED_OFFSETS: [u16; 59] = [
pub static GBK_BOTTOM: [u16; 101] = [
0xF92C, 0xF979, 0xF995, 0xF9E7, 0xF9F1, 0xFA0C, 0xFA0D, 0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14,
0xFA18, 0xFA1F, 0xFA20, 0xFA21, 0xFA23, 0xFA24, 0xFA27, 0xFA28, 0xFA29, 0x2E81, 0xE816, 0xE817,
0xE818, 0x2E84, 0x3473, 0x3447, 0x2E88, 0x2E8B, 0xE81E, 0x359E, 0x361A, 0x360E, 0x2E8C, 0x2E97,
0x396E, 0x3918, 0xE826, 0x39CF, 0x39DF, 0x3A73, 0x39D0, 0xE82B, 0xE82C, 0x3B4E, 0x3C6E, 0x3CE0,
0x2EA7, 0xE831, 0xE832, 0x2EAA, 0x4056, 0x415F, 0x2EAE, 0x4337, 0x2EB3, 0x2EB6, 0x2EB7, 0xE83B,
0x43B1, 0x43AC, 0x2EBB, 0x43DD, 0x44D6, 0x4661, 0x464C, 0xE843, 0x4723, 0x4729, 0x477C, 0x478D,
0xE818, 0x2E84, 0x3473, 0x3447, 0x2E88, 0x2E8B, 0x9FB4, 0x359E, 0x361A, 0x360E, 0x2E8C, 0x2E97,
0x396E, 0x3918, 0x9FB5, 0x39CF, 0x39DF, 0x3A73, 0x39D0, 0x9FB6, 0x9FB7, 0x3B4E, 0x3C6E, 0x3CE0,
0x2EA7, 0xE831, 0x9FB8, 0x2EAA, 0x4056, 0x415F, 0x2EAE, 0x4337, 0x2EB3, 0x2EB6, 0x2EB7, 0xE83B,
0x43B1, 0x43AC, 0x2EBB, 0x43DD, 0x44D6, 0x4661, 0x464C, 0x9FB9, 0x4723, 0x4729, 0x477C, 0x478D,
0x2ECA, 0x4947, 0x497A, 0x497D, 0x4982, 0x4983, 0x4985, 0x4986, 0x499F, 0x499B, 0x49B7, 0x49B6,
0xE854, 0xE855, 0x4CA3, 0x4C9F, 0x4CA0, 0x4CA1, 0x4C77, 0x4CA2, 0x4D13, 0x4D14, 0x4D15, 0x4D16,
0x4D17, 0x4D18, 0x4D19, 0x4DAE, 0xE864,
0x9FBA, 0xE855, 0x4CA3, 0x4C9F, 0x4CA0, 0x4CA1, 0x4C77, 0x4CA2, 0x4D13, 0x4D14, 0x4D15, 0x4D16,
0x4D17, 0x4D18, 0x4D19, 0x4DAE, 0x9FBB,
];

pub static GB2312_HANZI: [u16; 6768] = [
Expand Down Expand Up @@ -88650,7 +88681,7 @@ pub static GB2312_SYMBOLS: [u16; 94] = [

pub static GB2312_SYMBOLS_AFTER_GREEK: [u16; 22] = [
0xFE35, 0xFE36, 0xFE39, 0xFE3A, 0xFE3F, 0xFE40, 0xFE3D, 0xFE3E, 0xFE41, 0xFE42, 0xFE43, 0xFE44,
0xE794, 0xE795, 0xFE3B, 0xFE3C, 0xFE37, 0xFE38, 0xFE31, 0xE796, 0xFE33, 0xFE34,
0xFE17, 0xFE18, 0xFE3B, 0xFE3C, 0xFE37, 0xFE38, 0xFE31, 0xFE19, 0xFE33, 0xFE34,
];

pub static GB2312_PINYIN: [u16; 32] = [
Expand All @@ -88659,18 +88690,18 @@ pub static GB2312_PINYIN: [u16; 32] = [
0x00FC, 0x00EA, 0x0251, 0x1E3F, 0x0144, 0x0148, 0x01F9, 0x0261,
];

pub static GB2312_OTHER_POINTERS: [u16; 44] = [
pub static GB2312_OTHER_POINTERS: [u16; 47] = [
0x0000, 0x000A, 0x0010, 0x0024, 0x0038, 0x0042, 0x0043, 0x0044, 0x004E, 0x0050, 0x005C, 0x005E,
0x0061, 0x0062, 0x00BB, 0x00BC, 0x010F, 0x011A, 0x0170, 0x0178, 0x0189, 0x0190, 0x0198, 0x01A9,
0x01B0, 0x01B7, 0x01CD, 0x01D6, 0x01DC, 0x01DD, 0x01F7, 0x0206, 0x020C, 0x020D, 0x0227, 0x0234,
0x0254, 0x0258, 0x027D, 0x0292, 0x0295, 0x02E1, 0x02F0, 0x0524,
0x01B0, 0x01B1, 0x01B2, 0x01B3, 0x01B7, 0x01CD, 0x01D6, 0x01DC, 0x01DD, 0x01F7, 0x0206, 0x020C,
0x020D, 0x0227, 0x0234, 0x0254, 0x0258, 0x027D, 0x0292, 0x0295, 0x02E1, 0x02F0, 0x0524,
];

pub static GB2312_OTHER_UNSORTED_OFFSETS: [u16; 43] = [
pub static GB2312_OTHER_UNSORTED_OFFSETS: [u16; 46] = [
0x2170, 0xE766, 0x2488, 0x2474, 0x2460, 0x20AC, 0xE76D, 0x3220, 0xE76E, 0x2160, 0xE770, 0xFF01,
0xFFE5, 0xFF05, 0xFFE3, 0x3041, 0xE772, 0x30A1, 0xE77D, 0x0391, 0x03A3, 0xE785, 0x03B1, 0x03C3,
0xE78D, 0x0000, 0xE797, 0x0410, 0x0401, 0x0416, 0xE7A0, 0x0430, 0x0451, 0x0436, 0xE7AF, 0x0000,
0xE7C9, 0x3105, 0xE7CD, 0xE7FE, 0x2500, 0xE801, 0xE000,
0xFE10, 0xFE12, 0xFE11, 0xFE13, 0x0000, 0xE797, 0x0410, 0x0401, 0x0416, 0xE7A0, 0x0430, 0x0451,
0x0436, 0xE7AF, 0x0000, 0xE7C9, 0x3105, 0xE7CD, 0xE7FE, 0x2500, 0xE801, 0xE000,
];

pub static GB18030_RANGE_POINTERS: [u16; 206] = [
Expand Down
31 changes: 31 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,37 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// The above license applies to code in this file. The label data in
// this file is generated from WHATWG's encodings.json, which came under
// the following license:

// Copyright © WHATWG (Apple, Google, Mozilla, Microsoft).
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#![cfg_attr(
feature = "cargo-clippy",
allow(doc_markdown, inline_always, new_ret_no_self)
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/big5_in.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/big5_in_ref.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/big5_out.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/big5_out_ref.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/euc_kr_in.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/euc_kr_in_ref.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/euc_kr_out.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/euc_kr_out_ref.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
3 changes: 1 addition & 2 deletions src/test_data/gb18030_in.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down
39 changes: 19 additions & 20 deletions src/test_data/gb18030_in_ref.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Any copyright to the test code below this comment is dedicated to the
Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
Generated from WHATWG indexes.json; see LICENSE-WHATWG.

This is a generated file. Please do not edit.
Instead, please regenerate using generate-encoding-data.py
Expand Down Expand Up @@ -7185,13 +7184,13 @@ Instead, please regenerate using generate-encoding-data.py
χ
ψ
ω
Expand All @@ -7204,14 +7203,14 @@ Instead, please regenerate using generate-encoding-data.py
Expand Down Expand Up @@ -23778,27 +23777,27 @@ Instead, please regenerate using generate-encoding-data.py
Expand All @@ -23815,7 +23814,7 @@ Instead, please regenerate using generate-encoding-data.py
Expand All @@ -23832,7 +23831,7 @@ Instead, please regenerate using generate-encoding-data.py
Expand All @@ -23848,7 +23847,7 @@ Instead, please regenerate using generate-encoding-data.py
Expand Down
Loading

0 comments on commit 2f1b14f

Please sign in to comment.