Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for Unicode properties Script Values added in ES2022 #27

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions scripts/update-unicode-properties.ts
Original file line number Diff line number Diff line change
@@ -4,33 +4,47 @@ import { CLIEngine } from "eslint"

/**
 * Specification pages to read Unicode property tables from, one entry per
 * ECMAScript edition.
 *
 * Each entry carries:
 * - `url`: the page holding the tables for that edition,
 * - `version`: the ECMAScript edition year,
 * - `binProperties` / `gcValues` / `scValues`: `#id` strings naming the
 *   binary-property, General_Category-value and Script-value tables
 *   (presumably used as selectors on the fetched page; the consumer is not
 *   shown here).
 *
 * Every entry has exactly one `url`. Earlier revisions listed a stale
 * `www.ecma-international.org` / `tc39.es/ecma262/` URL immediately before
 * the current one in the same object literal; the later key always won at
 * runtime, so only the effective URL is kept.
 */
const DataSources = [
    {
        url: "https://262.ecma-international.org/9.0/",
        version: 2018,
        binProperties: "#table-binary-unicode-properties",
        gcValues: "#table-unicode-general-category-values",
        scValues: "#table-unicode-script-values",
    },
    {
        url: "https://262.ecma-international.org/10.0/",
        version: 2019,
        binProperties: "#table-binary-unicode-properties",
        gcValues: "#table-unicode-general-category-values",
        scValues: "#table-unicode-script-values",
    },
    {
        url: "https://262.ecma-international.org/11.0/",
        version: 2020,
        binProperties: "#table-binary-unicode-properties",
        gcValues: "#table-unicode-general-category-values",
        scValues: "#table-unicode-script-values",
    },
    {
        url: "https://262.ecma-international.org/12.0/",
        version: 2021,
        binProperties: "#table-binary-unicode-properties",
        gcValues: "#table-unicode-general-category-values",
        scValues: "#table-unicode-script-values",
    },
    {
        url: "https://tc39.es/ecma262/2022/multipage/text-processing.html",
        version: 2022,
        binProperties: "#table-binary-unicode-properties",
        gcValues: "#table-unicode-general-category-values",
        scValues: "#table-unicode-script-values",
    },
    {
        // Unversioned path: this is whatever the current draft is at fetch time.
        url: "https://tc39.es/ecma262/multipage/text-processing.html",
        version: 2023,
        binProperties: "#table-binary-unicode-properties",
        gcValues: "#table-unicode-general-category-values",
        scValues: "#table-unicode-script-values",
    },
]
// Repository-relative path of the properties module this script works on
// (presumably the file it regenerates — the code using it is not shown here).
const FILE_PATH = "src/unicode/properties.ts"
// All script output goes through the global console under this name.
const logger = console
27 changes: 26 additions & 1 deletion src/unicode/properties.ts
Original file line number Diff line number Diff line change
@@ -9,16 +9,24 @@ class DataSet {
// Presumably the cached Set for the ES2020 list, mirroring the fields below
// (its getter is not visible in this view).
private _set2020: Set<string> | undefined
// Raw space-separated name list for ES2021, assigned in the constructor.
private _raw2021: string
// Set built from `_raw2021` on first access of `es2021`; undefined until then.
private _set2021: Set<string> | undefined
// Raw space-separated name list for ES2022, assigned in the constructor.
private _raw2022: string
// Set built from `_raw2022` on first access of `es2022`; undefined until then.
private _set2022: Set<string> | undefined
// Raw space-separated name list for ES2023, assigned in the constructor.
private _raw2023: string
// Set built from `_raw2023` on first access of `es2023`; undefined until then.
private _set2023: Set<string> | undefined
/**
 * Stores one raw name list per ECMAScript edition without parsing it.
 * The strings are only split into Sets later, by the `esXXXX` getters
 * (see `es2021`–`es2023`, which split on a single space).
 *
 * @param raw2018 Space-separated names for ES2018.
 * @param raw2019 Space-separated names for ES2019.
 * @param raw2020 Space-separated names for ES2020.
 * @param raw2021 Space-separated names for ES2021.
 * @param raw2022 Space-separated names for ES2022.
 * @param raw2023 Space-separated names for ES2023.
 */
public constructor(
raw2018: string,
raw2019: string,
raw2020: string,
raw2021: string,
raw2022: string,
raw2023: string,
) {
this._raw2018 = raw2018
this._raw2019 = raw2019
this._raw2020 = raw2020
this._raw2021 = raw2021
this._raw2022 = raw2022
this._raw2023 = raw2023
}
public get es2018(): Set<string> {
return (
@@ -40,6 +48,16 @@ class DataSet {
this._set2021 || (this._set2021 = new Set(this._raw2021.split(" ")))
)
}
/**
 * The ES2022 name list as a Set.
 * The Set is created from the raw space-separated string the first time
 * this getter runs and the same instance is returned on every later access.
 */
public get es2022(): Set<string> {
    let names = this._set2022
    if (!names) {
        names = new Set(this._raw2022.split(" "))
        this._set2022 = names
    }
    return names
}
/**
 * The ES2023 name list as a Set.
 * The Set is created from the raw space-separated string the first time
 * this getter runs and the same instance is returned on every later access.
 */
public get es2023(): Set<string> {
    let names = this._set2023
    if (!names) {
        names = new Set(this._raw2023.split(" "))
        this._set2023 = names
    }
    return names
}
}

// The two spellings accepted for the General_Category property name:
// the long form and the short form `gc`.
const gcNameSet = new Set(["General_Category", "gc"])
@@ -49,18 +67,24 @@ const gcValueSets = new DataSet(
"",
"",
"",
"",
"",
)
// Script property values, one space-separated list per edition, passed in
// constructor order (2018, 2019, 2020, 2021, 2022, 2023). Each list holds
// only the names not already present in an earlier argument; lookups elsewhere
// in this file OR the per-edition sets together up to the requested version.
// NOTE(review): an empty argument is not an empty Set — `"".split(" ")`
// yields `[""]`, so the corresponding getter returns a Set containing "".
const scValueSets = new DataSet(
// ES2018
"Adlam Adlm Aghb Ahom Anatolian_Hieroglyphs Arab Arabic Armenian Armi Armn Avestan Avst Bali Balinese Bamu Bamum Bass Bassa_Vah Batak Batk Beng Bengali Bhaiksuki Bhks Bopo Bopomofo Brah Brahmi Brai Braille Bugi Buginese Buhd Buhid Cakm Canadian_Aboriginal Cans Cari Carian Caucasian_Albanian Chakma Cham Cher Cherokee Common Copt Coptic Cprt Cuneiform Cypriot Cyrillic Cyrl Deseret Deva Devanagari Dsrt Dupl Duployan Egyp Egyptian_Hieroglyphs Elba Elbasan Ethi Ethiopic Geor Georgian Glag Glagolitic Gonm Goth Gothic Gran Grantha Greek Grek Gujarati Gujr Gurmukhi Guru Han Hang Hangul Hani Hano Hanunoo Hatr Hatran Hebr Hebrew Hira Hiragana Hluw Hmng Hung Imperial_Aramaic Inherited Inscriptional_Pahlavi Inscriptional_Parthian Ital Java Javanese Kaithi Kali Kana Kannada Katakana Kayah_Li Khar Kharoshthi Khmer Khmr Khoj Khojki Khudawadi Knda Kthi Lana Lao Laoo Latin Latn Lepc Lepcha Limb Limbu Lina Linb Linear_A Linear_B Lisu Lyci Lycian Lydi Lydian Mahajani Mahj Malayalam Mand Mandaic Mani Manichaean Marc Marchen Masaram_Gondi Meetei_Mayek Mend Mende_Kikakui Merc Mero Meroitic_Cursive Meroitic_Hieroglyphs Miao Mlym Modi Mong Mongolian Mro Mroo Mtei Mult Multani Myanmar Mymr Nabataean Narb Nbat New_Tai_Lue Newa Nko Nkoo Nshu Nushu Ogam Ogham Ol_Chiki Olck Old_Hungarian Old_Italic Old_North_Arabian Old_Permic Old_Persian Old_South_Arabian Old_Turkic Oriya Orkh Orya Osage Osge Osma Osmanya Pahawh_Hmong Palm Palmyrene Pau_Cin_Hau Pauc Perm Phag Phags_Pa Phli Phlp Phnx Phoenician Plrd Prti Psalter_Pahlavi Qaac Qaai Rejang Rjng Runic Runr Samaritan Samr Sarb Saur Saurashtra Sgnw Sharada Shavian Shaw Shrd Sidd Siddham SignWriting Sind Sinh Sinhala Sora Sora_Sompeng Soyo Soyombo Sund Sundanese Sylo Syloti_Nagri Syrc Syriac Tagalog Tagb Tagbanwa Tai_Le Tai_Tham Tai_Viet Takr Takri Tale Talu Tamil Taml Tang Tangut Tavt Telu Telugu Tfng Tglg Thaa Thaana Thai Tibetan Tibt Tifinagh Tirh Tirhuta Ugar Ugaritic Vai Vaii Wara Warang_Citi Xpeo Xsux Yi Yiii Zanabazar_Square Zanb Zinh Zyyy",
// ES2019
"Dogr Dogra Gong Gunjala_Gondi Hanifi_Rohingya Maka Makasar Medefaidrin Medf Old_Sogdian Rohg Sogd Sogdian Sogo",
// ES2020
"Elym Elymaic Hmnp Nand Nandinagari Nyiakeng_Puachue_Hmong Wancho Wcho",
// ES2021
"Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi",
// ES2022
"Cpmn Cypro_Minoan Old_Uyghur Ougr Tangsa Tnsa Toto Vith Vithkuqi",
// ES2023 (no entries listed)
"",
)
// Binary Unicode property names, one space-separated list per edition, passed
// in constructor order (2018, 2019, 2020, 2021, 2022, 2023). Each list holds
// only the names not already present in an earlier argument.
// NOTE(review): an empty argument is not an empty Set — `"".split(" ")`
// yields `[""]`, so the corresponding getter returns a Set containing "".
const binPropertySets = new DataSet(
// ES2018
"AHex ASCII ASCII_Hex_Digit Alpha Alphabetic Any Assigned Bidi_C Bidi_Control Bidi_M Bidi_Mirrored CI CWCF CWCM CWKCF CWL CWT CWU Case_Ignorable Cased Changes_When_Casefolded Changes_When_Casemapped Changes_When_Lowercased Changes_When_NFKC_Casefolded Changes_When_Titlecased Changes_When_Uppercased DI Dash Default_Ignorable_Code_Point Dep Deprecated Dia Diacritic Emoji Emoji_Component Emoji_Modifier Emoji_Modifier_Base Emoji_Presentation Ext Extender Gr_Base Gr_Ext Grapheme_Base Grapheme_Extend Hex Hex_Digit IDC IDS IDSB IDST IDS_Binary_Operator IDS_Trinary_Operator ID_Continue ID_Start Ideo Ideographic Join_C Join_Control LOE Logical_Order_Exception Lower Lowercase Math NChar Noncharacter_Code_Point Pat_Syn Pat_WS Pattern_Syntax Pattern_White_Space QMark Quotation_Mark RI Radical Regional_Indicator SD STerm Sentence_Terminal Soft_Dotted Term Terminal_Punctuation UIdeo Unified_Ideograph Upper Uppercase VS Variation_Selector White_Space XIDC XIDS XID_Continue XID_Start space",
// ES2019
"Extended_Pictographic",
// ES2020 (no entries listed)
"",
// ES2021
"EBase EComp EMod EPres ExtPict",
// ES2022 (no entries listed)
"",
// ES2023 (no entries listed)
"",
)

export function isValidUnicodeProperty(
@@ -76,7 +100,8 @@ export function isValidUnicodeProperty(
(version >= 2018 && scValueSets.es2018.has(value)) ||
(version >= 2019 && scValueSets.es2019.has(value)) ||
(version >= 2020 && scValueSets.es2020.has(value)) ||
(version >= 2021 && scValueSets.es2021.has(value))
(version >= 2021 && scValueSets.es2021.has(value)) ||
(version >= 2022 && scValueSets.es2022.has(value))
)
}
return false