diff --git a/.github/workflows/keywords.yaml b/.github/workflows/keywords.yaml new file mode 100644 index 0000000000000..5ebd112a8d592 --- /dev/null +++ b/.github/workflows/keywords.yaml @@ -0,0 +1,16 @@ +name: Keywords + +on: + pull_request: + branches: + - 'master' + push: + branches: + - 'master' + +jobs: + check-keywords: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - run: ./scripts/check-keywords.py --download_from_url diff --git a/keywords.md b/keywords.md index 1790dfb298f5c..931d44c70def5 100644 --- a/keywords.md +++ b/keywords.md @@ -63,6 +63,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - ACCOUNT - ACTION - ADD (R) +- ADD_COLUMNAR_REPLICA_ON_DEMAND - ADMIN - ADVISE - AFTER @@ -86,6 +87,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - AUTO_INCREMENT - AUTO_RANDOM - AUTO_RANDOM_BASE +- AUTOEXTEND_SIZE - AVG - AVG_ROW_LENGTH @@ -152,6 +154,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - COLUMN (R) - COLUMN_FORMAT - COLUMN_STATS_USAGE +- COLUMNAR - COLUMNS - COMMENT - COMMIT @@ -225,6 +228,9 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - DISK - DISTINCT (R) - DISTINCTROW (R) +- DISTRIBUTE +- DISTRIBUTION +- DISTRIBUTIONS - DIV (R) - DO - DOUBLE (R) @@ -248,6 +254,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - END - ENFORCED - ENGINE +- ENGINE_ATTRIBUTE - ENGINES - ENUM - ERROR @@ -264,6 +271,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - EXISTS (R) - EXIT (R) - EXPANSION - EXPIRE - EXPLAIN (R) +- EXPLORE - EXTENDED @@ -324,6 +332,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with I - IDENTIFIED +- IETF_QUOTES - IF (R) - IGNORE (R) - IGNORE_STATS @@ -394,6 +403,7 @@ The following list shows the keywords in TiDB. 
Reserved keywords are marked with - LINEAR (R) - LINES (R) - LIST +- LITE - LOAD (R) - LOAD_STATS - LOCAL @@ -497,6 +507,9 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - PACK_KEYS - PAGE +- PAGE_CHECKSUM +- PAGE_COMPRESSED +- PAGE_COMPRESSION_LEVEL - PARSER - PARTIAL - PARTITION (R) @@ -549,6 +562,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - RECURSIVE (R) - REDUNDANT - REFERENCES (R) +- REFRESH - REGEXP (R) - REGION - REGIONS @@ -589,6 +603,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - ROW_NUMBER (R-Window) - ROWS (R-Window) - RTREE +- RULE - RUN S @@ -601,6 +616,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - SECOND_MICROSECOND (R) - SECONDARY - SECONDARY_ENGINE +- SECONDARY_ENGINE_ATTRIBUTE - SECONDARY_LOAD - SECONDARY_UNLOAD - SECURITY @@ -700,6 +716,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - TIFLASH - TIKV_IMPORTER - TIME +- TIMEOUT - TIMESTAMP - TINYBLOB (R) - TINYINT (R) @@ -713,6 +730,7 @@ The following list shows the keywords in TiDB. 
Reserved keywords are marked with - TRADITIONAL - TRAILING (R) - TRANSACTION +- TRANSACTIONAL - TRIGGER (R) - TRIGGERS - TRUE (R) diff --git a/scripts/check-keywords.sh b/scripts/check-keywords.py similarity index 68% rename from scripts/check-keywords.sh rename to scripts/check-keywords.py index fcee793d7fa87..ab2d5444eb1aa 100755 --- a/scripts/check-keywords.sh +++ b/scripts/check-keywords.py @@ -1,11 +1,34 @@ #!/bin/python3 +import argparse +import requests import re import sys from pathlib import Path -parser = Path("../tidb/pkg/parser/parser.y") -if not parser.exists(): - sys.exit(f"{parser} doesn't exist") +aparser = argparse.ArgumentParser() +aparser.add_argument( + "--parser_file", default="../tidb/pkg/parser/parser.y", help="Path to parser.y" +) +aparser.add_argument( + "--parser_url", + default="https://github.com/pingcap/tidb/raw/refs/heads/master/pkg/parser/parser.y", + help="URL to parser.y", +) +aparser.add_argument("--download_from_url", action="store_true") +args = aparser.parse_args() + +if args.download_from_url: + try: + r = requests.get(args.parser_url, timeout=30) + r.raise_for_status() + lines = r.text.splitlines() + except requests.RequestException as e: + sys.exit(f"Failed to download parser file: {e}") +else: + parser = Path(args.parser_file) + if not parser.exists(): + sys.exit(f"{parser} doesn't exist") + lines = parser.read_text(encoding="utf-8").splitlines() kwdocs = Path("keywords.md") if not kwdocs.exists(): @@ -15,7 +38,7 @@ errors = 0 section = "Unknown" -for line in parser.read_text().split("\n"): +for line in lines: if line == "": section = "NotKeywordToken"