From 923cc7ff8a0bfa824bd8d823eda36adab8da1a77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Fri, 5 Dec 2025 13:09:29 +0100 Subject: [PATCH 1/4] Add workflow for keyword checking --- .github/workflows/keywords.yaml | 15 ++++++++++ .../{check-keywords.sh => check-keywords.py} | 29 ++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/keywords.yaml rename scripts/{check-keywords.sh => check-keywords.py} (69%) diff --git a/.github/workflows/keywords.yaml b/.github/workflows/keywords.yaml new file mode 100644 index 0000000000000..bd5ae4c2d5d65 --- /dev/null +++ b/.github/workflows/keywords.yaml @@ -0,0 +1,15 @@ +name: Keywords + +on: + pull_request: + push: + branches: + - 'master' + - 'release**' + +jobs: + check-keywords: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - run: ./scripts/check-keywords.py --download_from_url diff --git a/scripts/check-keywords.sh b/scripts/check-keywords.py similarity index 69% rename from scripts/check-keywords.sh rename to scripts/check-keywords.py index fcee793d7fa87..a769d9dcc44e1 100755 --- a/scripts/check-keywords.sh +++ b/scripts/check-keywords.py @@ -1,11 +1,32 @@ #!/bin/python3 +import argparse +import requests import re import sys from pathlib import Path -parser = Path("../tidb/pkg/parser/parser.y") -if not parser.exists(): - sys.exit(f"{parser} doesn't exist") +aparser = argparse.ArgumentParser() +aparser.add_argument( + "--parser_file", default="../tidb/pkg/parser/parser.y", help="Path to parser.y" +) +aparser.add_argument( + "--parser_url", + default="https://github.com/pingcap/tidb/raw/refs/heads/master/pkg/parser/parser.y", + help="Url to parser.y", +) +aparser.add_argument("--download_from_url", action="store_true") +args = aparser.parse_args() + +if args.download_from_url: + r = requests.get(args.parser_url) + if r.status_code != 200: + sys.exit(f"failed to download parser file, got HTTP {r.status_code}") + lines = r.text.splitlines() +else: + parser = Path(args.parser_file) + if not parser.exists(): + sys.exit(f"{parser} doesn't exist") + lines = parser.read_text().split("\n") kwdocs = Path("keywords.md") if not kwdocs.exists(): @@ -15,7 +36,7 @@ errors = 0 section = "Unknown" -for line in parser.read_text().split("\n"): +for line in lines: if line == "": section = "NotKeywordToken" From 7157b8d82e59e6dbc4a5b52498565cb4c4efdfaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Fri, 5 Dec 2025 13:17:22 +0100 Subject: [PATCH 2/4] Update keywords --- keywords.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/keywords.md b/keywords.md index 1790dfb298f5c..931d44c70def5 100644 --- a/keywords.md +++ b/keywords.md @@ -63,6 +63,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - ACCOUNT - ACTION - ADD (R) +- ADD_COLUMNAR_REPLICA_ON_DEMAND - ADMIN - ADVISE - AFTER @@ -86,6 +87,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - AUTO_INCREMENT - AUTO_RANDOM - AUTO_RANDOM_BASE +- AUTOEXTEND_SIZE - AVG - AVG_ROW_LENGTH @@ -152,6 +154,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - COLUMN (R) - COLUMN_FORMAT - COLUMN_STATS_USAGE +- COLUMNAR - COLUMNS - COMMENT - COMMIT @@ -225,6 +228,9 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - DISK - DISTINCT (R) - DISTINCTROW (R) +- DISTRIBUTE +- DISTRIBUTION +- DISTRIBUTIONS - DIV (R) - DO - DOUBLE (R) @@ -248,6 +254,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - END - ENFORCED - ENGINE +- ENGINE_ATTRIBUTE - ENGINES - ENUM - ERROR @@ -264,6 +271,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - EXISTS (R) - EXIT (R) - EXPANSION +- EXPLORE - EXPIRE - EXPLAIN (R) - EXTENDED @@ -324,6 +332,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with I - IDENTIFIED +- IETF_QUOTES - IF (R) - IGNORE (R) - IGNORE_STATS @@ -394,6 +403,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - LINEAR (R) - LINES (R) - LIST +- LITE - LOAD (R) - LOAD_STATS - LOCAL @@ -497,6 +507,9 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - PACK_KEYS - PAGE +- PAGE_CHECKSUM +- PAGE_COMPRESSED +- PAGE_COMPRESSION_LEVEL - PARSER - PARTIAL - PARTITION (R) @@ -549,6 +562,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - RECURSIVE (R) - REDUNDANT - REFERENCES (R) +- REFRESH - REGEXP (R) - REGION - REGIONS @@ -589,6 +603,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - ROW_NUMBER (R-Window) - ROWS (R-Window) - RTREE +- RULE - RUN S @@ -601,6 +616,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - SECOND_MICROSECOND (R) - SECONDARY - SECONDARY_ENGINE +- SECONDARY_ENGINE_ATTRIBUTE - SECONDARY_LOAD - SECONDARY_UNLOAD - SECURITY @@ -700,6 +716,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - TIFLASH - TIKV_IMPORTER - TIME +- TIMEOUT - TIMESTAMP - TINYBLOB (R) - TINYINT (R) @@ -713,6 +730,7 @@ The following list shows the keywords in TiDB. Reserved keywords are marked with - TRADITIONAL - TRAILING (R) - TRANSACTION +- TRANSACTIONAL - TRIGGER (R) - TRIGGERS - TRUE (R) From 12048f3ed4b07e1e51a7ebf939b4aa7f8a31b2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Fri, 5 Dec 2025 13:18:44 +0100 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- scripts/check-keywords.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/check-keywords.py b/scripts/check-keywords.py index a769d9dcc44e1..ab2d5444eb1aa 100755 --- a/scripts/check-keywords.py +++ b/scripts/check-keywords.py @@ -12,21 +12,23 @@ aparser.add_argument( "--parser_url", default="https://github.com/pingcap/tidb/raw/refs/heads/master/pkg/parser/parser.y", - help="Url to parser.y", + help="URL to parser.y", ) aparser.add_argument("--download_from_url", action="store_true") args = aparser.parse_args() if args.download_from_url: - r = requests.get(args.parser_url) - if r.status_code != 200: - sys.exit(f"failed to download parser file, got HTTP {r.status_code}") - lines = r.text.splitlines() + try: + r = requests.get(args.parser_url, timeout=30) + r.raise_for_status() + lines = r.text.splitlines() + except requests.RequestException as e: + sys.exit(f"Failed to download parser file: {e}") else: parser = Path(args.parser_file) if not parser.exists(): sys.exit(f"{parser} doesn't exist") - lines = parser.read_text().split("\n") + lines = parser.read_text(encoding="utf-8").splitlines() kwdocs = Path("keywords.md") if not kwdocs.exists(): From 56361936588c55f01783ee523a2e7449333afa8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Eeden?= Date: Fri, 5 Dec 2025 13:32:12 +0100 Subject: [PATCH 4/4] Restrict to master branch --- .github/workflows/keywords.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/keywords.yaml b/.github/workflows/keywords.yaml index bd5ae4c2d5d65..5ebd112a8d592 100644 --- a/.github/workflows/keywords.yaml +++ b/.github/workflows/keywords.yaml @@ -2,10 +2,11 @@ name: Keywords on: pull_request: + branches: + - 'master' push: branches: - 'master' - - 'release**' jobs: check-keywords: