From b750b6782ab5f9a9bb3a7651d08e96e96c4709c5 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 10:15:25 +0000 Subject: [PATCH 01/14] ci: init assets updater workflow --- .github/workflows/update-json-assets.yaml | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/update-json-assets.yaml diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml new file mode 100644 index 0000000..36cb68a --- /dev/null +++ b/.github/workflows/update-json-assets.yaml @@ -0,0 +1,33 @@ +name: update json assets +on: + push: + branches: [main] + pull_request: + branches: [main] + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: +jobs: + build: + runs-on: ubuntu-latest + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: latest + cache: yarn + - name: Install + run: yarn install + - name: Build + run: yarn run build + - name: run all downloaders + run: | + ls -lisha out/downloader/ + node out/downloader/http_headers.js || true + node out/downloader/mimetype.js || true + - uses: actions/upload-artifact@v4 + with: + name: npm-package + path: | + assets/ + retention-days: 3 From 27417b59712f4069b8b68aeb82d57c8d10a4d7c0 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 10:20:47 +0000 Subject: [PATCH 02/14] ci(updater): run all downloaders --- .github/workflows/update-json-assets.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 36cb68a..fbf9a86 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -7,7 +7,7 @@ on: # Allows you to run this 
workflow manually from the Actions tab workflow_dispatch: jobs: - build: + update-assets: runs-on: ubuntu-latest steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it @@ -23,8 +23,11 @@ jobs: - name: run all downloaders run: | ls -lisha out/downloader/ - node out/downloader/http_headers.js || true - node out/downloader/mimetype.js || true + for i in out/downloader/*.js; do + node "$i" || true + done + + git status - uses: actions/upload-artifact@v4 with: name: npm-package From 94fa88c3f3f62bdb9ddddbf204d45851cb1cbf4d Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:10:07 +0000 Subject: [PATCH 03/14] ci(updater): create pr for modified files --- .github/workflows/update-json-assets.yaml | 35 +++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index fbf9a86..6709ecc 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -28,9 +28,40 @@ jobs: done git status + + - name: creating the pull request body + id: pr-body + run: | + json_path="assets/**/*.json" + diff_shortstat=$(git diff --shortstat "$json_path") + diff_numstat=$(git diff --numstat "$json_path") + + pr_body="This pull request has been generated **automatically** to **update** the following files:\n\n```bash\n$diff_shortstat\n$diff_numstat```" + + echo -e "\n----------------------\nThe pull request body:\n----------------------\n" + echo -e "$pr_body" + + echo 'pr_body<<EOF' >> $GITHUB_OUTPUT + echo -e "$pr_body" >> $GITHUB_OUTPUT + echo 'EOF' >> $GITHUB_OUTPUT - uses: actions/upload-artifact@v4 with: - name: npm-package + name: json-assets path: | - assets/ + assets/**/*.json retention-days: 3 + - name: Create Pull Request + id: cpr + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.GITHUB_TOKEN }} + add-paths: | +
assets/**/*.json + commit-message: "chore(assets): update json assets" + branch: update-json-assets + delete-branch: true + base: main + title: "Update JSON files in assets/" + body: ${{ steps.pr-body.outputs.pr_body }} + # labels: + draft: false From a46a30972e0b885e169bd45024e71557d3504d05 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:17:53 +0000 Subject: [PATCH 04/14] fix: add missing dev dependency turndown-plugin-gfm --- package.json | 1 + yarn.lock | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/package.json b/package.json index ccfc5be..a443bd8 100644 --- a/package.json +++ b/package.json @@ -72,6 +72,7 @@ "rimraf": "^5", "swc-loader": "^0.2.6", "turndown": "^7.1.2", + "turndown-plugin-gfm": "^1.0.2", "typescript": "^5", "webpack": "^5", "webpack-cli": "^5" diff --git a/yarn.lock b/yarn.lock index 5a2e953..328354a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2929,6 +2929,11 @@ tunnel@0.0.6: resolved "https://registry.yarnpkg.com/tunnel/-/tunnel-0.0.6.tgz#72f1314b34a5b192db012324df2cc587ca47f92c" integrity sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg== +turndown-plugin-gfm@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz#6f8678a361f35220b2bdf5619e6049add75bf1c7" + integrity sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg== + turndown@^7.1.2: version "7.1.3" resolved "https://registry.yarnpkg.com/turndown/-/turndown-7.1.3.tgz#2890eb76c603e66bf0c9e91526582b563065c57d" From 4230a0c1727ec360a9e3bc3ff72935062bab4c53 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:20:20 +0000 Subject: [PATCH 05/14] style: remove obsolete code leftover from debugging --- src/downloader/lua_openresty.ts | 10 ---------- 1 file changed, 10 deletions(-) diff --git 
a/src/downloader/lua_openresty.ts b/src/downloader/lua_openresty.ts index b9ed80e..55a1b18 100755 --- a/src/downloader/lua_openresty.ts +++ b/src/downloader/lua_openresty.ts @@ -145,14 +145,9 @@ function processDirectiveElement( item.desc = item.desc || character; item.notes.push(character); - // console.log(temp.toString()); - // console.log(" --- --- --- --- ") - docsHTML += temp.toString(); } - // console.log(docsHTML); - if (item.def.startsWith("no")) { item.def = directiveName + " ;"; } @@ -184,11 +179,6 @@ function processDirectiveElement( {}, ]); - if (directiveName == "server_rewrite_by_lua_block") { - // TODO: needs to be changed remove SVG objects and add required contents (!) - console.log(docsHTML); - } - detailsStream.writeItem([ ManifestItemType.DirectiveDetails, directiveName, From 29fc98a0d97983acc9bc645a0d2c2146f7462986 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:25:50 +0000 Subject: [PATCH 06/14] ci(updater): run workflow every first day of the month --- .github/workflows/update-json-assets.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 6709ecc..0e22529 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -1,11 +1,13 @@ name: update json assets on: - push: - branches: [main] - pull_request: - branches: [main] + # pull_request: + # branches: [main] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: + + schedule: + # runs on the first day of the month at 04:05AM (UTC) + - cron: "5 4 1 * *" jobs: update-assets: runs-on: ubuntu-latest From d73a23b1360ff7014c4a705d943bd02ab06259d9 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:34:04 +0000 Subject: [PATCH 07/14] fix: assertion error in nginx 
directives parsing --- src/downloader/nginx_directives.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/downloader/nginx_directives.ts b/src/downloader/nginx_directives.ts index c76924e..48887d4 100755 --- a/src/downloader/nginx_directives.ts +++ b/src/downloader/nginx_directives.ts @@ -40,7 +40,7 @@ async function main() { assertLength(`document page title "${titleShouleBe}"`, $title, 1); const directiveLists = $title.parent().nextAll("ul.compact"); - assertLength("length(ul.compact)", directiveLists, 6); + assertLength("length(ul.compact)", directiveLists, 7); const modules: Array<{ moduleName: string; moduleIndex: number; uri: string }> = []; directiveLists.each((i, list) => { From f2d09abeb5a38f4dbb962569175a0ce84f7b28c0 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:37:15 +0000 Subject: [PATCH 08/14] ci(updater): run workflow after modifications to test --- .github/workflows/update-json-assets.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 0e22529..d784b6e 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -1,7 +1,12 @@ name: update json assets on: - # pull_request: - # branches: [main] + push: + paths: + - .github/workflows/update-json-assets.yaml + pull_request: + paths: + - .github/workflows/update-json-assets.yaml + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: From bcf553c93fc21895ae2424b452b92d78cb028b24 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:50:54 +0000 Subject: [PATCH 09/14] fix(updater): fix typos in bash code --- .github/workflows/update-json-assets.yaml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git 
a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index d784b6e..4840628 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -3,9 +3,6 @@ on: push: paths: - .github/workflows/update-json-assets.yaml - pull_request: - paths: - - .github/workflows/update-json-assets.yaml # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -13,6 +10,12 @@ on: schedule: # runs on the first day of the month at 04:05AM (UTC) - cron: "5 4 1 * *" + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +concurrency: + group: "updater" + cancel-in-progress: false + jobs: update-assets: runs-on: ubuntu-latest @@ -38,16 +41,19 @@ jobs: - name: creating the pull request body id: pr-body - run: | + run: | json_path="assets/**/*.json" - diff_shortstat=$(git diff --shortstat "$json_path") - diff_numstat=$(git diff --numstat "$json_path") + # capturing git diff stats and removing whitespace + diff_shortstat=$(git diff --shortstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') + diff_numstat=$(git diff --numstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') - pr_body="This pull request has been generated **automatically** to **update** the following files:\n\n```bash\n$diff_shortstat\n$diff_numstat```" + pr_body="This pull request has been generated **automatically** to **update** the following files:\n\n\`\`\`bash\n$diff_shortstat\n\n$diff_numstat\n\`\`\`\n" echo -e "\n----------------------\nThe pull request body:\n----------------------\n" echo -e "$pr_body" + echo -e "$pr_body" >> $GITHUB_STEP_SUMMARY + echo 'pr_body<<EOF' >> $GITHUB_OUTPUT echo -e "$pr_body" >> $GITHUB_OUTPUT echo 'EOF' >> $GITHUB_OUTPUT From defad96c32869203b780d4c0e16b028008505283 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date:
Mon, 15 Apr 2024 16:00:07 +0000 Subject: [PATCH 10/14] ci(updater): enhance downloader loop --- .github/workflows/update-json-assets.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 4840628..933606f 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -34,7 +34,11 @@ jobs: run: | ls -lisha out/downloader/ for i in out/downloader/*.js; do - node "$i" || true + # skipping the syntax downloader + if [[ $i != *"nginx_syntax"* ]]; then + echo "executing 'node $i' ..." + node "$i" || true + fi done git status From c0c8654bf3342eef48c46d1fa8760dd33fc30033 Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:07:26 +0000 Subject: [PATCH 11/14] ci(updater): add dependencies label to pr --- .github/workflows/update-json-assets.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 933606f..9836c8b 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -47,6 +47,7 @@ jobs: id: pr-body run: | json_path="assets/**/*.json" + # capturing git diff stats and removing whitespace diff_shortstat=$(git diff --shortstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') diff_numstat=$(git diff --numstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') @@ -80,5 +81,5 @@ jobs: base: main title: "Update JSON files in assets/" body: ${{ steps.pr-body.outputs.pr_body }} - # labels: + labels: dependencies draft: false From b4cca54c328fabc7b555e98363e942b71af79cdf Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:07:54 +0000 Subject: [PATCH 12/14] feat: add 
Portuguese version of http headers --- src/downloader/config_url.ts | 1 + src/downloader/http_headers.ts | 36 ++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/downloader/config_url.ts b/src/downloader/config_url.ts index 484112d..9d096b7 100644 --- a/src/downloader/config_url.ts +++ b/src/downloader/config_url.ts @@ -28,6 +28,7 @@ export const httpHeadersWikiURLs = { de: 'https://de.wikipedia.org/wiki/Liste_der_HTTP-Headerfelder', en: 'https://en.wikipedia.org/wiki/List_of_HTTP_header_fields', es: 'https://es.wikipedia.org/wiki/Anexo:Cabeceras_HTTP', + pt: 'https://pt.wikipedia.org/wiki/Lista_de_campos_de_cabe%C3%A7alho_HTTP', 'zh-Hans': 'https://zh.wikipedia.org/zh-cn/HTTP%E5%A4%B4%E5%AD%97%E6%AE%B5', 'zh-Hant-HK': 'https://zh.wikipedia.org/zh-hk/HTTP%E5%A4%B4%E5%AD%97%E6%AE%B5', 'zh-Hant-TW': 'https://zh.wikipedia.org/zh-tw/HTTP%E5%A4%B4%E5%AD%97%E6%AE%B5', diff --git a/src/downloader/http_headers.ts b/src/downloader/http_headers.ts index 66297d5..cbf687e 100644 --- a/src/downloader/http_headers.ts +++ b/src/downloader/http_headers.ts @@ -249,7 +249,7 @@ async function main() { const output = new JsonFileWriter(manifestFiles.httpHeaders("de")); const html = await getText("de", baseUrl); const $ = loadHtml(html); - const handleEnglishRow = ($row: Cheerio, type: ManifestItemType) => { + const handleGermanRow = ($row: Cheerio, type: ManifestItemType) => { const $cols = $row.find("td"); if ($cols.length === 0) return; const headerNames = normalizeHeaderName($cols.eq(0).text()); @@ -269,7 +269,39 @@ async function main() { const $rows = element.find("tr"); for (let row = 0; row < $rows.length; row++) { const $row = $rows.eq(row); - handleEnglishRow($row, ManifestItemType.HttpReqHeader); + handleGermanRow($row, ManifestItemType.HttpReqHeader); + } + } + output.close(); + } + + // pt + { + const baseUrl = httpHeadersWikiURLs.pt; + const output = new JsonFileWriter(manifestFiles.httpHeaders("pt")); + const html = await
getText("pt", baseUrl); + const $ = loadHtml(html); + const handlePortgueseRow = ($row: Cheerio, type: ManifestItemType) => { + const $cols = $row.find("td"); + if ($cols.length === 0) return; + const headerNames = normalizeHeaderName($cols.eq(0).text()); + const description = getDescriptionMarkdown($cols.eq(1), baseUrl); + if (!description) print.warn(`header ${headerNames[0]} has no description`); + for (let j = 0; j < headerNames.length; j++) { + const headerName = headerNames[j]; + output.writeItem(j === 0 ? [type, headerName, description] : [type, headerName, -1]); + } + }; + + const $reqH2 = $("h2 #Campos_de_resposta"); + assertLength("request fields h2", $reqH2, 1); + const $tables = getNextTables($reqH2.parent(), "h2"); + assertLength("request fields table", $tables, 2); + for (const element of $tables) { + const $rows = element.find("tr"); + for (let row = 0; row < $rows.length; row++) { + const $row = $rows.eq(row); + handlePortgueseRow($row, ManifestItemType.HttpReqHeader); } } output.close(); From a210fdb799ddc1bafa9148bdc1430320eaeb2d3e Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:15:59 +0000 Subject: [PATCH 13/14] ci(updater): fix bugs in git commands --- .github/workflows/update-json-assets.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 9836c8b..57e4529 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -41,6 +41,7 @@ jobs: fi done + git add assets/**/*.json git status - name: creating the pull request body @@ -49,8 +50,8 @@ jobs: json_path="assets/**/*.json" # capturing git diff stats and removing whitespace - diff_shortstat=$(git diff --shortstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') - diff_numstat=$(git diff --numstat "$json_path" | sed -re 
's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') + diff_shortstat=$(git diff --staged --shortstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') + diff_numstat=$(git diff --staged --numstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g') pr_body="This pull request has been generated **automatically** to **update** the following files:\n\n\`\`\`bash\n$diff_shortstat\n\n$diff_numstat\n\`\`\`\n" From 41e79b424b3dff94313d1938d246a2737abea6df Mon Sep 17 00:00:00 2001 From: Adrian Dischinger <72736591+AdrianDsg@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:28:21 +0000 Subject: [PATCH 14/14] ci(updater): temp deactivated peter-evans/create-pull-request as requested in code review --- .github/workflows/update-json-assets.yaml | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml index 57e4529..e465515 100644 --- a/.github/workflows/update-json-assets.yaml +++ b/.github/workflows/update-json-assets.yaml @@ -69,18 +69,18 @@ jobs: path: | assets/**/*.json retention-days: 3 - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v6 - with: - token: ${{ secrets.GITHUB_TOKEN }} - add-paths: | - assets/**/*.json - commit-message: "chore(assets): update json assets" - branch: update-json-assets - delete-branch: true - base: main - title: "Update JSON files in assets/" - body: ${{ steps.pr-body.outputs.pr_body }} - labels: dependencies - draft: false + # - name: Create Pull Request + # id: cpr + # uses: peter-evans/create-pull-request@v6 + # with: + # token: ${{ secrets.GITHUB_TOKEN }} + # add-paths: | + # assets/**/*.json + # commit-message: "chore(assets): update json assets" + # branch: update-json-assets + # delete-branch: true + # base: main + # title: "Update JSON files in assets/" + # body: ${{ steps.pr-body.outputs.pr_body }} + # labels: 
dependencies + # draft: false