Skip to content

GitHub pages and scraping #1016

GitHub pages and scraping

GitHub pages and scraping #1016

Workflow file for this run

name: GitHub pages and scraping
on:
push:
branches:
- main
schedule:
- cron: "0 12 * * *" # every day at 12:00PM
jobs:
build-deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- uses: actions/setup-node@v3
with:
node-version: 16
registry-url: https://registry.npmjs.org/
- name: Cache dependencies
uses: actions/cache@v1
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/yarn.lock') }}
restore-keys: |
${{ runner.os }}-node-
- run: git clone https://github.com/meilisearch/specifications
- run: yarn install
- run: git pull # this can be useful if the action is been re-run
- run: yarn build
- run: rm -rf docs
- run: cp -r .vuepress/dist docs
- run: cp CNAME favicon.ico docs/
- run: |
git config --global user.name "meili-bot"
git config --global user.email "74670311+meili-bot@users.noreply.github.com"
git add docs
git commit -m "update the website according to the specs"
git push
run-scraper:
needs: build-deploy
runs-on: ubuntu-latest
env:
HOST_URL: ${{ secrets.MEILISEARCH_HOST_URL }}
API_KEY: ${{ secrets.MEILISEARCH_API_KEY }}
steps:
- name: Wait 2 minutes for the github-pages to deploy
# usually the deployment of the github-pages takes ~1 minute thus we
# should be safe by sleeping two minutes
run: sleep 120
- uses: actions/checkout@master
- name: Delete all documents in index
run: |
curl -X DELETE "$HOST_URL/indexes/main/documents" -H "Authorization: Bearer $API_KEY"
- name: Run scraper
env:
CONFIG_FILE_PATH: ${{ github.workspace }}/.vuepress/scraper/
run: |
docker run -t --rm \
-e MEILISEARCH_HOST_URL=$HOST_URL \
-e MEILISEARCH_API_KEY=$API_KEY \
-v $CONFIG_FILE_PATH:/config \
getmeili/docs-scraper:v0.12.7 pipenv run ./docs_scraper /config/config.json