Skip to content

scrape

scrape #181

Workflow file for this run

# Scrape deployed site into Algolia index
# https://glebbahmutov.com/blog/scrape-static-site-with-algolia/
name: scrape
on:
  # allow running the scraper manually
  workflow_dispatch:
  # and scrape weekly (GitHub evaluates cron schedules in UTC:
  # this fires at 00:00 every Tuesday)
  schedule:
    - cron: '0 0 * * TUE'
jobs:
  scrape:
    # the ubuntu-20.04 hosted runner image has been retired by GitHub,
    # so pin a runner image that is still available
    runs-on: ubuntu-22.04
    steps:
      # we need our scraping config, thus check out code
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          # do not leave the checkout token in the local git config
          persist-credentials: false

      - uses: actions/setup-node@v4
        with:
          node-version: 20

      - name: Print Node version 🖨️
        run: node -v

      - name: Install dependencies 📦
        uses: cypress-io/github-action@v6
        with:
          # just perform install
          runTests: false

      # expose the script's stdout as the step output "version"
      - name: Set Cypress version 🎁
        id: cypress
        run: echo "version=$(node ./src/print-cypress-version)" >> "$GITHUB_OUTPUT"

      - name: Print Cypress version 🖨
        run: echo "Cypress version is ${{ steps.cypress.outputs.version }}"

      - name: Update Algolia config 📝
        run: node ./update-algolia-config https://glebbahmutov.com/cypress-examples/

      # when scraping the site, inject secrets as environment variables
      # then pass their values into the Docker container using "-e" syntax
      # and inject config.json contents as another variable
      # You can run the scraper locally using as-a to pass the environment variables
      # and using a local Docker container
      # $ as-a . docker run -e APPLICATION_ID -e API_KEY ...
      - name: scrape the site 🧽
        env:
          APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }}
          API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
        run: |
          docker run \
            -e APPLICATION_ID -e API_KEY \
            -e CONFIG="$(cat algolia-config.json)" \
            algolia/docsearch-scraper:v1.6.0