Skip to content

Scrape latest data #1144

Scrape latest data

Scrape latest data #1144

Workflow file for this run

# Workflow: download the street tree list CSV from data.sfgov.org, store it
# in a stable (sorted) row order, and commit the file when its contents
# changed. The commit message is a csv-diff summary of the changes.
name: Scrape latest data

on:
  push:
  workflow_dispatch:
  schedule:
    # Once a day at 11:21 UTC.
    - cron: '21 11 * * *'

jobs:
  scheduled:
    runs-on: ubuntu-latest
    # The last step pushes a commit using the job's GITHUB_TOKEN, which needs
    # write access to the repository contents.
    permissions:
      contents: write
    steps:
      - name: Check out this repo
        uses: actions/checkout@v4
        with:
          # 0 fetches the full history instead of a shallow clone.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: '3.8'
      - name: Configure pip caching
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # The cache is keyed on the hash of every requirements.txt; the
          # restore key falls back to the newest cache for this OS.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install Python dependencies
        # NOTE(review): csv-diff (used below) is presumably provided by
        # requirements.txt - confirm.
        run: |-
          pip install -r requirements.txt
      - name: Fetch latest data
        run: |-
          # Keep the previous version so csv-diff can compare against it
          cp Street_Tree_List.csv Street_Tree_List-old.csv
          # --fail aborts the step on an HTTP error instead of saving the
          # error page as CSV data; --location follows redirects
          curl --fail --location -o Street_Tree_List-unsorted.csv "https://data.sfgov.org/api/views/tkzw-k3nq/rows.csv?accessType=DOWNLOAD"
          # Remove heading line and use it to start a new file
          head -n 1 Street_Tree_List-unsorted.csv > Street_Tree_List.csv
          # Sort all but the first line and append to that file
          tail -n +2 "Street_Tree_List-unsorted.csv" | sort >> Street_Tree_List.csv
          # Generate commit message using csv-diff
          csv-diff Street_Tree_List-old.csv Street_Tree_List.csv --key=TreeID --singular=tree --plural=trees > message.txt
      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          # Only the sorted CSV is staged; the -old/-unsorted copies and
          # message.txt stay out of the commit
          git add Street_Tree_List.csv
          # If the commit does not succeed (e.g. nothing changed), end the
          # step successfully without pushing
          git commit -F message.txt || exit 0
          git push