# Patch for .circleci/config.yml:
#   - raises the config version from 2 to 2.1;
#   - moves working_directory and the docker image into a shared `defaults` anchor;
#   - adds two reusable commands: `install` (checkout, restore the yarn cache,
#     apt-get install pcregrep) and `cache` (save node_modules to the cache);
#   - replaces the single `build` job with `test` (lint-check + link-check-diff)
#     and `test_full` (test, format-check, lint-check, link-check).
# The second hunk continues on the following line with the added `workflows` section.
# NOTE(review): line breaks and indentation in this copy have been collapsed to
# single spaces, so blank context lines are not visible and the hunks cannot be
# applied as-is -- restore this section from the original patch before applying.
diff --git a/.circleci/config.yml b/.circleci/config.yml index ecf6721586..2f709db332 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,20 +2,21 @@ # # Check https://circleci.com/docs/2.0/language-javascript/ for more details # -version: 2 -jobs: - build: - docker: - # specify the version you desire here - - image: circleci/node:10.16.3 +version: 2.1 - # Specify service dependencies here if necessary - # CircleCI maintains a library of pre-built images - # documented at https://circleci.com/docs/2.0/circleci-images/ - # - image: circleci/mongo:3.4.4 +defaults: &defaults + working_directory: ~/repo + docker: + # specify the version you desire here + - image: circleci/node:10.16.3 - working_directory: ~/repo + # Specify service dependencies here if necessary + # CircleCI maintains a library of pre-built images + # documented at https://circleci.com/docs/2.0/circleci-images/ + # - image: circleci/mongo:3.4.4 +commands: + install: steps: - checkout @@ -28,24 +29,65 @@ jobs: git reset --hard origin/master git checkout - - # Download and cache dependencies + # Download cached dependencies - restore_cache: keys: - v1-dependencies-{{ checksum "yarn.lock" }} # fallback to using the latest cache if no exact match is found - v1-dependencies- - - run: yarn + - run: + name: apt dependencies + command: | + sudo apt-get update + sudo apt-get install pcregrep + cache: + steps: + # Upload dependencies cache + - save_cache: + paths: + - node_modules + key: v1-dependencies-{{ checksum "yarn.lock" }} +jobs: + test: + <<: *defaults + steps: + - install + - run: yarn - run: yarn build - - run: yarn test - - run: yarn format-check + - run: yarn lint-check + - run: yarn link-check-diff + - cache + test_full: + <<: *defaults + steps: + - install + - run: yarn + - run: yarn build + - run: yarn test + - run: yarn format-check - run: yarn lint-check + - run: yarn link-check + - cache - - save_cache: - paths: - - node_modules - key: v1-dependencies-{{ checksum "yarn.lock" }} 
+workflows:
+  version: 2
+
+  commit:
+    jobs:
+      - test
+
+  daily:
+    triggers:
+      - schedule:
+          cron: '0 0 * * *'
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - test_full
# New pre-commit hook `dead-url`: runs scripts/link-check.sh on text files.
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
new file mode 100644
index 0000000000..c4b759a66d
--- /dev/null
+++ b/.pre-commit-hooks.yaml
@@ -0,0 +1,6 @@
+- id: dead-url
+  name: Dead URL Checker
+  entry: scripts/link-check.sh
+  language: script
+  types: [text]
+  description: This hook searches for problematic URLs.
# Adds the `link-check` and `link-check-diff` yarn scripts used by the CI jobs.
diff --git a/package.json b/package.json
index 6eb4265682..0e11f3d286 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,9 @@
     "format-check": "prettier --check '{.,pages/**,public/static/docs/**,src/**}/*.{js,md}'",
     "lint-check": "eslint src pages",
     "format-all": "prettier --write '{.,pages/**,public/static/docs/**,src/**}/*.{js,md}'",
-    "format": "prettier --write"
+    "format": "prettier --write",
+    "link-check": "scripts/link-check-git-all.sh",
+    "link-check-diff": "scripts/link-check-git-diff.sh"
   },
   "repository": {
     "type": "git",
# URLs that link-check.sh skips; entries are matched as whole lines (comm -23)
# against the extracted URLs, so each must equal the extracted text exactly.
diff --git a/scripts/exclude-links.txt b/scripts/exclude-links.txt
new file mode 100644
index 0000000000..6fbff79aa5
--- /dev/null
+++ b/scripts/exclude-links.txt
@@ -0,0 +1,20 @@
+http://127.0.0.1:10000/devstoreaccount1;
+http://localhost:3000/
+https://$
+https://api.github.com/repos/$
+https://blog.$
+https://discuss.$
+https://dvc.org/some.link
+https://example.com/data.txt
+https://example.com/path/to/data
+https://example.com/path/to/data.csv
+https://example.com/path/to/dir
+https://github.com/$
+https://github.com/dataversioncontrol/myrepo.git
+https://github.com/example/registry
+https://github.com/iterative/dvc.org/blob/master/public$
+https://github.com/iterative/dvc/releases/download/$
+https://github.com/myaccount/myproject.git
+https://myendpoint.com
+https://object-storage.example.com
+https://www.youtube.com/embed/$
# Runs link-check.sh on every *.md / *.js file found under pages/,
# public/static/docs/, src/, .github/ and the current directory, one file per
# invocation and up to 8 invocations in parallel (xargs -n1 -P8).
diff --git a/scripts/link-check-git-all.sh b/scripts/link-check-git-all.sh
new file mode 100755
index 0000000000..2251fb36e5
--- /dev/null
+++ b/scripts/link-check-git-all.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+(find pages/ public/static/docs/ src/ .github/ -name '*.md' -o -name '*.js' && ls *.md *.js) \
+  | xargs -n1 -P8 $(dirname "$0")/link-check.sh
# Runs link-check.sh on the output of `git diff origin/master -U0`, i.e. only
# on lines that differ from origin/master.
diff --git a/scripts/link-check-git-diff.sh b/scripts/link-check-git-diff.sh
new file mode 100755
index 0000000000..79e56817ee
--- /dev/null
+++ b/scripts/link-check-git-diff.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+set -euxo pipefail
+$(dirname "$0")/link-check.sh <(git diff origin/master -U0)
# Link checker: finder() extracts absolute and site-relative URLs from each
# file (pcregrep/sed), excluded URLs are removed, and checker() requests each
# remaining URL with `curl -IL`. 4xx/5xx count as errors, non-2xx otherwise as
# warnings; the script exits with the number of files that had errors.
# NOTE(review): the first pcregrep pattern and the usage line were restored
# from a copy in which `<...>` spans had been stripped -- confirm against the
# original commit.
diff --git a/scripts/link-check.sh b/scripts/link-check.sh
new file mode 100755
index 0000000000..fab8b730a8
--- /dev/null
+++ b/scripts/link-check.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# Check HTTP status codes of links in the given files.
+# Success: 2xx, Errors: 4xx/5xx, Warnings: anything else.
+# Redirects (3xx) are followed.
+# Usage:
+#   link-check.sh [<files>]
+set -euo pipefail
+
+base_url="${CHECK_LINKS_RELATIVE_URL:-https://dvc.org}"
+exclude="${CHECK_LINKS_EXCLUDE_LIST:-$(dirname $0)/exclude-links.txt}"
+[ -f "$exclude" ] && exclude="$(cat $exclude)"
+
+finder(){ # expects list of files
+  # explicit links not in markdown
+  pcregrep -o '(?<!\]\()https?://[^\s<>{}"'"'"'`]+' "$@"
+  # explicit links in markdown
+  pcregrep -o '(?<=\])\(https?://[^[\]\s]+\)' "$@" | pcregrep -o '\((?:[^)(]*(?R)?)*+\)' | pcregrep -o '(?<=\().*(?=\))'
+  # relative links in markdown
+  sed -nr 's/.*]\((\/[^)[:space:]]+).*/\1/p' "$@" | xargs -n1 -II echo ${base_url}I
+  # relative links in html
+  sed -nr 's/.*href=["'"'"'](\/[^"'"'"']+?)["'"'"'].*/\1/p' "$@" | xargs -n1 -II echo ${base_url}I
+}
+checker(){ # expects list of urls
+  errors=0
+  for url in "$@"; do
+    status="$(curl -IL -w '%{http_code}' -so /dev/null "$url")"
+    case "$status" in
+      2??)
+        # success
+        ;;
+      [45]??)
+        echo
+        echo " ERROR:$status:$url" >&2
+        errors=$(($errors + 1))
+        ;;
+      *)
+        echo
+        echo " WARNING:$status:$url" >&2
+        ;;
+    esac
+  done
+  return $errors
+}
+
+fails=0
+for file in "$@"; do
+  echo -n "$file:"
+  prev=$fails
+  checker $(finder "$file" | sort -u | comm -23 - <(echo "$exclude" | sort -u)) || fails=$(($fails + 1))
+  [ $prev -eq $fails ] && echo OK
+done
+[ $fails -eq 0 ] || echo -e "ERROR:$fails failures\n---" >&2
+exit $fails