#!/bin/bash
# Check the links found in the given .md files. If no files are given,
# check all the .md files of the project.
#
# Usage:
#     scripts/check-links.sh [<base-url> [<file>...]]
#
# If <base-url> is missing, the default is 'https://dvc.org'. If the list
# of files is missing, all the .md files under 'static/' are checked.
# File arguments are resolved relative to the project root (the parent of
# this script's directory) and may be glob patterns, including '**'.
#
# Every link for which wget fails is reported on stdout as:
#     <file>: '<link>'
#
# Examples:
#     scripts/check-links.sh https://dvc.org static/docs/*/*.md
#     scripts/check-links.sh http://localhost:3000
#     scripts/check-links.sh
#
# Strict mode ('set -e', 'pipefail') is deliberately not enabled: a failing
# wget and a grep that finds no links are both normal outcomes here.

# Work from the project root; stop instead of scanning the wrong tree.
cd -- "$(dirname -- "$0")/.." || exit 1

# wget settings, kept as an array so each option stays a separate word.
# The option '--max-redirect=0' disables redirections.
WGET_SETTINGS=(-q --max-redirect=0 --method=HEAD)

#######################################
# Check every markdown-style link target, i.e. the text between '](' and
# ')', found in one file.
# Globals:   WGET_SETTINGS (read)
# Arguments: $1 - base URL (no trailing '/') prepended to links starting
#                 with '/'
#            $2 - path of the file to scan
# Outputs:   one "<file>: '<link>'" line on stdout per failing link
#######################################
check_file() {
  local base_url=$1 file=$2
  local link url

  # IFS is set explicitly for 'read' because the caller runs with an empty
  # IFS; the default value makes 'read' trim surrounding whitespace from
  # the link. '-r' keeps backslashes in the link untouched.
  grep -o -- ']([^)]*)' "$file" \
    | sed -e 's/^](//' -e 's/)$//' \
    | while IFS=$' \t\n' read -r link; do
      case "$link" in
        '' | '#'*) continue ;;                # empty link or in-page anchor
        /*) url="${base_url}${link}" ;;       # site-relative link
        *) url=$link ;;                       # anything else is used as-is
      esac
      wget "${WGET_SETTINGS[@]}" "$url" || printf '%s\n' "$file: '$link'"
    done
}

#######################################
# Entry point: parse the arguments and check each requested file.
# Arguments: $1   - base URL (optional, default 'https://dvc.org')
#            $2.. - files or glob patterns (optional, default
#                   'static/**/*.md')
#######################################
main() {
  local base_url=${1:-https://dvc.org}
  base_url=${base_url%/} # remove a trailing /
  if (($# > 0)); then
    shift
  fi
  (($# > 0)) || set -- 'static/**/*.md'

  shopt -s globstar

  # With an empty IFS the unquoted $pattern below is still glob-expanded
  # (so quoted patterns given on the command line keep working) but is no
  # longer word-split, so file names containing whitespace stay intact.
  local IFS=
  local pattern file
  for pattern in "$@"; do
    for file in $pattern; do
      check_file "$base_url" "$file"
    done
  done
}

main "$@"