From d6092901f133b022755eab2dec8213776f53a84d Mon Sep 17 00:00:00 2001 From: Daniel Peukert Date: Sun, 21 Jun 2020 22:21:48 +0200 Subject: [PATCH 1/2] Refactor parsing logic --- google-font-download | 193 +++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 107 deletions(-) diff --git a/google-font-download b/google-font-download index 8e54160..a9ad71c 100755 --- a/google-font-download +++ b/google-font-download @@ -2,7 +2,7 @@ # vim:noet:sts=4:ts=4:sw=4:tw=120 ## -# Copyright (c) 2014-2015, Clemens Lang +# Copyright (c) 2014-2020, Clemens Lang # All rights reserved. # # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the @@ -29,6 +29,7 @@ # - Robert, github.com/rotx. # - Thomas Papamichail, https://gist.github.com/pointergr # - Musikid +# - Daniel Peukert ## ## @@ -37,7 +38,7 @@ # Ensure the bash version is new enough. If it isn't error out with a helpful error message rather than crashing later. if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then - echo "Error: This script needs Bash 4.x to run." >&2 + echo "Error: This script needs Bash 4.x or newer to run." >&2 exit 1 fi @@ -47,7 +48,9 @@ set -euo pipefail css="font.css" lang="latin" format="all" -url="https://fonts.googleapis.com/css" +url_selection="https://fonts.google.com/" +url_v1="https://fonts.googleapis.com/css" +url_v2="https://fonts.googleapis.com/css2" urlref="" # Usage message @@ -130,11 +133,70 @@ misuse_exit() { usage } -# function that act like split in perl. Syntax: splitarr IFS $var array -splitarr() { +# function that act like split in perl. Syntax: split_arr IFS $var array +split_arr() { IFS="$1" read -r -a "$3" <<< "$2" } +# function that parses a single URL part/family. Syntax: parse_url_part $var +parse_url_part() { + if [[ "$1" =~ \+ ]] || [[ "$1" =~ , ]] ; then + url_part="$1" + if [[ "$url_part" =~ \+ ]]; then + url_part=${url_part//\+/ } + fi + if [[ "$url_part" =~ , ]]; then + number=$(echo "$url_part" | grep -Po ':\K(.*)') + name=$(echo "$url_part" | grep -Po '(.*):') + split_arr ',' "$number" commas + for (( i = 0; i < ${#commas[@]}; i++ )); do + families+=("$name${commas[$i]}") + done + else + families+=("$url_part") + fi + else + families+=("$1") + fi +} + +# function that parses a font string (either a URL or a font family name). Syntax: parse_font_string $var +parse_font_string() { + case "$1" in + "") + return + ;; + "http"*) + url="$(echo "$1" | $ESED 's/http(s)?:\/\///')" + ;; + *) + families+=("$1") + return + ;; + esac + + case "https://$url" in + "$url_selection"*|"$url_v1"*) + url_string="$(echo "$url" | grep -Po 'family=\K(.*)')" + if [[ "$url_string" =~ \| ]]; then + split_arr '|' "$url_string" temp + for line in "${temp[@]}" + do + parse_url_part "$line" + done + else + parse_url_part "$url_string" + fi + ;; + "$url_v2"*) + err_exit "Google Fonts API v2 URLs are not yet supported" + ;; + *) + err_exit "Unsupported URL \`${url}'" + ;; + esac +} + # Check for modern getopt(1) that quotes correctly; see #1 for rationale ret=0 modern_getopt=1 @@ -206,103 +268,31 @@ while true; do esac done +# Check whether sed is GNU or BSD sed, or rather, which parameter enables extended regex support. Note that GNU sed does +# have -E as an undocumented compatibility option on some systems. +if [ "$(echo "test" | sed -E 's/([st]+)$/xx\1/' 2>/dev/null)" == "texxst" ]; then + ESED="sed -E" +elif [ "$(echo "test" | sed -r 's/([st]+)$/xx\1/' 2>/dev/null)" == "texxst" ]; then + ESED="sed -r" +else + # shellcheck disable=SC2230 + err_exit "$(which sed) seems to lack extended regex support with -E or -r" +fi + declare -a families families=() declare -a commas commas=() -# Detect and parse url -if [[ $urlref != "" ]]; then - urlref=$(echo "$urlref" | grep -Po 'family=\K(.*)') - if [[ "$urlref" =~ \| ]]; then - splitarr '|' "$urlref" temp - for line in "${temp[@]}" - do - if [[ "$line" =~ \+ ]] || [[ "$line" =~ , ]] ; then - if [[ "$line" =~ \+ ]]; then - line=${line//\+/ } - fi - if [[ "$line" =~ , ]]; then - number=$(echo "$line" | grep -Po ':\K(.*)') - name=$(echo "$line" | grep -Po '(.*):') - splitarr ',' "$number" commas - for (( i = 0; i < ${#commas[@]}; i++ )); do - families+=("$name${commas[$i]}") - done - else - families+=("$line") - fi - else - families+=("$line") - fi - done - else - if [[ "$urlref" =~ \+ ]] || [[ "$urlref" =~ , ]] ; then - if [[ "$urlref" =~ \+ ]]; then - urlref=${urlref//\+/ } - fi - if [[ "$urlref" =~ , ]]; then - number=$(echo "$urlref" | grep -Po ':\K(.*)') - name=$(echo "$urlref" | grep -Po '(.*):') - splitarr ',' "$number" commas - for (( i = 0; i < ${#commas[@]}; i++ )); do - families+=("$name${commas[$i]}") - done - else - families+=("$urlref") - fi - fi - fi -fi -# Validate font family input +# Parse and validate url input +parse_font_string "$urlref" + +# Parse and validate font family input for family do - # Directly parse url - if [[ "$family" =~ http ]]; then - family=$(echo "$family" | grep -Po 'family=\K(.*)') - if [[ "$family" =~ \| ]]; then - splitarr '|' "$family" temp - for line in "${temp[@]}" - do - if [[ "$line" =~ \+ ]] || [[ "$line" =~ , ]] ; then - if [[ "$line" =~ \+ ]]; then - line=${line//\+/ } - fi - if [[ "$line" =~ , ]]; then - number=$(echo "$line" | grep -Po ':\K(.*)') - name=$(echo "$line" | grep -Po '(.*):') - splitarr ',' "$number" commas - for (( i = 0; i < ${#commas[@]}; i++ )); do - families+=("$name${commas[$i]}") - done - else - families+=("$line") - fi - else - families+=("$line") - fi - done - else - if [[ "$family" =~ \+ ]] || [[ "$family" =~ , ]] ; then - if [[ "$family" =~ \+ ]]; then - family=${family//\+/ } - fi - if [[ "$family" =~ , ]]; then - number=$(echo "$family" | grep -Po ':\K(.*)') - name=$(echo "$family" | grep -Po '(.*):') - splitarr ',' "$number" commas - for (( i = 0; i < ${#commas[@]}; i++ )); do - families+=("$name${commas[$i]}") - done - else - families+=("$family") - fi - fi - fi - else - families+=("$family") - fi + parse_font_string "$family" done + if [ ${#families[@]} -eq 0 ]; then misuse_exit "No font families given" fi @@ -342,17 +332,6 @@ elif [ "$css" = "-" ]; then fi -# Check whether sed is GNU or BSD sed, or rather, which parameter enables extended regex support. Note that GNU sed does -# have -E as an undocumented compatibility option on some systems. -if [ "$(echo "test" | sed -E 's/([st]+)$/xx\1/' 2>/dev/null)" == "texxst" ]; then - ESED="sed -E" -elif [ "$(echo "test" | sed -r 's/([st]+)$/xx\1/' 2>/dev/null)" == "texxst" ]; then - ESED="sed -r" -else - # shellcheck disable=SC2230 - err_exit "$(which sed) seems to lack extended regex support with -E or -r" -fi - # Store the useragents we're going to use to trick Google's servers into serving us the correct CSS file. declare -A useragent # ShellCheck doesn't correctly notice our dynamic use of these variables. @@ -378,7 +357,7 @@ for family in "${families[@]}"; do # Test whether the chosen combination of font and language subset # exists; Google returns HTTP 400 if it doesn't ret=0 - css_string=$(curl -sSf --get --data-urlencode "family=$family" --data-urlencode "subset=$lang" "$url" 2>&1) || ret=$? + css_string=$(curl -sSf --get --data-urlencode "family=$family" --data-urlencode "subset=$lang" "$url_v1" 2>&1) || ret=$? if [ $ret -ne 0 ]; then errors=1 printf >&2 " error: %s\\n" "${css_string}" @@ -445,7 +424,7 @@ for family in "${families[@]}"; do else pattern="https:\\/\\/[^\\)]+" fi - file=$(curl -sf -A "${useragent[$uakey]}" --get --data-urlencode "family=$family" --data-urlencode "subset=$lang" "$url" | grep -Eo "$pattern" | sort -u) + file=$(curl -sf -A "${useragent[$uakey]}" --get --data-urlencode "family=$family" --data-urlencode "subset=$lang" "$url_v1" | grep -Eo "$pattern" | sort -u) printf >>"$css" "\\t\\t/* from %s */\\n" "$file" if [ "$uakey" == "svg" ]; then # SVG fonts need the font after a hash symbol, so extract the correct name from Google's CSS From 560ef69ae4bb7c05d011a5b9b208eeee85dc73c4 Mon Sep 17 00:00:00 2001 From: Daniel Peukert Date: Sun, 21 Jun 2020 22:21:58 +0200 Subject: [PATCH 2/2] Add more tests for parsing --- test/Makefile | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/test/Makefile b/test/Makefile index f0c9335..0094d6a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,4 +1,4 @@ -TESTS=eot svg ttf woff woff2 fonts-with-spaces multiple-fonts format-param output-param output-long-param font-weight font-style url-arg +TESTS=eot svg ttf woff woff2 fonts-with-spaces multiple-fonts format-param output-param output-long-param font-weight font-style font-style-short url-single-arg url-single-positional url-multiple-arg url-multiple-positional url-wrong TUT=../../google-font-download # tests use bashisms, avoid failures on systems where dash is used @@ -79,7 +79,32 @@ font-style: $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff "Ubuntu:700italic" && grep "font-style: italic;" "font.css" >/dev/null); ret=$$?; rm -rf $@ && exit $$ret $(V)echo " OK" -url-arg: - $(V)echo "---> Testing download with a URL" +font-style-short: + $(V)echo "---> Testing short font style support" + $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff "Ubuntu:700i" && grep "font-style: italic;" "font.css" >/dev/null); ret=$$?; rm -rf $@ && exit $$ret + $(V)echo " OK" + +url-single-arg: + $(V)echo "---> Testing single font download with a URL argument" + $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff -u "https://fonts.google.com/?query=lora&selection.family=Roboto:300,400" && [ -f "Roboto_300.woff" ] && [ -f "Roboto_400.woff" ]); ret=$$?; rm -rf $@ && exit $$ret + $(V)echo " OK" + +url-single-positional: + $(V)echo "---> Testing single font download with a URL as a positional argument" + $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff "https://fonts.google.com/?query=lora&selection.family=Roboto:300,400" && [ -f "Roboto_300.woff" ] && [ -f "Roboto_400.woff" ]); ret=$$?; rm -rf $@ && exit $$ret + $(V)echo " OK" + +url-multiple-arg: + $(V)echo "---> Testing multiple font download with a URL argument" $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff -u "https://fonts.google.com/?query=lora&selection.family=Lora|Ubuntu|Roboto:300,400|Mukta+Malar" && [ -f "Ubuntu.woff" ] && [ -f "Lora.woff" ] && [ -f "Roboto_300.woff" ] && [ -f "Roboto_400.woff" ] && [ -f "Mukta_Malar.woff" ]); ret=$$?; rm -rf $@ && exit $$ret $(V)echo " OK" + +url-multiple-positional: + $(V)echo "---> Testing multiple font download with a URL as a positional argument" + $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff "https://fonts.google.com/?query=lora&selection.family=Lora|Ubuntu|Roboto:300,400|Mukta+Malar" && [ -f "Ubuntu.woff" ] && [ -f "Lora.woff" ] && [ -f "Roboto_300.woff" ] && [ -f "Roboto_400.woff" ] && [ -f "Mukta_Malar.woff" ]); ret=$$?; rm -rf $@ && exit $$ret + $(V)echo " OK" + +url-wrong: + $(V)echo "---> Testing that a wrong URL fails" + $(V)mkdir -p $@ && (cd $@ && $(TUT) -f woff "http://example.com"); ret=$$?; rm -rf $@ && [ "$$ret" = "2" ] && exit 0 || exit 2 + $(V)echo " OK"