-
-
Notifications
You must be signed in to change notification settings - Fork 186
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Update README.md * Top CDNs * caching queries part 1 * LH queries * caching sql part 2 * changes from review * comment
- Loading branch information
Showing
25 changed files
with
827 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,8 @@ | ||
# 2021 Caching queries | ||
|
||
<!-- | ||
This directory contains all of the 2021 Caching chapter queries. | ||
Resources: | ||
|
||
Each query should have a corresponding `metric_name.sql` file. | ||
Note that readers are linked to this directory, so try to make the SQL file names descriptive for easy browsing. | ||
Analysts: if helpful, you can use this README to give additional info about the queries. | ||
--> | ||
- [Planning doc](https://docs.google.com/document/d/1BSdljOv86CPAP5T0A9jcV6bU7NJp1f2mTFa4ZZ7xxe8/edit) | ||
- [Results sheet](https://docs.google.com/spreadsheets/d/1-v3yR0LZIC3t4zWtqTgR3jJsKjjRMP-HATU2caP8e2c/edit) | ||
- [2019 chapter](https://almanac.httparchive.org/en/2019/caching) | ||
- [2020 chapter](https://almanac.httparchive.org/en/2020/caching) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#standardSQL | ||
# Use of Cache-Control directives | ||
SELECT | ||
client, | ||
COUNT(0) AS total_requests, | ||
COUNTIF(uses_cache_control) AS total_using_cache_control, | ||
COUNTIF(uses_max_age) AS total_using_max_age, | ||
COUNTIF(uses_no_cache) AS total_using_no_cache, | ||
COUNTIF(uses_public) AS total_using_public, | ||
COUNTIF(uses_must_revalidate) AS total_using_must_revalidate, | ||
COUNTIF(uses_no_store) AS total_using_no_store, | ||
COUNTIF(uses_private) AS total_using_private, | ||
COUNTIF(uses_proxy_revalidate) AS total_using_proxy_revalidate, | ||
COUNTIF(uses_s_maxage) AS total_using_s_maxage, | ||
COUNTIF(uses_no_transform) AS total_using_no_transform, | ||
COUNTIF(uses_immutable) AS total_using_immutable, | ||
COUNTIF(uses_stale_while_revalidate) AS total_using_stale_while_revalidate, | ||
COUNTIF(uses_stale_if_error) AS total_using_stale_if_error, | ||
COUNTIF(uses_no_store AND uses_no_cache AND uses_max_age_zero) AS total_using_no_store_and_no_cache_and_max_age_zero, | ||
COUNTIF(uses_no_store AND uses_no_cache AND NOT uses_max_age_zero) AS total_using_no_store_and_no_cache_only, | ||
COUNTIF(uses_no_store AND NOT uses_no_cache AND NOT uses_max_age_zero) AS total_using_no_store_only, | ||
COUNTIF(uses_max_age_zero AND NOT uses_no_store) AS total_using_max_age_zero_without_no_store, | ||
COUNTIF(uses_pre_check_zero AND uses_post_check_zero) AS total_using_pre_check_zero_and_post_check_zero, | ||
COUNTIF(uses_pre_check_zero) AS total_using_pre_check_zero, | ||
COUNTIF(uses_post_check_zero) AS total_using_post_check_zero, | ||
COUNTIF(uses_cache_control AND NOT uses_max_age AND NOT uses_no_cache AND NOT uses_public AND NOT uses_must_revalidate AND NOT uses_no_store AND NOT uses_private AND NOT uses_proxy_revalidate AND NOT uses_s_maxage AND NOT uses_no_transform AND NOT uses_immutable AND NOT uses_stale_while_revalidate AND NOT uses_stale_if_error AND NOT uses_pre_check_zero AND NOT uses_post_check_zero) AS total_erroneous_directives, | ||
COUNTIF(uses_cache_control) / COUNT(0) AS pct_using_cache_control, | ||
COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age, | ||
COUNTIF(uses_no_cache) / COUNT(0) AS pct_using_no_cache, | ||
COUNTIF(uses_public) / COUNT(0) AS pct_using_public, | ||
COUNTIF(uses_must_revalidate) / COUNT(0) AS pct_using_must_revalidate, | ||
COUNTIF(uses_no_store) / COUNT(0) AS pct_using_no_store, | ||
COUNTIF(uses_private) / COUNT(0) AS pct_using_private, | ||
COUNTIF(uses_proxy_revalidate) / COUNT(0) AS pct_using_proxy_revalidate, | ||
COUNTIF(uses_s_maxage) / COUNT(0) AS pct_using_s_maxage, | ||
COUNTIF(uses_no_transform) / COUNT(0) AS pct_using_no_transform, | ||
COUNTIF(uses_immutable) / COUNT(0) AS pct_using_immutable, | ||
COUNTIF(uses_stale_while_revalidate) / COUNT(0) AS pct_using_stale_while_revalidate, | ||
COUNTIF(uses_stale_if_error) / COUNT(0) AS pct_using_stale_if_error, | ||
COUNTIF(uses_no_store AND uses_no_cache AND uses_max_age_zero) / COUNT(0) AS pct_using_no_store_and_no_cache_and_max_age_zero, | ||
COUNTIF(uses_no_store AND uses_no_cache AND NOT uses_max_age_zero) / COUNT(0) AS pct_using_no_store_and_no_cache_only, | ||
COUNTIF(uses_no_store AND NOT uses_no_cache AND NOT uses_max_age_zero) / COUNT(0) AS pct_using_no_store_only, | ||
COUNTIF(uses_max_age_zero AND NOT uses_no_store) / COUNT(0) AS pct_using_max_age_zero_without_no_store, | ||
COUNTIF(uses_pre_check_zero AND uses_post_check_zero) / COUNT(0) AS pct_using_pre_check_zero_and_post_check_zero, | ||
COUNTIF(uses_pre_check_zero) / COUNT(0) AS pct_using_pre_check_zero, | ||
COUNTIF(uses_post_check_zero) / COUNT(0) AS pct_using_post_check_zero, | ||
COUNTIF(uses_cache_control AND NOT uses_max_age AND NOT uses_no_cache AND NOT uses_public AND NOT uses_must_revalidate AND NOT uses_no_store AND NOT uses_private AND NOT uses_proxy_revalidate AND NOT uses_s_maxage AND NOT uses_no_transform AND NOT uses_immutable AND NOT uses_stale_while_revalidate AND NOT uses_stale_if_error AND NOT uses_pre_check_zero AND NOT uses_post_check_zero) / COUNT(0) AS pct_erroneous_directives | ||
FROM ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
TRIM(resp_cache_control) != "" AS uses_cache_control, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)max-age\s*=\s*0') AS uses_max_age_zero, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)public') AS uses_public, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)no-cache') AS uses_no_cache, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)must-revalidate') AS uses_must_revalidate, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AS uses_no_store, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)private') AS uses_private, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)proxy-revalidate') AS uses_proxy_revalidate, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)s-maxage\s*=\s*[0-9]+') AS uses_s_maxage, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)no-transform') AS uses_no_transform, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)immutable') AS uses_immutable, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)stale-while-revalidate\s*=\s*[0-9]+') AS uses_stale_while_revalidate, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)stale-if-error\s*=\s*[0-9]+') AS uses_stale_if_error, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)pre-check\s*=\s*0') AS uses_pre_check_zero, | ||
REGEXP_CONTAINS(resp_cache_control, r'(?i)post-check\s*=\s*0') AS uses_post_check_zero | ||
FROM | ||
`httparchive.summary_requests.2021_07_01_*` | ||
) | ||
GROUP BY | ||
client |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#standardSQL | ||
# List of invalid Cache-Control directive names. | ||
SELECT | ||
client, | ||
total_directives, | ||
total_using_cache_control, | ||
directive_name, | ||
directive_occurrences, | ||
directive_occurrences / total_using_cache_control AS pct_of_cache_control, | ||
directive_occurrences / total_directives AS pct_of_total_directives | ||
FROM ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
directive_name, | ||
COUNT(0) AS directive_occurrences, | ||
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total_directives, | ||
SUM(COUNTIF(TRIM(resp_cache_control) != '')) OVER (PARTITION BY _TABLE_SUFFIX) AS total_using_cache_control | ||
FROM | ||
`httparchive.summary_requests.2021_07_01_*` | ||
LEFT JOIN | ||
UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name | ||
GROUP BY | ||
client, | ||
directive_name) | ||
WHERE | ||
directive_name NOT IN ( | ||
'max-age', 'public', 'no-cache', 'must-revalidate', 'no-store', | ||
'private', 'proxy-revalidate', 's-maxage', 'no-transform', | ||
'immutable', 'stale-while-revalidate', 'stale-if-error', | ||
'pre-check', 'post-check') | ||
ORDER BY | ||
client, | ||
directive_occurrences DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#standardSQL | ||
# Difference between Cache TTL and the contents age | ||
CREATE TEMPORARY FUNCTION toTimestamp(date_string STRING) | ||
RETURNS INT64 LANGUAGE js AS ''' | ||
try { | ||
var timestamp = Math.round(new Date(date_string).getTime() / 1000); | ||
return isNaN(timestamp) || timestamp < 0 ? -1 : timestamp; | ||
} catch (e) { | ||
return null; | ||
} | ||
'''; | ||
|
||
SELECT | ||
client, | ||
percentile, | ||
APPROX_QUANTILES(diff_in_days, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS diff_in_days | ||
FROM ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / (60 * 60 * 24), 2) AS diff_in_days | ||
FROM | ||
`httparchive.summary_requests.2021_07_01_*` | ||
WHERE | ||
resp_last_modified <> "" AND | ||
expAge > 0), | ||
UNNEST([10, 25, 50, 75, 90]) AS percentile | ||
GROUP BY | ||
client, | ||
percentile | ||
ORDER BY | ||
client, | ||
percentile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#standardSQL | ||
# Distribution of cache TTL Lighthouse scores | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
JSON_EXTRACT_SCALAR(report, "$.audits.uses-long-cache-ttl.score") AS caching_score, | ||
COUNT(0) AS num_pages, | ||
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total, | ||
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct_pages | ||
FROM | ||
`httparchive.lighthouse.2021_07_01_*` | ||
GROUP BY | ||
client, | ||
caching_score | ||
ORDER BY | ||
client, | ||
caching_score ASC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#standardSQL | ||
# Distribution of bytes wasted (absence of adequate caching) from Lighthouse | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
ROUND(CAST(JSON_EXTRACT_SCALAR(report, "$.audits.uses-long-cache-ttl.details.summary.wastedBytes") AS NUMERIC) / 1024 / 1024) AS mbyte_savings, | ||
COUNT(0) AS num_pages, | ||
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total, | ||
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct_pages | ||
FROM | ||
`httparchive.lighthouse.2021_07_01_*` | ||
GROUP BY | ||
client, | ||
mbyte_savings | ||
ORDER BY | ||
client, | ||
mbyte_savings ASC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#standardSQL | ||
# Requests with a content age older than its TTL | ||
CREATE TEMPORARY FUNCTION toTimestamp(date_string STRING) | ||
RETURNS INT64 LANGUAGE js AS ''' | ||
try { | ||
var timestamp = Math.round(new Date(date_string).getTime() / 1000); | ||
return isNaN(timestamp) || timestamp < 0 ? -1 : timestamp; | ||
} catch (e) { | ||
return null; | ||
} | ||
'''; | ||
|
||
SELECT | ||
client, | ||
COUNT(0) AS total_req, | ||
COUNTIF(diff < 0) AS req_too_short_cache, | ||
COUNTIF(diff < 0) / COUNT(0) AS perc_req_too_short_cache | ||
FROM ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff | ||
FROM | ||
`httparchive.summary_requests.2021_07_01_*` | ||
WHERE | ||
resp_last_modified <> "" AND | ||
expAge > 0) | ||
GROUP BY | ||
client | ||
ORDER BY | ||
client |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#standardSQL | ||
# Difference between Cache TTL and the content age for third party request | ||
CREATE TEMPORARY FUNCTION toTimestamp(date_string STRING) | ||
RETURNS INT64 LANGUAGE js AS ''' | ||
try { | ||
var timestamp = Math.round(new Date(date_string).getTime() / 1000); | ||
return isNaN(timestamp) || timestamp < 0 ? -1 : timestamp; | ||
} catch (e) { | ||
return null; | ||
} | ||
'''; | ||
|
||
SELECT | ||
client, | ||
party, | ||
COUNT(0) AS total_req, | ||
COUNTIF(diff < 0) AS req_too_short_cache, | ||
COUNTIF(diff < 0) / COUNT(0) AS perc_req_too_short_cache | ||
FROM ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
IF(NET.HOST(url) IN ( | ||
SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2021-07-01' AND category != 'hosting' | ||
), 'third party', 'first party') AS party, | ||
requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff | ||
FROM | ||
`httparchive.summary_requests.2021_07_01_*` requests | ||
WHERE | ||
TRIM(requests.resp_last_modified) <> "" AND | ||
expAge > 0) | ||
GROUP BY | ||
client, | ||
party | ||
ORDER BY | ||
client, | ||
party |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#standardSQL | ||
# Popularity of top Set-Cookie attributes/directives | ||
CREATE TEMPORARY FUNCTION getCookieAttributes(headers STRING) | ||
RETURNS ARRAY<STRING> DETERMINISTIC LANGUAGE js AS ''' | ||
try { | ||
var $ = JSON.parse(headers); | ||
return $.filter(header => { | ||
return header.name.toLowerCase() == 'set-cookie'; | ||
}).flatMap(header => { | ||
return Array.from(new Set(header.value.split(';').slice(1).map(attr => { | ||
return attr.trim().split('=')[0].trim(); | ||
}))); | ||
}); | ||
} catch (e) { | ||
return []; | ||
} | ||
'''; | ||
|
||
CREATE TEMPORARY FUNCTION countCookies(headers STRING) | ||
RETURNS INT64 DETERMINISTIC LANGUAGE js AS ''' | ||
try { | ||
var $ = JSON.parse(headers); | ||
return $.filter(header => { | ||
return header.name.toLowerCase() == 'set-cookie'; | ||
}).length; | ||
} catch (e) { | ||
return 0; | ||
} | ||
'''; | ||
|
||
SELECT | ||
client, | ||
attr.value AS attr, | ||
attr.count AS freq, | ||
total, | ||
attr.count / total AS pct | ||
FROM ( | ||
SELECT | ||
client, | ||
APPROX_TOP_COUNT(attr, 100) AS attrs | ||
FROM | ||
`httparchive.almanac.requests`, | ||
UNNEST(getCookieAttributes(response_headers)) AS attr | ||
WHERE | ||
date = '2021-07-01' | ||
GROUP BY | ||
client) | ||
JOIN ( | ||
SELECT | ||
client, | ||
SUM(countCookies(response_headers)) AS total | ||
FROM | ||
`httparchive.almanac.requests` | ||
GROUP BY | ||
client) | ||
USING | ||
(client), | ||
UNNEST(attrs) AS attr | ||
ORDER BY | ||
pct DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#standardSQL | ||
# Popularity of top Set-Cookie names | ||
CREATE TEMPORARY FUNCTION getCookies(headers STRING) | ||
RETURNS ARRAY<STRING> DETERMINISTIC LANGUAGE js AS ''' | ||
try { | ||
var $ = JSON.parse(headers); | ||
return $.filter(header => { | ||
return header.name.toLowerCase() == 'set-cookie'; | ||
}).map(header => { | ||
return header.value.split('=')[0].trim(); | ||
}); | ||
} catch (e) { | ||
return []; | ||
} | ||
'''; | ||
|
||
SELECT | ||
client, | ||
cookie.value AS cookie, | ||
cookie.count AS freq, | ||
total, | ||
cookie.count / total AS pct | ||
FROM ( | ||
SELECT | ||
client, | ||
APPROX_TOP_COUNT(cookie, 100) AS cookies, | ||
COUNT(0) AS total | ||
FROM | ||
`httparchive.almanac.requests`, | ||
UNNEST(getCookies(response_headers)) AS cookie | ||
WHERE | ||
date = '2021-07-01' | ||
GROUP BY | ||
client), | ||
UNNEST(cookies) AS cookie | ||
ORDER BY | ||
pct DESC |
Oops, something went wrong.