Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTTP 2022 additional queries #3081

Merged
merged 5 commits into from
Aug 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions sql/2022/http/h2_adoption_each_cdn_breakdown.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#standardSQL

# Share of requests per HTTP version category (HTTP/2+ vs. everything else),
# reported separately for each client and CDN provider. Only requests with a
# non-empty _cdn_provider are considered.

WITH cdn_requests AS (
  SELECT
    client,
    _cdn_provider,
    CASE
      # QUIC, every h3* token, HTTP/2 and HTTP/3 share the HTTP/2+ bucket.
      WHEN
        LOWER(protocol) IN ('quic', 'http/2', 'http/3') OR
        STARTS_WITH(LOWER(protocol), 'h3')
        THEN 'HTTP/2+'
      WHEN protocol IS NULL THEN 'Unknown'
      # Any other protocol is reported under its own upper-cased name.
      ELSE UPPER(protocol)
    END AS http_version_category
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2022-06-01' AND
    _cdn_provider != ''
)

SELECT
  client,
  _cdn_provider,
  http_version_category,
  COUNT(0) AS num_reqs,
  # Denominator: all requests of the same client and CDN provider.
  SUM(COUNT(0)) OVER (PARTITION BY client, _cdn_provider) AS total_reqs,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client, _cdn_provider) AS pct_reqs
FROM
  cdn_requests
GROUP BY
  client,
  _cdn_provider,
  http_version_category
ORDER BY
  client ASC,
  num_reqs DESC
57 changes: 57 additions & 0 deletions sql/2022/http/protocol_advertised_via_altsvc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#standardSQL
# Protocol advertised via alt-svc breakdown
CREATE TEMPORARY FUNCTION extractHTTPHeader(HTTPheaders STRING, header STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS """
  // HTTPheaders: JSON array of {name, value} objects.
  // header: header name to look up (compared case-insensitively).
  //
  // Every matching header value is split on commas (repeated headers are
  // equivalent to one comma-separated header). For each entry, the part
  // before the first semicolon is taken and then cut at the first equals
  // sign; that leading token is what gets collected.
  // Any failure (invalid JSON, missing name/value, ...) yields an empty array.
  try {
    const wantedName = header.toLowerCase();
    const protocols = [];

    JSON.parse(HTTPheaders)
      .filter(function (entry) {
        return entry.name.toLowerCase() == wantedName;
      })
      .forEach(function (entry) {
        entry.value.split(",").forEach(function (alternative) {
          const protocolAndAuthority = alternative.trim().split(";")[0];
          protocols.push(protocolAndAuthority.split("=")[0]);
        });
      });

    return protocols;
  } catch (e) {
    return [];
  }
""";

# altsvcTable: one row per firstHtml request, carrying the array of protocol
# tokens that extractHTTPHeader pulls out of its alt-svc response header(s).
WITH altsvcTable AS (
  SELECT
    client,
    extractHTTPHeader(response_headers, 'alt-svc') AS protocols
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2022-06-01' AND
    firstHtml
)

# One output row per client and advertised protocol token.
# pct_advertised is relative to the number of advertised tokens of that client
# (requests without an alt-svc header contribute no rows after the UNNEST).
SELECT
  client,
  protocol,
  COUNT(0) AS protocol_count,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total_advertised,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_advertised
FROM
  altsvcTable,
  UNNEST(protocols) AS protocol
GROUP BY
  client,
  protocol
ORDER BY
  client ASC,
  # total_advertised is constant within a client, so sorting on it left the
  # rows of a client in arbitrary order; rank by the per-protocol count and
  # break ties on the protocol name to keep the output deterministic.
  protocol_count DESC,
  protocol ASC
35 changes: 35 additions & 0 deletions sql/2022/http/sites_using_link_preload.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#standardSQL

# Number of <link rel="preload">.

CREATE TEMPORARY FUNCTION getNumLinkRelPreload(payload STRING)
RETURNS INT LANGUAGE js AS """
  // payload: JSON object whose _almanac property is itself a JSON string.
  // Returns how many entries of _almanac['link-nodes']['nodes'] have a rel
  // that is exactly 'preload', or -1 when the payload cannot be processed.
  try {
    const page = JSON.parse(payload);
    const linkNodes = JSON.parse(page._almanac)['link-nodes']['nodes'];
    return linkNodes.reduce(function (preloadCount, linkNode) {
      return linkNode['rel'] === 'preload' ? preloadCount + 1 : preloadCount;
    }, 0);
  } catch (e) {
    return -1;
  }
""";

# Per client, selected percentiles of the number of <link rel="preload">
# nodes per page. APPROX_QUANTILES(..., 1000) returns 1001 boundaries, so
# percentile p is found at offset p * 10.
SELECT
  client,
  percentile,
  APPROX_QUANTILES(num_link_rel_preload, 1000)[OFFSET(percentile * 10)] AS num_percentiles
FROM (
  SELECT
    _TABLE_SUFFIX AS client,
    getNumLinkRelPreload(payload) AS num_link_rel_preload
  FROM
    `httparchive.pages.2022_06_01_*`
),
  UNNEST([10, 25, 50, 75, 90, 95, 100]) AS percentile
WHERE
  # getNumLinkRelPreload returns -1 when a payload cannot be processed; keep
  # that sentinel out of the distribution so it does not skew the percentiles.
  num_link_rel_preload >= 0
GROUP BY
  client,
  percentile
ORDER BY
  client,
  percentile
16 changes: 16 additions & 0 deletions sql/2022/http/sites_with_altsvc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#standardSQL
# Fraction of sites with alt-svc header
#
# A request counts only when one of its response headers is *named* alt-svc
# (case-insensitive). A plain substring search over the serialized headers
# would also match "alt-svc" appearing inside another header's value.
# NOTE(review): the pattern assumes response_headers is a JSON array of
# {"name": ..., "value": ...} objects - confirm against the table schema.
SELECT
  client,
  COUNTIF(has_altsvc) AS sites_with_altsvc,
  # One output row per client, so the plain count already is the client total.
  COUNT(0) AS total,
  COUNTIF(has_altsvc) / COUNT(0) AS pct_sites_with_altsvc
FROM (
  SELECT
    client,
    REGEXP_CONTAINS(response_headers, r'(?i)"name"\s*:\s*"alt-svc"') AS has_altsvc
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2022-06-01' AND
    firstHtml
)
GROUP BY
  client
ORDER BY
  pct_sites_with_altsvc DESC