Skip to content

Commit

Permalink
update generate_known_tag.R to accommodate updated MDN sites
Browse files Browse the repository at this point in the history
  • Loading branch information
cpsievert committed Feb 23, 2022
1 parent 3419e2f commit 6ff1ad6
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 55 deletions.
20 changes: 11 additions & 9 deletions R/known_tags.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ known_tags <- c(
"code", # html
"col", # html
"colgroup", # html
"color-profile", # svg
"command", #
"color-profile", # deprecated
"command", # deprecated
"data", # html
"datalist", # html
"dd", # html
Expand All @@ -45,7 +45,7 @@ known_tags <- c(
"ellipse", # svg
"em", # html
"embed", # html
"eventsource", #
"eventsource", # deprecated
"feBlend", # svg
"feColorMatrix", # svg
"feComponentTransfer",# svg
Expand Down Expand Up @@ -89,7 +89,7 @@ known_tags <- c(
"hatchpath", # svg
"head", # html
"header", # html
"hgroup", # html
"hgroup", # deprecated
"hr", # html
"html", # html
"i", # html
Expand All @@ -99,7 +99,7 @@ known_tags <- c(
"input", # html
"ins", # html
"kbd", # html
"keygen", #
"keygen", # deprecated
"label", # html
"legend", # html
"li", # html
Expand All @@ -111,6 +111,7 @@ known_tags <- c(
"mark", # html
"marker", # svg
"mask", # svg
"math", # html
"menu", # html
"meta", # html
"metadata", # svg
Expand All @@ -130,15 +131,16 @@ known_tags <- c(
"picture", # html
"polygon", # svg
"polyline", # svg
"portal", # html
"pre", # html
"progress", # html
"q", # html
"radialGradient", # svg
"rb", # html
"rb", # deprecated
"rect", # svg
"rp", # html
"rt", # html
"rtc", # html
"rtc", # deprecated
"ruby", # html
"s", # html
"samp", # html
Expand All @@ -148,7 +150,7 @@ known_tags <- c(
"set", # svg
"slot", # html
"small", # html
"solidcolor", # svg
"solidcolor", # deprecated
"source", # html
"span", # html
"stop", # svg
Expand All @@ -157,7 +159,7 @@ known_tags <- c(
"sub", # html
"summary", # html
"sup", # html
"svg", # svg
"svg", # html svg
"switch", # svg
"symbol", # svg
"table", # html
Expand Down
133 changes: 88 additions & 45 deletions scripts/generate_known_tags.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,77 @@
## This script web scrapes two Mozilla websites for HTML and SVG tag elements.
## All HTML tags

library(rvest)
library(dplyr)

# Note: Mozilla seems to have a more up to date set of what is possible / not obsolete compared to W3 schools
base_url <- "https://developer.mozilla.org/en-US/docs/Web"

html_tag_dfs <- read_html(file.path(base_url, "HTML", "Element")) %>%
html_table()

# The last table is obsolete/deprecated elements
n_dfs <- length(html_tag_dfs)

html_tags_df <- html_tag_dfs[-n_dfs] %>%
bind_rows() %>%
# h1-h6 all appear in one comma-separated row
mutate(name = strsplit(Element, ", ")) %>%
tidyr::unnest(name) %>%
select(Element = name, Description) %>%
transmute(
name = sub("^<", "", sub(">$", "", Element)),
desc = paste(
Description, "Learn more at",
file.path(base_url, "HTML", "Element", name)
)
)

svg <- read_html(file.path(base_url, "SVG", "Element"))

# Due to a lack of structure on the SVG page,
# this seems to be the best way to target just
# the hyperlinks under the "SVG elements A to Z" section
svg_tags <- lapply(letters, function(x) {
html_elements(svg, sprintf("h3[id=%s] + div > ul > li > a", x)) %>%
html_attr("href") %>%
basename()
})

# TODO: eventually it might be nice to also scrape
# the descriptions by following the url
svg_tags_df <- tibble(
name = unlist(svg_tags),
desc = sprintf(
"Creates the <%s> SVG element. Learn more at %s",
name, file.path(base_url, "SVG", "Element", name)
)
)

library(magrittr)


get_tags <- function(url, css) {
  # Fetch `url` and parse the response body.
  page <- httr::content(httr::GET(url))

  # Text of every node matched by the CSS selector `css`.
  labels <- rvest::html_text(rvest::html_nodes(page, css))

  # Drop a leading "<" and a trailing ">" so "<div>" becomes "div".
  bare <- sub(">$", "", sub("^<", "", labels))

  # Sort first, then de-duplicate (same order of operations as before),
  # and print the result so it shows up when the script is run.
  tag_names <- unique(sort(bare))
  print(tag_names)
}

## W3 Schools
## Mozilla seemed to have a more up to date set of what is possible / not obsolete
# w3html_tags <- get_tags("https://www.w3schools.com/tags/default.asp", "#htmltags tr td:first-child a:not(.notsupported)")
## Had extra tags not seen in other places `altGlyph`
# w3svg_tags <- get_tags("https://www.w3schools.com/graphics/svg_reference.asp", "#main td:first-child")

## W3 Standard
# # The original spec websites made it very hard to determine what was obsolete / shouldn't be used and what was to be used
# html_tags <- get_tags("https://www.w3.org/TR/2018/WD-html53-20181018/single-page.html", "dfn[data-dfn-type='element']")
# svg_tags <- get_tags("https://svgwg.org/svg2-draft/single-page.html", "dfn[data-dfn-type='element']")

# Save a JSON version so other languages can read them in easily
cat(
jsonlite::toJSON(html_tags_df),
file = "scripts/html_tags.json"
)

## Mozilla
# do not include the last section of obsolete tags
html_tags <- get_tags("https://developer.mozilla.org/en-US/docs/Web/HTML/Element", "article table:not(:last-child) td:first-child code")
# html_tags_obsolete <- get_tags("https://developer.mozilla.org/en-US/docs/Web/HTML/Element", "#content table:last-child td:first-child a")
cat(
jsonlite::toJSON(svg_tags_df),
file = "scripts/svg_tags.json"
)

# do not include tags that do not contain documentation articles
# Only pull from the index, as elements not in the index are considered obsolete. (ex: altGlyph or font-face)
svg_tags <- get_tags("https://developer.mozilla.org/en-US/docs/Web/SVG/Element", "article .index a:not([rel='nofollow']) code")
html_tags <- html_tags_df$name
svg_tags <- svg_tags_df$name


# Both SVG2 and HTML5
svg_tags[svg_tags %in% html_tags]
#> [1] "a" "script" "style" "svg" "title"


new_tags <- c(svg_tags, html_tags) %>%
unique() %>%
sort()

# Call using callr::r to avoid any devtools loaded htmltools::tags namespace issues
cran_tags <- callr::r(
Expand All @@ -54,30 +84,43 @@ cran_tags <- callr::r(
show = TRUE
)

new_tags <- c(svg_tags, html_tags) %>% unique() %>% sort()

# Tags known to the CRAN release that are not in the current HTML5 / SVG2 lists
setdiff(cran_tags, new_tags)
#> "command" "eventsource" "keygen"
#> [1] "color-profile" "command" "eventsource" "hgroup"
#> [5] "keygen" "rb" "rtc" "solidcolor"


# New HTML5 tags
setdiff(html_tags, cran_tags)
#> "rb" "rtc" "slot"
#> "portal" "math"

# New SVG2 tags
setdiff(svg_tags, cran_tags)
### ...basically all svg tags
#> character(0)

# combine old and new tags so that old tags are not lost
save_tags <- c(new_tags, cran_tags) %>% unique() %>% sort()

# Save a JSON version so other languages can read them in easily
cat(jsonlite::toJSON(save_tags), file = "scripts/known_tags.json")
save_tags <- c(new_tags, cran_tags) %>%
unique() %>%
sort()

save_line <- paste0(
format(paste0(" \"", save_tags, "\"", ifelse(seq_along(save_tags) == length(save_tags), "", ",")), justify = "left"), "#",
ifelse(save_tags %in% html_tags, " html", " "),
ifelse(save_tags %in% svg_tags, " svg", "")
format(
paste0(
" \"", save_tags, "\"",
ifelse(
seq_along(save_tags) == length(save_tags),
"", ","
)
),
justify = "left"
),
"#",
case_when(
save_tags %in% html_tags & save_tags %in% svg_tags ~ " html svg",
save_tags %in% html_tags ~ " html",
save_tags %in% svg_tags ~ " svg",
TRUE ~ " deprecated"
)
) %>%
sub("\\s+$", "", .)
cat(
Expand Down
1 change: 1 addition & 0 deletions scripts/html_tags.json

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion scripts/known_tags.json

This file was deleted.

1 change: 1 addition & 0 deletions scripts/svg_tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"name":"a","desc":"Creates the <a> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/a"},{"name":"animate","desc":"Creates the <animate> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animate"},{"name":"animateMotion","desc":"Creates the <animateMotion> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animateMotion"},{"name":"animateTransform","desc":"Creates the <animateTransform> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animateTransform"},{"name":"circle","desc":"Creates the <circle> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/circle"},{"name":"clipPath","desc":"Creates the <clipPath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/clipPath"},{"name":"defs","desc":"Creates the <defs> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/defs"},{"name":"desc","desc":"Creates the <desc> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/desc"},{"name":"discard","desc":"Creates the <discard> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/discard"},{"name":"ellipse","desc":"Creates the <ellipse> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/ellipse"},{"name":"feBlend","desc":"Creates the <feBlend> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feBlend"},{"name":"feColorMatrix","desc":"Creates the <feColorMatrix> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feColorMatrix"},{"name":"feComponentTransfer","desc":"Creates the <feComponentTransfer> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feComponentTransfer"},{"name":"feComposite","desc":"Creates the <feComposite> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feComposite"},{"name":"feConvolveMatrix","desc":"Creates the <feConvolveMatrix> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feConvolveMatrix"},{"name":"feDiffuseLighting","desc":"Creates the <feDiffuseLighting> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDiffuseLighting"},{"name":"feDisplacementMap","desc":"Creates the <feDisplacementMap> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDisplacementMap"},{"name":"feDistantLight","desc":"Creates the <feDistantLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDistantLight"},{"name":"feDropShadow","desc":"Creates the <feDropShadow> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDropShadow"},{"name":"feFlood","desc":"Creates the <feFlood> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFlood"},{"name":"feFuncA","desc":"Creates the <feFuncA> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncA"},{"name":"feFuncB","desc":"Creates the <feFuncB> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncB"},{"name":"feFuncG","desc":"Creates the <feFuncG> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncG"},{"name":"feFuncR","desc":"Creates the <feFuncR> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncR"},{"name":"feGaussianBlur","desc":"Creates the <feGaussianBlur> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feGaussianBlur"},{"name":"feImage","desc":"Creates the <feImage> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feImage"},{"name":"feMerge","desc":"Creates the <feMerge> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMerge"},{"name":"feMergeNode","desc":"Creates the <feMergeNode> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMergeNode"},{"name":"feMorphology","desc":"Creates the <feMorphology> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMorphology"},{"name":"feOffset","desc":"Creates the <feOffset> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feOffset"},{"name":"fePointLight","desc":"Creates the <fePointLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/fePointLight"},{"name":"feSpecularLighting","desc":"Creates the <feSpecularLighting> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feSpecularLighting"},{"name":"feSpotLight","desc":"Creates the <feSpotLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feSpotLight"},{"name":"feTile","desc":"Creates the <feTile> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feTile"},{"name":"feTurbulence","desc":"Creates the <feTurbulence> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feTurbulence"},{"name":"filter","desc":"Creates the <filter> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/filter"},{"name":"foreignObject","desc":"Creates the <foreignObject> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/foreignObject"},{"name":"g","desc":"Creates the <g> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/g"},{"name":"hatch","desc":"Creates the <hatch> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/hatch"},{"name":"hatchpath","desc":"Creates the <hatchpath> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/hatchpath"},{"name":"image","desc":"Creates the <image> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/image"},{"name":"line","desc":"Creates the <line> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/line"},{"name":"linearGradient","desc":"Creates the <linearGradient> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/linearGradient"},{"name":"marker","desc":"Creates the <marker> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/marker"},{"name":"mask","desc":"Creates the <mask> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/mask"},{"name":"metadata","desc":"Creates the <metadata> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/metadata"},{"name":"mpath","desc":"Creates the <mpath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/mpath"},{"name":"path","desc":"Creates the <path> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/path"},{"name":"pattern","desc":"Creates the <pattern> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/pattern"},{"name":"polygon","desc":"Creates the <polygon> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/polygon"},{"name":"polyline","desc":"Creates the <polyline> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/polyline"},{"name":"radialGradient","desc":"Creates the <radialGradient> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/radialGradient"},{"name":"rect","desc":"Creates the <rect> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/rect"},{"name":"script","desc":"Creates the <script> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/script"},{"name":"set","desc":"Creates the <set> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/set"},{"name":"stop","desc":"Creates the <stop> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/stop"},{"name":"style","desc":"Creates the <style> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/style"},{"name":"svg","desc":"Creates the <svg> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/svg"},{"name":"switch","desc":"Creates the <switch> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/switch"},{"name":"symbol","desc":"Creates the <symbol> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/symbol"},{"name":"text","desc":"Creates the <text> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/text"},{"name":"textPath","desc":"Creates the <textPath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/textPath"},{"name":"title","desc":"Creates the <title> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/title"},{"name":"tspan","desc":"Creates the <tspan> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/tspan"},{"name":"use","desc":"Creates the <use> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/use"},{"name":"view","desc":"Creates the <view> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/view"}]

0 comments on commit 6ff1ad6

Please sign in to comment.