Skip to content

Commit

Permalink
unsafe html should be a breaking flaw (mdn#3192)
Browse files Browse the repository at this point in the history
* Unsafe HTML should be a breaking flaw

Part of mdn#3177

* tests

* undo unnecessary if statement

* restore newline

* Update testing/content/files/en-us/web/unsafe_html/index.html

* Update testing/content/files/en-us/web/unsafe_html/index.html

Co-authored-by: Ryan Johnson <escattone@gmail.com>
  • Loading branch information
peterbe and escattone committed Jun 1, 2021
1 parent cd85799 commit 96d2786
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 83 deletions.
1 change: 1 addition & 0 deletions build/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const VALID_FLAW_CHECKS = new Set([
"bad_pre_tags",
"sectioning",
"heading_links",
"unsafe_html",
]);

// TODO (far future): Switch to "error" when number of flaws drops.
Expand Down
198 changes: 132 additions & 66 deletions build/flaws.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ const imageminSvgo = require("imagemin-svgo");
const sanitizeFilename = require("sanitize-filename");

const { Archive, Document, Redirect, Image } = require("../content");
const { FLAW_LEVELS } = require("./constants");
const { FLAW_LEVELS, VALID_FLAW_CHECKS } = require("./constants");
const {
INTERACTIVE_EXAMPLES_BASE_URL,
LIVE_SAMPLES_BASE_URL,
} = require("../kumascript/src/constants");
const { packageBCD } = require("./resolve-bcd");
const {
findMatchesInText,
Expand All @@ -22,26 +26,134 @@ const {
const { humanFileSize } = require("./utils");
const { VALID_MIME_TYPES } = require("../filecheck/constants");

function injectFlaws(doc, $, options, { rawContent }) {
function injectFlaws(doc, $, options, document) {
if (doc.isArchive) return;

injectBrokenLinksFlaws(
options.flawLevels.get("broken_links"),
doc,
$,
rawContent
);
const flawChecks = [
["unsafe_html", injectUnsafeHTMLFlaws, false],
["broken_links", injectBrokenLinksFlaws, true],
["bad_bcd_queries", injectBadBCDQueriesFlaws, false],
["bad_pre_tags", injectPreTagFlaws, false],
["heading_links", injectHeadingLinksFlaws, false],
];

// Note that some flaw checking functions need to always run. Even if we're not
// recording the flaws, the checks that it does are important for regular
// building.

for (const [flawName, func, alwaysRun] of flawChecks) {
// Sanity check the list of flaw names that they're all recognized.
// Basically a cheap enum check.
if (!VALID_FLAW_CHECKS.has(flawName)) {
throw new Error(`'${flawName}' is not a valid flaw check name`);
}

injectBadBCDQueriesFlaws(options.flawLevels.get("bad_bcd_queries"), doc, $);
const level = options.flawLevels.get(flawName);
if (!alwaysRun && level === FLAW_LEVELS.IGNORE) {
continue;
}

injectPreTagFlaws(options.flawLevels.get("bad_pre_tags"), doc, $, rawContent);
// The flaw injection function will mutate the `doc.flaws` object.
func(doc, $, document, level);

injectHeadingLinksFlaws(
options.flawLevels.get("heading_links"),
doc,
$,
rawContent
);
if (
level === FLAW_LEVELS.ERROR &&
doc.flaws[flawName] &&
doc.flaws[flawName].length > 0
) {
throw new Error(
`${flawName} flaws: ${doc.flaws[flawName].map((f) => f.explanation)}`
);
}
}
}

function injectUnsafeHTMLFlaws(doc, $, { rawContent }) {
function addFlaw(element, explanation) {
if (!("unsafe_html" in doc.flaws)) {
doc.flaws.unsafe_html = [];
}
const id = `unsafe_html${doc.flaws.unsafe_html.length + 1}`;
let html = $.html($(element));
$(element).replaceWith($("<code>").addClass("unsafe-html").text(html));
// Some nasty tags are so broken they can make the HTML become more or less
// the whole page. E.g. `<script\x20type="text/javascript">`.
if (html.length > 100) {
html = html.slice(0, Math.min(html.indexOf("\n"), 100)) + "…";
}
// Perhaps in the future we can make it possibly fixable to delete it.
const fixable = false;
const suggestion = null;

const flaw = {
explanation,
id,
fixable,
html,
suggestion,
};
for (const { line, column } of findMatchesInText(html, rawContent)) {
// This might not find anything because the HTML might have mutated
// slightly because of how cheerio parses it. But it doesn't hurt to try.
flaw.line = line;
flaw.column = column;
}

doc.flaws.unsafe_html.push(flaw);
}

const safeIFrameSrcs = [
LIVE_SAMPLES_BASE_URL.toLowerCase(),
INTERACTIVE_EXAMPLES_BASE_URL.toLowerCase(),
// EmbedGHLiveSample.ejs
"https://mdn.github.io",
// EmbedYouTube.ejs
"https://www.youtube-nocookie.com",
// JSFiddleEmbed.ejs
"https://jsfiddle.net",
// EmbedTest262ReportResultsTable.ejs
"https://test262.report",
];

$("script, embed, object, iframe").each((i, element) => {
const { tagName } = element;
if (tagName === "iframe") {
// For iframes we only check the 'src' value
const src = $(element).attr("src");
if (!safeIFrameSrcs.find((s) => src.toLowerCase().startsWith(s))) {
addFlaw(element, `Unsafe <iframe> 'src' value (${src})`);
}
} else {
addFlaw(element, `<${tagName}> tag found`);
}
});

$("*").each((i, element) => {
const { tagName } = element;
// E.g. `<script\x20type="text/javascript">javascript:alert(1);</script>`
if (tagName.startsWith("script")) {
addFlaw(element, `possible <script> tag`);
}

const checkValueAttributes = new Set(["style", "href"]);
for (const key in element.attribs) {
// No need to lowercase the `key` because it's already always lowercased
// by cheerio.
// This regex will match on `\xa0onload` and `onmousover` but
// not `fond` or `stompon`.
if (/(\\x[a-f0-9]{2}|\b)on\w+/.test(key)) {
addFlaw(element, `'${key}' on-handler found in ${tagName}`);
} else if (checkValueAttributes.has(key)) {
const value = element.attribs[key];
if (value && /(^|\\x[a-f0-9]{2})javascript:/i.test(value)) {
addFlaw(
element,
`'javascript:' expression found inside 'style' attribute in ${tagName}`
);
}
}
}
});
}

function injectSectionFlaws(doc, flaws, options) {
Expand All @@ -62,7 +174,7 @@ function injectSectionFlaws(doc, flaws, options) {

// The 'broken_links' flaw check looks for internal links that
// link to a document that's going to fail with a 404 Not Found.
function injectBrokenLinksFlaws(level, doc, $, rawContent) {
function injectBrokenLinksFlaws(doc, $, { rawContent }, level) {
// This is needed because the same href can occur multiple time.
// For example:
// <a href="/foo/bar">
Expand Down Expand Up @@ -219,15 +331,6 @@ function injectBrokenLinksFlaws(level, doc, $, rawContent) {
}
}
});
if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.broken_links &&
doc.flaws.broken_links.length
) {
throw new Error(
`broken_links flaws: ${doc.flaws.broken_links.map(JSON.stringify)}`
);
}
}

// Bad BCD queries are when the `<div class="bc-data">` tags have an
Expand All @@ -236,9 +339,7 @@ function injectBrokenLinksFlaws(level, doc, $, rawContent) {
//
// <div class="bc-data" id="bcd:never.ever.heard.of">
//
function injectBadBCDQueriesFlaws(level, doc, $) {
if (level === FLAW_LEVELS.IGNORE) return;

function injectBadBCDQueriesFlaws(doc, $) {
$("div.bc-data").each((i, element) => {
const dataQuery = $(element).attr("id");
if (!dataQuery) {
Expand All @@ -265,22 +366,9 @@ function injectBadBCDQueriesFlaws(level, doc, $) {
}
}
});
if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.bad_bcd_queries &&
doc.flaws.bad_bcd_queries.length
) {
throw new Error(
`bad_bcd_queries flaws: ${doc.flaws.bad_bcd_queries.map(
(f) => f.explanation
)}`
);
}
}

function injectPreTagFlaws(level, doc, $, rawContent) {
if (level === FLAW_LEVELS.IGNORE) return;

function injectPreTagFlaws(doc, $, { rawContent }) {
function escapeHTML(s) {
return s
.replace(/&/g, "&amp;")
Expand Down Expand Up @@ -393,26 +481,14 @@ function injectPreTagFlaws(level, doc, $, rawContent) {
if (noFixablePreTagFlawsYet()) {
// another flaw check here
}

if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.bad_pre_tags &&
doc.flaws.bad_pre_tags.length
) {
throw new Error(
`bad_pre_tags flaws: ${doc.flaws.bad_pre_tags.map(JSON.stringify)}`
);
}
}

// You're not allowed to have `<a>` elements inside `<h2>` or `<h3>` elements
// because those will be rendered out as "links to themselves".
// I.e. a source of `<h2 id="foo">Foo</h2>` renders out as:
// `<h2 id="foo"><a href="#foo">Foo</a></h2>` in the final HTML. That makes
// it easy to (perma)link to specific headings in the document.
function injectHeadingLinksFlaws(level, doc, $, rawContent) {
if (level === FLAW_LEVELS.IGNORE) return;

function injectHeadingLinksFlaws(doc, $, { rawContent }) {
function addFlaw($heading) {
if (!("heading_links" in doc.flaws)) {
doc.flaws.heading_links = [];
Expand Down Expand Up @@ -464,16 +540,6 @@ function injectHeadingLinksFlaws(level, doc, $, rawContent) {
$("h2 a, h3 a").each((i, element) => {
addFlaw($(element).parent());
});

if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.heading_links &&
doc.flaws.heading_links.length
) {
throw new Error(
`heading_links flaws: ${doc.flaws.heading_links.map(JSON.stringify)}`
);
}
}

async function fixFixableFlaws(doc, options, document) {
Expand Down
32 changes: 32 additions & 0 deletions client/src/document/toolbar/flaws.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
BadPreTagFlaw,
SectioningFlaw,
HeadingLinksFlaw,
UnsafeHTMLFlaw,
} from "../types";
import "./flaws.scss";

Expand Down Expand Up @@ -271,6 +272,10 @@ function Flaws({
flaws={doc.flaws.heading_links}
/>
);
case "unsafe_html":
return (
<UnsafeHTML key="unsafe_html" flaws={doc.flaws.unsafe_html} />
);
case "sectioning":
return <Sectioning key="sectioning" flaws={doc.flaws.sectioning} />;
default:
Expand Down Expand Up @@ -976,3 +981,30 @@ function HeadingLinks({
</div>
);
}

function UnsafeHTML({ flaws }: { flaws: UnsafeHTMLFlaw[] }) {
// The UI for this flaw can be a bit "simplistic" because by default this
// flaw will error rather than warn.
return (
<div className="flaw">
<h3>⚠️ {humanizeFlawName("unsafe_html")} ⚠️</h3>
<ul>
{flaws.map((flaw, i) => {
const key = flaw.id;
return (
<li key={key}>
<b>{flaw.explanation}</b>{" "}
{flaw.line && flaw.column && (
<>
line {flaw.line}:{flaw.column}
</>
)}{" "}
{flaw.fixable && <FixableFlawBadge />} <br />
<b>HTML:</b> <pre className="example-bad">{flaw.html}</pre> <br />
</li>
);
})}
</ul>
</div>
);
}
7 changes: 7 additions & 0 deletions client/src/document/types.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ export interface HeadingLinksFlaw extends GenericFlaw {
column: number | null;
}

export interface UnsafeHTMLFlaw extends GenericFlaw {
html: string;
line: number | null;
column: number | null;
}

export interface MacroErrorMessage extends GenericFlaw {
name: string;
error: {
Expand All @@ -85,6 +91,7 @@ type Flaws = {
sectioning: SectioningFlaw[];
image_widths: ImageWidthFlaw[];
heading_links: HeadingLinksFlaw[];
unsafe_html: UnsafeHTMLFlaw[];
};

export type Translation = {
Expand Down
1 change: 1 addition & 0 deletions client/src/flaw-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export function humanizeFlawName(name) {
bad_bcd_queries: "Bad BCD queries",
bad_bcd_links: "Bad BCD links",
bad_pre_tags: "Bad <pre> tags",
unsafe_html: "Unsafe HTML",
};
function fallback() {
return name.charAt(0).toUpperCase() + name.slice(1).replace(/_/g, " ");
Expand Down
16 changes: 0 additions & 16 deletions kumascript/src/api/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,6 @@ class HTMLTool {
return this;
}

removeOnEventHandlers() {
// Remove ALL on-event handlers.
this.$("*").each((i, e) => {
// Since "e.attribs" is an object with a "null"
// prototype, "key in e.attribs" is equivalent to
// "key of Object.keys(e.attribs)" since we don't
// have to worry about keys from the prototype.
for (const key in e.attribs) {
if (key.startsWith("on")) {
delete e.attribs[key];
}
}
});
return this;
}

injectSectionIDs() {
let idCount = 0;
const $ = this.$;
Expand Down
1 change: 0 additions & 1 deletion kumascript/src/api/wiki.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ module.exports = {
// First, we need to inject section ID's since the section
// extraction often depends on them.
tool.injectSectionIDs();
tool.removeOnEventHandlers();
tool.removeNoIncludes();

if (section) {
Expand Down
Loading

0 comments on commit 96d2786

Please sign in to comment.