tc39 · bakkot · May 27, 2020 · May 24, 2020 · May 25, 2020 · May 26, 2020
diff --git a/src/lint/collect-header-diagnostics.ts b/src/lint/collect-header-diagnostics.ts
@@ -1,6 +1,6 @@
 import type { LintingError } from './algorithm-error-reporter-type';
 
-import { getLocation, indexWithinElementToTrueLocation } from './utils';
+import { getLocation, offsetWithinElementToTrueLocation } from './utils';
 
 const ruleId = 'header-format';
 
@@ -19,7 +19,7 @@ export function collectHeaderDiagnostics(
     let params = contents.substring(contents.indexOf('(') + 1, contents.length - 1);
 
     if (!/[\S] $/.test(name)) {
-      let { line, column } = indexWithinElementToTrueLocation(
+      let { line, column } = offsetWithinElementToTrueLocation(
         getLocation(dom, element),
         contents,
         name.length - 1
@@ -57,7 +57,7 @@ export function collectHeaderDiagnostics(
     ].some(r => r.test(name));
 
     if (!nameMatches) {
-      let { line, column } = indexWithinElementToTrueLocation(
+      let { line, column } = offsetWithinElementToTrueLocation(
         getLocation(dom, element),
         contents,
         0
@@ -94,7 +94,7 @@ export function collectHeaderDiagnostics(
       ].some(r => r.test(params));
 
     if (!paramsMatches) {
-      let { line, column } = indexWithinElementToTrueLocation(
+      let { line, column } = offsetWithinElementToTrueLocation(
         getLocation(dom, element),
         contents,
         name.length

diff --git a/src/lint/collect-spelling-diagnostics.ts b/src/lint/collect-spelling-diagnostics.ts
@@ -0,0 +1,64 @@
+import type { LintingError } from './algorithm-error-reporter-type';
+
+import { offsetToLineAndColumn } from './utils';
+
+// Rule identifier attached to every diagnostic produced by this file.
+let ruleId = 'spelling';
+
+// Each entry pairs a pattern for a discouraged spelling with the message reported for it.
+// Every pattern carries the `g` flag: the collector calls `exec` repeatedly to find each match.
+// Note that these will be composed, so cannot contain backreferences
+let matchers = [
+  {
+    pattern: /\*this\* object/giu,
+    message: 'Prefer "*this* value"',
+  },
+  {
+    pattern: /1's complement/giu,
+    message: 'Prefer "one\'s complement"',
+  },
+  {
+    pattern: /2's complement/giu,
+    message: 'Prefer "two\'s complement"',
+  },
+  {
+    // case-sensitive: no `i` flag
+    pattern: /\*0\*/gu,
+    message: 'The Number value 0 should be written "*+0*", to unambiguously exclude "*-0*"',
+  },
+  {
+    pattern: /behavior/giu,
+    message: 'ECMA-262 uses Oxford spelling ("behaviour")',
+  },
+  {
+    // case-sensitive apart from the leading "T"/"t", so "the empty String" is not matched
+    pattern: /[Tt]he empty string/gu,
+    message: 'Prefer "the empty String"',
+  },
+];
+
+/**
+ * Scans the raw source text for the discouraged spellings listed in `matchers`.
+ *
+ * @param sourceText - the complete text to check
+ * @returns one diagnostic per occurrence of any pattern, positioned via `offsetToLineAndColumn`;
+ *   an empty array when nothing matches
+ * @throws Error if the composed fast-path check matched but no individual pattern did,
+ *   which would indicate a bug in this function
+ */
+export function collectSpellingDiagnostics(sourceText: string): LintingError[] {
+  // The usual case will be to have no errors, so we have a fast path for that case:
+  // one composed alternation per case-sensitivity group. The patterns are grouped by their
+  // `i` flag because composing only the sources drops the flags, and a single case-sensitive
+  // alternation would miss text such as "Behavior" which the individual patterns do report.
+  let anyMatch = [true, false].some(ignoreCase => {
+    let sources = matchers
+      .filter(m => m.pattern.ignoreCase === ignoreCase)
+      .map(m => `(?:${m.pattern.source})`);
+    // An empty alternation would match every input, so skip empty groups.
+    return (
+      sources.length > 0 && new RegExp(sources.join('|'), ignoreCase ? 'iu' : 'u').test(sourceText)
+    );
+  });
+  if (!anyMatch) {
+    return [];
+  }
+
+  // We only fall back to slower individual tests if there is at least one error.
+  let errors: LintingError[] = [];
+  for (let { pattern, message } of matchers) {
+    // The patterns are shared global regexes; make sure every scan starts at the beginning.
+    pattern.lastIndex = 0;
+    let match = pattern.exec(sourceText);
+    while (match !== null) {
+      let { line, column } = offsetToLineAndColumn(sourceText, match.index);
+      errors.push({
+        ruleId,
+        nodeType: 'text',
+        line,
+        column,
+        message,
+      });
+      match = pattern.exec(sourceText);
+    }
+  }
+  if (errors.length === 0) {
+    throw new Error(
+      'Ecmarkup has a bug: the spell checker reported an error, but could not find one. Please report this at https://github.com/tc39/ecmarkup/issues/new.'
+    );
+  }
+  return errors;
+}
diff --git a/src/lint/lint.ts b/src/lint/lint.ts
@@ -2,6 +2,7 @@ import { emit } from 'ecmarkdown';
 
 import { collectNodes } from './collect-nodes';
 import { collectGrammarDiagnostics } from './collect-grammar-diagnostics';
+import { collectSpellingDiagnostics } from './collect-spelling-diagnostics';
 import { collectAlgorithmDiagnostics } from './collect-algorithm-diagnostics';
 import { collectHeaderDiagnostics } from './collect-header-diagnostics';
 import type { Reporter } from './algorithm-error-reporter-type';
@@ -12,6 +13,8 @@ Currently this checks
 - the productions in the definition of each early error and SDO are defined in the main grammar
 - those productions do not include `[no LineTerminator here]` restrictions or `[+flag]` gating
 - the algorithm linting rules imported above
+- headers of abstract operations have consistent spacing
+- certain common spelling errors
 
 There's more to do:
 https://github.com/tc39/ecmarkup/issues/173
@@ -35,7 +38,10 @@ export function lint(report: Reporter, sourceText: string, dom: any, document: D
 
   lintingErrors.push(...collectHeaderDiagnostics(dom, headers));
 
+  lintingErrors.push(...collectSpellingDiagnostics(sourceText));
+
   if (lintingErrors.length > 0) {
+    lintingErrors.sort((a, b) => (a.line === b.line ? a.column - b.column : a.line - b.line));
     report(lintingErrors, sourceText);
     return;
   }

diff --git a/src/lint/utils.ts b/src/lint/utils.ts
@@ -14,30 +14,37 @@ import type {
 
 import { Grammar as GrammarFile, SyntaxKind } from 'grammarkdown';
 
-export function indexWithinElementToTrueLocation(
-  elementLoc: ReturnType<typeof getLocation>,
-  string: string,
-  index: number
-) {
-  let headerLines = string.split('\n');
-  let headerLine = 0;
+/**
+ * Converts a 0-based character offset within `string` into a 1-based line and column.
+ * Lines are delimited by '\n' only.
+ *
+ * NOTE(review): there is no bounds check; an offset greater than `string.length` walks past
+ * the last line and would fail when reading `.length` of an undefined entry.
+ */
+export function offsetToLineAndColumn(string: string, offset: number) {
+  let lines = string.split('\n');
+  let line = 0;
   let seen = 0;
   while (true) {
-    if (seen + headerLines[headerLine].length >= index) {
+    // `seen` is the offset at which the current line starts; stop at the first line
+    // whose end is at or beyond the requested offset
+    if (seen + lines[line].length >= offset) {
       break;
     }
-    seen += headerLines[headerLine].length + 1; // +1 for the '\n'
-    ++headerLine;
+    seen += lines[line].length + 1; // +1 for the '\n'
+    ++line;
   }
-  let headerColumn = index - seen;
+  let column = offset - seen;
+  // both values were computed 0-based; report them 1-based
+  return { line: line + 1, column: column + 1 };
+}
+
+/**
+ * Translates an offset within `string` into a line and column based on the location of the
+ * element's start tag (`elementLoc.startTag`).
+ *
+ * For an offset on the first line of `string`, the column is shifted by the start tag's column
+ * plus the start tag's length (`endOffset - startOffset`); on later lines the column computed
+ * within `string` is used as is.
+ *
+ * Presumably `string` is the element's content beginning directly after its start tag, as
+ * the callers in collect-header-diagnostics pass the header contents; confirm when adding callers.
+ */
+export function offsetWithinElementToTrueLocation(
+  elementLoc: ReturnType<typeof getLocation>,
+  string: string,
+  offset: number
+) {
+  let { line: offsetLine, column: offsetColumn } = offsetToLineAndColumn(string, offset);
 
-  let line = elementLoc.startTag.line + headerLine;
+  // both JSDOM and our line/column are 1-based, so subtract 1 to avoid double-counting
+  let line = elementLoc.startTag.line + offsetLine - 1;
   let column =
-    headerLine === 0
+    offsetLine === 1
       ? elementLoc.startTag.col +
         (elementLoc.startTag.endOffset - elementLoc.startTag.startOffset) +
-        headerColumn
-      : headerColumn + 1;
+        offsetColumn -
+        1
+      : offsetColumn;
 
   return { line, column };
 }

diff --git a/test/lint-spelling.js b/test/lint-spelling.js
@@ -0,0 +1,97 @@
+'use strict';
+
+let { assertLint, assertLintFree, positioned, lintLocationMarker: M } = require('./lint-helpers');
+
+// One positive case per entry of the spelling rule, plus one negative case containing the
+// preferred spellings. `M` (lintLocationMarker) is interpolated directly before the offending
+// text; presumably `positioned` uses it to derive the expected line/column (see ./lint-helpers).
+describe('spelling', function () {
+  it('*this* object', async function () {
+    await assertLint(
+      positioned`
+        <p>If the ${M}*this* object ...</p>
+      `,
+      {
+        ruleId: 'spelling',
+        nodeType: 'text',
+        message: 'Prefer "*this* value"',
+      }
+    );
+  });
+
+  it("1's complement", async function () {
+    await assertLint(
+      positioned`
+        <p>It returns the ${M}1's complement of _x_.</p>
+      `,
+      {
+        ruleId: 'spelling',
+        nodeType: 'text',
+        message: 'Prefer "one\'s complement"',
+      }
+    );
+  });
+
+  it("2's complement", async function () {
+    await assertLint(
+      positioned`
+        <p>BigInts act as ${M}2's complement binary strings</p>
+      `,
+      {
+        ruleId: 'spelling',
+        nodeType: 'text',
+        message: 'Prefer "two\'s complement"',
+      }
+    );
+  });
+
+  it('*0*', async function () {
+    await assertLint(
+      positioned`
+        <emu-alg>1. If _x_ is ${M}*0*, then foo.</emu-alg>
+      `,
+      {
+        ruleId: 'spelling',
+        nodeType: 'text',
+        message: 'The Number value 0 should be written "*+0*", to unambiguously exclude "*-0*"',
+      }
+    );
+  });
+
+  it('behavior', async function () {
+    await assertLint(
+      positioned`
+        <p>Most hosts will be able to simply define HostGetImportMetaProperties, and leave HostFinalizeImportMeta with its default ${M}behavior.</p>
+      `,
+      {
+        ruleId: 'spelling',
+        nodeType: 'text',
+        message: 'ECMA-262 uses Oxford spelling ("behaviour")',
+      }
+    );
+  });
+
+  it('the empty string', async function () {
+    await assertLint(
+      positioned`
+        <p>_searchValue_ is ${M}the empty string</p>
+      `,
+      {
+        ruleId: 'spelling',
+        nodeType: 'text',
+        message: 'Prefer "the empty String"',
+      }
+    );
+  });
+
+  // The preferred form of every spelling checked above must not be reported.
+  it('negative', async function () {
+    await assertLintFree(`
+      <p>
+        the *this* value
+        one's complement
+        two's complement
+        *+0*
+        *-0*
+        behaviour
+        the empty String
+      </p>
+    `);
+  });
+});