Skip to content

Commit

Permalink
remark-lint-no-unicode refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
sshakndr committed Dec 5, 2024
1 parent 6ea9afb commit 39b1147
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 75 deletions.
6 changes: 3 additions & 3 deletions packages/remark-lint-no-unicode/fixtures/002.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ abcd

---

1:6-1:7: Forbidden character "d". https://symbl.cc/en/0064/
3:4-3:5: Forbidden character "d". https://symbl.cc/en/0064/
4:4-4:5: Forbidden character "d". https://symbl.cc/en/0064/
1:6-1:7: Forbidden character "d" (see https://symbl.cc/en/0064/).
3:4-3:5: Forbidden character "d" (see https://symbl.cc/en/0064/).
4:4-4:5: Forbidden character "d" (see https://symbl.cc/en/0064/).

---
145 changes: 73 additions & 72 deletions packages/remark-lint-no-unicode/lib/main.ts
Original file line number Diff line number Diff line change
@@ -1,91 +1,92 @@
import {type Root, type RootContent} from "mdast"
import {isCodeNode, isInlineCodeNode, isLiteralNode, isParentNode} from "@onlyoffice/mdast-util-is-node"
import {type Node} from "mdast"
import {lintRule} from "unified-lint-rule"
import {type VFile} from "vfile"
import {type Position} from "unist"
import {type MessageOptions, type VFile} from "vfile"

const replaceHint = new Map([
["00A0", "Use a regular space instead of no-break space. https://symbl.cc/en/00A0/"],
])
// Custom messages keyed by character code (for example "00A0"); a character
// without an entry here gets the generic "Forbidden character" message.
const hint: Record<string, string> = {
"00A0": `Use a regular space instead of no-break space (see ${url("00A0")}).`,
}

/**
 * Lint rule created with `lintRule` under the name "@onlyoffice:no-unicode";
 * the checking logic is the `rule` function.
 */
export const remarkLintNoUnicode: any =
lintRule("@onlyoffice:no-unicode", rule)

function rule(t: Root, f: VFile, o: string[]): void {
if (o === null || o === undefined || o.length === 0) {
f.fail("No allowed characters provided")
function rule(t: Node, f: VFile, o: string[]): void {
if (isCodeNode(t) || isInlineCodeNode(t)) {
return
}
for (const c of t.children) {
unicodeCheck(c, f, o)
if (isParentNode(t)) {
for (const c of t.children) {
rule(c, f, o)
}
}
}

function unicodeCheck(t: RootContent, f: VFile, o: string[]): void {
if (t.type === "inlineCode" || t.type === "code") {
if (!isLiteralNode(t)) {
return
}
if ("value" in t) {
const uc = getUnicodeCharacters(t.value)
for (const [i, u] of uc.entries()) {
if (!o.includes(u)) {
const m = replaceHint.get(u) ||
`Forbidden character "${String.fromCodePoint(Number.parseInt(u, 16))}". https://symbl.cc/en/${u}/`
if (t.position !== undefined) {
const line = t.position.start.line + uc.slice(0, i + 1).filter((e) => e === "000A").length
let {column} = t.position.start
for (const c of uc.slice(0, i + 1)) {
column += 1
if (c === "000A") {
column = 1
}
}
f.message(m, {
start: {
line,
column: column - 1,
},
end: {
line,
column,
},
})
} else {
f.message(m, {
ancestors: [t],
})
}
}
for (const [i, v] of [...t.value].entries()) {
const u = unicode(v)
if (o.includes(u)) {
continue
}
}
if ("children" in t) {
for (const c of t.children) {
unicodeCheck(c, f, o)
const m = message(v, u)
const p: MessageOptions = {
ancestors: [t],
}
if (t.position) {
p.place = position(t.position, t.value, i)
}
f.message(m, p)
}
}

function getUnicodeCharacters(s: string): string[] {
if (s === null || s === undefined || s.length === 0) {
return []
function message(v: string, u: string): string {
let m = hint[u]
if (!m) {
m = `Forbidden character "${v}" (see ${url(u)}).`
}
const a = []
for (const c of s) {
const cp = c.codePointAt(0)
if (cp !== undefined) {
const u = cp.toString(16).toUpperCase()
switch (u.length) {
case 1:
a.push(`000${u}`)
break
case 2:
a.push(`00${u}`)
break
case 3:
a.push(`0${u}`)
break
default:
a.push(u)
break
}
return m
}

function position(p: Position, a: string, i: number): Position {
let {line, column} = p.start
for (let j = 0; j <= i; j += 1) {
column += 1
if (a[j] === "\n") {
column = 1
line += 1
}
}
return a
const n: Position = {
start: {
line,
column: column - 1,
},
end: {
line,
column,
},
}
return n
}

/**
 * Formats the first code point of `s` as uppercase hexadecimal, zero-padded
 * to at least four digits (for example `"d"` becomes `"0064"`).
 *
 * @param s String whose first code point is formatted.
 * @returns The padded hex code, or `""` when `s` is empty.
 */
function unicode(s: string): string {
  const p = s.codePointAt(0)
  if (p === undefined) {
    return ""
  }
  // padStart leaves codes that already have four or more digits untouched.
  return p.toString(16).toUpperCase().padStart(4, "0")
}

/**
 * Builds the symbl.cc reference link for a character code.
 *
 * @param u Code inserted into the link path as-is.
 * @returns The link to the English symbl.cc page for `u`.
 */
function url(u: string): string {
  const base = "https://symbl.cc/en/"
  return base + u + "/"
}
2 changes: 2 additions & 0 deletions packages/remark-lint-no-unicode/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
"test": "pnpm test:types && pnpm test:unit"
},
"dependencies": {
"@onlyoffice/mdast-util-is-node": "workspace:^",
"@types/mdast": "4.0.3",
"@types/unist": "3.0.3",
"unified-lint-rule": "3.0.0",
"vfile": "6.0.1"
},
Expand Down
6 changes: 6 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 39b1147

Please sign in to comment.