Skip to content

Commit

Permalink
fix: resolve CondeNast#310 added support for more unicode whitespace …
Browse files Browse the repository at this point in the history
…characters
  • Loading branch information
JJMcConnell committed Sep 27, 2020
1 parent 919123e commit 6b7b7cf
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 11 deletions.
51 changes: 48 additions & 3 deletions packages/@atjson/renderer-commonmark/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,67 @@ function escapeHtmlEntities(text: string) {
.replace(/&([^\s]+);/g, "\\&$1;")
.replace(/</g, "&lt;")
.replace(/\u00A0/gu, "&nbsp;")
.replace(/\u2003/gu, "&emsp;");
.replace(/\u2000/gu, "&#8192;")
.replace(/\u2001/gu, "&#8193;")
.replace(/\u2002/gu, "&ensp;")
.replace(/\u2003/gu, "&emsp;")
.replace(/\u2004/gu, "&#8196;")
.replace(/\u2005/gu, "&#8197;")
.replace(/\u2006/gu, "&#8198;")
.replace(/\u2007/gu, "&#8199;")
.replace(/\u2008/gu, "&#8200;")
.replace(/\u2009/gu, "&#8201;")
.replace(/\u200A/gu, "&#8202;")
.replace(/\u200B/gu, "&#8203;")
.replace(/\u202F/gu, "&#8239;")
.replace(/\u205F/gu, "&#8287;")
.replace(/\u3000/gu, "&#12288;")
.replace(/\uFEFF/gu, "&#65279;");
}

/**
 * Escapes entity-like sequences and encodes special Unicode whitespace so
 * that it survives as visible entities in the rendered output.
 *
 * Two things happen, in this order:
 *  1. Anything that already looks like an entity (`&...;` with no whitespace
 *     between the ampersand and the semicolon) gets a backslash in front of
 *     the ampersand.
 *  2. Each supported Unicode whitespace character is replaced by a named or
 *     numeric character reference.
 *
 * The order matters: running step 2 first would cause step 1 to
 * backslash-escape the references that were just generated.
 *
 * @param text - the raw text to escape
 * @returns the text with entity-like sequences escaped and whitespace encoded
 */
function escapeEntities(text: string): string {
  return text
    .replace(/&([^\s]+);/g, "\\&$1;")
    .replace(/\u00A0/gu, "&nbsp;")
    .replace(/\u2000/gu, "&#8192;")
    .replace(/\u2001/gu, "&#8193;")
    .replace(/\u2002/gu, "&ensp;")
    .replace(/\u2003/gu, "&emsp;")
    .replace(/\u2004/gu, "&#8196;")
    .replace(/\u2005/gu, "&#8197;")
    .replace(/\u2006/gu, "&#8198;")
    .replace(/\u2007/gu, "&#8199;")
    .replace(/\u2008/gu, "&#8200;")
    .replace(/\u2009/gu, "&#8201;")
    .replace(/\u200A/gu, "&#8202;")
    .replace(/\u200B/gu, "&#8203;")
    .replace(/\u202F/gu, "&#8239;")
    .replace(/\u205F/gu, "&#8287;")
    .replace(/\u3000/gu, "&#12288;")
    .replace(/\uFEFF/gu, "&#65279;");
}

/**
 * Decodes the character references produced by the escaping helpers back
 * into their literal characters.
 *
 * Handles `&amp;`, `&lt;`, and the named/numeric references for the
 * supported Unicode whitespace characters. Matching is case-insensitive.
 *
 * NOTE(review): `&amp;` is decoded first, so an input such as `&amp;lt;`
 * is decoded twice and ends up as `<`. Confirm this is intended before
 * reordering the chain.
 *
 * @param text - text containing character references
 * @returns the text with the supported references replaced by characters
 */
function unescapeEntities(text: string): string {
  return text
    .replace(/&amp;/gi, "&")
    .replace(/&lt;/gi, "<")
    .replace(/&nbsp;/gi, "\u00A0")
    .replace(/&emsp;/gi, "\u2003")
    .replace(/&#8192;/gi, "\u2000")
    .replace(/&#8193;/gi, "\u2001")
    .replace(/&ensp;/gi, "\u2002")
    .replace(/&#8196;/gi, "\u2004")
    .replace(/&#8197;/gi, "\u2005")
    .replace(/&#8198;/gi, "\u2006")
    .replace(/&#8199;/gi, "\u2007")
    .replace(/&#8200;/gi, "\u2008")
    .replace(/&#8201;/gi, "\u2009")
    .replace(/&#8202;/gi, "\u200A")
    .replace(/&#8203;/gi, "\u200B")
    .replace(/&#8239;/gi, "\u202F")
    .replace(/&#8287;/gi, "\u205F")
    .replace(/&#12288;/gi, "\u3000")
    .replace(/&#65279;/gi, "\uFEFF");
}

function escapeAttribute(text: string) {
Expand Down
30 changes: 23 additions & 7 deletions packages/@atjson/renderer-commonmark/test/commonmark-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,21 @@ After all the lists
test.each([
["&emsp;", "\u2003"],
["&nbsp;", "\u00a0"],
["&#8192;", "\u2000"],
["&#8193;", "\u2001"],
["&ensp;", "\u2002"],
["&#8196;", "\u2004"],
["&#8197;", "\u2005"],
["&#8198;", "\u2006"],
["&#8199;", "\u2007"],
["&#8200;", "\u2008"],
["&#8201;", "\u2009"],
["&#8202;", "\u200A"],
["&#8203;", "\u200B"],
["&#8239;", "\u202F"],
["&#8287;", "\u205F"],
["&#12288;", "\u3000"],
["&#65279;", "\uFEFF"],
])("%s", (entity, unicode) => {
let doc = new OffsetSource({ content: unicode, annotations: [] });
expect(CommonmarkRenderer.render(doc)).toBe(entity);
Expand Down Expand Up @@ -1379,7 +1394,7 @@ After all the lists
});

expect(CommonmarkRenderer.render(document)).toBe(
"&nbsp;\n\n**text**\n\n\u202F"
"&nbsp;\n\n**text**\n\n&#8239;"
);
});

Expand Down Expand Up @@ -1462,22 +1477,23 @@ After all the lists
);
});

test("emspaces are encoded", () => {
test("unicode whitespace characters are encoded", () => {
let document = new OffsetSource({
content: "\u2003\u2003\u2003\u2003Hello \n This is my text",
content:
"\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u202F\u205F\u3000\uFEFFHello \n This is my text",
annotations: [
{
id: "1",
type: "-offset-paragraph",
start: 0,
end: 11,
end: 24,
attributes: {},
},
{
id: "2",
type: "-offset-paragraph",
start: 11,
end: 30,
start: 24,
end: 43,
attributes: {},
},
],
Expand All @@ -1486,7 +1502,7 @@ After all the lists
let markdown = CommonmarkRenderer.render(document);

expect(markdown).toBe(
"&emsp;&emsp;&emsp;&emsp;Hello\n\nThis is my text\n\n"
"&nbsp;&#8192;&#8193;&ensp;&emsp;&#8196;&#8197;&#8198;&#8199;&#8200;&#8201;&#8202;&#8203;&#8239;&#8287;&#12288;&#65279;Hello\n\nThis is my text\n\n"
);
// Make sure we're not generating code in the round-trip
expect(markdown).toEqual(
Expand Down
11 changes: 10 additions & 1 deletion packages/@atjson/source-commonmark/test/commonmark-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ describe("whitespace", () => {

describe("non-breaking spaces", () => {
test("html entities are converted to unicode characters", () => {
let doc = CommonMarkSource.fromRaw("1\n\n&#8239;\n\n&nbsp;&emsp;\n\n2");
let doc = CommonMarkSource.fromRaw(
"1\n\n&#8239;\n\n&nbsp;&emsp;\n\n&#8192;&#8193;&ensp;&#8196;&#8197;&#8198;&#8199;&#8200;&#8201;&#8202;&#8203;&#8239;&#8287;&#12288;&#65279;\n\n2"
);
let hir = new HIR(doc);
expect(hir.toJSON()).toMatchObject({
type: "root",
Expand All @@ -36,6 +38,13 @@ describe("whitespace", () => {
attributes: {},
children: ["\u00A0\u2003"],
},
{
type: "paragraph",
attributes: {},
children: [
"\u2000\u2001\u2002\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u202F\u205F\u3000\uFEFF",
],
},
{
type: "paragraph",
attributes: {},
Expand Down

0 comments on commit 6b7b7cf

Please sign in to comment.