Skip to content

Commit

Permalink
Fix: HTML to MFM
Browse files: browse the repository at this point in the history
  • Loading branch information
mei23 committed Feb 6, 2021
1 parent 90eb9e6 commit 1343aa5
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 20 deletions.
63 changes: 43 additions & 20 deletions src/mfm/fromHtml.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { parseFragment } from 'parse5';
import { urlRegexFull } from './prelude';
import * as parse5 from 'parse5';
import treeAdapter = require('parse5/lib/tree-adapters/default');
import { urlRegex, urlRegexFull } from './prelude';

export function fromHtml(html: string, hashtagNames?: string[]): string {
const dom = parseFragment(html);
const dom = parse5.parseFragment(html);

let text = '';

Expand All @@ -12,30 +13,35 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {

return text.trim();

function getText(node: any): string {
if (node.nodeName == '#text') return node.value;
function getText(node: parse5.Node): string {
if (treeAdapter.isTextNode(node)) return node.value;
if (!treeAdapter.isElementNode(node)) return '';

if (node.childNodes) {
return node.childNodes.map((n: any) => getText(n)).join('');
return node.childNodes.map(n => getText(n)).join('');
}

return '';
}

function analyze(node: any) {
switch (node.nodeName) {
case '#text':
text += node.value;
break;
function analyze(node: parse5.Node) {
if (treeAdapter.isTextNode(node)) {
text += node.value;
return;
}

// Skip comment or document type node
if (!treeAdapter.isElementNode(node)) return;

switch (node.nodeName) {
case 'br':
text += '\n';
break;

case 'a':
const txt = getText(node);
const rel = node.attrs.find((x: any) => x.name == 'rel');
const href = node.attrs.find((x: any) => x.name == 'href');
const rel = node.attrs.find(x => x.name === 'rel');
const href = node.attrs.find(x => x.name === 'href');

// ハッシュタグ
if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) {
Expand All @@ -44,21 +50,38 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) {
const part = txt.split('@');

if (part.length == 2) {
if (part.length === 2 && href) {
//#region ホスト名部分が省略されているので復元する
const acct = `${txt}@${(new URL(href.value)).hostname}`;
text += acct;
//#endregion
} else if (part.length == 3) {
} else if (part.length === 3) {
text += txt;
}
// その他
} else {
text += !href ? txt
: txt === href.value
? txt.match(urlRegexFull) ? txt
: `<${txt}>`
: `[${txt}](${href.value})`;
// Builds the MFM text for a plain anchor (neither hashtag nor mention).
// Reads `txt` (the anchor's text content) and `href` (its href attribute,
// possibly undefined) from the enclosing scope.
const generateLink = () => {
	// No destination: all we can emit is the label, or '' when there is none.
	if (!href) {
		return txt ? txt : '';
	}

	const url = href.value;

	// Empty label, or label identical to the URL: emit the URL alone.
	// It is wrapped in angle brackets when it fails urlRegexFull.
	if (!txt || txt === url) {
		return url.match(urlRegexFull) ? url : `<${url}>`;
	}

	// Labelled link. The destination gets angle brackets when it matches
	// urlRegex but not urlRegexFull (presumably a URL with characters the
	// strict pattern rejects, e.g. unencoded non-ASCII; confirm in ./prelude).
	const needsAngleBrackets = url.match(urlRegex) && !url.match(urlRegexFull);
	return needsAngleBrackets
		? `[${txt}](<${url}>)`
		: `[${txt}](${url})`;
};

text += generateLink();
}
break;

Expand Down
12 changes: 12 additions & 0 deletions test/mfm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,10 @@ describe('fromHtml', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">c</a> d</p>'), 'a [c](https://example.com/b) d');
});

it('link with different text, but not encoded', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/ä">c</a> d</p>'), 'a [c](<https://example.com/ä>) d');
});

it('link with same text', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">https://example.com/b</a> d</p>'), 'a https://example.com/b d');
});
Expand All @@ -1327,6 +1331,14 @@ describe('fromHtml', () => {
assert.deepStrictEqual(fromHtml('<p>a <a>c</a> d</p>'), 'a c d');
});

it('link without text', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b"></a> d</p>'), 'a https://example.com/b d');
});

it('link without both', () => {
assert.deepStrictEqual(fromHtml('<p>a <a></a> d</p>'), 'a d');
});

it('mention', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/@user" class="u-url mention">@user</a> d</p>'), 'a @user@example.com d');
});
Expand Down

0 comments on commit 1343aa5

Please sign in to comment.