Skip to content

Commit

Permalink
Added recursive translation to container-like elements and plaintext …
Browse files Browse the repository at this point in the history
…capturing support
  • Loading branch information
Wakeful-Cloud committed Aug 15, 2021
1 parent 0d34bdc commit a6b8d64
Show file tree
Hide file tree
Showing 51 changed files with 1,469 additions and 1,282 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Translate [HTML5](https://developer.mozilla.org/en-US/docs/Glossary/HTML5) to [D
## Caveats
* Ignores interactive elements (Buttons, inputs, switches, etc.)
* Ignores CSS
* Handles invalid HTML poorly
* Poor invalid HTML support
* Provides no sanitization

## Usage
Expand All @@ -29,7 +29,7 @@ const translate = require('@wakeful-cloud/html-translator'); //CommonJS

//Translate
const html = '<b>Bold text</b>';
const [markdown, images] = translate(html); //"*Bold text*"
const {markdown, images} = translate(html); //"**Bold text**"

//Compose the embed
const embed = new MessageEmbed({
Expand Down
1,966 changes: 999 additions & 967 deletions npm-shrinkwrap.json

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
{
"name": "@wakeful-cloud/html-translator",
"version": "1.0.1",
"version": "1.1.0",
"description": "Translate HTML5 to Discord flavored markdown",
"main": "dist/cjs/index.js",
"module": "dist/es/index.js",
"types": "dist/es/index.d.ts",
"main": "dist/cjs.js",
"module": "dist/es.js",
"types": "dist/index.d.ts",
"publishConfig": {
"access": "public"
},
"files": [
"dist"
],
"scripts": {
"build": "rollup -c",
"test": "ava src/**/*.spec.ts"
Expand All @@ -32,8 +35,6 @@
"homepage": "https://github.com/Wakeful-Cloud/html-translator#readme",
"devDependencies": {
"@rollup/plugin-commonjs": "^19.0.0",
"@rollup/plugin-json": "^4.1.0",
"@rollup/plugin-node-resolve": "^13.0.0",
"@types/table": "^6.3.2",
"@typescript-eslint/eslint-plugin": "^4.28.2",
"@typescript-eslint/parser": "^4.28.2",
Expand All @@ -42,13 +43,12 @@
"eslint-plugin-import": "^2.23.4",
"lodash": "^4.17.21",
"rollup": "^2.53.0",
"rollup-plugin-terser": "^7.0.2",
"rollup-plugin-typescript2": "^0.30.0",
"ts-node": "^10.0.0",
"typescript": "^4.3.5"
},
"dependencies": {
"node-html-parser": "^4.0.0",
"node-html-parser": "^4.1.3",
"table": "^6.7.1"
}
}
26 changes: 10 additions & 16 deletions rollup.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,29 @@
//Imports
import _ from 'lodash';
import commonjs from '@rollup/plugin-commonjs';
import json from '@rollup/plugin-json';
import resolve from '@rollup/plugin-node-resolve';
import pkg from './package.json';
import typescript from 'rollup-plugin-typescript2';
import {terser} from 'rollup-plugin-terser';

//External dependencies
const external = Object.keys(pkg.dependencies);

//Global build config
const global = {
external,
input: 'src/index.ts',
plugins: [
commonjs(),
json(),
typescript(),
terser()
typescript({
useTsconfigDeclarationDir: true
})
]
};

//CJS
const cjs = _.merge({
output: [
{
dir: 'dist/cjs',
file: 'dist/cjs.js',
format: 'cjs',
exports: 'default'
}
Expand All @@ -36,24 +38,16 @@ const cjs = _.merge({
'table'
]
}, global);
cjs.plugins.unshift(resolve({
browser: false,
preferBuiltins: true
}));

//ES
const es = _.merge({
output: [
{
dir: 'dist/es',
file: 'dist/es.js',
format: 'es'
}
]
}, global);
es.plugins.unshift(resolve({
browser: true,
preferBuiltins: false
}));

//Export
export default [cjs, es];
153 changes: 14 additions & 139 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,154 +3,29 @@
*/

//Imports
import {HTMLElement, NodeType, parse} from 'node-html-parser';
import {Image} from './types';

import abbreviation from './translators/abbreviation';
import anchor from './translators/anchor';
import blockBold from './translators/block-bold';
import blockCode from './translators/block-code';
import blockItalicized from './translators/block-italicized';
import blockPlain from './translators/block-plain';
import blockQuote from './translators/block-quote';
import deleted from './translators/deleted';
import descriptionList from './translators/description-list';
import image from './translators/image';
import inlineBold from './translators/inline-bold';
import inlineCode from './translators/inline-code';
import inlineItalicized from './translators/inline-italicized';
import inlinePlain from './translators/inline-plain';
import inlineQuote from './translators/inline-quote';
import lineBreak from './translators/line-break';
import orderedList from './translators/ordered-list';
import table from './translators/table';
import underlined from './translators/underlined';
import unorderedList from './translators/unordered-list';

//All translators, in lookup order: translate() hands each element to the first
//entry whose tags list includes the element's tag name, so order matters if two
//translators ever claim the same tag
const translators = [
abbreviation,
anchor,
blockBold,
blockCode,
blockItalicized,
blockPlain,
blockQuote,
deleted,
descriptionList,
image,
inlineBold,
inlineCode,
inlineItalicized,
inlinePlain,
inlineQuote,
lineBreak,
orderedList,
table,
underlined,
unorderedList
];

//Regex to test if a string ends in a line terminator (only the final character
//is inspected, so a trailing "\r\n" matches on its "\n")
const endNewline = /[\r\n]$/;

//Regex to test if a string ends in whitespace
const endWhitespace = /\s$/;

/**
 * Translate an HTML element to Discord flavored markdown
 *
 * Walks the element's direct child nodes. Element children with a matching
 * translator are translated by it; element children without one are treated as
 * containers and translated recursively. Non-element nodes (eg: untagged text)
 * are skipped.
 * @param element HTML element
 * @returns Tuple of the accumulated (untrimmed) markdown and the collected images
 */
const translate = (element: HTMLElement): [string, Image[]] =>
{
  let markdown = '';
  const images: Image[] = [];

  //Iterate over children
  for (const node of element.childNodes)
  {
    //Only element nodes are translated
    if (node.nodeType !== NodeType.ELEMENT_NODE)
    {
      continue;
    }

    //Cast
    const child = node as HTMLElement;

    //Find the first translator that handles this tag
    const translator = translators.find(candidate => candidate.tags.includes(child.tagName));

    //No translator: treat the element as a container and recur
    if (translator == null)
    {
      const [childMarkdown, childImages] = translate(child);

      markdown += childMarkdown;
      images.push(...childImages);

      continue;
    }

    //Translate the element
    const result = translator.translate(child);

    if (result.markdown != null)
    {
      //Inline-style elements
      if (translator.inline)
      {
        //Add a leading space if the markdown so far doesn't end with whitespace
        if (!endWhitespace.test(markdown))
        {
          markdown += ' ';
        }

        markdown += result.markdown;
      }
      //Block-style elements
      else
      {
        //Add a leading new line if the markdown so far doesn't end with one
        if (!endNewline.test(markdown))
        {
          markdown += '\n';
        }

        //Add markdown
        markdown += result.markdown;

        //Add a trailing new line if the element's markdown didn't end with one
        if (!endNewline.test(markdown))
        {
          markdown += '\n';
        }
      }
    }

    //Collect the image (if the translator produced one)
    if (result.image != null)
    {
      images.push(result.image);
    }
  }

  return [markdown, images];
};
import translate from './translate';
import {TranslatorResult} from './types';
import {options} from './utils';
import {parse} from 'node-html-parser';

/**
* Translate raw HTML to Discord flavored markdown
* @param raw Raw HTML
* @param plaintext Whether or not to capture untagged plaintext
* @returns Discord flavored markdown, images
*/
export default (raw: string): [string, Image[]] =>
const main = (raw: string, plaintext = false): TranslatorResult =>
{
//Parse the HTML
const root = parse(raw);
const root = parse(raw, options);

//Translate
const [markdown, images] = translate(root);
const result = translate(root, plaintext);

//Trim markdown
return [markdown.trim(), images];
};
result.markdown = result.markdown.trim();

return result;
};

export default main;
14 changes: 7 additions & 7 deletions src/test.html
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ <h6>H6 heading</h6>
<img alt="Placeholder" src="https://via.placeholder.com/256x128">

<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud
exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute
irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia
deserunt mollit anim id est laborum.
<b>Lorem ipsum</b> dolor sit amet, consectetur adipiscing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
culpa qui officia deserunt mollit anim id est laborum.
</p>

<span>Span text</span>
Expand All @@ -73,7 +73,7 @@ <h6>H6 heading</h6>
<code>1 + '1' + true == '11true'</code>
<p><kbd>CTRL</kbd> + <kbd>C</kbd></p>
<samp>fatal signal: Segmentation fault</samp>
<p><var>A<sup>2</sup></var> + B<sup>2</sup> = C<sup>2</sup></p>
<p><var>A<sup>2</sup></var> + <var>B<sup>2</sup></var> = <var>C<sup>2</sup></var></p>

<pre>
Pre
Expand Down
6 changes: 3 additions & 3 deletions src/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

[https://example.com](https://example.com)
[Example](https://example.com)
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
**Lorem ipsum** dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
Span text Abbreviation (Title) `Inline quote` ([Source](https://example.com))
> To be, or not to be, that is the question:
> Whether 'tis nobler in the mind to suffer
Expand All @@ -18,9 +18,9 @@ Span text Abbreviation (Title) `Inline quote` ([Source](https://example.com))
> And by opposing end them.
[Source](http://poetryfoundation.org/poems/56965)
`1 + '1' + true == '11true'`
CTRL + C
`CTRL` + `C`
`fatal signal: Segmentation fault`
A2 + B2 = C2
`A2` + `B2` = `C2`
```
Pre
formatted
Expand Down
17 changes: 11 additions & 6 deletions src/index.spec.ts → src/translate.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,29 @@
*/

//Imports
import htmlTranslator from './index';
import parse from 'node-html-parser';
import test from 'ava';
import translate from './translate';
import {join} from 'path';
import {options} from './utils';
import {readFileSync} from 'fs';

//Data
const html = readFileSync(join(__dirname, 'test.html'), 'utf-8');
const raw = readFileSync(join(__dirname, 'test.html'), 'utf-8');
const markdownReference = readFileSync(join(__dirname, 'test.txt'), 'utf-8');

test('translate everything', async ctx =>
{
//Parse
const html = parse(raw, options);

//Translate
const [markdown, images] = htmlTranslator(html);
const result = translate(html, false);

//Assert
ctx.is(markdown, markdownReference);
ctx.is(images.length, 2);
ctx.deepEqual(images, [
ctx.is(result.markdown.trim(), markdownReference);
ctx.is(result.images.length, 2);
ctx.deepEqual(result.images, [
{
src: 'https://via.placeholder.com/256x128'
},
Expand Down
Loading

0 comments on commit a6b8d64

Please sign in to comment.