Skip to content

Commit

Permalink
Smaller TLDs footprint (#426)
Browse files Browse the repository at this point in the history
* Reduce bundle size by compressing TLDs

Using an encoded trie inspired by @bacloud23's solution in #416

* Update README file sizes

* Improved docs for TLDs encoding/decoding

* Terser warning messages
  • Loading branch information
Nick Frasser authored Jan 4, 2023
1 parent 21ee835 commit ca6a88c
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 1,512 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ __Jump to__
* React and jQuery support
* Multi-language and emoji support
* Custom link plugins
* Fast, accurate and small footprint (~30kB minified, ~15kB gzipped)
* Fast, accurate and small footprint (~20kB minified, ~11kB gzipped)
* 99% test coverage
* Compatible with all modern browsers (Internet Explorer 11 and up)

Expand Down
8 changes: 4 additions & 4 deletions packages/linkifyjs/src/linkify.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { Options } from './options';
import { State } from './fsm';

const warn = typeof console !== 'undefined' && console && console.warn || (() => {});
const warnAdvice = 'To avoid this warning, please register all custom schemes before invoking linkify the first time.';
const warnAdvice = 'until manual call of linkify.init(). Register all schemes and plugins before invoking linkify the first time.';

// Side-effect initialization state
const INIT = {
Expand Down Expand Up @@ -72,7 +72,7 @@ export function registerTokenPlugin(name, plugin) {
}
INIT.tokenQueue.push([name, plugin]);
if (INIT.initialized) {
warn(`linkifyjs: already initialized - will not register token plugin "${name}" until you manually call linkify.init(). ${warnAdvice}`);
warn(`linkifyjs: already initialized - will not register token plugin "${name}" ${warnAdvice}`);
}
}

Expand All @@ -93,7 +93,7 @@ export function registerPlugin(name, plugin) {
}
INIT.pluginQueue.push([name, plugin]);
if (INIT.initialized) {
warn(`linkifyjs: already initialized - will not register plugin "${name}" until you manually call linkify.init(). ${warnAdvice}`);
warn(`linkifyjs: already initialized - will not register plugin "${name}" ${warnAdvice}`);
}
}

Expand All @@ -106,7 +106,7 @@ export function registerPlugin(name, plugin) {
*/
export function registerCustomProtocol(scheme, optionalSlashSlash = false) {
if (INIT.initialized) {
warn(`linkifyjs: already initialized - will not register custom scheme "${scheme}" until you manually call linkify.init(). ${warnAdvice}`);
warn(`linkifyjs: already initialized - will not register custom scheme "${scheme}" ${warnAdvice}`);
}
if (!/^[0-9a-z]+(-[0-9a-z]+)*$/.test(scheme)) {
throw new Error('linkifyjs: incorrect scheme format.\n 1. Must only contain digits, lowercase ASCII letters or "-"\n 2. Cannot start or end with "-"\n 3. "-" cannot repeat');
Expand Down
49 changes: 46 additions & 3 deletions packages/linkifyjs/src/scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
outputs an array of token instances that can be used for easy URL parsing.
*/

import { tlds, utlds } from './tlds';
import { encodedTlds, encodedUtlds } from './tlds';
import { State, addToGroups, tr, ts, tt } from './fsm';
import * as fsm from './fsm';
import * as tk from './text';
Expand All @@ -14,6 +14,8 @@ const NL = '\n'; // New line character
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
const EMOJI_JOINER = '\u200d'; // zero-width joiner

let tlds = null, utlds = null; // don't change so only have to be computed once

/**
* Scanner output token:
* - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
Expand Down Expand Up @@ -43,6 +45,13 @@ export function init(customSchemes = []) {
/** @type State<string> */
const Start = new State();

if (tlds == null) {
tlds = decodeTlds(encodedTlds);
}
if (utlds == null) {
utlds = decodeTlds(encodedUtlds);
}

// States for special URL symbols that accept immediately after start
tt(Start, "'", tk.APOSTROPHE);
tt(Start, '{', tk.OPENBRACE);
Expand Down Expand Up @@ -240,15 +249,15 @@ export function run(start, str) {
* @param {string} str
* @returns {string[]}
*/
export function stringToArray(str) {
export function stringToArray(str) {
const result = [];
const len = str.length;
let index = 0;
while (index < len) {
let first = str.charCodeAt(index);
let second;
let char = first < 0xd800 || first > 0xdbff || index + 1 === len
|| (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff
|| (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff
? str[index] // single character
: str.slice(index, index + 2); // two-index characters
result.push(char);
Expand Down Expand Up @@ -285,3 +294,37 @@ function fastts(state, input, t, defaultt, jr) {
state.j[input[len - 1]] = next;
return next;
}

/**
* Converts a string of Top-Level Domain names encoded in update-tlds.js back
* into a list of strings.
* @param {str} encoded encoded TLDs string
* @returns {str[]} original TLDs list
*/
function decodeTlds(encoded) {
const words = [];
const stack = [];
let i = 0;
let digits = '0123456789';
while (i < encoded.length) {
let popDigitCount = 0;
while (digits.indexOf(encoded[i + popDigitCount]) >= 0) {
popDigitCount++; // encountered some digits, have to pop to go one level up trie
}
if (popDigitCount > 0) {
words.push(stack.join('')); // whatever preceded the pop digits must be a word
let popCount = parseInt(encoded.substring(i, i + popDigitCount), 10);
for (; popCount > 0; popCount--) {
stack.pop();
}
i += popDigitCount;
} else if (encoded[i] === '_') {
words.push(stack.join('')); // found a word, will be followed by another
i++;
} else {
stack.push(encoded[i]); // drop down a level into the trie
i++;
}
}
return words;
}
Loading

0 comments on commit ca6a88c

Please sign in to comment.