-
Notifications
You must be signed in to change notification settings - Fork 3
/
index.ts
309 lines (254 loc) · 9.76 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import defaultBlocklist from './blocklist.json';
type SqidsOptions = {
alphabet?: string;
minLength?: number; // u8
blocklist?: Set<string>;
};
export const defaultOptions = {
// url-safe characters
alphabet: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
// `minLength` is the minimum length IDs should be (`u8` type)
minLength: 0,
// a list of words that should not appear anywhere in the IDs
blocklist: new Set<string>(defaultBlocklist)
};
export default class Sqids {
private alphabet: string;
private minLength: number;
private blocklist: Set<string>;
constructor(options?: SqidsOptions) {
const alphabet = options?.alphabet ?? defaultOptions.alphabet;
const minLength = options?.minLength ?? defaultOptions.minLength;
const blocklist = options?.blocklist ?? defaultOptions.blocklist;
// alphabet cannot contain multibyte characters
if (new Blob([alphabet]).size != alphabet.length) {
throw new Error('Alphabet cannot contain multibyte characters');
}
// check the length of the alphabet
if (alphabet.length < 3) {
throw new Error('Alphabet length must be at least 3');
}
// check that the alphabet has only unique characters
if (new Set(alphabet).size != alphabet.length) {
throw new Error('Alphabet must contain unique characters');
}
// test min length (type [might be lang-specific] + min length + max length)
// if lang supports `u8` type, you can just omit this check altogether
const minLengthLimit = 255;
if (typeof minLength != 'number' || minLength < 0 || minLength > minLengthLimit) {
throw new Error(`Minimum length has to be between 0 and ${minLengthLimit}`);
}
// clean up blocklist:
// 1. all blocklist words should be lowercase
// 2. no words less than 3 chars
// 3. if some words contain chars that are not in the alphabet, remove those
const filteredBlocklist = new Set<string>();
const alphabetChars = alphabet.toLowerCase().split('');
for (const word of blocklist) {
if (word.length >= 3) {
const wordLowercased = word.toLowerCase();
const wordChars = wordLowercased.split('');
const intersection = wordChars.filter((c) => alphabetChars.includes(c));
if (intersection.length == wordChars.length) {
filteredBlocklist.add(wordLowercased);
}
}
}
this.alphabet = this.shuffle(alphabet);
this.minLength = minLength;
this.blocklist = filteredBlocklist;
}
/**
* Encodes an array of unsigned integers into an ID
*
* These are the cases where encoding might fail:
* - One of the numbers passed is smaller than 0 or greater than `maxValue()`
* - An n-number of attempts has been made to re-generated the ID, where n is alphabet length + 1
*
* @param {array.<number>} numbers Non-negative integers to encode into an ID
* @returns {string} Generated ID
*/
encode(numbers: number[]): string {
// if no numbers passed, return an empty string
if (numbers.length == 0) {
return '';
}
// don't allow out-of-range numbers [might be lang-specific]
const inRangeNumbers = numbers.filter((n) => n >= 0 && n <= this.maxValue());
if (inRangeNumbers.length != numbers.length) {
throw new Error(`Encoding supports numbers between 0 and ${this.maxValue()}`);
}
return this.encodeNumbers(numbers);
}
/**
* Internal function that encodes an array of unsigned integers into an ID
*
* @param {array.<number>} numbers Non-negative integers to encode into an ID
* @param {number} increment An internal number used to modify the `offset` variable in order to re-generate the ID
* @returns {string} Generated ID
*/
private encodeNumbers(numbers: number[], increment = 0): string {
// if increment is greater than alphabet length, we've reached max attempts
if (increment > this.alphabet.length) {
throw new Error('Reached max attempts to re-generate the ID');
}
// get a semi-random offset from input numbers
let offset =
numbers.reduce((a, v, i) => {
return this.alphabet[v % this.alphabet.length].codePointAt(0) + i + a;
}, numbers.length) % this.alphabet.length;
// if there is a non-zero `increment`, it's an internal attempt to re-generated the ID
offset = (offset + increment) % this.alphabet.length;
// re-arrange alphabet so that second-half goes in front of the first-half
let alphabet = this.alphabet.slice(offset) + this.alphabet.slice(0, offset);
// `prefix` is the first character in the generated ID, used for randomization
const prefix = alphabet.charAt(0);
// reverse alphabet (otherwise for [0, x] `offset` and `separator` will be the same char)
alphabet = alphabet.split('').reverse().join('');
// final ID will always have the `prefix` character at the beginning
const ret = [prefix];
// encode input array
for (let i = 0; i != numbers.length; i++) {
const num = numbers[i];
// the first character of the alphabet is going to be reserved for the `separator`
const alphabetWithoutSeparator = alphabet.slice(1);
ret.push(this.toId(num, alphabetWithoutSeparator));
// if not the last number
if (i < numbers.length - 1) {
// `separator` character is used to isolate numbers within the ID
ret.push(alphabet.slice(0, 1));
// shuffle on every iteration
alphabet = this.shuffle(alphabet);
}
}
// join all the parts to form an ID
let id = ret.join('');
// handle `minLength` requirement, if the ID is too short
if (this.minLength > id.length) {
// append a separator
id += alphabet.slice(0, 1);
// keep appending `separator` + however much alphabet is needed
// for decoding: two separators next to each other is what tells us the rest are junk characters
while (this.minLength - id.length > 0) {
alphabet = this.shuffle(alphabet);
id += alphabet.slice(0, Math.min(this.minLength - id.length, alphabet.length));
}
}
// if ID has a blocked word anywhere, restart with a +1 increment
if (this.isBlockedId(id)) {
id = this.encodeNumbers(numbers, increment + 1);
}
return id;
}
/**
* Decodes an ID back into an array of unsigned integers
*
* These are the cases where the return value might be an empty array:
* - Empty ID / empty string
* - Non-alphabet character is found within ID
*
* @param {string} id Encoded ID
* @returns {array.<number>} Array of unsigned integers
*/
decode(id: string): number[] {
const ret: number[] = [];
// if an empty string, return an empty array
if (id == '') {
return ret;
}
// if a character is not in the alphabet, return an empty array
const alphabetChars = this.alphabet.split('');
for (const c of id.split('')) {
if (!alphabetChars.includes(c)) {
return ret;
}
}
// first character is always the `prefix`
const prefix = id.charAt(0);
// `offset` is the semi-random position that was generated during encoding
const offset = this.alphabet.indexOf(prefix);
// re-arrange alphabet back into it's original form
let alphabet = this.alphabet.slice(offset) + this.alphabet.slice(0, offset);
// reverse alphabet
alphabet = alphabet.split('').reverse().join('');
// now it's safe to remove the prefix character from ID, it's not needed anymore
id = id.slice(1);
// decode
while (id.length) {
const separator = alphabet.slice(0, 1);
// we need the first part to the left of the separator to decode the number
const chunks = id.split(separator);
if (chunks.length) {
// if chunk is empty, we are done (the rest are junk characters)
if (chunks[0] == '') {
return ret;
}
// decode the number without using the `separator` character
const alphabetWithoutSeparator = alphabet.slice(1);
ret.push(this.toNumber(chunks[0], alphabetWithoutSeparator));
// if this ID has multiple numbers, shuffle the alphabet because that's what encoding function did
if (chunks.length > 1) {
alphabet = this.shuffle(alphabet);
}
}
// `id` is now going to be everything to the right of the `separator`
id = chunks.slice(1).join(separator);
}
return ret;
}
// consistent shuffle (always produces the same result given the input)
private shuffle(alphabet: string): string {
const chars = alphabet.split('');
for (let i = 0, j = chars.length - 1; j > 0; i++, j--) {
const r = (i * j + chars[i].codePointAt(0) + chars[j].codePointAt(0)) % chars.length;
[chars[i], chars[r]] = [chars[r], chars[i]];
}
return chars.join('');
}
private toId(num: number, alphabet: string): string {
const id = [];
const chars = alphabet.split('');
let result = num;
do {
id.unshift(chars[result % chars.length]);
result = Math.floor(result / chars.length);
} while (result > 0);
return id.join('');
}
private toNumber(id: string, alphabet: string): number {
const chars = alphabet.split('');
return id.split('').reduce((a, v) => a * chars.length + chars.indexOf(v), 0);
}
private isBlockedId(id: string): boolean {
id = id.toLowerCase();
for (const word of this.blocklist) {
// no point in checking words that are longer than the ID
if (word.length <= id.length) {
if (id.length <= 3 || word.length <= 3) {
// short words have to match completely; otherwise, too many matches
if (id == word) {
return true;
}
} else if (/\d/.test(word)) {
// words with leet speak replacements are visible mostly on the ends of the ID
if (id.startsWith(word) || id.endsWith(word)) {
return true;
}
} else if (id.includes(word)) {
// otherwise, check for blocked word anywhere in the string
return true;
}
}
}
return false;
}
// this should be the biggest unsigned integer that the language can safely/mathematically support
// the spec does not specify the upper integer limit - so it's up to the individual programming languages
// examples as of 2023-09-24:
// golang: uint64
// rust: u128
// php: PHP_INT_MAX
private maxValue() {
return Number.MAX_SAFE_INTEGER;
}
}