-
Notifications
You must be signed in to change notification settings - Fork 108
/
utils.ts
184 lines (170 loc) · 5.49 KB
/
utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import isUrl from './isUrl';
import type {
CustomMetaTags,
OgObjectInternal,
OpenGraphScraperOptions,
ValidatorSettings,
} from './types';
/**
 * Default URL validator settings.
 *
 * The values below list only `http` and `https` as protocols, require a host
 * and a TLD, and do not require a protocol or a port.
 *
 * NOTE(review): the option names look like the validator.js `isURL` options;
 * confirm the exact semantics of each flag against `./isUrl`.
 */
export const defaultUrlValidatorSettings = {
allow_fragments: true,
allow_protocol_relative_urls: false,
allow_query_components: true,
allow_trailing_dot: false,
allow_underscores: false,
protocols: ['http', 'https'],
require_host: true,
require_port: false,
require_protocol: false,
require_tld: true,
require_valid_protocol: true,
validate_length: true,
};
/**
 * Tells whether the given value is a non-empty string that the URL validator accepts.
 *
 * @param {string} url - candidate url
 * @param {object} urlValidatorSettings - settings forwarded to the validator
 * @return {boolean} true when the url is a non-empty string and passes validation
 *
 */
export function isUrlValid(url: string, urlValidatorSettings: ValidatorSettings): boolean {
  // Reject non-strings and empty strings before consulting the validator.
  if (typeof url !== 'string' || url.length === 0) {
    return false;
  }
  return isUrl(url, urlValidatorSettings);
}
/**
 * Makes sure a url carries a scheme, defaulting to http://
 *
 * Urls that already start with http://, https://, ftp:// or ftps://
 * (case-insensitive) are returned untouched.
 *
 * @param {string} url - url to be updated
 * @return {string} the original url, or the url prefixed with http://
 *
 */
const coerceUrl = (url: string): string => {
  const hasKnownScheme = /^(f|ht)tps?:\/\//i;
  if (hasKnownScheme.test(url)) {
    return url;
  }
  return `http://${url}`;
};
/**
 * Validates a url and, when valid, makes sure it carries a scheme
 *
 * @param {string} url - url to be checked and formatted
 * @param {object} urlValidatorSettings - settings used by the validator
 * @return {object} object whose `url` is the formatted url, or null when the url is invalid
 *
 */
export function validateAndFormatURL(url: string, urlValidatorSettings: ValidatorSettings): { url: string | null } {
  if (!isUrlValid(url, urlValidatorSettings)) {
    return { url: null };
  }
  return { url: coerceUrl(url) };
}
/**
 * Finds the image type (file extension) from a given url
 *
 * The query string and fragment are removed before the extension is looked
 * up, so dots inside them (e.g. `?size=1.5`) cannot be mistaken for the
 * extension separator and a trailing `#fragment` is not part of the result.
 *
 * When the url contains no dot at all, the whole url (minus query/fragment)
 * is returned, as before.
 *
 * @param {string} url - url to be checked
 * @return {string} text after the last dot of the url, without query or fragment
 *
 */
export function findImageTypeFromUrl(url: string): string {
  // Everything from the first `?` or `#` onwards is not part of the resource name.
  const [resource = ''] = url.split(/[?#]/);
  return resource.split('.').pop() ?? '';
}
/**
 * Tells whether an image type is one of the supported image extensions
 *
 * The comparison is exact and case-sensitive.
 *
 * @param {string} type - image type (extension without the dot) to be checked
 * @return {boolean} true when the type is a supported image type
 *
 */
export function isImageTypeValid(type: string): boolean {
  const supportedTypes = new Set<string>([
    'apng', 'bmp', 'gif', 'ico', 'cur',
    'jpg', 'jpeg', 'jfif', 'pjpeg', 'pjp',
    'png', 'svg', 'tif', 'tiff', 'webp',
  ]);
  return supportedTypes.has(type);
}
/**
 * Checks if URL points at a known non html resource (document, media, archive, ...)
 *
 * The query string and fragment are ignored, so dots inside them neither hide
 * a real extension (`doc.pdf?v=1.2`) nor fake one (`page.html?file=a.pdf`).
 * Compound extensions such as `.tar.gz` are matched against the end of the
 * remaining url, because the text after the last dot alone (`gz`) can never
 * contain them.
 *
 * @param {string} url - url to be checked
 * @return {boolean} boolean value if url is non html
 *
 */
export function isThisANonHTMLUrl(url: string): boolean {
  const nonHtmlExtensions: string[] = ['.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.3gp', '.avi', '.mov', '.mp4', '.m4v', '.m4a', '.mp3', '.mkv', '.ogv', '.ogm', '.ogg', '.oga', '.webm', '.wav', '.bmp', '.gif', '.jpg', '.jpeg', '.png', '.webp', '.zip', '.rar', '.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2', '.txt', '.pdf'];
  // Everything from the first `?` or `#` onwards is not part of the resource name.
  const [resource = ''] = url.split(/[?#]/);
  // Text from the last dot onwards; when there is no dot the whole resource is
  // used with a dot prepended, which keeps the previous matching behaviour.
  const lastDot = resource.lastIndexOf('.');
  const tail = lastDot === -1 ? `.${resource}` : resource.slice(lastDot);
  return nonHtmlExtensions.some(
    // `includes` keeps the original loose match; `endsWith` adds compound extensions.
    (type: string): boolean => tail.includes(type) || resource.endsWith(type),
  );
}
/**
 * Recursively strips properties whose value is undefined
 *
 * The object is cleaned in place (nested objects included) and the very same
 * reference is returned. Values such as null, 0, '' and false are kept.
 *
 * @param {object} object - object to be cleaned
 * @return {object} the same object, without nested undefineds
 *
 */
export function removeNestedUndefinedValues(object: Record<string, any>): OgObjectInternal {
  for (const [property, current] of Object.entries(object)) {
    if (current && typeof current === 'object') {
      // Truthy objects (arrays included) are cleaned recursively.
      removeNestedUndefinedValues(current);
    } else if (current === undefined) {
      delete object[property];
    }
  }
  return object;
}
/**
 * Applies the default option values to the options supplied by the caller
 *
 * `onlyGetOpenGraphInfo` defaults to false; any value present in `ogsOptions`
 * overrides the default.
 *
 * @param {object} ogsOptions - options supplied by the caller
 * @return {object} object holding the merged options under `options`
 *
 */
export function optionSetup(ogsOptions: OpenGraphScraperOptions): { options: OpenGraphScraperOptions } {
  const defaults = { onlyGetOpenGraphInfo: false };
  return { options: { ...defaults, ...ogsOptions } };
}
/**
 * Checks if a list of custom meta tags is valid
 *
 * The list is valid when it is an array and every entry is a non-null object
 * with a string `fieldName`, a boolean `multiple` and a string `property`.
 * An empty array is valid.
 *
 * @param {object[]} customMetaTags - custom meta tags to be checked
 * @return {boolean} boolean value if every custom meta tag is valid
 *
 */
export function isCustomMetaTagsValid(customMetaTags: CustomMetaTags[]): boolean {
  if (!Array.isArray(customMetaTags)) return false;
  return customMetaTags.every((customMetaTag) => (
    typeof customMetaTag === 'object'
    // `typeof null` is 'object'; without this guard a null entry throws a TypeError
    && customMetaTag !== null
    && typeof customMetaTag.fieldName === 'string'
    && typeof customMetaTag.multiple === 'boolean'
    && typeof customMetaTag.property === 'string'
  ));
}
/**
 * Unescape script text.
 *
 * Certain websites escape script text within script tags, which can
 * interfere with `JSON.parse()`. Therefore, we need to unescape it.
 *
 * Known good escape sequences:
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh
 *
 * ```js
 * JSON.parse('"\\u2611"'); // '☑'
 * ```
 *
 * Known bad escape sequences:
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh
 *
 * ```js
 * JSON.parse('"\\x26"'); // '&'
 * ```
 *
 * Characters that are not allowed to appear raw inside a JSON string stay
 * escaped after the conversion: the double quote becomes `\"`, the backslash
 * becomes `\\`, and control characters (below 0x20) become `\u00hh`.
 * Every other `\xhh` sequence is replaced by the character itself.
 *
 * @param {string} scriptText - the text of the script tag
 * @returns {string} unescaped script text
 */
export function unescapeScriptText(scriptText: string): string {
  // https://stackoverflow.com/a/34056693
  return scriptText.replace(/\\x([0-9a-f]{2})/ig, (_, pair: string) => {
    const charCode = parseInt(pair, 16);
    // A raw double quote would terminate the surrounding JSON string early.
    if (charCode === 34) {
      return '\\"';
    }
    // A raw backslash would start a new (and probably invalid) escape sequence.
    if (charCode === 92) {
      return '\\\\';
    }
    // Raw control characters are rejected by JSON.parse inside strings.
    if (charCode < 32) {
      return `\\u${charCode.toString(16).padStart(4, '0')}`;
    }
    return String.fromCharCode(charCode);
  });
}