-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Handle invisible characters in forms
- Loading branch information
Showing
8 changed files
with
278 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import CONST from '../CONST'; | ||
|
||
/**
 * Tells whether a string has no content left once invisible characters are stripped.
 *
 * Every match of CONST.REGEX.INVISIBLE_CHARACTERS is replaced with nothing and the remainder is
 * compared against the empty string. The pattern itself is defined in CONST; according to the
 * notes that accompanied this helper it is built from:
 * - \p{C}: all 'Other' characters
 * - \p{Z}: all separators (spaces etc.)
 * Source: http://www.unicode.org/reports/tr18/#General_Category_Property
 *
 * NOTE(review): this only behaves as described if the pattern carries the global flag - confirm in CONST.
 *
 * @param value - The text to inspect.
 * @returns `true` when nothing remains after the invisible characters are removed.
 */
function isEmptyString(value: string): boolean {
    const visiblePart = value.replace(CONST.REGEX.INVISIBLE_CHARACTERS, '');
    return visiblePart === '';
}
|
||
export default isEmptyString; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/**
 * Removes invisible characters from a string and trims the result.
 *
 * Regular spaces inside the text and the format (Cf) characters that emoji sequences rely on
 * (e.g. the zero-width joiner and tag characters) are preserved. When nothing but invisible
 * characters is left after the clean-up, an empty string is returned.
 *
 * @param value - The raw text, e.g. the value of a form field.
 * @returns The text without invisible characters and without leading/trailing whitespace.
 */
function removeInvisibleCharacters(value: string): string {
    let result = value;

    // Remove invisible spacing characters wherever they occur:
    // - \u200B: zero-width space
    // - \u00A0: non-breaking space
    // - \u2060: word joiner
    result = result.replace(/[\u200B\u00A0\u2060]/g, '');

    // Remove all characters from the 'Other' (C) category except for format characters (Cf)
    // because some of them are used for emojis.
    // The `u` flag makes the regex operate on code points, so \p{Cs} only matches lone surrogates
    // and never one half of a valid surrogate pair (which would break emojis).
    result = result.replace(/[\p{Cc}\p{Cs}\p{Co}\p{Cn}]/gu, '');

    // Remove characters from the (Cf) category that are not used for emojis
    // (left-to-right mark and right-to-left mark)
    result = result.replace(/[\u200E-\u200F]/g, '');

    // Remove all characters from the 'Separator' (Z) category except for Space Separator (Zs)
    result = result.replace(/[\p{Zl}\p{Zp}]/gu, '');

    // The preserved Cf characters are only meaningful next to visible content. If what is left
    // consists solely of 'Other' and 'Separator' characters (e.g. a lone zero-width joiner),
    // there is nothing visible to keep.
    if (/^[\p{C}\p{Z}]*$/u.test(result)) {
        return '';
    }

    return result.trim();
}
|
||
export default removeInvisibleCharacters; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import _ from 'underscore'; | ||
import isEmpty from '../../src/libs/isEmptyString'; | ||
import enEmojis from '../../assets/emojis/en'; | ||
|
||
// Unit tests for isEmptyString: a string counts as empty when it contains nothing but
// invisible characters. Invisible characters are written as \u escapes so that every
// assertion can be reviewed without an editor that reveals hidden Unicode.
describe('libs/isEmpty', () => {
    it('basic tests', () => {
        expect(isEmpty('test')).toBe(false);
        expect(isEmpty('test test')).toBe(false);
        expect(isEmpty('test test test')).toBe(false);
        expect(isEmpty(' ')).toBe(true);
    });
    it('trim spaces', () => {
        expect(isEmpty(' test')).toBe(false);
        expect(isEmpty('test ')).toBe(false);
        expect(isEmpty(' test ')).toBe(false);
    });
    it('remove invisible characters', () => {
        expect(isEmpty('\u200B')).toBe(true);
        expect(isEmpty('\u200B ')).toBe(true);
        expect(isEmpty('\u200B \u200B')).toBe(true);
        expect(isEmpty('\u200B \u200B ')).toBe(true);
    });
    it('remove invisible characters (Cc)', () => {
        expect(isEmpty('\u0000')).toBe(true);
        expect(isEmpty('\u0001')).toBe(true);
        expect(isEmpty('\u0009')).toBe(true);
    });
    it('remove invisible characters (Cf)', () => {
        expect(isEmpty('\u200E')).toBe(true);
        expect(isEmpty('\u200F')).toBe(true);
        expect(isEmpty('\u2060')).toBe(true);
        // U+202A (left-to-right embedding) is a format character, not a separator
        expect(isEmpty('\u202A')).toBe(true);
    });
    it('remove invisible characters (Cs)', () => {
        expect(isEmpty('\uD800')).toBe(true);
        expect(isEmpty('\uD801')).toBe(true);
        expect(isEmpty('\uD802')).toBe(true);
    });
    it('remove invisible characters (Co)', () => {
        expect(isEmpty('\uE000')).toBe(true);
        expect(isEmpty('\uE001')).toBe(true);
        expect(isEmpty('\uE002')).toBe(true);
    });
    it('remove invisible characters (Zl, Zp)', () => {
        // U+2028 is the line separator (Zl), U+2029 the paragraph separator (Zp)
        expect(isEmpty('\u2028')).toBe(true);
        expect(isEmpty('\u2029')).toBe(true);
    });
    it('basic check emojis not removed', () => {
        expect(isEmpty('😀')).toBe(false);
    });
    it('all emojis not removed', () => {
        _.keys(enEmojis).forEach((key) => {
            expect(isEmpty(key)).toBe(false);
        });
    });
    it('remove invisible characters (editpad)', () => {
        expect(isEmpty('\u0020')).toBe(true);
        expect(isEmpty('\u00A0')).toBe(true);
        expect(isEmpty('\u2000')).toBe(true);
        expect(isEmpty('\u2001')).toBe(true);
        expect(isEmpty('\u2002')).toBe(true);
        expect(isEmpty('\u2003')).toBe(true);
        expect(isEmpty('\u2004')).toBe(true);
        expect(isEmpty('\u2005')).toBe(true);
        expect(isEmpty('\u2006')).toBe(true);
        expect(isEmpty('\u2007')).toBe(true);
        expect(isEmpty('\u2008')).toBe(true);
        expect(isEmpty('\u2009')).toBe(true);
        expect(isEmpty('\u200A')).toBe(true);
        expect(isEmpty('\u2028')).toBe(true);
        expect(isEmpty('\u205F')).toBe(true);
        expect(isEmpty('\u3000')).toBe(true);
        expect(isEmpty(' ')).toBe(true);
    });
    it('other tests', () => {
        // A lone zero-width joiner carries no visible content
        expect(isEmpty('\u200D')).toBe(true);
        // Flag sequence built from tag characters: visible as a whole...
        expect(isEmpty('\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F')).toBe(false);
        // ...but each of its UTF-16 halves on its own is a lone surrogate
        expect(isEmpty('\uD83C')).toBe(true);
        expect(isEmpty('\uDFF4')).toBe(true);
        expect(isEmpty('\uDB40')).toBe(true);
        expect(isEmpty('\uDC67')).toBe(true);
        expect(isEmpty('\uDC62')).toBe(true);
        expect(isEmpty('\uDC65')).toBe(true);
        expect(isEmpty('\uDC6E')).toBe(true);
        expect(isEmpty('\uDC7F')).toBe(true);
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import _ from 'underscore'; | ||
import removeInvisible from '../../src/libs/removeInvisibleCharacters'; | ||
import enEmojis from '../../assets/emojis/en'; | ||
|
||
// Unit tests for removeInvisibleCharacters: invisible characters are stripped, visible text
// (including emoji sequences that rely on format characters) is preserved, and the result is
// trimmed. Invisible characters are written as \u escapes so that every assertion can be
// reviewed without an editor that reveals hidden Unicode.
describe('libs/removeInvisible', () => {
    it('basic tests', () => {
        expect(removeInvisible('test')).toBe('test');
        expect(removeInvisible('test test')).toBe('test test');
        expect(removeInvisible('abcdefghijklmnopqrstuvwxyz')).toBe('abcdefghijklmnopqrstuvwxyz');
        expect(removeInvisible('ABCDEFGHIJKLMNOPQRSTUVWXYZ')).toBe('ABCDEFGHIJKLMNOPQRSTUVWXYZ');
        expect(removeInvisible('0123456789')).toBe('0123456789');
        expect(removeInvisible('!@#$%^&*()_+-=[]{}|;:\'",.<>/?`~')).toBe('!@#$%^&*()_+-=[]{}|;:\'",.<>/?`~');
        expect(removeInvisible('')).toBe('');
        expect(removeInvisible(' ')).toBe('');
    });
    it('other alphabets, list of all characters', () => {
        // arabic
        expect(removeInvisible('أبجدية عربية')).toBe('أبجدية عربية');
        // chinese
        expect(removeInvisible('的一是了我不人在他们')).toBe('的一是了我不人在他们');
        // cyrillic
        expect(removeInvisible('абвгдезиклмнопр')).toBe('абвгдезиклмнопр');
        // greek
        expect(removeInvisible('αβγδεζηθικλμνξοπρ')).toBe('αβγδεζηθικλμνξοπρ');
        // hebrew
        expect(removeInvisible('אבגדהוזחטיכלמנ')).toBe('אבגדהוזחטיכלמנ');
        // hindi
        expect(removeInvisible('अआइईउऊऋऍऎ')).toBe('अआइईउऊऋऍऎ');
        // japanese
        expect(removeInvisible('あいうえおかきくけこ')).toBe('あいうえおかきくけこ');
        // korean
        expect(removeInvisible('가나다라마바사아자')).toBe('가나다라마바사아자');
        // thai
        expect(removeInvisible('กขคงจฉชซ')).toBe('กขคงจฉชซ');
    });
    it('trim spaces', () => {
        expect(removeInvisible(' test')).toBe('test');
        expect(removeInvisible('test ')).toBe('test');
        expect(removeInvisible(' test ')).toBe('test');
    });
    it('remove invisible characters', () => {
        expect(removeInvisible('test\u200B')).toBe('test');
        expect(removeInvisible('test\u200Btest')).toBe('testtest');
        expect(removeInvisible('test\u200B test')).toBe('test test');
        expect(removeInvisible('test\u200B test\u200B')).toBe('test test');
        expect(removeInvisible('test\u200B test\u200B test')).toBe('test test test');
    });
    it('remove invisible characters (Cc)', () => {
        expect(removeInvisible('test\u0000')).toBe('test');
        expect(removeInvisible('test\u0001')).toBe('test');
        expect(removeInvisible('test\u0009')).toBe('test');
    });
    it('remove invisible characters (Cf)', () => {
        expect(removeInvisible('test\u200E')).toBe('test');
        expect(removeInvisible('test\u200F')).toBe('test');
        expect(removeInvisible('test\u2060')).toBe('test');
    });
    it('remove invisible characters (Cs)', () => {
        // Lone surrogates are not valid text on their own and must be stripped
        expect(removeInvisible('test\uD800')).toBe('test');
        expect(removeInvisible('test\uD801')).toBe('test');
        expect(removeInvisible('test\uD802')).toBe('test');
    });
    it('remove invisible characters (Co)', () => {
        expect(removeInvisible('test\uE000')).toBe('test');
        expect(removeInvisible('test\uE001')).toBe('test');
        expect(removeInvisible('test\uE002')).toBe('test');
    });
    it('remove invisible characters (Cn)', () => {
        expect(removeInvisible('test\uFFF0')).toBe('test');
        expect(removeInvisible('test\uFFF1')).toBe('test');
        expect(removeInvisible('test\uFFF2')).toBe('test');
    });
    it('remove invisible characters (Zl, Zp)', () => {
        // U+2028 is the line separator (Zl), U+2029 the paragraph separator (Zp)
        expect(removeInvisible('test\u2028')).toBe('test');
        expect(removeInvisible('test\u2029')).toBe('test');
    });
    it('basic check emojis not removed', () => {
        expect(removeInvisible('test😀')).toBe('test😀');
        expect(removeInvisible('test😀😀')).toBe('test😀😀');
        expect(removeInvisible('test😀😀😀')).toBe('test😀😀😀');
    });
    it('all emojis not removed', () => {
        _.keys(enEmojis).forEach((key) => {
            expect(removeInvisible(key)).toBe(key);
        });
    });
    it('remove invisible characters (editpad)', () => {
        expect(removeInvisible('test\u0020')).toBe('test');
        expect(removeInvisible('test\u00A0')).toBe('test');
        expect(removeInvisible('test\u2000')).toBe('test');
        expect(removeInvisible('test\u2001')).toBe('test');
        expect(removeInvisible('test\u2002')).toBe('test');
        expect(removeInvisible('test\u2003')).toBe('test');
        expect(removeInvisible('test\u2004')).toBe('test');
        expect(removeInvisible('test\u2005')).toBe('test');
        expect(removeInvisible('test\u2006')).toBe('test');
        expect(removeInvisible('test\u2007')).toBe('test');
        expect(removeInvisible('test\u2008')).toBe('test');
        expect(removeInvisible('test\u2009')).toBe('test');
        expect(removeInvisible('test\u200A')).toBe('test');
        expect(removeInvisible('test\u2028')).toBe('test');
        expect(removeInvisible('test\u205F')).toBe('test');
        expect(removeInvisible('test\u3000')).toBe('test');
        expect(removeInvisible('test ')).toBe('test');
    });
    it('other tests', () => {
        // Face-in-clouds emoji: the zero-width joiner and variation selector must survive
        expect(removeInvisible('\uD83D\uDE36\u200D\uD83C\uDF2B\uFE0F')).toBe('\uD83D\uDE36\u200D\uD83C\uDF2B\uFE0F');
        // A zero-width joiner with nothing to join is not visible content
        expect(removeInvisible('\u200D')).toBe('');
        expect(removeInvisible('')).toBe('');
        expect(removeInvisible('test')).toBe('test');
        expect(removeInvisible('testtest')).toBe('testtest');
        expect(removeInvisible(' ')).toBe('');
        // Non-breaking space in the middle of a word is removed, not just trimmed at the edges
        expect(removeInvisible('te\u00A0st')).toBe('test');
        // Flag sequence built from tag characters must be returned untouched
        expect(removeInvisible('\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F')).toBe('\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F');
    });
});