Add unit tests for the tokenizer helper predicates and replace the regex-based
literal/comment checks in lib/tokenize/utils.ts with plain string checks.

Review amendments folded into this patch:
  * checkIsIdentifier rejects trailing and consecutive dots.
  * checkIsTextLiteral rejects a lone quote character.
  * checkIsMultilineComment rejects "/*/" (delimiters sharing the "*").
  * The emoji test exercises an emoji and asserts the ASCII-only behaviour.
  * assertEquals is called as (actual, expected).

The "index" lines are carried over from the original patch; their post-image
blob ids do not correspond to the amended content.

diff --git a/lib/tokenize/utils.test.ts b/lib/tokenize/utils.test.ts
index e69de29..01a3ce7 100644
--- a/lib/tokenize/utils.test.ts
+++ b/lib/tokenize/utils.test.ts
@@ -0,0 +1,134 @@
+import { assert, assertEquals } from "../../deps/std/testing.ts";
+import {
+  checkIsIdentifier,
+  checkIsInlineComment,
+  checkIsMultilineComment,
+  checkIsTextLiteral,
+  findInLexicon,
+} from "./utils.ts";
+import { LEXICON, Lexicon } from "./lexicon.ts";
+
+Deno.test("finds correct index in lexicon", () => {
+  const expectation = Lexicon.TypeDefiner;
+  const reality = findInLexicon("type", LEXICON);
+  assertEquals(reality, expectation);
+});
+
+Deno.test("returns null when not found (or null) in lexicon", () => {
+  const expectation = null;
+  const reality = findInLexicon("not_in_LEXICON", LEXICON);
+  assertEquals(reality, expectation);
+  assertEquals(findInLexicon(null, LEXICON), expectation);
+});
+
+Deno.test("correctly checks identifier", () => {
+  assert(checkIsIdentifier("good"));
+});
+
+Deno.test("correctly checks identifier (inner dots are good)", () => {
+  assert(checkIsIdentifier("inner.dots.are.good"));
+});
+
+Deno.test("correctly checks identifier ('_' is good)", () => {
+  assert(checkIsIdentifier("_underscores_are_chill_anywhere_"));
+});
+
+Deno.test("correctly checks identifier ('$' is good)", () => {
+  assert(checkIsIdentifier("$_is_good_anywhere_$_$"));
+});
+
+Deno.test("correctly checks identifier (caps are good)", () => {
+  assert(checkIsIdentifier("CAPS_are_good_ANYWHERE"));
+});
+
+Deno.test("correctly checks identifier (emojis are bad)", () => {
+  // Only ASCII letters, digits, '_', '$' and inner dots are accepted.
+  assert(!checkIsIdentifier("emojis_are_bad_\u{1F600}"));
+});
+
+Deno.test("correctly checks identifier (numbers are good)", () => {
+  assert(checkIsIdentifier("nums_are_good1234567890"));
+});
+
+Deno.test("correctly checks identifier (leading numbers are bad)", () => {
+  assert(!checkIsIdentifier("1leading_number_is_bad"));
+});
+
+Deno.test("correctly checks identifier (symbols are bad)", () => {
+  assert(!checkIsIdentifier("symbols_are_bad_Δ"));
+});
+
+Deno.test("correctly checks identifier (some special characters are bad)", () => {
+  assert(!checkIsIdentifier("bad!")); // contains '!'
+  assert(!checkIsIdentifier("bad@")); // contains '@'
+  assert(!checkIsIdentifier("bad#")); // contains '#'
+  assert(!checkIsIdentifier("bad^")); // contains '^'
+  assert(!checkIsIdentifier("bad&")); // contains '&'
+  assert(!checkIsIdentifier("bad*")); // contains '*'
+  assert(!checkIsIdentifier("bad|")); // contains '|'
+  assert(!checkIsIdentifier("bad+")); // contains '+'
+  assert(!checkIsIdentifier("bad=")); // contains '='
+});
+
+Deno.test("correctly checks identifier (outer dots are bad)", () => {
+  assert(!checkIsIdentifier(".outer.dots.are.bad."));
+  assert(!checkIsIdentifier(".leading.dot.is.bad"));
+  assert(!checkIsIdentifier("trailing.dot.is.bad."));
+});
+
+Deno.test("correctly checks identifier (consecutive dots are bad)", () => {
+  assert(!checkIsIdentifier("consecutive..dots.are.bad"));
+});
+
+Deno.test("correctly checks identifier (hyphens are bad)", () => {
+  assert(!checkIsIdentifier("hyphens-are-bad"));
+});
+
+Deno.test("correctly checks text literal (with '`')", () => {
+  assert(checkIsTextLiteral("`example`"));
+});
+
+Deno.test('correctly checks text literal (with "\'")', () => {
+  assert(checkIsTextLiteral("'example'"));
+});
+
+Deno.test("correctly checks text literal (with '\"')", () => {
+  assert(checkIsTextLiteral('"example"'));
+});
+
+Deno.test("correctly checks text literal (multiline is good)", () => {
+  assert(
+    checkIsTextLiteral(`"example
+example
+example"`),
+  );
+});
+
+Deno.test("correctly checks text literal (non-matching quotes)", () => {
+  assert(!checkIsTextLiteral('"example`'));
+});
+
+Deno.test("correctly checks text literal (a lone quote is bad)", () => {
+  assert(!checkIsTextLiteral("`"));
+  assert(!checkIsTextLiteral("'"));
+  assert(!checkIsTextLiteral('"'));
+});
+
+Deno.test("correctly checks inline comment", () => {
+  assert(checkIsInlineComment("; example"));
+});
+
+Deno.test("correctly checks inline comment (not a comment)", () => {
+  assert(!checkIsInlineComment("example"));
+});
+
+Deno.test("correctly checks multiline comment", () => {
+  assert(checkIsMultilineComment(`/**
+ * example
+ */`));
+});
+
+Deno.test("correctly checks multiline comment (overlapping delimiters)", () => {
+  assert(!checkIsMultilineComment("/*/"));
+  assert(checkIsMultilineComment("/**/"));
+});
diff --git a/lib/tokenize/utils.ts b/lib/tokenize/utils.ts
index e4fd715..3eff223 100644
--- a/lib/tokenize/utils.ts
+++ b/lib/tokenize/utils.ts
@@ -14,15 +14,40 @@ export const findInLexicon = (
 };
 
+/**
+ * Checks whether `candidate` is a valid identifier.
+ *
+ * An identifier is one or more dot-separated segments of ASCII letters,
+ * digits, `_` and `$`. The first character must not be a digit, and dots may
+ * only appear between segments (no leading, trailing or consecutive dots).
+ */
 export const checkIsIdentifier = (candidate: string): boolean =>
-  /^[a-zA-Z_$]\.*[a-zA-Z_$0-9]*$/g.test(candidate);
+  /^[a-zA-Z_$][a-zA-Z0-9_$]*(?:\.[a-zA-Z0-9_$]+)*$/.test(candidate);
 
-export const checkIsTextLiteral = (candidate: string): boolean =>
-  /^\`(.*?)\`$/g.test(candidate) ||
-  /^\'(.*?)\'$/g.test(candidate) ||
-  /^\"(.*?)\"$/g.test(candidate);
+/**
+ * Checks whether `candidate` is a text literal: a string that opens and closes
+ * with the same quote character (backtick, single quote or double quote). The
+ * content between the quotes is not inspected, so it may span multiple lines.
+ */
+export const checkIsTextLiteral = (candidate: string): boolean => {
+  // A lone quote character both starts and ends with itself, so require room
+  // for a distinct opening and closing delimiter.
+  if (candidate.length < 2) return false;
+  const quote = candidate.charAt(0);
+  return (quote === "`" || quote === "'" || quote === '"') &&
+    candidate.endsWith(quote);
+};
 
+/** Checks whether `candidate` is an inline comment, i.e. starts with `;`. */
 export const checkIsInlineComment = (candidate: string): boolean =>
-  /^;(.*?)$/.test(candidate);
+  candidate.startsWith(";");
 
-export const checkIsMultilineComment = (candidate: string): boolean =>
-  /^\/\*(.*?)\*\/$/.test(candidate);
+/**
+ * Checks whether `candidate` is a multiline comment, i.e. opens with slash-star
+ * and closes with star-slash. The body may span any number of lines.
+ */
+export const checkIsMultilineComment = (candidate: string): boolean => {
+  // "/*/" starts with "/*" and ends with "*/" only because the two delimiters
+  // share the "*"; the shortest complete comment is "/**/".
+  return candidate.length >= 4 && candidate.startsWith("/*") &&
+    candidate.endsWith("*/");
+};