Skip to content

Commit

Permalink
feat(string): add TextCursor
Browse files Browse the repository at this point in the history
A `TextCursor` is `Text` tracking a `Position` within itself, alongside a suite of useful methods and getters. This provides a way to describe substrings without creating them, which can be memory-intensive for some tasks. Preliminary benchmarks also show that `TextCursor` is ~1.8x faster for standard parsing tasks than plain `string` and its methods, regardless of input length.

`TextCursor` also has an attractive `.inspect()` method which depicts the cursor location within the text:

```
…jumps over the lazy d…
             ^
```
  • Loading branch information
pskfyi committed May 28, 2023
1 parent 3b5e820 commit 3285c61
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 0 deletions.
8 changes: 8 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ import {
splitOn,
splitOnFirst,
Text,
TextCursor,
} from "https://deno.land/x/handy/string/utils.ts";

dedent(" a\n b\n c"); // "a\n b\n c"
Expand All @@ -408,6 +409,13 @@ const text = new Text("a\nb\nc");
text.lines; // ["a\n", "b\n", "c"]
text.locationAt(4); // location of "c", { line: 3, column: 1, offset: 4 }
text.locationAt(5); // end of text, { line: 3, column: 2, offset: 5 }

const cursor = new TextCursor("a\nb\nc", 2);
cursor.remainder; // "b\nc"
cursor.location; // { offset: 2, line: 2, column: 1 }
cursor.inspect(); // string depicting...
// [L2] b¶
// ^
```

```ts
Expand Down
223 changes: 223 additions & 0 deletions string/TextCursor.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
import { stripColor } from "../_deps/fmt.ts";
import { assert, assertEquals, describe, it } from "../_deps/testing.ts";
import { TextCursor } from "./TextCursor.ts";
import { dedent } from "./dedent.ts";

const str = "feat: example";
const longStr =
"the quick brown fox jumps over the lazy dog then does it again and again until it is tired";
const maxLength = 39;

describe("new TextCursor()", () => {
it("requires a string", () => assert(new TextCursor(str)));

it("can clone a cursor", () => {
const cursorA = new TextCursor(str);
const cursorB = new TextCursor(cursorA);
assert(cursorA !== cursorB);
assertEquals(cursorA.index, cursorB.index);
assertEquals(cursorA.input, cursorB.input);
});

it("can clone w/ new index", () => {
const cursorA = new TextCursor(str);
const cursorB = new TextCursor(cursorA, 3);
assert(cursorA !== cursorB);
assertEquals(cursorA.input, cursorB.input);
assertEquals(cursorB.index, 3);
});
});

describe("TextCursor.index", () => {
it("starts at 0", () => assertEquals(new TextCursor(str).index, 0));

it("can be initialized", () => assertEquals(new TextCursor(str, 3).index, 3));
});

describe("TextCursor.move()", () => {
it("moves the index", () => {
assertEquals(new TextCursor(str).move(3).index, 3);
assertEquals(new TextCursor(str, 3).move(2).index, 5);
});

it("returns a new cursor", () => {
const cursor = new TextCursor(str);
assert(cursor !== cursor.move(3));
});
});

describe("TextCursor.char", () => {
it("is the index character", () =>
assertEquals(new TextCursor(str, 2).char, "a"));
});

describe("TextCursor.remainder", () => {
it("defaults to full input", () =>
assertEquals(new TextCursor(str).remainder, str));

it("is index onward", () =>
assertEquals(new TextCursor(str, 3).remainder, "t: example"));
});

describe("TextCursor.antecedent", () => {
it("is empty by default", () =>
assertEquals(new TextCursor(str).antecedent, ""));

it("is value before index", () =>
assertEquals(new TextCursor(str, 3).antecedent, "fea"));
});

describe("TextCursor.location", () => {
it("is index Text.Location", () => {
const text = "a\n\nbc\nd\nef";
const cursor = new TextCursor(text);

assertEquals(cursor.location, { offset: 0, line: 1, column: 1 });
assertEquals(cursor.move(1).location, { offset: 1, line: 1, column: 2 });
assertEquals(cursor.move(1).location, { offset: 2, line: 2, column: 1 });
assertEquals(cursor.move(1).location, { offset: 3, line: 3, column: 1 });
assertEquals(cursor.move(1).location, { offset: 4, line: 3, column: 2 });
assertEquals(cursor.move(1).location, { offset: 5, line: 3, column: 3 });
assertEquals(cursor.move(1).location, { offset: 6, line: 4, column: 1 });
assertEquals(cursor.move(1).location, { offset: 7, line: 4, column: 2 });
assertEquals(cursor.move(1).location, { offset: 8, line: 5, column: 1 });
assertEquals(cursor.move(1).location, { offset: 9, line: 5, column: 2 });
});
});

describe("TextCursor.line", () => {
it("is the line at the index", () => {
const text = "a\n\nbc\nd\nef";

assertEquals(new TextCursor(text).line, "a\n");
assertEquals(new TextCursor(text, 1).line, "a\n");
assertEquals(new TextCursor(text, 2).line, "\n");
assertEquals(new TextCursor(text, 3).line, "bc\n");
assertEquals(new TextCursor(text, 4).line, "bc\n");
assertEquals(new TextCursor(text, 5).line, "bc\n");
assertEquals(new TextCursor(text, 6).line, "d\n");
assertEquals(new TextCursor(text, 7).line, "d\n");
assertEquals(new TextCursor(text, 8).line, "ef");
assertEquals(new TextCursor(text, 9).line, "ef");
});
});

describe("TextCursor.toString()", () => {
it("is elided around index", () =>
assertEquals(
new TextCursor("abcdefghijklmnopqrstuvwxyz", 13).toString(),
'TextCursor("…efghijklmnopqrstuv…", 13)',
));
});

describe("TextCursor.startsWith()", () => {
it("matches from the index", () => {
assert(new TextCursor(str).startsWith("feat"));
assert(!new TextCursor(str).startsWith("wxyz"));
assert(new TextCursor(str, 4).startsWith(": ex"));
});
});

describe("inspect", () => {
it("depicts the location", () =>
assertEquals(
stripColor(new TextCursor(str, 3).inspect()),
dedent(`
feat: example
^
`).trim(),
));

it("elides long lines", () =>
assertEquals(
stripColor(new TextCursor(longStr, 37).inspect({ maxLength })),
dedent(`
… jumps over the lazy dog then does it…
^
`).trim(),
));

describe("line numbers", () => {
it("shows when >1 lines", () => {
const cursor = new TextCursor("a\nb", 1);

assertEquals(
stripColor(cursor.inspect()),
dedent(`
[L1] a¶
^
`).trim(),
);

assertEquals(
stripColor(new TextCursor("a\nb", 2).inspect()),
dedent(`
[L2] b
^
`).trim(),
);
});

it("is configurable", () => {
const cursor = new TextCursor("a\nb", 1);

assertEquals(
stripColor(cursor.inspect({ lineNumber: false })),
dedent(`
^
`).trim(),
);

assertEquals(
stripColor(cursor.move(1).inspect({ lineNumber: false })),
dedent(`
b
^
`).trim(),
);
});
});

describe("max length", () => {
it("defaults to 40 or console width", () => {
// TODO: Consider refactoring to allow stubbing console width.
});

it("is configurable", () => {
const cursor = new TextCursor(longStr, 37);

assertEquals(
stripColor(cursor.inspect({ maxLength: 10 })),
dedent(`
…e lazy d…
^
`).trim(),
);

assertEquals(
stripColor(cursor.inspect({ maxLength: 20 })),
dedent(`
…er the lazy dog th…
^
`).trim(),
);
});
});

describe("colors", () => {
it("has color by default", () => {
const cursor = new TextCursor(str, 3);
const msg = cursor.inspect();

assert(msg !== stripColor(msg));
});

it("is configurable", () => {
const cursor = new TextCursor(str, 3);
const msg = cursor.inspect({ colors: false });

assertEquals(msg, stripColor(msg));
});
});
});
114 changes: 114 additions & 0 deletions string/TextCursor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import { blue } from "../_deps/fmt.ts";
import { consoleWidth } from "../cli/consoleSize.ts";
import { Text } from "./Text.ts";
import { elideAround } from "./elide.ts";
import { escapeTerse } from "./escape.ts";

export declare namespace TextCursor {
export interface InspectOptions {
/** The maximum length of the line to display. */
maxLength?: number;
/** Whether to colorize the output. */
colors?: boolean;
/** Whether to display the line number. */
lineNumber?: boolean;
}
}

/** Describes a string and an index within it. */
export class TextCursor extends Text {
readonly index: number;
#cache = {} as Record<number, Text.Location>;

constructor(input: string | Text | TextCursor, index?: number) {
super(input);
this.index = index ?? (input instanceof TextCursor ? input.index : 0);
}

/** The character at the current index, if extant. */
get char(): string | undefined {
return this.value.at(this.index);
}

/** The substring from the start of the string to the current index. */
get antecedent(): string {
return this.index === 0 ? "" : this.value.slice(0, this.index);
}

/** The substring from the current index to the end of the input. */
get remainder(): string {
return this.index === 0 ? this.value : this.value.slice(this.index);
}

get location(): Text.Location {
if (!this.#cache[this.index]) {
this.#cache[this.index] = this.locationAt(this.index);
}

return this.#cache[this.index];
}

get line(): string {
return this.lines[this.location.line - 1];
}

get column(): number {
return this.location.column;
}

/** @returns a new TextCursor with the same input moved by `amount`. */
move(amount: number): TextCursor {
return new TextCursor(this, this.index + amount);
}

/** Shorthand for `this.input.startsWith(str, this.index)`. */
startsWith(str: string): boolean {
return this.value.startsWith(str, this.index);
}

/** Depicts the location around the current index in the input string like so:
*
* ```
* …jumps over the lazy d…
^
* ```
*
* Uses `escapeTerse()` from the `string` module to convert special characters
* to single-character representations. Importantly:
*
* - `\t` becomes `⇥`
* - `\n` becomes `¶`
* - `\r` becomes `␍` */
inspect(opts?: TextCursor.InspectOptions): string {
let {
maxLength = consoleWidth(40),
colors = true,
lineNumber: showLineNumber,
} = opts ?? {};

showLineNumber ??= this.lines.length > 1;

const { column, line } = this.location;
const lineMarker = showLineNumber ? `[L${line}] ` : "";

maxLength = maxLength - lineMarker.length;
const [elided, offset] = elideAround(this.line, column - 1, { maxLength });
const escaped = escapeTerse(elided);
const pointerSpacing = " ".repeat(offset + lineMarker.length);

return colors
? `${blue(lineMarker)}${escaped}\n${pointerSpacing}${blue("^")}`
: `${lineMarker}${escaped}\n${pointerSpacing}^`;
}

toString(): string {
const [str] = elideAround(this.value, this.index, { maxLength: 20 });

return `TextCursor("${str}", ${this.index})`;
}

/** The function called by `console.log()` in Deno. */
[Symbol.for("Deno.customInspect")](opts: Deno.InspectOptions): string {
return this.inspect(opts);
}
}
1 change: 1 addition & 0 deletions string/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ export * from "./indent.ts";
export * from "./elide.ts";
export * from "./escape.ts";
export * from "./Text.ts";
export * from "./TextCursor.ts";

0 comments on commit 3285c61

Please sign in to comment.