diff --git a/example.ts b/example.ts index af5dff6..d4352ef 100644 --- a/example.ts +++ b/example.ts @@ -1,16 +1,21 @@ -import lines from "./read_lines.ts"; +import { linesBytes } from "./lines.ts"; import concatBytes from "./concatBytes.ts"; async function cat(filenames: string[]): Promise { - const newlinebuffer = new TextEncoder().encode("\r\n"); + const newlinebuffer = new TextEncoder().encode("\n"); for (let filename of filenames) { - const file_lines: Uint8Array[] = []; - for await (const line of lines(filename)) { - // you could transform the line buffers here - file_lines.push(line); - file_lines.push(newlinebuffer); + const file = await Deno.open(filename); + try { + const file_lines: Uint8Array[] = []; + for await (const line of linesBytes(file)) { + // you could transform the line buffers here + file_lines.push(line); + file_lines.push(newlinebuffer); + } + Deno.stdout.write(concatBytes(...file_lines)); + } finally { + file.close(); } - Deno.stdout.write(concatBytes(...file_lines)); } } diff --git a/example2.ts b/example2.ts new file mode 100644 index 0000000..634a825 --- /dev/null +++ b/example2.ts @@ -0,0 +1,20 @@ +import { lines } from "./lines.ts"; + +async function cat(filenames: string[]): Promise { + const newlinebuffer = new TextEncoder().encode("\n"); + for (let filename of filenames) { + const file = await Deno.open(filename); + try { + let fileStr = ""; + for await (const line of lines(file)) { + // you could transform the line buffers here + fileStr += line + "\n"; + } + console.log(fileStr); + } finally { + file.close(); + } + } +} + +cat(Deno.args.slice(1)); diff --git a/example_input.ts b/example_input.ts new file mode 100644 index 0000000..4807d63 --- /dev/null +++ b/example_input.ts @@ -0,0 +1,31 @@ +import { lines } from "./lines.ts"; + +/** + * Returns a python like input reader. + */ +function inputReader(r: Deno.Reader) { + const lineReader = lines(r); + /** + * Python like input reader. 
Returns an array containing at the first index + * the line read and at the second index a boolean indicating whether the eof + * has been reached. + */ + return async function input(output: string) { + if (output) { + Deno.stdout.write(new TextEncoder().encode(output)); + } + const { value: line, done: eof } = await lineReader.next(); + return [line, eof]; + }; +} + +(async () => { + const input = inputReader(Deno.stdin); + console.log("-- DENO ADDER --"); + // get the value and whether it's the eof + const [num1, eof] = await input("Enter a number: "); + console.log(eof); + // just get the value + const num2 = (await input("Enter another number: "))[0]; + console.log(`${num1} + ${num2} = ${Number(num1) + Number(num2)}`); +})(); diff --git a/lines.ts b/lines.ts new file mode 100644 index 0000000..f9b4b2c --- /dev/null +++ b/lines.ts @@ -0,0 +1,39 @@ +import { BufReader } from "https://deno.land/x/io/bufio.ts"; +import concatBytes from "./concatBytes.ts"; + +/** + * returns an AsyncIterable of lines from the given file in bytes. 
+ */ +export async function* linesBytes( + reader: Deno.Reader +): AsyncIterableIterator { + const bufReader = new BufReader(reader); + let bufState; + let allBytes = []; + while (bufState !== "EOF") { + let bytes, isPrefix; + [bytes, isPrefix, bufState] = await bufReader.readLine(); + if (isPrefix) { + allBytes.push(bytes); + continue; + } + if (allBytes.length > 0) { + allBytes.push(bytes); + bytes = concatBytes(...allBytes); + allBytes = []; + } + yield bytes; + } +} + +/** + * Reads from a reader and yields each line + */ +export async function* lines( + reader: Deno.Reader +): AsyncIterableIterator { + const decoder = new TextDecoder(); + for await (const line of linesBytes(reader)) { + yield decoder.decode(line); + } +} diff --git a/read_lines.ts b/read_lines.ts deleted file mode 100644 index 15505f0..0000000 --- a/read_lines.ts +++ /dev/null @@ -1,66 +0,0 @@ -import concatBytes from "./concatBytes.ts"; - -/** - * returns an AsyncIterable of lines from the given file. - */ -export async function* file_lines(file: Deno.File): AsyncIterable { - // The Carriage-Return Line-Feed characters encoded - const CRLF = new TextEncoder().encode("\r\n"); - // the data that is batched until a new line is hit - let prevData = null; - // just the amount of bytes to read from the file at a time - let readBytes = 2 ** 8; - const data = new Uint8Array(readBytes); - - let readResult = { eof: false, nread: 0 }; - while (!readResult.eof) { - readResult = await file.read(data); - let checkData; - - if (prevData) { - checkData = concatBytes(prevData, data.slice(0, readResult.nread)); - } else { - checkData = data.slice(0, readResult.nread); - } - - let lastFind = 0; - for ( - let i = prevData ? 
prevData.byteLength : 0; - i < checkData.length; - i++ - ) { - if (checkData[i] == CRLF[0] && checkData[i + 1] == CRLF[1]) { - yield checkData.slice(lastFind, i); - lastFind = i + 2; - i += 2; - } else if (checkData[i] == CRLF[1]) { - yield checkData.slice(lastFind, i); - lastFind = i + 1; - i += 1; - } - } - - // save characthers that aren't a full line yet - prevData = checkData.slice(lastFind); - // maybe yield previous data here if it exceeds some maximum byte length - // then set it to null again - } - if (prevData.byteLength > 0) { - yield prevData; - } -} - -/** - * Opens a file with the file name and returns an AsyncIterable of `Uint8Array`s of lines from that file. - * Once the iterator has finished or errored it closes the file - */ -export default async function* read_lines( - fileName: string -): AsyncIterable { - const file = await Deno.open(fileName); - try { - yield* file_lines(file); - } finally { - file.close(); - } -} diff --git a/readme.md b/readme.md index d20abe1..3eda19f 100644 --- a/readme.md +++ b/readme.md @@ -1,8 +1,10 @@ -# read_lines +# lines -Main script is read_lines.ts. See the example.ts for an example to run. +Main script is [lines.ts](./lines.ts). -You can compare this with the cat implementation on deno's examples in the std library. The time spent seems to be pretty about 3x slower on my Macbook and about the same on my PC. +## Example + +See [example.ts](./example.ts) for an example to run. You can compare it with the cat implementation in the examples of Deno's std library. This script seems to be about 2x slower than Deno's cat example in my MacBook Pro's native terminal. 
This example: @@ -22,8 +24,18 @@ Deno's cat example time deno -A https://deno.land/std/examples/cat.ts mobydick.txt ``` -You can get the [mobydick.txt from project gutenberg](https://www.gutenberg.org/files/2701/2701-0.txt) or curl it: +You can download [mobydick.txt from Project Gutenberg](https://www.gutenberg.org/files/2701/2701-0.txt) or fetch it from there with curl (Mac/Linux) like so: ```sh curl https://www.gutenberg.org/files/2701/2701-0.txt -o mobydick.txt ``` + +## Input example + +The `lines` function's async iterator can be used directly, as in [`./example_input.ts`](./example_input.ts). The input reader created in that file is somewhat similar in style to Python's `input` function. + +Try it out: + +```sh +deno https://raw.githubusercontent.com/johnsonjo4531/read_lines/master/example_input.ts +```