From 172300dbd4256595bebfd2c067ce133447db2a1f Mon Sep 17 00:00:00 2001
From: Borewit
Date: Sat, 13 Mar 2021 15:52:09 +0100
Subject: [PATCH] Update dependency @tokenizer/token v0.3.0 and abstract Buffer
 usage to Uint8Array

---
 README.md                  | 13 +++++++------
 lib/AbstractTokenizer.ts   | 22 +++++++++++-----------
 lib/BufferTokenizer.ts     | 28 +++++++++++++++-------------
 lib/FileTokenizer.ts       |  2 +-
 lib/FsPromise.ts           |  4 ++--
 lib/ReadStreamTokenizer.ts | 12 +++++-------
 lib/core.ts                |  6 +++---
 package.json               |  4 ++--
 test/test.ts               |  4 ++--
 yarn.lock                  | 13 +++++++++----
 10 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/README.md b/README.md
index 743be9eb..59071c94 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ The `strtok3` contains a few methods to turn different input into a [*tokenizer*
 It can read from:
 * A file (taking a file path as an input)
 * A Node.js [stream](https://nodejs.org/api/stream.html).
-* A [Buffer](https://nodejs.org/api/buffer.html)
+* A [Buffer](https://nodejs.org/api/buffer.html) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array)
 * HTTP chunked transfer provided by [@tokenizer/http](https://github.com/Borewit/tokenizer-http).
 * Chunked [Amazon S3](https://aws.amazon.com/s3) access provided by [@tokenizer/s3](https://github.com/Borewit/tokenizer-s3).
 
@@ -35,7 +35,8 @@ npm install strtok3
 Use one of the methods to instantiate an [*abstract tokenizer*](#tokenizer):
 * [strtok3.fromFile](#method-strtok3fromfile)
 * [strtok3.fromStream](#method-strtok3fromstream)
-* [strtok3.fromBuffer](#method-strtok3frombuffer)
+* [strtok3.fromBuffer](#method-strtok3fromBuffer)
+* [strtok3.fromUint8Array](#method-strtok3fromUint8Array)
 
 ### strtok3 methods
 
@@ -92,10 +93,10 @@ strtok3.fromStream(stream).then(tokenizer => {
 
 #### Method `strtok3.fromBuffer()`
 
-| Parameter | Optional | Type                                         | Description              |
-|-----------|----------|----------------------------------------------|--------------------------|
-| buffer    | no       | [Buffer](https://nodejs.org/api/buffer.html) | Buffer to read from      |
-| fileInfo  | yes      | [IFileInfo](#IFileInfo)                      | Provide file information |
+| Parameter  | Optional | Type                                                                                                       | Description                        |
+|------------|----------|------------------------------------------------------------------------------------------------------------|------------------------------------|
+| uint8Array | no       | [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) | Uint8Array or Buffer to read from  |
+| fileInfo   | yes      | [IFileInfo](#IFileInfo)                                                                                    | Provide file information           |
 
 Returns a [*tokenizer*](#tokenizer) which can be used to parse the provided buffer.
 
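For illustration only (not part of the patch): a minimal sketch of reading from a plain `Uint8Array` through the updated `fromBuffer()` described above, using only tokenizer methods that appear in this diff. The `example` function and the sample bytes are made up for the sketch.

```ts
import * as strtok3 from 'strtok3';

async function example(): Promise<void> {
  // "\x05peter" as a plain Uint8Array; no Buffer involved.
  const uint8Array = new TextEncoder().encode('\x05peter');
  const tokenizer = strtok3.fromBuffer(uint8Array);

  const header = new Uint8Array(1);
  await tokenizer.readBuffer(header);            // header[0] === 5, the string length

  const name = new Uint8Array(header[0]);
  await tokenizer.readBuffer(name);              // the bytes of "peter"
  console.log(new TextDecoder().decode(name));   // "peter"
}
```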
diff --git a/lib/AbstractTokenizer.ts b/lib/AbstractTokenizer.ts
index e43d28a9..021e585f 100644
--- a/lib/AbstractTokenizer.ts
+++ b/lib/AbstractTokenizer.ts
@@ -18,7 +18,7 @@ export abstract class AbstractTokenizer implements ITokenizer {
    */
   public position: number = 0;
 
-  private numBuffer = Buffer.alloc(10);
+  private numBuffer = new Uint8Array(8);
 
   /**
    * Read buffer from tokenizer
@@ -26,7 +26,7 @@
    * @param options - Additional read options
    * @returns Promise with number of bytes read
    */
-  public abstract readBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number>;
+  public abstract readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
 
   /**
    * Peek (read ahead) buffer from tokenizer
@@ -34,7 +34,7 @@
    * @param options - Peek behaviour options
    * @returns Promise with number of bytes read
    */
-  public abstract peekBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number>;
+  public abstract peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;
 
   /**
    * Read a token from the tokenizer-stream
@@ -42,12 +42,12 @@
    * @param position - If provided, the desired position in the tokenizer-stream
    * @returns Promise with token data
    */
-  public async readToken<T>(token: IGetToken<T>, position?: number): Promise<T> {
-    const buffer = Buffer.alloc(token.len);
-    const len = await this.readBuffer(buffer, {position});
+  public async readToken<T>(token: IGetToken<T>, position?: number): Promise<T> {
+    const uint8Array = Buffer.alloc(token.len);
+    const len = await this.readBuffer(uint8Array, {position});
     if (len < token.len)
       throw new EndOfStreamError();
-    return token.get(buffer, 0);
+    return token.get(uint8Array, 0);
   }
 
   /**
@@ -56,12 +56,12 @@
    * @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position.
    * @returns Promise with token data
    */
-  public async peekToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T> {
-    const buffer = Buffer.alloc(token.len);
-    const len = await this.peekBuffer(buffer, {position});
+  public async peekToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T> {
+    const uint8Array = Buffer.alloc(token.len);
+    const len = await this.peekBuffer(uint8Array, {position});
     if (len < token.len)
       throw new EndOfStreamError();
-    return token.get(buffer, 0);
+    return token.get(uint8Array, 0);
   }
 
   /**
diff --git a/lib/BufferTokenizer.ts b/lib/BufferTokenizer.ts
index 21517152..72126c83 100644
--- a/lib/BufferTokenizer.ts
+++ b/lib/BufferTokenizer.ts
@@ -12,9 +12,9 @@ export class BufferTokenizer implements ITokenizer {
    * @param buffer - Buffer to tokenize
    * @param fileInfo - Pass additional file information to the tokenizer
    */
-  constructor(private buffer: Buffer, fileInfo?: IFileInfo) {
+  constructor(private uint8Array: Uint8Array, fileInfo?: IFileInfo) {
     this.fileInfo = fileInfo ? fileInfo : {};
-    this.fileInfo.size = this.fileInfo.size ? this.fileInfo.size : buffer.length;
+    this.fileInfo.size = this.fileInfo.size ? this.fileInfo.size : uint8Array.length;
   }
 
   /**
@@ -23,7 +23,7 @@
    * @param options - Read behaviour options
    * @returns {Promise<number>}
    */
-  public async readBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
+  public async readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {
 
     if (options && options.position) {
       if (options.position < this.position) {
@@ -40,14 +40,14 @@
 
   /**
    * Peek (read ahead) buffer from tokenizer
-   * @param buffer
+   * @param uint8Array
    * @param options - Read behaviour options
    * @returns {Promise<number>}
    */
-  public async peekBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
+  public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {
 
     let offset = 0;
-    let length = buffer.length;
+    let length = uint8Array.length;
     let position = this.position;
 
     if (options) {
@@ -73,13 +73,15 @@
     position = position || this.position;
 
     if (!length) {
-      length = buffer.length;
+      length = uint8Array.length;
     }
-    const bytes2read = Math.min(this.buffer.length - position, length);
+    const bytes2read = Math.min(this.uint8Array.length - position, length);
     if ((!options || !options.mayBeLess) && bytes2read < length) {
       throw new EndOfStreamError();
     } else {
-      this.buffer.copy(buffer, offset, position, position + bytes2read);
+      // old: this.buffer.copy(buffer, offset, position, position + bytes2read);
+      // uint8Array.set(this.uint8Array.subarray(position, position + bytes2read), offset);
+      Buffer.from(this.uint8Array).copy(uint8Array, offset, position, position + bytes2read);
       return bytes2read;
     }
   }
@@ -91,16 +93,16 @@
       this.position += token.len;
       return tv;
     } catch (err) {
-      this.position += this.buffer.length - position;
+      this.position += this.uint8Array.length - position;
       throw err;
     }
   }
 
   public async peekToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T> {
-    if (this.buffer.length - position < token.len) {
+    if (this.uint8Array.length - position < token.len) {
       throw new EndOfStreamError();
     }
-    return token.get(this.buffer, position);
+    return token.get(this.uint8Array, position);
   }
 
   public async readNumber(token: IToken<number>): Promise<number> {
@@ -115,7 +117,7 @@
    * @return actual number of bytes ignored
    */
   public async ignore(length: number): Promise<number> {
-    const bytesIgnored = Math.min(this.buffer.length - this.position, length);
+    const bytesIgnored = Math.min(this.uint8Array.length - this.position, length);
     this.position += bytesIgnored;
     return bytesIgnored;
   }
diff --git a/lib/FileTokenizer.ts b/lib/FileTokenizer.ts
index 9008fd47..fed84d07 100644
--- a/lib/FileTokenizer.ts
+++ b/lib/FileTokenizer.ts
@@ -54,7 +54,7 @@ export class FileTokenizer extends AbstractTokenizer {
    * @param options - Read behaviour options
    * @returns Promise number of bytes read
    */
-  public async peekBuffer(buffer: Buffer, options?: IReadChunkOptions): Promise<number> {
+  public async peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {
 
     let offset = 0;
     let length = buffer.length;
diff --git a/lib/FsPromise.ts b/lib/FsPromise.ts
index 5146714c..a75ba5dd 100644
--- a/lib/FsPromise.ts
+++ b/lib/FsPromise.ts
@@ -6,7 +6,7 @@ import * as fs from 'fs';
 
 export interface IReadResult {
   bytesRead: number,
-  buffer: Buffer
+  buffer: Uint8Array
 }
 
 export const pathExists = fs.existsSync;
@@ -45,7 +45,7 @@ export async function open(path: fs.PathLike, mode?: string): Promise<number> {
   });
 }
 
-export async function read(fd: number, buffer: Buffer, offset: number, length: number, position: number): Promise<IReadResult> {
+export async function read(fd: number, buffer: Uint8Array, offset: number, length: number, position: number): Promise<IReadResult> {
   return new Promise<IReadResult>((resolve, reject) => {
     fs.read(fd, buffer, offset, length, position, (err, bytesRead, _buffer) => {
       if (err)
diff --git a/lib/ReadStreamTokenizer.ts b/lib/ReadStreamTokenizer.ts
index a0bb08dc..ad13a58d 100644
--- a/lib/ReadStreamTokenizer.ts
+++ b/lib/ReadStreamTokenizer.ts
@@ -2,9 +2,7 @@ import { AbstractTokenizer } from './AbstractTokenizer';
 import { EndOfStreamError, StreamReader } from 'peek-readable';
 import * as Stream from 'stream';
 import { IFileInfo, IReadChunkOptions } from './types';
-// import * as _debug from 'debug';
-// const debug = _debug('strtok3:ReadStreamTokenizer');
 
 const maxBufferSize = 256000;
 
 export class ReadStreamTokenizer extends AbstractTokenizer {
@@ -30,7 +28,7 @@ export class ReadStreamTokenizer extends AbstractTokenizer {
    * @param options - Read behaviour options
    * @returns Promise with number of bytes read
    */
-  public async readBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
+  public async readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {
 
     // const _offset = position ? position : this.position;
     // debug(`readBuffer ${_offset}...${_offset + length - 1}`);
@@ -78,7 +76,7 @@ export class ReadStreamTokenizer extends AbstractTokenizer {
    * @param options - Read behaviour options
    * @returns Promise with number of bytes peeked
    */
-  public async peekBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
+  public async peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {
 
     // const _offset = position ? position : this.position;
     // debug(`peek ${_offset}...${_offset + length - 1}`);
@@ -101,9 +99,9 @@ export class ReadStreamTokenizer extends AbstractTokenizer {
     if (options.position) {
       const skipBytes = options.position - this.position;
       if (skipBytes > 0) {
-        const skipBuffer = Buffer.alloc(length + skipBytes);
+        const skipBuffer = new Uint8Array(length + skipBytes);
         bytesRead = await this.peekBuffer(skipBuffer, {mayBeLess: options.mayBeLess});
-        skipBuffer.copy(buffer, offset, skipBytes);
+        buffer.set(skipBuffer.subarray(skipBytes), offset);
         return bytesRead - skipBytes;
       } else if (skipBytes < 0) {
         throw new Error('Cannot peek from a negative offset in a stream');
@@ -128,7 +126,7 @@ export class ReadStreamTokenizer extends AbstractTokenizer {
   public async ignore(length: number): Promise<number> {
     // debug(`ignore ${this.position}...${this.position + length - 1}`);
     const bufSize = Math.min(maxBufferSize, length);
-    const buf = Buffer.alloc(bufSize);
+    const buf = new Uint8Array(bufSize);
     let totBytesRead = 0;
     while (totBytesRead < length) {
       const remaining = length - totBytesRead;
diff --git a/lib/core.ts b/lib/core.ts
index 65302bae..49d65fad 100644
--- a/lib/core.ts
+++ b/lib/core.ts
@@ -20,10 +20,10 @@ export function fromStream(stream: Stream.Readable, fileInfo?: IFileInfo): ReadStreamTokenizer {
 
 /**
  * Construct ReadStreamTokenizer from given Buffer.
- * @param buffer - Buffer to tokenize
+ * @param uint8Array - Uint8Array to tokenize
  * @param fileInfo - Pass additional file information to the tokenizer
  * @returns BufferTokenizer
  */
-export function fromBuffer(buffer: Buffer, fileInfo?: IFileInfo): BufferTokenizer {
-  return new BufferTokenizer(buffer, fileInfo);
+export function fromBuffer(uint8Array: Uint8Array, fileInfo?: IFileInfo): BufferTokenizer {
+  return new BufferTokenizer(uint8Array, fileInfo);
 }
diff --git a/package.json b/package.json
index dd4a4b81..f9604aad 100644
--- a/package.json
+++ b/package.json
@@ -43,6 +43,7 @@
     "url": "https://github.com/Borewit/strtok3/issues"
   },
   "devDependencies": {
+    "@tokenizer/token": "^0.3.0",
    "@types/chai": "^4.2.21",
     "@types/debug": "^4.1.6",
     "@types/mocha": "^8.2.3",
@@ -67,8 +68,7 @@
     "typescript": "^4.3.5"
   },
   "dependencies": {
-    "@tokenizer/token": "^0.1.1",
-    "peek-readable": "^3.1.4"
+    "peek-readable": "^4.0.0"
   },
   "keywords": [
     "tokenizer",
diff --git a/test/test.ts b/test/test.ts
index 4edddfe4..af65a746 100644
--- a/test/test.ts
+++ b/test/test.ts
@@ -18,7 +18,7 @@ function getResourcePath(testFile: string) {
 
 async function getTokenizerWithData(testData: string, test: ITokenizerTest): Promise<ITokenizer> {
   const testPath = getResourcePath('tmp.dat');
-  await fs.writeFile(testPath, Buffer.from(testData, 'binary'));
+  await fs.writeFile(testPath, Buffer.from(testData, 'latin1'));
   return test.loadTokenizer('tmp.dat');
 }
 
@@ -642,7 +642,7 @@ for (const tokenizerType of tokenizerTests) {
       const rst = await getTokenizerWithData('\x05peter', tokenizerType);
       // should decode string from chunk
       assert.strictEqual(rst.position, 0);
-      const value = await rst.peekToken(new Token.StringType(5, 'utf-8'), 1);
+      const value = await rst.peekToken(new Token.StringType(5, 'latin1'), 1);
       assert.equal(typeof value, 'string');
       assert.equal(value, 'peter');
       assert.strictEqual(rst.position, 0);
diff --git a/yarn.lock b/yarn.lock
index 490cfa09..c19c55b2 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -158,6 +158,11 @@
   resolved "https://registry.yarnpkg.com/@tokenizer/token/-/token-0.1.1.tgz#f0d92c12f87079ddfd1b29f614758b9696bc29e3"
   integrity sha512-XO6INPbZCxdprl+9qa/AAbFFOMzzwqYxpjPgLICrMD6C2FCw6qfJOPcBk6JqqPLSaZ/Qx87qn4rpPmPMwaAK6w==
 
+"@tokenizer/token@^0.3.0":
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/@tokenizer/token/-/token-0.3.0.tgz#fe98a93fe789247e998c75e74e9c7c63217aa276"
+  integrity sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==
+
 "@tsconfig/node10@^1.0.7":
   version "1.0.7"
   resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.7.tgz#1eb1de36c73478a2479cc661ef5af1c16d86d606"
@@ -2717,10 +2722,10 @@ pathval@^1.1.1:
   resolved "https://registry.yarnpkg.com/pathval/-/pathval-1.1.1.tgz#8534e77a77ce7ac5a2512ea21e0fdb8fcf6c3d8d"
   integrity sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==
 
-peek-readable@^3.1.4:
-  version "3.1.4"
-  resolved "https://registry.yarnpkg.com/peek-readable/-/peek-readable-3.1.4.tgz#f5c3b41a4eeb63a1322c4131f0b5bac7105b892e"
-  integrity sha512-DX7ec7frSMtCWw+zMd27f66hcxIz/w9LQTY2RflB4WNHCVPAye1pJiP2t3gvaaOhu7IOhtPbHw8MemMj+F5lrg==
+peek-readable@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/peek-readable/-/peek-readable-4.0.0.tgz#b024ef391c86136eba0ae9df3ff4f966a09e9a7e"
+  integrity sha512-kLbU4cz6h86poGVBKgAVMpFmD47nX04fPPQNKnv9fuj+IJZYkEBjsYAVu5nDbZWx0ZsWwWlMzeG90zQa5KLBaA==
 
 performance-now@^2.1.0:
   version "2.1.0"
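For illustration only (not part of the patch): the commented-out line kept in `BufferTokenizer.peekBuffer()` hints at a Buffer-free alternative to the `Buffer.from(...).copy(...)` call, using `Uint8Array.prototype.set` together with `subarray`. A minimal sketch follows; the helper name and parameters are hypothetical, with `source` standing in for `this.uint8Array` and `target` for the caller-supplied array.

```ts
// Copy `bytes2read` bytes starting at `position` in `source` into `target` at `targetOffset`,
// mirroring Buffer.copy(target, targetStart, sourceStart, sourceEnd) without creating a Buffer.
function copyChunk(source: Uint8Array, target: Uint8Array, targetOffset: number, position: number, bytes2read: number): void {
  target.set(source.subarray(position, position + bytes2read), targetOffset);
}
```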