Update dependency strtok3 v4 #517

Merged · 1 commit · Jul 16, 2021
13 changes: 7 additions & 6 deletions README.md
@@ -20,7 +20,7 @@ The `strtok3` contains a few methods to turn different input into a [*tokenizer*
It can read from:
* A file (taking a file path as an input)
* A Node.js [stream](https://nodejs.org/api/stream.html).
* A [Buffer](https://nodejs.org/api/buffer.html)
* A [Buffer](https://nodejs.org/api/buffer.html) or [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array)
* HTTP chunked transfer provided by [@tokenizer/http](https://github.com/Borewit/tokenizer-http).
* Chunked [Amazon S3](https://aws.amazon.com/s3) access provided by [@tokenizer/s3](https://github.com/Borewit/tokenizer-s3).

@@ -35,7 +35,8 @@ npm install strtok3
Use one of the methods to instantiate an [*abstract tokenizer*](#tokenizer):
* [strtok3.fromFile](#method-strtok3fromfile)
* [strtok3.fromStream](#method-strtok3fromstream)
* [strtok3.fromBuffer](#method-strtok3frombuffer)
* [strtok3.fromBuffer](#method-strtok3frombuffer)
* [strtok3.fromUint8Array](#method-strtok3fromuint8array)

### strtok3 methods

@@ -92,10 +93,10 @@ strtok3.fromStream(stream).then(tokenizer => {

#### Method `strtok3.fromBuffer()`

| Parameter | Optional | Type | Description |
|-----------|----------|----------------------------------------------|--------------------------|
| buffer | no | [Buffer](https://nodejs.org/api/buffer.html) | Buffer to read from |
| fileInfo | yes | [IFileInfo](#IFileInfo) | Provide file information |
| Parameter | Optional | Type | Description |
|------------|----------|--------------------------------------------------|----------------------------------------|
| uint8Array | no       | [Uint8Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) | Uint8Array or Buffer to read from |
| fileInfo | yes | [IFileInfo](#IFileInfo) | Provide file information |

Returns a [*tokenizer*](#tokenizer) which can be used to parse the provided buffer.

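The README change above rests on one type relationship, which makes the widened parameter non-breaking. A minimal standalone sketch (the `byteLength` function is hypothetical, not part of strtok3):

```typescript
// Node's Buffer is a subclass of Uint8Array, so existing Buffer callers still
// satisfy a parameter declared as Uint8Array, while plain Uint8Array callers
// now work too; the reverse (passing a Uint8Array where a Buffer is required)
// does not hold.
function byteLength(uint8Array: Uint8Array): number { // shape of the new parameter
  return uint8Array.length;
}

const nodeBuffer = Buffer.from('peter', 'latin1');
const plainArray = new Uint8Array([0x70, 0x65, 0x74, 0x65, 0x72]);

console.log(nodeBuffer instanceof Uint8Array); // true
console.log(byteLength(nodeBuffer));           // 5
console.log(byteLength(plainArray));           // 5
```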
22 changes: 11 additions & 11 deletions lib/AbstractTokenizer.ts
@@ -18,36 +18,36 @@ export abstract class AbstractTokenizer implements ITokenizer {
*/
public position: number = 0;

private numBuffer = Buffer.alloc(10);
private numBuffer = new Uint8Array(8);

/**
* Read buffer from tokenizer
* @param buffer - Target buffer to fill with data read from the tokenizer-stream
* @param options - Additional read options
* @returns Promise with number of bytes read
*/
public abstract readBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number>;
public abstract readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;

/**
* Peek (read ahead) buffer from tokenizer
* @param buffer - Target buffer to fill with data peek from the tokenizer-stream
* @param options - Peek behaviour options
* @returns Promise with number of bytes read
*/
public abstract peekBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number>;
public abstract peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number>;

/**
* Read a token from the tokenizer-stream
* @param token - The token to read
* @param position - If provided, the desired position in the tokenizer-stream
* @returns Promise with token data
*/
public async readToken<T>(token: IGetToken<T>, position?: number): Promise<T> {
const buffer = Buffer.alloc(token.len);
const len = await this.readBuffer(buffer, {position});
public async readToken<Value>(token: IGetToken<Value>, position?: number): Promise<Value> {
const uint8Array = Buffer.alloc(token.len);
const len = await this.readBuffer(uint8Array, {position});
if (len < token.len)
throw new EndOfStreamError();
return token.get(buffer, 0);
return token.get(uint8Array, 0);
}

/**
@@ -56,12 +56,12 @@ export abstract class AbstractTokenizer implements ITokenizer {
* @param position - Offset where to begin reading within the file. If position is null, data will be read from the current file position.
* @returns Promise with token data
*/
public async peekToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T> {
const buffer = Buffer.alloc(token.len);
const len = await this.peekBuffer(buffer, {position});
public async peekToken<Value>(token: IGetToken<Value>, position: number = this.position): Promise<Value> {
const uint8Array = Buffer.alloc(token.len);
const len = await this.peekBuffer(uint8Array, {position});
if (len < token.len)
throw new EndOfStreamError();
return token.get(buffer, 0);
return token.get(uint8Array, 0);
}

/**
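The `IGetToken` shape that `readToken`/`peekToken` consume (imported from `@tokenizer/token`) can be sketched standalone. The `UINT16_BE` token below is a hypothetical stand-in for the real tokens shipped in the separate `token-types` package:

```typescript
// A token couples a fixed byte length with a decoder over a Uint8Array.
interface IGetToken<Value> {
  len: number;
  get(array: Uint8Array, offset: number): Value;
}

// Big-endian unsigned 16-bit token (illustration only).
const UINT16_BE: IGetToken<number> = {
  len: 2,
  get(array: Uint8Array, offset: number): number {
    // DataView works on any Uint8Array, Buffer included; honour the view's
    // own offset/length so subarrays decode correctly.
    return new DataView(array.buffer, array.byteOffset, array.byteLength)
      .getUint16(offset, false);
  }
};

const data = new Uint8Array([0x01, 0x02]);
console.log(UINT16_BE.get(data, 0)); // 258 (0x0102)
```

`readToken` then only needs to allocate `token.len` bytes, fill them, and call `token.get(buffer, 0)`, which is exactly what the method above does.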
28 changes: 15 additions & 13 deletions lib/BufferTokenizer.ts
@@ -12,9 +12,9 @@ export class BufferTokenizer implements ITokenizer {
* @param buffer - Buffer to tokenize
* @param fileInfo - Pass additional file information to the tokenizer
*/
constructor(private buffer: Buffer, fileInfo?: IFileInfo) {
constructor(private uint8Array: Uint8Array, fileInfo?: IFileInfo) {
this.fileInfo = fileInfo ? fileInfo : {};
this.fileInfo.size = this.fileInfo.size ? this.fileInfo.size : buffer.length;
this.fileInfo.size = this.fileInfo.size ? this.fileInfo.size : uint8Array.length;
}

/**
@@ -23,7 +23,7 @@
* @param options - Read behaviour options
* @returns {Promise<number>}
*/
public async readBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
public async readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {

if (options && options.position) {
if (options.position < this.position) {
@@ -40,14 +40,14 @@

/**
* Peek (read ahead) buffer from tokenizer
* @param buffer
* @param uint8Array
* @param options - Read behaviour options
* @returns {Promise<number>}
*/
public async peekBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
public async peekBuffer(uint8Array: Uint8Array, options?: IReadChunkOptions): Promise<number> {

let offset = 0;
let length = buffer.length;
let length = uint8Array.length;
let position = this.position;

if (options) {
@@ -73,13 +73,15 @@

position = position || this.position;
if (!length) {
length = buffer.length;
length = uint8Array.length;
}
const bytes2read = Math.min(this.buffer.length - position, length);
const bytes2read = Math.min(this.uint8Array.length - position, length);
if ((!options || !options.mayBeLess) && bytes2read < length) {
throw new EndOfStreamError();
} else {
this.buffer.copy(buffer, offset, position, position + bytes2read);
uint8Array.set(this.uint8Array.subarray(position, position + bytes2read), offset);
return bytes2read;
}
}
@@ -91,16 +93,16 @@
this.position += token.len;
return tv;
} catch (err) {
this.position += this.buffer.length - position;
this.position += this.uint8Array.length - position;
throw err;
}
}

public async peekToken<T>(token: IGetToken<T>, position: number = this.position): Promise<T> {
if (this.buffer.length - position < token.len) {
if (this.uint8Array.length - position < token.len) {
throw new EndOfStreamError();
}
return token.get(this.buffer, position);
return token.get(this.uint8Array, position);
}

public async readNumber(token: IToken<number>): Promise<number> {
@@ -115,7 +117,7 @@
* @return actual number of bytes ignored
*/
public async ignore(length: number): Promise<number> {
const bytesIgnored = Math.min(this.buffer.length - this.position, length);
const bytesIgnored = Math.min(this.uint8Array.length - this.position, length);
this.position += bytesIgnored;
return bytesIgnored;
}
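The core mechanical change in `BufferTokenizer` is replacing Buffer's `copy()` (which plain `Uint8Array` lacks) with `set()` plus `subarray()`. A standalone sketch of the equivalence, with variable names mirroring `peekBuffer` but otherwise hypothetical:

```typescript
// Buffer:     source.copy(target, targetStart, sourceStart, sourceEnd)
// Uint8Array: target.set(source.subarray(sourceStart, sourceEnd), targetStart)
// subarray() creates a view (no allocation); set() copies its bytes.
const source = new Uint8Array([10, 20, 30, 40, 50]); // plays this.uint8Array
const target = new Uint8Array(3);                    // plays the caller's buffer

const position = 1;
const bytes2read = 3;
target.set(source.subarray(position, position + bytes2read), 0);

console.log(Array.from(target)); // [20, 30, 40]
```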
2 changes: 1 addition & 1 deletion lib/FileTokenizer.ts
@@ -54,7 +54,7 @@ export class FileTokenizer extends AbstractTokenizer {
* @param options - Read behaviour options
* @returns Promise number of bytes read
*/
public async peekBuffer(buffer: Buffer, options?: IReadChunkOptions): Promise<number> {
public async peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {

let offset = 0;
let length = buffer.length;
4 changes: 2 additions & 2 deletions lib/FsPromise.ts
@@ -6,7 +6,7 @@ import * as fs from 'fs';

export interface IReadResult {
bytesRead: number,
buffer: Buffer
buffer: Uint8Array
}

export const pathExists = fs.existsSync;
@@ -45,7 +45,7 @@ export async function open(path: fs.PathLike, mode?: string): Promise<number> {
});
}

export async function read(fd: number, buffer: Buffer, offset: number, length: number, position: number): Promise<IReadResult> {
export async function read(fd: number, buffer: Uint8Array, offset: number, length: number, position: number): Promise<IReadResult> {
return new Promise<IReadResult>((resolve, reject) => {
fs.read(fd, buffer, offset, length, position, (err, bytesRead, _buffer) => {
if (err)
12 changes: 5 additions & 7 deletions lib/ReadStreamTokenizer.ts
@@ -2,9 +2,7 @@ import { AbstractTokenizer } from './AbstractTokenizer';
import { EndOfStreamError, StreamReader } from 'peek-readable';
import * as Stream from 'stream';
import { IFileInfo, IReadChunkOptions } from './types';
// import * as _debug from 'debug';

// const debug = _debug('strtok3:ReadStreamTokenizer');
const maxBufferSize = 256000;

export class ReadStreamTokenizer extends AbstractTokenizer {
@@ -30,7 +28,7 @@
* @param options - Read behaviour options
* @returns Promise with number of bytes read
*/
public async readBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
public async readBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {

// const _offset = position ? position : this.position;
// debug(`readBuffer ${_offset}...${_offset + length - 1}`);
@@ -78,7 +76,7 @@
* @param options - Read behaviour options
* @returns Promise with number of bytes peeked
*/
public async peekBuffer(buffer: Buffer | Uint8Array, options?: IReadChunkOptions): Promise<number> {
public async peekBuffer(buffer: Uint8Array, options?: IReadChunkOptions): Promise<number> {

// const _offset = position ? position : this.position;
// debug(`peek ${_offset}...${_offset + length - 1}`);
@@ -101,9 +99,9 @@
if (options.position) {
const skipBytes = options.position - this.position;
if (skipBytes > 0) {
const skipBuffer = Buffer.alloc(length + skipBytes);
const skipBuffer = new Uint8Array(length + skipBytes);
bytesRead = await this.peekBuffer(skipBuffer, {mayBeLess: options.mayBeLess});
skipBuffer.copy(buffer, offset, skipBytes);
buffer.set(skipBuffer.subarray(skipBytes), offset);
return bytesRead - skipBytes;
} else if (skipBytes < 0) {
throw new Error('Cannot peek from a negative offset in a stream');
@@ -128,7 +126,7 @@
public async ignore(length: number): Promise<number> {
// debug(`ignore ${this.position}...${this.position + length - 1}`);
const bufSize = Math.min(maxBufferSize, length);
const buf = Buffer.alloc(bufSize);
const buf = new Uint8Array(bufSize);
let totBytesRead = 0;
while (totBytesRead < length) {
const remaining = length - totBytesRead;
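The peek-at-offset path in `ReadStreamTokenizer.peekBuffer` reads `length + skipBytes` bytes into a scratch buffer and keeps only the tail, since a stream cannot seek. A simplified synchronous sketch (the real code awaits the stream; here a hypothetical in-memory `available` array stands in for it):

```typescript
// Peek `length` bytes starting `skipBytes` ahead of the current position.
function peekAhead(available: Uint8Array, skipBytes: number, length: number): Uint8Array {
  const buffer = new Uint8Array(length);
  const skipBuffer = new Uint8Array(length + skipBytes); // scratch: skipped bytes + payload
  // stand-in for `await this.peekBuffer(skipBuffer, ...)` filling from the stream
  skipBuffer.set(available.subarray(0, skipBuffer.length));
  // discard the first skipBytes bytes, keep the tail
  buffer.set(skipBuffer.subarray(skipBytes), 0);
  return buffer;
}

const streamHead = new Uint8Array([1, 2, 3, 4, 5, 6]);
console.log(Array.from(peekAhead(streamHead, 2, 3))); // [3, 4, 5]
```

Note the same `set(subarray(...))` idiom replaces `skipBuffer.copy(buffer, offset, skipBytes)` here, which is what lets the scratch buffer be a plain `Uint8Array` instead of a `Buffer`.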
6 changes: 3 additions & 3 deletions lib/core.ts
@@ -20,10 +20,10 @@ export function fromStream(stream: Stream.Readable, fileInfo?: IFileInfo): ReadS

/**
* Construct ReadStreamTokenizer from given Buffer.
* @param buffer - Buffer to tokenize
* @param uint8Array - Uint8Array to tokenize
* @param fileInfo - Pass additional file information to the tokenizer
* @returns BufferTokenizer
*/
export function fromBuffer(buffer: Buffer, fileInfo?: IFileInfo): BufferTokenizer {
return new BufferTokenizer(buffer, fileInfo);
export function fromBuffer(uint8Array: Uint8Array, fileInfo?: IFileInfo): BufferTokenizer {
return new BufferTokenizer(uint8Array, fileInfo);
}
4 changes: 2 additions & 2 deletions package.json
@@ -43,6 +43,7 @@
"url": "https://github.com/Borewit/strtok3/issues"
},
"devDependencies": {
"@tokenizer/token": "^0.3.0",
"@types/chai": "^4.2.21",
"@types/debug": "^4.1.6",
"@types/mocha": "^8.2.3",
@@ -67,8 +68,7 @@
"typescript": "^4.3.5"
},
"dependencies": {
"@tokenizer/token": "^0.1.1",
"peek-readable": "^3.1.4"
"peek-readable": "^4.0.0"
},
"keywords": [
"tokenizer",
4 changes: 2 additions & 2 deletions test/test.ts
@@ -18,7 +18,7 @@ function getResourcePath(testFile: string) {

async function getTokenizerWithData(testData: string, test: ITokenizerTest): Promise<strtok3.ITokenizer> {
const testPath = getResourcePath('tmp.dat');
await fs.writeFile(testPath, Buffer.from(testData, 'binary'));
await fs.writeFile(testPath, Buffer.from(testData, 'latin1'));
return test.loadTokenizer('tmp.dat');
}

@@ -642,7 +642,7 @@ for (const tokenizerType of tokenizerTests) {
const rst = await getTokenizerWithData('\x05peter', tokenizerType);
// should decode string from chunk
assert.strictEqual(rst.position, 0);
const value = await rst.peekToken(new Token.StringType(5, 'utf-8'), 1);
const value = await rst.peekToken(new Token.StringType(5, 'latin1'), 1);
assert.equal(typeof value, 'string');
assert.equal(value, 'peter');
assert.strictEqual(rst.position, 0);
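The `'binary'` → `'latin1'` switch in the tests is a rename, not a behaviour change: Node documents `'binary'` as a legacy alias of `'latin1'`, mapping each character code 0–255 to a single byte. A quick check:

```typescript
// Both encodings produce byte-for-byte identical buffers.
const asBinary = Buffer.from('\x05peter', 'binary');
const asLatin1 = Buffer.from('\x05peter', 'latin1');

console.log(asBinary.equals(asLatin1)); // true
console.log(Array.from(asLatin1));      // [5, 112, 101, 116, 101, 114]
```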
13 changes: 9 additions & 4 deletions yarn.lock
@@ -158,6 +158,11 @@
resolved "https://registry.yarnpkg.com/@tokenizer/token/-/token-0.1.1.tgz#f0d92c12f87079ddfd1b29f614758b9696bc29e3"
integrity sha512-XO6INPbZCxdprl+9qa/AAbFFOMzzwqYxpjPgLICrMD6C2FCw6qfJOPcBk6JqqPLSaZ/Qx87qn4rpPmPMwaAK6w==

"@tokenizer/token@^0.3.0":
version "0.3.0"
resolved "https://registry.yarnpkg.com/@tokenizer/token/-/token-0.3.0.tgz#fe98a93fe789247e998c75e74e9c7c63217aa276"
integrity sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==

"@tsconfig/node10@^1.0.7":
version "1.0.7"
resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.7.tgz#1eb1de36c73478a2479cc661ef5af1c16d86d606"
@@ -2717,10 +2722,10 @@ pathval@^1.1.1:
resolved "https://registry.yarnpkg.com/pathval/-/pathval-1.1.1.tgz#8534e77a77ce7ac5a2512ea21e0fdb8fcf6c3d8d"
integrity sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==

peek-readable@^3.1.4:
version "3.1.4"
resolved "https://registry.yarnpkg.com/peek-readable/-/peek-readable-3.1.4.tgz#f5c3b41a4eeb63a1322c4131f0b5bac7105b892e"
integrity sha512-DX7ec7frSMtCWw+zMd27f66hcxIz/w9LQTY2RflB4WNHCVPAye1pJiP2t3gvaaOhu7IOhtPbHw8MemMj+F5lrg==
peek-readable@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/peek-readable/-/peek-readable-4.0.0.tgz#b024ef391c86136eba0ae9df3ff4f966a09e9a7e"
integrity sha512-kLbU4cz6h86poGVBKgAVMpFmD47nX04fPPQNKnv9fuj+IJZYkEBjsYAVu5nDbZWx0ZsWwWlMzeG90zQa5KLBaA==

performance-now@^2.1.0:
version "2.1.0"