Skip to content

Commit

Permalink
Replace Buffer usage with Uint8Array (#633)
Browse files Browse the repository at this point in the history
Co-authored-by: Borewit <Borewit@users.noreply.github.com>
  • Loading branch information
bjornstar and Borewit committed Jul 6, 2024
1 parent 37233b1 commit 00e051b
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 58 deletions.
4 changes: 2 additions & 2 deletions core.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -323,13 +323,13 @@ export type ReadableStreamWithFileType = ReadableStream & {
};

/**
Detect the file type of a `Buffer`, `Uint8Array`, or `ArrayBuffer`.
Detect the file type of a `Uint8Array` or `ArrayBuffer`.
The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
If file access is available, it is recommended to use `.fromFile()` instead.
@param buffer - An Uint8Array or Buffer representing file data. It works best if the buffer contains the entire file. It may work with a smaller portion as well.
@param buffer - A Uint8Array or ArrayBuffer representing file data. It works best if the buffer contains the entire file. It may work with a smaller portion as well.
@returns The detected file type, or `undefined` when there is no match.
*/
export function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;
Expand Down
60 changes: 34 additions & 26 deletions core.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {Buffer} from 'node:buffer';
import * as Token from 'token-types';
import * as strtok3 from 'strtok3/core';
import {includes, indexOf, getUintBE} from 'uint8array-extras';
import {
stringToBytes,
tarHeaderChecksumMatches,
Expand Down Expand Up @@ -75,7 +75,7 @@ export class FileTypeParser {

async fromBuffer(input) {
if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`Buffer\` or \`ArrayBuffer\`, got \`${typeof input}\``);
throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``);
}

const buffer = input instanceof Uint8Array ? input : new Uint8Array(input);
Expand Down Expand Up @@ -116,7 +116,7 @@ export class FileTypeParser {
const outputStream = stream.pipeline ? stream.pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);

// Read the input stream and detect the filetype
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? Buffer.alloc(0);
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
try {
pass.fileType = await this.fromBuffer(chunk);
} catch (error) {
Expand Down Expand Up @@ -145,7 +145,7 @@ export class FileTypeParser {
}

async parse(tokenizer) {
this.buffer = Buffer.alloc(minimumBytes);
this.buffer = new Uint8Array(minimumBytes);

// Keep reading until EOF if the file size is unknown.
if (tokenizer.fileInfo.size === undefined) {
Expand Down Expand Up @@ -372,12 +372,14 @@ export class FileTypeParser {
while (tokenizer.position + 30 < tokenizer.fileInfo.size) {
await tokenizer.readBuffer(this.buffer, {length: 30});

const view = new DataView(this.buffer.buffer);

// https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
const zipHeader = {
compressedSize: this.buffer.readUInt32LE(18),
uncompressedSize: this.buffer.readUInt32LE(22),
filenameLength: this.buffer.readUInt16LE(26),
extraFieldLength: this.buffer.readUInt16LE(28),
compressedSize: view.getUint32(18, true),
uncompressedSize: view.getUint32(22, true),
filenameLength: view.getUint16(26, true),
extraFieldLength: view.getUint16(28, true),
};

zipHeader.filename = await tokenizer.readToken(new Token.StringType(zipHeader.filenameLength, 'utf-8'));
Expand Down Expand Up @@ -472,7 +474,8 @@ export class FileTypeParser {
while (nextHeaderIndex < 0 && (tokenizer.position < tokenizer.fileInfo.size)) {
await tokenizer.peekBuffer(this.buffer, {mayBeLess: true});

nextHeaderIndex = this.buffer.indexOf('504B0304', 0, 'hex');
nextHeaderIndex = indexOf(this.buffer, new Uint8Array([0x50, 0x4B, 0x03, 0x04]));

// Move position to the next header if found, skip the whole buffer otherwise
await tokenizer.ignore(nextHeaderIndex >= 0 ? nextHeaderIndex : this.buffer.length);
}
Expand All @@ -495,7 +498,7 @@ export class FileTypeParser {
if (this.checkString('OggS')) {
// This is an OGG container
await tokenizer.ignore(28);
const type = Buffer.alloc(8);
const type = new Uint8Array(8);
await tokenizer.readBuffer(type);

// Needs to be before `ogg` check
Expand Down Expand Up @@ -576,7 +579,7 @@ export class FileTypeParser {
) {
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
const brandMajor = this.buffer.toString('binary', 8, 12).replace('\0', ' ').trim();
const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim();
switch (brandMajor) {
case 'avif':
case 'avis':
Expand Down Expand Up @@ -706,11 +709,11 @@ export class FileTypeParser {
try {
await tokenizer.ignore(1350);
const maxBufferSize = 10 * 1024 * 1024;
const buffer = Buffer.alloc(Math.min(maxBufferSize, tokenizer.fileInfo.size));
const buffer = new Uint8Array(Math.min(maxBufferSize, tokenizer.fileInfo.size));
await tokenizer.readBuffer(buffer, {mayBeLess: true});

// Check if this is an Adobe Illustrator file
if (buffer.includes(Buffer.from('AIPrivateData'))) {
if (includes(buffer, new TextEncoder().encode('AIPrivateData'))) {
return {
ext: 'ai',
mime: 'application/postscript',
Expand Down Expand Up @@ -765,35 +768,39 @@ export class FileTypeParser {
async function readField() {
const msb = await tokenizer.peekNumber(Token.UINT8);
let mask = 0x80;
let ic = 0; // 0 = A, 1 = B, 2 = C, 3
// = D
let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D

while ((msb & mask) === 0 && mask !== 0) {
++ic;
mask >>= 1;
}

const id = Buffer.alloc(ic + 1);
const id = new Uint8Array(ic + 1);
await tokenizer.readBuffer(id);
return id;
}

async function readElement() {
const id = await readField();
const idField = await readField();
const lengthField = await readField();

lengthField[0] ^= 0x80 >> (lengthField.length - 1);
const nrLength = Math.min(6, lengthField.length); // JavaScript can max read 6 bytes integer

const idView = new DataView(idField.buffer);
const lengthView = new DataView(lengthField.buffer, lengthField.length - nrLength, nrLength);

return {
id: id.readUIntBE(0, id.length),
len: lengthField.readUIntBE(lengthField.length - nrLength, nrLength),
id: getUintBE(idView),
len: getUintBE(lengthView),
};
}

async function readChildren(children) {
while (children > 0) {
const element = await readElement();
if (element.id === 0x42_82) {
const rawValue = await tokenizer.readToken(new Token.StringType(element.len, 'utf-8'));
const rawValue = await tokenizer.readToken(new Token.StringType(element.len));
return rawValue.replaceAll(/\00.*$/g, ''); // Return DocType
}

Expand Down Expand Up @@ -1059,7 +1066,7 @@ export class FileTypeParser {
}

if (this.checkString('AC')) {
const version = this.buffer.toString('binary', 2, 6);
const version = new Token.StringType(4, 'latin1').get(this.buffer, 2);
if (version.match('^d*') && version >= 1000 && version <= 1050) {
return {
ext: 'dwg',
Expand Down Expand Up @@ -1126,7 +1133,7 @@ export class FileTypeParser {
async function readChunkHeader() {
return {
length: await tokenizer.readToken(Token.INT32_BE),
type: await tokenizer.readToken(new Token.StringType(4, 'binary')),
type: await tokenizer.readToken(new Token.StringType(4, 'latin1')),
};
}

Expand Down Expand Up @@ -1213,7 +1220,7 @@ export class FileTypeParser {
// ASF_Header_Object first 80 bytes
if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
async function readHeader() {
const guid = Buffer.alloc(16);
const guid = new Uint8Array(16);
await tokenizer.readBuffer(guid);
return {
id: guid,
Expand All @@ -1228,7 +1235,7 @@ export class FileTypeParser {
let payload = header.size - 24;
if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
// Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
const typeId = Buffer.alloc(16);
const typeId = new Uint8Array(16);
payload -= await tokenizer.readBuffer(typeId);

if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
Expand Down Expand Up @@ -1432,10 +1439,11 @@ export class FileTypeParser {
}

if (this.check([0x04, 0x00, 0x00, 0x00]) && this.buffer.length >= 16) { // Rough & quick check Pickle/ASAR
const jsonSize = this.buffer.readUInt32LE(12);
const jsonSize = new DataView(this.buffer.buffer).getUint32(12, true);

if (jsonSize > 12 && this.buffer.length >= jsonSize + 16) {
try {
const header = this.buffer.slice(16, jsonSize + 16).toString();
const header = new TextDecoder().decode(this.buffer.slice(16, jsonSize + 16));
const json = JSON.parse(header);
// Check if Pickle is ASAR
if (json.files) { // Final check, assuring Pickle/ASAR format
Expand Down
4 changes: 1 addition & 3 deletions index.test-d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import {Buffer} from 'node:buffer';
import {createReadStream} from 'node:fs';
import {expectType} from 'tsd';
import {
Expand All @@ -18,12 +17,11 @@ import {
type MimeType,
} from './index.js';

expectType<Promise<FileTypeResult | undefined>>(fileTypeFromBuffer(Buffer.from([0xFF, 0xD8, 0xFF])));
expectType<Promise<FileTypeResult | undefined>>(fileTypeFromBuffer(new Uint8Array([0xFF, 0xD8, 0xFF])));
expectType<Promise<FileTypeResult | undefined>>(fileTypeFromBuffer(new ArrayBuffer(42)));

(async () => {
const result = await fileTypeFromBuffer(Buffer.from([0xFF, 0xD8, 0xFF]));
const result = await fileTypeFromBuffer(new Uint8Array([0xFF, 0xD8, 0xFF]));
if (result !== undefined) {
expectType<FileExtension>(result.ext);
expectType<MimeType>(result.mime);
Expand Down
7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "file-type",
"version": "19.0.0",
"description": "Detect the file type of a Buffer/Uint8Array/ArrayBuffer",
"description": "Detect the file type of a Uint8Array/ArrayBuffer",
"license": "MIT",
"repository": "sindresorhus/file-type",
"funding": "https://github.com/sindresorhus/file-type?sponsor=1",
Expand Down Expand Up @@ -211,8 +211,9 @@
],
"dependencies": {
"readable-web-to-node-stream": "^3.0.2",
"strtok3": "^7.0.0",
"token-types": "^5.0.1"
"strtok3": "^7.1.0",
"token-types": "^6.0.0",
"uint8array-extras": "^1.3.0"
},
"devDependencies": {
"@tokenizer/token": "^0.3.0",
Expand Down
12 changes: 6 additions & 6 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# file-type

> Detect the file type of a Buffer/Uint8Array/ArrayBuffer
> Detect the file type of a Uint8Array/ArrayBuffer
The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.

Expand Down Expand Up @@ -31,7 +31,7 @@ console.log(await fileTypeFromFile('Unicorn.png'));
//=> {ext: 'png', mime: 'image/png'}
```

Determine file type from a Buffer, which may be a portion of the beginning of a file:
Determine file type from a Uint8Array/ArrayBuffer, which may be a portion of the beginning of a file:

```js
import {fileTypeFromBuffer} from 'file-type';
Expand Down Expand Up @@ -107,7 +107,7 @@ console.log(fileType);

### fileTypeFromBuffer(buffer)

Detect the file type of a `Buffer`, `Uint8Array`, or `ArrayBuffer`.
Detect the file type of a `Uint8Array` or `ArrayBuffer`.

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.

Expand All @@ -122,7 +122,7 @@ Or `undefined` when there is no match.

#### buffer

Type: `Buffer | Uint8Array | ArrayBuffer`
Type: `Uint8Array | ArrayBuffer`

A buffer representing file data. It works best if the buffer contains the entire file. It may work with a smaller portion as well.

Expand Down Expand Up @@ -335,7 +335,7 @@ const customDetectors = [
async tokenizer => {
const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // 'UNICORN' as decimal character codes

const buffer = Buffer.alloc(7);
const buffer = new Uint8Array(7);
await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true});

if (unicornHeader.every((value, index) => value === buffer[index])) {
Expand All @@ -346,7 +346,7 @@ const customDetectors = [
},
];

const buffer = Buffer.from('UNICORN');
const buffer = new Uint8Array(new TextEncoder().encode('UNICORN'));
const parser = new FileTypeParser({customDetectors});
const fileType = await parser.fromBuffer(buffer);
console.log(fileType);
Expand Down
Loading

0 comments on commit 00e051b

Please sign in to comment.