Skip to content

Commit

Permalink
Add detecting Web Stream to default (core) entry point
Browse files Browse the repository at this point in the history
  • Loading branch information
Borewit committed Jul 30, 2024
1 parent 499b985 commit 821e8be
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 16 deletions.
26 changes: 26 additions & 0 deletions core.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,27 @@ export declare class TokenizerPositionError extends Error {
constructor(message?: string);
}

/**
A Web `ReadableStream` of `Uint8Array` chunks, extended with an optional read-only `fileType` property holding the detection result (or `undefined` when the type could not be determined).
*/
export type AnyWebReadableByteStreamWithFileType = AnyWebReadableStream<Uint8Array> & {
readonly fileType?: FileTypeResult;
};

/**
Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
This method can be handy to insert in the middle of a stream pipeline, but it comes with a price:
internally `stream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type.
The sample size impacts the file detection resolution.
A smaller sample size will result in lower probability of the best file type detection.
**Note:** The stream is read with a BYOB reader, so `webStream` must be a byte stream. NOTE(review): confirm the minimum supported runtime — the test suite gates BYOB Blob streams on Node.js ≥ 20.
@param webStream - A Web byte stream (`ReadableStream<Uint8Array>`) containing the file to examine.
@param options - May be used to override the default sample-size.
@returns A `Promise` which resolves to the original web stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
*/
export function fileTypeStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;

export declare class FileTypeParser {
detectors: Iterable<Detector>;

Expand All @@ -494,4 +515,9 @@ export declare class FileTypeParser {
Works the same way as {@link fileTypeFromBlob}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromBlob(blob: Blob): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
}
48 changes: 48 additions & 0 deletions core.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ export async function fileTypeFromTokenizer(tokenizer) {
return new FileTypeParser().fromTokenizer(tokenizer);
}

/**
Returns a `Promise` which resolves to the original Web stream argument, but with an added `fileType` property.
@param webStream - A Web byte stream (`ReadableStream<Uint8Array>`) containing the file to examine.
@param options - May be used to override the default sample-size and/or provide custom detectors.
@returns A `Promise` resolving to the original stream with an added `fileType` property.
*/
export async function stream(webStream, options) {
	// Forward `options` to both the constructor (custom detectors) and the
	// detection call (sample size), mirroring `fileTypeStream()` in the
	// Node.js entry point. Omitting `options` behaves exactly as before.
	return new FileTypeParser(options).toDetectionStream(webStream, options);
}

export class FileTypeParser {
constructor(options) {
this.detectors = options?.customDetectors;
Expand Down Expand Up @@ -104,6 +108,50 @@ export class FileTypeParser {
}
}

async toDetectionStream(webStream, options = {}) {
const {sampleSize = reasonableDetectionSizeInBytes} = options;

// Initialize a reader from the web stream
const reader = webStream.getReader({mode: 'byob'});
const pass = new TransformStream();
const writer = pass.writable.getWriter();
let detectedFileType;

// Read the first chunk for file type detection
const {value: chunk, done} = await reader.read(new Uint8Array(sampleSize));
if (done || !chunk) {
detectedFileType = undefined;
} else {
try {
detectedFileType = await this.fromBuffer(chunk.slice(0, sampleSize));
} catch (error) {
if (error instanceof strtok3.EndOfStreamError) {
detectedFileType = undefined;
} else {
throw error;
}
}
}

// Write the initial chunk into the pass-through stream
writer.write(chunk);

// Forward remaining data from the reader to the writer
(async function pump() {
const {value, done} = await reader.read(new Uint8Array(512 * 1024));
if (done) {
return writer.close();
}

await writer.write(value);
return pump();
})();

// Attach the detected file type to the output stream
pass.readable.fileType = detectedFileType;
return pass.readable;
}

check(header, options) {
return _check(this.buffer, header, options);
}
Expand Down
8 changes: 5 additions & 3 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Typings for Node.js specific entry point.
*/

import type {Readable as NodeReadableStream} from 'node:stream';
import type {FileTypeResult, StreamOptions, AnyWebReadableStream, Detector} from './core.js';
import type {FileTypeResult, StreamOptions, AnyWebReadableStream, Detector, AnyWebReadableByteStreamWithFileType} from './core.js';
import {FileTypeParser} from './core.js';

export type ReadableStreamWithFileType = NodeReadableStream & {
Expand All @@ -25,6 +25,7 @@ export declare class NodeFileTypeParser extends FileTypeParser {
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
toDetectionStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
}

/**
Expand Down Expand Up @@ -68,10 +69,10 @@ A smaller sample size will result in lower probability of the best file type det
**Note:** Requires Node.js 14 or later.
@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@param options - May be used to override the default sample-size.
@returns A `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
@example
```
import got from 'got';
import {fileTypeStream} from 'file-type';
Expand All @@ -87,5 +88,6 @@ if (stream2.fileType?.mime === 'image/jpeg') {
```
*/
export function fileTypeStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
export function fileTypeStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;

export * from './core.js';
12 changes: 8 additions & 4 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Node.js specific entry point.
*/

import {ReadableStream as WebReadableStream} from 'node:stream/web';
import {pipeline, PassThrough} from 'node:stream';
import * as strtok3 from 'strtok3';
import {FileTypeParser, reasonableDetectionSizeInBytes} from './core.js';

Expand All @@ -26,7 +27,10 @@ export class NodeFileTypeParser extends FileTypeParser {
}

async toDetectionStream(readableStream, options = {}) {
const {default: stream} = await import('node:stream');
if (readableStream instanceof WebReadableStream) {
return super.toDetectionStream(readableStream, options);
}

const {sampleSize = reasonableDetectionSizeInBytes} = options;

return new Promise((resolve, reject) => {
Expand All @@ -36,8 +40,8 @@ export class NodeFileTypeParser extends FileTypeParser {
(async () => {
try {
// Set up output stream
const pass = new stream.PassThrough();
const outputStream = stream.pipeline ? stream.pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);
const pass = new PassThrough();
const outputStream = pipeline ? pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);

// Read the input stream and detect the filetype
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
Expand Down Expand Up @@ -70,7 +74,7 @@ export async function fileTypeFromStream(stream, fileTypeOptions) {
}

/**
Convenience wrapper around {@link NodeFileTypeParser#toDetectionStream}:
resolves to the original stream argument with an added `fileType` property.
@param readableStream - Node.js `Readable` or Web `ReadableStream` to examine.
@param options - May override the sample size and/or supply custom detectors.
*/
export async function fileTypeStream(readableStream, options = {}) {
	const parser = new NodeFileTypeParser(options);
	return parser.toDetectionStream(readableStream, options);
}

export {fileTypeFromTokenizer, fileTypeFromBuffer, fileTypeFromBlob, FileTypeParser, supportedMimeTypes, supportedExtensions} from './core.js';
60 changes: 51 additions & 9 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import process from 'node:process';
import path from 'node:path';
import {fileURLToPath} from 'node:url';
import fs from 'node:fs';
import {readFile} from 'node:fs/promises';
import stream from 'node:stream';
import test from 'ava';
import {readableNoopStream} from 'noop-stream';
Expand All @@ -26,7 +27,7 @@ const missingTests = new Set([
]);

const [nodeMajorVersion] = process.versions.node.split('.').map(Number);
const nodeVersionSupportingByeBlobStream = 20;
const nodeVersionSupportingByteBlobStream = 20;

const types = [...supportedExtensions].filter(ext => !missingTests.has(ext));

Expand Down Expand Up @@ -337,7 +338,7 @@ async function testFileNodeFromStream(t, ext, name) {
t.is(typeof fileType.mime, 'string', 'fileType.mime');
}

async function loadEntireFile(readable) {
async function loadEntireFileFromNodeReadable(readable) {
const chunks = [];
let totalLength = 0;

Expand All @@ -357,18 +358,58 @@ async function loadEntireFile(readable) {
return entireFile;
}

async function testStream(t, ext, name) {
// Verify that wrapping a Node.js Readable in fileTypeStream() yields a stream
// whose bytes are identical to reading the fixture file directly.
async function testStreamWithNodeStream(t, ext, name) {
	const fixturePath = path.join(__dirname, 'fixture', `${name ?? 'fixture'}.${ext}`);

	const detectionStream = await fileTypeStream(fs.createReadStream(fixturePath));
	const referenceStream = fs.createReadStream(fixturePath);

	const [detectedBytes, referenceBytes] = await Promise.all([
		loadEntireFileFromNodeReadable(detectionStream),
		loadEntireFileFromNodeReadable(referenceStream),
	]);

	t.true(areUint8ArraysEqual(detectedBytes, referenceBytes));
}

/**
Drains a Web ReadableStream completely and concatenates all chunks into a
single Uint8Array.
Fixes two defects of the previous version: the `while (bytesRead > 0)` loop
exited early if the stream ever yielded a zero-length chunk before `done`,
and the reader lock was never released.
@param webStream - A Web `ReadableStream` yielding `Uint8Array` chunks.
@returns A `Uint8Array` with the entire stream contents.
*/
async function loadEntireFileFromWebStream(webStream) {
	const reader = webStream.getReader();
	const chunks = [];
	let totalLength = 0;

	try {
		// Loop strictly until `done` — chunk length must not control termination.
		for (;;) {
			const {done, value} = await reader.read();
			if (done) {
				break;
			}

			chunks.push(value);
			totalLength += value.byteLength;
		}
	} finally {
		reader.releaseLock();
	}

	// Concatenate all chunks into a single Uint8Array
	const entireFile = new Uint8Array(totalLength);
	let offset = 0;
	for (const chunk of chunks) {
		entireFile.set(chunk, offset);
		offset += chunk.byteLength;
	}

	return entireFile;
}

// Verify that fileTypeStream() on a Web ReadableStream passes the fixture
// bytes through unchanged.
async function testStreamWithWebStream(t, ext, name) {
	const fixturePath = path.join(__dirname, 'fixture', `${name ?? 'fixture'}.${ext}`);

	// Load the fixture and expose it as a Web ReadableStream via a Blob
	const fileBuffer = await readFile(fixturePath);
	const blob = new Blob([fileBuffer]);
	const webStream = await fileTypeStream(blob.stream());

	const streamedBytes = await loadEntireFileFromWebStream(webStream);
	t.true(areUint8ArraysEqual(fileBuffer, streamedBytes));
}

test('Test suite must be able to detect Node.js major version', t => {
t.is(typeof nodeMajorVersion, 'number', 'Detected Node.js major version should be a number');
});
Expand All @@ -382,13 +423,14 @@ for (const type of types) {

_test(`${name}.${type} ${i++} .fileTypeFromFile() method - same fileType`, testFromFile, type, name);
_test(`${name}.${type} ${i++} .fileTypeFromBuffer() method - same fileType`, testFromBuffer, type, name);
if (nodeMajorVersion >= nodeVersionSupportingByeBlobStream) {
if (nodeMajorVersion >= nodeVersionSupportingByteBlobStream) {
// Blob requires to stream to BYOB ReadableStream, requiring Node.js ≥ 20
_test(`${name}.${type} ${i++} .fileTypeFromBlob() method - same fileType`, testFromBlob, type, name);
test(`${name}.${type} ${i++} .fileTypeStream() - identical Web Streams`, testStreamWithWebStream, type, name);
}

_test(`${name}.${type} ${i++} .fileTypeFromStream() Node.js method - same fileType`, testFileNodeFromStream, type, name);
test(`${name}.${type} ${i++} .fileTypeStream() - identical streams`, testStream, type, name);
_test(`${name}.${type} ${i++} .fileTypeStream() - identical Node.js Readable streams`, testStreamWithNodeStream, type, name);
}
} else {
const fixtureName = `fixture.${type}`;
Expand All @@ -397,7 +439,7 @@ for (const type of types) {
_test(`${type} ${i++} .fileTypeFromFile()`, testFromFile, type);
_test(`${type} ${i++} .fileTypeFromBuffer()`, testFromBuffer, type);
_test(`${type} ${i++} .fileTypeFromStream() Node.js`, testFileNodeFromStream, type);
test(`${type} ${i++} .fileTypeStream() - identical streams`, testStream, type);
test(`${type} ${i++} .fileTypeStream() - identical streams`, testStreamWithNodeStream, type);
}

if (Object.prototype.hasOwnProperty.call(falsePositives, type)) {
Expand Down Expand Up @@ -427,7 +469,7 @@ test('.fileTypeStream() method - short stream', async t => {
t.is(newStream.fileType, undefined);

// Test usability of returned stream
const bufferB = await loadEntireFile(newStream);
const bufferB = await loadEntireFileFromNodeReadable(newStream);
t.deepEqual(bufferA, bufferB);
});

Expand Down Expand Up @@ -708,7 +750,7 @@ const tokenizerPositionChanger = tokenizer => {
tokenizer.readBuffer(buffer, {length: 1, mayBeLess: true});
};

if (nodeMajorVersion >= nodeVersionSupportingByeBlobStream) {
if (nodeMajorVersion >= nodeVersionSupportingByteBlobStream) {
// Blob requires to stream to BYOB ReadableStream, requiring Node.js ≥ 20

test('fileTypeFromBlob should detect custom file type "unicorn" using custom detectors', async t => {
Expand Down

0 comments on commit 821e8be

Please sign in to comment.