diff --git a/packages/__tests__/index.js b/packages/__tests__/index.js new file mode 100644 index 00000000..bd63458b --- /dev/null +++ b/packages/__tests__/index.js @@ -0,0 +1,107 @@ +import S3v3 from '@aws-sdk/client-s3'; +import { FsAwsS3 } from '@chunkd/source-aws'; +import { S3LikeV3 } from '@chunkd/source-aws-v3'; +import { FsGoogleStorage } from '@chunkd/source-google-cloud'; +import { Storage } from '@google-cloud/storage'; +import S3 from 'aws-sdk/clients/s3.js'; +import o from 'ospec'; +import { fsa } from '../fs/build/index.node.js'; + +fsa.register(`s3://blacha-chunkd-test/v2`, new FsAwsS3(new S3())); +fsa.register(`s3://blacha-chunkd-test/v3`, new FsAwsS3(new S3LikeV3(new S3v3.S3()))); +fsa.register(`gs://blacha-chunkd-test/`, new FsGoogleStorage(new Storage())); + +const TestFiles = [ + { path: 'a/b/file-a-b-1.txt', buffer: Buffer.from('a/b/file-a-b-1.txt') }, + { path: 'a/b/file-a-b-2', buffer: Buffer.from('a/b/file-a-b-2') }, + { path: 'a/file-a-1', buffer: Buffer.from('file-a-1') }, + { path: 'c/file-c-1', buffer: Buffer.from('file-c-1') }, + { path: 'd/file-d-1', buffer: Buffer.from('file-d-1') }, + { path: 'file-1', buffer: Buffer.from('file-1') }, + { path: 'file-2', buffer: Buffer.from('file-2') }, + { path: '🦄.json', buffer: Buffer.from('🦄') }, +]; + +async function setupTestData(prefix) { + try { + const existing = await fsa.toArray(fsa.list(prefix)); + if (existing.length === TestFiles.length) return; + } catch (e) { + //noop + } + for (const file of TestFiles) { + const target = fsa.join(prefix, file.path); + console.log(target); + await fsa.write(target, file.buffer); + } +} + +function removeSlashes(f) { + if (f.startsWith('/')) f = f.slice(1); + if (f.endsWith('/')) f = f.slice(0, f.length - 1); + return f; +} + +function testPrefix(prefix) { + o.spec(prefix, () => { + o.specTimeout(5000); + o.before(async () => { + await setupTestData(prefix); + }); + + o('should list recursive:default ', async () => { + const files = await fsa.toArray(fsa.list(prefix)); + o(files.length).equals(TestFiles.length); + }); + + o('should list recursive:true ', async () => { + const files = await fsa.toArray(fsa.list(prefix, { recursive: true })); + o(files.length).equals(TestFiles.length); + + for (const file of TestFiles) { + o(files.find((f) => f.endsWith(file.path))).notEquals(undefined); + } + }); + + o('should list recursive:false ', async () => { + const files = await fsa.toArray(fsa.list(prefix, { recursive: false })); + o(files.length).equals(6); + o(files.map((f) => f.slice(prefix.length)).map(removeSlashes)).deepEquals([ + 'a', + 'c', + 'd', + 'file-1', + 'file-2', + '🦄.json', + ]); + }); + + o('should list folders', async () => { + const files = await fsa.toArray(fsa.details(prefix, { recursive: false })); + o( + files + .filter((f) => f.isDirectory) + .map((f) => f.path.slice(prefix.length)) + .map(removeSlashes), + ).deepEquals(['a', 'c', 'd']); + }); + + o('should read a file', async () => { + const file = await fsa.read(fsa.join(prefix, TestFiles[0].path)); + o(file.toString()).equals(TestFiles[0].buffer.toString()); + }); + + o('should head a file', async () => { + const ret = await fsa.head(fsa.join(prefix, TestFiles[0].path)); + o(ret.path).equals(fsa.join(prefix, TestFiles[0].path)); + o(ret.size).equals(TestFiles[0].buffer.length); + }); + }); +} + +testPrefix('/tmp/blacha-chunkd-test/'); +// testPrefix('s3://blacha-chunkd-test/v2/'); +// testPrefix('s3://blacha-chunkd-test/v3/'); +// testPrefix('gs://blacha-chunkd-test/'); + +o.run(); diff --git a/packages/core/src/fs.ts b/packages/core/src/fs.ts index fbbbdc02..b8c6bd6a 100644 --- a/packages/core/src/fs.ts +++ b/packages/core/src/fs.ts @@ -9,6 +9,9 @@ export interface FileInfo { * undefined if no size found */ size?: number; + + /** Is this file a directory */ + isDirectory?: boolean; } export interface WriteOptions { @@ -18,6 +21,14 @@ export interface WriteOptions { contentType?: string; } +export interface ListOptions { + /** + * List recursively + * @default true + */ + recursive?: boolean; +} + export interface FileSystem { /** * Protocol used for communication @@ -33,12 +44,10 @@ export interface FileSystem { stream(filePath: string): Readable; /** Write a file from either a buffer or stream */ write(filePath: string, buffer: Buffer | Readable | string, opts?: Partial): Promise; - /** Recursively list all files in path */ - list(filePath: string): AsyncGenerator; - /** Recursively list all files in path with additional details */ - details(filePath: string): AsyncGenerator; - /** Does the path exists */ - exists(filePath: string): Promise; + /** list all files in path */ + list(filePath: string, opt?: ListOptions): AsyncGenerator; + /** list all files with file info in path */ + details(filePath: string, opt?: ListOptions): AsyncGenerator; /** Get information about the path */ head(filePath: string): Promise; /** Create a file source to read chunks out of */ diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3fa8b7bb..c6ff119b 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -3,7 +3,7 @@ export { ChunkSourceBase } from './chunk.source.js'; export { SourceMemory } from './chunk.source.memory.js'; export { ChunkSource } from './source.js'; export { ErrorCodes, CompositeError } from './composite.js'; -export { FileSystem, FileInfo, WriteOptions } from './fs.js'; +export { FileSystem, FileInfo, WriteOptions, ListOptions } from './fs.js'; export function isRecord(value: unknown): value is Record { return typeof value === 'object' && value !== null; diff --git a/packages/fs/src/fs.abstraction.ts b/packages/fs/src/fs.abstraction.ts index 9bc1bb9e..f125aebf 100644 --- a/packages/fs/src/fs.abstraction.ts +++ b/packages/fs/src/fs.abstraction.ts @@ -1,5 +1,6 @@ import type { Readable } from 'stream'; import { ChunkSource, FileInfo, FileSystem, WriteOptions } from '@chunkd/core'; +import { ListOptions } from '@chunkd/core'; export type FileWriteTypes = Buffer | Readable | string | Record | Array; @@ -96,8 +97,8 @@ export class FileSystemAbstraction implements FileSystem { * @param filePath file path to search * @returns list of files inside that path */ - list(filePath: string): AsyncGenerator { - return this.get(filePath).list(filePath); + list(filePath: string, opts?: ListOptions): AsyncGenerator { + return this.get(filePath).list(filePath, opts); } /** @@ -107,8 +108,8 @@ export class FileSystemAbstraction implements FileSystem { * @param filePath file path to search * @returns list of files inside that path */ - details(filePath: string): AsyncGenerator { - return this.get(filePath).details(filePath); + details(filePath: string, opts?: ListOptions): AsyncGenerator { + return this.get(filePath).details(filePath, opts); } /** @@ -118,7 +119,9 @@ export class FileSystemAbstraction implements FileSystem { * @returns true if file exists, false otherwise */ exists(filePath: string): Promise { - return this.get(filePath).exists(filePath); + return this.get(filePath) + .head(filePath) + .then((f) => f != null); } /** @@ -168,3 +171,17 @@ export class FileSystemAbstraction implements FileSystem { } export const fsa = new FileSystemAbstraction(); + +// async function main(): Promise { +// for await (const f of fsa.list('')) { +// console.log(f); +// } + +// for await (const f of fsa.list('', { details: true })) { +// console.log(f.path); +// } + +// for await (const f of fsa.list('', { details: false, recursive: false })) { +// console.log(f.path); +// } +// } diff --git a/packages/source-aws/src/__test__/s3.fs.test.ts b/packages/source-aws/src/__test__/s3.fs.test.ts index 46c1885f..090da36a 100644 --- a/packages/source-aws/src/__test__/s3.fs.test.ts +++ b/packages/source-aws/src/__test__/s3.fs.test.ts @@ -78,9 +78,14 @@ o.spec('file.s3', () => { ]); o(stub.callCount).equals(5); const [firstCall] = stub.args[0] as any; - o(firstCall).deepEquals({ Bucket: 'bucket', Prefix: undefined, ContinuationToken: undefined }); + o(firstCall).deepEquals({ + Bucket: 'bucket', + Prefix: undefined, + ContinuationToken: undefined, + Delimiter: undefined, + }); const [secondCall] = stub.args[1] as any; - o(secondCall).deepEquals({ Bucket: 'bucket', Prefix: undefined, ContinuationToken: 1 }); + o(secondCall).deepEquals({ Bucket: 'bucket', Prefix: undefined, ContinuationToken: 1, Delimiter: undefined }); }); o('should allow listing of bucket', async () => { @@ -94,7 +99,12 @@ o.spec('file.s3', () => { o(data).deepEquals(['s3://bucket/FirstFile']); o(stub.callCount).equals(1); const [firstCall] = stub.args[0] as any; - o(firstCall).deepEquals({ Bucket: 'bucket', Prefix: undefined, ContinuationToken: undefined }); + o(firstCall).deepEquals({ + Bucket: 'bucket', + Prefix: undefined, + ContinuationToken: undefined, + Delimiter: undefined, + }); }); o('should allow listing of bucket with prefix', async () => { @@ -108,7 +118,7 @@ o.spec('file.s3', () => { o(data).deepEquals(['s3://bucket/keyFirstFile']); o(stub.callCount).equals(1); const [firstCall] = stub.args[0] as any; - o(firstCall).deepEquals({ Bucket: 'bucket', Prefix: 'key', ContinuationToken: undefined }); + o(firstCall).deepEquals({ Bucket: 'bucket', Prefix: 'key', ContinuationToken: undefined, Delimiter: undefined }); }); }); diff --git a/packages/source-aws/src/s3.fs.ts b/packages/source-aws/src/s3.fs.ts index 6a1411f8..bacb70d3 100644 --- a/packages/source-aws/src/s3.fs.ts +++ b/packages/source-aws/src/s3.fs.ts @@ -1,4 +1,4 @@ -import { FileInfo, FileSystem, isRecord, parseUri, WriteOptions } from '@chunkd/core'; +import { FileInfo, FileSystem, isRecord, ListOptions, parseUri, WriteOptions } from '@chunkd/core'; import type { Readable } from 'stream'; import { getCompositeError, SourceAwsS3 } from './s3.source.js'; import { ListRes, S3Like, toPromise } from './type.js'; @@ -35,29 +35,36 @@ export class FsAwsS3 implements FileSystem { /** Parse a s3:// URI into the bucket and key components */ - async *list(filePath: string): AsyncGenerator { - for await (const obj of this.details(filePath)) yield obj.path; + async *list(filePath: string, opts?: ListOptions): AsyncGenerator { + for await (const obj of this.details(filePath, opts)) yield obj.path; } - async *details(filePath: string): AsyncGenerator { - const opts = parseUri(filePath); - if (opts == null) return; + async *details(filePath: string, opts?: ListOptions): AsyncGenerator { + const loc = parseUri(filePath); + if (loc == null) return; let ContinuationToken: string | undefined = undefined; - const Bucket = opts.bucket; - const Prefix = opts.key; + const Delimiter: string | undefined = opts?.recursive === false ? '/' : undefined; + const Bucket = loc.bucket; + const Prefix = loc.key; let count = 0; try { while (true) { count++; - const res: ListRes = await toPromise(this.s3.listObjectsV2({ Bucket, Prefix, ContinuationToken })); + const res: ListRes = await toPromise(this.s3.listObjectsV2({ Bucket, Prefix, ContinuationToken, Delimiter })); - // Failed to get any content abort - if (res.Contents == null) break; + if (res.CommonPrefixes != null) { + for (const prefix of res.CommonPrefixes) { + if (prefix.Prefix == null) continue; + yield { path: `s3://${Bucket}/${prefix.Prefix}`, isDirectory: true }; + } + } - for (const obj of res.Contents) { - if (obj.Key == null) continue; - yield { path: `s3://${Bucket}/${obj.Key}`, size: obj.Size }; + if (res.Contents != null) { + for (const obj of res.Contents) { + if (obj.Key == null) continue; + yield { path: `s3://${Bucket}/${obj.Key}`, size: obj.Size }; + } } // Nothing left to fetch diff --git a/packages/source-aws/src/type.ts b/packages/source-aws/src/type.ts index 3f8b4033..6ff94dfb 100644 --- a/packages/source-aws/src/type.ts +++ b/packages/source-aws/src/type.ts @@ -21,12 +21,13 @@ export type UploadReq = Location & { }; export type UploadRes = unknown; -export type ListReq = { Bucket: string; Prefix?: string; ContinuationToken?: string }; +export type ListReq = { Bucket: string; Prefix?: string; ContinuationToken?: string; Delimiter?: string }; export type ListResContents = { Key?: string; Size?: number }; export type ListRes = { IsTruncated?: boolean; NextContinuationToken?: string; Contents?: ListResContents[]; + CommonPrefixes?: { Prefix?: string }[]; }; export type HeadReq = Location; diff --git a/packages/source-file/src/__test__/fs.file.test.ts b/packages/source-file/src/__test__/fs.file.test.ts new file mode 100644 index 00000000..6d08eb3e --- /dev/null +++ b/packages/source-file/src/__test__/fs.file.test.ts @@ -0,0 +1,65 @@ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import o from 'ospec'; +import path from 'path'; +import 'source-map-support/register.js'; +import { fileURLToPath } from 'url'; +import { FsFile } from '../file.fs.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +async function toArray(generator: AsyncGenerator): Promise { + const output: T[] = []; + for await (const o of generator) output.push(o); + return output; +} + +o.spec('LocalFileSystem', () => { + const fs = new FsFile(); + + o('should read a file', async () => { + const buf = await fs.read(path.join(__dirname, 'fs.file.test.js')); + o(buf.toString().includes("import o from 'ospec'")).equals(true); + }); + + o('should 404 when file not found', async () => { + try { + await fs.read(path.join(__dirname, 'NOT A FILE.js')); + o(true).equals(false); // should have errored + } catch (e: any) { + o(e.code).equals(404); + } + }); + + o('should head/exists a file', async () => { + const ref = await fs.head(path.join(__dirname, 'fs.file.test.js')); + o(ref).notEquals(null); + }); + + o('should list files', async () => { + const files = await toArray(fs.list(__dirname)); + + o(files.length > 3).equals(true); + o(files.find((f) => f.endsWith('__test__/fs.file.test.js'))).notEquals(undefined); + }); + + o('should list files with details', async () => { + const files = await toArray(fs.details(__dirname)); + + o(files.length > 3).equals(true); + o(files.find((f) => f.path.endsWith('__test__/fs.file.test.js'))).notEquals(undefined); + o(files.filter((f) => f.isDirectory)).deepEquals([]); + }); + + o('should list recursively', async () => { + const files = await toArray(fs.details(path.join(__dirname, '..'))); + o(files.find((f) => f.path.endsWith('__test__/fs.file.test.js'))).notEquals(undefined); + o(files.filter((f) => f.isDirectory)).deepEquals([]); + }); + + o('should list folders when not recursive', async () => { + const files = await toArray(fs.details(path.join(__dirname, '..'), { recursive: false })); + // In a sub folder shouldn't find it + o(files.find((f) => f.path.endsWith('__test__/fs.file.test.js'))).equals(undefined); + o(files.filter((f) => f.isDirectory).length).deepEquals(1); + }); +}); diff --git a/packages/source-file/src/file.fs.ts b/packages/source-file/src/file.fs.ts index 9203806f..bf20035d 100644 --- a/packages/source-file/src/file.fs.ts +++ b/packages/source-file/src/file.fs.ts @@ -1,7 +1,7 @@ import fs from 'fs'; import path from 'path'; import { Readable } from 'stream'; -import { CompositeError, FileInfo, FileSystem, isRecord } from '@chunkd/core'; +import { CompositeError, FileInfo, FileSystem, isRecord, ListOptions } from '@chunkd/core'; import { SourceFile } from './file.source.js'; export type FsError = { code: string } & Error; @@ -24,13 +24,13 @@ export class FsFile implements FileSystem { return new SourceFile(filePath); } - async *list(filePath: string): AsyncGenerator { + async *list(filePath: string, opts?: ListOptions): AsyncGenerator { try { const files = await fs.promises.readdir(filePath, { withFileTypes: true }); const resolve = path.resolve(filePath); for (const file of files) { const targetPath = path.join(resolve, file.name); - if (file.isDirectory()) yield* this.list(targetPath); + if (file.isDirectory() && opts?.recursive !== false) yield* this.list(targetPath); else yield targetPath; } } catch (e) { @@ -38,8 +38,8 @@ export class FsFile implements FileSystem { } } - async *details(filePath: string): AsyncGenerator { - for await (const file of this.list(filePath)) { + async *details(filePath: string, opts?: ListOptions): AsyncGenerator { + for await (const file of this.list(filePath, opts)) { const res = await this.head(file); if (res == null) continue; yield res; @@ -64,10 +64,6 @@ export class FsFile implements FileSystem { } } - exists(filePath: string): Promise { - return this.head(filePath).then((f) => f != null); - } - async write(filePath: string, buf: Buffer | Readable | string): Promise { const folderPath = path.dirname(filePath); await fs.promises.mkdir(folderPath, { recursive: true }); diff --git a/packages/source-google-cloud/src/gcp.fs.ts b/packages/source-google-cloud/src/gcp.fs.ts index f2f76f32..236218dd 100644 --- a/packages/source-google-cloud/src/gcp.fs.ts +++ b/packages/source-google-cloud/src/gcp.fs.ts @@ -1,4 +1,4 @@ -import { FileInfo, FileSystem, isRecord, parseUri, WriteOptions } from '@chunkd/core'; +import { FileInfo, FileSystem, isRecord, ListOptions, parseUri, WriteOptions } from '@chunkd/core'; import { Storage } from '@google-cloud/storage'; import { Readable } from 'stream'; import { SourceGoogleStorage } from './gcp.source.js'; @@ -42,25 +42,28 @@ export class FsGoogleStorage implements FileSystem { static parse = parseUri; parse = parseUri; - async *list(filePath: string): AsyncGenerator { - for await (const obj of this.details(filePath)) yield obj.path; + async *list(filePath: string, opts?: ListOptions): AsyncGenerator { + for await (const obj of this.details(filePath, opts)) yield obj.path; } - async *details(filePath: string): AsyncGenerator { - const opts = this.parse(filePath); - if (opts == null) throw new Error(`GoogleStorage: Failed to list: "${filePath}"`); + async *details(filePath: string, opts?: ListOptions): AsyncGenerator { + const loc = this.parse(filePath); + if (loc == null) throw new Error(`GoogleStorage: Failed to list: "${filePath}"`); - const bucket = this.storage.bucket(opts.bucket); - const [files, , metadata] = await bucket.getFiles({ prefix: opts.key, autoPaginate: false, delimiter: '/' }); + const bucket = this.storage.bucket(loc.bucket); + const [files, , metadata] = await bucket.getFiles({ prefix: loc.key, autoPaginate: false, delimiter: '/' }); if (files != null && files.length > 0) { for (const file of files) { - yield { path: join(`gs://${opts.bucket}`, file.name), size: Number(file.metadata.size) }; + yield { path: join(`gs://${loc.bucket}`, file.name), size: Number(file.metadata.size) }; } } // Recurse down if (metadata != null && metadata.prefixes != null) { - for (const prefix of metadata.prefixes) yield* this.details(join(`gs://${opts.bucket}`, prefix)); + for (const prefix of metadata.prefixes) { + if (opts?.recursive !== false) yield* this.details(join(`gs://${loc.bucket}`, prefix), opts); + else yield { path: join(`gs://${loc.bucket}`, prefix), isDirectory: true }; + } } } diff --git a/packages/source-http/src/http.fs.ts b/packages/source-http/src/http.fs.ts index 27838bc4..7ae04e15 100644 --- a/packages/source-http/src/http.fs.ts +++ b/packages/source-http/src/http.fs.ts @@ -25,7 +25,6 @@ export class FsHttp implements FileSystem { async *list(filePath: string): AsyncGenerator { throw new Error(`Unable to "list" on ${filePath}`); } - async *details(filePath: string): AsyncGenerator { throw new Error(`Unable to "details" on ${filePath}`); } @@ -42,21 +41,11 @@ export class FsHttp implements FileSystem { return Buffer.from(await res.arrayBuffer()); } - exists(filePath: string): Promise { - return this.head(filePath).then((f) => f != null); - } - async write(filePath: string): Promise { throw new Error(`Unable to "write" on ${filePath}`); } stream(filePath: string): Readable { throw new Error(`Unable to "stream" on ${filePath}`); - - // TODO - // const res = await SourceHttp.fetch(filePath, { method: 'HEAD' }); - // if (!res.ok || res.body == null) throw getCompositeError(new Error(res.statusText), `Failed to head: ${filePath}`); - - // return res.body as unknown as Readable; } }