Skip to content

Commit

Permalink
Merge pull request #124 from effigies/fix/prune-subdatasets
Browse files — browse the repository at this point in the history
feat: Add --prune option to prevent walking subtrees
  • Loading branch information
rwblair authored Dec 9, 2024
2 parents d296de3 + d5cb385 commit a411f4c
Showing 6 changed files with 48 additions and 11 deletions.
18 changes: 16 additions & 2 deletions src/files/deno.test.ts
Original file line number Diff line number Diff line change
@@ -12,9 +12,11 @@ await requestReadPermission()
// Use this file for testing file behavior
const testUrl = import.meta.url
const testPath = fromFileUrl(testUrl)
const testDir = dirname(testPath)
const testDir = dirname(testPath) // $REPO/src/files
const testFilename = basename(testPath)
const repoRoot = dirname(dirname(dirname(testPath)))
const ignore = new FileIgnoreRules([])
const prune = new FileIgnoreRules(['derivatives'], false)

Deno.test('Deno implementation of BIDSFile', async (t) => {
await t.step('implements basic file properties', () => {
@@ -53,7 +55,7 @@ Deno.test('Deno implementation of BIDSFile', async (t) => {
'strips BOM characters when reading UTF-8 via .text()',
async () => {
// BOM is invalid in JSON but shows up often from certain tools, so abstract handling it
const bomDir = join(testPath, '..', '..', 'tests')
const bomDir = join(repoRoot, 'src', 'tests')
const bomFilename = 'bom-utf8.json'
const file = new BIDSFileDeno(bomDir, bomFilename, ignore)
const text = await file.text()
@@ -75,4 +77,16 @@ Deno.test('Deno implementation of FileTree', async (t) => {
assert(testObj !== undefined)
assertEquals(testObj.path, `/${parent}/${testFilename}`)
})

// Checks that prune rules passed to readFileTree keep matching subtrees out of the resulting tree.
await t.step('implements pruning', async () => {
// Fixture dataset, resolved relative to the repository root.
const dsDir = join(repoRoot, 'tests', 'data', 'valid_dataset')
// A file nested beneath the dataset's top-level derivatives/ directory.
const derivFile =
'derivatives/fmriprep/sub-01/ses-01/func/sub-01_ses-01_task-rest_confounds.tsv.gz'

// Baseline: with no prune rules supplied, the derivatives file is reachable.
const fullTree = await readFileTree(dsDir)
assert(fullTree.get(derivFile))

// With the module-level `prune` rules (which list 'derivatives'), the same lookup must come back falsy.
const prunedTree = await readFileTree(dsDir, prune)
assert(!prunedTree.get(derivFile))
})
})
18 changes: 14 additions & 4 deletions src/files/deno.ts
Original file line number Diff line number Diff line change
@@ -120,17 +120,22 @@ async function _readFileTree(
rootPath: string,
relativePath: string,
ignore: FileIgnoreRules,
prune: FileIgnoreRules,
parent?: FileTree,
): Promise<FileTree> {
await requestReadPermission()
const name = basename(relativePath)
const tree = new FileTree(relativePath, name, parent, ignore)

for await (const dirEntry of Deno.readDir(join(rootPath, relativePath))) {
const thisPath = posix.join(relativePath, dirEntry.name)
if (prune.test(thisPath)) {
continue
}
if (dirEntry.isFile || dirEntry.isSymlink) {
const file = new BIDSFileDeno(
rootPath,
posix.join(relativePath, dirEntry.name),
thisPath,
ignore,
)
file.parent = tree
@@ -139,8 +144,9 @@ async function _readFileTree(
if (dirEntry.isDirectory) {
const dirTree = await _readFileTree(
rootPath,
posix.join(relativePath, dirEntry.name),
thisPath,
ignore,
prune,
tree,
)
tree.directories.push(dirTree)
@@ -152,9 +158,13 @@ async function _readFileTree(
/**
* Read in the target directory structure and return a FileTree
*/
export async function readFileTree(rootPath: string): Promise<FileTree> {
export async function readFileTree(
rootPath: string,
prune?: FileIgnoreRules,
): Promise<FileTree> {
prune ??= new FileIgnoreRules([], false)
const ignore = new FileIgnoreRules([])
const tree = await _readFileTree(rootPath, '/', ignore)
const tree = await _readFileTree(rootPath, '/', ignore, prune)
const bidsignore = tree.get('.bidsignore')
if (bidsignore) {
try {
9 changes: 7 additions & 2 deletions src/files/ignore.ts
Original file line number Diff line number Diff line change
@@ -27,10 +27,15 @@ const defaultIgnores = [
export class FileIgnoreRules {
#ignore: Ignore

constructor(config: string[]) {
constructor(
config: string[],
addDefaults: boolean = true,
) {
// @ts-expect-error
this.#ignore = ignore()
this.#ignore.add(defaultIgnores)
if (addDefaults) {
this.#ignore.add(defaultIgnores)
}
this.#ignore.add(config)
}

6 changes: 5 additions & 1 deletion src/main.ts
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ import type { Config } from './setup/options.ts'
import * as colors from '@std/fmt/colors'
import { readFileTree } from './files/deno.ts'
import { fileListToTree } from './files/browser.ts'
import { FileIgnoreRules } from './files/ignore.ts'
import { resolve } from '@std/path'
import { validate } from './validators/bids.ts'
import { consoleFormat, resultToJSONStr } from './utils/output.ts'
@@ -21,7 +22,10 @@ export async function main(): Promise<ValidationResult> {
setupLogging(options.debug)

const absolutePath = resolve(options.datasetPath)
const tree = await readFileTree(absolutePath)
const prune = options.prune
? new FileIgnoreRules(['derivatives', 'sourcedata', 'code'], false)
: undefined
const tree = await readFileTree(absolutePath, prune)

const config = options.config ? JSON.parse(Deno.readTextFileSync(options.config)) as Config : {}

5 changes: 5 additions & 0 deletions src/setup/options.ts
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@ export type ValidatorOptions = {
recursive?: boolean
outfile?: string
blacklistModalities: string[]
prune?: boolean
}

const modalityType = new EnumType<string>(
@@ -72,6 +73,10 @@ export const validateCommand: Command<void, void, any, string[], void> = new Com
'-r, --recursive',
'Validate datasets found in derivatives directories in addition to root dataset',
)
.option(
'-p, --prune',
'Prune derivatives and sourcedata directories on load (disables -r and will underestimate dataset size)',
)
.option(
'-o, --outfile <file:string>',
'File to write validation results to.',
3 changes: 1 addition & 2 deletions src/tests/regression.test.ts
Original file line number Diff line number Diff line change
@@ -3,13 +3,12 @@ import { pathsToTree } from '../files/filetree.ts'
import { validate } from '../validators/bids.ts'
import type { BIDSFile } from '../types/filetree.ts'


Deno.test('Regression tests', async (t) => {
await t.step('Verify ignored files in scans.tsv do not trigger error', async () => {
const paths = [
'/dataset_description.json',
'/sub-01/anat/sub-01_T1w.nii.gz',
'/sub-01/anat/sub-01_CT.nii.gz', // unknown file
'/sub-01/anat/sub-01_CT.nii.gz', // unknown file
'/sub-01/sub-01_scans.tsv',
]
const ignore = ['*_CT.nii.gz']

0 comments on commit a411f4c

Please sign in to comment.