Skip to content

Commit

Permalink
Wow
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Aug 7, 2024
1 parent 9c93c78 commit 9a6b0f4
Show file tree
Hide file tree
Showing 12 changed files with 383 additions and 462 deletions.
296 changes: 141 additions & 155 deletions packages/text-indexing/src/TextIndexing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,57 +13,64 @@ import {

export async function indexTracks(args: {
tracks: Track[]
outLocation?: string
outDir?: string
signal?: AbortSignal
attributes?: string[]
assemblies?: string[]
exclude?: string[]
attributesToIndex?: string[]
assemblyNames?: string[]
featureTypesToExclude?: string[]
indexType?: indexType
statusCallback: (message: string) => void
}) {
const {
tracks,
outLocation,
attributes,
exclude,
assemblies,
outDir,
attributesToIndex,
featureTypesToExclude,
assemblyNames,
indexType,
statusCallback,
signal,
} = args
const idxType = indexType || 'perTrack'
checkAbortSignal(signal)
await (idxType === 'perTrack'
? perTrackIndex(
? perTrackIndex({
tracks,
statusCallback,
outLocation,
attributes,
exclude,
outDir,
attributesToIndex,
featureTypesToExclude,
signal,
)
: aggregateIndex(
})
: aggregateIndex({
tracks,
statusCallback,
outLocation,
attributes,
assemblies,
exclude,
outDir,
attributesToIndex,
assemblyNames,
featureTypesToExclude,
signal,
))
}))
checkAbortSignal(signal)
return []
}

async function perTrackIndex(
tracks: Track[],
statusCallback: (message: string) => void,
outLocation?: string,
attributes?: string[],
exclude?: string[],
signal?: AbortSignal,
) {
const outFlag = outLocation || '.'
async function perTrackIndex({
tracks,
statusCallback,
outDir: paramOutDir,
attributesToIndex = ['Name', 'ID'],
featureTypesToExclude = ['exon', 'CDS'],
signal,
}: {
tracks: Track[]
statusCallback: (message: string) => void
outDir?: string
attributesToIndex?: string[]
featureTypesToExclude?: string[]
signal?: AbortSignal
}) {
const outFlag = paramOutDir || '.'

const isDir = fs.lstatSync(outFlag).isDirectory()
const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag
Expand All @@ -74,8 +81,6 @@ async function perTrackIndex(
}

// default settings
const attrs = attributes || ['Name', 'ID']
const excludeTypes = exclude || ['exon', 'CDS']
const force = true
const supportedTracks = tracks.filter(track =>
isSupportedIndexingAdapter(track.adapter?.type),
Expand All @@ -89,182 +94,163 @@ async function perTrackIndex(
)
continue
}
await indexDriver(
[trackConfig],
await indexDriver({
tracks: [trackConfig],
outDir,
attrs,
id,
true,
excludeTypes,
attributesToIndex,
name: id,
featureTypesToExclude,
assemblyNames,
statusCallback,
signal,
)
})
}
}

async function aggregateIndex(
tracks: Track[],
statusCallback: (message: string) => void,
outLocation?: string,
attributes?: string[],
assemblies?: string[],
exclude?: string[],
signal?: AbortSignal,
) {
const outFlag = outLocation || '.'

async function aggregateIndex({
tracks,
statusCallback,
outDir: paramOutDir,
attributesToIndex = ['Name', 'ID'],
featureTypesToExclude = ['exon', 'CDS'],
signal,
assemblyNames,
}: {
tracks: Track[]
statusCallback: (message: string) => void
outDir?: string
attributesToIndex?: string[]
assemblyNames?: string[]
featureTypesToExclude?: string[]
signal?: AbortSignal
}) {
const outFlag = paramOutDir || '.'
const isDir = fs.lstatSync(outFlag).isDirectory()
const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag
const outDir = path.dirname(confFilePath)
const trixDir = path.join(outDir, 'trix')
if (!fs.existsSync(trixDir)) {
fs.mkdirSync(trixDir)
}
if (!assemblies) {
if (!assemblyNames) {
throw new Error(
'No assemblies passed. Assmeblies required for aggregate indexes',
)
}
for (const asm of assemblies) {
for (const asm of assemblyNames) {
// console.log('Indexing assembly ' + asm + '...')
const id = `${asm}-index`
// default settings
const attrs = attributes || ['Name', 'ID']
const excludeTypes = exclude || ['exon', 'CDS']
// const force = true
const quiet = true
// supported tracks for given assembly
const supportedTracks = tracks
.filter(track => isSupportedIndexingAdapter(track.adapter?.type))
.filter(track => (asm ? track.assemblyNames.includes(asm) : true))

await indexDriver(
supportedTracks,
outDir,
attrs,
id,
quiet,
excludeTypes,
[asm],
await indexDriver({
tracks: supportedTracks,
outDir: outDir,
attributesToIndex,
name: id,
featureTypesToExclude,
assemblyNames: [asm],
statusCallback,
signal,
)
})
}
}

async function indexDriver(
tracks: Track[],
idxLocation: string,
attributes: string[],
name: string,
quiet: boolean,
exclude: string[],
assemblyNames: string[],
statusCallback: (message: string) => void,
signal?: AbortSignal,
) {
async function indexDriver({
tracks,
outDir,
attributesToIndex,
name,
featureTypesToExclude,
assemblyNames,
statusCallback,
signal,
}: {
tracks: Track[]
outDir: string
attributesToIndex: string[]
name: string
featureTypesToExclude: string[]
assemblyNames: string[]
statusCallback: (message: string) => void
signal?: AbortSignal
}) {
const readable = Readable.from(
indexFiles(
indexFiles({
tracks,
attributes,
idxLocation,
quiet,
exclude,
attributesToIndex,
outDir,
featureTypesToExclude,
statusCallback,
signal,
),
}),
)
statusCallback('Indexing files.')
const ixIxxStream = await runIxIxx(readable, idxLocation, name)
const ixIxxStream = await runIxIxx(readable, outDir, name)
checkAbortSignal(signal)
await generateMeta({
configs: tracks,
attributes,
outDir: idxLocation,
attributesToIndex,
outDir,
name,
exclude,
featureTypesToExclude,
assemblyNames,
})
checkAbortSignal(signal)
return ixIxxStream
}

async function* indexFiles(
tracks: Track[],
attributes: string[],
outLocation: string,
quiet: boolean,
typesToExclude: string[],
statusCallback: (message: string) => void,
signal?: AbortSignal,
) {
async function* indexFiles({
tracks,
attributesToIndex: idx1,
outDir,
featureTypesToExclude: edx1,
statusCallback,
}: {
tracks: Track[]
attributesToIndex: string[]
outDir: string
featureTypesToExclude: string[]
statusCallback: (message: string) => void
signal?: AbortSignal
}) {
for (const track of tracks) {
const { adapter, textSearching } = track
const { type } = adapter
const {
indexingFeatureTypesToExclude: types = typesToExclude,
indexingAttributes: attrs = attributes,
indexingFeatureTypesToExclude: featureTypesToExclude = edx1,
indexingAttributes: attributesToIndex = idx1,
} = textSearching || {}
// currently only supporting GFF3Tabix and VCFTabix
switch (type) {
case 'Gff3TabixAdapter': {
yield* indexGff3(
track,
attrs,
getLoc('gffGzLocation', track),
outLocation,
types,
quiet,
statusCallback,
signal,
)

break
}
case 'Gff3Adapter': {
yield* indexGff3(
track,
attrs,
getLoc('gffLocation', track),
outLocation,
types,
quiet,
statusCallback,
signal,
)

break
}
case 'VcfTabixAdapter': {
yield* indexVcf(
track,
attrs,
getLoc('vcfGzLocation', track),
outLocation,
types,
quiet,
statusCallback,
signal,
)

break
}
case 'VcfAdapter': {
yield* indexVcf(
track,
attrs,
getLoc('vcfLocation', track),
outLocation,
types,
quiet,
statusCallback,
signal,
)

break
}
// No default
let myTotalBytes: number | undefined
if (type === 'Gff3TabixAdapter' || type === 'Gff3Adapter') {
yield* indexGff3({
config: track,
attributesToIndex,
inLocation: getLoc('gffGzLocation', track),
outDir,
featureTypesToExclude,
onStart: totalBytes => {
myTotalBytes = totalBytes
},
onUpdate: progressBytes => {
statusCallback(`${progressBytes}/${myTotalBytes}`)
},
})
} else if (type === 'VcfTabixAdapter' || type === 'VcfAdapter') {
yield* indexVcf({
config: track,
attributesToIndex,
inLocation: getLoc('vcfGzLocation', track),
outDir,
onStart: totalBytes => {
myTotalBytes = totalBytes
},
onUpdate: progressBytes => {
statusCallback(`${progressBytes}/${myTotalBytes}`)
},
})
}
}
return
Expand Down
Loading

0 comments on commit 9a6b0f4

Please sign in to comment.