-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create an adapter for BLAST's tabular output
- Loading branch information
1 parent
03f6b7d
commit 5a6c786
Showing
10 changed files
with
19,759 additions
and
63 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import { toArray } from 'rxjs/operators' | ||
import { firstValueFrom } from 'rxjs' | ||
import Adapter from './BlastTabularAdapter' | ||
import MyConfigSchema from './configSchema' | ||
|
||
// Loads the bundled peach-vs-grape BLAST table and checks that features can
// be fetched from either side of the comparison: 'peach' is listed first in
// assemblyNames (query side), 'grape' second (target side).
test('adapter can fetch features from peach_vs_grape.tsv', async () => {
  const adapter = new Adapter(
    MyConfigSchema.create({
      blastTableLocation: {
        localPath: require.resolve('./test_data/peach_vs_grape.tsv'),
        locationType: 'LocalPathLocation',
      },
      assemblyNames: ['peach', 'grape'],
    }),
  )

  // Region on the first (query) assembly
  const peachFeatures$ = adapter.getFeatures({
    refName: 'Pp05',
    start: 0,
    end: 200000,
    assemblyName: 'peach',
  })

  // Region on the second (target) assembly
  const grapeFeatures$ = adapter.getFeatures({
    refName: 'chr18',
    start: 0,
    end: 200000,
    assemblyName: 'grape',
  })

  const peachFeatures = await firstValueFrom(peachFeatures$.pipe(toArray()))
  const grapeFeatures = await firstValueFrom(grapeFeatures$.pipe(toArray()))

  expect(peachFeatures.length).toBe(1675)
  expect(grapeFeatures.length).toBe(2471)
  // Each feature reports the refName of the side that was queried
  expect(peachFeatures[0]!.get('refName')).toBe('Pp05')
  expect(grapeFeatures[0]!.get('refName')).toBe('chr18')
})
256 changes: 256 additions & 0 deletions
256
plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,256 @@ | ||
import { unzip } from '@gmod/bgzf-filehandle' | ||
import { readConfObject } from '@jbrowse/core/configuration' | ||
import { | ||
BaseFeatureDataAdapter, | ||
BaseOptions, | ||
} from '@jbrowse/core/data_adapters/BaseAdapter' | ||
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache' | ||
import PluginManager from '@jbrowse/core/PluginManager' | ||
import { ObservableCreate } from '@jbrowse/core/util/rxjs' | ||
import { doesIntersect2, Feature, isGzip, Region } from '@jbrowse/core/util' | ||
import { openLocation } from '@jbrowse/core/util/io' | ||
|
||
import { parseLineByLine } from '../util' | ||
import BlastTabularAdapterConfigType from './configSchema' | ||
import SyntenyFeature from '../SyntenyFeature' | ||
|
||
type BlastTabularAdapterConfig = ReturnType< | ||
typeof BlastTabularAdapterConfigType.create | ||
> | ||
|
||
// Blast output column names/descriptions taken from
// https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_Options_for_the_commandline_a_
/** One row of a 12-column tabular BLAST result, with numeric columns parsed. */
interface BlastRecord {
  /** Query Seq-id */
  qseqid: string
  /** Subject Seq-id */
  sseqid: string
  /** Percentage of identical matches */
  pident: number
  /** Alignment length */
  length: number
  /** Number of mismatches */
  mismatch: number
  /** Number of gap openings */
  gapopen: number
  /** Start of alignment in query */
  qstart: number
  /** End of alignment in query */
  qend: number
  /** Start of alignment in subject */
  sstart: number
  /** End of alignment in subject */
  send: number
  /** Expect value */
  evalue: number
  /** Bit score */
  bitscore: number
}

/**
 * Parses a single tab-separated line of tabular BLAST output.
 *
 * The first twelve columns are read in the order qseqid, sseqid, pident,
 * length, mismatch, gapopen, qstart, qend, sstart, send, evalue, bitscore.
 * Any additional columns are ignored.
 *
 * @param line - one line of the BLAST table, without its line terminator
 * @returns the parsed record, or `undefined` when the line carries no
 * alignment: blank lines and `#` comment lines are skipped silently, while
 * lines with missing columns or non-numeric values in numeric columns are
 * skipped after logging a warning
 */
export function parseBlastLine(line: string): BlastRecord | undefined {
  // Blank lines and '#'-prefixed comment lines are not data rows, so they are
  // skipped without emitting one warning per line.
  const trimmed = line.trim()
  if (trimmed === '' || trimmed.startsWith('#')) {
    return undefined
  }

  const [
    qseqid,
    sseqid,
    pident,
    length,
    mismatch,
    gapopen,
    qstart,
    qend,
    sstart,
    send,
    evalue,
    bitscore,
  ] = line.split('\t')

  // Every column is still a string here, so this rejects only missing or
  // empty columns; a literal '0' is a non-empty string and passes.
  if (
    !(
      qseqid &&
      sseqid &&
      pident &&
      length &&
      mismatch &&
      gapopen &&
      qstart &&
      qend &&
      sstart &&
      send &&
      evalue &&
      bitscore
    )
  ) {
    console.warn('Invalid BLAST line')
    console.warn(line)
    return undefined
  }

  const record: BlastRecord = {
    qseqid,
    sseqid,
    pident: Number.parseFloat(pident),
    length: Number.parseInt(length, 10),
    mismatch: Number.parseInt(mismatch, 10),
    gapopen: Number.parseInt(gapopen, 10),
    qstart: Number.parseInt(qstart, 10),
    qend: Number.parseInt(qend, 10),
    sstart: Number.parseInt(sstart, 10),
    send: Number.parseInt(send, 10),
    evalue: Number.parseFloat(evalue),
    bitscore: Number.parseFloat(bitscore),
  }

  // A row whose numeric columns do not parse (for example a column-header
  // row) would otherwise be returned as a record full of NaN values.
  const numericValues = [
    record.pident,
    record.length,
    record.mismatch,
    record.gapopen,
    record.qstart,
    record.qend,
    record.sstart,
    record.send,
    record.evalue,
    record.bitscore,
  ]
  if (numericValues.some(value => Number.isNaN(value))) {
    console.warn('Invalid BLAST line')
    console.warn(line)
    return undefined
  }

  return record
}
|
||
/**
 * Feature adapter over a tabular BLAST result file.
 *
 * The table is read in full, optionally gunzipped, and parsed into
 * BlastRecord rows. Each row can be viewed from either side of the
 * comparison: the first configured assembly maps to the query columns
 * (qseqid/qstart/qend) and the second to the subject columns
 * (sseqid/sstart/send).
 */
export default class BlastTabularAdapter extends BaseFeatureDataAdapter {
  /**
   * Cached load of the parsed table. It is created on first use instead of in
   * the constructor, so a failed load cannot surface as an unhandled promise
   * rejection before any consumer awaits it.
   */
  private data?: Promise<BlastRecord[]>

  public static capabilities = ['getFeatures', 'getRefNames']

  constructor(
    public config: BlastTabularAdapterConfig,
    public getSubAdapter?: getSubAdapterType,
    public pluginManager?: PluginManager,
  ) {
    super(config, getSubAdapter, pluginManager)
  }

  /**
   * Reads the file at the `blastTableLocation` slot, gunzips it when the
   * buffer is gzip data, and parses it line by line with parseBlastLine.
   *
   * @param config - adapter configuration providing `blastTableLocation`
   * @returns the parsed records
   */
  async setup(config: BlastTabularAdapterConfig): Promise<BlastRecord[]> {
    const pm = this.pluginManager
    const blastTableLocation = openLocation(
      readConfObject(config, 'blastTableLocation'),
      pm,
    )
    const buffer = await blastTableLocation.readFile()
    const buf = isGzip(buffer) ? await unzip(buffer) : buffer
    return parseLineByLine(buf, parseBlastLine)
  }

  /**
   * Returns the parsed table, starting the load on the first call and sharing
   * the same promise afterwards. A failed load clears the cache so that a
   * later call retries instead of replaying the old rejection.
   */
  private loadData(): Promise<BlastRecord[]> {
    if (!this.data) {
      this.data = this.setup(this.config).catch((e: unknown) => {
        this.data = undefined
        throw e
      })
    }
    return this.data
  }

  async hasDataForRefName() {
    // determining this properly is basically a call to getFeatures
    // so is not really that important, and has to be true or else
    // getFeatures is never called (BaseAdapter filters it out)
    return true
  }

  /**
   * Returns the assembly names as `[query, target]`: the `assemblyNames` slot
   * when it is non-empty, otherwise the `queryAssembly` and `targetAssembly`
   * slots.
   */
  getAssemblyNames() {
    const assemblyNames = this.getConf('assemblyNames') as string[]
    if (assemblyNames.length === 0) {
      const query = this.getConf('queryAssembly') as string
      const target = this.getConf('targetAssembly') as string
      return [query, target]
    }
    return assemblyNames
  }

  /**
   * Lists the distinct sequence ids for the assembly named by the first
   * region in `opts.regions`: qseqid values when it is the first configured
   * assembly, sseqid values otherwise. Warns and returns an empty list when
   * that assembly is not one of this adapter's assemblies.
   */
  async getRefNames(opts: BaseOptions = {}) {
    // @ts-expect-error
    const r1 = opts.regions?.[0].assemblyName
    const feats = await this.loadData()

    const idx = this.getAssemblyNames().indexOf(r1)
    if (idx !== -1) {
      const set = new Set<string>()
      for (const feat of feats) {
        set.add(idx === 0 ? feat.qseqid : feat.sseqid)
      }
      return [...set]
    }
    console.warn('Unable to do ref renaming on adapter')
    return []
  }

  /**
   * Orders a coordinate pair so that start is not greater than end, and
   * reports strand -1 when the pair had to be swapped (1 otherwise).
   */
  private static orient(a: number, b: number) {
    return a > b
      ? { start: b, end: a, strand: -1 }
      : { start: a, end: b, strand: 1 }
  }

  /**
   * Emits one SyntenyFeature per BLAST record that lies on `query.refName`
   * and intersects `query.start`..`query.end` on the queried assembly. The
   * other side of the alignment is attached as `mate`. If the queried
   * assembly is not one of this adapter's assemblies, a warning is logged and
   * the observable completes without emitting.
   *
   * @param query - region to fetch, including the assembly it belongs to
   */
  getFeatures(query: Region) {
    return ObservableCreate<Feature>(async observer => {
      const blastRecords = await this.loadData()
      const [queryAssembly, targetAssembly] = this.getAssemblyNames()

      const {
        refName: queryRefName,
        assemblyName: queryAssemblyName,
        start: queryStart,
        end: queryEnd,
      } = query
      if (
        queryAssemblyName !== targetAssembly &&
        queryAssemblyName !== queryAssembly
      ) {
        console.warn(`${queryAssemblyName} not found in this adapter`)
        observer.complete()
        return
      }

      // Which side of each record is being viewed does not change per record,
      // so decide it once outside the loop.
      const viewingQuerySide = queryAssemblyName === queryAssembly

      // NOTE(review): the BLAST coordinates are passed through unchanged
      // below. BLAST tabular positions are presumably 1-based inclusive;
      // confirm whether a conversion to the viewer's coordinate convention is
      // intended.
      for (let i = 0; i < blastRecords.length; i++) {
        const { qseqid, sseqid, qstart, qend, sstart, send, ...rest } =
          blastRecords[i]!

        // Cheap refName test first; most records are on other sequences
        const refName = viewingQuerySide ? qseqid : sseqid
        if (refName !== queryRefName) {
          continue
        }

        const queryHit = BlastTabularAdapter.orient(qstart, qend)
        const subjectHit = BlastTabularAdapter.orient(sstart, send)
        const near = viewingQuerySide ? queryHit : subjectHit
        const mate = viewingQuerySide ? subjectHit : queryHit

        if (doesIntersect2(queryStart, queryEnd, near.start, near.end)) {
          observer.next(
            new SyntenyFeature({
              uniqueId: i + queryAssemblyName,
              assemblyName: viewingQuerySide ? queryAssembly : targetAssembly,
              start: near.start,
              end: near.end,
              type: 'match',
              refName,
              // -1 when exactly one side is reversed, 1 otherwise
              strand: near.strand * mate.strand,
              syntenyId: i,
              ...rest,
              mate: {
                start: mate.start,
                end: mate.end,
                refName: viewingQuerySide ? sseqid : qseqid,
                assemblyName: viewingQuerySide
                  ? targetAssembly
                  : queryAssembly,
              },
            }),
          )
        }
      }

      observer.complete()
    })
  }

  /** No-op: this adapter releases nothing here. */
  freeResources(/* { query } */): void {}
}
51 changes: 51 additions & 0 deletions
51
plugins/comparative-adapters/src/BlastTabularAdapter/configSchema.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import { ConfigurationSchema } from '@jbrowse/core/configuration' | ||
|
||
// NOTE(review): `x` is never called; it presumably exists only to give the
// `#config` doc comment a declaration to attach to. Confirm against the docs
// tooling before removing it.
/**
 * #config BlastTabularAdapter
 */
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

// Configuration schema for the BLAST tabular adapter. The two assemblies can
// be given either as the `assemblyNames` pair or through the separate
// `queryAssembly` / `targetAssembly` slots.
const BlastTabularAdapter = ConfigurationSchema(
  'BlastTabularAdapter',
  {
    /**
     * #slot
     */
    assemblyNames: {
      type: 'stringArray',
      defaultValue: [],
      description:
        'Query assembly is the first value in the array, target assembly is the second',
    },

    /**
     * #slot
     */
    targetAssembly: {
      type: 'string',
      defaultValue: '',
      description: 'Alternative to assemblyNames array: the target assembly',
    },
    /**
     * #slot
     */
    queryAssembly: {
      type: 'string',
      defaultValue: '',
      description: 'Alternative to assemblyNames array: the query assembly',
    },
    /**
     * #slot
     */
    blastTableLocation: {
      type: 'fileLocation',
      defaultValue: {
        uri: '/path/to/blastTable.tsv',
        locationType: 'UriLocation',
      },
      description:
        'Location of the tab-separated BLAST output file (may be gzip-compressed)',
    },
  },
  { explicitlyTyped: true },
)

export default BlastTabularAdapter
19 changes: 19 additions & 0 deletions
19
plugins/comparative-adapters/src/BlastTabularAdapter/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import PluginManager from '@jbrowse/core/PluginManager' | ||
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType' | ||
import configSchema from './configSchema' | ||
|
||
/**
 * Registers the 'BlastTabularAdapter' adapter type on the given plugin
 * manager. The adapter is flagged `hiddenFromGUI`, and its implementation
 * module is pulled in through a dynamic import only when `getAdapterClass`
 * is invoked.
 *
 * @param pluginManager - plugin manager that receives the adapter type
 */
export default function BlastTabularAdapterF(pluginManager: PluginManager) {
  pluginManager.addAdapterType(() => {
    // Resolve the adapter class from the module's default export
    const getAdapterClass = async () => {
      const adapterModule = await import('./BlastTabularAdapter')
      return adapterModule.default
    }

    return new AdapterType({
      name: 'BlastTabularAdapter',
      displayName: 'Tabular BLAST output adapter',
      configSchema,
      adapterMetadata: {
        hiddenFromGUI: true,
      },
      getAdapterClass,
    })
  })
}
Oops, something went wrong.