Skip to content

Commit

Permalink
Create an adapter for BLAST's tabular output
Browse files Browse the repository at this point in the history
  • Loading branch information
garrettjstevens committed Oct 29, 2024
1 parent 03f6b7d commit 5a6c786
Show file tree
Hide file tree
Showing 10 changed files with 19,759 additions and 63 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { toArray } from 'rxjs/operators'
import { firstValueFrom } from 'rxjs'
import Adapter from './BlastTabularAdapter'
import MyConfigSchema from './configSchema'

// Loads the bundled peach-vs-grape BLAST table through the adapter and checks
// that features can be fetched from either assembly's point of view: 'peach'
// is the first entry of `assemblyNames`, 'grape' the second.
test('adapter can fetch features from peach_vs_grape.tsv', async () => {
  const adapter = new Adapter(
    MyConfigSchema.create({
      blastTableLocation: {
        localPath: require.resolve('./test_data/peach_vs_grape.tsv'),
        locationType: 'LocalPathLocation',
      },
      assemblyNames: ['peach', 'grape'],
    }),
  )

  // Same coordinate window requested on each assembly
  const features1 = adapter.getFeatures({
    refName: 'Pp05',
    start: 0,
    end: 200000,
    assemblyName: 'peach',
  })

  const features2 = adapter.getFeatures({
    refName: 'chr18',
    start: 0,
    end: 200000,
    assemblyName: 'grape',
  })

  // Drain each observable into an array before asserting
  const fa1 = await firstValueFrom(features1.pipe(toArray()))
  const fa2 = await firstValueFrom(features2.pipe(toArray()))
  expect(fa1.length).toBe(1675)
  expect(fa2.length).toBe(2471)
  // Returned features are reported on the refName that was queried
  expect(fa1[0]!.get('refName')).toBe('Pp05')
  expect(fa2[0]!.get('refName')).toBe('chr18')
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
import { unzip } from '@gmod/bgzf-filehandle'
import { readConfObject } from '@jbrowse/core/configuration'
import {
BaseFeatureDataAdapter,
BaseOptions,
} from '@jbrowse/core/data_adapters/BaseAdapter'
import { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
import PluginManager from '@jbrowse/core/PluginManager'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { doesIntersect2, Feature, isGzip, Region } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'

import { parseLineByLine } from '../util'
import BlastTabularAdapterConfigType from './configSchema'
import SyntenyFeature from '../SyntenyFeature'

/** Type of the value returned by `create` on the adapter's configuration schema. */
type BlastTabularAdapterConfig = ReturnType<
typeof BlastTabularAdapterConfigType.create
>

// Blast output column names/descriptions taken from
// https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_Options_for_the_commandline_a_
/**
 * One parsed row of a 12-column tab-separated BLAST table, as returned by
 * `parseBlastLine`. The two sequence ids are kept as strings; every other
 * column is converted to a number.
 */
interface BlastRecord {
/** Query Seq-id */
qseqid: string
/** Subject Seq-id */
sseqid: string
/** Percentage of identical matches */
pident: number
/** Alignment length */
length: number
/** Number of mismatches */
mismatch: number
/** Number of gap openings */
gapopen: number
/** Start of alignment in query */
qstart: number
/** End of alignment in query */
qend: number
/** Start of alignment in subject */
sstart: number
/** End of alignment in subject */
send: number
/** Expect value */
evalue: number
/** Bit score */
bitscore: number
}

/**
 * Parses one line of 12-column, tab-separated BLAST output into a record.
 *
 * The first twelve tab-separated fields are read in the order qseqid, sseqid,
 * pident, length, mismatch, gapopen, qstart, qend, sstart, send, evalue,
 * bitscore; any further fields are ignored.
 *
 * @param line - a single line of the table, without its line terminator
 * @returns the parsed record, or `undefined` when the line carries no usable
 * data: blank lines and `#` comment lines are skipped silently, while lines
 * with missing or non-numeric fields are reported with `console.warn`
 */
export function parseBlastLine(line: string): BlastRecord | undefined {
  // Blank lines and '#' comment lines are not data rows, so they are not worth
  // a warning each
  if (line.trim() === '' || line.startsWith('#')) {
    return undefined
  }

  const warnInvalid = () => {
    console.warn('Invalid BLAST line')
    console.warn(line)
  }

  const [
    qseqid,
    sseqid,
    pident,
    length,
    mismatch,
    gapopen,
    qstart,
    qend,
    sstart,
    send,
    evalue,
    bitscore,
  ] = line.split('\t')

  // Every one of the twelve columns must be present and non-empty
  if (
    !(
      qseqid &&
      sseqid &&
      pident &&
      length &&
      mismatch &&
      gapopen &&
      qstart &&
      qend &&
      sstart &&
      send &&
      evalue &&
      bitscore
    )
  ) {
    warnInvalid()
    return undefined
  }

  const record: BlastRecord = {
    qseqid,
    sseqid,
    pident: Number.parseFloat(pident),
    length: Number.parseInt(length, 10),
    mismatch: Number.parseInt(mismatch, 10),
    gapopen: Number.parseInt(gapopen, 10),
    qstart: Number.parseInt(qstart, 10),
    qend: Number.parseInt(qend, 10),
    sstart: Number.parseInt(sstart, 10),
    send: Number.parseInt(send, 10),
    evalue: Number.parseFloat(evalue),
    bitscore: Number.parseFloat(bitscore),
  }

  // A column-header row or otherwise malformed line parses to NaN; reject it
  // rather than emit a record whose coordinates can never be compared
  const numericValues = [
    record.pident,
    record.length,
    record.mismatch,
    record.gapopen,
    record.qstart,
    record.qend,
    record.sstart,
    record.send,
    record.evalue,
    record.bitscore,
  ]
  if (numericValues.some(value => Number.isNaN(value))) {
    warnInvalid()
    return undefined
  }

  return record
}

/**
 * Feature adapter backed by a tab-separated BLAST table.
 *
 * Each row of the table becomes a `SyntenyFeature` of type 'match' that can be
 * viewed from either of the two configured assemblies: the first assembly name
 * is matched against the query columns (`qseqid`/`qstart`/`qend`) and the
 * second against the subject columns (`sseqid`/`sstart`/`send`).
 */
export default class BlastTabularAdapter extends BaseFeatureDataAdapter {
  /** Parsed rows of the table; loading is started by the constructor. */
  private data: Promise<BlastRecord[]>

  public static capabilities = ['getFeatures', 'getRefNames']

  constructor(
    public config: BlastTabularAdapterConfig,
    public getSubAdapter?: getSubAdapterType,
    public pluginManager?: PluginManager,
  ) {
    super(config, getSubAdapter, pluginManager)
    this.data = this.setup(config)
    // The load starts before any consumer exists. Attach a handler so that a
    // failed load is not reported as an unhandled rejection; `this.data` itself
    // still rejects, so getFeatures/getRefNames see the error when they await.
    this.data.catch(() => {
      /* surfaced to whoever awaits this.data */
    })
  }

  /**
   * Reads the file at the `blastTableLocation` config slot, decompresses it
   * when it is gzipped, and parses it line by line with `parseBlastLine`.
   *
   * @param config - adapter configuration to read the table location from
   * @returns the records produced by `parseLineByLine`
   */
  async setup(config: BlastTabularAdapterConfig): Promise<BlastRecord[]> {
    const pm = this.pluginManager
    const blastTableLocation = openLocation(
      readConfObject(config, 'blastTableLocation'),
      pm,
    )
    const buffer = await blastTableLocation.readFile()
    const buf = isGzip(buffer) ? await unzip(buffer) : buffer
    return parseLineByLine(buf, parseBlastLine)
  }

  async hasDataForRefName() {
    // determining this properly is basically a call to getFeatures
    // so is not really that important, and has to be true or else
    // getFeatures is never called (BaseAdapter filters it out)
    return true
  }

  /**
   * Returns the assembly names as `[query, target]`.
   *
   * The `assemblyNames` slot wins when it is non-empty; otherwise the pair is
   * built from the `queryAssembly` and `targetAssembly` slots.
   */
  getAssemblyNames() {
    const assemblyNames = this.getConf('assemblyNames') as string[]
    if (assemblyNames.length === 0) {
      const query = this.getConf('queryAssembly') as string
      const target = this.getConf('targetAssembly') as string
      return [query, target]
    }
    return assemblyNames
  }

  /**
   * Lists the distinct sequence ids on one side of the table.
   *
   * The side is chosen by the assembly name of the first entry of
   * `opts.regions`: query ids for the first configured assembly, subject ids
   * for the second.
   *
   * @returns the distinct ids, or an empty array (after a warning) when the
   * assembly name is missing or not one of this adapter's assemblies
   */
  async getRefNames(opts: BaseOptions = {}) {
    // `regions` may be absent or an empty array, so guard the element access
    // too instead of throwing on `undefined.assemblyName`
    // @ts-expect-error
    const r1 = opts.regions?.[0]?.assemblyName
    const feats = await this.data

    const idx = this.getAssemblyNames().indexOf(r1)
    if (idx !== -1) {
      const set = new Set<string>()
      for (const feat of feats) {
        set.add(idx === 0 ? feat.qseqid : feat.sseqid)
      }
      return [...set]
    }
    console.warn('Unable to do ref renaming on adapter')
    return []
  }

  /**
   * Emits one `SyntenyFeature` per table row that lies on `query.refName` and
   * intersects `query.start`..`query.end`, seen from `query.assemblyName`.
   *
   * The other end of each hit is attached as `mate`. When either end has its
   * start after its end, the two coordinates are swapped and that end counts
   * as reverse strand; the feature's `strand` is the product of both ends'
   * strands. If `query.assemblyName` is neither configured assembly, a warning
   * is logged and the observable completes without emitting.
   *
   * NOTE(review): BLAST tabular coordinates are 1-based per the BLAST
   * documentation and are passed through here unchanged — confirm whether
   * `SyntenyFeature` consumers expect 0-based half-open coordinates.
   */
  getFeatures(query: Region) {
    return ObservableCreate<Feature>(async observer => {
      const blastRecords = await this.data
      const [queryAssembly, targetAssembly] = this.getAssemblyNames()

      // The index of the assembly name in the query list corresponds to the
      // adapter in the subadapters list
      const {
        refName: queryRefName,
        assemblyName: queryAssemblyName,
        start: queryStart,
        end: queryEnd,
      } = query
      if (
        queryAssemblyName !== targetAssembly &&
        queryAssemblyName !== queryAssembly
      ) {
        console.warn(`${queryAssemblyName} not found in this adapter`)
        observer.complete()
        return
      }

      // Which side of every hit is being viewed does not change per record
      const viewingQuerySide = queryAssemblyName === queryAssembly

      for (let i = 0; i < blastRecords.length; i++) {
        const r = blastRecords[i]!
        let start,
          end,
          refName,
          assemblyName,
          mateStart,
          mateEnd,
          mateRefName,
          mateAssemblyName

        const { qseqid, sseqid, qstart, qend, sstart, send, ...rest } = r
        if (viewingQuerySide) {
          start = qstart
          end = qend
          refName = qseqid
          assemblyName = queryAssembly
          mateStart = sstart
          mateEnd = send
          mateRefName = sseqid
          mateAssemblyName = targetAssembly
        } else {
          start = sstart
          end = send
          refName = sseqid
          assemblyName = targetAssembly
          mateStart = qstart
          mateEnd = qend
          mateRefName = qseqid
          mateAssemblyName = queryAssembly
        }

        // A start greater than the end marks that side as reverse strand;
        // normalise so that start <= end and remember the orientation
        let strand = 1
        let mateStrand = 1
        if (start > end) {
          ;[start, end] = [end, start]
          strand = -1
        }
        if (mateStart > mateEnd) {
          ;[mateStart, mateEnd] = [mateEnd, mateStart]
          mateStrand = -1
        }

        if (
          refName === queryRefName &&
          doesIntersect2(queryStart, queryEnd, start, end)
        ) {
          observer.next(
            new SyntenyFeature({
              // row index plus viewing assembly keeps ids distinct per side
              uniqueId: i + queryAssemblyName,
              assemblyName,
              start,
              end,
              type: 'match',
              refName,
              strand: strand * mateStrand,
              syntenyId: i,
              ...rest,
              mate: {
                start: mateStart,
                end: mateEnd,
                refName: mateRefName,
                assemblyName: mateAssemblyName,
              },
            }),
          )
        }
      }

      observer.complete()
    })
  }

  /** No-op: this adapter implements no per-query cleanup. */
  freeResources(/* { query } */): void {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { ConfigurationSchema } from '@jbrowse/core/configuration'

// Unused empty function (hence the eslint-disable on its line).
// NOTE(review): presumably exists only so the `#config` doc comment below has
// a declaration to attach to — confirm against the docs tooling.
/**
* #config BlastTabularAdapter
*/
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

// Configuration schema for the BlastTabularAdapter. Per the slot descriptions
// below, the two assemblies can be given either as the `assemblyNames` array
// (query first, target second) or through the separate `queryAssembly` and
// `targetAssembly` slots.
const BlastTabularAdapter = ConfigurationSchema(
'BlastTabularAdapter',
{
/**
* #slot
*/
assemblyNames: {
type: 'stringArray',
defaultValue: [],
description:
'Query assembly is the first value in the array, target assembly is the second',
},

/**
* #slot
*/
targetAssembly: {
type: 'string',
defaultValue: '',
description: 'Alternative to assemblyNames array: the target assembly',
},
/**
* #slot
*/
queryAssembly: {
type: 'string',
defaultValue: '',
description: 'Alternative to assemblyNames array: the query assembly',
},
/**
* #slot
*/
blastTableLocation: {
// file location of the BLAST table; the default below is a placeholder path
type: 'fileLocation',
defaultValue: {
uri: '/path/to/blastTable.tsv',
locationType: 'UriLocation',
},
},
},
{ explicitlyTyped: true },
)

export default BlastTabularAdapter
19 changes: 19 additions & 0 deletions plugins/comparative-adapters/src/BlastTabularAdapter/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import PluginManager from '@jbrowse/core/PluginManager'
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'
import configSchema from './configSchema'

/**
 * Registers the `BlastTabularAdapter` adapter type with the given plugin
 * manager.
 *
 * The adapter class is pulled in with a dynamic import, so it is only loaded
 * when `getAdapterClass` is called. The adapter's metadata sets
 * `hiddenFromGUI`.
 *
 * @param pluginManager - plugin manager to add the adapter type to
 */
export default function BlastTabularAdapterF(pluginManager: PluginManager) {
  // Resolves to the default export of the adapter module
  const loadAdapterClass = async () => {
    const adapterModule = await import('./BlastTabularAdapter')
    return adapterModule.default
  }

  pluginManager.addAdapterType(() => {
    return new AdapterType({
      name: 'BlastTabularAdapter',
      displayName: 'Tabular BLAST output adapter',
      configSchema,
      adapterMetadata: { hiddenFromGUI: true },
      getAdapterClass: loadAdapterClass,
    })
  })
}
Loading

0 comments on commit 5a6c786

Please sign in to comment.