diff --git a/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts b/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts
index b61629c013..223f9e2634 100755
--- a/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts
+++ b/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts
@@ -13,19 +13,106 @@ import SyntenyFeature from '../SyntenyFeature'
 
 // Blast output column names/descriptions taken from
 // https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_Options_for_the_commandline_a_
-interface BlastRecord {
+interface BlastColumns {
   /** Query Seq-id */
-  qseqid: string
+  qseqid?: string
+  /** Query GI */
+  qgi?: string
+  /** Query accession */
+  qacc?: string
   /** Subject Seq-id */
-  sseqid: string
-  /** Percentage of identical matches */
-  pident: number
+  sseqid?: string
+  /** All subject Seq-id(s), separated by a ';' */
+  sallseqid?: string
+  /** Subject GI */
+  sgi?: string
+  /** All subject GIs */
+  sallgi?: string
+  /** Subject accession */
+  sacc?: string
+  /** All subject accessions */
+  sallacc?: string
+  /** Start of alignment in query */
+  qstart?: number
+  /** End of alignment in query */
+  qend?: number
+  /** Start of alignment in subject */
+  sstart?: number
+  /** End of alignment in subject */
+  send?: number
+  /** Aligned part of query sequence */
+  qseq?: string
+  /** Aligned part of subject sequence */
+  sseq?: string
+  /** Expect value */
+  evalue?: string
+  /** Bit score */
+  bitscore?: string
+  /** Raw score */
+  score?: string
   /** Alignment length */
-  length: number
+  length?: string
+  /** Percentage of identical matches */
+  pident?: string
+  /** Number of identical matches */
+  nident?: string
   /** Number of mismatches */
-  mismatch: number
+  mismatch?: string
+  /** Number of positive-scoring matches */
+  positive?: string
   /** Number of gap openings */
-  gapopen: number
+  gapopen?: string
+  /** Total number of gaps */
+  gaps?: string
+  /** Percentage of positive-scoring matches */
+  ppos?: string
+  /** Query and subject frames separated by a '/' */
+  frames?: string
+  /** Query frame */
+  qframe?: string
+  /** Subject frame */
+  sframe?: string
+  /** Blast traceback operations (BTOP) */
+  btop?: string
+  /** Unique Subject Taxonomy ID(s), separated by a ';'(in numerical order) */
+  staxids?: string
+  /** Unique Subject Scientific Name(s), separated by a ';' */
+  sscinames?: string
+  /** Unique Subject Common Name(s), separated by a ';' */
+  scomnames?: string
+  /**
+   * Unique Subject Blast Name(s), separated by a ';' (in alphabetical order)
+   */
+  sblastnames?: string
+  /**
+   * Unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order)
+   */
+  sskingdoms?: string
+  /** Subject Title */
+  stitle?: string
+  /** All Subject Title(s), separated by a '<>' */
+  salltitles?: string
+  /** Subject Strand */
+  sstrand?: string
+  /** Query Coverage Per Subject (for all HSPs) */
+  qcovs?: string
+  /** Query Coverage Per HSP */
+  qcovhsp?: string
+  /**
+   * A measure of Query Coverage that counts a position in a subject sequence
+   * for this measure only once. The second time the position is aligned to the
+   * query is not counted towards this measure.
+   */
+  qcovus?: string
+}
+
+// A parsed row of BLAST output: the six columns checked by
+// createBlastLineParser are required, every other column stays optional
+interface BlastRecord extends BlastColumns {
+  /** Query Seq-id */
+  qseqid: string
+  /** Subject Seq-id */
+  sseqid: string
   /** Start of alignment in query */
   qstart: number
   /** End of alignment in query */
@@ -34,62 +121,81 @@ interface BlastRecord {
   sstart: number
   /** End of alignment in subject */
   send: number
-  /** Expect value */
-  evalue: number
-  /** Bit score */
-  bitscore: number
 }
 
-export function parseBlastLine(line: string): BlastRecord | undefined {
-  const [
-    qseqid,
-    sseqid,
-    pident,
-    length,
-    mismatch,
-    gapopen,
-    qstart,
-    qend,
-    sstart,
-    send,
-    evalue,
-    bitscore,
-  ] = line.split('\t')
-
-  if (
-    !(
-      qseqid &&
-      sseqid &&
-      pident &&
-      length &&
-      mismatch &&
-      gapopen &&
-      qstart &&
-      qend &&
-      sstart &&
-      send &&
-      evalue &&
-      bitscore
-    )
-  ) {
-    console.warn('Invalid BLAST line')
-    console.warn(line)
-    return
-  }
-
-  return {
-    qseqid,
-    sseqid,
-    pident: Number.parseFloat(pident),
-    length: Number.parseInt(length, 10),
-    mismatch: Number.parseInt(mismatch, 10),
-    gapopen: Number.parseInt(gapopen, 10),
-    qstart: Number.parseInt(qstart, 10),
-    qend: Number.parseInt(qend, 10),
-    sstart: Number.parseInt(sstart, 10),
-    send: Number.parseInt(send, 10),
-    evalue: Number.parseFloat(evalue),
-    bitscore: Number.parseFloat(bitscore),
+/** Columns every BLAST table must provide for a row to become a record */
+const requiredColumns = [
+  'qseqid',
+  'sseqid',
+  'qstart',
+  'qend',
+  'sstart',
+  'send',
+] as const
+
+/**
+ * Build a parser for one line of BLAST tabular output.
+ *
+ * @param columns - whitespace-separated column names, in the order they were
+ * passed to outfmt in the BLAST command
+ * @returns a function that converts a line into a BlastRecord, or returns
+ * undefined for comment lines and for lines without usable required values
+ * @throws if one of the required columns is not listed in `columns`
+ */
+function createBlastLineParser(columns: string) {
+  // Split on runs of whitespace: a doubled space or a tab in the configured
+  // value would otherwise yield an empty name and shift the column indexes
+  const columnNames = columns.trim().split(/\s+/) as (keyof BlastRecord)[]
+  const requireIndex = (name: (typeof requiredColumns)[number]) => {
+    const index = columnNames.indexOf(name)
+    if (index === -1) {
+      throw new Error(`Missing required column "${name}"`)
+    }
+    return index
+  }
+  const qseqidIndex = requireIndex('qseqid')
+  const sseqidIndex = requireIndex('sseqid')
+  const qstartIndex = requireIndex('qstart')
+  const qendIndex = requireIndex('qend')
+  const sstartIndex = requireIndex('sstart')
+  const sendIndex = requireIndex('send')
+  // Work out the optional columns once instead of re-checking on every line
+  const required = new Set<string>(requiredColumns)
+  const optionalColumns = [...columnNames.entries()].filter(
+    ([, columnName]) => !required.has(columnName),
+  )
+
+  return (line: string): BlastRecord | undefined => {
+    if (line.startsWith('#')) {
+      return
+    }
+    const row = line.split('\t')
+    const qseqid = row[qseqidIndex]
+    const sseqid = row[sseqidIndex]
+    const qstart = Number.parseInt(row[qstartIndex] ?? '', 10)
+    const qend = Number.parseInt(row[qendIndex] ?? '', 10)
+    const sstart = Number.parseInt(row[sstartIndex] ?? '', 10)
+    const send = Number.parseInt(row[sendIndex] ?? '', 10)
+    // Missing values and non-numeric coordinates (e.g. a header row) are both
+    // rejected so that no record is returned with NaN coordinates
+    if (
+      !(qseqid && sseqid) ||
+      [qstart, qend, sstart, send].some(coord => Number.isNaN(coord))
+    ) {
+      console.warn('Invalid BLAST line')
+      console.warn(line)
+      return
+    }
+    const record: BlastRecord = { qseqid, sseqid, qstart, qend, sstart, send }
+    for (const [idx, columnName] of optionalColumns) {
+      const value = row[idx]
+      if (!value) {
+        continue
+      }
+      // @ts-expect-error only string-typed optional columns reach this point
+      record[columnName] = value
+    }
+    return record
   }
 }
 
@@ -118,7 +224,8 @@ export default class BlastTabularAdapter extends BaseFeatureDataAdapter {
       encoding: undefined,
     })
     const buf = isGzip(buffer) ? await unzip(buffer) : buffer
-    return parseLineByLine(buf, parseBlastLine)
+    const columns: string = readConfObject(this.config, 'columns')
+    return parseLineByLine(buf, createBlastLineParser(columns))
   }
 
   async hasDataForRefName() {
diff --git a/plugins/comparative-adapters/src/BlastTabularAdapter/configSchema.ts b/plugins/comparative-adapters/src/BlastTabularAdapter/configSchema.ts
index 50607ea3df..21e768d0aa 100755
--- a/plugins/comparative-adapters/src/BlastTabularAdapter/configSchema.ts
+++ b/plugins/comparative-adapters/src/BlastTabularAdapter/configSchema.ts
@@ -44,6 +44,16 @@ const BlastTabularAdapter = ConfigurationSchema(
         locationType: 'UriLocation',
       },
     },
+    /**
+     * #slot
+     */
+    columns: {
+      type: 'string',
+      description:
+        'Optional space-separated column name list. If custom columns were used in outfmt, enter them here exactly as specified in the command. At least qseqid, sseqid, qstart, qend, sstart, and send are required',
+      defaultValue:
+        'qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore',
+    },
   },
   { explicitlyTyped: true },
 )
diff --git a/test_data/config_synteny_grape_peach.json b/test_data/config_synteny_grape_peach.json
index 47fed0b8cf..887cf9c470 100644
--- a/test_data/config_synteny_grape_peach.json
+++ b/test_data/config_synteny_grape_peach.json
@@ -1910,6 +1910,20 @@
           }
         }
       }
+    },
+    {
+      "type": "SyntenyTrack",
+      "trackId": "grape_peach_synteny_tblastx",
+      "name": "Grape peach synteny (TBLASTX)",
+      "assemblyNames": ["peach", "grape"],
+      "category": ["Annotation"],
+      "adapter": {
+        "type": "BlastTabularAdapter",
+        "blastTableLocation": {
+          "uri": "grape_peach_synteny/peach_vs_grape.tsv.gz"
+        },
+        "assemblyNames": ["peach", "grape"]
+      }
     }
   ]
 }
diff --git a/test_data/grape_peach_synteny/peach_vs_grape.tsv.gz b/test_data/grape_peach_synteny/peach_vs_grape.tsv.gz
old mode 100755
new mode 100644
index c248ff195b..a18715c1eb
Binary files a/test_data/grape_peach_synteny/peach_vs_grape.tsv.gz and b/test_data/grape_peach_synteny/peach_vs_grape.tsv.gz differ