From f7ef5655a9172819cb76b3221cfda627d46637f7 Mon Sep 17 00:00:00 2001 From: James Scherer Date: Mon, 28 Aug 2023 14:55:25 -0500 Subject: [PATCH] Updated ditto script to be more robust and efficient (#132) * Updated the ditto score script to be more robust and less inefficient * Removed comment in script --- etc/fixtures/add-ditto-annotation-scores.js | 67 --------------- etc/fixtures/add-ditto-score-annotations.js | 94 +++++++++++++++++++++ etc/fixtures/simplified_ros_pred_ann.csv | 52 ++++++++++++ 3 files changed, 146 insertions(+), 67 deletions(-) delete mode 100755 etc/fixtures/add-ditto-annotation-scores.js create mode 100755 etc/fixtures/add-ditto-score-annotations.js create mode 100644 etc/fixtures/simplified_ros_pred_ann.csv diff --git a/etc/fixtures/add-ditto-annotation-scores.js b/etc/fixtures/add-ditto-annotation-scores.js deleted file mode 100755 index be5091e2..00000000 --- a/etc/fixtures/add-ditto-annotation-scores.js +++ /dev/null @@ -1,67 +0,0 @@ -// docker exec -it rosalution-rosalution-db-1 mongosh /tmp/fixtures/add-ditto-annotation-scores.js - -const fs = require('fs'); - -const inputPath = `/tmp/fixtures/simplifiedDittoScores.csv` -var dittoScoreList = []; - -var csvData = fs.readFileSync(inputPath) - .toString() // convert Buffer to string - .split('\n') // split string to lines - .map(e => e.trim()) // remove white spaces for each line - .map(e => e.split(',').map(e => e.trim())); // split each line to array - -console.log(csvData) - -for(let i = 0; i < csvData.length; i++) { - if(csvData[i][0] == 'HGVS') - continue; - - dittoScore = { - hgvs_variant: '', - annotation: { - DITTO: [] - }, - } - - const ditto_annotation = { data_source: 'DITTO', version: '', value: 0, } - - dittoScore.hgvs_variant = csvData[i][0] - ditto_annotation.value = csvData[i][1] - - dittoScore.annotation.DITTO.push(ditto_annotation); - - dittoScoreList.push(dittoScore); -} - -db = db.getSiblingDB('rosalution_db'); - -try { - const genomic_units = db.genomic_units.find(); - - if(genomic_units.size() == 0) - throw new Error(`No genomic units found in ${databaseName}. Aborting.`) - - let newUnits = [] - genomic_units.forEach(unit => { - if(unit['hgvs_variant'] !== undefined) { - for(score in dittoScoreList) { - if(dittoScoreList[score].hgvs_variant == unit.hgvs_variant) { - console.log(unit.hgvs_variant) - unit.annotations.push(dittoScoreList[score].annotation) - newUnits.push(unit); - } - } - } - }) - - console.log(newUnits) - - newUnits.forEach(unit => {db.genomic_units.updateOne({'_id': unit._id}, {'$set': unit})}); -} catch (err) { - console.log(err.stack); - console.log(usage); - quit(1); -} - -console.log(`Ditto scores added!`); \ No newline at end of file diff --git a/etc/fixtures/add-ditto-score-annotations.js b/etc/fixtures/add-ditto-score-annotations.js new file mode 100755 index 00000000..46bf67df --- /dev/null +++ b/etc/fixtures/add-ditto-score-annotations.js @@ -0,0 +1,94 @@ +// docker exec -it rosalution-rosalution-db-1 mongosh --eval "var dittoScoreCSVPath='/tmp/fixtures/example.csv'" /tmp/add-ditto-score-annotations.js + +const usage = ` +mongosh /tmp/add-ditto-score-annotations.js + + Script Options: + help Bool If True prints this help message + databaseName String Mongo database name to use default: rosalution_db + dittoScoreCSVPath String File to read ditto score to genomic unit mapping + + Run mongosh help for mongosh connection and authentication usage. + + Examples: + mongosh --eval "var dittoScoreCSVPath='/tmp/fixtures/example.csv'" /tmp/add-ditto-score-annotations.js + mongosh --host localhost --port 27017 --eval "var dittoScoreCSVPath='/tmp/fixtures/example.csv'; databaseName=''" /tmp/add-ditto-score-annotations.js +`; + +const fs = require('fs'); + +if (help == true) { + print(usage); + quit(1); +} + +// Checking the ditto score csv path +if (typeof dittoScoreCSVPath === 'undefined') { + dittoScoreCSVPath = "/tmp/fixtures/example-adding-users.json"; +} else if (typeof dittoScoreCSVPath !== 'string') { + print("dittoScoreCSV must be a string containing file path"); + quit(1); +} + +// Checking if custom databaseName string +if (typeof databaseName === 'undefined') { + databaseName = "rosalution_db"; +} else if (typeof databaseName !== 'string') { + print("databaseName must be a string"); + quit(1); +} + +db = db.getSiblingDB(databaseName); + +var dittoScoreList = []; + +var csvData = fs.readFileSync(dittoScoreCSVPath) + .toString() + .split('\n') + .map(e => e.trim()) + .map(e => e.split(',').map(e => e.trim())); + +for(let i = 0; i < csvData.length; i++) { + if(csvData[i][0] == 'HGVS') + continue; + + dittoScore = { + hgvs_variant: '', + annotation: { + DITTO: [] + }, + } + + const ditto_annotation = { data_source: 'DITTO', version: '', value: 0, } + + dittoScore.hgvs_variant = csvData[i][0] + ditto_annotation.value = csvData[i][1] + + dittoScore.annotation.DITTO.push(ditto_annotation); + + dittoScoreList.push(dittoScore); +} + +try { + let count = 0; + + for(score in dittoScoreList) { + const genomic_unit = db.genomic_units.findOne({ hgvs_variant: dittoScoreList[score].hgvs_variant}) + + if(genomic_unit == null) + continue; + + console.log(`Adding Ditto Score: ${dittoScoreList[score].annotation.DITTO[0].value} to genomic unit: ${dittoScoreList[score].hgvs_variant}`) + + genomic_unit.annotations.push(dittoScoreList[score].annotation) + db.genomic_units.updateOne({'_id': genomic_unit._id}, {'$set': genomic_unit}) + count++; + } + + console.log(`${count} Ditto scores added! Exiting.`); +} catch (err) { + console.log(err.stack); + console.log(usage); + quit(1); +} + diff --git a/etc/fixtures/simplified_ros_pred_ann.csv b/etc/fixtures/simplified_ros_pred_ann.csv new file mode 100644 index 00000000..32dcc336 --- /dev/null +++ b/etc/fixtures/simplified_ros_pred_ann.csv @@ -0,0 +1,52 @@ +HGVS,DITTO,transcript,gene,consequence,chrom,pos,ref_base,alt_base,RefSeq match transcript (MANE Select) +NM_000478.6:c.436G>A,0.99183565,ENST00000374840,ALPL,missense_variant,chr1,21563248,G,A,NM_000478.6 +NM_170707.3:c.745C>T,1,ENST00000368300,LMNA,missense_variant,chr1,156134910,C,T,NM_170707.4 +NM_003324.5:c.1144C>T,1,ENST00000448120,TULP3,missense_variant,chr12,2938234,C,T,NM_003324.5 +NM_001193511.2:c.1099A>G,0.9999999,ENST00000547488,MAP3K12,missense_variant,chr12,53485096,T,C,NM_001193511.2 +NM_005249.5:c.256dup,1,ENST00000313071,FOXG1,frameshift_elongation,chr14,28767536,-,C,NM_005249.5 +NM_005249.5:c.924G>A,1,ENST00000313071,FOXG1,stop_gained,chr14,28768203,G,A,NM_005249.5 +NM_170674.4:c.242A>T,0.99998474,ENST00000561208,MEIS2,missense_variant,chr15,37097970,T,A,NM_170675.5 +NM_182958.2:c.304G>A,0.9999993,ENST00000219797,KAT8,missense_variant,chr16,31120356,G,A,NM_032188.3 +NM_001365.3:c.1961C>T,0.99965537,ENST00000399506,DLG4,missense_variant,chr17,7192979,G,A,NM_001321075.3 +NM_001365.4:c.1054C>T,1,ENST00000399506,DLG4,stop_gained,chr17,7196915,G,A,NM_001321075.3 +NM_001365.4:c.1039del,1,ENST00000399506,DLG4,frameshift_truncation,chr17,7196930,C,-,NM_001321075.3 +NM_001005271.3:c.3535G>A,0.9999955,ENST00000330494,CHD3,missense_variant,chr17,7902715,G,A,NM_001005273.3 +NM_005993.4:c.1255G>A,0.9999999,ENST00000355528,TBCD,missense_variant,chr17,82814871,G,A,NM_005993.5 +NM_005993.4:c.2305_2307delGAG,0.44043422,ENST00000355528,TBCD,inframe_deletion,chr17,82924983,GAG,-,NM_005993.5 +NM_004539.4:c.1600C>T,1,ENST00000256854,NARS1,stop_gained,chr18,57601699,G,A,NM_004539.4 +NM_001127221.1:c.1784+3A>C,0.99999994,ENST00000360228,CACNA1A,intron_variant,chr19,13308413,T,G,NM_001127222.2 +NM_016457.5:c.1687G>A,1,ENST00000291281,PRKD2,missense_variant,chr19,46691750,C,T,NM_016457.5 +NM_016457.5:c.1679G>C,0.99999994,ENST00000291281,PRKD2,missense_variant,chr19,46691758,C,G,NM_016457.5 +NM_016457.5:c.889+1G>T,1,ENST00000291281,PRKD2,"intron_variant,splice_site_variant",chr19,46704168,C,A,NM_016457.5 +NM_000836.4:c.2740C>A,0,ENST00000263269,GRIN2D,missense_variant,chr19,48442666,C,A,NM_000836.4 +NM_022055.2:c.333_343del,1,ENST00000327876,KCNK12,frameshift_truncation,chr2,47569989,CCCAGCGCGGG,-,NM_022055.2 +NM_198276.3:c.302G>T,1,ENST00000335390,TMEM17,missense_variant,chr2,62502453,C,A,NM_198276.3 +NM_015265.3:c.1595G>A,0.99729127,ENST00000417098,SATB2,missense_variant,chr2,199308905,C,T,NM_001172509.2 +NM_001230.4:c.1030C>G,0.13751435,ENST00000286186,CASP10,missense_variant,chr2,201209306,C,G,NM_032977.4 +NM_001875.5:c.3422T>G,0.99998856,ENST00000233072,CPS1,missense_variant,chr2,210650380,T,G,NM_001875.5 +NM_001008491.2:c.104G>A,1,ENST00000391971,SEPTIN2,missense_variant,chr2,241326087,G,A,NM_004404.5 +NM_003392.7:c.248G>C,0.9999998,ENST00000264634,WNT5A,missense_variant,chr3,55479457,C,G,NM_003392.7 +NM_000297.4:c.1967T>G,0.99999964,ENST00000237596,PKD2,missense_variant,chr4,88058051,T,G,NM_000297.4 +NM_005859.5:c.533dup,0.9999998,ENST00000331327,PURA,frameshift_elongation,chr5,140114715,-,C,NM_005859.5 +NM_016221.4:c.25dupC,1,ENST00000447998,DCTN4,frameshift_elongation,chr5,150758970,-,G,NM_016221.4 +NM_004640.7:c.275G>A,1,ENST00000396172,DDX39B,missense_variant,chr6,31538827,C,T,NM_004640.7 +NM_004640.7:c.368G>A,1,ENST00000396172,DDX39B,missense_variant,chr6,31539211,C,T,NM_004640.7 +NM_004640.7:c.109G>T,0.99999994,ENST00000396172,DDX39B,missense_variant,chr6,31540424,C,A,NM_004640.7 +NM_000492.3:c.1521_1523del,0.9999687,ENST00000003084,CFTR,inframe_deletion,chr7,117559592,CTT,-,NM_000492.4 +NM_000238.4:c.1979C>T,1,ENST00000262186,KCNH2,missense_variant,chr7,150951087,G,A,NM_000238.4 +NM_033402.5:c.105-1G>C,1,ENST00000360375,LRRCC1,"intron_variant,splice_site_variant",chr8,85109594,G,C,NM_033402.5 +NM_004260.4:c.2743A>G,0.005189061,ENST00000617875,RECQL4,missense_variant,chr8,144512859,T,C,NM_004260.4 +NM_004260.4:c.1488C>G,0.034600735,ENST00000617875,RECQL4,missense_variant,chr8,144515068,G,C,NM_004260.4 +NM_004972.3:c.1694G>C,0.03832954,ENST00000381652,JAK2,missense_variant,chr9,5072544,G,C,NM_004972.4 +NM_017617.3:c.1348G>A,0.8035152,ENST00000651671,NOTCH1,missense_variant,chr9,136517845,C,T,NM_017617.5 +NM_000718.4:c.5992C>T,1,ENST00000371372,CACNA1B,stop_gained,chr9,138118730,C,T,NM_000718.4 +NM_001001671.3:c.3294+2dup,0.99999905,ENST00000338883,MAP3K15,"intron_variant,splice_site_variant",chrX,19371344,-,A,NM_001001671.4 +NM_004586.2:c.1460A>T,0.9999884,ENST00000379565,RPS6KA3,missense_variant,chrX,20167731,T,A,NM_004586.3 +NM_006579.2:c.301+3G>C,0.9999976,ENST00000495186,EBP,intron_variant,chrX,48524075,G,C,NM_006579.3 +NM_006306.4:c.3103C>T,1,ENST00000322213,SMC1A,stop_gained,chrX,53383124,G,A,NM_006306.4 +NM_006306.4:c.3037C>T,1,ENST00000322213,SMC1A,stop_gained,chrX,53383190,G,A,NM_006306.4 +NM_004463.3:c.2581G>T,1,ENST00000375135,FGD1,missense_variant,chrX,54446414,C,A,NM_004463.3 +NM_002444.2:c.1574T>C,1,ENST00000360270,MSN,missense_variant,chrX,65739733,T,C,NM_002444.3 +NM_001847.4:c.1235A>G,0,ENST00000334504,COL4A6,missense_variant,chrX,108191482,T,C,NM_033641.4 +NM_001017980.3:c.164G>T,1,ENST00000330374,VMA21,missense_variant,chrX,151404916,G,T,NM_001017980.4 +NM_001360016.2:c.563C>T,0.415904,ENST00000393562,G6PD,missense_variant,chrX,154534419,G,A,NM_001360016.2 \ No newline at end of file