Skip to content

Commit

Permalink
Updated ditto script to be more robust and efficient (#132)
Browse files Browse the repository at this point in the history
* Updated the ditto score script to be more robust and less inefficient

* Removed comment in script
  • Loading branch information
JmScherer committed Aug 28, 2023
1 parent 9e4edac commit f7ef565
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 67 deletions.
67 changes: 0 additions & 67 deletions etc/fixtures/add-ditto-annotation-scores.js

This file was deleted.

94 changes: 94 additions & 0 deletions etc/fixtures/add-ditto-score-annotations.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Example invocation through Docker:
// docker exec -it rosalution-rosalution-db-1 mongosh --eval "var dittoScoreCSVPath='/tmp/fixtures/example.csv'" /tmp/add-ditto-score-annotations.js

// Help text for this script. It is printed when `help` is true and again after
// any error raised while updating genomic units. The template literal is
// emitted verbatim, so its internal line breaks are part of the output.
const usage = `
mongosh /tmp/add-ditto-score-annotations.js
Script Options:
help Bool If True prints this help message
databaseName String Mongo database name to use default: rosalution_db
dittoScoreCSVPath String File to read ditto score to genomic unit mapping
Run mongosh help for mongosh connection and authentication usage.
Examples:
mongosh --eval "var dittoScoreCSVPath='/tmp/fixtures/example.csv'" /tmp/add-ditto-score-annotations.js
mongosh --host localhost --port 27017 --eval "var dittoScoreCSVPath='/tmp/fixtures/example.csv'; databaseName='<database_name>'" /tmp/add-ditto-score-annotations.js
`;

const fs = require('fs');

// Print the usage text and stop when the caller asks for it, e.g. with
// `--eval "var help=true"`.
// The `typeof` guard avoids a ReferenceError in shells that do not predefine a
// `help` global, and the strict comparison ignores a shell-provided `help`
// helper that is not the boolean option documented in the usage text.
if (typeof help !== 'undefined' && help === true) {
  print(usage);
  // Asking for help is not a failure, so exit with a success status.
  quit(0);
}

// Resolve the CSV file that maps HGVS variants to DITTO scores.
// `dittoScoreCSVPath` is normally injected with `--eval`; when it is absent the
// script falls back to a bundled fixture, and any non-string value is rejected.
if (typeof dittoScoreCSVPath === 'undefined') {
  // The previous default pointed at a JSON users fixture, which is not a ditto
  // score CSV. Default to the ditto fixture shipped alongside this script.
  // NOTE(review): assumes etc/fixtures is available under /tmp/fixtures, as in
  // the usage examples -- confirm against the container setup.
  dittoScoreCSVPath = "/tmp/fixtures/simplified_ros_pred_ann.csv";
} else if (typeof dittoScoreCSVPath !== 'string') {
  // Name the option exactly as the caller has to spell it.
  print("dittoScoreCSVPath must be a string containing file path");
  quit(1);
}

// Select the target database. A caller-supplied `databaseName` must be a
// string; when none was supplied the script uses "rosalution_db".
if (typeof databaseName !== 'string') {
  if (typeof databaseName === 'undefined') {
    databaseName = "rosalution_db";
  } else {
    print("databaseName must be a string");
    quit(1);
  }
}

// Re-point the shell's `db` handle at the chosen database for the rest of the script.
db = db.getSiblingDB(databaseName);

// Build `dittoScoreList`: one entry per CSV data row, pairing the HGVS variant
// (column 0) with a DITTO annotation whose value is the score (column 1).
var dittoScoreList = [];

// Read the file into rows of trimmed cells. Blank lines (for example the one
// produced by a trailing newline) are dropped so they cannot become entries
// with an empty variant and an undefined score.
// The plain comma split does not understand quoted fields; that is acceptable
// here only because the two columns that are read come before any quoted one.
var csvData = fs.readFileSync(dittoScoreCSVPath)
  .toString()
  .split('\n')
  .map((line) => line.trim())
  .filter((line) => line !== '')
  .map((line) => line.split(',').map((cell) => cell.trim()));

for (const [hgvsVariant, dittoValue] of csvData) {
  // Skip the header row.
  if (hgvsVariant === 'HGVS') {
    continue;
  }

  // A row without both a variant and a score cannot yield a usable annotation.
  if (hgvsVariant === '' || dittoValue === undefined || dittoValue === '') {
    console.log(`Skipping malformed ditto score row: ${hgvsVariant},${dittoValue}`);
    continue;
  }

  // NOTE(review): the score is kept as the string read from the CSV, exactly
  // as before -- confirm whether consumers expect a number instead.
  dittoScoreList.push({
    hgvs_variant: hgvsVariant,
    annotation: {
      DITTO: [{ data_source: 'DITTO', version: '', value: dittoValue }],
    },
  });
}

// Append each parsed DITTO annotation to the genomic unit that has the same
// `hgvs_variant`. Variants with no matching genomic unit are skipped. Any error
// prints the stack trace and the usage text, then exits with status 1.
try {
  let count = 0;

  // `for...of` visits the array elements only and keeps the loop variables local.
  for (const { hgvs_variant, annotation } of dittoScoreList) {
    // Only the `_id` is needed to target the update, so project everything else away.
    const genomic_unit = db.genomic_units.findOne({ hgvs_variant }, { _id: 1 });

    if (genomic_unit == null) {
      continue;
    }

    console.log(`Adding Ditto Score: ${annotation.DITTO[0].value} to genomic unit: ${hgvs_variant}`);

    // `$push` appends on the server, so the whole document is no longer read
    // and written back with `$set`, which could overwrite concurrent changes.
    // NOTE(review): re-running the script appends the annotation again, as the
    // previous implementation also did.
    db.genomic_units.updateOne(
      { _id: genomic_unit._id },
      { $push: { annotations: annotation } },
    );
    count++;
  }

  console.log(`${count} Ditto scores added! Exiting.`);
} catch (err) {
  console.log(err.stack);
  console.log(usage);
  quit(1);
}

52 changes: 52 additions & 0 deletions etc/fixtures/simplified_ros_pred_ann.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
HGVS,DITTO,transcript,gene,consequence,chrom,pos,ref_base,alt_base,RefSeq match transcript (MANE Select)
NM_000478.6:c.436G>A,0.99183565,ENST00000374840,ALPL,missense_variant,chr1,21563248,G,A,NM_000478.6
NM_170707.3:c.745C>T,1,ENST00000368300,LMNA,missense_variant,chr1,156134910,C,T,NM_170707.4
NM_003324.5:c.1144C>T,1,ENST00000448120,TULP3,missense_variant,chr12,2938234,C,T,NM_003324.5
NM_001193511.2:c.1099A>G,0.9999999,ENST00000547488,MAP3K12,missense_variant,chr12,53485096,T,C,NM_001193511.2
NM_005249.5:c.256dup,1,ENST00000313071,FOXG1,frameshift_elongation,chr14,28767536,-,C,NM_005249.5
NM_005249.5:c.924G>A,1,ENST00000313071,FOXG1,stop_gained,chr14,28768203,G,A,NM_005249.5
NM_170674.4:c.242A>T,0.99998474,ENST00000561208,MEIS2,missense_variant,chr15,37097970,T,A,NM_170675.5
NM_182958.2:c.304G>A,0.9999993,ENST00000219797,KAT8,missense_variant,chr16,31120356,G,A,NM_032188.3
NM_001365.3:c.1961C>T,0.99965537,ENST00000399506,DLG4,missense_variant,chr17,7192979,G,A,NM_001321075.3
NM_001365.4:c.1054C>T,1,ENST00000399506,DLG4,stop_gained,chr17,7196915,G,A,NM_001321075.3
NM_001365.4:c.1039del,1,ENST00000399506,DLG4,frameshift_truncation,chr17,7196930,C,-,NM_001321075.3
NM_001005271.3:c.3535G>A,0.9999955,ENST00000330494,CHD3,missense_variant,chr17,7902715,G,A,NM_001005273.3
NM_005993.4:c.1255G>A,0.9999999,ENST00000355528,TBCD,missense_variant,chr17,82814871,G,A,NM_005993.5
NM_005993.4:c.2305_2307delGAG,0.44043422,ENST00000355528,TBCD,inframe_deletion,chr17,82924983,GAG,-,NM_005993.5
NM_004539.4:c.1600C>T,1,ENST00000256854,NARS1,stop_gained,chr18,57601699,G,A,NM_004539.4
NM_001127221.1:c.1784+3A>C,0.99999994,ENST00000360228,CACNA1A,intron_variant,chr19,13308413,T,G,NM_001127222.2
NM_016457.5:c.1687G>A,1,ENST00000291281,PRKD2,missense_variant,chr19,46691750,C,T,NM_016457.5
NM_016457.5:c.1679G>C,0.99999994,ENST00000291281,PRKD2,missense_variant,chr19,46691758,C,G,NM_016457.5
NM_016457.5:c.889+1G>T,1,ENST00000291281,PRKD2,"intron_variant,splice_site_variant",chr19,46704168,C,A,NM_016457.5
NM_000836.4:c.2740C>A,0,ENST00000263269,GRIN2D,missense_variant,chr19,48442666,C,A,NM_000836.4
NM_022055.2:c.333_343del,1,ENST00000327876,KCNK12,frameshift_truncation,chr2,47569989,CCCAGCGCGGG,-,NM_022055.2
NM_198276.3:c.302G>T,1,ENST00000335390,TMEM17,missense_variant,chr2,62502453,C,A,NM_198276.3
NM_015265.3:c.1595G>A,0.99729127,ENST00000417098,SATB2,missense_variant,chr2,199308905,C,T,NM_001172509.2
NM_001230.4:c.1030C>G,0.13751435,ENST00000286186,CASP10,missense_variant,chr2,201209306,C,G,NM_032977.4
NM_001875.5:c.3422T>G,0.99998856,ENST00000233072,CPS1,missense_variant,chr2,210650380,T,G,NM_001875.5
NM_001008491.2:c.104G>A,1,ENST00000391971,SEPTIN2,missense_variant,chr2,241326087,G,A,NM_004404.5
NM_003392.7:c.248G>C,0.9999998,ENST00000264634,WNT5A,missense_variant,chr3,55479457,C,G,NM_003392.7
NM_000297.4:c.1967T>G,0.99999964,ENST00000237596,PKD2,missense_variant,chr4,88058051,T,G,NM_000297.4
NM_005859.5:c.533dup,0.9999998,ENST00000331327,PURA,frameshift_elongation,chr5,140114715,-,C,NM_005859.5
NM_016221.4:c.25dupC,1,ENST00000447998,DCTN4,frameshift_elongation,chr5,150758970,-,G,NM_016221.4
NM_004640.7:c.275G>A,1,ENST00000396172,DDX39B,missense_variant,chr6,31538827,C,T,NM_004640.7
NM_004640.7:c.368G>A,1,ENST00000396172,DDX39B,missense_variant,chr6,31539211,C,T,NM_004640.7
NM_004640.7:c.109G>T,0.99999994,ENST00000396172,DDX39B,missense_variant,chr6,31540424,C,A,NM_004640.7
NM_000492.3:c.1521_1523del,0.9999687,ENST00000003084,CFTR,inframe_deletion,chr7,117559592,CTT,-,NM_000492.4
NM_000238.4:c.1979C>T,1,ENST00000262186,KCNH2,missense_variant,chr7,150951087,G,A,NM_000238.4
NM_033402.5:c.105-1G>C,1,ENST00000360375,LRRCC1,"intron_variant,splice_site_variant",chr8,85109594,G,C,NM_033402.5
NM_004260.4:c.2743A>G,0.005189061,ENST00000617875,RECQL4,missense_variant,chr8,144512859,T,C,NM_004260.4
NM_004260.4:c.1488C>G,0.034600735,ENST00000617875,RECQL4,missense_variant,chr8,144515068,G,C,NM_004260.4
NM_004972.3:c.1694G>C,0.03832954,ENST00000381652,JAK2,missense_variant,chr9,5072544,G,C,NM_004972.4
NM_017617.3:c.1348G>A,0.8035152,ENST00000651671,NOTCH1,missense_variant,chr9,136517845,C,T,NM_017617.5
NM_000718.4:c.5992C>T,1,ENST00000371372,CACNA1B,stop_gained,chr9,138118730,C,T,NM_000718.4
NM_001001671.3:c.3294+2dup,0.99999905,ENST00000338883,MAP3K15,"intron_variant,splice_site_variant",chrX,19371344,-,A,NM_001001671.4
NM_004586.2:c.1460A>T,0.9999884,ENST00000379565,RPS6KA3,missense_variant,chrX,20167731,T,A,NM_004586.3
NM_006579.2:c.301+3G>C,0.9999976,ENST00000495186,EBP,intron_variant,chrX,48524075,G,C,NM_006579.3
NM_006306.4:c.3103C>T,1,ENST00000322213,SMC1A,stop_gained,chrX,53383124,G,A,NM_006306.4
NM_006306.4:c.3037C>T,1,ENST00000322213,SMC1A,stop_gained,chrX,53383190,G,A,NM_006306.4
NM_004463.3:c.2581G>T,1,ENST00000375135,FGD1,missense_variant,chrX,54446414,C,A,NM_004463.3
NM_002444.2:c.1574T>C,1,ENST00000360270,MSN,missense_variant,chrX,65739733,T,C,NM_002444.3
NM_001847.4:c.1235A>G,0,ENST00000334504,COL4A6,missense_variant,chrX,108191482,T,C,NM_033641.4
NM_001017980.3:c.164G>T,1,ENST00000330374,VMA21,missense_variant,chrX,151404916,G,T,NM_001017980.4
NM_001360016.2:c.563C>T,0.415904,ENST00000393562,G6PD,missense_variant,chrX,154534419,G,A,NM_001360016.2

0 comments on commit f7ef565

Please sign in to comment.