diff --git a/api/tests/integration/tests/formats/ket_to_helm.py b/api/tests/integration/tests/formats/ket_to_helm.py index e063147ea9..705056fcb8 100644 --- a/api/tests/integration/tests/formats/ket_to_helm.py +++ b/api/tests/integration/tests/formats/ket_to_helm.py @@ -46,7 +46,7 @@ def find_diff(a, b): "helm_mixed_custom": "RNA1{[dR](A:10+[Xan]:20+G:30+T:50)P.[dR](A:10+C:20+G:30+T:50)P.[dR](A+C+G+T)}$$$$V2.0", "helm_aminoacids_variants": "PEPTIDE1{([Dha]+N).(L+I).(E+Q).(A+C+D+E+F+G+H+I+K+L+M+N+O+P+Q+R+S+T+U+V+W+Y)}$$$$V2.0", "dna_variants": "RNA1{[dR](C+G+T)P.[dR](A+C+G+T)}$$$$V2.0", - "rna_variants": "RNA1{R(G+T)P.R(A+C+G+T)}$$$$V2.0", + "rna_variants": "RNA1{R(A+G)P.R(G+T)P.R(A+C+G+T)}$$$$V2.0", "helm_monomer_molecule": "PEPTIDE1{A}|PEPTIDE2{G}|CHEM1{[C(N[*:2])=C[*:1] |$;;_R2;;_R1$|]}$CHEM1,PEPTIDE1,1:R2-1:R1$$$V2.0", } diff --git a/api/tests/integration/tests/formats/molecules/rna_variants.fasta b/api/tests/integration/tests/formats/molecules/rna_variants.fasta index 953114766f..69f6bceec9 100644 --- a/api/tests/integration/tests/formats/molecules/rna_variants.fasta +++ b/api/tests/integration/tests/formats/molecules/rna_variants.fasta @@ -1,2 +1,2 @@ >Sequence1 -KN +RKN diff --git a/api/tests/integration/tests/formats/ref/rna_variants.ket b/api/tests/integration/tests/formats/ref/rna_variants.ket index 523a6ee377..fdb05acd3c 100644 --- a/api/tests/integration/tests/formats/ref/rna_variants.ket +++ b/api/tests/integration/tests/formats/ref/rna_variants.ket @@ -15,6 +15,15 @@ }, { "$ref": "monomer4" + }, + { + "$ref": "monomer5" + }, + { + "$ref": "ambiguousMonomer-6" + }, + { + "$ref": "monomer7" } ], "connections": [ @@ -61,26 +70,62 @@ "monomerId": "monomer2", "attachmentPointId": "R1" } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer5", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "ambiguousMonomer-6", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer7", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer7", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer5", + "attachmentPointId": "R1" + } } ], "templates": [ { - "$ref": "monomerTemplate-G___Guanine" + "$ref": "monomerTemplate-A___Adenine" }, { - "$ref": "monomerTemplate-T___Thymine" + "$ref": "monomerTemplate-G___Guanine" }, { "$ref": "monomerTemplate-R___Ribose" }, { - "$ref": "monomerTemplate-A___Adenine" + "$ref": "monomerTemplate-T___Thymine" + }, + { + "$ref": "monomerTemplate-P___Phosphate" }, { "$ref": "monomerTemplate-C___Cytosine" }, { - "$ref": "monomerTemplate-P___Phosphate" + "$ref": "ambiguousMonomerTemplate-R" }, { "$ref": "ambiguousMonomerTemplate-K" @@ -109,8 +154,8 @@ "y": -1.600000 }, "seqid": 1, - "alias": "K", - "templateId": "K" + "alias": "R", + "templateId": "R" }, "monomer2": { "type": "monomer", @@ -131,8 +176,8 @@ "y": -1.600000 }, "seqid": 2, - "alias": "N", - "templateId": "N" + "alias": "K", + "templateId": "K" }, "monomer4": { "type": "monomer", @@ -145,21 +190,54 @@ "alias": "P", "templateId": "P___Phosphate" }, - "monomerTemplate-G___Guanine": { + "monomer5": { + "type": "monomer", + "id": "5", + "seqid": 3, + "position": { + "x": 6.400000, + "y": -0.000000 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "ambiguousMonomer-6": { + "type": "ambiguousMonomer", + "id": "6", + "position": { + "x": 6.400000, + "y": -1.600000 + }, + "seqid": 3, + "alias": "N", + "templateId": "N" + }, + "monomer7": { + "type": "monomer", + "id": "7", + "seqid": 2, + "position": { + "x": 4.800000, + "y": -0.000000 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomerTemplate-A___Adenine": { "type": "monomerTemplate", - "id": "G___Guanine", + "id": "A___Adenine", "class": "Base", "classHELM": "RNA", - "fullName": "Guanine", - "alias": "G", - "naturalAnalogShort": "G", + "fullName": "Adenine", + "alias": "A", + "naturalAnalogShort": "A", "attachmentPoints": [ { "attachmentAtom": 6, "type": "left", "leavingGroup": { "atoms": [ - 11 + 10 ] } } @@ -238,21 +316,13 @@ ] }, { - "label": "O", + "label": "N", "location": [ 2.176800, -0.120900, 0.000000 ] }, - { - "label": "N", - "location": [ - -0.952700, - 3.354200, - 0.000000 - ] - }, { "label": "H", "location": [ @@ -264,14 +334,14 @@ ], "bonds": [ { - "type": 2, + "type": 1, "atoms": [ 0, 9 ] }, { - "type": 1, + "type": 2, "atoms": [ 0, 5 @@ -326,13 +396,6 @@ 5 ] }, - { - "type": 1, - "atoms": [ - 4, - 10 - ] - }, { "type": 1, "atoms": [ @@ -344,7 +407,7 @@ "type": 1, "atoms": [ 6, - 11 + 10 ] }, { @@ -356,21 +419,21 @@ } ] }, - "monomerTemplate-T___Thymine": { + "monomerTemplate-G___Guanine": { "type": "monomerTemplate", - "id": "T___Thymine", + "id": "G___Guanine", "class": "Base", "classHELM": "RNA", - "fullName": "Thymine", - "alias": "T", - "naturalAnalogShort": "T", + "fullName": "Guanine", + "alias": "G", + "naturalAnalogShort": "G", "attachmentPoints": [ { - "attachmentAtom": 3, + "attachmentAtom": 6, "type": "left", "leavingGroup": { "atoms": [ - 8 + 11 ] } } @@ -379,80 +442,96 @@ { "label": "C", "location": [ - 1.861700, - 1.349900, + 1.035400, + 0.249800, 0.000000 ] }, { "label": "C", "location": [ - 1.111700, - 0.050900, + -0.079200, + -0.754000, 0.000000 ] }, { "label": "C", "location": [ - -0.388300, - 0.050900, + -1.505700, + -0.290600, 0.000000 ] }, { "label": "N", "location": [ - -1.138200, - 1.350000, + -1.817700, + 1.176600, 0.000000 ] }, { "label": "C", "location": [ - -0.388200, - 2.649000, + -0.703100, + 2.180400, 0.000000 ] }, { "label": "N", "location": [ - 1.111700, - 2.648900, + 0.723500, + 1.717000, 0.000000 ] }, { - "label": "O", + "label": "N", "location": [ - 3.061800, - 1.349900, + -2.387100, + -1.503400, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -1.505300, + -2.716800, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -0.078700, + -2.253200, 0.000000 ] }, { "label": "O", "location": [ - -0.988200, - 3.688200, + 2.176800, + -0.120900, 0.000000 ] }, { - "label": "H", + "label": "N", "location": [ - -2.338300, - 1.350000, + -0.952700, + 3.354200, 0.000000 ] }, { - "label": "C", + "label": "H", "location": [ - 1.711700, - -0.988400, + -3.587100, + -1.503400, 0.000000 ] } @@ -462,7 +541,7 @@ "type": 2, "atoms": [ 0, - 6 + 9 ] }, { @@ -479,6 +558,13 @@ 1 ] }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, { "type": 2, "atoms": [ @@ -486,6 +572,13 @@ 2 ] }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, { "type": 1, "atoms": [ @@ -494,7 +587,7 @@ ] }, { - "type": 1, + "type": 2, "atoms": [ 3, 4 @@ -503,29 +596,36 @@ { "type": 1, "atoms": [ - 3, - 8 + 4, + 5 ] }, { - "type": 2, + "type": 1, "atoms": [ 4, - 7 + 10 ] }, { "type": 1, "atoms": [ - 4, - 5 + 6, + 7 ] }, { "type": 1, "atoms": [ - 1, - 9 + 6, + 11 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 ] } ] @@ -760,21 +860,21 @@ } ] }, - "monomerTemplate-A___Adenine": { + "monomerTemplate-T___Thymine": { "type": "monomerTemplate", - "id": "A___Adenine", + "id": "T___Thymine", "class": "Base", "classHELM": "RNA", - "fullName": "Adenine", - "alias": "A", - "naturalAnalogShort": "A", + "fullName": "Thymine", + "alias": "T", + "naturalAnalogShort": "T", "attachmentPoints": [ { - "attachmentAtom": 6, + "attachmentAtom": 3, "type": "left", "leavingGroup": { "atoms": [ - 10 + 8 ] } } @@ -783,102 +883,94 @@ { "label": "C", "location": [ - 1.035400, - 0.249800, + 1.861700, + 1.349900, 0.000000 ] }, { "label": "C", "location": [ - -0.079200, - -0.754000, + 1.111700, + 0.050900, 0.000000 ] }, { "label": "C", "location": [ - -1.505700, - -0.290600, + -0.388300, + 0.050900, 0.000000 ] }, { "label": "N", "location": [ - -1.817700, - 1.176600, + -1.138200, + 1.350000, 0.000000 ] }, { "label": "C", "location": [ - -0.703100, - 2.180400, - 0.000000 - ] - }, - { - "label": "N", - "location": [ - 0.723500, - 1.717000, + -0.388200, + 2.649000, 0.000000 ] }, { "label": "N", "location": [ - -2.387100, - -1.503400, + 1.111700, + 2.648900, 0.000000 ] }, { - "label": "C", + "label": "O", "location": [ - -1.505300, - -2.716800, + 3.061800, + 1.349900, 0.000000 ] }, { - "label": "N", + "label": "O", "location": [ - -0.078700, - -2.253200, + -0.988200, + 3.688200, 0.000000 ] }, { - "label": "N", + "label": "H", "location": [ - 2.176800, - -0.120900, + -2.338300, + 1.350000, 0.000000 ] }, { - "label": "H", + "label": "C", "location": [ - -3.587100, - -1.503400, + 1.711700, + -0.988400, 0.000000 ] } ], "bonds": [ { - "type": 1, + "type": 2, "atoms": [ 0, - 9 + 6 ] }, { - "type": 2, + "type": 1, "atoms": [ 0, 5 @@ -892,38 +984,38 @@ ] }, { - "type": 1, + "type": 2, "atoms": [ - 8, - 1 + 1, + 2 ] }, { - "type": 2, + "type": 1, "atoms": [ - 1, - 2 + 2, + 3 ] }, { "type": 1, "atoms": [ - 6, - 2 + 3, + 4 ] }, { "type": 1, "atoms": [ - 2, - 3 + 3, + 8 ] }, { "type": 2, "atoms": [ - 3, - 4 + 4, + 7 ] }, { @@ -936,22 +1028,109 @@ { "type": 1, "atoms": [ - 6, - 7 + 1, + 9 ] + } + ] + }, + "monomerTemplate-P___Phosphate": { + "type": "monomerTemplate", + "id": "P___Phosphate", + "class": "Phosphate", + "classHELM": "RNA", + "fullName": "Phosphate", + "alias": "P", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "type": "left", + "leavingGroup": { + "atoms": [ + 1 + ] + } }, + { + "attachmentAtom": 0, + "type": "right", + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.239900, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -1.439900, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.359800, + -1.039400, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.960100, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.359800, + 1.039400, + 0.000000 + ] + } + ], + "bonds": [ { "type": 1, "atoms": [ - 6, - 10 + 0, + 1 ] }, { "type": 2, "atoms": [ - 7, - 8 + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 ] } ] @@ -1115,104 +1294,17 @@ } ] }, - "monomerTemplate-P___Phosphate": { - "type": "monomerTemplate", - "id": "P___Phosphate", - "class": "Phosphate", - "classHELM": "RNA", - "fullName": "Phosphate", - "alias": "P", - "naturalAnalogShort": "P", - "attachmentPoints": [ - { - "attachmentAtom": 0, - "type": "left", - "leavingGroup": { - "atoms": [ - 1 - ] - } - }, - { - "attachmentAtom": 0, - "type": "right", - "leavingGroup": { - "atoms": [ - 3 - ] - } - } - ], - "atoms": [ - { - "label": "P", - "location": [ - -0.239900, - 0.000000, - 0.000000 - ] - }, - { - "label": "O", - "location": [ - -1.439900, - 0.000000, - 0.000000 - ] - }, - { - "label": "O", - "location": [ - 0.359800, - -1.039400, - 0.000000 - ] - }, - { - "label": "O", - "location": [ - 0.960100, - 0.000000, - 0.000000 - ] - }, - { - "label": "O", - "location": [ - 0.359800, - 1.039400, - 0.000000 - ] - } - ], - "bonds": [ - { - "type": 1, - "atoms": [ - 0, - 1 - ] - }, - { - "type": 2, - "atoms": [ - 0, - 2 - ] - }, + "ambiguousMonomerTemplate-R": { + "type": "ambiguousMonomerTemplate", + "subtype": "mixture", + "id": "R", + "alias": "R", + "options": [ { - "type": 1, - "atoms": [ - 0, - 3 - ] + "templateId": "A___Adenine" }, { - "type": 1, - "atoms": [ - 0, - 4 - ] + "templateId": "G___Guanine" } ] }, diff --git a/api/tests/integration/tests/formats/seq_to_ket.py b/api/tests/integration/tests/formats/seq_to_ket.py index 6f8c0120f2..b24c759b66 100644 --- a/api/tests/integration/tests/formats/seq_to_ket.py +++ b/api/tests/integration/tests/formats/seq_to_ket.py @@ -36,7 +36,7 @@ def find_diff(a, b): "ref": "spaces", }, {"seq_type": "PEPTIDE", "seq_data": "BJZX", "ref": "aminoacids_variants"}, - {"seq_type": "RNA", "seq_data": "KN", "ref": "rna_variants"}, + {"seq_type": "RNA", "seq_data": "RKN", "ref": "rna_variants"}, {"seq_type": "DNA", "seq_data": "BN", "ref": "dna_variants"}, ] diff --git a/core/indigo-core/molecule/sequence_loader.h b/core/indigo-core/molecule/sequence_loader.h index 1859f57048..7c06497980 100644 --- a/core/indigo-core/molecule/sequence_loader.h +++ b/core/indigo-core/molecule/sequence_loader.h @@ -130,6 +130,7 @@ namespace indigo int _col; MonomerTemplateLibrary& _library; std::map _alias_to_id; + std::map _var_alias_to_id; int _unknown_variants_count; std::map _opts_to_template_id; }; diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index e22eff7da7..196a67abdf 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -403,7 +403,7 @@ void SequenceLoader::addMonomer(KetDocument& document, const std::string& monome templ.setAttachmentPoints(aa_aps); else templ.setAttachmentPoints(base_aps); - _alias_to_id.emplace(monomer, monomer); + _var_alias_to_id.emplace(monomer, monomer); } std::string sugar_alias = seq_type == SeqType::RNASeq ? "R" : "dR"; @@ -439,9 +439,9 @@ void SequenceLoader::addAminoAcid(KetDocument& document, const std::string& mono { Vec3f pos(_col * MoleculeLayout::DEFAULT_BOND_LENGTH, -MoleculeLayout::DEFAULT_BOND_LENGTH * _row, 0); auto amino_idx = document.monomers().size(); - auto& amino_acid = variant ? document.addVariantMonomer(monomer, _alias_to_id.at(monomer)) : document.addMonomer(monomer, _alias_to_id.at(monomer)); + auto& amino_acid = variant ? document.addVariantMonomer(monomer, _var_alias_to_id.at(monomer)) : document.addMonomer(monomer, _alias_to_id.at(monomer)); if (variant) - amino_acid->setAttachmentPoints(document.variantTemplates().at(_alias_to_id.at(monomer)).attachmentPoints()); + amino_acid->setAttachmentPoints(document.variantTemplates().at(_var_alias_to_id.at(monomer)).attachmentPoints()); else amino_acid->setAttachmentPoints(document.templates().at(_alias_to_id.at(monomer)).attachmentPoints()); amino_acid->setIntProp("seqid", _seq_id); @@ -468,9 +468,9 @@ void SequenceLoader::addNucleotide(KetDocument& document, const std::string& bas { auto nuc_base_idx = document.monomers().size(); auto& base = - variant ? document.addVariantMonomer(base_alias, _alias_to_id.at(base_alias)) : document.addMonomer(base_alias, _alias_to_id.at(base_alias)); + variant ? document.addVariantMonomer(base_alias, _var_alias_to_id.at(base_alias)) : document.addMonomer(base_alias, _alias_to_id.at(base_alias)); if (variant) - base->setAttachmentPoints(document.variantTemplates().at(base_alias).attachmentPoints()); + base->setAttachmentPoints(document.variantTemplates().at(_var_alias_to_id.at(base_alias)).attachmentPoints()); else base->setAttachmentPoints(document.templates().at(_alias_to_id.at(base_alias)).attachmentPoints()); base->setIntProp("seqid", _seq_id); @@ -1123,7 +1123,7 @@ void SequenceLoader::loadIdt(KetDocument& document) auto& templ = document.addVariantMonomerTemplate("mixture", idt_alias, idt_alias, IdtAlias(), options); static const std::map aps{{"R1", -1}}; templ.setAttachmentPoints(aps); - _alias_to_id.emplace(idt_alias, idt_alias); + _var_alias_to_id.emplace(idt_alias, idt_alias); } else if (ratios.has_value()) throw Error("Variant monomer %s redefinion", idt_alias.c_str());