Skip to content

Commit

Permalink
#2015 Import/Export of variant monomers from Fasta/Sequence
Browse files: browse the repository at this point in the history
Add variant monomer support for Sequence/FASTA. Add unit tests.
  • Loading branch information
AliaksandrDziarkach committed Aug 14, 2024
1 parent 179c0bc commit a6d4c91
Show file tree
Hide file tree
Showing 39 changed files with 23,451 additions and 12,739 deletions.
20 changes: 7 additions & 13 deletions api/c/indigo/src/indigo_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,11 +568,10 @@ CEXPORT int indigoLoadSequence(int source, const char* seq_type, int library)
IndigoObject& lib_obj = self.getObject(library);
SequenceLoader loader(IndigoScanner::get(obj), IndigoMonomerLibrary::get(lib_obj));

std::unique_ptr<IndigoMolecule> molptr = std::make_unique<IndigoMolecule>();
std::unique_ptr<IndigoKetDocument> docptr = std::make_unique<IndigoKetDocument>();

Molecule& mol = molptr->mol;
loader.loadSequence(mol, seq_type);
return self.addObject(molptr.release());
loader.loadSequence(docptr->get(), seq_type);
return self.addObject(docptr.release());
}
INDIGO_END(-1);
}
Expand Down Expand Up @@ -619,11 +618,10 @@ CEXPORT int indigoLoadFasta(int source, const char* seq_type, int library)
IndigoObject& lib_obj = self.getObject(library);
SequenceLoader loader(IndigoScanner::get(obj), IndigoMonomerLibrary::get(lib_obj));

std::unique_ptr<IndigoMolecule> molptr = std::make_unique<IndigoMolecule>();
std::unique_ptr<IndigoKetDocument> docptr = std::make_unique<IndigoKetDocument>();

Molecule& mol = molptr->mol;
loader.loadFasta(mol, seq_type);
return self.addObject(molptr.release());
loader.loadFasta(docptr->get(), seq_type);
return self.addObject(docptr.release());
}
INDIGO_END(-1);
}
Expand Down Expand Up @@ -671,11 +669,8 @@ CEXPORT int indigoLoadIdt(int source, int library)
MonomerTemplateLibrary& lib = IndigoMonomerLibrary::get(lib_obj);
SequenceLoader loader(IndigoScanner::get(obj), lib);

// std::unique_ptr<IndigoMolecule> molptr = std::make_unique<IndigoMolecule>();
std::unique_ptr<IndigoKetDocument> docptr = std::make_unique<IndigoKetDocument>();

// Molecule& mol = molptr->mol;
// loader.loadIdt(mol);
loader.loadIdt(docptr->get());
return self.addObject(docptr.release());
}
Expand Down Expand Up @@ -3067,7 +3062,6 @@ CEXPORT int indigoAddDataSGroup(int molecule, int natoms, int* atoms, int nbonds
BaseMolecule& mol = self.getObject(molecule).getBaseMolecule();
int idx = mol.sgroups.addSGroup(SGroup::SG_TYPE_DAT);
DataSGroup& dsg = (DataSGroup&)mol.sgroups.getSGroup(idx);
int i;
if (atoms != nullptr)
dsg.atoms.concat(atoms, natoms);

Expand Down Expand Up @@ -4780,7 +4774,7 @@ CEXPORT int indigoNameToStructure(const char* name, const char* params)
Duplicate params string as we call destructive function strtok() on callee side
We can get rid of it if we have sustainable options in the future
*/
char* options = ::strdup(params);
char* options = ::_strdup(params);
if (options)
{
parser.setOptions(options);
Expand Down
18 changes: 17 additions & 1 deletion api/c/indigo/src/indigo_savers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ CEXPORT int indigoSaveSequence(int item, int output, int library)
{
IndigoObject& obj = self.getObject(item);
Output& out = IndigoOutput::get(self.getObject(output));
if (IndigoBaseMolecule::is(obj) || IndigoKetDocument::is(obj))
if (IndigoBaseMolecule::is(obj))
{
IndigoObject& lib_obj = self.getObject(library);
SequenceSaver saver(out, IndigoMonomerLibrary::get(lib_obj));
Expand All @@ -600,6 +600,14 @@ CEXPORT int indigoSaveSequence(int item, int output, int library)
out.flush();
return 1;
}
else if (IndigoKetDocument::is(obj))
{
IndigoObject& lib_obj = self.getObject(library);
SequenceSaver saver(out, IndigoMonomerLibrary::get(lib_obj));
saver.saveKetDocument(static_cast<IndigoKetDocument&>(obj).get());
out.flush();
return 1;
}
throw IndigoError("indigoSaveSequence(): expected molecule, got %s", obj.debugInfo());
}
INDIGO_END(-1);
Expand All @@ -620,6 +628,14 @@ CEXPORT int indigoSaveFasta(int item, int output, int library)
out.flush();
return 1;
}
else if (IndigoKetDocument::is(obj))
{
IndigoObject& lib_obj = self.getObject(library);
SequenceSaver saver(out, IndigoMonomerLibrary::get(lib_obj));
saver.saveKetDocument(static_cast<IndigoKetDocument&>(obj).get(), SequenceSaver::SeqFormat::FASTA);
out.flush();
return 1;
}
throw IndigoError("indigoSaveFasta(): expected molecule, got %s", obj.debugInfo());
}
INDIGO_END(-1);
Expand Down
3 changes: 3 additions & 0 deletions api/tests/integration/ref/formats/fasta_to_ket.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ break_peptide.ket:SUCCEED
break_rna.fasta:SEQUENCE loader: Invalid symbols in the sequence: *
comment.ket:SUCCEED
test_1881.ket:SUCCEED
aminoacids_variants.ket:SUCCEED
rna_variants.ket:SUCCEED
dna_variants.ket:SUCCEED
2 changes: 1 addition & 1 deletion api/tests/integration/ref/formats/idt_unresolved.py.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
*** IDT unresolved to misc. unsupported formats ***
Sequence saver: i2AmPr cannot be written in sequence/FASTA format.
Sequence saver: Can't save chem 'i2AmPr' to sequence format
molecule CDXML saver: i2AmPr cannot be written in CDXML/CDX format.
molfile saver: i2AmPr cannot be written in MDL Molfile format.
SMILES saver: i2AmPr cannot be written in SMILES/SMARTS format.
Expand Down
3 changes: 3 additions & 0 deletions api/tests/integration/ref/formats/seq_to_ket.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ all_aminoacids.ket:SUCCEED
rna_acgtu.ket:SUCCEED
dna_acgtu.ket:SUCCEED
spaces.ket:SUCCEED
aminoacids_variants.ket:SUCCEED
rna_variants.ket:SUCCEED
dna_variants.ket:SUCCEED
5 changes: 3 additions & 2 deletions api/tests/integration/tests/formats/fasta_to_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ def find_diff(a, b):
{"file": "multiseq", "seq_type": "DNA"},
]

# empty library - internal used for now
lib = indigo.loadMonomerLibrary('{"root":{}}')
lib = indigo.loadMonomerLibraryFromFile(
os.path.join(ref_path, "monomer_library.ket")
)

for desc in fasta_files:
filename = desc["file"]
Expand Down
8 changes: 6 additions & 2 deletions api/tests/integration/tests/formats/fasta_to_ket.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def find_diff(a, b):
{"file": "break_rna", "seq_type": "RNA"},
{"file": "comment", "seq_type": "PEPTIDE"},
{"file": "test_1881", "seq_type": "PEPTIDE"},
{"file": "aminoacids_variants", "seq_type": "PEPTIDE"},
{"file": "rna_variants", "seq_type": "RNA"},
{"file": "dna_variants", "seq_type": "DNA"},
]


Expand All @@ -41,8 +44,9 @@ def remove_prefix(s, prefix="com.epam.indigo.IndigoException: "):
return s


# empty library - internal used for now
lib = indigo.loadMonomerLibrary('{"root":{}}')
lib = indigo.loadMonomerLibraryFromFile(
os.path.join(ref_path, "monomer_library.ket")
)

for desc in fasta_files:
filename = desc["file"]
Expand Down
5 changes: 3 additions & 2 deletions api/tests/integration/tests/formats/genbank_to_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ def find_diff(a, b):
{"file": "1844-gen_pept", "seq_type": "PEPTIDE"},
]

# empty library - internal used for now
lib = indigo.loadMonomerLibrary('{"root":{}}')
lib = indigo.loadMonomerLibraryFromFile(
os.path.join(ref_path, "monomer_library.ket")
)

for infile in files:
filename = infile["file"] + ".seq"
Expand Down
5 changes: 3 additions & 2 deletions api/tests/integration/tests/formats/ket_to_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ def find_diff(a, b):
"1950-mixed-seq",
]

# empty library - internal used for now
lib = indigo.loadMonomerLibrary('{"root":{}}')
lib = indigo.loadMonomerLibraryFromFile(
os.path.join(ref_path, "monomer_library.ket")
)

files.sort()
for filename in files:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>Sequence1
BJZX
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>Sequence1
BN
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>Sequence1
KN
Loading

0 comments on commit a6d4c91

Please sign in to comment.