Updating jmzIdentML and adding Comet TSV parser #203

Merged
merged 4 commits on Dec 15, 2023
17 changes: 13 additions & 4 deletions pom.xml
@@ -4,7 +4,7 @@

<groupId>de.mpc.pia</groupId>
<artifactId>pia</artifactId>
<version>1.4.10</version>
<version>1.5.0</version>
<name>PIA - Protein Inference Algorithms</name>
<url>https://github.com/mpc-bioinformatics/pia</url>

@@ -43,9 +43,9 @@
<junit.version>4.13.2</junit.version>
<commons-collections.version>4.4</commons-collections.version>
<commons-text.version>1.11.0</commons-text.version>
<jmzidentml.version>1.2.11</jmzidentml.version>
<jmzidentml.version>1.2.13</jmzidentml.version>
<jmztab.version>3.0.8</jmztab.version>
<pride-mod.version>2.1.8</pride-mod.version>
<pride-mod.version>2.1.12</pride-mod.version>
<pride-jaxb.version>1.0.22</pride-jaxb.version>
<xxindex.version>0.23</xxindex.version>
<mascotdatfile.version>3.6.1</mascotdatfile.version>
@@ -147,6 +147,12 @@
<artifactId>pride-mod</artifactId>
<version>${pride-mod.version}</version>
</dependency>

<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>8.5.12</version>
</dependency>
<!-- End pride mod dependency -->

<!-- mzTab dependencies -->
@@ -327,6 +333,9 @@
<exclude>src/test/*.class</exclude>
</excludes>
<archive>
<manifestEntries>
<Add-Opens>java.base/sun.reflect.annotation</Add-Opens>
</manifestEntries>
<index>true</index>
<manifest>
<!-- Adds the classpath to the created manifest -->
@@ -384,7 +393,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.2</version>
<configuration>
<argLine>${argLine} -Xmx2560m</argLine>
<argLine>${argLine} -Xmx2560m --add-opens java.base/sun.reflect.annotation=ALL-UNNAMED</argLine>
</configuration>
</plugin>
</plugins>
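For context on the new Add-Opens manifest entry and the --add-opens addition to the surefire argLine: on JDK 16 and later, deep reflection into JDK-internal packages such as sun.reflect.annotation is blocked unless the package is explicitly opened, and the JAXB-based unmarshalling used by jmzIdentML can trip over this. The following is a minimal sketch, not part of this PR and with an invented class name, of the failure these settings avoid:

// Sketch only: shows the reflective access that the Add-Opens manifest entry
// and the surefire --add-opens flag make legal again on JDK 16+.
import java.lang.reflect.Method;

public class AddOpensCheck {
    public static void main(String[] args) throws Exception {
        // an internal JDK class in the package opened above
        Class<?> parser = Class.forName("sun.reflect.annotation.AnnotationParser");
        Method method = parser.getDeclaredMethods()[0];
        try {
            // without "--add-opens java.base/sun.reflect.annotation=ALL-UNNAMED"
            // (or the equivalent Add-Opens manifest entry) this throws
            // java.lang.reflect.InaccessibleObjectException on JDK 16+
            method.setAccessible(true);
            System.out.println("sun.reflect.annotation is open to the unnamed module");
        } catch (RuntimeException e) {
            System.out.println("package is not opened: " + e);
        }
    }
}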
InputFileParserFactory.java
@@ -9,6 +9,7 @@
import org.apache.logging.log4j.Logger;

import de.mpc.pia.intermediate.compiler.PIACompiler;
import de.mpc.pia.intermediate.compiler.parser.searchengines.CometTSVFileParser;
import de.mpc.pia.intermediate.compiler.parser.searchengines.MascotDatFileParser;
import de.mpc.pia.intermediate.compiler.parser.searchengines.TandemFileParser;
import de.mpc.pia.intermediate.compiler.parser.searchengines.ThermoMSFFileParser;
@@ -20,6 +21,38 @@ public class InputFileParserFactory {
private static final Logger LOGGER = LogManager.getLogger();

public enum InputFileTypes {

/**
* the input file is a Comet TSV file
*/
COMET_TSV_INPUT {
@Override
public String getFileSuffix() {
return "txt";
}

@Override
public String getFileTypeName() {
return "Comet TSV";
}

@Override
public String getFileTypeShort() {
return "comet";
}

@Override
public boolean checkFileType(String fileName) {
return CometTSVFileParser.checkFileType(fileName);
}

@Override
public boolean parseFile(String name, String fileName,
PIACompiler compiler, String additionalInfoFileName) {
return CometTSVFileParser.getDataFromCometTSVFile(name, fileName, compiler);
}
},

/**
* the input file is a FASTA database file
*/
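To show how the new COMET_TSV_INPUT entry is meant to be picked up, here is a minimal dispatch sketch using only the enum methods visible in this diff. The loop and the import path of the nested enum are assumptions, not code from the factory itself:

// Sketch only: pick the first input type whose checkFileType() accepts the file.
// For a Comet TSV result (default suffix "txt") this should resolve to
// COMET_TSV_INPUT and delegate to CometTSVFileParser.getDataFromCometTSVFile().
import de.mpc.pia.intermediate.compiler.PIACompiler;
import de.mpc.pia.intermediate.compiler.parser.InputFileParserFactory.InputFileTypes;

public class CometTsvDispatchSketch {

    public static boolean compile(String name, String fileName, PIACompiler compiler) {
        for (InputFileTypes type : InputFileTypes.values()) {
            if (type.checkFileType(fileName)) {
                // no additional info file is assumed for the TSV case
                return type.parseFile(name, fileName, compiler, null);
            }
        }
        return false;
    }
}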
@@ -135,6 +135,12 @@ private boolean parseFile(String name, String fileName) {

// get the AnalysisCollection:SpectrumIdentification for the SpectrumIdentificationLists
AnalysisCollection analysisCollection = unmarshaller.unmarshal(AnalysisCollection.class);

LOGGER.debug("scanning analysisCollection: " + analysisCollection
+ "\n\tgetSpectrumIdentification " + analysisCollection.getSpectrumIdentification()
+ "\n\tgetProteinDetection " + analysisCollection.getProteinDetection()
);

for (SpectrumIdentification si : analysisCollection.getSpectrumIdentification()) {
if (specIdLists.keySet().contains(si.getSpectrumIdentificationListRef())) {
// if the SpectrumIdentification's SpectrumIdentificationList is in the file, we need the SpectrumIdentification
@@ -165,8 +171,6 @@ private boolean parseFile(String name, String fileName) {
spectraDataRefs.put(ref, sd);
});

LOGGER.debug("Number of spectraData in inputs: " + inputs.getSpectraData().size());

// get the necessary inputs:SearchDBs
inputs.getSearchDatabase().stream()
.filter(searchDB -> neededSearchDatabases.contains(searchDB.getId()))
@@ -189,23 +193,37 @@ private boolean parseFile(String name, String fileName) {
// update the PIAFile's references for SpectraData, SearchDBs and AnalysisSoftwares
file.updateReferences(spectraDataRefs, searchDBRefs, analysisSoftwareRefs);

// get/hash the SequenceCollection:PeptideEvidences
SequenceCollection sc = unmarshaller.unmarshal(SequenceCollection.class);
peptideEvidences = new HashMap<>();
for (PeptideEvidence pepEvidence : sc.getPeptideEvidence()) {
peptideEvidences.put(pepEvidence.getId(), pepEvidence);
}

// get/hash the SequenceCollection:DBSequences
dbSequences = new HashMap<>();
for (DBSequence dbSeq : sc.getDBSequence()) {
dbSequences.put(dbSeq.getId(), dbSeq);

LOGGER.debug("added dbSequence: " + dbSeq.getId() + " -> " + dbSequences.get(dbSeq.getId()));
}

// get/hash the SequenceCollection:Peptides
peptides = new HashMap<>();
for (uk.ac.ebi.jmzidml.model.mzidml.Peptide peptide: sc.getPeptide()) {
peptides.put(peptide.getId(), peptide);

LOGGER.debug("added peptide: " + peptide.getId()
+ " -> " + peptides.get(peptide.getId())
+ "\n\tpeptideSequence " + peptide.getPeptideSequence()
);
}

// get/hash the SequenceCollection:PeptideEvidences
peptideEvidences = new HashMap<>();
for (PeptideEvidence pepEvidence : sc.getPeptideEvidence()) {
peptideEvidences.put(pepEvidence.getId(), pepEvidence);

LOGGER.debug("added pepEvidence: " + pepEvidence.getId()
+ " -> " + peptideEvidences.get(pepEvidence.getId())
+ "\n\tdbSequenceRef " + pepEvidence.getDBSequenceRef()
+ "\n\tdbSequence " + pepEvidence.getDBSequence()
);
}


@@ -667,7 +685,8 @@ private Peptide parseSIIPeptideEvidences(List<PeptideEvidenceRef> peptideEvidenc

DBSequence dbSeq = dbSequences.get(pepEvidence.getDBSequenceRef());
if (dbSeq == null) {
LOGGER.error("DBSequence " + pepEvidence.getDBSequenceRef() + " not found!");
LOGGER.error("DBSequence " + pepEvidence.getDBSequenceRef()
+ " for pepEvidence " + pepEvidence.getId() + " not found!");
return null;
}

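The LOGGER.debug() calls added in this file only produce output when DEBUG is enabled for the parser's logger. Below is a minimal sketch of switching it on programmatically with Log4j2; the logger name is an assumption based on the project's de.mpc.pia package prefix:

// Sketch only: LogManager.getLogger() names loggers after the declaring class,
// so raising the "de.mpc.pia" parent logger to DEBUG also covers this parser.
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.config.Configurator;

public class EnableParserDebugLogging {
    public static void main(String[] args) {
        Configurator.setLevel("de.mpc.pia", Level.DEBUG);
        // ... run the PIA compilation afterwards; the new debug statements for the
        // AnalysisCollection, DBSequences, Peptides and PeptideEvidences will print
    }
}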