Skip to content

Commit

Permalink
Merge pull request #51 from xomicsdatascience/all_quant
Browse files Browse the repository at this point in the history
extended peptide quantification to all peptides
  • Loading branch information
AlexandreHutton authored Oct 7, 2022
2 parents b038609 + 1ed8ffa commit d81072b
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 11 deletions.
2 changes: 1 addition & 1 deletion csodiaq/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '1.1.7'
__version__ = '1.2.0'

from csodiaq import csodiaq_gui, csodiaq, csodiaq_identification_functions, csodiaq_mgf_cleaning_functions, \
csodiaq_quantification_functions, idpicker, IdentificationSpectraMatcher, spectra_matcher_functions
Expand Down
12 changes: 6 additions & 6 deletions csodiaq/csodiaq.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ def main():
# Post-processing
# Check whether we should do anything
if args['commonpeptide']:
print('Extracting peptides common across output files...', flush=True)
peptide_quantification.get_peptide_quantities(file_list=args['files'],
library_file=args['library'],
csodiaq_output_dir=args['outDirectory'],
num_library_fragments=args['peaks'],
save_file=os.path.join(args['outDirectory'], 'common_peptides.csv'))
print(f'Performing protein quantification and identifying common peptpides...', flush=True)
peptide_quantification.get_all_peptide_quantities(file_list=args['files'],
library_file=args['library'],
csodiaq_output_dir=args['outDirectory'],
num_library_fragments=args['peaks'],
save_file=os.path.join(args['outDirectory'], 'common_peptides.csv'))
print('Done.')
if args['commonprotein']:
print('Extracting proteins common across output files...', flush=True)
Expand Down
85 changes: 82 additions & 3 deletions csodiaq/peptide_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,92 @@
experimental_query_intensity_name = 'intensity array' # intensity name for the input to CsoDIAq
output_peptide_format = 'peptide_quantity_{x}' # format for output column


def get_all_peptide_quantities(file_list: list,
library_file: str,
csodiaq_output_dir: str,
num_library_fragments: int = 10,
save_file: str = None):
"""
Quantifies the peptides identified by CsoDIAq and updates the ionCount column in the _peptideFDR files. Optionally
extracts the set of proteins that are common across all input files.
files.
Parameters
----------
file_list : list
List of .mzxml files containing the raw MS2 data.
library_file : str
Path to the library file to use.
csodiaq_output_dir : str
Path to the output directory of CsoDIAq.
num_library_fragments : int
Number of fragments to use for quantification. Fragments are ordered by their intensity in the library.
save_file : str
Optional. If defined, saves the common peptides to the specified path.
Returns
-------
None
"""
if library_file.endswith('.csv'):
sep = ','
elif library_file.endswith('.tsv'):
sep = '\t'
else:
raise ValueError('library must be either .csv or .tsv')
for file_idx, exp_file in enumerate(file_list):
# Get "common"
common_intensity = get_peptide_quantities([exp_file],
library_file=library_file,
csodiaq_output_dir=csodiaq_output_dir,
num_library_fragments=num_library_fragments)
common_intensity.index.name = 'peptide'
common_intensity.drop(['mean','std'], axis=1, inplace=True)
common_intensity.reset_index(inplace=True)
common_intensity.set_index(['peptide','PrecursorMz'], inplace=True)
# Load csodiaq file
csod = gather_matching_fdr_files(csodiaq_output_dir,
[exp_file],
matcher_function=_is_peptidefdr_match)[0]
csod_data = pd.read_csv(csod, index_col=['peptide', 'MzLIB'])

csod_data.to_csv('second.csv')
merged = pd.merge(common_intensity, csod_data, how='inner', left_index=True, right_index=True)
merged['ionCount'] = merged[os.path.basename(exp_file)]
merged.drop(os.path.basename(exp_file), axis=1, inplace=True)
merged.reset_index(inplace=True)
merged.set_index('fileName', inplace=True)
merged.drop('PrecursorMz', inplace=True, axis=1)
# Overwrite csodiaq file
merged.to_csv(csod)

# Load newly-overwritten files
common_peptides = extract_common_entries(csodiaq_output_dir=csodiaq_output_dir,
input_file_list=file_list,
common_col=('peptide', 'MzLIB'),
load_columns=(csodiaq_mz_lib_name, 'peptide', 'ionCount'),
fdr_matcher=_is_peptidefdr_match,
normalize=False)
# Combine them; rename columns to files that were quantified
col_names = common_peptides.columns
new_names = format_filenames(file_list)
renamer = {}
for col, new in zip(col_names, new_names):
renamer[col] = new
common_peptides.rename(columns=renamer, inplace=True)
common_peptides.to_csv(save_file)
return




def get_peptide_quantities(file_list: list,
library_file: str,
csodiaq_output_dir: str,
num_library_fragments: int = 10,
save_file: str = None):
save_file: str = None) -> pd.DataFrame:
"""
Calculates the quantities of peptides that returned by CsoDIAq.
Calculates the quantities of peptides that returned by CsoDIAq and common across all files in the input list..
Parameters
----------
file_list : list
Expand All @@ -53,7 +132,7 @@ def get_peptide_quantities(file_list: list,
common_col=['peptide', 'MzLIB'],
load_columns=[csodiaq_query_scan_name,
csodiaq_mz_lib_name, 'peptide', 'ionCount'],
fdr_matcher=_is_proteinfdr_match,
fdr_matcher=_is_peptidefdr_match,
normalize=False)
if common_dataframe.shape[0] == 0:
return
Expand Down
4 changes: 3 additions & 1 deletion csodiaq/plotting/spectra.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def spectrum_lineplot(spectrum: Spectrum,
if not positive:
intensity = [-i for i in intensity]
plt.vlines(spectrum.mz, [0]*len(intensity), intensity, colors=color, label=label)
plt.hlines(0, min(spectrum.mz), max(spectrum.mz), colors='black')
x_min, x_max = plt.xlim()
plt.hlines(0, x_min, x_max, colors='black')
plt.xlim(x_min, x_max)
return


Expand Down

0 comments on commit d81072b

Please sign in to comment.