Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mz range attributes #58

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions spec2vec/SpectrumDocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class SpectrumDocument(Document):
[100. 150. 200.51]
substance1
"""
def __init__(self, spectrum, n_decimals: int = 2):
def __init__(self, spectrum, n_decimals: int = 2,
mz_from: float = 0.0, mz_to: float = 1000.0):
"""

Parameters
Expand All @@ -49,16 +50,25 @@ def __init__(self, spectrum, n_decimals: int = 2):
Peak positions are converted to strings with n_decimals decimals.
The default is 2, which would convert a peak at 100.387 into the
word "peak@100.39".
mz_from:
Lower m/z bound (inclusive). Peaks with m/z below this value are
not turned into words. Default is 0.0.
mz_to:
Upper m/z bound (inclusive). Peaks with m/z above this value are
not turned into words. Default is 1000.0.
"""
self.n_decimals = n_decimals
self.mz_from = mz_from
self.mz_to = mz_to
self.weights = None
super().__init__(obj=spectrum)
self._add_weights()

def _make_words(self):
"""Create word from peaks (and losses)."""
mz_array_selected = self._obj.peaks.mz[(self._obj.peaks.mz >= self.mz_from) & (self._obj.peaks.mz <= self.mz_to)]
format_string = "{}@{:." + "{}".format(self.n_decimals) + "f}"
peak_words = [format_string.format("peak", mz) for mz in self._obj.peaks.mz]
peak_words = [format_string.format("peak", mz) for mz in mz_array_selected]
if self._obj.losses is not None:
loss_words = [format_string.format("loss", mz) for mz in self._obj.losses.mz]
else:
Expand Down
16 changes: 16 additions & 0 deletions tests/test_spectrum_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ def test_spectrum_document_init_default_with_losses():
assert next(spectrum_document) == "peak@10.00"


def test_spectrum_document_init_default_peaks_outside_mz_range():
    """Peaks above ``mz_to`` must be left out of the document (default n_decimals)."""
    # Three peaks lie below the 500.0 cut-off, the last one (540) lies above it.
    peak_mz = numpy.array([310, 320, 330, 540], dtype="float")
    peak_intensities = numpy.array([1, 0.01, 0.1, 1], dtype="float")
    spectrum_in = Spectrum(
        mz=peak_mz,
        intensities=peak_intensities,
        metadata={"precursor_mz": 100.0},
    )

    spectrum_document = SpectrumDocument(spectrum_in, mz_to=500.0)

    expected_words = ["peak@310.00", "peak@320.00", "peak@330.00"]

    assert spectrum_document.n_decimals == 2, "Expected different default for n_decimals"
    assert len(spectrum_document) == 3
    assert spectrum_document.words == expected_words
    assert next(spectrum_document) == "peak@310.00"


def test_spectrum_document_init_n_decimals_1():
"""Use n_decimal=1 and add losses."""
mz = numpy.array([10, 20, 30, 40], dtype="float")
Expand Down