"""
usi - Universal Spectrum Identifier (USI) parser and minimal PROXI client
=========================================================================

Summary
-------
`USI <http://www.psidev.info/usi>`_ is a standardized method of referencing a specific
spectrum in a dataset, possibly attached to an interpretation. This module includes a
:class:`USI` type which can represent these constructs, :meth:`~USI.parse` them and
reconstruct them.

One use-case for USI is to request spectrum information from a `PROXI <http://www.psidev.info/proxi>`_
service host. PROXI services are available from several of the major national proteomics data hosts,
including MassIVE, PeptideAtlas, PRIDE, and jPOST.


Data access
-----------

  :py:class:`USI` for representing Universal Spectrum Identifiers. Call :meth:`USI.parse` to parse a USI
  string.

  :py:func:`proxi` to request a USI from a remote service. Provides access to the PeptideAtlas, MassIVE,
  PRIDE and jPOST hosts.

"""
import json
from collections import namedtuple
from contextlib import closing

try:
    from urllib2 import Request, urlopen
except ImportError:
    from urllib.request import Request, urlopen

try:
    import numpy as np

    def coerce_array(array_data):
        '''Convert a sequence of numbers (or numeric strings) to a NumPy float array.'''
        return np.array([float(v) for v in array_data])

except ImportError:

    def coerce_array(array_data):
        '''Convert a sequence of numbers (or numeric strings) to a list of floats.'''
        return [float(v) for v in array_data]

try:
    # Python 2: ``basestring`` covers both ``str`` and ``unicode`` backend names.
    _string_types = (basestring,)
except NameError:
    _string_types = (str,)


class USI(namedtuple("USI", ['protocol', 'dataset', 'datafile', 'scan_identifier_type',
                             'scan_identifier', 'interpretation'])):
    '''Represent a Universal Spectrum Identifier (USI).

    .. note::
        This implementation will capture the interpretation component but will not interpret it at this time.

    Attributes
    ----------
    protocol: str
        The protocol to use to access the data (usually mzspec)
    dataset: str
        The name or accession number for the dataset the spectrum resides in
    datafile: str
        The basename of the data file from :attr:`dataset` to retrieve the spectrum from
    scan_identifier_type: str
        The format of the scan identifier, one of (scan, index, nativeId, trace)
    scan_identifier: str
        A usually numerical but potentially comma separated value encoded as a string to uniquely
        identify the spectrum to be recovered from :attr:`datafile` in :attr:`dataset`.
    interpretation: str
        The trailing material of the USI, such as the ProForma peptide sequence and charge
    '''
    def __str__(self):
        # Fields that are None (normally only the interpretation) are omitted,
        # so a parsed USI formats back to the string it was parsed from.
        return ':'.join(part for part in self if part is not None)

    @classmethod
    def parse(cls, usi):
        '''Parse a USI string into a :class:`USI` object.

        Parameters
        ----------
        usi: str
            The USI string to parse. Anything else (including an existing
            :class:`USI`) is converted with :func:`str` first.

        Returns
        -------
        USI
        '''
        return cls(*_usi_parser(str(usi)))


def _usi_parser(usi):
    '''Split a USI string into its six components.

    Parameters
    ----------
    usi: str
        The USI string to split on ``:``.

    Returns
    -------
    tuple
        ``(protocol, dataset, datafile, scan_identifier_type, scan_identifier, interpretation)``
        where ``interpretation`` is :const:`None` when the USI has no sixth field.

    Raises
    ------
    IndexError
        If fewer than the five mandatory fields are present.
    '''
    # At most five splits: the interpretation may itself contain ':' and must stay intact.
    fields = usi.split(":", 5)
    if len(fields) < 5:
        raise IndexError(
            "USI %r has %d ':'-separated fields, at least 5 are required" % (usi, len(fields)))
    interpretation = fields[5] if len(fields) > 5 else None
    return tuple(fields[:5]) + (interpretation,)


class _PROXIBackend(object):
    '''A base class for all PROXI backends to implement the gory details of HTTP requests
    and protocol parsing.

    If special processing needs to be done to interpret the spectrum returned from the service
    provider, override the :meth:`_coerce` method.

    If extra information needs to be provided to the service provider for them to fulfill the
    request not passed through the URL, override the :meth:`_request` method.

    Attributes
    ----------
    name: str
        The name of the backend service
    url_template: str
        The URL with {} fields to populate with the USI and any other relevant options, like protocol version
        or the like.
    options: dict
        Additional options to be used when preparing the request URL
    '''
    def __init__(self, name, url_template, **kwargs):
        kwargs.setdefault('version', '0.1')
        self.name = name
        self.url_template = url_template
        self.options = kwargs

    def __repr__(self):
        return "{self.__class__.__name__}({self.options})".format(self=self)

    def _request(self, usi):
        '''Fetch and JSON-decode the service's response for ``usi``.

        Parameters
        ----------
        usi : str or :class:`USI`
            The universal spectrum identifier to substitute into :attr:`url_template`.

        Returns
        -------
        object
            The decoded JSON document.

        Raises
        ------
        ValueError
            If the service answers with a status code other than 200.
        '''
        url = self.url_template.format(usi=usi, **self.options)
        # ``closing`` releases the connection on every path and also works with
        # Python 2's urllib2 responses, which are not context managers themselves.
        with closing(urlopen(Request(url))) as response:
            status = response.getcode()
            if status != 200:
                raise ValueError("PROXI Service Response Code %r" % (status,))
            payload = response.read().decode("utf-8")
        return json.loads(payload)

    def get(self, usi):
        '''Retrieve a ``USI`` from the host PROXI service over the network.

        Parameters
        ----------
        usi : str or :class:`USI`
            The universal spectrum identifier to retrieve.

        Returns
        -------
        dict:
            The spectrum as represented by the requested PROXI host.
        '''
        return self._coerce(self._request(usi))

    def _coerce(self, data):
        '''Override and extend this method to change how the spectrum information is refined.

        This implementation just deals with properly formatting the peak arrays and doing minor
        cosmetic name normalization. The input is left unmodified.

        Parameters
        ----------
        data: dict or list of dict
            The raw mzSpecML representation parsed from JSON. When a list is given,
            only its first element is used.

        Returns
        -------
        dict:
            The coerced spectrum data of appropriate types

        Raises
        ------
        IndexError
            If ``data`` is an empty list.
        ValueError
            If the raw data carries a key that collides with a normalized name.
        '''
        if isinstance(data, list):
            if not data:
                raise IndexError("PROXI service returned an empty list of spectra")
            data = data[0]
        # Work on a shallow copy so the caller's parsed JSON is not stripped of its keys.
        remaining = dict(data)
        result = {
            'attributes': remaining.pop('attributes', {}),
            'm/z array': coerce_array(remaining.pop('mzs', [])),
            'intensity array': coerce_array(remaining.pop('intensities', [])),
        }
        for key, value in remaining.items():
            if key in result:
                raise ValueError(
                    "Attempting to set explicit value for {key!r}".format(key=key))
            result[key] = value
        return result

    def __call__(self, usi):
        return self.get(usi)


class PeptideAtlasBackend(_PROXIBackend):
    '''PROXI backend for PeptideAtlas.'''
    _url_template = "http://www.peptideatlas.org/api/proxi/v{version}/spectra?resultType=full&usi={usi!s}"

    def __init__(self, **kwargs):
        super(PeptideAtlasBackend, self).__init__(
            'PeptideAtlas', self._url_template, **kwargs)


class MassIVEBackend(_PROXIBackend):
    '''PROXI backend for MassIVE.'''
    _url_template = "http://massive.ucsd.edu/ProteoSAFe/proxi/v{version}/spectra?resultType=full&usi={usi}"

    def __init__(self, **kwargs):
        super(MassIVEBackend, self).__init__(
            'MassIVE', self._url_template, **kwargs)


class PRIDEBackend(_PROXIBackend):
    '''PROXI backend for PRIDE.'''
    _url_template = "http://wwwdev.ebi.ac.uk/pride/proxi/archive/v{version}/spectra?resultType=full&usi={usi}"

    def __init__(self, **kwargs):
        super(PRIDEBackend, self).__init__(
            'PRIDE', self._url_template, **kwargs)


class JPOSTBackend(_PROXIBackend):
    '''PROXI backend for jPOST.'''
    _url_template = 'https://repository.jpostdb.org/spectrum/?USI={usi}'

    def __init__(self, **kwargs):
        # The template has no {version} field; ``str.format`` ignores the unused
        # ``version`` option, so nothing needs to be stripped from the options.
        super(JPOSTBackend, self).__init__('jPOST', self._url_template, **kwargs)


_proxies = {
    "peptide_atlas": PeptideAtlasBackend,
    "massive": MassIVEBackend,
    "pride": PRIDEBackend,
    "jpost": JPOSTBackend,
}


def proxi(usi, backend='peptide_atlas', **kwargs):
    '''Retrieve a ``USI`` from a `PROXI <http://www.psidev.info/proxi>`_ service.

    Parameters
    ----------
    usi : str or :class:`USI`
        The universal spectrum identifier to request.
    backend : str or :class:`Callable`
        Either the name of a PROXI host (peptide_atlas, massive, pride, or jpost), or a
        callable object (which :class:`_PROXIBackend` instances are) which will be used
        to resolve the USI. A :class:`_PROXIBackend` subclass is instantiated with ``kwargs``.
    **kwargs:
        extra arguments passed when constructing the backend by name or by class.

    Returns
    -------
    dict :
        The spectrum as represented by the requested PROXI host.

    Raises
    ------
    KeyError
        If ``backend`` is a name that is not a known PROXI host.
    TypeError
        If ``backend`` is neither a name, a backend class, nor a callable.
    '''
    if isinstance(backend, _string_types):
        try:
            backend_type = _proxies[backend]
        except KeyError:
            raise KeyError("Unknown PROXI backend %r, expected one of: %s" % (
                backend, ', '.join(sorted(_proxies))))
        backend = backend_type(**kwargs)
    elif isinstance(backend, type) and issubclass(backend, _PROXIBackend):
        # ``issubclass`` raises TypeError for non-class arguments, so it must only be
        # reached for actual classes; instances and plain functions fall through.
        backend = backend(**kwargs)
    elif not callable(backend):
        raise TypeError("Unrecognized backend type")
    return backend(usi)
169.0691, + 169.0967, 170.093, 171.113, 173.0446, 173.4353, 175.1191, + 176.1224, 177.1024, 181.0972, 183.1131, 183.1493, 184.0968, + 184.1529, 185.0927, 185.1032, 185.1285, 185.1566, 185.165, + 186.0762, 186.0872, 187.0716, 187.0797, 193.1338, 196.0713, + 197.1282, 198.1238, 199.1084, 200.1394, 201.1236, 202.1273, + 203.1032, 203.1141, 204.0869, 205.0901, 205.0984, 206.0911, + 207.1129, 208.1082, 209.0924, 211.1443, 212.1479, 213.16, + 215.1033, 215.1395, 217.0974, 220.1089, 226.1189, 227.1027, + 227.1223, 227.1397, 228.1346, 228.1707, 229.1185, 232.1404, + 233.1294, 235.1077, 238.119, 239.1398, 239.1754, 240.1351, + 241.13, 243.1131, 243.1341, 243.1461, 248.1039, 250.1183, + 251.2113, 254.1502, 255.1459, 259.1405, 260.1425, 261.1241, + 265.1298, 266.1138, 266.1861, 268.166, 269.1694, 272.1609, + 273.1615, 274.1191, 275.1212, 276.1671, 277.1699, 277.6447, + 278.1138, 280.1663, 282.1813, 282.2179, 283.1406, 284.1439, + 284.199, 285.1564, 286.4622, 287.1509, 288.1349, 289.1381, + 292.1297, 294.1458, 294.1817, 295.1401, 295.1841, 297.1819, + 300.1359, 300.1671, 301.1522, 301.17, 303.1704, 305.161, + 306.1455, 307.1406, 308.1611, 309.1446, 310.2132, 311.172, + 311.2132, 312.1562, 313.2141, 314.1719, 315.1458, 316.1298, + 317.1618, 318.108, 320.1246, 320.1602, 321.1924, 322.1778, + 323.1606, 325.1882, 326.1718, 328.1875, 329.1842, 333.1562, + 335.1355, 335.172, 337.1514, 337.1875, 338.1915, 339.2031, + 340.1872, 341.1453, 341.183, 341.2189, 343.1986, 345.1566, + 346.159, 349.1885, 351.165, 351.2035, 352.162, 353.219, + 353.2553, 354.2156, 354.2592, 355.1988, 358.1399, 358.1622, + 360.1917, 361.1499, 363.167, 365.1835, 366.1403, 366.1783, + 368.1945, 369.1777, 370.1731, 370.2449, 371.2398, 372.2247, + 373.1509, 374.2083, 375.1666, 379.1997, 379.2347, 381.1396, + 381.2504, 382.2097, 382.2528, 383.1935, 384.1667, 385.1517, + 385.1878, 386.1553, 386.1907, 389.2517, 390.2537, 391.2, + 393.2504, 394.173, 394.2514, 395.1725, 396.2228, 397.2444, + 398.1682, 398.2409, 399.1696, 
399.236, 401.2041, 402.1808, + 402.2143, 403.162, 403.2175, 404.22, 406.1737, 407.2308, + 407.2641, 408.2263, 411.2611, 413.1825, 413.2116, 420.1881, + 421.2455, 423.1993, 424.2025, 424.2552, 425.2404, 426.2432, + 428.1939, 430.175, 430.2094, 430.2425, 431.2124, 436.2216, + 439.2569, 447.2365, 448.2212, 448.2558, 450.2716, 453.2814, + 454.2064, 455.2062, 456.1888, 456.2213, 457.1917, 458.2393, + 460.1848, 465.2458, 466.3383, 468.2816, 469.2417, 471.2361, + 472.2187, 473.2144, 474.2006, 475.2007, 476.2134, 476.2513, + 478.2671, 479.2711, 481.2779, 482.2049, 482.2645, 483.2065, + 483.2565, 486.2681, 489.2454, 490.2477, 493.2432, 493.3098, + 494.3347, 495.2936, 495.3336, 496.2788, 497.2792, 498.275, + 499.2307, 499.2885, 500.2167, 503.2943, 510.3297, 511.2516, + 511.3279, 512.2547, 513.2472, 514.2614, 514.6443, 515.299, + 517.2418, 518.2451, 519.2954, 522.2933, 523.2952, 525.2454, + 527.223, 534.2673, 536.2747, 537.2743, 538.326, 539.3273, + 541.2422, 542.2728, 543.2557, 545.2374, 546.2383, 553.2421, + 554.241, 554.2872, 560.2838, 561.2878, 569.3062, 570.2684, + 570.3147, 571.2578, 578.3295, 580.3793, 585.3024, 586.3311, + 587.284, 588.2797, 589.283, 591.352, 592.3513, 595.2974, + 595.348, 605.2987, 608.3782, 609.3743, 612.3184, 613.2952, + 630.3265, 631.3271, 632.27, 632.3791, 635.379, 639.3464, + 640.3002, 651.3082, 655.3589, 656.3406, 656.3829, 658.3204, + 659.325, 666.3256, 673.3701, 674.3734, 675.3763, 680.4368, + 683.3511, 684.3432, 689.3477, 691.4151, 692.4018, 693.3339, + 698.3795, 699.4156, 701.3635, 702.3649, 703.3084, 706.3743, + 707.3776, 708.4289, 709.4282, 721.3949, 727.4354, 730.4598, + 745.351, 756.4364, 757.4412, 769.4483, 770.3829, 785.3878, + 786.4808, 788.3944, 795.3701, 798.3801, 802.4148, 805.4865, + 806.4776, 812.3951, 813.3889, 814.3867, 816.3902, 817.3939, + 819.4563, 820.4633, 830.4058, 831.4091, 848.5067, 849.4831, + 850.4828, 858.4691, 866.512, 867.5145, 869.5193, 876.4803, + 877.4835, 878.4819, 883.5378, 884.5388, 892.4265, 927.4429, + 
929.475, 945.4327, 948.5568, 949.5511, 966.5744, 967.5771, + 968.5767, 971.5583, 979.5936, 980.5997, 989.5649, 990.554, + 991.5705, 996.6207, 997.622, 998.6257, 1022.5349, 1023.5327, + 1040.5256, 1041.5275, 1042.5845, 1050.595, 1051.589, 1058.5154, + 1059.5378, 1068.6058, 1069.6024, 1070.6089, 1079.5543, 1079.66, + 1080.6621, 1081.6632, 1086.6161, 1087.6189, 1088.6221, 1095.6342, + 1111.644, 1112.6522, 1181.6898, 1182.6868, 1192.6384, 1193.6614, + 1194.687, 1195.6874, 1199.7003, 1200.7036, 1201.7045, 1271.6893, + 1289.6963, 1290.6954, 1291.6979, 1293.7466, 1296.7097, 1297.7087, + 1298.6992, 1314.7263, 1315.7263, 1316.7278, 1402.7793, 1403.8024, + 1404.8221, 1413.7937, 1414.7994, 1420.8528, 1421.8392, 1451.7468, + 1503.89, 1504.8904, 1505.8995, 1528.8531, 1532.8982, 1605.9242, + 1606.922, 1623.9305, 1624.9381, 1625.9446, 1626.9426, 1735.6211, + 1752.9758, 1753.9739, 1898.9847]), + 'intensity array': ([41966.6758, 2547.6956, 3291.5342, 2838.4585, 4198.6621, + 2980.3152, 255955.7031, 259554.2812, 15788.3789, 15573.1006, + 4178.9922, 5410.9072, 5616.8442, 2474.366, 954771.875, + 10152.6621, 5554.1558, 63132.4688, 6978.6929, 3852.3772, + 6102.2876, 6130.3369, 7675.2935, 14993.0332, 108239.8047, + 6811.1016, 199574.7812, 4911.7881, 13389.499, 6146.4014, + 2646.5579, 3048.3428, 2869.1113, 5208.4102, 5745.9106, + 4367.8789, 12342.4629, 23719.2148, 12862.9375, 2557.7485, + 198537.0938, 13784.9414, 3543.4077, 4131.563, 31193.0723, + 224910.25, 8057.98, 14856.0166, 2870.9648, 4401.5791, + 9193.2881, 3348.6216, 14712.9502, 87049.7266, 7469.748, + 15210.1143, 9361.8613, 13005.0381, 233007.3594, 6379.459, + 17465.3633, 4546.292, 3519.7861, 12858.0059, 4718.2969, + 18815.377, 145815.4375, 21896.3047, 3676.7759, 11890.6113, + 10009.0488, 3699.269, 4043.9946, 65593.2344, 4878.5562, + 12677.7168, 103776.2891, 6596.2896, 3318.2097, 6772.8564, + 351681.125, 18734.9785, 10957.293, 3510.2415, 7858.1919, + 6179.2671, 13985.8643, 173662.8438, 20287.5, 8688.9844, + 8498.873, 8903.2383, 
19180.8867, 3665.1787, 335366.8125, + 7017.2178, 28342.6836, 4865.4375, 18790.5293, 4750.0708, + 25336.3691, 3203.4902, 4257.25, 9891.249, 9430.8369, + 5323.1807, 3810.5613, 4382.1997, 7045.4399, 4381.0942, + 24189.3027, 8441.8184, 4532.8257, 4196.2856, 4110.918, + 8598.3818, 6921.2065, 39098.4648, 4789.5303, 5560.521, + 9069.1211, 18551.5332, 11671.959, 75855.1562, 6522.418, + 16535.8887, 3701.9485, 35926.0859, 3863.2244, 32059.7148, + 5819.3403, 3210.5969, 16217.5137, 17247.084, 3868.7102, + 5855.6655, 272802.7812, 27620.3594, 4390.2866, 24058.0742, + 3318.6807, 9631.8984, 28741.832, 6880.3589, 19617.8301, + 6861.2788, 22676.3984, 9000.6592, 4677.1577, 3663.7769, + 7423.7568, 64958.9453, 4355.772, 6121.9727, 4432.9341, + 14568.1914, 8590.666, 9882.8047, 8349.0869, 8193.5986, + 32859.0859, 14244.7568, 5366.3271, 8436.2861, 3541.928, + 8114.6763, 11038.0684, 13238.2871, 9012.165, 4139.0894, + 8639.3105, 3873.3665, 4799.3062, 3581.249, 6767.1538, + 3221.2576, 34234.8242, 27701.3027, 12575.6621, 22205.0137, + 12237.8467, 5908.9106, 9947.6084, 24797.748, 4669.2256, + 4571.4717, 14177.3848, 6805.0381, 4183.0161, 3842.967, + 6658.7861, 36391.8672, 5175.6484, 8281.4512, 6164.1709, + 6762.8203, 11843.6836, 7930.707, 41806.7734, 4367.5952, + 6773.2051, 4702.3066, 5567.2993, 4455.4995, 4444.3325, + 5055.1304, 18162.2148, 4480.1519, 15342.1143, 11285.541, + 8318.6074, 10304.4072, 5997.8765, 7593.6689, 4187.0688, + 10602.7109, 3672.6799, 8320.6348, 5356.5142, 3662.1902, + 11980.7168, 4636.2578, 41726.2422, 13200.499, 8885.6016, + 8894.1211, 4967.2891, 29418.1074, 32746.0078, 6112.438, + 7184.1636, 36919.9492, 5196.9824, 5471.1787, 12881.5703, + 10838.377, 5238.5288, 5155.4321, 6150.2373, 4111.8496, + 20762.8535, 19288.4609, 4497.6348, 11436.6729, 6415.1431, + 9214.043, 155290.5, 14550.5098, 25952.8242, 4105.3394, + 7406.4492, 8644.6816, 4586.876, 3843.9878, 7114.5103, + 19891.123, 4242.667, 4844.9673, 12831.1318, 44220.1445, + 7491.939, 4230.2671, 160216.5781, 35397.793, 
10992.1924, + 9463.6084, 87356.7891, 4254.9961, 25704.248, 7932.1284, + 10517.7539, 5733.0195, 8632.5596, 10175.666, 36879.6055, + 5204.2793, 7365.5513, 5045.0781, 24276.1172, 7509.6475, + 4975.8628, 6691.5698, 3877.4844, 4361.6406, 6249.6157, + 4908.083, 18014.8926, 8978.2373, 6179.6362, 8305.2979, + 11382.0703, 4022.8655, 4265.6592, 10889.9678, 51238.4102, + 12708.8779, 7461.2456, 21825.8438, 3999.5769, 4827.0664, + 7533.9624, 23269.334, 11600.8018, 4762.519, 5106.3667, + 4442.5024, 7032.0605, 25456.2227, 5871.6138, 17477.4062, + 8218.1289, 4053.5696, 32143.2871, 7449.3823, 8398.5703, + 9791.9453, 25406.2539, 11674.1387, 5712.502, 4139.7842, + 4401.1045, 7204.8188, 3954.5417, 6161.9053, 32005.7363, + 6428.564, 5489.2305, 10636.6445, 4749.8843, 8948.25, + 4526.2495, 9052.9131, 14222.7773, 8232.1895, 20718.2891, + 5464.8374, 8501.5361, 17142.1934, 8471.3633, 16037.1406, + 4146.5811, 15923.6621, 4934.189, 8793.4043, 34129.1211, + 12574.1914, 12152.124, 47545.5664, 4292.9888, 15955.6084, + 9993.1094, 6893.1782, 8311.6094, 21146.418, 9047.8076, + 39483.2227, 13060.46, 12580.04, 4497.2866, 3737.1768, + 5266.8677, 8785.2305, 28534.9453, 10757.5723, 7430.501, + 7050.3403, 17575.3848, 4611.1118, 5129.0845, 4341.7598, + 10760.4297, 10225.1807, 4679.0171, 8483.8486, 9013.8955, + 11730.4531, 3790.2556, 12612.9414, 4082.8838, 7504.1924, + 35896.1445, 16693.1152, 5017.0947, 5207.6147, 12085.1699, + 14201.3936, 20826.8301, 7449.6035, 8584.9268, 4789.8286, + 6915.6299, 5846.1694, 32315.543, 15269.6934, 8256.1914, + 32809.0898, 11087.9678, 4199.9697, 6494.6421, 4413.3452, + 4201.7617, 3606.0955, 5441.686, 23864.4434, 9459.0645, + 4164.3262, 4590.3423, 4342.3149, 6736.8931, 8654.7998, + 9649.4893, 10202.7041, 10856.6143, 9960.1367, 5911.1245, + 15556.0107, 11216.333, 4908.4263, 15296.3115, 4665.8364, + 11577.4492, 4889.543, 44902.1758, 23394.2539, 4861.1089, + 11224.9121, 5479.5527, 9040.8555, 41152.7656, 13091.8457, + 6072.9536, 70285.5312, 27767.627, 5591.4673, 40979.4375, + 
21223.6445, 5186.0054, 4497.604, 5784.5356, 6357.8408, + 4274.5059, 9011.8428, 77925.8594, 37771.168, 6407.4629, + 5945.9824, 20595.2656, 10457.5928, 18185.3223, 6212.7734, + 5226.3213, 40318.8164, 21220.1074, 6534.4219, 5595.3613, + 4403.0303, 8463.5703, 4463.9971, 6311.5747, 5473.1221, + 5640.0103, 5931.4033, 4443.1938, 31420.2207, 19907.5234, + 6193.0039, 9792.1543, 56711.4766, 29846.9121, 8117.728, + 175281.8125, 99740.7891, 24018.1094, 3852.635, 11292.21, + 6774.5361, 21905.0859, 16468.5293, 5227.1997, 5561.5205, + 14747.0723, 6571.646, 139674.8438, 67535.0156, 20008.0254, + 4196.876, 28087.0918, 17040.2598, 7583.5469, 4520.9663, + 5070.3828, 5462.8179, 5022.8677, 36560.4102, 24047.0879, + 8656.3838, 9155.3828, 10332.3398, 5795.52, 11947.2334, + 9987.9014, 16823.0645, 6565.8887, 8523.4277, 25558.1504, + 13748.7529, 6460.7681, 4543.1084, 5551.3354, 6699.9346, + 5086.3892, 46855.082, 31373.1426, 12940.0234, 7297.4478, + 4090.177, 7064.5483, 3922.6812, 5938.6528])} diff --git a/tests/test_usi.py b/tests/test_usi.py new file mode 100644 index 00000000..8a3e0952 --- /dev/null +++ b/tests/test_usi.py @@ -0,0 +1,41 @@ +from data import usi_proxi_data +from os import path +import pyteomics +pyteomics.__path__ = [path.abspath(path.join(path.dirname(__file__), path.pardir, 'pyteomics'))] + +import unittest +from itertools import product +import operator as op + +import numpy as np + +from pyteomics.usi import USI, proxi + + +class USITest(unittest.TestCase): + def test_parse(self): + usi_str = "mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256" + inst = USI.parse(usi_str) + assert str(inst) == usi_str + assert inst.protocol == 'mzspec' + assert inst.dataset == "MSV000085202" + assert inst.datafile == "210320_SARS_CoV_2_T" + assert inst.scan_identifier_type == "scan" + assert inst.scan_identifier == "131256" + assert inst.interpretation == None + + +class PROXITest(unittest.TestCase): + def test_request(self): + usi_str = 
"mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256" + response = proxi(usi_str, backend='peptide_atlas') + + assert usi_proxi_data.keys() <= response.keys() + assert np.allclose(response['m/z array'] - usi_proxi_data['m/z array'], 0) + assert np.allclose(response['intensity array'] - usi_proxi_data['intensity array'], 0) + + + + +if __name__ == "__main__": + unittest.main() From 1b776a65044bc3d0ac3e668d2884a6e231271a33 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Mon, 5 Oct 2020 21:18:30 -0400 Subject: [PATCH 2/3] Add documentation page --- doc/source/api.rst | 1 + doc/source/api/usi.rst | 1 + pyteomics/usi.py | 25 ++++++++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 doc/source/api/usi.rst diff --git a/doc/source/api.rst b/doc/source/api.rst index faa8f8e9..4fb25f0a 100755 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -26,6 +26,7 @@ Contents: api/tandem api/mzid api/mztab + api/usi api/featurexml api/trafoxml api/idxml diff --git a/doc/source/api/usi.rst b/doc/source/api/usi.rst new file mode 100644 index 00000000..b3290f71 --- /dev/null +++ b/doc/source/api/usi.rst @@ -0,0 +1 @@ +.. automodule:: pyteomics.usi diff --git a/pyteomics/usi.py b/pyteomics/usi.py index b11e7ce0..fae525f7 100644 --- a/pyteomics/usi.py +++ b/pyteomics/usi.py @@ -12,6 +12,17 @@ One use-case for USI is to request spectrum information from a `PROXI ` service host. PROXI services are available from several of the major national proteomics data hosts, including MassIVE, PeptideAtlas, PRIDE, and jPOST. + + +Data access +----------- + + :py:class:`USI` for representing Universal Spectrum Identifiers. Call :meth:`USI.parse` to parse a USI + string. + + :py:func:`proxi` to request a USI from a remote service. Provides access to the PeptideAtlas, MassIVE, + PRIDE and jPOST hosts. 
+ """ import json from collections import namedtuple @@ -60,7 +71,19 @@ def __str__(self): @classmethod def parse(cls, usi): - return cls(*_usi_parser(usi)) + '''Parse a USI string into a :class:`USI` object. + + Parameters + ---------- + usi: str + The USI string to parse + + Returns + ------- + USI + ''' + return cls(*_usi_parser(str(usi))) + def _usi_parser(usi): tokens = usi.split(":", 5) From 743e9d4c0d8e02162ecbb82be9b91a28bc391108 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Tue, 6 Oct 2020 12:07:36 -0400 Subject: [PATCH 3/3] Remove test dependency on NumPy. Use explicit sets for Py2 --- tests/test_usi.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_usi.py b/tests/test_usi.py index 8a3e0952..9608ae8f 100644 --- a/tests/test_usi.py +++ b/tests/test_usi.py @@ -7,8 +7,6 @@ from itertools import product import operator as op -import numpy as np - from pyteomics.usi import USI, proxi @@ -30,9 +28,13 @@ def test_request(self): usi_str = "mzspec:MSV000085202:210320_SARS_CoV_2_T:scan:131256" response = proxi(usi_str, backend='peptide_atlas') - assert usi_proxi_data.keys() <= response.keys() - assert np.allclose(response['m/z array'] - usi_proxi_data['m/z array'], 0) - assert np.allclose(response['intensity array'] - usi_proxi_data['intensity array'], 0) + assert set(usi_proxi_data.keys()) <= set(response.keys()) + + for a, b in zip(response['m/z array'], usi_proxi_data['m/z array']): + self.assertAlmostEqual(a, b, 3) + + for a, b in zip(response['intensity array'], usi_proxi_data['intensity array']): + self.assertAlmostEqual(a, b, 3)