88from astropy import table
99from astropy .io import ascii
1010from astroquery .query import BaseQuery
11- from astroquery .utils import async_to_sync
1211# import configurable items declared in __init__.py
1312from astroquery .linelists .cdms import conf
1413from astroquery .exceptions import InvalidQueryError , EmptyResponseError
14+ from ..core import parse_letternumber
15+ from astroquery .utils import process_asyncs
1516from astroquery import log
1617
1718import re
18- import string
1919
2020__all__ = ['CDMS' , 'CDMSClass' ]
2121
@@ -25,7 +25,6 @@ def data_path(filename):
2525 return os .path .join (data_dir , filename )
2626
2727
28- @async_to_sync
2928class CDMSClass (BaseQuery ):
3029 # use the Configuration Items imported from __init__.py
3130 URL = conf .search
@@ -34,6 +33,74 @@ class CDMSClass(BaseQuery):
3433 TIMEOUT = conf .timeout
3534 MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS' , '028528 H2NC' , '058501 H2C2S' , '064527 HC3HCN' ]
3635
def __init__(self, fallback_to_getmolecule=False):
    """
    Create a CDMS query object.

    Parameters
    ----------
    fallback_to_getmolecule : bool, optional
        Kept on the instance as ``self.fallback_to_getmolecule``.
        Defaults to `False`.
    """
    super().__init__()

    # The argument used to be accepted and then thrown away; retain it so
    # that a value chosen at construction time is not silently lost.
    self.fallback_to_getmolecule = fallback_to_getmolecule
38+
39+ def _mol_to_payload (self , molecule , parse_name_locally , flags ):
40+ if parse_name_locally :
41+ self .lookup_ids = build_lookup ()
42+ luts = self .lookup_ids .find (molecule , flags )
43+ if len (luts ) == 0 :
44+ raise InvalidQueryError ('No matching species found. Please '
45+ 'refine your search or read the Docs '
46+ 'for pointers on how to search.' )
47+ return tuple (f"{ val :06d} { key } "
48+ for key , val in luts .items ())[0 ]
49+ else :
50+ return molecule
51+
def query_lines(self, min_frequency, max_frequency, *,
                min_strength=-500, molecule='All',
                temperature_for_intensity=300, flags=0,
                parse_name_locally=False, get_query_payload=False,
                fallback_to_getmolecule=False,
                verbose=False,
                cache=True):
    # NOTE: there is deliberately no docstring here; ``query_lines.__doc__``
    # is assigned from ``query_lines_async.__doc__`` later in the class body.
    #
    # Synchronous front end to ``query_lines_async``.  The query arguments
    # are forwarded unchanged.  Arguments handled here:
    #
    #   fallback_to_getmolecule : if true, a molecule found in
    #       ``MALFORMATTED_MOLECULE_LIST`` is fetched with ``get_molecule``
    #       instead of the search form.  An instance attribute of the same
    #       name, if set to a true value, switches this on as well.
    #   verbose : passed to ``_parse_result``.
    #
    # Returns the payload when ``get_query_payload`` is true, otherwise the
    # result of ``_parse_result`` (or of ``get_molecule`` on the fallback
    # path).
    #
    # Raises ValueError when a known-malformatted molecule is requested
    # without the fallback, or when the fallback cannot use the given
    # molecule specifier.

    # The error message below advertises the instance attribute, so honour
    # it; the keyword argument can only switch the fallback on, not off.
    use_fallback = (fallback_to_getmolecule
                    or getattr(self, 'fallback_to_getmolecule', False))

    # Check if a malformatted molecule was requested
    # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
    badlist = (self.MALFORMATTED_MOLECULE_LIST
               + [part
                  for entry in self.MALFORMATTED_MOLECULE_LIST
                  for part in entry.split()])

    # ``query_lines_async`` accepts ``molecule=None``; do not attempt to
    # resolve a name in that case (or for the 'All' wildcard).
    if molecule is None or molecule == 'All':
        requested_molecule = None
    else:
        requested_molecule = self._mol_to_payload(molecule, parse_name_locally, flags)

    known_bad = bool(requested_molecule) and requested_molecule in badlist

    # Regular path: anything not known to be broken, and every
    # payload-only request, goes through the search interface.
    if not known_bad or get_query_payload:
        response = self.query_lines_async(min_frequency=min_frequency,
                                          max_frequency=max_frequency,
                                          min_strength=min_strength,
                                          molecule=molecule,
                                          temperature_for_intensity=temperature_for_intensity,
                                          flags=flags,
                                          parse_name_locally=parse_name_locally,
                                          get_query_payload=get_query_payload,
                                          cache=cache)
        if get_query_payload:
            return response
        return self._parse_result(response, molname=molecule, verbose=verbose)

    if not use_fallback:
        raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. "
                         f"Try get_molecule('{requested_molecule[:6]}') instead, pass "
                         f"fallback_to_getmolecule=True, or set "
                         f"CDMS.fallback_to_getmolecule = True.")

    try:
        # The first six characters are the numeric ID when the specifier
        # starts with one.
        return self.get_molecule(requested_molecule[:6])
    except ValueError as ex:
        # try to give the users good guidance on which parameters will work
        if "molecule_id should be a length-6 string of numbers" not in str(ex):
            # Unrelated failure: re-raise with the original traceback.
            raise
        if parse_name_locally:
            raise ValueError(f"Molecule {molecule} could not be parsed or identified."
                             " Check that the name was correctly specified.") from ex
        raise ValueError(f"Molecule {molecule} needs to be formatted as"
                         " a 6-digit string ID for the get_molecule fallback to work."
                         " Try setting parse_name_locally=True "
                         "to turn your molecule name into a CDMS number ID.") from ex
103+
37104 def query_lines_async (self , min_frequency , max_frequency , * ,
38105 min_strength = - 500 , molecule = 'All' ,
39106 temperature_for_intensity = 300 , flags = 0 ,
@@ -140,17 +207,7 @@ def query_lines_async(self, min_frequency, max_frequency, *,
140207 payload ['Moleculesgrp' ] = 'all species'
141208 else :
142209 if molecule is not None :
143- if parse_name_locally :
144- self .lookup_ids = build_lookup ()
145- luts = self .lookup_ids .find (molecule , flags )
146- if len (luts ) == 0 :
147- raise InvalidQueryError ('No matching species found. Please '
148- 'refine your search or read the Docs '
149- 'for pointers on how to search.' )
150- payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
151- for key , val in luts .items ())[0 ]
152- else :
153- payload ['Molecules' ] = molecule
210+ payload ['Molecules' ] = self ._mol_to_payload (molecule , parse_name_locally , flags )
154211
155212 if get_query_payload :
156213 return payload
@@ -182,16 +239,11 @@ def query_lines_async(self, min_frequency, max_frequency, *,
182239 response2 = self ._request (method = 'GET' , url = fullurl ,
183240 timeout = self .TIMEOUT , cache = cache )
184241
185- # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
186- badlist = (self .MALFORMATTED_MOLECULE_LIST + # noqa
187- [y for x in self .MALFORMATTED_MOLECULE_LIST for y in x .split ()])
188- if 'Moleculesgrp' not in payload .keys () and payload ['Molecules' ] in badlist :
189- raise ValueError (f"Molecule { payload ['Molecules' ]} is known not to comply with standard CDMS format. "
190- f"Try get_molecule({ payload ['Molecules' ]} ) instead." )
191-
192242 return response2
193243
194- def _parse_result (self , response , * , verbose = False ):
244+ query_lines .__doc__ = process_asyncs .async_to_sync_docstr (query_lines_async .__doc__ )
245+
246+ def _parse_result (self , response , * , verbose = False , molname = None ):
195247 """
196248 Parse a response into an `~astropy.table.Table`
197249
@@ -238,6 +290,8 @@ def _parse_result(self, response, *, verbose=False):
238290 soup = BeautifulSoup (response .text , 'html.parser' )
239291 text = soup .find ('pre' ).text
240292
293+ # this is a different workaround to try to make _some_ of the bad molecules parseable
294+ # (it doesn't solve all of them, which is why the above fallback exists)
241295 need_to_filter_bad_molecules = False
242296 for bad_molecule in self .MALFORMATTED_MOLECULE_LIST :
243297 if text .find (bad_molecule .split ()[1 ]) > - 1 :
@@ -316,7 +370,7 @@ def _parse_result(self, response, *, verbose=False):
316370 except ValueError as ex :
317371 # Give users a more helpful exception when parsing fails
318372 new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. "
319- "You can check this by running `CDMS.get_molecule('<id> ')` instead; if it works, the "
373+ f "You can check this by running `CDMS.get_molecule('{ molname } ')` instead; if it works, the "
320374 "problem is caused by the CDMS search interface and cannot be worked around." )
321375 raise ValueError (new_message ) from ex
322376
@@ -456,14 +510,25 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):
456510
457511 def _parse_cat (self , text , * , verbose = False ):
458512 """
459- Parse a catalog response into an `~astropy.table.Table`
513+ Parse a CDMS-format catalog file into an `~astropy.table.Table`.
514+
515+ The catalog data files are composed of 80-character card images.
516+ Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]:
517+ FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN
460518
461- See details in _parse_response; this is a very similar function,
462- but the catalog responses have a slightly different format.
519+ Parameters
520+ ----------
521+ text : str
522+ The catalog file text content.
523+ verbose : bool, optional
524+ Not used currently.
525+
526+ Returns
527+ -------
528+ Table : `~astropy.table.Table`
529+ Parsed catalog data.
463530 """
464- # notes about the format
465- # [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa
466- # 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa
531+ # Column start positions
467532 starts = {'FREQ' : 0 ,
468533 'ERR' : 14 ,
469534 'LGINT' : 22 ,
@@ -494,7 +559,9 @@ def _parse_cat(self, text, *, verbose=False):
494559 col_starts = list (starts .values ()),
495560 format = 'fixed_width' , fast_reader = False )
496561
562+ # Ensure TAG is integer type for computation
497563 # int truncates - which is what we want
564+ result ['TAG' ] = result ['TAG' ].astype (int )
498565 result ['MOLWT' ] = [int (x / 1e3 ) for x in result ['TAG' ]]
499566
500567 result ['FREQ' ].unit = u .MHz
@@ -527,29 +594,6 @@ def _parse_cat(self, text, *, verbose=False):
527594CDMS = CDMSClass ()
528595
529596
def parse_letternumber(st):
    """
    Convert one of CDMS's two-character quantum numbers to an integer.

    A masked input yields -999999.  Otherwise each character is
    translated in turn -- a lowercase letter becomes a minus sign
    followed by its 1-based alphabet position, an uppercase letter
    becomes its 0-based alphabet position plus 10, and anything else is
    kept as is -- and the concatenated result is passed to `int`.

    From the CDMS docs:
    "Exactly two characters are available for each quantum number. Therefore, half
    integer quanta are rounded up ! In addition, capital letters are used to
    indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters
    are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc."
    """
    if np.ma.is_masked(st):
        return -999999

    lowercase = string.ascii_lowercase
    uppercase = string.ascii_uppercase

    pieces = []
    for char in st:
        if char in lowercase:
            # 'a' -> '-1', 'b' -> '-2', ...
            pieces.append('-' + str(lowercase.index(char) + 1))
        elif char in uppercase:
            # 'A' -> '10', 'B' -> '11', ...
            pieces.append(str(uppercase.index(char) + 10))
        else:
            pieces.append(char)

    return int(''.join(pieces))
551-
552-
553597class Lookuptable (dict ):
554598
555599 def find (self , st , flags ):
0 commit comments