1010from astroquery .utils import async_to_sync
1111# import configurable items declared in __init__.py
1212from astroquery .linelists .cdms import conf
13- from astroquery .jplspec import lookup_table
1413from astroquery .exceptions import InvalidQueryError , EmptyResponseError
1514
15+ import re
16+ import string
1617
1718__all__ = ['CDMS' , 'CDMSClass' ]
1819
@@ -52,6 +53,13 @@ def query_lines_async(self, min_frequency, max_frequency, *,
5253 molecule : list, string of regex if parse_name_locally=True, optional
5354 Identifiers of the molecules to search for. If this parameter
5455 is not provided the search will match any species. Default is 'All'.
56+ As a first pass, the molecule will be searched for with a direct
57+ string match. If no string match is found, a regular expression
58+ match is attempted. Note that if the molecule name regex contains
59+ parentheses, they must be escaped. For example, 'H2C(CN)2.*' must be
60+ specified as 'H2C\\(CN\\)2.*' (but because of the first-attempt
61+ full-string matching, 'H2C(CN)2' will match that molecule
62+ successfully).
5563
5664 temperature_for_intensity : float
5765 The temperature to use when computing the intensity Smu^2. Set
@@ -126,12 +134,12 @@ def query_lines_async(self, min_frequency, max_frequency, *,
126134 if parse_name_locally :
127135 self .lookup_ids = build_lookup ()
128136 luts = self .lookup_ids .find (molecule , flags )
129- payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
130- for key , val in luts .items ())[0 ]
131- if len (molecule ) == 0 :
137+ if len (luts ) == 0 :
132138 raise InvalidQueryError ('No matching species found. Please '
133139 'refine your search or read the Docs '
134140 'for pointers on how to search.' )
141+ payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
142+ for key , val in luts .items ())[0 ]
135143 else :
136144 payload ['Molecules' ] = molecule
137145
@@ -187,12 +195,14 @@ def _parse_result(self, response, verbose=False):
187195
188196 ELO: Lower state energy in cm^{-1} relative to the ground state.
189197 GUP: Upper state degeneracy.
190- TAG: Species tag or molecular identifier.
191- A negative value flags that the line frequency has
192- been measured in the laboratory. The absolute value of TAG is then the
193- species tag and ERR is the reported experimental error. The three most
194- significant digits of the species tag are coded as the mass number of
195- the species.
198+ MOLWT: The first half of the molecular weight tag, which is the mass in atomic
199+ mass units (Daltons).
200+ TAG: Species tag or molecular identifier. This only includes the
201+ last 3 digits of the CDMS tag
202+
203+ A negative value of MOLWT flags that the line frequency has been
204+ measured in the laboratory. We record this boolean in the 'Lab'
205+ column. ERR is the reported experimental error.
196206
197207 QNFMT: Identifies the format of the quantum numbers
198208 Ju/Ku/vu and Jl/Kl/vl are the upper/lower QNs
@@ -215,15 +225,21 @@ def _parse_result(self, response, verbose=False):
215225 'DR' : 36 ,
216226 'ELO' : 38 ,
217227 'GUP' : 48 ,
218- 'TAG' : 51 ,
219- 'QNFMT' : 57 ,
228+ 'MOLWT' : 51 ,
229+ 'TAG' : 54 ,
230+ 'QNFMT' : 58 ,
220231 'Ju' : 61 ,
221232 'Ku' : 63 ,
222233 'vu' : 65 ,
223- 'Jl' : 67 ,
224- 'Kl' : 69 ,
225- 'vl' : 71 ,
226- 'F' : 73 ,
234+ 'F1u' : 67 ,
235+ 'F2u' : 69 ,
236+ 'F3u' : 71 ,
237+ 'Jl' : 73 ,
238+ 'Kl' : 75 ,
239+ 'vl' : 77 ,
240+ 'F1l' : 79 ,
241+ 'F2l' : 81 ,
242+ 'F3l' : 83 ,
227243 'name' : 89 }
228244
229245 result = ascii .read (text , header_start = None , data_start = 0 ,
@@ -235,6 +251,18 @@ def _parse_result(self, response, verbose=False):
235251 result ['FREQ' ].unit = u .MHz
236252 result ['ERR' ].unit = u .MHz
237253
254+ result ['Lab' ] = result ['MOLWT' ] < 0
255+ result ['MOLWT' ] = np .abs (result ['MOLWT' ])
256+ result ['MOLWT' ].unit = u .Da
257+
258+ for suf in 'ul' :
259+ for qn in ('J' , 'v' , 'K' , 'F1' , 'F2' , 'F3' ):
260+ qnind = qn + suf
261+ if result [qnind ].dtype != int :
262+ intcol = np .array (list (map (parse_letternumber , result [qnind ])),
263+ dtype = int )
264+ result [qnind ] = intcol
265+
238266 # if there is a crash at this step, something went wrong with the query
239267 # and the _last_query_temperature was not set. This shouldn't ever
240268 # happen, but, well, I anticipate it will.
@@ -303,12 +331,66 @@ def tryfloat(x):
303331CDMS = CDMSClass ()
304332
305333
def parse_letternumber(st):
    """
    Parse one of CDMS's two-character quantum numbers into an integer.

    From the CDMS docs:
    "Exactly two characters are available for each quantum number. Therefore, half
    integer quanta are rounded up ! In addition, capital letters are used to
    indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small
    types are used to signal corresponding negative quantum numbers."

    A two-character field can hold -9..99 with plain digits, so letters only
    extend that range: a capital letter replaces the tens digit for values
    above 99 (A -> 10, ..., Z -> 35) and a small letter replaces the sign and
    tens digit for values below -9 (a -> -1, ..., z -> -26).  Hence 'a0' is
    -10 and 'z9' is -269.

    Parameters
    ----------
    st : str
        The quantum-number field, e.g. ``'15'``, ``'-3'``, ``'A0'`` or ``'a0'``.

    Returns
    -------
    int
        The decoded quantum number.

    Raises
    ------
    ValueError
        If the decoded text is not a valid integer literal (e.g. an empty
        field).
    """
    lower = string.ascii_lowercase
    upper = string.ascii_uppercase

    pieces = []
    for char in st:
        if char in lower:
            # small letters start right below -9: 'a' is the "-1" tens digit
            pieces.append('-' + str(lower.index(char) + 1))
        elif char in upper:
            # capital letters start right above 99: 'A' is the "10" tens digit
            pieces.append(str(upper.index(char) + 10))
        else:
            # digits, sign and padding are passed through for int() to handle
            pieces.append(char)

    return int(''.join(pieces))
350+
351+
class Lookuptable(dict):
    """
    Dictionary whose keys can be searched by substring or regular expression.
    """

    def find(self, st, flags):
        """
        Search dictionary keys for a substring or regex match to string st

        A key is selected when ``st`` occurs literally inside it, or when
        ``st``, compiled as a regular expression, matches anywhere in it.

        Parameters
        ----------
        st : str
            String to look for, also compiled as a regular expression.
            Can be entered non-specific for broader results
            ('H2O' yields 'H2O' but will also yield 'HCCCH2OD')
            or as the specific desired regular expression for
            catered results, for example: ('H2O$' yields only 'H2O')

        flags : int
            Regular expression flags.

        Returns
        -------
        dict
            The ``{key: value}`` entries whose keys matched, in the
            iteration order of this table.

        Raises
        ------
        re.error
            If ``st`` is not a valid regular expression *and* it does not
            occur literally in any key.
        """
        try:
            # compile once instead of once per key
            pattern = re.compile(st, flags)
        except re.error:
            # ``st`` is not a valid regex (e.g. an unbalanced parenthesis in
            # a molecule name); the literal substring match must still work.
            literal = {kk: vv for kk, vv in self.items() if st in str(kk)}
            if not literal:
                raise
            return literal

        # note that the string-match attempt here differs from the jplspec
        # implementation
        return {kk: vv for kk, vv in self.items()
                if st in str(kk) or pattern.search(str(kk))}
386+
387+
def build_lookup():
    """
    Build the species lookup table used for local molecule-name parsing.

    The species table is fetched with ``CDMS.get_species_table()``; item 1
    supplies the keys and item 0 the values of the returned `Lookuptable`.
    NOTE(review): the original comments call these the NAME and TAG columns
    respectively -- confirm against ``get_species_table``.

    Returns
    -------
    Lookuptable
        Mapping built from the two items above.
    """
    species = CDMS.get_species_table()

    names = list(species[1][:])
    tags = list(species[0][:])

    return Lookuptable(dict(zip(names, tags)))
0 commit comments