diff --git a/astrodb_utils/publications.py b/astrodb_utils/publications.py index 6d1eff4..36c9b86 100644 --- a/astrodb_utils/publications.py +++ b/astrodb_utils/publications.py @@ -268,18 +268,23 @@ def ingest_publication( return if not ignore_ads: - use_ads = check_ads_token() - if not use_ads and (not reference and (not doi or not bibcode)): - logger.error( - "An ADS_TOKEN environment variable must be set" - "in order to auto-populate the fields.\n" - "Without an ADS_TOKEN, name and bibcode or DOI must be set explicity." - ) - return - else: - use_ads = False + ads_token = check_ads_token() - logger.debug(f"Use ADS set to {use_ads}") + if not ads_token: + logger.warning( + "An ADS_TOKEN environment variable is not set.\n" + "setting ignore_ads=True.") + ignore_ads = True + + if (not reference and (not doi or not bibcode)): + logger.error( + "An ADS_TOKEN environment variable must be set" + "in order to auto-populate the fields.\n" + "Without an ADS_TOKEN, name and bibcode or DOI must be set explicity." 
+ ) + return + + logger.debug(f"ignore_ads set to {ignore_ads}") if bibcode: if "arXiv" in bibcode: @@ -291,40 +296,15 @@ def ingest_publication( arxiv_id = None name_add, bibcode_add, doi_add = "", "", "" - # Search ADS uing a provided arxiv id - if arxiv_id and use_ads: - arxiv_matches = ads.SearchQuery( - q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"] - ) - arxiv_matches_list = list(arxiv_matches) - if len(arxiv_matches_list) != 1: - logger.error("should only be one matching arxiv id") - return - - if len(arxiv_matches_list) == 1: - logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}") - article = arxiv_matches_list[0] - logger.debug( - f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}" - ) - if not reference: # generate the name if it was not provided - name_stub = article.first_author.replace(",", "").replace(" ", "") - name_add = name_stub[0:4] + article.year[-2:] - else: - name_add = reference - description = article.title[0] - bibcode_add = article.bibcode - doi_add = article.doi[0] + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - elif arxiv_id: - name_add = reference - bibcode_add = arxiv_id - doi_add = doi - using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" + # Search ADS uing a provided arxiv id + if arxiv_id: + name_add, bibcode_add, doi_add, description = find_pub_using_arxiv_id(arxiv_id, reference, doi, ignore_ads) + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" # Search ADS using a provided DOI - if doi and use_ads: + if doi and not ignore_ads: doi_matches = ads.SearchQuery( doi=doi, fl=["id", "bibcode", "title", "first_author", "year", "doi"] ) @@ -335,7 +315,6 @@ def ingest_publication( if len(doi_matches_list) == 1: logger.debug(f"Publication found in ADS using DOI: {doi}") - using = doi article = doi_matches_list[0] logger.debug( 
f"{article.first_author}, {article.year}," @@ -349,12 +328,14 @@ def ingest_publication( description = article.title[0] bibcode_add = article.bibcode doi_add = article.doi[0] + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" elif doi: name_add = reference bibcode_add = bibcode doi_add = doi + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - if bibcode and use_ads: + if bibcode and not ignore_ads: bibcode_matches = ads.SearchQuery( bibcode=bibcode, fl=["id", "bibcode", "title", "first_author", "year", "doi"], @@ -370,7 +351,6 @@ def ingest_publication( elif len(bibcode_matches_list) == 1: logger.debug(f"Publication found in ADS using bibcode: {bibcode}") - using = str(bibcode) article = bibcode_matches_list[0] logger.debug( f"{article.first_author}, {article.year}, " @@ -387,6 +367,7 @@ def ingest_publication( doi_add = None else: doi_add = article.doi[0] + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" elif bibcode: name_add = reference bibcode_add = bibcode @@ -395,7 +376,7 @@ def ingest_publication( if reference and not bibcode and not doi: name_add = reference - using = "user input" + using = "ref: {reference} user input. No bibcode or doi provided." 
def find_pub_using_arxiv_id(arxiv_id, reference, doi, ignore_ads):
    """
    Build the publication fields for a publication identified by an arXiv id.

    Parameters
    ----------
    arxiv_id: str
        arXiv identifier. Used as the ADS query string, or returned as the
        bibcode when ADS is not queried.
    reference: str or None
        Reference name supplied by the caller. When empty and ADS is queried,
        a name is generated from the first four characters of the first
        author's name plus the last two digits of the year.
    doi: str or None
        DOI supplied by the caller. Only returned when ADS is not queried;
        otherwise the DOI comes from the ADS record.
    ignore_ads: bool
        If True, ADS is not queried and the caller-supplied values are
        returned unchanged.

    Returns
    -------
    tuple
        ``(name_add, bibcode_add, doi_add, description)``.
        All four entries are None when ADS does not return exactly one
        match, so callers that unpack the result always receive four values.
    """
    if ignore_ads:
        # No ADS lookup: pass the user's values through, with no description.
        return reference, arxiv_id, doi, None

    arxiv_matches_list = list(
        ads.SearchQuery(
            q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"]
        )
    )
    if len(arxiv_matches_list) != 1:
        logger.error("should only be one matching arxiv id")
        # Keep the 4-tuple shape: a bare ``return`` here made the caller's
        # tuple unpacking fail with a TypeError instead of a clean error path.
        return None, None, None, None

    article = arxiv_matches_list[0]
    logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}")
    logger.debug(
        f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}"
    )

    if reference:
        name_add = reference
    else:
        # Generate the name if it was not provided.
        name_stub = article.first_author.replace(",", "").replace(" ", "")
        name_add = name_stub[0:4] + article.year[-2:]

    # ADS list-valued fields may be missing (e.g. arXiv-only records have no
    # DOI); indexing None would raise a TypeError.
    description = article.title[0] if article.title else None
    doi_add = article.doi[0] if article.doi else None

    return name_add, article.bibcode, doi_add, description
Returns ------- @@ -74,7 +78,7 @@ def find_source_in_db( # NO MATCHES # If no matches, try fuzzy search - if len(db_name_matches) == 0: + if len(db_name_matches) == 0 and fuzzy: logger.debug(f"{source}: No name matches, trying fuzzy search") db_name_matches = db.search_object( source, @@ -224,8 +228,8 @@ def ingest_names( def ingest_source( db, source, + reference: str, *, - reference: str = None, ra: float = None, dec: float = None, epoch: str = None, @@ -282,6 +286,19 @@ def ingest_source( logger.debug(f"Trying to ingest source: {source}") + # Make sure reference is provided and in References table + ref_check = find_publication(db, reference=reference) + logger.debug(f"ref_check: {ref_check}") + if ref_check[0] is False: + msg = ( + f"Skipping: {source}." + f"Discovery reference {reference} is either missing or " + " is not in Publications table. \n" + f"(Add it with ingest_publication function.)" + ) + exit_function(msg, raise_error) + return + # Find out if source is already in database or not if search_db: logger.debug(f"Checking database for: {source} at ra: {ra}, dec: {dec}") @@ -316,19 +333,6 @@ def ingest_source( exit_function(msg1 + msg2, raise_error) return - # Make sure reference is provided and in References table - ref_check = find_publication(db, reference=reference) - logger.debug(f"ref_check: {ref_check}") - if ref_check[0] is False: - msg = ( - f"Skipping: {source}." - f"Discovery reference {reference} is either missing or " - " is not in Publications table. 
\n" - f"(Add it with ingest_publication function.)" - ) - exit_function(msg, raise_error) - return - # Try to get coordinates from SIMBAD if they were not provided if (ra is None or dec is None) and use_simbad: # Try to get coordinates from SIMBAD diff --git a/astrodb_utils/tests/conftest.py b/astrodb_utils/tests/conftest.py index afa689a..0af753e 100644 --- a/astrodb_utils/tests/conftest.py +++ b/astrodb_utils/tests/conftest.py @@ -4,6 +4,8 @@ import pytest from astrodb_utils import load_astrodb, logger +from astrodb_utils.publications import ingest_publication +from astrodb_utils.sources import ingest_source logger.setLevel("DEBUG") @@ -24,6 +26,22 @@ def db(): # Confirm file was created assert os.path.exists(DB_NAME) - logger.info("Loaded SIMPLE database using load_astrodb function in conftest.py") + logger.info("Loaded AstroDB Template database using load_astrodb function in conftest.py") + + + + ingest_publication( + db, + reference="Refr20", + bibcode="2020MNRAS.496.1922B", + doi="10.1093/mnras/staa1522", + ignore_ads=True, + ) + + ingest_publication(db, doi="10.1086/161442", reference="Prob83") + + ingest_source(db, "LHS 2924", reference="Prob83") + + return db + - return db \ No newline at end of file diff --git a/astrodb_utils/tests/test_publications.py b/astrodb_utils/tests/test_publications.py index 2d6b46d..bd2d50f 100644 --- a/astrodb_utils/tests/test_publications.py +++ b/astrodb_utils/tests/test_publications.py @@ -4,23 +4,6 @@ from astrodb_utils.publications import find_publication, ingest_publication -def test_ingest_publications(db): - # add a made up publication and make sure it's there - ingest_publication( - db, - reference="Refr20", - bibcode="2020MNRAS.496.1922B", - doi="10.1093/mnras/staa1522", - ignore_ads=True, - ) - assert ( - db.query(db.Publications) - .filter(db.Publications.c.reference == "Refr20") - .count() - == 1 - ) - - def test_find_publication(db): assert not find_publication(db)[0] # False assert find_publication(db, 
reference="Refr20")[0] # True diff --git a/astrodb_utils/tests/test_sources.py b/astrodb_utils/tests/test_sources.py index bea0503..e8edeb1 100644 --- a/astrodb_utils/tests/test_sources.py +++ b/astrodb_utils/tests/test_sources.py @@ -11,8 +11,6 @@ ingest_source, ) -# TODO: Ingest publication just for these tests so they can be run independent of test_publications.py - @pytest.mark.parametrize( "source_data", @@ -90,6 +88,15 @@ def test_find_source_in_db(db): ) assert len(search_result) == 0 + search_result = find_source_in_db(db,"LHS 2924") + assert search_result[0] == "LHS 2924" + + search_result = find_source_in_db(db,"LHS 292", fuzzy=False) + assert len(search_result) == 0 + + search_result = find_source_in_db(db,"LHS 292", fuzzy=True) + assert search_result[0] == "LHS 2924" # This is wrong and a result of fuzzy matching + def test_find_source_in_db_errors(db): with pytest.raises(KeyError) as error_message: