From e073fd72156b5b9d04d410b4e8cf52da333aeaee Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 11:09:18 -0400 Subject: [PATCH 01/10] add some data to be used in all tests --- astrodb_utils/tests/conftest.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/astrodb_utils/tests/conftest.py b/astrodb_utils/tests/conftest.py index afa689a..979f3f9 100644 --- a/astrodb_utils/tests/conftest.py +++ b/astrodb_utils/tests/conftest.py @@ -4,6 +4,8 @@ import pytest from astrodb_utils import load_astrodb, logger +from astrodb_utils.publications import ingest_publication +from astrodb_utils.sources import ingest_source logger.setLevel("DEBUG") @@ -24,6 +26,20 @@ def db(): # Confirm file was created assert os.path.exists(DB_NAME) - logger.info("Loaded SIMPLE database using load_astrodb function in conftest.py") + logger.info("Loaded AstroDB Template database using load_astrodb function in conftest.py") + + ingest_publication(db, doi="10.1086/161442") # Prob83 + + ingest_publication( + db, + reference="Refr20", + bibcode="2020MNRAS.496.1922B", + doi="10.1093/mnras/staa1522", + ignore_ads=True, + ) + + ingest_source(db, "LHS 2924", reference="Prob83") + + return db + - return db \ No newline at end of file From 7f940e8621c970edefe7b1dbeeb32e181d29174d Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 11:09:41 -0400 Subject: [PATCH 02/10] check ref first in 'ingest_source' --- astrodb_utils/sources.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/astrodb_utils/sources.py b/astrodb_utils/sources.py index da6cbff..0773b58 100644 --- a/astrodb_utils/sources.py +++ b/astrodb_utils/sources.py @@ -224,8 +224,8 @@ def ingest_names( def ingest_source( db, source, - *, reference: str = None, + *, ra: float = None, dec: float = None, epoch: str = None, @@ -282,6 +282,19 @@ def ingest_source( logger.debug(f"Trying to ingest source: {source}") + # Make sure reference is provided and in 
References table + ref_check = find_publication(db, reference=reference) + logger.debug(f"ref_check: {ref_check}") + if ref_check[0] is False: + msg = ( + f"Skipping: {source}." + f"Discovery reference {reference} is either missing or " + " is not in Publications table. \n" + f"(Add it with ingest_publication function.)" + ) + exit_function(msg, raise_error) + return + # Find out if source is already in database or not if search_db: logger.debug(f"Checking database for: {source} at ra: {ra}, dec: {dec}") @@ -316,19 +329,6 @@ def ingest_source( exit_function(msg1 + msg2, raise_error) return - # Make sure reference is provided and in References table - ref_check = find_publication(db, reference=reference) - logger.debug(f"ref_check: {ref_check}") - if ref_check[0] is False: - msg = ( - f"Skipping: {source}." - f"Discovery reference {reference} is either missing or " - " is not in Publications table. \n" - f"(Add it with ingest_publication function.)" - ) - exit_function(msg, raise_error) - return - # Try to get coordinates from SIMBAD if they were not provided if (ra is None or dec is None) and use_simbad: # Try to get coordinates from SIMBAD From c346048b95a0cae30da06d045b5d24f06db29c98 Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 11:09:55 -0400 Subject: [PATCH 03/10] move to conftest --- astrodb_utils/tests/test_publications.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/astrodb_utils/tests/test_publications.py b/astrodb_utils/tests/test_publications.py index 2d6b46d..bd2d50f 100644 --- a/astrodb_utils/tests/test_publications.py +++ b/astrodb_utils/tests/test_publications.py @@ -4,23 +4,6 @@ from astrodb_utils.publications import find_publication, ingest_publication -def test_ingest_publications(db): - # add a made up publication and make sure it's there - ingest_publication( - db, - reference="Refr20", - bibcode="2020MNRAS.496.1922B", - doi="10.1093/mnras/staa1522", - ignore_ads=True, - ) - assert ( - 
db.query(db.Publications) - .filter(db.Publications.c.reference == "Refr20") - .count() - == 1 - ) - - def test_find_publication(db): assert not find_publication(db)[0] # False assert find_publication(db, reference="Refr20")[0] # True From ab73faa6325bc945850d0d05a6e366e369dd7b2f Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 11:10:16 -0400 Subject: [PATCH 04/10] add LHS292 / LHS2924 fuzzy test --- astrodb_utils/tests/test_sources.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/astrodb_utils/tests/test_sources.py b/astrodb_utils/tests/test_sources.py index bea0503..7d97195 100644 --- a/astrodb_utils/tests/test_sources.py +++ b/astrodb_utils/tests/test_sources.py @@ -11,8 +11,6 @@ ingest_source, ) -# TODO: Ingest publication just for these tests so they can be run independent of test_publications.py - @pytest.mark.parametrize( "source_data", @@ -90,6 +88,16 @@ def test_find_source_in_db(db): ) assert len(search_result) == 0 + search_result = find_source_in_db(db,"LHS 2924") + # print(f"LHS 2924: {search_result}") + assert search_result[0] == "LHS 2924" + + search_result = find_source_in_db(db,"LHS 292") + assert search_result[0] == "LHS 2924" # This is wrong and a result of fuzzy matching + + search_result = find_source_in_db(db,"LHS 292", fuzzy=False) + assert len(search_result) == 0 + def test_find_source_in_db_errors(db): with pytest.raises(KeyError) as error_message: From 667af3803e090f8dd650f506b1241a157261b8dd Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 16:47:52 -0400 Subject: [PATCH 05/10] implement fuzzy=false as default --- astrodb_utils/sources.py | 6 +++++- astrodb_utils/tests/test_sources.py | 7 +++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/astrodb_utils/sources.py b/astrodb_utils/sources.py index 0773b58..4cf80d7 100644 --- a/astrodb_utils/sources.py +++ b/astrodb_utils/sources.py @@ -28,9 +28,11 @@ def find_source_in_db( ra_col_name="ra_deg", dec_col_name="dec_deg", 
use_simbad=True, + fuzzy=False ): """ Find a source in the database given a source name and optional coordinates. + Uses astrodbkit .search_object and .query_region methods to search the Sources and Names table. Parameters ---------- @@ -50,6 +52,8 @@ def find_source_in_db( use_simbad: bool Use Simbad to resolve the source name if it is not found in the database. Default is True. Set to False if no internet connection. + fuzzy: bool + Use fuzzy search to find source name in database. Default is False. Returns ------- @@ -74,7 +78,7 @@ def find_source_in_db( # NO MATCHES # If no matches, try fuzzy search - if len(db_name_matches) == 0: + if len(db_name_matches) == 0 and fuzzy: logger.debug(f"{source}: No name matches, trying fuzzy search") db_name_matches = db.search_object( source, diff --git a/astrodb_utils/tests/test_sources.py b/astrodb_utils/tests/test_sources.py index 7d97195..e8edeb1 100644 --- a/astrodb_utils/tests/test_sources.py +++ b/astrodb_utils/tests/test_sources.py @@ -89,15 +89,14 @@ def test_find_source_in_db(db): assert len(search_result) == 0 search_result = find_source_in_db(db,"LHS 2924") - # print(f"LHS 2924: {search_result}") assert search_result[0] == "LHS 2924" - search_result = find_source_in_db(db,"LHS 292") - assert search_result[0] == "LHS 2924" # This is wrong and a result of fuzzy matching - search_result = find_source_in_db(db,"LHS 292", fuzzy=False) assert len(search_result) == 0 + search_result = find_source_in_db(db,"LHS 292", fuzzy=True) + assert search_result[0] == "LHS 2924" # This is wrong and a result of fuzzy matching + def test_find_source_in_db_errors(db): with pytest.raises(KeyError) as error_message: From 586f80a73bab7f95dcfe9bedf8faa4ab17ff1dff Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 16:52:26 -0400 Subject: [PATCH 06/10] specific reference name since Github Actions does not have an ADS_TOKEN --- astrodb_utils/tests/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/astrodb_utils/tests/conftest.py b/astrodb_utils/tests/conftest.py index 979f3f9..0af753e 100644 --- a/astrodb_utils/tests/conftest.py +++ b/astrodb_utils/tests/conftest.py @@ -28,7 +28,7 @@ def db(): logger.info("Loaded AstroDB Template database using load_astrodb function in conftest.py") - ingest_publication(db, doi="10.1086/161442") # Prob83 + ingest_publication( db, @@ -38,6 +38,8 @@ def db(): ignore_ads=True, ) + ingest_publication(db, doi="10.1086/161442", reference="Prob83") + ingest_source(db, "LHS 2924", reference="Prob83") return db From 3aa5f7cfd9156f97263026f40d86af44728773c6 Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 18:04:58 -0400 Subject: [PATCH 07/10] fix missing using. Closes #106 --- astrodb_utils/publications.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/astrodb_utils/publications.py b/astrodb_utils/publications.py index 6d1eff4..7554716 100644 --- a/astrodb_utils/publications.py +++ b/astrodb_utils/publications.py @@ -267,8 +267,15 @@ def ingest_publication( logger.error("Publication, DOI, or Bibcode is required input") return - if not ignore_ads: - use_ads = check_ads_token() + use_ads = check_ads_token() + + if not use_ads and not ignore_ads: + logger.warning( + "An ADS_TOKEN environment variable is not set.\n" + "setting ignore_ads=True.") + ignore_ads = True + + if not ignore_ads: if not use_ads and (not reference and (not doi or not bibcode)): logger.error( "An ADS_TOKEN environment variable must be set" @@ -279,7 +286,7 @@ def ingest_publication( else: use_ads = False - logger.debug(f"Use ADS set to {use_ads}") + logger.debug(f"ignore_ads set to {not use_ads}") if bibcode: if "arXiv" in bibcode: @@ -322,6 +329,8 @@ def ingest_publication( bibcode_add = arxiv_id doi_add = doi using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" + else: + using = "not sure yet, no arxiv id provided" # Search ADS using a provided DOI if doi and use_ads: @@ -353,6 +362,7 @@ def 
ingest_publication( name_add = reference bibcode_add = bibcode doi_add = doi + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" if bibcode and use_ads: bibcode_matches = ads.SearchQuery( @@ -370,7 +380,7 @@ def ingest_publication( elif len(bibcode_matches_list) == 1: logger.debug(f"Publication found in ADS using bibcode: {bibcode}") - using = str(bibcode) + using = f"bibcode: {bibcode}" article = bibcode_matches_list[0] logger.debug( f"{article.first_author}, {article.year}, " @@ -395,7 +405,7 @@ def ingest_publication( if reference and not bibcode and not doi: name_add = reference - using = "user input" + using = "ref: {reference} user input. No bibcode or doi provided." new_ref = [ { From 0c0f331349ca289362e0f78fab0c21f0b69a9f4b Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 18:11:56 -0400 Subject: [PATCH 08/10] polish using --- astrodb_utils/publications.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/astrodb_utils/publications.py b/astrodb_utils/publications.py index 7554716..8bb69e0 100644 --- a/astrodb_utils/publications.py +++ b/astrodb_utils/publications.py @@ -298,6 +298,7 @@ def ingest_publication( arxiv_id = None name_add, bibcode_add, doi_add = "", "", "" + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" # Search ADS uing a provided arxiv id if arxiv_id and use_ads: arxiv_matches = ads.SearchQuery( @@ -329,8 +330,6 @@ def ingest_publication( bibcode_add = arxiv_id doi_add = doi using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - else: - using = "not sure yet, no arxiv id provided" # Search ADS using a provided DOI if doi and use_ads: @@ -344,7 +343,6 @@ def ingest_publication( if len(doi_matches_list) == 1: logger.debug(f"Publication found in ADS using DOI: {doi}") - using = doi article = doi_matches_list[0] logger.debug( f"{article.first_author}, {article.year}," @@ -358,6 +356,7 @@ def ingest_publication( description = article.title[0] bibcode_add = 
article.bibcode doi_add = article.doi[0] + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" elif doi: name_add = reference bibcode_add = bibcode @@ -380,7 +379,6 @@ def ingest_publication( elif len(bibcode_matches_list) == 1: logger.debug(f"Publication found in ADS using bibcode: {bibcode}") - using = f"bibcode: {bibcode}" article = bibcode_matches_list[0] logger.debug( f"{article.first_author}, {article.year}, " @@ -397,6 +395,7 @@ def ingest_publication( doi_add = None else: doi_add = article.doi[0] + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" elif bibcode: name_add = reference bibcode_add = bibcode From 20cb90022a505ac26cf762b69d1d799f489b48b1 Mon Sep 17 00:00:00 2001 From: kelle Date: Mon, 10 Mar 2025 18:29:24 -0400 Subject: [PATCH 09/10] trying to decrease complexity of ingest_publication --- astrodb_utils/publications.py | 107 ++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/astrodb_utils/publications.py b/astrodb_utils/publications.py index 8bb69e0..36c9b86 100644 --- a/astrodb_utils/publications.py +++ b/astrodb_utils/publications.py @@ -267,26 +267,24 @@ def ingest_publication( logger.error("Publication, DOI, or Bibcode is required input") return - use_ads = check_ads_token() - - if not use_ads and not ignore_ads: - logger.warning( - "An ADS_TOKEN environment variable is not set.\n" - "setting ignore_ads=True.") - ignore_ads = True - - if not ignore_ads: - if not use_ads and (not reference and (not doi or not bibcode)): - logger.error( - "An ADS_TOKEN environment variable must be set" - "in order to auto-populate the fields.\n" - "Without an ADS_TOKEN, name and bibcode or DOI must be set explicity." 
- ) - return - else: - use_ads = False + if not ignore_ads: + ads_token = check_ads_token() - logger.debug(f"ignore_ads set to {not use_ads}") + if not ads_token: + logger.warning( + "An ADS_TOKEN environment variable is not set.\n" + "setting ignore_ads=True.") + ignore_ads = True + + if (not reference and (not doi or not bibcode)): + logger.error( + "An ADS_TOKEN environment variable must be set" + "in order to auto-populate the fields.\n" + "Without an ADS_TOKEN, name and bibcode or DOI must be set explicity." + ) + return + + logger.debug(f"ignore_ads set to {ignore_ads}") if bibcode: if "arXiv" in bibcode: @@ -299,40 +297,14 @@ def ingest_publication( name_add, bibcode_add, doi_add = "", "", "" using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - # Search ADS uing a provided arxiv id - if arxiv_id and use_ads: - arxiv_matches = ads.SearchQuery( - q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"] - ) - arxiv_matches_list = list(arxiv_matches) - if len(arxiv_matches_list) != 1: - logger.error("should only be one matching arxiv id") - return - if len(arxiv_matches_list) == 1: - logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}") - article = arxiv_matches_list[0] - logger.debug( - f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}" - ) - if not reference: # generate the name if it was not provided - name_stub = article.first_author.replace(",", "").replace(" ", "") - name_add = name_stub[0:4] + article.year[-2:] - else: - name_add = reference - description = article.title[0] - bibcode_add = article.bibcode - doi_add = article.doi[0] - - using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - elif arxiv_id: - name_add = reference - bibcode_add = arxiv_id - doi_add = doi - using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" + # Search ADS uing a provided arxiv id + if arxiv_id: + name_add, bibcode_add, doi_add, description = 
find_pub_using_arxiv_id(arxiv_id, reference, doi, ignore_ads) + using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" # Search ADS using a provided DOI - if doi and use_ads: + if doi and not ignore_ads: doi_matches = ads.SearchQuery( doi=doi, fl=["id", "bibcode", "title", "first_author", "year", "doi"] ) @@ -363,7 +335,7 @@ def ingest_publication( doi_add = doi using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}" - if bibcode and use_ads: + if bibcode and not ignore_ads: bibcode_matches = ads.SearchQuery( bibcode=bibcode, fl=["id", "bibcode", "title", "first_author", "year", "doi"], @@ -447,3 +419,36 @@ def check_ads_token(): return use_ads + +def find_pub_using_arxiv_id(arxiv_id, reference, doi, ignore_ads): + if not ignore_ads: + arxiv_matches = ads.SearchQuery( + q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"] + ) + arxiv_matches_list = list(arxiv_matches) + if len(arxiv_matches_list) != 1: + logger.error("should only be one matching arxiv id") + return + + if len(arxiv_matches_list) == 1: + logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}") + article = arxiv_matches_list[0] + logger.debug( + f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}" + ) + if not reference: # generate the name if it was not provided + name_stub = article.first_author.replace(",", "").replace(" ", "") + name_add = name_stub[0:4] + article.year[-2:] + else: + name_add = reference + description = article.title[0] + bibcode_add = article.bibcode + doi_add = article.doi[0] + + else: + name_add = reference + bibcode_add = arxiv_id + doi_add = doi + description = None + + return name_add, bibcode_add, doi_add, description From 7e97614ead1052ef876ab666a095e9cd26a17ded Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 11 Mar 2025 16:24:54 -0400 Subject: [PATCH 10/10] remove None default for reference since it's required.
--- astrodb_utils/sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrodb_utils/sources.py b/astrodb_utils/sources.py index 4cf80d7..b90fe48 100644 --- a/astrodb_utils/sources.py +++ b/astrodb_utils/sources.py @@ -228,7 +228,7 @@ def ingest_names( def ingest_source( db, source, - reference: str = None, + reference: str, *, ra: float = None, dec: float = None,