Skip to content
106 changes: 60 additions & 46 deletions astrodb_utils/publications.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,18 +268,23 @@ def ingest_publication(
return

if not ignore_ads:
use_ads = check_ads_token()
if not use_ads and (not reference and (not doi or not bibcode)):
logger.error(
"An ADS_TOKEN environment variable must be set"
"in order to auto-populate the fields.\n"
"Without an ADS_TOKEN, name and bibcode or DOI must be set explicity."
)
return
else:
use_ads = False
ads_token = check_ads_token()

logger.debug(f"Use ADS set to {use_ads}")
if not ads_token:
logger.warning(
"An ADS_TOKEN environment variable is not set.\n"
"setting ignore_ads=True.")
ignore_ads = True

if (not reference and (not doi or not bibcode)):
logger.error(
"An ADS_TOKEN environment variable must be set"
"in order to auto-populate the fields.\n"
"Without an ADS_TOKEN, name and bibcode or DOI must be set explicity."
)
return

logger.debug(f"ignore_ads set to {ignore_ads}")

if bibcode:
if "arXiv" in bibcode:
Expand All @@ -291,40 +296,15 @@ def ingest_publication(
arxiv_id = None

name_add, bibcode_add, doi_add = "", "", ""
# Search ADS using a provided arxiv id
if arxiv_id and use_ads:
arxiv_matches = ads.SearchQuery(
q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"]
)
arxiv_matches_list = list(arxiv_matches)
if len(arxiv_matches_list) != 1:
logger.error("should only be one matching arxiv id")
return

if len(arxiv_matches_list) == 1:
logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}")
article = arxiv_matches_list[0]
logger.debug(
f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}"
)
if not reference: # generate the name if it was not provided
name_stub = article.first_author.replace(",", "").replace(" ", "")
name_add = name_stub[0:4] + article.year[-2:]
else:
name_add = reference
description = article.title[0]
bibcode_add = article.bibcode
doi_add = article.doi[0]
using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"

using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"
elif arxiv_id:
name_add = reference
bibcode_add = arxiv_id
doi_add = doi
using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"
# Search ADS using a provided arxiv id
if arxiv_id:
name_add, bibcode_add, doi_add, description = find_pub_using_arxiv_id(arxiv_id, reference, doi, ignore_ads)
using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"

# Search ADS using a provided DOI
if doi and use_ads:
if doi and not ignore_ads:
doi_matches = ads.SearchQuery(
doi=doi, fl=["id", "bibcode", "title", "first_author", "year", "doi"]
)
Expand All @@ -335,7 +315,6 @@ def ingest_publication(

if len(doi_matches_list) == 1:
logger.debug(f"Publication found in ADS using DOI: {doi}")
using = doi
article = doi_matches_list[0]
logger.debug(
f"{article.first_author}, {article.year},"
Expand All @@ -349,12 +328,14 @@ def ingest_publication(
description = article.title[0]
bibcode_add = article.bibcode
doi_add = article.doi[0]
using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"
elif doi:
name_add = reference
bibcode_add = bibcode
doi_add = doi
using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"

if bibcode and use_ads:
if bibcode and not ignore_ads:
bibcode_matches = ads.SearchQuery(
bibcode=bibcode,
fl=["id", "bibcode", "title", "first_author", "year", "doi"],
Expand All @@ -370,7 +351,6 @@ def ingest_publication(

elif len(bibcode_matches_list) == 1:
logger.debug(f"Publication found in ADS using bibcode: {bibcode}")
using = str(bibcode)
article = bibcode_matches_list[0]
logger.debug(
f"{article.first_author}, {article.year}, "
Expand All @@ -387,6 +367,7 @@ def ingest_publication(
doi_add = None
else:
doi_add = article.doi[0]
using = f"ref: {name_add}, bibcode: {bibcode_add}, doi: {doi_add}"
elif bibcode:
name_add = reference
bibcode_add = bibcode
Expand All @@ -395,7 +376,7 @@ def ingest_publication(

if reference and not bibcode and not doi:
name_add = reference
using = "user input"
using = "ref: {reference} user input. No bibcode or doi provided."

new_ref = [
{
Expand Down Expand Up @@ -438,3 +419,36 @@ def check_ads_token():

return use_ads


def find_pub_using_arxiv_id(arxiv_id, reference, doi, ignore_ads):
    """
    Work out the reference name, bibcode, DOI and description for an arXiv id.

    Parameters
    ----------
    arxiv_id
        arXiv identifier. Used as the ADS query string; when ADS is ignored
        it is returned unchanged as the bibcode.
    reference
        Reference name supplied by the caller. When it is empty and ADS is
        queried, a name is generated from the ADS record instead.
    doi
        DOI supplied by the caller. Only returned when ADS is ignored.
    ignore_ads: bool
        If True, ADS is not queried and the caller-supplied values are
        returned as-is with a description of None.

    Returns
    -------
    tuple or None
        ``(name_add, bibcode_add, doi_add, description)``.
        None is returned (after logging an error) when the ADS query does not
        yield exactly one match, so callers that unpack the result must check
        for None first.
    """
    if ignore_ads:
        # No lookup possible: echo back what the caller gave us.
        return reference, arxiv_id, doi, None

    arxiv_matches = ads.SearchQuery(
        q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"]
    )
    arxiv_matches_list = list(arxiv_matches)
    if len(arxiv_matches_list) != 1:
        logger.error("should only be one matching arxiv id")
        return None

    logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}")
    article = arxiv_matches_list[0]
    logger.debug(
        f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}"
    )

    if reference:
        name_add = reference
    else:
        # Generate the name if it was not provided: first four characters of
        # the first author (commas and spaces removed) + last two of the year.
        name_stub = article.first_author.replace(",", "").replace(" ", "")
        name_add = name_stub[0:4] + article.year[-2:]

    description = article.title[0]
    bibcode_add = article.bibcode
    # A record may carry no DOI (typical for arXiv-only entries); indexing
    # None would raise TypeError, so fall back to None in that case.
    doi_add = article.doi[0] if article.doi else None

    return name_add, bibcode_add, doi_add, description
34 changes: 19 additions & 15 deletions astrodb_utils/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ def find_source_in_db(
ra_col_name="ra_deg",
dec_col_name="dec_deg",
use_simbad=True,
fuzzy=False
):
"""
Find a source in the database given a source name and optional coordinates.
Uses astrodbkit .search_object and .query_region methods to search the Sources and Names table.

Parameters
----------
Expand All @@ -50,6 +52,8 @@ def find_source_in_db(
use_simbad: bool
Use Simbad to resolve the source name if it is not found in the database. Default is True.
Set to False if no internet connection.
fuzzy: bool
Use fuzzy search to find source name in database. Default is False.

Returns
-------
Expand All @@ -74,7 +78,7 @@ def find_source_in_db(

# NO MATCHES
# If no matches, try fuzzy search
if len(db_name_matches) == 0:
if len(db_name_matches) == 0 and fuzzy:
logger.debug(f"{source}: No name matches, trying fuzzy search")
db_name_matches = db.search_object(
source,
Expand Down Expand Up @@ -224,8 +228,8 @@ def ingest_names(
def ingest_source(
db,
source,
reference: str,
*,
reference: str = None,
ra: float = None,
dec: float = None,
epoch: str = None,
Expand Down Expand Up @@ -282,6 +286,19 @@ def ingest_source(

logger.debug(f"Trying to ingest source: {source}")

# Make sure reference is provided and in References table
ref_check = find_publication(db, reference=reference)
logger.debug(f"ref_check: {ref_check}")
if ref_check[0] is False:
msg = (
f"Skipping: {source}."
f"Discovery reference {reference} is either missing or "
" is not in Publications table. \n"
f"(Add it with ingest_publication function.)"
)
exit_function(msg, raise_error)
return

# Find out if source is already in database or not
if search_db:
logger.debug(f"Checking database for: {source} at ra: {ra}, dec: {dec}")
Expand Down Expand Up @@ -316,19 +333,6 @@ def ingest_source(
exit_function(msg1 + msg2, raise_error)
return

# Make sure reference is provided and in References table
ref_check = find_publication(db, reference=reference)
logger.debug(f"ref_check: {ref_check}")
if ref_check[0] is False:
msg = (
f"Skipping: {source}."
f"Discovery reference {reference} is either missing or "
" is not in Publications table. \n"
f"(Add it with ingest_publication function.)"
)
exit_function(msg, raise_error)
return

# Try to get coordinates from SIMBAD if they were not provided
if (ra is None or dec is None) and use_simbad:
# Try to get coordinates from SIMBAD
Expand Down
22 changes: 20 additions & 2 deletions astrodb_utils/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pytest

from astrodb_utils import load_astrodb, logger
from astrodb_utils.publications import ingest_publication
from astrodb_utils.sources import ingest_source

logger.setLevel("DEBUG")

Expand All @@ -24,6 +26,22 @@ def db():
# Confirm file was created
assert os.path.exists(DB_NAME)

logger.info("Loaded SIMPLE database using load_astrodb function in conftest.py")
logger.info("Loaded AstroDB Template database using load_astrodb function in conftest.py")



ingest_publication(
db,
reference="Refr20",
bibcode="2020MNRAS.496.1922B",
doi="10.1093/mnras/staa1522",
ignore_ads=True,
)

ingest_publication(db, doi="10.1086/161442", reference="Prob83")

ingest_source(db, "LHS 2924", reference="Prob83")

return db


return db
17 changes: 0 additions & 17 deletions astrodb_utils/tests/test_publications.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,6 @@
from astrodb_utils.publications import find_publication, ingest_publication


def test_ingest_publications(db):
    """Ingest a made-up publication with ADS ignored and confirm one row exists."""
    ingest_publication(
        db,
        reference="Refr20",
        bibcode="2020MNRAS.496.1922B",
        doi="10.1093/mnras/staa1522",
        ignore_ads=True,
    )

    # Exactly one Publications row should carry the reference just ingested.
    refr20_rows = db.query(db.Publications).filter(
        db.Publications.c.reference == "Refr20"
    )
    assert refr20_rows.count() == 1


def test_find_publication(db):
assert not find_publication(db)[0] # False
assert find_publication(db, reference="Refr20")[0] # True
Expand Down
11 changes: 9 additions & 2 deletions astrodb_utils/tests/test_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
ingest_source,
)

# TODO: Ingest publication just for these tests so they can be run independent of test_publications.py


@pytest.mark.parametrize(
"source_data",
Expand Down Expand Up @@ -90,6 +88,15 @@ def test_find_source_in_db(db):
)
assert len(search_result) == 0

search_result = find_source_in_db(db,"LHS 2924")
assert search_result[0] == "LHS 2924"

search_result = find_source_in_db(db,"LHS 292", fuzzy=False)
assert len(search_result) == 0

search_result = find_source_in_db(db,"LHS 292", fuzzy=True)
assert search_result[0] == "LHS 2924" # This is wrong and a result of fuzzy matching


def test_find_source_in_db_errors(db):
with pytest.raises(KeyError) as error_message:
Expand Down