Skip to content

Commit

Permalink
Issue #553: compatibility with bibreader
Browse files Browse the repository at this point in the history
  • Loading branch information
drepeeters committed Oct 7, 2024
1 parent ffafae0 commit 08b1794
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 59 deletions.
68 changes: 31 additions & 37 deletions scripts/automatic_update/get_biblatex.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,39 +84,37 @@ def _get_doi_abstract(self):
return abstract_text

@staticmethod
def _clean_author_abbreviation(auth_abr, year, bib_file):
auth_abr = auth_abr+year
substring_end = ","
max_length = 3

count = 0
start_index = 0
while True:
start = bib_file.find(auth_abr, start_index)
if start == -1:
break
end = bib_file.find(substring_end, start + len(auth_abr))
if end == -1:
break
substring = bib_file[start+len(auth_abr):end]
if len(substring) <= max_length:
count += 1
start_index = end + len(substring_end)

letters = {1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e',
6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j',
11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o',
16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't',
21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}

if count != 0:
for i in range(1, count+2):
abbreviation = auth_abr+letters[i]
if bib_file.count(abbreviation) == 0:
auth_abr = auth_abr + letters[i]
break

return auth_abr
def _clean_author_abbreviation(auth_abr, year, entries):
"""
Creates a unique author-year key for a BibEntry based on existing entries.
Args:
auth_abr (str): The initial author abbreviation (e.g., 'Peet').
year (str): Year of publication (e.g., '24' for 2024).
entries (list): List of existing BibEntry objects with `entry.key` attributes.
Returns:
str: A unique author-year key (e.g., 'Peet24', 'Peet24a', 'Peet24b').
"""
# Combine author abbreviation with the year to form the initial key (e.g., "Peet24").
base_key = auth_abr + year

# Collect existing keys to check for duplicates
existing_keys = {entry.key for entry in entries}

# If the base key is not in existing keys, return it as the new unique key
if base_key not in existing_keys:
return base_key

# If the base key exists, try adding alphabetical suffixes (a, b, c, etc.) until a unique key is found
letters = 'abcdefghijklmnopqrstuvwxyz'
for letter in letters:
new_key = base_key + letter # Create new keys like "Peet24a", "Peet24b", etc.
if new_key not in existing_keys:
return new_key

# If all single-letter suffixes are used, raise an error or use a different strategy
raise ValueError(f"Could not generate a unique key for {base_key}. Consider expanding suffix options.")

def get_bib_text(self):

Expand Down Expand Up @@ -146,10 +144,6 @@ def get_bib_text(self):
kind = 'article'
journal = 'Preprint'





author_string = "{"
for index, author in enumerate(response_json["author"]):
if index == len(response_json["author"])-1:
Expand Down
52 changes: 30 additions & 22 deletions scripts/automatic_update/update_bibfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,36 @@ def get_citations(semantic_scholar_ids, sch):


def get_bib_info(diag_bib_file, item): #diag_bib_file is the file read in as a string, item is row from csv
#Get DOI information
"""
Checks if a DOI exists in the list of BibEntry objects. If not, generates and returns a new BibTeX entry.
# if no ss_doi exists
if len(str(item['ss_doi']))==0 or str(item['ss_doi'])=='nan':
print('no ss_doi available, I cannot add new bib entry', item['ss_id'])
return None

# make sure doi is not already in diag.bib
if item['ss_doi'] in diag_bib_file:
Args:
diag_bib_entries (list): List of existing DiagBib objects.
item (dict): Row from the CSV file containing 'ss_doi' and 'ss_id'.
start_index = diag_bib_file.find(item['ss_doi'])
end_index = diag_bib_file.find('}', start_index) # Include the closing brace
matching_item_str = diag_bib_file[start_index:end_index]
Returns:
dict or None: Returns a new BibTeX entry as a dictionary if it doesn't already exist; otherwise, returns None.
"""

print('DOI already exists in bib file. Matching item:', matching_item_str)
# Ensure 'ss_doi' is present in the item and is not empty or NaN
ss_doi = str(item.get('ss_doi', ''))
if len(ss_doi) == 0 or ss_doi.lower() == 'nan':
print(f'No ss_doi available; cannot add new bib entry for {item.get("ss_id")}')
return None

if matching_item_str == item['ss_doi']:
print('doi already exists in bib file, I will not add new bib entry', item['ss_doi'], item['ss_id'])
return None

else:
print('similar doi already exists in bib file, but new item will be added for ', item['ss_doi'], item['ss_id'])
# Check if the DOI already exists in the BibEntry list
existing_entry = None
for entry in diag_bib_file:
# Check if the 'doi' field exists in the current BibEntry and matches 'ss_doi'
if 'doi' in entry.fields and entry.fields['doi'] == ss_doi:
existing_entry = entry
break

# If the DOI is found in the existing entries, print information and return None
if existing_entry:
print(f'DOI already exists in bib file. Matching item key: {existing_entry.key}')
print(f'doi already exists in bib file, I will not add new bib entry for {ss_doi} and {item.get("ss_id")}')
return None

# Get BibLatex information based on DOI if not in the file
reader = GetBiblatex(doi=item['ss_doi'], ss_id=item['ss_id'], diag_bib=diag_bib_file)
Expand Down Expand Up @@ -323,21 +331,21 @@ def main():


# load bib file just for reading at this point
diag_bib_path = os.path.join('diag.bib')
diag_bib_path = os.path.join(r"C:\Users\drepeeters\OneDrive - Radboudumc\Desktop\webteam\Literature\diag.bib")

diag_bib_raw = read_bibfile(None, diag_bib_path)
remove_items = manually_checked[manually_checked['action']=='[update item]']['bibkey'].tolist()
diag_bib_raw = [entry for entry in diag_bib_raw if entry.key not in remove_items]
save_to_file(diag_bib_raw, None, 'diag.bib')


blacklist_items, items_to_add, items_to_update, failed_new_items, failed_updated_items, failed_to_find_actions, dict_new_items_bibkey_pmid = loop_manual_check(manually_checked, diag_bib_raw)

with open(diag_bib_path, 'r', encoding="utf8") as orig_bib_file:
diag_bib_orig = orig_bib_file.read()

blacklist_items, items_to_add, items_to_update, failed_new_items, failed_updated_items, failed_to_find_actions, dict_new_items_bibkey_pmid = loop_manual_check(manually_checked, diag_bib_orig)

#Add new bib entries to the diag.bib file
diag_bib_added_items = diag_bib_orig + items_to_add
diag_bib_added_items = diag_bib_orig + items_to_add
with open('diag.bib', 'w', encoding="utf8") as bibtex_file:
bibtex_file.write(diag_bib_added_items)

Expand Down

0 comments on commit 08b1794

Please sign in to comment.