Issue #553: compatibility with bibreader

DIAGNijmegen · Oct 7, 2024 · 08b1794
1 parent ffafae0
commit 08b1794
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 59 deletions.
diff --git a/scripts/automatic_update/get_biblatex.py b/scripts/automatic_update/get_biblatex.py
@@ -84,39 +84,37 @@ def _get_doi_abstract(self):
         return abstract_text
 
     @staticmethod
-    def _clean_author_abbreviation(auth_abr, year, bib_file):
-        auth_abr = auth_abr+year
-        substring_end = ","
-        max_length = 3
-
-        count = 0
-        start_index = 0
-        while True:
-            start = bib_file.find(auth_abr, start_index)
-            if start == -1:
-                break
-            end = bib_file.find(substring_end, start + len(auth_abr))
-            if end == -1:
-                break
-            substring = bib_file[start+len(auth_abr):end]
-            if len(substring) <= max_length:
-                count += 1
-            start_index = end + len(substring_end)
-
-        letters = {1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e',
-                   6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j',
-                   11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o',
-                   16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't',
-                   21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}
-
-        if count != 0:
-            for i in range(1, count+2):
-                abbreviation = auth_abr+letters[i]
-                if bib_file.count(abbreviation) == 0:
-                    auth_abr = auth_abr + letters[i]
-                    break
-
-        return auth_abr
+    def _clean_author_abbreviation(auth_abr, year, entries):
+        """
+        Creates a unique author-year key for a BibEntry based on existing entries.
+
+        Args:
+            auth_abr (str): The initial author abbreviation (e.g., 'Peet').
+            year (str): Year of publication (e.g., '24' for 2024).
+            entries (list): List of existing BibEntry objects with `entry.key` attributes.
+
+        Returns:
+            str: A unique author-year key (e.g., 'Peet24', 'Peet24a', 'Peet24b').
+        """
+        # Combine author abbreviation with the year to form the initial key (e.g., "Peet24").
+        base_key = auth_abr + year
+
+        # Collect existing keys to check for duplicates
+        existing_keys = {entry.key for entry in entries}
+
+        # If the base key is not in existing keys, return it as the new unique key
+        if base_key not in existing_keys:
+            return base_key
+
+        # If the base key exists, try adding alphabetical suffixes (a, b, c, etc.) until a unique key is found
+        letters = 'abcdefghijklmnopqrstuvwxyz'
+        for letter in letters:
+            new_key = base_key + letter  # Create new keys like "Peet24a", "Peet24b", etc.
+            if new_key not in existing_keys:
+                return new_key
+
+        # All 26 single-letter suffixes are already taken: fail loudly instead of returning a duplicate key
+        raise ValueError(f"Could not generate a unique key for {base_key}. Consider expanding suffix options.")
 
     def get_bib_text(self):
 
@@ -146,10 +144,6 @@ def get_bib_text(self):
             kind = 'article'
             journal = 'Preprint'
 
-
-
-
-
         author_string = "{"
         for index, author in enumerate(response_json["author"]):
             if index == len(response_json["author"])-1:

diff --git a/scripts/automatic_update/update_bibfile.py b/scripts/automatic_update/update_bibfile.py
@@ -94,28 +94,36 @@ def get_citations(semantic_scholar_ids, sch):
 
 
 def get_bib_info(diag_bib_file, item): #diag_bib_file is the file read in as a string, item is row from csv
-    #Get DOI information
+    """
+    Checks if a DOI exists in the list of BibEntry objects. If not, generates and returns a new BibTeX entry.
 
-    # if no ss_doi exists
-    if len(str(item['ss_doi']))==0 or str(item['ss_doi'])=='nan':
-        print('no ss_doi available, I cannot add new bib entry', item['ss_id'])
-        return None
-
-    # make sure doi is not already in diag.bib
-    if item['ss_doi'] in diag_bib_file:
+    Args:
+        diag_bib_file (list): List of existing BibEntry objects.
+        item (dict): Row from the CSV file containing 'ss_doi' and 'ss_id'.
 
-        start_index = diag_bib_file.find(item['ss_doi'])
-        end_index = diag_bib_file.find('}', start_index)  # Include the closing brace
-        matching_item_str = diag_bib_file[start_index:end_index]
+    Returns:
+        dict or None: Returns a new BibTeX entry as a dictionary if it doesn't already exist; otherwise, returns None.
+    """
 
-        print('DOI already exists in bib file. Matching item:', matching_item_str)
+    # Ensure 'ss_doi' is present in the item and is not empty or NaN
+    ss_doi = str(item.get('ss_doi', ''))
+    if len(ss_doi) == 0 or ss_doi.lower() == 'nan':
+        print(f'No ss_doi available; cannot add new bib entry for {item.get("ss_id")}')
+        return None
 
-        if matching_item_str == item['ss_doi']:
-            print('doi already exists in bib file, I will not add new bib entry', item['ss_doi'], item['ss_id'])
-            return None
-
-        else:
-            print('similar doi already exists in bib file, but new item will be added for ', item['ss_doi'], item['ss_id'])
+    # Check if the DOI already exists in the BibEntry list
+    existing_entry = None
+    for entry in diag_bib_file:
+        # Check if the 'doi' field exists in the current BibEntry and matches 'ss_doi'
+        if 'doi' in entry.fields and entry.fields['doi'] == ss_doi:
+            existing_entry = entry
+            break
+
+    # If the DOI is found in the existing entries, print information and return None
+    if existing_entry:
+        print(f'DOI already exists in bib file. Matching item key: {existing_entry.key}')
+        print(f'doi already exists in bib file, I will not add new bib entry for {ss_doi} and {item.get("ss_id")}')
+        return None
 
     # Get BibLatex information based on DOI if not in the file
     reader = GetBiblatex(doi=item['ss_doi'], ss_id=item['ss_id'], diag_bib=diag_bib_file)
@@ -323,21 +331,21 @@ def main():
 
 
     # load bib file just for reading at this point
-    diag_bib_path = os.path.join('diag.bib')
+    diag_bib_path = os.path.join(r"C:\Users\drepeeters\OneDrive - Radboudumc\Desktop\webteam\Literature\diag.bib")
 
     diag_bib_raw = read_bibfile(None, diag_bib_path)
     remove_items = manually_checked[manually_checked['action']=='[update item]']['bibkey'].tolist()
     diag_bib_raw = [entry for entry in diag_bib_raw if entry.key not in remove_items]
     save_to_file(diag_bib_raw, None, 'diag.bib')
 
 
+    blacklist_items, items_to_add, items_to_update, failed_new_items, failed_updated_items, failed_to_find_actions, dict_new_items_bibkey_pmid = loop_manual_check(manually_checked, diag_bib_raw)
+
     with open(diag_bib_path, 'r', encoding="utf8") as orig_bib_file:
         diag_bib_orig = orig_bib_file.read()
 
-    blacklist_items, items_to_add, items_to_update, failed_new_items, failed_updated_items, failed_to_find_actions, dict_new_items_bibkey_pmid = loop_manual_check(manually_checked, diag_bib_orig)
-
     #Add new bib entries to the diag.bib file
-    diag_bib_added_items = diag_bib_orig + items_to_add  
+    diag_bib_added_items = diag_bib_orig + items_to_add
     with open('diag.bib', 'w', encoding="utf8") as bibtex_file:
         bibtex_file.write(diag_bib_added_items)