Update scripts' code style using autopep8

JabRef · Aug 19, 2023 · 2fc8000 · 2fc8000
1 parent 1eabdb7
commit 2fc8000
Show file tree

Hide file tree

Showing 8 changed files with 64 additions and 46 deletions.
diff --git a/scripts/check_ampersands.py b/scripts/check_ampersands.py
@@ -34,7 +34,8 @@
                 if ('\&' in line):
                     errFileNames.append(file)
                     errRows.append(i + 1)
-                    errCols.append([index + 1 for index in range(len(line)) if line.startswith('\&', index)])
+                    errCols.append(
+                        [index + 1 for index in range(len(line)) if line.startswith('\&', index)])
 
 
 # In the case where we do find escaped &, the len() will be non-zero
@@ -43,8 +44,9 @@
     # For each file, append every row:col location to the error message
     for i, fname in enumerate(errFileNames):
         for col in errCols[i]:
-            err_msg += "("+ fname + ", " + str(errRows[i]) + ":" + str(col) + "), "
+            err_msg += "(" + fname + ", " + \
+                str(errRows[i]) + ":" + str(col) + "), "
     # Format end of string and return as Value Error to 'fail' GitHub Actions process
     err_msg = err_msg[:len(err_msg) - 2]
     err_msg += "]"
-    raise ValueError("Found Escaped Ampersands at: " + err_msg)
+    raise ValueError("Found Escaped Ampersands at: " + err_msg)
diff --git a/scripts/combine_journal_lists.py b/scripts/combine_journal_lists.py
@@ -11,11 +11,12 @@
 import sys
 import pandas as pd
 
+
 def main(output_filename):
     # Read and merge CSV files
     # dfs = [pd.read_csv(file, header=None) for file in import_order]
     dfs = []
-    for file in  sys.argv[2:]:
+    for file in sys.argv[2:]:
         df = pd.read_csv(file, header=None)
         dfs.append(df)
         print(f"{file}: {len(df)}")
@@ -38,5 +39,5 @@ def main(output_filename):
         filename = sys.argv[1]
     else:
         filename = "journalList.csv"
-    
+
     main(filename)
diff --git a/scripts/combine_journal_lists_dotless.py b/scripts/combine_journal_lists_dotless.py
@@ -18,11 +18,12 @@
 
 # Define the list of CSV files
 import_order = [
-  'journals/journal_abbreviations_entrez.csv',
-  'journals/journal_abbreviations_medicus.csv',
-  'journals/journal_abbreviations_webofscience-dotless.csv'
+    'journals/journal_abbreviations_entrez.csv',
+    'journals/journal_abbreviations_medicus.csv',
+    'journals/journal_abbreviations_webofscience-dotless.csv'
 ]
 
+
 def main(output_filename):
     # Read and merge CSV files
     # dfs = [pd.read_csv(file, header=None) for file in import_order]
@@ -50,5 +51,5 @@ def main(output_filename):
         filename = sys.argv[1]
     else:
         filename = "journalList_dotless.csv"
-    
+
     main(filename)
diff --git a/scripts/combine_journal_lists_dots.py b/scripts/combine_journal_lists_dots.py
@@ -31,6 +31,7 @@
     'journals/journal_abbreviations_webofscience-dots.csv'
 ]
 
+
 def main(output_filename):
     # Read and merge CSV files
     # dfs = [pd.read_csv(file, header=None) for file in import_order]
@@ -58,5 +59,5 @@ def main(output_filename):
         filename = sys.argv[1]
     else:
         filename = "journalList_dots.csv"
-    
+
     main(filename)
diff --git a/scripts/convert_to_comma.py b/scripts/convert_to_comma.py
@@ -10,6 +10,7 @@
 
 import csv
 
+
 def convert_semicolon_to_comma(input_file, output_file):
     with open(input_file, 'r', newline='', encoding='utf-8') as infile:
         csv_reader = csv.reader(infile, delimiter=';')

diff --git a/scripts/convert_txt2csv.py b/scripts/convert_txt2csv.py
@@ -23,7 +23,8 @@
                 separator = " = " if " = " in line else "="
                 break
             commented_lines += 1
-    df = pd.read_csv(fileName + ".txt", sep=separator, skiprows=commented_lines, header=None, engine="python", skipinitialspace=True, index_col=0, names=["Name", "Abbrev"])
+    df = pd.read_csv(fileName + ".txt", sep=separator, skiprows=commented_lines, header=None,
+                     engine="python", skipinitialspace=True, index_col=0, names=["Name", "Abbrev"])
     df.index = df.index.str.strip()
     df = df.Abbrev.str.split(",", expand=True)
     df.to_csv(fileName + ".csv", sep=",", header=False)

diff --git a/scripts/delete_general_duplicates_lists.py b/scripts/delete_general_duplicates_lists.py
@@ -1,55 +1,63 @@
 
 import pandas as pd
 import_order = [
-  '../journals/journal_abbreviations_acs.csv',
-  '../journals/journal_abbreviations_aea.csv',
-  '../journals/journal_abbreviations_ams.csv',
-  '../journals/journal_abbreviations_annee-philologique.csv',
-  '../journals/journal_abbreviations_astronomy.csv',
-  '../journals/journal_abbreviations_dainst.csv',
-  '../journals/journal_abbreviations_entrez.csv',
-  '../journals/journal_abbreviations_geology_physics.csv',
-  '../journals/journal_abbreviations_geology_physics_variations.csv',
-  '../journals/journal_abbreviations_ieee.csv',
-  '../journals/journal_abbreviations_ieee_strings.csv',
-  '../journals/journal_abbreviations_lifescience.csv',
-  '../journals/journal_abbreviations_mathematics.csv',
-  '../journals/journal_abbreviations_mechanical.csv',
-  '../journals/journal_abbreviations_medicus.csv',
-  '../journals/journal_abbreviations_meteorology.csv',
-  '../journals/journal_abbreviations_sociology.csv',
-  '../journals/journal_abbreviations_webofscience-dotless.csv',
-  '../journals/journal_abbreviations_webofscience-dots.csv'
+    '../journals/journal_abbreviations_acs.csv',
+    '../journals/journal_abbreviations_aea.csv',
+    '../journals/journal_abbreviations_ams.csv',
+    '../journals/journal_abbreviations_annee-philologique.csv',
+    '../journals/journal_abbreviations_astronomy.csv',
+    '../journals/journal_abbreviations_dainst.csv',
+    '../journals/journal_abbreviations_entrez.csv',
+    '../journals/journal_abbreviations_geology_physics.csv',
+    '../journals/journal_abbreviations_geology_physics_variations.csv',
+    '../journals/journal_abbreviations_ieee.csv',
+    '../journals/journal_abbreviations_ieee_strings.csv',
+    '../journals/journal_abbreviations_lifescience.csv',
+    '../journals/journal_abbreviations_mathematics.csv',
+    '../journals/journal_abbreviations_mechanical.csv',
+    '../journals/journal_abbreviations_medicus.csv',
+    '../journals/journal_abbreviations_meteorology.csv',
+    '../journals/journal_abbreviations_sociology.csv',
+    '../journals/journal_abbreviations_webofscience-dotless.csv',
+    '../journals/journal_abbreviations_webofscience-dots.csv'
 ]
+
+
 def handle_bad_line(line):
     print("Handle the problematic line manually:", line)
 
+
 # read the csv files into dataframes
 file_in = "../journals/journal_abbreviations_general.csv"
-general = pd.read_csv(file_in, delimiter=',',header=None, names=["Title", "abbreviation","ShortestAbbreviation","frequency"],dtype={"Title": str,"abbreviation":str,"ShortestAbbreviation":str})
-#Creating a new column Title lc which is Title in lower case for case insensitive comparison
-general['Title_lc']=general['Title'].str.lower()
+general = pd.read_csv(file_in, delimiter=',', header=None, names=["Title", "abbreviation", "ShortestAbbreviation", "frequency"], dtype={
+                      "Title": str, "abbreviation": str, "ShortestAbbreviation": str})
+# Creating a new column Title lc which is Title in lower case for case insensitive comparison
+general['Title_lc'] = general['Title'].str.lower()
 
-dflist=[]
+dflist = []
 for filename in import_order:
-    df = pd.read_csv(filename,delimiter=',',on_bad_lines=handle_bad_line, engine='python' ,names=["Title", "abbreviation","ShortestAbbreviation","frequency"],dtype={"Title": str,"abbreviation":str,"ShortestAbbreviation":str})
+    df = pd.read_csv(filename, delimiter=',', on_bad_lines=handle_bad_line, engine='python', names=[
+                     "Title", "abbreviation", "ShortestAbbreviation", "frequency"], dtype={"Title": str, "abbreviation": str, "ShortestAbbreviation": str})
     dflist.append(df)
 
-non_general_csv_df=pd.concat(dflist,ignore_index=True)
+non_general_csv_df = pd.concat(dflist, ignore_index=True)
 
 # Remove duplicates from non_general_csv_df to avoid removing valid entries
-non_general_csv_df.drop_duplicates(subset=['Title'], inplace=True,keep='first')
-#Creating a new column Title lc which is Title in lower case for case insensitive comparison
-non_general_csv_df['Title_lc']=non_general_csv_df['Title'].str.lower()
+non_general_csv_df.drop_duplicates(
+    subset=['Title'], inplace=True, keep='first')
+# Creating a new column Title lc which is Title in lower case for case insensitive comparison
+non_general_csv_df['Title_lc'] = non_general_csv_df['Title'].str.lower()
 
 # Merge the two dataframes on only the Title in lower case column
-merged_df = pd.merge(general, non_general_csv_df, on='Title_lc', how='left', indicator=True)
+merged_df = pd.merge(general, non_general_csv_df,
+                     on='Title_lc', how='left', indicator=True)
 
 # Keep only the rows that are present in general but not in non_general_csv_df
 result_df = merged_df.loc[merged_df['_merge'] == 'left_only', ['Title_lc']]
 
-result_df = pd.merge(general[['Title', 'abbreviation', 'ShortestAbbreviation','Title_lc']], result_df, on='Title_lc', how='inner')
-#Dropping the newly added column only used for comparison
+result_df = pd.merge(general[['Title', 'abbreviation', 'ShortestAbbreviation',
+                     'Title_lc']], result_df, on='Title_lc', how='inner')
+# Dropping the newly added column only used for comparison
 result_df.drop('Title_lc', axis=1, inplace=True)
 # Save the result dataframe to a csv file
-result_df.to_csv(file_in,  header=None, index=None,sep=',')
+result_df.to_csv(file_in,  header=None, index=None, sep=',')
diff --git a/scripts/update_mathscinet.py b/scripts/update_mathscinet.py
@@ -6,13 +6,16 @@
 file_out = "journals/journal_abbreviations_mathematics.csv"
 
 # Get the first two fields of the last version of MathSciNet data file, without empty values
-df_new = pd.read_csv(file_in, usecols=[0, 1]).dropna()[["Full Title", "Abbrev"]]
+df_new = pd.read_csv(file_in, usecols=[0, 1]).dropna()[
+    ["Full Title", "Abbrev"]]
 
 # Get our last mathematics data file
-df_old = pd.read_csv(file_out, sep=",", escapechar="\\", header=None, names=["Full Title", "Abbrev"])
+df_old = pd.read_csv(file_out, sep=",", escapechar="\\",
+                     header=None, names=["Full Title", "Abbrev"])
 
 # Concatenate, remove duplicates and sort by journal name
-df = pd.concat([df_new, df_old], axis=0).drop_duplicates().sort_values(by=["Full Title", "Abbrev"])
+df = pd.concat([df_new, df_old], axis=0).drop_duplicates(
+).sort_values(by=["Full Title", "Abbrev"])
 
 # Remove values where journal name is equal to abbreviation
 df = df[df["Full Title"].str.lower() != df["Abbrev"].str.lower()]