bobleesj · bobleesj · Mar 11, 2024 · Mar 11, 2024 · Mar 11, 2024
diff --git a/...liynyk_test_atom_mixing_formatted/csv/20240229_oliynyk_test_atom_mixing_formatted_log.csv b/...liynyk_test_atom_mixing_formatted/csv/20240229_oliynyk_test_atom_mixing_formatted_log.csv
@@ -1,2 +1,2 @@
 File,Number of atoms in supercell,Processing time (s)
-539016,334,3.585
+539016,334,3.945
diff --git a/filter/occupancy.py b/filter/occupancy.py
@@ -17,9 +17,7 @@ def get_coord_occupancy_sum(cif_loop_values):
     coord_occupancy_sum = {}
 
     for i in range(num_atom_labels):
-        _, occupancy, coordinates = cif_parser.get_atom_info(
-            cif_loop_values, i
-        )
+        _, occupancy, coordinates = cif_parser.get_atom_info(cif_loop_values, i)
         occupancy_num = coord_occupancy_sum.get(coordinates, 0) + occupancy
         coord_occupancy_sum[coordinates] = occupancy_num
 
@@ -69,25 +67,24 @@ def get_all_possible_ordered_label_pairs(cif_loop_values):
     # Get a list of unique pairs from atomic labels
     label_list = cif_parser.get_atom_label_list(cif_loop_values)
     all_possible_label_pairs = list(product(label_list, repeat=2))
-    
+
     # Step 1: Sort each pair to standardize order
     sorted_pairs = pair_order.sort_tuple_in_list(all_possible_label_pairs)
 
     # Step 2: Get only the unique pairs
     unique_sorted_pairs = list(set(sorted_pairs))
 
     # Step 3. Order pairs based on Mendeleev ordering
-    unique_sorted_pairs_ordered = (
-        [tuple(pair_order.order_pair_by_mendeleev(pair))
-         for pair in unique_sorted_pairs]
-    )
+    unique_sorted_pairs_ordered = [
+        tuple(pair_order.order_pair_by_mendeleev(pair))
+        for pair in unique_sorted_pairs
+    ]
 
     return unique_sorted_pairs_ordered
 
 
 # Get atom site mixing label for all pairs possible
-def get_atom_site_mixing_dict(
-        atom_site_mixing_file_info, cif_loop_values):
+def get_atom_site_mixing_dict(atom_site_mixing_file_info, cif_loop_values):
     """
     Gets atomic site mixing dictionary for all possible label pairs using cif loop values.
     """
@@ -122,7 +119,7 @@ def get_atom_site_mixing_dict(
             if first_label_occ == 1 and second_label_occ == 1:
                 atom_site_pair_dict[pair] = "4"
                 continue
-    
+
             # Step 4. Check deficiency at the pair level
             # Check whether one of the sites is deficient
             is_first_label_site_deficient = None
@@ -136,7 +133,7 @@ def get_atom_site_mixing_dict(
 
                 if occupancy_sum[second_label_coord] < 1:
                     is_second_label_deficient = True
-                    
+
                 else:
                     is_second_label_deficient = False
 
@@ -162,28 +159,29 @@ def get_atom_site_mixing_dict(
             # Assign "3" for "deficiency_no_atomic_mixing"
             # Check 1. One of the labels is deficient
             # Check 2. Both labels are not atomic mixed
-            if ((is_first_label_site_deficient or
-                 is_second_label_deficient) and
-                (not is_first_label_atomic_mixed and
-                 not is_second_label_atomic_mixed)):
+            if (
+                is_first_label_site_deficient or is_second_label_deficient
+            ) and (
+                not is_first_label_atomic_mixed
+                and not is_second_label_atomic_mixed
+            ):
                 atom_site_pair_dict[pair] = "3"
 
             # Assign "2" for "full_occupancy_atomic_mixing"
             # Check 1. Both labels are not deficient
             # Check 2. At least one label is atomic mixed
-            if ((not is_first_label_site_deficient
-                 and not is_second_label_deficient) and
-                (is_first_label_atomic_mixed or
-                 is_second_label_atomic_mixed)):
+            if (
+                not is_first_label_site_deficient
+                and not is_second_label_deficient
+            ) and (is_first_label_atomic_mixed or is_second_label_atomic_mixed):
                 atom_site_pair_dict[pair] = "2"
 
             # Assign "1" for "deficiency"
             # Check 1. At least one label is deficient
             # Check 2. At least one label mixed
-            if ((is_first_label_site_deficient or
-                 is_second_label_deficient) and
-                (is_first_label_atomic_mixed or
-                 is_second_label_atomic_mixed)):
+            if (
+                is_first_label_site_deficient or is_second_label_deficient
+            ) and (is_first_label_atomic_mixed or is_second_label_atomic_mixed):
                 atom_site_pair_dict[pair] = "1"
 
     return atom_site_pair_dict
diff --git a/main.py b/main.py
@@ -41,8 +41,10 @@ def main(is_iteractive_mode=True, dir_path=None):
 
         # If the user chooses no option, then it's simply 3
         if not supercell_method:
-            print("\nYour default option is generating a 2-2-2 supercell for",
-                "files more than 100 atoms in the unit cell.")
+            print(
+                "\nYour default option is generating a 2-2-2 supercell for",
+                "files more than 100 atoms in the unit cell.",
+            )
             supercell_method = 1
 
     if not is_iteractive_mode:
@@ -52,7 +54,7 @@ def main(is_iteractive_mode=True, dir_path=None):
     file_path_list = folder.get_cif_file_path_list(dir_path)
 
     # PART 2: PREPROCESS
-    
+
     dist_mix_pair_dict = {}
 
     overall_start_time = time.perf_counter()
@@ -62,17 +64,13 @@ def main(is_iteractive_mode=True, dir_path=None):
         filename_with_ext = os.path.basename(file_path)
         filename, ext = os.path.splitext(filename_with_ext)
         num_of_atoms = None
-    
+
         # Process CIF files and return a list of coordinates
         result = cif_parser_handler.get_cif_info(
-            file_path,
-            cif_parser.get_loop_tags(),
-            supercell_method
+            file_path, cif_parser.get_loop_tags(), supercell_method
         )
 
-        CIF_loop_values = cif_parser_handler.get_cif_loop_values(
-            file_path
-        )
+        CIF_loop_values = cif_parser_handler.get_cif_loop_values(file_path)
 
         _, lenghts, angles_rad, _, all_points, _, atom_site_list = result
 
@@ -82,14 +80,13 @@ def main(is_iteractive_mode=True, dir_path=None):
         echo(
             style(
                 f"Processing {filename_with_ext} with "
-                f"{num_of_atoms} atoms {index}", fg="yellow"
+                f"{num_of_atoms} atoms {index}",
+                fg="yellow",
             )
         )
 
         atomic_pair_list = supercell.get_atomic_pair_list(
-            all_points,
-            lenghts,
-            angles_rad
+            all_points, lenghts, angles_rad
         )
 
         # Get atomic site mixing info -> String
@@ -104,35 +101,26 @@ def main(is_iteractive_mode=True, dir_path=None):
 
         # Find the shortest pair from each reference atom
         ordered_pairs = bond.process_and_order_pairs(
-            all_points,
-            atomic_pair_list
+            all_points, atomic_pair_list
         )
 
         # Determine unique pairs and get the shortest dist for each pair
-        unique_pairs_dict = bond.get_unique_pairs_dict(
-            ordered_pairs,
-            filename
-        )
+        unique_pairs_dict = bond.get_unique_pairs_dict(ordered_pairs, filename)
 
         dist_mix_pair_dict = bond.get_dist_mix_pair_dict(
-            dist_mix_pair_dict,
-            unique_pairs_dict,
-            label_pair_mixing_dict
+            dist_mix_pair_dict, unique_pairs_dict, label_pair_mixing_dict
         )
 
         elapsed_time = time.perf_counter() - start_time
 
         prompt.print_progress(
-            filename_with_ext,
-            num_of_atoms,
-            elapsed_time,
-            is_finished=True
+            filename_with_ext, num_of_atoms, elapsed_time, is_finished=True
         )
 
         data = {
-            'File': filename,
+            "File": filename,
             "Number of atoms in supercell": num_of_atoms,
-            "Processing time (s)": round(elapsed_time, 3)
+            "Processing time (s)": round(elapsed_time, 3),
         }
         log_list.append(data)
 
@@ -147,10 +135,8 @@ def main(is_iteractive_mode=True, dir_path=None):
 
     prompt.print_dict_in_json(dist_mix_element_pair_dict)
 
-    missing_label_pairs = bond.get_sorted_missing_pairs(
-        dist_mix_pair_dict
-    )
-
+    missing_label_pairs = bond.get_sorted_missing_pairs(dist_mix_pair_dict)
+
     missing_element_pairs = bond.get_sorted_missing_pairs(
         dist_mix_element_pair_dict
     )
@@ -170,54 +156,40 @@ def main(is_iteractive_mode=True, dir_path=None):
             dist_mix_pair_dict,
             missing_label_pairs,
             "summary_label.txt",
-            dir_path
+            dir_path,
         )
-        
+
         # Save Excel file with label pair
         excel.write_label_pair_dict_to_excel_json(
-            dist_mix_pair_dict,
-            "label",
-            dir_path
+            dist_mix_pair_dict, "label", dir_path
         )
 
         # Draw histograms with label pair
-        histogram.plot_histograms_from_label_dict(
-            dist_mix_pair_dict,
-            dir_path
-        )
-
+        histogram.plot_histograms_from_label_dict(dist_mix_pair_dict, dir_path)
+
         # Write summary_element.txt
         writer.write_summary_and_missing_pairs_with_element_dict(
             dist_mix_element_pair_dict,
             missing_element_pairs,
             "summary_element.txt",
-            dir_path
-        ) 
+            dir_path,
+        )
 
         # Save Excel file with element pair
         excel.write_element_pair_dict_to_excel_json(
-            dist_mix_element_pair_dict,
-            "element",
-            dir_path
+            dist_mix_element_pair_dict, "element", dir_path
         )
 
         # Draw histograms with element pair
         histogram.plot_histograms_from_element_dict(
-            dist_mix_element_pair_dict,
-            dir_path
+            dist_mix_element_pair_dict, dir_path
         )
-
-
 
         total_elapsed_time = time.perf_counter() - overall_start_time
         print(f"Total processing time: {total_elapsed_time:.2f}s")
 
         # Save log csv
-        folder.save_to_csv_directory(
-            dir_path,
-            pd.DataFrame(log_list),
-            "log"
-        )
+        folder.save_to_csv_directory(dir_path, pd.DataFrame(log_list), "log")
 
     # print("\nAll files successfully processed.")
 

diff --git a/postprocess/bond.py b/postprocess/bond.py
@@ -86,7 +86,9 @@ def get_sorted_missing_pairs(pair_dict):
     )
 
     # Sort the pairs in the data as well before comparison
-    missing_label_pairs = [pair for pair in all_pairs if pair not in pairs_found]
+    missing_label_pairs = [
+        pair for pair in all_pairs if pair not in pairs_found
+    ]
 
     return missing_label_pairs
 
@@ -118,15 +120,18 @@ def get_unique_pairs_dict(ordered_pairs, filename):
         # if this pair is shorter than the previous pair
         if (
             label_tuple not in unique_pairs_dict[filename]
-            or pair["distance"] < unique_pairs_dict[filename][label_tuple]["distance"]
+            or pair["distance"]
+            < unique_pairs_dict[filename][label_tuple]["distance"]
         ):
             # Add this pair to the dictionary
             unique_pairs_dict[filename][label_tuple] = pair
 
     return unique_pairs_dict
 
 
-def get_dist_mix_pair_dict(dist_pair_dict, unique_pairs_dict, label_pair_mixing_dict):
+def get_dist_mix_pair_dict(
+    dist_pair_dict, unique_pairs_dict, label_pair_mixing_dict
+):
     """
     Returns dict containing files and dist per pair.
     """