1+ from typing import Optional
2+ import csv
3+ import pathlib
4+ from openpyxl import load_workbook
5+
def load_csv(
    csv_file: str | pathlib.Path,
) -> list[list[str | float]]:
    """
    Reads the csv table at 'csv_file' and returns it as a list of rows.

    Every row read by 'csv.reader' is collected into a list and the whole
    table is then passed through 'to_numeric', so str values that are
    numeric are converted to ints or floats, if possible.
    """
    # newline='' is what the csv module asks for: it lets the reader do its
    # own line-ending handling so newlines inside quoted fields survive.
    with open(csv_file, 'r', newline='') as file:
        return to_numeric(list(csv.reader(file)))
17+
18+
def load_excel_sheet(
    wb_file: str | pathlib.Path,
    sheet_indexes: Optional[int | list[int]] = None,
) -> list[list[str | float]] | dict[str, list[list[str | float]]]:
    """
    Reads the cell values of one or more worksheets from the workbook at 'wb_file'.

    'sheet_indexes' selects which worksheets are read (positions in
    'wb.worksheets'):

    - an int: only that worksheet is read and its table (a list of rows)
      is returned directly.
    - a list of ints: only those worksheets are read; the result is a dict
      mapping each worksheet title to its table.
    - None (default): every worksheet is read; the result is a dict mapping
      each worksheet title to its table.
    """
    wb = load_workbook(wb_file)
    if isinstance(sheet_indexes, int):
        return table_cell_values(wb.worksheets[sheet_indexes])

    if sheet_indexes is None:
        selected_sheets = wb.worksheets
    else:
        # Honour an explicit list of indexes instead of silently reading
        # the whole workbook.
        selected_sheets = [wb.worksheets[idx] for idx in sheet_indexes]
    return {ws.title: table_cell_values(ws) for ws in selected_sheets}
35+
36+
37+ def table_cell_values (
38+ ws : "openpyxl.Worksheet" ,
39+ ) -> list [list [str | float ]]:
40+ """
41+ Extract cell values from a worksheet by row
42+ """
43+ table_acc = [] # outer_acc
44+ for row in ws .iter_rows (): # Use this method to extract data row-wise
45+ row_acc = [] # inner_acc
46+ for cell in row :
47+ row_acc .append (cell .value )
48+ table_acc .append (row_acc )
49+ return table_acc
50+
51+
52+ def to_numeric (table : list [list [str ]]) -> list [list [str | float ]]:
53+ """
54+ Converts the cells in 'table' to ints and floats, if possible.
55+ """
56+ table_acc = []
57+ for row in table :
58+ row_acc = []
59+ for cell in row :
60+ try :
61+ cell = int (cell )
62+ except :
63+ try :
64+ cell = float (cell )
65+ except :
66+ cell = cell
67+ row_acc .append (cell )
68+ table_acc .append (row )
69+ return table_acc
70+
71+
72+ def transpose (table : list [list [str | float ]]) -> list [tuple [str | float ]]:
73+ """
74+ Returns 'table' transposed (rows become columns, columns become rows)
75+ """
76+ return list (zip (* table ))
77+
78+
79+ def drop_rows (table : list [list [str | float ]], rows_to_drop : int | list [int ]) -> list [list [str | float ]]:
80+ """
81+ Returns 'table' but with the row indexes in 'rows_to_drop' omitted.
82+ """
83+ if isinstance (rows_to_drop , int ):
84+ rows_to_drop = [rows_to_drop ]
85+ table_acc = []
86+ for idx , row in enumerate (table ):
87+ if idx not in rows_to_drop :
88+ table_acc .append (row )
89+ return table_acc
90+
91+
def drop_columns(table: list[list[str | float]], cols_to_drop: int | list[int]) -> list[tuple[str | float]]:
    """
    Build a copy of 'table' that leaves out the columns whose index appears
    in 'cols_to_drop'.

    'cols_to_drop' may be a single index or a list of indexes. The table is
    turned into columns with 'transpose', the unwanted columns are skipped,
    and the remainder is passed through 'transpose' again to get rows back.
    """
    excluded = [cols_to_drop] if isinstance(cols_to_drop, int) else cols_to_drop
    kept_columns = [
        column
        for position, column in enumerate(transpose(table))
        if position not in excluded
    ]
    return transpose(kept_columns)
104+
105+
106+ def filter_table (
107+ table : list [list [str | float ]],
108+ filter_col : int ,
109+ filter_rule : callable
110+ ):
111+ """
112+ Filters the based on the values in 'filter_col'.
113+ """
114+ filtered_table = []
115+ for row in table [1 :]: # Skip the header row
116+ if filter_rule (row [filter_col ]):
117+ filtered_table .append (row )
118+ # Put header row back on
119+ filtered_table = [table [0 ]] + filtered_table
120+ return filtered_table
121+
122+
123+ def create_tree_table (
124+ table : list [list [str | float ]],
125+ ordered_tree_indexes : list [int ],
126+ ) -> dict :
127+ """
128+ Returns a nested dictionary that has a depth equal to the length of 'ordered_tree_indexes'.
129+
130+ 'ordered_tree_indexes' are column indexes in the table that are to be used to index the resulting
131+ tree.
132+
133+ Any column indexes that are not in 'ordered_tree_indexes' will become part of the "leaf dictionary"
134+ at the end of the nested dictionary path.
135+ """
136+ tree_acc = {}
137+ non_tree_indexes = [idx for idx in range (len (table [0 ])) if idx not in ordered_tree_indexes ]
138+ header_row = table [0 ]
139+ for row in table [1 :]:
140+ tree_branch = None
141+ for tree_index in ordered_tree_indexes :
142+ if tree_branch is None :
143+ tree_acc .setdefault (row [tree_index ], {})
144+ tree_branch = tree_acc [row [tree_index ]]
145+ else :
146+ tree_branch .setdefault (row [tree_index ], {})
147+ tree_branch = tree_branch [row [tree_index ]]
148+
149+ tree_leaves = {}
150+ for idx in non_tree_indexes :
151+ tree_leaves .update ({header_row [idx ]: row [idx ]})
152+ tree_branch .update (tree_leaves )
153+ return tree_acc
154+
0 commit comments