Skip to content

Commit d1282e1

Browse files
Merge pull request #3 from StructuralPython/features/tables
feat: add tables
2 parents 3faa57b + 8f3432c commit d1282e1

File tree

1 file changed

+154
-0
lines changed

1 file changed

+154
-0
lines changed

src/jsonchain/tables.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
from typing import Optional
2+
import csv
3+
import pathlib
4+
from openpyxl import load_workbook
5+
6+
def load_csv(
    csv_file: str | pathlib.Path,
) -> list[list[str | float]]:
    """
    Reads the csv table at 'csv_file' and returns it as a list of rows.

    The rows read from the file are passed through 'to_numeric', so
    str values that are numeric are converted to ints or floats,
    if possible.
    """
    # The csv module asks for files to be opened with newline='' so that it
    # can handle line endings itself; without it, newlines embedded inside
    # quoted fields are not interpreted correctly.
    with open(csv_file, 'r', newline='') as file:
        return to_numeric(list(csv.reader(file)))
17+
18+
19+
def load_excel_sheet(
    wb_file: str | pathlib.Path,
    sheet_indexes: Optional[int | list[int]] = None,
) -> list[list[str | float]] | dict[str, list[list[str | float]]]:
    """
    Reads the cell values of the worksheets in the workbook at 'wb_file'.

    'sheet_indexes' selects which worksheets are read:
      - an int: only the worksheet at that position is read and its table
        (a list of rows) is returned directly.
      - a list of ints: the worksheets at those positions are read and a
        dict of {worksheet title: table} is returned.
      - None (default): every worksheet is read and a dict of
        {worksheet title: table} is returned.
    """
    wb = load_workbook(wb_file)
    if isinstance(sheet_indexes, int):
        return table_cell_values(wb.worksheets[sheet_indexes])
    if sheet_indexes is None:
        selected_sheets = wb.worksheets
    else:
        # Honour an explicit list of indexes instead of loading every sheet
        selected_sheets = [wb.worksheets[idx] for idx in sheet_indexes]
    return {ws.title: table_cell_values(ws) for ws in selected_sheets}
35+
36+
37+
def table_cell_values(
38+
ws: "openpyxl.Worksheet",
39+
) -> list[list[str | float]]:
40+
"""
41+
Extract cell values from a worksheet by row
42+
"""
43+
table_acc = [] # outer_acc
44+
for row in ws.iter_rows(): # Use this method to extract data row-wise
45+
row_acc = [] # inner_acc
46+
for cell in row:
47+
row_acc.append(cell.value)
48+
table_acc.append(row_acc)
49+
return table_acc
50+
51+
52+
def to_numeric(table: list[list[str]]) -> list[list[str | float]]:
53+
"""
54+
Converts the cells in 'table' to ints and floats, if possible.
55+
"""
56+
table_acc = []
57+
for row in table:
58+
row_acc = []
59+
for cell in row:
60+
try:
61+
cell = int(cell)
62+
except:
63+
try:
64+
cell = float(cell)
65+
except:
66+
cell = cell
67+
row_acc.append(cell)
68+
table_acc.append(row)
69+
return table_acc
70+
71+
72+
def transpose(table: list[list[str | float]]) -> list[tuple[str | float]]:
73+
"""
74+
Returns 'table' transposed (rows become columns, columns become rows)
75+
"""
76+
return list(zip(*table))
77+
78+
79+
def drop_rows(table: list[list[str | float]], rows_to_drop: int | list[int]) -> list[list[str | float]]:
80+
"""
81+
Returns 'table' but with the row indexes in 'rows_to_drop' omitted.
82+
"""
83+
if isinstance(rows_to_drop, int):
84+
rows_to_drop = [rows_to_drop]
85+
table_acc = []
86+
for idx, row in enumerate(table):
87+
if idx not in rows_to_drop:
88+
table_acc.append(row)
89+
return table_acc
90+
91+
92+
def drop_columns(table: list[list[str | float]], cols_to_drop: int | list[int]) -> list[tuple[str | float]]:
93+
"""
94+
Returns 'table' but with the col indexes in 'cols_to_drop' omitted.
95+
"""
96+
table_cols = transpose(table)
97+
if isinstance(cols_to_drop, int):
98+
cols_to_drop = [cols_to_drop]
99+
table_acc = []
100+
for idx, col in enumerate(table_cols):
101+
if idx not in cols_to_drop:
102+
table_acc.append(col)
103+
return transpose(table_acc)
104+
105+
106+
def filter_table(
107+
table: list[list[str | float]],
108+
filter_col: int,
109+
filter_rule: callable
110+
):
111+
"""
112+
Filters the based on the values in 'filter_col'.
113+
"""
114+
filtered_table = []
115+
for row in table[1:]: # Skip the header row
116+
if filter_rule(row[filter_col]):
117+
filtered_table.append(row)
118+
# Put header row back on
119+
filtered_table = [table[0]] + filtered_table
120+
return filtered_table
121+
122+
123+
def create_tree_table(
124+
table: list[list[str | float]],
125+
ordered_tree_indexes: list[int],
126+
) -> dict:
127+
"""
128+
Returns a nested dictionary that has a depth equal to the length of 'ordered_tree_indexes'.
129+
130+
'ordered_tree_indexes' are column indexes in the table that are to be used to index the resulting
131+
tree.
132+
133+
Any column indexes that are not in 'ordered_tree_indexes' will become part of the "leaf dictionary"
134+
at the end of the nested dictionary path.
135+
"""
136+
tree_acc = {}
137+
non_tree_indexes = [idx for idx in range(len(table[0])) if idx not in ordered_tree_indexes]
138+
header_row = table[0]
139+
for row in table[1:]:
140+
tree_branch = None
141+
for tree_index in ordered_tree_indexes:
142+
if tree_branch is None:
143+
tree_acc.setdefault(row[tree_index], {})
144+
tree_branch = tree_acc[row[tree_index]]
145+
else:
146+
tree_branch.setdefault(row[tree_index], {})
147+
tree_branch = tree_branch[row[tree_index]]
148+
149+
tree_leaves = {}
150+
for idx in non_tree_indexes:
151+
tree_leaves.update({header_row[idx]: row[idx]})
152+
tree_branch.update(tree_leaves)
153+
return tree_acc
154+

0 commit comments

Comments
 (0)