Skip to content

Commit

Permalink
Add initial helper scripts
Browse files Browse the repository at this point in the history
This adds two new helper scripts that are useful when working with large
comparisons.
scripts/gen_constants.py creates a constants file that can help reduce
the number of constants generated in a comparison.
scripts/clean_csv.py performs a number of simplifications of a csv file
and creates a new output that is much easier for a human to review. One
important note here is that the resulting report is not a valid csv
file.
  • Loading branch information
JacobCallahan committed Jun 26, 2024
1 parent d39ea9f commit 84dfbc8
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 0 deletions.
51 changes: 51 additions & 0 deletions scripts/clean_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys

# Usage text for clean_csv.py; printed when the first command-line argument is
# "-h", "--help", or "help".
HELP_TEXT = """
This script cleans a CSV file by removing lines that contain consecutive commas (,,,)
and formats the output by aligning the first two columns of each line.
Usage:
python clean_csv.py <input_file> [output_file]
Arguments:
<input_file> - The path to the CSV file to be cleaned.
[output_file] - Optional. The path where the cleaned CSV file will be saved.
If not provided, the output will be saved as 'cleaned.report'.
Note: The output report will not be a valid csv file.
Example:
python clean_csv.py data.csv cleaned_data.report
"""


def strip_extra(line):
    """Reduce one CSV line to its first two columns, aligned for reading.

    The line is split on commas (no quote handling). The first column is
    left-justified to 50 characters and joined to the second with " | ";
    any further columns are dropped.

    Args:
        line: A single line of text, with or without a trailing newline.

    Returns:
        The line unchanged if it contains no comma; otherwise the formatted
        two-column line terminated by exactly one newline.
    """
    # Drop the line terminator before splitting: on a two-column row it would
    # otherwise stay attached to the second field and, together with the
    # newline added below, leave a blank line after the row in the report.
    columns = line.rstrip("\r\n").split(",")
    if len(columns) == 1:
        return line
    return f"{columns[0]:<50} | {columns[1]}\n"


def clean_empty(file_handler):
    """Collect the formatted lines of a CSV, skipping mostly-empty rows.

    Args:
        file_handler: Any iterable of text lines (e.g. an open file).

    Returns:
        A list holding strip_extra(line) for every line that does not
        contain three consecutive commas (",,,").
    """
    return [strip_extra(row) for row in file_handler if ",,," not in row]


# The input path is required: without it there is nothing to clean, so show
# the usage text and exit non-zero instead of failing on sys.argv[1].
if len(sys.argv) < 2:
    print(HELP_TEXT)
    sys.exit(1)

if sys.argv[1] in ("-h", "--help", "help"):
    print(HELP_TEXT)
    sys.exit()

# Read and reformat the input file.
to_clean = sys.argv[1]
with open(to_clean) as fh:
    cleaned = clean_empty(fh)

# The second positional argument, when given, overrides the default output name.
as_name = sys.argv[2] if len(sys.argv) > 2 else "cleaned.report"
with open(as_name, "w") as fh:
    fh.writelines(cleaned)

print(f"Cleaned file saved to {as_name}")
95 changes: 95 additions & 0 deletions scripts/gen_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import csv
import sys

import yaml

# Substrings that filter_parts() looks for in the last segment of a row's
# path; rows that match are not added to the constants tree.
KEEP_FIELDS = ["name", "label", "title", "url", "description", "path"]
# Tree of nested dicts that add_path_original() fills when no explicit
# destination is passed; simplified and written to the YAML output at the end.
SKIP_DICT = {}
# Usage text for gen_constants.py; printed when the first command-line
# argument is "-h", "--help", or "help".
HELP_TEXT = """
This script processes a comparison report, in the form of a csv file, and outputs a constants file.
This constants file can then be used to run another comparison report with more filtered results.
Usage:
python gen_constants.py <input_csv_file> [output_yaml_file]
Arguments:
<input_csv_file> - The path to the CSV file to be processed.
[output_yaml_file] - Optional. The path where the resulting YAML file will be saved.
If not provided, the output will be saved as 'constants.yaml'.
Example:
python gen_constants.py my_results.csv my_constants.yaml
"""

# Raise the csv module's per-field size cap as far as the platform allows.
# sys.maxsize does not fit in a C long on every platform (e.g. Windows), in
# which case field_size_limit raises OverflowError; halve until it is accepted.
_field_size_cap = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_size_cap)
    except OverflowError:
        _field_size_cap //= 2
    else:
        break


def filter_parts(parts, keep_fields=None):
    """Report whether the last path segment contains any of the keep-fields.

    The check is a substring test, so a segment such as "display_name"
    matches the field "name".

    Args:
        parts: List of path segments (strings).
        keep_fields: Optional iterable of substrings to look for. Defaults to
            the module-level KEEP_FIELDS.

    Returns:
        True if the last segment contains at least one keep-field, otherwise
        False. An empty ``parts`` list has no last segment and yields False.
    """
    if keep_fields is None:
        keep_fields = KEEP_FIELDS
    if not parts:
        return False
    last_segment = parts[-1]
    return any(field in last_segment for field in keep_fields)


def add_path_original(path, dest=None):
    """Insert a path into a tree of nested dicts, one level per segment.

    Each segment becomes a key under the previous segment's dict, created as
    an empty dict when missing. Existing branches are kept, so the resulting
    tree does not depend on the order in which paths are added (a shorter
    path added after a longer one no longer wipes the deeper entries).

    Args:
        path: Sequence of hashable path segments. It is not modified. An
            empty sequence is a no-op.
        dest: Dict to insert into. Defaults to the module-level SKIP_DICT.

    Returns:
        None. ``dest`` is updated in place.
    """
    if dest is None:
        dest = SKIP_DICT
    node = dest
    for segment in path:
        # setdefault keeps whatever subtree is already stored for this segment.
        node = node.setdefault(segment, {})


def simplify(indict):
    """Collapse a tree of nested dicts into a more compact dict/list form.

    Every dict value is simplified recursively (and replaced in place in
    ``indict``). If, after that, any value at this level is empty/falsy, the
    whole level is turned into a list: keys with empty values become bare
    entries and keys with content become single-item ``{key: value}`` dicts.
    A level with no empty values is returned as a dict.

    Args:
        indict: Dict to simplify; its values are overwritten with their
            simplified forms.

    Returns:
        A list when at least one value at this level is empty, otherwise the
        (mutated) input dict.
    """
    has_leaf = False
    for key in indict:
        if isinstance(indict[key], dict):
            indict[key] = simplify(indict[key])
        if not indict[key]:
            has_leaf = True
    if not has_leaf:
        return indict
    # Mixed or all-leaf level: keep insertion order, leaves as plain keys.
    return [key if not val else {key: val} for key, val in indict.items()]


# The input path is required: without it there is nothing to process, so show
# the usage text and exit non-zero instead of failing on sys.argv[1].
if len(sys.argv) < 2:
    print(HELP_TEXT)
    sys.exit(1)

if sys.argv[1] in ("-h", "--help", "help"):
    print(HELP_TEXT)
    sys.exit()


input_file = sys.argv[1]
print(f"Getting baseline from {input_file}.")
# newline="" is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly.
with open(input_file, newline="") as fh:
    reader = csv.reader(fh)
    next(reader, None)  # skip the header row; tolerate an empty file
    for row in reader:
        if not row:  # csv.reader yields [] for blank lines
            continue
        # Numeric segments are dropped from the path before it is recorded.
        parts = [p for p in row[0].split("/") if not p.isdigit()]
        # A path made only of numeric segments leaves nothing to record.
        if parts and not filter_parts(parts):
            add_path_original(parts)

SKIP_DICT = simplify(SKIP_DICT)

# The second positional argument, when given, overrides the default output name.
as_name = sys.argv[2] if len(sys.argv) > 2 else "constants.yaml"
with open(as_name, "w") as of:
    export = {"expected_constants": SKIP_DICT, "skipped_constants": []}
    yaml.dump(export, of)

print(f"Constants file saved to {as_name}.")

0 comments on commit 84dfbc8

Please sign in to comment.