Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial helper scripts #22

Merged
merged 1 commit into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions scripts/clean_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys

# Usage message printed (followed by exit) when the first command-line
# argument is "-h", "--help" or "help".
HELP_TEXT = """
This script cleans a CSV file by removing lines that contain consecutive commas (,,,)
and formats the output by aligning the first two columns of each line.

Usage:
python clean_csv.py <input_file> [output_file]

Arguments:
<input_file> - The path to the CSV file to be cleaned.
[output_file] - Optional. The path where the cleaned CSV file will be saved.
If not provided, the output will be saved as 'cleaned.report'.
Note: The output report will not be a valid csv file.

Example:
python clean_csv.py data.csv cleaned_data.report
"""


def strip_extra(line):
    """Format one CSV line as an aligned two-column report line.

    The line is split on commas. A line without any comma is returned
    unchanged. Otherwise the first field is left-justified to 50 characters
    and joined to the second field with " | "; any further fields are
    dropped.

    Args:
        line: One raw line of the input file, usually newline-terminated.

    Returns:
        The formatted line ending in exactly one newline, or ``line`` itself
        when it contains no comma.
    """
    first, comma, remainder = line.partition(",")
    if not comma:
        return line
    # When the row has only two columns the second field still carries the
    # line terminator; strip it so the report does not gain a blank line.
    second = remainder.split(",", 1)[0].rstrip("\r\n")
    return f"{first:<50} | {second}\n"


def clean_empty(file_handler):
    """Return the formatted lines of a file, skipping rows with empty runs.

    Each line yielded by ``file_handler`` that does not contain three
    consecutive commas is passed through ``strip_extra``; the results are
    returned as a list in input order.
    """
    return [
        strip_extra(raw_line)
        for raw_line in file_handler
        if ",,," not in raw_line
    ]


# Script body: print help, or clean the input file and write the report.
if len(sys.argv) < 2:
    # No input file given: show usage instead of failing with IndexError.
    print(HELP_TEXT)
    sys.exit(1)

if sys.argv[1] in ("-h", "--help", "help"):
    print(HELP_TEXT)
    sys.exit()

to_clean = sys.argv[1]
with open(to_clean) as fh:
    cleaned = clean_empty(fh)

# The second positional argument, when present, overrides the report name.
as_name = sys.argv[2] if len(sys.argv) > 2 else "cleaned.report"
with open(as_name, "w") as fh:
    fh.writelines(cleaned)

print(f"Cleaned file saved to {as_name}")
95 changes: 95 additions & 0 deletions scripts/gen_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import csv
import sys

import yaml

# Substrings tested against the last component of each path by filter_parts();
# a path whose last component contains one of them is not added to SKIP_DICT.
KEEP_FIELDS = ["name", "label", "title", "url", "description", "path"]
# Nested dict of path components filled in by add_path_original(); the script
# body later rebinds it to the result of simplify() before dumping it to YAML.
SKIP_DICT = {}
# Usage message printed (followed by exit) when the first command-line
# argument is "-h", "--help" or "help".
HELP_TEXT = """
This script processes a comparison report, in the form of a csv file, and outputs a constants file.
This constants file can then be used to run another comparison report with more filtered results.

Usage:
python gen_constants.py <input_csv_file> [output_yaml_file]

Arguments:
<input_csv_file> - The path to the CSV file to be processed.
[output_yaml_file] - Optional. The path where the resulting YAML file will be saved.
If not provided, the output will be saved as 'constants.yaml'.

Example:
python gen_constants.py my_results.csv my_constants.yaml
"""

# Raise the csv module's per-field size cap as far as the platform allows.
# csv.field_size_limit raises OverflowError when the value does not fit in a
# C long (e.g. sys.maxsize on 64-bit Windows), so back off until one is
# accepted instead of crashing at import time.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 10


def filter_parts(parts, keep_fields=None):
    """Tell whether a path should be left out of the generated constants.

    The last component of ``parts`` is checked for each entry of
    ``keep_fields`` using a substring test (so "name" also matches
    "display_name").

    Args:
        parts: List of path components (strings).
        keep_fields: Substrings to look for; defaults to the module-level
            ``KEEP_FIELDS``.

    Returns:
        True when the last component contains any of the keep fields, or when
        ``parts`` is empty (there is nothing to record); False otherwise.
    """
    # Open review question from the pull request discussion:
    # "What should a CSV have that marks it as an expected constant?"
    if not parts:
        # Avoid IndexError on parts[-1]; an empty path has nothing to add.
        return True
    if keep_fields is None:
        keep_fields = KEEP_FIELDS
    last_component = parts[-1]
    return any(field in last_component for field in keep_fields)


def add_path_original(path, dest=None):
    """Record a path of keys as nested dictionaries inside ``dest``.

    Every key except the last is created as an empty dict if missing and then
    descended into. For a path of two or more keys the final key is set to a
    fresh empty dict; for a single-key path the key is only created if it is
    missing.

    Args:
        path: Sequence of keys, outermost first. It is not modified.
        dest: Dictionary to add the path to; defaults to the module-level
            ``SKIP_DICT``.

    Returns:
        None. ``dest`` is updated in place. An empty ``path`` is a no-op.
    """
    if dest is None:
        dest = SKIP_DICT
    # Work on a copy so the caller's list is not consumed.
    keys = list(path)
    if not keys:
        return
    if len(keys) == 1:
        dest.setdefault(keys[0], {})
        return
    *parents, leaf = keys
    node = dest
    for key in parents:
        node = node.setdefault(key, {})
    # NOTE(review): this resets any subtree already stored under the leaf, so
    # the result depends on the order paths are added - confirm this is wanted.
    node[leaf] = {}


def simplify(indict):
    """Collapse a nested dict of paths into a more compact dict/list form.

    Dict values are simplified recursively and written back into ``indict``.
    If any value ends up empty, the whole mapping is returned as a list in
    which keys with empty values appear as bare keys and the remaining
    entries as single-item ``{key: value}`` dicts. Otherwise the (mutated)
    dict itself is returned.

    Args:
        indict: Dictionary to simplify; it is modified in place.

    Returns:
        A list as described above, or ``indict`` when no value is empty
        (an empty ``indict`` yields an empty dict).
    """
    has_empty_value = False
    for key, value in indict.items():
        if isinstance(value, dict):
            # Replacing the value of an existing key is safe while iterating.
            value = indict[key] = simplify(value)
        if not value:
            has_empty_value = True
    if not has_empty_value:
        return indict or {}
    # At least one leaf: convert the dictionary into a list.
    return [
        key if not value else {key: value}
        for key, value in indict.items()
    ]


# Script body: print help, or build the constants tree from the CSV report
# and dump it as YAML.
if len(sys.argv) < 2:
    # No input file given: show usage instead of failing with IndexError.
    print(HELP_TEXT)
    sys.exit(1)

if sys.argv[1] in ("-h", "--help", "help"):
    print(HELP_TEXT)
    sys.exit()


input_file = sys.argv[1]
print(f"Getting baseline from {input_file}.")
# newline="" is what the csv module expects so it can handle line endings
# (including those embedded in quoted fields) itself.
with open(input_file, newline="") as fh:
    reader = csv.reader(fh)
    next(reader, None)  # skip the header row; tolerate an empty file
    for row in reader:
        if not row:
            continue  # csv.reader yields [] for blank lines
        # Purely numeric path components are dropped from the first column.
        parts = [p for p in row[0].split("/") if not p.isdigit()]
        # A path made only of numeric components leaves nothing to record.
        if parts and not filter_parts(parts):
            add_path_original(parts)

SKIP_DICT = simplify(SKIP_DICT)

# The second positional argument, when present, overrides the output name.
as_name = sys.argv[2] if len(sys.argv) > 2 else "constants.yaml"
with open(as_name, "w") as of:
    export = {"expected_constants": SKIP_DICT, "skipped_constants": []}
    yaml.dump(export, of)

print(f"Constants file saved to {as_name}.")