-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv_to_arff.py
73 lines (59 loc) · 2.92 KB
/
csv_to_arff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import csv
import argparse
import os
from configparser import ConfigParser
import helpers
def csv_to_arff(csv_filepath, attribute_filepath, output_arff_filepath, relation_name="dataset"):
# Read attribute definitions from the attribute file
with open(attribute_filepath, 'r') as attr_file:
attribute_definitions = attr_file.read()
# Open the CSV file and ARFF output file
with open(csv_filepath, 'r') as csv_file, open(output_arff_filepath, 'w') as arff_file:
csv_reader = csv.reader(csv_file)
# Write the relation name and attribute definitions to the ARFF file
arff_file.write(f"@relation {relation_name}\n\n")
arff_file.write(attribute_definitions + "\n")
# Extract the header (column names) from the CSV file
header = next(csv_reader)
# Write the data section header in the ARFF file
arff_file.write("\n@data\n")
# Write each row of the CSV file to the ARFF file
for row in csv_reader:
processed_row = []
for value in row:
if ' ' in value: # If a value contains a space, enclose it in single quotes
processed_row.append(f"'{value}'")
else:
processed_row.append(value)
# Convert each row to a comma-separated string and write to ARFF
arff_file.write(",".join(processed_row) + "\n")
print(f"Conversion completed: {output_arff_filepath} created.")
def main():
parser = argparse.ArgumentParser()
parser.add_argument('indir', help="""
A directory containing a config.ini file. All classifier results will be
output to this file.
""")
args = parser.parse_args()
config_file = helpers.assert_dir_contains_config(args.indir)
# Load the configuration file
config = ConfigParser()
config.optionxform = str # preserve case in config keys
config.read(config_file)
# Loop over each section in the config file and process the conversions
for section in config.sections():
if section.startswith('meta'):
# Read the paths and details from the config file for this section
csv_filepath = config[section].get('csv_filepath')
attribute_filepath = config[section].get('attribute_filepath')
output_arff_filepath = config[section].get('output_arff_filepath')
relation_name = config[section].get('relation_name', fallback='dataset')
# Check if paths exist
if not os.path.exists(csv_filepath):
raise FileNotFoundError(f"CSV file not found: {csv_filepath}")
if not os.path.exists(attribute_filepath):
raise FileNotFoundError(f"Attribute file not found: {attribute_filepath}")
# Call the csv_to_arff function for this section
csv_to_arff(csv_filepath, attribute_filepath, output_arff_filepath, relation_name)
if __name__ == "__main__":
main()