-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature_selection.py
81 lines (70 loc) · 2.8 KB
/
feature_selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import argparse
import csv
import os
import sys
import traceback
from configparser import ConfigParser
from datetime import datetime

from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
from weka.filters import Filter

import helpers
def select_attributes(data, searcher, evaluator, filename):
    """Run one attribute selection and write the reduced data plus a log.

    Performs attribute selection on ``data`` with the given search strategy
    and evaluator, keeps only the selected attributes, and writes two files:

    * ``<filename>.log`` -- searcher/evaluator description, their options,
      the runtime of the selection and the result summary.
    * ``<filename>.csv`` -- the filtered dataset, via ``helpers.save_csv``.

    The options written to the log are read from ``searcher`` and
    ``evaluator`` themselves, so the function does not depend on any
    module-level variables being set by the caller.

    Args:
        data: Dataset to select attributes from (the caller in this file
            passes the result of ``helpers.load_csv``).
        searcher: Configured ``ASSearch`` instance.
        evaluator: Configured ``ASEvaluation`` instance.
        filename: Output path stem; ``.log`` and ``.csv`` are appended.
    """
    attsel = AttributeSelection()
    attsel.search(searcher)
    attsel.evaluator(evaluator)

    # Time only the selection itself, not the filtering or the file output.
    start_time = datetime.now()
    attsel.select_attributes(data)
    end_time = datetime.now()

    # format selected attributes: shift each index by one, because the
    # Remove filter's -R range is 1-based
    subset = ','.join(str(int(d + 1)) for d in attsel.selected_attributes)
    print(f"Selected subset: {subset}")

    # remove all attributes not in subset ("-V" inverts the range, so the
    # listed attributes are the ones that are kept)
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", subset, "-V"])
    remove.inputformat(data)
    filtered = remove.filter(data)

    # Ask the objects for their options instead of relying on globals that
    # only exist when the function is called from this script's main loop.
    searcher_options = " ".join(searcher.options)
    evaluator_options = " ".join(evaluator.options)

    with open(f'{filename}.log', 'w') as f:
        # NOTE(review): no newline is written after the searcher/evaluator
        # description; confirm their string form already ends with one.
        f.write(f"Searcher: {searcher}")
        f.write(f"Searcher options: {searcher_options}\n")
        f.write(f"Evaluator: {evaluator}")
        f.write(f"Evaluator options: {evaluator_options}\n\n")
        f.write(f"Runtime (HH:mm:ss): {end_time - start_time}\n")
        f.write(attsel.results_string)

    helpers.save_csv(filtered, f"{filename}.csv")
# Script entry: read <indir>/config.ini, load the dataset named in its
# [meta] section and run one attribute selection per configured section.

# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('indir')
parser.add_argument('--max-heap-size', default="512g")
args = parser.parse_args()

config_file = helpers.assert_dir_contains_config(args.indir)

try:
    config = ConfigParser()
    config.optionxform = str  # preserve case in config keys
    config.read(config_file)

    os.chdir(args.indir)  # treat contents of file relative to config.ini
    data_filepath = config['meta']['data_path']
    helpers.assert_file_exists(data_filepath)

    with helpers.JVM(max_heap_size=args.max_heap_size):
        data = helpers.load_csv(data_filepath)
        # data = helpers.data_to_nominal(data) # all features are already nominal

        # Iterating the parser visits every section (DEFAULT and 'meta'
        # included); sections that do not define both a searcher and an
        # evaluator are skipped.
        for section in config:
            selector = config[section]
            searcher = selector.get('searcher', None)
            evaluator = selector.get('evaluator', None)
            if searcher is None or evaluator is None:
                continue

            # Options are optional and given as one whitespace-separated string.
            searcher_options = selector.get('searcher_options', "")
            evaluator_options = selector.get('evaluator_options', "")

            select_attributes(
                data,
                searcher=ASSearch(
                    classname=searcher,
                    options=searcher_options.split()),
                evaluator=ASEvaluation(
                    classname=evaluator,
                    options=evaluator_options.split()),
                # output file stem: the section name with spaces made dashes
                filename=section.replace(" ", "-")
            )
except Exception:
    # Record the full traceback: the bare message alone is often useless
    # (e.g. a missing config key would be logged as just "'meta'").
    with open('error.log', 'w') as f:
        f.write(traceback.format_exc())
    sys.exit(1)