-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathraw_dataset.py
62 lines (48 loc) · 2.4 KB
/
raw_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import abc
import dataclasses
import mashumaro
import experiments
import utils
from . import base, raw_poisoning
class DatasetRaw(abc.ABC):
    """
    Common interface for raw dataset descriptions, abstracting over
    what has to be done with a dataset once it has been loaded.

    A loaded dataset that still has to be poisoned must export the
    datasets it generates, whereas a loaded dataset that is already
    poisoned must neither export anything nor run the poisoning again.
    """

    @abc.abstractmethod
    def parse_and_load(
        self, base_output_directory: str, exists_ok: bool
    ) -> experiments.DatasetGenerator:
        """
        Parse this raw description and return the resulting dataset generator.

        Implementations decide whether generating and exporting is needed.

        :param base_output_directory: directory an implementation may export to.
        :param exists_ok: flag an implementation may forward to the export step.
        :return: the dataset generator obtained from this description.
        """
@dataclasses.dataclass
class DatasetToPoisonRaw(
    mashumaro.DataClassDictMixin,
    base.RawToParsed[experiments.DatasetGenerator],
    DatasetRaw,
):
    """
    Raw description of a dataset that has yet to be poisoned: it is read
    from CSV files, then generated and exported on load.
    """

    # Path handed to utils.load_dataset_from_csv for the training split.
    dataset_path_training: str
    # Path handed to utils.load_dataset_from_csv for the testing split.
    dataset_path_testing: str
    # Raw poisoning settings; parsed before being given to the generator.
    poisoning_input: raw_poisoning.PoisoningGenerationInfoRaw

    def parse(self) -> experiments.DatasetGenerator:
        """
        Load both CSV splits and build a generator for a dataset to poison.

        :return: the generator created by
            ``experiments.DatasetGenerator.from_dataset_to_poison``.
        """
        train_x, train_y = utils.load_dataset_from_csv(
            dataset_path=self.dataset_path_training
        )
        test_x, test_y = utils.load_dataset_from_csv(
            dataset_path=self.dataset_path_testing
        )

        # The poisoning settings are parsed only after both splits are loaded.
        return experiments.DatasetGenerator.from_dataset_to_poison(
            X_train=train_x,
            y_train=train_y,
            X_test=test_x,
            y_test=test_y,
            poisoning_generation_input=self.poisoning_input.parse(),
        )

    def parse_and_load(
        self, base_output_directory: str, exists_ok: bool
    ) -> experiments.DatasetGenerator:
        """
        Parse the description, generate the datasets and export them.

        :param base_output_directory: passed to the export as ``base_directory``.
        :param exists_ok: passed through to the export unchanged.
        :return: the generator, after generation and export have run.
        """
        generator = self.parse()
        # Generation has to happen first, so that there is something to export.
        generator.generate()
        generator.export(base_directory=base_output_directory, exists_ok=exists_ok)
        return generator
@dataclasses.dataclass
class DatasetAlreadyPoisonedRaw(
    mashumaro.DataClassDictMixin,
    base.RawToParsed[experiments.DatasetGenerator],
    DatasetRaw,
):
    """
    Raw description of a dataset that was already poisoned and exported:
    it is only imported back, with no generation and no export.
    """

    # Directory the previously exported dataset is imported from.
    exported_dataset_path: str
    # Raw poisoning settings; parsed before being given to the generator.
    poisoning_input: raw_poisoning.PoisoningGenerationInfoRaw

    def parse(self) -> experiments.DatasetGenerator:
        """
        Import the generator from the exported dataset directory.

        :return: the generator created by
            ``experiments.DatasetGenerator.import_from_directory``.
        """
        return experiments.DatasetGenerator.import_from_directory(
            base_directory=self.exported_dataset_path,
            poisoning_generation_input=self.poisoning_input.parse(),
        )

    def parse_and_load(
        self, base_output_directory: str, exists_ok: bool
    ) -> experiments.DatasetGenerator:
        """
        Return the imported generator as-is.

        :param base_output_directory: unused here; kept for the shared interface.
        :param exists_ok: unused here; kept for the shared interface.
        :return: the generator obtained from :meth:`parse`.
        """
        # Nothing is generated or exported here: parsing alone is the whole load.
        generator = self.parse()
        return generator