-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathstore.py
275 lines (225 loc) · 8.65 KB
/
store.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
"""Different classes to optimize RAM usage with repeated features over time."""
import numpy as np
from choice_learn.data.indexer import OneHotStoreIndexer, StoreIndexer
class Store:
    """Class to keep OneHotStore and FeaturesStore with same parent.

    A store keeps the feature values in the ``store`` dictionary, {index: value}, and the
    order in which those indexes appear in ``sequence``.
    """

    def __init__(self, indexes=None, values=None, sequence=None, name=None, indexer=StoreIndexer):
        """Build the store.

        Parameters
        ----------
        indexes : array_like or None
            list of indexes of features to store. If None is given, indexes are created from
            apparition order of values
        values : array_like
            list of values of features to store
        sequence : array_like
            sequence of apparitions of the features
        name: string, optional
            name of the features store -- not used at the moment
        indexer : callable, optional
            called with the store itself as only argument; the result is kept as
            self.indexer and returned by the batch property. Defaults to StoreIndexer.
        """
        if indexes is None:
            indexes = list(range(len(values)))
        self.store = dict(zip(indexes, values))
        self.sequence = np.array(sequence)
        self.name = name

        # The shape attribute is only set when both sequence and values are given.
        if sequence is not None and values is not None:
            try:
                width = len(values[0])
            except TypeError:
                # Scalar values have no len(): they count as a single feature.
                width = 1
            except IndexError:
                # No value stored at all (e.g. an empty subset): no feature to count.
                width = 0
            self.shape = (len(sequence), width)

        self.indexer = indexer(self)

    def _get_store_element(self, index):
        """Getter method over self.store.

        Returns the features stored at index index. Compared to __getitem__, it does not take
        the index-th element of sequence but the index-th element of the store.

        Parameters
        ----------
        index : hashable, list or np.ndarray
            key of the feature in self.store, or a list / array of such keys

        Returns
        -------
        array_like
            features corresponding to the index index in self.store; a list of features
            when index is a list or a np.ndarray

        Raises
        ------
        KeyError
            if a requested key is not in self.store
        """
        # A np.ndarray is not hashable and so cannot be a key itself: it is always
        # treated as a collection of keys, like a list.
        if isinstance(index, (list, np.ndarray)):
            return [self.store[i] for i in index]
        return self.store[index]

    def __len__(self):
        """Return the length of the sequence of apparition of the features."""
        return len(self.sequence)

    @property
    def batch(self):
        """Indexing attribute."""
        return self.indexer
class FeaturesStore(Store):
    """Base class to store features and a sequence of apparitions.

    Mainly useful when features are repeated frequently over the sequence.
    An example would be to store the features of customers (supposing that the same customers
    come several times over the work sequence) and to save which customer is concerned for each
    choice.

    Attributes
    ----------
    store : dict
        Dictionary stocking features that can be called from indexes: {index: features}
    shape : tuple
        shape of the features store: (sequence_length, features_number)
    sequence : array_like
        List of elements of indexes representing the sequence of apparitions of the features
    name: string, optional
        name of the features store -- not used at the moment
    """

    @classmethod
    def from_dict(cls, values_dict, sequence, name=None):
        """Instantiate the FeaturesStore from a dictionary of values.

        Parameters
        ----------
        values_dict : dict
            dictionary of values to store, {index: value}
        sequence : array_like
            sequence of apparitions of the features
        name : string, optional
            name of the features store

        Returns
        -------
        FeaturesStore created from the values in the dictionary
        """
        return cls(
            indexes=list(values_dict.keys()),
            values=list(values_dict.values()),
            sequence=sequence,
            name=name,
        )

    @classmethod
    def from_list(cls, values_list, sequence, name=None):
        """Instantiate the FeaturesStore from a list of values.

        Creates indexes for each value: the position of the value in the list.

        Parameters
        ----------
        values_list : list
            List of values to store
        sequence : array_like
            sequence of apparitions of the features
        name : string, optional
            name of the features store

        Returns
        -------
        FeaturesStore
        """
        return cls(
            indexes=list(range(len(values_list))),
            values=values_list,
            sequence=sequence,
            name=name,
        )

    def __getitem__(self, sequence_index):
        """Subsets self with sequence_index.

        Prints one message for each key of the store that does not appear in the subset.

        Parameters
        ----------
        sequence_index : (int, list, slice)
            index position of the sequence

        Returns
        -------
        FeaturesStore
            new store restricted to the selected positions of the sequence and to the
            features appearing at these positions; it keeps the name of self
        """
        # A scalar index (Python or NumPy integer) is wrapped so that the sub-sequence is
        # always an array: a bare scalar would break the membership test below.
        if isinstance(sequence_index, (int, np.integer)):
            sequence_index = [sequence_index]
        new_sequence = self.sequence[sequence_index]

        store = {}
        for k, v in self.store.items():
            if k in new_sequence:
                store[k] = v
            else:
                print(f"Key {k} of store with value {v} not in sequence anymore")

        return FeaturesStore.from_dict(store, new_sequence, name=self.name)

    def astype(self, dtype):
        """Change the dtype of the features.

        The type of the features should implement the astype method.
        Typically, should work like np.ndarrays.
        The stored values are replaced in place; nothing is returned.

        Parameters
        ----------
        dtype : str or type
            type to set the features as
        """
        # Only values of existing keys are reassigned, so the dict size never changes
        # during the iteration.
        for k, v in self.store.items():
            self.store[k] = v.astype(dtype)
class OneHotStore(Store):
    """Specific Store for one hot features storage.

    Inherits from Store.
    For example can be used to store a OneHot representation of the days of week.
    Has the same attributes as FeaturesStore plus a dtype, only differs with some One-Hot
    optimized methods.
    """

    def __init__(
        self,
        indexes=None,
        values=None,
        sequence=None,
        name=None,
        dtype=np.float32,
    ):
        """Build the OneHot features store.

        Parameters
        ----------
        indexes : array_like or None
            list of indexes of features to store. If None is given, indexes are created from
            apparition order of values
        values : array_like or None
            list of values of features to store that must be One-Hot. If None given, both
            indexes and values are created from the sequence, exactly as from_sequence does
            (any given indexes is then ignored)
        sequence : array_like
            sequence of apparitions of the features
        name: string, optional
            name of the features store -- not used at the moment
        dtype : type, optional
            stored as self.dtype and changed by astype. Defaults to np.float32.
        """
        self.name = name
        self.sequence = np.array(sequence)

        if values is None:
            # Assigning another instance to `self` would only rebind the local name and
            # leave this instance without a store, so indexes and values are built here.
            indexes, values = self._indexes_and_values_from_sequence(sequence)
        elif indexes is None:
            indexes = list(range(len(values)))

        self.store = dict(zip(indexes, values))
        # Second dimension: largest stored value + 1, or 0 when nothing is stored.
        width = np.max(values) + 1 if len(values) > 0 else 0
        self.shape = (len(sequence), width)
        self.dtype = dtype
        self.indexer = OneHotStoreIndexer(self)

    @staticmethod
    def _indexes_and_values_from_sequence(sequence):
        """Return the sorted unique elements of sequence and their ranks 0..n-1."""
        all_indexes = np.unique(sequence)
        return all_indexes, np.arange(len(all_indexes))

    @classmethod
    def from_sequence(cls, sequence):
        """Create a OneHotStore from a sequence of apparition.

        One Hot vector are created from the different values of the sequence: feature vectors
        created have a length of the number of different values in the sequence and the 1 is
        positioned at the rank of the value among the sorted unique values of the sequence
        (np.unique order, not the order of first apparition).

        Parameters
        ----------
        sequence : array-like
            Sequence of apparitions of values, or indexes. Will be used to index self.store

        Returns
        -------
        OneHotStore
            Created from the sequence.
        """
        all_indexes, values = cls._indexes_and_values_from_sequence(sequence)
        return cls(indexes=all_indexes, values=values, sequence=sequence)

    def __getitem__(self, sequence_index):
        """Subsets self with sequence_index.

        Prints one message for each key of the store that does not appear in the subset.

        Parameters
        ----------
        sequence_index : (int, list, slice)
            index from sequence of element to get

        Returns
        -------
        OneHotStore
            new store restricted to the selected positions of the sequence and to the
            features appearing at these positions; it keeps the name and dtype of self
        """
        # A scalar index (Python or NumPy integer) is wrapped so that the sub-sequence is
        # always an array: a bare scalar would break the membership test below.
        if isinstance(sequence_index, (int, np.integer)):
            sequence_index = [sequence_index]
        new_sequence = self.sequence[sequence_index]

        store = {}
        for k, v in self.store.items():
            if k in new_sequence:
                store[k] = v
            else:
                print(f"Key {k} of store with value {v} not in sequence anymore")

        # NOTE(review): the subset recomputes its shape from the remaining values, so its
        # second dimension can be smaller than the parent's when the largest value is
        # dropped -- confirm this is intended.
        return OneHotStore(
            indexes=list(store.keys()),
            values=list(store.values()),
            sequence=new_sequence,
            name=self.name,
            dtype=self.dtype,
        )

    def astype(self, dtype):
        """Change (mainly int or float) type of returned OneHot features vectors.

        Only self.dtype is updated; the stored values are left untouched.

        Parameters
        ----------
        dtype : type
            Type to set the features as
        """
        self.dtype = dtype