forked from justincinmd/cs221-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_creator.py
132 lines (113 loc) · 4.58 KB
/
feature_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -*- coding: utf-8 -*-
"""
Feature Creator
Usage:
feature_creator.py [--baseline | --oracle] [options]
Options:
-h --help Show this screen.
--version Version number.
--verbose Print output to STDOUT
--skip_combinators Turn off feature combinators
Feature Groups:
--baseline Extract only baseline features.
--oracle Extract only oracle features.
--skip_combinators Skip the combination features.
"""
import inspect
import logging
import os
import sys
from docopt import docopt
from cache import Cache
from session import session
from movie_filter import MovieFilter
import models
import feature_extractors
import feature_extractors.combinators
import feature_extractors.util_feature_extractors
from models.info_type import RATING_ID
from helpers import encode
class FeatureCreator(object):
FEATURES_PATH = os.path.join('data', 'features.json')
LOGGING_FORMAT = '%(levelname)s %(asctime)s: %(message)s'
def __init__(self):
self.configure()
self.movie_ids = MovieFilter().load_feature_ids()
self.features = {}
self._load_feature_extractors()
def run(self):
self.generate_feature_base()
self.extract_features()
self.save_features()
def configure(self):
self.arguments = docopt(__doc__, version='0.0.1')
if self.arguments['--verbose']:
logging.basicConfig(
stream=sys.stdout,
format=self.LOGGING_FORMAT,
)
else:
logging.basicConfig(
filename='feature_creator.log',
format=self.LOGGING_FORMAT
)
logging.getLogger().setLevel(logging.DEBUG)
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
def generate_feature_base(self):
for movie, rating in self._load_movies_with_ratings():
title = encode(movie.title)
logging.info("Generating Base Features for %s (%s)" % (title, rating))
self.features[int(movie.id)] = {
'rating': float(rating),
'rating_rounded': int(round(float(rating))),
'title': title,
'features': {},
'util_features': {}
}
def extract_features(self):
for feature_extractor in self.feature_extractors:
for movie_id, features in feature_extractor(self.movie_ids).extract_cached().iteritems():
self.features[int(movie_id)]['features'].update(features)
for combinator in self.feature_extractor_combinators:
for movie_id, feature in self.features.iteritems():
combinator.combine(movie_id, feature['features'])
for util_feature_extractor in self.util_feature_extractors:
for movie_id, features in util_feature_extractor(self.movie_ids).extract_cached().iteritems():
self.features[int(movie_id)]['util_features'].update(features)
def save_features(self):
Cache.save_file(self.FEATURES_PATH, self.features)
def _load_movies_with_ratings(self):
return session.query(
models.Movie, models.MovieInfoIDX.info
).join(
models.MovieInfoIDX
).filter(
models.MovieInfoIDX.info_type_id==RATING_ID,
models.Movie.id.in_(self.movie_ids)
)
def _load_feature_extractors(self):
self.feature_extractors = []
for name, obj in inspect.getmembers(feature_extractors):
if inspect.isclass(obj):
if self.arguments['--oracle']:
# Add everything
self.feature_extractors.append(obj)
elif self.arguments['--baseline']:
# It's not an oracle and it is a baseline
if not obj.oracle and obj.baseline:
self.feature_extractors.append(obj)
else:
# Skip oracles
if not obj.oracle:
self.feature_extractors.append(obj)
self.feature_extractor_combinators = []
if not self.arguments['--skip_combinators']:
for name, obj in inspect.getmembers(feature_extractors.combinators):
if inspect.isclass(obj):
self.feature_extractor_combinators.append(obj())
self.util_feature_extractors = []
for name, obj in inspect.getmembers(feature_extractors.util_feature_extractors):
if inspect.isclass(obj):
self.util_feature_extractors.append(obj)
if __name__ == '__main__':
FeatureCreator().run()