-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathpredict_constants.py
58 lines (38 loc) · 1.12 KB
/
predict_constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
produce an answers file using only samples per device information
usage: predict_constants.py questions.csv samples_per_device.csv answers.csv
AUC 0.80343
"""
import csv
import sys


def _open_csv(path, mode):
    """Open *path* the way the csv module expects on this interpreter.

    Python 2's csv module reads and writes byte streams, while Python 3's
    needs text streams opened with newline='' so that the csv module
    controls line endings itself.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def load_device_probs(samples_file):
    """Return a dict mapping each device to its share of all samples.

    samples_file is a header-less CSV whose rows are `device,samples`
    (the samples per device file produced with count_samples.py).
    Blank rows are skipped. Raises ValueError if a row does not have
    exactly two fields or the count is not an integer.
    """
    # samples per device
    samples_per_device = {}
    sum_of_samples = 0
    with _open_csv(samples_file, 'r') as samples_handle:
        for row in csv.reader(samples_handle):
            if not row:
                continue
            device, samples = row
            samples = int(samples)
            samples_per_device[device] = samples
            sum_of_samples += samples

    # Nothing to normalise by: every known device gets probability 0.0
    # instead of dividing by zero.
    if sum_of_samples == 0:
        return dict.fromkeys(samples_per_device, 0.0)

    # probs for each device based on number of samples
    # (float() keeps the division true division on Python 2 as well)
    sum_of_samples = float(sum_of_samples)
    return {
        device: samples / sum_of_samples
        for device, samples in samples_per_device.items()
    }


def write_answers(questions_file, output_file, device_probs):
    """Write one `QuestionId,IsTrue` row per question and return the row count.

    questions_file is a CSV with a header line followed by rows of
    `question id, sequence, device`. The value written for each question
    is the probability of its device in device_probs; a device that is
    absent from device_probs has no recorded samples and is scored 0.0.
    """
    n = 0
    with _open_csv(questions_file, 'r') as questions_handle, \
            _open_csv(output_file, 'w') as answers_handle:
        reader = csv.reader(questions_handle)
        writer = csv.writer(answers_handle)

        next(reader, None)  # skip the header; tolerate an empty file
        writer.writerow(['QuestionId', 'IsTrue'])

        for row in reader:
            if not row:
                continue
            q_id, q_sequence, q_device = row
            writer.writerow([q_id, device_probs.get(q_device, 0.0)])

            # progress indicator for large question files
            n += 1
            if n % 10000 == 0:
                print(n)
    return n


def main(argv=None):
    """Run the script; argv defaults to sys.argv.

    Expects argv[1:4] to be the questions file, the samples per device
    file and the answers file to create. Returns 0 on success, or 2 when
    fewer than three file arguments were given (the usage text in the
    module docstring is printed in either case).
    """
    if argv is None:
        argv = sys.argv

    print(__doc__)

    if len(argv) < 4:
        return 2

    questions_file = argv[1]
    input_file = argv[2]  # samples per device file produced with count_samples.py
    output_file = argv[3]

    device_probs = load_device_probs(input_file)
    write_answers(questions_file, output_file, device_probs)
    return 0


if __name__ == '__main__':
    sys.exit(main())