forked from shubhomoydas/ad_examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathactivity_word2vec.py
executable file
·153 lines (119 loc) · 5.63 KB
/
activity_word2vec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import random
import numpy as np
import numpy.random as rnd
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn import manifold
import collections
import math
import tensorflow as tf
import matplotlib.image as mpimg
from common.utils import *
from common.timeseries_datasets import *
from common.data_plotter import *
from common.nn_utils import *
from timeseries.casas import *
from timeseries.word2vec import *
from timeseries.word2vec_custom import *
from timeseries.simulate_timeseries import *
# Just disables the warning, doesn't enable AVX/FMA
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
"""
Simple word2vec model for activity labels. We will try to find an embedding
of the sensors in an apartment. This embedding will be derived from the
sequential sensor triggerings as the occupant of the apartment moves or
conducts daily activities. The goal is to show that relative positions of
sensors in the embedding space reflect their relative positions in the real world.
To compare the embedding and the real layout, check the pdf output generated by
this program and compare with ../datasets/CASAS/floor_plans/HH101-sensormap.png
This is a simple application of word2vec on a small dataset and should be good
for educational purposes.
The activity dataset is obtained from: http://casas.wsu.edu/datasets/hh101.zip
This dataset has sensor readings for a single-occupant home. Multiple sensors are
placed at various locations in an apartment and they turn ON/OFF based on the
occupant's movements (we only use the sensors whose ids start with 'M' or 'MA').
The related floor plan showing the sensor layout can be found at:
http://ailab.wsu.edu/casas/hh/hh101/profile/page-6.html
More details on the setup can be found in:
D. Cook, A. Crandall, B. Thomas, and N. Krishnan.
CASAS: A smart home in a box. IEEE Computer, 46(7):62-69, 2013.
and on the CASAS website: http://ailab.wsu.edu/casas/hh
To execute:
pythonw -m timeseries.activity_word2vec --log_file=temp/timeseries/activity_word2vec.log --n_epochs=1 --debug
"""
if __name__ == "__main__":
    # Script entry point: fit a word2vec-style embedding on the encoded CASAS
    # sensor sequence, reduce it to 2-D with t-SNE when it has more than two
    # dimensions, and write a labelled scatter plot of the sensors to a PDF.

    logger = logging.getLogger(__name__)

    args = get_command_args(debug=False, debug_args=[
        "--n_lags=20",
        "--n_epochs=1",
        "--debug",
        "--plot",
        "--log_file=temp/timeseries/activity_word2vec.log",
    ])
    configure_logger(args)

    # One working directory, named once: it is created here, handed to
    # maybe_download_casas() below, and also receives the output PDF.
    localpath = "./temp/timeseries"
    dir_create(localpath)  # for logging and plots

    # Fixed seeds for both the stdlib and the numpy random generators.
    random.seed(42)
    rnd.seed(42)

    url = "http://casas.wsu.edu/datasets/"
    casasfile = maybe_download_casas("hh101.zip", url, "hh101/ann.txt", localpath)
    logger.debug("casasfile: %s", casasfile)

    cas = Casas(dataset_path=casasfile)
    # Lazy %-args: the (potentially large) maps are only stringified when
    # debug logging is actually enabled.
    logger.debug("cas.sensor2code:\n%s", cas.sensor2code)
    logger.debug("cas.code2sensor:\n%s", cas.code2sensor)
    logger.debug("cas.sensor_seq:\n%s", cas.sensor_seq[0:10])
    logger.debug("cas.sensor_enc:\n%s", cas.sensor_enc[0:10])

    timer = Timer()

    # mode selects the implementation: 1 -> CustomWord2vec, 2 -> Word2vec.
    # Any other value (e.g. 0) trains nothing, so nothing is plotted.
    modes = {0: "", 1: "custom", 2: "original"}
    mode = 2

    dims = 15
    window_size = 3
    n_epochs = args.n_epochs
    neg_samples = 3
    normalize_embeddings = False
    # A 2-D embedding can be plotted directly; anything larger goes via t-SNE.
    use_tsne = dims > 2

    w2v = None
    embeddings = None

    # The signature encodes the configuration and becomes part of the PDF name.
    signature = "d%d%s%s%s" % (dims,
                               "_norm" if normalize_embeddings else "",
                               "_" + modes[mode],
                               "_tsne" if use_tsne else "")
    logger.debug("signature: %s", signature)

    # write_sensor_data_as_document(cas)

    if mode == 1:
        w2v = CustomWord2vec(sensors=cas.sensors,
                             code2sensor=cas.code2sensor,
                             sensor2code=cas.sensor2code,
                             dims=dims, window_size=window_size,
                             neg_samples=neg_samples,
                             n_epochs=n_epochs, debug=args.debug)
        w2v.fit(cas.sensor_enc)
        logger.debug(timer.message("Completed training in"))
        embeddings = w2v.get_embeddings(normalized=normalize_embeddings)
    elif mode == 2:
        w2v = Word2vec(sensors=cas.sensors,
                       code2sensor=cas.code2sensor,
                       sensor2code=cas.sensor2code,
                       dims=dims, window_size=window_size,
                       n_epochs=n_epochs, num_skips=2,
                       num_sampled=neg_samples, num_steps=100001,
                       debug=args.debug)
        w2v.fit(cas.sensor_enc)
        # Report the training time here as well, consistent with mode 1.
        logger.debug(timer.message("Completed training in"))
        embeddings = w2v.get_embeddings(normalized=normalize_embeddings)

    if embeddings is not None:
        logger.debug(embeddings.shape)
        # logger.debug(embeddings)

        if use_tsne:
            logger.debug("computing t-SNE for embedded space...")
            # perplexity=30, as used in original word2vec, does not result
            # in good visualization...
            # NOTE(review): recent scikit-learn releases rename `n_iter` to
            # `max_iter`; confirm against the installed version before
            # changing this keyword.
            tsne_embed = manifold.TSNE(perplexity=3,
                                       n_components=2, init='pca',
                                       random_state=0, method='exact',
                                       n_iter=5000)
            x_tr = tsne_embed.fit_transform(embeddings)
        else:
            x_tr = embeddings

        # To overlay the floor plan image instead, load it with:
        # img = mpimg.imread("../datasets/CASAS/floor_plans/HH101-sensormap.png")
        # and draw it on a plot with: pl = dp.get_next_plot(); pl.imshow(img)

        pdfpath = os.path.join(localpath, "activity_sensors_%s.pdf" % signature)
        dp = DataPlotter(pdfpath=pdfpath, rows=1, cols=1)
        pl = dp.get_next_plot()
        dp.plot_points(x_tr, pl, marker='+', s=20)
        # Each sensor code is used as the row index of its 2-D point; annotate
        # that point with the sensor's name.
        for code, sensor in cas.code2sensor.items():
            pl.text(x_tr[code, 0], x_tr[code, 1], sensor)
        dp.close()