-
Notifications
You must be signed in to change notification settings - Fork 0
/
hive_commands.txt
212 lines (212 loc) · 5.08 KB
/
hive_commands.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
set hive.execution.engine=mr;
set hive.fetch.task.conversion=minimal;
use rp3752_nyu_edu;
show tables
;
create external table h1 (trump_favour float, biden_favour float, dem_favour float, repub_favour float, date DATE, comment string) row format delimited fields terminated by ',' location '/user/rp3752_nyu_edu/project/hive/reddit';
create external table h1 (trump_favour float, biden_favour float, dem_favour float, repub_favour float, date string, comment string) row format delimited fields terminated by ',' location '/user/rp3752_nyu_edu/project/hive/reddit';
set hive.execution.engine=mr;
set hive.fetch.task.conversion=minimal;
use rp3752_nyu_edu;
show tables
;
create external table h1 (trump_favour float, biden_favour float, dem_favour float, repub_favour float, year date , comment string) row format delimited fields terminated by ',' location '/user/rp3752_nyu_edu/project/hive/reddit';
show tables
;
describe formatted h1;
select * from h1 limit 5;
drop table h1;
select * from h1 limit 5;
create external table h1 (trump_favour float, biden_favour float, dem_favour float, repub_favour float, date string , comment string) row format delimited fields terminated by ',' location '/user/rp3752_nyu_edu/project/hive/reddit';
create external table h1 (trump_favour float, biden_favour float, dem_favour float, repub_favour float, year string , comment string) row format delimited fields terminated by ',' location '/user/rp3752_nyu_edu/project/hive/reddit';
select * from h1 limit 5;
set hive.execution.engine=mr;
set hive.fetch.task.conversion=minimal;
use rp3752_nyu_edu;
ALTER TABLE h1 ADD COLUMN converted_date DATE;
show tables;
CREATE TABLE new_table AS
SELECT
*,
TO_DATE(FROM_UNIXTIME(UNIX_TIMESTAMP(date_column, 'MM/dd/yyyy'))) AS converted_date
FROM
your_table;
CREATE TABLE h2 AS
SELECT
*,
TO_DATE(FROM_UNIXTIME(UNIX_TIMESTAMP(year, 'MM/dd/yyyy'))) AS converted_date
FROM
h1;
select * from h2 limit 5;
ALTER TABLE h2 DROP COLUMN year;
SELECT
converted_date,
AVG(trump_favour) AS aggregate_trump_favour,
AVG(biden_favour) AS aggregate_biden_favour,
AVG(dem_favour) AS aggregate_dem_favour,
AVG(repub_favour) AS aggregate_repub_favour
FROM
h2
GROUP BY
converted_date
ORDER BY
converted_date;
CREATE TABLE IF NOT EXISTS final_scores AS
SELECT
converted_date as date,
AVG(trump_favour) AS aggregate_trump_favour,
AVG(biden_favour) AS aggregate_biden_favour,
AVG(dem_favour) AS aggregate_dem_favour,
AVG(repub_favour) AS aggregate_repub_favour
FROM
h2
GROUP BY
converted_date
ORDER BY
converted_date;
CREATE TABLE IF NOT EXISTS final_scores AS
SELECT
converted_date as day,
AVG(trump_favour) AS aggregate_trump_favour,
AVG(biden_favour) AS aggregate_biden_favour,
AVG(dem_favour) AS aggregate_dem_favour,
AVG(repub_favour) AS aggregate_repub_favour
FROM
h2
GROUP BY
converted_date
ORDER BY
converted_date;
select * from final_scores limit 5;
select * from final_scores;
set hive.execution.engine=mr;
set hive.fetch.task.conversion=minimal;
use rp3752_nyu_edu;
show tables;
create external table poll (candidate string, score float, party string, day date)
row format delimited fields terminated by ','
location '/user/rp3752_nyu_edu/project/hive/polls'
TBLPROPERTIES ("skip.header.line.count"="1");
select * from polls limit 5;
show tables;
select * from poll limit 5;
SELECT
day,
AVG(score) AS score,
FROM
poll
WHERE party = 'DEM'
GROUP BY
day
ORDER BY
day;
SELECT
day,
AVG(score) AS score
FROM
poll
WHERE party = 'DEM'
GROUP BY
day
ORDER BY
day;
SELECT
day,
AVG(score)/100 AS score
FROM
poll
WHERE party = 'DEM'
GROUP BY
day
ORDER BY
day;
CREATE TABLE IF NOT EXISTS dem_poll AS
SELECT
day,
AVG(score)/100 AS score
FROM
poll
WHERE party = 'DEM'
GROUP BY
day
ORDER BY
day;
select * from dem_poll limit 5;
CREATE TABLE IF NOT EXISTS rep_poll AS
SELECT
day,
AVG(score)/100 AS score
FROM
poll
WHERE party = 'REP'
GROUP BY
day
ORDER BY
day;
CREATE TABLE IF NOT EXISTS trump_poll AS
SELECT
day,
AVG(score)/100 AS score
FROM
poll
WHERE candidate = 'Donald Trump'
GROUP BY
day
ORDER BY
day;
CREATE TABLE IF NOT EXISTS biden_poll AS
SELECT
day,
AVG(score)/100 AS score
FROM
poll
WHERE candidate = 'Joe Biden'
GROUP BY
day
ORDER BY
day;
select * from rep_poll limit 5;
select * from trump_poll limit 5;
select * from biden_poll limit 5;
show tables;
set hive.execution.engine=mr;
set hive.fetch.task.conversion=minimal;
use rp3752_nyu_edu;
show tables
;
create external table news (trump float, biden float, democrats float, republicans float, source string, day date)
row format delimited fields terminated by ','
location '/user/rp3752_nyu_edu/project/hive/news'
TBLPROPERTIES ("skip.header.line.count"="1");
select * from news limit 5;
SELECT
day,
AVG(trump) AS trump,
AVG(biden) AS biden,
AVG(democrats) AS democrats,
AVG(republicans) AS republicans
FROM
news
GROUP BY
day
ORDER BY
day;
CREATE TABLE IF NOT EXISTS hive_news AS
SELECT
day,
AVG(trump) AS trump,
AVG(biden) AS biden,
AVG(democrats) AS democrats,
AVG(republicans) AS republicans
FROM
news
GROUP BY
day
ORDER BY
day;
select * from hive_news;
history
;
ls
;
pwd;