-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsql_basics.sql
287 lines (222 loc) · 7.28 KB
/
sql_basics.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
--SCHEMATICS
--------------------------------------
--Create Database
CREATE DATABASE tablename
DEFAULT CHARACTER SET utf8;
--User Access
--can specific CRUD specific access rights too
GRANT ALL ON misc.* TO 'fred'@'localhost' IDENTIFIED BY 'zap';
-- Show Tables
SELECT * FROM pg_catalog.pg_tables;
--Create Schema
CREATE SCHEMA schema_name;
CREATE SCHEMA AUTHORIZATION user_name;
--Alter Existing Table
ALTER TABLE tablename RENAME fieldname1 to fieldname2
ALTER TABLE table_name ADD fieldname float;
--Create Table
CREATE table tablename (
id int not null auto_increment,
rule_id int unsigned, --unsigned = +ve
date date,
consequent varchar,
support float,
confidence float,
lift float,
support_cnt int,
primary key (id),
index(rule_id)
);
CREATE table newtable (
artist_id int not null auto_increment
artist varchar(128),
album_id int,
primary key(artist_id)
constraint foreign key(album_id) references album(album_id)
on delete cascade on update cascade,
);
--Add Primary keys & Indexes
ALTER TABLE tablename ADD PRIMARY KEY (fieldname);
CREATE INDEX indexname ON tablename (fieldname);
CREATE INDEX indexname
ON tablename (date(datetime), building_id, equipment_type, equipment_unit, sensor);
--insert
insert into operator_lookup(user_nm_domain, operator) values ('y5zwew.sg','testdata');
--update
UPDATE Customers
SET ContactName = 'Alfred Schmidt', City= 'Frankfurt'
WHERE CustomerID = 1;
UPDATE species_info --update replace multiple rows
SET column_name = REPLACE(column_name, 'Image', 'image')
--update from another table selection
INSERT INTO table2 (column1, column2, column3, ...)
SELECT column1, column2, column3, ...
FROM table1
WHERE condition;
--delete rows
DELETE FROM ts_biz_100m_ap; --delete entire table
DELETE FROM Customers
WHERE CustomerName='Alfreds Futterkiste';
UPDATE table_name
SET your_column_name = NULL; --not really delete but to purge data in a column
--query column names from a table
SELECT column_name,*
FROM information_schema.columns
WHERE table_name = 'wsg_radius_log'
--create views
CREATE VIEW view_name AS
select * from tablename
where column 1
--QUERIES
--------------------------------------
--select consecutive before & after rows
select *,
lead(column_nm) over(partition by column_nm2 order by column_nm3), --selects the row below the current
lead(column_nm) over(partition by column_nm2 order by column_nm3), --selects the row above the current
from table_name
--if else
select (case when column1 - column2 >= 14 then 1
when column1 - column3 = 0 then 2
else 0 end) as difference
from table_name
-- JOIN
--------------------------------------
-- normal join
select b.name
from country a
join city b on a.code=b.countrycode
where continent = 'Africa'
-- multiple joins
select a.id, name, friend_id, c.salary as salary_1, d.salary as salary_2
from students a
left join friends b on a.id=b.id
left join packages c on a.id=c.id
left join packages d on b.friend_id=d.id
-- join using where clause
select a.name, b.grade, a.marks
from students a, grades b
where a.marks between b.min_Mark and b.Max_Mark --joined using a between
--CONVERT DATA TYPE
--------------------------------------
--simple conversion (POSTGRES, VERTICA)
select column1::date, column2::int
--using CAST (IMPALA)
select CAST('2016-02-03 06:37:51' as timestamp)
from table_name
select CAST(salary as int)
from table_name
--AGGREGATE FUNCTIONS
--------------------------------------
--number of rows is reduced
--must group by column names that are not aggregated in selected columns
--"where" clause is change to "having" when using group by
select sum(sales), salesperson
from tableName
group by salesperson
having sum(sales) > 1000
--ANALYTIC FUNCTIONS
--------------------------------------
--no change in total rows
select sum(sales) over (partition by salesperson order by column_nm)
from tableName
--DATETIME
--------------------------------------
--CONVERT TO DATETIME
--from EPOCH to TIMESTAMP (IMPALA)
select to_timestamp(1456314748)
--from TIMESTAMP to DATE (IMPALA)
select to_date(now())
--from string to TIMESTAMP
select CAST('2016-02-03 06:37:51' as timestamp)
--fixed constant parts for datetime
select to_char(starttime, '1900-01-01 HH24:00:00')::timestamp --(POSTGRES)
select cast(concat('1991-01-01', ' ', from_unixtime(1392394861, 'HH'), ':00:00') as timestamp) --(IMPALA)
--DATETIME DELTA (ADD/DIFF)
select now() - interval '15 minutes' -- (POSTGRES)
select now() - interval 15 minutes -- (IMPALA)
select datediff(day, startdate, enddate)
--TRUNCATE
select trunc(starttime, 'dd')
--EXTRACT (POSTGRES, IMPALA)
select extract(now(), 'year');
--2017
-- SUBQUERY
--------------------------------------
-- WITH CLAUSE
--easily readable and recommended for initial scripting. however performance issues, so need to change to real subquery after coding
with table1 as (select *
from tablenm
where column1 = 'stop')
select *
from table1
with table1 as (select * from tablenm where col1 > 10),
table2 as (select * from tablenm where col2 = 'sex')
select * from table1 a
join table2 b on a.col1 = b.col1
-- SUBQUERY
--very difficult to interpret when query is long. But performance is optimised.
select *
from (select *
from tablenm
where column1 = 'stop') a
-- WHERE
--------------------------------------
-- using IN clause for multiple values
SELECT column_name(s)
FROM table_name
WHERE column_name IN (value1, value2, ...)
-- ORDER
--------------------------------------
-- default order is ascending
select *
from table_name
order by name asc, marks desc
-- indicate col number instead
select name, sex, salary
from table_name
order by 1
-- ROUNDING FLOATS
--------------------------------------
-- round to 2 decimal place
select continent, round(avg(population), 2)
from country
group by continent
-- round down
select continent, floor(avg(population))
from country
group by continent
-- round up
select continent, ceil(avg(population))
from country
group by continent
-- NEW COLUMNS
--------------------------------------
-- row number
select row_number() over(order by name), name from occupations
-- lead & lag
select hacker_id, name, cnt,
lag(cnt) over(order by cnt desc, hacker_id asc) as lag,
lead(cnt) over(order by cnt desc, hacker_id asc) as lead
from table1
--USEFUL QUERIES
--------------------------------------
-- MODULUS
select distinct city
from station
where id %2=0 --only even numbers
-- show # duplicate rows
select column1, column2, count(*)-1 as duplicate_cnt
from wsg_radius_log
group by column1, column2
having count(*) > 0
-- delete duplicated rows
DELETE users
WHERE rowid NOT IN (
-- set a row_number
with rownm as (select row_number() over() as rowid, *
from tablename)
-- only keep a unique, non-duplicate row defined by min. rowid
select min(rowid)
from rownm
GROUP BY date, file_name -- add all columns necessary to define duplicates
order by 2, 3) a