-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql_project.sql
194 lines (127 loc) · 4.83 KB
/
sql_project.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
-- Cleaning Data in SQL Queries
SELECT * FROM [dbo].[Nashvillahousing]
----------------------------------------------------------
-- Standardize Date Format
SELECT SaleDateConvert,CAST(SaleDate AS date) As SaleDate FROM dbo.Nashvillahousing
--Add one Extra Column
ALTER TABLE dbo.Nashvillahousing
ADD SaleDateConvert Date;
--Then Put Convert SalesDate Values
UPDATE dbo.Nashvillahousing
SET SaleDateConvert = CAST(SaleDate AS date)
------------------------------------------------------------
-- Populate Property Address Data
SELECT * FROM dbo.Nashvillahousing
--WHERE PropertyAddress IS NULL
ORDER BY ParcelID
SELECT a.ParcelID,a.PropertyAddress,b.ParcelID,b.PropertyAddress, ISNULL(a.PropertyAddress,b.PropertyAddress)
FROM dbo.Nashvillahousing a
JOIN dbo.Nashvillahousing b ON a.ParcelID = b.ParcelID
AND a.[UniqueID ] <> b.[UniqueID ]
WHERE a.PropertyAddress IS NULL
UPDATE a
SET PropertyAddress = ISNULL(a.PropertyAddress,b.PropertyAddress)
FROM dbo.Nashvillahousing a
JOIN dbo.Nashvillahousing b ON a.ParcelID = b.ParcelID
AND a.[UniqueID ] <> b.[UniqueID ]
WHERE a.PropertyAddress IS NULL
---------------------------------------------------
--Breaking out PropertyAddress into Individual Columns (Address, City, State)
SELECT
SUBSTRING(a.PropertyAddress,1, CHARINDEX(',', a.PropertyAddress) - 1) AS Address,
SUBSTRING(a.PropertyAddress, CHARINDEX(',', a.PropertyAddress) + 1, LEN(PropertyAddress)) AS City
FROM dbo.Nashvillahousing a
--Add Column in table name is PropertySplitAddress
ALTER TABLE Nashvillahousing
Add PropertySplitAddress Nvarchar(255);
--Update Column name is PropertySplitAddress
UPDATE Nashvillahousing
SET PropertySplitAddress = SUBSTRING(PropertyAddress,1, CHARINDEX(',', PropertyAddress) - 1);
--Add Column in table name is PropertySplitCity
ALTER TABLE Nashvillahousing
ADD PropertySplitCity NVARCHAR(255);
--Update Column name is PropertySplitCity
UPDATE Nashvillahousing
SET PropertySplitCity = SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1, LEN(PropertyAddress));
SELECT
PARSENAME(REPLACE(n.OwnerAddress, ',', '.'), 3) AS OwnerAddress,
PARSENAME(REPLACE(n.OwnerAddress, ',', '.'), 2) AS OwnerCity,
PARSENAME(REPLACE(n.OwnerAddress, ',', '.'), 1) AS Ownerstate
FROM dbo.Nashvillahousing n
--Add Column in table name is OwnersplitAddress
ALTER TABLE Nashvillahousing
ADD OwnersplitAddress NVARCHAR(255);
--Update Data in Column name is OwnersplitAddress
UPDATE Nashvillahousing
SET OwnersplitAddress = PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3)
--Add Column in table name is OwnersplitCity
ALTER TABLE Nashvillahousing
ADD OwnersplitCity NVARCHAR(255);
--Update Data in Column name is OwnersplitCity
UPDATE Nashvillahousing
SET OwnersplitCity = PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2)
--Add Column in table name is OwnersplitState
ALTER TABLE Nashvillahousing
ADD OwnersplitState NVARCHAR(255);
--Update Data in Column name is OwnersplitState
UPDATE Nashvillahousing
SET OwnersplitState = PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1)
----------------------------------------------------------------------------
--Change Y and N to Yes and No in "Sold as Vacant" field
Select Distinct(SoldAsVacant), Count(SoldAsVacant) AS total_count
From dbo.Nashvillahousing
Group by SoldAsVacant
order by 2
SELECT DISTINCT(SoldAsVacant)
FROM Nashvillahousing
--There Two ways to Update N as NO and Y as Yes in Column
SELECT SoldAsVacant, CASE
WHEN SoldAsVacant ='Y' THEN 'Yes'
WHEN SoldAsVacant = 'N' THEN 'No'
ELSE SoldAsVacant
END AS Sold_as_Vacant
FROM Nashvillahousing n
WHERE n.SoldAsVacant ='Y' OR n.SoldAsVacant ='N'
--Frist Update Query
UPDATE Nashvillahousing
SET SoldAsVacant = 'No'
WHERE SoldAsVacant = 'N'
UPDATE Nashvillahousing
SET SoldAsVacant = 'Yes'
WHERE SoldAsVacant = 'Y'
--Second Update Query
UPDATE Nashvillahousing
SET SoldAsVacant = CASE
WHEN SoldAsVacant ='Y' THEN 'Yes'
WHEN SoldAsVacant = 'N' THEN 'No'
ELSE SoldAsVacant
END
FROM Nashvillahousing n
--------------------------------------------------------------------------------
--Remove Duplicates
WITH RowNumCTE AS(
Select *,
ROW_NUMBER() OVER (
PARTITION BY ParcelID,
PropertyAddress,
SalePrice,
SaleDate,
LegalReference
ORDER BY
UniqueID
) row_num
From Nashvillahousing
--order by ParcelID
)
--SELECT all duplicates
Select *
From RowNumCTE
Where row_num <> 1
Order by PropertyAddress
--Delete Duplicate Rows in table
DELETE From RowNumCTE
Where row_num <> 1
------------------------------------------------------------------------
-- Delete Unused Columns
ALTER TABLE Nashvillahousing
DROP COLUMN OwnerAddress, TaxDistrict, PropertyAddress, SaleDate