-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAnalyzing 311 call data in NYC.py
87 lines (49 loc) · 1.92 KB
/
Analyzing 311 call data in NYC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
import pandas as pd
# Notebook-wide plotting/display configuration, followed by loading the first
# 100,000 rows of the 311 service-request export into `orig_data`.
# NOTE(review): `matplotlib` and `figsize` are not imported anywhere in this
# file; presumably the notebook runs under IPython's pylab mode, which injects
# them -- confirm before running this as a plain script.
font = {
    # 'normal' is not a font family matplotlib recognises (it only triggers a
    # "font family not found" fallback), so name the generic family directly.
    'family': 'sans-serif',
    'weight': 'bold',
    'size': 22,
}
matplotlib.rc('font', **font)
# The pandas option 'display.mpl_style' no longer exists in current pandas
# releases; matplotlib's own 'ggplot' style sheet is the replacement the
# pandas deprecation notice points to.
# NOTE(review): requires a matplotlib version that ships `matplotlib.style`.
matplotlib.style.use('ggplot')
figsize(15, 6)
# 'display.line_width' was superseded by 'display.width'.
pd.set_option('display.width', 4000)
pd.set_option('display.max_columns', 100)
orig_data = pd.read_csv(
    './311-service-requests.csv',
    nrows=100000,
    parse_dates=['Created Date'],
    # Later cells match ZIP codes with `.str.contains`, so read the column as
    # text instead of letting pandas infer a numeric or mixed type for it.
    dtype={'Incident Zip': str},
)
# Replace missing street names with an empty string. The result is assigned
# back to the column because an in-place fillna on a column selection is not
# guaranteed to update the parent DataFrame.
orig_data['Street Name'] = orig_data['Street Name'].fillna("")
# <headingcell level=1>
# Plotting all complaints by location gives us a crude map of the city
# <codecell>
# Draw one blue dot per complaint, longitude on the x axis and latitude on y.
# NOTE(review): `plot` is not imported in this file; presumably IPython's
# pylab mode supplies it -- confirm.
longitudes = orig_data['Longitude']
latitudes = orig_data['Latitude']
plot(longitudes, latitudes, '.', color='blue')
# <headingcell level=1>
# Have a quick look at what people complain about
# <codecell>
# Bar chart of the first 20 entries of the complaint-type frequency table.
# NOTE(review): `plt` is not imported in this file; presumably IPython's
# pylab mode supplies it -- confirm.
complaint_type_counts = orig_data['Complaint Type'].value_counts()
complaint_type_counts.head(20).plot(kind='bar')
plt.title('Complaints')
# <headingcell level=1>
# Complaints by borough
# <codecell>
# Bar chart of how many complaints each borough has (first 20 entries of the
# frequency table at most).
borough_counts = orig_data['Borough'].value_counts()
borough_counts.head(20).plot(kind='bar')
# <headingcell level=1>
# Complaints by zip
# <codecell>
# Bar chart of the first 20 entries of the per-ZIP complaint frequency table.
# `.head(20)` always selects by position. A bare `[:20]` slice can instead be
# interpreted as a label range when the index is float-typed, which is what
# the ZIP index becomes if pandas inferred the column as numbers.
# NOTE(review): the actual dtype of 'Incident Zip' depends on the CSV and on
# how it was read -- confirm.
zip_counts = orig_data['Incident Zip'].value_counts()
zip_counts.head(20).plot(kind='bar')
# <headingcell level=1>
# OK, let's now look at rodent complaints by borough
# <codecell>
# Keep only the rows whose complaint type is exactly 'Rodent', then chart how
# many of them fall in each borough.
is_rodent = orig_data['Complaint Type'] == 'Rodent'
rodent_complaints = orig_data[is_rodent]
rodent_by_borough = rodent_complaints['Borough'].value_counts()
rodent_by_borough.plot(kind='bar')
# <headingcell level=1>
# Let's take a look at complaints in my neighborhood
# <codecell>
# Select the complaints whose ZIP code contains '11220' and chart the first 20
# entries of their complaint-type frequency table.
# The column is cast to text before matching: on a mixed-type column
# `.str.contains` yields NaN for non-string values, so a ZIP that pandas
# parsed as a number was previously turned into False and dropped.
# `na=False` keeps missing ZIPs out of the selection and `regex=False` treats
# '11220' as a literal substring.
# NOTE(review): assumes the column may hold a mix of strings and numbers --
# confirm against the CSV.
in_my_zip = orig_data['Incident Zip'].astype(str).str.contains(
    '11220', regex=False, na=False)
my_zip = orig_data[in_my_zip]
my_zip['Complaint Type'].value_counts().head(20).plot(kind='bar')
# <headingcell level=1>
# Let's compare that to the world's wealthiest zip code
# <codecell>
# Select the complaints whose ZIP code contains '10065' and chart the first 20
# entries of their complaint-type frequency table.
# The column is cast to text before matching: on a mixed-type column
# `.str.contains` yields NaN for non-string values, so a ZIP that pandas
# parsed as a number was previously turned into False and dropped.
# `na=False` keeps missing ZIPs out of the selection and `regex=False` treats
# '10065' as a literal substring.
# NOTE(review): assumes the column may hold a mix of strings and numbers --
# confirm against the CSV.
in_rich_zip = orig_data['Incident Zip'].astype(str).str.contains(
    '10065', regex=False, na=False)
rich_people_zip = orig_data[in_rich_zip]
rich_people_zip['Complaint Type'].value_counts().head(20).plot(kind='bar')
# <codecell>
# Rows of `orig_data` whose ZIP code contains '10065', kept in `a`.
# The column is cast to text before matching so that ZIPs pandas parsed as
# numbers are not silently excluded (`.str.contains` yields NaN for non-string
# values on a mixed-type column); `na=False` excludes missing ZIPs and
# `regex=False` matches '10065' literally.
# NOTE(review): assumes the column may hold a mix of strings and numbers --
# confirm against the CSV.
a = orig_data[
    orig_data['Incident Zip'].astype(str).str.contains(
        '10065', regex=False, na=False)
]
# <codecell>