-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsfpd.py
78 lines (62 loc) · 1.89 KB
/
sfpd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
'''SFPD data'''
import os, re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import misc
# Absolute, machine-specific path to a local OpenData directory.
# NOTE(review): none of the functions visible in this file read `basedir`;
# confirm whether load() was meant to resolve its CSV against it.
basedir = '/Users/robert/Documents/Code/OpenData'
# (label, compiled pattern) pairs, tested in this order by matchdrug().
# Raw strings keep ``\s`` a regex escape instead of an (invalid) string escape,
# and compiling here avoids re-parsing every pattern on every call.
_DRUG_PATTERNS = tuple(
    (label, re.compile(pattern))
    for label, pattern in (
        # Leading whitespace keeps "METH-AMPHETAMINE" out of this bucket.
        ('Amphetamine', r'\sAMPHETAMINE'),
        ('Base', r'BASE/ROCK'),
        # "BASE/ROCK COCAINE" is counted as Base, not Cocaine.
        ('Cocaine', r'(?<!ROCK )COCAINE'),
        ('Hallucinogen', r'HALLUCINO'),
        ('Heroin', r'HEROIN'),
        ('Marijuana', r'MARIJUANA'),
        # Lookahead stops "METHADONE" from also counting as Meth.
        ('Meth', r'METH(?!ADONE)'),
        ('Methadone', r'METHADONE'),
        # OPIATE(S) or OPIUM; the old class [A|U] also accepted a literal '|'.
        ('Opiates', r'OPI[AU]'),
    )
)


def matchdrug(descript):
    '''Decide which drug was involved in an incident description.

    Parameters
    ----------
    descript : str
        Incident description text; matching is case-sensitive, so the
        upper-case drug names in the patterns must appear as upper case.

    Returns
    -------
    str
        The label of the matching drug pattern, or 'NA' when no pattern
        matches (including for an empty string).

    Notes
    -----
    Patterns are tested in the fixed order of ``_DRUG_PATTERNS``. If more
    than one matches, the label listed last wins, as in the original loop
    that kept overwriting its result. Because the order is now fixed, the
    answer no longer depends on dict iteration order.
    '''
    match = 'NA'
    for label, pattern in _DRUG_PATTERNS:
        if pattern.search(descript):
            match = label
    return match
def load(path='SFPD_Incidents_-_Previous_Three_Months.csv'):
    '''Read the SFPD incident CSV and tag every row with the drug involved.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to the file name that used to be
        hard-coded here; being relative, it is resolved against the current
        working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV with an added ``Drug`` column holding the result of
        matchdrug() for each value of the ``Descript`` column.
    '''
    df = pd.read_csv(path)
    df['Drug'] = [matchdrug(descript) for descript in df.Descript]
    return df
def plot_loc_by_drug(df):
    '''Scatter the X/Y positions of cocaine and marijuana incidents together.

    A new figure with a single axes is created. Rows of ``df`` are selected
    by their ``Drug`` value and drawn as small dots: cocaine in blue,
    marijuana in red, with a legend naming each series.
    '''
    series = (('Cocaine', 'b'), ('Marijuana', 'r'))
    axes = plt.figure().add_subplot(111)
    for name, shade in series:
        subset = df[df.Drug == name]
        axes.plot(subset.X, subset.Y, '.', ms=2, color=shade, label=name)
    axes.legend()
def hist2_loc_by_drug(df):
    '''Draw one 2-D location histogram per drug present in ``df``.

    Rows whose ``Drug`` is 'NA' are dropped first. Every remaining distinct
    drug gets its own subplot in a new figure: a 2-D histogram of the X/Y
    columns with 100 bins per axis, titled with the drug name and with the
    tick labels blanked.

    The grid is four columns wide and at least two rows tall. Rows are added
    when more than eight drugs are present, because a fixed 2x4 grid makes
    ``add_subplot`` raise on the ninth panel.
    '''
    df = df[df.Drug != 'NA']
    fig = plt.figure()
    udrug = np.unique(df.Drug)
    ncols = 4
    # Ceiling division; never fewer than the two rows the layout always had.
    nrows = max(2, (len(udrug) + ncols - 1) // ncols)
    for i, drug in enumerate(udrug):
        ax = fig.add_subplot(nrows, ncols, i + 1)
        df_ = df[df.Drug == drug]
        ax.hist2d(df_.X, df_.Y, bins=100)
        ax.set_title(drug)
        ax.set_xticklabels('')
        ax.set_yticklabels('')
def plot_drugs_by_dist(df):
    '''Bar-chart the five most frequent ``Drug`` values in each district.

    One subplot per distinct ``PdDistrict`` value is placed on a 2x5 grid in
    a new figure and titled with the district name. All axes are finally
    handed to ``misc.sameyaxis`` (presumably to give them a common y-range;
    confirm in ``misc``).
    '''
    fig = plt.figure()
    panels = []
    for slot, district in enumerate(np.unique(df.PdDistrict), 1):
        panel = fig.add_subplot(2, 5, slot)
        panels.append(panel)
        counts = df[df.PdDistrict == district].Drug.value_counts()
        counts[:5].plot(kind='bar', ax=panel)
        panel.set_title(district)
    misc.sameyaxis(panels)