This repository was archived by the owner on Oct 25, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgeofunctions.py
120 lines (110 loc) · 4.23 KB
/
geofunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# load all of the geometries
import fiona
from shapely.geometry import Point,Polygon,MultiPolygon
from shapely import speedups
speedups.enable()
# we'll use most of these
from json import loads
import codecs
import datetime
# import re
# import numpy
# from labMTsimple.storyLab import *
import sys
def loadCities():
    """Load the polygons and names stored in shapefiles/cb_2013_us_ua10_500k.shp.

    This is loadGeneric() applied to that fixed shapefile, with each
    feature named by its NAME10 property. The loading loop used to be
    duplicated here; it now lives only in loadGeneric().

    Returns:
        (polygonList, nameList): two parallel lists, one shapely geometry
        and one name per feature, in file order.
    """
    return loadGeneric('shapefiles/cb_2013_us_ua10_500k.shp',key='NAME10')
def loadGeneric(shapefile,key="NAME10"):
    """Load the polygons and names stored in any shapefile.

    Ex usage:
    park_polygon_list,park_name_list = loadGeneric("shapefiles/nps_boundary.shp",key="UNIT_NAME")

    Args:
        shapefile: path of the shapefile, handed to fiona.open().
        key: name of the feature property to use as the feature's name.

    Returns:
        (polygonList, nameList): two parallel lists, one shapely
        Polygon/MultiPolygon and one name per feature, in file order.

    Raises:
        ValueError: if a feature's geometry type is neither 'Polygon'
            nor 'MultiPolygon'.
    """
    polygonList = []
    nameList = []
    # the with-block closes the collection once every feature has been read
    with fiona.open(shapefile,'r') as features:
        for feature in features:
            geometry = feature['geometry']
            geometryType = geometry['type']
            if geometryType == 'Polygon':
                # rings = [exterior, hole, hole, ...]; keep the holes so that
                # a point inside a hole is not reported as contained
                rings = geometry['coordinates']
                shape = Polygon(rings[0],rings[1:] or None)
            elif geometryType == 'MultiPolygon':
                # one (exterior, holes) pair per member polygon
                shape = MultiPolygon([
                    (tuple(rings[0]),()) if len(rings) == 1 else (tuple(rings[0]),rings[1:])
                    for rings in geometry['coordinates']
                ])
            else:
                raise ValueError('unknown geometry %s' % geometryType)
            nameList.append(feature['properties'][key])
            polygonList.append(shape)
    print('done loading')
    return polygonList,nameList
def cityID(polygonList,pt):
    """Return the index of the first polygon that contains pt, or -1.

    polygonList is scanned in order and each entry is asked
    ``.contains(pt)``; the scan stops at the first truthy answer.
    """
    hits = (index for index,shape in enumerate(polygonList) if shape.contains(pt))
    return next(hits,-1)
def _tweetLonLat(tweet):
    """Return the tweet's position as [longitude, latitude], or None if it has none."""
    try:
        if tweet['coordinates']:
            return list(tweet['coordinates']['coordinates'])
        if tweet['geo']:
            # the deprecated 'geo' field is stored [lat, lon]; flip it so both
            # sources give the x,y (lon,lat) order the polygons are tested in
            return list(reversed(tweet['geo']['coordinates']))
    except (KeyError,TypeError):
        # not a tweet (e.g. a delete notice) or an unexpected geo payload
        pass
    return None


def gzipper(polygonList,nameList,outfile,folder="citytweets"):
    """Sort the geotagged tweets read from stdin into per-polygon text files.

    Each line of sys.stdin is parsed as one JSON tweet. A tweet with a
    position that falls inside polygonList[i] is appended to
    '<folder>/<i>/<outfile>.txt' as one tab-separated line:
    user id, longitude, latitude, text (newlines and tabs in the text are
    replaced by spaces). The '<folder>/<i>' directory must already exist.
    A summary of the counts is printed at the end.

    Args:
        polygonList: geometries to test each tweet against, see cityID().
        nameList: names parallel to polygonList; currently unused and kept
            for interface compatibility.
        outfile: base name (without extension) of the per-polygon files.
        folder: top-level output directory.
    """
    lineCount = 0
    tweetCount = 0
    geoCount = 0
    geoLocatedCount = 0
    for line in sys.stdin:
        lineCount += 1
        try:
            tweet = loads(line)
        except ValueError:
            print("failed to load a tweet")
            # skip the line: falling through would re-process the tweet left
            # over from the previous line and write it a second time
            continue
        tweetCount += 1
        lonLat = _tweetLonLat(tweet)
        if lonLat is None:
            continue
        geoCount += 1
        myCityID = cityID(polygonList,Point(lonLat))
        if myCityID < 0:
            continue
        geoLocatedCount += 1
        path = '{0}/{1}/{2}.txt'.format(folder,myCityID,outfile)
        # append mode: many tweets and many runs share one file per polygon
        with codecs.open(path,'a','utf8') as out:
            out.write("{0}\t{1}\t{2}\t".format(tweet['user']['id'],lonLat[0],lonLat[1]))
            out.write(tweet['text'].replace('\n',' ').replace('\t',' '))
            out.write("\n")
    print('read {0} lines, {1} tweets, and classified {3} of {2} geotweets'.format(lineCount,tweetCount,geoCount,geoLocatedCount))
if __name__ == '__main__':
    # usage: python geofunctions.py YYYY-MM-DD < tweets
    # parsing and re-formatting the argument rejects anything that is not a
    # valid date before any work is done
    day = datetime.datetime.strptime(sys.argv[1],'%Y-%m-%d')
    stamp = day.strftime('%Y-%m-%d')
    # alternative polygon source: parks,parkNames = loadCities()
    parks,parkNames = loadGeneric("shapefiles/nps_boundary.shp",key="UNIT_NAME")
    gzipper(parks,parkNames,stamp,folder="parktweets")