forked from tru-hy/gtfs_shape_mapfit
-
Notifications
You must be signed in to change notification settings - Fork 3
/
gtfs_stop_cleaner.py
executable file
·89 lines (77 loc) · 2.97 KB
/
gtfs_stop_cleaner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python2
from imposm.parser import OSMParser
from common import read_gtfs_stops
from collections import OrderedDict, defaultdict
import sys
from math import radians, cos, sin, asin, sqrt
import csv
# Stolen from http://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points
def haversine((lat1, lon1), (lat2, lon2)):
"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees)
"""
# convert decimal degrees to radians
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * asin(sqrt(a))
# 6367 km is the radius of the Earth
km = 6367 * c
return km*1000.0
def read_osm_stops(osmfile):
stop_coords = defaultdict(list)
def handle_node(node):
if (node[1].get('highway') != 'bus_stop' and
node[1].get('railway') != 'tram_stop'):
return
if 'ref' not in node[1] and 'ref:findr' not in node[1]: return
if 'ref' in node[1]:
ref = node[1]['ref']
if not ref: return
stop_coords[('code',ref)].append((node[2],('ref',node[1]['ref'])))
if 'ref:findr' in node[1]:
reffindr = node[1]['ref:findr']
if not reffindr: return
stop_coords[('id',reffindr)].append((node[2],('ref:findr',node[1]['ref:findr'])))
def handle_nodes(nodes):
for node in nodes:
handle_node(node)
OSMParser(nodes_callback=handle_nodes).parse(osmfile)
return stop_coords
def fit_gtfs_stops(osmfile, stopsfile, distance_threshold=200.0):
osmstops = read_osm_stops(osmfile)
reader = read_gtfs_stops(open(stopsfile))
names = reader.tupletype._fields
writer = csv.writer(sys.stdout)
writer.writerow(names)
#import matplotlib.pyplot as plt
position_errors = []
for stop in reader:
row = OrderedDict(zip(names, stop))
candidates = []
for candidate in osmstops[('code',row['stop_code'])] + osmstops[('id',row['stop_id'])]:
lat, lon = candidate[0][::-1]
candid = candidate[1]
distance = haversine((lat, lon), (float(row['stop_lat']), float(row['stop_lon'])))
candidates.append((distance, (lat, lon),candid))
if len(candidates) > 0:
error, (lat, lon), candid = min(candidates)
position_errors.append(error)
if error < distance_threshold:
row['stop_lat'] = lat
row['stop_lon'] = lon
else:
print >>sys.stderr, "Huge error;Stop ID: %s / %s;[%s, %s];[%s, %s];%.1f;%s"%(row['stop_id'],row['stop_code'], row['stop_lon'], row['stop_lat'], lon, lat,error,candid)
else:
if row['stop_code'] or row['stop_id']:
print >>sys.stderr, "No OSM stop;Stop ID: %s / %s;[%s, %s]"%(row['stop_id'],row['stop_code'], row['stop_lon'], row['stop_lat'])
writer.writerow(row.values())
sys.stdout.flush()
#plt.hist(filter(lambda x: x < distance_threshold, position_errors), bins=50)
#plt.show()
if __name__ == "__main__":
import argh
argh.dispatch_command(fit_gtfs_stops)