-
Notifications
You must be signed in to change notification settings - Fork 0
/
County_check.py
33 lines (26 loc) · 1.01 KB
/
County_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#Checking Counties in the Data Set within the source 'Tiger' to make sure all
#data is for the Seattle region only and is in the correct format.
import xml.etree.ElementTree as ET
import re
# Path of the OSM XML file that this script audits.
OSM_FILE = "sample.osm"
# Opened in binary mode so the XML parser decodes the bytes itself (honouring
# the encoding named in the XML declaration) instead of the text layer
# decoding them first with the platform's default encoding.
file = open(OSM_FILE, "rb")
# Patterns for the two separators this check looks for inside a county value.
colon_search = re.compile(r'[:]')
semi_colon_search = re.compile(r'[;]')


def check_county(elem):
    """Report a county value that contains a ':' or ';' separator.

    When the element's ``v`` attribute contains either separator, print the
    raw value ("Before: ...") followed by the text that precedes the first
    separator ("After: ...").  Values without a separator, and elements
    without a ``v`` attribute, produce no output.

    Args:
        elem: An element exposing an ``attrib`` mapping; the caller passes
            ``tag`` elements whose ``k`` attribute is ``tiger:county``.

    Returns:
        None.  The result is only reported on standard output.
    """
    value = elem.attrib.get('v')
    if value is None:
        # A tag without a value has nothing to check; skip it instead of
        # failing with KeyError.
        return

    # Cut at whichever separator occurs first.  Splitting on ':' alone
    # whenever a colon is present would leave a value such as "A; B:C"
    # as "A; B" instead of reducing it to its first entry.
    hits = (colon_search.search(value), semi_colon_search.search(value))
    starts = [hit.start() for hit in hits if hit]
    if not starts:
        return

    print("Before: " + value)
    print("After: " + value[:min(starts)])
# Stream the parsed elements and run the county check on every
# 'tiger:county' tag found inside a <way>.  The `with` block closes the
# module-level file handle once parsing finishes or fails; without it the
# handle is never closed.
with file:
    for _event, elem in ET.iterparse(file):
        if elem.tag != 'way':
            continue
        for sub_elem in elem.iter('tag'):
            # .get() so a tag lacking a 'k' attribute is skipped rather than
            # raising KeyError.
            if sub_elem.attrib.get('k') == 'tiger:county':
                check_county(sub_elem)
'''
The function "check_county" performs that check. A modified version of this
function has been added to the file "Cleaning.py".
'''