-
Notifications
You must be signed in to change notification settings - Fork 1
/
Starbucks_LocationCollector.py
74 lines (63 loc) · 2.6 KB
/
Starbucks_LocationCollector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# coding=utf-8
import csv
from contextlib import closing
from selenium.webdriver import Firefox
from selenium.webdriver.support.ui import WebDriverWait
from BeautifulSoup import BeautifulSoup
import time
from selenium import webdriver
# Starbucks store-locator scraper.
#
# For every zip code listed in zip.txt, load the store-locator search page in
# a headless PhantomJS browser, parse each entry under the "searchResults"
# element, and append stores that have not been seen before to
# StarbucksLocations_5.csv.
#
# NOTE: the script targets Python 2 (print statements in the original, the
# BeautifulSoup 3 import, csv output opened in 'wb'). The print calls below
# use a single pre-formatted string so they emit the same text on 2 and 3.

SEARCH_URL = 'http://www.starbucks.com/store-locator/search/location/'
EXISTING_FILE = 'Existing.txt'
ZIP_FILE = 'zip.txt'
OUTPUT_FILE = 'StarbucksLocations_5.csv'
# Pause before every request; the original slept 25 s per zip code.
REQUEST_DELAY_SECONDS = 25


def read_lines(path):
    """Return the lines of *path* with surrounding whitespace stripped."""
    with open(path, 'r') as handle:
        return [line.strip() for line in handle]


def parse_store_fragment(fragment):
    """Parse the HTML text of one search result.

    The fragment is split on '>' and the store name, street address and
    "City, ST 12345" line are read from positions 2, 5 and 9 respectively.

    Returns a ``(name, address, city, state, zip)`` tuple, or ``None`` when
    the fragment does not have that shape (for example a whitespace-only
    text node between result elements).
    """
    pieces = fragment.split('>')
    try:
        name = pieces[2].replace('</h2', '')
        # The original replaced '&' with '&' (a no-op); un-escaping the HTML
        # entity is presumably what was intended.
        name = name.replace('&amp;', '&')
        address = pieces[5].replace('</li', '')
        locality = pieces[9].replace('</li', '')
        city, state_zip = locality.split(',')[:2]
        # split() tolerates missing or repeated spaces around the state code.
        state, zip_code = state_zip.split()[:2]
    except (IndexError, ValueError):
        return None
    return name, address, city, state, zip_code


def fetch_result_fragments(zip_code):
    """Load the search page for *zip_code* and return its result fragments.

    Each fragment is the string form of one child of the element whose id is
    "searchResults". An empty list is returned when the page has no such
    element.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get(SEARCH_URL + zip_code)
        page_source = driver.page_source
    finally:
        # Always shut the browser down; otherwise one PhantomJS process is
        # left running for every zip code processed.
        driver.quit()
    results = BeautifulSoup(page_source).find(id="searchResults")
    if results is None:
        return []
    return [str(child) for child in results]


def main():
    """Scrape every zip code in zip.txt and write new stores to the CSV."""
    print('Start time:  %s' % time.strftime('%X %x %Z'))
    # Names from previous runs plus names written during this run; a set
    # keeps the duplicate check O(1).
    seen = set(read_lines(EXISTING_FILE))
    us_zips = read_lines(ZIP_FILE)
    # 'wb' is the Python 2 csv convention (avoids blank rows on Windows).
    with open(OUTPUT_FILE, 'wb') as out:
        writer = csv.writer(out, delimiter=',')
        writer.writerow(['StoreName', 'Address', 'City', 'State', 'Zip'])
        print('Headings Written')
        for zip_code in us_zips:
            time.sleep(REQUEST_DELAY_SECONDS)
            print('Last Zip Processed:  %s' % zip_code)
            for fragment in fetch_result_fragments(zip_code):
                store = parse_store_fragment(fragment)
                if store is None or store[0] in seen:
                    continue
                print('Writing Location')
                seen.add(store[0])
                writer.writerow(list(store))
                # Flush per row so progress survives an interrupted run.
                out.flush()
    print('End time:  %s' % time.strftime('%X %x %Z'))


if __name__ == '__main__':
    main()