Hibbett.py
'''-------------------------------------------------------------------------
Name:           Hibbett Locations Scraper
Description:    Scrapes all Hibbett store locations from the website.
                State links are pulled from the store directory into a
                text file, city links are then pulled from each state
                page, and addresses are scraped from each city page.
                Does not have auto-updating capability yet.
Date:           12/12/2017
Author:         pverma
-------------------------------------------------------------------------'''
from selenium import webdriver
import time
import csv
import pandas

_storesCurrentRun = pandas.read_csv('HibbettStores.csv', header=0)  # Read all stores from the previous run (the same file this run appends to)
collectedAddress = _storesCurrentRun['Address'].tolist()  # Previous-run addresses, used to skip duplicates

with open('HibbettStores.csv', 'a', newline='') as csvfile:
    scrapedLocations = list(collectedAddress)  # Seed with previous-run addresses to avoid duplicates
    writer = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Location', 'Address', 'City', 'State', 'Zipcode'])
    # driver = webdriver.PhantomJS(executable_path='C:/Users/u760979/Desktop/Python/Executables/phantomjs.exe')
    driver = webdriver.Firefox(executable_path='C:/Users/u760979/Desktop/Python/Executables/geckodriver.exe')
    url = 'https://www.hibbett.com/storedirectory'  # URL for the store directory
    driver.get(url)  # Retrieve the directory page
    time.sleep(5)

    '''---Get links from the state page---'''
    StatelinksFile = open('StatelinksFile.txt', 'w')
    linksList = driver.find_elements_by_xpath('//*[@id="primary"]/ul/li[*]/a')  # One link per state
    for l in linksList:
        link = l.get_attribute('href')
        print(link)
        StatelinksFile.write("%s\n" % link)  # Write state links to file
    StatelinksFile.close()
    time.sleep(20)
    '''---Get links from the city pages---'''
    stateLinkList = open('StatelinksFile.txt').readlines()  # Read all state links into a list
    CitylinksFile = open('CitylinksFile.txt', 'w')
    for sLink in stateLinkList:
        citydriver = webdriver.Firefox(executable_path='C:/Users/u760979/Desktop/Python/Executables/geckodriver.exe')
        citydriver.get(sLink.strip())  # Retrieve the state page; strip the trailing newline
        time.sleep(5)
        linksList = citydriver.find_elements_by_xpath('//*[@id="primary"]/ul/li[*]/a')  # One link per city
        for l in linksList:
            link = l.get_attribute('href')
            print(link)
            CitylinksFile.write("%s\n" % link)  # Write city links to file
        citydriver.quit()
    CitylinksFile.close()
    '''---Get locations from the city links---'''
    storeLinkList = open('CitylinksFile.txt').readlines()  # Read all city links into a list
    for link in storeLinkList:
        try:
            storeDriver = webdriver.Firefox(executable_path='C:/Users/u760979/Desktop/Python/Executables/geckodriver.exe')
            # storeDriver = webdriver.PhantomJS(executable_path='C:/Users/u760979/Desktop/Python/Executables/phantomjs.exe')
            storeDriver.get(link.strip())  # Retrieve the city page; strip the trailing newline
            time.sleep(15)
            Location = storeDriver.find_elements_by_class_name('name')
            fullAddress = storeDriver.find_elements_by_class_name('address')  # One block per store
            # Process addresses: each block looks like "Street\nCity, ST 12345"
            if fullAddress:
                for l, a in zip(Location, fullAddress):
                    location = l.text
                    a = a.text
                    Address = a.split('\n')[0]
                    cityStateZip = a.split('\n')[1]
                    City = cityStateZip.split(',')[0]
                    stateZip = cityStateZip.split(',')[1].strip()  # e.g. "AL 35004"
                    State = stateZip[:2]
                    Zipcode = stateZip[-5:]
                    if Address not in scrapedLocations:  # Skip addresses already collected
                        scrapedLocations.append(Address)
                        writer.writerow([location, Address, City, State, Zipcode])  # Write to file
                        print(location, Address, City, State, Zipcode)
                        csvfile.flush()
                storeDriver.quit()
                time.sleep(15)
            else:
                storeDriver.quit()
                time.sleep(15)
                continue
        except Exception as e:
            print(str(e))
            storeDriver.quit()
            print("Error in:", link)
            continue
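
The header notes the scraper "does not have auto-updating capability yet". A minimal sketch of one way to add it, assuming each run's CSV keeps the 'Address' column written above and the previous run's output is archived first; the file names below are illustrative, not part of the original script:

import pandas

# Hypothetical file names: the archived previous run and the current run.
previous = pandas.read_csv('HibbettStores_previous.csv', header=0)
current = pandas.read_csv('HibbettStores.csv', header=0)

prev_addresses = set(previous['Address'])
curr_addresses = set(current['Address'])

# Addresses present now but not before are newly listed stores;
# the reverse suggests stores that closed or were delisted.
opened = curr_addresses - prev_addresses
closed = prev_addresses - curr_addresses

print('Newly listed stores:', sorted(opened))
print('No longer listed:', sorted(closed))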