-
Notifications
You must be signed in to change notification settings - Fork 0
/
lkq.py
53 lines (38 loc) · 1.82 KB
/
lkq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""
This file scrapes the HTML from LKQ car listings. It iterates through a list
of locations, grabs the info from each respective page, and exports the
cleaned data into a CSV file per location. It then inserts the data from
those CSVs into my PSQL DB.
"""
from lkq_utils import Lkq_insight as Lkq
# Scrape every location in Lkq.locations with one shared browser, write one
# CSV per location that has listings, then load the CSVs into the database.
browser = Lkq.create_browser()
try:
    for location in Lkq.locations:
        # Lkq.locations is indexed by the location key used for navigation;
        # the looked-up value is what gets printed and passed to
        # organize_data.
        location_name = Lkq.locations[location]
        print(f'Starting process for {location_name}')

        Lkq.navigate_browser(browser, 'honda insight', location)
        market_soup = Lkq.get_html(browser)
        (title_list, url_list, color_list, vin_list, section_list,
         stocknum_list, available_list, img_list) = Lkq.parse_html(market_soup)

        # Nothing to clean or export for this location; move on to the next.
        if not title_list:
            print(f'No Listings for {location_name}')
            continue

        clean_title_list = Lkq.clean_titles(title_list)
        clean_url_list = Lkq.clean_urls(url_list)
        clean_color_list = Lkq.clean_colors(color_list)
        clean_vin_list = Lkq.clean_vins(vin_list)
        (clean_section_list, clean_row_list,
         clean_space_list) = Lkq.clean_sections(section_list)
        # NOTE(review): clean_stocknum_list is computed but never passed to
        # organize_data -- confirm whether stock numbers should be exported.
        clean_stocknum_list = Lkq.clean_stocknums(stocknum_list)
        clean_available_date_list = Lkq.clean_availables(available_list)
        # NOTE(review): clean_img_list is computed but the raw img_list is
        # what organize_data receives below. This looks unintentional; kept
        # as-is because organize_data's contract is not visible here.
        clean_img_list = Lkq.clean_imgs(img_list)

        vehicles_list = Lkq.organize_data(
            location_name, clean_title_list, clean_url_list,
            clean_color_list, clean_vin_list,
            clean_section_list, clean_row_list,
            clean_space_list, clean_available_date_list,
            img_list)
        Lkq.data_to_csv(vehicles_list, location)
        print(f'Successfully Scraped for {location_name}')
finally:
    # Always release the browser exactly once, whether the loop finished or
    # raised. This replaces the old check for the hard-coded key
    # 'milwaukee-1256', which only worked while that key was the last entry
    # in Lkq.locations and leaked the browser on any error.
    browser.quit()

# Runs only when the scrape loop completed without raising.
Lkq.csv_to_db()