-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
76 lines (55 loc) · 2.07 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#Python Graphics Card Web Scraper by Daniel Kantor (2021)
#Compatible with Newegg website
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv
#Keywords that introduce a chipset name in a product title; the token that
#follows one of them is taken as the model number (e.g. "RTX" + "3080").
CHIPSET_FAMILIES = ('GTX', 'RTX', 'GT', 'RX')
CSV_FILENAME = 'products.csv'
TEXT_FILENAME = 'products.txt'
CSV_HEADER = 'Brand, Chipset, Price, Number of Ratings, Title \n'


def extract_chipset(title_tokens):
    """Return the chipset named in a tokenized product title.

    The chipset is a family keyword (see CHIPSET_FAMILIES) concatenated with
    the token that immediately follows it, e.g. ['RTX', '3080'] -> 'RTX3080'.
    When several keywords appear, the last one that has a following token
    wins. A keyword that is the final token has no model number after it and
    is ignored (the original code raised IndexError in that case).

    Returns an empty string when no chipset is found.
    """
    chipset = ""
    #Pairing each token with its successor never reads past the end of the list.
    for family, model in zip(title_tokens, title_tokens[1:]):
        if family in CHIPSET_FAMILIES:
            chipset = family + model
    return chipset


def text_or_unknown(tag, strip_commas=False):
    """Return the text of a parsed tag, or "Unknown" when the tag is None.

    tag is the result of a BeautifulSoup find() call (None when nothing
    matched). With strip_commas=True every comma is removed from the text.
    """
    if tag is None:
        return "Unknown"
    text = tag.text
    if strip_commas:
        text = text.replace(',', '')
    return text


def build_row(graphics_card):
    """Build one CSV row from an "item-container" element.

    Returns [brand, chipset, price, number of ratings, title], where brand is
    the first space-separated token of the title. Fields that cannot be found
    are reported as "Unknown".
    """
    #Official title of product
    title = text_or_unknown(
        graphics_card.find('a', class_="item-title"), strip_commas=True)
    #Number of ratings
    num_rating = text_or_unknown(graphics_card.find('a', class_="item-rating"))
    #Price
    price = text_or_unknown(
        graphics_card.find('li', class_="price-current"), strip_commas=True)
    #Splits official title; the first token is used as the brand
    title_list = title.split(" ")
    return [title_list[0], extract_chipset(title_list), price, num_rating, title]


def csv_to_text(csv_path, text_path):
    """Rewrite the CSV at csv_path as space-separated lines in text_path."""
    #newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_path, "r", newline='') as my_input_file, \
            open(text_path, "w") as my_output_file:
        for row in csv.reader(my_input_file):
            my_output_file.write(" ".join(row) + '\n')


def main():
    """Prompt for a Newegg listing URL, scrape it, save and print the results.

    Writes products.csv (one row per "item-container" element), converts it
    to products.txt, prints that text and waits for Enter before exiting.
    """
    url_scrape = input("Enter Newegg website address: ")
    print("\n")
    #Example link: "https://www.newegg.com/Desktop-Graphics-Cards/SubCategory/ID-48?Tid=7709"
    #The with-block closes the HTTP response even if read() fails.
    with urlopen(url_scrape) as request_page:
        page_html = request_page.read()
    html_soup = BeautifulSoup(page_html, 'html.parser')

    with open(CSV_FILENAME, 'w', newline='') as f:
        f.write(CSV_HEADER)
        #csv.writer quotes fields containing commas, quotes or newlines, so a
        #rating such as "(1,234)" no longer shifts the columns of its row.
        writer = csv.writer(f, lineterminator='\n')
        for graphics_card in html_soup.find_all(class_="item-container"):
            writer.writerow(build_row(graphics_card))

    csv_to_text(CSV_FILENAME, TEXT_FILENAME)
    with open(TEXT_FILENAME, "r") as u:
        print(u.read())
    input("----------------------------------\nPress Enter to exit the program")


if __name__ == "__main__":
    main()