Skip to content

Commit

Permalink
add proxy manager in scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
krankir committed Jul 14, 2023
1 parent 60b2981 commit 6258f00
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 1 deletion.
1 change: 1 addition & 0 deletions mvideo_scrap_bot/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ uvicorn==0.22.0
vine==5.0.0
wcwidth==0.2.6
yarl==1.9.2
proxy-manager-g4==1.1.0
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ uvicorn==0.22.0
vine==5.0.0
wcwidth==0.2.6
yarl==1.9.2
proxy-manager-g4==1.1.0
1 change: 1 addition & 0 deletions scrap_1_hour/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ uvicorn==0.22.0
vine==5.0.0
wcwidth==0.2.6
yarl==1.9.2
proxy-manager-g4==1.1.0
2 changes: 1 addition & 1 deletion scrap_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@

class Item(BaseModel):
modelName: str
description: str
description: str = 'Описание отсутствует'
rating: dict[str, float]
10 changes: 10 additions & 0 deletions scrap_data/scrap_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
cookies, headers, cookies_price, headers_price,
)
from scrap_data.models import Item
from proxy_manager_g4 import ProxyManager
from proxy_manager_g4.consts import PROTOCOL_HTTPS


# Source of anonymous, HTTPS-capable proxies (proxy-manager-g4).
proxy_manager = ProxyManager(protocol=PROTOCOL_HTTPS, anonymity=True)

# A single proxy is picked once, when this module is imported, and the same
# mapping is reused by every request that passes ``proxies=proxy``.
pr = proxy_manager.get_random()

# A ``proxies=`` mapping is keyed by the URL scheme of the outgoing request
# and maps to a full proxy URL. The previous ``{pr.ip: pr.port}`` mapping has
# no scheme key, so no request would ever be routed through the proxy.
# NOTE(review): assumes the HTTP client is `requests` and that the proxy is
# reached over plain HTTP (CONNECT tunnelling for https targets) - confirm.
_proxy_url = f'http://{pr.ip}:{pr.port}'
proxy = {'http': _proxy_url, 'https': _proxy_url}


class ScrapDataProduct:
Expand All @@ -29,6 +37,7 @@ def scrap(self):
cookies=cookies,
headers=headers,
timeout=10,
proxies=proxy
)
products_infos = Item.parse_obj(response.json()['body'])
data_dict = {
Expand All @@ -55,6 +64,7 @@ def scrap_price(self):
cookies=cookies_price,
headers=headers_price,
timeout=10,
proxies=proxy
)
price = (response_pr.json()).get('body').get('materialPrices')[0].get(
'price').get('salePrice')
Expand Down

0 comments on commit 6258f00

Please sign in to comment.