diff --git a/main.py b/main.py index 16bb706e..4eb56b2e 100644 --- a/main.py +++ b/main.py @@ -36,7 +36,8 @@ def scrape(): # Create instances of class "Scraper" products = [ - scraper.Scraper(category, url) for category, url in zip(products_df["category"], products_df["url"]) + scraper.Scraper(category, url) + for category, url in zip(products_df["category"], products_df["url"]) ] # Scrape and save scraped data for each product (sequentially) @@ -52,13 +53,12 @@ def scrape_with_threads(): # Create instances of class "Scraper" products = [ - scraper.Scraper(category, url) for category, url in zip(products_df["category"], products_df["url"]) + scraper.Scraper(category, url) + for category, url in zip(products_df["category"], products_df["url"]) ] # Create threads - threads = [ - threading.Thread(target=product.scrape_info) for product in products - ] + threads = [threading.Thread(target=product.scrape_info) for product in products] # Start scraping on all threads for thread in threads: @@ -123,7 +123,9 @@ def hard_reset(): if __name__ == "__main__": logging.config.fileConfig( fname=f"{scraper.Filemanager.root_path}/scraper/logging.ini", - defaults={"logfilename": f"{scraper.Filemanager.root_path}/scraper/logfile.log"} + defaults={ + "logfilename": f"{scraper.Filemanager.root_path}/scraper/logfile.log" + }, ) main() diff --git a/scraper/visualize.py b/scraper/visualize.py index 65bdb69a..b1367cc3 100644 --- a/scraper/visualize.py +++ b/scraper/visualize.py @@ -5,147 +5,178 @@ def show_id(id: str) -> None: print(f"Visualizing product with id: {id}") - data = format_data() - for category_name, category_info in data.items(): - for product_name, product_info in category_info.items(): - for website_name, website_info in product_info.items(): - if id == str(website_info["id"]): - fig = go.Figure() - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", - ) - - fig.show() - return + product_data = get_product_with_id(id) + product_name = product_data["name"] + product_info = product_data["info"] -def show_category(category: str) -> None: - print(f"Visualizing products in category: {category}") - data = format_data() + fig = go.Figure() + add_scatter_plot( + fig, + product_info["website_name"], + str(product_info["id"]), + product_info["currency"], + product_info["dates"], + product_info["prices"], + ) - for category_name, category_info in data.items(): - if category == category_name: - for product_name, product_info in category_info.items(): - fig = go.Figure() - for website_name, website_info in product_info.items(): - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", - ) + config_figure(fig, f"Price(s) of {product_name.upper()} - ID {product_info['id']}") + fig.show() + + +def show_category(category: str) -> None: + print(f"Visualizing products in category: {category.lower()}") + + for product_info in get_products_with_category(category): + product_name = product_info["name"] + fig = go.Figure() + + for website_info in product_info["websites"]: + add_scatter_plot( + fig, + website_info["website_name"], + str(website_info["id"]), + website_info["currency"], + website_info["dates"], + website_info["prices"], + ) - fig.show() - return + config_figure(fig, f"Price(s) of {product_name.upper()}") + fig.show() def show_name(name: str) -> None: - print(f"Visualizing product with name: {name}") - data = format_data() + print(f"Visualizing product with name: {name.lower()}") - for category_name, category_info in data.items(): - for product_name, product_info in category_info.items(): - if name == product_name: - fig = go.Figure() - for website_name, website_info in product_info.items(): - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", - ) + product_info = get_product_with_name(name) - fig.show() - return + fig = go.Figure() + for website_info in product_info["websites"]: + add_scatter_plot( + fig, + website_info["website_name"], + str(website_info["id"]), + website_info["currency"], + website_info["dates"], + website_info["prices"], + ) + + config_figure(fig, f"Price(s) of {name.upper()}") + fig.show() def show_all_products() -> None: print("Visualizing all products") - data = format_data() - for category_name, category_info in data.items(): - for product_name, product_info in category_info.items(): - fig = go.Figure() - for website_name, website_info in product_info.items(): - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", + for product_info in get_all_products(): + fig = go.Figure() + for website_info in product_info["websites"]: + add_scatter_plot( + fig, + website_info["website_name"], + str(website_info["id"]), + website_info["currency"], + website_info["dates"], + website_info["prices"], ) - fig.show() + config_figure(fig, f"Price(s) of {product_info['name'].upper()}") + fig.show() def format_data() -> dict: records_data = Filemanager.get_record_data() - data = {} + data = {"products": []} for category_name, category_info in records_data.items(): - data.update({category_name: {}}) for product_name, product_info in category_info.items(): - data[category_name].update({product_name: {}}) - for website_name, website_info in product_info.items(): + product_data = { + "name": product_name, + "category": category_name, + "websites": [], + } + for website_name, website_info in product_info.items(): dates = [datapoint["date"] for datapoint in website_info["datapoints"]] prices = [datapoint["price"] for datapoint in website_info["datapoints"]] - id = website_info["info"]["id"] - - data[category_name][product_name].update( - {website_name: {"dates": dates, "prices": prices, "id": id}} + product_data["websites"].append( + { + "website_name": website_name, + "id": website_info["info"]["id"], + "currency": website_info["info"]["currency"], + "dates": dates, + "prices": prices, + } ) + data["products"].append(product_data) + return data + + +def config_figure(figure: go.Figure, figure_title: str) -> None: + figure.update_traces(mode="markers+lines") + figure.update_layout( + title=figure_title, + xaxis_title="Date", + yaxis_title="Price", + hovermode="x", + separators=".,", + ) + + +def add_scatter_plot( + figure: go.Figure, + website_name: str, + id: str, + currency: str, + dates: list, + prices: list, +) -> None: + figure.add_trace( + go.Scatter( + name=f"{website_name.capitalize()} - {id}", + x=dates, + y=prices, + line={"color": WEBSITE_COLORS[website_name], "width": 2}, + hovertemplate="Price: %{y:.0f}" + f" {currency}", + ) + ) + + +def get_products_with_category(category_name: str) -> dict: + data = format_data() + + for product_info in data["products"]: + if product_info["category"].lower() == category_name.lower(): + yield product_info + + +def get_product_with_id(id: str) -> dict: + data = format_data() + + for product_info in data["products"]: + for website_info in product_info["websites"]: + if id == str(website_info["id"]): + return { + "name": product_info["name"], + "category": product_info["category"], + "info": website_info, + } + + +def get_product_with_name(name: str) -> dict: + data = format_data() + + for product_info in data["products"]: + if product_info["name"].lower() == name.lower(): + return product_info + + +def get_all_products() -> dict: + data = format_data() + + for product_info in data["products"]: + yield product_info