From 2752db744444a48b0a9189d174d25d40ad8a7531 Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:44:53 +0200 Subject: [PATCH 1/4] Create generators and functions to simplify "show.." functions --- scraper/visualize.py | 257 ++++++++++++++++++++++++------------------- 1 file changed, 144 insertions(+), 113 deletions(-) diff --git a/scraper/visualize.py b/scraper/visualize.py index 65bdb69a..5eff9a11 100644 --- a/scraper/visualize.py +++ b/scraper/visualize.py @@ -5,147 +5,178 @@ def show_id(id: str) -> None: print(f"Visualizing product with id: {id}") - data = format_data() - for category_name, category_info in data.items(): - for product_name, product_info in category_info.items(): - for website_name, website_info in product_info.items(): - if id == str(website_info["id"]): - fig = go.Figure() - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", - ) - - fig.show() - return + product_data = get_product_with_id(id) + product_name = product_data["name"] + product_info = product_data["info"] -def show_category(category: str) -> None: - print(f"Visualizing products in category: {category}") - data = format_data() + fig = go.Figure() + add_scatter_plot( + fig, + product_info["website_name"], + str(product_info["id"]), + product_info["currency"], + product_info["dates"], + product_info["prices"], + ) - for category_name, category_info in data.items(): - if category == category_name: - for product_name, product_info in category_info.items(): - fig = go.Figure() - for website_name, website_info in product_info.items(): - fig.add_trace( - go.Scatter( - 
name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", - ) + config_figure(fig, f"Price(s) of {product_name.upper()} - ID {product_info['id']}") + fig.show() + + +def show_category(category: str) -> None: + print(f"Visualizing products in category: {category.capitalize()}") + + for product_info in get_products_with_category(category): + product_name = product_info["name"] + fig = go.Figure() + + for website_info in product_info["websites"]: + add_scatter_plot( + fig, + website_info["website_name"], + str(website_info["id"]), + website_info["currency"], + website_info["dates"], + website_info["prices"], + ) - fig.show() - return + config_figure(fig, f"Price(s) of {product_name.upper()}") + fig.show() def show_name(name: str) -> None: - print(f"Visualizing product with name: {name}") - data = format_data() + print(f"Visualizing product with name: {name.capitalize()}") - for category_name, category_info in data.items(): - for product_name, product_info in category_info.items(): - if name == product_name: - fig = go.Figure() - for website_name, website_info in product_info.items(): - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", - ) + product_info = get_product_with_name(name) - fig.show() - return + fig = go.Figure() + for website_info in product_info["websites"]: + add_scatter_plot( + 
fig, + website_info["website_name"], + str(website_info["id"]), + website_info["currency"], + website_info["dates"], + website_info["prices"], + ) + + config_figure(fig, f"Price(s) of {name.upper()}") + fig.show() def show_all_products() -> None: print("Visualizing all products") - data = format_data() - for category_name, category_info in data.items(): - for product_name, product_info in category_info.items(): - fig = go.Figure() - for website_name, website_info in product_info.items(): - fig.add_trace( - go.Scatter( - name=website_name.capitalize(), - x=website_info["dates"], - y=website_info["prices"], - line={"color": WEBSITE_COLORS[website_name], "width": 2}, - hovertemplate="Price: %{y:.0f}", - ) - ) - - fig.update_traces(mode="markers+lines") - fig.update_layout( - title=f"Price(s) of {product_name.upper()}", - xaxis_title="Date", - yaxis_title="Price", - hovermode="x", - separators=".,", + for product_info in get_all_products(): + fig = go.Figure() + for website_info in product_info["websites"]: + add_scatter_plot( + fig, + website_info["website_name"], + str(website_info["id"]), + website_info["currency"], + website_info["dates"], + website_info["prices"], ) - fig.show() + config_figure(fig, f"Price(s) of {product_info['name'].upper()}") + fig.show() def format_data() -> dict: records_data = Filemanager.get_record_data() - data = {} + data = {"products": []} for category_name, category_info in records_data.items(): - data.update({category_name: {}}) for product_name, product_info in category_info.items(): - data[category_name].update({product_name: {}}) - for website_name, website_info in product_info.items(): + product_data = { + "name": product_name, + "category": category_name, + "websites": [], + } + for website_name, website_info in product_info.items(): dates = [datapoint["date"] for datapoint in website_info["datapoints"]] prices = [datapoint["price"] for datapoint in website_info["datapoints"]] - id = website_info["info"]["id"] - - 
data[category_name][product_name].update( - {website_name: {"dates": dates, "prices": prices, "id": id}} + product_data["websites"].append( + { + "website_name": website_name, + "id": website_info["info"]["id"], + "currency": website_info["info"]["currency"], + "dates": dates, + "prices": prices, + } ) + data["products"].append(product_data) + return data + + +def config_figure(figure: go.Figure, figure_title: str) -> None: + figure.update_traces(mode="markers+lines") + figure.update_layout( + title=figure_title, + xaxis_title="Date", + yaxis_title="Price", + hovermode="x", + separators=".,", + ) + + +def add_scatter_plot( + figure: go.Figure, + website_name: str, + id: str, + currency: str, + dates: list, + prices: list, +) -> None: + figure.add_trace( + go.Scatter( + name=f"{website_name.capitalize()} - {id}", + x=dates, + y=prices, + line={"color": WEBSITE_COLORS[website_name], "width": 2}, + hovertemplate="Price: %{y:.0f}" + f" {currency}", + ) + ) + + +def get_products_with_category(category_name: str) -> dict: + data = format_data() + + for product in data["products"]: + if product["category"] == category_name: + yield product + + +def get_product_with_id(id: str) -> dict: + data = format_data() + + for product_info in data["products"]: + for website_info in product_info["websites"]: + if id == str(website_info["id"]): + return { + "name": product_info["name"], + "category": product_info["category"], + "info": website_info, + } + + +def get_product_with_name(name: str) -> dict: + data = format_data() + + for product_info in data["products"]: + if product_info["name"].lower() == name.lower(): + return product_info + + +def get_all_products() -> dict: + data = format_data() + + for product_info in data["products"]: + yield product_info From 8fc34b274c333a66aead866e32599e35c964ea6e Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:54:51 +0200 Subject: [PATCH 2/4] Small change to generator function 
"get_products_with_category" When checking product category to the specified category, make both categories to lowercase --- scraper/visualize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scraper/visualize.py b/scraper/visualize.py index 5eff9a11..32bb2c52 100644 --- a/scraper/visualize.py +++ b/scraper/visualize.py @@ -149,9 +149,9 @@ def add_scatter_plot( def get_products_with_category(category_name: str) -> dict: data = format_data() - for product in data["products"]: - if product["category"] == category_name: - yield product + for product_info in data["products"]: + if product_info["category"].lower() == category_name.lower(): + yield product_info def get_product_with_id(id: str) -> dict: From 41648bee4a895e0fffa865395fa4ac02d861b7ee Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Fri, 16 Apr 2021 16:14:50 +0200 Subject: [PATCH 3/4] Format with Black --- main.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 16bb706e..4eb56b2e 100644 --- a/main.py +++ b/main.py @@ -36,7 +36,8 @@ def scrape(): # Create instances of class "Scraper" products = [ - scraper.Scraper(category, url) for category, url in zip(products_df["category"], products_df["url"]) + scraper.Scraper(category, url) + for category, url in zip(products_df["category"], products_df["url"]) ] # Scrape and save scraped data for each product (sequentially) @@ -52,13 +53,12 @@ def scrape_with_threads(): # Create instances of class "Scraper" products = [ - scraper.Scraper(category, url) for category, url in zip(products_df["category"], products_df["url"]) + scraper.Scraper(category, url) + for category, url in zip(products_df["category"], products_df["url"]) ] # Create threads - threads = [ - threading.Thread(target=product.scrape_info) for product in products - ] + threads = [threading.Thread(target=product.scrape_info) for product in products] # Start scraping on all threads for 
thread in threads: @@ -123,7 +123,9 @@ def hard_reset(): if __name__ == "__main__": logging.config.fileConfig( fname=f"{scraper.Filemanager.root_path}/scraper/logging.ini", - defaults={"logfilename": f"{scraper.Filemanager.root_path}/scraper/logfile.log"} + defaults={ + "logfilename": f"{scraper.Filemanager.root_path}/scraper/logfile.log" + }, ) main() From d7cf4bc3fd50dfa5a32bd7228b0a8c5f152a79c6 Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Fri, 16 Apr 2021 16:45:11 +0200 Subject: [PATCH 4/4] Change starting print-statement in functions "show_category" and "show_name" Lower instead of capitalize parameter in starting print-statement --- scraper/visualize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scraper/visualize.py b/scraper/visualize.py index 32bb2c52..b1367cc3 100644 --- a/scraper/visualize.py +++ b/scraper/visualize.py @@ -26,7 +26,7 @@ def show_id(id: str) -> None: def show_category(category: str) -> None: - print(f"Visualizing products in category: {category.capitalize()}") + print(f"Visualizing products in category: {category.lower()}") for product_info in get_products_with_category(category): product_name = product_info["name"] @@ -47,7 +47,7 @@ def show_category(category: str) -> None: def show_name(name: str) -> None: - print(f"Visualizing product with name: {name.capitalize()}") + print(f"Visualizing product with name: {name.lower()}") product_info = get_product_with_name(name)