Merge pull request #134 from Crinibus/dev
Version v1.1
Crinibus authored Apr 13, 2021
2 parents b307806 + 6072d11 commit 16d194b
Showing 11 changed files with 169 additions and 61 deletions.
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2020 Crinibus
Copyright (c) 2021 Crinibus

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
45 changes: 40 additions & 5 deletions README.md
@@ -9,6 +9,7 @@
- [User settings](#user-settings)
- [Visualize data](#visualize-data)
- [Command examples](#command-examples)
- [Clean up data](#clean-up-data)

<br/>

@@ -26,6 +27,16 @@ Feel free to fork the project and create a pull request with new features or ref
<br/>


## UPDATE TO HOW DATA IS STORED IN V1.1
In version v1.1, I have changed how data is stored in "records.json": "dates" under each product has been changed to "datapoints", which is now a list of dicts with "date" and "price" keys. <br/>
If you want to update your data to be compatible with version v1.1, open an interactive Python session in the root of this repository and run the following commands:
```
>>> from scraper.format import Format
>>> Format.format_old_records_to_new()
```
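
For reference, the change to each product entry in "records.json" looks roughly like this (the date and price below are made-up example values):
```
"dates": {"2021-04-13": {"price": 999.0}}
```
becomes
```
"datapoints": [{"date": "2021-04-13", "price": 999.0}]
```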

<br/>

## First setup <a name="first-setup"></a>
Clone this repository and move into the repository:
@@ -44,7 +55,7 @@ pip3 install -r requirements.txt


## Start from scratch <a name="start-scratch"></a>
If you want to start from scratch with no data in the records.json file, then just run the following command:
If you want to start from scratch with no data in the records.json and products.csv files, then just run the following command:
```
python3 main.py --hard-reset
```
Expand All @@ -57,7 +68,7 @@ If you just want to reset your data for each product, just delete all data-point
```
python3 main.py --reset
```
This deletes all the data inside each product, such as id, url and dates with prices.
This deletes all the data inside each product, such as id, url and all datapoints.
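
Going by the reset logic in main.py (shown further down in this commit), each product entry should afterwards look something like this sketch:
```
"info": {"id": "", "url": "", "currency": ""},
"datapoints": []
```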

<br/>

@@ -97,7 +108,7 @@ python3 main.py -a -c <category> -u <url> -c <category2> -u <url2>

**OBS**: The category can only be one word, so add an underscore instead of a space if needed.<br/>
**OBS**: The url must have the "https://" part.<br/>
**OBS**: If an error occurs when adding a product, it might be because the url has a "&" in it. When this happens, just put quotation mark around the url. This should solve the problem. If it doesn't, then submit an issue.<br/>
**OBS**: If an error occurs when adding a product, it might be because the url has a "&" in it. When this happens, just put quotation marks around the url. This should solve the problem. If it doesn't, then submit an issue.<br/>
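
For example, with a made-up url containing a "&" (domain and query parameters are hypothetical):
```
python3 main.py -a -c gpu -u "https://www.example.com/product?id=123&color=black"
```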

<br/>

@@ -147,7 +158,7 @@ To visualize your data, just run main.py with the ```-v``` or ```--visualize```
- ```-va``` or ```--visualize-all``` to visualize all your products
- ```-vc [<category> [<category> ...]]``` or ```--visualize-category [<category> [<category> ...]]``` to visualize all products in one or more categories
- ```-id [<id> [<id> ...]]``` or ```--visualize-id [<id> [<id> ...]]``` to visualize one or more products with the specified id(s)

- ```-vn [<name> [<name> ...]]``` or ```--visualize-name [<name> [<name> ...]]``` to visualize one or more products with the specified name(s)


### Command examples <a name="command-examples"></a>
@@ -178,7 +189,31 @@ To show graphs for all products in one category, run the following command where
```
python3 main.py -v -vc <category>
```

For multiple categories, just add another flag, like so:
For multiple categories, just add another category, like so:
```
python3 main.py -v -vc <category> <category>
```


**Show graphs for products with a specific name**

To show graphs for product(s) with a specific name, run the following command where ```<name>``` is the name of the product(s) you want graphs for:
```
python3 main.py -v -vn <name>
```

For multiple products with different names, just add another name, like so:
```
python3 main.py -v -vn <name> <name2>
```

If the name of a product has multiple words in it, then just add quotation marks around the name.
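
For example (the product name here is hypothetical):
```
python3 main.py -v -vn "logitech g502"
```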

<br/>


## Clean up data <a name="clean-up-data"></a>
If you want to clean up your data, meaning you want to remove unnecessary datapoints (datapoints that have the same price as the datapoints immediately before and after them), then run the following command:
```
python3 main.py --clean-data
```
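
As a sketch of what this does (dates and prices are made-up): a list of datapoints like
```
[{"date": "2021-04-10", "price": 999.0},
 {"date": "2021-04-11", "price": 999.0},
 {"date": "2021-04-12", "price": 999.0},
 {"date": "2021-04-13", "price": 899.0}]
```
would be reduced to
```
[{"date": "2021-04-10", "price": 999.0},
 {"date": "2021-04-12", "price": 999.0},
 {"date": "2021-04-13", "price": 899.0}]
```
since the datapoint on 2021-04-11 has the same price as the datapoints directly before and after it; the first and last datapoints are always kept.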
10 changes: 7 additions & 3 deletions main.py
@@ -7,6 +7,9 @@
def main():
args = scraper.argparse_setup()

if args.clean_data:
scraper.clean_data()

if args.visualize:
visualize(args)

@@ -76,6 +79,7 @@ def add_products(args):


def visualize(args):
print("Visualizing...")
if args.show_all:
scraper.show_all_products()

@@ -102,7 +106,7 @@ def reset():
for product in category.values():
for website in product.values():
website["info"] = {"id": "", "url": "", "currency": ""}
website["dates"] = {}
website["datapoints"] = []

scraper.Filemanager.save_record_data(data)

@@ -118,8 +122,8 @@ def hard_reset():

if __name__ == "__main__":
logging.config.fileConfig(
fname=f"{scraper.Filemanager.get_root_path()}/scraper/logging.ini",
defaults={"logfilename": f"{scraper.Filemanager.get_root_path()}/scraper/logfile.log"}
fname=f"{scraper.Filemanager.root_path}/scraper/logging.ini",
defaults={"logfilename": f"{scraper.Filemanager.root_path}/scraper/logfile.log"}
)

main()
1 change: 1 addition & 0 deletions scraper/__init__.py
@@ -3,5 +3,6 @@
from .add_product import add_product
from .filemanager import Filemanager
from .visualize import show_id, show_category, show_all_products, show_name
from .clean_data import clean_data

__author__ = "Crinibus"
63 changes: 37 additions & 26 deletions scraper/add_product.py
@@ -19,54 +19,65 @@ def add_product(category, url) -> None:
new_product = Scraper(category, url)
new_product.scrape_info()

is_product_added = add_product_to_records(new_product)
product_exists = check_if_product_exits(new_product)

if is_product_added:
if not product_exists:
add_product_to_records(new_product)
Filemanager.add_product_to_csv(new_product.category, new_product.url)
new_product.save_info()
else:
logger.info("Adding product cancelled")
user_input = input(
"A product with the same name and from the same website already exist in your data, do you want to override this product? (y/n) > "
)
if user_input.lower() in ("y", "yes"):
print("Overriding product...")
add_product_to_records(new_product)
Filemanager.add_product_to_csv(new_product.category, new_product.url)
new_product.save_info()
else:
print("Product was not added nor overrided")
logger.info("Adding product cancelled")


def add_product_to_records(product: Scraper) -> bool:
def check_if_product_exits(product: Scraper) -> bool:
data = Filemanager.get_record_data()

category_exist = True if data.get(product.category) else False
# Check category
if data.get(product.category):
# Check product name
if data[product.category].get(product.info.name):
# Check product website name
if data[product.category][product.info.name].get(product.website_name):
return True

if category_exist:
product_name_exist = (
True if data[product.category].get(product.info.name) else False
)
if product_name_exist:
product_and_website_exist = (
True
if data[product.category][product.info.name].get(product.website_name)
else False
)
return False

if product_and_website_exist:
user_input = input(
"A product with the same name and from the same website already exist in your data, do you want to override this product? (y/n) > "
)
if user_input.lower() in ("n", "no"):
print("Product was not overridden")
return False

def add_product_to_records(product: Scraper) -> None:
data = Filemanager.get_record_data()

if data.get(product.category):
if data[product.category].get(product.info.name):
data[product.category][product.info.name].update(
{product.website_name: {"info": {}, "dates": {}}}
{product.website_name: {"info": {}, "datapoints": []}}
)
else:
data[product.category].update(
{product.info.name: {product.website_name: {"info": {}, "dates": {}}}}
{
product.info.name: {
product.website_name: {"info": {}, "datapoints": []}
}
}
)
else:
data.update(
{
product.category: {
product.info.name: {product.website_name: {"info": {}, "dates": {}}}
product.info.name: {
product.website_name: {"info": {}, "datapoints": []}
}
}
}
)

Filemanager.save_record_data(data)
return True
9 changes: 8 additions & 1 deletion scraper/arguments.py
@@ -90,7 +90,14 @@ def argparse_setup() -> ArgumentParser.parse_args:
type=str,
nargs="*",
dest="visualize_names",
metavar="name"
metavar="name",
)

parser.add_argument(
"--clean-data",
help="clean data, so unnecessary datapoints is removed from records",
action="store_true",
dest="clean_data",
)

validate_arguments(parser)
35 changes: 35 additions & 0 deletions scraper/clean_data.py
@@ -0,0 +1,35 @@
from scraper.filemanager import Filemanager


def clean_data() -> None:
print("Cleaning data...")
records_data = Filemanager.get_record_data()

for category_info in records_data.values():
for product_info in category_info.values():
for website_info in product_info.values():
datapoints = website_info["datapoints"]

new_datapoints = []

for index, datapoint in enumerate(datapoints):
if index in (0, len(datapoints) - 1):
new_datapoints.append(datapoint)
continue

previous_datapoint = datapoints[index - 1]
next_datapoint = datapoints[index + 1]

# Skip unnecessary datapoints
if (
datapoint["price"] == previous_datapoint["price"]
and datapoint["price"] == next_datapoint["price"]
):
continue

new_datapoints.append(datapoint)

website_info["datapoints"] = new_datapoints

Filemanager.save_record_data(records_data)
print("Done cleaning data")
29 changes: 10 additions & 19 deletions scraper/filemanager.py
@@ -6,10 +6,8 @@


class Filemanager:
@staticmethod
def get_root_path() -> str:
"""Return root path of this repository"""
return pathlib.Path(__file__).parent.parent.absolute()
# root path of this repository
root_path = pathlib.Path(__file__).parent.parent.absolute()

@staticmethod
def read_json(filename: str) -> dict:
@@ -35,37 +33,30 @@ def clear_csv(filename: str):

@staticmethod
def get_record_data() -> dict:
data = Filemanager.read_json(
f"{Filemanager.get_root_path()}/scraper/records.json"
)
data = Filemanager.read_json(f"{Filemanager.root_path}/scraper/records.json")
return data

@staticmethod
def save_record_data(data: dict) -> None:
Filemanager.write_json(
f"{Filemanager.get_root_path()}/scraper/records.json", data
)
Filemanager.write_json(f"{Filemanager.root_path}/scraper/records.json", data)

@staticmethod
def get_products_data() -> pd.DataFrame:
df = pd.read_csv(
f"{Filemanager.get_root_path()}/scraper/products.csv", sep=",", header=0
f"{Filemanager.root_path}/scraper/products.csv", sep=",", header=0
)
return df

@staticmethod
def add_product_to_csv(category: str, url: str) -> None:
data = [category, url]
Filemanager.append_csv(
f"{Filemanager.get_root_path()}/scraper/products.csv", data
)
Filemanager.append_csv(f"{Filemanager.root_path}/scraper/products.csv", data)

@staticmethod
def clear_product_csv():
Filemanager.clear_csv(f"{Filemanager.get_root_path()}/scraper/products.csv")
Filemanager.add_product_to_csv(
"category", "url"
) # header for csv to use in pandas.DataFrame
Filemanager.clear_csv(f"{Filemanager.root_path}/scraper/products.csv")
# header for csv to use in pandas.DataFrame
Filemanager.add_product_to_csv("category", "url")


class Config:
@@ -83,7 +74,7 @@ def write(filename: str, config: configparser.ConfigParser) -> None:
@staticmethod
def get_user_product_names() -> configparser.SectionProxy:
"""Get section 'ChangeName' from settings.ini file"""
config = Config.read(f"{Filemanager.get_root_path()}/scraper/settings.ini")
config = Config.read(f"{Filemanager.root_path}/scraper/settings.ini")
return config["ChangeName"]

@staticmethod
21 changes: 20 additions & 1 deletion scraper/format.py
@@ -1,6 +1,6 @@
from dataclasses import dataclass
import logging
from scraper.filemanager import Config
from scraper.filemanager import Config, Filemanager


@dataclass
@@ -57,3 +57,22 @@ def get_user_product_name(product_name: str) -> str:
return user_product_names[value_key]

return product_name

@staticmethod
def format_old_records_to_new() -> None:
"""Format records data from pre v1.1 to new records data format in v1.1"""
records_data = Filemanager.get_record_data()

for category_info in records_data.values():
for product_info in category_info.values():
for website_info in product_info.values():
website_info.update({"datapoints": []})

for date_name, date_info in website_info["dates"].items():
website_info["datapoints"].append(
{"date": date_name, "price": float(date_info["price"])}
)

website_info.pop("dates")

Filemanager.save_record_data(records_data)
