Skip to content

Commit

Permalink
Merge pull request #20 from JacobCallahan/things
Browse files Browse the repository at this point in the history
Add additional extraction options
  • Loading branch information
jyejare authored Jun 24, 2024
2 parents 5bbdd0c + 2720fcd commit 5ab760b
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 23 deletions.
4 changes: 3 additions & 1 deletion candore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(self, settings):
def list_endpoints(self):
return self.api_lister.lister_endpoints()

async def save_all_entities(self, mode, output_file, full):
async def save_all_entities(self, mode, output_file, full, max_pages=None, skip_percent=None):
"""Save all the entities to a json file
:param mode: Pre or Post
Expand All @@ -36,6 +36,8 @@ async def save_all_entities(self, mode, output_file, full):
async with Extractor(settings=self.settings, apilister=self.api_lister) as extractor:
if full:
extractor.full = True
extractor.max_pages = max_pages
extractor.skip_percent = skip_percent
data = await extractor.extract_all_entities()

if not data:
Expand Down
20 changes: 16 additions & 4 deletions candore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,21 @@ def apis(ctx):
@click.option("--mode", type=str, help="The mode must be 'pre' or 'post'")
@click.option("-o", "--output", type=str, help="The output file name")
@click.option("--full", is_flag=True, help="Extract data from all the pages of a component")
@click.option("--max-pages", type=int, help="The maximum number of pages to extract per entity")
@click.option("--skip-percent", type=int, help="The percentage of pages to skip per entity")
@click.pass_context
def extract(ctx, mode, output, full, max_pages, skip_percent):
    """Extract entity data and save it to a JSON file.

    :param ctx: Click context; ``ctx.parent.candore`` holds the Candore instance.
    :param mode: 'pre' or 'post' — which side of the upgrade is being captured.
    :param output: Output file name for the extracted data.
    :param full: When set, fetch every page of every component.
    :param max_pages: Upper bound on pages fetched per entity (None = no bound).
    :param skip_percent: Percentage of pages to skip per entity (None = skip none).
    """
    candore_obj = ctx.parent.candore
    # asyncio.run() creates and cleanly tears down a fresh event loop; the
    # get_event_loop()/run_until_complete pair is deprecated for this use.
    asyncio.run(
        candore_obj.save_all_entities(
            mode=mode,
            output_file=output,
            full=full,
            max_pages=max_pages,
            skip_percent=skip_percent,
        )
    )


@candore.command(help="Compare pre and post upgrade data")
Expand Down Expand Up @@ -86,9 +96,11 @@ def compare(ctx, pre, post, inverse, output, report_type, record_evs):
"e.g entity/5/description",
)
@click.option(
"--data-file", type=str, help="The data file from which to search the data on a given path"
"--data-file",
type=str,
help="The data file from which to search the data on a given path",
)
@click.option("--delimiter", type=str, default='/', help="Settings file path. Default is '/'")
@click.option("--delimiter", type=str, default="/", help="Settings file path. Default is '/'")
@click.pass_context
def reader(ctx, path, data_file, delimiter):
candore_obj = ctx.parent.candore
Expand Down
27 changes: 19 additions & 8 deletions candore/modules/comparator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json

from candore.modules.variations import Variations, Constants
from candore.utils import last_index_of_element, is_list_contains_dict
from candore.modules.variations import Constants
from candore.modules.variations import Variations
from candore.utils import is_list_contains_dict
from candore.utils import last_index_of_element


class Comparator:
Expand Down Expand Up @@ -29,7 +31,10 @@ def record_variation(self, pre, post, var_details=None):
big_key = [str(itm) for itm in self.big_key]
full_path = "/".join(big_key)
var_full_path = "/".join([itm for itm in self.big_key if not isinstance(itm, int)])
if var_full_path in self.variations.expected_variations or var_full_path in self.variations.skipped_variations:
if (
var_full_path in self.variations.expected_variations
or var_full_path in self.variations.skipped_variations
):
if self.record_evs:
variation = {
"pre": pre,
Expand All @@ -48,7 +53,10 @@ def record_constants(self, pre, post, var_details=None):
big_key = [str(itm) for itm in self.big_key]
full_path = "/".join(big_key)
var_full_path = "/".join([itm for itm in self.big_key if not isinstance(itm, int)])
if var_full_path in self.constants.expected_constants or var_full_path in self.constants.skipped_constants:
if (
var_full_path in self.constants.expected_constants
or var_full_path in self.constants.skipped_constants
):
if self.record_evs:
variation = {
"pre": pre,
Expand Down Expand Up @@ -93,19 +101,22 @@ def _is_data_type_list_contains_dict(self, pre, post):
self.compare_all_pres_with_posts(
pre_entity, post_entity, unique_key=pre_entity["id"]
)
post.remove(post_entity)
break
else:
key = list(pre_entity.keys())[0]
if pre_entity[key] == post_entity[key]:
if pre_entity[key] == post_entity.get(key):
self.compare_all_pres_with_posts(
pre_entity[key], post_entity[key], unique_key=key
)
del post_entity[key]
break
if "id" in pre_entity:
self.remove_path(pre_entity["id"])
else:
self.remove_path(pre_entity[list(pre_entity.keys())[0]])

def _is_data_type_list(self, pre, post, unique_key=""):

def custom_key(elem):
    """Sort key that tolerates None by mapping it to the literal string 'None'."""
    if elem is None:
        return 'None'
    return str(elem)

Expand All @@ -121,9 +132,9 @@ def custom_key(elem):
def compare_all_pres_with_posts(self, pre_data, post_data, unique_key="", var_details=None):
if unique_key:
self.big_key.append(unique_key)
if type(pre_data) is dict:
if isinstance(pre_data, dict):
self._is_data_type_dict(pre_data, post_data, unique_key=unique_key)
elif type(pre_data) is list:
elif isinstance(pre_data, list):
self._is_data_type_list(pre_data, post_data, unique_key=unique_key)
else:
if pre_data != post_data:
Expand Down
36 changes: 26 additions & 10 deletions candore/modules/extractor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio # noqa: F401
import math
from functools import cached_property

import aiohttp
Expand Down Expand Up @@ -68,9 +69,18 @@ async def fetch_page(self, page, _request):
page_entities = await self.paged_results(**_request)
return page_entities

async def fetch_all_pages(self, total_pages, _request):
async def fetch_all_pages(self, total_pages, _request, max_pages=None, skip_percent=None):
if max_pages:
stop = min(total_pages, max_pages)
else:
stop = total_pages
if skip_percent:
step = stop // math.ceil(stop * (100 - skip_percent) / 100)
else:
step = 1
tasks = []
for page in range(2, total_pages + 1):
print(f"Fetching {len(list(range(1, stop, step)))} more page(s).")
for page in range(1, stop, step):
task = asyncio.ensure_future(self.fetch_page(page, _request))
tasks.append(task)
responses = await asyncio.gather(*tasks)
Expand All @@ -96,15 +106,21 @@ async def fetch_component_entities(self, **comp_params):
return entity_data
else:
return entity_data
# If the entity has multiple pages, fetch them all
if self.full:
total_pages = results.get("total") // results.get("per_page") + 1
if total_pages > 1:
print(f"Endpoint {endpoint} has {total_pages} pages.")
total_pages = results.get("total") // results.get("per_page") + 1
if total_pages > 1:
print(f"Endpoint {endpoint} has {total_pages} pages.")
# If the entity has multiple pages, fetch them all
if self.full:
pages_data = await self.fetch_all_pages(total_pages, _request)
for page_entities in pages_data:
if page_entities:
entity_data.extend(page_entities)
elif self.max_pages or self.skip_percent:
pages_data = await self.fetch_all_pages(
total_pages, _request, self.max_pages, self.skip_percent
)
else:
return entity_data
for page_entities in pages_data:
if page_entities:
entity_data.extend(page_entities)
return entity_data

async def dependency_ids(self, dependency):
Expand Down

0 comments on commit 5ab760b

Please sign in to comment.