From 13c7af3dd5c21f03b1725fd2c41923e94c353628 Mon Sep 17 00:00:00 2001 From: Lukas Gold Date: Tue, 2 Jul 2024 16:04:21 +0200 Subject: [PATCH] moving the setting of the parallel argument into the Params object(s) --- src/osw/core.py | 51 ++++++++++++++++++++++++++++----------- src/osw/wiki_tools.py | 27 +++++++-------------- src/osw/wtsite.py | 56 ++++++++++++++++++++++++++++++------------- 3 files changed, 85 insertions(+), 49 deletions(-) diff --git a/src/osw/core.py b/src/osw/core.py index da1691a4..fef895ed 100644 --- a/src/osw/core.py +++ b/src/osw/core.py @@ -620,13 +620,22 @@ def load_entity( if isinstance(entity_title, OSW.LoadEntityParam): # LoadEntityParam return OSW.LoadEntityResult(entities=entities) - class StoreEntityParam(model.OswBaseModel): + class StoreEntityParam(OswBaseModel): entities: Union[OswBaseModel, List[OswBaseModel]] namespace: Optional[str] - parallel: Optional[bool] = False + parallel: Optional[bool] = None meta_category_title: Optional[str] = "Category:Category" debug: Optional[bool] = False + def __init__(self, **data): + super().__init__(**data) + if not isinstance(self.entities, list): + self.entities = [self.entities] + if len(self.entities) > 5 and self.parallel is None: + self.parallel = True + if self.parallel is None: + self.parallel = False + def store_entity( self, param: Union[StoreEntityParam, OswBaseModel, List[OswBaseModel]] ) -> None: @@ -645,8 +654,6 @@ def store_entity( param.entities = [param.entities] max_index = len(param.entities) - if max_index >= 5: - param.parallel = True meta_category = self.site.get_page( WtSite.GetPageParam(titles=[param.meta_category_title]) @@ -713,14 +720,23 @@ def store_entity_( for i, e in enumerate(param.entities) ] - class DeleteEntityParam(model.OswBaseModel): - entities: List[model.OswBaseModel] + class DeleteEntityParam(OswBaseModel): + entities: Union[OswBaseModel, List[OswBaseModel]] comment: Optional[str] = None - parallel: Optional[bool] = False + parallel: Optional[bool] = None debug: Optional[bool] = False + def __init__(self, **data): + super().__init__(**data) + if not isinstance(self.entities, list): + self.entities = [self.entities] + if len(self.entities) > 5 and self.parallel is None: + self.parallel = True + if self.parallel is None: + self.parallel = False + def delete_entity( - self, entity: Union[model.OswBaseModel, DeleteEntityParam], comment: str = None + self, entity: Union[OswBaseModel, DeleteEntityParam], comment: str = None ): """Deletes the given entity/entities from the OSW instance.""" if not isinstance(entity, OSW.DeleteEntityParam): @@ -730,8 +746,6 @@ def delete_entity( entity = OSW.DeleteEntityParam(entities=[entity]) if comment is not None: entity.comment = comment - if len(entity.entities) >= 5: - entity.parallel = True def delete_entity_(entity, comment_: str = None): """Deletes the given entity from the OSW instance. @@ -778,12 +792,21 @@ def delete_entity_(entity, comment_: str = None): else: _ = [delete_entity_(e, entity.comment) for e in entity.entities] - class QueryInstancesParam(model.OswBaseModel): - categories: List[Union[str, OswBaseModel]] - parallel: Optional[bool] = False + class QueryInstancesParam(OswBaseModel): + categories: Union[Union[str, OswBaseModel], List[Union[str, OswBaseModel]]] + parallel: Optional[bool] = None debug: Optional[bool] = False limit: Optional[int] = 1000 + def __init__(self, **data): + super().__init__(**data) + if not isinstance(self.categories, list): + self.categories = [self.categories] + if len(self.categories) > 5 and self.parallel is None: + self.parallel = True + if self.parallel is None: + self.parallel = False + def query_instances( self, category: Union[str, OswBaseModel, OSW.QueryInstancesParam] ) -> List[str]: @@ -794,7 +817,7 @@ def get_page_title(category_: Union[str, OswBaseModel]) -> str: ) if isinstance(category_, str): return category_.split(":")[-1] # page title w/o namespace - elif isinstance(category_, model.OswBaseModel): + elif isinstance(category_, OswBaseModel): type_ = getattr(category_, "type", None) if type_: full_page_title = type_[0] diff --git a/src/osw/wiki_tools.py b/src/osw/wiki_tools.py index 8fd71714..b75b1382 100644 --- a/src/osw/wiki_tools.py +++ b/src/osw/wiki_tools.py @@ -117,17 +117,18 @@ class SearchParam(OswBaseModel): """Search parameters for semantic and prefix search""" query: Union[str, List[str]] - parallel: Optional[bool] = False # is set to true if query is a list longer than 5 + parallel: Optional[bool] = None # is set to true if query is a list longer than 5 debug: Optional[bool] = True limit: Optional[int] = 1000 - # todo: @Simon: Bad style? Better to make it explicit in every function using it? def __init__(self, **data): super().__init__(**data) - if isinstance(self.query, str): + if not isinstance(self.query, list): self.query = [self.query] - if len(self.query) > 5: + if len(self.query) > 5 and self.parallel is None: self.parallel = True + if self.parallel is None: + self.parallel = False def prefix_search( @@ -148,14 +149,10 @@ def prefix_search( page_list : List of page titles """ - if isinstance(text, str): - query = SearchParam(query=[text]) - elif isinstance(text, list): + if not isinstance(text, SearchParam): query = SearchParam(query=text) else: query = text - if len(query.query) > 5: - query.parallel = True def prefix_search_(single_text): page_list = list() @@ -208,12 +205,8 @@ def semantic_search( page_list: List of page titles """ - if isinstance(query, str): - query = SearchParam(query=[query]) - elif isinstance(query, list): + if not isinstance(query, SearchParam): query = SearchParam(query=query) - if len(query.query) > 5: - query.parallel = True def semantic_search_(single_query): page_list = list() @@ -321,14 +314,10 @@ def get_file_info_and_usage( Use the sandbox to design and test the queries: https://demo.open-semantic-lab.org/wiki/Special:ApiSandbox """ - if isinstance(title, str): - query = SearchParam(query=[title], debug=False) - elif isinstance(title, list): + if not isinstance(title, SearchParam): query = SearchParam(query=title, debug=False) else: # SearchParam query = title - if len(query.query) > 5: - query.parallel = True def get_file_info_and_usage_(single_title): api_request_result = site.api( diff --git a/src/osw/wtsite.py b/src/osw/wtsite.py index aac4e3ba..b6da738c 100644 --- a/src/osw/wtsite.py +++ b/src/osw/wtsite.py @@ -193,6 +193,15 @@ class GetPageParam(model.OswBaseModel): debug: Optional[bool] = False """Whether to print debug messages""" + def __init__(self, **data): + super().__init__(**data) + if not isinstance(self.titles, list): + self.titles = [self.titles] + if len(self.titles) > 5 and self.parallel is None: + self.parallel = True + if self.parallel is None: + self.parallel = False + class GetPageResult(model.OswBaseModel): pages: List["WtPage"] """List of pages that have been downloaded""" @@ -210,12 +219,7 @@ def get_page(self, param: GetPageParam) -> GetPageResult: param: GetPageParam object """ - # ensure that titles is a list - if not isinstance(param.titles, list): - param.titles = [param.titles] max_index = len(param.titles) - if param.parallel is None and max_index >= 5: - param.parallel = True exeptions = [] pages = [] @@ -438,6 +442,15 @@ class UploadPageParam(model.OswBaseModel): class Config: arbitrary_types_allowed = True + def __init__(self, **data): + super().__init__(**data) + if not isinstance(self.pages, list): + self.pages = [self.pages] + if len(self.pages) > 5 and self.parallel is None: + self.parallel = True + if self.parallel is None: + self.parallel = False + def upload_page( self, param: Union[UploadPageParam, "WtPage", List["WtPage"]], @@ -449,14 +462,10 @@ def upload_page( param: UploadPageParam object or a WtPage object or a list of WtPage objects. """ - if isinstance(param, WtPage): - param = WtSite.UploadPageParam(pages=[param]) - elif isinstance(param, list): + if not isinstance(param, WtSite.UploadPageParam): param = WtSite.UploadPageParam(pages=param) max_index = len(param.pages) - if max_index >= 5: - param.parallel = True def upload_page_(page, index: int = None): # Before uploading: Check if the page is uploaded to the WtSite that is @@ -486,11 +495,13 @@ class CopyPagesParam(model.OswBaseModel): source_site: "WtSite" """The source site to copy the pages from""" - existing_pages: List[str] + existing_pages: Union[str, List[str]] """The full page title of the pages on the source site""" overwrite: Optional[bool] = False """If true, pages will be overwritten if they already exists on the target site""" + parallel: Optional[bool] = None + """If true, uploads the pages in parallel.""" comment: Optional[str] = None """Edit comment for the page history. If set to none, will be replaced with '[bot edit] Copied from {source_site.host}'.""" @@ -498,6 +509,15 @@ class CopyPagesParam(model.OswBaseModel): class Config: arbitrary_types_allowed = True + def __init__(self, **data): + super().__init__(**data) + if not isinstance(self.existing_pages, list): + self.existing_pages = [self.existing_pages] + if len(self.existing_pages) > 5 and self.parallel is None: + self.parallel = True + if self.parallel is None: + self.parallel = False + def copy_pages(self, param: CopyPagesParam): """Copies pages from a source site to this (target) site.""" @@ -515,11 +535,15 @@ def copy_single_page(content_dict: dict): page_contents = param.source_site.get_page_content(param.existing_pages) content_list = [{key: value} for key, value in page_contents.contents.items()] - return ut.parallelize( - copy_single_page, - content_list, - flush_at_end=True, - ) + + if param.parallel: + return ut.parallelize( + copy_single_page, + content_list, + flush_at_end=True, + ) + else: + return [copy_single_page(content) for content in content_list] class CreatePagePackageParam(model.OswBaseModel): """Parameter object for create_page_package method."""