Skip to content

Commit

Permalink
moving the setting of the parallel argument into the Params object(s)
Browse files Browse the repository at this point in the history
  • Loading branch information
LukasGold committed Jul 2, 2024
1 parent 0514445 commit 13c7af3
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 49 deletions.
51 changes: 37 additions & 14 deletions src/osw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,13 +620,22 @@ def load_entity(
if isinstance(entity_title, OSW.LoadEntityParam): # LoadEntityParam
return OSW.LoadEntityResult(entities=entities)

class StoreEntityParam(model.OswBaseModel):
class StoreEntityParam(OswBaseModel):
entities: Union[OswBaseModel, List[OswBaseModel]]
namespace: Optional[str]
parallel: Optional[bool] = False
parallel: Optional[bool] = None
meta_category_title: Optional[str] = "Category:Category"
debug: Optional[bool] = False

def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.entities, list):
self.entities = [self.entities]
if len(self.entities) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False

def store_entity(
self, param: Union[StoreEntityParam, OswBaseModel, List[OswBaseModel]]
) -> None:
Expand All @@ -645,8 +654,6 @@ def store_entity(
param.entities = [param.entities]

max_index = len(param.entities)
if max_index >= 5:
param.parallel = True

meta_category = self.site.get_page(
WtSite.GetPageParam(titles=[param.meta_category_title])
Expand Down Expand Up @@ -713,14 +720,23 @@ def store_entity_(
for i, e in enumerate(param.entities)
]

class DeleteEntityParam(model.OswBaseModel):
entities: List[model.OswBaseModel]
class DeleteEntityParam(OswBaseModel):
entities: Union[OswBaseModel, List[OswBaseModel]]
comment: Optional[str] = None
parallel: Optional[bool] = False
parallel: Optional[bool] = None
debug: Optional[bool] = False

def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.entities, list):
self.entities = [self.entities]
if len(self.entities) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False

def delete_entity(
self, entity: Union[model.OswBaseModel, DeleteEntityParam], comment: str = None
self, entity: Union[OswBaseModel, DeleteEntityParam], comment: str = None
):
"""Deletes the given entity/entities from the OSW instance."""
if not isinstance(entity, OSW.DeleteEntityParam):
Expand All @@ -730,8 +746,6 @@ def delete_entity(
entity = OSW.DeleteEntityParam(entities=[entity])
if comment is not None:
entity.comment = comment
if len(entity.entities) >= 5:
entity.parallel = True

def delete_entity_(entity, comment_: str = None):
"""Deletes the given entity from the OSW instance.
Expand Down Expand Up @@ -778,12 +792,21 @@ def delete_entity_(entity, comment_: str = None):
else:
_ = [delete_entity_(e, entity.comment) for e in entity.entities]

class QueryInstancesParam(model.OswBaseModel):
categories: List[Union[str, OswBaseModel]]
parallel: Optional[bool] = False
class QueryInstancesParam(OswBaseModel):
categories: Union[Union[str, OswBaseModel], List[Union[str, OswBaseModel]]]
parallel: Optional[bool] = None
debug: Optional[bool] = False
limit: Optional[int] = 1000

def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.categories, list):
self.categories = [self.categories]
if len(self.categories) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False

def query_instances(
self, category: Union[str, OswBaseModel, OSW.QueryInstancesParam]
) -> List[str]:
Expand All @@ -794,7 +817,7 @@ def get_page_title(category_: Union[str, OswBaseModel]) -> str:
)
if isinstance(category_, str):
return category_.split(":")[-1] # page title w/o namespace
elif isinstance(category_, model.OswBaseModel):
elif isinstance(category_, OswBaseModel):
type_ = getattr(category_, "type", None)
if type_:
full_page_title = type_[0]
Expand Down
27 changes: 8 additions & 19 deletions src/osw/wiki_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,18 @@ class SearchParam(OswBaseModel):
"""Search parameters for semantic and prefix search"""

query: Union[str, List[str]]
parallel: Optional[bool] = False # is set to true if query is a list longer than 5
parallel: Optional[bool] = None # is set to true if query is a list longer than 5
debug: Optional[bool] = True
limit: Optional[int] = 1000

# todo: @Simon: Bad style? Better to make it explicit in every function using it?
def __init__(self, **data):
super().__init__(**data)
if isinstance(self.query, str):
if not isinstance(self.query, list):
self.query = [self.query]
if len(self.query) > 5:
if len(self.query) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False


def prefix_search(
Expand All @@ -148,14 +149,10 @@ def prefix_search(
page_list :
List of page titles
"""
if isinstance(text, str):
query = SearchParam(query=[text])
elif isinstance(text, list):
if not isinstance(text, SearchParam):
query = SearchParam(query=text)
else:
query = text
if len(query.query) > 5:
query.parallel = True

def prefix_search_(single_text):
page_list = list()
Expand Down Expand Up @@ -208,12 +205,8 @@ def semantic_search(
page_list:
List of page titles
"""
if isinstance(query, str):
query = SearchParam(query=[query])
elif isinstance(query, list):
if not isinstance(query, SearchParam):
query = SearchParam(query=query)
if len(query.query) > 5:
query.parallel = True

def semantic_search_(single_query):
page_list = list()
Expand Down Expand Up @@ -321,14 +314,10 @@ def get_file_info_and_usage(
Use the sandbox to design and test the queries:
https://demo.open-semantic-lab.org/wiki/Special:ApiSandbox
"""
if isinstance(title, str):
query = SearchParam(query=[title], debug=False)
elif isinstance(title, list):
if not isinstance(title, SearchParam):
query = SearchParam(query=title, debug=False)
else: # SearchParam
query = title
if len(query.query) > 5:
query.parallel = True

def get_file_info_and_usage_(single_title):
api_request_result = site.api(
Expand Down
56 changes: 40 additions & 16 deletions src/osw/wtsite.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ class GetPageParam(model.OswBaseModel):
debug: Optional[bool] = False
"""Whether to print debug messages"""

def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.titles, list):
self.titles = [self.titles]
if len(self.titles) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False

class GetPageResult(model.OswBaseModel):
pages: List["WtPage"]
"""List of pages that have been downloaded"""
Expand All @@ -210,12 +219,7 @@ def get_page(self, param: GetPageParam) -> GetPageResult:
param:
GetPageParam object
"""
# ensure that titles is a list
if not isinstance(param.titles, list):
param.titles = [param.titles]
max_index = len(param.titles)
if param.parallel is None and max_index >= 5:
param.parallel = True

exeptions = []
pages = []
Expand Down Expand Up @@ -438,6 +442,15 @@ class UploadPageParam(model.OswBaseModel):
class Config:
arbitrary_types_allowed = True

def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.pages, list):
self.pages = [self.pages]
if len(self.pages) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False

def upload_page(
self,
param: Union[UploadPageParam, "WtPage", List["WtPage"]],
Expand All @@ -449,14 +462,10 @@ def upload_page(
param:
UploadPageParam object or a WtPage object or a list of WtPage objects.
"""
if isinstance(param, WtPage):
param = WtSite.UploadPageParam(pages=[param])
elif isinstance(param, list):
if not isinstance(param, WtSite.UploadPageParam):
param = WtSite.UploadPageParam(pages=param)

max_index = len(param.pages)
if max_index >= 5:
param.parallel = True

def upload_page_(page, index: int = None):
# Before uploading: Check if the page is uploaded to the WtSite that is
Expand Down Expand Up @@ -486,18 +495,29 @@ class CopyPagesParam(model.OswBaseModel):

source_site: "WtSite"
"""The source site to copy the pages from"""
existing_pages: List[str]
existing_pages: Union[str, List[str]]
"""The full page title of the pages on the source site"""
overwrite: Optional[bool] = False
"""If true, pages will be overwritten if they already exist on the target
site"""
parallel: Optional[bool] = None
"""If true, uploads the pages in parallel."""
comment: Optional[str] = None
"""Edit comment for the page history. If set to none, will be replaced with
'[bot edit] Copied from {source_site.host}'."""

class Config:
arbitrary_types_allowed = True

def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.existing_pages, list):
self.existing_pages = [self.existing_pages]
if len(self.existing_pages) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
self.parallel = False

def copy_pages(self, param: CopyPagesParam):
"""Copies pages from a source site to this (target) site."""

Expand All @@ -515,11 +535,15 @@ def copy_single_page(content_dict: dict):

page_contents = param.source_site.get_page_content(param.existing_pages)
content_list = [{key: value} for key, value in page_contents.contents.items()]
return ut.parallelize(
copy_single_page,
content_list,
flush_at_end=True,
)

if param.parallel:
return ut.parallelize(
copy_single_page,
content_list,
flush_at_end=True,
)
else:
return [copy_single_page(content) for content in content_list]

class CreatePagePackageParam(model.OswBaseModel):
"""Parameter object for create_page_package method."""
Expand Down

0 comments on commit 13c7af3

Please sign in to comment.