Skip to content

Commit

Permalink
Adding anonymize_snippets as an input field in /loader/doc API (#558)
Browse files Browse the repository at this point in the history
* Adding anonymize_snippets as an input field in /loader/doc API

* Added type checking, and some private functions

* Added UTs
  • Loading branch information
dristysrivastava authored Sep 20, 2024
1 parent 8c95f3d commit 4024d3d
Show file tree
Hide file tree
Showing 6 changed files with 376 additions and 26 deletions.
1 change: 1 addition & 0 deletions pebblo/app/api/req_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class ReqLoaderDoc(BaseModel):
source_owner: str
classifier_location: str
classifier_mode: Optional[str] = None
anonymize_snippets: Optional[bool] = None


class Context(BaseModel):
Expand Down
24 changes: 16 additions & 8 deletions pebblo/app/service/doc_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from datetime import datetime

from pebblo.app.enums.common import ClassificationMode
from pebblo.app.enums.enums import CacheDir, ClassifierConstants, ReportConstants
from pebblo.app.enums.enums import CacheDir, ReportConstants
from pebblo.app.models.models import (
AiDataModel,
AiDocs,
Expand Down Expand Up @@ -37,16 +37,24 @@ class LoaderHelper:
Class for loader doc related task
"""

def __init__(self, app_details, data, load_id, classifier_mode):
def __init__(
self,
app_details: dict,
data: dict,
load_id: str,
classifier_mode: str = "all",
anonymize_snippets: bool = False,
):
self.app_details = app_details
self.data = data
self.load_id = load_id
self.loader_mapper = {}
self.classifier_mode = classifier_mode
self.anonymize_snippets = anonymize_snippets
self.entity_classifier_obj = EntityClassifier()

# Initialization
def _initialize_raw_data(self):
def _initialize_raw_data(self) -> dict:
"""
Initializing raw data and return as dict object
"""
Expand All @@ -69,7 +77,7 @@ def _initialize_raw_data(self):
return raw_data

@staticmethod
def _fetch_variables(raw_data):
def _fetch_variables(raw_data: dict):
"""
Return list of variable's
"""
Expand Down Expand Up @@ -111,7 +119,7 @@ def _update_raw_data(
)

# Model Creation
def _create_doc_model(self, doc, doc_info):
def _create_doc_model(self, doc: dict, doc_info: AiDataModel) -> dict:
"""
Create doc model and return its object
"""
Expand Down Expand Up @@ -163,7 +171,7 @@ def _get_top_n_findings(raw_data):
]
return top_n_findings

def _count_files_with_findings(self):
def _count_files_with_findings(self) -> int:
"""
Return the count of files that have associated findings.
"""
Expand All @@ -176,7 +184,7 @@ def _count_files_with_findings(self):
files_with_findings_count += 1
return files_with_findings_count

def _get_classifier_response(self, doc):
def _get_classifier_response(self, doc: dict) -> AiDataModel:
doc_info = AiDataModel(
data=doc.get("doc", None),
entities={},
Expand Down Expand Up @@ -209,7 +217,7 @@ def _get_classifier_response(self, doc):
entity_details,
) = self.entity_classifier_obj.presidio_entity_classifier_and_anonymizer(
doc_info.data,
anonymize_snippets=ClassifierConstants.anonymize_snippets.value,
anonymize_snippets=self.anonymize_snippets,
)
doc_info.entities = entities
doc_info.entityDetails = entity_details
Expand Down
47 changes: 35 additions & 12 deletions pebblo/app/service/loader/loader_doc_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pebblo.app.config.config import var_server_config_dict
from pebblo.app.enums.common import ClassificationMode
from pebblo.app.enums.enums import ApplicationTypes, CacheDir, ClassifierConstants
from pebblo.app.enums.enums import ApplicationTypes, CacheDir
from pebblo.app.libs.responses import PebbloJsonResponse
from pebblo.app.models.db_models import (
AiDataModel,
Expand Down Expand Up @@ -40,8 +40,16 @@ def __init__(self):
self.data = None
self.app_name = None
self.classifier_mode = None
self.anonymize_snippets = None
self.entity_classifier_obj = EntityClassifier()

def _initialize_data(self, data: dict):
    """
    Prepare per-request state before the request is processed.

    Creates the SQLite client, stores the request payload and the
    application name taken from its "name" key, then resolves the
    classifier mode and the anonymize_snippets setting.
    """
    self.db = SQLiteClient()
    self.data = data
    self.app_name = data.get("name")
    # Both setters read self.data, so they must run after it is assigned.
    self._set_classifier_mode()
    self._set_anonymize_snippets()

@staticmethod
def _create_return_response(message, output=None, status_code=200):
if output is None:
Expand Down Expand Up @@ -202,7 +210,7 @@ def _get_doc_classification(self, doc):
entity_details,
) = self.entity_classifier_obj.presidio_entity_classifier_and_anonymizer(
doc_info.data,
anonymize_snippets=ClassifierConstants.anonymize_snippets.value,
anonymize_snippets=self.anonymize_snippets,
)
doc_info.entities = entities
doc_info.entityDetails = entity_details
Expand Down Expand Up @@ -276,19 +284,34 @@ def _get_or_create_data_source(self):
logger.debug("Data Source has been created successfully.")
return data_source_obj.data

def _set_classifier_mode(self):
    """
    Pick the classifier mode for the current request.

    A non-empty ``classifier_mode`` in the request payload wins; otherwise
    the mode configured under ``classifier.mode`` is used, falling back to
    ``ClassificationMode.ALL.value`` when the config does not define one.
    """
    requested_mode = self.data.get("classifier_mode")
    if requested_mode:
        self.classifier_mode = requested_mode
        return

    classifier_config = config_details.get("classifier", {})
    self.classifier_mode = classifier_config.get(
        "mode", ClassificationMode.ALL.value
    )

def _set_anonymize_snippets(self):
    """
    Resolve the effective ``anonymize_snippets`` flag for this request.

    A value supplied in the API request takes precedence, including an
    explicit ``False``. Only when the field is absent (or ``None``) is the
    server config's ``classifier.anonymizeSnippets`` used, defaulting to
    ``False`` when the config does not define it.
    """
    requested = self.data.get("anonymize_snippets")
    if requested is None:
        # Compare against None rather than truthiness: a truthiness check
        # treats an explicit False as "not provided" and lets a config
        # value of True silently override the caller's request.
        self.anonymize_snippets = config_details.get("classifier", {}).get(
            "anonymizeSnippets", False
        )
    else:
        self.anonymize_snippets = requested

@timeit
def process_request(self, data):
try:
self.db = SQLiteClient()
self.data = data
self.app_name = data.get("name")

if not self.data.get("classifier_mode"):
self.classifier_mode = config_details.get("classifier", {}).get(
"mode", ClassificationMode.ALL.value
)
else:
self.classifier_mode = self.data.get("classifier_mode")
self._initialize_data(data)

# create session
self.db.create_session()
Expand Down
36 changes: 30 additions & 6 deletions pebblo/app/service/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ def __init__(self):
self.data = None
self.app_name = None
self.classifier_mode = None
self.anonymize_snippets = None

def _initialize_data(self, data):
def _initialize_data(self, data: dict):
self.data = data
self.app_name = data.get("name")
self._set_classifier_mode()
self._set_anonymize_snippets()

def _write_pdf_report(self, final_report):
"""
Expand Down Expand Up @@ -122,17 +125,34 @@ def _upsert_loader_details(self, app_details):
loader_list.append(new_loader_data.model_dump())
app_details["loaders"] = loader_list

def process_request(self, data):
def _set_classifier_mode(self):
"""
This process is entrypoint function for loader doc API implementation.
This function defines the value of the classifier_mode: if it is included in the API request,
it will be used; otherwise, the value will be taken from the config.
"""
if not data.get("classifier_mode"):
if not self.data.get("classifier_mode"):
self.classifier_mode = config_details.get("classifier", {}).get(
"mode", ClassificationMode.ALL.value
)
else:
self.classifier_mode = data.get("classifier_mode")
self.classifier_mode = self.data.get("classifier_mode")

def _set_anonymize_snippets(self):
    """
    Resolve the effective ``anonymize_snippets`` flag for this request.

    A value supplied in the API request takes precedence, including an
    explicit ``False``. Only when the field is absent (or ``None``) is the
    server config's ``classifier.anonymizeSnippets`` used, defaulting to
    ``False`` when the config does not define it.
    """
    requested = self.data.get("anonymize_snippets")
    if requested is None:
        # Compare against None rather than truthiness: a truthiness check
        # treats an explicit False as "not provided" and lets a config
        # value of True silently override the caller's request.
        self.anonymize_snippets = config_details.get("classifier", {}).get(
            "anonymizeSnippets", False
        )
    else:
        self.anonymize_snippets = requested

def process_request(self, data: dict):
"""
This process is entrypoint function for loader doc API implementation.
"""
self._initialize_data(data)

try:
Expand Down Expand Up @@ -173,7 +193,11 @@ def process_request(self, data):

# process input docs, app details, and generate final report
loader_helper_obj = LoaderHelper(
app_details, self.data, load_id, self.classifier_mode
app_details,
self.data,
load_id,
self.classifier_mode,
self.anonymize_snippets,
)
(
app_details,
Expand Down
Loading

0 comments on commit 4024d3d

Please sign in to comment.