Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Confidence score changes for DB #523

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pebblo/app/models/db_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,10 @@ class AiDataModel(BaseModel):
data: Optional[Union[list, str]] = None
entityCount: int
entities: dict
entityDetails: Optional[dict] = {}
topicCount: Optional[int] = 0
topics: Optional[dict] = {}

def dict(self, **kwargs):
kwargs["exclude_none"] = True
return super().dict(**kwargs)
topicDetails: Optional[dict] = {}


class RetrievalContext(BaseModel):
Expand Down Expand Up @@ -183,5 +181,7 @@ class AiSnippet(BaseModel):
lastModified: Optional[str] = None
entities: dict
topics: dict
entityDetails: Optional[dict] = {}
topicDetails: Optional[dict] = {}
policyViolations: Optional[List[dict]] = []
# label_feedback: Optional[List[LabelFeedback]] = []
6 changes: 6 additions & 0 deletions pebblo/app/service/loader/loader_doc_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ def _get_doc_classification(self, doc):
data=doc.get("doc", None),
entities={},
entityCount=0,
entityDetails={},
topics={},
topicCount=0,
topicDetails={},
)
try:
if doc_info.data:
Expand All @@ -189,8 +191,10 @@ def _get_doc_classification(self, doc):
)
doc_info.topics = topics
doc_info.entities = entities
doc_info.entityDetails = entity_details
doc_info.topicCount = topic_count
doc_info.entityCount = entity_count
doc_info.topicDetails = topic_details
doc_info.data = anonymized_doc
logger.debug("Doc classification finished.")
return doc_info
Expand All @@ -209,6 +213,8 @@ def _update_doc_details(doc, doc_info):
logger.debug("Update doc details with classification result")
doc["entities"] = doc_info.entities
doc["topics"] = doc_info.topics
doc["entity_details"] = doc_info.entityDetails
doc["topic_details"] = doc_info.topicDetails
logger.debug("Input doc updated with classification result")

@timeit
Expand Down
2 changes: 2 additions & 0 deletions pebblo/app/service/loader/snippet/snippet.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def create_snippet(self, doc, data_source, document):
"loaderSourcePath": data_source.get("sourcePath"),
"entities": doc.get("entities", {}),
"topics": doc.get("topics", {}),
"entityDetails": doc.get("entity_details", {}),
"topicDetails": doc.get("topic_details", {}),
}
ai_snippet_obj = AiSnippet(**snippet_details)
ai_snippet = ai_snippet_obj.dict()
Expand Down
238 changes: 150 additions & 88 deletions pebblo/app/service/local_ui/loader_apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,106 +37,139 @@ def __init__(self):
self.loader_document_with_findings_list = []
self.loader_findings_summary_list = []

def _get_snippet_details(self, snippet_ids, owner, label_name):
    """
    Fetch display details for each snippet id, filtered to one label.

    For every id in *snippet_ids*, look up the snippet row in
    ``AiSnippetsTable`` and build a response dict containing the snippet
    text, its source path, the file owner, and only the topic/entity
    details stored under *label_name* (other labels' details are dropped).

    Args:
        snippet_ids: iterable of snippet primary keys to look up.
        owner: file owner recorded on every returned snippet.
        label_name: topic/entity label used to filter stored details.

    Returns:
        list[dict]: one entry per snippet that could be fetched;
        ids whose query fails or returns no row are skipped silently
        (best-effort behavior).
    """
    response = []
    for snippet_id in snippet_ids:
        status, output = self.db.query(AiSnippetsTable, {"id": snippet_id})
        if not status or len(output) == 0:
            # Row missing or query failed — skip this id, keep going.
            continue
        snippet_details = output[0].data

        # Keep only the details belonging to the requested label.
        topic_details = {}
        stored_topics = snippet_details.get("topicDetails") or {}
        if stored_topics.get(label_name):
            topic_details = {label_name: stored_topics.get(label_name)}

        entity_details = {}
        stored_entities = snippet_details.get("entityDetails") or {}
        if stored_entities.get(label_name):
            entity_details = {label_name: stored_entities.get(label_name)}

        snippet_obj = {
            "snippet": snippet_details["doc"],
            "sourcePath": snippet_details["sourcePath"],
            "topicDetails": topic_details,
            "entityDetails": entity_details,
            "fileOwner": owner,
            "authorizedIdentities": [],
        }
        response.append(snippet_obj)
    return response

def get_findings_for_loader_app(self, app_data):
topic_count = 0
entity_count = 0
total_snippet_count = 0
snippets = []
if app_data.get("docEntities"):
for entity, entity_data in app_data.get("docEntities").items():
entity_count += entity_data.get("count")
self.loader_findings += entity_data.get("count")

findings_exists = False
for findings in self.loader_findings_list:
if findings.get("labelName") == entity:
findings_exists = True
findings["findings"] += entity_data["count"]
findings["snippetCount"] += len(entity_data["snippetIds"])
findings["fileCount"] = len(app_data["documents"])
total_snippet_count += findings["snippetCount"]
snippets.extend(
self._get_snippet_details(
entity_data["snippetIds"], app_data["owner"]
)
)
break
if not findings_exists:
logger.debug("finding not exist")
findings = {
"appName": app_data["name"],
"labelName": entity,
"findings": entity_data["count"],
"findingsType": "entities",
"snippetCount": len(entity_data["snippetIds"]),
"fileCount": len(app_data["documents"]),
"snippets": self._get_snippet_details(
entity_data["snippetIds"], app_data["owner"]
),
}
total_snippet_count += findings["snippetCount"]
shallow_copy = findings.copy()
self.loader_findings_list.append(shallow_copy)
del findings["snippets"]
self.loader_findings_summary_list.append(findings)
def _findings_for_app_entities(
    self, app_data, snippets, total_snippet_count, entity_count
):
    """
    Accumulate per-entity findings for a loader app.

    For each entity in ``app_data["docEntities"]`` this either updates an
    existing entry in ``self.loader_findings_list`` (matched by label
    name) or creates a new findings record, mirroring
    ``_findings_for_app_topics``. A copy WITH snippets is kept in
    ``loader_findings_list``; the summary list gets the same record with
    the ``"snippets"`` key removed.

    Args:
        app_data: dict for one loader app; caller guarantees
            ``docEntities`` is present and truthy (see
            ``get_findings_for_loader_app``).
        snippets: running list of snippet detail dicts; extended in place.
        total_snippet_count: running snippet-count accumulator.
        entity_count: running entity-count accumulator.

    Returns:
        tuple: (entity_count, snippets, total_snippet_count) updated.
    """
    for entity, entity_data in app_data.get("docEntities").items():
        entity_count += entity_data.get("count")
        self.loader_findings += entity_data.get("count")

        findings_exists = False
        for findings in self.loader_findings_list:
            if findings.get("labelName") == entity:
                # Merge this app's counts into the existing record.
                findings_exists = True
                findings["findings"] += entity_data["count"]
                findings["snippetCount"] += len(entity_data["snippetIds"])
                findings["fileCount"] = len(app_data["documents"])
                total_snippet_count += findings["snippetCount"]
                snippets.extend(
                    self._get_snippet_details(
                        entity_data["snippetIds"], app_data["owner"], entity
                    )
                )
                break
        if not findings_exists:
            logger.debug("finding not exist")
            findings = {
                "appName": app_data["name"],
                "labelName": entity,
                "findings": entity_data["count"],
                "findingsType": "entities",
                "snippetCount": len(entity_data["snippetIds"]),
                "fileCount": len(app_data["documents"]),
                "snippets": self._get_snippet_details(
                    entity_data["snippetIds"], app_data["owner"], entity
                ),
            }
            total_snippet_count += findings["snippetCount"]
            # Shallow copy keeps "snippets" in the findings list while the
            # summary entry is stored without it.
            shallow_copy = findings.copy()
            self.loader_findings_list.append(shallow_copy)
            del findings["snippets"]
            self.loader_findings_summary_list.append(findings)
    return entity_count, snippets, total_snippet_count

def _findings_for_app_topics(
    self, app_data, snippets, total_snippet_count, topic_count
):
    """
    Accumulate per-topic findings for a loader app.

    For each topic in ``app_data["docTopics"]`` this either updates an
    existing entry in ``self.loader_findings_list`` (matched by label
    name) or creates a new findings record, mirroring
    ``_findings_for_app_entities``.

    Fix: the previous version also re-appended a shallow copy to
    ``loader_findings_list``, deleted ``"snippets"`` from the shared list
    entry, and re-appended it to the summary list inside the
    "findings exists" branch — duplicating records on every repeat label.
    The entities counterpart never did that; the exists branch now only
    merges counts and extends ``snippets``.

    Args:
        app_data: dict for one loader app; caller guarantees
            ``docTopics`` is present and truthy (see
            ``get_findings_for_loader_app``).
        snippets: running list of snippet detail dicts; extended in place.
        total_snippet_count: running snippet-count accumulator.
        topic_count: running topic-count accumulator.

    Returns:
        tuple: (topic_count, snippets, total_snippet_count) updated.
    """
    for topic, topic_data in app_data.get("docTopics").items():
        topic_count += topic_data.get("count")
        self.loader_findings += topic_data.get("count")

        findings_exists = False
        for findings in self.loader_findings_list:
            if findings.get("labelName") == topic:
                # Merge this app's counts into the existing record only —
                # no duplicate list/summary entries.
                findings_exists = True
                findings["findings"] += topic_data["count"]
                findings["snippetCount"] += len(topic_data["snippetIds"])
                findings["fileCount"] = len(app_data["documents"])
                total_snippet_count += findings["snippetCount"]
                snippets.extend(
                    self._get_snippet_details(
                        topic_data["snippetIds"], app_data["owner"], topic
                    )
                )
                break
        if not findings_exists:
            findings = {
                "appName": app_data["name"],
                "labelName": topic,
                "findings": topic_data["count"],
                "findingsType": "topics",
                "snippetCount": len(topic_data["snippetIds"]),
                "fileCount": len(app_data["documents"]),
                "snippets": self._get_snippet_details(
                    topic_data["snippetIds"], app_data["owner"], topic
                ),
            }
            total_snippet_count += findings["snippetCount"]
            # Shallow copy keeps "snippets" in the findings list while the
            # summary entry is stored without it.
            shallow_copy = findings.copy()
            self.loader_findings_list.append(shallow_copy)
            del findings["snippets"]
            self.loader_findings_summary_list.append(findings)
    return topic_count, snippets, total_snippet_count

def _update_loader_datasource(
self, app_data, entity_count, topic_count, total_snippet_count
):
"""
This function updates loader datasource details and count
"""

# Data Source Details
status, data_sources = self.db.query(
_, data_sources = self.db.query(
AiDataSourceTable, {"loadId": app_data.get("id")}
)
for data_source in data_sources:
Expand All @@ -158,10 +191,12 @@ def get_findings_for_loader_app(self, app_data):
# Data Source Count
self.loader_data_source = len(self.loader_data_source_list)

# Fetch required data for DocumentWithFindings
status, documents = self.db.query(
AiDocumentTable, {"loadId": app_data.get("id")}
)
def _get_documents_with_findings(self, app_data):
"""
Fetch required data for DocumentWithFindings
"""

_, documents = self.db.query(AiDocumentTable, {"loadId": app_data.get("id")})
loader_document_with_findings = app_data.get("documentsWithFindings")
documents_with_findings_data = []
for document in documents:
Expand All @@ -184,6 +219,33 @@ def get_findings_for_loader_app(self, app_data):
# Documents with findings Count
self.loader_files_findings = len(self.loader_document_with_findings_list)

def get_findings_for_loader_app(self, app_data):
"""
This function calculates findings for loader app
"""

entity_count = 0
topic_count = 0
total_snippet_count = 0
snippets = []
if app_data.get("docEntities"):
entity_count, snippets, total_snippet_count = (
self._findings_for_app_entities(
app_data, snippets, total_snippet_count, entity_count
)
)

if app_data.get("docTopics"):
topic_count, snippets, total_snippet_count = self._findings_for_app_topics(
app_data, snippets, total_snippet_count, topic_count
)

self._update_loader_datasource(
app_data, entity_count, topic_count, total_snippet_count
)

self._get_documents_with_findings(app_data)

app_details = LoaderAppListDetails(
name=app_data.get("name"),
topics=topic_count,
Expand Down Expand Up @@ -216,9 +278,9 @@ def get_all_loader_apps(self):
continue

self.loader_apps_at_risk += 1
loader_app = self.get_findings_for_loader_app(app_data)
all_loader_apps.append(loader_app)
app_processed.append(app_data["name"])
loader_app = self.get_findings_for_loader_app(app_data)
all_loader_apps.append(loader_app)
app_processed.append(app_data["name"])

# TODO: Sort loader apps
# sorted_loader_apps = self._sort_loader_apps(all_loader_apps)
Expand Down