Skip to content

Commit

Permalink
[NL Interface] Adding some detection logging info to the Debug Info o…
Browse files Browse the repository at this point in the history
…n frontend (#2377)

See some screenshot:

<img width="639" alt="Screenshot 2023-03-06 at 12 21 21 PM"
src="https://user-images.githubusercontent.com/1021616/223221954-cf1e6a86-b022-4d34-b26c-0806ac481c33.png">
  • Loading branch information
jehangiramjad authored Mar 7, 2023
1 parent 23df31a commit 56195d7
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 21 deletions.
2 changes: 0 additions & 2 deletions server/integration_tests/nlnext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import multiprocessing
import os
import sys
import time

from flask_testing import LiveServerTestCase
import requests
Expand Down Expand Up @@ -74,7 +73,6 @@ def run_sequence(self,
check_debug_info=True):
ctx = {}
for i, q in enumerate(queries):
time.sleep(5)
print('Issuing ', test_dir, f'query[{i}]', q)
resp = requests.post(self.get_server_url() + f'/nl/data?q={q}',
json={
Expand Down
96 changes: 78 additions & 18 deletions server/routes/nl.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,13 @@ def _remove_places(query, places_str_found: List[str]):
return ' '.join(query.split())


def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]:
def _get_place_from_dcids(place_dcids: List[str],
debug_logs: Dict) -> List[Place]:
places = []
place_types_dict = dc.property_values(place_dcids, 'typeOf')
place_names_dict = dc.property_values(place_dcids, 'name')

dc_resolve_failures = []
# Iterate in the same order as place_dcids.
for p_dcid in place_dcids:

Expand All @@ -126,16 +128,23 @@ def _get_place_from_dcids(place_dcids: List[str]) -> List[Place]:
logging.info(
f"Place DCID ({p_dcid}) did not correspond to a place_type and/or place name."
)
dc_resolve_failures.append(p_dcid)

debug_logs.update({
"dc_resolution_failure": dc_resolve_failures,
"dc_resolved_places": places,
})
return places


def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
# TODO: propagate several of the logging errors in this function to place detection
# state displayed in debugInfo.
def _infer_place_dcids(places_str_found: List[str],
debug_logs: Dict) -> List[str]:
if not places_str_found:
logging.info("places_found is empty. Nothing to retrieve from Maps API.")
return []

override_places = []
maps_api_failures = []
no_dcids_found = []
place_dcids = []
# Iterate over all the places until a valid place DCID is found.
for p_str in places_str_found:
Expand All @@ -147,6 +156,7 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
f"{p_str} was found in OVERRIDE_PLACE_TO_DCID_FOR_MAPS_API. Recording its DCID {place_dcid} without querying Maps API."
)
place_dcids.append(place_dcid)
override_places.append((p_str.lower(), place_dcid))
continue

logging.info(f"Searching Maps API with: {p_str}")
Expand All @@ -168,12 +178,22 @@ def _infer_place_dcids(places_str_found: List[str]) -> List[str]:
logging.info(
f"Maps API found a place {place_id} but no DCID match found for place string: {p_str}."
)
no_dcids_found.append(place_id)
else:
logging.info("Maps API did not find a place for place string: {p_str}.")
logging.info(f"Maps API did not find a place for place string: {p_str}.")
maps_api_failures.append(p_str)

if not place_dcids:
logging.info(
f"No place DCIDs were found. Using places_found = {places_str_found}")
f"No place DCIDs were found. Using places_found = {places_str_found}.")

# Update the debug_logs dict.
debug_logs.update({
"dcids_resolved": place_dcids,
"dcid_overrides_found": override_places,
"maps_api_failures": maps_api_failures,
"dcid_not_found_for_place_ids": no_dcids_found
})
return place_dcids


Expand All @@ -183,8 +203,8 @@ def _empty_svs_score_dict():

def _result_with_debug_info(data_dict: Dict, status: str,
query_detection: Detection,
uttr_history: List[Dict],
debug_counters: Dict) -> Dict:
uttr_history: List[Dict], debug_counters: Dict,
query_detection_debug_logs: str) -> Dict:
"""Using data_dict and query_detection, format the dictionary response."""
svs_dict = {
'SV': query_detection.svs_detected.sv_dcids,
Expand Down Expand Up @@ -262,6 +282,8 @@ def _result_with_debug_info(data_dict: Dict, status: str,
places_found_formatted,
'query_with_places_removed':
query_detection.places_detected.query_without_place_substr,
'query_detection_debug_logs':
query_detection_debug_logs,
})

if query_detection.places_detected.main_place:
Expand All @@ -278,7 +300,8 @@ def _result_with_debug_info(data_dict: Dict, status: str,
return data_dict


def _detection(orig_query, cleaned_query) -> Detection:
def _detection(orig_query: str, cleaned_query: str,
query_detection_debug_logs: Dict) -> Detection:
model = current_app.config['NL_MODEL']

# Step 1: find all relevant places and the name/type of the main place found.
Expand All @@ -294,19 +317,36 @@ def _detection(orig_query, cleaned_query) -> Detection:
main_place = None
resolved_places = []

# Start updating the query_detection_debug_logs. Create space for place dcid inference
# and place resolution. If they remain empty, the function belows were never triggered.
query_detection_debug_logs["place_dcid_inference"] = {}
query_detection_debug_logs["place_resolution"] = {}
# Look to find place DCIDs.
if places_str_found:
place_dcids = _infer_place_dcids(places_str_found)
place_dcids = _infer_place_dcids(
places_str_found, query_detection_debug_logs["place_dcid_inference"])
logging.info(f"Found {len(place_dcids)} place dcids: {place_dcids}.")

# Step 2: replace the places in the query sentence with "".
query = _remove_places(cleaned_query.lower(), places_str_found)

if place_dcids:
resolved_places = _get_place_from_dcids(place_dcids)
resolved_places = _get_place_from_dcids(
place_dcids, query_detection_debug_logs["place_resolution"])
logging.info(
f"Resolved {len(resolved_places)} place dcids: {resolved_places}.")

# Step 2: replace the place strings with "" if place_dcids were found.
# Typically, this could also be done under the check for resolved_places
# but we don't expected the resolution from place dcids to fail (typically).
# Also, even if the resolution fails, if there is a place dcid found, it should
# be considered good enough to remove the place strings.
# TODO: investigate whether it is better to only remove place strings for which
# a DCID is found and leave the others in the query string. This is now relevant
# because we support multiple place detection+resolution. Previously, even if
# just one place was used (resolved), it made sense to remove all place strings.
# But now that multiple place strings are being resolved, if there is a failure
# in resolving a place, perhaps it should not be removed? This would be a change
# and would need to be validated first.
query = _remove_places(cleaned_query.lower(), places_str_found)

if resolved_places:
main_place = resolved_places[0]
logging.info(f"Using main_place as: {main_place}")
Expand All @@ -318,10 +358,26 @@ def _detection(orig_query, cleaned_query) -> Detection:
places_found=resolved_places,
main_place=main_place)

# Update the various place detection and query transformation debug logs dict.
query_detection_debug_logs["places_found_str"] = places_str_found
query_detection_debug_logs["main_place_inferred"] = main_place
query_detection_debug_logs["query_transformations"] = {
"place_detection_input": cleaned_query.lower(),
"place_detection_with_places_removed": query,
}
if not query_detection_debug_logs["place_dcid_inference"]:
query_detection_debug_logs[
"place_dcid_inference"] = "Place DCID Inference did not trigger (no place strings found)."
if not query_detection_debug_logs["place_resolution"]:
query_detection_debug_logs[
"place_resolution"] = "Place resolution did not trigger (no place dcids found)."

# Step 3: Identify the SV matched based on the query.
sv_debug_logs = {}
svs_scores_dict = _empty_svs_score_dict()
try:
svs_scores_dict = model.detect_svs(query)
svs_scores_dict = model.detect_svs(
query, query_detection_debug_logs["query_transformations"])
except ValueError as e:
logging.info(e)
logging.info("Using an empty svs_scores_dict")
Expand Down Expand Up @@ -443,9 +499,12 @@ def data():
_detection("", ""), escaped_context_history,
{})

query_detection_debug_logs = {}
query_detection_debug_logs["original_query"] = query
# Query detection routine:
# Returns detection for Place, SVs and Query Classifications.
query_detection = _detection(str(escape(original_query)), query)
query_detection = _detection(str(escape(original_query)), query,
query_detection_debug_logs)

# Generate new utterance.
prev_utterance = nl_utterance.load_utterance(context_history)
Expand Down Expand Up @@ -500,7 +559,8 @@ def data():
loop.run_until_complete(bt.write_row(session_info))

data_dict = _result_with_debug_info(data_dict, status_str, query_detection,
context_history, dbg_counters)
context_history, dbg_counters,
query_detection_debug_logs)

return data_dict

Expand Down
5 changes: 4 additions & 1 deletion server/services/nl.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,14 +398,17 @@ def heuristic_correlation_classification(
return NLClassifier(type=ClassificationType.CORRELATION,
attributes=attributes)

def detect_svs(self, query) -> Dict[str, Union[Dict, List]]:
def detect_svs(self, query: str,
debug_logs: Dict) -> Dict[str, Union[Dict, List]]:
# Remove stop words.
# Check comment at the top of this file above `ALL_STOP_WORDS` to understand
# the potential areas for improvement. For now, this removal blanket removes
# any words in ALL_STOP_WORDS which includes contained_in places and their
# plurals and any other query attribution/classification trigger words.
logging.info(f"SV Detection: Query provided to SV Detection: {query}")
debug_logs["sv_detection_query_input"] = query
query = utils.remove_stop_words(query, ALL_STOP_WORDS)
debug_logs["sv_detection_query_stop_words_removal"] = query
logging.info(f"SV Detection: Query used after removing stop words: {query}")

# Make API call to the NL models/embeddings server.
Expand Down
11 changes: 11 additions & 0 deletions static/js/apps/nl_interface/debug_info.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element {
mainPlaceDCID: props.debugData["main_place_dcid"],
mainPlaceName: props.debugData["main_place_name"],
queryWithoutPlaces: props.debugData["query_with_places_removed"],
queryDetectionDebugLogs: props.debugData["query_detection_debug_logs"],
svScores: props.debugData["sv_matching"],
svSentences: props.debugData["svs_to_sentences"],
rankingClassification: props.debugData["ranking_classification"],
Expand Down Expand Up @@ -220,6 +221,16 @@ export function DebugInfo(props: DebugInfoProps): JSX.Element {
{svToSentences(debugInfo.svScores, debugInfo.svSentences)}
</Col>
</Row>
<Row>
<b>Query Detection Debug Logs:</b>
</Row>
<Row>
<Col>
<pre>
{JSON.stringify(debugInfo.queryDetectionDebugLogs, null, 2)}
</pre>
</Col>
</Row>
<Row>
<b>Debug Counters</b>
</Row>
Expand Down

0 comments on commit 56195d7

Please sign in to comment.