chore: show llm filter calls info, remove redundant remove_timestamp param in Env (microsoft#1040)

XianBW · web-flow · commit 2551d20a405d · 2025-07-09T14:20:33.000+08:00
* show llm calls info

* remove `remove_timestamp` param in Env

* use cache by default (re-enable `@st.cache_data(persist=True)` on `load_data`)
diff --git a/rdagent/log/ui/ds_trace.py b/rdagent/log/ui/ds_trace.py
@@ -22,6 +22,7 @@
     extract_loopid_func_name,
     is_valid_session,
 )
+from rdagent.utils.agent.tpl import T
 from rdagent.utils.repo.diff import generate_diff_from_dict
 
 if "show_stdout" not in state:
@@ -44,7 +45,7 @@ def convert_defaultdict_to_dict(d):
     return d
 
 
-# @st.cache_data(persist=True)
+@st.cache_data(persist=True)
 def load_data(log_path: Path):
     data = defaultdict(lambda: defaultdict(dict))
     llm_data = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
@@ -60,8 +61,6 @@ def load_data(log_path: Path):
         if ei is not None:
             ei = int(ei)
         if "debug_" in msg.tag:
-            if "debug_tpl" in msg.tag and "filter_" in msg.content["uri"]:
-                continue
             if ei is not None:
                 llm_data[li][fn][ei].append(
                     {
@@ -201,9 +200,9 @@ def workspace_win(workspace, cmp_workspace=None, cmp_name="last code."):
 def show_text(text, lang=None):
     """显示文本代码块"""
     if lang:
-        st.code(text, language=lang, wrap_lines=True)
+        st.code(text, language=lang, wrap_lines=True, line_numbers=True)
     elif "\n" in text:
-        st.code(text, language="python", wrap_lines=True)
+        st.code(text, language="python", wrap_lines=True, line_numbers=True)
     else:
         st.code(text, language="html", wrap_lines=True)
 
@@ -220,6 +219,8 @@ def llm_log_win(llm_d: list):
     for d in llm_d:
         if "debug_tpl" in d["tag"]:
             uri = d["obj"]["uri"]
+            if "filter_redundant_text" in uri:
+                continue
             tpl = d["obj"]["template"]
             cxt = d["obj"]["context"]
             rd = d["obj"]["rendered"]
@@ -240,32 +241,19 @@ def llm_log_win(llm_d: list):
                 with t1:
                     try:
                         rdict = json.loads(resp)
-                        if "code" in rdict:
-                            code = rdict["code"]
-                            st.markdown(":red[**Code in response dict:**]")
-                            st.code(code, language="python", wrap_lines=True, line_numbers=True)
-                            rdict.pop("code")
-                        elif "spec" in rdict:
-                            spec = rdict["spec"]
-                            st.markdown(":red[**Spec in response dict:**]")
-                            st.markdown(spec)
-                            rdict.pop("spec")
-                        else:
-                            showed_keys = []
-                            for k, v in rdict.items():
-                                if k.endswith(".py"):
-                                    st.markdown(f":red[**{k}**]")
-                                    st.code(v, language="python", wrap_lines=True, line_numbers=True)
-                                    showed_keys.append(k)
-                            for k in showed_keys:
-                                rdict.pop(k)
-                        st.write(":red[**Other parts (except for the code or spec) in response dict:**]")
+                        showed_keys = []
+                        for k, v in rdict.items():
+                            if k.endswith(".py") or k.endswith(".md"):
+                                st.markdown(f":red[**{k}**]")
+                                st.code(v, language="python", wrap_lines=True, line_numbers=True)
+                                showed_keys.append(k)
+                        for k in showed_keys:
+                            rdict.pop(k)
+                        if len(showed_keys) > 0:
+                            st.write(":red[**Other parts (except for the code or spec) in response dict:**]")
                         st.json(rdict)
                     except:
-                        try:
-                            st.json(resp)
-                        except:
-                            show_text(resp)
+                        show_text(resp)
                 with t2:
                     show_text(user)
                 with t3:
@@ -466,18 +454,37 @@ def replace_ep_path(p: Path):
     return p
 
 
+def get_llm_call_stats(llm_data: dict) -> tuple[int, int]:
+    total_llm_call = 0
+    total_filter_call = 0
+    filter_sys_prompt = T("rdagent.utils.prompts:filter_redundant_text.system").r()
+    for li, loop_d in llm_data.items():
+        for fn, loop_fn_d in loop_d.items():
+            for k, v in loop_fn_d.items():
+                for d in v:
+                    if "debug_llm" in d["tag"]:
+                        total_llm_call += 1
+                        if filter_sys_prompt == d["obj"]["system"]:
+                            total_filter_call += 1
+    return total_llm_call, total_filter_call
+
+
 def summarize_win():
     st.header("Summary", divider="rainbow")
     with st.container(border=True):
         min_id, max_id = get_state_data_range(state.data)
-        info0, info1, info2, info3 = st.columns([2, 1, 1, 1])
+        info0, info1, info2, info3, info4, info5 = st.columns([1, 1, 1, 1, 1, 1])
         show_trace_dag = info0.toggle("Show trace DAG", key="show_trace_dag")
-        with info1.popover("LITELLM_SETTINGS", icon="⚙️"):
-            st.write(state.data.get("SETTINGS", {}).get("LITELLM_SETTINGS", "No settings found."))
-        with info2.popover("RD_AGENT_SETTINGS", icon="⚙️"):
-            st.write(state.data.get("SETTINGS", {}).get("RD_AGENT_SETTINGS", "No settings found."))
-        with info3.popover("RDLOOP_SETTINGS", icon="⚙️"):
-            st.write(state.data.get("SETTINGS", {}).get("RDLOOP_SETTINGS", "No settings found."))
+        with info1.popover("LITELLM", icon="⚙️"):
+            st.write(state.data.get("settings", {}).get("LITELLM_SETTINGS", "No settings found."))
+        with info2.popover("RD_AGENT", icon="⚙️"):
+            st.write(state.data.get("settings", {}).get("RD_AGENT_SETTINGS", "No settings found."))
+        with info3.popover("RDLOOP", icon="⚙️"):
+            st.write(state.data.get("settings", {}).get("RDLOOP_SETTINGS", "No settings found."))
+
+        llm_call, llm_filter_call = get_llm_call_stats(state.llm_data)
+        info4.metric("LLM Calls", llm_call)
+        info5.metric("LLM Filter Calls", f"{llm_filter_call}({round(llm_filter_call / llm_call * 100, 2)}%)")
         if show_trace_dag:
             st.markdown("### Trace DAG")
             final_trace_loop_id = max_id
diff --git a/rdagent/utils/env.py b/rdagent/utils/env.py
@@ -213,14 +213,15 @@ def __run_with_retry(
         local_path: str = ".",
         env: dict | None = None,
         running_extra_volume: Mapping = MappingProxyType({}),
-        remove_timestamp: bool = True,
     ) -> EnvResult:
-        # TODO: remove_timestamp can be implemented in a shallower way...
         for retry_index in range(self.conf.retry_count + 1):
             try:
                 start = time.time()
                 log_output, return_code = self._run(
-                    entry, local_path, env, running_extra_volume=running_extra_volume, remove_timestamp=remove_timestamp
+                    entry,
+                    local_path,
+                    env,
+                    running_extra_volume=running_extra_volume,
                 )
                 end = time.time()
                 logger.info(f"Running time: {end - start} seconds")
@@ -316,7 +317,10 @@ def _get_path_stem(path: str) -> str | None:
             result = self.cached_run(entry_add_timeout, local_path, env, running_extra_volume)
         else:
             result = self.__run_with_retry(
-                entry_add_timeout, local_path, env, running_extra_volume, remove_timestamp=False
+                entry_add_timeout,
+                local_path,
+                env,
+                running_extra_volume,
             )
 
         return result
@@ -327,7 +331,6 @@ def cached_run(
         local_path: str = ".",
         env: dict | None = None,
         running_extra_volume: Mapping = MappingProxyType({}),
-        remove_timestamp: bool = True,
     ) -> EnvResult:
         """
         Run the folder under the environment.
@@ -364,7 +367,7 @@ def cached_run(
                 ret = pickle.load(f)
             self.unzip_a_file_into_a_folder(str(target_folder / f"{key}.zip"), local_path)
         else:
-            ret = self.__run_with_retry(entry, local_path, env, running_extra_volume, remove_timestamp)
+            ret = self.__run_with_retry(entry, local_path, env, running_extra_volume)
             with open(target_folder / f"{key}.pkl", "wb") as f:
                 pickle.dump(ret, f)
             self.zip_a_folder_into_a_file(local_path, str(target_folder / f"{key}.zip"))
@@ -846,20 +849,12 @@ def _f() -> dict:
 
         return _f()
 
-    def replace_time_info(self, input_string: str) -> str:
-        """To remove any time related information from the logs since it will destroy the cache mechanism"""
-        """We currently set this function as default, but it can be changed in the future"""
-        datetime_pattern = r"\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)?\b"
-        output_string = re.sub(datetime_pattern, "[DATETIME]", input_string)
-        return output_string
-
     def _run(
         self,
         entry: str | None = None,
         local_path: str = ".",
         env: dict | None = None,
         running_extra_volume: Mapping = MappingProxyType({}),
-        remove_timestamp: bool = True,
         **kwargs: Any,
     ) -> tuple[str, int]:
         if env is None:
@@ -918,7 +913,6 @@ def _run(
             print(table)
             for log in logs:
                 decoded_log = log.strip().decode()
-                decoded_log = self.replace_time_info(decoded_log) if remove_timestamp else decoded_log
                 Console().print(decoded_log, markup=False)
                 log_output += decoded_log + "\n"
             exit_status = container.wait()["StatusCode"]