Skip to content

Commit

Permalink
Merge pull request #28 from microsoft/vyokky/dev
Browse files Browse the repository at this point in the history
Vyokky/dev Experience Learning
  • Loading branch information
vyokky authored Mar 21, 2024
2 parents e101fb4 + e6c0f2a commit 15739fc
Show file tree
Hide file tree
Showing 35 changed files with 2,262 additions and 1,409 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Ignore Jupyter Notebook checkpoints
.ipynb_checkpoints
/test/*
/deprecated/*
/test/*.ipynb
/logs/*
__pycache__/
Expand All @@ -19,6 +20,7 @@ ufo/config/config_llm.yaml
ufo/rag/app_docs/*
learner/records.json
vectordb/docs/*
vectordb/experience/*

# Don't ignore the example files
!vectordb/docs/example/
Expand Down
2 changes: 2 additions & 0 deletions learner/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'



def create_indexer(app: str, docs: str, format: str, incremental: bool, save_path: str):
"""
Create an indexer for the given application.
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ pywin32==304
pywinauto==0.6.8
PyYAML==6.0.1
Requests==2.31.0
faiss-cpu==1.23.5
lxml==5.1.0
faiss-cpu==1.8.0
lxml==5.1.0
psutil==5.9.8
1 change: 1 addition & 0 deletions ufo/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def load_config(config_path="ufo/config/"):
:return: Merged configuration from environment variables and YAML file.
"""
# Copy environment variables to avoid modifying them directly
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
configs = dict(os.environ)

path = config_path
Expand Down
12 changes: 11 additions & 1 deletion ufo/config/config.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ ACTION_SELECTION_PROMPT: "ufo/prompts/base/{mode}/action_selection.yaml" # The
APP_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/app_example.yaml" # The prompt for the app selection
ACTION_SELECTION_EXAMPLE_PROMPT: "ufo/prompts/examples/{mode}/action_example.yaml" # The prompt for the action selection


## For experience learning
EXPERIENCE_PROMPT: "ufo/prompts/experience/{mode}/experience_summary.yaml"
EXPERIENCE_SAVED_PATH: "vectordb/experience/"

API_PROMPT: "ufo/prompts/base/{mode}/api.yaml" # The prompt for the API
INPUT_TEXT_API: "type_keys" # The input text API
INPUT_TEXT_ENTER: True # whether to press enter after typing the text
Expand All @@ -46,4 +51,9 @@ RAG_OFFLINE_DOCS_RETRIEVED_TOPK: 1 # The topk for the offline retrieved documen
## RAG Configuration for the Bing search
RAG_ONLINE_SEARCH: False # Whether to use the online search for the RAG.
RAG_ONLINE_SEARCH_TOPK: 5 # The topk for the online search
RAG_ONLINE_RETRIEVED_TOPK: 1 # The topk for the online retrieved documents
RAG_ONLINE_RETRIEVED_TOPK: 1 # The topk for the online retrieved documents


## RAG Configuration for experience
RAG_EXPERIENCE: True # Whether to use the experience-based RAG.
RAG_EXPERIENCE_RETRIEVED_TOPK: 5 # The topk for the retrieved experience records
19 changes: 9 additions & 10 deletions ufo/config/config_llm.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@ APP_AGENT: {
# API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview" ,# "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}

Expand All @@ -25,20 +25,20 @@ ACTION_AGENT: {
API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

# API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}

Expand All @@ -47,26 +47,25 @@ BACKUP_AGENT: {
API_TYPE: "azure_ad", # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
API_KEY: "YOUR_KEY", # The OpenAI API key
API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

# API_TYPE: "aoai" , # The API type, "openai" for the OpenAI API, "aoai" for the AOAI API, 'azure_ad' for the ad authority of the AOAI API.
# API_BASE: "YOUR_ENDPOINT", # The OpenAI API endpoint, "https://api.openai.com/v1/chat/completions" for the OpenAI API. As for the AAD, it should be your endpoints.
# API_KEY: "YOUR_KEY", # The OpenAI API key
# API_VERSION: "2023-12-01-preview" ,# "2024-02-15-preview" by default
# API_VERSION: "2024-02-15-preview", # "2024-02-15-preview" by default
# API_MODEL: "YOUR_MODEL", # The only OpenAI model by now that accepts visual input

###For the AOAI
API_DEPLOYMENT_ID: "gpt-4-visual-preview", # The deployment id for the AOAI API
### For Azure_AD
AAD_TENANT_ID: "YOUR_TENANT_ID", # Set the value to your tenant id for the llm model
AAD_API_SCOPE: "YOUR_SCOPE" # Set the value to your scope for the llm model
AAD_API_SCOPE: "YOUR_SCOPE", # Set the value to your scope for the llm model
AAD_API_SCOPE_BASE: "YOUR_SCOPE_BASE" # Set the value to your scope base for the llm model, whose format is API://YOUR_SCOPE_BASE, and the only need is the YOUR_SCOPE_BASE
}




### For parameters
MAX_TOKENS: 2000 # The max token limit for the response completion
MAX_RETRY: 3 # The max retry limit for the response completion
Expand Down
2 changes: 2 additions & 0 deletions ufo/experience/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
200 changes: 200 additions & 0 deletions ufo/experience/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import os
import re
from typing import Optional

from ..utils import encode_image_from_path, print_with_color


class ExperienceLogLoader:
    """
    Load the logs produced by a previous UFO run and partition them per user
    request so they can be summarized into reusable experience records.
    """

    def __init__(self, log_path: str):
        """
        Initialize the loader and eagerly parse the run's logs.
        :param log_path: The path of the folder containing the run's logs.
        """
        self.log_path = log_path
        # One parsed JSON object per line of response.log.
        self.response = self.load_response_log()
        # Highest step number found among action_step<N>.png screenshots,
        # or None when the folder contains no matching screenshots.
        self.max_stepnum = self.find_max_number_in_filenames(log_path)
        # Step indices grouped by the "Round" field of each response entry.
        self.request_partition = self.get_request_partition()
        self.screenshots = {}

        self.logs = []


    def load_response_log(self) -> list:
        """
        Load the response log.
        :return: The list of parsed response entries; lines that are not
            valid JSON are reported and skipped instead of aborting the load.
        """
        response = []
        response_log_path = os.path.join(self.log_path, "response.log")
        with open(response_log_path, 'r', encoding='utf-8') as file:
            # Read the lines and split them into a list
            response_log = file.readlines()
        for response_string in response_log:
            try:
                response.append(json.loads(response_string))
            except json.JSONDecodeError:
                print_with_color(f"Error loading response log: {response_string}", "yellow")
        return response


    @staticmethod
    def find_max_number_in_filenames(log_path: str) -> Optional[int]:
        """
        Find the largest step number among the screenshot filenames.
        :param log_path: The folder to scan.
        :return: The maximum step number, or None when no filename matches
            the action_step<N>.png pattern.
        """
        numbers = [
            number
            for number in map(ExperienceLogLoader.extract_action_step_count, os.listdir(log_path))
            if number is not None
        ]
        return max(numbers) if numbers else None


    def load_screenshot(self, stepnum: int = 0, version: str = "") -> Optional[str]:
        """
        Load the screenshot for a given step.
        :param stepnum: The step number of the screenshot.
        :param version: The screenshot variant (e.g. "selected_controls");
            empty string selects the raw screenshot.
        :return: The encoded image (via encode_image_from_path), or None
            when the screenshot file does not exist.
        """
        version_tag = "_" + version if version else ""
        filename = "action_step{stepnum}{version}.png".format(stepnum=stepnum, version=version_tag)
        screenshot_path = os.path.join(self.log_path, filename)

        # Missing screenshots are tolerated: callers store None for them.
        if os.path.exists(screenshot_path):
            return encode_image_from_path(screenshot_path)
        return None


    def create_logs(self) -> list:
        """
        Build one structured log record per request partition.
        :return: The list of partitioned log records, each containing the
            request text, round number, per-step responses/screenshots, and
            the set of applications touched.
        """
        self.logs = []
        for partition in self.request_partition:
            request = self.response[partition[0]]["Request"]
            nround = self.response[partition[0]]["Round"]
            partitioned_logs = {
                "request": request,
                "round": nround,
                "step_num": len(partition),
                **{
                    "step_%s" % local_step: {
                        "response": self.response[step],
                        # NOTE(review): enumerate starts at 0, so the first
                        # step of a partition gets local_step == 0 and this
                        # flag is True for the *second* step — confirm whether
                        # enumerate(partition, 1) was intended.
                        "is_first_action": local_step == 1,
                        "screenshot": {
                            version: self.load_screenshot(step, "" if version == "raw" else version)
                            for version in ["raw", "selected_controls"]
                        }
                    }
                    for local_step, step in enumerate(partition)
                },
                "application": list({self.response[step]["Application"] for step in partition})
            }
            self.logs.append(partitioned_logs)
        return self.logs


    def get_request_partition(self) -> list:
        """
        Group consecutive step indices by the "Round" field of their entries.
        :return: A list of lists of step indices, one inner list per round.
        """
        request_partition = []
        current_round = 0
        current_partition = []

        # max_stepnum is None when no screenshots were found; treat that as
        # zero steps instead of letting range(None) raise TypeError.
        for step in range(self.max_stepnum or 0):
            nround = self.response[step]["Round"]

            if nround != current_round:
                if current_partition:
                    request_partition.append(current_partition)
                current_partition = [step]
                current_round = nround
            else:
                current_partition.append(step)

        if current_partition:
            request_partition.append(current_partition)

        return request_partition


    @staticmethod
    def get_user_request(log_partition: dict) -> str:
        """
        Get the user request.
        :param log_partition: The log partition.
        :return: The user request.
        """
        return log_partition.get("request")


    @staticmethod
    def get_app_list(log_partition: dict) -> list:
        """
        Get the application list.
        :param log_partition: The log partition.
        :return: The application list.
        """
        return log_partition.get("application")


    @staticmethod
    def extract_action_step_count(filename: str) -> Optional[int]:
        """
        Extract the action step count from the filename.
        :param filename: The filename.
        :return: The number extracted from the filename, or None when the
            filename does not match the action_step<N>.png pattern.
        """
        # Define a regular expression pattern to extract numbers
        pattern = r'action_step(\d+)\.png'
        # Use re.search to find the matching pattern in the filename
        match = re.search(pattern, filename)
        if match:
            # Return the extracted number as an integer
            return int(match.group(1))
        return None

Loading

0 comments on commit 15739fc

Please sign in to comment.