Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR/image analysis #323

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8a88fcd
Initial commit
YetAnotherModder Jul 1, 2024
0100adb
Misc fixes to skyrim.py and image_manager.py
YetAnotherModder Jul 3, 2024
cbc163c
Updated Image LLM definition description
YetAnotherModder Jul 3, 2024
97df425
Add function to resize images to image_manager.py & misc changes
YetAnotherModder Jul 8, 2024
f845a70
Allow dynamic game usage for image prompts
YetAnotherModder Jul 8, 2024
76ee4de
Update image_manager.py
YetAnotherModder Jul 8, 2024
83bc59b
Update requirements.txt
YetAnotherModder Jul 9, 2024
45db4a3
Add hint and desktop Steam support
YetAnotherModder Aug 19, 2024
076eb2a
Minor fix: remove useless print call
YetAnotherModder Aug 19, 2024
5d49c5d
Updated to support in-engine screenshots with FO4 & Skyrim desktop
YetAnotherModder Nov 4, 2024
8bced2f
Merge branch 'main' into pr/YetAnotherModder/323
art-from-the-machine Dec 11, 2024
aa8627f
Merge branch 'main' into pr/YetAnotherModder/323
art-from-the-machine Dec 30, 2024
1c0bd5a
Swapped secret key order
art-from-the-machine Jan 4, 2025
b254686
Fixed reset button and added vision model list
art-from-the-machine Jan 4, 2025
8ffe527
Refactored OpenAI client code into abstract classes
art-from-the-machine Jan 4, 2025
465d612
Cleaned up gameable logic to check if VR
art-from-the-machine Jan 4, 2025
7341a9d
Fixed error message placement in UI
art-from-the-machine Jan 5, 2025
0021983
Moved vision prompt to Prompts tab
art-from-the-machine Jan 5, 2025
472b9c9
Added in game screenshot option to Vision
art-from-the-machine Jan 6, 2025
7564ada
Added vision hints as in game events
art-from-the-machine Jan 6, 2025
8cc7162
Added logic to use in game screenshots
art-from-the-machine Jan 6, 2025
42c40c0
Switched to config game path
art-from-the-machine Jan 6, 2025
a96a770
Removed unused code
art-from-the-machine Jan 6, 2025
d40e2c1
Fixed class description
art-from-the-machine Jan 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ data/**/*.lip
data/**/*.json
data/**/*.txt
data/tmp/
GPT_SECRET_KEY.txt
STT_SECRET_KEY.txt
*SECRET_KEY.txt
logging.log
player_recording.wav
MantellaEnv*
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def main():
mantella_http_server = http_server()

#start the http server
conversation = mantella_route(config, 'STT_SECRET_KEY.txt', 'GPT_SECRET_KEY.txt', language_info, should_debug_http)
conversation = mantella_route(config, 'STT_SECRET_KEY.txt', 'IMAGE_SECRET_KEY.txt', 'GPT_SECRET_KEY.txt', language_info, should_debug_http)
stt = stt_route(config, 'STT_SECRET_KEY.txt', 'GPT_SECRET_KEY.txt', should_debug_http)
ui = StartUI(config)
routes: list[routeable] = [conversation, stt, ui]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ sphinxcontrib-youtube==1.4.1
furo==2023.9.10
pywin32==306
mss==9.0.1
opencv-python==4.10.0.84
opencv-python==4.10.0.84
37 changes: 24 additions & 13 deletions src/config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,20 @@ def __update_config_values_from_current_state(self):
self.game = str(self.game).lower().replace(' ', '').replace('_', '')
if self.game =="fallout4":
self.game ="Fallout4"
self.game_path: str = self.__definitions.get_string_value("fallout4_folder")
self.mod_path: str = self.__definitions.get_string_value("fallout4_mod_folder") #config['Paths']['fallout4_mod_folder']
elif self.game =="fallout4vr":
self.game ="Fallout4VR"
self.game_path: str = self.__definitions.get_string_value("fallout4vr_folder") #config['Paths']['fallout4vr_folder']
self.mod_path: str = self.__definitions.get_string_value("fallout4vr_mod_folder") #config['Paths']['fallout4vr_mod_folder']
elif self.game =="skyrimvr":
self.game ="SkyrimVR"
self.game_path = None
self.mod_path: str = self.__definitions.get_string_value("skyrimvr_mod_folder") #config['Paths']['skyrimvr_mod_folder']
#if the game is not recognized Mantella will assume it's Skyrim since that's the most frequent one.
else:
self.game ="Skyrim"
self.game_path = None
self.mod_path: str = self.__definitions.get_string_value("skyrim_mod_folder") #config['Paths']['skyrim_mod_folder']

self.facefx_path = self.__definitions.get_string_value("facefx_folder")
Expand Down Expand Up @@ -212,19 +215,12 @@ def __update_config_values_from_current_state(self):
# if self.llm_api == "Custom":
# self.llm_api = self.__definitions.get_string_value("llm_custom_service_url")
self.custom_token_count = self.__definitions.get_int_value("custom_token_count")
self.temperature = self.__definitions.get_float_value("temperature")
self.top_p = self.__definitions.get_float_value("top_p")

stop_value = self.__definitions.get_string_value("stop")
if ',' in stop_value:
# If there are commas in the stop value, split the string by commas and store the values in a list
self.stop = stop_value.split(',')
else:
# If there are no commas, put the single value into a list
self.stop = [stop_value]

self.frequency_penalty = self.__definitions.get_float_value("frequency_penalty")
self.max_tokens = self.__definitions.get_int_value("max_tokens")
try:
self.llm_params = json.loads(self.__definitions.get_string_value("llm_params").replace('\n', ''))
except Exception as e:
logging.error(f"""Error in parsing LLM parameter list: {e}
LLM parameter list must follow the Python dictionary format: https://www.w3schools.com/python/python_dictionaries.asp""")
self.llm_params = None

# self.stop_llm_generation_on_assist_keyword: bool = self.__definitions.get_bool_value("stop_llm_generation_on_assist_keyword")
self.try_filter_narration: bool = self.__definitions.get_bool_value("try_filter_narration")
Expand Down Expand Up @@ -269,6 +265,7 @@ def __update_config_values_from_current_state(self):
self.radiant_end_prompt = self.__definitions.get_string_value("radiant_end_prompt")
self.memory_prompt = self.__definitions.get_string_value("memory_prompt")
self.resummarize_prompt = self.__definitions.get_string_value("resummarize_prompt")
self.vision_prompt = self.__definitions.get_string_value("vision_prompt")

# Vision
self.vision_enabled = self.__definitions.get_bool_value('vision_enabled')
Expand All @@ -277,6 +274,20 @@ def __update_config_values_from_current_state(self):
self.image_quality = self.__definitions.get_int_value("image_quality")
self.resize_method = self.__definitions.get_string_value("resize_method")
self.capture_offset = json.loads(self.__definitions.get_string_value("capture_offset"))
self.use_game_screenshots = self.__definitions.get_bool_value("use_game_screenshots")

# Custom Vision Model
self.custom_vision_model = self.__definitions.get_bool_value("custom_vision_model")
self.vision_llm_api = self.__definitions.get_string_value("vision_llm_api")
self.vision_llm = self.__definitions.get_string_value("vision_model")
self.vision_llm = self.vision_llm.split(' |')[0] if ' |' in self.vision_llm else self.vision_llm
self.vision_custom_token_count = self.__definitions.get_int_value("vision_custom_token_count")
try:
self.vision_llm_params = json.loads(self.__definitions.get_string_value("vision_llm_params").replace('\n', ''))
except Exception as e:
logging.error(f"""Error in parsing LLM parameter list: {e}
LLM parameter list must follow the Python dictionary format: https://www.w3schools.com/python/python_dictionaries.asp""")
self.vision_llm_params = None

pass
except Exception as e:
Expand Down
14 changes: 10 additions & 4 deletions src/config/definitions/game_definitions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from src.config.types.config_value import ConfigValue
from src.config.types.config_value import ConfigValue, ConfigValueTag
from src.config.types.config_value_path import ConfigValuePath
from src.config.types.config_value_selection import ConfigValueSelection
from src.config.config_value_constraint import ConfigValueConstraint, ConfigValueConstraintResult
Expand Down Expand Up @@ -64,9 +64,15 @@ def get_fallout4vr_mod_folder_config_value() -> ConfigValue:
identifier = "fallout4vr_mod_folder"
game_folder = "Fallout 4 VR"
return ConfigValuePath(identifier, f"{game_folder}: Path to Mantella Gun Mod", GameDefinitions.MOD_FOLDER_DESCRIPTION.format(game_folder), "C:\\Modding\\MO2\\Fallout4VR\\mods\\Mantella","Sound",[GameDefinitions.ProgramFilesChecker(game_folder), GameDefinitions.ModFolderChecker(identifier)])

@staticmethod
def get_fallout4_folder_config_value() -> ConfigValue:
    """Build the config entry for the Fallout 4 (desktop) install folder.

    Returns:
        ConfigValuePath: advanced-tagged path setting pointing at the folder
        containing Fallout4.exe. Only needs to be set when
        `Vision`->`Use Game Screenshots` is enabled.
    """
    description = """The filepath of the Fallout 4 (desktop) directory where the executable is located.
This path only needs to be set if `Vision`->`Use Game Screenshots` is enabled."""
    # Fix: the default path must use escaped backslashes. The previous literal
    # "C:\Games\Steam\steamapps\common\Fallout 4" contained invalid escape
    # sequences (\G, \s, \c, \F), which emit SyntaxWarning on Python 3.12+ and
    # were inconsistent with the double-backslash paths used elsewhere in this file.
    return ConfigValuePath("fallout4_folder", "Fallout 4: Path to Fallout 4 Folder", description, "C:\\Games\\Steam\\steamapps\\common\\Fallout 4", "Fallout4.exe", [GameDefinitions.ProgramFilesChecker("Fallout4")], tags=[ConfigValueTag.advanced])

@staticmethod
def get_fallout4vr_folder_config_value() -> ConfigValue:
fallout4vr_folder_description = """If your game is Fallout 4 VR, point this to the folder containing the Fallout4VR.exe that is run to start the game.
Due to compatibility reasons, communication with Fallout 4 VR needs to happen via reading and writing to a file that is located in your Fallout4 VR main game folder."""
return ConfigValuePath("fallout4vr_folder", "Fallout 4 VR: Path Fallout 4 VR Folder", fallout4vr_folder_description, "C:\\Games\\Steam\\steamapps\\common\\Fallout4VR","Fallout4VR.exe",[GameDefinitions.ProgramFilesChecker("Fallout4VR")])
fallout4vr_folder_description = """The filepath of the Fallout 4 VR directory where the executable is located.
This path only needs to be set if `Vision`->`Use Game Screenshots` is enabled."""
return ConfigValuePath("fallout4vr_folder", "Fallout 4 VR: Path to Fallout 4 VR Folder", fallout4vr_folder_description, "C:\\Games\\Steam\\steamapps\\common\\Fallout4VR","Fallout4VR.exe",[GameDefinitions.ProgramFilesChecker("Fallout4VR")], tags=[ConfigValueTag.advanced])
40 changes: 14 additions & 26 deletions src/config/definitions/llm_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,37 +51,25 @@ def get_wait_time_buffer_config_value() -> ConfigValue:
return ConfigValueFloat("wait_time_buffer","Wait Time Buffer",description, -1.0, -999, 999,tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])

@staticmethod
def get_temperature_config_value() -> ConfigValue:
return ConfigValueFloat("temperature","Temperature","", 1.0, 0, 2,tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])

@staticmethod
def get_top_p_config_value() -> ConfigValue:
return ConfigValueFloat("top_p","Top P","", 1.0, 0, 1,tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])

@staticmethod
def get_stop_config_value() -> ConfigValue:
description = """A list of up to FOUR strings, by default only # is used.
If you want more than one stopping string use this format: string1,string2,string3,string4"""
return ConfigValueString("stop","Stop",description, "#",tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])

@staticmethod
def get_frequency_penalty_config_value() -> ConfigValue:
return ConfigValueFloat("frequency_penalty","Frequency Penalty","", 0, -2, 2,tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])
def get_try_filter_narration() -> ConfigValue:
try_filter_narration_description = """If checked, sentences containing asterisks (*) will not be spoken aloud."""
return ConfigValueBool("try_filter_narration","Filter Narration",try_filter_narration_description,True,tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])

@staticmethod
def get_max_tokens_config_value() -> ConfigValue:
return ConfigValueInt("max_tokens","Max Tokens","Lowering this value can sometimes result in empty responses.", 250, 1, 999999,tags=[ConfigValueTag.advanced,ConfigValueTag.share_row])
def get_llm_params_config_value() -> ConfigValue:
value = """{
"max_tokens": 250,
"temperature": 1.0,
"stop": ["#"]
}"""
description = """Parameters passed as part of the request to the LLM.
A list of the most common parameters can be found here: https://openrouter.ai/docs/parameters.
Note that available parameters can vary per LLM provider."""
return ConfigValueString("llm_params", "Parameters", description, value, tags=[ConfigValueTag.advanced])

# @staticmethod
# def get_stop_llm_generation_on_assist_keyword() -> ConfigValue:
# stop_llm_generation_on_assist_keyword_description = """Should the generation of the LLM be stopped if the word 'assist' is found?
# A lot of LLMs are trained to be virtual assistants use the word excessively.
# Default: Checked"""
# return ConfigValueBool("stop_llm_generation_on_assist_keyword","Stop LLM generation if 'assist' keyword is found",stop_llm_generation_on_assist_keyword_description,True,tags=[ConfigValueTag.advanced])

@staticmethod
def get_try_filter_narration() -> ConfigValue:
try_filter_narration_description = """If checked, sentences containing asterisks (*) will not be spoken aloud."""
return ConfigValueBool("try_filter_narration","Filter Narration",try_filter_narration_description,True,tags=[ConfigValueTag.advanced])


# return ConfigValueBool("stop_llm_generation_on_assist_keyword","Stop LLM generation if 'assist' keyword is found",stop_llm_generation_on_assist_keyword_description,True,tags=[ConfigValueTag.advanced])
8 changes: 7 additions & 1 deletion src/config/definitions/prompt_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,12 @@ def get_resummarize_prompt_config_value() -> ConfigValue:
return ConfigValueString("resummarize_prompt","Resummarize Prompt",resummarize_prompt_description,resummarize_prompt,[PromptDefinitions.PromptChecker(["name", "language", "game"])])

@staticmethod
def get_vision_prompt_config_value() -> ConfigValue:
    """Build the config entry for the vision prompt.

    Returns:
        ConfigValueString: the prompt handed to the vision-capable LLM
        whenever `Custom Vision Model` is enabled.
    """
    desc = """The prompt passed to the vision-capable LLM when `Custom Vision Model` is enabled."""
    default_prompt = """This image is to give context and is from the player's point of view in the game of {game}.
Describe the details visible inside it without mentioning the game. Refer to it as a scene instead of an image."""
    return ConfigValueString("vision_prompt", "Vision Prompt", desc, default_prompt)

def get_radiant_start_prompt_config_value() -> ConfigValue:
radiant_start_prompt_description = """Once a radiant conversation has started and the radiant prompt has been passed to the LLM, the below text is passed in replace of the player response.
This prompt is used to steer the radiant conversation."""
Expand All @@ -214,4 +220,4 @@ def get_radiant_end_prompt_config_value() -> ConfigValue:
radiant_end_prompt_description = """The final prompt sent to the LLM before ending a radiant conversation.
This prompt is used to guide the LLM to end the conversation naturally."""
radiant_end_prompt = """Please wrap up the current topic between the NPCs in a natural way. Nobody is leaving, so there is no need for formal goodbyes."""
return ConfigValueString("radiant_end_prompt","Radiant End Prompt",radiant_end_prompt_description,radiant_end_prompt,[PromptDefinitions.PromptChecker([])])
return ConfigValueString("radiant_end_prompt","Radiant End Prompt",radiant_end_prompt_description,radiant_end_prompt,[PromptDefinitions.PromptChecker([])])
Loading