From d3ec141ee5039338f7081516a9fbea9bbf2b1edb Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Fri, 10 Jan 2025 09:01:54 +0200 Subject: [PATCH 01/20] remove libundreamai_ios.a from embed frameworks --- Editor/LLMBuildProcessor.cs | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/Editor/LLMBuildProcessor.cs b/Editor/LLMBuildProcessor.cs index 96d0f3a8..4cedb00e 100644 --- a/Editor/LLMBuildProcessor.cs +++ b/Editor/LLMBuildProcessor.cs @@ -48,24 +48,40 @@ private void OnBuildError(string condition, string stacktrace, LogType type) #if UNITY_IOS /// - /// Adds the Accelerate framework (for ios) + /// Postprocess the iOS Build /// - public static void AddAccelerate(string outputPath) + public static void PostprocessIOSBuild(string outputPath) { string projPath = PBXProject.GetPBXProjectPath(outputPath); - PBXProject proj = new PBXProject(); - proj.ReadFromFile(projPath); - proj.AddFrameworkToProject(proj.GetUnityMainTargetGuid(), "Accelerate.framework", false); - proj.AddFrameworkToProject(proj.GetUnityFrameworkTargetGuid(), "Accelerate.framework", false); - proj.WriteToFile(projPath); + PBXProject project = new PBXProject(); + project.ReadFromFile(projPath); + + string targetGuid = project.GetUnityFrameworkTargetGuid(); + string frameworkTargetGuid = project.GetUnityFrameworkTargetGuid(); + string unityMainTargetGuid = project.GetUnityMainTargetGuid(); + string embedFrameworksGuid = project.GetResourcesBuildPhaseByTarget(frameworkTargetGuid); + + // Add Accelerate framework + project.AddFrameworkToProject(unityMainTargetGuid, "Accelerate.framework", false); + project.AddFrameworkToProject(targetGuid, "Accelerate.framework", false); + + // Remove libundreamai_ios.a from Embed Frameworks + string libraryFile = "libundreamai_ios.a"; + string fileGuid = project.FindFileGuidByProjectPath(libraryFile); + + if (string.IsNullOrEmpty(fileGuid)) Debug.LogError("Library file {libraryFile} not found in project"); + else project.RemoveFileFromBuild(embedFrameworksGuid, fileGuid); + + project.WriteToFile(projPath); } + #endif // called after the build public void OnPostprocessBuild(BuildReport report) { #if UNITY_IOS - AddAccelerate(report.summary.outputPath); + PostprocessIOSBuild(report.summary.outputPath); #endif BuildCompleted(); } From 556162e035af687c8a85a3f8f7f75e85e611504b Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Fri, 10 Jan 2025 10:57:29 +0200 Subject: [PATCH 02/20] use relative path of library in build --- Editor/LLMBuildProcessor.cs | 6 ++++-- Runtime/LLMBuilder.cs | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Editor/LLMBuildProcessor.cs b/Editor/LLMBuildProcessor.cs index 4cedb00e..680cda98 100644 --- a/Editor/LLMBuildProcessor.cs +++ b/Editor/LLMBuildProcessor.cs @@ -2,6 +2,8 @@ using UnityEditor.Build; using UnityEditor.Build.Reporting; using UnityEngine; +using System.IO; + #if UNITY_IOS using UnityEditor.iOS.Xcode; #endif @@ -66,10 +68,10 @@ public static void PostprocessIOSBuild(string outputPath) project.AddFrameworkToProject(targetGuid, "Accelerate.framework", false); // Remove libundreamai_ios.a from Embed Frameworks - string libraryFile = "libundreamai_ios.a"; + string libraryFile = Path.Combine("Libraries", LLMBuilder.PluginLibraryDir("iOS", true), "libundreamai_ios.a"); string fileGuid = project.FindFileGuidByProjectPath(libraryFile); - if (string.IsNullOrEmpty(fileGuid)) Debug.LogError("Library file {libraryFile} not found in project"); + if 
(string.IsNullOrEmpty(fileGuid)) Debug.LogError($"Library file {libraryFile} not found in project"); else project.RemoveFileFromBuild(embedFrameworksGuid, fileGuid); project.WriteToFile(projPath); diff --git a/Runtime/LLMBuilder.cs b/Runtime/LLMBuilder.cs index e8a6dd2f..98232ccf 100644 --- a/Runtime/LLMBuilder.cs +++ b/Runtime/LLMBuilder.cs @@ -16,8 +16,6 @@ public class LLMBuilder { static List movedPairs = new List(); public static string BuildTempDir = Path.Combine(Application.temporaryCachePath, "LLMUnityBuild"); - public static string androidPluginDir = Path.Combine(Application.dataPath, "Plugins", "Android", "LLMUnity"); - public static string iOSPluginDir = Path.Combine(Application.dataPath, "Plugins", "iOS", "LLMUnity"); static string movedCache = Path.Combine(BuildTempDir, "moved.json"); [InitializeOnLoadMethod] @@ -26,6 +24,18 @@ private static void InitializeOnLoad() Reset(); } + public static string PluginDir(string platform, bool relative = false) + { + string pluginDir = Path.Combine("Plugins", platform, "LLMUnity"); + if (!relative) pluginDir = Path.Combine(Application.dataPath, pluginDir); + return pluginDir; + } + + public static string PluginLibraryDir(string platform, bool relative = false) + { + return Path.Combine(PluginDir(platform, relative), LLMUnitySetup.libraryName); + } + /// /// Performs an action for a file or a directory recursively /// @@ -88,7 +98,7 @@ public static void MovePath(string source, string target) /// path public static bool DeletePath(string path) { - string[] allowedDirs = new string[] { LLMUnitySetup.GetAssetPath(), BuildTempDir, androidPluginDir, iOSPluginDir}; + string[] allowedDirs = new string[] { LLMUnitySetup.GetAssetPath(), BuildTempDir, PluginDir("Android"), PluginDir("iOS")}; bool deleteOK = false; foreach (string allowedDir in allowedDirs) deleteOK = deleteOK || LLMUnitySetup.IsSubPath(path, allowedDir); if (!deleteOK) @@ -175,9 +185,10 @@ public static void BuildLibraryPlatforms(string platform) if (platform == "android" || platform == "ios") { - string pluginDir = platform == "android"? androidPluginDir: iOSPluginDir; + string pluginPlatform = platform == "android" ? 
"Android" : "iOS"; string source = Path.Combine(LLMUnitySetup.libraryPath, platform); - string target = Path.Combine(pluginDir, LLMUnitySetup.libraryName); + string target = PluginLibraryDir(pluginPlatform); + string pluginDir = PluginDir(pluginPlatform); MoveAction(source, target); MoveAction(source + ".meta", target + ".meta"); AddActionAddMeta(pluginDir); From 4f26da8b11f98534c9599e0a2b4f08c11aebdb3c Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Mon, 13 Jan 2025 12:04:36 +0200 Subject: [PATCH 03/20] fix embed frameworks code --- Editor/LLMBuildProcessor.cs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Editor/LLMBuildProcessor.cs b/Editor/LLMBuildProcessor.cs index 680cda98..75e59b2f 100644 --- a/Editor/LLMBuildProcessor.cs +++ b/Editor/LLMBuildProcessor.cs @@ -70,9 +70,19 @@ public static void PostprocessIOSBuild(string outputPath) // Remove libundreamai_ios.a from Embed Frameworks string libraryFile = Path.Combine("Libraries", LLMBuilder.PluginLibraryDir("iOS", true), "libundreamai_ios.a"); string fileGuid = project.FindFileGuidByProjectPath(libraryFile); - if (string.IsNullOrEmpty(fileGuid)) Debug.LogError($"Library file {libraryFile} not found in project"); - else project.RemoveFileFromBuild(embedFrameworksGuid, fileGuid); + else + { + foreach (var phaseGuid in project.GetAllBuildPhasesForTarget(unityMainTargetGuid)) + { + if (project.GetBuildPhaseName(phaseGuid) == "Embed Frameworks") + { + project.RemoveFileFromBuild(phaseGuid, fileGuid); + break; + } + } + project.RemoveFileFromBuild(unityMainTargetGuid, fileGuid); + } project.WriteToFile(projPath); } From c648e1fd15f5ecdc46f68178474da3a642e7d8fe Mon Sep 17 00:00:00 2001 From: amakropoulos Date: Fri, 17 Jan 2025 13:29:00 +0000 Subject: [PATCH 04/20] update changelogs --- CHANGELOG.md | 6 ++++++ CHANGELOG.release.md | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f3ed8a..e8a40040 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v2.4.2 +#### 🐛 Fixes + +- Fix code signing on iOS (PR: #298) + + ## v2.4.1 #### 🚀 Features diff --git a/CHANGELOG.release.md b/CHANGELOG.release.md index 8733cb3d..36992c1f 100644 --- a/CHANGELOG.release.md +++ b/CHANGELOG.release.md @@ -1,8 +1,4 @@ -### 🚀 Features - -- Static library linking on mobile (fixes iOS signing) (PR: #289) - ### 🐛 Fixes -- Fix support for extras (flash attention, iQ quants) (PR: #292) +- Fix code signing on iOS (PR: #298) From d310cdec8edc30aa69ad61a0ba8c532398bc3beb Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 11:37:02 +0200 Subject: [PATCH 05/20] add warm-up for provided prompt --- Runtime/LLMCharacter.cs | 67 ++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/Runtime/LLMCharacter.cs b/Runtime/LLMCharacter.cs index e22e8604..fbf97ac7 100644 --- a/Runtime/LLMCharacter.cs +++ b/Runtime/LLMCharacter.cs @@ -416,6 +416,24 @@ protected virtual async Task CompletionRequest(string json, Callback PromptWithQuery(string query) + { + ChatRequest result = default; + await chatLock.WaitAsync(); + try + { + AddPlayerMessage(query); + string prompt = template.ComputePrompt(chat, playerName, AIName); + result = GenerateRequest(prompt); + chat.RemoveAt(chat.Count - 1); + } + finally + { + chatLock.Release(); + } + return result; + } + /// /// Chat functionality of the LLM. /// It calls the LLM completion based on the provided query including the previous chat history. 
@@ -436,20 +454,7 @@ public virtual async Task Chat(string query, Callback callback = if (!CheckTemplate()) return null; if (!await InitNKeep()) return null; - string json; - await chatLock.WaitAsync(); - try - { - AddPlayerMessage(query); - string prompt = template.ComputePrompt(chat, playerName, AIName); - json = JsonUtility.ToJson(GenerateRequest(prompt)); - chat.RemoveAt(chat.Count - 1); - } - finally - { - chatLock.Release(); - } - + string json = JsonUtility.ToJson(await PromptWithQuery(query)); string result = await CompletionRequest(json, callback); if (addToHistory && result != null) @@ -494,23 +499,43 @@ public virtual async Task Complete(string prompt, Callback callb } /// - /// Allow to warm-up a model by processing the prompt. + /// Allow to warm-up a model by processing the system prompt. /// The prompt processing will be cached (if cachePrompt=true) allowing for faster initialisation. - /// The function allows callback for when the prompt is processed and the response received. - /// - /// The function calls the Chat function with a predefined query without adding it to history. + /// The function allows a callback function for when the prompt is processed and the response received. /// /// callback function called when the full response has been received - /// user prompt used during the initialisation (not added to history) /// the LLM response public virtual async Task Warmup(EmptyCallback completionCallback = null) + { + await Warmup(null, completionCallback); + } + + /// + /// Allow to warm-up a model by processing the provided prompt without adding it to history. + /// The prompt processing will be cached (if cachePrompt=true) allowing for faster initialisation. + /// The function allows a callback function for when the prompt is processed and the response received. 
+ /// + /// + /// user prompt used during the initialisation (not added to history) + /// callback function called when the full response has been received + /// the LLM response + public virtual async Task Warmup(string query, EmptyCallback completionCallback = null) { await LoadTemplate(); if (!CheckTemplate()) return; if (!await InitNKeep()) return; - string prompt = template.ComputePrompt(chat, playerName, AIName); - ChatRequest request = GenerateRequest(prompt); + ChatRequest request; + if (String.IsNullOrEmpty(query)) + { + string prompt = template.ComputePrompt(chat, playerName, AIName); + request = GenerateRequest(prompt); + } + else + { + request = await PromptWithQuery(query); + } + request.n_predict = 0; string json = JsonUtility.ToJson(request); await CompletionRequest(json); From d030272624b7f367227c366b3ccf5924ecaa2368 Mon Sep 17 00:00:00 2001 From: amakropoulos Date: Tue, 21 Jan 2025 09:38:29 +0000 Subject: [PATCH 06/20] update changelogs --- CHANGELOG.md | 4 ++++ CHANGELOG.release.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8a40040..c7ee85f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ ## v2.4.2 +#### 🚀 Features + +- Add warm-up function for provided prompt (PR: #301) + #### 🐛 Fixes - Fix code signing on iOS (PR: #298) diff --git a/CHANGELOG.release.md b/CHANGELOG.release.md index 36992c1f..5026766b 100644 --- a/CHANGELOG.release.md +++ b/CHANGELOG.release.md @@ -1,3 +1,7 @@ +### 🚀 Features + +- Add warm-up function for provided prompt (PR: #301) + ### 🐛 Fixes - Fix code signing on iOS (PR: #298) From f37f25dcf1ff360dc5fe1c8d5bb461479eb47712 Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:30:50 +0200 Subject: [PATCH 07/20] add tooltips for the different parameters --- Runtime/LLM.cs | 40 +++++++----- Runtime/LLMCaller.cs | 23 ++++--- Runtime/LLMCharacter.cs | 106 ++++++++++++++++++-------------- Runtime/LLMLib.cs | 101 +++++++++++++++--------------- Runtime/LLMUnitySetup.cs | 9 ++- Runtime/RAG/DBSearch.cs | 8 ++- Runtime/RAG/RAG.cs | 8 +++ Runtime/RAG/SentenceSplitter.cs | 2 + Runtime/RAG/TokenSplitter.cs | 4 +- Runtime/RAG/WordSplitter.cs | 4 +- 10 files changed, 178 insertions(+), 127 deletions(-) diff --git a/Runtime/LLM.cs b/Runtime/LLM.cs index 3564d5bf..e3866dd5 100644 --- a/Runtime/LLM.cs +++ b/Runtime/LLM.cs @@ -17,28 +17,37 @@ namespace LLMUnity /// public class LLM : MonoBehaviour { - /// toggle to show/hide advanced options in the GameObject + /// show/hide advanced options in the GameObject + [Tooltip("show/hide advanced options in the GameObject")] [HideInInspector] public bool advancedOptions = false; - /// toggle to enable remote server functionality + /// enable remote server functionality + [Tooltip("enable remote server functionality")] [LocalRemote] public bool remote = false; - /// port to use for the LLM server + /// port to use for the remote LLM server + [Tooltip("port to use for the remote LLM server")] [Remote] public int port = 13333; /// number of threads to use (-1 = all) + [Tooltip("number of threads to use (-1 = all)")] [LLM] public int numThreads = -1; /// number of model layers to offload to the GPU (0 = GPU not used). - /// Use a large number i.e. >30 to utilise the GPU as much as possible. /// If the user's GPU is not supported, the LLM will fall back to the CPU + [Tooltip("number of model layers to offload to the GPU (0 = GPU not used). 
If the user's GPU is not supported, the LLM will fall back to the CPU")] [LLM] public int numGPULayers = 0; - /// select to log the output of the LLM in the Unity Editor. + /// log the output of the LLM in the Unity Editor. + [Tooltip("log the output of the LLM in the Unity Editor.")] [LLM] public bool debug = false; /// number of prompts that can happen in parallel (-1 = number of LLMCaller objects) + [Tooltip("number of prompts that can happen in parallel (-1 = number of LLMCaller objects)")] [LLMAdvanced] public int parallelPrompts = -1; - /// select to not destroy the LLM GameObject when loading a new Scene. + /// do not destroy the LLM GameObject when loading a new Scene. + [Tooltip("do not destroy the LLM GameObject when loading a new Scene.")] [LLMAdvanced] public bool dontDestroyOnLoad = true; /// Size of the prompt context (0 = context size of the model). /// This is the number of tokens the model can take as input when generating responses. + [Tooltip("Size of the prompt context (0 = context size of the model). This is the number of tokens the model can take as input when generating responses.")] [DynamicRange("minContextLength", "maxContextLength", false), Model] public int contextSize = 8192; /// Batch size for prompt processing. + [Tooltip("Batch size for prompt processing.")] [ModelAdvanced] public int batchSize = 512; /// Boolean set to true if the server has started and is ready to receive requests, false otherwise. public bool started { get; protected set; } = false; @@ -48,22 +57,25 @@ public class LLM : MonoBehaviour public static bool modelSetupFailed { get; protected set; } = false; /// Boolean set to true if the server has started and is ready to receive requests, false otherwise. public static bool modelSetupComplete { get; protected set; } = false; - - /// the LLM model to use. - /// Models with .gguf format are allowed. + /// LLM model to use (.gguf format) + [Tooltip("LLM model to use (.gguf format)")] [ModelAdvanced] public string model = ""; - /// Chat template used for the model + /// Chat template for the model + [Tooltip("Chat template for the model")] [ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate; - /// the paths of the LORA models being used (relative to the Assets/StreamingAssets folder). - /// Models with .gguf format are allowed. + /// LORA models to use (.gguf format) + [Tooltip("LORA models to use (.gguf format)")] [ModelAdvanced] public string lora = ""; /// the weights of the LORA models being used. 
+ [Tooltip("the weights of the LORA models being used.")] [ModelAdvanced] public string loraWeights = ""; /// enable use of flash attention + [Tooltip("enable use of flash attention")] [ModelExtras] public bool flashAttention = false; - - /// API key to use for the server (optional) + /// API key to use for the server + [Tooltip("API key to use for the server")] public string APIKey; + // SSL certificate [SerializeField] private string SSLCert = ""; diff --git a/Runtime/LLMCaller.cs b/Runtime/LLMCaller.cs index 2ef586d6..b222b8d3 100644 --- a/Runtime/LLMCaller.cs +++ b/Runtime/LLMCaller.cs @@ -15,26 +15,31 @@ namespace LLMUnity /// public class LLMCaller : MonoBehaviour { - /// toggle to show/hide advanced options in the GameObject + /// show/hide advanced options in the GameObject + [Tooltip("show/hide advanced options in the GameObject")] [HideInInspector] public bool advancedOptions = false; - /// toggle to use remote LLM server or local LLM + /// use remote LLM server + [Tooltip("use remote LLM server")] [LocalRemote] public bool remote = false; - /// the LLM object to use + /// LLM GameObject to use + [Tooltip("LLM GameObject to use")] [Local, SerializeField] protected LLM _llm; public LLM llm { get => _llm;//whatever set => SetLLM(value); } - - /// allows to use a server with API key + /// API key for the remote server + [Tooltip("API key for the remote server")] [Remote] public string APIKey; - - /// host to use for the LLM server + /// host of the remote LLM server + [Tooltip("host of the remote LLM server")] [Remote] public string host = "localhost"; - /// port to use for the LLM server + /// port of the remote LLM server + [Tooltip("port of the remote LLM server")] [Remote] public int port = 13333; - /// number of retries to use for the LLM server requests (-1 = infinite) + /// number of retries to use for the remote LLM server requests (-1 = infinite) + [Tooltip("number of retries to use for the remote LLM server requests (-1 = infinite)")] [Remote] public int numRetries = 10; protected LLM _prellm; diff --git a/Runtime/LLMCharacter.cs b/Runtime/LLMCharacter.cs index fbf97ac7..17650e35 100644 --- a/Runtime/LLMCharacter.cs +++ b/Runtime/LLMCharacter.cs @@ -18,96 +18,113 @@ namespace LLMUnity public class LLMCharacter : LLMCaller { /// file to save the chat history. - /// The file is saved only for Chat calls with addToHistory set to true. - /// The file will be saved within the persistentDataPath directory (see https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html). + /// The file will be saved within the persistentDataPath directory. + [Tooltip("file to save the chat history. The file will be saved within the persistentDataPath directory.")] [LLM] public string save = ""; - /// toggle to save the LLM cache. This speeds up the prompt calculation but also requires ~100MB of space per character. + /// save the LLM cache. Speeds up the prompt calculation when reloading from history but also requires ~100MB of space per character. + [Tooltip("save the LLM cache. Speeds up the prompt calculation when reloading from history but also requires ~100MB of space per character.")] [LLM] public bool saveCache = false; - /// select to log the constructed prompt the Unity Editor. + /// log the constructed prompt the Unity Editor. + [Tooltip("log the constructed prompt the Unity Editor.")] [LLM] public bool debugPrompt = false; - /// number of tokens to predict (-1 = infinity, -2 = until context filled). - /// This is the amount of tokens the model will maximum predict. 
- /// When N predict is reached the model will stop generating. - /// This means words / sentences might not get finished if this is too low. + /// maximum number of tokens that the LLM will predict (-1 = infinity, -2 = until context filled). + [Tooltip("maximum number of tokens that the LLM will predict (-1 = infinity, -2 = until context filled).")] [Model] public int numPredict = 256; - /// specify which slot of the server to use for computation (affects caching) + /// slot of the server to use for computation (affects caching) + [Tooltip("slot of the server to use for computation (affects caching)")] [ModelAdvanced] public int slot = -1; - /// grammar file used for the LLM in .cbnf format (relative to the Assets/StreamingAssets folder) + /// grammar file used for the LLMCharacter (.gbnf format) + [Tooltip("grammar file used for the LLMCharacter (.gbnf format)")] [ModelAdvanced] public string grammar = null; - /// option to cache the prompt as it is being created by the chat to avoid reprocessing the entire prompt every time (default: true) + /// cache the processed prompt to avoid reprocessing the entire prompt every time (default: true, recommended!) + [Tooltip("cache the processed prompt to avoid reprocessing the entire prompt every time (default: true, recommended!)")] [ModelAdvanced] public bool cachePrompt = true; - /// seed for reproducibility. For random results every time set to -1. + /// seed for reproducibility (-1 = no reproducibility). + [Tooltip("seed for reproducibility (-1 = no reproducibility).")] [ModelAdvanced] public int seed = 0; - /// LLM temperature, lower values give more deterministic answers. - /// The temperature setting adjusts how random the generated responses are. - /// Turning it up makes the generated choices more varied and unpredictable. - /// Turning it down makes the generated responses more predictable and focused on the most likely options. + /// LLM temperature, lower values give more deterministic answers. + [Tooltip("LLM temperature, lower values give more deterministic answers.")] [ModelAdvanced, Float(0f, 2f)] public float temperature = 0.2f; - /// top-k sampling (0 = disabled). - /// The top k value controls the top k most probable tokens at each step of generation. This value can help fine tune the output and make this adhere to specific patterns or constraints. + /// Top-k sampling selects the next token only from the top k most likely predicted tokens (0 = disabled). + /// Higher values lead to more diverse text, while lower value will generate more focused and conservative text. + /// + [Tooltip("Top-k sampling selects the next token only from the top k most likely predicted tokens (0 = disabled). Higher values lead to more diverse text, while lower value will generate more focused and conservative text. ")] [ModelAdvanced, Int(-1, 100)] public int topK = 40; - /// top-p sampling (1.0 = disabled). - /// The top p value controls the cumulative probability of generated tokens. - /// The model will generate tokens until this theshold (p) is reached. - /// By lowering this value you can shorten output & encourage / discourage more diverse output. + /// Top-p sampling selects the next token from a subset of tokens that together have a cumulative probability of at least p (1.0 = disabled). + /// Higher values lead to more diverse text, while lower value will generate more focused and conservative text. 
+ /// + [Tooltip("Top-p sampling selects the next token from a subset of tokens that together have a cumulative probability of at least p (1.0 = disabled). Higher values lead to more diverse text, while lower value will generate more focused and conservative text. ")] [ModelAdvanced, Float(0f, 1f)] public float topP = 0.9f; - /// minimum probability for a token to be used. - /// The probability is defined relative to the probability of the most likely token. + /// minimum probability for a token to be used. + [Tooltip("minimum probability for a token to be used.")] [ModelAdvanced, Float(0f, 1f)] public float minP = 0.05f; - /// control the repetition of token sequences in the generated text. - /// The penalty is applied to repeated tokens. + /// Penalty based on repeated tokens to control the repetition of token sequences in the generated text. + [Tooltip("Penalty based on repeated tokens to control the repetition of token sequences in the generated text.")] [ModelAdvanced, Float(0f, 2f)] public float repeatPenalty = 1.1f; - /// repeated token presence penalty (0.0 = disabled). - /// Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + /// Penalty based on token presence in previous responses to control the repetition of token sequences in the generated text. (0.0 = disabled). + [Tooltip("Penalty based on token presence in previous responses to control the repetition of token sequences in the generated text. (0.0 = disabled).")] [ModelAdvanced, Float(0f, 1f)] public float presencePenalty = 0f; - /// repeated token frequency penalty (0.0 = disabled). - /// Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + /// Penalty based on token frequency in previous responses to control the repetition of token sequences in the generated text. (0.0 = disabled). + [Tooltip("Penalty based on token frequency in previous responses to control the repetition of token sequences in the generated text. (0.0 = disabled).")] [ModelAdvanced, Float(0f, 1f)] public float frequencyPenalty = 0f; - - /// enable tail free sampling with parameter z (1.0 = disabled). - [ModelAdvanced, Float(0f, 1f)] public float tfsZ = 1f; - /// enable locally typical sampling with parameter p (1.0 = disabled). + /// enable locally typical sampling (1.0 = disabled). Higher values will promote more contextually coherent tokens, while lower values will promote more diverse tokens. + [Tooltip("enable locally typical sampling (1.0 = disabled). Higher values will promote more contextually coherent tokens, while lower values will promote more diverse tokens.")] [ModelAdvanced, Float(0f, 1f)] public float typicalP = 1f; /// last n tokens to consider for penalizing repetition (0 = disabled, -1 = ctx-size). + [Tooltip("last n tokens to consider for penalizing repetition (0 = disabled, -1 = ctx-size).")] [ModelAdvanced, Int(0, 2048)] public int repeatLastN = 64; /// penalize newline tokens when applying the repeat penalty. + [Tooltip("penalize newline tokens when applying the repeat penalty.")] [ModelAdvanced] public bool penalizeNl = true; - /// prompt for the purpose of the penalty evaluation. - /// Can be either null, a string or an array of numbers representing tokens (null/"" = use original prompt) + /// prompt for the purpose of the penalty evaluation. 
Can be either null, a string or an array of numbers representing tokens (null/'' = use original prompt) + [Tooltip("prompt for the purpose of the penalty evaluation. Can be either null, a string or an array of numbers representing tokens (null/'' = use original prompt)")] [ModelAdvanced] public string penaltyPrompt; /// enable Mirostat sampling, controlling perplexity during text generation (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0). + [Tooltip("enable Mirostat sampling, controlling perplexity during text generation (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).")] [ModelAdvanced, Int(0, 2)] public int mirostat = 0; - /// set the Mirostat target entropy, parameter tau. + /// The Mirostat target entropy (tau) controls the balance between coherence and diversity in the generated text. + [Tooltip("The Mirostat target entropy (tau) controls the balance between coherence and diversity in the generated text.")] [ModelAdvanced, Float(0f, 10f)] public float mirostatTau = 5f; - /// set the Mirostat learning rate, parameter eta. + /// The Mirostat learning rate (eta) controls how quickly the algorithm responds to feedback from the generated text. + [Tooltip("The Mirostat learning rate (eta) controls how quickly the algorithm responds to feedback from the generated text.")] [ModelAdvanced, Float(0f, 1f)] public float mirostatEta = 0.1f; /// if greater than 0, the response also contains the probabilities of top N tokens for each generated token. + [Tooltip("if greater than 0, the response also contains the probabilities of top N tokens for each generated token.")] [ModelAdvanced, Int(0, 10)] public int nProbs = 0; /// ignore end of stream token and continue generating. + [Tooltip("ignore end of stream token and continue generating.")] [ModelAdvanced] public bool ignoreEos = false; - /// number of tokens to retain from the prompt when the model runs out of context (-1 = LLMCharacter prompt tokens if setNKeepToPrompt is set to true). + [Tooltip("number of tokens to retain from the prompt when the model runs out of context (-1 = LLMCharacter prompt tokens if setNKeepToPrompt is set to true).")] public int nKeep = -1; /// stopwords to stop the LLM in addition to the default stopwords from the chat template. + [Tooltip("stopwords to stop the LLM in addition to the default stopwords from the chat template.")] public List stop = new List(); /// the logit bias option allows to manually adjust the likelihood of specific tokens appearing in the generated text. /// By providing a token ID and a positive or negative bias value, you can increase or decrease the probability of that token being generated. + [Tooltip("the logit bias option allows to manually adjust the likelihood of specific tokens appearing in the generated text. By providing a token ID and a positive or negative bias value, you can increase or decrease the probability of that token being generated.")] public Dictionary logitBias = null; - - /// option to receive the reply from the model as it is produced (recommended!). - /// If it is not selected, the full reply from the model is received in one go + /// Receive the reply from the model as it is produced (recommended!). + /// If not selected, the full reply from the model is received in one go + [Tooltip("Receive the reply from the model as it is produced (recommended!). 
If not selected, the full reply from the model is received in one go")] [Chat] public bool stream = true; /// the name of the player + [Tooltip("the name of the player")] [Chat] public string playerName = "user"; /// the name of the AI + [Tooltip("the name of the AI")] [Chat] public string AIName = "assistant"; - /// a description of the AI role. This defines the LLMCharacter system prompt + /// a description of the AI role (system prompt) + [Tooltip("a description of the AI role (system prompt)")] [TextArea(5, 10), Chat] public string prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."; - /// option to set the number of tokens to retain from the prompt (nKeep) based on the LLMCharacter system prompt + /// set the number of tokens to always retain from the prompt (nKeep) based on the LLMCharacter system prompt + [Tooltip("set the number of tokens to always retain from the prompt (nKeep) based on the LLMCharacter system prompt")] public bool setNKeepToPrompt = true; /// the chat history as list of chat messages + [Tooltip("the chat history as list of chat messages")] public List chat = new List(); /// the grammar to use + [Tooltip("the grammar to use")] public string grammarString; /// \cond HIDE @@ -324,7 +341,6 @@ protected virtual ChatRequest GenerateRequest(string prompt) chatRequest.n_keep = nKeep; chatRequest.stream = stream; chatRequest.stop = GetStopwords(); - chatRequest.tfs_z = tfsZ; chatRequest.typical_p = typicalP; chatRequest.repeat_penalty = repeatPenalty; chatRequest.repeat_last_n = repeatLastN; diff --git a/Runtime/LLMLib.cs b/Runtime/LLMLib.cs index 98dcc78c..90f45db1 100644 --- a/Runtime/LLMLib.cs +++ b/Runtime/LLMLib.cs @@ -370,7 +370,7 @@ public class LLMLib #if (UNITY_ANDROID || UNITY_IOS) && !UNITY_EDITOR - public LLMLib(string arch){} + public LLMLib(string arch) {} #if UNITY_ANDROID public const string LibraryName = "libundreamai_android"; @@ -378,79 +378,79 @@ public LLMLib(string arch){} public const string LibraryName = "__Internal"; #endif - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="Logging")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "Logging")] public static extern void LoggingStatic(IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="StopLogging")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "StopLogging")] public static extern void StopLoggingStatic(); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Construct")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Construct")] public static extern IntPtr LLM_ConstructStatic(string command); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Delete")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Delete")] public static extern void LLM_DeleteStatic(IntPtr LLMObject); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_StartServer")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_StartServer")] public static extern void LLM_StartServerStatic(IntPtr LLMObject); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_StopServer")] + [DllImport(LibraryName, 
CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_StopServer")] public static extern void LLM_StopServerStatic(IntPtr LLMObject); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Start")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Start")] public static extern void LLM_StartStatic(IntPtr LLMObject); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Started")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Started")] public static extern bool LLM_StartedStatic(IntPtr LLMObject); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Stop")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Stop")] public static extern void LLM_StopStatic(IntPtr LLMObject); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_SetTemplate")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_SetTemplate")] public static extern void LLM_SetTemplateStatic(IntPtr LLMObject, string chatTemplate); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_SetSSL")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_SetSSL")] public static extern void LLM_SetSSLStatic(IntPtr LLMObject, string SSLCert, string SSLKey); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Tokenize")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Tokenize")] public static extern void LLM_TokenizeStatic(IntPtr LLMObject, string jsonData, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Detokenize")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Detokenize")] public static extern void LLM_DetokenizeStatic(IntPtr LLMObject, string jsonData, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Embeddings")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Embeddings")] public static extern void LLM_EmbeddingsStatic(IntPtr LLMObject, string jsonData, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Lora_Weight")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Lora_Weight")] public static extern void LLM_LoraWeightStatic(IntPtr LLMObject, string jsonData, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Lora_List")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Lora_List")] public static extern void LLM_LoraListStatic(IntPtr LLMObject, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Completion")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Completion")] public static extern void LLM_CompletionStatic(IntPtr LLMObject, string jsonData, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Slot")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Slot")] public static extern void 
LLM_SlotStatic(IntPtr LLMObject, string jsonData, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Cancel")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Cancel")] public static extern void LLM_CancelStatic(IntPtr LLMObject, int idSlot); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="LLM_Status")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "LLM_Status")] public static extern int LLM_StatusStatic(IntPtr LLMObject, IntPtr stringWrapper); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="StringWrapper_Construct")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "StringWrapper_Construct")] public static extern IntPtr StringWrapper_ConstructStatic(); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="StringWrapper_Delete")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "StringWrapper_Delete")] public static extern void StringWrapper_DeleteStatic(IntPtr instance); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="StringWrapper_GetStringSize")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "StringWrapper_GetStringSize")] public static extern int StringWrapper_GetStringSizeStatic(IntPtr instance); - [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint="StringWrapper_GetString")] + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "StringWrapper_GetString")] public static extern void StringWrapper_GetStringStatic(IntPtr instance, IntPtr buffer, int bufferSize, bool clear = false); - public void Logging(IntPtr stringWrapper){ LoggingStatic(stringWrapper); } - public void StopLogging(){ StopLoggingStatic(); } - public IntPtr LLM_Construct(string command){ return LLM_ConstructStatic(command); } - public void LLM_Delete(IntPtr LLMObject){ LLM_DeleteStatic(LLMObject); } - public void LLM_StartServer(IntPtr LLMObject){ LLM_StartServerStatic(LLMObject); } - public void LLM_StopServer(IntPtr LLMObject){ LLM_StopServerStatic(LLMObject); } - public void LLM_Start(IntPtr LLMObject){ LLM_StartStatic(LLMObject); } - public bool LLM_Started(IntPtr LLMObject){ return LLM_StartedStatic(LLMObject); } - public void LLM_Stop(IntPtr LLMObject){ LLM_StopStatic(LLMObject); } - public void LLM_SetTemplate(IntPtr LLMObject, string chatTemplate){ LLM_SetTemplateStatic(LLMObject, chatTemplate); } - public void LLM_SetSSL(IntPtr LLMObject, string SSLCert, string SSLKey){ LLM_SetSSLStatic(LLMObject, SSLCert, SSLKey); } - public void LLM_Tokenize(IntPtr LLMObject, string jsonData, IntPtr stringWrapper){ LLM_TokenizeStatic(LLMObject, jsonData, stringWrapper); } - public void LLM_Detokenize(IntPtr LLMObject, string jsonData, IntPtr stringWrapper){ LLM_DetokenizeStatic(LLMObject, jsonData, stringWrapper); } - public void LLM_Embeddings(IntPtr LLMObject, string jsonData, IntPtr stringWrapper){ LLM_EmbeddingsStatic(LLMObject, jsonData, stringWrapper); } - public void LLM_LoraWeight(IntPtr LLMObject, string jsonData, IntPtr stringWrapper){ LLM_LoraWeightStatic(LLMObject, jsonData, stringWrapper); } - public void LLM_LoraList(IntPtr LLMObject, IntPtr stringWrapper){ LLM_LoraListStatic(LLMObject, stringWrapper); } - public void LLM_Completion(IntPtr LLMObject, string jsonData, IntPtr 
stringWrapper){ LLM_CompletionStatic(LLMObject, jsonData, stringWrapper); } - public void LLM_Slot(IntPtr LLMObject, string jsonData, IntPtr stringWrapper){ LLM_SlotStatic(LLMObject, jsonData, stringWrapper); } - public void LLM_Cancel(IntPtr LLMObject, int idSlot){ LLM_CancelStatic(LLMObject, idSlot); } - public int LLM_Status(IntPtr LLMObject, IntPtr stringWrapper){ return LLM_StatusStatic(LLMObject, stringWrapper); } - public IntPtr StringWrapper_Construct(){ return StringWrapper_ConstructStatic(); } - public void StringWrapper_Delete(IntPtr instance){ StringWrapper_DeleteStatic(instance); } - public int StringWrapper_GetStringSize(IntPtr instance){ return StringWrapper_GetStringSizeStatic(instance); } - public void StringWrapper_GetString(IntPtr instance, IntPtr buffer, int bufferSize, bool clear = false){ StringWrapper_GetStringStatic(instance, buffer, bufferSize, clear); } + public void Logging(IntPtr stringWrapper) { LoggingStatic(stringWrapper); } + public void StopLogging() { StopLoggingStatic(); } + public IntPtr LLM_Construct(string command) { return LLM_ConstructStatic(command); } + public void LLM_Delete(IntPtr LLMObject) { LLM_DeleteStatic(LLMObject); } + public void LLM_StartServer(IntPtr LLMObject) { LLM_StartServerStatic(LLMObject); } + public void LLM_StopServer(IntPtr LLMObject) { LLM_StopServerStatic(LLMObject); } + public void LLM_Start(IntPtr LLMObject) { LLM_StartStatic(LLMObject); } + public bool LLM_Started(IntPtr LLMObject) { return LLM_StartedStatic(LLMObject); } + public void LLM_Stop(IntPtr LLMObject) { LLM_StopStatic(LLMObject); } + public void LLM_SetTemplate(IntPtr LLMObject, string chatTemplate) { LLM_SetTemplateStatic(LLMObject, chatTemplate); } + public void LLM_SetSSL(IntPtr LLMObject, string SSLCert, string SSLKey) { LLM_SetSSLStatic(LLMObject, SSLCert, SSLKey); } + public void LLM_Tokenize(IntPtr LLMObject, string jsonData, IntPtr stringWrapper) { LLM_TokenizeStatic(LLMObject, jsonData, stringWrapper); } + public void LLM_Detokenize(IntPtr LLMObject, string jsonData, IntPtr stringWrapper) { LLM_DetokenizeStatic(LLMObject, jsonData, stringWrapper); } + public void LLM_Embeddings(IntPtr LLMObject, string jsonData, IntPtr stringWrapper) { LLM_EmbeddingsStatic(LLMObject, jsonData, stringWrapper); } + public void LLM_LoraWeight(IntPtr LLMObject, string jsonData, IntPtr stringWrapper) { LLM_LoraWeightStatic(LLMObject, jsonData, stringWrapper); } + public void LLM_LoraList(IntPtr LLMObject, IntPtr stringWrapper) { LLM_LoraListStatic(LLMObject, stringWrapper); } + public void LLM_Completion(IntPtr LLMObject, string jsonData, IntPtr stringWrapper) { LLM_CompletionStatic(LLMObject, jsonData, stringWrapper); } + public void LLM_Slot(IntPtr LLMObject, string jsonData, IntPtr stringWrapper) { LLM_SlotStatic(LLMObject, jsonData, stringWrapper); } + public void LLM_Cancel(IntPtr LLMObject, int idSlot) { LLM_CancelStatic(LLMObject, idSlot); } + public int LLM_Status(IntPtr LLMObject, IntPtr stringWrapper) { return LLM_StatusStatic(LLMObject, stringWrapper); } + public IntPtr StringWrapper_Construct() { return StringWrapper_ConstructStatic(); } + public void StringWrapper_Delete(IntPtr instance) { StringWrapper_DeleteStatic(instance); } + public int StringWrapper_GetStringSize(IntPtr instance) { return StringWrapper_GetStringSizeStatic(instance); } + public void StringWrapper_GetString(IntPtr instance, IntPtr buffer, int bufferSize, bool clear = false) { StringWrapper_GetStringStatic(instance, buffer, bufferSize, clear); } #else @@ -694,7 +694,7 @@ public static List 
PossibleArchitectures(bool gpu = false) } return architectures; } - + /// /// Allows to retrieve a string from the library (Unity only allows marshalling of chars) /// @@ -728,6 +728,5 @@ public void Destroy() if (libraryHandle != IntPtr.Zero) LibraryLoader.FreeLibrary(libraryHandle); } } - } /// \endcond diff --git a/Runtime/LLMUnitySetup.cs b/Runtime/LLMUnitySetup.cs index 0d50e7ff..d733c411 100644 --- a/Runtime/LLMUnitySetup.cs +++ b/Runtime/LLMUnitySetup.cs @@ -219,13 +219,13 @@ public static string GetLibraryName(string version) public static string GetAssetPath(string relPath = "") { - string assetsDir = Application.platform == RuntimePlatform.Android? Application.persistentDataPath : Application.streamingAssetsPath; + string assetsDir = Application.platform == RuntimePlatform.Android ? Application.persistentDataPath : Application.streamingAssetsPath; return Path.Combine(assetsDir, relPath).Replace('\\', '/'); } public static string GetDownloadAssetPath(string relPath = "") { - string assetsDir = (Application.platform == RuntimePlatform.Android || Application.platform == RuntimePlatform.IPhonePlayer)? Application.persistentDataPath : Application.streamingAssetsPath; + string assetsDir = (Application.platform == RuntimePlatform.Android || Application.platform == RuntimePlatform.IPhonePlayer) ? Application.persistentDataPath : Application.streamingAssetsPath; return Path.Combine(assetsDir, relPath).Replace('\\', '/'); } @@ -414,13 +414,12 @@ static async Task DownloadAndExtractInsideDirectory(string url, string path, str File.Delete(zipPath); } - static void DeleteEarlierVersions() { List assetPathSubDirs = new List(); - foreach (string dir in new string[]{GetAssetPath(), Path.Combine(Application.dataPath, "Plugins", "Android")}) + foreach (string dir in new string[] {GetAssetPath(), Path.Combine(Application.dataPath, "Plugins", "Android")}) { - if(Directory.Exists(dir)) assetPathSubDirs.AddRange(Directory.GetDirectories(dir)); + if (Directory.Exists(dir)) assetPathSubDirs.AddRange(Directory.GetDirectories(dir)); } Regex regex = new Regex(GetLibraryName("(.+)")); diff --git a/Runtime/RAG/DBSearch.cs b/Runtime/RAG/DBSearch.cs index 91265947..e0888530 100644 --- a/Runtime/RAG/DBSearch.cs +++ b/Runtime/RAG/DBSearch.cs @@ -17,17 +17,23 @@ namespace LLMUnity public class DBSearch : SearchMethod { protected USearchIndex index; - /// toggle to show/hide advanced options in the GameObject + /// show/hide advanced options in the GameObject + [Tooltip("show/hide advanced options in the GameObject")] [HideInInspector] public bool advancedOptions = false; /// The quantisation type used for vector data during indexing. + [Tooltip("The quantisation type used for vector data during indexing.")] [ModelAdvanced] public ScalarKind quantization = ScalarKind.Float16; /// The metric kind used for distance calculation between vectors. + [Tooltip("The metric kind used for distance calculation between vectors.")] [ModelAdvanced] public MetricKind metricKind = MetricKind.Cos; /// The connectivity parameter limits the connections-per-node in the graph. + [Tooltip("The connectivity parameter limits the connections-per-node in the graph.")] [ModelAdvanced] public ulong connectivity = 32; /// The expansion factor used for index construction when adding vectors. + [Tooltip("The expansion factor used for index construction when adding vectors.")] [ModelAdvanced] public ulong expansionAdd = 40; /// The expansion factor used for index construction during search operations. 
+ [Tooltip("The expansion factor used for index construction during search operations.")] [ModelAdvanced] public ulong expansionSearch = 16; private Dictionary)> incrementalSearchCache = new Dictionary)>(); diff --git a/Runtime/RAG/RAG.cs b/Runtime/RAG/RAG.cs index 55988175..a1b26183 100644 --- a/Runtime/RAG/RAG.cs +++ b/Runtime/RAG/RAG.cs @@ -37,9 +37,17 @@ public enum ChunkingMethods [Serializable] public class RAG : Searchable { + /// Search method type to use for RAG. SimpleSearch is a simple brute-force search, while DBSearch is a fast Approximate Nearest Neighbor (ANN) method (recommended!). + [Tooltip("Search method type to use for RAG. SimpleSearch is a simple brute-force search, while DBSearch is a fast Approximate Nearest Neighbor (ANN) method (recommended!).")] public SearchMethods searchType = SearchMethods.SimpleSearch; + /// Search method GameObject. + [Tooltip("Search method GameObject.")] public SearchMethod search; + /// Chunking method type to use for RAG for splitting the inputs into chunks. This is useful to have a more consistent meaning within each data part. + [Tooltip("Chunking method type to use for RAG for splitting the inputs into chunks. This is useful to have a more consistent meaning within each data part.")] public ChunkingMethods chunkingType = ChunkingMethods.NoChunking; + /// Chunking method GameObject. + [Tooltip("Chunking method GameObject.")] public Chunking chunking; /// diff --git a/Runtime/RAG/SentenceSplitter.cs b/Runtime/RAG/SentenceSplitter.cs index 75507b7b..809300f0 100644 --- a/Runtime/RAG/SentenceSplitter.cs +++ b/Runtime/RAG/SentenceSplitter.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Threading.Tasks; using System.Linq; +using UnityEngine; namespace LLMUnity { @@ -16,6 +17,7 @@ public class SentenceSplitter : Chunking { public const string DefaultDelimiters = ".!:;?\n\r"; /// delimiters used to split the phrases + [Tooltip("delimiters used to split the phrases")] public char[] delimiters = DefaultDelimiters.ToCharArray(); /// diff --git a/Runtime/RAG/TokenSplitter.cs b/Runtime/RAG/TokenSplitter.cs index e95a9f0d..cb57313b 100644 --- a/Runtime/RAG/TokenSplitter.cs +++ b/Runtime/RAG/TokenSplitter.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Threading.Tasks; +using UnityEngine; namespace LLMUnity { @@ -13,7 +14,8 @@ namespace LLMUnity [Serializable] public class TokenSplitter : Chunking { - /// the number of tokens to split phrases into chunks + /// number of tokens by which to split phrases into chunks + [Tooltip("number of tokens by which to split phrases into chunks")] public int numTokens = 10; protected int DetermineEndIndex(string input, string detokenised, int startIndex, int searchRange = 5, int charsFromEnd = 3) diff --git a/Runtime/RAG/WordSplitter.cs b/Runtime/RAG/WordSplitter.cs index f1540c61..251bdff5 100644 --- a/Runtime/RAG/WordSplitter.cs +++ b/Runtime/RAG/WordSplitter.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Threading.Tasks; +using UnityEngine; namespace LLMUnity { @@ -13,7 +14,8 @@ namespace LLMUnity [Serializable] public class WordSplitter : Chunking { - /// the number of words to split phrases into chunks + /// number of words by which to split phrases into chunks + [Tooltip("number of words by which to split phrases into chunks")] public int numWords = 10; /// From bea75d5dded9ffcea0a19a700eca984077023f9e Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:31:09 +0200 Subject: [PATCH 08/20] remove 
tail-free sampling --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index ad242081..e270e09d 100644 --- a/README.md +++ b/README.md @@ -600,7 +600,6 @@ If it is not selected, the full reply from the model is received in one go
- `Repeat Penalty` control the repetition of token sequences in the generated text (default: 1.1). The penalty is applied to repeated tokens.
- `Presence Penalty` repeated token presence penalty (default: 0.0, 0.0 = disabled). Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
- `Frequency Penalty` repeated token frequency penalty (default: 0.0, 0.0 = disabled). Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
- - `Tfs_z`: enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled). - `Typical P`: enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled). - `Repeat Last N`: last N tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size). - `Penalize Nl`: penalize newline tokens when applying the repeat penalty (default: true). From 9741585792460178e605a6c598501c74ba8a98b7 Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:31:42 +0200 Subject: [PATCH 09/20] script to automatically add tooltips based on --- .github/update_tooltips.py | 105 +++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 .github/update_tooltips.py diff --git a/.github/update_tooltips.py b/.github/update_tooltips.py new file mode 100644 index 00000000..c05f07e9 --- /dev/null +++ b/.github/update_tooltips.py @@ -0,0 +1,105 @@ +import os +import sys + + +def get_classname(line): + if ' class ' not in line or ':' not in line: + return None, None + classParts = line.strip().split(' ') + delimInd = classParts.index(':') + className = classParts[delimInd - 1] + parentName = classParts[delimInd + 1] + return className, parentName + +def find_eligible_classes(file_paths): + child_classes = {} + for file_path in file_paths: + with open(file_path, 'r') as file: + lines = file.readlines() + for line in lines: + className, parentName = get_classname(line) + if className is not None: + child_classes[parentName] = child_classes.get(parentName, []) + [className] + + ret_classes = [] + check_classes = ['MonoBehaviour'] + while len(check_classes) > 0: + check_class = check_classes.pop() + if check_class in ret_classes: + continue + if check_class != 'MonoBehaviour': + ret_classes.append(check_class) + check_classes += child_classes.get(check_class, []) + return ret_classes + + + +def add_tooltips_to_unity_file(file_path, allowed_classes): + # Read the content of the file + with open(file_path, 'r') as file: + lines = file.readlines() + + # Initialize variables + updated_lines = [] + in_summary = False + allowed_class = False + summary_text = "" + + for line in lines: + stripped_line = line.strip() + className, __ = get_classname(line) + if className is not None: + allowed_class = className in allowed_classes + + if allowed_class: + if '' in stripped_line: + in_summary = True + summary_text = '' + + if in_summary: + if summary_text != "": summary_text += ' ' + summary_text += stripped_line.replace("///", "").replace("", "").replace("", "").strip() + + if '' in stripped_line: + in_summary = False + + if 'Tooltip' in stripped_line: + # in_summary = False + # summary_text = '' + continue + + include_terms = ['public', ';'] + exclude_terms = ['{', 'static', 'abstract'] + if all([x in stripped_line for x in include_terms]) and not any([x in stripped_line for x in exclude_terms]): + if summary_text != '': + num_spaces = len(line) - len(line.lstrip()) + tooltip = ''.join([' '] * num_spaces + [f'[Tooltip("{summary_text}")]', '\n']) + updated_lines.append(tooltip) + summary_text = '' + + if not in_summary and ('{' in stripped_line or '}' in stripped_line): + summary_text = '' + + # Add the current line to the updated lines + updated_lines.append(line) + + # Write the updated content back to the file + with open(file_path, 'w') as file: + file.writelines(updated_lines) + + + + + +if __name__ == '__main__': + # Find all .cs files + search_directory = 'Runtime' + cs_files = [] + for root, _, files in os.walk(search_directory): + 
for file in files: + if file.endswith(".cs"): + cs_files.append(os.path.join(root, file)) + + classes = find_eligible_classes(cs_files) + for file in cs_files: + add_tooltips_to_unity_file(file, classes) From 5952bafced84292e56b8775c04619c83530379f5 Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:31:56 +0200 Subject: [PATCH 10/20] workflow to automatically add tooltips based on --- .github/workflows/update_tooltips.yaml | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/update_tooltips.yaml diff --git a/.github/workflows/update_tooltips.yaml b/.github/workflows/update_tooltips.yaml new file mode 100644 index 00000000..94968e74 --- /dev/null +++ b/.github/workflows/update_tooltips.yaml @@ -0,0 +1,30 @@ +name: Changelog +on: + pull_request: + types: [closed] + +jobs: + build: + runs-on: ubuntu-latest + if: startsWith(github.base_ref, 'release/') && github.event.pull_request.merged == true + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + persist-credentials: false + token: ${{ github.token }} + + - name: Update tooltips + id: update_tooltips + run: | + python .github/update_tooltips.py + git config --global user.name $GITHUB_ACTOR + git config --global user.email $GITHUB_ACTOR@users.noreply.github.com + git add Runtime + git commit -m "update tooltips" + + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ github.token }} + branch: ${{ github.base_ref }} From 7174aa80110508e5c9f1e596489b59df3023558c Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:37:16 +0200 Subject: [PATCH 11/20] deprecate options page --- Options.md | 116 +++++++++++++++++++++++++++++++++++++++++++++++ Options.md.meta | 7 +++ README.md | 117 +----------------------------------------------- 3 files changed, 124 insertions(+), 116 deletions(-) create mode 100644 Options.md create mode 100644 Options.md.meta diff --git a/Options.md b/Options.md new file mode 100644 index 00000000..04fcaea8 --- /dev/null +++ b/Options.md @@ -0,0 +1,116 @@ +# Options (deprecated) +## LLM Settings + +- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below +- `Log Level` select how verbose the log messages are +- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants) + +## 💻 Setup Settings + +
+ +
+ +- `Remote` select to provide remote access to the LLM +- `Port` port to run the LLM server (if `Remote` is set) +- `Num Threads` number of threads to use (default: -1 = all) +- `Num GPU Layers` number of model layers to offload to the GPU. +If set to 0 the GPU is not used. Use a large number i.e. >30 to utilise the GPU as much as possible. +Note that higher values of context size will use more VRAM. +If the user's GPU is not supported, the LLM will fall back to the CPU +- `Debug` select to log the output of the model in the Unity Editor +-
Advanced options + + -
Parallel Prompts number of prompts / slots that can happen in parallel (default: -1 = number of LLMCharacter objects). Note that the context size is divided among the slots. If you want to retain as much context for the LLM and don't need all the characters present at the same time, you can set this number and specify the slot for each LLMCharacter object. + e.g. Setting `Parallel Prompts` to 1 and slot 0 for all LLMCharacter objects will use the full context, but the entire prompt will need to be computed (no caching) whenever a LLMCharacter object is used for chat.
+ - `Dont Destroy On Load` select to not destroy the LLM GameObject when loading a new Scene + +
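As a rough illustration of the slot mechanism described above, the snippet below shows how the two settings could be wired up from code. It is a sketch only: it assumes the `LLM` component exposes the option as a `parallelPrompts` field and `LLMCharacter` as a `slot` field; check the actual class definitions for the exact member names.

```csharp
using LLMUnity;
using UnityEngine;

// Sketch: one shared slot for two characters (assumed field names).
public class SlotSetupExample : MonoBehaviour
{
    public LLM llm;
    public LLMCharacter characterA;
    public LLMCharacter characterB;

    void Awake()
    {
        llm.parallelPrompts = 1;  // a single slot keeps the full context size
        characterA.slot = 0;      // both characters reuse slot 0,
        characterB.slot = 0;      // so the prompt is recomputed when switching between them
    }
}
```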
+ +## Server Security Settings + +- `API key` API key to use to allow access to requests from LLMCharacter objects (if `Remote` is set) +-
Advanced options + + - `Load SSL certificate` allows loading an SSL certificate for end-to-end encryption of requests (if `Remote` is set). Requires an SSL key as well. + - `Load SSL key` allows loading an SSL key for end-to-end encryption of requests (if `Remote` is set). Requires an SSL certificate as well. + - `SSL certificate path` the SSL certificate used for end-to-end encryption of requests (if `Remote` is set). + - `SSL key path` the SSL key used for end-to-end encryption of requests (if `Remote` is set). + +
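A minimal remote setup might look like the sketch below. The property names (`remote`, `port`, `host`, `APIKey`) are assumptions based on the options listed in this section rather than a verified API, and the SSL certificate and key would still be loaded through the editor buttons above.

```csharp
using LLMUnity;
using UnityEngine;

// Sketch: server-side LLM and client-side LLMCharacter sharing an API key (assumed property names).
public class RemoteSetupExample : MonoBehaviour
{
    public LLM serverLLM;           // lives in the server build
    public LLMCharacter character;  // lives in the client build

    void ConfigureServer()
    {
        serverLLM.remote = true;             // expose the LLM over the network
        serverLLM.port = 13333;              // example port
        serverLLM.APIKey = "my-secret-key";  // hypothetical key
    }

    void ConfigureClient()
    {
        character.remote = true;
        character.host = "http://192.168.1.10";  // example server IP
        character.port = 13333;
        character.APIKey = "my-secret-key";
    }
}
```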
+ +## 🤗 Model Settings +- `Download model` click to download one of the default models +- `Load model` click to load your own model in .gguf format +- `Download on Start` enable to download the LLM models the first time the game starts. Alternatively the LLM models will be copied directly into the build +-
Context Size size of the prompt context (0 = context size of the model) This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU).
+ +-
Advanced options + + - `Download lora` click to download a LoRA model in .gguf format + - `Load lora` click to load a LoRA model in .gguf format + - `Batch Size` batch size for prompt processing (default: 512) + - `Model` the path of the model being used (relative to the Assets/StreamingAssets folder) + - `Chat Template` the chat template being used for the LLM + - `Lora` the path of the LoRAs being used (relative to the Assets/StreamingAssets folder) + - `Lora Weights` the weights of the LoRAs being used + - `Flash Attention` click to use flash attention in the model (if `Use extras` is enabled) + +
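For builds that switch models at runtime, the model and LoRA options above can also be set from code. The sketch below assumes the `LLM` component exposes `SetModel` and `SetLora` helpers that take paths relative to the StreamingAssets folder; the filenames are hypothetical, so verify the exact signatures in the class before relying on them.

```csharp
using LLMUnity;
using UnityEngine;

// Sketch: pointing the LLM at a model and a LoRA shipped in StreamingAssets (assumed helper names).
public class ModelSetupExample : MonoBehaviour
{
    public LLM llm;

    void Awake()
    {
        llm.SetModel("qwen2.5-1.5b-instruct-q4_k_m.gguf"); // hypothetical model filename
        llm.SetLora("my-adapter.gguf");                    // hypothetical LoRA filename
    }
}
```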
+ +## LLMCharacter Settings + +- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below +- `Log Level` select how verbose the log messages are +- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants) + +## 💻 Setup Settings +
+ +
+ +- `Remote` whether the LLM used is remote or local +- `LLM` the LLM GameObject (if `Remote` is not set) +- `Host` IP address of the LLM server (if `Remote` is set) +- `Port` port of the LLM server (if `Remote` is set) +- `Num Retries` number of HTTP request retries from the LLM server (if `Remote` is set) +- `API key` API key of the LLM server (if `Remote` is set) +-
Save save filename or relative path If set, the chat history and LLM state (if save cache is enabled) are automatically saved to the file specified.
The chat history is saved with a json suffix and the LLM state with a cache suffix.
Both files are saved in the [persistentDataPath folder of Unity](https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html).
+- `Save Cache` select to save the LLM state along with the chat history. The LLM state is typically around 100MB+. +- `Debug Prompt` select to log the constructed prompts in the Unity Editor + +## 🗨️ Chat Settings +- `Player Name` the name of the player +- `AI Name` the name of the AI +- `Prompt` description of the AI role + +## 🤗 Model Settings +- `Stream` select to receive the reply from the model as it is produced (recommended!).
+If it is not selected, the full reply from the model is received in one go +-
Num Predict maximum number of tokens to predict (default: 256, -1 = infinity, -2 = until context filled) This is the maximum number of tokens the model will predict. When N tokens are reached the model will stop generating. This means words / sentences might not get finished if this is too low.
+ +-
Advanced options + + - `Load grammar` click to load a grammar in .gbnf format + - `Grammar` the path of the grammar being used (relative to the Assets/StreamingAssets folder) + -
Cache Prompt save the ongoing prompt from the chat (default: true) Saves the prompt while it is being created by the chat to avoid reprocessing the entire prompt every time
+ - `Slot` slot of the server to use for computation. Value can be set from 0 to `Parallel Prompts`-1 (default: -1 = new slot for each character) + - `Seed` seed for reproducibility. For random results every time use -1 + -
Temperature LLM temperature, lower values give more deterministic answers (default: 0.2) The temperature setting adjusts how random the generated responses are. Turning it up makes the generated choices more varied and unpredictable. Turning it down makes the generated responses more predictable and focused on the most likely options.
+ -
Top K top-k sampling (default: 40, 0 = disabled) The top k value controls the top k most probable tokens at each step of generation. This value can help fine-tune the output and make it adhere to specific patterns or constraints.
+ -
Top P top-p sampling (default: 0.9, 1.0 = disabled) The top p value controls the cumulative probability of generated tokens. The model will generate tokens until this threshold (p) is reached. By lowering this value you can shorten output & encourage / discourage more diverse outputs.
+ -
Min P minimum probability for a token to be used (default: 0.05) The probability is defined relative to the probability of the most likely token.
+ -
Repeat Penalty control the repetition of token sequences in the generated text (default: 1.1) The penalty is applied to repeated tokens.
+ -
Presence Penalty repeated token presence penalty (default: 0.0, 0.0 = disabled) Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+ -
Frequency Penalty repeated token frequency penalty (default: 0.0, 0.0 = disabled) Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+ - `Typical P`: enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled). + - `Repeat Last N`: last N tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size). + - `Penalize Nl`: penalize newline tokens when applying the repeat penalty (default: true). + - `Penalty Prompt`: prompt for the purpose of the penalty evaluation. Can be either `null`, a string or an array of numbers representing tokens (default: `null` = use original `prompt`). + - `Mirostat`: enable Mirostat sampling, controlling perplexity during text generation (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0). + - `Mirostat Tau`: set the Mirostat target entropy, parameter tau (default: 5.0). + - `Mirostat Eta`: set the Mirostat learning rate, parameter eta (default: 0.1). + - `N Probs`: if greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0) + - `Ignore Eos`: enable to ignore end of stream tokens and continue generating (default: false). + +
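The options above can also be set from scripts before starting a chat. The sketch below is illustrative only and assumes the fields follow the names used in this list (e.g. `numPredict`, `temperature`, `topK`, `topP`); consult the `LLMCharacter` class for the exact members.

```csharp
using LLMUnity;
using UnityEngine;

// Sketch: tightening the sampling options before chatting (assumed field names).
public class SamplingExample : MonoBehaviour
{
    public LLMCharacter llmCharacter;

    async void Start()
    {
        llmCharacter.numPredict = 128;    // shorter replies
        llmCharacter.temperature = 0.2f;  // more deterministic answers
        llmCharacter.topK = 40;
        llmCharacter.topP = 0.9f;
        string reply = await llmCharacter.Chat("Introduce yourself in one sentence.");
        Debug.Log(reply);
    }
}
```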
\ No newline at end of file diff --git a/Options.md.meta b/Options.md.meta new file mode 100644 index 00000000..8af4ca41 --- /dev/null +++ b/Options.md.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 1c6766850c55dd06c89ff9ddaf1d4a41 +TextScriptImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/README.md b/README.md index e270e09d..3b61edd0 100644 --- a/README.md +++ b/README.md @@ -495,122 +495,7 @@ In the scene, select the `LLM` GameObject and click the `Download Model` button Save the scene, run and enjoy! ## Options - -### LLM Settings - -- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below -- `Log Level` select how verbose the log messages are -- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants) - -#### 💻 Setup Settings - -
- -
- -- `Remote` select to provide remote access to the LLM -- `Port` port to run the LLM server (if `Remote` is set) -- `Num Threads` number of threads to use (default: -1 = all) -- `Num GPU Layers` number of model layers to offload to the GPU. -If set to 0 the GPU is not used. Use a large number i.e. >30 to utilise the GPU as much as possible. -Note that higher values of context size will use more VRAM. -If the user's GPU is not supported, the LLM will fall back to the CPU -- `Debug` select to log the output of the model in the Unity Editor --
Advanced options - - -
Parallel Prompts number of prompts / slots that can happen in parallel (default: -1 = number of LLMCharacter objects). Note that the context size is divided among the slots. If you want to retain as much context for the LLM and don't need all the characters present at the same time, you can set this number and specify the slot for each LLMCharacter object. - e.g. Setting `Parallel Prompts` to 1 and slot 0 for all LLMCharacter objects will use the full context, but the entire prompt will need to be computed (no caching) whenever a LLMCharacter object is used for chat.
- - `Dont Destroy On Load` select to not destroy the LLM GameObject when loading a new Scene - -
- -### Server Security Settings - -- `API key` API key to use to allow access to requests from LLMCharacter objects (if `Remote` is set) --
Advanced options - - - `Load SSL certificate` allows to load a SSL certificate for end-to-end encryption of requests (if `Remote` is set). Requires SSL key as well. - - `Load SSL key` allows to load a SSL key for end-to-end encryption of requests (if `Remote` is set). Requires SSL certificate as well. - - `SSL certificate path` the SSL certificate used for end-to-end encryption of requests (if `Remote` is set). - - `SSL key path` the SSL key used for end-to-end encryption of requests (if `Remote` is set). - -
- -#### 🤗 Model Settings -- `Download model` click to download one of the default models -- `Load model` click to load your own model in .gguf format -- `Download on Start` enable to downloaded the LLM models the first time the game starts. Alternatively the LLM models wil be copied directly in the build --
Context Size size of the prompt context (0 = context size of the model) This is the number of tokens the model can take as input when generating responses. Higher values use more RAM or VRAM (if using GPU).
- --
Advanced options - - - `Download lora` click to download a LoRA model in .gguf format - - `Load lora` click to load a LoRA model in .gguf format - - `Batch Size` batch size for prompt processing (default: 512) - - `Model` the path of the model being used (relative to the Assets/StreamingAssets folder) - - `Chat Template` the chat template being used for the LLM - - `Lora` the path of the LoRAs being used (relative to the Assets/StreamingAssets folder) - - `Lora Weights` the weights of the LoRAs being used - - `Flash Attention` click to use flash attention in the model (if `Use extras` is enabled) - -
- -### LLMCharacter Settings - -- `Show/Hide Advanced Options` Toggle to show/hide advanced options from below -- `Log Level` select how verbose the log messages are -- `Use extras` select to install and allow the use of extra features (flash attention and IQ quants) - -#### 💻 Setup Settings -
- -
- -- `Remote` whether the LLM used is remote or local -- `LLM` the LLM GameObject (if `Remote` is not set) -- `Hort` ip of the LLM server (if `Remote` is set) -- `Port` port of the LLM server (if `Remote` is set) -- `Num Retries` number of HTTP request retries from the LLM server (if `Remote` is set) -- `API key` API key of the LLM server (if `Remote` is set) --
Save save filename or relative path If set, the chat history and LLM state (if save cache is enabled) is automatically saved to file specified.
The chat history is saved with a json suffix and the LLM state with a cache suffix.
Both files are saved in the [persistentDataPath folder of Unity](https://docs.unity3d.com/ScriptReference/Application-persistentDataPath.html).
-- `Save Cache` select to save the LLM state along with the chat history. The LLM state is typically around 100MB+. -- `Debug Prompt` select to log the constructed prompts in the Unity Editor - -#### 🗨️ Chat Settings -- `Player Name` the name of the player -- `AI Name` the name of the AI -- `Prompt` description of the AI role - -#### 🤗 Model Settings -- `Stream` select to receive the reply from the model as it is produced (recommended!).
-If it is not selected, the full reply from the model is received in one go --
Num Predict maximum number of tokens to predict (default: 256, -1 = infinity, -2 = until context filled)This is the maximum amount of tokens the model will maximum predict. When N tokens are reached the model will stop generating. This means words / sentences might not get finished if this is too low.
- --
Advanced options - - - `Load grammar` click to load a grammar in .gbnf format - - `Grammar` the path of the grammar being used (relative to the Assets/StreamingAssets folder) - -
Cache Prompt save the ongoing prompt from the chat (default: true) Saves the prompt while it is being created by the chat to avoid reprocessing the entire prompt every time
- - `Slot` slot of the server to use for computation. Value can be set from 0 to `Parallel Prompts`-1 (default: -1 = new slot for each character) - - `Seed` seed for reproducibility. For random results every time use -1 - -
Temperature LLM temperature, lower values give more deterministic answers (default: 0.2)The temperature setting adjusts how random the generated responses are. Turning it up makes the generated choices more varied and unpredictable. Turning it down makes the generated responses more predictable and focused on the most likely options.
- -
Top K top-k sampling (default: 40, 0 = disabled)The top k value controls the top k most probable tokens at each step of generation. This value can help fine tune the output and make this adhere to specific patterns or constraints.
- -
Top P top-p sampling (default: 0.9, 1.0 = disabled)The top p value controls the cumulative probability of generated tokens. The model will generate tokens until this theshold (p) is reached. By lowering this value you can shorten output & encourage / discourage more diverse outputs.
- -
Min P minimum probability for a token to be used (default: 0.05) The probability is defined relative to the probability of the most likely token.
- -
Repeat Penalty control the repetition of token sequences in the generated text (default: 1.1)The penalty is applied to repeated tokens.
- -
Presence Penalty repeated token presence penalty (default: 0.0, 0.0 = disabled) Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
- -
Frequency Penalty repeated token frequency penalty (default: 0.0, 0.0 = disabled) Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
- - `Typical P`: enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled). - - `Repeat Last N`: last N tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size). - - `Penalize Nl`: penalize newline tokens when applying the repeat penalty (default: true). - - `Penalty Prompt`: prompt for the purpose of the penalty evaluation. Can be either `null`, a string or an array of numbers representing tokens (default: `null` = use original `prompt`). - - `Mirostat`: enable Mirostat sampling, controlling perplexity during text generation (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0). - - `Mirostat Tau`: set the Mirostat target entropy, parameter tau (default: 5.0). - - `Mirostat Eta`: set the Mirostat learning rate, parameter eta (default: 0.1). - - `N Probs`: if greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0) - - `Ignore Eos`: enable to ignore end of stream tokens and continue generating (default: false). - -
+Details on the different parameters are provided as Unity Tooltips. Previous documentation can be found [here](Options.md) (deprecated). ## License The license of LLM for Unity is MIT ([LICENSE.md](LICENSE.md)) and uses third-party software with MIT and Apache licenses. From b304a419428ce50861c1d3550bd4fcd00fadb38f Mon Sep 17 00:00:00 2001 From: amakropoulos Date: Tue, 21 Jan 2025 14:38:47 +0000 Subject: [PATCH 12/20] update tooltips --- Runtime/LLMCaller.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/Runtime/LLMCaller.cs b/Runtime/LLMCaller.cs index b222b8d3..dd75c136 100644 --- a/Runtime/LLMCaller.cs +++ b/Runtime/LLMCaller.cs @@ -22,7 +22,6 @@ public class LLMCaller : MonoBehaviour [Tooltip("use remote LLM server")] [LocalRemote] public bool remote = false; /// LLM GameObject to use - [Tooltip("LLM GameObject to use")] [Local, SerializeField] protected LLM _llm; public LLM llm { From a4c435c9fce5584aa5c20974cebe3de39fc2c796 Mon Sep 17 00:00:00 2001 From: amakropoulos Date: Tue, 21 Jan 2025 14:39:08 +0000 Subject: [PATCH 13/20] update VERSION --- .github/doxygen/Doxyfile | 2 +- Runtime/LLMUnitySetup.cs | 2 +- VERSION | 2 +- package.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/doxygen/Doxyfile b/.github/doxygen/Doxyfile index d10a6532..f127b7ce 100644 --- a/.github/doxygen/Doxyfile +++ b/.github/doxygen/Doxyfile @@ -48,7 +48,7 @@ PROJECT_NAME = "LLM for Unity" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = v2.4.1 +PROJECT_NUMBER = v2.4.2 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/Runtime/LLMUnitySetup.cs b/Runtime/LLMUnitySetup.cs index d733c411..5db6540a 100644 --- a/Runtime/LLMUnitySetup.cs +++ b/Runtime/LLMUnitySetup.cs @@ -101,7 +101,7 @@ public class LLMUnitySetup { // DON'T CHANGE! 
the version is autocompleted with a GitHub action /// LLM for Unity version - public static string Version = "v2.4.1"; + public static string Version = "v2.4.2"; /// LlamaLib version public static string LlamaLibVersion = "v1.2.1"; /// LlamaLib release url diff --git a/VERSION b/VERSION index a3721209..3dfbe336 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v2.4.1 +v2.4.2 diff --git a/package.json b/package.json index 396eddc3..4c9ee6cd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ai.undream.llm", - "version": "2.4.1", + "version": "2.4.2", "displayName": "LLM for Unity", "description": "LLM for Unity allows to run and distribute Large Language Models (LLMs) in the Unity engine.", "unity": "2022.3", From e7c4fdccc000ecef85c2fd22a703414955fc3875 Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:42:47 +0200 Subject: [PATCH 14/20] allow to override tooltip removal --- .github/update_tooltips.py | 5 ++--- Runtime/LLMCaller.cs | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/update_tooltips.py b/.github/update_tooltips.py index c05f07e9..1b9a119e 100644 --- a/.github/update_tooltips.py +++ b/.github/update_tooltips.py @@ -64,9 +64,8 @@ def add_tooltips_to_unity_file(file_path, allowed_classes): in_summary = False if 'Tooltip' in stripped_line: - # in_summary = False - # summary_text = '' - continue + if ('Tooltip: ignore' not in stripped_line): + continue include_terms = ['public', ';'] exclude_terms = ['{', 'static', 'abstract'] diff --git a/Runtime/LLMCaller.cs b/Runtime/LLMCaller.cs index dd75c136..4a56277a 100644 --- a/Runtime/LLMCaller.cs +++ b/Runtime/LLMCaller.cs @@ -22,6 +22,7 @@ public class LLMCaller : MonoBehaviour [Tooltip("use remote LLM server")] [LocalRemote] public bool remote = false; /// LLM GameObject to use + [Tooltip("LLM GameObject to use")] // Tooltip: ignore [Local, SerializeField] protected LLM _llm; public LLM llm { From 473c91b473e8fc95bd65371f280a314eaf867a3b Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 16:50:11 +0200 Subject: [PATCH 15/20] note on Allow Downloads Over HTTP --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 3b61edd0..0c566eeb 100644 --- a/README.md +++ b/README.md @@ -346,6 +346,8 @@ Alternatively you can use a server binary for easier deployment: - Find the architecture you are interested in from the folder above e.g. for Windows and CUDA use the `windows-cuda-cu12.2.0`.
You can also check the architecture that works for your system from the Debug messages (starting with "Using architecture"). - From command line change directory to the architecture folder selected and start the server by running the command copied from above. +In both cases you'll need to enable 'Allow Downloads Over HTTP' in the project settings. + **Create the characters**
Create a second project with the game characters using the `LLMCharacter` script as described above. Enable the `Remote` option and configure the host with the IP address (starting with "http://") and port of the server. From 7c44b296d658ade982b681a75f136af885836c5d Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 17:09:34 +0200 Subject: [PATCH 16/20] automatically update llamalib url --- .github/workflows/version.yml | 6 ++++++ README.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/version.yml b/.github/workflows/version.yml index 23071ab4..68403bdf 100644 --- a/.github/workflows/version.yml +++ b/.github/workflows/version.yml @@ -31,6 +31,12 @@ jobs: git add package.json git add Runtime/LLMUnitySetup.cs git add .github/doxygen/Doxyfile + + llamalibVersion=`cat Runtime/LLMUnitySetup.cs | grep LlamaLibVersion | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+'` + sed -Ei "s:https\://github.com/undreamai/LlamaLib/releases/download/v[0-9]+\.[0-9]+\.[0-9]+/undreamai-v[0-9]+\.[0-9]+\.[0-9]+:https\://github.com/undreamai/LlamaLib/releases/download/$llamalibVersion/undreamai-$llamalibVersion:g" README.md + + git add README.md + git commit -m "update VERSION" - name: Push changes diff --git a/README.md b/README.md index 0c566eeb..c9bc7264 100644 --- a/README.md +++ b/README.md @@ -342,7 +342,7 @@ To create the server: Alternatively you can use a server binary for easier deployment: - Run the above scene from the Editor and copy the command from the Debug messages (starting with "Server command:") -- Download the [server binaries](https://github.com/undreamai/LlamaLib/releases/download/v1.1.12/undreamai-v1.1.12-server.zip) and [DLLs](https://github.com/undreamai/LlamaLib/releases/download/v1.1.12/undreamai-v1.1.12-llamacpp-full.zip) and extract them into the same folder +- Download the [server binaries](https://github.com/undreamai/LlamaLib/releases/download/v1.2.1/undreamai-v1.2.1-server.zip) and [DLLs](https://github.com/undreamai/LlamaLib/releases/download/v1.2.1/undreamai-v1.2.1-llamacpp-full.zip) and extract them into the same folder - Find the architecture you are interested in from the folder above e.g. for Windows and CUDA use the `windows-cuda-cu12.2.0`.
You can also check the architecture that works for your system from the Debug messages (starting with "Using architecture"). - From command line change directory to the architecture folder selected and start the server by running the command copied from above. From 519e70eeb02c86f04bb44213394ea34b1e825a7e Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 17:33:17 +0200 Subject: [PATCH 17/20] add link to MaiMai AI Agent System project --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c9bc7264..740bb57c 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ LLM for Unity is built on top of the awesome [llama.cpp](https://github.com/gger At a glance  •  How to help  •  -Games using LLM for Unity  •  +Games / Projects using LLM for Unity  •  Setup  •  How to use  •  RAG  •  @@ -53,7 +53,7 @@ LLM for Unity is built on top of the awesome [llama.cpp](https://github.com/gger - [![](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/amakropoulos) this work to allow even cooler features! -## Games using LLM for Unity +## Games / Projects using LLM for Unity - [Verbal Verdict](https://store.steampowered.com/app/2778780/Verbal_Verdict/) - [I, Chatbot: AISYLUM](https://store.epicgames.com/de/p/i-chatbot-aisylum-83b2b5) - [Nameless Souls of the Void](https://unicorninteractive.itch.io/nameless-souls-of-the-void) @@ -61,6 +61,7 @@ LLM for Unity is built on top of the awesome [llama.cpp](https://github.com/gger - [Finicky Food Delivery AI](https://helixngc7293.itch.io/finicky-food-delivery-ai) - [AI Emotional Girlfriend](https://whynames.itch.io/aiemotionalgirlfriend) - [Case Closed](https://store.steampowered.com/app/2532160/Case_Closed) +- [MaiMai AI Agent System](https://github.com/IhateCreatingUserNames2/MaiMai) Contact us to add your project! From 48d667f9b2d11f9d912bd034af76470660412bf2 Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Tue, 21 Jan 2025 18:37:14 +0200 Subject: [PATCH 18/20] persist debug mode and use of extras to the build --- Runtime/LLMManager.cs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Runtime/LLMManager.cs b/Runtime/LLMManager.cs index 29f0c132..72e58a06 100644 --- a/Runtime/LLMManager.cs +++ b/Runtime/LLMManager.cs @@ -97,6 +97,8 @@ public class LLMManagerStore { public bool downloadOnStart; public List modelEntries; + public int debugMode; + public bool fullLlamaLib; } /// \endcond @@ -324,6 +326,8 @@ public static void LoadFromDisk() LLMManagerStore store = JsonUtility.FromJson(File.ReadAllText(LLMUnitySetup.LLMManagerPath)); downloadOnStart = store.downloadOnStart; modelEntries = store.modelEntries; + LLMUnitySetup.DebugMode = (LLMUnitySetup.DebugModeType)store.debugMode; + LLMUnitySetup.FullLlamaLib = store.fullLlamaLib; } #if UNITY_EDITOR @@ -610,7 +614,11 @@ public static void SetLoraProgress(float progress) ///
public static void Save() { - string json = JsonUtility.ToJson(new LLMManagerStore { modelEntries = modelEntries, downloadOnStart = downloadOnStart }, true); + string json = JsonUtility.ToJson(new LLMManagerStore + { + modelEntries = modelEntries, + downloadOnStart = downloadOnStart, + }, true); PlayerPrefs.SetString(LLMManagerPref, json); PlayerPrefs.Save(); } @@ -638,7 +646,13 @@ public static void SaveToDisk() if (!modelEntry.includeInBuild) continue; modelEntriesBuild.Add(modelEntry.OnlyRequiredFields()); } - string json = JsonUtility.ToJson(new LLMManagerStore { modelEntries = modelEntriesBuild, downloadOnStart = downloadOnStart }, true); + string json = JsonUtility.ToJson(new LLMManagerStore + { + modelEntries = modelEntriesBuild, + downloadOnStart = downloadOnStart, + debugMode = (int)LLMUnitySetup.DebugMode, + fullLlamaLib = LLMUnitySetup.FullLlamaLib + }, true); File.WriteAllText(LLMUnitySetup.LLMManagerPath, json); } From a4460458ac70cf5091409be861b1b7b79af9368e Mon Sep 17 00:00:00 2001 From: amakropoulos Date: Tue, 21 Jan 2025 16:38:14 +0000 Subject: [PATCH 19/20] update changelogs --- CHANGELOG.md | 2 ++ CHANGELOG.release.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7ee85f2..05e57c03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,12 @@ #### 🚀 Features - Add warm-up function for provided prompt (PR: #301) +- Add documentation in Unity tooltips (PR: #302) #### 🐛 Fixes - Fix code signing on iOS (PR: #298) +- Persist debug mode and use of extras to the build (PR: #304) ## v2.4.1 diff --git a/CHANGELOG.release.md b/CHANGELOG.release.md index 5026766b..c814a37e 100644 --- a/CHANGELOG.release.md +++ b/CHANGELOG.release.md @@ -1,8 +1,10 @@ ### 🚀 Features - Add warm-up function for provided prompt (PR: #301) +- Add documentation in Unity tooltips (PR: #302) ### 🐛 Fixes - Fix code signing on iOS (PR: #298) +- Persist debug mode and use of extras to the build (PR: #304) From 390a1b2cd9246806469224a459ab97809fa28dcc Mon Sep 17 00:00:00 2001 From: Antonis Makropoulos Date: Wed, 22 Jan 2025 12:45:33 +0200 Subject: [PATCH 20/20] add caller graph --- .github/doxygen/Doxyfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/doxygen/Doxyfile b/.github/doxygen/Doxyfile index f127b7ce..e33a45bb 100644 --- a/.github/doxygen/Doxyfile +++ b/.github/doxygen/Doxyfile @@ -2689,7 +2689,7 @@ CALL_GRAPH = NO # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -CALLER_GRAPH = NO +CALLER_GRAPH = YES # If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical # hierarchy of all classes instead of a textual one.
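Taken together, patches 09, 12 and 14 mean that the option documentation now lives next to the code: `.github/update_tooltips.py` copies the `/// <summary>` comment of each public field in a `MonoBehaviour`-derived class into a matching `[Tooltip]` attribute, and a trailing `// Tooltip: ignore` comment protects hand-written attributes from being regenerated. The snippet below is a hedged illustration of the result; the class and field are invented for the example and are not taken from the repository.

```csharp
using UnityEngine;

// Example only: before the script runs, this class has just the XML doc comments;
// afterwards the [Tooltip] line under the first field has been inserted from its summary.
public class ExampleCharacter : MonoBehaviour
{
    /// <summary> initial greeting shown to the player </summary>
    [Tooltip("initial greeting shown to the player")]
    public string greeting = "Hello!";

    /// LLM GameObject to use
    [Tooltip("LLM GameObject to use")] // Tooltip: ignore (kept as-is on later runs, per patch 14)
    public GameObject llmObject;
}
```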