- 
                Notifications
    You must be signed in to change notification settings 
- Fork 474
Open
Labels
Description
using LLama;
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;
using System.Text;
// Demonstrates using LLava (image embeddings) with the batched executor.

// Locations of the language model and of the multi-modal projector (CLIP) file.
// Both are resolved relative to the current working directory.
var ModelPath = Path.Combine(Environment.CurrentDirectory, "llava-v1.6-mistral-7b-Q5_K_S.gguf"); // change it to your own model path
var ModelPath4 = Path.Combine(Environment.CurrentDirectory, "mmproj.gguf");
string multiModalProj = ModelPath4;//@"<Your multi-modal proj file path>"
string modelPath = ModelPath;//@"<Your LLaVA model file path>"
// NOTE(review): this value looks like a directory rather than an image file; the image is
// later read with File.ReadAllBytesAsync, so it must point at an actual image - confirm.
string modelImage = "C:\\Users\\Administrator\\Favorites\\Documents";//@"<Your image path>"

// Load the language model weights. `using` releases the native handles when the program ends.
var parameters = new ModelParams(modelPath);
using var model = await LLamaWeights.LoadFromFileAsync(parameters);

// The image encoder must be loaded from the multi-modal projector file,
// not from the language model file.
using var llava = await LLavaWeights.LoadFromFileAsync(multiModalProj);

// How many tokens of response to generate
int TokenCount = 64;

// Await the run so the process does not exit (and exceptions are not lost)
// before inference has finished.
await Run();
    // Runs the demo end to end: asks for a text prompt, embeds the configured image,
    // feeds the image embedding and then the prompt through a single batched conversation,
    // generates up to TokenCount tokens and prints the decoded response.
    // Reads the top-level `model`, `llava`, `parameters`, `modelImage` and `TokenCount` variables.
    // Throws FileNotFoundException when `modelImage` does not point at an existing file.
    async Task Run()
    {
        // Decide on the prompt
        var prompt = model.Tokenize(AnsiConsole.Ask("Prompt (or ENTER for default):", "\nUSER: Provide a full description of the image.\nASSISTANT: "), true, false, Encoding.UTF8);

        // Get the image and fail early, with a clear message, if the path is wrong
        var image = modelImage;
        if (!File.Exists(image))
        {
            throw new FileNotFoundException("The configured image path does not point to an existing file.", image);
        }

        // Create an executor with one conversation
        using var executor = new BatchedExecutor(model, parameters);
        using var conversation = executor.Create();

        // Embed the image
        SafeLlavaImageEmbedHandle embedding = null!;
        await AnsiConsole
             .Status()
             .StartAsync("[yellow]Embedding image with CLIP[/]", async _ =>
              {
                  embedding = llava.CreateImageEmbeddings(await File.ReadAllBytesAsync(image));
              });

        // Pass in the image and run inference until the entire image has been processed
        await AnsiConsole
             .Status()
             .StartAsync("[yellow]Processing image embedding with language model[/]", async _ =>
              {
                  conversation.Prompt(embedding);
                  while (executor.BatchedTokenCount > 0)
                  {
                      await executor.Infer();
                  }
              });

        // Prompt with the text prompt
        conversation.Prompt(prompt);

        // Run inference loop. The sampler is disposed when this method exits; the lambda
        // below is awaited before that point, so it never observes a disposed sampler.
        var decoder = new StreamingTokenDecoder(executor.Context);
        // ReSharper disable once AccessToDisposedClosure
        using var sampler = new DefaultSamplingPipeline();
        await AnsiConsole
             .Progress()
             .StartAsync(async ctx =>
              {
                  var task = ctx.AddTask("Generating Response");
                  task.MaxValue = TokenCount;

                  // Run a normal inference loop
                  for (var i = 0; i < TokenCount; i++)
                  {
                      task.Increment(1);
                      await executor.Infer();

                      var token = sampler.Sample(executor.Context.NativeHandle, conversation.GetSampleIndex());

                      // Stop as soon as the model signals the end of its answer
                      if (token.IsEndOfGeneration(executor.Context.Vocab))
                      {
                          break;
                      }

                      decoder.Add(token);
                      conversation.Prompt(token);
                  }
              });

        // Print final result
        var str = decoder.Read();
        AnsiConsole.MarkupInterpolated($"[green]{str}[/]");
    }
cpu