From 467428172ea2d0de0cbbf39b0e449b913d03252c Mon Sep 17 00:00:00 2001
From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com>
Date: Mon, 2 Dec 2024 12:36:56 +0100
Subject: [PATCH] .Net: Bump ONNX to 0.5.2 (#9644)

### Motivation and Context

- Starting with the 0.5.0 package, the caller is also responsible for managing native resources through an `OgaHandle` instance; when a service is instantiated, this handle needs to be created and kept alive alongside the service. Otherwise, a message is written to the console and the application crashes before finishing.
- Resolves #9628

---------

Co-authored-by: westey <164392973+westey-m@users.noreply.github.com>
---
 dotnet/Directory.Packages.props               |  8 +--
 .../ChatCompletion/Onnx_ChatCompletion.cs     | 18 ++++-
 .../Onnx_ChatCompletionStreaming.cs           | 67 +++++++++++--------
 .../Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj  |  2 +-
 dotnet/samples/Demos/OnnxSimpleRAG/Program.cs | 32 +++++++--
 .../Connectors.Onnx/Connectors.Onnx.csproj    |  2 +-
 .../OnnxRuntimeGenAIChatCompletionService.cs  |  1 -
 ...OnnxRuntimeGenAIPromptExecutionSettings.cs |  1 +
 8 files changed, 88 insertions(+), 43 deletions(-)

diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props
index 18d29da0e49d..51ad9b7923f1 100644
--- a/dotnet/Directory.Packages.props
+++ b/dotnet/Directory.Packages.props
@@ -43,7 +43,7 @@
-    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.4.0" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.5.2" />
@@ -161,8 +161,8 @@
       <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
-    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.4.0" />
-    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.4.0" />
-    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Managed" Version="0.4.0" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.5.2" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.5.2" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Managed" Version="0.5.2" />
\ No newline at end of file
diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs
index 563ed3475b5e..823e711d1942 100644
--- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs
+++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs
@@ -34,7 +34,7 @@ public async Task ServicePromptAsync()
 
         Console.WriteLine("======== Onnx - Chat Completion ========");
 
-        var chatService = new OnnxRuntimeGenAIChatCompletionService(
+        using var chatService = new OnnxRuntimeGenAIChatCompletionService(
             modelId: TestConfiguration.Onnx.ModelId,
             modelPath: TestConfiguration.Onnx.ModelPath);
 
@@ -105,5 +105,21 @@ public async Task ChatPromptAsync()
 
         reply = await kernel.InvokePromptAsync(chatPrompt.ToString());
         Console.WriteLine(reply);
+
+        DisposeServices(kernel);
+    }
+
+    /// <summary>
+    /// To avoid any potential memory leak all disposable services created by the kernel are disposed.
+    /// </summary>
+    /// <param name="kernel">Target kernel</param>
+    private static void DisposeServices(Kernel kernel)
+    {
+        foreach (var target in kernel
+            .GetAllServices<IChatCompletionService>()
+            .OfType<IDisposable>())
+        {
+            target.Dispose();
+        }
     }
 }
diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs
index d6ad1f05e7f2..2c1bd1369677 100644
--- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs
+++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs
@@ -29,18 +29,36 @@ public class Onnx_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output)
     ///
     /// </summary>
     [Fact]
-    public Task StreamChatAsync()
+    public async Task StreamChatAsync()
     {
         Assert.NotNull(TestConfiguration.Onnx.ModelId);   // dotnet user-secrets set "Onnx:ModelId" "<model-id>"
         Assert.NotNull(TestConfiguration.Onnx.ModelPath); // dotnet user-secrets set "Onnx:ModelPath" "<model-folder-path>"
 
         Console.WriteLine("======== Onnx - Chat Completion Streaming ========");
 
-        var chatService = new OnnxRuntimeGenAIChatCompletionService(
+        using var chatService = new OnnxRuntimeGenAIChatCompletionService(
             modelId: TestConfiguration.Onnx.ModelId,
             modelPath: TestConfiguration.Onnx.ModelPath);
 
-        return this.StartStreamingChatAsync(chatService);
+        Console.WriteLine("Chat content:");
+        Console.WriteLine("------------------------");
+
+        var chatHistory = new ChatHistory("You are a librarian, expert about books");
+        OutputLastMessage(chatHistory);
+
+        // First user message
+        chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+        OutputLastMessage(chatHistory);
+
+        // First assistant message
+        await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant);
+
+        // Second user message
+        chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?");
+        OutputLastMessage(chatHistory);
+
+        // Second assistant message
+        await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant);
     }
 
     /// <summary>
@@ -86,6 +104,8 @@ public async Task StreamChatPromptAsync()
 
         reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString());
         Console.WriteLine(reply);
+
+        DisposeServices(kernel);
     }
 
     /// <summary>
@@ -115,7 +135,7 @@ public async Task StreamTextFromChatAsync()
         Console.WriteLine("======== Stream Text from Chat Content ========");
 
         // Create chat completion service
-        var chatService = new OnnxRuntimeGenAIChatCompletionService(
+        using var chatService = new OnnxRuntimeGenAIChatCompletionService(
             modelId: TestConfiguration.Onnx.ModelId,
             modelPath: TestConfiguration.Onnx.ModelPath);
 
@@ -135,30 +155,7 @@ public async Task StreamTextFromChatAsync()
         }
     }
 
-    private async Task StartStreamingChatAsync(IChatCompletionService chatCompletionService)
-    {
-        Console.WriteLine("Chat content:");
-        Console.WriteLine("------------------------");
-
-        var chatHistory = new ChatHistory("You are a librarian, expert about books");
-        OutputLastMessage(chatHistory);
-
-        // First user message
-        chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
-        OutputLastMessage(chatHistory);
-
-        // First assistant message
-        await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant);
-
-        // Second user message
-        chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?");
-        OutputLastMessage(chatHistory);
-
-        // Second assistant message
-        await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant);
-    }
-
-    private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole)
+    private async Task StreamMessageOutputAsync(OnnxRuntimeGenAIChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole)
     {
         bool roleWritten = false;
         string fullMessage = string.Empty;
@@ -205,4 +202,18 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, string prompt)
         Console.WriteLine("\n------------------------");
         return fullMessage;
     }
+
+    /// <summary>
+    /// To avoid any potential memory leak all disposable services created by the kernel are disposed.
+    /// </summary>
+    /// <param name="kernel">Target kernel</param>
+    private static void DisposeServices(Kernel kernel)
+    {
+        foreach (var target in kernel
+            .GetAllServices<IChatCompletionService>()
+            .OfType<IDisposable>())
+        {
+            target.Dispose();
+        }
+    }
 }
diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj b/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj
index 8b22d740edb9..bbb5f38ba81d 100644
--- a/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj
+++ b/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj
@@ -3,7 +3,7 @@
     <OutputType>Exe</OutputType>
     <TargetFramework>net8.0</TargetFramework>
-    <NoWarn>$(NoWarn);CA2007;CS0612;VSTHRD111</NoWarn>
+    <NoWarn>$(NoWarn);CA2007;CS0612;VSTHRD111;SKEXP0070;SKEXP0050;SKEXP0001;SKEXP0020</NoWarn>
     <UserSecretsId>5ee045b0-aea3-4f08-8d31-32d1a6f8fed0</UserSecretsId>
diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs
index 9727e600145f..4ab9cd0ef9b5 100644
--- a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs
+++ b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs
@@ -1,17 +1,15 @@
 // Copyright (c) Microsoft. All rights reserved.
 
-#pragma warning disable SKEXP0070
-#pragma warning disable SKEXP0050
-#pragma warning disable SKEXP0001
-#pragma warning disable SKEXP0020
-
 using System;
 using System.IO;
+using System.Linq;
 using Microsoft.Extensions.Configuration;
 using Microsoft.Extensions.VectorData;
+using Microsoft.ML.OnnxRuntimeGenAI;
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel.Connectors.InMemory;
+using Microsoft.SemanticKernel.Connectors.Onnx;
 using Microsoft.SemanticKernel.Data;
 using Microsoft.SemanticKernel.Embeddings;
 using Microsoft.SemanticKernel.PromptTemplates.Handlebars;
@@ -29,6 +27,10 @@
 // Path to the vocab file your ONNX BGE-MICRO-V2 model
 var embeddingVocabPath = config["Onnx:EmbeddingVocabPath"]!;
 
+// If using Onnx GenAI 0.5.0 or later, the OgaHandle class must be used to track
+// resources used by the Onnx services, before using any of the Onnx services.
+using var ogaHandle = new OgaHandle();
+
 // Load the services
 var builder = Kernel.CreateBuilder()
     .AddOnnxRuntimeGenAIChatCompletion(chatModelId, chatModelPath)
@@ -38,7 +40,7 @@
 var kernel = builder.Build();
 
 // Get the instances of the services
-var chatService = kernel.GetRequiredService<IChatCompletionService>();
+using var chatService = kernel.GetRequiredService<IChatCompletionService>() as OnnxRuntimeGenAIChatCompletionService;
 var embeddingService = kernel.GetRequiredService<ITextEmbeddingGenerationService>();
 
 // Create a vector store and a collection to store information
@@ -71,9 +73,12 @@ await collection.UpsertAsync(new()
     Console.Write("User > ");
     var question = Console.ReadLine()!;
 
+    // Clean resources and exit the demo if the user input is null or empty
     if (question is null || string.IsNullOrWhiteSpace(question))
     {
-        // Exit the demo if the user input is null or empty
+        // To avoid any potential memory leak all disposable
+        // services created by the kernel are disposed
+        DisposeServices(kernel);
         return;
     }
@@ -105,6 +110,19 @@ await collection.UpsertAsync(new()
     Console.WriteLine();
 }
 
+static void DisposeServices(Kernel kernel)
+{
+    foreach (var target in kernel
+        .GetAllServices<IChatCompletionService>()
+        .OfType<IDisposable>())
+    {
+        target.Dispose();
+    }
+}
+
+/// <summary>
+/// Information item to represent the embedding data stored in the memory
+/// </summary>
 internal sealed class InformationItem
 {
     [VectorStoreRecordKey]
diff --git a/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj b/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj
index df49c6da0bfb..b5ff2314face 100644
--- a/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj
+++ b/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj
@@ -5,8 +5,8 @@
     <AssemblyName>Microsoft.SemanticKernel.Connectors.Onnx</AssemblyName>
    <RootNamespace>$(AssemblyName)</RootNamespace>
     <TargetFrameworks>net8.0;netstandard2.0</TargetFrameworks>
-    <VersionSuffix>alpha</VersionSuffix>
     <EnablePackageValidation>true</EnablePackageValidation>
+    <VersionSuffix>alpha</VersionSuffix>
diff --git a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs
index 8a6210253729..7419b07799d0 100644
--- a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs
+++ b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs
@@ -25,7 +25,6 @@ public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionService
     private readonly JsonSerializerOptions? _jsonSerializerOptions;
     private Model? _model;
     private Tokenizer? _tokenizer;
-
     private Dictionary<string, object?> AttributesInternal { get; } = new();
 
     /// <summary>
diff --git a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs
index 8a7c0ccd3cae..e8c7f058fd24 100644
--- a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs
+++ b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs
@@ -11,6 +11,7 @@ namespace Microsoft.SemanticKernel.Connectors.Onnx;
 /// <summary>
 /// OnnxRuntimeGenAI Execution Settings.
 /// </summary>
+[JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
 public sealed class OnnxRuntimeGenAIPromptExecutionSettings : PromptExecutionSettings
 {
     /// <summary>
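
Taken together, the changes above settle on a single consumption pattern for the ONNX connector. The snippet below is a minimal sketch of that pattern rather than part of the patch: the model id and path are placeholder values, and it assumes a console project referencing the bumped `Microsoft.ML.OnnxRuntimeGenAI` 0.5.2 package and the Semantic Kernel ONNX connector, with the SKEXP0070 experimental warning suppressed the same way the updated OnnxSimpleRAG.csproj does via `NoWarn`.

```csharp
using System;
using Microsoft.ML.OnnxRuntimeGenAI;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Onnx;

// Create the OgaHandle before any ONNX GenAI service is used (required since 0.5.0)
// and keep it alive for the lifetime of the application; per the PR description,
// skipping it leads to a console message and a crash before the app finishes.
using var ogaHandle = new OgaHandle();

// The chat completion service holds native model resources, so dispose it as well;
// `using` covers the direct-instantiation case shown here.
using var chatService = new OnnxRuntimeGenAIChatCompletionService(
    modelId: "phi-3",               // placeholder model id
    modelPath: @"C:\models\phi-3"); // placeholder path to a local ONNX model folder

var chatHistory = new ChatHistory("You are a helpful assistant.");
chatHistory.AddUserMessage("Hello!");

// GetChatMessageContentAsync is the standard IChatCompletionService extension method.
var reply = await chatService.GetChatMessageContentAsync(chatHistory);
Console.WriteLine(reply);
```

For services resolved from a `Kernel` instead of constructed directly, the samples dispose them explicitly through the `DisposeServices` helper added in this PR, to avoid any potential memory leak.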