From d9fd8ff5ba12c8047ca582fcd17f85bbe1978357 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20M=C3=B6ller?=
Date: Fri, 29 Nov 2024 18:01:52 +0100
Subject: [PATCH] Python: Fix Onnx Connector Memory Problem with Onnx (#9716)

The Onnx connector currently runs into memory issues when the generation logic is divided among multiple methods.

### Motivation and Context

I was experiencing odd, non-reproducible memory issues with the connector when using Phi-3 Vision. After tracing the memory, it turned out that problems arise when the generator parameters and the generation loop are not in the same function. I am already in contact with the PG to address the issue on the ONNX side as well. There seems to be a memory problem in the pybind layer, because the parameters show non-deterministic behavior even though they should be deterministic. To fix the current problem, I decided to merge the parameter-preparation method into the generation method.

### Description

Removes the `_prepare_input_params` helper and inlines its logic into `_generate_next_token_async`, so that the `GeneratorParams` object is created and consumed within the same function.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone :smile:

Co-authored-by: Tao Chen
---
 .../services/onnx_gen_ai_completion_base.py   | 30 ++++++++-----------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
index c7e2c47d12d4..40ce552ed88b 100644
--- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
@@ -53,23 +53,6 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
             **kwargs,
         )
 
-    def _prepare_input_params(
-        self, prompt: str, settings: OnnxGenAIPromptExecutionSettings, image: ImageContent | None = None
-    ) -> Any:
-        params = OnnxRuntimeGenAi.GeneratorParams(self.model)
-        params.set_search_options(**settings.prepare_settings_dict())
-        if not self.enable_multi_modality:
-            input_tokens = self.tokenizer.encode(prompt)
-            params.input_ids = input_tokens
-        else:
-            if image is not None:
-                # With the use of Pybind there is currently no way to load images from bytes
-                # We can only open images from a file path currently
-                image = OnnxRuntimeGenAi.Images.open(str(image.uri))
-            input_tokens = self.tokenizer(prompt, images=image)
-            params.set_inputs(input_tokens)
-        return params
-
     async def _generate_next_token_async(
         self,
         prompt: str,
@@ -77,7 +60,18 @@ async def _generate_next_token_async(
         image: ImageContent | None = None,
     ) -> AsyncGenerator[list[str], Any]:
         try:
-            params = self._prepare_input_params(prompt, settings, image)
+            params = OnnxRuntimeGenAi.GeneratorParams(self.model)
+            params.set_search_options(**settings.prepare_settings_dict())
+            if not self.enable_multi_modality:
+                input_tokens = self.tokenizer.encode(prompt)
+                params.input_ids = input_tokens
+            else:
+                if image is not None:
+                    # With the use of Pybind there is currently no way to load images from bytes
+                    # We can only open images from a file path currently
+                    image = OnnxRuntimeGenAi.Images.open(str(image.uri))
+                input_tokens = self.tokenizer(prompt, images=image)
+                params.set_inputs(input_tokens)
             generator = OnnxRuntimeGenAi.Generator(self.model, params)
             while not generator.is_done():
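
As a companion to the diff above, here is a minimal, self-contained sketch of the pattern this patch enforces: the `GeneratorParams` object is created, filled, and consumed by the `Generator` inside a single function instead of being returned across a method boundary. The `model_path` value and the `compute_logits`/`generate_next_token` loop body are assumptions based on the onnxruntime-genai 0.4-style Python examples, not part of this patch.

```python
# Illustrative sketch only (not part of the patch). Assumes the
# onnxruntime-genai 0.4-style Python API; model_path is a placeholder.
import onnxruntime_genai as og


def generate(model_path: str, prompt: str, **search_options) -> str:
    """Keep GeneratorParams and Generator in one scope, mirroring the merged method."""
    model = og.Model(model_path)
    tokenizer = og.Tokenizer(model)
    tokenizer_stream = tokenizer.create_stream()

    # Parameters and generation stay in the same function; the params object
    # is never handed across a method boundary (the pybind behavior this
    # patch works around).
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    params.input_ids = tokenizer.encode(prompt)

    generator = og.Generator(model, params)
    output: list[str] = []
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()
        new_token = generator.get_next_tokens()[0]
        output.append(tokenizer_stream.decode(new_token))
    return "".join(output)


# Hypothetical usage with a local Phi-3 ONNX model directory:
# print(generate("./phi-3-mini-4k-instruct-onnx", "Hello", max_length=128))
```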