From d9fd8ff5ba12c8047ca582fcd17f85bbe1978357 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20M=C3=B6ller?=
Date: Fri, 29 Nov 2024 18:01:52 +0100
Subject: [PATCH] Python: Fix Onnx Connector Memory Problem with Onnx (#9716)

The Onnx connector currently runs into memory issues when the generation logic is divided among multiple methods.

### Motivation and Context

I was experiencing odd, non-reproducible memory issues with the connector when using Phi-3 Vision. After tracing the memory, it turned out that problems arise when the generator parameters and the generation loop are not in the same function. I am already in contact with the PG to address the issue on the ONNX side as well. There seems to be a memory problem in the pybind layer, because the parameters show non-deterministic behavior even though they should be deterministic. To fix the current problem, I decided to merge the parameter-preparation method into the generation method.

### Description

Removes the `_prepare_input_params` helper and inlines its logic into `_generate_next_token_async`, so that the `GeneratorParams` object is created and consumed within the same function.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone :smile:

Co-authored-by: Tao Chen
---
 .../services/onnx_gen_ai_completion_base.py   | 30 ++++++++-----------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
index c7e2c47d12d4..40ce552ed88b 100644
--- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py
@@ -53,23 +53,6 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
             **kwargs,
         )
 
-    def _prepare_input_params(
-        self, prompt: str, settings: OnnxGenAIPromptExecutionSettings, image: ImageContent | None = None
-    ) -> Any:
-        params = OnnxRuntimeGenAi.GeneratorParams(self.model)
-        params.set_search_options(**settings.prepare_settings_dict())
-        if not self.enable_multi_modality:
-            input_tokens = self.tokenizer.encode(prompt)
-            params.input_ids = input_tokens
-        else:
-            if image is not None:
-                # With the use of Pybind there is currently no way to load images from bytes
-                # We can only open images from a file path currently
-                image = OnnxRuntimeGenAi.Images.open(str(image.uri))
-            input_tokens = self.tokenizer(prompt, images=image)
-            params.set_inputs(input_tokens)
-        return params
-
     async def _generate_next_token_async(
         self,
         prompt: str,
@@ -77,7 +60,18 @@ async def _generate_next_token_async(
         image: ImageContent | None = None,
     ) -> AsyncGenerator[list[str], Any]:
         try:
-            params = self._prepare_input_params(prompt, settings, image)
+            params = OnnxRuntimeGenAi.GeneratorParams(self.model)
+            params.set_search_options(**settings.prepare_settings_dict())
+            if not self.enable_multi_modality:
+                input_tokens = self.tokenizer.encode(prompt)
+                params.input_ids = input_tokens
+            else:
+                if image is not None:
+                    # With the use of Pybind there is currently no way to load images from bytes
+                    # We can only open images from a file path currently
+                    image = OnnxRuntimeGenAi.Images.open(str(image.uri))
+                input_tokens = self.tokenizer(prompt, images=image)
+                params.set_inputs(input_tokens)
             generator = OnnxRuntimeGenAi.Generator(self.model, params)
             while not generator.is_done():
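
As a companion to the diff above, here is a minimal, self-contained sketch of the pattern this patch enforces: the `GeneratorParams` object is created, filled, and consumed by the `Generator` inside a single function instead of being returned across a method boundary. The `model_path` value and the `compute_logits`/`generate_next_token` loop body are assumptions based on the onnxruntime-genai 0.4-style Python examples, not part of this patch.

```python
# Illustrative sketch only (not part of the patch). Assumes the
# onnxruntime-genai 0.4-style Python API; model_path is a placeholder.
import onnxruntime_genai as og


def generate(model_path: str, prompt: str, **search_options) -> str:
    """Keep GeneratorParams and Generator in one scope, mirroring the merged method."""
    model = og.Model(model_path)
    tokenizer = og.Tokenizer(model)
    tokenizer_stream = tokenizer.create_stream()

    # Parameters and generation stay in the same function; the params object
    # is never handed across a method boundary (the pybind behavior this
    # patch works around).
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    params.input_ids = tokenizer.encode(prompt)

    generator = og.Generator(model, params)
    output: list[str] = []
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()
        new_token = generator.get_next_tokens()[0]
        output.append(tokenizer_stream.decode(new_token))
    return "".join(output)


# Hypothetical usage with a local Phi-3 ONNX model directory:
# print(generate("./phi-3-mini-4k-instruct-onnx", "Hello", max_length=128))
```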