diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml index 1c20a5280720..95fdf159670a 100644 --- a/.github/workflows/python-build.yml +++ b/.github/workflows/python-build.yml @@ -16,14 +16,14 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - name: Check version run: | - echo "Building and uploading Python package version: ${{ github.event.release.tag_name }}" + echo "Building and uploading Python package version: ${{ github.event.release.tag_name }}" - name: Build the package run: cd python && make build - name: Release diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml index 7100ad334308..92d20f937193 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -20,7 +20,6 @@ permissions: env: # Configure a constant location for the uv cache UV_CACHE_DIR: /tmp/.uv-cache - HNSWLIB_NO_NATIVE: 1 Python_Integration_Tests: Python_Integration_Tests AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME }} # azure-text-embedding-ada-002 AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }} @@ -92,8 +91,8 @@ jobs: if: steps.filter.outputs.python != 'true' run: echo "NOT python file" - python-merge-gate: - name: Python Pre-Merge Integration Tests + python-merge-gate-ai-services: + name: Python Pre-Merge Integration Tests - AI Services (incl samples using those) needs: paths-filter if: github.event_name != 'pull_request' && github.event_name != 'schedule' && needs.paths-filter.outputs.pythonChanges == 'true' strategy: @@ -107,21 +106,18 @@ jobs: working-directory: python runs-on: ${{ matrix.os }} environment: "integration" + env: + UV_PYTHON: ${{ matrix.python-version }} + COMPLETIONS_CONCEPT_SAMPLE: "true" steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - - name: Install dependencies with hnswlib native disabled - if: matrix.os == 'macos-latest' && matrix.python-version == '3.11' - run: | - export HNSWLIB_NO_NATIVE=1 - uv sync --all-extras --dev - - name: Install dependencies with hnswlib native enabled - if: matrix.os != 'macos-latest' || matrix.python-version != '3.11' + - name: Install dependencies run: | uv sync --all-extras --dev - name: Install Ollama @@ -152,18 +148,6 @@ jobs: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ vars.AWS_REGION }} - - name: Setup Redis Stack Server - if: matrix.os == 'ubuntu-latest' - run: docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest - - name: Setup Weaviate docker deployment - if: matrix.os == 'ubuntu-latest' - run: docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:1.26.6 - - name: Start Azure Cosmos DB emulator - if: matrix.os == 'windows-latest' - run: | - Write-Host "Launching Cosmos DB Emulator" - Import-Module "$env:ProgramFiles\Azure Cosmos DB Emulator\PSModules\Microsoft.Azure.CosmosDB.Emulator" - Start-CosmosDbEmulator - name: Azure CLI Login if: github.event_name != 'pull_request' uses: azure/login@v2 @@ -171,55 +155,72 
@@ jobs: client-id: ${{ secrets.AZURE_CLIENT_ID }} tenant-id: ${{ secrets.AZURE_TENANT_ID }} subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - name: Run Integration Tests - Completions - id: run_tests_completions - timeout-minutes: 15 + - name: Run Integration Tests + id: run_tests_ai_services + timeout-minutes: 25 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/completions -v --junitxml=pytest-completions.xml - - name: Run Integration Tests - Embeddings - id: run_tests_embeddings - timeout-minutes: 5 - shell: bash + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/completions ./tests/integration/embeddings ./tests/samples ./tests/integration/cross_language + + python-merge-gate-memory: + name: Python Pre-Merge Integration Tests - Memory (incl samples using those) + needs: paths-filter + if: github.event_name != 'pull_request' && github.event_name != 'schedule' && needs.paths-filter.outputs.pythonChanges == 'true' + strategy: + max-parallel: 1 + fail-fast: false + matrix: + python-version: ["3.11"] + os: [ubuntu-latest] + defaults: + run: + working-directory: python + runs-on: ${{ matrix.os }} + environment: "integration" + env: + UV_PYTHON: ${{ matrix.python-version }} + MEMORY_CONCEPT_SAMPLE: "true" + # Service containers to run with for the memory connectors, this only works on Ubuntu + services: + # Label used to access the service container + redis: + # Docker Hub image + image: redis/redis-stack-server:latest + ports: + # Opens tcp port 6379 on the host and service container + - 6379:6379 + weaviate: + image: cr.weaviate.io/semitechnologies/weaviate:1.26.6 + ports: + - 8080:8080 + - 50051:50051 + steps: + - uses: actions/checkout@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + version: "0.5.x" + enable-cache: true + cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} + - name: Install dependencies run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings -v --junitxml=pytest-embeddings.xml - - name: Run Integration Tests - Memory + uv sync --all-extras --dev + - name: Azure CLI Login + if: github.event_name != 'pull_request' + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + - name: Run Integration Tests id: run_tests_memory timeout-minutes: 10 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/memory -v --junitxml=pytest-memory.xml - - name: Run Integration Tests - Cross Language - id: run_tests_cross_language - timeout-minutes: 5 - shell: bash - run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/cross_language -v --junitxml=pytest-cross.xml - - name: Run Integration Tests - Planning - id: run_tests_planning - timeout-minutes: 5 - shell: bash - run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/planning -v --junitxml=pytest-planning.xml - - name: Run Integration Tests - Samples - id: run_tests_samples - timeout-minutes: 5 - shell: bash - run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/samples -v --junitxml=pytest-samples.xml - - name: Surface failing tests - if: always() - uses: pmeier/pytest-results-action@main - with: - path: python/pytest-*.xml - summary: true - display-options: fEX - fail-on-empty: true - title: Test results - - name: Minimize uv cache - run: 
uv cache prune --ci + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/memory ./tests/samples python-integration-tests: + name: Python Integration Tests - Scheduled run needs: paths-filter if: (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && needs.paths-filter.outputs.pythonChanges == 'true' strategy: @@ -233,21 +234,33 @@ jobs: working-directory: python runs-on: ${{ matrix.os }} environment: "integration" + env: + UV_PYTHON: ${{ matrix.python-version }} + MEMORY_CONCEPT_SAMPLE: "true" + COMPLETIONS_CONCEPT_SAMPLE: "true" + # Service containers to run with for the memory connectors, this only works on Ubuntu + services: + # Label used to access the service container + redis: + # Docker Hub image + image: redis/redis-stack-server:latest + ports: + # Opens tcp port 6379 on the host and service container + - 6379:6379 + weaviate: + image: cr.weaviate.io/semitechnologies/weaviate:1.26.6 + ports: + - 8080:8080 + - 50051:50051 steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - - name: Install dependencies with hnswlib native disabled - if: matrix.os == 'macos-latest' && matrix.python-version == '3.11' - run: | - export HNSWLIB_NO_NATIVE=1 - uv sync --all-extras --dev - - name: Install dependencies with hnswlib native enabled - if: matrix.os != 'macos-latest' || matrix.python-version != '3.11' + - name: Install dependencies run: | uv sync --all-extras --dev - name: Install Ollama @@ -278,12 +291,6 @@ jobs: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ vars.AWS_REGION }} - - name: Setup Redis Stack Server - if: matrix.os == 'ubuntu-latest' - run: docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest - - name: Setup Weaviate docker deployment - if: matrix.os == 'ubuntu-latest' - run: docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:1.26.6 - name: Start Azure Cosmos DB emulator if: matrix.os == 'windows-latest' run: | @@ -302,48 +309,37 @@ jobs: timeout-minutes: 10 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/completions -v --junitxml=pytest-completions.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/completions - name: Run Integration Tests - Embeddings id: run_tests_embeddings timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings -v --junitxml=pytest-embeddings.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings - name: Run Integration Tests - Memory id: run_tests_memory timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/memory -v --junitxml=pytest-memory.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/memory - name: Run Integration Tests - Cross Language id: run_tests_cross_language timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/cross_language -v --junitxml=pytest-cross.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/cross_language - name: Run Integration Tests - Planning id: 
run_tests_planning timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/planning -v --junitxml=pytest-planning.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/planning - name: Run Integration Tests - Samples id: run_tests_samples timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/samples -v --junitxml=pytest-samples.xml - - name: Surface failing tests - if: always() - uses: pmeier/pytest-results-action@main - with: - path: python/pytest-*.xml - summary: true - display-options: fEX - fail-on-empty: true - title: Test results - - name: Minimize uv cache - run: uv cache prune --ci + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/samples # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed python-integration-tests-check: @@ -352,7 +348,12 @@ jobs: strategy: max-parallel: 1 fail-fast: false - needs: [python-merge-gate, python-integration-tests] + needs: + [ + python-merge-gate-ai-services, + python-merge-gate-memory, + python-integration-tests, + ] steps: - name: Get Date shell: bash @@ -399,7 +400,7 @@ jobs: dry_run: ${{ env.run_type != 'Daily' && env.run_type != 'Manual'}} job: ${{ toJson(job) }} steps: ${{ toJson(steps) }} - overwrite: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" + title: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" - name: Microsoft Teams Notification (Dry Run) uses: skitionek/notify-microsoft-teams@master @@ -409,4 +410,4 @@ jobs: dry_run: ${{ env.run_type != 'Daily' && env.run_type != 'Manual'}} job: ${{ toJson(job) }} steps: ${{ toJson(steps) }} - overwrite: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" + title: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index f3c945ce263d..18444e097b6c 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -2,13 +2,13 @@ name: Python Code Quality Checks on: workflow_dispatch: pull_request: - branches: [ "main", "feature*" ] + branches: ["main", "feature*"] paths: - - 'python/**' + - "python/**" jobs: pre-commit: - if: '!cancelled()' + if: "!cancelled()" strategy: fail-fast: false matrix: @@ -25,9 +25,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - name: Install the project @@ -38,5 +38,3 @@ jobs: extra_args: --config python/.pre-commit-config.yaml --all-files - name: Run Mypy run: uv run mypy -p semantic_kernel --config-file mypy.ini - - name: Minimize uv cache - run: uv cache prune --ci diff --git a/.github/workflows/python-test-coverage.yml b/.github/workflows/python-test-coverage.yml index 4b5f6ea1778c..d2aebc3796c6 100644 --- a/.github/workflows/python-test-coverage.yml +++ b/.github/workflows/python-test-coverage.yml @@ -30,25 +30,19 @@ jobs: - uses: actions/checkout@v4 - name: Setup 
filename variables run: echo "FILE_ID=${{ github.event.number }}" >> $GITHUB_ENV - - name: Download coverage - uses: dawidd6/action-download-artifact@v6 + - name: Download Files + uses: actions/download-artifact@v4 with: - name: python-coverage-${{ env.FILE_ID }}.txt - github_token: ${{ secrets.GH_ACTIONS_PR_WRITE }} - workflow: python-unit-tests.yml - search_artifacts: true - if_no_artifact_found: warn - - name: Download pytest - uses: dawidd6/action-download-artifact@v6 - with: - name: pytest-${{ env.FILE_ID }}.xml - github_token: ${{ secrets.GH_ACTIONS_PR_WRITE }} - workflow: python-unit-tests.yml - search_artifacts: true - if_no_artifact_found: warn + github-token: ${{ secrets.GH_ACTIONS_PR_WRITE }} + run-id: ${{ github.event.workflow_run.id }} + path: python/ + merge-multiple: true + - name: Display structure of downloaded files + run: ls python/ - name: Pytest coverage comment id: coverageComment uses: MishaKav/pytest-coverage-comment@main + continue-on-error: true with: github-token: ${{ secrets.GH_ACTIONS_PR_WRITE }} pytest-coverage-path: python-coverage.txt diff --git a/.github/workflows/python-unit-tests.yml b/.github/workflows/python-unit-tests.yml index ef1f481ae769..aec1937984f5 100644 --- a/.github/workflows/python-unit-tests.yml +++ b/.github/workflows/python-unit-tests.yml @@ -42,9 +42,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - name: Install the project @@ -62,8 +62,6 @@ jobs: display-options: fEX fail-on-empty: true title: Test results - - name: Minimize uv cache - run: uv cache prune --ci python-test-coverage: name: Python Test Coverage runs-on: [ubuntu-latest] @@ -80,15 +78,15 @@ jobs: - name: Setup filename variables run: echo "FILE_ID=${{ github.event.number }}" >> $GITHUB_ENV - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ env.UV_PYTHON }} - name: Install the project run: uv sync --all-extras --dev - name: Test with pytest - run: uv run --frozen pytest -q --junitxml=pytest.xml --cov=semantic_kernel --cov-report=term-missing:skip-covered ./tests/unit | tee python-coverage.txt + run: uv run --frozen pytest -q --junitxml=pytest.xml --cov=semantic_kernel --cov-report=term-missing:skip-covered ./tests/unit | tee python-coverage.txt - name: Upload coverage if: always() uses: actions/upload-artifact@v4 @@ -105,5 +103,3 @@ jobs: path: python/pytest.xml overwrite: true retention-days: 1 - - name: Minimize uv cache - run: uv cache prune --ci diff --git a/python/samples/concepts/chat_completion/simple_chatbot.py b/python/samples/concepts/chat_completion/simple_chatbot.py index a52b52aaace1..630bd75061f2 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot.py +++ b/python/samples/concepts/chat_completion/simple_chatbot.py @@ -6,7 +6,7 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatHistory # This sample shows how to create a chatbot. This sample uses the following two main components: # - a ChatCompletionService: This component is responsible for generating responses to user messages. 
@@ -26,7 +26,7 @@ # - Services.ONNX # - Services.VERTEX_AI # Please make sure you have configured your environment correctly for the selected chat completion service. -chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) # This is the system message that gives the chatbot its personality. system_message = """ @@ -64,10 +64,11 @@ async def chat() -> bool: chat_history=chat_history, settings=request_settings, ) - print(f"Mosscap:> {response}") + if response: + print(f"Mosscap:> {response}") - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_assistant_message(str(response)) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(response) return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py index 361e4e706d5d..6ed249276c08 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py @@ -6,9 +6,9 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.functions.kernel_arguments import KernelArguments -from semantic_kernel.kernel import Kernel +from semantic_kernel import Kernel +from semantic_kernel.contents import ChatHistory +from semantic_kernel.functions import KernelArguments # This sample shows how to create a chatbot using a kernel function. # This sample uses the following two main components: @@ -97,15 +97,15 @@ async def chat() -> bool: user_input=user_input, ) - answer = await kernel.invoke(chat_function, kernel_arguments) + answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments) # Alternatively, you can invoke the function directly with the kernel as an argument: # answer = await chat_function.invoke(kernel, kernel_arguments) - - print(f"Mosscap:> {answer}") - - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) + if answer: + print(f"Mosscap:> {answer}") + # Since the user_input is rendered by the template, it is not yet part of the chat history, so we add it here. + chat_history.add_user_message(user_input) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(answer.value[0]) return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py index 63fa49e1dc4c..f852cb1744c6 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py @@ -6,7 +6,7 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatHistory # This sample shows how to create a chatbot that whose output can be biased using logit bias. # This sample uses the following three main components: @@ -33,7 +33,6 @@ # Create a chat history object with the system message. 
chat_history = ChatHistory(system_message=system_message) - # Create a list of tokens whose bias value will be reduced. # The token ids of these words can be obtained using the GPT Tokenizer: https://platform.openai.com/tokenizer # the targeted model series is GPT-4o & GPT-4o mini @@ -61,7 +60,7 @@ ] # Configure the logit bias settings to minimize the likelihood of the # tokens in the banned_tokens list appearing in the output. -request_settings.logit_bias = {k: -100 for k in banned_tokens} +request_settings.logit_bias = {k: -100 for k in banned_tokens} # type: ignore async def chat() -> bool: @@ -86,10 +85,11 @@ async def chat() -> bool: chat_history=chat_history, settings=request_settings, ) - print(f"Mosscap:> {response}") + if response: + print(f"Mosscap:> {response}") - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_assistant_message(str(response)) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(response) return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_streaming.py b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py index 21744ffd53c2..b513aeeb408d 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_streaming.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py @@ -1,13 +1,12 @@ # Copyright (c) Microsoft. All rights reserved. import asyncio -from functools import reduce from samples.concepts.setup.chat_completion_services import ( Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatHistory, StreamingChatMessageContent # This sample shows how to create a chatbot that streams responses. # This sample uses the following two main components: @@ -70,15 +69,16 @@ async def chat() -> bool: ) # Capture the chunks of the response and print them as they come in. - chunks = [] + chunks: list[StreamingChatMessageContent] = [] print("Mosscap:> ", end="") async for chunk in response: - chunks.append(chunk) - print(chunk, end="") + if chunk: + chunks.append(chunk) + print(chunk, end="") print("") # Combine the chunks into a single message to add to the chat history. - full_message = reduce(lambda first, second: first + second, chunks) + full_message = sum(chunks[1:], chunks[0]) # Add the chat message to the chat history to keep track of the conversation. chat_history.add_message(full_message) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_image.py b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py index f7fac3448816..5ee1244f5d6d 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_with_image.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py @@ -6,10 +6,7 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.image_content import ImageContent -from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent, TextContent # This sample shows how to create a chatbot that responds to user messages with image input. 
# This sample uses the following three main components: @@ -30,6 +27,11 @@ # - Services.ONNX # - Services.VERTEX_AI # Please make sure you have configured your environment correctly for the selected chat completion service. + +# [NOTE] +# Not all models support image input. Make sure to select a model that supports image input. +# Not all services support image input from an image URI. If your image is saved in a remote location, +# make sure to use a service that supports image input from a URI. chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) IMAGE_URI = "https://upload.wikimedia.org/wikipedia/commons/d/d5/Half-timbered_mansion%2C_Zirkel%2C_East_view.jpg" @@ -41,12 +43,6 @@ image_content_local = ImageContent.from_image_file(IMAGE_PATH) -# [NOTE] -# Not all models support image input. Make sure to select a model that supports image input. -# Not all services support image input from an image URI. If your image is saved in a remote location, -# make sure to use a service that supports image input from a URI. - - # This is the system message that gives the chatbot its personality. system_message = """ You are an image reviewing chat bot. Your name is Mosscap and you have one goal critiquing images that are supplied. @@ -56,7 +52,7 @@ chat_history = ChatHistory(system_message=system_message) chat_history.add_message( ChatMessageContent( - role="user", + role=AuthorRole.USER, items=[TextContent(text="What is in this image?"), image_content_local], ) ) @@ -90,10 +86,11 @@ async def chat(skip_user_input: bool = False) -> bool: chat_history=chat_history, settings=request_settings, ) - print(f"Mosscap:> {response}") + if response: + print(f"Mosscap:> {response}") - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_assistant_message(str(response)) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(response) return True diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py index 903b59f42928..40dd127eda47 100644 --- a/python/samples/concepts/setup/chat_completion_services.py +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -1,29 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. 
from enum import Enum +from typing import TYPE_CHECKING -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings -from semantic_kernel.connectors.ai.azure_ai_inference import ( - AzureAIInferenceChatCompletion, - AzureAIInferenceChatPromptExecutionSettings, -) -from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings -from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings -from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings -from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings -from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate -from semantic_kernel.connectors.ai.open_ai import ( - AzureChatCompletion, - AzureChatPromptExecutionSettings, - OpenAIChatCompletion, - OpenAIChatPromptExecutionSettings, -) -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings - - -class Services(Enum): +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + + +class Services(str, Enum): """Enum for supported chat completion services. For service specific settings, refer to this documentation: @@ -42,9 +27,12 @@ class Services(Enum): VERTEX_AI = "vertex_ai" +service_id = "default" + + def get_chat_completion_service_and_request_settings( - service_name: str, -) -> tuple[ChatCompletionClientBase, PromptExecutionSettings]: + service_name: Services, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return service and request settings.""" chat_services = { Services.OPENAI: get_openai_chat_completion_service_and_request_settings, @@ -58,12 +46,11 @@ def get_chat_completion_service_and_request_settings( Services.ONNX: get_onnx_chat_completion_service_and_request_settings, Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings, } - return chat_services[service_name]() def get_openai_chat_completion_service_and_request_settings() -> tuple[ - OpenAIChatCompletion, OpenAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return OpenAI chat completion service and request settings. 
@@ -78,14 +65,21 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python """ - chat_service = OpenAIChatCompletion() - request_settings = OpenAIChatPromptExecutionSettings(max_tokens=2000, temperature=0.7, top_p=0.8) + from semantic_kernel.connectors.ai.open_ai import ( + OpenAIChatCompletion, + OpenAIChatPromptExecutionSettings, + ) + + chat_service = OpenAIChatCompletion(service_id=service_id) + request_settings = OpenAIChatPromptExecutionSettings( + service_id=service_id, max_tokens=2000, temperature=0.7, top_p=0.8 + ) return chat_service, request_settings def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ - AzureChatCompletion, AzureChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Azure OpenAI chat completion service and request settings. @@ -100,14 +94,19 @@ def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = AzureChatCompletion() - request_settings = AzureChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.open_ai import ( + AzureChatCompletion, + AzureChatPromptExecutionSettings, + ) + + chat_service = AzureChatCompletion(service_id=service_id) + request_settings = AzureChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[ - AzureAIInferenceChatCompletion, AzureAIInferenceChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Azure AI Inference chat completion service and request settings. @@ -122,16 +121,22 @@ def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tup Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ + from semantic_kernel.connectors.ai.azure_ai_inference import ( + AzureAIInferenceChatCompletion, + AzureAIInferenceChatPromptExecutionSettings, + ) + chat_service = AzureAIInferenceChatCompletion( + service_id=service_id, ai_model_id="id", # The model ID is simply an identifier as the model id cannot be obtained programmatically. ) - request_settings = AzureAIInferenceChatPromptExecutionSettings() + request_settings = AzureAIInferenceChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_anthropic_chat_completion_service_and_request_settings() -> tuple[ - AnthropicChatCompletion, AnthropicChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Anthropic chat completion service and request settings. 
@@ -146,14 +151,16 @@ def get_anthropic_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = AnthropicChatCompletion() - request_settings = AnthropicChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings + + chat_service = AnthropicChatCompletion(service_id=service_id) + request_settings = AnthropicChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ - BedrockChatCompletion, BedrockChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Anthropic chat completion service and request settings. @@ -168,11 +175,14 @@ def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = BedrockChatCompletion(model_id="cohere.command-r-v1:0") + from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings + + chat_service = BedrockChatCompletion(service_id=service_id, model_id="cohere.command-r-v1:0") request_settings = BedrockChatPromptExecutionSettings( # For model specific settings, specify them in the extension_data dictionary. # For example, for Cohere Command specific settings, refer to: # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html + service_id=service_id, extension_data={ "presence_penalty": 0.5, "seed": 5, @@ -183,7 +193,7 @@ def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ def get_google_ai_chat_completion_service_and_request_settings() -> tuple[ - GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Google AI chat completion service and request settings. @@ -198,14 +208,19 @@ def get_google_ai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = GoogleAIChatCompletion() - request_settings = GoogleAIChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.google.google_ai import ( + GoogleAIChatCompletion, + GoogleAIChatPromptExecutionSettings, + ) + + chat_service = GoogleAIChatCompletion(service_id=service_id) + request_settings = GoogleAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ - MistralAIChatCompletion, MistralAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Mistral AI chat completion service and request settings. 
@@ -220,14 +235,16 @@ def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = MistralAIChatCompletion() - request_settings = MistralAIChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings + + chat_service = MistralAIChatCompletion(service_id=service_id) + request_settings = MistralAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_ollama_chat_completion_service_and_request_settings() -> tuple[ - OllamaChatCompletion, OllamaChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Ollama chat completion service and request settings. @@ -242,21 +259,24 @@ def get_ollama_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = OllamaChatCompletion() + from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings + + chat_service = OllamaChatCompletion(service_id=service_id) request_settings = OllamaChatPromptExecutionSettings( # For model specific settings, specify them in the options dictionary. # For more information on the available options, refer to the Ollama API documentation: # https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values + service_id=service_id, options={ "temperature": 0.8, - } + }, ) return chat_service, request_settings def get_onnx_chat_completion_service_and_request_settings() -> tuple[ - OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Onnx chat completion service and request settings. @@ -271,14 +291,20 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3) - request_settings = OnnxGenAIPromptExecutionSettings() + from semantic_kernel.connectors.ai.onnx import ( + OnnxGenAIChatCompletion, + OnnxGenAIPromptExecutionSettings, + ONNXTemplate, + ) + + chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id) + request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_vertex_ai_chat_completion_service_and_request_settings() -> tuple[ - VertexAIChatCompletion, VertexAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Vertex AI chat completion service and request settings. 
@@ -293,7 +319,12 @@ def get_vertex_ai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = VertexAIChatCompletion() - request_settings = VertexAIChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.google.vertex_ai import ( + VertexAIChatCompletion, + VertexAIChatPromptExecutionSettings, + ) + + chat_service = VertexAIChatCompletion(service_id=service_id) + request_settings = VertexAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings diff --git a/python/samples/getting_started_with_processes/step01/step01_processes.py b/python/samples/getting_started_with_processes/step01/step01_processes.py index 738f41212b0d..3ad8dcc04685 100644 --- a/python/samples/getting_started_with_processes/step01/step01_processes.py +++ b/python/samples/getting_started_with_processes/step01/step01_processes.py @@ -37,7 +37,7 @@ class UserInputState(KernelBaseModel): current_input_index: int = 0 -class ScriptedUserInputStep(KernelProcessStep[UserInputState]): +class UserInputStep(KernelProcessStep[UserInputState]): GET_USER_INPUT: ClassVar[str] = "get_user_input" def create_default_state(self) -> "UserInputState": @@ -48,16 +48,11 @@ def populate_user_inputs(self): """Method to be overridden by the user to populate with custom user messages.""" pass - async def on_activate(self): - """This is called during the activation of the process step.""" - self.populate_user_inputs() - async def activate(self, state: KernelProcessStepState[UserInputState]): """Activates the step and sets the state.""" state.state = state.state or self.create_default_state() self.state = state.state self.populate_user_inputs() - pass @kernel_function(name=GET_USER_INPUT) async def get_user_input(self, context: KernelProcessStepContext): @@ -65,9 +60,9 @@ async def get_user_input(self, context: KernelProcessStepContext): if not self.state: raise ValueError("State has not been initialized") - user_message = self.state.user_inputs[self.state.current_input_index] + user_message = input("USER: ") - print(f"USER: {user_message}") + # print(f"USER: {user_message}") if "exit" in user_message: await context.emit_event(process_event=ChatBotEvents.Exit, data=None) @@ -79,7 +74,7 @@ async def get_user_input(self, context: KernelProcessStepContext): await context.emit_event(process_event=CommonEvents.UserInputReceived, data=user_message) -class ChatUserInputStep(ScriptedUserInputStep): +class ScriptedInputStep(UserInputStep): def populate_user_inputs(self): """Override the method to populate user inputs for the chat step.""" if self.state is not None: @@ -89,6 +84,25 @@ def populate_user_inputs(self): self.state.user_inputs.append("How wide is the widest river?") self.state.user_inputs.append("exit") + @kernel_function + async def get_user_input(self, context: KernelProcessStepContext): + """Gets the user input.""" + if not self.state: + raise ValueError("State has not been initialized") + + user_message = self.state.user_inputs[self.state.current_input_index] + + print(f"USER: {user_message}") + + if "exit" in user_message: + await context.emit_event(process_event=ChatBotEvents.Exit, data=None) + return + + self.state.current_input_index += 1 + + # Emit the user input event + await context.emit_event(process_event=CommonEvents.UserInputReceived, data=user_message) + class IntroStep(KernelProcessStep): 
@kernel_function @@ -146,14 +160,14 @@ async def get_chat_response(self, context: "KernelProcessStepContext", user_mess kernel = Kernel() -async def step01_processes(): +async def step01_processes(scripted: bool = True): kernel.add_service(OpenAIChatCompletion(service_id="default")) process = ProcessBuilder(name="ChatBot") # Define the steps on the process builder based on their types, not concrete objects intro_step = process.add_step(IntroStep) - user_input_step = process.add_step(ChatUserInputStep) + user_input_step = process.add_step(ScriptedInputStep if scripted else UserInputStep) response_step = process.add_step(ChatBotResponseStep) # Define the input event that starts the process and where to send it @@ -186,4 +200,5 @@ async def step01_processes(): if __name__ == "__main__": - asyncio.run(step01_processes()) + # if you want to run this sample with your own input, set the below parameter to False + asyncio.run(step01_processes(scripted=False)) diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index 51bd4f4f7dcb..5c80506e3297 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from typing import Any +from typing import Annotated, Any from pydantic import Field, model_validator @@ -15,7 +15,7 @@ class AnthropicPromptExecutionSettings(PromptExecutionSettings): """Common request settings for Anthropic services.""" - ai_model_id: str | None = Field(None, serialization_alias="model") + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): @@ -24,20 +24,26 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): messages: list[dict[str, Any]] | None = None stream: bool | None = None system: str | None = None - max_tokens: int = Field(default=1024, gt=0) - temperature: float | None = Field(None, ge=0.0, le=2.0) + max_tokens: Annotated[int, Field(gt=0)] = 1024 + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None stop_sequences: list[str] | None = None - top_p: float | None = Field(None, ge=0.0, le=1.0) - top_k: int | None = Field(None, ge=0) - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description=("Do not set this manually. It is set by the service based on the function choice configuration."), - ) - tool_choice: dict[str, str] | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_k: Annotated[int | None, Field(ge=0)] = None + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description=( + "Do not set this manually. It is set by the service based on the function choice configuration." + ), + ), + ] = None + tool_choice: Annotated[ + dict[str, str] | None, + Field( + description="Do not set this manually. It is set by the service based on the function choice configuration."
+ ), + ] = None @model_validator(mode="after") def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings": diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py index 9f0d8bba851d..a8be8303e6b3 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any, Literal +from typing import Annotated, Any, Literal from pydantic import Field @@ -16,13 +16,13 @@ class AzureAIInferencePromptExecutionSettings(PromptExecutionSettings): `extra_parameters` is a dictionary to pass additional model-specific parameters to the model. """ - frequency_penalty: float | None = Field(None, ge=-2, le=2) - max_tokens: int | None = Field(None, gt=0) - presence_penalty: float | None = Field(None, ge=-2, le=2) + frequency_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None + max_tokens: Annotated[int | None, Field(gt=0)] = None + presence_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None seed: int | None = None stop: str | None = None - temperature: float | None = Field(None, ge=0.0, le=1.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + temperature: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None extra_parameters: dict[str, Any] | None = None @@ -30,15 +30,21 @@ class AzureAIInferencePromptExecutionSettings(PromptExecutionSettings): class AzureAIInferenceChatPromptExecutionSettings(AzureAIInferencePromptExecutionSettings): """Azure AI Inference Chat Prompt Execution Settings.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: str | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + str | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None @experimental_class @@ -49,7 +55,7 @@ class AzureAIInferenceEmbeddingPromptExecutionSettings(PromptExecutionSettings): `extra_parameters` is a dictionary to pass additional model-specific parameters to the model. 
""" - dimensions: int | None = Field(None, gt=0) + dimensions: Annotated[int | None, Field(gt=0)] = None encoding_format: Literal["base64", "binary", "float", "int8", "ubinary", "uint8"] | None = None input_type: Literal["text", "query", "document"] | None = None extra_parameters: dict[str, str] | None = None diff --git a/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py index a291f221a9d3..ca33d3123490 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any +from typing import Annotated, Any from pydantic import Field @@ -11,25 +11,31 @@ class BedrockPromptExecutionSettings(PromptExecutionSettings): """Bedrock Prompt Execution Settings.""" - temperature: float | None = Field(None, ge=0.0, le=1.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) - top_k: int | None = Field(None, gt=0) - max_tokens: int | None = Field(None, gt=0) + temperature: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_k: Annotated[int | None, Field(gt=0)] = None + max_tokens: Annotated[int | None, Field(gt=0)] = None stop: list[str] = Field(default_factory=list) class BedrockChatPromptExecutionSettings(BedrockPromptExecutionSettings): """Bedrock Chat Prompt Execution Settings.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: dict[str, Any] | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + dict[str, Any] | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None class BedrockTextPromptExecutionSettings(BedrockPromptExecutionSettings): diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py index 91f81fb18580..99e7ad1f8d56 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
import sys -from typing import Any, Literal +from typing import Annotated, Any, Literal from pydantic import Field @@ -16,12 +16,12 @@ class GoogleAIPromptExecutionSettings(PromptExecutionSettings): """Google AI Prompt Execution Settings.""" - stop_sequences: list[str] | None = Field(None, max_length=5) + stop_sequences: Annotated[list[str] | None, Field(max_length=5)] = None response_mime_type: Literal["text/plain", "application/json"] | None = None response_schema: Any | None = None - candidate_count: int | None = Field(None, ge=1) - max_output_tokens: int | None = Field(None, ge=1) - temperature: float | None = Field(None, ge=0.0, le=2.0) + candidate_count: Annotated[int | None, Field(ge=1)] = None + max_output_tokens: Annotated[int | None, Field(ge=1)] = None + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None top_p: float | None = None top_k: int | None = None @@ -35,15 +35,21 @@ class GoogleAITextPromptExecutionSettings(GoogleAIPromptExecutionSettings): class GoogleAIChatPromptExecutionSettings(GoogleAIPromptExecutionSettings): """Google AI Chat Prompt Execution Settings.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_config: dict[str, Any] | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_config: Annotated[ + dict[str, Any] | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None @override def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: @@ -62,4 +68,4 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: class GoogleAIEmbeddingPromptExecutionSettings(PromptExecutionSettings): """Google AI Embedding Prompt Execution Settings.""" - output_dimensionality: int | None = Field(None, le=768) + output_dimensionality: Annotated[int | None, Field(le=768)] = None diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py index 28c8eb6f28be..29b9e13e1278 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py @@ -1,15 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. 
import sys -from typing import Any, Literal - -from pydantic import Field -from vertexai.generative_models import Tool, ToolConfig +from typing import Annotated, Any, Literal if sys.version_info >= (3, 12): from typing import override # pragma: no cover else: from typing_extensions import override # pragma: no cover +from pydantic import Field +from vertexai.generative_models import Tool, ToolConfig from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -17,12 +16,12 @@ class VertexAIPromptExecutionSettings(PromptExecutionSettings): """Vertex AI Prompt Execution Settings.""" - stop_sequences: list[str] | None = Field(None, max_length=5) + stop_sequences: Annotated[list[str] | None, Field(max_length=5)] = None response_mime_type: Literal["text/plain", "application/json"] | None = None response_schema: Any | None = None - candidate_count: int | None = Field(None, ge=1) - max_output_tokens: int | None = Field(None, ge=1) - temperature: float | None = Field(None, ge=0.0, le=2.0) + candidate_count: Annotated[int | None, Field(ge=1)] = None + max_output_tokens: Annotated[int | None, Field(ge=1)] = None + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None top_p: float | None = None top_k: int | None = None @@ -36,15 +35,21 @@ class VertexAITextPromptExecutionSettings(VertexAIPromptExecutionSettings): class VertexAIChatPromptExecutionSettings(VertexAIPromptExecutionSettings): """Vertex AI Chat Prompt Execution Settings.""" - tools: list[Tool] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_config: ToolConfig | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[Tool] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_config: Annotated[ + ToolConfig | None, + Field( + description="Do not set this manually. 
It is set by the service based " + "on the function choice configuration.", + ), + ] = None @override def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py index d0409b1be659..ce61d75740f5 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py @@ -2,7 +2,7 @@ import logging import sys -from typing import Any, Literal +from typing import Annotated, Any, Literal from mistralai import utils @@ -21,7 +21,7 @@ class MistralAIPromptExecutionSettings(PromptExecutionSettings): """Common request settings for MistralAI services.""" - ai_model_id: str | None = Field(None, serialization_alias="model") + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None class MistralAIChatPromptExecutionSettings(MistralAIPromptExecutionSettings): @@ -29,28 +29,34 @@ class MistralAIChatPromptExecutionSettings(MistralAIPromptExecutionSettings): response_format: dict[Literal["type"], Literal["text", "json_object"]] | None = None messages: list[dict[str, Any]] | None = None - safe_mode: bool = Field(False, exclude=True) + safe_mode: Annotated[bool, Field(exclude=True)] = False safe_prompt: bool = False - max_tokens: int | None = Field(None, gt=0) + max_tokens: Annotated[int | None, Field(gt=0)] = None seed: int | None = None - temperature: float | None = Field(None, ge=0.0, le=2.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None random_seed: int | None = None - presence_penalty: float | None = Field(None, gt=0) - frequency_penalty: float | None = Field(None, gt=0) - n: int | None = Field(None, gt=1) + presence_penalty: Annotated[float | None, Field(gt=0)] = None + frequency_penalty: Annotated[float | None, Field(gt=0)] = None + n: Annotated[int | None, Field(gt=1)] = None retries: utils.RetryConfig | None = None server_url: str | None = None timeout_ms: int | None = None - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: str | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + str | None, + Field( + description="Do not set this manually. 
It is set by the service based " + "on the function choice configuration.", + ), + ] = None @field_validator("safe_mode") @classmethod diff --git a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py index e5c4c5b4510b..f315f971e91f 100644 --- a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any, Literal +from typing import Annotated, Any, Literal from pydantic import Field @@ -29,11 +29,14 @@ class OllamaTextPromptExecutionSettings(OllamaPromptExecutionSettings): class OllamaChatPromptExecutionSettings(OllamaPromptExecutionSettings): """Settings for Ollama chat prompt execution.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None class OllamaEmbeddingPromptExecutionSettings(OllamaPromptExecutionSettings): diff --git a/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py index aebc2d231cb2..75c3f29699ff 100644 --- a/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft. All rights reserved. 
+from typing import Annotated + from pydantic import Field from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -9,17 +11,17 @@ class OnnxGenAIPromptExecutionSettings(PromptExecutionSettings): """OnnxGenAI prompt execution settings.""" - diversity_penalty: float | None = Field(None, ge=0.0, le=1.0) + diversity_penalty: Annotated[float | None, Field(ge=0.0, le=1.0)] = None do_sample: bool = False early_stopping: bool = True - length_penalty: float | None = Field(None, ge=0.0, le=1.0) - max_length: int = Field(3072, gt=0) - min_length: int | None = Field(None, gt=0) + length_penalty: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + max_length: Annotated[int, Field(gt=0)] = 3072 + min_length: Annotated[int | None, Field(gt=0)] = None no_repeat_ngram_size: int = 0 - num_beams: int | None = Field(None, gt=0) - num_return_sequences: int | None = Field(None, gt=0) + num_beams: Annotated[int | None, Field(gt=0)] = None + num_return_sequences: Annotated[int | None, Field(gt=0)] = None past_present_share_buffer: int = True - repetition_penalty: float | None = Field(None, ge=0.0, le=1.0) - temperature: float | None = Field(None, ge=0.0, le=2.0) - top_k: int | None = Field(None, gt=0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + repetition_penalty: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None + top_k: Annotated[int | None, Field(gt=0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py index 3f627e12c665..bb247cb55e43 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py @@ -47,6 +47,7 @@ def __init__( ai_model_id: str | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, + **kwargs: Any, ) -> None: """Initializes a new instance of the OnnxGenAITextCompletion class. @@ -57,6 +58,7 @@ def __init__( env_file_path : Use the environment settings file as a fallback to environment variables. env_file_encoding : The encoding of the environment settings file. + kwargs : Additional arguments. 
""" try: settings = OnnxGenAISettings.create( @@ -76,7 +78,7 @@ def __init__( if ai_model_id is None: ai_model_id = settings.chat_model_folder - super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template) + super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs) @override async def _inner_get_chat_message_contents( diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py index 19ec573da19b..543b4e2c64a5 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py @@ -157,8 +157,8 @@ class ExtraBody(KernelBaseModel): """Extra body for the Azure Chat Completion endpoint.""" data_sources: list[DataSource] | None = None - input_language: str | None = Field(None, serialization_alias="inputLanguage") - output_language: str | None = Field(None, serialization_alias="outputLanguage") + input_language: Annotated[str | None, Field(serialization_alias="inputLanguage")] = None + output_language: Annotated[str | None, Field(serialization_alias="outputLanguage")] = None def __getitem__(self, item): """Get an item from the ExtraBody.""" diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py index f87e3ccedd65..f85f03289d92 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py @@ -2,7 +2,7 @@ import logging import sys -from typing import Any, Literal +from typing import Annotated, Any, Literal if sys.version_info >= (3, 11): from typing import Self # pragma: no cover @@ -21,29 +21,29 @@ class OpenAIPromptExecutionSettings(PromptExecutionSettings): """Common request settings for (Azure) OpenAI services.""" - ai_model_id: str | None = Field(None, serialization_alias="model") - frequency_penalty: float | None = Field(None, ge=-2.0, le=2.0) + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None + frequency_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None logit_bias: dict[str | int, float] | None = None - max_tokens: int | None = Field(None, gt=0) - number_of_responses: int | None = Field(None, ge=1, le=128, serialization_alias="n") - presence_penalty: float | None = Field(None, ge=-2.0, le=2.0) + max_tokens: Annotated[int | None, Field(gt=0)] = None + number_of_responses: Annotated[int | None, Field(ge=1, le=128, serialization_alias="n")] = None + presence_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None seed: int | None = None stop: str | list[str] | None = None stream: bool = False - temperature: float | None = Field(None, ge=0.0, le=2.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None user: str | None = None class OpenAITextPromptExecutionSettings(OpenAIPromptExecutionSettings): """Specific settings for the completions 
endpoint.""" - prompt: str | None = Field( - None, description="Do not set this manually. It is set by the service based on the text content." - ) - best_of: int | None = Field(None, ge=1) + prompt: Annotated[ + str | None, Field(description="Do not set this manually. It is set by the service based on the text content.") + ] = None + best_of: Annotated[int | None, Field(ge=1)] = None echo: bool = False - logprobs: int | None = Field(None, ge=0, le=5) + logprobs: Annotated[int | None, Field(ge=0, le=5)] = None suffix: str | None = None @model_validator(mode="after") @@ -68,25 +68,33 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): ) = None function_call: str | None = None functions: list[dict[str, Any]] | None = None - messages: list[dict[str, Any]] | None = Field( - None, description="Do not set this manually. It is set by the service based on the chat history." - ) - function_call_behavior: FunctionCallBehavior | None = Field(None, exclude=True) + messages: Annotated[ + list[dict[str, Any]] | None, Field(description="Do not set this manually. It is set by the service.") + ] = None + function_call_behavior: Annotated[FunctionCallBehavior | None, Field(exclude=True)] = None parallel_tool_calls: bool = True - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: str | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - structured_json_response: bool = Field(False, description="Do not set this manually. It is set by the service.") - stream_options: dict[str, Any] | None = Field( - None, - description="Additional options to pass when streaming is used. Do not set this manually.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + str | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + structured_json_response: Annotated[ + bool, Field(description="Do not set this manually. It is set by the service.") + ] = False + stream_options: Annotated[ + dict[str, Any] | None, + Field(description="Additional options to pass when streaming is used. 
Do not set this manually."), + ] = None @field_validator("functions", "function_call", mode="after") @classmethod @@ -160,11 +168,11 @@ class OpenAIEmbeddingPromptExecutionSettings(PromptExecutionSettings): """Specific settings for the text embedding endpoint.""" input: str | list[str] | list[int] | list[list[int]] | None = None - ai_model_id: str | None = Field(None, serialization_alias="model") + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None encoding_format: Literal["float", "base64"] | None = None user: str | None = None extra_headers: dict | None = None extra_query: dict | None = None extra_body: dict | None = None timeout: float | None = None - dimensions: int | None = Field(None, gt=0, le=3072) + dimensions: Annotated[int | None, Field(gt=0, le=3072)] = None diff --git a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py index 4c3abc8f5419..3865b63a62ac 100644 --- a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from typing import Any, TypeVar +from typing import Annotated, Any, TypeVar from pydantic import Field, model_validator @@ -32,9 +32,9 @@ class PromptExecutionSettings(KernelBaseModel): from_prompt_execution_settings: Create a prompt execution settings from another prompt execution settings. """ - service_id: str | None = Field(None, min_length=1) + service_id: Annotated[str | None, Field(min_length=1)] = None extension_data: dict[str, Any] = Field(default_factory=dict) - function_choice_behavior: FunctionChoiceBehavior | None = Field(None, exclude=True) + function_choice_behavior: Annotated[FunctionChoiceBehavior | None, Field(exclude=True)] = None @model_validator(mode="before") @classmethod diff --git a/python/semantic_kernel/contents/kernel_content.py b/python/semantic_kernel/contents/kernel_content.py index 98e64e8b810c..70ef59892bd6 100644 --- a/python/semantic_kernel/contents/kernel_content.py +++ b/python/semantic_kernel/contents/kernel_content.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. from abc import ABC, abstractmethod -from typing import Any, TypeVar +from typing import Annotated, Any, TypeVar from pydantic import Field @@ -15,7 +15,7 @@ class KernelContent(KernelBaseModel, ABC): # NOTE: if you wish to hold on to the inner content, you are responsible # for saving it before serializing the content/chat history as it won't be included. 
- inner_content: Any | None = Field(None, exclude=True) + inner_content: Annotated[Any | None, Field(exclude=True)] = None ai_model_id: str | None = None metadata: dict[str, Any] = Field(default_factory=dict) diff --git a/python/semantic_kernel/processes/kernel_process/kernel_process_step.py b/python/semantic_kernel/processes/kernel_process/kernel_process_step.py index 7fd09efbd32d..887dcfac47e1 100644 --- a/python/semantic_kernel/processes/kernel_process/kernel_process_step.py +++ b/python/semantic_kernel/processes/kernel_process/kernel_process_step.py @@ -21,7 +21,3 @@ class KernelProcessStep(ABC, KernelBaseModel, Generic[TState]): async def activate(self, state: "KernelProcessStepState[TState]"): """Activates the step and sets the state.""" pass # pragma: no cover - - async def on_activate(self): - """To be overridden by subclasses if needed.""" - pass # pragma: no cover diff --git a/python/tests/conftest.py b/python/tests/conftest.py index d8d9f52e26f9..697cce70712e 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. +import logging from collections.abc import Callable from dataclasses import dataclass, field from typing import TYPE_CHECKING, Annotated @@ -30,6 +31,15 @@ from semantic_kernel.services.ai_service_client_base import AIServiceClientBase +def pytest_configure(config): + logging.basicConfig(level=logging.ERROR) + logging.getLogger("tests.utils").setLevel(logging.INFO) + logging.getLogger("openai").setLevel(logging.WARNING) + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpcore").setLevel(logging.WARNING) + logging.getLogger("semantic_kernel").setLevel(logging.INFO) + + @fixture(scope="function") def kernel() -> "Kernel": from semantic_kernel.kernel import Kernel diff --git a/python/tests/integration/audio_to_text/audio_to_text_test_base.py b/python/tests/integration/audio_to_text/audio_to_text_test_base.py index 8375b1b39a47..78b5ab78cf1c 100644 --- a/python/tests/integration/audio_to_text/audio_to_text_test_base.py +++ b/python/tests/integration/audio_to_text/audio_to_text_test_base.py @@ -6,7 +6,7 @@ from semantic_kernel.connectors.ai.audio_to_text_client_base import AudioToTextClientBase from semantic_kernel.connectors.ai.open_ai import AzureAudioToText, OpenAIAudioToText -from tests.integration.utils import is_service_setup_for_testing +from tests.utils import is_service_setup_for_testing # There is only the whisper model available on Azure OpenAI for audio to text. And that model is # only available in the North Switzerland region. 
Therefore, the endpoint is different than the one diff --git a/python/tests/integration/completions/chat_completion_test_base.py b/python/tests/integration/completions/chat_completion_test_base.py index d94c7e61442d..1fe87415e865 100644 --- a/python/tests/integration/completions/chat_completion_test_base.py +++ b/python/tests/integration/completions/chat_completion_test_base.py @@ -38,7 +38,7 @@ from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.authentication.entra_id_authentication import get_entra_auth_token from tests.integration.completions.completion_test_base import CompletionTestBase, ServiceType -from tests.integration.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms +from tests.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/conftest.py b/python/tests/integration/completions/conftest.py index 17a1e3968661..34731e4da958 100644 --- a/python/tests/integration/completions/conftest.py +++ b/python/tests/integration/completions/conftest.py @@ -1,9 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. +import logging import pytest from semantic_kernel.kernel import Kernel +from semantic_kernel.utils.logging import setup_logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +setup_logging() @pytest.fixture(scope="function") diff --git a/python/tests/integration/completions/test_chat_completion_with_function_calling.py b/python/tests/integration/completions/test_chat_completion_with_function_calling.py index f749f6db3768..76e759557e15 100644 --- a/python/tests/integration/completions/test_chat_completion_with_function_calling.py +++ b/python/tests/integration/completions/test_chat_completion_with_function_calling.py @@ -25,7 +25,7 @@ vertex_ai_setup, ) from tests.integration.completions.completion_test_base import ServiceType -from tests.integration.utils import retry +from tests.utils import retry if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py index 4b82a75f8fab..137445495fd9 100644 --- a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py +++ b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py @@ -20,7 +20,7 @@ vertex_ai_setup, ) from tests.integration.completions.completion_test_base import ServiceType -from tests.integration.utils import retry +from tests.utils import retry if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py index c78537a4f4c3..17ab67e12c83 100644 --- a/python/tests/integration/completions/test_chat_completions.py +++ b/python/tests/integration/completions/test_chat_completions.py @@ -21,7 +21,7 @@ vertex_ai_setup, ) from tests.integration.completions.completion_test_base import ServiceType -from tests.integration.utils import retry +from tests.utils import retry if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/test_conversation_summary_plugin.py 
b/python/tests/integration/completions/test_conversation_summary_plugin.py index 4cc47b47f4c5..5bd80fbe445a 100644 --- a/python/tests/integration/completions/test_conversation_summary_plugin.py +++ b/python/tests/integration/completions/test_conversation_summary_plugin.py @@ -7,7 +7,7 @@ from semantic_kernel.core_plugins.conversation_summary_plugin import ConversationSummaryPlugin from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig -from tests.integration.utils import retry +from tests.utils import retry @pytest.mark.asyncio diff --git a/python/tests/integration/completions/test_text_completion.py b/python/tests/integration/completions/test_text_completion.py index 9a8e60d2d0b6..c4c8058a9ab0 100644 --- a/python/tests/integration/completions/test_text_completion.py +++ b/python/tests/integration/completions/test_text_completion.py @@ -32,7 +32,7 @@ from semantic_kernel import Kernel from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from tests.integration.completions.completion_test_base import CompletionTestBase, ServiceType -from tests.integration.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms, retry +from tests.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms, retry ollama_setup: bool = is_service_setup_for_testing(["OLLAMA_TEXT_MODEL_ID"]) and is_test_running_on_supported_platforms([ "Linux" diff --git a/python/tests/integration/embeddings/test_embedding_service_base.py b/python/tests/integration/embeddings/test_embedding_service_base.py index d88706c17167..4bb68b8729fd 100644 --- a/python/tests/integration/embeddings/test_embedding_service_base.py +++ b/python/tests/integration/embeddings/test_embedding_service_base.py @@ -30,7 +30,7 @@ ) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.utils.authentication.entra_id_authentication import get_entra_auth_token -from tests.integration.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms +from tests.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms # Make sure all services are setup for before running the tests # The following exceptions apply: diff --git a/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py b/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py index 3bd91945e880..39ccf99fc09a 100644 --- a/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py +++ b/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py @@ -8,7 +8,7 @@ from semantic_kernel.connectors.memory.astradb import AstraDBMemoryStore from semantic_kernel.connectors.memory.astradb.astradb_settings import AstraDBSettings -from tests.integration.utils import retry +from tests.utils import retry astradb_installed: bool try: diff --git a/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py b/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py index c4c4d9b6b8ea..58c58bd9724b 100644 --- a/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py +++ b/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py @@ -11,7 +11,7 @@ from semantic_kernel.connectors.memory.pinecone.pinecone_settings import PineconeSettings from semantic_kernel.exceptions.service_exceptions import 
ServiceResourceNotFoundError from semantic_kernel.memory.memory_record import MemoryRecord -from tests.integration.utils import retry +from tests.utils import retry pinecone_installed = importlib.util.find_spec("pinecone") is not None pytestmark = pytest.mark.skipif(not pinecone_installed, reason="pinecone is not installed") diff --git a/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py b/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py index c278304077aa..51675b06831b 100644 --- a/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py +++ b/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py @@ -31,24 +31,23 @@ async def test_list_collection_names( data_model_type: type, ): """Test list collection names.""" - store = stores["azure_cosmos_db_no_sql"] - - assert await store.list_collection_names() == [] + async with stores["azure_cosmos_db_no_sql"] as store: + assert await store.list_collection_names() == [] - collection_name = "list_collection_names" - collection = store.get_collection(collection_name, data_model_type) - await collection.create_collection() + collection_name = "list_collection_names" + collection = store.get_collection(collection_name, data_model_type) + await collection.create_collection() - collection_names = await store.list_collection_names() - assert collection_name in collection_names + collection_names = await store.list_collection_names() + assert collection_name in collection_names - await collection.delete_collection() - assert await collection.does_collection_exist() is False - collection_names = await store.list_collection_names() - assert collection_name not in collection_names + await collection.delete_collection() + assert await collection.does_collection_exist() is False + collection_names = await store.list_collection_names() + assert collection_name not in collection_names - # Deleting the collection doesn't remove it from the vector_record_collections list in the store - assert collection_name in store.vector_record_collections + # Deleting the collection doesn't remove it from the vector_record_collections list in the store + assert collection_name in store.vector_record_collections @pytest.mark.asyncio async def test_collection_not_created( @@ -58,27 +57,27 @@ async def test_collection_not_created( data_record: dict[str, Any], ): """Test get without collection.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "collection_not_created" - collection = store.get_collection(collection_name, data_model_type) + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "collection_not_created" + collection = store.get_collection(collection_name, data_model_type) - assert await collection.does_collection_exist() is False + assert await collection.does_collection_exist() is False - with pytest.raises( - MemoryConnectorException, match="The collection does not exist yet. Create the collection first." - ): - await collection.upsert(data_model_type(**data_record)) + with pytest.raises( + MemoryConnectorException, match="The collection does not exist yet. Create the collection first." + ): + await collection.upsert(data_model_type(**data_record)) - with pytest.raises( - MemoryConnectorException, match="The collection does not exist yet. Create the collection first." 
- ): - await collection.get(data_record["id"]) + with pytest.raises( + MemoryConnectorException, match="The collection does not exist yet. Create the collection first." + ): + await collection.get(data_record["id"]) - with pytest.raises(MemoryConnectorException): - await collection.delete(data_record["id"]) + with pytest.raises(MemoryConnectorException): + await collection.delete(data_record["id"]) - with pytest.raises(MemoryConnectorException, match="Container could not be deleted."): - await collection.delete_collection() + with pytest.raises(MemoryConnectorException, match="Container could not be deleted."): + await collection.delete_collection() @pytest.mark.asyncio async def test_custom_partition_key( @@ -88,33 +87,35 @@ async def test_custom_partition_key( data_record: dict[str, Any], ): """Test custom partition key.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "custom_partition_key" - collection = store.get_collection( - collection_name, - data_model_type, - partition_key=PartitionKey(path="/product_type"), - ) - - composite_key = AzureCosmosDBNoSQLCompositeKey(key=data_record["id"], partition_key=data_record["product_type"]) - - # Upsert - await collection.create_collection() - await collection.upsert(data_model_type(**data_record)) - - # Verify - record = await collection.get(composite_key) - assert record is not None - assert isinstance(record, data_model_type) - - # Remove - await collection.delete(composite_key) - record = await collection.get(composite_key) - assert record is None - - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "custom_partition_key" + collection = store.get_collection( + collection_name, + data_model_type, + partition_key=PartitionKey(path="/product_type"), + ) + + composite_key = AzureCosmosDBNoSQLCompositeKey( + key=data_record["id"], partition_key=data_record["product_type"] + ) + + # Upsert + await collection.create_collection() + await collection.upsert(data_model_type(**data_record)) + + # Verify + record = await collection.get(composite_key) + assert record is not None + assert isinstance(record, data_model_type) + + # Remove + await collection.delete(composite_key) + record = await collection.get(composite_key) + assert record is None + + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_get_include_vector( @@ -124,28 +125,28 @@ async def test_get_include_vector( data_record: dict[str, Any], ): """Test get with include_vector.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "get_include_vector" - collection = store.get_collection(collection_name, data_model_type) + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "get_include_vector" + collection = store.get_collection(collection_name, data_model_type) - # Upsert - await collection.create_collection() - await collection.upsert(data_model_type(**data_record)) + # Upsert + await collection.create_collection() + await collection.upsert(data_model_type(**data_record)) - # Verify - record = await collection.get(data_record["id"], include_vectors=True) - assert record is not None - assert isinstance(record, data_model_type) - assert record.vector == data_record["vector"] + # Verify + record = await collection.get(data_record["id"], include_vectors=True) + assert record is not None + assert 
isinstance(record, data_model_type) + assert record.vector == data_record["vector"] - # Remove - await collection.delete(data_record["id"]) - record = await collection.get(data_record["id"]) - assert record is None + # Remove + await collection.delete(data_record["id"]) + record = await collection.get(data_record["id"]) + assert record is None - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_get_not_include_vector( @@ -155,28 +156,28 @@ async def test_get_not_include_vector( data_record: dict[str, Any], ): """Test get with include_vector.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "get_not_include_vector" - collection = store.get_collection(collection_name, data_model_type) + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "get_not_include_vector" + collection = store.get_collection(collection_name, data_model_type) - # Upsert - await collection.create_collection() - await collection.upsert(data_model_type(**data_record)) + # Upsert + await collection.create_collection() + await collection.upsert(data_model_type(**data_record)) - # Verify - record = await collection.get(data_record["id"], include_vectors=False) - assert record is not None - assert isinstance(record, data_model_type) - assert record.vector is None + # Verify + record = await collection.get(data_record["id"], include_vectors=False) + assert record is not None + assert isinstance(record, data_model_type) + assert record.vector is None - # Remove - await collection.delete(data_record["id"]) - record = await collection.get(data_record["id"]) - assert record is None + # Remove + await collection.delete(data_record["id"]) + record = await collection.get(data_record["id"]) + assert record is None - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_collection_with_key_as_key_field( @@ -186,29 +187,29 @@ async def test_collection_with_key_as_key_field( data_record_with_key_as_key_field: dict[str, Any], ): """Test collection with key as key field.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "collection_with_key_as_key_field" - collection = store.get_collection(collection_name, data_model_type_with_key_as_key_field) - - # Upsert - await collection.create_collection() - result = await collection.upsert(data_model_type_with_key_as_key_field(**data_record_with_key_as_key_field)) - assert data_record_with_key_as_key_field["key"] == result - - # Verify - record = await collection.get(data_record_with_key_as_key_field["key"]) - assert record is not None - assert isinstance(record, data_model_type_with_key_as_key_field) - assert record.key == data_record_with_key_as_key_field["key"] - - # Remove - await collection.delete(data_record_with_key_as_key_field["key"]) - record = await collection.get(data_record_with_key_as_key_field["key"]) - assert record is None - - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "collection_with_key_as_key_field" + collection = 
store.get_collection(collection_name, data_model_type_with_key_as_key_field) + + # Upsert + await collection.create_collection() + result = await collection.upsert(data_model_type_with_key_as_key_field(**data_record_with_key_as_key_field)) + assert data_record_with_key_as_key_field["key"] == result + + # Verify + record = await collection.get(data_record_with_key_as_key_field["key"]) + assert record is not None + assert isinstance(record, data_model_type_with_key_as_key_field) + assert record.key == data_record_with_key_as_key_field["key"] + + # Remove + await collection.delete(data_record_with_key_as_key_field["key"]) + record = await collection.get(data_record_with_key_as_key_field["key"]) + assert record is None + + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_custom_client( @@ -219,13 +220,14 @@ async def test_custom_client( url = os.environ.get("AZURE_COSMOS_DB_NO_SQL_URL") key = os.environ.get("AZURE_COSMOS_DB_NO_SQL_KEY") - async with CosmosClient(url, key) as custom_client: - store = AzureCosmosDBNoSQLStore( + async with ( + CosmosClient(url, key) as custom_client, + AzureCosmosDBNoSQLStore( database_name="test_database", cosmos_client=custom_client, create_database=True, - ) - + ) as store, + ): assert await store.list_collection_names() == [] collection_name = "list_collection_names" diff --git a/python/tests/integration/text_to_audio/text_to_audio_test_base.py b/python/tests/integration/text_to_audio/text_to_audio_test_base.py index 2ad5bd11df76..167fd9c332a0 100644 --- a/python/tests/integration/text_to_audio/text_to_audio_test_base.py +++ b/python/tests/integration/text_to_audio/text_to_audio_test_base.py @@ -6,7 +6,7 @@ from semantic_kernel.connectors.ai.open_ai import AzureTextToAudio, OpenAITextToAudio from semantic_kernel.connectors.ai.text_to_audio_client_base import TextToAudioClientBase -from tests.integration.utils import is_service_setup_for_testing +from tests.utils import is_service_setup_for_testing # TTS model on Azure model is not available in regions at which we have chat completion models. # Therefore, we need to use a different endpoint for testing. diff --git a/python/tests/samples/samples_utils.py b/python/tests/samples/samples_utils.py deleted file mode 100644 index de2b8257e7b7..000000000000 --- a/python/tests/samples/samples_utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import logging - -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger() - - -async def retry(func, reset=None, max_retries=3): - """Retry a function a number of times before raising an exception. 
- - args: - func: the async function to retry (required) - reset: a function to reset the state of any variables used in the function (optional) - max_retries: the number of times to retry the function before raising an exception (optional) - """ - attempt = 0 - while attempt < max_retries: - try: - if reset: - reset() - await func() - break - except Exception as e: - attempt += 1 - logger.error(f"Attempt {attempt} for {func.__name__} failed: {e}") - if attempt == max_retries: - logger.error(f"All {max_retries} attempts for {func.__name__} failed") - raise e - await asyncio.sleep(1) diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index abce5d4018f8..d3ae4646dbae 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -1,6 +1,9 @@ # Copyright (c) Microsoft. All rights reserved. import copy +import os +from collections.abc import Awaitable, Callable +from typing import Any import pytest from pytest import mark, param @@ -54,39 +57,149 @@ from samples.getting_started_with_agents.step2_plugins import main as step2_plugins from samples.getting_started_with_agents.step3_chat import main as step3_chat from samples.getting_started_with_agents.step7_assistant import main as step7_assistant -from tests.samples.samples_utils import retry +from tests.utils import retry + +# These environment variable names are used to control which samples are run during integration testing. +# This has to do with the setup of the tests and the services they depend on. +COMPLETIONS_CONCEPT_SAMPLE = "COMPLETIONS_CONCEPT_SAMPLE" +MEMORY_CONCEPT_SAMPLE = "MEMORY_CONCEPT_SAMPLE" concepts = [ - param(chat_gpt_api_function_calling, ["What is 3+3?", "exit"], id="chat_gpt_api_function_calling"), - param(simple_chatbot, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot"), - param(simple_chatbot_streaming, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot_streaming"), - param(simple_chatbot_with_image, ["exit"], id="simple_chatbot_with_image"), + param( + simple_chatbot, + ["Why is the sky blue in one sentence?", "exit"], + id="simple_chatbot", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + simple_chatbot_streaming, + ["Why is the sky blue in one sentence?", "exit"], + id="simple_chatbot_streaming", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + simple_chatbot_with_image, + ["exit"], + id="simple_chatbot_with_image", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), param( simple_chatbot_logit_bias, ["Who has the most career points in NBA history?", "exit"], id="simple_chatbot_logit_bias", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), ), param( simple_chatbot_kernel_function, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot_kernel_function", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + chat_gpt_api_function_calling, + ["What is 3+3?", "exit"], + id="chat_gpt_api_function_calling", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), + ), + param( + auto_function_invoke_filters, + ["What is 3+3?", "exit"], + id="auto_function_invoke_filters", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_invocation_filters, + ["What is 3+3?", "exit"], + id="function_invocation_filters", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_invocation_filters_stream, + ["What is 3+3?", "exit"], + id="function_invocation_filters_stream", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + prompt_filters, + ["What is the fastest animal?", "exit"], + id="prompt_filters", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + kernel_arguments, + [], + id="kernel_arguments", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + grounded, + [], + id="grounded", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_openai_function_calling_stepwise_planner, + [], + id="azure_openai_function_calling_stepwise_planner", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + openai_function_calling_stepwise_planner, + [], + id="openai_function_calling_stepwise_planner", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + sequential_planner, + [], + id="sequential_planner", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + openai_function_calling_with_custom_plugin, + [], + id="openai_function_calling_with_custom_plugin", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), ), - param(auto_function_invoke_filters, ["What is 3+3?", "exit"], id="auto_function_invoke_filters"), - param(function_invocation_filters, ["What is 3+3?", "exit"], id="function_invocation_filters"), - param(function_invocation_filters_stream, ["What is 3+3?", "exit"], id="function_invocation_filters_stream"), - param(prompt_filters, ["What is the fastest animal?", "exit"], id="prompt_filters"), - param(kernel_arguments, [], id="kernel_arguments"), - param(grounded, [], id="grounded"), - param(azure_cognitive_search_memory, [], id="azure_cognitive_search_memory"), - param(memory, ["What are my investments?", "exit"], id="memory"), - param(azure_openai_function_calling_stepwise_planner, [], id="azure_openai_function_calling_stepwise_planner"), - param(openai_function_calling_stepwise_planner, [], id="openai_function_calling_stepwise_planner"), - param(sequential_planner, [], id="sequential_planner"), - param(openai_function_calling_with_custom_plugin, [], id="openai_function_calling_with_custom_plugin"), param( openai_plugin_azure_key_vault, ["Create a secret with the name 'Foo' and value 'Bar'", "exit"], id="openai_plugin_azure_key_vault", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), ), param( openai_plugin_klarna, @@ -96,12 +209,66 @@ reason="Temporarily: https://www.klarna.com/us/shopping/public/openai/v0/api-docs/ returns 404" ), ), - param(plugins_from_dir, [], id="plugins_from_dir"), - param(azure_chat_gpt_api_handlebars, ["What is 3+3?", "exit"], id="azure_chat_gpt_api_handlebars"), - param(azure_chat_gpt_api_jinja2, ["What is 3+3?", "exit"], id="azure_chat_gpt_api_jinja2"), - param(configuring_prompts, ["What is my name?", "exit"], id="configuring_prompts"), - param(load_yaml_prompt, [], id="load_yaml_prompt"), - param(template_language, [], id="template_language"), + param( + plugins_from_dir, + [], + id="plugins_from_dir", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_chat_gpt_api_handlebars, + ["What is 3+3?", "exit"], + id="azure_chat_gpt_api_handlebars", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_chat_gpt_api_jinja2, + ["What is 3+3?", "exit"], + id="azure_chat_gpt_api_jinja2", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + configuring_prompts, + ["What is my name?", "exit"], + id="configuring_prompts", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + load_yaml_prompt, + [], + id="load_yaml_prompt", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + template_language, + [], + id="template_language", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), + ), + param( + azure_cognitive_search_memory, + [], + id="azure_cognitive_search_memory", + marks=pytest.mark.skipif(os.getenv(MEMORY_CONCEPT_SAMPLE, None) is None, reason="Not running memory samples."), + ), + param( + memory, + ["What are my investments?", "exit"], + id="memory", + marks=pytest.mark.skipif(os.getenv(MEMORY_CONCEPT_SAMPLE, None) is None, reason="Not running memory samples."), + ), param(rag_with_text_memory_plugin, [], id="rag_with_text_memory_plugin"), param( bing_search_plugin, @@ -109,13 +276,62 @@ id="bing_search_plugin", marks=pytest.mark.skip(reason="Flaky test due to Azure OpenAI content policy"), ), - param(custom_service_selector, [], id="custom_service_selector"), - param(function_defined_in_json_prompt, ["What is 3+3?", "exit"], id="function_defined_in_json_prompt"), - param(function_defined_in_yaml_prompt, ["What is 3+3?", "exit"], id="function_defined_in_yaml_prompt"), - param(step1_agent, [], id="step1_agent"), - param(step2_plugins, [], id="step2_agent_plugins"), - param(step3_chat, [], id="step3_chat"), - param(step7_assistant, [], id="step7_assistant"), + param( + custom_service_selector, + [], + id="custom_service_selector", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_defined_in_json_prompt, + ["What is 3+3?", "exit"], + id="function_defined_in_json_prompt", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_defined_in_yaml_prompt, + ["What is 3+3?", "exit"], + id="function_defined_in_yaml_prompt", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step1_agent, + [], + id="step1_agent", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step2_plugins, + [], + id="step2_agent_plugins", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step3_chat, + [], + id="step3_chat", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step7_assistant, + [], + id="step7_assistant", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), param( ollama_chat_completion, ["Why is the sky blue?", "exit"], @@ -134,13 +350,20 @@ id="lm_studio_text_embedding", marks=pytest.mark.skip(reason="Need to set up LM Studio locally. Check out the module for more details."), ), - param(image_generation, [], id="image_generation"), + param( + image_generation, + [], + id="image_generation", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), + ), ] @mark.asyncio -@mark.parametrize("func, responses", concepts) -async def test_concepts(func, responses, monkeypatch): +@mark.parametrize("sample, responses", concepts) +async def test_concepts(sample: Callable[..., Awaitable[Any]], responses: list[str], monkeypatch): saved_responses = copy.deepcopy(responses) def reset(): @@ -148,4 +371,4 @@ def reset(): responses.extend(saved_responses) monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) - await retry(lambda: func(), reset=reset) + await retry(sample, retries=3, reset=reset) diff --git a/python/tests/samples/test_learn_resources.py b/python/tests/samples/test_learn_resources.py index 428515d30f35..43d69d3907e8 100644 --- a/python/tests/samples/test_learn_resources.py +++ b/python/tests/samples/test_learn_resources.py @@ -14,7 +14,7 @@ from samples.learn_resources.templates import main as templates from samples.learn_resources.using_the_kernel import main as using_the_kernel from samples.learn_resources.your_first_prompt import main as your_first_prompt -from tests.samples.samples_utils import retry +from tests.utils import retry @mark.asyncio diff --git a/python/tests/integration/utils.py b/python/tests/utils.py similarity index 63% rename from python/tests/integration/utils.py rename to python/tests/utils.py index 1af9e94bf4cd..a8397f1398fc 100644 --- a/python/tests/integration/utils.py +++ b/python/tests/utils.py @@ -4,22 +4,40 @@ import logging import os import platform +from collections.abc import Awaitable, Callable +from typing import Any -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger() +logger = logging.getLogger(__name__) -async def retry(func, retries=20): - """Retry the function if it raises an exception.""" +async def retry( + func: Callable[..., Awaitable[Any]], + retries: int = 20, + reset: Callable[..., None] | None = None, +): + """Retry the function if it raises an exception. + + Args: + func (function): The function to retry. + retries (int): Number of retries. + reset (function): Function to reset the state of any variables used in the function + + """ + logger.info(f"Running {retries} retries with func: {func.__module__}") for i in range(retries): + logger.info(f" Try {i + 1} for {func.__module__}") try: + if reset: + reset() return await func() except Exception as e: - logger.error(f"Retry {i + 1}: {e}") + logger.info(f" On try {i + 1} got this error: {e}") if i == retries - 1: # Last retry raise # Binary exponential backoff - await asyncio.sleep(2**i) + backoff = 2**i + logger.info(f" Sleeping for {backoff} seconds before retrying") + await asyncio.sleep(backoff) return None
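
For reference, the recurring change across the prompt execution settings files above moves Pydantic field constraints out of default-value Field(...) calls and into Annotated metadata, with the default supplied as a plain assignment. A minimal sketch of the pattern, assuming Pydantic v2 and Python 3.10+; the model and field names are illustrative and not taken from the patch:

from typing import Annotated

from pydantic import BaseModel, Field, ValidationError


class ExampleSettings(BaseModel):
    # Before: temperature: float | None = Field(None, ge=0.0, le=2.0)
    # After: constraints live in the Annotated metadata, the default is a plain assignment.
    temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None
    stop_sequences: Annotated[list[str] | None, Field(max_length=5)] = None


ExampleSettings(temperature=0.7)  # accepted; stop_sequences defaults to None

try:
    ExampleSettings(temperature=3.0)  # rejected: violates the le=2.0 bound
except ValidationError:
    pass

Both spellings validate the same way; the Annotated form only separates the constraint metadata from the default value.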
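
The rewritten test_concepts.py gates each sample behind an environment variable so the completion and memory merge gates can run disjoint subsets. A minimal sketch of the mechanism, with a hypothetical placeholder coroutine standing in for a real sample:

import os

import pytest
from pytest import mark, param


async def placeholder_sample():
    # Stand-in for a concept sample's main() coroutine.
    return None


concepts = [
    param(
        placeholder_sample,
        ["exit"],
        id="placeholder_sample",
        # Collected but skipped unless the job exports COMPLETIONS_CONCEPT_SAMPLE.
        marks=pytest.mark.skipif(
            os.getenv("COMPLETIONS_CONCEPT_SAMPLE", None) is None,
            reason="Not running completion samples.",
        ),
    ),
]


@mark.asyncio
@mark.parametrize("sample, responses", concepts)
async def test_concepts_sketch(sample, responses, monkeypatch):
    monkeypatch.setattr("builtins.input", lambda _: responses.pop(0))
    await sample()

Requires pytest-asyncio for the asyncio marker, as in the real test module.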
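
The relocated tests/utils.py above also gives the retry helper an optional reset callback and exponential backoff. A rough usage sketch, assuming it runs from the repository's python/ directory so that tests.utils is importable; the flaky coroutine is a stand-in, not code from the patch:

import asyncio

from tests.utils import retry

state = {"attempts": 0}


async def flaky() -> str:
    # Stand-in for a test body that fails transiently on the first attempt.
    state["attempts"] += 1
    if state["attempts"] < 2:
        raise RuntimeError("transient failure")
    return "ok"


def reset() -> None:
    # Called before every attempt; restore any state the previous attempt mutated.
    pass


result = asyncio.run(retry(flaky, retries=3, reset=reset))
assert result == "ok"

With retries=3 the helper sleeps 1 s and then 2 s between failed attempts; only the final attempt is allowed to raise.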