GoogleCloudPlatform · alpha-amundson · May 3, 2024 · May 3, 2024 · May 6, 2024 · May 6, 2024
diff --git a/applications/rag/frontend/container/Dockerfile b/applications/rag/frontend/container/Dockerfile
@@ -19,4 +19,9 @@ WORKDIR /workspace/frontend
 
 RUN pip install -r requirements.txt
 
-CMD ["python", "main.py"]
+EXPOSE 8080
+
+ENV FLASK_APP=/workspace/frontend/main.py
+ENV PYTHONPATH=.
+# Serve the "app" object from main.py with 4 Gunicorn workers bound to 0.0.0.0:8080
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8080", "main:app"]
diff --git a/applications/rag/frontend/container/__init__.py b/applications/rag/frontend/container/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/applications/rag/frontend/container/application/__init__.py b/applications/rag/frontend/container/application/__init__.py
@@ -0,0 +1,24 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from flask import Flask
+
+def create_app():
+    """Create the Flask app: trimmed Jinja blocks, SECRET_KEY from the env."""
+    flask_app = Flask(__name__, static_folder='static', template_folder='templates')
+    jinja_env = flask_app.jinja_env
+    jinja_env.trim_blocks, jinja_env.lstrip_blocks = True, True
+    flask_app.config['SECRET_KEY'] = os.getenv("SECRET_KEY")
+    return flask_app
diff --git a/.../frontend/container/cloud_sql/__init__.py → ...ntainer/application/cloud_sql/__init__.py b/.../frontend/container/cloud_sql/__init__.py → ...ntainer/application/cloud_sql/__init__.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This file is required to make Python treat the subfolder as a package
+# This file is required to make Python treat the subfolder as a package
diff --git a/applications/rag/frontend/container/application/cloud_sql/cloud_sql.py b/applications/rag/frontend/container/application/cloud_sql/cloud_sql.py
@@ -0,0 +1,85 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import logging
+
+from google.cloud.sql.connector import IPTypes
+
+from langchain_google_cloud_sql_pg import PostgresEngine, PostgresVectorStore
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+# "development" makes the engine connect over public IP; anything else, private.
+ENVIRONMENT = os.environ.get("ENVIRONMENT")
+GCP_PROJECT_ID = os.environ.get("PROJECT_ID")
+GCP_CLOUD_SQL_REGION = os.environ.get("CLOUDSQL_INSTANCE_REGION")
+GCP_CLOUD_SQL_INSTANCE = os.environ.get("CLOUDSQL_INSTANCE")
+
+INSTANCE_CONNECTION_NAME = (
+    f"{GCP_PROJECT_ID}:{GCP_CLOUD_SQL_REGION}:{GCP_CLOUD_SQL_INSTANCE}"
+)
+
+DB_NAME = os.environ.get("DB_NAME", "pgvector-database")
+VECTOR_EMBEDDINGS_TABLE_NAME = os.environ.get("EMBEDDINGS_TABLE_NAME", "")
+CHAT_HISTORY_TABLE_NAME = os.environ.get("CHAT_HISTORY_TABLE_NAME", "message_store")
+# Environment values are strings; cast so vector_size always receives an int.
+VECTOR_DIMENSION = int(os.environ.get("VECTOR_DIMENSION") or 384)
+
+try:
+    # Prefer credentials from the mounted secret volume; `with` closes each file.
+    with open("/etc/secret-volume/username", "r") as db_username_file:
+        DB_USER = db_username_file.read()
+
+    with open("/etc/secret-volume/password", "r") as db_password_file:
+        DB_PASS = db_password_file.read()
+except OSError:
+    # Secret files missing or unreadable: fall back to environment variables.
+    DB_USER = os.environ.get("DB_USERNAME", "postgres")
+    DB_PASS = os.environ.get("DB_PASS", "postgres")
+
+
+def create_sync_postgres_engine():
+    """Build the PostgresEngine, then try to initialize the app's tables.
+
+    Uses a public IP when ENVIRONMENT is "development", private otherwise.
+    Table-initialization errors are logged and the engine is still returned.
+    """
+    ip_type = IPTypes.PUBLIC if ENVIRONMENT == "development" else IPTypes.PRIVATE
+    pg_engine = PostgresEngine.from_instance(
+        project_id=GCP_PROJECT_ID, region=GCP_CLOUD_SQL_REGION,
+        instance=GCP_CLOUD_SQL_INSTANCE, database=DB_NAME,
+        user=DB_USER, password=DB_PASS, ip_type=ip_type,
+    )
+    try:
+        pg_engine.init_chat_history_table(table_name=CHAT_HISTORY_TABLE_NAME)
+        pg_engine.init_vectorstore_table(
+            VECTOR_EMBEDDINGS_TABLE_NAME,
+            vector_size=VECTOR_DIMENSION, overwrite_existing=False,
+        )
+    except Exception as e:
+        logging.info(f"Error: {e}")
+    return pg_engine
+
+
+def create_sync_postgres_vector_store(engine, embedding_provider):
+    """Build a store on the embeddings table via PostgresVectorStore.create_sync."""
+    store_options = {
+        "engine": engine,
+        "embedding_service": embedding_provider,
+        "table_name": VECTOR_EMBEDDINGS_TABLE_NAME,
+    }
+    return PostgresVectorStore.create_sync(**store_options)
diff --git a/applications/rag/frontend/container/application/models/__init__.py b/applications/rag/frontend/container/application/models/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .vector_embeddings import VectorEmbeddings
+
+__all__ = ["VectorEmbeddings"]
diff --git a/applications/rag/frontend/container/application/models/vector_embeddings.py b/applications/rag/frontend/container/application/models/vector_embeddings.py
@@ -0,0 +1,31 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from sqlalchemy import Column, String, Text
+from sqlalchemy.orm import mapped_column, declarative_base
+from pgvector.sqlalchemy import Vector
+
+Base = declarative_base()
+
+VECTOR_EMBEDDINGS_TABLE_NAME = os.environ.get("EMBEDDINGS_TABLE_NAME", "")
+
+
+class VectorEmbeddings(Base):
+    """One embeddings-table row; vector width follows VECTOR_DIMENSION (default 384)."""
+    __tablename__ = VECTOR_EMBEDDINGS_TABLE_NAME
+    id = Column(String(255), primary_key=True)
+    text = Column(Text)
+    text_embedding = mapped_column(Vector(int(os.environ.get("VECTOR_DIMENSION") or 384)))
diff --git a/applications/rag/frontend/container/application/rag_langchain/__init__.py b/applications/rag/frontend/container/application/rag_langchain/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/applications/rag/frontend/container/application/rag_langchain/huggingface_inference_model.py b/applications/rag/frontend/container/application/rag_langchain/huggingface_inference_model.py
@@ -0,0 +1,131 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import logging
+from typing import Any, Dict, Iterator, List, Optional
+
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.llms import LLM
+from langchain_core.outputs import GenerationChunk
+
+from application.utils import post_request
+
+# host:port of the inference service; callers prepend "http://".
+INFERENCE_ENDPOINT = os.getenv("INFERENCE_ENDPOINT", "127.0.0.1:8081")
+
+_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
+logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
+
+
+class HuggingFaceCustomChatModel(LLM):
+    """LangChain LLM backed by the HTTP service at ``INFERENCE_ENDPOINT``.
+
+    Despite the name, this subclasses ``LLM``: it takes a string prompt and
+    returns the ``generated_text`` field of the service's response.
+
+    Example:
+
+        .. code-block:: python
+
+            model = HuggingFaceCustomChatModel()
+            result = model.invoke("hello")
+            results = model.batch(["hello", "world"])
+    """
+
+    def _fetch_generated_text(self, url: str, prompt: str) -> str:
+        """POST ``prompt`` as JSON ``inputs`` to ``url``; return ``generated_text``.
+
+        An empty string is returned when the response has no such key.
+        """
+        payload = {"inputs": prompt}
+        response = post_request(url, payload, {"Content-Type": "application/json"})
+        return response.get("generated_text", "")
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Generate a completion for ``prompt`` with one ``/generate`` request.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Must be None; stop sequences are not supported.
+            run_manager: Callback manager for the run (not used here).
+            **kwargs: Accepted for interface compatibility; ignored.
+
+        Returns:
+            The model output as a string ("" if the response has none).
+
+        Raises:
+            ValueError: If ``stop`` is not None.
+        """
+        if stop is not None:
+            raise ValueError("stop kwargs are not permitted.")
+
+        return self._fetch_generated_text(
+            f"http://{INFERENCE_ENDPOINT}/generate", prompt
+        )
+
+    def _stream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[GenerationChunk]:
+        """Yield the completion for ``prompt`` one character at a time.
+
+        The full text is fetched with a single ``/generate_stream`` request
+        and only then split into chunks, so nothing is yielded before that
+        request has returned.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Ignored by this method.
+            run_manager: If given, receives every chunk through
+                ``on_llm_new_token``.
+            **kwargs: Accepted for interface compatibility; ignored.
+
+        Returns:
+            An iterator of single-character GenerationChunks.
+        """
+        logging.info("Calling external model")
+        # NOTE(review): assumes post_request hands back a dict carrying
+        # "generated_text" for this route as well — confirm.
+        generated_text = self._fetch_generated_text(
+            f"http://{INFERENCE_ENDPOINT}/generate_stream", prompt
+        )
+
+        for character in generated_text:
+            piece = GenerationChunk(text=character)
+            if run_manager:
+                run_manager.on_llm_new_token(piece.text, chunk=piece)
+
+            yield piece
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Return a dictionary of identifying parameters."""
+        # The model name lets LLM monitoring tools apply model-specific
+        # rules (e.g. per-token pricing) to runs of this LLM.
+        return {"model_name": "HuggingFaceTGI"}
+
+    @property
+    def _llm_type(self) -> str:
+        """Return the LLM type label ("custom"); used for logging only."""
+        return "custom"