diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 89db6533b..dfbd6db10 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -143,9 +143,6 @@ jobs:
           pip install .
           pip install -r requirements/dev.txt
-      - name: Format check
-        run: |
-          make format
       - name: Lint check
         run: make lint
       - name: Type check
diff --git a/docs/howtos/integrations/opik.ipynb b/docs/howtos/integrations/opik.ipynb
index 73cf4d573..6e7fb1d91 100644
--- a/docs/howtos/integrations/opik.ipynb
+++ b/docs/howtos/integrations/opik.ipynb
@@ -30,7 +30,9 @@
     "import getpass\n",
     "\n",
     "os.environ[\"OPIK_API_KEY\"] = getpass.getpass(\"Opik API Key: \")\n",
-    "os.environ[\"OPIK_WORKSPACE\"] = input(\"Comet workspace (often the same as your username): \")"
+    "os.environ[\"OPIK_WORKSPACE\"] = input(\n",
+    "    \"Comet workspace (often the same as your username): \"\n",
+    ")"
    ]
   },
   {
@@ -151,6 +153,7 @@
     "import asyncio\n",
     "from ragas.integrations.opik import OpikTracer\n",
     "\n",
+    "\n",
     "# Define the scoring function\n",
     "def compute_metric(opik_tracer, metric, row):\n",
     "    async def get_score(opik_tracer, metric, row):\n",
@@ -162,16 +165,17 @@
     "    result = loop.run_until_complete(get_score(opik_tracer, metric, row))\n",
     "    return result\n",
     "\n",
+    "\n",
     "# Score a simple example\n",
     "row = {\n",
     "    \"question\": \"What is the capital of France?\",\n",
     "    \"answer\": \"Paris\",\n",
-    "    \"contexts\": [\"Paris is the capital of France.\", \"Paris is in France.\"]\n",
+    "    \"contexts\": [\"Paris is the capital of France.\", \"Paris is in France.\"],\n",
     "}\n",
     "\n",
     "opik_tracer = OpikTracer()\n",
     "score = compute_metric(opik_tracer, answer_relevancy_metric, row)\n",
-    "print(\"Answer Relevancy score:\", score)\n"
+    "print(\"Answer Relevancy score:\", score)"
    ]
   },
   {
@@ -207,16 +211,19 @@
     "from opik import track\n",
     "from opik.opik_context import get_current_trace\n",
     "\n",
+    "\n",
     "@track\n",
     "def retrieve_contexts(question):\n",
     "    # Define the retrieval function, in this case we will hard code the contexts\n",
     "    return [\"Paris is the capital of France.\", \"Paris is in France.\"]\n",
     "\n",
+    "\n",
     "@track\n",
     "def answer_question(question, contexts):\n",
     "    # Define the answer function, in this case we will hard code the answer\n",
     "    return \"Paris\"\n",
     "\n",
+    "\n",
     "@track(name=\"Compute Ragas metric score\", capture_input=False)\n",
     "def compute_rag_score(answer_relevancy_metric, question, answer, contexts):\n",
     "    # Define the score function\n",
@@ -224,6 +231,7 @@
     "    score = compute_metric(answer_relevancy_metric, row)\n",
     "    return score\n",
     "\n",
+    "\n",
     "@track\n",
     "def rag_pipeline(question):\n",
     "    # Define the pipeline\n",
@@ -233,9 +241,10 @@
     "    trace = get_current_trace()\n",
     "    score = compute_rag_score(answer_relevancy_metric, question, answer, contexts)\n",
     "    trace.log_feedback_score(\"answer_relevancy\", round(score, 4), category_name=\"ragas\")\n",
-    "    \n",
+    "\n",
     "    return answer\n",
     "\n",
+    "\n",
     "rag_pipeline(\"What is the capital of France?\")"
    ]
   },
@@ -297,7 +306,7 @@
     "result = evaluate(\n",
     "    fiqa_eval[\"baseline\"].select(range(3)),\n",
     "    metrics=[context_precision, faithfulness, answer_relevancy],\n",
-    "    callbacks=[opik_tracer_eval]\n",
+    "    callbacks=[opik_tracer_eval],\n",
     ")\n",
     "\n",
     "print(result)"
diff --git a/src/ragas/integrations/opik.py b/src/ragas/integrations/opik.py
index f8956b567..e552f9b3f 100644
--- a/src/ragas/integrations/opik.py
+++ b/src/ragas/integrations/opik.py
@@ -1,14 +1,20 @@
 import typing as t
 
 try:
-    from opik.integrations.langchain import OpikTracer as LangchainOpikTracer  # type: ignore
+    from opik.integrations.langchain import (  # type: ignore
+        OpikTracer as LangchainOpikTracer,
+    )
+
     from ragas.evaluation import RAGAS_EVALUATION_CHAIN_NAME
 except ImportError:
-    raise ImportError("Opik is not installed. Please install it using `pip install opik` to use the Opik tracer.")
+    raise ImportError(
+        "Opik is not installed. Please install it using `pip install opik` to use the Opik tracer."
+    )
 
 if t.TYPE_CHECKING:
     from langchain_core.tracers.schemas import Run
 
+
 class OpikTracer(LangchainOpikTracer):
     """
     Callback for Opik that can be used to log traces and evaluation scores to the Opik platform.
@@ -20,6 +26,7 @@ class OpikTracer(LangchainOpikTracer):
     metadata: dict
         Additional metadata to log for each trace.
     """
+
     _evaluation_run_id: t.Optional[str] = None
 
     def _persist_run(self, run: "Run"):
@@ -48,8 +55,11 @@ def _on_chain_end(self, run: "Run"):
         span = self._span_map[run.id]
         trace_id = span.trace_id
         if run.outputs:
-            self._opik_client.log_traces_feedback_scores([
-                {"id": trace_id, "name": name, "value": round(value, 4)} for name, value in run.outputs.items()
-            ])
-
+            self._opik_client.log_traces_feedback_scores(
+                [
+                    {"id": trace_id, "name": name, "value": round(value, 4)}
+                    for name, value in run.outputs.items()
+                ]
+            )
+
         self._persist_run(run)
diff --git a/src/ragas/metrics/base.py b/src/ragas/metrics/base.py
index ae5ea8e0f..e14d129b9 100644
--- a/src/ragas/metrics/base.py
+++ b/src/ragas/metrics/base.py
@@ -26,9 +26,13 @@
 from ragas.embeddings import BaseRagasEmbeddings
 from ragas.llms import BaseRagasLLM
 
+import inspect
+
 from pysbd import Segmenter
 from pysbd.languages import LANGUAGE_CODES
 
+from ragas.experimental.llms.prompt import PydanticPrompt as Prompt
+
 logger = logging.getLogger(__name__)
 
 
@@ -154,6 +158,26 @@ def init(self, run_config: RunConfig):
             )
         self.llm.set_run_config(run_config)
 
+    def get_prompts(self) -> t.Dict[str, Prompt]:
+        prompts = {}
+        for name, value in inspect.getmembers(self):
+            if isinstance(value, Prompt):
+                prompts.update({name: value})
+        return prompts
+
+    def set_prompts(self, **prompts):
+        available_prompts = self.get_prompts()
+        for key, value in prompts.items():
+            if key not in available_prompts:
+                raise ValueError(
+                    f"Prompt with name '{key}' does not exist in the metric {self.name}. Use get_prompts() to see available prompts."
+                )
+            if not isinstance(value, Prompt):
+                raise ValueError(
+                    f"Prompt with name '{key}' must be an instance of 'Prompt'"
+                )
+            setattr(self, key, value)
+
 
 @dataclass
 class MetricWithEmbeddings(Metric):
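The substantive change above (beyond Black-style reformatting) is the new `get_prompts()` / `set_prompts()` pair on `Metric` in `src/ragas/metrics/base.py`, which exposes a metric's `PydanticPrompt` attributes by name and validates overrides. A minimal usage sketch follows; it assumes the surrounding ragas version ships a metric (here `faithfulness`, used only for illustration) with at least one `PydanticPrompt` attribute, and that the prompt object exposes an `instruction` field — neither is guaranteed by this diff itself.

```python
# Sketch: inspect and override a metric's prompts via the API added in this PR.
# Assumptions: `faithfulness` has at least one PydanticPrompt attribute, and
# the prompt object has an `instruction` field.
from ragas.metrics import faithfulness

# get_prompts() walks the metric's members and returns {attribute_name: prompt}.
prompts = faithfulness.get_prompts()
print(list(prompts))

# Tweak one prompt and write it back. set_prompts() raises ValueError if the
# name is not among get_prompts() or the value is not a Prompt instance.
name, prompt = next(iter(prompts.items()))
prompt.instruction += "\nKeep the output terse."
faithfulness.set_prompts(**{name: prompt})
```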