From 09df61f820bef16e4017089545868e84f6811fcc Mon Sep 17 00:00:00 2001
From: Harry Xie <harry@humanloop.com>
Date: Wed, 16 Oct 2024 19:15:04 +0100
Subject: [PATCH] Suppress the unhelpful "No previous versions to compare
 with." log, repeated once per evaluator, when there's only one evaluatee

---
 src/humanloop/eval_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/humanloop/eval_utils.py b/src/humanloop/eval_utils.py
index 97cf59b..319c021 100644
--- a/src/humanloop/eval_utils.py
+++ b/src/humanloop/eval_utils.py
@@ -398,7 +398,7 @@ def process_datapoint(datapoint: Datapoint):
 
     # Wait for the Evaluation to complete then print the results
     complete = False
-    stats = None
+
     while not complete:
         stats = client.evaluations.get_stats(id=evaluation.id)
         logger.info(f"\r{stats.progress}")
@@ -410,6 +410,10 @@ def process_datapoint(datapoint: Datapoint):
     logger.info(stats.report)
 
     checks: List[EvaluatorCheck] = []
+    if all(evaluator.get("threshold") is None for evaluator in evaluators) and len(stats.version_stats) == 1:
+        # Skip `check_evaluation_improvement` if no thresholds were provided and there is only one run.
+        # (Otherwise each evaluator would just log "No previous versions to compare with.")
+        return checks
     for evaluator in evaluators:
         improvement_check, score, delta = check_evaluation_improvement(
             evaluation=evaluation,