From 09df61f820bef16e4017089545868e84f6811fcc Mon Sep 17 00:00:00 2001 From: Harry Xie Date: Wed, 16 Oct 2024 19:15:04 +0100 Subject: [PATCH] suppress unhelpful logging of "No previous versions to compare with." * number of evaluators when there's only one evaluatee --- src/humanloop/eval_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/humanloop/eval_utils.py b/src/humanloop/eval_utils.py index 97cf59b..319c021 100644 --- a/src/humanloop/eval_utils.py +++ b/src/humanloop/eval_utils.py @@ -398,7 +398,7 @@ def process_datapoint(datapoint: Datapoint): # Wait for the Evaluation to complete then print the results complete = False - stats = None + while not complete: stats = client.evaluations.get_stats(id=evaluation.id) logger.info(f"\r{stats.progress}") @@ -410,6 +410,10 @@ def process_datapoint(datapoint: Datapoint): logger.info(stats.report) checks: List[EvaluatorCheck] = [] + if all(evaluator.get("threshold") is None for evaluator in evaluators) and len(stats.version_stats) == 1: + # Skip `check_evaluation_improvement` if no thresholds were provided and there is only one run. + # (Or the logs would not be helpful) + return checks for evaluator in evaluators: improvement_check, score, delta = check_evaluation_improvement( evaluation=evaluation,