diff --git a/paddlenlp/trainer/trainer.py b/paddlenlp/trainer/trainer.py index 2418b84791f8..5d44c35767c7 100644 --- a/paddlenlp/trainer/trainer.py +++ b/paddlenlp/trainer/trainer.py @@ -3197,7 +3197,9 @@ def evaluation_loop( # Metrics! if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + # all_labels may be a list/tuple when prediction_steps output label_mask; only its first element is passed as label_ids + batch_labels = all_labels[0] if isinstance(all_labels, (list, tuple)) else all_labels + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=batch_labels)) else: metrics = {}