From 945693d1b0ff9b3ee71281d59e4dd03157cd3cb0 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Sun, 15 Sep 2024 21:08:18 +0200 Subject: [PATCH] explain choice of % in validation/fold --- n3fit/src/n3fit/hyper_optimization/rewards.py | 17 +++++++++++------ n3fit/src/n3fit/tests/test_hyperopt.py | 6 +++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/n3fit/src/n3fit/hyper_optimization/rewards.py b/n3fit/src/n3fit/hyper_optimization/rewards.py index d1bb9a49de..6a0a85e083 100644 --- a/n3fit/src/n3fit/hyper_optimization/rewards.py +++ b/n3fit/src/n3fit/hyper_optimization/rewards.py @@ -72,7 +72,7 @@ def _average_best(fold_losses: np.ndarray, proportion: float = 0.05, axis: int = return _average(best_losses, axis=axis) -def _average(fold_losses: np.ndarray, axis: int = 0) -> float: +def _average(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float: """ Compute the average of the input array along the specified axis. @@ -90,7 +90,7 @@ def _average(fold_losses: np.ndarray, axis: int = 0) -> float: return np.average(fold_losses, axis=axis).item() -def _best_worst(fold_losses: np.ndarray, axis: int = 0) -> float: +def _best_worst(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float: """ Compute the maximum value of the input array along the specified axis. @@ -108,7 +108,7 @@ def _best_worst(fold_losses: np.ndarray, axis: int = 0) -> float: return np.max(fold_losses, axis=axis).item() -def _std(fold_losses: np.ndarray, axis: int = 0) -> float: +def _std(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float: """ Compute the standard deviation of the input array along the specified axis. @@ -265,9 +265,14 @@ def compute_loss( if self.loss_type == "chi2": # calculate statistics of chi2 over replicas for a given k-fold_statistic - ### Experiment: - # Use the validation loss as the loss - # summed with how far from 2 are we for the kfold + # Construct the final loss as a sum of + # 1. The validation chi2 + # 2. 
The distance to 2 for the kfold chi2 + # If a proportion is allowed as a keyword argument, use 80% and 10% + # as a proxy of + # "80% of the replicas should be good, but only a small % has to cover the folds" + # The values of 80% and 10% are completely empirical and should be investigated further + validation_loss_average = self.reduce_over_replicas(validation_loss, proportion=0.8) kfold_loss_average = self.reduce_over_replicas(kfold_loss, proportion=0.1) loss = validation_loss_average + (max(kfold_loss_average, 2.0) - 2.0) diff --git a/n3fit/src/n3fit/tests/test_hyperopt.py b/n3fit/src/n3fit/tests/test_hyperopt.py index 274394b25a..68bfae960c 100644 --- a/n3fit/src/n3fit/tests/test_hyperopt.py +++ b/n3fit/src/n3fit/tests/test_hyperopt.py @@ -79,7 +79,11 @@ def test_compute_per_fold_loss(loss_type, replica_statistic, expected_per_fold_l # calculate statistic loss for one specific fold pdf_object = N3PDF(pdf_model.split_replicas()) predicted_per_fold_loss = loss.compute_loss( - penalties, experimental_loss, pdf_object, experimental_data + penalties, + kfold_loss=experimental_loss, + validation_loss=experimental_loss, + pdf_object=pdf_object, + experimental_data=experimental_data, ) # Assert