Skip to content

Commit

Permalink
explain choice of % in validation/fold
Browse files Browse the repository at this point in the history
  • Loading branch information
scarlehoff authored and Radonirinaunimi committed Sep 23, 2024
1 parent 7027bf6 commit 6075488
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
17 changes: 11 additions & 6 deletions n3fit/src/n3fit/hyper_optimization/rewards.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def _average_best(fold_losses: np.ndarray, proportion: float = 0.05, axis: int =
return _average(best_losses, axis=axis)


def _average(fold_losses: np.ndarray, axis: int = 0) -> float:
def _average(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float:
"""
Compute the average of the input array along the specified axis.
Expand All @@ -90,7 +90,7 @@ def _average(fold_losses: np.ndarray, axis: int = 0) -> float:
return np.average(fold_losses, axis=axis).item()


def _best_worst(fold_losses: np.ndarray, axis: int = 0) -> float:
def _best_worst(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float:
"""
Compute the maximum value of the input array along the specified axis.
Expand All @@ -108,7 +108,7 @@ def _best_worst(fold_losses: np.ndarray, axis: int = 0) -> float:
return np.max(fold_losses, axis=axis).item()


def _std(fold_losses: np.ndarray, axis: int = 0) -> float:
def _std(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float:
"""
Compute the standard deviation of the input array along the specified axis.
Expand Down Expand Up @@ -265,9 +265,14 @@ def compute_loss(
if self.loss_type == "chi2":
# calculate statistics of chi2 over replicas for a given k-fold_statistic

### Experiment:
# Use the validation loss as the loss
# summed with how far from 2 are we for the kfold
# Construct the final loss as a sum of
# 1. The validation chi2
# 2. The distance to 2 for the kfold chi2
            # If a proportion is allowed as a keyword argument, use 80% and 10%
# as a proxy of
# "80% of the replicas should be good, but only a small % has to cover the folds"
# The values of 80% and 10% are completely empirical and should be investigated further

validation_loss_average = self.reduce_over_replicas(validation_loss, proportion=0.8)
kfold_loss_average = self.reduce_over_replicas(kfold_loss, proportion=0.1)
loss = validation_loss_average + (max(kfold_loss_average, 2.0) - 2.0)
Expand Down
6 changes: 5 additions & 1 deletion n3fit/src/n3fit/tests/test_hyperopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,11 @@ def test_compute_per_fold_loss(loss_type, replica_statistic, expected_per_fold_l
# calculate statistic loss for one specific fold
pdf_object = N3PDF(pdf_model.split_replicas())
predicted_per_fold_loss = loss.compute_loss(
penalties, experimental_loss, pdf_object, experimental_data
penalties,
kfold_loss=experimental_loss,
validation_loss=experimental_loss,
pdf_object=pdf_object,
experimental_data=experimental_data,
)

# Assert
Expand Down

0 comments on commit 6075488

Please sign in to comment.