Modify how losses are computed in a multi-replicas hyperopt #2145

Open
wants to merge 6 commits into master
@@ -97,7 +97,7 @@ datacuts:

############################################################
theory:
- theoryid: 700 # database id
+ theoryid: 40_000_000

hyperscan_config:
architecture:
@@ -128,12 +128,7 @@ kfold:
loss_type: chi2
replica_statistic: average_best
fold_statistic: average
- penalties_in_loss: True
- penalties:
- - saturation
- - patience
- - integrability
- threshold: 10
+ threshold: 20
partitions:
- datasets:
# DIS
@@ -243,34 +238,56 @@ fitting:
savepseudodata: false
fitbasis: EVOL
basis:
- - {fl: sng, trainable: false, smallx: [1.091, 1.119], largex: [1.471, 3.021]}
- - {fl: g, trainable: false, smallx: [0.7795, 1.095], largex: [2.742, 5.547]}
- - {fl: v, trainable: false, smallx: [0.472, 0.7576], largex: [1.571, 3.559]}
- - {fl: v3, trainable: false, smallx: [0.07483, 0.4501], largex: [1.714, 3.467]}
- - {fl: v8, trainable: false, smallx: [0.5731, 0.779], largex: [1.555, 3.465]}
- - {fl: t3, trainable: false, smallx: [-0.5498, 1.0], largex: [1.778, 3.5]}
- - {fl: t8, trainable: false, smallx: [0.5469, 0.857], largex: [1.555, 3.391]}
- - {fl: t15, trainable: false, smallx: [1.081, 1.142], largex: [1.491, 3.092]}
+ - {fl: sng, trainable: false, smallx: [1.089, 1.119], largex: [1.475, 3.119]}
+ - {fl: g, trainable: false, smallx: [0.7504, 1.098], largex: [2.814, 5.669]}
+ - {fl: v, trainable: false, smallx: [0.479, 0.7384], largex: [1.549, 3.532]}
+ - {fl: v3, trainable: false, smallx: [0.1073, 0.4397], largex: [1.733, 3.458]}
+ - {fl: v8, trainable: false, smallx: [0.5507, 0.7837], largex: [1.516, 3.356]}
+ - {fl: t3, trainable: false, smallx: [-0.4506, 0.9305], largex: [1.745, 3.424]}
+ - {fl: t8, trainable: false, smallx: [0.5877, 0.8687], largex: [1.522, 3.515]}
+ - {fl: t15, trainable: false, smallx: [1.089, 1.141], largex: [1.492, 3.222]}

- ############################################################
+ ################################################################################
positivity:
posdatasets:
- - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} # Positivity Lagrange Multiplier
+ # Positivity Lagrange Multiplier
+ - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_F2D, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_F2S, maxlambda: 1e6}
- - {dataset: NNPDF_POS_2P24GEV_FLL-19PTS, maxlambda: 1e6}
+ - {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_DYU, maxlambda: 1e10}
- {dataset: NNPDF_POS_2P24GEV_DYD, maxlambda: 1e10}
- {dataset: NNPDF_POS_2P24GEV_DYS, maxlambda: 1e10}
- - {dataset: NNPDF_POS_2P24GEV_F2C-17PTS, maxlambda: 1e6}
- - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6} # Positivity of MSbar PDFs
+ - {dataset: NNPDF_POS_2P24GEV_F2C, maxlambda: 1e6}
+ # Positivity of MSbar PDFs
+ - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XUB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XDQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XDB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XSQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XSB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6}

+ added_filter_rules:
+ - dataset: NNPDF_POS_2P24GEV_FLL
+ rule: "x > 5.0e-7"
+ - dataset: NNPDF_POS_2P24GEV_F2C
+ rule: "x < 0.74"
+ - dataset: NNPDF_POS_2P24GEV_XGL
+ rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XUQ
+ rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XUB
+ rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XDQ
+ rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XDB
+ rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XSQ
+ rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XSB
+ rule: "x > 0.1"

############################################################
integrability:
integdatasets:
30 changes: 21 additions & 9 deletions n3fit/src/n3fit/hyper_optimization/rewards.py
@@ -44,7 +44,7 @@
log = logging.getLogger(__name__)


- def _average_best(fold_losses: np.ndarray, proportion: float = 0.9, axis: int = 0) -> float:
+ def _average_best(fold_losses: np.ndarray, proportion: float = 0.05, axis: int = 0) -> float:
Contributor:
Are you sure we do want to have a default value for proportion?

"""
Compute the average of the input array along the specified axis, among the best `proportion`
of replicas.
@@ -72,7 +72,7 @@ def _average_best(fold_losses: np.ndarray, proportion: float = 0.9, axis: int =
return _average(best_losses, axis=axis)
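The diff only shows the new default for `proportion` (0.05 instead of 0.9) and the docstring of `_average_best`; the function body is not part of this hunk. The following standalone sketch illustrates the behaviour the docstring describes, averaging only the lowest-loss fraction of replicas. It is an illustration under that assumption, not the actual n3fit implementation, and the helper name `average_best_sketch` is invented here.

import numpy as np

def average_best_sketch(fold_losses: np.ndarray, proportion: float = 0.05, axis: int = 0) -> float:
    # Keep at least one replica even for very small proportions
    num_best = max(1, int(proportion * fold_losses.shape[axis]))
    # Sort ascending so that the best (lowest) losses come first
    sorted_losses = np.sort(fold_losses, axis=axis)
    best_losses = np.take(sorted_losses, np.arange(num_best), axis=axis)
    return np.average(best_losses, axis=axis).item()

# Example: with proportion=0.5 only the two best of four replicas are averaged
# average_best_sketch(np.array([1.0, 2.0, 3.0, 10.0]), proportion=0.5) -> 1.5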


- def _average(fold_losses: np.ndarray, axis: int = 0) -> float:
+ def _average(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float:
"""
Compute the average of the input array along the specified axis.

@@ -90,7 +90,7 @@ def _average(fold_losses: np.ndarray, axis: int = 0) -> float:
return np.average(fold_losses, axis=axis).item()


- def _best_worst(fold_losses: np.ndarray, axis: int = 0) -> float:
+ def _best_worst(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float:
"""
Compute the maximum value of the input array along the specified axis.

@@ -108,7 +108,7 @@ def _best_worst(fold_losses: np.ndarray, axis: int = 0) -> float:
return np.max(fold_losses, axis=axis).item()


- def _std(fold_losses: np.ndarray, axis: int = 0) -> float:
+ def _std(fold_losses: np.ndarray, axis: int = 0, **kwargs) -> float:
"""
Compute the standard deviation of the input array along the specified axis.

@@ -195,7 +195,8 @@ def __init__(
def compute_loss(
self,
penalties: dict[str, np.ndarray],
- experimental_loss: np.ndarray,
+ validation_loss: np.ndarray,
+ kfold_loss: np.ndarray,
pdf_object: N3PDF,
experimental_data: list[DataGroupSpec],
fold_idx: int = 0,
@@ -250,20 +251,31 @@ def compute_loss(

# update hyperopt metrics
# these are saved in the phi_vector and chi2_matrix attributes, excluding penalties
- self._save_hyperopt_metrics(phi_per_fold, experimental_loss, penalties, fold_idx)
+ self._save_hyperopt_metrics(phi_per_fold, kfold_loss, penalties, fold_idx)

# Prepare the output loss, including penalties if necessary
if self._penalties_in_loss:
# include penalties to experimental loss
- experimental_loss += sum(penalties.values())
+ kfold_loss += sum(penalties.values())

# add penalties to phi in the form of a sum of per-replicas averages
phi_per_fold += sum(np.mean(penalty) for penalty in penalties.values())

# define loss for hyperopt according to the chosen loss_type
if self.loss_type == "chi2":
- # calculate statistics of chi2 over replicas for a given k-fold
- loss = self.reduce_over_replicas(experimental_loss)
+ # calculate statistics of chi2 over replicas for a given k-fold
+
+ # Construct the final loss as a sum of
+ # 1. The validation chi2
+ # 2. The distance to 2 for the kfold chi2
+ # If the chosen statistic accepts a `proportion` keyword argument, use 80% and 10%
+ # as a proxy of
+ # "80% of the replicas should be good, but only a small % has to cover the folds"
+ # The values of 80% and 10% are completely empirical and should be investigated further
Contributor:
On a similar line, maybe we can pass the values from the runcard?

Member:
tbh, it would be a good idea. It was added quickly there for the sake of the meeting, but it would be good to have it as an input parameter.
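As a rough illustration of that suggestion, the two proportions could sit next to the other k-fold settings in the runcard. The keys validation_proportion and kfold_proportion below are hypothetical names invented for this sketch and are not implemented in this PR:

kfold:
  loss_type: chi2
  replica_statistic: average_best
  fold_statistic: average
  # hypothetical keys, not part of this PR
  validation_proportion: 0.8
  kfold_proportion: 0.1
  threshold: 20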


+ validation_loss_average = self.reduce_over_replicas(validation_loss, proportion=0.8)
+ kfold_loss_average = self.reduce_over_replicas(kfold_loss, proportion=0.1)
+ loss = validation_loss_average + (max(kfold_loss_average, 2.0) - 2.0)
elif self.loss_type == "phi2":
loss = phi_per_fold**2
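To make the behaviour of the new chi2 branch above concrete, here is a small standalone sketch with made-up per-replica chi2 values. The helper average_best below is a simplified stand-in for reduce_over_replicas with the average_best statistic, not the actual n3fit code:

import numpy as np

def average_best(losses: np.ndarray, proportion: float) -> float:
    # Average over the best (lowest) `proportion` of replicas, keeping at least one
    n = max(1, int(proportion * losses.size))
    return float(np.mean(np.sort(losses)[:n]))

# Made-up per-replica chi2 values for one fold
validation_loss = np.array([1.1, 1.3, 1.2, 1.4, 5.0, 1.2, 1.3, 1.1, 1.2, 1.5])
kfold_loss = np.array([1.8, 2.5, 2.1, 1.9, 9.0, 2.2, 2.0, 1.7, 2.3, 2.4])

validation_loss_average = average_best(validation_loss, proportion=0.8)  # 80% of the replicas should be good
kfold_loss_average = average_best(kfold_loss, proportion=0.1)  # only a small % has to cover the fold
loss = validation_loss_average + (max(kfold_loss_average, 2.0) - 2.0)

print(loss)  # ~1.225: the fold term vanishes because the best replica is already below chi2 = 2

With this construction the k-fold term only penalises trials where even the best replicas stay above chi2 = 2 on the held-out fold, while the validation term rewards the bulk of the replicas fitting their own data well.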

3 changes: 2 additions & 1 deletion n3fit/src/n3fit/model_trainer.py
@@ -1014,7 +1014,8 @@ def hyperparametrizable(self, params):
# Compute per replica hyper losses
hyper_loss = self._hyper_loss.compute_loss(
penalties=penalties,
- experimental_loss=experimental_loss,
+ kfold_loss=experimental_loss,
+ validation_loss=validation_loss,
pdf_object=vplike_pdf,
experimental_data=experimental_data,
fold_idx=k,
6 changes: 5 additions & 1 deletion n3fit/src/n3fit/tests/test_hyperopt.py
@@ -79,7 +79,11 @@ def test_compute_per_fold_loss(loss_type, replica_statistic, expected_per_fold_l
# calculate statistic loss for one specific fold
pdf_object = N3PDF(pdf_model.split_replicas())
predicted_per_fold_loss = loss.compute_loss(
- penalties, experimental_loss, pdf_object, experimental_data
+ penalties,
+ kfold_loss=experimental_loss,
+ validation_loss=experimental_loss,
+ pdf_object=pdf_object,
+ experimental_data=experimental_data,
)

# Assert