Skip to content

Commit

Permalink
Merge pull request #76 from HEP-KBFI/comet
Browse files Browse the repository at this point in the history
comet, new evaluators, OmniParT, new ntupelizer with index matching
  • Loading branch information
Laurits7 authored Oct 1, 2024
2 parents 9bf3a40 + 209d87b commit d30122a
Show file tree
Hide file tree
Showing 45 changed files with 3,622 additions and 1,611 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ venv/
ENV/
env.bak/
venv.bak/
.idea/

# Spyder project settings
.spyderproject
Expand Down
13 changes: 13 additions & 0 deletions README_Tallinn.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ To submit the training of the models to `gpu0`, check and run
./enreg/scripts/submit-pytorch-gpu-all.sh
```

## Logging with comet-ml
For comet-ml logging one needs to create an account at https://www.comet.com/ and get the API key.
```bash
comet login
```
This will prompt you to enter the API key and will create the file ```~/.comet.config```, where you should add the workspace name and project, so that the config file looks like this:
```
[comet]
api_key = YOUR_API_KEY
project_name = YOUR_PROJECT
workspace = YOUR_WORKSPACE
```

# Plotting

Change `enreg/config/benchmarking.yaml` and `enreg/config/metrics/regression.yaml` as needed.
Expand Down
2 changes: 1 addition & 1 deletion enreg/config/benchmarking.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ load_from_json: False
#this path contains the input ntuples to the ML model (jet & tau properties)
#the model predictions are stored in separate files
#and are configured in metrics/regression.yaml -> algorithms/ntuples_dir
base_ntuple_path: /scratch/persistent/joosep/ml-tau/20240701_lowered_ptcut_merged
base_ntuple_path: /home/laurits/ntuples/20240924_lowered_recoPtCut/
comparison_samples:
- zh_test
- z_test
Expand Down
98 changes: 52 additions & 46 deletions enreg/config/metrics/classifier.yaml
Original file line number Diff line number Diff line change
@@ -1,55 +1,61 @@
classifier:
plotting:
output_dir: /home/laurits/CLIC_metrics_202300921
n_files: -1
sig_sample: Z_Ztautau
algorithms:
DeepSet:
name: "DeepSet"
marker: "*"
hatch: "//"
color: "tab:purple"
linestyle: ""
marker_size: 15
OmniParT:
name: "OmniParT"
marker: "^"
hatch: "."
color: "tab:green"
linestyle: ""
marker_size: 15
ParticleTransformer:
sig_ntuples_dir: /scratch/persistent/veelken/CLIC_tau_ntuples/2023Jun22_wLifetime/ParticleTransformer/ZH_Htautau
bkg_ntuples_dir: /scratch/persistent/veelken/CLIC_tau_ntuples/2023Jun22_wLifetime/ParticleTransformer/QCD
json_metrics_path: /foo/bar
compare: True
metrics: # Are those still needed?
denominator: "pt >= 20 && |eta| < 2.3"
numerator: "pt >= 20 && |eta| < 2.3"
WPs: # Are those still needed?
ParticleTransformer:
Loose: 0.975
Medium: 0.967
Tight: 0.930
name: "ParticleTransformer"
marker: "v"
hatch: "\\\\"
color: "tab:red"
linestyle: ""
marker_size: 15
performances:
efficiency:
variables:
- name: pt
x_range: [20, 180]
n_bins: 9
- name: eta
x_range: [-2.6, 2.6]
n_bins: 9
- name: theta
x_range: [10, 90]
n_bins: 9
xlabel:
pt: "$p_T^{gen\\mathrm{-}\\tau_h}\\,\\, [GeV]$"
eta: "$\\eta^{gen\\mathrm{-}\\tau_h}\\,\\, [GeV]$"
theta: "$\\theta^{gen\\mathrm{-}\\tau_h}\\,\\, [ ^{o} ]$"
ylabel: "$\\varepsilon_{\\tau}$"
yscale: "linear"
ylim: [0, 1]
fakerate:
variables:
- name: pt
x_range: [20, 180]
n_bins: 8
- name: eta
x_range: [-2.6, 2.6]
n_bins: 8
- name: theta
x_range: [10, 90]
n_bins: 8
markers:
ParticleTransformer: "v"
colors:
ParticleTransformer: "tab:purple"
xlabel:
pt: "$p_T^{gen\\mathrm{-}jet}\\,\\, [GeV]$"
eta: "$\\eta^{gen\\mathrm{-}jet}\\,\\, [GeV]$"
theta: "$\\theta^{gen\\mathrm{-}jet}\\,\\, [ ^{o} ]$"
ylabel: "$P_{misid}$"
yscale: "log"
ylim: [5e-6, 2e-2]
cuts:
min_pt: 20
min_theta: 10
max_theta: 170
metrics:
pt:
x_range: [20, 180]
n_bins: 9
x_maj_tick_spacing: 40
eta:
x_range: [-2.6, 2.6]
n_bins: 9
x_maj_tick_spacing: 20
theta:
x_range: [10, 90]
n_bins: 9
x_maj_tick_spacing: 20
defaults:
- datasets: datasets
- _self_
plotting_metrics:
ROC: True
fakerate: True
efficiency: True
tauClassifier: False
energy_resolution: False
decaymode: False
17 changes: 17 additions & 0 deletions enreg/config/metrics/dm_reconstruction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
dm_reconstruction:
output_path: metrics/dm_reconstruction/Results
signal_samples:
- zh_test
- z_test
algorithms:
ParticleTransformer:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/ParticleTransformer
LorentzNet:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/LorentzNet
DeepSet:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/DeepSet
HPS:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/HPS

defaults:
- _self_
1 change: 1 addition & 0 deletions enreg/config/metrics/metrics.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defaults:
- classifier
- regression
- dm_reconstruction
- _self_
83 changes: 63 additions & 20 deletions enreg/config/metrics/regression.yaml
Original file line number Diff line number Diff line change
@@ -1,46 +1,89 @@
regression:
ratio_plot:
bin_edges:
zh_test: [20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175]
z_test: [20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200]
cls_wp: Tight
classifier_WPs: # The values for ParticleTransformer from the previous paper
Loose: 0.930
Medium: 0.967
Tight: 0.975
zh: [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175]
z: [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200]
resolution_plot:
ylabel: "$p_T\\ resol.\\ (q_{75} - q_{25})/q_{50}$"
ylim: [0, 0.06]
xlabel: "$p_T^{gen}$"
xscale: "linear"
yscale: "linear"
nticks: 7
response_plot:
ylabel: "$p_T\\ scale\\ (q_{50})$"
ylim: [0.99, 1.01]
xlabel: "$p_T^{gen}$"
xscale: "linear"
yscale: "linear"
nticks: 3
algorithms:
240701_pt_paper:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/20240701_lowered_ptcut_merged/v1/jet_regression/ParticleTransformer/
DeepSet:
ntuples_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/DeepSet/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "*"
hatch: "//"
color: "tab:purple"
240809_pt_retrain:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/240809_particletransformer_vars/jet_regression/ParticleTransformer/
ls: "solid"
label: DeepSet
lw: 1
HPS:
ntuples_dir: /path/to/files
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "D"
marker: "^"
hatch: "\\\\"
color: "tab:green"
ls: "solid"
label: HPS
lw: 3
HPS_:
ntuples_dir: /path/to/files
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "^"
hatch: "\\\\"
color: "tab:green"
240809_pt_3var:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/240809_3var_kinematics/jet_regression/ParticleTransformer/
ls: "dashed"
label: "HPS (ideal)"
lw: 1
RecoJet:
ntuples_dir: /home/laurits/ntuples/20240924_lowered_recoPtCut/recoJet/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "v"
hatch: "."
color: "black"
240809_omnipart_3var:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/240809_3var_kinematics/jet_regression/OmniParT/
color: "tab:red"
ls: "solid"
label: RecoJet
lw: 1
LorentzNet:
ntuples_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/LorentzNet/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "^"
hatch: "||"
color: "blue"
marker: "D"
hatch: "."
color: "tab:orange"
ls: "solid"
label: LorentzNet
lw: 1
ParticleTransformer:
ntuples_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/ParticleTransformer/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "X"
hatch: "."
color: "tab:blue"
ls: "solid"
label: ParticleTransformer
lw: 3

defaults:
- _self_
11 changes: 7 additions & 4 deletions enreg/config/ml_datasets.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# list_dir: $HOME/ml-tau-en-reg/enreg/config/datasets
list_dir: /home/laurits/ml-tau-en-reg/enreg/config/datasets
relative_sizes:
train: 0.7
test: 0.2
validation: 0.1
only_append_to_test: False
datasets:
zh:
title: "ee $\\rightarrow$ ZH (H $\\rightarrow \\tau\\tau$)"
x_max: 170
z:
title: "ee $\\rightarrow$ Z (Z $\\rightarrow \\tau\\tau$)"
x_max: 190
defaults:
- datasets: datasets
- ntupelizer
- _self_
24 changes: 14 additions & 10 deletions enreg/config/model_training.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ verbosity: 1
train: True
test: True

comet:
experiment: null

#everything that the user should modify is here
#override these using command line args
output_dir: training-outputs/240809_3var_kinematics
data_path: /scratch/persistent/joosep/ml-tau/20240701_lowered_ptcut_merged
data_path: /scratch/persistent/laurits/ml-tau/20240924_lowered_recoPtCut

#override this using command line args
training_type: dm_multiclass
Expand All @@ -23,19 +26,20 @@ model_type: ParticleTransformer
#qq is added back using command line flags for the binary classification task
training_samples:
- z_train.parquet
# - zh_train.parquet
# - zh_train.parquet
- qq_train.parquet

#these are never used in the training code, only for the final evaluation
test_samples:
- z_test.parquet
# - zh_test.parquet
# - zh_test.parquet
- qq_test.parquet

#the training sample will be further subdivided as follows to train and valid
fraction_train: 0.8
fraction_valid: 0.2


dataset:
max_cands: 16
use_lifetime: False
Expand Down Expand Up @@ -73,13 +77,13 @@ training:

#disable creation of the outputs dir which we don't use
#https://stackoverflow.com/questions/65104134/disable-file-output-of-hydra
defaults:
defaults:
- models: models
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled

hydra:
output_subdir: null
run:
hydra:
output_subdir: null
run:
dir: .
6 changes: 5 additions & 1 deletion enreg/config/models/OmniParT/OmniParT.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
ckpt_path: /home/laurits/ml-tau-en-reg/enreg/omnijet_alpha/gabbro_repo/checkpoints/vqvae_32000_tokens_p3_mass_pid/model_ckpt.ckpt
bb_path: /home/laurits/ml-tau-en-reg/enreg/omnijet_alpha/gabbro_repo/checkpoints/generative_32000_tokens_p4_mass_pid/OmniJet_generative_model_FiduciaryCagoule_254.ckpt
num_rounds_frozen_backbone: 30
version: v3.1
version: "from_scratch"
versions:
- "from_scratch"
- "fixed_backbone"
- "fine_tuning"
hyperparameters:
num_layers: 2
embed_dims:
Expand Down
Loading

0 comments on commit d30122a

Please sign in to comment.