Skip to content

Commit

Permalink
Merge pull request #76 from HEP-KBFI/comet
Browse files Browse the repository at this point in the history
comet, new evaluators, OmniParT, new ntupelizer with index matching
  • Loading branch information
Laurits7 authored Oct 1, 2024
2 parents 9bf3a40 + 209d87b commit d30122a
Show file tree
Hide file tree
Showing 45 changed files with 3,622 additions and 1,611 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ venv/
ENV/
env.bak/
venv.bak/
.idea/

# Spyder project settings
.spyderproject
Expand Down
13 changes: 13 additions & 0 deletions README_Tallinn.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ To submit the training of the models to `gpu0`, check and run
./enreg/scripts/submit-pytorch-gpu-all.sh
```

## Logging with comet-ml
For comet-ml logging one needs to create an account at https://www.comet.com/ and get the API key.
```bash
comet login
```
This will prompt you to enter the API key and will create the file ```~/.comet.config```, where you should add the workspace name and project, so that the config file looks like this:
```
[comet]
api_key = YOUR_API_KEY
project_name = YOUR_PROJECT
workspace = YOUR_WORKSPACE
```

# Plotting

Change `enreg/config/benchmarking.yaml` and `enreg/config/metrics/regression.yaml` as needed.
Expand Down
2 changes: 1 addition & 1 deletion enreg/config/benchmarking.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ load_from_json: False
#this path contains the input ntuples to the ML model (jet & tau properties)
#the model predictions are stored in separate files
#and are configured in metrics/regression.yaml -> algorithms/ntuples_dir
base_ntuple_path: /scratch/persistent/joosep/ml-tau/20240701_lowered_ptcut_merged
base_ntuple_path: /home/laurits/ntuples/20240924_lowered_recoPtCut/
comparison_samples:
- zh_test
- z_test
Expand Down
98 changes: 52 additions & 46 deletions enreg/config/metrics/classifier.yaml
Original file line number Diff line number Diff line change
@@ -1,55 +1,61 @@
classifier:
plotting:
output_dir: /home/laurits/CLIC_metrics_202300921
n_files: -1
sig_sample: Z_Ztautau
algorithms:
DeepSet:
name: "DeepSet"
marker: "*"
hatch: "//"
color: "tab:purple"
linestyle: ""
marker_size: 15
OmniParT:
name: "OmniParT"
marker: "^"
hatch: "."
color: "tab:green"
linestyle: ""
marker_size: 15
ParticleTransformer:
sig_ntuples_dir: /scratch/persistent/veelken/CLIC_tau_ntuples/2023Jun22_wLifetime/ParticleTransformer/ZH_Htautau
bkg_ntuples_dir: /scratch/persistent/veelken/CLIC_tau_ntuples/2023Jun22_wLifetime/ParticleTransformer/QCD
json_metrics_path: /foo/bar
compare: True
metrics: # Are those still needed?
denominator: "pt >= 20 && |eta| < 2.3"
numerator: "pt >= 20 && |eta| < 2.3"
WPs: # Are those still needed?
ParticleTransformer:
Loose: 0.975
Medium: 0.967
Tight: 0.930
name: "ParticleTransformer"
marker: "v"
hatch: "\\\\"
color: "tab:red"
linestyle: ""
marker_size: 15
performances:
efficiency:
variables:
- name: pt
x_range: [20, 180]
n_bins: 9
- name: eta
x_range: [-2.6, 2.6]
n_bins: 9
- name: theta
x_range: [10, 90]
n_bins: 9
xlabel:
pt: "$p_T^{gen\\mathrm{-}\\tau_h}\\,\\, [GeV]$"
eta: "$\\eta^{gen\\mathrm{-}\\tau_h}\\,\\, [GeV]$"
theta: "$\\theta^{gen\\mathrm{-}\\tau_h}\\,\\, [ ^{o} ]$"
ylabel: "$\\varepsilon_{\\tau}$"
yscale: "linear"
ylim: [0, 1]
fakerate:
variables:
- name: pt
x_range: [20, 180]
n_bins: 8
- name: eta
x_range: [-2.6, 2.6]
n_bins: 8
- name: theta
x_range: [10, 90]
n_bins: 8
markers:
ParticleTransformer: "v"
colors:
ParticleTransformer: "tab:purple"
xlabel:
pt: "$p_T^{gen\\mathrm{-}jet}\\,\\, [GeV]$"
eta: "$\\eta^{gen\\mathrm{-}jet}\\,\\, [GeV]$"
theta: "$\\theta^{gen\\mathrm{-}jet}\\,\\, [ ^{o} ]$"
ylabel: "$P_{misid}$"
yscale: "log"
ylim: [5e-6, 2e-2]
cuts:
min_pt: 20
min_theta: 10
max_theta: 170
metrics:
pt:
x_range: [20, 180]
n_bins: 9
x_maj_tick_spacing: 40
eta:
x_range: [-2.6, 2.6]
n_bins: 9
x_maj_tick_spacing: 20
theta:
x_range: [10, 90]
n_bins: 9
x_maj_tick_spacing: 20
defaults:
- datasets: datasets
- _self_
plotting_metrics:
ROC: True
fakerate: True
efficiency: True
tauClassifier: False
energy_resolution: False
decaymode: False
17 changes: 17 additions & 0 deletions enreg/config/metrics/dm_reconstruction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
dm_reconstruction:
output_path: metrics/dm_reconstruction/Results
signal_samples:
- zh_test
- z_test
algorithms:
ParticleTransformer:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/ParticleTransformer
LorentzNet:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/LorentzNet
DeepSet:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/DeepSet
HPS:
data_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/dm_multiclass/HPS

defaults:
- _self_
1 change: 1 addition & 0 deletions enreg/config/metrics/metrics.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
defaults:
- classifier
- regression
- dm_reconstruction
- _self_
83 changes: 63 additions & 20 deletions enreg/config/metrics/regression.yaml
Original file line number Diff line number Diff line change
@@ -1,46 +1,89 @@
regression:
ratio_plot:
bin_edges:
zh_test: [20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175]
z_test: [20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200]
cls_wp: Tight
classifier_WPs: # The values for ParticleTransformer from the previous paper
Loose: 0.930
Medium: 0.967
Tight: 0.975
zh: [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175]
z: [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200]
resolution_plot:
ylabel: "$p_T\\ resol.\\ (q_{75} - q_{25})/q_{50}$"
ylim: [0, 0.06]
xlabel: "$p_T^{gen}$"
xscale: "linear"
yscale: "linear"
nticks: 7
response_plot:
ylabel: "$p_T\\ scale\\ (q_{50})$"
ylim: [0.99, 1.01]
xlabel: "$p_T^{gen}$"
xscale: "linear"
yscale: "linear"
nticks: 3
algorithms:
240701_pt_paper:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/20240701_lowered_ptcut_merged/v1/jet_regression/ParticleTransformer/
DeepSet:
ntuples_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/DeepSet/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "*"
hatch: "//"
color: "tab:purple"
240809_pt_retrain:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/240809_particletransformer_vars/jet_regression/ParticleTransformer/
ls: "solid"
label: DeepSet
lw: 1
HPS:
ntuples_dir: /path/to/files
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "D"
marker: "^"
hatch: "\\\\"
color: "tab:green"
ls: "solid"
label: HPS
lw: 3
HPS_:
ntuples_dir: /path/to/files
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "^"
hatch: "\\\\"
color: "tab:green"
240809_pt_3var:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/240809_3var_kinematics/jet_regression/ParticleTransformer/
ls: "dashed"
label: "HPS (ideal)"
lw: 1
RecoJet:
ntuples_dir: /home/laurits/ntuples/20240924_lowered_recoPtCut/recoJet/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "v"
hatch: "."
color: "black"
240809_omnipart_3var:
ntuples_dir: /local/joosep/ml-tau-en-reg/results/240809_3var_kinematics/jet_regression/OmniParT/
color: "tab:red"
ls: "solid"
label: RecoJet
lw: 1
LorentzNet:
ntuples_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/LorentzNet/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "^"
hatch: "||"
color: "blue"
marker: "D"
hatch: "."
color: "tab:orange"
ls: "solid"
label: LorentzNet
lw: 1
ParticleTransformer:
ntuples_dir: /home/laurits/ml-tau-en-reg/training-outputs/20240921_recoPtCut_removed_samples/v1/jet_regression/ParticleTransformer/
json_metrics_path: plotting_data.json
load_from_json: False
compare: True
marker: "X"
hatch: "."
color: "tab:blue"
ls: "solid"
label: ParticleTransformer
lw: 3

defaults:
- _self_
11 changes: 7 additions & 4 deletions enreg/config/ml_datasets.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# list_dir: $HOME/ml-tau-en-reg/enreg/config/datasets
list_dir: /home/laurits/ml-tau-en-reg/enreg/config/datasets
relative_sizes:
train: 0.7
test: 0.2
validation: 0.1
only_append_to_test: False
datasets:
zh:
title: "ee $\\rightarrow$ ZH (H $\\rightarrow \\tau\\tau$)"
x_max: 170
z:
title: "ee $\\rightarrow$ Z (Z $\\rightarrow \\tau\\tau$)"
x_max: 190
defaults:
- datasets: datasets
- ntupelizer
- _self_
24 changes: 14 additions & 10 deletions enreg/config/model_training.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ verbosity: 1
train: True
test: True

comet:
experiment: null

#everything that the user should modify is here
#override these using command line args
output_dir: training-outputs/240809_3var_kinematics
data_path: /scratch/persistent/joosep/ml-tau/20240701_lowered_ptcut_merged
data_path: /scratch/persistent/laurits/ml-tau/20240924_lowered_recoPtCut

#override this using command line args
training_type: dm_multiclass
Expand All @@ -23,19 +26,20 @@ model_type: ParticleTransformer
#qq is added back using command line flags for the binary classification task
training_samples:
- z_train.parquet
# - zh_train.parquet
# - zh_train.parquet
- qq_train.parquet

#these are never used in the training code, only for the final evaluation
test_samples:
- z_test.parquet
# - zh_test.parquet
# - zh_test.parquet
- qq_test.parquet

#the training sample will be further subdivided as follows to train and valid
fraction_train: 0.8
fraction_valid: 0.2


dataset:
max_cands: 16
use_lifetime: False
Expand Down Expand Up @@ -73,13 +77,13 @@ training:

#disable creation of the outputs dir which we don't use
#https://stackoverflow.com/questions/65104134/disable-file-output-of-hydra
defaults:
defaults:
- models: models
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled

hydra:
output_subdir: null
run:
hydra:
output_subdir: null
run:
dir: .
6 changes: 5 additions & 1 deletion enreg/config/models/OmniParT/OmniParT.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
ckpt_path: /home/laurits/ml-tau-en-reg/enreg/omnijet_alpha/gabbro_repo/checkpoints/vqvae_32000_tokens_p3_mass_pid/model_ckpt.ckpt
bb_path: /home/laurits/ml-tau-en-reg/enreg/omnijet_alpha/gabbro_repo/checkpoints/generative_32000_tokens_p4_mass_pid/OmniJet_generative_model_FiduciaryCagoule_254.ckpt
num_rounds_frozen_backbone: 30
version: v3.1
version: "from_scratch"
versions:
- "from_scratch"
- "fixed_backbone"
- "fine_tuning"
hyperparameters:
num_layers: 2
embed_dims:
Expand Down
Loading

0 comments on commit d30122a

Please sign in to comment.