From 956a26f690009e1cb618795334070a2e2fdb9213 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Sat, 9 Nov 2024 20:01:11 +0000 Subject: [PATCH] added covariance matrix decomposition --- .../commondata/ATLAS_WPWM_13TEV/filter.py | 31 +++---- .../ATLAS_WPWM_13TEV/filter_utils.py | 82 +++++++++++++------ .../commondata/ATLAS_WPWM_13TEV/metadata.yaml | 15 ++-- .../HEPData-ins1436497-v1-Table_24.yaml | 47 +++++++++++ .../ATLAS_WPWM_13TEV/uncertainties.yaml | 20 +++++ 5 files changed, 144 insertions(+), 51 deletions(-) create mode 100644 nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/rawdata/HEPData-ins1436497-v1-Table_24.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/uncertainties.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter.py index 218bd6d8a6..a15cd45732 100644 --- a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter.py @@ -31,7 +31,7 @@ def filter_ATLAS_WPWM_13TEV_TOT_data_kinetic(): yaml.dump(kinematics_yaml, file, sort_keys=False) -def filter_ATLAS_Z0_8TEV_LOWMASS_systematics(version=3): +def filter_ATLAS_WPWM_13TEV_TOT_systematics(): """ This function writes the systematics to a yaml file. """ @@ -39,35 +39,34 @@ def filter_ATLAS_Z0_8TEV_LOWMASS_systematics(version=3): with open("metadata.yaml", "r") as file: metadata = yaml.safe_load(file) - systematics = get_systematics(version=version) + systematics = get_systematics() # error definition error_definitions = {} errors = [] for sys in systematics: - if (sys[0]['name'] == 'stat') or (sys[0]['name'] == 'sys,uncor'): + if sys[0]['name'] == 'stat': error_definitions[sys[0]['name']] = { "description": f"{sys[0]['name']}", - "treatment": "ADD", - "type": "UNCORR", + "treatment": "MULT", + "type": "CORR", } - elif (sys[0]['name'] == 'ATLAS_LUMI') or (sys[0]['name'] == 'Lumi:M'): - error_definitions[sys[0]['name']] = { - "description": f"{sys[0]['name']}", + elif sys[0]['name'] == 'lumi': + error_definitions["ATLASLUMI13"] = { + "description": f"ATLASLUMI13", "treatment": "MULT", - "type": "CORR", + "type": "SPECIAL", } else: error_definitions[sys[0]['name']] = { "description": f"{sys[0]['name']}", - "treatment": "ADD", + "treatment": "MULT", "type": "CORR", } - # for i in range(metadata['implemented_observables'][0]['ndata']): error_value = {} @@ -79,14 +78,10 @@ def filter_ATLAS_Z0_8TEV_LOWMASS_systematics(version=3): uncertainties_yaml = {"definitions": error_definitions, "bins": errors} # write uncertainties - if version == 1: - with open(f"uncertainties_v1.yaml", 'w') as file: - yaml.dump(uncertainties_yaml, file, sort_keys=False) - else: - with open(f"uncertainties.yaml", 'w') as file: - yaml.dump(uncertainties_yaml, file, sort_keys=False) + with open(f"uncertainties.yaml", 'w') as file: + yaml.dump(uncertainties_yaml, file, sort_keys=False) if __name__ == "__main__": filter_ATLAS_WPWM_13TEV_TOT_data_kinetic() - # filter_ATLAS_Z0_8TEV_LOWMASS_systematics() + filter_ATLAS_WPWM_13TEV_TOT_systematics() diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter_utils.py index c37c0ddebe..36833113e8 100644 --- a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/filter_utils.py @@ -4,6 +4,8 @@ """ import yaml +import numpy as np +from nnpdf_data.filter_utils.utils import decompose_covmat def get_kinematics(): @@ -50,38 +52,66 @@ def get_data_values(): return data_central -def get_systematics(version=3): - """ """ +def get_correlation_matrix(): + """ + See extra material page: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/STDM-2015-03/tabaux_03.pdf - uncertainties = [] + Note that this does not include the normalisation uncertainty due to the luminosity. + """ + + correlation_matrix = np.ones((2, 2)) + correlation_matrix[0, 1] = 0.93 + correlation_matrix[1, 0] = correlation_matrix[0, 1] + + return correlation_matrix + + +def get_covariance_matrices(): + """ + For the systematics see Table 3 of paper: https://arxiv.org/abs/1603.09222 + + Returns: + -------- + tuple: (cov_matrix_no_lumi, lumi_cov) + cov_matrix_no_lumi: np.array, the sum of stat and syst covmats -> to be decomposed into artificial systematics + lumi_cov: np.array, the lumi covmat. This is correlated between experiments so needs to be saved with type: SPECIAL + """ + corr_matrix = get_correlation_matrix() + cv = get_data_values() + + stat_wm = 0.01 * cv[0] + stat_wp = 0.01 * cv[1] + + syst_wm = 0.07 * cv[0] + syst_wp = 0.09 * cv[1] - hepdata_table = f"rawdata/HEPData-ins1630886-v{version}-Table_5.yaml" + lumi_wm = 0.10 * cv[0] + lumi_wp = 0.07 * cv[1] - with open(hepdata_table, 'r') as file: - input = yaml.safe_load(file) + stat_cov = np.diag([stat_wm**2, stat_wp**2]) + # lumi_cov = np.einsum("i,j->ij", np.array([lumi_wm, lumi_wp]), np.array([lumi_wm, lumi_wp])) + lumi_unc = np.array([lumi_wm, lumi_wp]) + syst_cov = corr_matrix * np.outer(np.array([syst_wm, syst_wp]), np.array([syst_wm, syst_wp])) - # loop over systematics - for unc_labels in input['dependent_variables'][0]['values'][0]['errors']: + cov_matrix_no_lumi = stat_cov + syst_cov - name = f"{unc_labels['label']}" - values = [] + return cov_matrix_no_lumi, lumi_unc - # loop over data points - for unc in input['dependent_variables'][0]['values']: - err = unc['errors'] - # convert unc from TeV to GeV - for e in err: - if e['label'] == name: - if name == 'Lumi:M': - values.append(e['symerror'] * unc['value'] * 1000) - else: - values.append(e['symerror'] * 1000) - uncertainties.append([{"name": name, "values": values}]) +def get_systematics(): + """ + Does cholesky decomposition of syst + stat covmat and returns uncertainties + list with artificial sys + lumi uncertainties. + """ + cov_matrix_no_lumi, lumi_unc = get_covariance_matrices() + + # decompose covmat + syst_unc = decompose_covmat(cov_matrix_no_lumi) + + uncertainties = [] + + uncertainties.append([{"name": "stat", "values": [syst_unc[0, 0], syst_unc[1, 0]]}]) + uncertainties.append([{"name": "sys1", "values": [syst_unc[0, 1], syst_unc[1, 1]]}]) + uncertainties.append([{"name": "ATLAS_LUMI", "values": [lumi_unc[0], lumi_unc[1]]}]) - # # Luminosity uncertainty is 1.8 % of the central value (see https://inspirehep.net/literature/1630886) - if version == 3: # in version 1 Lumi is included in the hepdata file already - name = "ATLAS_LUMI" - values = [ATLAS_LUMI_UNC * val for val in get_data_values()] - uncertainties.append([{"name": name, "values": values}]) return uncertainties diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/metadata.yaml index 12576f0e67..8f89740bf2 100644 --- a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/metadata.yaml @@ -1,6 +1,6 @@ setname: ATLAS_WPWM_13TEV version: 1 -version_comment: New implementation +version_comment: New implementation, as for the old one, data is taken from Table 3 of the paper nnpdf_metadata: nnpdf31_process: DY CC experiment: ATLAS @@ -39,30 +39,31 @@ implemented_observables: plot_x: ' ' kinematic_coverage: - k1 - - k2 - - k3 + - M2 + - sqrts kinematics: variables: k1: description: Variable k1 label: k1 units: '' - k2: + M2: description: Variable k2 label: k2 units: '' - k3: + sqrts: description: Variable k3 label: k3 units: '' - file: kinematics_TOT.yaml + file: kinematics.yaml theory: conversion_factor: 1.0 operation: 'null' FK_tables: - - ATLAS_WZ_TOT_13TEV-ATLASWZTOT13TEV81PB_WM_tot - ATLAS_WZ_TOT_13TEV-ATLASWZTOT13TEV81PB_WP_tot - data_uncertainties: [] + data_uncertainties: + - uncertainties.yaml variants: legacy: data_uncertainties: diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/rawdata/HEPData-ins1436497-v1-Table_24.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/rawdata/HEPData-ins1436497-v1-Table_24.yaml new file mode 100644 index 0000000000..738bd23b7d --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/rawdata/HEPData-ins1436497-v1-Table_24.yaml @@ -0,0 +1,47 @@ +dependent_variables: +- header: {name: Q} + qualifiers: + - {name: ABS(ETARAP(C=ELECTRON)), value: < 2.5} + - {name: ABS(ETARAP(C=MUON)), value: < 2.5} + - {name: MT, units: GEV, value: '> 50'} + - {name: MZ, units: GEV, value: 66-116} + - {name: PT(C=ELECTRON), units: GEV, value: '> 25'} + - {name: PT(C=MUON), units: GEV, value: '> 25'} + - {name: PT(C=NU), units: GEV, value: '> 25'} + - {name: RE(Q=W+), value: P P --> W+ < E+ MUE + MU+ NUMU > X} + - {name: RE(Q=W-), value: P P --> W- < E- NUEBAR + MU- NUMUBAR > X} + - {name: RE(Q=Z/GAMMA*), value: P P --> ( Z0 < E+ E- + MU+ MU- > + GAMMA* < E+ E- + + MU+ MU- > ) X} + values: + - {value: P P --> W- < E- NUEBAR + MU- NUMUBAR> X} + - {value: P P --> W- < E- NUEBAR + MU- NUMUBAR> X} + - {value: P P --> W+ < E+ NUE + MU+ NUMU> X} + - {value: P P --> W+ < E+ NUE + MU+ NUMU> X} + - {value: P P --> ( Z0 < E+ E- + MU+ MU- > + GAMMA* < E+ E- + MU+ MU- > ) X} + - {value: P P --> ( Z0 < E+ E- + MU+ MU- > + GAMMA* < E+ E- + MU+ MU- > ) X} + - {value: 0.93} + - {value: 0.18} + - {value: 0.19} +independent_variables: +- header: {name: SQRT(S), units: GEV} + values: + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} +- header: {name: SQRT(S), units: GEV} + values: + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} + - {value: 13000.0} diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/uncertainties.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/uncertainties.yaml new file mode 100644 index 0000000000..9229df46e7 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/ATLAS_WPWM_13TEV/uncertainties.yaml @@ -0,0 +1,20 @@ +definitions: + stat: + description: stat + treatment: MULT + type: CORR + sys1: + description: sys1 + treatment: MULT + type: CORR + ATLAS_LUMI: + description: ATLAS_LUMI + treatment: MULT + type: CORR +bins: +- stat: -7.52597747e+04 + sys1: -2.35766763e+05 + ATLAS_LUMI: 350000.0 +- stat: 4.35006902e+04 + sys1: -4.07895906e+05 + ATLAS_LUMI: 3.17100000e+05