NNPDF · scarlehoff · Dec 6, 2024 · Oct 15, 2024 · Oct 16, 2024 · Oct 17, 2024
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/data.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/data.yaml
@@ -0,0 +1,14 @@
+data_central:
+- 224.0
+- 102.0
+- 51.2
+- 28.400000000000002
+- 18.700000000000003
+- 10.7
+- 8.229999999999999
+- 4.66
+- 1.7
+- 0.474
+- 0.146
+- 0.022099999999999998
+- 0.0028799999999999997
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/filter.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/filter.py
@@ -0,0 +1,91 @@
+"""
+filter.py module for ATLAS_Z0_7TEV_49FB dataset
+When running `python filter.py` the relevant data yaml
+file will be created in the `nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB` directory.
+"""
+
+import yaml
+from filter_utils import get_data_values, get_kinematics, get_systematics
+
+
+def filter_ATLAS_Z0_7TEV_49FB_data_kinetic():
+    """
+    Dump the central data values and the kinematic bins to yaml.
+
+    The central values come from `get_data_values` and are written to
+    `data.yaml` under the `data_central` key; the bins come from
+    `get_kinematics` and are written to `kinematics.yaml` under `bins`.
+    Both files are opened with paths relative to the working directory.
+    """
+    # fetch both payloads before any file is opened for writing
+    payloads = {
+        "data.yaml": {"data_central": list(get_data_values())},
+        "kinematics.yaml": {"bins": get_kinematics()},
+    }
+
+    for filename, content in payloads.items():
+        with open(filename, "w") as stream:
+            yaml.dump(content, stream, sort_keys=False)
+
+
+def filter_ATLAS_Z0_7TEV_49FB_systematics():
+    """
+    Write the uncertainty definitions and the per-bin values to a yaml file.
+
+    The sources of uncertainty are taken from `get_systematics` and the
+    number of bins from the `ndata` entry of the first implemented
+    observable in `metadata.yaml`. The result is stored in
+    `uncertainties.yaml` under the keys `definitions` and `bins`.
+
+    Treatment and type assigned to each source:
+      - `Stat`: ADD, UNCORR
+      - `Nbkg_stat` and `CDY_stat`: MULT, UNCORR
+      - `Lumi`: MULT, ATLASLUMI11
+      - any other source: MULT, CORR
+    """
+
+    with open("metadata.yaml", "r") as stream:
+        metadata = yaml.safe_load(stream)
+
+    # every entry returned by get_systematics is a one-element list
+    # wrapping a dictionary with the keys "name" and "values"
+    sources = [entry[0] for entry in get_systematics()]
+
+    # (treatment, type) of the sources that are not plain MULT/CORR
+    special_cases = {
+        'Stat': ("ADD", "UNCORR"),
+        'Nbkg_stat': ("MULT", "UNCORR"),
+        'CDY_stat': ("MULT", "UNCORR"),
+        'Lumi': ("MULT", "ATLASLUMI11"),
+    }
+
+    # error definition: a fresh dictionary is built for every source
+    error_definitions = {}
+    for source in sources:
+        label = source['name']
+        treatment, corr_type = special_cases.get(label, ("MULT", "CORR"))
+        error_definitions[label] = {
+            "description": f"{label}",
+            "treatment": treatment,
+            "type": corr_type,
+        }
+
+    # the number of bins is read from the metadata, not from the rawdata
+    ndata = metadata['implemented_observables'][0]['ndata']
+
+    # one dictionary per bin, mapping the name of each source to its value
+    errors = [
+        {source['name']: float(source['values'][ibin]) for source in sources}
+        for ibin in range(ndata)
+    ]
+
+    uncertainties_yaml = {"definitions": error_definitions, "bins": errors}
+
+    # write uncertainties, keeping the insertion order of the keys
+    with open("uncertainties.yaml", 'w') as stream:
+        yaml.dump(uncertainties_yaml, stream, sort_keys=False)
+
+
+if __name__ == "__main__":
+    filter_ATLAS_Z0_7TEV_49FB_data_kinetic()  # writes data.yaml and kinematics.yaml
+    filter_ATLAS_Z0_7TEV_49FB_systematics()  # writes uncertainties.yaml
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/filter_utils.py
@@ -0,0 +1,91 @@
+"""
+This module contains helper functions that are used to extract the data values 
+from the rawdata files.
+"""
+
+import yaml
+import pandas as pd
+import numpy as np
+
+
+def get_data_values():
+    """
+    Read the central values from the HEPData table and return them as a list.
+
+    Only the first entry of `dependent_variables` is used (labelled BORN in
+    the rawdata file). Each value is multiplied by 1000 to convert the
+    units. NOTE(review): the table is quoted in PB/GEV, so the target
+    unit is presumably fb/GeV; to be confirmed.
+    """
+    table_path = "rawdata/HEPData-ins1234228-v1-Table_1.yaml"
+
+    with open(table_path, 'r') as stream:
+        table = yaml.safe_load(stream)
+
+    # the measurements are stored under the first dependent variable
+    points = table['dependent_variables'][0]['values']
+
+    # rescale every central value while preserving the order of the bins
+    return [point['value'] * 1000 for point in points]
+
+
+def get_kinematics():
+    """
+    Build the kinematic bins from the first independent variable of the
+    HEPData table and return them as a list of dictionaries.
+
+    Each bin has the keys `y`, `M2` and `sqrts`, all with `min`, `mid`
+    and `max` entries. NOTE(review): `y` is filled with the midpoint of
+    the M(EE) bin, exactly as before; confirm that this is intended.
+    """
+    table_path = "rawdata/HEPData-ins1234228-v1-Table_1.yaml"
+
+    with open(table_path, 'r') as stream:
+        table = yaml.safe_load(stream)
+
+    bins = []
+    for edges in table["independent_variables"][0]['values']:
+        lower, upper = edges['low'], edges['high']
+        centre = 0.5 * (lower + upper)
+        m2_entry = {'min': lower**2, 'mid': centre**2, 'max': upper**2}
+        sqrts_entry = {'min': None, 'mid': 7000.0, 'max': None}
+        # y~M(EE)?
+        y_entry = {'min': None, 'mid': centre, 'max': None}
+
+        bins.append({'y': y_entry, 'M2': m2_entry, 'sqrts': sqrts_entry})
+
+    return bins
+
+
+def get_systematics_dataframe():
+    """
+    Return the uncertainties as absolute values in a pandas dataframe with
+    one row per bin and one column per source of uncertainty.
+    """
+    # transposed table without its first two rows (the mass bin edges)
+    percent_unc = pd.read_csv("rawdata/ATLAS-49fb-Zhighmass.csv").T[2:]
+    central_values = np.array(get_data_values())
+
+    # the rawdata quotes percentages of the central value: scale every
+    # source by the central value of its bin and divide by 100
+    absolute_unc = percent_unc * central_values
+    return absolute_unc.T / 100
+
+
+def get_systematics():
+    """
+    Return one entry per source of uncertainty: a one-element list holding
+    a dictionary with the column name (`name`) and its values (`values`).
+    """
+    frame = get_systematics_dataframe()
+
+    # pair every column label with the matching row of the transposed array
+    return [
+        [{"name": f"{label}", "values": list(column)}]
+        for label, column in zip(frame.columns, frame.values.T)
+    ]
+
+
+if __name__ == "__main__":
+    get_data_values()  # return value is discarded
+    get_systematics_dataframe()  # return value is discarded
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/metadata.yaml
@@ -8,9 +8,9 @@ arXiv:
   url: https://arxiv.org/abs/1305.4192
   journal: Phys.Lett. B725 (2013) 223
 iNSPIRE:
-  url: ''
+  url: 'https://inspirehep.net/literature/1234228'
 hepdata:
-  url: 10.17182/hepdata.61422.v1/t1
+  url: https://www.hepdata.net/record/ins1234228
   version: -1
 implemented_observables:
 - observable_name: HIMASS
@@ -19,7 +19,7 @@ implemented_observables:
     label: ATLAS HM DY 7 TeV
     units: ''
   process_type: EWK_MLL
-  tables: []
+  tables: [1]
   npoints: []
   ndata: 13
   plotting:

diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/rawdata/ATLAS-49fb-Zhighmass.csv b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/rawdata/ATLAS-49fb-Zhighmass.csv
@@ -0,0 +1,14 @@
+mee_xmin,mee_xmax,Stat,Nbkg_stat,CDY_stat,Nbkg,Reco,Id,Energy_scale_res,Unfolding,Trigger,MC_Modelling,Theoretical,Lumi
+116,130,1.1,0.1,0.7,1.3,1.6,2.3,2.1,1.5,0.8,0.2,0.3,1.8
+130,150,1.4,0.2,0.7,1.8,1.6,2.3,1.7,1.5,0.8,0.5,0.2,1.8
+150,170,2.0,0.3,1.0,2.5,1.6,2.3,1.6,1.5,0.8,0.2,0.2,1.8
+170,190,2.7,0.4,1.3,2.8,1.6,2.3,1.0,1.5,0.8,0.2,0.2,1.8
+190,210,3.0,0.5,1.7,3.4,1.6,2.4,1.5,1.5,0.8,0.3,0.4,1.8
+210,230,4.4,0.9,2.0,4.1,1.6,2.4,2.0,1.5,0.8,0.8,0.5,1.8
+230,250,5.2,0.9,2.4,3.8,1.6,2.4,1.2,1.5,0.8,0.2,0.3,1.8
+250,300,4.3,0.7,0.9,4.1,1.6,2.4,1.7,1.5,0.8,0.2,0.2,1.8
+300,400,5.1,0.9,1.0,4.4,1.6,2.5,1.7,1.5,0.8,0.3,0.3,1.8
+400,500,9.4,2.0,0.9,4.0,1.6,2.6,2.3,1.5,0.8,0.5,0.4,1.8
+500,700,11,2.0,0.8,3.1,1.6,2.6,2.4,1.5,0.8,0.2,0.3,1.8
+700,1000,24,4.0,0.6,4.3,1.6,2.6,2.8,1.5,0.8,0.2,0.4,1.8
+1000,1500,50,7.6,0.4,3.1,1.7,2.5,3.3,1.5,0.8,0.3,0.4,1.8
diff --git a/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/rawdata/ATLAS-49fb-Zhighmass.sys b/nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/rawdata/ATLAS-49fb-Zhighmass.sys
@@ -0,0 +1,19 @@
+Born level
+============================================================================================================
+mee	mee	Stat.	Nbkg	CDY	Nbkg	Reco.	Id.   Energy Unfolding Trigger	MC  Theoretical	Lumi
+xmin	xmax            stat.  stat.                         scale&res               modelling
+    GeV         <-uncorrelated (%)->   <---------------------bin-to-bin correlated (%)--------------------->
+============================================================================================================
+116	130	1.1	0.1	0.7	1.3	1.6	2.3	2.1	1.5	0.8	0.2	0.3	1.8
+130	150	1.4	0.2	0.7	1.8	1.6	2.3	1.7	1.5	0.8	0.5	0.2	1.8
+150	170	2.0	0.3	1.0	2.5	1.6	2.3	1.6	1.5	0.8	0.2	0.2	1.8
+170	190	2.7	0.4	1.3	2.8	1.6	2.3	1.0	1.5	0.8	0.2	0.2	1.8
+190	210	3.0	0.5	1.7	3.4	1.6	2.4	1.5	1.5	0.8	0.3	0.4	1.8
+210	230	4.4	0.9	2.0	4.1	1.6	2.4	2.0	1.5	0.8	0.8	0.5	1.8
+230	250	5.2	0.9	2.4	3.8	1.6	2.4	1.2	1.5	0.8	0.2	0.3	1.8
+250	300	4.3	0.7	0.9	4.1	1.6	2.4	1.7	1.5	0.8	0.2	0.2	1.8
+300	400	5.1	0.9	1.0	4.4	1.6	2.5	1.7	1.5	0.8	0.3	0.3	1.8
+400	500	9.4	2.0	0.9	4.0	1.6	2.6	2.3	1.5	0.8	0.5	0.4	1.8
+500	700	11	2.0	0.8	3.1	1.6	2.6	2.4	1.5	0.8	0.2	0.3	1.8
+700	1000	24	4.0	0.6	4.3	1.6	2.6	2.8	1.5	0.8	0.2	0.4	1.8
+1000	1500	50	7.6	0.4	3.1	1.7	2.5	3.3	1.5	0.8	0.3	0.4	1.8
diff --git a/..._data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/rawdata/HEPData-ins1234228-v1-Table_1.yaml b/..._data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/rawdata/HEPData-ins1234228-v1-Table_1.yaml
@@ -0,0 +1,137 @@
+dependent_variables:
+- header: {name: D(SIG)/DM(EE), units: PB/GEV}
+  qualifiers:
+  - {name: ABS(ETARAP(EE)), value: < 2.5}
+  - {name: PT(C=E), units: GEV, value: '> 25'}
+  - {name: RE, value: P P --> E+ E- X}
+  - {name: SQRT(S), units: GeV, value: '7000.0'}
+  - {name: '', value: BORN}
+  values:
+  - errors:
+    - {label: stat, symerror: 1.1%}
+    - {label: sys, symerror: 4.2%}
+    value: 0.224
+  - errors:
+    - {label: stat, symerror: 1.4%}
+    - {label: sys, symerror: 4.3%}
+    value: 0.102
+  - errors:
+    - {label: stat, symerror: 2.0%}
+    - {label: sys, symerror: 4.6%}
+    value: 0.0512
+  - errors:
+    - {label: stat, symerror: 2.7%}
+    - {label: sys, symerror: 4.7%}
+    value: 0.0284
+  - errors:
+    - {label: stat, symerror: 3.0%}
+    - {label: sys, symerror: 5.3%}
+    value: 0.0187
+  - errors:
+    - {label: stat, symerror: 4.4%}
+    - {label: sys, symerror: 6.1%}
+    value: 0.0107
+  - errors:
+    - {label: stat, symerror: 5.2%}
+    - {label: sys, symerror: 5.9%}
+    value: 0.00823
+  - errors:
+    - {label: stat, symerror: 4.3%}
+    - {label: sys, symerror: 5.8%}
+    value: 0.00466
+  - errors:
+    - {label: stat, symerror: 5.1%}
+    - {label: sys, symerror: 5.9%}
+    value: 0.0017
+  - errors:
+    - {label: stat, symerror: 9.4%}
+    - {label: sys, symerror: 6.3%}
+    value: 0.000474
+  - errors:
+    - {label: stat, symerror: 11.0%}
+    - {label: sys, symerror: 5.7%}
+    value: 0.000146
+  - errors:
+    - {label: stat, symerror: 24.0%}
+    - {label: sys, symerror: 7.5%}
+    value: 2.21e-05
+  - errors:
+    - {label: stat, symerror: 50.0%}
+    - {label: sys, symerror: 9.8%}
+    value: 2.88e-06
+- header: {name: D(SIG)/DM(EE), units: PB/GEV}
+  qualifiers:
+  - {name: ABS(ETARAP(EE)), value: < 2.5}
+  - {name: PT(C=E), units: GEV, value: '> 25'}
+  - {name: RE, value: P P --> E+ E- X}
+  - {name: SQRT(S), units: GeV, value: '7000.0'}
+  - {name: '', value: DRESSED}
+  values:
+  - errors:
+    - {label: stat, symerror: 1.1%}
+    - {label: sys, symerror: 4.2%}
+    value: 0.215
+  - errors:
+    - {label: stat, symerror: 1.4%}
+    - {label: sys, symerror: 4.3%}
+    value: 0.0984
+  - errors:
+    - {label: stat, symerror: 2.0%}
+    - {label: sys, symerror: 4.6%}
+    value: 0.0493
+  - errors:
+    - {label: stat, symerror: 2.7%}
+    - {label: sys, symerror: 4.7%}
+    value: 0.0276
+  - errors:
+    - {label: stat, symerror: 3.0%}
+    - {label: sys, symerror: 5.3%}
+    value: 0.0182
+  - errors:
+    - {label: stat, symerror: 4.4%}
+    - {label: sys, symerror: 6.1%}
+    value: 0.0104
+  - errors:
+    - {label: stat, symerror: 5.2%}
+    - {label: sys, symerror: 5.9%}
+    value: 0.00798
+  - errors:
+    - {label: stat, symerror: 4.3%}
+    - {label: sys, symerror: 5.8%}
+    value: 0.00452
+  - errors:
+    - {label: stat, symerror: 5.1%}
+    - {label: sys, symerror: 5.9%}
+    value: 0.00165
+  - errors:
+    - {label: stat, symerror: 9.4%}
+    - {label: sys, symerror: 6.3%}
+    value: 0.000458
+  - errors:
+    - {label: stat, symerror: 11.0%}
+    - {label: sys, symerror: 5.7%}
+    value: 0.000141
+  - errors:
+    - {label: stat, symerror: 24.0%}
+    - {label: sys, symerror: 7.5%}
+    value: 2.13e-05
+  - errors:
+    - {label: stat, symerror: 50.0%}
+    - {label: sys, symerror: 9.8%}
+    value: 2.76e-06
+independent_variables:
+- header: {name: M(EE), units: GEV}
+  values:
+  - {high: 130.0, low: 116.0}
+  - {high: 150.0, low: 130.0}
+  - {high: 170.0, low: 150.0}
+  - {high: 190.0, low: 170.0}
+  - {high: 210.0, low: 190.0}
+  - {high: 230.0, low: 210.0}
+  - {high: 250.0, low: 230.0}
+  - {high: 300.0, low: 250.0}
+  - {high: 400.0, low: 300.0}
+  - {high: 500.0, low: 400.0}
+  - {high: 700.0, low: 500.0}
+  - {high: 1000.0, low: 700.0}
+  - {high: 1500.0, low: 1000.0}