Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement ATLAS_Z0_7TEV_49FB_HIMASS #2178

Merged
merged 20 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
data_central:
- 224.0
- 102.0
- 51.2
- 28.400000000000002
- 18.700000000000003
- 10.7
- 8.229999999999999
- 4.66
- 1.7
- 0.474
- 0.146
- 0.022099999999999998
- 0.0028799999999999997
91 changes: 91 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
filter.py module for ATLAS_Z0_7TEV_49FB dataset
When running `python filter.py` the relevant yaml files (data.yaml,
kinematics.yaml and uncertainties.yaml) will be created in the
`nnpdf_data/commondata/ATLAS_Z0_7TEV_49FB` directory.
"""

import yaml
from filter_utils import get_data_values, get_kinematics, get_systematics


def filter_ATLAS_Z0_7TEV_49FB_data_kinetic():
    """
    Dump the central values and the kinematic bins of the dataset.

    The central values returned by `get_data_values` are stored under the
    `data_central` key of `data.yaml`, and the bins returned by
    `get_kinematics` under the `bins` key of `kinematics.yaml`.
    Both files are written relative to the current working directory.
    """
    # Both helpers are evaluated before any file is opened, so a failure
    # while reading the raw data leaves no partially written output behind.
    outputs = (
        ("data.yaml", {"data_central": list(get_data_values())}),
        ("kinematics.yaml", {"bins": get_kinematics()}),
    )

    for filename, payload in outputs:
        with open(filename, "w") as stream:
            # sort_keys=False keeps the insertion order of the dictionaries
            yaml.dump(payload, stream, sort_keys=False)


# (treatment, type) of the uncertainty sources that are not plain
# multiplicative, bin-to-bin correlated systematics.
_SPECIAL_SOURCES = {
    'Stat': ("ADD", "UNCORR"),
    'Nbkg_stat': ("MULT", "UNCORR"),
    'CDY_stat': ("MULT", "UNCORR"),
    'Lumi': ("MULT", "ATLASLUMI11"),
}


def _error_definition(name):
    """Return the `definitions` entry of the uncertainty source called `name`."""
    # any source not listed above is treated as MULT / CORR
    treatment, unc_type = _SPECIAL_SOURCES.get(name, ("MULT", "CORR"))
    return {"description": str(name), "treatment": treatment, "type": unc_type}


def filter_ATLAS_Z0_7TEV_49FB_systematics():
    """
    This function writes the systematics to a yaml file.

    The number of data points is read from `metadata.yaml` (`ndata` of the
    first implemented observable) and the uncertainty sources are taken from
    `get_systematics`. The resulting `uncertainties.yaml` contains:

    - `definitions`: one entry per source with description, treatment and type
    - `bins`: one dictionary per data point mapping each source name to its
      value (converted to a plain float)

    All files are read from / written to the current working directory.
    """
    with open("metadata.yaml", "r") as file:
        metadata = yaml.safe_load(file)

    # every element returned by get_systematics is a one-element list
    # wrapping a {"name": ..., "values": ...} dictionary
    sources = [entry[0] for entry in get_systematics()]

    # error definition
    error_definitions = {src['name']: _error_definition(src['name']) for src in sources}

    # one dictionary of {source name: value} per data point
    ndata = metadata['implemented_observables'][0]['ndata']
    errors = [{src['name']: float(src['values'][i]) for src in sources} for i in range(ndata)]

    uncertainties_yaml = {"definitions": error_definitions, "bins": errors}

    # write uncertainties
    with open("uncertainties.yaml", 'w') as file:
        yaml.dump(uncertainties_yaml, file, sort_keys=False)


# Script entry point: regenerate data.yaml and kinematics.yaml first, then
# uncertainties.yaml (which additionally needs metadata.yaml to be present).
if __name__ == "__main__":
    filter_ATLAS_Z0_7TEV_49FB_data_kinetic()
    filter_ATLAS_Z0_7TEV_49FB_systematics()
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
This module contains helper functions that are used to extract the data values
from the rawdata files.
"""

import yaml
import pandas as pd
import numpy as np


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe some of the utilities in this file can either use utilities in https://github.com/NNPDF/nnpdf/tree/master/nnpdf_data/nnpdf_data/filter_utils (or, if not, be added there)

def get_data_values():
    """
    Read the central values of the measurement from the HEPData table.

    Only the first dependent variable of
    `rawdata/HEPData-ins1234228-v1-Table_1.yaml` is used and every value is
    multiplied by 1000.

    Returns
    -------
    list
        the rescaled central values, in the same order as in the table
    """
    table_path = "rawdata/HEPData-ins1234228-v1-Table_1.yaml"

    with open(table_path, 'r') as stream:
        table = yaml.safe_load(stream)

    entries = table['dependent_variables'][0]['values']

    # the factor 1000 is a unit conversion
    # (presumably pb/GeV -> fb/GeV; confirm against the metadata units)
    return [entry['value'] * 1000 for entry in entries]


def get_kinematics():
    """
    returns the kinematics in the form of a list of dictionaries.

    One dictionary is built for each bin of the first independent variable of
    `rawdata/HEPData-ins1234228-v1-Table_1.yaml`. Each dictionary has the keys
    `y`, `M2` and `sqrts`, every one of them mapping to a `min`/`mid`/`max`
    dictionary:

    - `y`: only `mid` is filled, with the midpoint of the bin
    - `M2`: squares of the lower edge, of the midpoint and of the upper edge
    - `sqrts`: only `mid` is filled, with the constant 7000.0
    """
    hepdata_table = "rawdata/HEPData-ins1234228-v1-Table_1.yaml"

    with open(hepdata_table, 'r') as file:
        table = yaml.safe_load(file)

    kin = []
    for mass_bin in table["independent_variables"][0]['values']:
        low, high = mass_bin['low'], mass_bin['high']
        mid = 0.5 * (low + high)

        kin.append(
            {
                # NOTE(review): the bins are M(EE) bins, so `y` stores the
                # invariant-mass midpoint rather than a rapidity. The key is
                # kept as is because consumers of kinematics.yaml may rely
                # on it; confirm the intended variable name.
                'y': {'min': None, 'mid': mid, 'max': None},
                'M2': {'min': low**2, 'mid': mid**2, 'max': high**2},
                'sqrts': {'min': None, 'mid': 7000.0, 'max': None},
            }
        )

    return kin


def get_systematics_dataframe():
    """
    Build the table of absolute uncertainties as a pandas dataframe.

    The percentage uncertainties are read from
    `rawdata/ATLAS-49fb-Zhighmass.csv`; its first two columns are dropped
    and every remaining column is rescaled, row by row, with the central
    values returned by `get_data_values`.

    Returns
    -------
    pd.DataFrame
        one row per data point and one column per uncertainty source
    """
    raw_table = pd.read_csv("rawdata/ATLAS-49fb-Zhighmass.csv")
    central_values = np.array(get_data_values())

    # skip the two leading columns, which are not uncertainties
    percentage_unc = raw_table.iloc[:, 2:]

    # percentage -> absolute: multiply each row by its central value
    # and then divide by 100
    return percentage_unc.mul(central_values, axis=0) / 100


def get_systematics():
    """
    Return the absolute uncertainties grouped by source.

    Every column of the dataframe returned by `get_systematics_dataframe`
    is turned into a one-element list wrapping a dictionary with the keys
    `name` (the column label as a string) and `values` (the entries of the
    column, one per data point).

    Returns
    -------
    list
        one `[{"name": ..., "values": [...]}]` entry per column
    """
    abs_unc_df = get_systematics_dataframe()

    # after the transposition each row holds all the values of one source
    per_source = abs_unc_df.values.T

    return [
        [{"name": f"{label}", "values": list(column)}]
        for label, column in zip(abs_unc_df.columns, per_source)
    ]


# Running this module directly only calls the two readers below and discards
# their return values; nothing is printed or written from here.
if __name__ == "__main__":
    get_data_values()
    get_systematics_dataframe()
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ arXiv:
url: https://arxiv.org/abs/1305.4192
journal: Phys.Lett. B725 (2013) 223
iNSPIRE:
url: ''
url: 'https://inspirehep.net/literature/1234228'
hepdata:
url: 10.17182/hepdata.61422.v1/t1
url: https://www.hepdata.net/record/ins1234228
version: -1
implemented_observables:
- observable_name: HIMASS
Expand All @@ -19,7 +19,7 @@ implemented_observables:
label: ATLAS HM DY 7 TeV
units: ''
process_type: EWK_MLL
tables: []
tables: [1]
npoints: []
ndata: 13
plotting:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
mee_xmin,mee_xmax,Stat,Nbkg_stat,CDY_stat,Nbkg,Reco,Id,Energy_scale_res,Unfolding,Trigger,MC_Modelling,Theoretical,Lumi
116,130,1.1,0.1,0.7,1.3,1.6,2.3,2.1,1.5,0.8,0.2,0.3,1.8
130,150,1.4,0.2,0.7,1.8,1.6,2.3,1.7,1.5,0.8,0.5,0.2,1.8
150,170,2.0,0.3,1.0,2.5,1.6,2.3,1.6,1.5,0.8,0.2,0.2,1.8
170,190,2.7,0.4,1.3,2.8,1.6,2.3,1.0,1.5,0.8,0.2,0.2,1.8
190,210,3.0,0.5,1.7,3.4,1.6,2.4,1.5,1.5,0.8,0.3,0.4,1.8
210,230,4.4,0.9,2.0,4.1,1.6,2.4,2.0,1.5,0.8,0.8,0.5,1.8
230,250,5.2,0.9,2.4,3.8,1.6,2.4,1.2,1.5,0.8,0.2,0.3,1.8
250,300,4.3,0.7,0.9,4.1,1.6,2.4,1.7,1.5,0.8,0.2,0.2,1.8
300,400,5.1,0.9,1.0,4.4,1.6,2.5,1.7,1.5,0.8,0.3,0.3,1.8
400,500,9.4,2.0,0.9,4.0,1.6,2.6,2.3,1.5,0.8,0.5,0.4,1.8
500,700,11,2.0,0.8,3.1,1.6,2.6,2.4,1.5,0.8,0.2,0.3,1.8
700,1000,24,4.0,0.6,4.3,1.6,2.6,2.8,1.5,0.8,0.2,0.4,1.8
1000,1500,50,7.6,0.4,3.1,1.7,2.5,3.3,1.5,0.8,0.3,0.4,1.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Born level
============================================================================================================
mee mee Stat. Nbkg CDY Nbkg Reco. Id. Energy Unfolding Trigger MC Theoretical Lumi
xmin xmax stat. stat. scale&res modelling
GeV <-uncorrelated (%)-> <---------------------bin-to-bin correlated (%)--------------------->
============================================================================================================
116 130 1.1 0.1 0.7 1.3 1.6 2.3 2.1 1.5 0.8 0.2 0.3 1.8
130 150 1.4 0.2 0.7 1.8 1.6 2.3 1.7 1.5 0.8 0.5 0.2 1.8
150 170 2.0 0.3 1.0 2.5 1.6 2.3 1.6 1.5 0.8 0.2 0.2 1.8
170 190 2.7 0.4 1.3 2.8 1.6 2.3 1.0 1.5 0.8 0.2 0.2 1.8
190 210 3.0 0.5 1.7 3.4 1.6 2.4 1.5 1.5 0.8 0.3 0.4 1.8
210 230 4.4 0.9 2.0 4.1 1.6 2.4 2.0 1.5 0.8 0.8 0.5 1.8
230 250 5.2 0.9 2.4 3.8 1.6 2.4 1.2 1.5 0.8 0.2 0.3 1.8
250 300 4.3 0.7 0.9 4.1 1.6 2.4 1.7 1.5 0.8 0.2 0.2 1.8
300 400 5.1 0.9 1.0 4.4 1.6 2.5 1.7 1.5 0.8 0.3 0.3 1.8
400 500 9.4 2.0 0.9 4.0 1.6 2.6 2.3 1.5 0.8 0.5 0.4 1.8
500 700 11 2.0 0.8 3.1 1.6 2.6 2.4 1.5 0.8 0.2 0.3 1.8
700 1000 24 4.0 0.6 4.3 1.6 2.6 2.8 1.5 0.8 0.2 0.4 1.8
1000 1500 50 7.6 0.4 3.1 1.7 2.5 3.3 1.5 0.8 0.3 0.4 1.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
dependent_variables:
- header: {name: D(SIG)/DM(EE), units: PB/GEV}
qualifiers:
- {name: ABS(ETARAP(EE)), value: < 2.5}
- {name: PT(C=E), units: GEV, value: '> 25'}
- {name: RE, value: P P --> E+ E- X}
- {name: SQRT(S), units: GeV, value: '7000.0'}
- {name: '', value: BORN}
values:
- errors:
- {label: stat, symerror: 1.1%}
- {label: sys, symerror: 4.2%}
value: 0.224
- errors:
- {label: stat, symerror: 1.4%}
- {label: sys, symerror: 4.3%}
value: 0.102
- errors:
- {label: stat, symerror: 2.0%}
- {label: sys, symerror: 4.6%}
value: 0.0512
- errors:
- {label: stat, symerror: 2.7%}
- {label: sys, symerror: 4.7%}
value: 0.0284
- errors:
- {label: stat, symerror: 3.0%}
- {label: sys, symerror: 5.3%}
value: 0.0187
- errors:
- {label: stat, symerror: 4.4%}
- {label: sys, symerror: 6.1%}
value: 0.0107
- errors:
- {label: stat, symerror: 5.2%}
- {label: sys, symerror: 5.9%}
value: 0.00823
- errors:
- {label: stat, symerror: 4.3%}
- {label: sys, symerror: 5.8%}
value: 0.00466
- errors:
- {label: stat, symerror: 5.1%}
- {label: sys, symerror: 5.9%}
value: 0.0017
- errors:
- {label: stat, symerror: 9.4%}
- {label: sys, symerror: 6.3%}
value: 0.000474
- errors:
- {label: stat, symerror: 11.0%}
- {label: sys, symerror: 5.7%}
value: 0.000146
- errors:
- {label: stat, symerror: 24.0%}
- {label: sys, symerror: 7.5%}
value: 2.21e-05
- errors:
- {label: stat, symerror: 50.0%}
- {label: sys, symerror: 9.8%}
value: 2.88e-06
- header: {name: D(SIG)/DM(EE), units: PB/GEV}
qualifiers:
- {name: ABS(ETARAP(EE)), value: < 2.5}
- {name: PT(C=E), units: GEV, value: '> 25'}
- {name: RE, value: P P --> E+ E- X}
- {name: SQRT(S), units: GeV, value: '7000.0'}
- {name: '', value: DRESSED}
values:
- errors:
- {label: stat, symerror: 1.1%}
- {label: sys, symerror: 4.2%}
value: 0.215
- errors:
- {label: stat, symerror: 1.4%}
- {label: sys, symerror: 4.3%}
value: 0.0984
- errors:
- {label: stat, symerror: 2.0%}
- {label: sys, symerror: 4.6%}
value: 0.0493
- errors:
- {label: stat, symerror: 2.7%}
- {label: sys, symerror: 4.7%}
value: 0.0276
- errors:
- {label: stat, symerror: 3.0%}
- {label: sys, symerror: 5.3%}
value: 0.0182
- errors:
- {label: stat, symerror: 4.4%}
- {label: sys, symerror: 6.1%}
value: 0.0104
- errors:
- {label: stat, symerror: 5.2%}
- {label: sys, symerror: 5.9%}
value: 0.00798
- errors:
- {label: stat, symerror: 4.3%}
- {label: sys, symerror: 5.8%}
value: 0.00452
- errors:
- {label: stat, symerror: 5.1%}
- {label: sys, symerror: 5.9%}
value: 0.00165
- errors:
- {label: stat, symerror: 9.4%}
- {label: sys, symerror: 6.3%}
value: 0.000458
- errors:
- {label: stat, symerror: 11.0%}
- {label: sys, symerror: 5.7%}
value: 0.000141
- errors:
- {label: stat, symerror: 24.0%}
- {label: sys, symerror: 7.5%}
value: 2.13e-05
- errors:
- {label: stat, symerror: 50.0%}
- {label: sys, symerror: 9.8%}
value: 2.76e-06
independent_variables:
- header: {name: M(EE), units: GEV}
values:
- {high: 130.0, low: 116.0}
- {high: 150.0, low: 130.0}
- {high: 170.0, low: 150.0}
- {high: 190.0, low: 170.0}
- {high: 210.0, low: 190.0}
- {high: 230.0, low: 210.0}
- {high: 250.0, low: 230.0}
- {high: 300.0, low: 250.0}
- {high: 400.0, low: 300.0}
- {high: 500.0, low: 400.0}
- {high: 700.0, low: 500.0}
- {high: 1000.0, low: 700.0}
- {high: 1500.0, low: 1000.0}
Loading
Loading