-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_data.py
105 lines (87 loc) · 3.24 KB
/
preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import ROOT
import uproot
from utils import *
######################################## CONFIGS ########################################
input_rootfile_path = "./entangled_ttbar_atlas.root"
output_rootfile_path = "./ttbar_qunfold.root"
# Observables to process: one set of histograms is written per entry.
variables = [
    Variable(name="c_thetap", nbins=20, bounds=(-1, 1)),
    Variable(name="ttbar_mass", nbins=20, bounds=(300, 2000)),
]
# Events are kept only if their "ttbar_mass_parton" value is strictly above this.
min_energy = 0
#########################################################################################

# Remove statistics info box from ROOT histograms
ROOT.gStyle.SetOptStat(0)

# Open input ROOT file and get tree
tree = uproot.open(input_rootfile_path)["particle_level"]

# Open output ROOT file (overwriting any existing one)
output_rootfile = ROOT.TFile(output_rootfile_path, "RECREATE")
try:
    # One output directory per histogram family
    particle_dir = output_rootfile.mkdir("particle")
    parton_dir = output_rootfile.mkdir("parton")
    migration_dir = output_rootfile.mkdir("migration")

    # Get boolean mask to identify particle and/or parton events
    particle = get_numpy_array(tree, varname="passed_particle_sel", dtype=bool)
    parton = get_numpy_array(tree, varname="passed_parton_sel", dtype=bool)

    # Get events weight (negative for background process events)
    event_weight = get_numpy_array(tree=tree, varname="eventweight")

    # The mass branch and every selection mask are the same for all variables,
    # so they are read/built once here instead of once per loop iteration.
    arr_mass = get_numpy_array(tree, varname="ttbar_mass_parton")
    mass_cut = arr_mass > min_energy
    sel_particle = particle & mass_cut  # passes the particle selection
    sel_parton = parton & mass_cut  # passes the parton selection
    sel_miss = ~particle & parton & mass_cut  # parton only (missed)
    sel_fake = particle & ~parton & mass_cut  # particle only (fake)
    sel_matched = particle & parton & mass_cut  # passes both selections

    for var in variables:
        # Get numpy arrays for particle and parton events
        arr_particle = get_numpy_array(tree, varname=var.name)
        arr_parton = get_numpy_array(tree, varname=var.name_parton)

        # Binning is derived from the parton-level values and shared by every
        # histogram of this variable (details live in utils.find_binning).
        binning = find_binning(x=arr_parton, nbins=var.nbins, bounds=var.bounds)

        # Create and fill histograms for particle(=measured) and parton(=truth) events
        th1_particle = create_histo(
            name=var.name,
            title=var.name,
            binning=binning,
            data=arr_particle,
            weights=event_weight,
            mask=sel_particle,
        )
        th1_parton = create_histo(
            name=var.name_parton,
            title=var.name_parton,
            binning=binning,
            data=arr_parton,
            weights=event_weight,
            mask=sel_parton,
        )

        # Create and fill missed and fake events histograms
        th1_miss = create_histo(
            name=var.name + "_miss",
            title=var.name + "_miss",
            binning=binning,
            data=arr_parton,
            weights=event_weight,
            mask=sel_miss,
        )
        th1_fake = create_histo(
            name=var.name + "_fake",
            title=var.name + "_fake",
            binning=binning,
            data=arr_particle,
            weights=event_weight,
            mask=sel_fake,
        )

        # Create and fill migration matrix histogram (measured X truth)
        migration = create_migration(
            name=var.name + "_migration",
            title=var.name + "_migration",
            binning=binning,
            data_reco=arr_particle,
            data_truth=arr_parton,
            weights=event_weight,
            mask=sel_matched,
        )

        # Write histograms to output ROOT file, each family in its own directory
        particle_dir.cd()
        th1_particle.Write()
        th1_fake.Write()
        parton_dir.cd()
        th1_parton.Write()
        th1_miss.Write()
        migration_dir.cd()
        migration.Write()
finally:
    # Always close the output file, even if reading or filling fails part-way.
    output_rootfile.Close()