Skip to content

Commit

Permalink
Merge pull request #49 from LPC-HH/updates-jan
Browse files Browse the repository at this point in the history
Add selections to skimmer
  • Loading branch information
cmantill authored Jan 29, 2024
2 parents 076195a + 0b935b2 commit 8c539b5
Show file tree
Hide file tree
Showing 9 changed files with 14,690 additions and 68,623 deletions.
537 changes: 115 additions & 422 deletions data/make_filelists.py

Large diffs are not rendered by default.

1,462 changes: 0 additions & 1,462 deletions data/nanoindex_v10.json

This file was deleted.

15,668 changes: 0 additions & 15,668 deletions data/nanoindex_v11.json

This file was deleted.

23,337 changes: 0 additions & 23,337 deletions data/nanoindex_v11_private.json

This file was deleted.

21,923 changes: 12,831 additions & 9,092 deletions data/nanoindex_v12.json

Large diffs are not rendered by default.

3,334 changes: 1,667 additions & 1,667 deletions data/nanoindex_v9.json

Large diffs are not rendered by default.

16,883 changes: 0 additions & 16,883 deletions data/nanoindex_v9_privatepfnano.json

This file was deleted.

150 changes: 63 additions & 87 deletions src/HH4b/processors/bbbbSkimmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
add_pileup_weight,
add_trig_weights,
get_jec_jets,
# get_jmsr,
get_jetveto_event,
)

# get_jmsr,
from .GenSelection import gen_selection_Hbb, gen_selection_HHbbbb
from .utils import P4, PAD_VAL, add_selection, dump_table, pad_val, to_pandas

Expand Down Expand Up @@ -71,8 +72,8 @@ class bbbbSkimmer(processor.ProcessorABC):

preselection = { # noqa: RUF012
"fatjet_pt": 300,
"fatjet_msd": 40,
"fatjet_mreg": 40,
"fatjet_msd": 60,
"fatjet_mreg": 60,
"Txbb0": 0.8,
}

Expand Down Expand Up @@ -125,6 +126,17 @@ def __init__(

self._nano_version = nano_version

# https://twiki.cern.ch/twiki/bin/viewauth/CMS/MissingETOptionalFiltersRun2#Run_3_recommendations
self.met_filters = [
"goodVertices",
"globalSuperTightHalo2016Filter",
"EcalDeadCellTriggerPrimitiveFilter",
"BadPFMuonFilter",
"BadPFMuonDzFilter",
"eeBadScFilter",
"ecalBadCalibFilter",
]

"""
signal:
"""
Expand Down Expand Up @@ -167,24 +179,22 @@ def process(self, events: ak.Array):

cutflow = OrderedDict()
cutflow["all"] = n_events

# preselection = (
# (ak.count(events.FatJet.pt, axis=1) >= 2)
# * (ak.all(events.FatJet.pt[:, :2] >= 200, axis=1))
# )
# events = events[preselection]
# gen_weights = gen_weights[preselection] if gen_weights is not None else gen_weights
# cutflow["2jetpreselection"] = len(events) if isData else np.sum(gen_weights)

selection = PackedSelection()
weights = Weights(len(events), storeIndividual=True)
selection_args = (selection, cutflow, isData, gen_weights)

#########################
# Object definitions
#########################
if year == "2018":
veto_muon_sel = objects.veto_muons_run2(events.Muon)
veto_electron_sel = objects.veto_electrons_run2(events.Electron)
else:
veto_muon_sel = objects.veto_muons(events.Muon)
veto_electron_sel = objects.veto_electrons(events.Electron)

num_jets = 6
print("Starting Objects", f"{time.time() - start:.2f}")
print("starting object selection", f"{time.time() - start:.2f}")
# TODO: this is tricky - should we apply JECs first and then the selection (including vetoes)?
jets, jec_shifted_jetvars = get_jec_jets(
events,
Expand All @@ -198,11 +208,12 @@ def process(self, events: ak.Array):
dataset=dataset,
nano_version=self._nano_version,
)
print("ak4 JECs", f"{time.time() - start:.2f}")
jets_sel = objects.good_ak4jets(jets, year, events.run.to_numpy(), isData)
jets = jets[jets_sel]
ht = ak.sum(jets.pt, axis=1)

num_fatjets = 2 # number to save
num_fatjets = 3 # number to save
num_fatjets_cut = 2 # number to consider for selection
fatjets = objects.get_ak8jets(events.FatJet)
print("ak8 jets", f"{time.time() - start:.2f}")
Expand All @@ -224,20 +235,20 @@ def process(self, events: ak.Array):
fatjets = fatjets[fatjets_sel]
fatjet_0 = ak.firsts(fatjets)

# VBF objects
# similar to run 2 selection
vbf_jets = jets[(jets.pt > 25) & (jets.delta_r(fatjet_0) > 1.2)]
vbf_jet_0 = vbf_jets[:, 0:1]
vbf_jet_1 = vbf_jets[:, 1:2]
vbf_mass = (ak.firsts(vbf_jet_0) + ak.firsts(vbf_jet_1)).mass
vbf_deta = abs(ak.firsts(vbf_jet_0).eta - ak.firsts(vbf_jet_1).eta)
vbf_selection = (vbf_mass > 500) & (vbf_deta > 4.0)
# jmsr_shifted_vars = get_jmsr(fatjets, num_fatjets, year, isData)

#########################
# Save / derive variables
#########################

# gen variables - saving HH and bbbb 4-vector info
# Gen variables - saving HH and bbbb 4-vector info
genVars = {}
for d in gen_selection_dict:
if d in dataset:
Expand Down Expand Up @@ -293,13 +304,13 @@ def process(self, events: ak.Array):
# overwrite saved mass vars with corrected ones
label = "" if shift == "" else "_" + shift
ak8FatJetVars[f"ak8FatJet{key}{label}"] = vals
"""

# dijet variables
fatDijetVars = {}
for shift in jec_shifted_fatjetvars["pt"]:
label = "" if shift == "" else "_" + shift
fatDijetVars = {**fatDijetVars, **self.getFatDijetVars(ak8FatJetVars, pt_shift=label)}
# TODO: add shifts to dijet variables
fatDijetVars = self.getFatDijetVars(ak8FatJetVars, pt_shift="")

"""
for shift in jmsr_shifted_vars["msoftdrop"]:
if shift != "":
label = "_" + shift
Expand All @@ -312,23 +323,17 @@ def process(self, events: ak.Array):
eventVars = {
key: events[key].to_numpy() for key in self.skim_vars["Event"] if key in events.fields
}

eventVars["ht"] = ht.to_numpy()
eventVars["nJets"] = ak.sum(jets_sel, axis=1).to_numpy()
eventVars["nFatJets"] = ak.sum(fatjets_sel, axis=1).to_numpy()

print("Event vars", f"{time.time() - start:.2f}")

if isData:
pileupVars = {key: np.ones(len(events)) * PAD_VAL for key in self.skim_vars["Pileup"]}
else:
eventVars["lumi"] = np.ones(len(events)) * PAD_VAL
pileupVars = {key: events.Pileup[key].to_numpy() for key in self.skim_vars["Pileup"]}

pileupVars = {**pileupVars, "nPV": events.PV["npvs"].to_numpy()}

print("pileup vars", f"{time.time() - start:.2f}")

otherVars = {
key: events[var.split("_")[0]]["_".join(var.split("_")[1:])].to_numpy()
for (var, key) in self.skim_vars["Other"].items()
Expand All @@ -344,6 +349,7 @@ def process(self, events: ak.Array):
"AK8PFJet230_SoftDropMass40_PFAK8ParticleNetBB0p35",
"AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35",
"AK8PFJet275_SoftDropMass40_PFAK8ParticleNetBB0p35",
"AK8PFJet230_SoftDropMass40",
]
)

Expand All @@ -363,6 +369,7 @@ def process(self, events: ak.Array):
**genVars,
**ak4JetVars,
**ak8FatJetVars,
**fatDijetVars,
**eventVars,
**pileupVars,
**otherVars,
Expand Down Expand Up @@ -394,62 +401,54 @@ def process(self, events: ak.Array):
if apply_trigger:
add_selection("trigger", HLT_triggered, *selection_args)

# temporary metfilters https://twiki.cern.ch/twiki/bin/viewauth/CMS/MissingETOptionalFiltersRun2#Run_3_recommendations
met_filters = [
"goodVertices",
"globalSuperTightHalo2016Filter",
"EcalDeadCellTriggerPrimitiveFilter",
"BadPFMuonFilter",
"BadPFMuonDzFilter",
"eeBadScFilter",
"ecalBadCalibFilter",
]
metfilters = np.ones(len(events), dtype="bool")
# metfilterkey = "data" if isData else "mc"
for mf in met_filters:
# metfilters
cut_metfilters = np.ones(len(events), dtype="bool")
for mf in self.met_filters:
if mf in events.Flag.fields:
metfilters = metfilters & events.Flag[mf]
# add_selection("met_filters", metfilters, *selection_args)
cut_metfilters = cut_metfilters & events.Flag[mf]
add_selection("met_filters", cut_metfilters, *selection_args)

# jet veto maps
if year == "2022" or year == "2022EE":
jetveto_selection = get_jetveto_event(jets, year, events.run.to_numpy(), isData)
add_selection("ak4_jetveto", jetveto_selection, *selection_args)
cut_jetveto = get_jetveto_event(jets, year, events.run.to_numpy(), isData)
add_selection("ak4_jetveto", cut_jetveto, *selection_args)

# at least two fatjets
add_selection("ak8_numjets", (ak.num(fatjets) >= 2), *selection_args)

# BOTH fatjets with pt above self.preselection["fatjet_pt"]
# TODO: check if fatjet passes pt cut in any of the JEC variations
cut = np.sum(ak8FatJetVars["ak8FatJetPt"] >= self.preselection["fatjet_pt"], axis=1)
add_selection("ak8_pt", cut, *selection_args)
cut_pt = np.sum(ak8FatJetVars["ak8FatJetPt"] >= self.preselection["fatjet_pt"], axis=1) >= 2
add_selection("ak8_pt", cut_pt, *selection_args)

# BOTH fatjets with OR of msd or mpnet
# TODO: check if fatjet passes mass cut in any of the JMS/R variations
# cut_mpnet = np.all(
# ak8FatJetVars["ak8FatJetPNetMass"] >= self.preselection["fatjet_mreg"], axis=1
# )
cut_msd = np.all(ak8FatJetVars["ak8FatJetMsd"] >= self.preselection["fatjet_msd"], axis=1)
add_selection("ak8_msd", cut_msd, *selection_args)

# num_leptons = 2
if year == "2018":
veto_muon_sel = objects.veto_muons_run2(events.Muon)
veto_electron_sel = objects.veto_electrons_run2(events.Electron)
else:
veto_muon_sel = objects.veto_muons(events.Muon)
veto_electron_sel = objects.veto_electrons(events.Electron)

print("Lepton vetoes", f"{time.time() - start:.2f}")
cut_mass = (
np.sum(
(ak8FatJetVars["ak8FatJetMsd"] >= self.preselection["fatjet_msd"])
| (ak8FatJetVars["ak8FatJetPNetMass"] >= self.preselection["fatjet_mreg"]),
axis=1,
)
>= 2
)
add_selection("ak8_mass", cut_mass, *selection_args)

# veto leptons
# no leptons
add_selection(
"0lep",
(ak.sum(veto_muon_sel, axis=1) == 0) & (ak.sum(veto_electron_sel, axis=1) == 0),
*selection_args,
)

# Txbb pre-selection cut
# txbb_cut = np.sum(ak8FatJetVars["ak8FatJetPNetXbb"] >= self.preselection["Txbb0"], axis=1)
# add_selection("ak8bb_txbb0", txbb_cut, *selection_args)
cut_txbb = (
np.sum(ak8FatJetVars["ak8FatJetPNetXbb"] >= self.preselection["Txbb0"], axis=1) >= 1
)
add_selection("ak8bb_txbb0", cut_txbb, *selection_args)

# VBF veto cut
add_selection("vbf_veto", ~(vbf_selection), *selection_args)
cut_vbf = (vbf_mass > 500) & (vbf_deta > 4.0)
add_selection("vbf_veto", ~(cut_vbf), *selection_args)

print("Selection", f"{time.time() - start:.2f}")

Expand All @@ -466,29 +465,6 @@ def process(self, events: ak.Array):

add_trig_weights(weights, fatjets, year, num_fatjets_cut)

# add_VJets_kFactors(weights, events.GenPart, dataset)

# if dataset.startswith("TTTo"):
# # TODO: need to add uncertainties and rescale yields (?)
# add_top_pt_weight(weights, events)

# TODO: figure out which of these apply to VBF, single Higgs, ttbar etc.

"""
if "GluGlutoHHto4B" in dataset or "WJets" in dataset or "ZJets" in dataset:
add_ps_weight(weights, events.PSWeight)
if "GluGlutoHHto4B" in dataset:
if "LHEPdfWeight" in events.fields:
add_pdf_weight(weights, events.LHEPdfWeight)
else:
add_pdf_weight(weights, [])
if "LHEScaleWeight" in events.fields:
add_scalevar_7pt(weights, events.LHEScaleWeight)
else:
add_scalevar_7pt(weights, [])
"""

# xsec and luminosity and normalization
# this still needs to be normalized with the acceptance of the pre-selection (done in post processing)
if dataset in self.XSECS:
Expand Down
19 changes: 14 additions & 5 deletions src/condor/submit_configs/skimmer_24_01_18_v12.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
{
"2022": {
"TT": {"subsamples": ["TTto2L2Nu", "TTto4Q", "TTtoLNu2Q"], "files_per_job": 10},
"2022EE": {
"TT": {"subsamples": ["TTto4Q", "TTtoLNu2Q"], "files_per_job": 10},
"HH": {
"subsamples": ["VBFHHto4B_CV_1_C2V_1_C3_1_TuneCP5_13p6TeV_madgraph-pythia8", "GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_Private"],
"subsamples": [
"VBFHHto4B_CV_1_C2V_1_C3_1_TuneCP5_13p6TeV_madgraph-pythia8",
"GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV"
],
"files_per_job": 10},
"Hbb": {
"subsamples": [
"WminusH_Hto2B_Wto2Q_M-125",
"WplusH_Hto2B_Wto2Q_M-125",
"ggZH_Hto2B_Zto2Q_M-125",
"ZH_Hto2B_Zto2Q_M-125"
],
"files_per_job": 10},
"Hbb": {"files_per_job": 10},
"QCD": {
"subsamples": [
"QCD_HT-100to200",
"QCD_HT-200to400",
"QCD_HT-400to600",
"QCD_HT-600to800",
Expand Down

0 comments on commit 8c539b5

Please sign in to comment.