diff --git a/src/HH4b/matching_study/MatchingYields_Oct3.ipynb b/src/HH4b/matching_study/MatchingYields_Oct3.ipynb index 58ed7ecd..baaf576c 100644 --- a/src/HH4b/matching_study/MatchingYields_Oct3.ipynb +++ b/src/HH4b/matching_study/MatchingYields_Oct3.ipynb @@ -16,6 +16,17 @@ "vector.register_awkward()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# automatically reloads imported files on edits\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -24,6 +35,8 @@ "source": [ "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as mticker\n", + "from matplotlib.lines import Line2D\n", + "import matplotlib.patches as mpatches\n", "\n", "# mplhep for CMS-style plots\n", "import mplhep as hep\n", @@ -61,8 +74,11 @@ "metadata": {}, "outputs": [], "source": [ + "MAIN_DIR = \"../../../\"\n", "events = pd.read_parquet(\n", - " \"/eos/uscms/store/user/cmantill/bbbb/matching/Oct2/2018/GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8/parquet\"\n", + " #\"/eos/uscms/store/user/cmantill/bbbb/matching/Oct2/2018/GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8/parquet\"\n", + " #f\"{MAIN_DIR}/../data/matching/23Nov18_WSel_v9_private/2018/GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8/parquet\"\n", + " f\"{MAIN_DIR}/../data/matching/Oct30/2018/GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8/parquet\"\n", ")\n", "jets = make_vector(events, \"ak4Jet\")\n", "gen_higgs = make_vector(events, \"GenHiggs\")\n", @@ -157,8 +173,8 @@ "outputs": [], "source": [ "fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", - "hep.histplot(hist_dr_h1, ax=ax, label=\"fJ matched to H1\")\n", - "hep.histplot(hist_dr_h2, ax=ax, label=\"fJ matched to H2\")\n", + "hep.histplot(hist_dr_h1, ax=ax, label=\"fJ matched to H1\", ls='solid')\n", + "hep.histplot(hist_dr_h2, ax=ax, label=\"fJ matched to H2\", ls='dotted')\n", "leg = 
ax.legend(fontsize=10)\n", "leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", "ax.set_xlabel(r\"Max dR(fatjet, b-quark)\")" @@ -186,11 +202,14 @@ "h1m1ak8 = num_ak8m2h1 == 1\n", "h2m1ak8 = num_ak8m2h2 == 1\n", "\n", + "# categorization (prioritize semi-boosted)\n", "boosted = h1m1ak8 & h2m1ak8\n", - "resolved = (h1m2ak4 & h2m2ak4) & ~(boosted) & (num_ak8m2h1 == 0) & (num_ak8m2h2 == 0)\n", - "semi_resolved_h1 = h1m2ak4 & (h2m1ak8 & ~h2m2ak4) & ~(resolved) & ~(boosted)\n", - "semi_resolved_h2 = h2m2ak4 & (h1m1ak8 & ~h1m2ak4) & ~(resolved) & ~(boosted)\n", + "semi_resolved_h1 = h1m2ak4 & h2m1ak8 & ~(boosted)\n", + "semi_resolved_h2 = h2m2ak4 & h1m1ak8 & ~(boosted)\n", "semi_resolved = semi_resolved_h1 | semi_resolved_h2\n", + "resolved = (\n", + " (h1m2ak4 & h2m2ak4) & ~(boosted) & ~(semi_resolved)\n", + ")\n", "not_categorized = ~(resolved | boosted | semi_resolved)" ] }, @@ -221,10 +240,10 @@ "metadata": {}, "outputs": [], "source": [ - "semi_resolved_h1_b2 = h1m2ak4 & (h2m1ak8b2 & ~h2m2ak4) & ~(resolved) & ~(boosted)\n", - "semi_resolved_h2_b2 = h2m2ak4 & (h1m1ak8b2 & ~h1m2ak4) & ~(resolved) & ~(boosted)\n", - "semi_resolved_h1_b1 = h1m2ak4 & (h2m1ak8b1 & ~h2m2ak4) & ~(resolved) & ~(boosted)\n", - "semi_resolved_h2_b1 = h2m2ak4 & (h1m1ak8b1 & ~h1m2ak4) & ~(resolved) & ~(boosted)\n", + "semi_resolved_h1_b2 = h1m2ak4 & h2m1ak8b2 & ~(boosted)\n", + "semi_resolved_h2_b2 = h2m2ak4 & h1m1ak8b2 & ~(boosted)\n", + "semi_resolved_h1_b1 = h1m2ak4 & h2m1ak8b1 & ~(boosted)\n", + "semi_resolved_h2_b1 = h2m2ak4 & h1m1ak8b1 & ~(boosted)\n", "\n", "semi_resolved_h_b2 = semi_resolved_h1_b2 | semi_resolved_h2_b2\n", "semi_resolved_h_b1 = semi_resolved_h1_b1 | semi_resolved_h2_b1" @@ -284,8 +303,8 @@ "mhh_out = mhh[not_categorized]\n", "\n", "# start to plot, code modified on Christina's plot_h1h2_fj\n", - "bins = np.arange(mhh.min(), mhh.max(), 10)\n", - "bins = np.arange(100, 2000, 10)\n", + "bins = np.arange(mhh.min(), mhh.max(), 40)\n", + "bins = np.arange(100, 
2000, 40)\n", "var_axis = hist.axis.Variable(bins, name=\"var\", label=\"variable\")\n", "cat_axis = hist.axis.StrCategory([], name=\"cat\", growth=True)\n", "\n", @@ -296,11 +315,24 @@ "hist_mhh.fill(var=mhh_out, cat=\"non-categorized\")\n", "\n", "fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", - "hist_mhh[{\"cat\": \"boosted\"}].plot1d(ax=ax, label=\"Boosted\")\n", - "hist_mhh[{\"cat\": \"resolved\"}].plot1d(ax=ax, label=\"Resolved\")\n", - "hist_mhh[{\"cat\": \"semi_resolved\"}].plot1d(ax=ax, label=\"Semi-Resolved\")\n", - "hist_mhh[{\"cat\": \"non-categorized\"}].plot1d(ax=ax, label=\"Outside of these categories\")\n", - "leg = ax.legend(fontsize=10)\n", + "legend_elements = []\n", + "hist_mhh[{\"cat\": \"boosted\"}].plot1d(ax=ax, label=\"Boosted\", hatch=r'\\\\\\\\', alpha=0.2, histtype='fill', color='blue')\n", + "hist_mhh[{\"cat\": \"resolved\"}].plot1d(ax=ax, label=\"Resolved\", lw=1, color='orange')\n", + "hist_mhh[{\"cat\": \"semi_resolved\"}].plot1d(ax=ax, label=\"Semi-Resolved\", lw=2, color='green')\n", + "hist_mhh[{\"cat\": \"non-categorized\"}].plot1d(ax=ax, label=\"Outside of these categories\", lw=2, ls='dotted', color='red')\n", + "legend_elements.append(\n", + " mpatches.Patch(color='blue', alpha=0.2, hatch=r'\\\\\\\\',label='Boosted')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='solid', lw=1, label=\"Resolved\", color='orange')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='solid', lw=2, label=\"Semi-Resolved\", color='green')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='dotted', lw=2, label=\"Outside of these categories\", color='red')\n", + ")\n", + "leg = ax.legend(handles=legend_elements, fontsize=10)\n", "leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", "ax.set_xlim(0, 2000)\n", "ax.set_xlabel(r\"$m_{HH}$\")" @@ -312,22 +344,7 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", - 
"hep.histplot(hist_mhh[{\"cat\": \"boosted\"}], ax=ax, label=\"Boosted\")\n", - "hep.histplot(hist_mhh[{\"cat\": \"semi_resolved\"}], ax=ax, label=\"Semi-Resolved\")\n", - "leg = ax.legend(fontsize=10)\n", - "leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", - "ax.set_xlabel(r\"$m_{HH}$\")\n", - "ax.set_ylim(0, 5000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bins = np.arange(mhh.min(), mhh.max(), 10)\n", + "bins = np.arange(100, 2000, 40)\n", "var_axis = hist.axis.Variable(bins, name=\"var\", label=\"variable\")\n", "cat_axis = hist.axis.StrCategory([], name=\"cat\", growth=True)\n", "\n", @@ -337,9 +354,9 @@ "hist_mhh.fill(var=mhh[boosted_nb1nb2], cat=\"boosted-2b1b\")\n", "\n", "fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", - "hep.histplot(hist_mhh[{\"cat\": \"boosted-2b\"}], ax=ax, label=\"Boosted (2 bs)\")\n", - "hep.histplot(hist_mhh[{\"cat\": \"boosted-1b\"}], ax=ax, label=\"Boosted (1 bs)\")\n", - "hep.histplot(hist_mhh[{\"cat\": \"boosted-2b1b\"}], ax=ax, label=\"Boosted (1b, 2bs matched)\")\n", + "hep.histplot(hist_mhh[{\"cat\": \"boosted-2b\"}], ax=ax, label=\"Boosted (2 bs)\", hatch=r'\\\\\\\\', alpha=0.2, histtype='fill', color='blue')\n", + "hep.histplot(hist_mhh[{\"cat\": \"boosted-1b\"}], ax=ax, label=\"Boosted (1 bs)\", hatch='+', alpha=0.4, histtype='fill', color='blue')\n", + "hep.histplot(hist_mhh[{\"cat\": \"boosted-2b1b\"}], ax=ax, label=\"Boosted (1b, 2bs matched)\", hatch='--', alpha=0.1, histtype='fill', color='blue')\n", "leg = ax.legend(fontsize=10)\n", "leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", "ax.set_xlabel(r\"$m_{HH}$\")\n", @@ -352,7 +369,7 @@ "metadata": {}, "outputs": [], "source": [ - "bins = np.arange(mhh.min(), mhh.max(), 10)\n", + "bins = np.arange(100, 2000, 40)\n", "var_axis = hist.axis.Variable(bins, name=\"var\", label=\"variable\")\n", "cat_axis = hist.axis.StrCategory([], name=\"cat\", growth=True)\n", "\n", 
@@ -361,12 +378,19 @@ "hist_mhh.fill(var=mhh[semi_resolved_h_b1], cat=\"semi-1b\")\n", "\n", "fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", - "hep.histplot(hist_mhh[{\"cat\": \"semi-2b\"}], ax=ax, label=\"Semi-Resolved (fj-2 bs)\")\n", - "hep.histplot(hist_mhh[{\"cat\": \"semi-1b\"}], ax=ax, label=\"Semi-Resolved (fj-1 bs)\")\n", - "leg = ax.legend(fontsize=10)\n", + "legend_elements = []\n", + "hep.histplot(hist_mhh[{\"cat\": \"semi-2b\"}], ax=ax, label=\"Semi-Resolved (fj-2 bs)\", color='green', linestyle='solid')\n", + "hep.histplot(hist_mhh[{\"cat\": \"semi-1b\"}], ax=ax, label=\"Semi-Resolved (fj-1 bs)\", color='green', linestyle='dotted')\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='solid', lw=2, label=\"Semi-Resolved (fj-2 bs)\", color='green')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='dotted', lw=2, label=\"Semi-Resolved (fj-1 bs)\", color='green')\n", + ")\n", + "leg = ax.legend(handles=legend_elements, fontsize=10)\n", "leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", "ax.set_xlabel(r\"$m_{HH}$\")\n", - "ax.set_ylim(0, 5000)" + "#ax.set_ylim(0, 5000)" ] }, { @@ -503,6 +527,296 @@ "# filter out weird events" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Can we change the gen-matching definition for the resolved category?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "genJs = make_vector(events, \"ak4GenJet\")\n", + "genBs = make_vector(events, \"Genb\")\n", + "ak4JetGenJetIdx = events[\"ak4JetGenJetIdx\"].to_numpy()\n", + "\n", + "# disassemble gen_b by H\n", + "b_m2h1 = genBs[:, 0:2]\n", + "b_m2h2 = genBs[:, 2:4]\n", + "\n", + "# calculate dR between genb and each genJet\n", + "dR_h1b1_genJs = b_m2h1[:, 0][:, np.newaxis].deltaR(genJs)\n", + "dR_h1b2_genJs = b_m2h1[:, 1][:, np.newaxis].deltaR(genJs)\n", + "dR_h2b1_genJs = b_m2h2[:, 0][:, np.newaxis].deltaR(genJs)\n", + "dR_h2b2_genJs = b_m2h2[:, 1][:, np.newaxis].deltaR(genJs)\n", + "\n", + "# flag which gen jets are within dR < 0.4 of each b\n", + "GenJm2b1h1 = (dR_h1b1_genJs < 0.4)[:, :, np.newaxis]\n", + "GenJm2b2h1 = (dR_h1b2_genJs < 0.4)[:, :, np.newaxis]\n", + "GenJm2b1h2 = (dR_h2b1_genJs < 0.4)[:, :, np.newaxis]\n", + "GenJm2b2h2 = (dR_h2b2_genJs < 0.4)[:, :, np.newaxis]\n", + "print(GenJm2b1h1.shape)\n", + "\n", + "# concatenate the arrays\n", + "# 1st dimension: event\n", + "# 2nd dimension: GenJ\n", + "# 3rd dimension: match to each b true or false\n", + "GenJm2b = np.concatenate([GenJm2b1h1, GenJm2b2h1, GenJm2b1h2, GenJm2b2h2], axis=2)\n", + "print(GenJm2b.shape)\n", + "\n", + "# construct ak4GenJet info\n", + "ak4GenJetHiggsMatch = np.zeros(shape=genJs.shape, dtype=bool)\n", + "ak4GenJetHiggsMatchIndex = -1 * np.ones(shape=genJs.shape, dtype=int)\n", + "\n", + "# If a gen J is matched to exactly one genb (sum == 1),\n", + "# it is considered to be matched to the mother H\n", + "# and will be assigned an ak4GenJetHiggsMatchIndex\n", + "GenJm2H = np.sum(GenJm2b, axis=2) == 1\n", + "ak4GenJetHiggsMatch[GenJm2H] = True\n", + "\n", + "# find which genb and H matched the genJ\n", + "# draft in the sense that only the matched genJ's\n", + "# genB and genH indices will be used\n", + "bIdx_draft = np.argmax(GenJm2b, axis=2)\n", + "hIdx_draft = np.floor(bIdx_draft / 2)\n", + 
"ak4GenJetHiggsMatchIndex[ak4GenJetHiggsMatch] = hIdx_draft[ak4GenJetHiggsMatch]\n", + "\n", + "# Gen-Jet Idx\n", + "ak4JetGenJetIdx_ak = ak.from_regular(ak4JetGenJetIdx)\n", + "# get good gen-jet indices\n", + "not_padded = ak4JetGenJetIdx_ak > -1\n", + "not_oor = ak4JetGenJetIdx_ak < 6 # oor=out of range\n", + "select = not_padded & not_oor\n", + "\n", + "# select the genJet Higgs matching index that genJet also matched to reco jet\n", + "ak4GenJetHiggsMatchIndex_ak = ak.from_regular(ak4GenJetHiggsMatchIndex)\n", + "ak4JetHiggsMatchIndex_new = ak4GenJetHiggsMatchIndex_ak[ak4JetGenJetIdx_ak[select]]\n", + "ak4JetHiggsMatchIndex_new = ak.fill_none(\n", + " ak.pad_none(ak4JetHiggsMatchIndex_new, 6, axis=-1), -1\n", + ").to_numpy()\n", + "ak4JetHiggsMatchIndex_new\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4JetGenJetIdx_ak" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4JetGenJetIdx_ak[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4JetGenJetIdx_ak[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4GenJetHiggsMatchIndex_ak" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4GenJetHiggsMatchIndex_ak[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4GenJetHiggsMatchIndex_ak[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak4GenJetHiggsMatchIndex_ak[ak4JetGenJetIdx_ak[select]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for event 1\n", + "# [0, 1, 2, 3, -1, -1]\n", + "# [0, 1, -1, 1, -1, -1]\n", + 
"ak4GenJetHiggsMatchIndex_ak[ak4JetGenJetIdx_ak[select]][1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for event 0\n", + "# [0, 1, 3, 2, -1, -1]\n", + "# [1, 0, -1, 0]\n", + "ak4GenJetHiggsMatchIndex_ak[ak4JetGenJetIdx_ak[select]][0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "indexak4_new = ak4JetHiggsMatchIndex_new\n", + "indexak4_new\n", + "\n", + "h1ak4_new = indexak4_new == 0\n", + "h2ak4_new = indexak4_new == 1\n", + "num_ak4m2h1_new = h1ak4_new.sum(axis=1)\n", + "num_ak4m2h2_new = h2ak4_new.sum(axis=1)\n", + "h1m2ak4_new = num_ak4m2h1_new == 2\n", + "h2m2ak4_new = num_ak4m2h2_new == 2\n", + "\n", + "boosted_new = h1m1ak8 & h2m1ak8\n", + "semi_resolved_h1_new = h1m2ak4_new & h2m1ak8 & ~(boosted_new)\n", + "semi_resolved_h2_new = h2m2ak4_new & h1m1ak8 & ~(boosted_new)\n", + "semi_resolved_new = semi_resolved_h1_new | semi_resolved_h2_new\n", + "resolved_new = (\n", + " (h1m2ak4_new & h2m2ak4_new) & ~(boosted_new) & ~(semi_resolved_new)\n", + ")\n", + "not_categorized_new = ~(resolved_new | boosted_new | semi_resolved_new)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# bypass into each category\n", + "mhh_boosted = mhh[boosted_new]\n", + "mhh_resolved = mhh[resolved_new]\n", + "mhh_semi = mhh[semi_resolved_new]\n", + "mhh_out = mhh[not_categorized_new]\n", + "\n", + "bins = np.arange(100, 2000, 40)\n", + "var_axis = hist.axis.Variable(bins, name=\"var\", label=\"variable\")\n", + "cat_axis = hist.axis.StrCategory([], name=\"cat\", growth=True)\n", + "\n", + "hist_mhh = hist.Hist(var_axis, cat_axis)\n", + "hist_mhh.fill(var=mhh_boosted, cat=\"boosted\")\n", + "hist_mhh.fill(var=mhh_semi, cat=\"semi_resolved\")\n", + "hist_mhh.fill(var=mhh_resolved, cat=\"resolved\")\n", + "hist_mhh.fill(var=mhh_out, cat=\"non-categorized\")\n", + "\n", + 
"hist_mhh.fill(var=mhh[resolved], cat=\"resolved-old\")\n", + "hist_mhh.fill(var=mhh[not_categorized_new], cat=\"non-categorized-old\")\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", + "legend_elements = []\n", + "hist_mhh[{\"cat\": \"resolved\"}].plot1d(ax=ax, label=\"Resolved\", lw=1, color='orange')\n", + "hist_mhh[{\"cat\": \"non-categorized\"}].plot1d(ax=ax, label=\"Outside of these categories\", lw=2, ls='dotted', color='red')\n", + "hist_mhh[{\"cat\": \"resolved-old\"}].plot1d(ax=ax, label=\"Resolved (No gen-jet)\", lw=1, color='orange', alpha=0.2, histtype='fill')\n", + "hist_mhh[{\"cat\": \"non-categorized-old\"}].plot1d(ax=ax, label=\"Outside of these categories (No gen-jet)\", lw=1, ls='dashed', color='red')\n", + "\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='solid', lw=1, label=\"Resolved\", color='orange')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='dotted', lw=2, label=\"Outside of these categories\", color='red')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='solid', lw=1, label=\"Resolved (No gen-jet)\", color='orange')\n", + ")\n", + "legend_elements.append(\n", + " Line2D([0], [0], ls='dashed', lw=1, label=\"Outside of these categories (No gen-jet)\", color='red')\n", + ")\n", + "leg = ax.legend(handles=legend_elements, fontsize=10)\n", + "leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", + "ax.set_xlim(0, 2000)\n", + "ax.set_xlabel(r\"$m_{HH}$\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# maximum\n", + "max_dR_h1b1_genJs = np.max(dR_h1b1_genJs, axis=1)\n", + "max_dR_h1b2_genJs = np.max(dR_h1b2_genJs, axis=1)\n", + "max_dR_h2b1_genJs = np.max(dR_h2b1_genJs, axis=1)\n", + "max_dR_h2b2_genJs = np.max(dR_h2b2_genJs, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist_dr_h1 = hist.Hist(hist.axis.Regular(40, 
0, 4, name=\"drb\", label=\"max dr with b-quark\"))\n", + "hist_dr_h1.fill(max_dR_h1b1_genJs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist_dr_h1 = hist.Hist(hist.axis.Regular(40, 0, 4, name=\"drb\", label=\"max dr with b-quark\"))\n", + "hist_dr_h1.fill(max_dR_h1b2_genJs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist_dr_h2 = hist.Hist(hist.axis.Regular(40, 0, 4, name=\"drb\", label=\"max dr with b-quark\"))\n", + "hist_dr_h2.fill(max_dR_h2b1_genJs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist_dr_h2 = hist.Hist(hist.axis.Regular(40, 0, 4, name=\"drb\", label=\"max dr with b-quark\"))\n", + "hist_dr_h2.fill(max_dR_h2b2_genJs)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -518,13 +832,22 @@ "metadata": {}, "outputs": [], "source": [ - "fj_mwh1_idx = events[\"ak8FatJetHiggsMatchIndex\"].to_numpy() == 0\n", - "fj_mwh2_idx = events[\"ak8FatJetHiggsMatchIndex\"].to_numpy() == 1\n", + "fj_mwh1_idx = (events[\"ak8FatJetHiggsMatchIndex\"].to_numpy()[:, 0:2]) == 0\n", + "fj_mwh2_idx = events[\"ak8FatJetHiggsMatchIndex\"].to_numpy()[:, 0:2] == 1\n", "\n", "fj_xbb = events[\"ak8FatJetPNetXbb\"].to_numpy()\n", "fj_mass = events[\"ak8FatJetPNetMass\"].to_numpy()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj_mwh1_idx" + ] + }, { "cell_type": "code", "execution_count": null, @@ -550,34 +873,39 @@ " h1 = hist.Hist(var_axis, cat_axis)\n", " h1.fill(var=fj_mwh1_boosted, cat=\"boosted\")\n", " h1.fill(var=fj_mwh1_semi_resolved, cat=\"semi_resolved\")\n", - " # h1.fill(var=fj_mwh1_resolved, cat=\"resolved\")\n", " h1.fill(var=fj_mwh1_outside, cat=\"non-categorized\")\n", "\n", " h2 = hist.Hist(var_axis, cat_axis)\n", " h2.fill(var=fj_mwh2_boosted, cat=\"boosted\")\n", " 
h2.fill(var=fj_mwh2_semi_resolved, cat=\"semi_resolved\")\n", - " # h2.fill(var=fj_mwh2_resolved, cat=\"resolved\")\n", " h2.fill(var=fj_mwh2_outside, cat=\"non-categorized\")\n", "\n", - " print(fj_mwh1_resolved)\n", + " legend_elements = []\n", " fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharey=True)\n", - " hep.histplot(h1[{\"cat\": \"boosted\"}], ax=ax[0], label=\"Boosted\", density=True)\n", - " # hep.histplot(h1[{\"cat\": \"resolved\"}], ax=ax[0], label=\"Resolved\", density=True)\n", - " hep.histplot(h1[{\"cat\": \"semi_resolved\"}], ax=ax[0], label=\"Semi-Resolved\", density=True)\n", + " hep.histplot(h1[{\"cat\": \"boosted\"}], ax=ax[0], label=\"Boosted\", density=True, histtype='fill', alpha=0.2, hatch=r'\\\\\\\\', color='blue')\n", + " hep.histplot(h1[{\"cat\": \"semi_resolved\"}], ax=ax[0], label=\"Semi-Resolved\", density=True, lw=2, ls='solid', color='green')\n", " hep.histplot(\n", - " h1[{\"cat\": \"non-categorized\"}], ax=ax[0], label=\"Outside of these categories\", density=True\n", + " h1[{\"cat\": \"non-categorized\"}], ax=ax[0], label=\"Outside of these categories\", density=True, color='red', ls='dotted'\n", + " )\n", + " legend_elements.append(\n", + " mpatches.Patch(color='blue', alpha=0.2, hatch=r'\\\\\\\\',label='Boosted')\n", + " )\n", + " legend_elements.append(\n", + " Line2D([0], [0], ls='solid', lw=2, label=\"Semi-Resolved\", color='green')\n", + " )\n", + " legend_elements.append(\n", + " Line2D([0], [0], ls='dotted', lw=2, label=\"Outside of these categories\", color='red')\n", " )\n", - " leg = ax[0].legend(fontsize=10)\n", + " leg = ax[0].legend(handles=legend_elements, fontsize=10)\n", " leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", " ax[0].set_xlabel(label + \" - matched to H1\")\n", "\n", - " hep.histplot(h2[{\"cat\": \"boosted\"}], ax=ax[1], label=\"Boosted\", density=True)\n", - " # hep.histplot(h2[{\"cat\": \"resolved\"}], ax=ax[1], label=\"Resolved\", density=True)\n", + " hep.histplot(h2[{\"cat\": 
\"boosted\"}], ax=ax[1], label=\"Boosted\", density=True, histtype='fill', alpha=0.2, hatch=r'\\\\\\\\', color='blue')\n", " hep.histplot(h2[{\"cat\": \"semi_resolved\"}], ax=ax[1], label=\"Semi-Resolved\", density=True)\n", " hep.histplot(\n", - " h2[{\"cat\": \"non-categorized\"}], ax=ax[1], label=\"Outside of these categories\", density=True\n", + " h2[{\"cat\": \"non-categorized\"}], ax=ax[1], label=\"Outside of these categories\", density=True, color='red', ls='dotted'\n", " )\n", - " leg = ax[1].legend(fontsize=10)\n", + " leg = ax[1].legend(handles=legend_elements, fontsize=10)\n", " leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", " ax[1].set_xlabel(label + \" matched to H2\")\n", " ax[0].set_ylabel(\"Density\")" @@ -589,7 +917,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_h1h2_fj(fj_xbb, r\"fj $T_{Xbb}$ Score\", np.arange(-2, 1, 0.03))\n", + "# plot_h1h2_fj(fj_xbb, r\"fj $T_{Xbb}$ Score\", np.arange(-2, 1, 0.03))\n", "plot_h1h2_fj(fj_mass, r\"fj PN mass\", np.arange(0, 200, 7))" ] }, @@ -613,21 +941,23 @@ " h2.fill(var=x[boosted_nb1][fj_mwh2_idx[boosted_nb1]], cat=\"boosted-1b\")\n", " h2.fill(var=x[boosted_nb1nb2][fj_mwh2_idx[boosted_nb1nb2]], cat=\"boosted-2b1b\")\n", "\n", - " fig, ax = plt.subplots(1, 2, figsize=(8, 4))\n", - " hep.histplot(h1[{\"cat\": \"boosted-2b\"}], ax=ax[0], label=\"Boosted 2b\", density=True)\n", - " hep.histplot(h1[{\"cat\": \"boosted-1b\"}], ax=ax[0], label=\"Boosted 1b\", density=True)\n", - " hep.histplot(h1[{\"cat\": \"boosted-2b1b\"}], ax=ax[0], label=\"Boosted 2b1b\", density=True)\n", - " leg = ax[0].legend(fontsize=10)\n", + " fig, ax = plt.subplots(1, 1, figsize=(8, 4))\n", + " hep.histplot(h1[{\"cat\": \"boosted-2b\"}], ax=ax, label=\"Boosted 2b\", density=True, hatch=r'\\\\\\\\', alpha=0.1, histtype='fill', color='blue')\n", + " hep.histplot(h1[{\"cat\": \"boosted-1b\"}], ax=ax, label=\"Boosted 1b\", density=True, alpha=0.2, histtype='fill', color='blue')\n", + " hep.histplot(h1[{\"cat\": 
\"boosted-2b1b\"}], ax=ax, label=\"Boosted 2b1b\", density=True, alpha=0.4, histtype='fill', color='blue')\n", + " leg = ax.legend(fontsize=10)\n", " leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", - " ax[0].set_xlabel(label + \" - matched to H1\")\n", + " ax.set_xlabel(label + \" - matched to H1\")\n", "\n", + " \"\"\"\n", " hep.histplot(h2[{\"cat\": \"boosted-2b\"}], ax=ax[1], label=\"Boosted 2b\", density=True)\n", " hep.histplot(h2[{\"cat\": \"boosted-1b\"}], ax=ax[1], label=\"Boosted 1b\", density=True)\n", " hep.histplot(h2[{\"cat\": \"boosted-2b1b\"}], ax=ax[1], label=\"Boosted 2b1b\", density=True)\n", " leg = ax[1].legend(fontsize=10)\n", " leg.set_title(\"Gen-Level categories\", prop={\"size\": 10})\n", " ax[1].set_xlabel(label + \" matched to H2\")\n", - " ax[0].set_ylabel(\"Density\")" + " ax[0].set_ylabel(\"Density\")\n", + " \"\"\"" ] }, { diff --git a/src/HH4b/matching_study/PostProcessSemiBoosted.ipynb b/src/HH4b/matching_study/PostProcessSemiBoosted.ipynb index bd074d8a..919e1e2b 100644 --- a/src/HH4b/matching_study/PostProcessSemiBoosted.ipynb +++ b/src/HH4b/matching_study/PostProcessSemiBoosted.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -72,309 +72,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'../../..//../data/matching/23Nov17_v9_private/': {'hh4b': ['GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8'],\n", - " 'qcd': ['QCD_HT-1000to1500-13TeV',\n", - " 'QCD_HT-1500to2000-13TeV',\n", - " 'QCD_HT-2000toInf-13TeV',\n", - " 
'QCD_HT-200to300-13TeV',\n", - " 'QCD_HT-300to500-13TeV',\n", - " 'QCD_HT-500to700-13TeV',\n", - " 'QCD_HT-700to1000-13TeV'],\n", - " 'data': ['Run2018A', 'Run2018B', 'Run2018C', 'Run2018D'],\n", - " 'ttbar': ['TTTo2L2Nu_13TeV', 'TTToHadronic_13TeV', 'TTToSemiLeptonic_13TeV'],\n", - " 'vjets': ['WJetsToQQ_HT-200to400_13TeV',\n", - " 'WJetsToQQ_HT-400to600_13TeV',\n", - " 'WJetsToQQ_HT-600to800_13TeV',\n", - " 'WJetsToQQ_HT-800toInf_13TeV',\n", - " 'ZJetsToQQ_HT-200to400_13TeV',\n", - " 'ZJetsToQQ_HT-400to600_13TeV',\n", - " 'ZJetsToQQ_HT-600to800_13TeV',\n", - " 'ZJetsToQQ_HT-800toInf_13TeV'],\n", - " 'diboson': ['ZZTo4B01j_5f_TuneCP5_13TeV-amcatnloFXFX-pythia8'],\n", - " 'gghtobb': ['GluGluHToBB_M-125_TuneCP5_MINLO_NNLOPS_13TeV-powheg-pythia8'],\n", - " 'vbfhtobb': ['VBFHToBB_M-125_dipoleRecoilOn_TuneCP5_13TeV-powheg-pythia8'],\n", - " 'vhtobb': ['WminusH_HToBB_WToQQ_M-125_TuneCP5_13TeV-powheg-pythia8',\n", - " 'WplusH_HToBB_WToQQ_M-125_TuneCP5_13TeV-powheg-pythia8',\n", - " 'ZH_HToBB_ZToQQ_M-125_TuneCP5_13TeV-powheg-pythia8'],\n", - " 'tthtobb': ['ttHTobb_M125_TuneCP5_13TeV-powheg-pythia8']}}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sample_dirs" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('ht', 0),\n", - " ('nCentralJets', 0),\n", - " ('nOutsideJets', 0),\n", - " ('nFatJets', 0),\n", - " ('GenHiggsEta', 0),\n", - " ('GenHiggsEta', 1),\n", - " ('GenHiggsPhi', 0),\n", - " ('GenHiggsPhi', 1),\n", - " ('GenHiggsMass', 0),\n", - " ('GenHiggsMass', 1),\n", - " ('GenHiggsPt', 0),\n", - " ('GenHiggsPt', 1),\n", - " ('GenbEta', 0),\n", - " ('GenbEta', 1),\n", - " ('GenbEta', 2),\n", - " ('GenbEta', 3),\n", - " ('GenbPhi', 0),\n", - " ('GenbPhi', 1),\n", - " ('GenbPhi', 2),\n", - " ('GenbPhi', 3),\n", - " ('GenbMass', 0),\n", - " ('GenbMass', 1),\n", - " ('GenbMass', 2),\n", - " 
('GenbMass', 3),\n", - " ('GenbPt', 0),\n", - " ('GenbPt', 1),\n", - " ('GenbPt', 2),\n", - " ('GenbPt', 3),\n", - " ('ak4JetHiggsMatch', 0),\n", - " ('ak4JetHiggsMatch', 1),\n", - " ('ak4JetHiggsMatch', 2),\n", - " ('ak4JetHiggsMatch', 3),\n", - " ('ak4JetHiggsMatch', 4),\n", - " ('ak4JetHiggsMatch', 5),\n", - " ('ak4JetHiggsMatchIndex', 0),\n", - " ('ak4JetHiggsMatchIndex', 1),\n", - " ('ak4JetHiggsMatchIndex', 2),\n", - " ('ak4JetHiggsMatchIndex', 3),\n", - " ('ak4JetHiggsMatchIndex', 4),\n", - " ('ak4JetHiggsMatchIndex', 5),\n", - " ('ak4JethadronFlavour', 0),\n", - " ('ak4JethadronFlavour', 1),\n", - " ('ak4JethadronFlavour', 2),\n", - " ('ak4JethadronFlavour', 3),\n", - " ('ak4JethadronFlavour', 4),\n", - " ('ak4JethadronFlavour', 5),\n", - " ('ak8FatJetHiggsMatch', 0),\n", - " ('ak8FatJetHiggsMatch', 1),\n", - " ('ak8FatJetHiggsMatchIndex', 0),\n", - " ('ak8FatJetHiggsMatchIndex', 1),\n", - " ('ak8FatJetNumBMatchedH1', 0),\n", - " ('ak8FatJetNumBMatchedH1', 1),\n", - " ('ak8FatJetNumBMatchedH2', 0),\n", - " ('ak8FatJetNumBMatchedH2', 1),\n", - " ('ak8FatJetMaxdRH1', 0),\n", - " ('ak8FatJetMaxdRH1', 1),\n", - " ('ak8FatJetMaxdRH2', 0),\n", - " ('ak8FatJetMaxdRH2', 1),\n", - " ('PFHT330PT30_QuadPFJet_75_60_45_40_TriplePFBTagDeepCSV_4p5', 0),\n", - " ('PFHT1050', 0),\n", - " ('PFJet500', 0),\n", - " ('AK8PFJet500', 0),\n", - " ('AK8PFJet400_TrimMass30', 0),\n", - " ('AK8PFHT800_TrimMass50', 0),\n", - " ('AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4', 0),\n", - " ('QuadPFJet103_88_75_15_DoublePFBTagDeepCSV_1p3_7p7_VBF1', 0),\n", - " ('QuadPFJet103_88_75_15_PFBTagDeepCSV_1p3_VBF2', 0),\n", - " ('PFHT400_SixPFJet32_DoublePFBTagDeepCSV_2p94', 0),\n", - " ('PFHT450_SixPFJet36_PFBTagDeepCSV_1p59', 0),\n", - " ('AK8PFJet330_TrimMass30_PFAK8BTagDeepCSV_p17', 0),\n", - " ('QuadPFJet98_83_71_15_DoublePFBTagDeepCSV_1p3_7p7_VBF1', 0),\n", - " ('QuadPFJet98_83_71_15_PFBTagDeepCSV_1p3_VBF2', 0),\n", - " ('PFMET100_PFMHT100_IDTight_CaloBTagDeepCSV_3p1', 0),\n", - " 
('ak4JetbtagDeepFlavB', 0),\n", - " ('ak4JetbtagDeepFlavB', 1),\n", - " ('ak4JetbtagDeepFlavB', 2),\n", - " ('ak4JetbtagDeepFlavB', 3),\n", - " ('ak4JetbtagDeepFlavB', 4),\n", - " ('ak4JetbtagDeepFlavB', 5),\n", - " ('ak4JetbtagPNetProbb', 0),\n", - " ('ak4JetbtagPNetProbb', 1),\n", - " ('ak4JetbtagPNetProbb', 2),\n", - " ('ak4JetbtagPNetProbb', 3),\n", - " ('ak4JetbtagPNetProbb', 4),\n", - " ('ak4JetbtagPNetProbb', 5),\n", - " ('ak4JetbtagPNetProbbb', 0),\n", - " ('ak4JetbtagPNetProbbb', 1),\n", - " ('ak4JetbtagPNetProbbb', 2),\n", - " ('ak4JetbtagPNetProbbb', 3),\n", - " ('ak4JetbtagPNetProbbb', 4),\n", - " ('ak4JetbtagPNetProbbb', 5),\n", - " ('ak4JetbtagPNetProbc', 0),\n", - " ('ak4JetbtagPNetProbc', 1),\n", - " ('ak4JetbtagPNetProbc', 2),\n", - " ('ak4JetbtagPNetProbc', 3),\n", - " ('ak4JetbtagPNetProbc', 4),\n", - " ('ak4JetbtagPNetProbc', 5),\n", - " ('ak4JetbtagPNetProbcc', 0),\n", - " ('ak4JetbtagPNetProbcc', 1),\n", - " ('ak4JetbtagPNetProbcc', 2),\n", - " ('ak4JetbtagPNetProbcc', 3),\n", - " ('ak4JetbtagPNetProbcc', 4),\n", - " ('ak4JetbtagPNetProbcc', 5),\n", - " ('ak4JetbtagPNetProbpu', 0),\n", - " ('ak4JetbtagPNetProbpu', 1),\n", - " ('ak4JetbtagPNetProbpu', 2),\n", - " ('ak4JetbtagPNetProbpu', 3),\n", - " ('ak4JetbtagPNetProbpu', 4),\n", - " ('ak4JetbtagPNetProbpu', 5),\n", - " ('ak4JetbtagPNetProbuds', 0),\n", - " ('ak4JetbtagPNetProbuds', 1),\n", - " ('ak4JetbtagPNetProbuds', 2),\n", - " ('ak4JetbtagPNetProbuds', 3),\n", - " ('ak4JetbtagPNetProbuds', 4),\n", - " ('ak4JetbtagPNetProbuds', 5),\n", - " ('ak4JetbtagPNetProbg', 0),\n", - " ('ak4JetbtagPNetProbg', 1),\n", - " ('ak4JetbtagPNetProbg', 2),\n", - " ('ak4JetbtagPNetProbg', 3),\n", - " ('ak4JetbtagPNetProbg', 4),\n", - " ('ak4JetbtagPNetProbg', 5),\n", - " ('ak4JetbtagPNetProbundef', 0),\n", - " ('ak4JetbtagPNetProbundef', 1),\n", - " ('ak4JetbtagPNetProbundef', 2),\n", - " ('ak4JetbtagPNetProbundef', 3),\n", - " ('ak4JetbtagPNetProbundef', 4),\n", - " ('ak4JetbtagPNetProbundef', 5),\n", - " 
('ak4JetEta', 0),\n", - " ('ak4JetEta', 1),\n", - " ('ak4JetEta', 2),\n", - " ('ak4JetEta', 3),\n", - " ('ak4JetEta', 4),\n", - " ('ak4JetEta', 5),\n", - " ('ak4JetPhi', 0),\n", - " ('ak4JetPhi', 1),\n", - " ('ak4JetPhi', 2),\n", - " ('ak4JetPhi', 3),\n", - " ('ak4JetPhi', 4),\n", - " ('ak4JetPhi', 5),\n", - " ('ak4JetMass', 0),\n", - " ('ak4JetMass', 1),\n", - " ('ak4JetMass', 2),\n", - " ('ak4JetMass', 3),\n", - " ('ak4JetMass', 4),\n", - " ('ak4JetMass', 5),\n", - " ('ak4JetPt', 0),\n", - " ('ak4JetPt', 1),\n", - " ('ak4JetPt', 2),\n", - " ('ak4JetPt', 3),\n", - " ('ak4JetPt', 4),\n", - " ('ak4JetPt', 5),\n", - " ('ak4JetOutsidebtagDeepFlavB', 0),\n", - " ('ak4JetOutsidebtagDeepFlavB', 1),\n", - " ('ak4JetOutsidebtagDeepFlavB', 2),\n", - " ('ak4JetOutsidebtagDeepFlavB', 3),\n", - " ('ak4JetOutsidebtagDeepFlavB', 4),\n", - " ('ak4JetOutsidebtagDeepFlavB', 5),\n", - " ('ak4JetOutsidebtagPNetProbb', 0),\n", - " ('ak4JetOutsidebtagPNetProbb', 1),\n", - " ('ak4JetOutsidebtagPNetProbb', 2),\n", - " ('ak4JetOutsidebtagPNetProbb', 3),\n", - " ('ak4JetOutsidebtagPNetProbb', 4),\n", - " ('ak4JetOutsidebtagPNetProbb', 5),\n", - " ('ak4JetOutsidebtagPNetProbbb', 0),\n", - " ('ak4JetOutsidebtagPNetProbbb', 1),\n", - " ('ak4JetOutsidebtagPNetProbbb', 2),\n", - " ('ak4JetOutsidebtagPNetProbbb', 3),\n", - " ('ak4JetOutsidebtagPNetProbbb', 4),\n", - " ('ak4JetOutsidebtagPNetProbbb', 5),\n", - " ('ak4JetOutsidebtagPNetProbc', 0),\n", - " ('ak4JetOutsidebtagPNetProbc', 1),\n", - " ('ak4JetOutsidebtagPNetProbc', 2),\n", - " ('ak4JetOutsidebtagPNetProbc', 3),\n", - " ('ak4JetOutsidebtagPNetProbc', 4),\n", - " ('ak4JetOutsidebtagPNetProbc', 5),\n", - " ('ak4JetOutsidebtagPNetProbcc', 0),\n", - " ('ak4JetOutsidebtagPNetProbcc', 1),\n", - " ('ak4JetOutsidebtagPNetProbcc', 2),\n", - " ('ak4JetOutsidebtagPNetProbcc', 3),\n", - " ('ak4JetOutsidebtagPNetProbcc', 4),\n", - " ('ak4JetOutsidebtagPNetProbcc', 5),\n", - " ('ak4JetOutsidebtagPNetProbpu', 0),\n", - " 
('ak4JetOutsidebtagPNetProbpu', 1),\n", - " ('ak4JetOutsidebtagPNetProbpu', 2),\n", - " ('ak4JetOutsidebtagPNetProbpu', 3),\n", - " ('ak4JetOutsidebtagPNetProbpu', 4),\n", - " ('ak4JetOutsidebtagPNetProbpu', 5),\n", - " ('ak4JetOutsidebtagPNetProbuds', 0),\n", - " ('ak4JetOutsidebtagPNetProbuds', 1),\n", - " ('ak4JetOutsidebtagPNetProbuds', 2),\n", - " ('ak4JetOutsidebtagPNetProbuds', 3),\n", - " ('ak4JetOutsidebtagPNetProbuds', 4),\n", - " ('ak4JetOutsidebtagPNetProbuds', 5),\n", - " ('ak4JetOutsidebtagPNetProbg', 0),\n", - " ('ak4JetOutsidebtagPNetProbg', 1),\n", - " ('ak4JetOutsidebtagPNetProbg', 2),\n", - " ('ak4JetOutsidebtagPNetProbg', 3),\n", - " ('ak4JetOutsidebtagPNetProbg', 4),\n", - " ('ak4JetOutsidebtagPNetProbg', 5),\n", - " ('ak4JetOutsidebtagPNetProbundef', 0),\n", - " ('ak4JetOutsidebtagPNetProbundef', 1),\n", - " ('ak4JetOutsidebtagPNetProbundef', 2),\n", - " ('ak4JetOutsidebtagPNetProbundef', 3),\n", - " ('ak4JetOutsidebtagPNetProbundef', 4),\n", - " ('ak4JetOutsidebtagPNetProbundef', 5),\n", - " ('ak4JetOutsideEta', 0),\n", - " ('ak4JetOutsideEta', 1),\n", - " ('ak4JetOutsideEta', 2),\n", - " ('ak4JetOutsideEta', 3),\n", - " ('ak4JetOutsideEta', 4),\n", - " ('ak4JetOutsideEta', 5),\n", - " ('ak4JetOutsidePhi', 0),\n", - " ('ak4JetOutsidePhi', 1),\n", - " ('ak4JetOutsidePhi', 2),\n", - " ('ak4JetOutsidePhi', 3),\n", - " ('ak4JetOutsidePhi', 4),\n", - " ('ak4JetOutsidePhi', 5),\n", - " ('ak4JetOutsideMass', 0),\n", - " ('ak4JetOutsideMass', 1),\n", - " ('ak4JetOutsideMass', 2),\n", - " ('ak4JetOutsideMass', 3),\n", - " ('ak4JetOutsideMass', 4),\n", - " ('ak4JetOutsideMass', 5),\n", - " ('ak4JetOutsidePt', 0),\n", - " ('ak4JetOutsidePt', 1),\n", - " ('ak4JetOutsidePt', 2),\n", - " ('ak4JetOutsidePt', 3),\n", - " ('ak4JetOutsidePt', 4),\n", - " ('ak4JetOutsidePt', 5),\n", - " ('ak8FatJetEta', 0),\n", - " ('ak8FatJetEta', 1),\n", - " ('ak8FatJetPhi', 0),\n", - " ('ak8FatJetPhi', 1),\n", - " ('ak8FatJetMass', 0),\n", - " ('ak8FatJetMass', 1),\n", - " 
('ak8FatJetPt', 0),\n", - " ('ak8FatJetPt', 1),\n", - " ('ak8FatJetMsd', 0),\n", - " ('ak8FatJetMsd', 1),\n", - " ('ak8FatJetPNetXbb', 0),\n", - " ('ak8FatJetPNetXbb', 1),\n", - " ('ak8FatJetPNetXjj', 0),\n", - " ('ak8FatJetPNetXjj', 1),\n", - " ('ak8FatJetPNetMass', 0),\n", - " ('ak8FatJetPNetMass', 1),\n", - " ('weight', 0)]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.read_parquet(\"../../../../data/matching/23Nov17_v9_private/2018/GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8/parquet\")\n", "list(df.columns)" @@ -382,64 +91,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8\n", - "Loading QCD_HT-200to300-13TeV\n", - "Loading QCD_HT-1000to1500-13TeV\n", - "Loading QCD_HT-500to700-13TeV\n", - "Loading QCD_HT-700to1000-13TeV\n", - "Loading QCD_HT-300to500-13TeV\n", - "Loading QCD_HT-1500to2000-13TeV\n", - "Loading QCD_HT-2000toInf-13TeV\n", - "Loading Run2018A\n", - "Loading Run2018C\n", - "Loading Run2018D\n", - "Loading Run2018B\n", - "Loading TTToHadronic_13TeV\n", - "Loading TTToSemiLeptonic_13TeV\n", - "Loading TTTo2L2Nu_13TeV\n", - "Loading ZJetsToQQ_HT-400to600_13TeV\n", - "Loading WJetsToQQ_HT-600to800_13TeV\n", - "Loading WJetsToQQ_HT-800toInf_13TeV\n", - "Loading WJetsToQQ_HT-200to400_13TeV\n", - "Loading ZJetsToQQ_HT-200to400_13TeV\n", - "Loading ZJetsToQQ_HT-800toInf_13TeV\n", - "Loading ZJetsToQQ_HT-600to800_13TeV\n", - "Loading WJetsToQQ_HT-400to600_13TeV\n", - "Loading ZZTo4B01j_5f_TuneCP5_13TeV-amcatnloFXFX-pythia8\n", - "Loading GluGluHToBB_M-125_TuneCP5_MINLO_NNLOPS_13TeV-powheg-pythia8\n", - "Loading VBFHToBB_M-125_dipoleRecoilOn_TuneCP5_13TeV-powheg-pythia8\n", - "Loading WplusH_HToBB_WToQQ_M-125_TuneCP5_13TeV-powheg-pythia8\n", - "Loading 
ZH_HToBB_ZToQQ_M-125_TuneCP5_13TeV-powheg-pythia8\n", - "Loading WminusH_HToBB_WToQQ_M-125_TuneCP5_13TeV-powheg-pythia8\n", - "Loading ttHTobb_M125_TuneCP5_13TeV-powheg-pythia8\n", - "\n", - " Preselection\n", - "hh4b 9.094289\n", - "qcd 111808.238206\n", - "data 179968.000000\n", - "ttbar 20593.231553\n", - "vjets 4160.656675\n", - "diboson 34.553340\n", - "gghtobb 192.131317\n", - "vbfhtobb 30.646243\n", - "vhtobb 49.611911\n", - "tthtobb 216.339654\n" - ] - } - ], + "outputs": [], "source": [ "filters = [\n", " # one good fatjet (fatjet with index 0 has the largest Xbb score)\n", " (\"('ak8FatJetPt', '0')\", \">=\", 300),\n", - " (\"('ak8FatJetMsd', '0')\", \">=\", 60),\n", - " (\"('ak8FatJetPNetXbb', '0')\", \">=\", 0.9734), # medium WP\n", + " (\"('ak8FatJetPNetMass', '0')\", \">=\", 60),\n", + " #(\"('ak8FatJetPNetXbb', '0')\", \">=\", 0.9734), # medium WP\n", " # second fatjet has lower Xbb score\n", " (\"('ak8FatJetPNetXbb', '1')\", \"<\", 0.9880), # tight WP\n", " # (\"('ak8FatJetPNetXbb', '1')\", \"<\", 0.9734), # medium WP\n", @@ -469,26 +129,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "hh4b 16934\n", - "qcd 21203\n", - "data 179968\n", - "ttbar 363766\n", - "vjets 42608\n", - "diboson 79403\n", - "gghtobb 1634\n", - "vbfhtobb 1721\n", - "vhtobb 25943\n", - "tthtobb 125540\n" - ] - } - ], + "outputs": [], "source": [ "for key,events in events_dict.items():\n", " print(key, len(events))" @@ -503,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -525,172 +168,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " | 0 | \n", - "1 | \n", - "2 | \n", - "3 | \n", - "4 | \n", - "5 | \n", - "
---|---|---|---|---|---|---|
0 | \n", - "0.868961 | \n", - "0.136897 | \n", - "0.006412 | \n", - "0.019220 | \n", - "0.008519 | \n", - "0.011070 | \n", - "
1 | \n", - "0.997986 | \n", - "0.961903 | \n", - "0.023145 | \n", - "0.006817 | \n", - "0.022713 | \n", - "0.009239 | \n", - "
2 | \n", - "0.984380 | \n", - "0.973485 | \n", - "0.910023 | \n", - "0.034095 | \n", - "0.009683 | \n", - "0.033072 | \n", - "
3 | \n", - "0.212848 | \n", - "0.226530 | \n", - "0.012299 | \n", - "0.015050 | \n", - "0.001965 | \n", - "0.005824 | \n", - "
4 | \n", - "0.999067 | \n", - "0.838231 | \n", - "0.993367 | \n", - "0.033913 | \n", - "0.015753 | \n", - "0.005880 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
125535 | \n", - "0.996173 | \n", - "0.991208 | \n", - "0.926521 | \n", - "0.005725 | \n", - "0.002599 | \n", - "0.011035 | \n", - "
125536 | \n", - "0.900944 | \n", - "0.211356 | \n", - "0.078694 | \n", - "0.053525 | \n", - "0.013406 | \n", - "0.008765 | \n", - "
125537 | \n", - "0.925364 | \n", - "0.782766 | \n", - "0.009691 | \n", - "0.007181 | \n", - "0.001209 | \n", - "0.001047 | \n", - "
125538 | \n", - "0.942156 | \n", - "0.286837 | \n", - "0.011574 | \n", - "0.003299 | \n", - "-99999.000000 | \n", - "-99999.000000 | \n", - "
125539 | \n", - "0.886543 | \n", - "0.053071 | \n", - "0.098375 | \n", - "0.037029 | \n", - "0.014498 | \n", - "0.018832 | \n", - "
125540 rows × 6 columns
\n", - "