diff --git a/data/tutorials/longexercise/plot1DScan.py b/data/tutorials/longexercise/plot1DScan.py index 98172ad62f7..42b4e1449dd 100755 --- a/data/tutorials/longexercise/plot1DScan.py +++ b/data/tutorials/longexercise/plot1DScan.py @@ -9,7 +9,7 @@ from six.moves import range -import CombineHarvester.CombineTools.plotting as plot +import HiggsAnalysis.CombinedLimit.util.plotting as plot import ROOT ROOT.PyConfig.IgnoreCommandLineOptions = True diff --git a/data/tutorials/longexercise/postFitPlot.py b/data/tutorials/longexercise/postFitPlot.py index 563a4c4405b..2619c491555 100644 --- a/data/tutorials/longexercise/postFitPlot.py +++ b/data/tutorials/longexercise/postFitPlot.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -import CombineHarvester.CombineTools.plotting as plot +import HiggsAnalysis.CombinedLimit.util.plotting as plot import ROOT ROOT.PyConfig.IgnoreCommandLineOptions = True diff --git a/data/tutorials/tutorial_unfolding_2023/scripts/get_migration_matrix.py b/data/tutorials/tutorial_unfolding_2023/scripts/get_migration_matrix.py index c34442f8310..3f5a8d33101 100644 --- a/data/tutorials/tutorial_unfolding_2023/scripts/get_migration_matrix.py +++ b/data/tutorials/tutorial_unfolding_2023/scripts/get_migration_matrix.py @@ -1,5 +1,5 @@ import CombineHarvester.CombineTools.ch as ch -import CombineHarvester.CombineTools.plotting as plot +import HiggsAnalysis.CombinedLimit.util.plotting as plot import ROOT import sys diff --git a/data/tutorials/tutorial_unfolding_2023/scripts/plot1DScan.py b/data/tutorials/tutorial_unfolding_2023/scripts/plot1DScan.py index 3bca841da94..1bdee1adf21 100755 --- a/data/tutorials/tutorial_unfolding_2023/scripts/plot1DScan.py +++ b/data/tutorials/tutorial_unfolding_2023/scripts/plot1DScan.py @@ -4,7 +4,7 @@ import ROOT import math from functools import partial -import CombineHarvester.CombineTools.plotting as plot +import HiggsAnalysis.CombinedLimit.util.plotting as plot import json import argparse import os.path diff --git a/data/tutorials/tutorial_unfolding_2023/scripts/plotCorrelations_pois.py b/data/tutorials/tutorial_unfolding_2023/scripts/plotCorrelations_pois.py index a12d5c099f3..28134c446ca 100644 --- a/data/tutorials/tutorial_unfolding_2023/scripts/plotCorrelations_pois.py +++ b/data/tutorials/tutorial_unfolding_2023/scripts/plotCorrelations_pois.py @@ -2,7 +2,7 @@ import ROOT import argparse -import CombineHarvester.CombineTools.plotting as plot +import HiggsAnalysis.CombinedLimit.util.plotting as plot ROOT.PyConfig.IgnoreCommandLineOptions = True ROOT.gROOT.SetBatch(ROOT.kTRUE) diff --git a/python/tool_base/CombineToolBase.py b/python/tool_base/CombineToolBase.py new file mode 100755 index 00000000000..a44e66aea61 --- /dev/null +++ b/python/tool_base/CombineToolBase.py @@ -0,0 +1,399 @@ +from __future__ import absolute_import +from __future__ import print_function +import os +import stat +from functools import partial +from multiprocessing import Pool +from six.moves import range + +DRY_RUN = False + +JOB_PREFIX = """#!/bin/sh +ulimit -s unlimited +set -e +cd %(CMSSW_BASE)s/src +export SCRAM_ARCH=%(SCRAM_ARCH)s +source /cvmfs/cms.cern.ch/cmsset_default.sh +eval `scramv1 runtime -sh` +cd %(PWD)s +""" % ( + {"CMSSW_BASE": os.environ["CMSSW_BASE"], "SCRAM_ARCH": os.environ["SCRAM_ARCH"], "PWD": os.environ["PWD"]} +) + +CONDOR_TEMPLATE = """executable = %(EXE)s +arguments = $(ProcId) +output = %(TASK)s.$(ClusterId).$(ProcId).out +error = %(TASK)s.$(ClusterId).$(ProcId).err +log = %(TASK)s.$(ClusterId).log + +# Send the job to Held state 
on failure.
+on_exit_hold = (ExitBySignal == True) || (ExitCode != 0)
+
+# Periodically retry the jobs every 10 minutes, up to a maximum of 3 starts.
+periodic_release = (NumJobStarts < 3) && ((CurrentTime - EnteredCurrentStatus) > 600)
+
+%(EXTRA)s
+queue %(NUMBER)s
+
+"""
+
+CRAB_PREFIX = """
+set -x
+set -e
+ulimit -s unlimited
+ulimit -c 0
+
+function error_exit
+{
+  if [ $1 -ne 0 ]; then
+    echo "Error with exit code ${1}"
+    if [ -e FrameworkJobReport.xml ]
+    then
+      cat << EOF > FrameworkJobReport.xml.tmp
+      <FrameworkJobReport>
+      <FrameworkError ExitStatus="${1}" Type="" >
+      Error with exit code ${1}
+      </FrameworkError>
+EOF
+      tail -n+2 FrameworkJobReport.xml >> FrameworkJobReport.xml.tmp
+      mv FrameworkJobReport.xml.tmp FrameworkJobReport.xml
+    else
+      cat << EOF > FrameworkJobReport.xml
+      <FrameworkJobReport>
+      <FrameworkError ExitStatus="${1}" Type="" >
+      Error with exit code ${1}
+      </FrameworkError>
+      </FrameworkJobReport>
+EOF
+    fi
+    exit 0
+  fi
+}
+
+trap 'error_exit $?' ERR
+"""
+
+CRAB_POSTFIX = """
+tar -cf combine_output.tar higgsCombine*.root
+rm higgsCombine*.root
+"""
+
+CRAB_DO_NOTHING = """
+import FWCore.ParameterSet.Config as cms
+process = cms.Process("MAIN")
+
+process.source = cms.Source("EmptySource")
+process.options = cms.untracked.PSet()
+"""
+
+
+def run_command(dry_run, command, pre_cmd=""):
+    if command.startswith("combine"):
+        command = pre_cmd + command
+    if not dry_run:
+        print(">> " + command)
+        return os.system(command)
+    else:
+        print("[DRY-RUN]: " + command)
+
+
+class CombineToolBase(object):
+    description = "Base class that passes through all arguments to combine and handles job creation and submission"
+    requires_root = False
+
+    def __init__(self):
+        self.job_queue = []
+        self.args = None
+        self.passthru = []
+        self.job_mode = "interactive"
+        self.job_dir = ""
+        self.prefix_file = ""
+        self.parallel = 1
+        self.merge = 1
+        self.task_name = "combine_task"
+        self.dry_run = False
+        self.bopts = ""  # batch submission options
+        self.custom_crab = None
+        self.custom_crab_post = None
+        self.pre_cmd = ""
+        self.crab_files = []
+
+    def attach_job_args(self, group):
+        group.add_argument(
+            "--job-mode", default=self.job_mode, choices=["interactive", "script", "lxbatch", "SGE", "slurm", "condor", "crab3"], help="Task execution mode"
+        )
+        group.add_argument("--job-dir", default=self.job_dir, help="Path to directory containing job scripts and logs")
+        group.add_argument("--prefix-file", default=self.prefix_file, help="Path to file containing job prefix")
+        group.add_argument("--task-name", default=self.task_name, help="Task name, used for job script and log filenames for batch system tasks")
+        group.add_argument("--parallel", type=int, default=self.parallel, help="Number of jobs to run in parallel [only affects interactive job-mode]")
+        group.add_argument("--merge", type=int, default=self.merge, help="Number of jobs to run in a single script [only affects batch submission]")
+        group.add_argument("--dry-run", action="store_true", help="Print commands to the screen but do not run them")
+        group.add_argument("--sub-opts", default=self.bopts, help="Options for batch/crab submission")
+        group.add_argument("--memory", type=int, help="Request memory for job [MB]")
+        group.add_argument("--cores", type=int, help="Request number of cores for job")
+        group.add_argument("--crab-area", help="crab working area")
+        group.add_argument(
+            "--custom-crab",
+            default=self.custom_crab,
+            help='python file containing a function with name signature "custom_crab(config)" that can be used to modify the default crab configuration',
+        )
+        group.add_argument("--crab-extra-files", nargs="+", default=self.crab_files, help="Extra files that should be shipped to crab")
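+        # Hypothetical usage sketch (not part of this patch): via the combineTool.py entry point these
+        # options are used e.g. as:
+        #   combineTool.py -M AsymptoticLimits -d workspace.root --job-mode condor --merge 2 --task-name limits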
group.add_argument("--pre-cmd", default=self.pre_cmd, help="Prefix the call to combine with this string") + group.add_argument("--post-job-cmd", default="", help="Postfix cmd for combine jobs [condor]") + group.add_argument( + "--custom-crab-post", + default=self.custom_crab_post, + help="txt file containing command lines that can be used in the crab job script instead of the defaults.", + ) + + def attach_intercept_args(self, group): + pass + + def attach_args(self, group): + pass + + def set_args(self, known, unknown): + self.args = known + self.job_mode = self.args.job_mode + self.job_dir = self.args.job_dir + self.prefix_file = self.args.prefix_file + self.task_name = self.args.task_name + self.parallel = self.args.parallel + self.merge = self.args.merge + self.dry_run = self.args.dry_run + self.passthru.extend(unknown) + self.bopts = self.args.sub_opts + self.custom_crab = self.args.custom_crab + self.memory = self.args.memory + self.cores = self.args.cores + self.crab_area = self.args.crab_area + self.crab_files = self.args.crab_extra_files + self.pre_cmd = self.args.pre_cmd + self.custom_crab_post = self.args.custom_crab_post + self.post_job_cmd = self.args.post_job_cmd + + def put_back_arg(self, arg_name, target_name): + if hasattr(self.args, arg_name): + self.passthru.extend([target_name, getattr(self.args, arg_name)]) + delattr(self.args, arg_name) + + def extract_arg(self, arg, args_str): + args_str = args_str.replace(arg + "=", arg + " ") + args = args_str.split() + if arg in args: + idx = args.index(arg) + assert idx != -1 and idx < len(args) + val = args[idx + 1] + del args[idx : idx + 2] + return val, (" ".join(args)) + else: + return None, args_str + + def create_job_script(self, commands, script_filename, do_log=False): + fname = script_filename + logname = script_filename.replace(".sh", ".log") + with open(fname, "w") as text_file: + text_file.write(JOB_PREFIX) + for i, command in enumerate(commands): + tee = "tee" if i == 0 else "tee -a" + log_part = "\n" + if do_log: + log_part = " 2>&1 | %s " % tee + logname + log_part + if command.startswith("combine") or command.startswith("pushd"): + text_file.write(self.pre_cmd + "eval " + command + log_part) + else: + text_file.write(command) + text_file.write("\n" + self.post_job_cmd + "\n") + st = os.stat(fname) + os.chmod(fname, st.st_mode | stat.S_IEXEC) + # print JOB_PREFIX + command + print("Created job script: %s" % script_filename) + + def run_method(self): + print(vars(self.args)) + # Put the method back in because we always take it out + self.put_back_arg("method", "-M") + print(self.passthru) + command = "combine " + " ".join(self.passthru) + self.job_queue.append(command) + self.flush_queue() + + def extract_workspace_arg(self, cmd_list=[]): + for arg in ["-d", "--datacard"]: + if arg in cmd_list: + idx = cmd_list.index(arg) + assert idx != -1 and idx < len(cmd_list) + return cmd_list[idx + 1] + raise RuntimeError("The workspace argument must be specified explicity with -d or --datacard") + + def extract_lib_arg(self, cmd_list=[]): + for arg in ["-L", "--LoadLibrary"]: + if arg in cmd_list: + idx = cmd_list.index(arg) + assert idx != -1 and idx < len(cmd_list) + return cmd_list[idx + 1] + return None + + def flush_queue(self): + if self.job_mode == "interactive": + pool = Pool(processes=self.parallel) + _ = pool.map(partial(run_command, self.dry_run, pre_cmd=self.pre_cmd), self.job_queue) + script_list = [] + if self.job_mode in ["script", "lxbatch", "SGE", "slurm"]: + if self.prefix_file != "": + if 
self.prefix_file.endswith(".txt"): + job_prefix_file = open(self.prefix_file, "r") + else: + job_prefix_file = open( + os.environ["CMSSW_BASE"] + "/src/HiggsAnalysis.CombinedLimit/input/job_prefixes/job_prefix_" + self.prefix_file + ".txt", "r" + ) + global JOB_PREFIX + JOB_PREFIX = job_prefix_file.read() % ( + {"CMSSW_BASE": os.environ["CMSSW_BASE"], "SCRAM_ARCH": os.environ["SCRAM_ARCH"], "PWD": os.environ["PWD"]} + ) + job_prefix_file.close() + if self.job_mode in ["script", "lxbatch", "SGE"]: + for i, j in enumerate(range(0, len(self.job_queue), self.merge)): + script_name = "job_%s_%i.sh" % (self.task_name, i) + # each job is given a slice from the list of combine commands of length 'merge' + # we also keep track of the files that were created in case submission to a + # batch system was also requested + if self.job_dir: + if not os.path.exists(self.job_dir): + os.makedirs(self.job_dir) + script_name = os.path.join(self.job_dir, script_name) + self.create_job_script(self.job_queue[j : j + self.merge], script_name, self.job_mode == "script") + script_list.append(script_name) + if self.job_mode == "lxbatch": + for script in script_list: + full_script = os.path.abspath(script) + logname = full_script.replace(".sh", "_%J.log") + run_command(self.dry_run, "bsub -o %s %s %s" % (logname, self.bopts, full_script)) + if self.job_mode == "SGE": + for script in script_list: + full_script = os.path.abspath(script) + logname = full_script.replace(".sh", "_%J.log") + run_command(self.dry_run, "qsub -o %s %s %s" % (logname, self.bopts, full_script)) + if self.job_mode == "slurm": + script_name = "slurm_%s.sh" % self.task_name + if self.job_dir: + if not os.path.exists(self.job_dir): + os.makedirs(self.job_dir) + script_name = os.path.join(self.job_dir, script_name) + commands = [] + jobs = 0 + # each job is given a slice from the list of combine commands of length 'merge' + for j in range(0, len(self.job_queue), self.merge): + jobs += 1 + commands += ( + [ + "if [ ${SLURM_ARRAY_TASK_ID} -eq %i ]; then\n" % jobs, + ] + + [" %s\n" % ln for ln in self.job_queue[j : j + self.merge]] + + ["fi\n"] + ) + self.create_job_script(commands, script_name, self.job_mode == "script") + full_script = os.path.abspath(script_name) + logname = full_script.replace(".sh", "_%A_%a.log") + run_command(self.dry_run, "sbatch --array=1-%i -o %s %s %s" % (jobs, logname, self.bopts, full_script)) + if self.job_mode == "condor": + outscriptname = "condor_%s.sh" % self.task_name + subfilename = "condor_%s.sub" % self.task_name + print(">> condor job script will be %s" % outscriptname) + outscript = open(outscriptname, "w") + outscript.write(JOB_PREFIX) + jobs = 0 + wsp_files = set() + for i, j in enumerate(range(0, len(self.job_queue), self.merge)): + outscript.write("\nif [ $1 -eq %i ]; then\n" % jobs) + jobs += 1 + for line in self.job_queue[j : j + self.merge]: + newline = self.pre_cmd + line + outscript.write(" " + newline + "\n") + outscript.write("fi") + outscript.write("\n" + self.post_job_cmd + "\n") + outscript.close() + st = os.stat(outscriptname) + os.chmod(outscriptname, st.st_mode | stat.S_IEXEC) + subfile = open(subfilename, "w") + condor_settings = CONDOR_TEMPLATE % { + "EXE": outscriptname, + "TASK": self.task_name, + "EXTRA": self.bopts.encode("UTF-8").decode("unicode_escape"), + "NUMBER": jobs, + } + subfile.write(condor_settings) + subfile.close() + run_command(self.dry_run, "condor_submit %s" % (subfilename)) + + if self.job_mode == "crab3": + # import the stuff we need + from CRABAPI.RawCommand import 
+            from six.moves.http_client import HTTPException
+
+            print(">> crab3 requestName will be %s" % self.task_name)
+            outscriptname = "crab_%s.sh" % self.task_name
+            print(">> crab3 script will be %s" % outscriptname)
+            outscript = open(outscriptname, "w")
+            outscript.write(CRAB_PREFIX)
+            jobs = 0
+            wsp_files = set()
+            for extra in self.crab_files:
+                wsp_files.add(extra)
+            for i, j in enumerate(range(0, len(self.job_queue), self.merge)):
+                jobs += 1
+                outscript.write("\nif [ $1 -eq %i ]; then\n" % jobs)
+                for line in self.job_queue[j : j + self.merge]:
+                    newline = line
+                    if line.startswith("combine"):
+                        newline = self.pre_cmd + line.replace("combine", "./combine", 1)
+                    wsp = str(self.extract_workspace_arg(newline.split()))
+
+                    newline = newline.replace(wsp, os.path.basename(wsp))
+                    if wsp.startswith("root://"):
+                        newline = ("./copyRemoteWorkspace.sh %s ./%s; " % (wsp, os.path.basename(wsp))) + newline
+                    else:
+                        wsp_files.add(wsp)
+                    if self.extract_lib_arg(newline.split()) is not None:
+                        lib = str(self.extract_lib_arg(newline.split()))
+                        newline = newline.replace(lib, os.path.basename(lib))
+                        wsp_files.add(lib)
+                        wsp_files.add(lib.replace(".so", "_ACLiC_dict_rdict.pcm"))
+                        wsp_files.add(lib.replace("_cc.so", ".cc"))
+                        wsp_files.add(lib.replace("_cc.so", ".h"))
+                    outscript.write("  " + newline + "\n")
+                outscript.write("fi")
+            if self.custom_crab_post is not None:
+                with open(self.custom_crab_post, "r") as postfile:
+                    outscript.write(postfile.read())
+            else:
+                outscript.write(CRAB_POSTFIX)
+            outscript.close()
+            from HiggsAnalysis.CombinedLimit.tool_base.crab import config
+
+            config.General.requestName = self.task_name
+            config.JobType.scriptExe = outscriptname
+            config.JobType.inputFiles.extend(wsp_files)
+            config.Data.totalUnits = jobs
+            config.Data.outputDatasetTag = config.General.requestName
+            if self.memory is not None:
+                config.JobType.maxMemoryMB = self.memory
+            do_nothing_script = open(os.environ["CMSSW_BASE"] + "/src/HiggsAnalysis/CombinedLimit/scripts/do_nothing_cfg.py", "w")
+            do_nothing_script.write(CRAB_DO_NOTHING)
+            if self.cores is not None:
+                config.JobType.numCores = self.cores
+                do_nothing_script.write("\nprocess.options.numberOfThreads=cms.untracked.uint32(%i)" % self.cores)
+            do_nothing_script.close()
+            if self.crab_area is not None:
+                config.General.workArea = self.crab_area
+            if self.custom_crab is not None:
+                d = {}
+                exec(compile(open(self.custom_crab).read(), self.custom_crab, "exec"), d)
+                d["custom_crab"](config)
+            print(config)
+            if not self.dry_run:
+                try:
+                    crabCommand("submit", config=config)
+                except HTTPException as hte:
+                    print(hte.headers)
+        del self.job_queue[:]
diff --git a/python/tool_base/CovMatrix.py b/python/tool_base/CovMatrix.py
new file mode 100755
index 00000000000..88d8fbfef32
--- /dev/null
+++ b/python/tool_base/CovMatrix.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import
+from __future__ import print_function
+import ROOT
+
+import HiggsAnalysis.CombinedLimit.tool_base.utils as utils
+from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS
+
+from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase
+from six.moves import range
+import ctypes
+
+
+class CovMatrix(CombineToolBase):
+    description = "Build a fit covariance matrix from scan results"
+    requires_root = True
+
+    def __init__(self):
+        CombineToolBase.__init__(self)
+
+    def attach_args(self, group):
+        CombineToolBase.attach_args(self, group)
+        group.add_argument("-i", "--input", nargs="+", default=[], help="The input 
file containing the MultiDimFit singles mode output") + group.add_argument("-o", "--output", help="The output name in the format file:prefix") + group.add_argument("-P", "--POIs", help="The params that were scanned (in scan order)") + group.add_argument("--POIs-from-set", help="Extract from file:workspace:set instead") + group.add_argument("--compare", help="Compare to RooFitResult") + + def run_method(self): + POIs = [] + if self.args.POIs is not None: + POIs = self.args.POIs.split(",") + if self.args.POIs_from_set is not None: + ws_in = self.args.POIs_from_set.split(":") + print(ws_in) + POIs = utils.list_from_workspace(ws_in[0], ws_in[1], ws_in[2]) + + compare = self.args.compare is not None + if compare: + f_in = self.args.compare.split(":") + f = ROOT.TFile(f_in[0]) + fitres = f.Get(f_in[1]) + fitres_cov = ROOT.TMatrixDSym(len(POIs)) + fitres_cov_src = fitres.covarianceMatrix() + fitres_cor = ROOT.TMatrixDSym(len(POIs)) + fitres_cor_src = fitres.correlationMatrix() + ipos = [] + for p in POIs: + ipos.append(fitres.floatParsFinal().index(p)) + for i, ip in enumerate(POIs): + for j, jp in enumerate(POIs): + fitres_cor[i][j] = ctypes.c_double(fitres_cor_src[ipos[i]][ipos[j]]) + fitres_cov[i][j] = ctypes.c_double(fitres_cov_src[ipos[i]][ipos[j]]) + + if compare: + print("RooFitResult correlation matrix:") + fitres_cor.Print() + + if compare: + print("RooFitResult covariance matrix:") + fitres_cov.Print() + if self.args.output is not None: + out = self.args.output.split(":") + fout = ROOT.TFile(out[0], "RECREATE") + prefix = out[1] + if compare: + fout.WriteTObject(fitres_cor, prefix + "_comp_cor") + h_cor_compare = self.fix_TH2(ROOT.TH2D(fitres_cor), POIs) + fout.WriteTObject(h_cor_compare, prefix + "_comp_h_cor") + fout.WriteTObject(fitres_cov, prefix + "_comp_cov") + h_cov_compare = self.fix_TH2(ROOT.TH2D(fitres_cov), POIs) + fout.WriteTObject(h_cov_compare, prefix + "_comp_h_cov") + + def fix_TH2(self, h, labels): + h_fix = h.Clone() + for y in range(1, h.GetNbinsY() + 1): + for x in range(1, h.GetNbinsX() + 1): + h_fix.SetBinContent(x, y, h.GetBinContent(x, h.GetNbinsY() + 1 - y)) + for x in range(1, h_fix.GetNbinsX() + 1): + h_fix.GetXaxis().SetBinLabel(x, labels[x - 1]) + for y in range(1, h_fix.GetNbinsY() + 1): + h_fix.GetYaxis().SetBinLabel(y, labels[-y]) + return h_fix diff --git a/python/tool_base/EnhancedCombine.py b/python/tool_base/EnhancedCombine.py new file mode 100755 index 00000000000..0378687a1aa --- /dev/null +++ b/python/tool_base/EnhancedCombine.py @@ -0,0 +1,254 @@ +from __future__ import absolute_import +from __future__ import print_function +import itertools +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils +import json +import os +import bisect +from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +import six +from six.moves import zip + + +def isfloat(value): + try: + float(value) + return True + except ValueError: + return False + + +class EnhancedCombine(CombineToolBase): + description = "combine pass-through with special treatment for some options [DEFAULT]" + requires_root = False + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument( + "-m", + "--mass", + help='Supports range strings for multiple masses, e.g. 
"120:130:5,140 will produce three combine calls with mass values of 120, 125, 130 and 140"', + ) + group.add_argument("--points", help='For use with "-M MultiDimFit --algo grid" to split scan points into separate jobs') + group.add_argument("--singlePoint", help="Supports range strings for multiple points to test, uses the same format as the --mass argument") + group.add_argument("-s", "--seed", help="Supports range strings for multiple RNG seeds, uses the same format as the --mass argument") + group.add_argument("-d", "--datacard", nargs="*", default=[], help="Operate on multiple datacards") + group.add_argument("--name", "-n", default=".Test", help="Name used to label the combine output file, can be modified by other options") + group.add_argument("--setParameterRanges", help="Some other options will modify or add to the list of parameter ranges") + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("--opts", nargs="+", default=[], help="Add preset combine option groups") + group.add_argument("--there", action="store_true", help="Run combine in the same directory as the workspace") + group.add_argument( + "--split-points", + type=int, + default=0, + help="When used in conjunction with --points will create multiple combine calls that each run at most the number of points specified here.", + ) + group.add_argument( + "--boundlist", help="Name of json-file which contains the ranges of physical parameters depending on the given mass and given physics model" + ) + group.add_argument("--generate", nargs="*", default=[], help="Generate sets of options") + + def set_args(self, known, unknown): + CombineToolBase.set_args(self, known, unknown) + if hasattr(self.args, "opts"): + for opt in self.args.opts: + self.passthru.append(OPTS[opt]) + + def run_method(self): + # Put the method back in because we always take it out + self.put_back_arg("method", "-M") + + # cmd_queue = [] + subbed_vars = {} + + # pre_cmd = '' + + if self.args.mass is not None: + mass_vals = utils.split_vals(self.args.mass) + subbed_vars[("MASS",)] = [(mval,) for mval in mass_vals] + self.passthru.extend(["-m", "%(MASS)s"]) + + if self.args.singlePoint is not None: + single_points = utils.split_vals(self.args.singlePoint) + subbed_vars[("SINGLEPOINT",)] = [(pval,) for pval in single_points] + self.passthru.extend(["--singlePoint", "%(SINGLEPOINT)s"]) + self.args.name += ".POINT.%(SINGLEPOINT)s" + + if self.args.seed is not None: + seed_vals = utils.split_vals(self.args.seed) + subbed_vars[("SEED",)] = [(sval,) for sval in seed_vals] + self.passthru.extend(["-s", "%(SEED)s"]) + + for i, generate in enumerate(self.args.generate): + split_char = ":" if "::" in generate else ";" + gen_header, gen_content = generate.split(split_char * 2) + print(gen_header) + print(gen_content) + gen_headers = gen_header.split(split_char) + gen_entries = gen_content.split(split_char) + key = tuple() + arglist = [] + for header in gen_headers: + if header == "n" or header == "name": + self.args.name += ".%(GENNAME" + str(i) + ")s" + key += ("GENNAME" + str(i),) + else: + self.passthru.extend(["%(" + header + ")s"]) + key += (header,) + for entry in gen_entries: + if ",," in entry: + split_entry = entry.split(",,") + else: + split_entry = entry.split(",") + final_arg = [] + for header, e in zip(gen_headers, split_entry): + argname = "-%s" % header if len(header) == 1 else "--%s" % header + if header == "n" or header == "name": + final_arg.append(e) + elif len(e) and e != "!": + final_arg.append("%s %s" % 
+                    else:
+                        final_arg.append("")
+                arglist.append(tuple(final_arg))
+            subbed_vars[key] = arglist
+
+        if len(self.args.datacard) >= 1:
+            # Two lists of tuples, one which does specify the mass, and one
+            # which doesn't
+            dc_mass = []
+            dc_no_mass = []
+            for dc in self.args.datacard:
+                # Split workspace into path and filename
+                path, file = os.path.split(dc)
+                # If the wsp is in the current directory should call it '.'
+                if path == "":
+                    path = "."
+                # If we're not using the --there option then leave the
+                # workspace argument as the full path
+                if not self.args.there:
+                    file = dc
+                # Figure out if the enclosing directory is a mass value
+                dirs = path.split("/")
+                if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]):
+                    print("Assuming card %s uses mass value %s" % (dc, dirs[-1]))
+                    dc_mass.append((path, file, dirs[-1]))
+                dc_no_mass.append((path, file))
+            # If at least one mass value was inferred assume all of them are like this
+            if len(dc_mass) > 0:
+                subbed_vars[("DIR", "DATACARD", "MASS")] = dc_mass
+                self.passthru.extend(["-d", "%(DATACARD)s", "-m", "%(MASS)s"])
+            else:
+                subbed_vars[
+                    (
+                        "DIR",
+                        "DATACARD",
+                    )
+                ] = dc_no_mass
+                self.passthru.extend(["-d", "%(DATACARD)s"])
+        # elif len(self.args.datacard) == 1:
+        #     self.passthru.extend(['-d', self.args.datacard[0]])
+
+        current_ranges = self.args.setParameterRanges
+        put_back_ranges = current_ranges is not None
+
+        if self.args.boundlist is not None:
+            # We definitely don't need to put the parameter ranges back
+            # into the args because they're going in via the boundlist
+            # option instead
+            put_back_ranges = False
+            with open(self.args.boundlist) as json_file:
+                bnd = json.load(json_file)
+            bound_pars = list(bnd.keys())
+            print("Found bounds for parameters %s" % ",".join(bound_pars))
+            # Fill a dictionary of the bound info of the form:
+            #  { 'PAR1' : [(MASS, LOWER, UPPER), ...], ...}
+            bound_vals = {}
+            for par in bound_pars:
+                bound_vals[par] = list()
+                for mass, bounds in six.iteritems(bnd[par]):
+                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
+                bound_vals[par].sort(key=lambda x: x[0])
+            # find the subbed_vars entry containing the mass
+            # We will extend it to also specify the ranges
+            dict_key = None
+            mass_idx = None
+            for key in subbed_vars.keys():
+                if "MASS" in key:
+                    dict_key = key
+                    mass_idx = dict_key.index("MASS")
+            new_key = dict_key + ("MODELBOUND",)
+            new_list = []
+            for entry in subbed_vars[dict_key]:
+                command = []
+                if current_ranges is not None:
+                    command.append(current_ranges)
+                mval = entry[mass_idx]
+                for par in bound_pars:
+                    # The (mass, None, None) is just a trick to make bisect_left do the comparison
+                    # with the list of tuples in bound_vals[par]. 
The +1E-5 is to avoid float rounding + # issues + lower_bound = bisect.bisect_left(bound_vals[par], (float(mval) + 1e-5, None, None)) + # If lower_bound == 0 this means we are at or below the lowest mass point, + # in which case we should increase by one to take the bounds from this lowest + # point + if lower_bound == 0: + lower_bound += 1 + command.append("%s=%g,%g" % (par, bound_vals[par][lower_bound - 1][1], bound_vals[par][lower_bound - 1][2])) + new_list.append(entry + (str(":".join(command)),)) + # now remove the current mass information from subbed_vars + # and replace it with the updated one + del subbed_vars[dict_key] + subbed_vars[new_key] = new_list + self.passthru.extend(["--setParameterRanges", "%(MODELBOUND)s"]) + + # We might need to put the intercepted --setParameterRanges arg back in + if put_back_ranges: + self.put_back_arg("setParameterRanges", "--setParameterRanges") + + if self.args.points is not None: + self.passthru.extend(["--points", self.args.points]) + if self.args.split_points is not None and self.args.split_points > 0 and self.args.points is not None: + points = int(self.args.points) + split = self.args.split_points + start = 0 + ranges = [] + while (start + (split - 1)) < points: + # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root" + # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024): + # # Send job, if the file it's supposed to create doesn't exist yet + # # or if the file is empty because the previous job didn't finish + ranges.append((start, start + (split - 1))) + start += split + if start < points: + # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root" + # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024): + ranges.append((start, points - 1)) + # if (ranges == []): + # print "No jobs were created; All files already exist" + # exit() + subbed_vars[("P_START", "P_END")] = [(r[0], r[1]) for r in ranges] + self.passthru.extend(["--firstPoint %(P_START)s --lastPoint %(P_END)s"]) + self.args.name += ".POINTS.%(P_START)s.%(P_END)s" + + # can only put the name option back now because we might have modified + # it from what the user specified + self.put_back_arg("name", "-n") + proto = "combine " + (" ".join(self.passthru)) + if self.args.there: + proto = "pushd %(DIR)s; combine " + (" ".join(self.passthru)) + "; popd" + + for it in itertools.product(*list(subbed_vars.values())): + keys = list(subbed_vars.keys()) + dict = {} + for i, k in enumerate(keys): + for tuple_i, tuple_ele in enumerate(k): + dict[tuple_ele] = it[i][tuple_i] + self.job_queue.append(proto % dict) + self.flush_queue() diff --git a/python/tool_base/FastScan.py b/python/tool_base/FastScan.py new file mode 100755 index 00000000000..8df5454cfbc --- /dev/null +++ b/python/tool_base/FastScan.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function +import sys +import json +import ROOT +import re +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +import HiggsAnalysis.CombinedLimit.util.plotting as plot +from six.moves import range + + +class FastScan(CombineToolBase): + description = "Calculate nuisance parameter impacts" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, 
group): + CombineToolBase.attach_intercept_args(self, group) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument( + "-w", + "--workspace", + required=True, + help="Input ROOT file and workspace object name, in the format [file.root]:[name]. For workspaces produced by combine, the name is usually w.", + ) + group.add_argument( + "-d", + "--data", + help="By default reads data_obs from the input workspace. Alternative can be specified as [file.root]:[dataset name] or [file.root]:[wsp name]:[dataset name], where in both cases [dataset name] identifies an object inheriting from RooAbsData", + ) + group.add_argument("-f", "--fitres", help="Optionally supply a RooFitResult to update the initial parameter values, format [file.root]:[RooFitResult]") + group.add_argument("--match", help="Regular expression to only run for matching parameter names") + group.add_argument("--no-match", help="Regular expression to skip certain parameter names") + group.add_argument("-o", "--output", default="nll", help="Name of the output file, without the .pdf extension") + group.add_argument("-p", "--points", default=200, type=int, help="Number of NLL points to sample in each scan") + + def RooColIter(self, coll): + it = coll.createIterator() + var = it.Next() + while var: + yield var + var = it.Next() + + def run_method(self): + ROOT.gROOT.SetBatch(ROOT.kTRUE) + outfile = ROOT.TFile("%s.root" % self.args.output, "RECREATE") + points = self.args.points + file = ROOT.TFile(self.args.workspace.split(":")[0]) + wsp = file.Get(self.args.workspace.split(":")[1]) + mc = wsp.genobj("ModelConfig") + pdf = mc.GetPdf() + if self.args.data is None: + data = wsp.data("data_obs") + else: + ws_d = self.args.data.split(":") + print(">> Data: " + str(ws_d)) + f_d = ROOT.TFile(ws_d[0]) + if len(ws_d) == 2: + data = f_d.Get(ws_d[1]) + else: + data = f_d.Get(ws_d[1]).data(ws_d[2]) + ll = ROOT.RooLinkedList() + nll = pdf.createNLL(data, ll) + pars = pdf.getParameters(data) + pars.Print() + snap = pars.snapshot() + # nll.setZeroPoint() + nll.Print() + if self.args.fitres is not None: + fitfile = ROOT.TFile(self.args.fitres.split(":")[0]) + rfr = fitfile.Get(self.args.fitres.split(":")[1]) + snap = rfr.floatParsFinal() + pars.assignValueOnly(snap) + + page = 0 + doPars = [] + + for par in self.RooColIter(pars): + if par.isConstant(): + continue + if self.args.match is not None: + if not re.match(self.args.match, par.GetName()): + continue + if self.args.no_match is not None: + if re.match(self.args.no_match, par.GetName()): + continue + par.Print() + if not (par.hasMax() and par.hasMin()): + print("Parameter does not have an associated range, skipping") + continue + doPars.append(par) + plot.ModTDRStyle(width=700, height=1000) + for idx, par in enumerate(doPars): + print("%s : (%i/%i)" % (par.GetName(), idx + 1, len(doPars))) + nlld1 = nll.derivative(par, 1) + nlld2 = nll.derivative(par, 2) + xmin = par.getMin() + xmax = par.getMax() + gr = ROOT.TGraph(points) + grd1 = ROOT.TGraph(points) + grd2 = ROOT.TGraph(points) + gr.SetName(par.GetName()) + grd1.SetName(par.GetName() + "_d1") + grd2.SetName(par.GetName() + "_d2") + w = (xmax - xmin) / float(points) + for i in range(points): + x = xmin + (float(i) + 0.5) * w + par.setVal(x) + gr.SetPoint(i, x, nll.getVal()) + grd1.SetPoint(i, x, nlld1.getVal()) + grd2.SetPoint(i, x, nlld2.getVal()) + plot.ReZeroTGraph(gr, True) + # plot.RemoveGraphYAbove(gr, 2.) 
+ # gr.Print() + outfile.cd() + gr.Write() + grd1.Write() + grd2.Write() + pars.assignValueOnly(snap) + canv = ROOT.TCanvas(self.args.output, self.args.output) + pads = plot.MultiRatioSplit([0.4, 0.3], [0.005, 0.005], [0.005, 0.005]) + pads[0].cd() + plot.Set(gr, MarkerSize=0.5) + gr.Draw("APL") + axis1 = plot.GetAxisHist(pads[0]) + axis1.GetYaxis().SetTitle("NLL") + pads[1].cd() + plot.Set(grd1, MarkerSize=0.5) + grd1.Draw("APL") + axis2 = plot.GetAxisHist(pads[1]) + axis2.GetYaxis().SetTitle("NLL'") + pads[2].cd() + plot.Set(grd2, MarkerSize=0.5) + grd2.Draw("APL") + axis3 = plot.GetAxisHist(pads[2]) + axis3.GetYaxis().SetTitle("NLL''") + plot.Set( + axis3.GetXaxis(), + Title=par.GetName(), + TitleSize=axis3.GetXaxis().GetTitleSize() * 0.5, + TitleOffset=axis3.GetXaxis().GetTitleOffset() * 2, + ) + extra = "" + if page == 0: + extra = "(" + if page == len(doPars) - 1: + extra = ")" + print(extra) + canv.Print(".pdf%s" % extra) + page += 1 + + outfile.Write() diff --git a/python/tool_base/Impacts.py b/python/tool_base/Impacts.py new file mode 100755 index 00000000000..43c06f1bfa8 --- /dev/null +++ b/python/tool_base/Impacts.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function +import sys +import re +import json +import ROOT +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +from six.moves import map + + +class Impacts(CombineToolBase): + description = "Calculate nuisance parameter impacts" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument("-m", "--mass", required=True) + group.add_argument("-d", "--datacard", required=True) + group.add_argument( + "--redefineSignalPOIs", + help="""This option will be + forwarded to combine, and accepts arguments in the same X,Y,Z format. + For models with multiple POIs, the impacts will be calculated for all + of them at the same time. It is important to specify the POI list with this + option, which must be included in the --doInitialFit, --doFits and + --output stages. Note the ordering of POIs in the list must also be + identical in each step.""", + ) + group.add_argument("--setPhysicsModelParameters") + group.add_argument("--setParameters") + group.add_argument("--name", "-n", default="Test") + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument( + "--named", + metavar="PARAM1,PARAM2,...", + help=""" By + default the list of nuisance parameters will be loaded from the + input workspace. Use this option to specify a different list""", + ) + group.add_argument( + "--exclude", + metavar="PARAM1,PARAM2,...", + help=""" Skip + these nuisances. Also accepts regexp with syntax 'rgx{}'""", + ) + group.add_argument( + "--doInitialFit", + action="store_true", + help="""Find + the crossings of all the POIs. 
Must have the output from this + before running with --doFits""", + ) + group.add_argument( + "--splitInitial", + action="store_true", + help="""In + the initial fits generate separate jobs for each POI""", + ) + group.add_argument( + "--doFits", + action="store_true", + help="""Actually + run the fits for the nuisance parameter impacts, otherwise just + looks for the results""", + ) + group.add_argument( + "--allPars", + action="store_true", + help="""Run the + impacts for all free parameters of the model, not just those + listed as nuisance parameters""", + ) + group.add_argument( + "--output", + "-o", + help="""write output json to a + file""", + ) + group.add_argument("--approx", default=None, choices=["hesse", "robust"], help="""Calculate impacts using the covariance matrix instead""") + group.add_argument("--noInitialFit", action="store_true", default=False, help="""Do not look for results from the initial Fit""") + + def run_method(self): + if self.args.allPars: + print("Info: the behaviour of --allPars is now always enabled and the option will be removed in a future update") + passthru = self.passthru + mh = self.args.mass + ws = self.args.datacard + name = self.args.name if self.args.name is not None else "" + named = [] + if self.args.named is not None: + named = self.args.named.split(",") + # Put intercepted args back + passthru.extend(["-m", mh]) + passthru.extend(["-d", ws]) + if self.args.setPhysicsModelParameters is not None: + passthru.extend(["--setPhysicsModelParameters", self.args.setPhysicsModelParameters]) + if self.args.setParameters is not None: + passthru.extend(["--setParameters", self.args.setParameters]) + self.args.setPhysicsModelParameters = self.args.setParameters + pass_str = " ".join(passthru) + + paramList = [] + if self.args.redefineSignalPOIs is not None: + poiList = self.args.redefineSignalPOIs.split(",") + else: + poiList = utils.list_from_workspace(ws, "w", "ModelConfig_POI") + print("Have POIs: " + str(poiList)) + poistr = ",".join(poiList) + + if self.args.approx == "hesse" and self.args.doFits: + self.job_queue.append( + "combine -M MultiDimFit -n _approxFit_%(name)s --algo none --redefineSignalPOIs %(poistr)s --floatOtherPOIs 1 --saveInactivePOI 1 --saveFitResult %(pass_str)s" + % {"name": name, "poistr": poistr, "pass_str": pass_str} + ) + self.flush_queue() + sys.exit(0) + elif self.args.approx == "robust" and self.args.doFits: + self.job_queue.append( + "combine -M MultiDimFit -n _approxFit_%(name)s --algo none --redefineSignalPOIs %(poistr)s --floatOtherPOIs 1 --saveInactivePOI 1 --robustHesse 1 %(pass_str)s" + % {"name": name, "poistr": poistr, "pass_str": pass_str} + ) + self.flush_queue() + sys.exit(0) + + ################################################ + # Generate the initial fit(s) + ################################################ + if self.args.doInitialFit and self.args.approx is not None: + print("No --initialFit needed with --approx, use --output directly") + sys.exit(0) + if self.args.doInitialFit: + if self.args.splitInitial: + for poi in poiList: + self.job_queue.append( + "combine -M MultiDimFit -n _initialFit_%(name)s_POI_%(poi)s --algo singles --redefineSignalPOIs %(poistr)s --floatOtherPOIs 1 --saveInactivePOI 1 -P %(poi)s %(pass_str)s" + % {"name": name, "poi": poi, "poistr": poistr, "pass_str": pass_str} + ) + else: + self.job_queue.append( + "combine -M MultiDimFit -n _initialFit_%(name)s --algo singles --redefineSignalPOIs %(poistr)s %(pass_str)s" + % {"name": name, "poistr": poistr, "pass_str": pass_str} + ) + 
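+            # The impacts workflow is staged: once these initial fit jobs have finished, rerun the
+            # same command with --doFits, and finally with --output to collect the results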
self.flush_queue() + sys.exit(0) + + # Read the initial fit results + if not self.args.noInitialFit: + initialRes = {} + if self.args.approx is not None: + if self.args.approx == "hesse": + fResult = ROOT.TFile("multidimfit_approxFit_%(name)s.root" % {"name": name}) + rfr = fResult.Get("fit_mdf") + fResult.Close() + initialRes = utils.get_roofitresult(rfr, poiList, poiList) + elif self.args.approx == "robust": + fResult = ROOT.TFile("robustHesse_approxFit_%(name)s.root" % {"name": name}) + floatParams = fResult.Get("floatParsFinal") + rfr = fResult.Get("h_correlation") + rfr.SetDirectory(0) + fResult.Close() + initialRes = utils.get_robusthesse(floatParams, rfr, poiList, poiList) + elif self.args.splitInitial: + for poi in poiList: + initialRes.update( + utils.get_singles_results("higgsCombine_initialFit_%(name)s_POI_%(poi)s.MultiDimFit.mH%(mh)s.root" % vars(), [poi], poiList) + ) + else: + initialRes = utils.get_singles_results("higgsCombine_initialFit_%(name)s.MultiDimFit.mH%(mh)s.root" % vars(), poiList, poiList) + + ################################################ + # Build the parameter list + ################################################ + if len(named) > 0: + paramList = named + else: + paramList = self.all_free_parameters(ws, "w", "ModelConfig", poiList) + # else: + # paramList = utils.list_from_workspace( + # ws, 'w', 'ModelConfig_NuisParams') + + # Exclude some parameters + if self.args.exclude is not None: + exclude = self.args.exclude.split(",") + expExclude = [] + for exParam in exclude: + if "rgx{" in exParam: + pattern = exParam.replace("'rgx{", "").replace("}'", "") + pattern = pattern.replace("rgx{", "").replace("}", "") + for param in paramList: + if re.search(pattern, param): + expExclude.append(param) + else: + expExclude.append(exParam) + paramList = [x for x in paramList if x not in expExclude] + + print("Have parameters: " + str(len(paramList))) + + prefit = utils.prefit_from_workspace(ws, "w", paramList, self.args.setPhysicsModelParameters) + res = {} + if not self.args.noInitialFit: + res["POIs"] = [] + res["params"] = [] + if not self.args.noInitialFit: + for poi in poiList: + res["POIs"].append({"name": poi, "fit": initialRes[poi][poi]}) + + missing = [] + for param in paramList: + pres = {"name": param} + pres.update(prefit[param]) + # print 'Doing param ' + str(counter) + ': ' + param + if self.args.doFits: + self.job_queue.append( + "combine -M MultiDimFit -n _paramFit_%(name)s_%(param)s --algo impact --redefineSignalPOIs %(poistr)s -P %(param)s --floatOtherPOIs 1 --saveInactivePOI 1 %(pass_str)s" + % vars() + ) + else: + if self.args.approx == "hesse": + paramScanRes = utils.get_roofitresult(rfr, [param], poiList + [param]) + elif self.args.approx == "robust": + if floatParams.find(param): + paramScanRes = utils.get_robusthesse(floatParams, rfr, [param], poiList + [param]) + else: + paramScanRes = None + else: + paramScanRes = utils.get_singles_results( + "higgsCombine_paramFit_%(name)s_%(param)s.MultiDimFit.mH%(mh)s.root" % vars(), [param], poiList + [param] + ) + if paramScanRes is None: + missing.append(param) + continue + pres["fit"] = paramScanRes[param][param] + for p in poiList: + pres.update( + { + p: paramScanRes[param][p], + "impact_" + p: max(list(map(abs, (x - paramScanRes[param][p][1] for x in (paramScanRes[param][p][2], paramScanRes[param][p][0]))))), + } + ) + res["params"].append(pres) + self.flush_queue() + + if self.args.approx == "hesse": + res["method"] = "hesse" + elif self.args.approx == "robust": + res["method"] = "robust" + else: + 
res["method"] = "default" + jsondata = json.dumps(res, sort_keys=True, indent=2, separators=(",", ": ")) + # print jsondata + if self.args.output is not None: + with open(self.args.output, "w") as out_file: + out_file.write(jsondata) + if len(missing) > 0: + print("Missing inputs: " + ",".join(missing)) + + def all_free_parameters(self, file, wsp, mc, pois): + res = [] + wsFile = ROOT.TFile.Open(file) + w = wsFile.Get(wsp) + config = w.genobj(mc) + pdfvars = config.GetPdf().getParameters(config.GetObservables()) + it = pdfvars.createIterator() + var = it.Next() + while var: + if var.GetName() not in pois and (not var.isConstant()) and var.InheritsFrom("RooRealVar"): + res.append(var.GetName()) + var = it.Next() + return res diff --git a/python/tool_base/ImpactsFromScans.py b/python/tool_base/ImpactsFromScans.py new file mode 100755 index 00000000000..cef352f9ac6 --- /dev/null +++ b/python/tool_base/ImpactsFromScans.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function +import argparse +import os +import re +import sys +import json +import math +import itertools +import stat +import glob +import ROOT +from array import array +from multiprocessing import Pool +from numpy import matrix +from numpy.linalg import solve +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils +from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +from six.moves import range +import ctypes + + +class ImpactsFromScans(CombineToolBase): + description = "Calculate nuisance parameter impacts" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument("--name", "-n", default="Test") + group.add_argument("-m", "--mass", required=True) + + def get_fixed_results(self, file, POIs): + """Extracts the output from the MultiDimFit singles mode + Note: relies on the list of parameters that were run (scanned) being correct""" + res = {} + f = ROOT.TFile(file) + if f is None or f.IsZombie(): + return None + t = f.Get("limit") + for i, evt in enumerate(t): + if i != 1: + continue + for POI in POIs: + res[POI] = getattr(evt, POI) + return res + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + # group.add_argument('--offset', default=0, type=int, + # help='Start the loop over parameters with this offset (default: %(default)s)') + # group.add_argument('--advance', default=1, type=int, + # help='Advance this many parameters each step in the loop (default: %(default)s') + group.add_argument("--input-json", help=("json file and dictionary containing the fit values, of form file:key1:key2..")) + group.add_argument("--do-fits", action="store_true", help=("Actually do the fits")) + group.add_argument("--cov-method", choices=["full", "asymm"], default="full") + group.add_argument("--cor-method", choices=["full", "asymm", "average"], default="full") + group.add_argument("--asymm-vals", default="") + group.add_argument("--do-generic", action="store_true") + + def run_method(self): + mass = self.args.mass + self.put_back_arg("mass", "-m") + in_json = self.args.input_json.split(":") + with open(in_json[0]) as jsonfile: + js = json.load(jsonfile) + for key in in_json[1:]: + js = js[key] + POIs = sorted([str(x) for x in js.keys()]) + print(POIs) + for POI in POIs: + if not self.args.do_fits: + break + arg_str = "-M 
MultiDimFit --algo fixed --saveInactivePOI 1 --floatOtherPOIs 1 -P %s" % POI + cmd_hi = arg_str + " -n %s --fixedPointPOIs %s=%f" % (self.args.name + ".%s.Hi" % POI, POI, js[POI]["Val"] + js[POI]["ErrorHi"]) + cmd_lo = arg_str + " -n %s --fixedPointPOIs %s=%f" % (self.args.name + ".%s.Lo" % POI, POI, js[POI]["Val"] + js[POI]["ErrorLo"]) + self.job_queue.append("combine %s %s" % (cmd_hi, " ".join(self.passthru))) + self.job_queue.append("combine %s %s" % (cmd_lo, " ".join(self.passthru))) + self.flush_queue() + if self.args.do_fits: + print(">> Re-run without --do-fits to harvest the results") + return + res = {} + for POI in POIs: + res[POI] = {} + name_hi = "higgsCombine%s.%s.Hi.MultiDimFit.mH%s.root" % (self.args.name, POI, mass) + name_lo = "higgsCombine%s.%s.Lo.MultiDimFit.mH%s.root" % (self.args.name, POI, mass) + res_hi = self.get_fixed_results(name_hi, POIs) + res_lo = self.get_fixed_results(name_lo, POIs) + for fPOI in POIs: + res[POI][fPOI] = [res_lo[fPOI], js[fPOI]["Val"], res_hi[fPOI]] + # print res + cor = ROOT.TMatrixDSym(len(POIs)) + cov = ROOT.TMatrixDSym(len(POIs)) + bf_vals = {x.split("=")[0]: float(x.split("=")[1]) for x in self.args.asymm_vals.split(",") if x != ""} + + xvars = [] + muvars = [] + covvars = [] + xvec = ROOT.RooArgList() + mu = ROOT.RooArgList() + for POI in POIs: + xvars.append(ROOT.RooRealVar(POI, "", js[POI]["Val"], -100, 100)) + muvars.append(ROOT.RooRealVar(POI + "_In", "", js[POI]["Val"], -100, 100)) + muvars[-1].setConstant(True) + xvec.add(xvars[-1]) + mu.add(muvars[-1]) + + print("-----------------------------------------------------------") + print("Diagonal Covariance") + print("-----------------------------------------------------------") + print("%-30s %-7s %-7s %-7s %-7s %-7s" % ("POI", "Val", "Sym", "Hi", "Lo", "(Hi-Lo)/(Hi+Lo)")) + for i, p in enumerate(POIs): + cor[i][i] = ctypes.c_double(1.0) # diagonal correlation is 1 + d1 = res[p][p][1] + d21 = res[p][p][2] - res[p][p][1] + d10 = res[p][p][1] - res[p][p][0] + d20 = (res[p][p][2] - res[p][p][0]) / 2.0 + vlo = js[p]["ValidErrorLo"] + print("%-30s %+.3f %+.3f %+.3f %+.3f %+.3f" % (p, d1, d20, d21, d10, (d21 - d10) / (d21 + d10))) + covv = 0.0 + if self.args.cov_method == "full": + covv = d20 + elif self.args.cov_method == "asymm": + bf_val = 1.0 + for x in bf_vals: + if x in p: + bf_val = bf_vals[x] + print("Using %s=%g" % (x, bf_vals[x])) + covv = d21 if bf_val >= d1 else d10 + if p == "mu_XS_ZH_BR_WW": + covv = covv * 0.89 + if p == "mu_XS_ttHtH_BR_tautau": + covv = covv * 1.2 + # if p == 'mu_XS_ttHtH_BR_tautau': covv = 6.3 + if not vlo: + print("No ValidErrorLo, using d21") + covv = d21 + print("Chosen: %+.3f" % covv) + cov[i][i] = ctypes.c_double(pow(covv, 2.0)) + + x1 = -1.0 * d10 + x2 = 0.0 + x3 = d21 + x4 = js[p]["2sig_ErrorHi"] + y1 = d10 * d10 + y2 = d20 * d20 + y3 = d21 * d21 + y4 = (x4 / 2.0) * (x4 / 2.0) + if not vlo and abs(d10) < 1e-4: + x1 = -1.0 * d21 + y1 = d21 * d21 + print((x1, y1)) + print((x2, y2)) + print((x3, y3)) + print((x4, y4)) + + mtx = matrix([[x1 * x1, x1, 1], [x3 * x3, x3, 1], [x4 * x4, x4, 1]]) + yvec = matrix([[y1], [y3], [y4]]) + # print mtx + # print yvec + xres = solve(mtx, yvec) + # print xres + covvars.append( + ROOT.RooFormulaVar("cov%i" % i, "", "%g*(@0-%g)*(@0-%g)+%g*(@0-%g)+%g" % (xres[0], d1, d1, xres[1], d1, xres[2]), ROOT.RooArgList(xvars[i])) + ) + # covvars.append(ROOT.RooFormulaVar('cov%i'%i,'', '%g' % (y2), ROOT.RooArgList())) + covvars[-1].Print() + + print("-----------------------------------------------------------") + 
print("Correlation") + print("-----------------------------------------------------------") + print( + "%-30s %-30s %-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s" + % ("i", "j", "Val_i", "Val_j", "ij_Sym", "ij_Hi", "ij_Lo", "ji_Sym", "ji_Hi", "ji_Lo", "Sym_Asym") + ) + cors = [] + mvals = ROOT.RooArgList() + mvals_store = [] + for i, ip in enumerate(POIs): + for j, jp in enumerate(POIs): + if i == j: + mvals_store.append(ROOT.RooFormulaVar("ele_%i_%i" % (i, j), "@0", ROOT.RooArgList(covvars[i]))) + mvals.add(mvals_store[-1]) + continue + # Check the scans + di_1 = res[ip][ip][1] + di_21 = res[ip][ip][2] - res[ip][ip][1] + di_10 = res[ip][ip][1] - res[ip][ip][0] + di_20 = (res[ip][ip][2] - res[ip][ip][0]) / 2.0 + cj_21 = res[ip][jp][2] - res[ip][jp][1] + cj_10 = res[ip][jp][1] - res[ip][jp][0] + cj_20 = (res[ip][jp][2] - res[ip][jp][0]) / 2.0 + vi_lo = js[ip]["ValidErrorLo"] + dj_1 = res[jp][jp][1] + dj_21 = res[jp][jp][2] - res[jp][jp][1] + dj_10 = res[jp][jp][1] - res[jp][jp][0] + dj_20 = (res[jp][jp][2] - res[jp][jp][0]) / 2.0 + ci_21 = res[jp][ip][2] - res[jp][ip][1] + ci_10 = res[jp][ip][1] - res[jp][ip][0] + ci_20 = (res[jp][ip][2] - res[jp][ip][0]) / 2.0 + vj_lo = js[jp]["ValidErrorLo"] + + cij_20 = ci_20 / di_20 + cij_21 = ci_21 / (di_21 if (ci_21 >= 0 or not vi_lo) else di_10) + cij_10 = ci_10 / (di_21 if (ci_21 < 0 or not vi_lo) else di_10) + # cij_21 = ci_21/di_21 + # cij_10 = ci_10/di_21 + + cji_20 = cj_20 / dj_20 + cji_21 = cj_21 / (dj_21 if (cj_21 >= 0 or not vj_lo) else dj_10) + cji_10 = cj_10 / (dj_21 if (cj_21 < 0 or not vj_lo) else dj_10) + # cji_21 = cj_21/dj_21 + # cji_10 = cj_10/dj_21 + + a_20 = (cij_20 - cji_20) / ((cij_20 + cji_20) if (cij_20 + cji_20) != 0.0 else 1.0) + + a_i = (cij_21 - cij_10) / ((cij_21 + cij_10) if (cij_21 + cij_10) != 0.0 else 1.0) + a_j = (cji_21 - cji_10) / ((cji_21 + cji_10) if (cji_21 + cji_10) != 0.0 else 1.0) + + max_c = max([abs(x) for x in [cij_20, cij_21, cij_10, cji_20, cji_21, cji_10]]) + + line = "%-30s %-30s %+.3f %+.3f | %+.3f %+.3f %+.3f %+.3f | %+.3f %+.3f %+.3f %+.3f | %+.3f" % ( + ip, + jp, + di_1, + dj_1, + cij_20, + cij_21, + cij_10, + a_i, + cji_20, + cji_21, + cji_10, + a_j, + a_20, + ) + print(line) + + cors.append((line, max_c)) + + val_i = 0.0 + val_j = 0.0 + if self.args.cor_method == "full": + val_i = cij_20 + val_j = cji_20 + elif self.args.cor_method == "average": + val_i = (cij_21 + cij_10) / 2.0 + val_j = (cji_21 + cji_10) / 2.0 + elif self.args.cor_method == "asymm": + bf_val_i = 1.0 + bf_val_j = 1.0 + for x in bf_vals: + if x in ip: + bf_val_i = bf_vals[x] + print("Using %s=%g for POI i" % (x, bf_vals[x])) + if x in jp: + bf_val_j = bf_vals[x] + print("Using %s=%g for POI j" % (x, bf_vals[x])) + + val_i = cji_21 if bf_val_i >= di_1 else cji_10 + val_j = cij_21 if bf_val_j >= dj_1 else cij_10 + if not vi_lo: + print("No ValidErrorLo for POI i, using d21") + val_i = cji_21 + if not vj_lo: + print("No ValidErrorLo for POI j, using d21") + val_j = cij_21 + print("Chosen: %+.3f for val_i" % val_i) + print("Chosen: %+.3f for val_j" % val_j) + + correlation = (val_i + val_j) / 2.0 # take average correlation? + # if ip == 'mu_XS_ttHtH_BR_WW' and jp == 'mu_XS_ttHtH_BR_tautau': correlation = correlation * 1.15 + # if jp == 'mu_XS_ttHtH_BR_WW' and ip == 'mu_XS_ttHtH_BR_tautau': correlation = correlation * 1.15 + # correlation = min(sorted([val_i, val_j],key=lambda x: abs(x), reverse=True)) + # correlation = min(val_i,val_j, key=abs) # take the max? 
+ cor[i][j] = correlation + cor[j][i] = correlation + covariance = correlation * math.sqrt(cov[i][i]) * math.sqrt(cov[j][j]) + cov[i][j] = covariance + cov[j][i] = covariance + mvals_store.append(ROOT.RooFormulaVar("ele_%i_%i" % (i, j), "%g*sqrt(@0)*sqrt(@1)" % (correlation), ROOT.RooArgList(covvars[i], covvars[j]))) + # mvals_store.append(ROOT.RooFormulaVar('ele_%i_%i'%(i,j),'%g'%(covariance),ROOT.RooArgList())) + mvals.add(mvals_store[-1]) + cors.sort(key=lambda tup: tup[1], reverse=True) + for tup in cors: + print(tup[0]) + # cor.Print() + fout = ROOT.TFile("covariance_%s.root" % self.args.name, "RECREATE") + fout.WriteTObject(cor, "cor") + h_cor = self.fix_TH2(ROOT.TH2D(cor), POIs) + fout.WriteTObject(h_cor, "h_cor") + fout.WriteTObject(cov, "cov") + h_cov = self.fix_TH2(ROOT.TH2D(cov), POIs) + fout.WriteTObject(h_cov, "h_cov") + + xvec.Print("v") + mu.Print("v") + if self.args.do_generic: + pdf = ROOT.RooGenericMultiVarGaussian("pdf", "", xvec, mu, mvals) + else: + pdf = ROOT.RooMultiVarGaussian("pdf", "", xvec, mu, cov) + dat = ROOT.RooDataSet("global_obs", "", ROOT.RooArgSet(mu)) + dat.add(ROOT.RooArgSet(mu)) + pdf.Print() + dat.Print() + # fitRes = pdf.fitTo(dat, ROOT.RooFit.Minimizer('Minuit2', 'Migrad'), ROOT.RooFit.Hesse(True), ROOT.RooFit.Save(True)) + # fitRes.Print('v') + wsp = ROOT.RooWorkspace("w", "") + getattr(wsp, "import")(pdf) + getattr(wsp, "import")(dat) + wsp.Write() + fout.Close() + + def fix_TH2(self, h, labels): + h_fix = h.Clone() + for y in range(1, h.GetNbinsY() + 1): + for x in range(1, h.GetNbinsX() + 1): + h_fix.SetBinContent(x, y, h.GetBinContent(x, h.GetNbinsY() + 1 - y)) + for x in range(1, h_fix.GetNbinsX() + 1): + h_fix.GetXaxis().SetBinLabel(x, labels[x - 1]) + for y in range(1, h_fix.GetNbinsY() + 1): + h_fix.GetYaxis().SetBinLabel(y, labels[-y]) + return h_fix + + +# self.job_queue.append('combine -M MultiDimFit -n _initialFit_%(name)s_POI_%(poi)s --algo singles --redefineSignalPOIs %(poistr)s --floatOtherPOIs 1 --saveInactivePOI 1 -P %(poi)s %(pass_str)s --altCommit' % vars()) +# else: +# self.job_queue.append('combine -M MultiDimFit -n _initialFit_%(name)s --algo singles --redefineSignalPOIs %(poistr)s %(pass_str)s --altCommit' % vars()) +# self.flush_queue() +# sys.exit(0) +# initialRes = utils.get_singles_results('higgsCombine_initialFit_%(name)s.MultiDimFit.mH%(mh)s.root' % vars(), poiList, poiList) +# if len(named) > 0: +# paramList = named +# else: +# paramList = utils.list_from_workspace(ws, 'w', 'ModelConfig_NuisParams') +# print 'Have nuisance parameters: ' + str(len(paramList)) +# prefit = utils.prefit_from_workspace(ws, 'w', paramList) +# res = { } +# res["POIs"] = [] +# res["params"] = [] +# # for poi in poiList: +# # res["POIs"].append({"name" : poi, "fit" : initialRes[poi][poi]}) +# +# missing = [ ] +# for param in paramList: +# pres = { } +# # print 'Doing param ' + str(counter) + ': ' + param +# if self.args.doFits: +# self.job_queue.append('combine -M MultiDimFit -n _paramFit_%(name)s_%(param)s --algo singles --redefineSignalPOIs %(param)s,%(poistr)s -P %(param)s --floatOtherPOIs 1 --saveInactivePOI 1 %(pass_str)s --altCommit' % vars()) +# else: +# paramScanRes = get_singles_results('higgsCombine_paramFit_%(name)s_%(param)s.MultiDimFit.mH%(mh)s.root' % vars(), [param], poiList + [param]) +# if paramScanRes is None: +# missing.append(param) +# continue +# pres.update({"name" : param, "fit" : paramScanRes[param][param], "prefit" : prefit[param]}) +# for p in poiList: +# pres.update({p : paramScanRes[param][p], 'impact_'+p : 
(paramScanRes[param][p][2] - paramScanRes[param][p][0])/2.}) +# res['params'].append(pres) +# self.flush_queue() +# jsondata = json.dumps(res, sort_keys=True, indent=2, separators=(',', ': ')) +# print jsondata +# if self.args.output is not None: +# with open(args.output, 'w') as out_file: +# out_file.write(jsondata) +# if len(missing) > 0: +# print 'Missing inputs: ' + ','.join(missing) diff --git a/python/tool_base/LimitGrids.py b/python/tool_base/LimitGrids.py new file mode 100755 index 00000000000..2765745d555 --- /dev/null +++ b/python/tool_base/LimitGrids.py @@ -0,0 +1,781 @@ +from __future__ import absolute_import +from __future__ import print_function +import ROOT +import json +import itertools +import glob +import sys +import re +import zipfile +import os +import bisect +from math import floor +from array import array + +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +import HiggsAnalysis.CombinedLimit.util.plotting as plot +import six +from six.moves import range + + +class AsymptoticGrid(CombineToolBase): + description = "Calculate asymptotic limits on parameter grids" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument("--setParameters", default=None) + group.add_argument("--freezeParameters", default=None) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("config", help="json config file") + + def run_method(self): + ROOT.PyConfig.IgnoreCommandLineOptions = True + ROOT.gROOT.SetBatch(ROOT.kTRUE) + + # This is what the logic should be: + # - get the list of model points + # - figure out which files are: + # - completely missing + # - there but corrupt, missing tree + # - ok + # - If we have anything in the third category proceed to produce output files + # - Anything in the first two gets added to the queue only if --doFits is specified + + # Step 1 - open the json config file + with open(self.args.config) as json_file: + cfg = json.load(json_file) + # to do - have to handle the case where it doesn't exist + points = [] + blacklisted_points = [] + for igrid in cfg["grids"]: + assert len(igrid) == 3 + if igrid[2] == "": + points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]))) + else: + blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2]))) + POIs = cfg["POIs"] + opts = cfg["opts"] + + # remove problematic points (points with NaN values) + points_to_remove = [] + grids_to_remove = cfg.get("grids_to_remove", None) + if grids_to_remove is not None: + for igrid in grids_to_remove: + assert len(igrid) == 2 + points_to_remove.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]))) + for p in points_to_remove: + points.remove(p) + + # Have to merge some arguments from both the command line and the "opts" in the json file + to_freeze = [] + to_set = [] + set_opt, opts = self.extract_arg("--setParameters", opts) + if set_opt is not None: + to_set.append(set_opt) + freeze_opt, opts = self.extract_arg("--freezeParameters", opts) + if freeze_opt is not None: + to_freeze.append(freeze_opt) + if hasattr(self.args, "setParameters") and self.args.setParameters is not None: + to_set.append(self.args.setParameters) + if hasattr(self.args, "freezeParameters") and 
self.args.freezeParameters is not None: + to_freeze.append(self.args.freezeParameters) + + file_dict = {} + for p in points: + file_dict[p] = [] + + for f in glob.glob("higgsCombine.%s.*.%s.*.AsymptoticLimits.mH*.root" % (POIs[0], POIs[1])): + # print f + rgx = re.compile(r"higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.AsymptoticLimits\.mH.*\.root" % (POIs[0], POIs[1])) + matches = rgx.search(f) + p = (matches.group("p1"), matches.group("p2")) + if p in file_dict: + file_dict[p].append(f) + + for key, val in six.iteritems(file_dict): + name = "%s.%s.%s.%s" % (POIs[0], key[0], POIs[1], key[1]) + print(">> Point %s" % name) + if len(val) == 0: + print("Going to run limit for point %s" % (key,)) + set_arg = ",".join(["%s=%s,%s=%s" % (POIs[0], key[0], POIs[1], key[1])] + to_set) + freeze_arg = ",".join(["%s,%s" % (POIs[0], POIs[1])] + to_freeze) + point_args = "-n .%s --setParameters %s --freezeParameters %s" % (name, set_arg, freeze_arg) + cmd = " ".join(["combine -M AsymptoticLimits", opts, point_args] + self.passthru) + self.job_queue.append(cmd) + + bail_out = len(self.job_queue) > 0 + self.flush_queue() + + if bail_out: + print(">> New jobs were created / run in this cycle, run the script again to collect the output") + sys.exit(0) + + xvals = [] + yvals = [] + zvals_m2s = [] + zvals_m1s = [] + zvals_exp = [] + zvals_p1s = [] + zvals_p2s = [] + zvals_obs = [] + for key, val in six.iteritems(file_dict): + for filename in val: + fin = ROOT.TFile(filename) + if fin.IsZombie(): + continue + tree = fin.Get("limit") + for evt in tree: + if abs(evt.quantileExpected + 1) < 0.01: + xvals.append(float(key[0])) + yvals.append(float(key[1])) + # print 'At point %s have observed CLs = %f' % (key, evt.limit) + zvals_obs.append(float(evt.limit)) + if abs(evt.quantileExpected - 0.025) < 0.01: + # print 'At point %s have -2sigma CLs = %f' % (key, evt.limit) + zvals_m2s.append(float(evt.limit)) + if abs(evt.quantileExpected - 0.16) < 0.01: + # print 'At point %s have -1sigma CLs = %f' % (key, evt.limit) + zvals_m1s.append(float(evt.limit)) + if abs(evt.quantileExpected - 0.5) < 0.01: + # print 'At point %s have expected CLs = %f' % (key, evt.limit) + zvals_exp.append(float(evt.limit)) + if abs(evt.quantileExpected - 0.84) < 0.01: + # print 'At point %s have +1sigma CLs = %f' % (key, evt.limit) + zvals_p1s.append(float(evt.limit)) + if abs(evt.quantileExpected - 0.975) < 0.01: + # print 'At point %s have +2sigma CLs = %f' % (key, evt.limit) + zvals_p2s.append(float(evt.limit)) + for POI1, POI2, CLs in blacklisted_points: + xvals.append(float(POI1)) + yvals.append(float(POI2)) + zvals_m2s.append(float(CLs)) + zvals_m1s.append(float(CLs)) + zvals_exp.append(float(CLs)) + zvals_p1s.append(float(CLs)) + zvals_p2s.append(float(CLs)) + zvals_obs.append(float(CLs)) + graph_m2s = ROOT.TGraph2D(len(zvals_m2s), array("d", xvals), array("d", yvals), array("d", zvals_m2s)) + graph_m1s = ROOT.TGraph2D(len(zvals_m1s), array("d", xvals), array("d", yvals), array("d", zvals_m1s)) + graph_exp = ROOT.TGraph2D(len(zvals_exp), array("d", xvals), array("d", yvals), array("d", zvals_exp)) + graph_p1s = ROOT.TGraph2D(len(zvals_p1s), array("d", xvals), array("d", yvals), array("d", zvals_p1s)) + graph_p2s = ROOT.TGraph2D(len(zvals_p2s), array("d", xvals), array("d", yvals), array("d", zvals_p2s)) + graph_obs = ROOT.TGraph2D(len(zvals_obs), array("d", xvals), array("d", yvals), array("d", zvals_obs)) + # h_bins = cfg['hist_binning'] + # hist = ROOT.TH2F('h_observed', '', h_bins[0], h_bins[1], h_bins[2], h_bins[3], h_bins[4], h_bins[5]) + #
for i in xrange(1, hist.GetNbinsX()+1): + # for j in xrange(1, hist.GetNbinsY()+1): + # hist.SetBinContent(i, j, graph.Interpolate(hist.GetXaxis().GetBinCenter(i), hist.GetYaxis().GetBinCenter(j))) + fout = ROOT.TFile("asymptotic_grid.root", "RECREATE") + fout.WriteTObject(graph_m2s, "exp-2") + fout.WriteTObject(graph_m1s, "exp-1") + fout.WriteTObject(graph_exp, "exp0") + fout.WriteTObject(graph_p1s, "exp+1") + fout.WriteTObject(graph_p2s, "exp+2") + fout.WriteTObject(graph_obs, "obs") + # fout.WriteTObject(hist) + fout.Close() + # Next step: open output files + # Fill TGraph2D with CLs, CLs+b + + +class HybridNewGrid(CombineToolBase): + description = "Calculate toy-based limits on parameter grids" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument("--setParameters", default=None) + group.add_argument("--freezeParameters", default=None) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("config", help="json config file") + group.add_argument("--cycles", default=0, type=int, help="Number of job cycles to create per point") + group.add_argument("--output", action="store_true", help="Write CLs grids into an output file") + group.add_argument("--from-asymptotic", default=None, help="JSON file which will be used to create a limit grid automatically") + + def GetCombinedHypoTest(self, files): + if len(files) == 0: + return None + results = [] + for file in files: + found_res = False + f = ROOT.TFile(file) + ROOT.gDirectory.cd("toys") + for key in ROOT.gDirectory.GetListOfKeys(): + if ROOT.gROOT.GetClass(key.GetClassName()).InheritsFrom(ROOT.RooStats.HypoTestResult.Class()): + results.append(ROOT.gDirectory.Get(key.GetName())) + found_res = True + f.Close() + if not found_res: + print(">> Warning, did not find a HypoTestResult object in file %s" % file) + if (len(results)) > 1: + for r in results[1:]: + results[0].Append(r) + ntoys = min(results[0].GetNullDistribution().GetSize(), results[0].GetAltDistribution().GetSize()) + if ntoys == 0: + print(">> Warning, HypoTestResult from file(s) %s does not contain any toy results, did something go wrong in your fits?" % "+".join(files)) + return results[0] + + def ValidateHypoTest(self, hyp_res, min_toys, max_toys, contours, signif, cl, output=False, verbose=False, precomputed=None, feldman_cousins=False): + results = {} + + if hyp_res is None and precomputed is None: + return (False, {"ntoys": 0}) + + ntoys = None + + if hyp_res is not None: + # We will take the number of toys thrown as the minimum of the number of b-only or s+b toys + if feldman_cousins: + # For F-C we expect only s+b toys + ntoys = hyp_res.GetAltDistribution().GetSize() + else: + ntoys = min(hyp_res.GetNullDistribution().GetSize(), hyp_res.GetAltDistribution().GetSize()) + print(">>> Number of b toys %i" % (hyp_res.GetNullDistribution().GetSize())) + print(">>> Number of s+b toys %i" % (hyp_res.GetAltDistribution().GetSize())) + + if precomputed is not None: + ntoys = precomputed["ntoys"] + + results["ntoys"] = ntoys + + if verbose: + print(">>> Toys completed: %i [min=%i, max=%i]" % (ntoys, min_toys, max_toys)) + + # If we're not going to write the CLs grids out and we fail the ntoys criteria then we + # don't need to waste time getting all the CLs values. Can just return the results dict as-is. + # 1st test - have we thrown at least the minimum number of toys? 
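+ # Illustrative example with the defaults min_toys=500 and max_toys=5000: a point with 200 toys fails the 1st test below and will be given more job cycles, while a point with 6000 toys passes the 2nd test and is accepted regardless of its significance.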
+ if ntoys < min_toys and not output: + return (False, results) + # 2nd test - have we thrown the maximum (or more) toys? + if ntoys >= max_toys and not output: + return (True, results) + + if hyp_res is not None: + # 3rd test - are we > X sigma away from the exclusion CLs? This must be true for all the + # contours we're interested in + btoys = sorted([x for x in hyp_res.GetNullDistribution().GetSamplingDistribution()]) + # save the real observed test stat, we'll change it in this + # loop to get the expected but we'll want to restore it at the end + q_obs = hyp_res.GetTestStatisticData() + + crossing = 1 - cl + signif_results = {} + + if verbose: + print(">>> CLs target is a significance of %.1f standard deviations from %.3f" % (signif, crossing)) + + for contour in contours: + # Start by assuming this contour passes, we'll set it to False if it fails + signif_results[contour] = True + + # If this is an expected contour we will extract the quantile from the name + if "exp" in contour: + quantile = ROOT.Math.normal_cdf(float(contour.replace("exp", ""))) + if verbose: + print(">>> Checking the %s contour at quantile=%f" % (contour, quantile)) + if hyp_res is not None: + # Get the test statistic value at this quantile by rounding to the nearest b-only toy + testStat = btoys[int(min(floor(quantile * len(btoys) + 0.5), len(btoys) - 1))] + hyp_res.SetTestStatisticData(testStat) + elif contour == "obs": + if verbose: + print(">>> Checking the %s contour" % contour) + else: + raise RuntimeError("Contour %s not recognised" % contour) + + if hyp_res is not None: + # Currently assume we always want to use CLs, should provide option + # for CLs+b at some point + if not feldman_cousins: + CLs = hyp_res.CLs() + CLsErr = hyp_res.CLsError() + else: + # For simplicity label CLs+b the same as CLs when using FC mode...
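+ # Illustrative example with made-up numbers: for cl=0.95 the crossing is 1 - 0.95 = 0.05, so a point with CLs = 0.032 +/- 0.004 is dist = |0.032 - 0.05| / 0.004 = 4.5 sigma from the exclusion boundary and passes the default signif threshold of 3.0.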
+ CLs = hyp_res.CLsplusb() + CLsErr = hyp_res.CLsplusbError() + testStatObs = hyp_res.GetTestStatisticData() + if precomputed is not None: + CLs = precomputed[contour][0] + CLsErr = precomputed[contour][1] + testStatObs = precomputed[contour][3] + if ntoys == 0: + CLsErr = 0 # If there were no toys then CLsError() will return inf + dist = 0.0 + if CLsErr == 0.0: + if verbose: + print(">>>> CLs = %g +/- %g (infinite significance), will treat as passing" % (CLs, CLsErr)) + dist = 999.0 + else: + dist = abs(CLs - crossing) / CLsErr + if verbose: + print(">>>> CLs = %g +/- %g, reached %.1f sigma significance" % (CLs, CLsErr, dist)) + if dist < signif: + signif_results[contour] = False + results[contour] = (CLs, CLsErr, dist, testStatObs) + if hyp_res is not None: + # Set the observed test statistic back to the real data value + hyp_res.SetTestStatisticData(q_obs) + + # Now do the full logic of the validation and return + all_ok = ntoys >= min_toys # OK if min toys passes + for key, val in six.iteritems(signif_results): + all_ok = all_ok and val # still OK if all contour significances pass + all_ok = all_ok or (ntoys >= max_toys) # Always OK if we've reached the maximum + results["ok"] = all_ok + return (all_ok, results) + + def run_method(self): + ROOT.PyConfig.IgnoreCommandLineOptions = True + ROOT.gROOT.SetBatch(ROOT.kTRUE) + + # Open the json config file + with open(self.args.config) as json_file: + cfg = json.load(json_file) + + # Set all the parameter values locally using defaults if necessary + grids = cfg["grids"] + grids_to_remove = cfg.get("grids_to_remove", None) + POIs = cfg["POIs"] + opts = cfg["opts"] + toys_per_cycle = cfg["toys_per_cycle"] + zipname = cfg.get("zipfile", None) + statfile = cfg.get("statusfile", None) + contours = cfg.get("contours", ["obs", "exp-2", "exp-1", "exp0", "exp+1", "exp+2"]) + min_toys = cfg.get("min_toys", 500) + max_toys = cfg.get("max_toys", 5000) + signif = cfg.get("signif", 3.0) + cl = cfg.get("CL", 0.95) + verbose = cfg.get("verbose", False) + make_plots = cfg.get("make_plots", False) + # Write CLs values into the output even if current toys do not pass validation + incomplete = cfg.get("output_incomplete", False) + outfile = cfg.get("output", "hybrid_grid.root") + from_asymptotic_settings = cfg.get("from_asymptotic_settings", dict()) + feldman_cousins = cfg.get("FC", False) + # NB: blacklisting not yet implemented for this method + + # Have to merge some arguments from both the command line and the "opts" in the json file + to_freeze = [] + to_set = [] + set_opt, opts = self.extract_arg("--setParameters", opts) + if set_opt is not None: + to_set.append(set_opt) + freeze_opt, opts = self.extract_arg("--freezeParameters", opts) + if freeze_opt is not None: + to_freeze.append(freeze_opt) + if hasattr(self.args, "setParameters") and self.args.setParameters is not None: + to_set.append(self.args.setParameters) + if hasattr(self.args, "freezeParameters") and self.args.freezeParameters is not None: + to_freeze.append(self.args.freezeParameters) + + points = [] + blacklisted_points = [] + + # For the automatic grid for the "from_asymptotic" option we should fix the format specifier for + # the grid points, as the numerical precision of a given point may change once the grid spacing is + # modified. By default we let split_vals do its thing however + fmt_spec = None + + # In this mode we're doing a classic limit search vs MH instead of a 2D grid. + # Most of the same code can be used however.
First we'll use the json file containing the + asymptotic limits to create a new grid from scratch. + if self.args.from_asymptotic is not None: + grids = [] + bound_vals = None + bound_pars = [] + fmt_spec = "%.5g" + with open(self.args.from_asymptotic) as limit_json: + limits = json.load(limit_json) + for m in limits.keys(): + limit_vals = [x for x in limits[m].values()] + max_limit = max(limit_vals) + min_limit = min(limit_vals) + # print (min_limit, max_limit) + width = max_limit - min_limit + max_limit += width * 0.3 + min_limit = max(0.0, min_limit - width * 0.3) + nsteps = from_asymptotic_settings.get("points", 100) + step_width = (max_limit - min_limit) / nsteps + grids.append([m, "%g:%g|%g" % (min_limit, max_limit, step_width), ""]) + boundlist_file = from_asymptotic_settings.get("boundlist", "") + if boundlist_file: + with open(boundlist_file) as json_file: + bnd = json.load(json_file) + bound_pars = list(bnd.keys()) + print("Found bounds for parameters %s" % ",".join(bound_pars)) + bound_vals = {} + for par in bound_pars: + bound_vals[par] = list() + for mass, bounds in six.iteritems(bnd[par]): + bound_vals[par].append((float(mass), bounds[0], bounds[1])) + bound_vals[par].sort(key=lambda x: x[0]) + # print (min_limit, max_limit) + # sys.exit(0) + + for igrid in grids: + assert len(igrid) == 3 + if igrid[2] == "": + points.extend(itertools.product(utils.split_vals(igrid[0], fmt_spec=fmt_spec), utils.split_vals(igrid[1], fmt_spec=fmt_spec))) + else: + blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2]))) + + # In between cycles of toys we may find there's something wrong with some of the points in the grid and therefore want to remove them: + points_to_remove = [] + if grids_to_remove is not None: + for igrid in grids_to_remove: + assert len(igrid) == 2 + points_to_remove.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]))) + + for p in points_to_remove: + points.remove(p) + + # This dictionary will keep track of the combine output files for each model point + file_dict = {} + for p in points: + file_dict[p] = {} + + # The regex we will use to identify output files and extract POI values + rgx = re.compile(r"higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root" % (POIs[0], POIs[1])) + + stats = {} + if statfile and os.path.isfile(statfile): + with open(statfile) as stat_json: + stats = json.load(stat_json) + + # Can optionally copy output root files into a zip archive + # If the user has specified a zipfile we will first + # look for output files in this archive before scanning the + # current directory + if zipname: + # Open the zip file in append mode, this should also + # create it if it doesn't exist + zipf = zipfile.ZipFile(zipname, "a") + for f in zipf.namelist(): + matches = rgx.search(f) + p = (matches.group("p1"), matches.group("p2")) + seed = int(matches.group("toy")) + if p in file_dict: + if seed not in file_dict[p]: + # For each model point have a dictionary keyed on the seed number + # with a value pointing to the file in the archive in the format + # ROOT expects: "zipfile.zip#higgsCombine.blah.root" + file_dict[p][seed] = zipname + "#" + f + + # Now look for files in the local directory + for f in glob.glob("higgsCombine.%s.*.%s.*.HybridNew.mH*.root" % (POIs[0], POIs[1])): + matches = rgx.search(f) + p = (matches.group("p1"), matches.group("p2")) + seed = int(matches.group("toy")) + if p in file_dict: + # Don't add this file to the list if its
seed number is already + # a value in the dict. + if seed not in file_dict[p]: + # If we're using the zipfile we'll add this now and + # then delete it from the local directory + # But: only if the file is good, we don't want to pollute the zip + # file with incomplete or failed jobs + if zipname and plot.TFileIsGood(f): + zipf.write(f) # assume this throws if it fails + print("Adding %s to %s" % (f, zipname)) + file_dict[p][seed] = zipname + "#" + f + os.remove(f) + else: # otherwise just add the file to the dict in the normal way + file_dict[p][seed] = f + + if zipname: + zipf.close() + + # These lists will keep track of the CLs values which we will use + # to create the output TGraph2Ds + output_x = [] + output_y = [] + output_data = {} + output_ntoys = [] + output_clserr = {} + output_signif = {} + # One list of Z-values per contour + for contour in contours: + output_data[contour] = [] + output_clserr[contour] = [] + output_signif[contour] = [] + + # Also keep track of the number of model points which have met the + # CLs criteria + total_points = 0 + complete_points = 0 + + for key, val in six.iteritems(file_dict): + status_changed = True + total_points += 1 + status_key = ":".join(key) + name = "%s.%s.%s.%s" % (POIs[0], key[0], POIs[1], key[1]) + + # First check if we use the status json + all_files = list(val.values()) + status_files = [] + files = [] + + if status_key in stats: + status_files = stats[status_key]["files"] + if set(all_files) == set(status_files): + print("For point %s, no files have been updated" % name) + status_changed = False + files = all_files + else: + files = [x for x in val.values() if plot.TFileIsGood(x)] + if set(files) == set(status_files) and len(files) < len(all_files): + print("For point %s, new files exist but they are not declared good" % name) + status_changed = False + else: + files = [x for x in val.values() if plot.TFileIsGood(x)] + + # Merge the HypoTestResult objects from each file into one + res = None + precomputed = None + if status_key in stats and not status_changed and stats[status_key]["ntoys"] > 0: + precomputed = stats[status_key] + else: + res = self.GetCombinedHypoTest(files) + + # Do the validation of this model point + # + ok, point_res = self.ValidateHypoTest( + res, + min_toys=min_toys, + max_toys=max_toys, + contours=contours, + signif=signif, + cl=cl, + output=self.args.output, + verbose=verbose, + precomputed=precomputed, + feldman_cousins=feldman_cousins, + ) + + print(">> Point %s [%i toys, %s]" % (name, point_res["ntoys"], "DONE" if ok else "INCOMPLETE")) + + stats[status_key] = {"files": files, "ntoys": point_res["ntoys"]} + for cont in contours: + if cont in point_res: + stats[status_key][cont] = point_res[cont] + + if ok: + complete_points += 1 + + # Make plots of the test statistic distributions if requested + if res is not None and make_plots: + self.PlotTestStat(res, "plot_" + name, opts=cfg["plot_settings"], poi_vals=(float(key[0]), float(key[1])), point_info=point_res) + + # Add the resulting CLs values to the output arrays.
Normally just + # for the model points that passed the validation criteria, but if "output_incomplete" + # has been set to true then we'll write all model points where at least one HypoTestResult + # is present + if (res is not None or precomputed is not None) and (ok or incomplete) and self.args.output: + output_x.append(float(key[0])) + output_y.append(float(key[1])) + output_ntoys.append(point_res["ntoys"]) + for contour in contours: + output_data[contour].append(point_res[contour][0]) + output_clserr[contour].append(point_res[contour][1]) + output_signif[contour].append(point_res[contour][2]) + + # Do the job cycle generation if requested + if not ok and self.args.cycles > 0: + print(">>> Going to generate %i job(s) for point %s" % (self.args.cycles, key)) + # Figure out the next seed numbers we need to run by finding the maximum seed number + # so far + done_cycles = list(val.keys()) + new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1 + new_cycles = list(range(new_idx, new_idx + self.args.cycles)) + + print(">>> Done cycles: " + ",".join(str(x) for x in done_cycles)) + print(">>> New cycles: " + ",".join(str(x) for x in new_cycles)) + + # Build the combine command. Here we'll take responsibility for setting the name and the + # model parameters, making sure the latter are frozen + if not feldman_cousins: + set_arg = ",".join(["%s=%s,%s=%s" % (POIs[0], key[0], POIs[1], key[1])] + to_set) + freeze_arg = ",".join(["%s,%s" % (POIs[0], POIs[1])] + to_freeze) + point_args = "-n .%s --setParameters %s --freezeParameters %s" % (name, set_arg, freeze_arg) + else: + single_point_arg = ".".join(["%s=%s,%s=%s" % (POIs[0], key[0], POIs[1], key[1])]) + if len(to_set) > 0 and len(to_freeze) > 0: + point_args = "-n .%s --singlePoint %s --setParameters %s --freezeParameters %s" % (name, single_point_arg, ",".join(to_set), ",".join(to_freeze)) + elif len(to_set) > 0: + point_args = "-n .%s --singlePoint %s --setParameters %s" % (name, single_point_arg, ",".join(to_set)) + elif len(to_freeze) > 0: + point_args = "-n .%s --singlePoint %s --freezeParameters %s" % (name, single_point_arg, ",".join(to_freeze)) + else: + point_args = "-n .%s --singlePoint %s " % (name, single_point_arg) + + if self.args.from_asymptotic: + mval = key[0] + command = [] + for par in bound_pars: + # The (mass, None, None) is just a trick to make bisect_left do the comparison + # with the list of tuples in bound_vals[par].
The +1E-5 is to avoid float rounding + # issues + lower_bound = bisect.bisect_left(bound_vals[par], (float(mval) + 1e-5, None, None)) + # If lower_bound == 0 this means we are at or below the lowest mass point, + # in which case we should increase by one to take the bounds from this lowest + # point + if lower_bound == 0: + lower_bound += 1 + command.append("%s=%g,%g" % (par, bound_vals[par][lower_bound - 1][1], bound_vals[par][lower_bound - 1][2])) + if len(command) > 0: + point_args += " --setParameterRanges %s" % (":".join(command)) + # print per_mass_point_args + point_args += " --singlePoint %s" % key[1] + point_args += " -m %s" % mval + # Build a command for each job cycle setting the number of toys and random seed and passing through any other + # user options from the config file or the command line + for idx in new_cycles: + cmd = " ".join(["combine -M HybridNew", opts, point_args, "-T %i" % toys_per_cycle, "-s %i" % idx] + self.passthru) + self.job_queue.append(cmd) + + print(">> %i/%i points have completed and require no further toys" % (complete_points, total_points)) + self.flush_queue() + + # Create and write output CLs TGraph2Ds here + # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes + if self.args.output and not self.args.from_asymptotic: + fout = ROOT.TFile(outfile, "RECREATE") + for c in contours: + graph = ROOT.TGraph2D(len(output_data[c]), array("d", output_x), array("d", output_y), array("d", output_data[c])) + graph.SetName(c) + fout.WriteTObject(graph, c) + # Also write a Graph with the CLsErr + graph = ROOT.TGraph2D(len(output_clserr[c]), array("d", output_x), array("d", output_y), array("d", output_clserr[c])) + graph.SetName("clsErr_" + c) + fout.WriteTObject(graph, "clsErr_" + c) + # And a Graph with the significance + graph = ROOT.TGraph2D(len(output_signif[c]), array("d", output_x), array("d", output_y), array("d", output_signif[c])) + graph.SetName("signif_" + c) + fout.WriteTObject(graph, "signif_" + c) + graph = ROOT.TGraph2D(len(output_ntoys), array("d", output_x), array("d", output_y), array("d", output_ntoys)) + graph.SetName("ntoys" + c) + fout.WriteTObject(graph, "ntoys") + fout.Close() + + if self.args.output and self.args.from_asymptotic: + # Need to collect all the files for each mass point and hadd them: + files_by_mass = {} + for key, val in six.iteritems(file_dict): + if key[0] not in files_by_mass: + files_by_mass[key[0]] = list() + files_by_mass[key[0]].extend(list(val.values())) + for m, files in six.iteritems(files_by_mass): + gridfile = "higgsCombine.gridfile.%s.%s.%s.root" % (POIs[0], m, POIs[1]) + self.job_queue.append("hadd -f %s %s" % (gridfile, " ".join(files))) + for exp in ["", "0.025", "0.160", "0.500", "0.840", "0.975"]: + self.job_queue.append( + " ".join( + [ + "combine -M HybridNew --rAbsAcc 0", + opts, + "--grid %s" % gridfile, + "-n .final.%s.%s.%s" % (POIs[0], m, POIs[1]), + "-m %s" % (m), + ("--expectedFromGrid %s" % exp) if exp else "--noUpdateGrid", + ] + + self.passthru + ) + ) + self.flush_queue() + + if statfile: + with open(statfile, "w") as stat_out: + stat_json = json.dumps(stats, sort_keys=True, indent=2, separators=(",", ": ")) + stat_out.write(stat_json) + + def PlotTestStat(self, result, name, opts, poi_vals, point_info=None): + sign = -1.0 + if opts["one_sided"]: + sign = 1.0 + null_vals = [x * sign * 2.0 for x in result.GetNullDistribution().GetSamplingDistribution()] + alt_vals = [x * sign * 2.0 for x in result.GetAltDistribution().GetSamplingDistribution()] + if 
len(null_vals) == 0 or len(alt_vals) == 0: + print(">> Error in PlotTestStat for %s, null and/or alt distributions are empty" % name) + return + plot.ModTDRStyle() + canv = ROOT.TCanvas(name, name) + pad = plot.OnePad()[0] + min_val = min(min(alt_vals), min(null_vals)) + max_val = max(max(alt_vals), max(null_vals)) + min_plot_range = min_val - 0.05 * (max_val - min_val) + if opts["one_sided"]: + min_plot_range = 0.0 + pad.SetLogy(True) + max_plot_range = max_val + 0.05 * (max_val - min_val) + hist_null = ROOT.TH1F("null", "null", 40, min_plot_range, max_plot_range) + hist_alt = ROOT.TH1F("alt", "alt", 40, min_plot_range, max_plot_range) + for val in null_vals: + hist_null.Fill(val) + for val in alt_vals: + hist_alt.Fill(val) + hist_alt.SetLineColor(ROOT.TColor.GetColor(4, 4, 255)) + hist_alt.SetFillColor(plot.CreateTransparentColor(ROOT.TColor.GetColor(4, 4, 255), 0.4)) + hist_alt.GetXaxis().SetTitle("-2 #times ln(^{}L_{%s}/^{}L_{%s})" % (opts["alt_label"], opts["null_label"])) + hist_alt.GetYaxis().SetTitle("Pseudo-experiments") + hist_alt.Draw() + hist_null.SetLineColor(ROOT.TColor.GetColor(252, 86, 11)) + hist_null.SetFillColor(plot.CreateTransparentColor(ROOT.TColor.GetColor(254, 195, 40), 0.4)) + hist_null.Draw("SAME") + val_obs = result.GetTestStatisticData() * sign * 2.0 + obs = ROOT.TArrow(val_obs, 0, val_obs, hist_alt.GetMaximum() * 0.3, 0.05, "<-|") + obs.SetLineColor(ROOT.kRed) + obs.SetLineWidth(3) + obs.Draw() + # exp_line = ROOT.TLine() + # plot.Set(exp_line, LineStyle=2, LineColor=ROOT.kRed, LineWidth=1) + # if point_info is not None: + # for exp in ['exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2']: + # if exp in point_info: + # exp_line.DrawLine(2*sign*point_info[exp][3], 0, 2*sign*point_info[exp][3], hist_alt.GetMaximum() * 0.3) + plot.FixTopRange(pad, plot.GetPadYMax(pad), 0.25) + leg = plot.PositionedLegend(0.22, 0.2, 3, 0.02) + leg.AddEntry(hist_alt, opts["alt_label"], "F") + leg.AddEntry(hist_null, opts["null_label"], "F") + leg.AddEntry(obs, "Observed", "L") + leg.Draw() + plot.DrawCMSLogo(pad, "CMS", opts["cms_subtitle"], 0, 0.15, 0.035, 1.2) + pt_l = ROOT.TPaveText(0.23, 0.75, 0.33, 0.9, "NDCNB") + pt_l.AddText("Model:") + pt_l.AddText("Toys:") + pt_l.AddText("CLs+b:") + pt_l.AddText("CLb:") + pt_l.AddText("CLs:") + # if point_info is not None: + # for exp in ['exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2']: + # pt_l.AddText(exp) + plot.Set(pt_l, TextAlign=11, TextFont=62, BorderSize=0) + pt_l.Draw() + pt_r = ROOT.TPaveText(0.33, 0.75, 0.63, 0.9, "NDCNB") + pt_r.AddText("%s [%s = %.1f, %s = %.1f]" % (opts["model_label"], opts["poi_labels"][0], poi_vals[0], opts["poi_labels"][1], poi_vals[1])) + pt_r.AddText( + "%i (%s) + %i (%s)" % (result.GetNullDistribution().GetSize(), opts["null_label"], result.GetAltDistribution().GetSize(), opts["alt_label"]) + ) + pt_r.AddText("%.3f #pm %.3f" % (result.CLsplusb(), result.CLsplusbError())) + pt_r.AddText("%.3f #pm %.3f" % (result.CLb(), result.CLbError())) + pt_r.AddText("%.3f #pm %.3f" % (result.CLs(), result.CLsError())) + # if point_info is not None: + # for exp in ['exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2']: + # pt_r.AddText('%.3f #pm %.3f' % (point_info[exp][0], point_info[exp][1])) + plot.Set(pt_r, TextAlign=11, TextFont=42, BorderSize=0) + pt_r.Draw() + pad.GetFrame().Draw() + pad.RedrawAxis() + for fmt in opts["formats"]: + canv.SaveAs(fmt) diff --git a/python/tool_base/Output.py b/python/tool_base/Output.py new file mode 100755 index 00000000000..1963e33bf03 --- /dev/null +++ b/python/tool_base/Output.py @@ -0,0 +1,335 @@
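+# For orientation, a sketch (with made-up numbers) of the limits JSON that the CollectLimits tool below writes, keyed by mass and by the quantiles handled in its run_method: {"125.0": {"obs": 1.52, "exp-2": 0.81, "exp-1": 1.02, "exp0": 1.38, "exp+1": 1.79, "exp+2": 2.31}}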
+#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function +import ROOT +import json +import os +import pprint +from collections import defaultdict +from array import array + +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils +import HiggsAnalysis.CombinedLimit.util.plotting as plot + +# from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +import six + + +def isfloat(value): + try: + float(value) + return True + except ValueError: + return False + + +class PrintFit(CombineToolBase): + description = "Print the output of MultiDimFit" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("input", help="The input file") + group.add_argument("--algo", help="The algo used in MultiDimFit", default="none") + group.add_argument("-P", "--POIs", help="The params that were scanned (in scan order)") + group.add_argument("--json", help="Write json output (format: file.json:key1:key2...)") + + def run_method(self): + if self.args.json is not None: + json_structure = self.args.json.split(":") + assert len(json_structure) >= 1 + if os.path.isfile(json_structure[0]): + with open(json_structure[0]) as jsonfile: + js = json.load(jsonfile) + else: + js = {} + js_target = js + if len(json_structure) >= 2: + for key in json_structure[1:]: + js_target[key] = {} + js_target = js_target[key] + POIs = self.args.POIs.split(",") + if self.args.algo == "none": + res = utils.get_none_results(self.args.input, POIs) + for p in POIs: + val = res[p] + print("%-30s = %+.3f" % (p, val)) + if self.args.json is not None: + for key, val in six.iteritems(res): + js_target[key] = {"Val": val} + with open(json_structure[0], "w") as outfile: + json.dump(js, outfile, sort_keys=True, indent=4, separators=(",", ": ")) + elif self.args.algo == "singles": + res = utils.get_singles_results(self.args.input, POIs, POIs) + for p in POIs: + val = res[p][p] + print("%s = %.3f -%.3f/+%.3f" % (p, val[1], val[1] - val[0], val[2] - val[1])) + elif self.args.algo == "fixed": + res = utils.get_fixed_results(self.args.input, POIs) + print("%-30s bestfit : fixed" % ("")) + for p in POIs: + print("%-30s = %+.3f : %+.3f" % (p, res["bestfit"][p], res["fixedpoint"][p])) + print("-" * 60) + print("2*deltaNLL = %f, nPOIs = %i, p-value = %0.4f" % (2.0 * res["deltaNLL"], len(POIs), res["pvalue"])) + + # pprint.pprint(res) + + +class CollectLimits(CombineToolBase): + description = "Aggregate limit output from combine" + requires_root = True + default_name = "limits.json" + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("input", nargs="+", default=[], help="The input files") + group.add_argument( + "-o", + "--output", + nargs="?", + const="limits.json", + default="limits.json", + help="""The name of the output json file.
+ When the --use-dirs option is set the enclosing directory name + will be appended to the filename given here.""", + ) + group.add_argument( + "--use-dirs", + action="store_true", + help="""Use the directory structure to create multiple limit + outputs and to set the output file names""", + ) + group.add_argument("--toys", action="store_true", help="""Collect toy values""") + group.add_argument("--limit-err", action="store_true", help="""Also store the uncertainties on the limit""") + + def run_method(self): + limit_sets = defaultdict(list) + for filename in self.args.input: + if not plot.TFileIsGood(filename): + print(">> File %s is corrupt or incomplete, skipping" % filename) + continue + if self.args.use_dirs is False: + limit_sets["default"].append(filename) + else: + label = "default" + dirs = filename.split("/") + # The last dir could be the mass, if so we ignore it and check the next + if len(dirs) > 1: + if not isfloat(dirs[-2]): + label = dirs[-2] + elif len(dirs) > 2: + label = dirs[-3] + limit_sets[label].append(filename) + # print limit_sets + + for label, filenames in six.iteritems(limit_sets): + js_out = {} + for filename in filenames: + if plot.TFileIsGood(filename): + file = ROOT.TFile(filename) + tree = file.Get("limit") + for evt in tree: + mh = str(evt.mh) + if mh not in js_out: + js_out[mh] = {} + if self.args.toys: + js_out[mh]["toys"] = {} + for limit in ["obs", "exp0", "exp-2", "exp-1", "exp+1", "exp+2"]: + js_out[mh]["toys"][limit] = [] + if self.args.toys: + if evt.iToy > 0: + if evt.quantileExpected == -1: + js_out[mh]["toys"]["obs"].append(evt.limit) + elif abs(evt.quantileExpected - 0.5) < 1e-4: + js_out[mh]["toys"]["exp0"].append(evt.limit) + elif abs(evt.quantileExpected - 0.025) < 1e-4: + js_out[mh]["toys"]["exp-2"].append(evt.limit) + elif abs(evt.quantileExpected - 0.160) < 1e-4: + js_out[mh]["toys"]["exp-1"].append(evt.limit) + elif abs(evt.quantileExpected - 0.840) < 1e-4: + js_out[mh]["toys"]["exp+1"].append(evt.limit) + elif abs(evt.quantileExpected - 0.975) < 1e-4: + js_out[mh]["toys"]["exp+2"].append(evt.limit) + elif evt.iToy == 0: + if evt.quantileExpected == -1: + js_out[mh].setdefault("obs", []).append(evt.limit) + + else: + if evt.quantileExpected == -1: + js_out[mh]["obs"] = evt.limit + if self.args.limit_err: + js_out[mh]["obs_err"] = evt.limitErr + elif abs(evt.quantileExpected - 0.5) < 1e-4: + js_out[mh]["exp0"] = evt.limit + if self.args.limit_err: + js_out[mh]["exp0_err"] = evt.limitErr + elif abs(evt.quantileExpected - 0.025) < 1e-4: + js_out[mh]["exp-2"] = evt.limit + if self.args.limit_err: + js_out[mh]["exp-2_err"] = evt.limitErr + elif abs(evt.quantileExpected - 0.160) < 1e-4: + js_out[mh]["exp-1"] = evt.limit + if self.args.limit_err: + js_out[mh]["exp-1_err"] = evt.limitErr + elif abs(evt.quantileExpected - 0.840) < 1e-4: + js_out[mh]["exp+1"] = evt.limit + if self.args.limit_err: + js_out[mh]["exp+1_err"] = evt.limitErr + elif abs(evt.quantileExpected - 0.975) < 1e-4: + js_out[mh]["exp+2"] = evt.limit + if self.args.limit_err: + js_out[mh]["exp+2_err"] = evt.limitErr + + if self.args.toys: + for mh in js_out.keys(): + print("Expected bands will be taken from toys") + print(mh) + limits = sorted(js_out[mh]["toys"]["obs"]) + # if mh == '160.0' or mh == '90.0' : + # limits = [x for x in limits if x > 0.1] + quantiles = array("d", [0.025, 0.160, 0.5, 0.840, 0.975]) + res = array("d", [0.0, 0.0, 0.0, 0.0, 0.0]) + empty = array("i", [0]) + ROOT.TMath.Quantiles(len(limits), len(quantiles), array("d", limits), res, quantiles, True, empty, 1) + print(res)
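+ # Illustrative mapping with made-up numbers: TMath::Quantiles fills res with the 2.5%, 16%, 50%, 84% and 97.5% quantiles of the toy limits, e.g. res = [0.8, 0.9, 1.1, 1.4, 1.8], which are stored as the exp-2 ... exp+2 bands below.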
+ js_out[mh]["exp-2"] = res[0] + js_out[mh]["exp-1"] = res[1] + js_out[mh]["exp0"] = res[2] + js_out[mh]["exp+1"] = res[3] + js_out[mh]["exp+2"] = res[4] + # print js_out + jsondata = json.dumps(js_out, sort_keys=True, indent=2, separators=(",", ": ")) + # print jsondata + if self.args.output is not None: + outname = self.args.output.replace(".json", "_%s.json" % label) if self.args.use_dirs else self.args.output + with open(outname, "w") as out_file: + print(">> Writing output %s from files:" % outname) + pprint.pprint(filenames, indent=2) + out_file.write(jsondata) + + +class CollectGoodnessOfFit(CombineToolBase): + description = "Aggregate Goodness of Fit output from fit and toys" + requires_root = True + default_name = "gof.json" + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("--input", nargs="+", default=[], help="The input files") + group.add_argument( + "-o", + "--output", + nargs="?", + const="gof.json", + default="gof.json", + help="""The name of the output json file. + When the --use-dirs option is set the enclosing directory name + will be appended to the filename given here.""", + ) + group.add_argument( + "--use-dirs", + action="store_true", + help="""Use the directory structure to create multiple limit + outputs and to set the output file names""", + ) + + def run_method(self): + limit_sets = defaultdict(list) + for filename in self.args.input: + if not plot.TFileIsGood(filename): + print(">> File %s is corrupt or incomplete, skipping" % filename) + continue + if not self.args.use_dirs: + if "default" not in limit_sets: + limit_sets["default"] = ([], []) + limit_sets["default"][0].append(filename) + else: + label = "default" + dirs = filename.split("/") + # The last dir could be the mass, if so we ignore it and check the next + if len(dirs) > 1: + if not isfloat(dirs[-2]): + label = dirs[-2] + elif len(dirs) > 2: + label = dirs[-3] + if label not in limit_sets: + limit_sets[label] = ([], []) + limit_sets[label][0].append(filename) + + for label, (filenames, toyfiles) in six.iteritems(limit_sets): + js_out = {} + for filename in filenames: + file = ROOT.TFile(filename) + tree = file.Get("limit") + adding_cat_branch = False + branches = [] + for branch in tree.GetListOfBranches(): + # Current logic says any branch after quantileExpected is a special + # GOF branch labelled according to category + if adding_cat_branch: + branches.append(branch.GetName()) + if branch.GetName() == "quantileExpected": + adding_cat_branch = True + # print branches + failedToys = 0 + nEvts = tree.GetEntries() + for evt in tree: + mh = str(evt.mh) + if mh not in js_out: + js_out[mh] = {} + if evt.quantileExpected != -1: + continue + if evt.iToy > 0 and evt.limit < -0.5: # Exclude toys with negative test statistic + failedToys += 1 + continue + if branches: + for branch in branches: + if branch not in js_out[mh]: + js_out[mh][branch] = {} + js_out[mh][branch]["toy"] = [] + if evt.iToy <= 0: + js_out[mh][branch]["obs"] = [getattr(evt, branch)] + else: + js_out[mh][branch]["toy"].append(getattr(evt, branch)) + else: + if "toy" not in js_out[mh]: + js_out[mh]["toy"] = [] + if evt.iToy <= 0: + js_out[mh]["obs"] = [evt.limit] + else: + js_out[mh]["toy"].append(evt.limit) + if failedToys > 0: + print( + ">> %i/%i toys have negative test statistic values, and are excluded. 
This might indicate a failure in the calculation within combine, or for the KS and AD tests, an undefined value in toys with zero events. Note that the resulting p-value could be biased." + % (failedToys, nEvts) + ) + for mh in js_out: + if all([entry in js_out[mh] for entry in ["toy", "obs"]]): + js_out[mh]["p"] = float(len([toy for toy in js_out[mh]["toy"] if toy >= js_out[mh]["obs"][0]])) / len(js_out[mh]["toy"]) + else: + for branch in js_out[mh]: + js_out[mh][branch]["p"] = float(len([toy for toy in js_out[mh][branch]["toy"] if toy >= js_out[mh][branch]["obs"][0]])) / len( + js_out[mh][branch]["toy"] + ) + + # print js_out + jsondata = json.dumps(js_out, sort_keys=True, indent=2, separators=(",", ": ")) + # print jsondata + if self.args.output is not None: + outname = self.args.output.replace(".json", "_%s.json" % label) if self.args.use_dirs else self.args.output + with open(outname, "w") as out_file: + print(">> Writing output %s from files:" % outname) + pprint.pprint(filenames, indent=2) + out_file.write(jsondata) diff --git a/python/tool_base/T2W.py b/python/tool_base/T2W.py new file mode 100755 index 00000000000..92039b6c378 --- /dev/null +++ b/python/tool_base/T2W.py @@ -0,0 +1,153 @@ +from __future__ import absolute_import +from __future__ import print_function +import itertools +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils +import json +import os +from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase + + +def isfloat(value): + try: + float(value) + return True + except ValueError: + return False + + +class T2W(CombineToolBase): + """ + combineTool.py -M T2W [-m MASS] [--cc [card.txt]] [card1.txt some/dir/125/card2.txt some/dir some/dir/125 ...] + + Algorithm: + 1) Each argument is either a single datacard file or a directory + 2) If a datacard, and if the --cc option is not specified, go to the + directory where the card is located, do text2workspace.py on that + card. If -m is not set explicitly and the enclosing directory name + is convertible to float, set the -m argument to this, otherwise the + -m argument will not be used. + 3) If a directory, cd to it and combine all .txt files, then do + text2workspace. If the --cc option is specified, use the given name + for the combined card that is created, otherwise a default name will + be used. Then do text2workspace.py on the combined card, following the + same rule with the -m option as 2) + 4) If individual datacards are included in the list and the --cc + option is used then combine all of these cards first. As these + cards could be in different directories the combined card and + workspace will be created in the current directory. + """ + + description = "Run text2workspace.py on multiple cards or directories" + requires_root = False + default_card = "combined.txt" + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument( + "-m", + "--mass", + help=""" + The mass value to set in the text2workspace.py call""", + ) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument( + "-i", + "--input", + nargs="+", + help=""" A list of + input datacards and directories. For the latter, all .txt files + within the directory will be combined. If the -m option has not + been specified and the enclosing directory is a number, this will + be taken as the mass value to set.
""", + ) + group.add_argument( + "--cc", + nargs="?", + const=self.default_card, + default=None, + help=""" Create a combined datacard + with a specified name from the individual cards given by the -i + option. Note that if this option is used without an argument a + default card name will be used. For directory arguments in -i, the + cards will be combined regardless of whether --cc is specified, + but can still be used to set the name of the combined card that is + created. """, + ) + + def set_args(self, known, unknown): + CombineToolBase.set_args(self, known, unknown) + + def run_method(self): + # The basic structure of each command - we'll fill in the blanks later + proto = "pushd %(DIR)s; %(CC)stext2workspace.py %(PASSTHRU)s %(CARD)s; popd" + proto_cc = "combineCards.py %(CARDS)s &> %(COMBINED)s" + cc_cards_post = [] + for arg in self.args.input: + passthru = [x for x in self.passthru] + # Deal with the directory case first (3) + if os.path.isdir(arg): + print(">> Directory %s, looking for datacards" % arg) + files = sorted([file for file in os.listdir(arg) if file.endswith(".txt")]) + if len(files) == 0: + print(">> No .txt files found, skipping this directory") + continue + # else: + # print '>> Will combine %i cards: %s' % (len(files), ' '.join(files)) + cc_cards = [os.path.splitext(file)[0] + "=" + file for file in files] + cardname = self.args.cc if self.args.cc is not None else self.default_card + # put an extra extension to avoid accidentally reusing this combined card + # in a subsequent combination + cardname += ".cmb" + cc_cmd = proto_cc % ({"CARDS": " ".join(cc_cards), "COMBINED": cardname}) + base = os.path.basename(arg) + if self.args.mass is None and isfloat(base): + print(">> Enclosing directory will be treated as mass value %s" % base) + passthru.extend(["-m", base]) + elif self.args.mass is not None: + passthru.extend(["-m", self.args.mass]) + cmd = proto % ({"DIR": arg, "PASSTHRU": " ".join(passthru), "CARD": cardname, "CC": cc_cmd + "; "}) + self.job_queue.append(cmd) + # Now do case (2) of a single datacard and --cc isn't specified + elif self.args.cc is None: + dirname = os.path.dirname(arg) + if dirname == "": + dirname = "." 
+ base = os.path.split(dirname)[-1] + if self.args.mass is None and isfloat(base): + print(">> Enclosing directory will be treated as mass value %s" % base) + passthru.extend(["-m", base]) + elif self.args.mass is not None: + passthru.extend(["-m", self.args.mass]) + cmd = proto % ({"DIR": dirname, "PASSTHRU": " ".join(passthru), "CARD": os.path.basename(arg), "CC": ""}) + self.job_queue.append(cmd) + # Case (4) where --cc is specified + else: + cc_cards_post.append(os.path.splitext(os.path.basename(arg))[0] + "=" + arg) + + # Check if we need to combine some individual cards + if len(cc_cards_post) > 0: + passthru = [x for x in self.passthru] + if self.args.mass is not None: + passthru.extend(["-m", self.args.mass]) + cc_cmd = proto_cc % ({"CARDS": " ".join(cc_cards_post), "COMBINED": self.args.cc}) + cmd = proto % ({"DIR": ".", "PASSTHRU": " ".join(passthru), "CARD": self.args.cc, "CC": cc_cmd + "; "}) + self.job_queue.append(cmd) + self.flush_queue() + + # self.put_back_arg('name', '-n') + # proto = 'text2workspace.py ' + (' '.join(self.passthru)) + # for it in itertools.product(*subbed_vars.values()): + # keys = subbed_vars.keys() + # dict = {} + # for i, k in enumerate(keys): + # for tuple_i, tuple_ele in enumerate(k): + # dict[tuple_ele] = it[i][tuple_i] + # self.job_queue.append(proto % dict) + # self.flush_queue() diff --git a/python/tool_base/TaylorExpand.py b/python/tool_base/TaylorExpand.py new file mode 100755 index 00000000000..1cbc61d1ddf --- --- /dev/null +++ b/python/tool_base/TaylorExpand.py @@ -0,0 +1,628 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function +import os +import six.moves.cPickle as pickle +import math +import json +import itertools +import hashlib +import ROOT +from array import array +import numpy as np +from pprint import pprint +from functools import partial +from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +import six +from six.moves import range +from six.moves import zip + + +def Eval(obj, x, params): + return obj.Eval(x[0]) + + +def Permutations(indices): + """Calculate the number of unique permutations of a set of indices + + Args: + indices (list): A list of integer indices, e.g. [0, 1, 2, 3] + + Returns: + int: number of unique permutations + """ + uniques = sorted(set(indices)) + freqs = [indices.count(x) for x in uniques] + n_perms = math.factorial(len(indices)) + for freq in freqs: + n_perms = n_perms // math.factorial(freq) + return n_perms + + +def GenerateStencils(d, h, s): + N = len(s) + smatrix = np.zeros((N, N)) + dvec = np.zeros((N)) + for i in range(N): + for j in range(N): + smatrix[i, j] = pow(s[j], i) + dvec[i] = math.factorial(d) if i == d else 0.0 + # print smatrix + # print dvec + res = (1.0 / pow(h, d)) * np.dot(np.linalg.inv(smatrix), dvec) + return res + + +def GenerateDebugGraph(wsp, var_name, ingraph): + nll = wsp.function("nll") + var = wsp.var(var_name) + outgraph = ingraph.Clone() + for i in range(ingraph.GetN()): + var.setVal(ingraph.GetX()[i]) + outgraph.GetY()[i] = nll.getVal() * 2.0 + return outgraph + + +# Calculate partial derivatives using finite differences +class ExpansionTerm: + def __init__(self, parameter_values, derivatives, stencils): + self.fnval = 0.0 + self.parameter_values = np.array(parameter_values, dtype=np.float32) + self.derivatives = np.array(derivatives, dtype=np.uint8) + unique_derivatives = np.array(sorted(set(derivatives)), dtype=np.uint8) +
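# Illustrative example with made-up values: derivatives = [0, 0, 1] encodes the third-order term d^3/dx0^2 dx1, and Permutations([0, 0, 1]) = 3!/2! = 3 counts how many orderings of this term appear in the Taylor sum. +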
derivative_frequency = np.zeros(len(unique_derivatives), dtype=np.uint8) + for idx, i in enumerate(unique_derivatives): + derivative_frequency[idx] = np.count_nonzero(self.derivatives == i) + terms = list() + # self.terms = list() + if len(derivative_frequency): + parameter = unique_derivatives[0] + order = derivative_frequency[0] + self.fundamental = False + stencil = stencils[parameter][order] + self.coeffs = np.zeros(len(stencil), dtype=np.float64) + for i in range(len(stencil)): + remaining_derivatives = np.array(list([a for a in self.derivatives if a != parameter]), dtype=np.uint8) + # Make a copy of the current parameters and adjust the + # value for the current stencil point + new_parameter_values = np.array(self.parameter_values, dtype=np.float32) + new_parameter_values[parameter] += stencil[i][0] + # Add this to the list of terms + self.coeffs[i] = stencil[i][1] + terms.append(ExpansionTerm(new_parameter_values, remaining_derivatives, stencils)) + self.terms = np.array(terms) + else: + self.terms = [] + self.fundamental = True + + def FormattedPars(self): + return tuple([float("%f" % p) for p in self.parameter_values]) + + def Eval(self, with_permutations=False, with_factorial=False): + if self.fundamental: + return self.fnval + else: + summed = 0.0 + for i in range(len(self.terms)): + summed += self.coeffs[i] * self.terms[i].Eval() + if with_permutations: + n_perms = Permutations(list(self.derivatives)) + summed *= float(n_perms) + if with_factorial: + summed *= 1.0 / float(math.factorial(len(self.derivatives))) + return summed + + def Print(self, indent=0, coeff=None): + sp = " " * indent + extra = "" + if self.fundamental: + extra = " %s = %f" % (list(self.FormattedPars()), self.fnval) + if coeff is None: + print("%s%s%s" % (sp, self.derivatives, extra)) + else: + print("%s%+.1f*%s%s" % (sp, coeff, self.derivatives, extra)) + for i in range(len(self.terms)): + self.terms[i].Print(indent + 2, self.coeffs[i]) + + def GatherFundamentalTerms(self, termlist): + if self.fundamental: + termlist.append(self) + else: + for t in self.terms: + t.GatherFundamentalTerms(termlist) + + +class TaylorExpand(CombineToolBase): + description = "Taylor expand the likelihood around the best-fit point using finite differences" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + self.nll = None + + def attach_intercept_args(self, group): + CombineToolBase.attach_intercept_args(self, group) + group.add_argument("--name", "-n", default="Test") + group.add_argument("-m", "--mass", required=True) + group.add_argument("-d", "--datacard", required=True) + + def get_results(self, file): + res = [] + f = ROOT.TFile(file) + if f is None or f.IsZombie(): + return None + t = f.Get("limit") + for i, evt in enumerate(t): + if i == 0: + continue + res.append(getattr(evt, "deltaNLL")) + print(res) + if len(res) == 0: + print(file) + return res + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("--config", help=("json configuration file")) + group.add_argument("--order", type=int, default=2, help=("Taylor expand up to and including this order")) + group.add_argument("--cross-order", type=int, default=2, help=("Taylor expand up to and including this order for the cross-order terms")) + group.add_argument("--choose-POIs", default=None, help=("Explicit list of POIs to expand in")) + group.add_argument("--do-fits", action="store_true", help=("Actually do the fits")) + group.add_argument("--test-mode", type=int, default=0, help=("Test on the workspace")) + group.add_argument("--test-args",
type=str, default="", help=("List of comma separated args to be interpreted by the test-mode")) + group.add_argument("--save", default=None, help=("Save results to a json file")) + group.add_argument("--load", default=None, help=("Load results from a json file")) + group.add_argument("--stencil-add", type=int, default=0, help=("Add additional points to each stencil")) + group.add_argument("--stencil-min", type=int, default=3, help=("Minimum number of points in stencil")) + group.add_argument("--drop-threshold", type=float, default=0.0, help=("Drop contributions below this threshold")) + group.add_argument("--multiple", type=int, default=1, help=("Run multiple fixed points in one combine job")) + group.add_argument("--workspace-bestfit", action="store_true", help=("Update the best-fit using the workspace snapshot")) + group.add_argument("--linear-workaround", default=None, help=("Comma separated list of POIs that require special treatment due to a linear NLL")) + + def load_workspace(self, file, POIs, data="data_obs", snapshot="MultiDimFit"): + if self.nll is not None: + return + print("Loading NLL...") + self.infile = ROOT.TFile(file) + self.loaded_wsp = self.infile.Get("w") + # infile.Close() + mc = self.loaded_wsp.genobj("ModelConfig") + pdf = mc.GetPdf() + data = self.loaded_wsp.data(data) + ll = ROOT.RooLinkedList() + self.nll = pdf.createNLL(data, ll) + self.loaded_wsp.loadSnapshot("MultiDimFit") + print("...NLL loaded") + # nll.Print() + self.nll0 = self.nll.getVal() + self.wsp_vars = {} + for POI in POIs: + self.wsp_vars[POI] = self.loaded_wsp.var(POI) + + def get_snpashot_pois(self, file, POIs, snapshot="MultiDimFit"): + infile = ROOT.TFile.Open(file) + loaded_wsp = infile.Get("w") + loaded_wsp.loadSnapshot("MultiDimFit") + fit_vals = {} + for POI in POIs: + fit_vals[POI] = loaded_wsp.var(POI).getVal() + return fit_vals + + def fix_TH2(self, h, labels): + h_fix = h.Clone() + for y in range(1, h.GetNbinsY() + 1): + for x in range(1, h.GetNbinsX() + 1): + h_fix.SetBinContent(x, y, h.GetBinContent(x, h.GetNbinsY() + 1 - y)) + for x in range(1, h_fix.GetNbinsX() + 1): + h_fix.GetXaxis().SetBinLabel(x, labels[x - 1]) + for y in range(1, h_fix.GetNbinsY() + 1): + h_fix.GetYaxis().SetBinLabel(y, labels[-y]) + return h_fix + + def run_method(self): + mass = self.args.mass + dc = self.args.datacard + self.put_back_arg("mass", "-m") + self.put_back_arg("datacard", "-d") + + ###################################################################### + # Step 1 - establish parameter ranges + ###################################################################### + with open(self.args.config) as jsonfile: + cfg = json.load(jsonfile) + if self.args.choose_POIs is None: + POIs = sorted([str(x) for x in cfg.keys()]) + else: + POIs = self.args.choose_POIs.split(",") + + Nx = len(POIs) + print(">> Taylor expansion in %i variables up to order %i:" % (Nx, self.args.order)) + pprint(cfg) + + if self.args.workspace_bestfit: + fitvals = self.get_snpashot_pois(dc, POIs) + for POI, val in six.iteritems(fitvals): + print(">> Updating POI best fit from %f to %f" % (cfg[POI]["BestFit"], val)) + cfg[POI]["BestFit"] = val + + xvec = np.zeros(Nx, dtype=np.float32) + # hvec = [] + valvec = [] + for i, P in enumerate(POIs): + valvec.append(cfg[P]["BestFit"]) + xvec[i] = cfg[P]["BestFit"] + + ###################################################################### + # Step 2 - generate stencils + ###################################################################### + + linear_POIs = [] + if self.args.linear_workaround is 
not None: + linear_POIs = self.args.linear_workaround.split(",") + + stencils = {} + validity = [] + + do_cheb = False + + for i, P in enumerate(POIs): + stencils[i] = {} + if "StencilRange" in cfg[P]: + s_min = cfg[P]["StencilRange"][0] - valvec[i] + s_max = cfg[P]["StencilRange"][1] - valvec[i] + elif "StencilSize" in cfg[P]: + s_min = -cfg[P]["StencilSize"] + s_max = +cfg[P]["StencilSize"] + s_min *= 1.0 + s_max *= 1.0 + validity.append(cfg[P]["Validity"]) + for n in range(self.args.order + 1): + if n == 0: + continue + stencil_size = max(self.args.stencil_min, 1 + (((n + 1) / 2) * 2) + self.args.stencil_add) + stencil = list() + stencil_spacing = (s_max - s_min) / (stencil_size - 1) + for s in range(stencil_size): + stencil.append(s_min + float(s) * stencil_spacing) + + if do_cheb: + cheb_list = [] + a = stencil[0] + b = stencil[-1] + chebN = len(stencil) + for inode in range(1, chebN + 1): + cheb_list.append(0.5 * (a + b) + 0.5 * (b - a) * math.cos((((2.0 * inode) - 1.0) * math.pi) / (2.0 * chebN))) + cheb_list.sort() + stencil = cheb_list + coefficients = GenerateStencils(n, 1, stencil) + + ## special case here for linear + if n == 2 and P in linear_POIs: + ## First requirement is that s_min or s_max == the best-fit + if abs(s_min) < 1e-6: + xprime = s_max + print(xprime) + stencil = [s_min, s_max] + coefficients = [0.0, 2.0 / (xprime * xprime)] + elif abs(s_max) < 1e-6: + xprime = s_min + stencil = [s_min, s_max] + coefficients = [2.0 / (xprime * xprime), 0.0] + else: + print("Special treatment of %s not viable, one stencil range endpoint must correspond to the best fit" % P) + + stencils[i][n] = list(zip(stencil, coefficients)) + + pprint(stencils) + + cached_terms = {} + cached_evals = {} + + can_skip = [] + + drop_thresh = self.args.drop_threshold + + stats = {} + + graph_func = None + test_mode_args = [] + test_mode_ws = ROOT.RooWorkspace() + ## In this mode we are loading a TGraph from a scan + if self.args.test_mode == 2: + test_mode_args = self.args.test_args.split(",") + graph_filename = test_mode_args[0] + graph_name = test_mode_args[1] + graph_file = ROOT.TFile(graph_filename) + graph = graph_file.Get(graph_name) + spline = ROOT.TSpline3("spline3", graph) + graph_func = ROOT.TF1("splinefn", partial(Eval, spline), graph.GetX()[0], graph.GetX()[graph.GetN() - 1], 1) + + if self.args.test_mode == 3: + test_mode_args = [self.args.test_args] + print(test_mode_args) + test_mode_ws.factory("expr::func(%s)" % test_mode_args[0]) + + ###################################################################### + # Step 3 - load pre-existing terms and evals + ###################################################################### + if self.args.load is not None: + term_cachefile = self.args.load + "_terms.pkl" + eval_cachefile = self.args.load + "_evals.pkl" + if os.path.isfile(term_cachefile): + with open(term_cachefile) as pkl_file: + cached_terms = pickle.load(pkl_file) + if os.path.isfile(eval_cachefile): + with open(eval_cachefile) as pkl_file: + cached_evals = pickle.load(pkl_file) + + for i in range(self.args.order + 1): + print(">> Order %i" % i) + if i == 0 or i == 1: + continue + + evallist = [] + termlist = [] + to_check_list = [] + + stats[i] = {} + stats[i]["nTerms"] = 0 + stats[i]["nCachedTerms"] = 0 + stats[i]["nSmallTerms"] = 0 + stats[i]["nAllNewTerms"] = 0 + stats[i]["nActualNewTerms"] = 0 + stats[i]["nEvals"] = 0 + stats[i]["nUniqueEvals"] = 0 + for item in itertools.combinations_with_replacement(list(range(Nx)), i): + if len(set(item)) != 1 and i > self.args.cross_order: 
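+                    # Mixed terms (more than one distinct POI) with a total
+                    # order above --cross-order are dropped entirely: remove
+                    # any cached value and skip the evaluation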
+                    if item in cached_terms:
+                        del cached_terms[item]
+                    continue
+
+                stats[i]["nTerms"] += 1
+
+                # If already in the cache we can skip evaluating this term, but first check
+                # if it's small enough to be added to the list of 'can_skip' terms
+                if item in cached_terms:
+                    stats[i]["nCachedTerms"] += 1
+                    to_check_list.append((item, cached_terms[item]))
+                    continue
+
+                stats[i]["nAllNewTerms"] += 1
+
+                # Check if this new term can be skipped
+                skip_term = False
+                for skip_item in can_skip:
+                    has_all_terms = True
+                    for x, freq in six.iteritems(skip_item[1]):
+                        if item.count(x) < freq:
+                            has_all_terms = False
+                            break
+                    if has_all_terms:
+                        # print 'Testing if term %s is skippable based on %s' % (str(item), str(skip_item))
+                        perm_ratio = float(Permutations(item)) / float(Permutations(skip_item[0]))
+                        fact_ratio = float(math.factorial(len(skip_item[0]))) / float(math.factorial(len(item)))
+                        expected = cached_terms[skip_item[0]] * perm_ratio * fact_ratio
+                        for index in item:
+                            expected *= validity[index]
+                        # print 'Original = %g, permutations ratio = %g, factorial ratio = %g, final = %g' % (
+                        #     skip_item[2],
+                        #     perm_ratio,
+                        #     fact_ratio,
+                        #     expected)
+                        if abs(expected) < drop_thresh:
+                            skip_term = True
+                            break
+                if skip_term:
+                    # print 'Skipping negligible: %s' % str(item)
+                    continue
+
+                stats[i]["nActualNewTerms"] += 1
+
+                termlist.append(ExpansionTerm(xvec, item, stencils))
+                termlist[-1].GatherFundamentalTerms(evallist)
+
+            stats[i]["nEvals"] = len(evallist)
+            unique_evallist = [x for x in set(x.FormattedPars() for x in evallist)]
+            stats[i]["nUniqueEvals"] = len(unique_evallist)
+            actual_evallist = [x for x in unique_evallist if x not in cached_evals]
+            stats[i]["nActualUniqueEvals"] = len(actual_evallist)
+
+            ## Prepare the inputs for the different test modes
+            if len(actual_evallist) > 0 and self.args.test_mode == 1:
+                self.load_workspace(dc, POIs)
+
+            multicount = 0
+            multivars = []
+            for idx, vals in enumerate(actual_evallist):
+                if self.args.multiple == 1:
+                    set_vals = []
+                    for POI, val in zip(POIs, vals):
+                        set_vals.append("%s=%f" % (POI, val))
+                        if self.args.test_mode == 1:
+                            self.wsp_vars[POI].setVal(val)
+                    set_vals_str = ",".join(set_vals)
+                else:
+                    multivars.append(vals)
+                    if multicount == 0:
+                        multiargs = []
+                        for POI in POIs:
+                            multiargs.append("%s=" % POI)
+                    for ival, val in enumerate(vals):
+                        multiargs[ival] += "%f" % val
+                        if multicount < (self.args.multiple - 1) and idx < (len(actual_evallist) - 1):
+                            multiargs[ival] += ","
+                    if multicount == (self.args.multiple - 1) or idx == (len(actual_evallist) - 1):
+                        set_vals_str = ":".join(multiargs)
+                        multicount = 0
+                    else:
+                        multicount += 1
+                        continue
+
+                # hashlib requires bytes, so encode the argument string first
+                hash_id = hashlib.sha1(set_vals_str.encode("utf-8")).hexdigest()
+                filename = "higgsCombine.TaylorExpand.%s.MultiDimFit.mH%s.root" % (hash_id, mass)
+                arg_str = "-M MultiDimFit -n .TaylorExpand.%s --algo fixed --redefineSignalPOIs %s --fixedPointPOIs " % (hash_id, ",".join(POIs))
+                arg_str += set_vals_str
+
+                if self.args.do_fits:
+                    if self.args.test_mode == 0 and not os.path.isfile(filename):
+                        self.job_queue.append("combine %s %s" % (arg_str, " ".join(self.passthru)))
+                    if self.args.test_mode == 1:
+                        if idx % 10000 == 0:
+                            print("Done %i/%i NLL evaluations..." % (idx, len(actual_evallist)))
+                        cached_evals[vals] = self.nll.getVal() - self.nll0
+                    if self.args.test_mode == 2:
+                        # Divide by 2 here because the graph is already 2*deltaNLL
+                        cached_evals[vals] = graph_func.Eval(vals[0]) / 2.0
+                    if self.args.test_mode == 3:
+                        # Evaluate the user-supplied test function at this point
+                        test_mode_ws.var("x").setVal(vals[0])
+                        cached_evals[vals] = test_mode_ws.function("func").getVal()
+                else:
+                    if self.args.test_mode == 0:
+                        if self.args.multiple == 1:
+                            cached_evals[vals] = self.get_results(filename)[0]
+                        else:
+                            results = self.get_results(filename)
+                            for xidx, xvals in enumerate(multivars):
+                                cached_evals[xvals] = results[xidx]
+                            multivars = list()
+
+            if self.args.do_fits and len(self.job_queue):
+                njobs = len(self.job_queue)
+                self.flush_queue()
+                pprint(stats[i])
+                print("Number of jobs = %i" % njobs)
+                print("Raw number of evaluations: %i" % len(evallist))
+                return
+
+            for x in evallist:
+                x.fnval = cached_evals[x.FormattedPars()]
+
+            for term in termlist:
+                item = tuple(term.derivatives)
+                term.Print()
+                cached_terms[item] = term.Eval(with_permutations=True, with_factorial=True)
+                print("Raw term: %f" % term.Eval(with_permutations=False, with_factorial=False))
+                to_check_list.append((item, cached_terms[item]))
+
+            for item, estimate in to_check_list:
+                for index in item:
+                    estimate *= validity[index]
+                if abs(estimate) < drop_thresh:
+                    can_skip.append((item, {x: item.count(x) for x in set(item)}, estimate))
+                    stats[i]["nSmallTerms"] += 1
+
+            pprint(stats[i])
+
+        if self.args.save is not None:
+            term_cachefile = self.args.save + "_terms.pkl"
+            eval_cachefile = self.args.save + "_evals.pkl"
+            # pickle.dumps returns bytes, so the caches must be written in binary mode
+            pkl_data = pickle.dumps(cached_terms)
+            with open(term_cachefile, "wb") as out_file:
+                out_file.write(pkl_data)
+            pkl_data = pickle.dumps(cached_evals)
+            with open(eval_cachefile, "wb") as out_file:
+                out_file.write(pkl_data)
+
+        # Build the taylor expansion object
+        xvars = []
+        x0vars = []
+        xvec = ROOT.RooArgList()
+        x0vec = ROOT.RooArgList()
+        for i, POI in enumerate(POIs):
+            xvars.append(ROOT.RooRealVar(POI, "", cfg[POI]["BestFit"], cfg[POI]["OutputRange"][0], cfg[POI]["OutputRange"][1]))
+            x0vars.append(ROOT.RooRealVar(POI + "_In", "", cfg[POI]["BestFit"], -100, 100))
+            x0vars[-1].setConstant(True)
+            xvec.add(xvars[-1])
+            x0vec.add(x0vars[-1])
+
+        te_terms = ROOT.vector("double")()
+        pos = 0
+        te_tracker = ROOT.vector("std::vector<int>")()
+
+        save_cov_matrix = False
+        if save_cov_matrix:
+            hessian = ROOT.TMatrixDSym(len(POIs))
+            cov_matrix = ROOT.TMatrixDSym(len(POIs))
+            cor_matrix = ROOT.TMatrixDSym(len(POIs))
+        sorted_terms = []
+        for i in range(self.args.order + 1):
+            sorted_tmp = []
+            for tracker, val in six.iteritems(cached_terms):
+                if len(tracker) == i:
+                    sorted_tmp.append((tracker, val))
+                    if i == 2 and save_cov_matrix:
+                        multi = 1.0
+                        if tracker[0] == tracker[1]:
+                            multi = 2.0
+                        hessian[int(tracker[0])][int(tracker[1])] = multi * val
+                        hessian[int(tracker[1])][int(tracker[0])] = multi * val
+            sorted_terms.extend(sorted(sorted_tmp, key=lambda x: x[0]))
+        if save_cov_matrix:
+            # hessian.Print()
+            cov_matrix = hessian.Clone()
+            cov_matrix.Invert()
+            # cov_matrix.Print()
+            cor_matrix = cov_matrix.Clone()
+            for i in range(len(POIs)):
+                for j in range(len(POIs)):
+                    print(i, j, cor_matrix[i][j], (cov_matrix[i][i]), (cov_matrix[j][j]))
+                    cor_matrix[i][j] = cor_matrix[i][j] / (math.sqrt(cov_matrix[i][i]) * math.sqrt(cov_matrix[j][j]))
+            # cor_matrix.Print()
+            fout = ROOT.TFile("covariance.root", "RECREATE")
+            fout.WriteTObject(cor_matrix, "cor")
+            h_cor =
self.fix_TH2(ROOT.TH2D(cor_matrix), POIs) + fout.WriteTObject(h_cor, "h_cor") + fout.WriteTObject(cov_matrix, "cov") + h_cov = self.fix_TH2(ROOT.TH2D(cov_matrix), POIs) + fout.WriteTObject(h_cov, "h_cov") + for tracker, val in sorted_terms: + # Check if this is a really big value + # if abs(val) > 1E9: + # print '%i -- %s --> %s: %f ' % (pos, tracker, tracker, val) + # val = 0. + + # Push back the terms + te_terms.push_back(val) + i_tracker = ROOT.vector("int")(len(tracker)) + for idx, tr in enumerate(tracker): + i_tracker[idx] = tr + te_tracker.push_back(i_tracker) + + # Print it + print("%i -- %s --> %s: %f" % (pos, tracker, tracker, val)) + + # print 'NEGLIGIBLE TERMS AT ORDER %i: %i/%i' % (n, n_below, n_terms) + + nllfn = ROOT.RooTaylorExpansion("nll", "", xvec, x0vec, te_tracker, te_terms) + + fout = ROOT.TFile("taylor_expansion.root", "RECREATE") + # xvec.Print('v') + # x0vec.Print('v') + wsp = ROOT.RooWorkspace("w", "") + getattr(wsp, "import")(nllfn, ROOT.RooCmdArg()) + wsp.Write() + fout.Close() + + if self.args.test_mode == 2: + testgraph = GenerateDebugGraph(wsp, POIs[0], graph) + fgout = ROOT.TFile("test_mode_%s.root" % (POIs[0]), "RECREATE") + testgraph.SetName("main") + testgraph.Write() + fgout.Close() + + print("%-10s %-20s %-20s %-20s %-20s %-20s" % ("Order", "nTerms", "nCachedTerms", "nSmallTerms", "nAllNewTerms", "nActualNewTerms")) + print("-" * 94) + for i in stats: + print( + "%-10i %-20i %-20i %-20i %-20i %-20i" + % (i, stats[i]["nTerms"], stats[i]["nCachedTerms"], stats[i]["nSmallTerms"], stats[i]["nAllNewTerms"], stats[i]["nActualNewTerms"]) + ) + + print("\n%-10s %-20s %-20s %-20s" % ("Order", "nEvals", "nUniqueEvals", "nActualUniqueEvals")) + print("-" * 74) + for i in stats: + print("%-10i %-20i %-20i %-20i" % (i, stats[i]["nEvals"], stats[i]["nUniqueEvals"], stats[i]["nActualUniqueEvals"])) diff --git a/python/tool_base/Workspace.py b/python/tool_base/Workspace.py new file mode 100755 index 00000000000..5ec4596bca3 --- /dev/null +++ b/python/tool_base/Workspace.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function +import ROOT + +import HiggsAnalysis.CombinedLimit.tool_base.utils as utils +from HiggsAnalysis.CombinedLimit.tool_base.opts import OPTS + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase + + +class PrintWorkspace(CombineToolBase): + description = "Load a Workspace and call Print()" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("input", help="The input specified as FILE:WORKSPACE") + + def run_method(self): + ws_in = self.args.input.split(":") + f = ROOT.TFile(ws_in[0]) + ws = f.Get(ws_in[1]) + ws.Print() + + +class ModifyDataSet(CombineToolBase): + description = "Change the name of a dataset in an existing workspace" + requires_root = True + + def __init__(self): + CombineToolBase.__init__(self) + + def attach_args(self, group): + CombineToolBase.attach_args(self, group) + group.add_argument("input", help="The input specified as FILE:WORKSPACE:DATASET or FILE:WORKSPACE") + group.add_argument("output", help="The output specified as FILE:WORKSPACE:DATASET or FILE:WORKSPACE") + group.add_argument("-d", "--data", help="Source data from other file, either FILE:WORKSPACE:DATA or FILE:DATA") + + def run_method(self): + ws_in = self.args.input.split(":") + print(">> Input: " + str(ws_in)) + ws_out = self.args.output.split(":") 
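+        # An illustrative example (file and object names hypothetical):
+        # renaming "data_obs" to "data_asimov" while writing workspace "w"
+        # to a new file would use
+        #   input  = "in.root:w:data_obs"
+        #   output = "out.root:w:data_asimov"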
+ print(">> Output: " + str(ws_out)) + f = ROOT.TFile(ws_in[0]) + ws = f.Get(ws_in[1]) + if len(ws_in) == 3: + data = ws.data(ws_in[2]) + if len(ws_out) == 3: + data.SetName(ws_out[2]) + else: + ws_d = self.args.data.split(":") + print(">> Data: " + str(ws_d)) + f_d = ROOT.TFile(ws_d[0]) + if len(ws_d) == 2: + data = f_d.Get(ws_d[1]) + else: + data = f_d.Get(ws_d[1]).data(ws_d[2]) + if len(ws_out) == 3: + data.SetName(ws_out[2]) + getattr(ws, "import")(data, ROOT.RooCmdArg()) + ws.SetName(ws_out[1]) + ws.writeToFile(ws_out[0]) diff --git a/python/tool_base/crab.py b/python/tool_base/crab.py new file mode 100755 index 00000000000..b28887bb022 --- /dev/null +++ b/python/tool_base/crab.py @@ -0,0 +1,37 @@ +from __future__ import absolute_import +import os +from WMCore.Configuration import Configuration + + +config = Configuration() + +config.section_("General") +config.General.requestName = "" +# if (args.workArea != ''): +# config.General.workArea = args.workArea + +config.section_("JobType") +config.JobType.pluginName = "PrivateMC" +config.JobType.psetName = os.environ["CMSSW_BASE"] + "/src/HiggsAnalysis.CombinedLimit/scripts/do_nothing_cfg.py" +config.JobType.scriptExe = "" +config.JobType.inputFiles = [ + os.environ["CMSSW_BASE"] + "/src/HiggsAnalysis.CombinedLimit/scripts/FrameworkJobReport.xml", + os.environ["CMSSW_BASE"] + "/src/HiggsAnalysis.CombinedLimit/scripts/copyRemoteWorkspace.sh", + os.environ["CMSSW_BASE"] + "/bin/" + os.environ["SCRAM_ARCH"] + "/combine", +] +config.JobType.outputFiles = ["combine_output.tar"] +# config.JobType.maxMemoryMB = args.maxMemory + +config.section_("Data") +config.Data.outputPrimaryDataset = "Combine" +config.Data.splitting = "EventBased" +config.Data.unitsPerJob = 1 +config.Data.totalUnits = 1 +config.Data.publication = False +config.Data.outputDatasetTag = "" + +config.section_("User") + +config.section_("Site") +config.Site.blacklist = ["T3_IT_Bologna", "T3_US_UMiss"] +config.Site.storageSite = "T2_CH_CERN" diff --git a/python/tool_base/opts.py b/python/tool_base/opts.py new file mode 100644 index 00000000000..e5101147ee5 --- /dev/null +++ b/python/tool_base/opts.py @@ -0,0 +1,12 @@ +# a dictionary with some pre-defined (mostly minimizer) combine options + +OPTS = { + "vanilla": "--minimizerStrategy 0 --minimizerTolerance 0.1 --cminOldRobustMinimize 0", + "prefitAsimovSToy": "-M GenerateOnly --expectSignal 1 -t -1 --saveToys --saveWorkspace --noMCbonly 1", + "prefitAsimovBToy": "-M GenerateOnly --expectSignal 0 -t -1 --saveToys --saveWorkspace --noMCbonly 1", + "robust": '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreScan --cminPreFit 1 --X-rtd FITTER_DYN_STEP --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0', + "robustL": '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreScan --cminPreFit 1 --X-rtd FITTER_DYN_STEP --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --minimizerToleranceForMinos 0.001', + "robustLNoScan": '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreFit 1 --X-rtd FITTER_DYN_STEP --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 
--minimizerToleranceForMinos 0.001', + "robustNew": '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreScan --cminPreFit 1 --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --X-rtd FITTER_NEW_CROSSING_ALGO --X-rtd FITTER_NEVER_GIVE_UP --X-rtd FITTER_BOUND --minimizerToleranceForMinos 0.1', + "MLHesse": '--minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --out ./ --minos none --skipBOnlyFit --noMCbonly 1 --cminPreScan', +} diff --git a/python/tool_base/rounding.py b/python/tool_base/rounding.py new file mode 100644 index 00000000000..b4282570b06 --- /dev/null +++ b/python/tool_base/rounding.py @@ -0,0 +1,388 @@ +""" +Performs rounding of values with uncertainties and produces output that can be used in ROOT or LaTeX + +Written by andre.david@cern.ch +""" + +from __future__ import absolute_import +from __future__ import print_function +from math import * +from decimal import * +from six.moves import range + + +### +def roundUnc(unc, method="Publication"): + """By default, rounds uncertainty 'unc' according to the PDG rules plus one significant digit ("Publication"). + + Optionally it rounds according with 'method': + - "PDG" applies the PDG algorithm + - "Publication" is like "PDG" with an extra significant digit (for results that need to be combined later) + - "OneDigit" forces one single significant digit (useful when there are multiple uncertainties that vary by more than a factor 10 among themselves) + + Returns a tuple with (uncString, uncMagnitude), where magnitude is the power of 10 that applies to the string to recover the uncertainty. + + """ + + # PDG rules (from the Introduction, Section 5.3) + # + # Uncertainty leading digits in range: + # 100 to 354 -> keep 2 digits + # 355 to 949 -> keep 1 digit + # 950 to 999 -> keep 2 digits, rounding up to 1000 (e.g. 0.099 -> 0.10, not 0.1) + + uncDigs, uncMagnitude = getDigsMag(unc) + + prec = 1 + unc3Digs = int(round(100 * uncDigs)) + + if method == "SingleDigit": + pass + elif method == "PDG" or method == "Publication": + if method == "Publication": + prec += 1 + if 100 <= unc3Digs <= 354: + prec += 1 + else: + raise TypeError('Unknown precision method ("%s")' % method) + + uncStr = matchPrec(uncDigs, str(10 ** int(1 - prec))) + + # put String in integer form + uncString = str((Decimal(uncStr) * (10 ** int(prec - 1))).quantize(Decimal("1"))) + uncMagnitude -= prec - 1 + + return (uncString, uncMagnitude) + + +### +def PDGRoundUnc(unc): + """Rounds uncertainty unc according to the PDG rules.""" + + return roundUnc(unc, "PDG") + + +### +def matchPrec(val, refStr): + """Returns a string version of val matching refStr in terms of significant digits.""" + + valDec = Decimal(str(val)) + refDec = Decimal(refStr) + return str(valDec.quantize(refDec)) + + +### +def getDigsMag(val): + """Extracts the mantissa and exponent of val. + + Returns (valDigs, valMag) + + """ + try: + valMag = int(floor(log10(val))) + valDigs = val / pow(10, valMag) + except Exception: + print(val) + valDigs = 1 + valMag = 1 + + return (valDigs, valMag) + + +### +def PDGRoundSym(val, unc): + """Rounds a value with a single symmetric uncertainty according to the PDG rules and calculates the order of magnitude of both. 
+
+    Returns (valStr, [uncStr], uncMag)
+
+    """
+
+    assert unc > 0
+    uncStr, uncMag = PDGRoundUnc(unc)
+    valStr = matchPrec(val / pow(10, uncMag), uncStr)
+    return (valStr, [uncStr], uncMag)
+
+
+###
+def PDGRoundAsym(val, uncP, uncM):
+    """Rounds a value with a single asymmetric uncertainty according to the PDG rules and calculates the order of magnitude of both.
+
+    Returns (valStr, [[uncPStr, uncMStr]], uncMag)
+
+    """
+
+    assert uncP > 0
+    assert uncM > 0
+
+    uncRef = min(uncP, uncM)
+    uncRefStr, uncRefMag = PDGRoundUnc(uncRef)
+
+    uncPStr = matchPrec(uncP / pow(10, uncRefMag), uncRefStr)
+    uncMStr = matchPrec(uncM / pow(10, uncRefMag), uncRefStr)
+    valStr = matchPrec(val / pow(10, uncRefMag), uncRefStr)
+
+    return (valStr, [[uncPStr, uncMStr]], uncRefMag)
+
+
+###
+def roundMultiple(vals, uncs, method="PDG"):
+    """Rounds value with multiple symmetric or asymmetric uncertainties, ignoring the PDG rule when the uncertainty values are too disparate.
+
+    Uncertainties should be a tuple or list of the form
+    uncs = (symunc1,(asymP2,asymM2),sym3,etc)
+
+    Returns (valStr, [symunc1,[asymP2,asymM2],sym3,etc], order of magnitude)
+
+    """
+
+    uncList = list()
+
+    if not isinstance(uncs, (list, tuple)):
+        uncs = [uncs]
+
+    for unc in uncs:
+        try:
+            uncList.append(unc[0])
+            uncList.append(unc[1])
+        except Exception:
+            uncList.append(unc)
+
+    uncMin = min(uncList)
+    uncMax = max(uncList)
+
+    # If the discrepancy in the uncertainties is too big, downgrade the number of precision digits.
+    if uncMax > 10 * uncMin:
+        if method == "Publication":
+            method = "PDG"
+        elif method == "PDG":
+            method = "SingleDigit"
+
+    uncRefStr, uncRefMag = roundUnc(uncMin, method)
+
+    try:
+        valsStr = [matchPrec(val / pow(10, uncRefMag), uncRefStr) for val in vals]
+    except Exception:
+        valsStr = matchPrec(vals / pow(10, uncRefMag), uncRefStr)
+
+    uncsStr = list()
+
+    for unc in uncs:
+        if isinstance(unc, (list, tuple)):
+            elt = [matchPrec(x / pow(10, uncRefMag), uncRefStr) for x in unc]
+        else:
+            elt = matchPrec(unc / pow(10, uncRefMag), uncRefStr)
+        uncsStr.append(elt)
+
+    # print valsStr, uncsStr, uncRefMag
+
+    return (valsStr, uncsStr, uncRefMag)
+
+
+###
+def downgradePrec(valStr, valMag):
+    """Returns a string with valStr multiplied by the exponent valMag."""
+
+    # assert valMag<=0
+    mag = 10 ** int(valMag)
+    return matchPrec(float(valStr) * mag, str(mag))
+
+
+###
+def toROOTRounded(vals, uncs, uncLbls=None, units=None):
+
+    valStr, uncsStr, mag = roundMultiple(vals, uncs)
+    return toROOTorLatex(valStr, uncsStr, mag, uncLbls, units, mode="ROOT")
+
+
+###
+def toLatexRounded(vals, uncs, uncLbls=None, units=None):
+
+    valStr, uncsStr, mag = roundMultiple(vals, uncs)
+    return toROOTorLatex(valStr, uncsStr, mag, uncLbls, units, mode="Latex")
+
+
+commonSIPrefixes = {12: "T", 9: "G", 6: "M", 3: "k", -3: "m", -6: r"\mu ", -9: "n", -12: "p", -15: "f"}
+
+
+###
+def toROOT(valStr, uncsStr, mag, uncLbls=None, units=None):
+    return toROOTorLatex(valStr, uncsStr, mag, uncLbls, units, mode="ROOT")
+
+
+###
+def toLatex(valStr, uncsStr, mag, uncLbls=None, units=None):
+    return toROOTorLatex(valStr, uncsStr, mag, uncLbls, units, mode="Latex")
+
+
+###
+def toROOTorLatex(valStr, uncsStr, mag, uncLbls=None, units=None, mode=None):
+
+    # Used http://www.codecogs.com/latex/eqneditor.php to check results
+
+    if uncLbls:
+        assert len(uncsStr) == len(uncLbls)
+
+    salt = -1 if mag >= 0 else 0
+    magTen = 3 * int((mag + salt) / 3 + 1)
+    # if magTen==-3: magTen=0
+    magTgt = mag - magTen
+
+    if mode == "Latex":
+        t = {
"sep": "\\", + "space": "\;", # noqa: W605 + "times": "\\times", + "left": "\\left", + "right": "\\right", + } + elif mode == "ROOT": + t = { + "sep": "#", + "space": "", + "times": "#times", + "left": "#left", + "right": "#right", + } + else: + raise TypeError('Unknown mode ("%s")' % mode) + + symUncStr = t["sep"] + "pm%s " + asymUncStr = "^{+%s}_{-%s} " + if units and magTen in list(commonSIPrefixes.keys()): + pwrStr = t["space"] + t["sep"] + "mathrm{" + commonSIPrefixes[magTen] + units + "} " + else: + pwrStr = t["times"] + "10^{%d} " % magTen + lblStr = t["sep"] + "mathrm{(%s)} " + + def transform(x): + return downgradePrec(x, magTgt) + + # Build the string + outStr = "" + + if mode == "Latex": + outStr += "$ " + + if magTen and not units: + outStr += "[ " # "t["left"]+"( " + + outStr += transform(valStr) + " " + + for i, unc in enumerate(uncsStr): + if isinstance(unc, (list, tuple)): + outStr += asymUncStr % (transform(unc[0]), transform(unc[1])) + else: + outStr += symUncStr % transform(unc) + if uncLbls: + outStr += lblStr % uncLbls[i] + + if magTen: + if not units: + outStr += "] " # "t["right"]+") " + outStr += pwrStr + + if mode == "Latex": + outStr += "$ " + + return outStr + + +import unittest + + +class PDGRoundingTests(unittest.TestCase): + + knownValues = ( + (0.119, ("12", -2)), + (0.367, ("4", -1)), + (9.99, ("10", 0)), + (35002, ("35", 3)), + (10.54, ("11", 0)), + (0.099, ("10", -2)), + ) + + def testPDGRoundUnc(self): + """Uncertainty roundings according to the PDG rules""" + for toround, rounded in self.knownValues: + result = PDGRoundUnc(toround) + self.assertEquals(result, rounded) + + +class RoundSymUncTests(unittest.TestCase): + + knownValues = ( + ((0.827, 0.119), ("83", ["12"], -2)), + ((0.827, 0.367), ("8", ["4"], -1)), + ((0.827, 0.99), ("8", ["10"], -1)), + ((100.32, 0.843), ("1003", ["8"], -1)), + ((10032.34332, 8.6234), ("10032", ["9"], 0)), + ((10583, 984), ("106", ["10"], 2)), + ((10.543e5, 73.42e4), ("11", ["7"], 5)), + ((1.030, 0.032), ("1030", ["32"], -3)), + ) + + def testSymmErrors(self): + """PDG rules: symmetric errors""" + for toround, rounded in self.knownValues: + result = PDGRoundSym(toround[0], toround[1]) + self.assertEquals(result, rounded) + + +class RoundAsymUncTests(unittest.TestCase): + + knownValues = ( + ((0.827, 0.119, 0.020), ("827", [["119", "20"]], -3)), + ((0.827, 0.260, 0.025), ("827", [["260", "25"]], -3)), + ) + + def testAsymmErrors(self): + """PDG rules: asymmetric errors""" + for toround, rounded in self.knownValues: + result = PDGRoundAsym(toround[0], toround[1], toround[2]) + self.assertEquals(result, rounded) + + +class RoundMultipleTests(unittest.TestCase): + + knownValues = ( + ((0.827, (0.119, (0.020, 0.04))), ("827", ["119", ["20", "40"]], -3)), + ((5.234, (0.035, 0.361)), ("523", ["4", "36"], -2)), + ((0.827, [[0.260, 0.025]]), ("83", [["26", "2"]], -2)), + ((0.827, 0.119), ("83", ["12"], -2)), + ((1.030, 0.032), ("1030", ["32"], -3)), + ) + + def testAsymmErrors(self): + """Human rules: multiple symmetric and/or asymmetric errors""" + for toround, rounded in self.knownValues: + result = roundMultiple(toround[0], toround[1]) + self.assertEquals(result, rounded) + + +if __name__ == "__main__": + + # run latex trials here + # + print() + for i in range(-6, 6 + 1): + print(toLatexRounded(5.234 * pow(10, i), (0.045 * pow(10, i), 0.361 * pow(10, i)), None, "W")) + print() + for i in range(-6, 6 + 1): + print(toLatexRounded(5.746 * pow(10, i), (0.023 * pow(10, i), 0.954 * pow(10, i)))) + print() + 
print(toLatexRounded(0.8274e-18, (0.1191e-18, (0.0202e-18, 0.0432e-18), 0.0582e-18), ("stat.", "syst.", "theo."))) + print(toLatexRounded(0.8274e-4, (0.1191e-4, (0.0202e-6, 0.0432e-4), 0.0582e-4), ("stat.", "syst.", "theo."), "b")) + print() + for i in range(-6, 6 + 1): + print(toLatexRounded(1.030 * pow(10, i), (0.032 * pow(10, i)))) + print() + for i in range(-6, 6 + 1): + print(toLatexRounded(0.549 * pow(10, i), (0.019 * pow(10, i), 0.063 * pow(10, i), 0.060 * pow(10, i)))) + print() + + print(toROOTRounded(2850e9, (2850e9 * 0.11))) + + # unit tests exit after running + runner = unittest.TextTestRunner(verbosity=2) + unittest.main(testRunner=runner) diff --git a/python/tool_base/utils.py b/python/tool_base/utils.py new file mode 100644 index 00000000000..5dad4de34fe --- /dev/null +++ b/python/tool_base/utils.py @@ -0,0 +1,192 @@ +from __future__ import absolute_import +from __future__ import print_function +import ROOT +import re + + +def split_vals(vals, fmt_spec=None): + """Converts a string '1:3|1,4,5' into a list [1, 2, 3, 4, 5]""" + res = set() + res_extra = list() + first = vals.split(",") + for f in first: + second = re.split("[:|]", f) + # print second + if len(second) == 1: + res.add(second[0]) + if len(second) == 2: + res_extra.extend([second[0]] * int(second[1])) + if len(second) == 3: + x1 = float(second[0]) + ndigs = "0" + split_step = second[2].split(".") + if len(split_step) == 2: + ndigs = len(split_step[1]) + fmt = "%." + str(ndigs) + "f" + if fmt_spec is not None: + fmt = fmt_spec + while x1 < float(second[1]) + 0.0001: + res.add(fmt % x1) + x1 += float(second[2]) + return sorted([x for x in res] + res_extra, key=lambda x: float(x)) + + +def list_from_workspace(file, workspace, set): + """Create a list of strings from a RooWorkspace set""" + res = [] + wsFile = ROOT.TFile(file) + ws = wsFile.Get(workspace) + argSet = ws.set(set) + it = argSet.createIterator() + var = it.Next() + while var: + res.append(var.GetName()) + var = it.Next() + return res + + +def prefit_from_workspace(file, workspace, params, setPars=None): + """Given a list of params, return a dictionary of [-1sig, nominal, +1sig]""" + res = {} + wsFile = ROOT.TFile(file) + ws = wsFile.Get(workspace) + ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.WARNING) + if setPars is not None: + parsToSet = [tuple(x.split("=")) for x in setPars.split(",")] + allParams = ws.allVars() + allParams.add(ws.allCats()) + for par, val in parsToSet: + tmp = allParams.find(par) + isrvar = tmp.IsA().InheritsFrom(ROOT.RooRealVar.Class()) + if isrvar: + print("Setting parameter %s to %g" % (par, float(val))) + tmp.setVal(float(val)) + else: + print("Setting index %s to %g" % (par, float(val))) + tmp.setIndex(int(val)) + + for p in params: + res[p] = {} + + var = ws.var(p) + pdf = ws.pdf(p + "_Pdf") + gobs = ws.var(p + "_In") + + # For pyROOT NULL test: "pdf != None" != "pdf is not None" + if pdf != None and gobs != None: + # To get the errors we can just fit the pdf + # But don't do pdf.fitTo(globalObs), it forces integration of the + # range of the global observable. Instead we make a RooConstraintSum + # which is what RooFit creates by default when we have external constraints + nll = ROOT.RooConstraintSum("NLL", "", ROOT.RooArgSet(pdf), ROOT.RooArgSet(var)) + minim = ROOT.RooMinimizer(nll) + minim.setEps(0.001) # Might as well get some better precision... 
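+            # The constraint sum is a -log(L), so the 1-sigma interval
+            # corresponds to a rise of 0.5 above the minimum; that is what
+            # the error level set on the next line encodes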
+ minim.setErrorLevel(0.5) # Unlike for a RooNLLVar we must set this explicitly + minim.setPrintLevel(-1) + minim.setVerbose(False) + # Run the fit then run minos for the error + minim.minimize("Minuit2", "migrad") + minim.minos(ROOT.RooArgSet(var)) + # Should really have checked that these converged ok... + # var.Print() + # pdf.Print() + val = var.getVal() + errlo = -1 * var.getErrorLo() + errhi = +1 * var.getErrorHi() + res[p]["prefit"] = [val - errlo, val, val + errhi] + if pdf.IsA().InheritsFrom(ROOT.RooGaussian.Class()): + res[p]["type"] = "Gaussian" + elif pdf.IsA().InheritsFrom(ROOT.RooPoisson.Class()): + res[p]["type"] = "Poisson" + elif pdf.IsA().InheritsFrom(ROOT.RooBifurGauss.Class()): + res[p]["type"] = "AsymmetricGaussian" + else: + res[p]["type"] = "Unrecognised" + elif pdf == None or pdf.IsA().InheritsFrom(ROOT.RooUniform.Class()): + res[p]["type"] = "Unconstrained" + res[p]["prefit"] = [var.getVal(), var.getVal(), var.getVal()] + res[p]["groups"] = [x.replace("group_", "") for x in var.attributes() if x.startswith("group_")] + return res + + +def get_singles_results(file, scanned, columns): + """Extracts the output from the MultiDimFit singles mode + Note: relies on the list of parameters that were run (scanned) being correct""" + res = {} + f = ROOT.TFile(file) + if f is None or f.IsZombie(): + return None + t = f.Get("limit") + for i, param in enumerate(scanned): + res[param] = {} + for col in columns: + allvals = [getattr(evt, col) for evt in t] + if len(allvals) < (1 + len(scanned) * 2): + print("File %s did not contain a sufficient number of entries, skipping" % file) + return None + res[param][col] = [allvals[i * 2 + 1], allvals[0], allvals[i * 2 + 2]] + return res + + +def get_roofitresult(rfr, params, others): + res = {} + if rfr.covQual() != 3: + print("Error: the covariance matrix in the RooFitResult is not accurate and cannot be used") + return None + for i, param in enumerate(params): + res[param] = {} + for j, other in enumerate(others): + pj = rfr.floatParsFinal().find(other) + vj = pj.getVal() + ej = pj.getError() + c = rfr.correlation(param, other) + res[param][other] = [vj - ej * c, vj, vj + ej * c] + return res + + +def get_robusthesse(floatParams, corr, params, others): + res = {} + for i, param in enumerate(params): + res[param] = {} + idx_p = corr.GetXaxis().FindBin(param) + for j, other in enumerate(others): + pj = floatParams.find(other) + vj = pj.getVal() + ej = pj.getError() + idx = corr.GetXaxis().FindBin(other) + c = corr.GetBinContent(idx_p, idx) + res[param][other] = [vj - ej * c, vj, vj + ej * c] + return res + + +def get_none_results(file, params): + """Extracts the output from the MultiDimFit none (just fit) mode""" + res = {} + f = ROOT.TFile(file) + if f is None or f.IsZombie(): + return None + t = f.Get("limit") + t.GetEntry(0) + for param in params: + res[param] = getattr(t, param) + return res + + +def get_fixed_results(file, params): + """Extracts the output from the MultiDimFit fixed mode""" + res = {} + f = ROOT.TFile(file) + if f is None or f.IsZombie(): + return None + t = f.Get("limit") + t.GetEntry(0) + res["bestfit"] = {} + res["fixedpoint"] = {} + for param in params: + res["bestfit"][param] = getattr(t, param) + t.GetEntry(1) + for param in params: + res["fixedpoint"][param] = getattr(t, param) + res["deltaNLL"] = getattr(t, "deltaNLL") + res["pvalue"] = getattr(t, "quantileExpected") + return res diff --git a/python/util/plotting.py b/python/util/plotting.py new file mode 100644 index 00000000000..3705c9a8a78 --- /dev/null 
+++ b/python/util/plotting.py @@ -0,0 +1,1889 @@ +from __future__ import absolute_import +from __future__ import print_function +import ROOT as R +import math +from array import array +import re +import json +import types +import six +import ctypes +from six.moves import range + +COL_STORE = [] + +## @name Global Style +## +## @details Set the properties of the global gStyle object and create colours +## and colour palettes +##@{ + + +def SetTDRStyle(): + """Sets the PubComm recommended style + + Just a copy of + @sa ModTDRStyle() to use this style with some additional customisation. + """ + # For the canvas: + R.gStyle.SetCanvasBorderMode(0) + R.gStyle.SetCanvasColor(R.kWhite) + R.gStyle.SetCanvasDefH(600) # Height of canvas + R.gStyle.SetCanvasDefW(600) # Width of canvas + R.gStyle.SetCanvasDefX(0) # POsition on screen + R.gStyle.SetCanvasDefY(0) + + # For the Pad: + R.gStyle.SetPadBorderMode(0) + # R.gStyle.SetPadBorderSize(Width_t size = 1) + R.gStyle.SetPadColor(R.kWhite) + R.gStyle.SetPadGridX(False) + R.gStyle.SetPadGridY(False) + R.gStyle.SetGridColor(0) + R.gStyle.SetGridStyle(3) + R.gStyle.SetGridWidth(1) + + # For the frame: + R.gStyle.SetFrameBorderMode(0) + R.gStyle.SetFrameBorderSize(1) + R.gStyle.SetFrameFillColor(0) + R.gStyle.SetFrameFillStyle(0) + R.gStyle.SetFrameLineColor(1) + R.gStyle.SetFrameLineStyle(1) + R.gStyle.SetFrameLineWidth(1) + + # For the histo: + # R.gStyle.SetHistFillColor(1) + # R.gStyle.SetHistFillStyle(0) + R.gStyle.SetHistLineColor(1) + R.gStyle.SetHistLineStyle(0) + R.gStyle.SetHistLineWidth(1) + # R.gStyle.SetLegoInnerR(Float_t rad = 0.5) + # R.gStyle.SetNumberContours(Int_t number = 20) + + R.gStyle.SetEndErrorSize(2) + # R.gStyle.SetErrorMarker(20) + # R.gStyle.SetErrorX(0.) + + R.gStyle.SetMarkerStyle(20) + + # For the fit/function: + R.gStyle.SetOptFit(1) + R.gStyle.SetFitFormat("5.4g") + R.gStyle.SetFuncColor(2) + R.gStyle.SetFuncStyle(1) + R.gStyle.SetFuncWidth(1) + + # For the date: + R.gStyle.SetOptDate(0) + # R.gStyle.SetDateX(Float_t x = 0.01) + # R.gStyle.SetDateY(Float_t y = 0.01) + + # For the statistics box: + R.gStyle.SetOptFile(0) + R.gStyle.SetOptStat(0) + # To display the mean and RMS: SetOptStat('mr') + R.gStyle.SetStatColor(R.kWhite) + R.gStyle.SetStatFont(42) + R.gStyle.SetStatFontSize(0.025) + R.gStyle.SetStatTextColor(1) + R.gStyle.SetStatFormat("6.4g") + R.gStyle.SetStatBorderSize(1) + R.gStyle.SetStatH(0.1) + R.gStyle.SetStatW(0.15) + # R.gStyle.SetStatStyle(Style_t style = 1001) + # R.gStyle.SetStatX(Float_t x = 0) + # R.gStyle.SetStatY(Float_t y = 0) + + # Margins: + R.gStyle.SetPadTopMargin(0.05) + R.gStyle.SetPadBottomMargin(0.13) + R.gStyle.SetPadLeftMargin(0.16) + R.gStyle.SetPadRightMargin(0.02) + + # For the Global title: + R.gStyle.SetOptTitle(0) + R.gStyle.SetTitleFont(42) + R.gStyle.SetTitleColor(1) + R.gStyle.SetTitleTextColor(1) + R.gStyle.SetTitleFillColor(10) + R.gStyle.SetTitleFontSize(0.05) + # R.gStyle.SetTitleH(0); # Set the height of the title box + # R.gStyle.SetTitleW(0); # Set the width of the title box + # R.gStyle.SetTitleX(0); # Set the position of the title box + # R.gStyle.SetTitleY(0.985); # Set the position of the title box + # R.gStyle.SetTitleStyle(Style_t style = 1001) + # R.gStyle.SetTitleBorderSize(2) + + # For the axis titles: + R.gStyle.SetTitleColor(1, "XYZ") + R.gStyle.SetTitleFont(42, "XYZ") + R.gStyle.SetTitleSize(0.06, "XYZ") + # Another way to set the size? 
+ # R.gStyle.SetTitleXSize(Float_t size = 0.02) + # R.gStyle.SetTitleYSize(Float_t size = 0.02) + R.gStyle.SetTitleXOffset(0.9) + R.gStyle.SetTitleYOffset(1.25) + # R.gStyle.SetTitleOffset(1.1, 'Y'); # Another way to set the Offset + + # For the axis labels: + + R.gStyle.SetLabelColor(1, "XYZ") + R.gStyle.SetLabelFont(42, "XYZ") + R.gStyle.SetLabelOffset(0.007, "XYZ") + R.gStyle.SetLabelSize(0.05, "XYZ") + + # For the axis: + + R.gStyle.SetAxisColor(1, "XYZ") + R.gStyle.SetStripDecimals(True) + R.gStyle.SetTickLength(0.03, "XYZ") + R.gStyle.SetNdivisions(510, "XYZ") + R.gStyle.SetPadTickX(1) + R.gStyle.SetPadTickY(1) + + # Change for log plots: + R.gStyle.SetOptLogx(0) + R.gStyle.SetOptLogy(0) + R.gStyle.SetOptLogz(0) + + # Postscript options: + R.gStyle.SetPaperSize(20.0, 20.0) + # R.gStyle.SetLineScalePS(Float_t scale = 3) + # R.gStyle.SetLineStyleString(Int_t i, const char* text) + # R.gStyle.SetHeaderPS(const char* header) + # R.gStyle.SetTitlePS(const char* pstitle) + + # R.gStyle.SetBarOffset(Float_t baroff = 0.5) + # R.gStyle.SetBarWidth(Float_t barwidth = 0.5) + # R.gStyle.SetPaintTextFormat(const char* format = 'g') + # R.gStyle.SetPalette(Int_t ncolors = 0, Int_t* colors = 0) + # R.gStyle.SetTimeOffset(Double_t toffset) + # R.gStyle.SetHistMinimumZero(kTRUE) + + R.gStyle.SetHatchesLineWidth(5) + R.gStyle.SetHatchesSpacing(0.05) + + +def ModTDRStyle(width=600, height=600, t=0.06, b=0.12, l=0.16, r=0.04): + """Modified version of the tdrStyle + + Args: + width (int): Canvas width in pixels + height (int): Canvas height in pixels + t (float): Pad top margin [0-1] + b (float): Pad bottom margin [0-1] + l (float): Pad left margin [0-1] + r (float): Pad right margin [0-1] + """ + SetTDRStyle() + + # Set the default canvas width and height in pixels + R.gStyle.SetCanvasDefW(width) + R.gStyle.SetCanvasDefH(height) + + # Set the default margins. These are given as fractions of the pad height + # for `Top` and `Bottom` and the pad width for `Left` and `Right`. But we + # want to specify all of these as fractions of the shortest length. + def_w = float(R.gStyle.GetCanvasDefW()) + def_h = float(R.gStyle.GetCanvasDefH()) + + scale_h = (def_w / def_h) if (def_h > def_w) else 1.0 + scale_w = (def_h / def_w) if (def_w > def_h) else 1.0 + + def_min = def_h if (def_h < def_w) else def_w + + R.gStyle.SetPadTopMargin(t * scale_h) + # default 0.05 + R.gStyle.SetPadBottomMargin(b * scale_h) + # default 0.13 + R.gStyle.SetPadLeftMargin(l * scale_w) + # default 0.16 + R.gStyle.SetPadRightMargin(r * scale_w) + # default 0.02 + # But note the new CMS style sets these: + # 0.08, 0.12, 0.12, 0.04 + + # Set number of axis tick divisions + R.gStyle.SetNdivisions(506, "XYZ") # default 510 + + # Some marker properties not set in the default tdr style + R.gStyle.SetMarkerColor(R.kBlack) + R.gStyle.SetMarkerSize(1.0) + + R.gStyle.SetLabelOffset(0.007, "YZ") + # This is an adhoc adjustment to scale the x-axis label + # offset when we stretch plot vertically + # Will also need to increase if first x-axis label has more than one digit + R.gStyle.SetLabelOffset(0.005 * (3.0 - 2.0 / scale_h), "X") + + # In this next part we do a slightly involved calculation to set the axis + # title offsets, depending on the values of the TPad dimensions and + # margins. This is to try and ensure that regardless of how these pad + # values are set, the axis titles will be located towards the edges of the + # canvas and not get pushed off the edge - which can often happen if a + # fixed value is used. 
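+    # For example, with the default width=600, height=600 and b=0.12, and the
+    # title_size of 0.05 set below: title_px = 0.05 * 600 = 30, and the x-title
+    # offset evaluates to 0.5 * (1.2 * (600 * 0.12 - 0.6 * 30)) / 30 = 1.08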
+ title_size = 0.05 + title_px = title_size * def_min + label_size = 0.04 + R.gStyle.SetTitleSize(title_size, "XYZ") + R.gStyle.SetLabelSize(label_size, "XYZ") + + R.gStyle.SetTitleXOffset(0.5 * scale_h * (1.2 * (def_h * b * scale_h - 0.6 * title_px)) / title_px) + R.gStyle.SetTitleYOffset(0.5 * scale_w * (1.2 * (def_w * l * scale_w - 0.6 * title_px)) / title_px) + + # Only draw ticks where we have an axis + R.gStyle.SetPadTickX(0) + R.gStyle.SetPadTickY(0) + R.gStyle.SetTickLength(0.02, "XYZ") + + R.gStyle.SetLegendBorderSize(0) + R.gStyle.SetLegendFont(42) + R.gStyle.SetLegendFillColor(0) + R.gStyle.SetFillColor(0) + + R.gROOT.ForceStyle() + + +def SetBirdPalette(): + nRGBs = 9 + stops = array("d", [0.0000, 0.1250, 0.2500, 0.3750, 0.5000, 0.6250, 0.7500, 0.8750, 1.0000]) + red = array("d", [0.2082, 0.0592, 0.0780, 0.0232, 0.1802, 0.5301, 0.8186, 0.9956, 0.9764]) + green = array("d", [0.1664, 0.3599, 0.5041, 0.6419, 0.7178, 0.7492, 0.7328, 0.7862, 0.9832]) + blue = array("d", [0.5293, 0.8684, 0.8385, 0.7914, 0.6425, 0.4662, 0.3499, 0.1968, 0.0539]) + R.TColor.CreateGradientColorTable(nRGBs, stops, red, green, blue, 255, 1) + + +def SetDeepSeaPalette(): + nRGBs = 9 + stops = array("d", [0.0000, 0.1250, 0.2500, 0.3750, 0.5000, 0.6250, 0.7500, 0.8750, 1.0000]) + red = array("d", reversed([0.0 / 255.0, 9.0 / 255.0, 13.0 / 255.0, 17.0 / 255.0, 24.0 / 255.0, 32.0 / 255.0, 27.0 / 255.0, 25.0 / 255.0, 29.0 / 255.0])) + green = array("d", reversed([0.0 / 255.0, 0.0 / 255.0, 0.0 / 255.0, 2.0 / 255.0, 37.0 / 255.0, 74.0 / 255.0, 113.0 / 255.0, 160.0 / 255.0, 221.0 / 255.0])) + blue = array( + "d", reversed([28.0 / 255.0, 42.0 / 255.0, 59.0 / 255.0, 78.0 / 255.0, 98.0 / 255.0, 129.0 / 255.0, 154.0 / 255.0, 184.0 / 255.0, 221.0 / 255.0]) + ) + R.TColor.CreateGradientColorTable(nRGBs, stops, red, green, blue, 255, 1) + + +def SetCorrMatrixPalette(): + R.TColor.CreateGradientColorTable( + 3, array("d", [0.00, 0.50, 1.00]), array("d", [1.00, 1.00, 0.00]), array("d", [0.70, 1.00, 0.34]), array("d", [0.00, 1.00, 0.82]), 255, 1.0 + ) + + +def CreateTransparentColor(color, alpha): + adapt = R.gROOT.GetColor(color) + new_idx = R.gROOT.GetListOfColors().GetLast() + 1 + trans = R.TColor(new_idx, adapt.GetRed(), adapt.GetGreen(), adapt.GetBlue(), "", alpha) + COL_STORE.append(trans) + trans.SetName("userColor%i" % new_idx) + return new_idx + + +def Set(obj, **kwargs): + for key, value in six.iteritems(kwargs): + if value is None: + getattr(obj, "Set" + key)() + elif isinstance(value, (list, tuple)): + getattr(obj, "Set" + key)(*value) + else: + getattr(obj, "Set" + key)(value) + + +##@} + + +## @name TPad Layout +## +## @details Create TPads for pre-defined layouts, for example adding a smaller +## lower TPad to contain ratios of objects in the main pad. 
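+#
+#  A typical pattern (illustrative sketch): create the pads before drawing
+#  anything, then move between them with cd(), e.g. for a main/ratio layout:
+#
+#      pads = TwoPadSplit(0.29, 0.01, 0.01)  # returns [upper, lower]
+#      pads[0].cd()  # draw the distributions here
+#      pads[1].cd()  # draw the ratio here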
+##@{
+
+
+def OnePad():
+    pad = R.TPad("pad", "pad", 0.0, 0.0, 1.0, 1.0)
+    pad.Draw()
+    pad.cd()
+    result = [pad]
+    return result
+
+
+def TwoPadSplit(split_point, gap_low, gap_high):
+    upper = R.TPad("upper", "upper", 0.0, 0.0, 1.0, 1.0)
+    upper.SetBottomMargin(split_point + gap_high)
+    upper.SetFillStyle(4000)
+    upper.Draw()
+    lower = R.TPad("lower", "lower", 0.0, 0.0, 1.0, 1.0)
+    lower.SetTopMargin(1 - split_point + gap_low)
+    lower.SetFillStyle(4000)
+    lower.Draw()
+    upper.cd()
+    result = [upper, lower]
+    return result
+
+
+def ThreePadSplit(upper_split_point, split_point, gap_low, gap_high):
+    upper2 = R.TPad("upper2", "upper2", 0.0, 0.0, 1.0, 1.0)
+    upper2.SetTopMargin(1 - upper_split_point)
+    upper2.SetBottomMargin(split_point + gap_high)
+    upper2.SetFillStyle(4000)
+    upper2.Draw()
+    upper1 = R.TPad("upper1", "upper1", 0.0, 0.0, 1.0, 1.0)
+    upper1.SetBottomMargin(upper_split_point)
+    upper1.SetFillStyle(4000)
+    upper1.Draw()
+    lower = R.TPad("lower", "lower", 0.0, 0.0, 1.0, 1.0)
+    lower.SetTopMargin(1 - split_point + gap_low)
+    lower.SetFillStyle(4000)
+    lower.Draw()
+    upper1.cd()
+    result = [upper1, lower, upper2]
+    return result
+
+
+def MultiRatioSplit(split_points, gaps_low, gaps_high):
+    """Create a set of TPads split vertically on the TCanvas
+
+    This is a generalisation of the two pad main/ratio split but for the case
+    of multiple ratio pads.
+
+    Args:
+
+        split_points (list[float]): Height of each ratio pad as a fraction of the
+            canvas height. Pads will be created from the bottom of the frame
+            upwards. The final, main pad will occupy however much space remains,
+            therefore the size of this list should be [number of pads] - 1.
+        gaps_low (list[float]): Gaps between ratio pad frames created on the
+            lower pad side at each boundary. Give a list of zeroes for no gap
+            between pad frames. Should be the same length as `split_points`.
+        gaps_high (list[float]): Gaps between ratio pad frames created on the
+            upper pad side at each boundary. Give a list of zeroes for no gap
+            between pad frames.
+
+    Returns:
+        list[TPad]: List of TPads, indexed from top to bottom on the canvas.
+ """ + pads = [] + for i in range(len(split_points) + 1): + pad = R.TPad("pad%i" % i, "", 0.0, 0.0, 1.0, 1.0) + if i > 0: + pad.SetBottomMargin(sum(split_points[0:i]) + gaps_high[i - 1]) + if i < len(split_points): + pad.SetTopMargin(1.0 - sum(split_points[0 : i + 1]) + gaps_low[i]) + pad.SetFillStyle(4000) + pad.Draw() + pads.append(pad) + pads.reverse() + return pads + + +def TwoPadSplitColumns(split_point, gap_left, gap_right): + left = R.TPad("left", "left", 0.0, 0.0, 1.0, 1.0) + left.SetRightMargin(1 - split_point + gap_right) + left.SetFillStyle(4000) + left.Draw() + right = R.TPad("right", "right", 0.0, 0.0, 1.0, 1.0) + right.SetLeftMargin(split_point + gap_left) + right.SetFillStyle(4000) + right.Draw() + left.cd() + result = [left, right] + return result + + +def MultiRatioSplitColumns(split_points, gaps_left, gaps_right): + pads = [] + for i in range(len(split_points) + 1): + pad = R.TPad("pad%i" % i, "", 0.0, 0.0, 1.0, 1.0) + if i > 0: + pad.SetLeftMargin(sum(split_points[0:i]) + gaps_left[i - 1]) + if i < len(split_points): + pad.SetRightMargin(1.0 - sum(split_points[0 : i + 1]) + gaps_right[i]) + pad.SetFillStyle(4000) + pad.Draw() + pads.append(pad) + pads[0].cd() + # pads.reverse() + return pads + + +def SetupTwoPadSplitAsRatio(pads, upper, lower, y_title, y_centered, y_min, y_max): + if lower.GetXaxis().GetTitle() == "": + lower.GetXaxis().SetTitle(upper.GetXaxis().GetTitle()) + upper.GetXaxis().SetTitle("") + upper.GetXaxis().SetLabelSize(0) + upper_h = 1.0 - pads[0].GetTopMargin() - pads[0].GetBottomMargin() + lower_h = 1.0 - pads[1].GetTopMargin() - pads[1].GetBottomMargin() + lower.GetYaxis().SetTickLength(R.gStyle.GetTickLength() * upper_h / lower_h) + pads[1].SetTickx(1) + pads[1].SetTicky(1) + lower.GetYaxis().SetTitle(y_title) + lower.GetYaxis().CenterTitle(y_centered) + if y_max > y_min: + lower.SetMinimum(y_min) + lower.SetMaximum(y_max) + + +def StandardAxes(xaxis, yaxis, var, units, fmt=".1f"): + width = xaxis.GetBinWidth(1) + w_label = ("%" + fmt) % width + if units == "": + xaxis.SetTitle(var) + yaxis.SetTitle("Events / " + w_label) + else: + xaxis.SetTitle(var + " (" + units + ")") + yaxis.SetTitle("Events / " + w_label + " " + units) + + +##@} + + +## @name Axis histograms +# +# @details By default the first TH1 or TGraph drawn on a pad takes control of +# the x- and y-axis settings. A better way is to create dedicated "axis" +# TH1s that can be drawn first, one per pad, which will then control all +# the axis properties. 
+##@{
+
+
+def CreateAxisHist(src, at_limits=True):
+    backup = R.gPad
+    tmp = R.TCanvas()
+    tmp.cd()
+    src.Draw("AP")
+    result = src.GetHistogram().Clone("tmp")
+    if at_limits:
+        min = 0.0
+        max = 0.0
+        x = ctypes.c_double(0.0)
+        y = ctypes.c_double(0.0)
+        src.GetPoint(0, x, y)
+        min = float(x.value)
+        max = float(x.value)
+        for i in range(1, src.GetN()):
+            src.GetPoint(i, x, y)
+            # Compare the underlying float, not the ctypes wrapper itself
+            if x.value < min:
+                min = float(x.value)
+            if x.value > max:
+                max = float(x.value)
+        result.GetXaxis().SetLimits(min, max)
+    R.gPad = backup
+    return result


+def CreateAxisHists(n, src, at_limits):
+    res = []
+    h = CreateAxisHist(src, at_limits)
+    for i in range(n):
+        res.append(h.Clone("tmp%i" % i))
+    return res
+
+
+def GetAxisHist(pad):
+    pad_obs = pad.GetListOfPrimitives()
+    if pad_obs is None:
+        return None
+    obj = None
+    for obj in pad_obs:
+        if obj.InheritsFrom(R.TH1.Class()):
+            return obj
+        if obj.InheritsFrom(R.TMultiGraph.Class()):
+            return obj.GetHistogram()
+        if obj.InheritsFrom(R.TGraph.Class()):
+            return obj.GetHistogram()
+        if obj.InheritsFrom(R.THStack.Class()):
+            return obj.GetHistogram()
+    return None
+
+
+##@}
+
+
+## @name TFile functions
+#
+#  @details A collection of functions for working with TFiles.
+##@{
+
+
+def TFileIsGood(filename):
+    """Performs a series of tests on a TFile to ensure that it can be opened
+    without errors
+
+    Args:
+        filename: `str` The name of the TFile to check
+
+    Returns:
+        `bool` True if the file can opened, is not a zombie, and if ROOT did
+        not need to try and recover the contents
+    """
+    fin = R.TFile(filename)
+    if not fin:
+        return False
+    if fin and not fin.IsOpen():
+        return False
+    elif fin and fin.IsOpen() and fin.IsZombie():
+        fin.Close()
+        return False
+    elif fin and fin.IsOpen() and fin.TestBit(R.TFile.kRecovered):
+        fin.Close()
+        # don't consider a recovered file to be OK
+        return False
+    else:
+        fin.Close()
+        return True
+
+
+def MakeTChain(files, tree):
+    chain = R.TChain(tree)
+    for f in files:
+        chain.Add(f)
+    return chain
+
+
+def Get(file, obj):
+    R.TH1.AddDirectory(False)
+    f_in = R.TFile(file)
+    res = R.gDirectory.Get(obj)
+    f_in.Close()
+    return res
+
+
+def ParamFromFilename(filename, param):
+    if len(re.findall(param + r"\.\d+\.\d+", filename)):
+        num1 = re.findall(param + r"\.\d+\.\d+", filename)[0].replace(param + ".", "")
+        return float(num1)
+    elif len(re.findall(param + r"\.\d+", filename)):
+        num1 = re.findall(param + r"\.\d+", filename)[0].replace(param + ".", "")
+        return int(num1)
+    else:
+        print("Error: parameter " + param + " not found in filename")
+
+
+##@}
+
+
+## @name Object creation
+#
+#  @details These functions take existing objects (TH1s, TGraphs, TTrees, etc)
+#      and build new objects
+##@{
+
+
+def TGraphFromTree(tree, xvar, yvar, selection):
+    tree.Draw(xvar + ":" + yvar, selection, "goff")
+    gr = R.TGraph(tree.GetSelectedRows(), tree.GetV1(), tree.GetV2())
+    return gr
+
+
+def TGraph2DFromTree(tree, xvar, yvar, zvar, selection):
+    tree.Draw(xvar + ":" + yvar + ":" + zvar, selection, "goff")
+    gr = R.TGraph2D(tree.GetSelectedRows(), tree.GetV1(), tree.GetV2(), tree.GetV3())
+    return gr
+
+
+def RocCurveFrom1DHists(h_x, h_y, cut_is_greater_than):
+    backup = R.TH1.AddDirectoryStatus()
+    R.TH1.AddDirectory(False)
+    x_den = h_x.Clone()
+    x_num = h_x.Clone()
+    x_err = ctypes.c_double(0.0)
+    x_int = h_x.IntegralAndError(0, h_x.GetNbinsX() + 1, x_err)
+    for i in range(1, h_x.GetNbinsX() + 1):
+        x_part_err = ctypes.c_double(0.0)
+        x_part_int = h_x.IntegralAndError(i, h_x.GetNbinsX() + 1, x_part_err) if cut_is_greater_than else
h_x.IntegralAndError(0, i, x_part_err) + x_den.SetBinContent(i, x_int) + x_den.SetBinError(i, x_err) + x_num.SetBinContent(i, x_part_int) + x_num.SetBinError(i, x_part_err) + y_den = h_y.Clone() + y_num = h_y.Clone() + y_err = ctypes.c_double(0.0) + y_int = h_y.IntegralAndError(0, h_y.GetNbinsX() + 1, y_err) + for i in range(1, h_y.GetNbinsX() + 1): + y_part_err = ctypes.c_double(0.0) + y_part_int = h_y.IntegralAndError(i, h_y.GetNbinsX() + 1, y_part_err) if cut_is_greater_than else h_y.IntegralAndError(0, i, y_part_err) + y_den.SetBinContent(i, y_int) + y_den.SetBinError(i, y_err) + y_num.SetBinContent(i, y_part_int) + y_num.SetBinError(i, y_part_err) + # x_den.Print('all') + # x_num.Print('all') + # y_den.Print('all') + # y_num.Print('all') + x_gr = R.TGraphAsymmErrors(x_num, x_den) + y_gr = R.TGraphAsymmErrors(y_num, y_den) + + res = y_gr.Clone() + for i in range(0, res.GetN()): + res.GetX()[i] = x_gr.GetY()[i] + res.GetEXlow()[i] = x_gr.GetEYlow()[i] + res.GetEXhigh()[i] = x_gr.GetEYhigh()[i] + res.Sort() + R.TH1.AddDirectory(backup) + return res + + +def TH2FromTGraph2D(graph, method="BinEdgeAligned", force_x_width=None, force_y_width=None): + """Build an empty TH2 from the set of points in a TGraph2D + + There is no unique way to define a TH2 binning given an arbitrary + TGraph2D, therefore this function supports multiple named methods: + + - `BinEdgeAligned` simply takes the sets of x- and y- values in the + TGraph2D and uses these as the bin edge arrays in the TH2. The + implication of this is that when filling the bin contents interpolation + will be required when evaluating the TGraph2D at the bin centres. + - `BinCenterAligned` will try to have the TGraph2D points at the bin + centers, but this will only work completely correctly when the input + point spacing is regular. The algorithm first identifies the bin width + as the smallest interval between points on each axis. The start + position of the TH2 axis is then defined as the lowest value in the + TGraph2D minus half this width, and the axis continues with regular + bins until the graph maximum is passed. 
+ + Args: + graph (TGraph2D): Should have at least two unique x and y values, + otherwise we can't define any bins + method (str): The binning algorithm to use + force_x_width (bool): Override the derived x-axis bin width in the + CenterAligned method + force_y_width (bool): Override the derived y-axis bin width in the + CenterAligned method + + Raises: + RuntimeError: If the method name is not recognised + + Returns: + TH2F: The exact binning of the TH2F depends on the chosen method + """ + x_vals = set() + y_vals = set() + + for i in range(graph.GetN()): + x_vals.add(graph.GetX()[i]) + y_vals.add(graph.GetY()[i]) + + x_vals = sorted(x_vals) + y_vals = sorted(y_vals) + if method == "BinEdgeAligned": + h_proto = R.TH2F("prototype", "", len(x_vals) - 1, array("d", x_vals), len(y_vals) - 1, array("d", y_vals)) + elif method == "BinCenterAligned": + x_widths = [] + y_widths = [] + for i in range(1, len(x_vals)): + x_widths.append(x_vals[i] - x_vals[i - 1]) + for i in range(1, len(y_vals)): + y_widths.append(y_vals[i] - y_vals[i - 1]) + x_min = min(x_widths) if force_x_width is None else force_x_width + y_min = min(y_widths) if force_y_width is None else force_y_width + x_bins = int(((x_vals[-1] - (x_vals[0] - 0.5 * x_min)) / x_min) + 0.5) + y_bins = int(((y_vals[-1] - (y_vals[0] - 0.5 * y_min)) / y_min) + 0.5) + print("[TH2FromTGraph2D] x-axis binning: (%i, %g, %g)" % (x_bins, x_vals[0] - 0.5 * x_min, x_vals[0] - 0.5 * x_min + x_bins * x_min)) + print("[TH2FromTGraph2D] y-axis binning: (%i, %g, %g)" % (y_bins, y_vals[0] - 0.5 * y_min, y_vals[0] - 0.5 * y_min + y_bins * y_min)) + # Use a number slightly smaller than 0.49999 because the TGraph2D interpolation + # is fussy about evaluating on the boundary + h_proto = R.TH2F( + "prototype", + "", + x_bins, + x_vals[0] - 0.49999 * x_min, + x_vals[0] - 0.50001 * x_min + x_bins * x_min, + y_bins, + y_vals[0] - 0.49999 * y_min, + y_vals[0] - 0.50001 * y_min + y_bins * y_min, + ) + else: + raise RuntimeError("[TH2FromTGraph2D] Method %s not supported" % method) + h_proto.SetDirectory(0) + return h_proto + + +def MakeErrorBand(LowerGraph, UpperGraph): + errorBand = R.TGraphAsymmErrors() + lower_list = [] + upper_list = [] + for i in range(LowerGraph.GetN()): + lower_list.append((float(LowerGraph.GetX()[i]), float(LowerGraph.GetY()[i]))) + upper_list.append((float(UpperGraph.GetX()[i]), float(UpperGraph.GetY()[i]))) + lower_list = sorted(set(lower_list)) + upper_list = sorted(set(upper_list)) + for i in range(LowerGraph.GetN()): + errorBand.SetPoint(i, lower_list[i][0], lower_list[i][1]) + errorBand.SetPointEYlow(i, lower_list[i][1] - lower_list[i][1]) + errorBand.SetPointEYhigh(i, upper_list[i][1] - lower_list[i][1]) + return errorBand + + +def LimitTGraphFromJSON(js, label): + xvals = [] + yvals = [] + for key in js: + xvals.append(float(key)) + yvals.append(js[key][label]) + graph = R.TGraph(len(xvals), array("d", xvals), array("d", yvals)) + graph.Sort() + return graph + + +def LimitTGraphFromJSONFile(jsfile, label): + with open(jsfile) as jsonfile: + js = json.load(jsonfile) + return LimitTGraphFromJSON(js, label) + + +def ToyTGraphFromJSON(js, label): + xvals = [] + yvals = [] + if isinstance(label, (str,)): + for entry in js[label]: + xvals.append(float(entry)) + yvals.append(1.0) + else: + if len(label) == 1: + return ToyTGraphFromJSON(js, label[0]) + else: + return ToyTGraphFromJSON(js[label[0]], label[1:]) + graph = R.TGraph(len(xvals), array("d", xvals), array("d", yvals)) + graph.Sort() + return graph + # hist = R.TH1F("toy", "toy", 100, 
min(xvals), max(xvals)) + # for xval in xvals: + # hist.AddBinContent(hist.GetXaxis().FindBin(xval)) + # return hist + + +def ToyTGraphFromJSONFile(jsfile, label): + with open(jsfile) as jsonfile: + js = json.load(jsonfile) + return ToyTGraphFromJSON(js, label) + + +def LimitBandTGraphFromJSON(js, central, lo, hi): + xvals = [] + yvals = [] + yvals_lo = [] + yvals_hi = [] + for key in js: + xvals.append(float(key)) + yvals.append(js[key][central]) + yvals_lo.append(js[key][central] - js[key][lo]) + yvals_hi.append(js[key][hi] - js[key][central]) + graph = R.TGraphAsymmErrors(len(xvals), array("d", xvals), array("d", yvals), array("d", [0]), array("d", [0]), array("d", yvals_lo), array("d", yvals_hi)) + graph.Sort() + return graph + + +def StandardLimitsFromJSONFile(json_file, draw=["obs", "exp0", "exp1", "exp2"]): + graphs = {} + data = {} + with open(json_file) as jsonfile: + data = json.load(jsonfile) + if "obs" in draw: + graphs["obs"] = LimitTGraphFromJSON(data, "obs") + if "exp0" in draw or "exp" in draw: + graphs["exp0"] = LimitTGraphFromJSON(data, "exp0") + if "exp1" in draw or "exp" in draw: + graphs["exp1"] = LimitBandTGraphFromJSON(data, "exp0", "exp-1", "exp+1") + if "exp2" in draw or "exp" in draw: + graphs["exp2"] = LimitBandTGraphFromJSON(data, "exp0", "exp-2", "exp+2") + return graphs + + +def bestFit(tree, x, y, cut): + nfind = tree.Draw(y + ":" + x, cut + "deltaNLL == 0") + gr0 = R.TGraph(1) + if nfind == 0: + gr0.SetPoint(0, -999, -999) + else: + grc = R.gROOT.FindObject("Graph").Clone() + if grc.GetN() > 1: + grc.Set(1) + gr0.SetPoint(0, grc.GetXmax(), grc.GetYmax()) + gr0.SetMarkerStyle(34) + gr0.SetMarkerSize(2.0) + return gr0 + + +def treeToHist2D(t, x, y, name, cut, xmin, xmax, ymin, ymax, xbins, ybins): + t.Draw("2*deltaNLL:%s:%s>>%s_prof(%d,%10g,%10g,%d,%10g,%10g)" % (y, x, name, xbins, xmin, xmax, ybins, ymin, ymax), cut + "deltaNLL != 0", "PROF") + prof = R.gROOT.FindObject(name + "_prof") + h2d = R.TH2D(name, name, xbins, xmin, xmax, ybins, ymin, ymax) + for ix in range(1, xbins + 1): + for iy in range(1, ybins + 1): + z = prof.GetBinContent(ix, iy) + if (z != z) or (z > 4294967295): # protect against NANs + z = 0 + h2d.SetBinContent(ix, iy, z) + h2d.GetXaxis().SetTitle(x) + h2d.GetYaxis().SetTitle(y) + h2d.SetDirectory(0) + h2d = NewInterpolate(h2d) + return h2d + + +def makeHist1D(name, xbins, graph, scaleXrange=1.0, absoluteXrange=None): + len_x = graph.GetX()[graph.GetN() - 1] - graph.GetX()[0] + binw_x = (len_x * 0.5 / (float(xbins) - 1.0)) - 1e-5 + if absoluteXrange: + hist = R.TH1F(name, "", xbins, absoluteXrange[0], absoluteXrange[1]) + else: + hist = R.TH1F(name, "", xbins, graph.GetX()[0], scaleXrange * (graph.GetX()[graph.GetN() - 1] + binw_x)) + return hist + + +def makeHist2D(name, xbins, ybins, graph2d): + len_x = graph2d.GetXmax() - graph2d.GetXmin() + binw_x = (len_x * 0.5 / (float(xbins) - 1.0)) - 1e-5 + len_y = graph2d.GetYmax() - graph2d.GetYmin() + binw_y = (len_y * 0.5 / (float(ybins) - 1.0)) - 1e-5 + hist = R.TH2F(name, "", xbins, graph2d.GetXmin() - binw_x, graph2d.GetXmax() + binw_x, ybins, graph2d.GetYmin() - binw_y, graph2d.GetYmax() + binw_y) + return hist + + +def makeVarBinHist2D(name, xbins, ybins): + # create new arrays in which bin low edge is adjusted to make measured + # points at the bin centres + xbins_new = [None] * (len(xbins) + 1) + for i in range(len(xbins) - 1): + if i == 0 or i == 1: + xbins_new[i] = xbins[i] - ((xbins[i + 1] - xbins[i]) / 2) + 1e-5 + else: + xbins_new[i] = xbins[i] - ((xbins[i + 1] - xbins[i]) / 2) + 
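+    # the two uppermost edges are not covered by the loop above: extrapolate
+    # them from the spacing of the last measured points, with a small 1e-5
+    # offset so the outermost points sit strictly inside the axis range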
xbins_new[len(xbins) - 1] = xbins[len(xbins) - 2] + ((xbins[len(xbins) - 2] - xbins[len(xbins) - 3]) / 2) + xbins_new[len(xbins)] = xbins[len(xbins) - 1] + ((xbins[len(xbins) - 1] - xbins[len(xbins) - 2]) / 2) - 1e-5 + + ybins_new = [None] * (len(ybins) + 1) + for i in range(len(ybins) - 1): + if i == 0 or i == 1: + ybins_new[i] = ybins[i] - ((ybins[i + 1] - ybins[i]) / 2) + 1e-5 + else: + ybins_new[i] = ybins[i] - ((ybins[i + 1] - ybins[i]) / 2) + ybins_new[len(ybins) - 1] = ybins[len(ybins) - 2] + ((ybins[len(ybins) - 2] - ybins[len(ybins) - 3]) / 2) + ybins_new[len(ybins)] = ybins[len(ybins) - 1] + ((ybins[len(ybins) - 1] - ybins[len(ybins) - 2]) / 2) - 1e-5 + hist = R.TH2F(name, "", len(xbins_new) - 1, array("d", xbins_new), len(ybins_new) - 1, array("d", ybins_new)) + return hist + + +def GraphDifference(graph1, graph2, relative): + xvals = [] + yvals = [] + if graph1.GetN() != graph2.GetN(): + return graph1 + for i in range(graph1.GetN()): + xvals.append(graph1.GetX()[i]) + if relative: + yvals.append(2 * abs(graph1.GetY()[i] - graph2.GetY()[i]) / (graph1.GetY()[i] + graph2.GetY()[i])) + else: + yvals.append(2 * (graph1.GetY()[i] - graph2.GetY()[i]) / (graph1.GetY()[i] + graph2.GetY()[i])) + diff_graph = R.TGraph(len(xvals), array("d", xvals), array("d", yvals)) + diff_graph.Sort() + return diff_graph + + +def GraphDivide(num, den): + res = num.Clone() + for i in range(num.GetN()): + res.GetY()[i] = res.GetY()[i] / den.Eval(res.GetX()[i]) + if type(res) is R.TGraphAsymmErrors: + for i in range(num.GetN()): + res.GetEYhigh()[i] = res.GetEYhigh()[i] / den.Eval(res.GetX()[i]) + res.GetEYlow()[i] = res.GetEYlow()[i] / den.Eval(res.GetX()[i]) + + return res + + +def MakeRatioHist(num, den, num_err, den_err): + """Make a new ratio TH1 from numerator and denominator TH1s with optional + error propagation + + Args: + num (TH1): Numerator histogram + den (TH1): Denominator histogram + num_err (bool): Propagate the error in the numerator TH1 + den_err (bool): Propagate the error in the denominator TH1 + + Returns: + TH1: A new TH1 containing the ratio + """ + result = num.Clone() + if not num_err: + for i in range(1, result.GetNbinsX() + 1): + result.SetBinError(i, 0.0) + den_fix = den.Clone() + if not den_err: + for i in range(1, den_fix.GetNbinsX() + 1): + den_fix.SetBinError(i, 0.0) + result.Divide(den_fix) + return result + + +##@} + + +## @name Graph manipulation +# +# @details These functions are mostly used to modify TGraphs +# corresponding to likelihood scans. 
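+#
+# A hedged sketch of typical usage on a likelihood-scan graph (`gr` stands
+# for a TGraph of 2*deltaNLL versus the POI, as built e.g. in plot1DScan.py):
+# @code
+# gr.Sort()
+# RemoveGraphXDuplicates(gr)
+# RemoveGraphYAbove(gr, 7.0)  # discard points far above the minimum
+# @endcode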
+##@{ +def RemoveGraphXDuplicates(graph): + i = 0 + while i < graph.GetN() - 1: + if graph.GetX()[i + 1] == graph.GetX()[i]: + # print 'Removing duplicate point (%f, %f)' % (graph.GetX()[i+1], graph.GetY()[i+1]) + graph.RemovePoint(i + 1) + else: + i += 1 + + +def ApplyGraphYOffset(graph, y_off): + for i in range(graph.GetN() - 1): + graph.GetY()[i] = graph.GetY()[i] + y_off + + +def RemoveGraphYAll(graph, val): + for i in range(graph.GetN()): + if graph.GetY()[i] == val: + print("[RemoveGraphYAll] Removing point (%f, %f)" % (graph.GetX()[i], graph.GetY()[i])) + graph.RemovePoint(i) + RemoveGraphYAll(graph, val) + break + + +def RemoveSmallDelta(graph, val): + for i in range(graph.GetN()): + diff = abs(graph.GetY()[i]) + if diff < val: + print("[RemoveSmallDelta] Removing point (%f, %f)" % (graph.GetX()[i], graph.GetY()[i])) + graph.RemovePoint(i) + RemoveSmallDelta(graph, val) + break + + +def RemoveGraphYAbove(graph, val): + for i in range(graph.GetN()): + if graph.GetY()[i] > val: + # print 'Removing point (%f, %f)' % (graph.GetX()[i], + # graph.GetY()[i]) + graph.RemovePoint(i) + RemoveGraphYAbove(graph, val) + break + + +def SetMinToZero(graph): + min = 999.0 + for i in range(graph.GetN()): + if graph.GetY()[i] < min: + min = graph.GetY()[i] + for i in range(graph.GetN()): + graph.SetPoint(i, graph.GetX()[i], graph.GetY()[i] - min) + + +def ImproveMinimum(graph, func, doIt=False): + fit_x = 0.0 + fit_y = 999.0 + fit_i = 0 + for i in range(graph.GetN()): + if graph.GetY()[i] < fit_y: + fit_i = i + fit_x = graph.GetX()[i] + fit_y = graph.GetY()[i] + if fit_i == 0 or fit_i == (graph.GetN() - 1): + if doIt: + min_x = graph.GetX()[fit_i] + min_y = graph.GetY()[fit_i] + for i in range(graph.GetN()): + before = graph.GetY()[i] + graph.GetY()[i] -= min_y + after = graph.GetY()[i] + print("Point %i, before=%f, after=%f" % (i, before, after)) + return (fit_x, fit_y) + search_min = fit_i - 2 if fit_i >= 2 else fit_i - 1 + search_max = fit_i + 2 if fit_i + 2 < graph.GetN() else fit_i + 1 + min_x = func.GetMinimumX(graph.GetX()[search_min], graph.GetX()[search_max]) + min_y = func.Eval(min_x) + print("[ImproveMinimum] Fit minimum was (%f, %f)" % (fit_x, fit_y)) + print("[ImproveMinimum] Better minimum was (%f, %f)" % (min_x, min_y)) + if doIt: + for i in range(graph.GetN()): + before = graph.GetY()[i] + graph.GetY()[i] -= min_y + after = graph.GetY()[i] + print("Point %i, before=%f, after=%f" % (i, before, after)) + graph.Set(graph.GetN() + 1) + graph.SetPoint(graph.GetN() - 1, min_x, 0) + graph.Sort() + return (min_x, min_y) + + +def FindCrossingsWithSpline(graph, func, yval): + crossings = [] + intervals = [] + current = None + for i in range(graph.GetN() - 1): + if (graph.GetY()[i] - yval) * (graph.GetY()[i + 1] - yval) < 0.0: + cross = func.GetX(yval, graph.GetX()[i], graph.GetX()[i + 1]) + if (graph.GetY()[i] - yval) > 0.0 and current is None: + current = {"lo": cross, "hi": graph.GetX()[graph.GetN() - 1], "valid_lo": True, "valid_hi": False} + if (graph.GetY()[i] - yval) < 0.0 and current is None: + current = {"lo": graph.GetX()[0], "hi": cross, "valid_lo": False, "valid_hi": True} + intervals.append(current) + current = None + if (graph.GetY()[i] - yval) < 0.0 and current is not None: + current["hi"] = cross + current["valid_hi"] = True + intervals.append(current) + current = None + # print 'Crossing between: (%f, %f) -> (%f, %f) at %f' % + # (graph.GetX()[i], graph.GetY()[i], graph.GetX()[i+1], + # graph.GetY()[i+1], cross) + crossings.append(cross) + if current is not None: + 
intervals.append(current) + if len(intervals) == 0: + current = {"lo": graph.GetX()[0], "hi": graph.GetX()[graph.GetN() - 1], "valid_lo": False, "valid_hi": False} + intervals.append(current) + print(intervals) + return intervals + # return crossings + + +def ReZeroTGraph(gr, doIt=False): + fit_x = 0.0 + fit_y = 0.0 + for i in range(gr.GetN()): + if gr.GetY()[i] == 0.0: + fit_x = gr.GetX()[i] + fit_y = gr.GetY()[i] + break + min_x = 0.0 + min_y = 0.0 + for i in range(gr.GetN()): + if gr.GetY()[i] < min_y: + min_y = gr.GetY()[i] + min_x = gr.GetX()[i] + if min_y < fit_y: + print("[ReZeroTGraph] Fit minimum was (%f, %f)" % (fit_x, fit_y)) + print("[ReZeroTGraph] Better minimum was (%f, %f)" % (min_x, min_y)) + if doIt: + for i in range(gr.GetN()): + # before = gr.GetY()[i] + gr.GetY()[i] -= min_y + # after = gr.GetY()[i] + # print 'Point %i, before=%f, after=%f' % (i, before, after) + return min_y + + +def FilterGraph(gr, n=3): + counter = 0 + remove_list = [] + for i in range(gr.GetN()): + if gr.GetY()[i] == 0.0: + continue + if counter % n < (n - 1): + remove_list.append(i) + counter += 1 + + for i in reversed(remove_list): + gr.RemovePoint(i) + + +def RemoveInXRange(gr, xmin=0, xmax=1): + remove_list = [] + for i in range(gr.GetN()): + if gr.GetY()[i] == 0.0: + continue + if gr.GetX()[i] > xmin and gr.GetX()[i] < xmax: + remove_list.append(i) + + for i in reversed(remove_list): + gr.RemovePoint(i) + + +def RemoveNearMin(graph, val, spacing=None): + # assume graph is sorted: + n = graph.GetN() + if n < 5: + return + if spacing is None: + spacing = (graph.GetX()[n - 1] - graph.GetX()[0]) / float(n - 2) + # print '[RemoveNearMin] Graph has spacing of %.3f' % spacing + bf_i = None + for i in range(graph.GetN()): + if graph.GetY()[i] == 0.0: + bf = graph.GetX()[i] + bf_i = i + # print '[RemoveNearMin] Found best-fit at %.3f' % bf + break + if bf_i is None: + print("[RemoveNearMin] No minimum found!") + return + for i in range(graph.GetN()): + if i == bf_i: + continue + if abs(graph.GetX()[i] - bf) < (val * spacing): + print("[RemoveNearMin] Removing point (%f, %f) close to minimum at %f" % (graph.GetX()[i], graph.GetY()[i], bf)) + graph.RemovePoint(i) + RemoveNearMin(graph, val, spacing) + break + + +def SortGraph(Graph): + sortedGraph = R.TGraph() + graph_list = [] + for i in range(Graph.GetN()): + graph_list.append((float(Graph.GetX()[i]), float(Graph.GetY()[i]))) + graph_list = sorted(set(graph_list)) + for i in range(Graph.GetN()): + sortedGraph.SetPoint(i, graph_list[i][0], graph_list[i][1]) + return sortedGraph + + +##@} + + +## @name TPad adjustments +# +# @details These functions are mostly concerned with adjusting the axis +# ranges to make sure objects are not being drawn outside the range of +# the pad or underneath other objects, e.g. the legend. +##@{ +def FixTopRange(pad, fix_y, fraction): + hobj = GetAxisHist(pad) + ymin = hobj.GetMinimum() + hobj.SetMaximum((fix_y - fraction * ymin) / (1.0 - fraction)) + if R.gPad.GetLogy(): + if ymin == 0.0: + print("Cannot adjust log-scale y-axis range if the minimum is zero!") + return + maxval = (math.log10(fix_y) - fraction * math.log10(ymin)) / (1 - fraction) + maxval = math.pow(10, maxval) + hobj.SetMaximum(maxval) + + +def FixBothRanges(pad, fix_y_lo, frac_lo, fix_y_hi, frac_hi): + """Adjusts y-axis range such that a lower and a higher value are located a + fixed fraction of the frame height away from a new minimum and maximum + respectively. 
+ + This function is useful in conjunction with GetPadYMax which returns the + maximum or minimum y value of all histograms and graphs drawn on the pad. + + In the example below, the minimum and maximum values found via this function + are used as the `fix_y_lo` and `fix_y_hi` arguments, and the spacing fractions + as 0.15 and 0.30 respectively. + + @code + FixBothRanges(pad, GetPadYMin(pad), 0.15, GetPadYMax(pad), 0.30) + @endcode + + ![](figures/FixBothRanges.png) + + Args: + pad (TPad): A TPad on which histograms and graphs have already been drawn + fix_y_lo (float): The y value which will end up a fraction `frac_lo` above + the new axis minimum. + frac_lo (float): A fraction of the y-axis height + fix_y_hi (float): The y value which will end up a fraction `frac_hi` below + from the new axis maximum. + frac_hi (float): A fraction of the y-axis height + """ + hobj = GetAxisHist(pad) + ymin = fix_y_lo + ymax = fix_y_hi + if R.gPad.GetLogy(): + if ymin == 0.0: + print("Cannot adjust log-scale y-axis range if the minimum is zero!") + return + ymin = math.log10(ymin) + ymax = math.log10(ymax) + fl = frac_lo + fh = frac_hi + + ymaxn = (1.0 / (1.0 - (fh * fl / ((1.0 - fl) * (1.0 - fh))))) * (1.0 / (1.0 - fh)) * (ymax - fh * ymin) + yminn = (ymin - fl * ymaxn) / (1.0 - fl) + if R.gPad.GetLogy(): + yminn = math.pow(10, yminn) + ymaxn = math.pow(10, ymaxn) + hobj.SetMinimum(yminn) + hobj.SetMaximum(ymaxn) + + +def GetPadYMaxInRange(pad, x_min, x_max, do_min=False): + pad_obs = pad.GetListOfPrimitives() + if pad_obs is None: + return 0.0 + h_max = -99999.0 + h_min = +99999.0 + for obj in pad_obs: + if obj.InheritsFrom(R.TH1.Class()): + hobj = obj + for j in range(1, hobj.GetNbinsX() + 1): + if hobj.GetBinLowEdge(j) + hobj.GetBinWidth(j) < x_min or hobj.GetBinLowEdge(j) > x_max: + continue + if hobj.GetBinContent(j) + hobj.GetBinError(j) > h_max: + h_max = hobj.GetBinContent(j) + hobj.GetBinError(j) + if (hobj.GetBinContent(j) - hobj.GetBinError(j) < h_min) and not do_min: + # If we're looking for the minimum don't count TH1s + # because we probably only care about graphs + h_min = hobj.GetBinContent(j) - hobj.GetBinError(j) + elif obj.InheritsFrom(R.TGraphAsymmErrors.Class()): + gobj = obj + n = gobj.GetN() + for k in range(0, n): + x = gobj.GetX()[k] + y = gobj.GetY()[k] + if x < x_min or x > x_max: + continue + if (y + gobj.GetEYhigh()[k]) > h_max: + h_max = y + gobj.GetEYhigh()[k] + if (y - gobj.GetEYlow()[k]) < h_min: + h_min = y - gobj.GetEYlow()[k] + elif obj.InheritsFrom(R.TGraphErrors.Class()): + gobj = obj + n = gobj.GetN() + for k in range(0, n): + x = gobj.GetX()[k] + y = gobj.GetY()[k] + if x < x_min or x > x_max: + continue + if (y + gobj.GetEY()[k]) > h_max: + h_max = y + gobj.GetEY()[k] + if (y - gobj.GetEY()[k]) < h_min: + h_min = y - gobj.GetEY()[k] + elif obj.InheritsFrom(R.TGraph.Class()): + gobj = obj + n = gobj.GetN() + for k in range(0, n): + x = gobj.GetX()[k] + y = gobj.GetY()[k] + if x < x_min or x > x_max: + continue + if y > h_max: + h_max = y + if y < h_min: + h_min = y + return h_max if do_min is False else h_min + + +def GetPadYMax(pad, do_min=False): + pad_obs = pad.GetListOfPrimitives() + if pad_obs is None: + return 0.0 + xmin = GetAxisHist(pad).GetXaxis().GetXmin() + xmax = GetAxisHist(pad).GetXaxis().GetXmax() + return GetPadYMaxInRange(pad, xmin, xmax, do_min) + + +def GetPadYMin(pad): + return GetPadYMax(pad, True) + + +def FixOverlay(): + R.gPad.GetFrame().Draw() + R.gPad.RedrawAxis() + + +def FixBoxPadding(pad, box, frac): + # Get the bounds of the box - 
these are in the normalised
+    # Pad co-ordinates.
+    p_x1 = box.GetX1()
+    p_x2 = box.GetX2()
+    p_y1 = box.GetY1()
+
+    # Convert to normalised co-ordinates in the frame
+    f_x1 = (p_x1 - pad.GetLeftMargin()) / (1.0 - pad.GetLeftMargin() - pad.GetRightMargin())
+    f_x2 = (p_x2 - pad.GetLeftMargin()) / (1.0 - pad.GetLeftMargin() - pad.GetRightMargin())
+    f_y1 = (p_y1 - pad.GetBottomMargin()) / (1.0 - pad.GetTopMargin() - pad.GetBottomMargin())
+
+    # Extract the histogram providing the frame and axes
+    hobj = GetAxisHist(pad)
+
+    xmin = hobj.GetBinLowEdge(hobj.GetXaxis().GetFirst())
+    xmax = hobj.GetBinLowEdge(hobj.GetXaxis().GetLast() + 1)
+    ymin = hobj.GetMinimum()
+    ymax = hobj.GetMaximum()
+
+    # Convert box bounds to x-axis values
+    a_x1 = xmin + (xmax - xmin) * f_x1
+    a_x2 = xmin + (xmax - xmin) * f_x2
+
+    # Get the histogram maximum in this range, given as a y-axis value
+    a_max_h = GetPadYMaxInRange(pad, a_x1, a_x2)
+
+    # Convert this to a normalised frame value
+    f_max_h = (a_max_h - ymin) / (ymax - ymin)
+    if R.gPad.GetLogy() and f_max_h > 0.0:
+        f_max_h = (math.log10(a_max_h) - math.log10(ymin)) / (math.log10(ymax) - math.log10(ymin))
+
+    if f_y1 - f_max_h < frac:
+        f_target = 1.0 - (f_y1 - frac)
+        FixTopRange(pad, a_max_h, f_target)
+
+
+##@}
+
+## @name Decoration
+#
+# @details Functions for drawing legends, logos, title, lines and boxes
+##@{
+
+
+def DrawCMSLogo(pad, cmsText, extraText, iPosX, relPosX, relPosY, relExtraDY, extraText2="", cmsTextSize=0.8):
+    """Draw the CMS logo, with optional extra text, on an existing TPad
+
+    Args:
+        pad (TPad): The pad on which to draw
+        cmsText (str): The main label text, normally "CMS"
+        extraText (str): Additional label, e.g. "Preliminary"; may be an
+            empty string to skip it
+        iPosX (int): Position code: values below 10 place the label above
+            the frame, otherwise it is drawn inside, with iPosX // 10
+            selecting the horizontal alignment
+        relPosX (float): Horizontal offset as a fraction of the frame width
+        relPosY (float): Vertical offset as a fraction of the frame height
+        relExtraDY (float): Vertical spacing between cmsText and extraText,
+            in units of the cmsText height
+        extraText2 (str): Optional second line of additional text
+        cmsTextSize (float): Size of cmsText in units of the top margin
+    """
+    pad.cd()
+    cmsTextFont = 62  # default is helvetica-bold
+
+    writeExtraText = len(extraText) > 0
+    writeExtraText2 = len(extraText2) > 0
+    extraTextFont = 52
+
+    # text sizes and text offsets with respect to the top frame
+    # in units of the top margin size
+    lumiTextOffset = 0.2
+    # cmsTextSize = 0.8
+    # float cmsTextOffset = 0.1; // only used in outOfFrame version
+
+    # ratio of 'CMS' and extra text size
+    extraOverCmsTextSize = 0.76
+
+    outOfFrame = False
+    if iPosX // 10 == 0:
+        outOfFrame = True
+
+    alignY_ = 3
+    alignX_ = 2
+    if iPosX // 10 == 0:
+        alignX_ = 1
+    if iPosX == 0:
+        alignX_ = 1
+    if iPosX == 0:
+        alignY_ = 1
+    if iPosX // 10 == 1:
+        alignX_ = 1
+    if iPosX // 10 == 2:
+        alignX_ = 2
+    if iPosX // 10 == 3:
+        alignX_ = 3
+    # if (iPosX == 0): relPosX = 0.14
+    align_ = 10 * alignX_ + alignY_
+
+    l = pad.GetLeftMargin()
+    t = pad.GetTopMargin()
+    r = pad.GetRightMargin()
+    b = pad.GetBottomMargin()
+
+    latex = R.TLatex()
+    latex.SetNDC()
+    latex.SetTextAngle(0)
+    latex.SetTextColor(R.kBlack)
+
+    extraTextSize = extraOverCmsTextSize * cmsTextSize
+    pad_ratio = (float(pad.GetWh()) * pad.GetAbsHNDC()) / (float(pad.GetWw()) * pad.GetAbsWNDC())
+    if pad_ratio < 1.0:
+        pad_ratio = 1.0
+
+    if outOfFrame:
+        latex.SetTextFont(cmsTextFont)
+        latex.SetTextAlign(11)
+        latex.SetTextSize(cmsTextSize * t * (1.0 / pad_ratio))
+        latex.DrawLatex(l, 1 - t + lumiTextOffset * t, cmsText)
+
+    posX_ = 0
+    if iPosX % 10 <= 1:
+        posX_ = l + relPosX * (1 - l - r)
+    elif iPosX % 10 == 2:
+        posX_ = l + 0.5 * (1 - l - r)
+    elif iPosX % 10 == 3:
+        posX_ = 1 - r - relPosX * (1 - l - r)
+
+    posY_ = 1 - t - relPosY * (1 - t - b)
+    if not outOfFrame:
+        latex.SetTextFont(cmsTextFont)
+        latex.SetTextSize(cmsTextSize * t * pad_ratio)
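+        # draw the main label inside the frame, then the extra text (and an
+        # optional second line) below it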
latex.SetTextAlign(align_) + latex.DrawLatex(posX_, posY_, cmsText) + if writeExtraText: + latex.SetTextFont(extraTextFont) + latex.SetTextAlign(align_) + latex.SetTextSize(extraTextSize * t * pad_ratio) + latex.DrawLatex(posX_, posY_ - relExtraDY * cmsTextSize * t, extraText) + if writeExtraText2: + latex.DrawLatex(posX_, posY_ - 1.8 * relExtraDY * cmsTextSize * t, extraText2) + elif writeExtraText: + if iPosX == 0: + posX_ = l + relPosX * (1 - l - r) + posY_ = 1 - t + lumiTextOffset * t + latex.SetTextFont(extraTextFont) + latex.SetTextSize(extraTextSize * t * (1.0 / pad_ratio)) + latex.SetTextAlign(align_) + latex.DrawLatex(posX_, posY_, extraText) + + +def PositionedLegend(width, height, pos, offset, horizontaloffset=None): + o = offset + ho = horizontaloffset + if not ho: + ho = o + w = width + h = height + l = R.gPad.GetLeftMargin() + t = R.gPad.GetTopMargin() + b = R.gPad.GetBottomMargin() + r = R.gPad.GetRightMargin() + if pos == 1: + return R.TLegend(l + ho, 1 - t - o - h, l + ho + w, 1 - t - o, "", "NBNDC") + if pos == 2: + c = l + 0.5 * (1 - l - r) + return R.TLegend(c - 0.5 * w, 1 - t - o - h, c + 0.5 * w, 1 - t - o, "", "NBNDC") + if pos == 3: + return R.TLegend(1 - r - ho - w, 1 - t - o - h, 1 - r - ho, 1 - t - o, "", "NBNDC") + if pos == 4: + return R.TLegend(l + ho, b + o, l + ho + w, b + o + h, "", "NBNDC") + if pos == 5: + c = l + 0.5 * (1 - l - r) + return R.TLegend(c - 0.5 * w, b + o, c + 0.5 * w, b + o + h, "", "NBNDC") + if pos == 6: + return R.TLegend(1 - r - ho - w, b + o, 1 - r - ho, b + o + h, "", "NBNDC") + + +def DrawHorizontalLine(pad, line, yval): + axis = GetAxisHist(pad) + xmin = axis.GetXaxis().GetXmin() + xmax = axis.GetXaxis().GetXmax() + line.DrawLine(xmin, yval, xmax, yval) + + +def DrawVerticalLine(pad, line, xval): + axis = GetAxisHist(pad) + ymin = axis.GetYaxis().GetXmin() + ymax = axis.GetYaxis().GetXmax() + line.DrawLine(xval, ymin, xval, ymax) + + +def DrawVerticalBand(pad, box, x1, x2): + axis = GetAxisHist(pad) + ymin = axis.GetYaxis().GetXmin() + ymax = axis.GetYaxis().GetXmax() + box.DrawBox(x1, ymin, x2, ymax) + + +def DrawTitle(pad, text, align, textOffset=0.2, textSize=0.6): + pad_backup = R.gPad + pad.cd() + t = pad.GetTopMargin() + l = pad.GetLeftMargin() + r = pad.GetRightMargin() + + pad_ratio = (float(pad.GetWh()) * pad.GetAbsHNDC()) / (float(pad.GetWw()) * pad.GetAbsWNDC()) + if pad_ratio < 1.0: + pad_ratio = 1.0 + + latex = R.TLatex() + latex.SetNDC() + latex.SetTextAngle(0) + latex.SetTextColor(R.kBlack) + latex.SetTextFont(42) + latex.SetTextSize(textSize * t * pad_ratio) + + y_off = 1 - t + textOffset * t + if align == 1: + latex.SetTextAlign(11) + if align == 1: + latex.DrawLatex(l, y_off, text) + if align == 2: + latex.SetTextAlign(21) + if align == 2: + latex.DrawLatex(l + (1 - l - r) * 0.5, y_off, text) + if align == 3: + latex.SetTextAlign(31) + if align == 3: + latex.DrawLatex(1 - r, y_off, text) + pad_backup.cd() + + +##@} + + +## @name Limit plotting +# +# @details Common limit-plotting tasks, for example setting the Brazilian +# colour scheme for expected limit bands and drawing the associated +# TGraphs in the correct order +##@{ + + +def isclose(a, b, rel_tol=1e-9, abs_tol=0.0): + return abs(a - b) <= max(abs_tol, rel_tol * max(abs(a), abs(b))) + + +def StyleLimitBand(graph_dict, overwrite_style_dict=None): + style_dict = {"obs": {"LineWidth": 2}, "exp0": {"LineWidth": 2, "LineColor": R.kRed}, "exp1": {"FillColor": R.kGreen}, "exp2": {"FillColor": R.kYellow}} + if overwrite_style_dict is not None: + for key in 
overwrite_style_dict:
+            if key in style_dict:
+                style_dict[key].update(overwrite_style_dict[key])
+            else:
+                style_dict[key] = overwrite_style_dict[key]
+    for key in graph_dict:
+        Set(graph_dict[key], **style_dict[key])
+
+
+def DrawLimitBand(pad, graph_dict, draw=["exp2", "exp1", "exp0", "obs"], draw_legend=None, legend=None, legend_overwrite=None):
+    legend_dict = {
+        "obs": {"Label": "Observed", "LegendStyle": "LP", "DrawStyle": "PLSAME"},
+        "exp0": {"Label": "Expected", "LegendStyle": "L", "DrawStyle": "LSAME"},
+        "exp1": {"Label": "#pm1#sigma Expected", "LegendStyle": "F", "DrawStyle": "3SAME"},
+        "exp2": {"Label": "#pm2#sigma Expected", "LegendStyle": "F", "DrawStyle": "3SAME"},
+    }
+    if legend_overwrite is not None:
+        for key in legend_overwrite:
+            if key in legend_dict:
+                legend_dict[key].update(legend_overwrite[key])
+            else:
+                legend_dict[key] = legend_overwrite[key]
+    pad.cd()
+    for key in draw:
+        if key in graph_dict:
+            graph_dict[key].Draw(legend_dict[key]["DrawStyle"])
+    if legend is not None:
+        if draw_legend is None:
+            draw_legend = reversed(draw)
+        for key in draw_legend:
+            if key in graph_dict:
+                legend.AddEntry(graph_dict[key], legend_dict[key]["Label"], legend_dict[key]["LegendStyle"])
+
+
+##@}
+
+
+## @name Contour plotting
+#
+# @details Creating contour TGraphs using TH2s and TGraph2Ds
+##@{
+def contourFromTH2(h2in, threshold, minPoints=10, frameValue=1000.0):
+    # // http://root.cern.ch/root/html/tutorials/hist/ContourList.C.html
+    contoursList = [threshold]
+    contours = array("d", contoursList)
+    # if (h2in.GetNbinsX() * h2in.GetNbinsY()) > 10000: minPoints = 50
+    # if (h2in.GetNbinsX() * h2in.GetNbinsY()) <= 100: minPoints = 10
+
+    h2 = frameTH2D(h2in, threshold, frameValue)
+
+    h2.SetContour(1, contours)
+
+    # Draw contours as filled regions, and save the points
+    # backup = R.gPad # doesn't work in pyroot, backup behaves like a ref to gPad
+    canv = R.TCanvas("tmp", "tmp")
+    canv.cd()
+    h2.Draw("CONT Z LIST")
+    R.gPad.Update()  # Needed to force the plotting so the contours can be retrieved
+
+    conts = R.gROOT.GetListOfSpecials().FindObject("contours")
+    contLevel = None
+
+    if conts is None or conts.GetSize() == 0:
+        print("*** No Contours Were Extracted!")
+        return None
+    ret = R.TList()
+    for i in range(conts.GetSize()):
+        contLevel = conts.At(i)
+        print(">> Contour %d has %d Graphs" % (i, contLevel.GetSize()))
+        for j in range(contLevel.GetSize()):
+            gr1 = contLevel.At(j)
+            print("\t Graph %d has %d points" % (j, gr1.GetN()))
+            if gr1.GetN() > minPoints:
+                ret.Add(gr1.Clone())
+            # // break;
+    # backup.cd()
+    canv.Close()
+    return ret
+
+
+def frameTH2D(hist, threshold, frameValue=1000):
+    # Now supports variable-binned histograms. First adds a narrow frame
+    # (2% of the edge bin widths) around the outside with the same values as
+    # the real edge bins. Then adds another frame around this one filled with
+    # some chosen value that will make the contours close
+
+    # Get lists of the bin edges
+    x_bins = [hist.GetXaxis().GetBinLowEdge(x) for x in range(1, hist.GetNbinsX() + 2)]
+    y_bins = [hist.GetYaxis().GetBinLowEdge(y) for y in range(1, hist.GetNbinsY() + 2)]
+
+    # New bin edge arrays will need an extra four values
+    x_new = [0.0] * (len(x_bins) + 4)
+    y_new = [0.0] * (len(y_bins) + 4)
+
+    # Calculate bin widths at the edges
+    xw1 = x_bins[1] - x_bins[0]
+    xw2 = x_bins[-1] - x_bins[-2]
+    yw1 = y_bins[1] - y_bins[0]
+    yw2 = y_bins[-1] - y_bins[-2]
+
+    # Set the edges of the outer framing bins and the adjusted
+    # edge of the real edge bins
+    x_new[0] = x_bins[0] - 2 * xw1 * 0.02
+    x_new[1] = x_bins[0] - 1 * xw1 * 0.02
+    x_new[-1] = x_bins[-1] + 2 * xw2 * 0.02
+    x_new[-2] = x_bins[-1] + 1 * xw2 * 0.02
+    y_new[0] = y_bins[0] - 2 * yw1 * 0.02
+    y_new[1] = y_bins[0] - 1 * yw1 * 0.02
+    y_new[-1] = y_bins[-1] + 2 * yw2 * 0.02
+    y_new[-2] = y_bins[-1] + 1 * yw2 * 0.02
+
+    # Copy the remaining bin edges from the hist
+    for i in range(0, len(x_bins)):
+        x_new[i + 2] = x_bins[i]
+    for i in range(0, len(y_bins)):
+        y_new[i + 2] = y_bins[i]
+
+    # print x_new
+    # print y_new
+
+    framed = R.TH2D("%s framed" % hist.GetName(), "%s framed" % hist.GetTitle(), len(x_new) - 1, array("d", x_new), len(y_new) - 1, array("d", y_new))
+    framed.SetDirectory(0)
+
+    for x in range(1, framed.GetNbinsX() + 1):
+        for y in range(1, framed.GetNbinsY() + 1):
+            if x == 1 or x == framed.GetNbinsX() or y == 1 or y == framed.GetNbinsY():
+                # This is a frame bin
+                framed.SetBinContent(x, y, frameValue)
+            else:
+                # adjust x and y if we're in the first frame so as to copy the
+                # values from the real TH2
+                ux = x
+                uy = y
+                if x == 2:
+                    ux += 1
+                elif x == (len(x_new) - 2):
+                    ux -= 1
+                if y == 2:
+                    uy += 1
+                elif y == (len(y_new) - 2):
+                    uy -= 1
+                framed.SetBinContent(x, y, hist.GetBinContent(ux - 2, uy - 2))
+    return framed
+
+
+def fastFillTH2(hist2d, graph, initalValue=99999, interpolateMissing=False):
+    for x in range(1, hist2d.GetNbinsX() + 1):
+        for y in range(1, hist2d.GetNbinsY() + 1):
+            hist2d.SetBinContent(x, y, initalValue)
+    # for i in xrange(graph.GetN()):
+    # hist2d.Fill(graph.GetX()[i],graph.GetY()[i],graph.GetZ()[i])
+    for i in range(graph.GetN()):
+        xbin = hist2d.GetXaxis().FindBin(graph.GetX()[i])
+        ybin = hist2d.GetYaxis().FindBin(graph.GetY()[i])
+        if isclose(hist2d.GetXaxis().GetBinCenter(xbin), graph.GetX()[i], rel_tol=1e-2) and isclose(
+            hist2d.GetYaxis().GetBinCenter(ybin), graph.GetY()[i], rel_tol=1e-2
+        ):
+            hist2d.SetBinContent(xbin, ybin, graph.GetZ()[i])
+    interpolated = 0
+    if interpolateMissing:
+        for x in range(1, hist2d.GetNbinsX() + 1):
+            for y in range(1, hist2d.GetNbinsY() + 1):
+                if hist2d.GetBinContent(x, y) == initalValue:
+                    interpolated += 1
+                    hist2d.SetBinContent(x, y, graph.Interpolate(hist2d.GetXaxis().GetBinCenter(x), hist2d.GetYaxis().GetBinCenter(y)))
+
+
+def fillTH2(hist2d, graph):
+    for x in range(1, hist2d.GetNbinsX() + 1):
+        for y in range(1, hist2d.GetNbinsY() + 1):
+            xc = hist2d.GetXaxis().GetBinCenter(x)
+            yc = hist2d.GetYaxis().GetBinCenter(y)
+            val = graph.Interpolate(xc, yc)
+            hist2d.SetBinContent(x, y, val)
+
+
+def fillInvertedTH2(hist2d, graph):
+    for x in range(1, hist2d.GetNbinsX() + 1):
+        for y in range(1, hist2d.GetNbinsY() + 1):
+            xc = hist2d.GetXaxis().GetBinCenter(x)
+            yc = hist2d.GetYaxis().GetBinCenter(y)
+            val = graph.Interpolate(xc, yc)
+            hist2d.SetBinContent(x, y, 1 - val)
+
+
+# 
Functions 'NewInterpolate' and 'rebin' are taken, translated and modified into python from: +# https://indico.cern.ch/event/256523/contribution/2/attachments/450198/624259/07JUN2013_cawest.pdf +# http://hep.ucsb.edu/people/cawest/interpolation/interpolate.h +def NewInterpolate(hist): + histCopy = hist.Clone() + + # make temporary histograms to store the results of both steps + hist_step1 = histCopy.Clone() + hist_step1.Reset() + hist_step2 = histCopy.Clone() + hist_step2.Reset() + + nBinsX = histCopy.GetNbinsX() + nBinsY = histCopy.GetNbinsY() + + xMin = 1 + yMin = 1 + xMax = histCopy.GetNbinsX() + 1 + yMax = histCopy.GetNbinsY() + 1 + + for i in range(1, nBinsX + 1): + for j in range(1, nBinsY + 1): + # do not extrapolate outside the scan + if (i < xMin) or (i > xMax) or (j < yMin) or (j > yMax): + continue + binContent = histCopy.GetBinContent(i, j) + binContentNW = histCopy.GetBinContent(i + 1, j + 1) + binContentSE = histCopy.GetBinContent(i - 1, j - 1) + binContentNE = histCopy.GetBinContent(i + 1, j - 1) + binContentSW = histCopy.GetBinContent(i - 1, j + 1) + binContentUp = histCopy.GetBinContent(i, j + 1) + binContentDown = histCopy.GetBinContent(i, j - 1) + binContentLeft = histCopy.GetBinContent(i - 1, j) + binContentRight = histCopy.GetBinContent(i + 1, j) + nFilled = 0 + if binContentNW > 0: + nFilled += 1 + if binContentSE > 0: + nFilled += 1 + if binContentNE > 0: + nFilled += 1 + if binContentSW > 0: + nFilled += 1 + if binContentUp > 0: + nFilled += 1 + if binContentDown > 0: + nFilled += 1 + if binContentRight > 0: + nFilled += 1 + if binContentLeft > 0: + nFilled += 1 + # if we are at an empty bin and there are neighbors + # in specified direction with non-zero entries + if (binContent == 0) and (nFilled > 1): + # average over non-zero entries + binContent = ( + binContentNW + binContentSE + binContentNE + binContentSW + binContentUp + binContentDown + binContentRight + binContentLeft + ) / nFilled + hist_step1.SetBinContent(i, j, binContent) + + # add result of interpolation + histCopy.Add(hist_step1) + + for i in range(1, nBinsX): + for j in range(1, nBinsY): + if (i < xMin) or (i > xMax) or (j < yMin) or (j > yMax): + continue + binContent = histCopy.GetBinContent(i, j) + # get entries for "Swiss Cross" average + binContentUp = histCopy.GetBinContent(i, j + 1) + binContentDown = histCopy.GetBinContent(i, j - 1) + binContentLeft = histCopy.GetBinContent(i - 1, j) + binContentRight = histCopy.GetBinContent(i + 1, j) + nFilled = 0 + if binContentUp > 0: + nFilled += 1 + if binContentDown > 0: + nFilled += 1 + if binContentRight > 0: + nFilled += 1 + if binContentLeft > 0: + nFilled += 1 + if (binContent == 0) and (nFilled > 0): + # only average over non-zero entries + binContent = (binContentUp + binContentDown + binContentRight + binContentLeft) / nFilled + hist_step2.SetBinContent(i, j, binContent) + # add "Swiss Cross" average + histCopy.Add(hist_step2) + + return histCopy + + +def rebin(hist): + histName = hist.GetName() + histName += "_rebin" + + # bin widths are needed so as to not shift histogram by half a bin with each rebinning + # assume constant binning + # binWidthX = hist.GetXaxis().GetBinWidth(1) + # binWidthY = hist.GetYaxis().GetBinWidth(1) + + # histRebinned = R.TH2F(histName, histName, 2*hist.GetNbinsX(), hist.GetXaxis().GetXmin()+binWidthX/4, hist.GetXaxis().GetXmax()+binWidthX/4, 2*hist.GetNbinsY(), hist.GetYaxis().GetXmin()+binWidthY/4, hist.GetYaxis().GetXmax()+binWidthY/4) + histRebinned = R.TH2F( + histName, + histName, + 2 * hist.GetNbinsX() - 1, + 
hist.GetXaxis().GetXmin(), + hist.GetXaxis().GetXmax(), + 2 * hist.GetNbinsY() - 1, + hist.GetYaxis().GetXmin(), + hist.GetYaxis().GetXmax(), + ) + + # copy results from previous histogram + for iX in range(1, hist.GetNbinsX() + 1): + for iY in range(1, hist.GetNbinsY() + 1): + binContent = hist.GetBinContent(iX, iY) + histRebinned.SetBinContent(2 * iX - 1, 2 * iY - 1, binContent) + histRebinned.SetMaximum(hist.GetMaximum()) + histRebinned.SetMinimum(hist.GetMinimum()) + + # use interpolation to re-fill histogram + histRebinnedInterpolated = NewInterpolate(histRebinned) + + return histRebinnedInterpolated + + +def higgsConstraint(model, higgstype): + higgsBand = R.TGraph2D() + masslow = 150 + masshigh = 500 + massstep = 10 + n = 0 + for mass in range(masslow, masshigh, massstep): + myfile = open("../../HiggsAnalysis/HiggsToTauTau/data/Higgs125/" + model + "/higgs_" + str(mass) + ".dat", "r") + for line in myfile: + tanb = (line.split())[0] + mh = float((line.split())[1]) + mH = float((line.split())[3]) + if higgstype == "h": + higgsBand.SetPoint(n, mass, float(tanb), mh) + elif higgstype == "H": + higgsBand.SetPoint(n, mass, float(tanb), mH) + n = n + 1 + myfile.close() + return higgsBand + + +##@} + + +def getOverlayMarkerAndLegend(legend, entries, options, borderSize=2.0 / 3, markerStyle="P"): + borderLegend = legend.Clone() + borderLegend.Clear() + graphs = [] + for i in range(legend.GetNRows()): + if i in entries: + graph = entries[i].Clone() + options[i]["MarkerSize"] = graph.GetMarkerSize() * borderSize + Set(graph, **options[i]) + borderLegend.AddEntry(graph, " ", markerStyle) + graphs.append(graph) + else: + borderLegend.AddEntry("", " ", "") + borderLegend.SetFillStyle(0) + borderLegend.SetFillColor(0) + return (borderLegend, graphs) diff --git a/scripts/combineTool.py b/scripts/combineTool.py new file mode 100755 index 00000000000..85ca9eabd5f --- /dev/null +++ b/scripts/combineTool.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +from __future__ import absolute_import +import argparse +import ROOT + +from HiggsAnalysis.CombinedLimit.tool_base.CombineToolBase import CombineToolBase +from HiggsAnalysis.CombinedLimit.tool_base.EnhancedCombine import EnhancedCombine +from HiggsAnalysis.CombinedLimit.tool_base.Impacts import Impacts +from HiggsAnalysis.CombinedLimit.tool_base.ImpactsFromScans import ImpactsFromScans +from HiggsAnalysis.CombinedLimit.tool_base.Workspace import PrintWorkspace, ModifyDataSet +from HiggsAnalysis.CombinedLimit.tool_base.CovMatrix import CovMatrix +from HiggsAnalysis.CombinedLimit.tool_base.LimitGrids import AsymptoticGrid, HybridNewGrid +from HiggsAnalysis.CombinedLimit.tool_base.Output import PrintFit, CollectLimits, CollectGoodnessOfFit +from HiggsAnalysis.CombinedLimit.tool_base.T2W import T2W +from HiggsAnalysis.CombinedLimit.tool_base.FastScan import FastScan +from HiggsAnalysis.CombinedLimit.tool_base.TaylorExpand import TaylorExpand + +ROOT.PyConfig.IgnoreCommandLineOptions = True +ROOT.gROOT.SetBatch(ROOT.kTRUE) + + +def register_method(parser, method_dict, method_class): + class_name = method_class.__name__ + parser.description += " %-20s : %s\n" % (class_name, method_class.description) + method_dict[class_name] = method_class + + +parser = argparse.ArgumentParser(add_help=False, formatter_class=argparse.RawDescriptionHelpFormatter) + +parser.description = "Available methods:\n\n" +methods = {} +register_method(parser, methods, EnhancedCombine) +register_method(parser, methods, T2W) +register_method(parser, methods, PrintWorkspace) 
+register_method(parser, methods, ModifyDataSet) +register_method(parser, methods, Impacts) +register_method(parser, methods, ImpactsFromScans) +register_method(parser, methods, CollectLimits) +register_method(parser, methods, CollectGoodnessOfFit) +register_method(parser, methods, CovMatrix) +register_method(parser, methods, PrintFit) +register_method(parser, methods, AsymptoticGrid) +register_method(parser, methods, HybridNewGrid) +register_method(parser, methods, FastScan) +register_method(parser, methods, TaylorExpand) + +parser.add_argument("-M", "--method") + +(args, unknown) = parser.parse_known_args() + +# DRY_RUN = args.dry_run + +method = methods[args.method]() if args.method in methods else EnhancedCombine() + +# Loading libs is slow: only do it if the method has requested it +if method.__class__.requires_root: + ROOT.gSystem.Load("libHiggsAnalysisCombinedLimit") + +job_group = parser.add_argument_group("job options", "options for creating, running and submitting jobs") + +# One group of options that are specific to the chosen method +tool_group = parser.add_argument_group("%s options" % method.__class__.__name__, "options specific to this method") +# And another group for combine options that will be intercepted +intercept_group = parser.add_argument_group("combine options", "standard combine options that will be re-interpreted") + +# Let the chosen method create the arguments in both groups +method.attach_job_args(job_group) +method.attach_intercept_args(intercept_group) +method.attach_args(tool_group) + +# Now we can add the normal help option +parser.add_argument("-h", "--help", action="help") + +(args, unknown) = parser.parse_known_args() + +method.set_args(args, unknown) +method.run_method() diff --git a/scripts/plot1DScan.py b/scripts/plot1DScan.py new file mode 100755 index 00000000000..d46df7dfb49 --- /dev/null +++ b/scripts/plot1DScan.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +from __future__ import absolute_import +from __future__ import print_function +import ROOT +import math +from functools import partial +import HiggsAnalysis.CombinedLimit.util.plotting as plot +import json +import argparse +import os.path +from six.moves import range + +ROOT.PyConfig.IgnoreCommandLineOptions = True +ROOT.gROOT.SetBatch(ROOT.kTRUE) + +plot.ModTDRStyle(width=700, l=0.13) +ROOT.gStyle.SetNdivisions(510, "XYZ") +ROOT.gStyle.SetMarkerSize(0.7) + +NAMECOUNTER = 0 + + +def read(scan, param, files, ycut): + goodfiles = [f for f in files if plot.TFileIsGood(f)] + limit = plot.MakeTChain(goodfiles, "limit") + graph = plot.TGraphFromTree(limit, param, "2*deltaNLL", "quantileExpected > -1.5") + graph.SetName(scan) + graph.Sort() + plot.RemoveGraphXDuplicates(graph) + plot.RemoveGraphYAbove(graph, ycut) + # graph.Print() + return graph + + +def Eval(obj, x, params): + return obj.Eval(x[0]) + + +def BuildScan(scan, param, files, color, yvals, ycut): + graph = read(scan, param, files, ycut) + if graph.GetN() <= 1: + graph.Print() + raise RuntimeError("Attempting to build %s scan from TGraph with zero or one point (see above)" % files) + bestfit = None + for i in range(graph.GetN()): + if graph.GetY()[i] == 0.0: + bestfit = graph.GetX()[i] + graph.SetMarkerColor(color) + spline = ROOT.TSpline3("spline3", graph) + global NAMECOUNTER + func_method = partial(Eval, spline) + func = ROOT.TF1("splinefn" + str(NAMECOUNTER), func_method, graph.GetX()[0], graph.GetX()[graph.GetN() - 1], 1) + func._method = func_method + NAMECOUNTER += 1 + func.SetLineColor(color) + func.SetLineWidth(3) + assert bestfit 
is not None + crossings = {} + cross_1sig = None + cross_2sig = None + other_1sig = [] + other_2sig = [] + val = None + val_2sig = None + for yval in yvals: + crossings[yval] = plot.FindCrossingsWithSpline(graph, func, yval) + for cr in crossings[yval]: + cr["contains_bf"] = cr["lo"] <= bestfit and cr["hi"] >= bestfit + for cr in crossings[yvals[0]]: + if cr["contains_bf"]: + val = (bestfit, cr["hi"] - bestfit, cr["lo"] - bestfit) + cross_1sig = cr + else: + other_1sig.append(cr) + if len(yvals) > 1: + for cr in crossings[yvals[1]]: + if cr["contains_bf"]: + val_2sig = (bestfit, cr["hi"] - bestfit, cr["lo"] - bestfit) + cross_2sig = cr + else: + other_2sig.append(cr) + else: + val_2sig = (0.0, 0.0, 0.0) + cross_2sig = cross_1sig + return { + "graph": graph, + "spline": spline, + "func": func, + "crossings": crossings, + "val": val, + "val_2sig": val_2sig, + "cross_1sig": cross_1sig, + "cross_2sig": cross_2sig, + "other_1sig": other_1sig, + "other_2sig": other_2sig, + } + + +parser = argparse.ArgumentParser() + +parser.add_argument("main", help="Main input file for the scan") +parser.add_argument("--y-cut", type=float, default=7.0, help="Remove points with y > y-cut") +parser.add_argument("--y-max", type=float, default=8.0, help="y-axis maximum") +parser.add_argument("--output", "-o", help="output name without file extension", default="scan") +parser.add_argument("--POI", help="use this parameter of interest", default="r") +parser.add_argument("--translate", default=None, help="json file with POI name translation") +parser.add_argument("--main-label", default="Observed", type=str, help="legend label for the main scan") +parser.add_argument("--main-color", default=1, type=int, help="line and marker color for main scan") +parser.add_argument("--others", nargs="*", help="add secondary scans processed as main: FILE:LABEL:COLOR") +parser.add_argument("--breakdown", help="do quadratic error subtraction using --others") +parser.add_argument("--logo", default="CMS") +parser.add_argument("--logo-sub", default="Internal") +args = parser.parse_args() + +print("--------------------------------------") +print(args.output) +print("--------------------------------------") + +fixed_name = args.POI +if args.translate is not None: + with open(args.translate) as jsonfile: + name_translate = json.load(jsonfile) + if args.POI in name_translate: + fixed_name = name_translate[args.POI] + +yvals = [1.0, 4.0] + + +main_scan = BuildScan(args.output, args.POI, [args.main], args.main_color, yvals, args.y_cut) + +other_scans = [] +other_scans_opts = [] +if args.others is not None: + for oargs in args.others: + splitargs = oargs.split(":") + other_scans_opts.append(splitargs) + other_scans.append(BuildScan(args.output, args.POI, [splitargs[0]], int(splitargs[2]), yvals, args.y_cut)) + + +canv = ROOT.TCanvas(args.output, args.output) +pads = plot.OnePad() +main_scan["graph"].SetMarkerColor(1) +main_scan["graph"].Draw("AP") + +axishist = plot.GetAxisHist(pads[0]) + +axishist.SetMinimum(min(main_scan["graph"].GetY())) +axishist.SetMaximum(args.y_max) +axishist.GetYaxis().SetTitle("- 2 #Delta ln L") +axishist.GetXaxis().SetTitle("%s" % fixed_name) + +new_min = axishist.GetXaxis().GetXmin() +new_max = axishist.GetXaxis().GetXmax() +mins = [] +maxs = [] +for other in other_scans: + mins.append(other["graph"].GetX()[0]) + maxs.append(other["graph"].GetX()[other["graph"].GetN() - 1]) + +if len(other_scans) > 0: + if min(mins) < main_scan["graph"].GetX()[0]: + new_min = min(mins) - (main_scan["graph"].GetX()[0] - new_min) + if 
max(maxs) > main_scan["graph"].GetX()[main_scan["graph"].GetN() - 1]: + new_max = max(maxs) + (new_max - main_scan["graph"].GetX()[main_scan["graph"].GetN() - 1]) + axishist.GetXaxis().SetLimits(new_min, new_max) + +for other in other_scans: + if args.breakdown is not None: + other["graph"].SetMarkerSize(0.4) + other["graph"].Draw("PSAME") + +line = ROOT.TLine() +line.SetLineColor(16) +# line.SetLineStyle(7) +for yval in yvals: + plot.DrawHorizontalLine(pads[0], line, yval) + if len(other_scans) == 0: + for cr in main_scan["crossings"][yval]: + if cr["valid_lo"]: + line.DrawLine(cr["lo"], 0, cr["lo"], yval) + if cr["valid_hi"]: + line.DrawLine(cr["hi"], 0, cr["hi"], yval) + +main_scan["func"].Draw("same") +for other in other_scans: + if args.breakdown is not None: + other["func"].SetLineStyle(2) + other["func"].SetLineWidth(2) + other["func"].Draw("SAME") + + +box = ROOT.TBox(axishist.GetXaxis().GetXmin(), 0.625 * args.y_max, axishist.GetXaxis().GetXmax(), args.y_max) +box.Draw() +pads[0].GetFrame().Draw() +pads[0].RedrawAxis() + +crossings = main_scan["crossings"] +val_nom = main_scan["val"] +val_2sig = main_scan["val_2sig"] + +textfit = "%s = %.3f{}^{#plus %.3f}_{#minus %.3f}" % (fixed_name, val_nom[0], val_nom[1], abs(val_nom[2])) + + +pt = ROOT.TPaveText(0.59, 0.82 - len(other_scans) * 0.08, 0.95, 0.91, "NDCNB") +pt.AddText(textfit) + +if args.breakdown is None: + for i, other in enumerate(other_scans): + textfit = "#color[%s]{%s = %.3f{}^{#plus %.3f}_{#minus %.3f}}" % ( + other_scans_opts[i][2], + fixed_name, + other["val"][0], + other["val"][1], + abs(other["val"][2]), + ) + pt.AddText(textfit) + + +if args.breakdown is not None: + pt.SetX1(0.50) + if len(other_scans) >= 3: + pt.SetX1(0.19) + pt.SetX2(0.88) + pt.SetY1(0.66) + pt.SetY2(0.82) + breakdown = args.breakdown.split(",") + v_hi = [val_nom[1]] + v_lo = [val_nom[2]] + for other in other_scans: + v_hi.append(other["val"][1]) + v_lo.append(other["val"][2]) + assert len(v_hi) == len(breakdown) + textfit = "%s = %.3f" % (fixed_name, val_nom[0]) + for i, br in enumerate(breakdown): + if i < (len(breakdown) - 1): + if abs(v_hi[i + 1]) > abs(v_hi[i]): + print("ERROR SUBTRACTION IS NEGATIVE FOR %s HI" % br) + hi = 0.0 + else: + hi = math.sqrt(v_hi[i] * v_hi[i] - v_hi[i + 1] * v_hi[i + 1]) + if abs(v_lo[i + 1]) > abs(v_lo[i]): + print("ERROR SUBTRACTION IS NEGATIVE FOR %s LO" % br) + lo = 0.0 + else: + lo = math.sqrt(v_lo[i] * v_lo[i] - v_lo[i + 1] * v_lo[i + 1]) + else: + hi = v_hi[i] + lo = v_lo[i] + textfit += "{}^{#plus %.3f}_{#minus %.3f}(%s)" % (hi, abs(lo), br) + pt.AddText(textfit) + + +pt.SetTextAlign(11) +pt.SetTextFont(42) +pt.Draw() + +plot.DrawCMSLogo(pads[0], args.logo, args.logo_sub, 11, 0.045, 0.035, 1.2, cmsTextSize=1.0) + +legend_l = 0.69 +if len(other_scans) > 0: + legend_l = legend_l - len(other_scans) * 0.04 +legend = ROOT.TLegend(0.15, legend_l, 0.45, 0.78, "", "NBNDC") +if len(other_scans) >= 3: + legend = ROOT.TLegend(0.46, 0.83, 0.95, 0.93, "", "NBNDC") + legend.SetNColumns(2) + +legend.AddEntry(main_scan["func"], args.main_label, "L") +for i, other in enumerate(other_scans): + legend.AddEntry(other["func"], other_scans_opts[i][1], "L") +legend.Draw() + +save_graph = main_scan["graph"].Clone() +save_graph.GetXaxis().SetTitle("%s = %.3f %+.3f/%+.3f" % (fixed_name, val_nom[0], val_nom[2], val_nom[1])) +outfile = ROOT.TFile(args.output + ".root", "RECREATE") +outfile.WriteTObject(save_graph) +outfile.Close() +canv.Print(".pdf") +canv.Print(".png") diff --git a/scripts/plotBSMxsBRLimit.py 
b/scripts/plotBSMxsBRLimit.py new file mode 100644 index 00000000000..e22cb314d0f --- /dev/null +++ b/scripts/plotBSMxsBRLimit.py @@ -0,0 +1,156 @@ +from __future__ import absolute_import +from __future__ import print_function +import HiggsAnalysis.CombinedLimit.util.plotting as plot +import CombineHarvester.CombineTools.maketable as maketable +import ROOT +import math +import argparse +import json +from six.moves import range + +ROOT.gROOT.SetBatch(ROOT.kTRUE) +parser = argparse.ArgumentParser() +parser.add_argument("--file", "-f", help="named input file") +parser.add_argument("--process", help="The process on which a limit has been calculated. [gg#phi, bb#phi]", default="gg#phi") +parser.add_argument("--custom_y_range", help="Fix y axis range", action="store_true", default=False) +parser.add_argument("--y_axis_min", help="Fix y axis minimum", default=0.001) +parser.add_argument("--y_axis_max", help="Fix y axis maximum", default=100.0) +parser.add_argument("--custom_x_range", help="Fix x axis range", action="store_true", default=False) +parser.add_argument("--x_axis_min", help="Fix x axis minimum", default=90.0) +parser.add_argument("--x_axis_max", help="Fix x axis maximum", default=1000.0) +parser.add_argument("--verbosity", "-v", help="verbosity", default=0) +parser.add_argument("--log", help="Set log range for x and y axis", action="store_true", default=False) +parser.add_argument("--expected_only", help="Plot expected only", action="store_true", default=False) +parser.add_argument("--outname", "-o", help="Output plot name", default="mssm_limit") +parser.add_argument("--title", help="Plot title", default="19.8 fb^{-1} (8 TeV)") +# parser.add_argument('--table_vals', help='Amount of values to be written in a table for different masses', default=10) +args = parser.parse_args() + + +# Store the mass list convert from json file or directly via tgraphs +graph_obs = ROOT.TGraph() +graph_minus2sigma = ROOT.TGraph() +graph_minus1sigma = ROOT.TGraph() +graph_exp = ROOT.TGraph() +graph_plus1sigma = ROOT.TGraph() +graph_plus2sigma = ROOT.TGraph() + +if ".root" in args.file: + file = ROOT.TFile(args.file, "r") + if not args.expected_only: + graph_obs = plot.SortGraph(file.Get("observed")) + graph_minus2sigma = plot.SortGraph(file.Get("minus2sigma")) + graph_minus1sigma = plot.SortGraph(file.Get("minus1sigma")) + graph_exp = plot.SortGraph(file.Get("expected")) + graph_plus1sigma = plot.SortGraph(file.Get("plus1sigma")) + graph_plus2sigma = plot.SortGraph(file.Get("plus2sigma")) + maketable.Tablefrom1DGraph(args.file, "mssm_limit_table.txt") +else: + data = {} + with open(args.file) as jsonfile: + data = json.load(jsonfile) + if not args.expected_only: + graph_obs = plot.LimitTGraphFromJSON(data, "observed") + graph_minus2sigma = plot.LimitTGraphFromJSON(data, "-2") + graph_minus1sigma = plot.LimitTGraphFromJSON(data, "-1") + graph_exp = plot.LimitTGraphFromJSON(data, "expected") + graph_plus1sigma = plot.LimitTGraphFromJSON(data, "+1") + graph_plus2sigma = plot.LimitTGraphFromJSON(data, "+2") + maketable.TablefromJson(args.file, "mssm_limit_table.txt") + +process_label = args.process + +mass_list = [] +for i in range(graph_exp.GetN()): + mass_list.append(float(graph_exp.GetX()[i])) +mass_list = sorted(set(mass_list)) +mass_bins = len(mass_list) +if int(args.verbosity) > 0: + print("mass_list: ", mass_list, "Total number: ", mass_bins) + +# Create canvas and TH1D +plot.ModTDRStyle(width=600, l=0.12) +ROOT.gStyle.SetFrameLineWidth(2) +c1 = ROOT.TCanvas() +axis = plot.makeHist1D("hist1d", mass_bins, 
graph_exp) +if process_label == "gg#phi": + axis.GetYaxis().SetTitle("95% CL limit on #sigma#font[42]{(gg#phi)}#upoint#font[52]{B}#font[42]{(#phi#rightarrow#tau#tau)} [pb]") +elif process_label == "bb#phi": + axis.GetYaxis().SetTitle("95% CL limit on #sigma#font[42]{(bb#phi)}#upoint#font[52]{B}#font[42]{(#phi#rightarrow#tau#tau)} [pb]") +else: + exit("Currently process is not supported") +if args.custom_y_range: + axis.GetYaxis().SetRangeUser(float(args.y_axis_min), float(args.y_axis_max)) +axis.GetXaxis().SetTitle("m_{#phi} [GeV]") +if args.custom_x_range: + axis.GetXaxis().SetRangeUser(float(args.x_axis_min), float(args.x_axis_max)) +# Create two pads, one is just for the Legend +pad_leg = ROOT.TPad("pad_leg", "pad_leg", 0, 0.82, 1, 1) +pad_leg.SetFillStyle(4000) +pad_leg.Draw() +pad_plot = ROOT.TPad("pad_plot", "pad_plot", 0, 0, 1, 0.82) +pad_plot.SetFillStyle(4000) +pad_plot.Draw() +pads = [pad_leg, pad_plot] +pads[1].cd() +if args.log: + pad_plot.SetLogx(1) + pad_plot.SetLogy(1) + axis.SetNdivisions(50005, "X") + axis.GetXaxis().SetMoreLogLabels() + axis.GetXaxis().SetNoExponent() + axis.GetXaxis().SetLabelSize(0.040) +axis.Draw() + +innerBand = plot.MakeErrorBand(graph_minus1sigma, graph_plus1sigma) +outerBand = plot.MakeErrorBand(graph_minus2sigma, graph_plus2sigma) + +outerBand.SetLineWidth(1) +outerBand.SetLineColor(ROOT.kBlack) +# if(injected) outerBand->SetFillColor(kAzure-9); +# else if(BG_Higgs) outerBand->SetFillColor(kSpring+5); +outerBand.SetFillColor(ROOT.TColor.GetColor(252, 241, 15)) +outerBand.Draw("3") + +innerBand.SetLineWidth(1) +innerBand.SetLineColor(ROOT.kBlack) +# if(injected) innerBand->SetFillColor(kAzure-4); +# else if(BG_Higgs) innerBand->SetFillColor(kGreen+2); +innerBand.SetFillColor(ROOT.kGreen) +innerBand.Draw("3same") + +graph_exp.SetLineColor(ROOT.kRed) +graph_exp.SetLineWidth(3) +graph_exp.SetLineStyle(1) +# if(mssm_log){ +# expected->SetLineColor(kBlack); +# expected->SetLineStyle(2); +# } +graph_exp.Draw("L") + +if not args.expected_only: + graph_obs.SetMarkerColor(ROOT.kBlack) + graph_obs.SetMarkerSize(1.0) + graph_obs.SetMarkerStyle(20) + graph_obs.SetLineWidth(3) + graph_obs.Draw("PLsame") + +pads[0].cd() +legend = plot.PositionedLegend(0.5, 0.9, 2, 0.03) +legend.SetNColumns(2) +legend.SetFillStyle(1001) +legend.SetTextSize(0.15) +legend.SetTextFont(62) +legend.SetHeader("95% CL Excluded:") +if not args.expected_only: + legend.AddEntry(graph_obs, "Observed", "L") +legend.AddEntry(innerBand, "#pm 1#sigma Expected", "F") +legend.AddEntry(graph_exp, "Expected", "L") +legend.AddEntry(outerBand, "#pm 2#sigma Expected", "F") +legend.Draw("same") + +plot.DrawCMSLogo(pads[1], "", "", 11, 0.045, 0.035, 1.2) +plot.DrawTitle(pads[1], "%s" % args.title, 3) +plot.FixOverlay() +c1.SaveAs("%s.pdf" % args.outname) +c1.SaveAs("%s.png" % args.outname) diff --git a/scripts/plotGof.py b/scripts/plotGof.py new file mode 100755 index 00000000000..dc12454c436 --- /dev/null +++ b/scripts/plotGof.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 +from __future__ import absolute_import +from __future__ import print_function +import ROOT +import HiggsAnalysis.CombinedLimit.util.plotting as plot +import argparse +import json +from six.moves import range + +parser = argparse.ArgumentParser() +parser.add_argument("input", help="""Input json file""") +parser.add_argument( + "--output", + "-o", + default="", + help="""Name of the output + plot without file extension""", +) +parser.add_argument("--mass", default="160.0", help="""Higgs Boson mass to be used""") 
+parser.add_argument("--statistic", default="saturated", help="""Used Test Statistic""") +parser.add_argument("--x-title", default="Goodness of Fit", help="""Title for the x-axis""") +parser.add_argument("--y-title", default="Number of Toys", help="""Title for the y-axis""") +parser.add_argument("--cms-sub", default="Internal", help="""Text below the CMS logo""") +parser.add_argument("--title-right", default="", help="""Right header text above the frame""") +parser.add_argument("--title-left", default="", help="""Left header text above the frame""") +parser.add_argument("--pad-style", default=None, help="""Extra style options for the pad, e.g. Grid=(1,1)""") +parser.add_argument("--auto-style", nargs="?", const="", default=None, help="""Take line colors and styles from a pre-defined list""") +parser.add_argument("--table_vals", help="Amount of values to be written in a table for different masses", default=10) +parser.add_argument("--bins", default=100, type=int, help="Number of bins in histogram") +parser.add_argument("--range", nargs=2, type=float, help="Range of histograms. Requires two arguments in the form of ") +parser.add_argument( + "--percentile", + nargs=2, + type=float, + help="Range of percentile from the distribution to be included. Requires two arguments in the form of . Overrides range option.", +) +args = parser.parse_args() + + +def DrawAxisHists(pads, axis_hists, def_pad=None): + for i, pad in enumerate(pads): + pad.cd() + axis_hists[i].Draw("AXIS") + axis_hists[i].Draw("AXIGSAME") + if def_pad is not None: + def_pad.cd() + + +## Boilerplate +ROOT.PyConfig.IgnoreCommandLineOptions = True +ROOT.gROOT.SetBatch(ROOT.kTRUE) +plot.ModTDRStyle() +ROOT.gStyle.SetNdivisions(510, "XYZ") # probably looks better + +canv = ROOT.TCanvas(args.output, args.output) + +pads = plot.OnePad() + +# Set the style options of the pads +for padx in pads: + # Use tick marks on oppsite axis edges + plot.Set(padx, Tickx=1, Ticky=1) + if args.pad_style is not None: + settings = {x.split("=")[0]: eval(x.split("=")[1]) for x in args.pad_style.split(",")} + print("Applying style options to the TPad(s):") + print(settings) + plot.Set(padx, **settings) + +graphs = [] +graph_sets = [] + +legend = plot.PositionedLegend(0.45, 0.10, 3, 0.015) +plot.Set(legend, NColumns=2) + +axis = None + +defcols = [ + ROOT.kGreen + 3, + ROOT.kRed, + ROOT.kBlue, + ROOT.kBlack, + ROOT.kYellow + 2, + ROOT.kOrange + 10, + ROOT.kCyan + 3, + ROOT.kMagenta + 2, + ROOT.kViolet - 5, + ROOT.kGray, +] + +deflines = [1, 2, 3] + +if args.auto_style is not None: + icol = {x: 0 for x in args.auto_style.split(",")} + icol["default"] = 0 + iline = {} + iline["default"] = 1 + for i, x in enumerate(args.auto_style.split(",")): + iline[x] = i + 1 + +pValue = 0 +if args.statistic in ["AD", "KS"]: + titles = { + "htt_em_8_13TeV": "e#mu, nobtag", + "htt_em_9_13TeV": "e#mu, btag", + "htt_et_8_13TeV": "e#tau_{h}, nobtag", + "htt_et_9_13TeV": "e#tau_{h}, btag", + "htt_mt_8_13TeV": "#mu#tau_{h}, nobtag", + "htt_mt_9_13TeV": "#mu#tau_{h}, btag", + "htt_tt_8_13TeV": "#tau_{h}#tau_{h}, nobtag", + "htt_tt_9_13TeV": "#tau_{h}#tau_{h}, btag", + } + with open(args.input) as jsfile: + js = json.load(jsfile) + for key in js[args.mass]: ## these are the channels + # title = key if key not in titles else titles[key] + title = titles.get(key, key) + # if key not in titles: + # continue + toy_graph = plot.ToyTGraphFromJSON(js, [args.mass, key, "toy"]) + if args.percentile: + min_range = toy_graph.GetX()[int(toy_graph.GetN() * args.percentile[0])] + max_range = 
toy_graph.GetX()[int(toy_graph.GetN() * args.percentile[1])] + toy_hist = plot.makeHist1D("toys", args.bins, toy_graph, absoluteXrange=(min_range, max_range)) + elif args.range: + toy_hist = plot.makeHist1D("toys", args.bins, toy_graph, absoluteXrange=args.range) + else: + toy_hist = plot.makeHist1D("toys", args.bins, toy_graph, 1.15) + for i in range(toy_graph.GetN()): + toy_hist.Fill(toy_graph.GetX()[i]) + pValue = js[args.mass][key]["p"] + obs = plot.ToyTGraphFromJSON(js, [args.mass, key, "obs"]) + arr = ROOT.TArrow(obs.GetX()[0], 0.001, obs.GetX()[0], toy_hist.GetMaximum() / 8, 0.02, "<|") + arr.SetLineColor(ROOT.kBlue) + arr.SetFillColor(ROOT.kBlue) + arr.SetFillStyle(1001) + arr.SetLineWidth(6) + arr.SetLineStyle(1) + arr.SetAngle(60) + toy_hist.Draw() + arr.Draw("<|same") + pads[0].RedrawAxis() + pads[0].RedrawAxis("g") + pads[0].GetFrame().Draw() + + # axis[0].GetYaxis().SetTitle(args.y_title) + # axis[0].GetXaxis().SetTitle(args.x_title) + # axis[0].GetXaxis().SetLabelOffset(axis[0].GetXaxis().GetLabelOffset()*2) + + y_min, y_max = (plot.GetPadYMin(pads[0]), plot.GetPadYMax(pads[0])) + plot.FixBothRanges(pads[0], 0, 0, y_max, 0.25) + + ratio_graph_sets = [] + ratio_graphs = [] + + pads[0].cd() + if legend.GetNRows() == 1: + legend.SetY1(legend.GetY2() - 0.5 * (legend.GetY2() - legend.GetY1())) + legend.Draw() + + box = ROOT.TPave(pads[0].GetLeftMargin(), 0.81, 1 - pads[0].GetRightMargin(), 1 - pads[0].GetTopMargin(), 1, "NDC") + box.Draw() + + legend.Draw() + + plot.DrawCMSLogo(pads[0], "CMS", args.cms_sub, 11, 0.045, 0.035, 1.2, "", 0.8) + plot.DrawTitle(pads[0], args.title_right, 3) + plot.DrawTitle(pads[0], title, 1) + + textlabel = ROOT.TPaveText(0.68, 0.88, 0.80, 0.92, "NDC") + textlabel.SetBorderSize(0) + textlabel.SetFillStyle(0) + textlabel.SetTextAlign(32) + textlabel.SetTextSize(0.04) + textlabel.SetTextColor(1) + textlabel.SetTextFont(62) + textlabel.AddText(args.statistic + ", %s Toys" % (toy_graph.GetN())) + textlabel.Draw() + + pvalue = ROOT.TPaveText(0.68, 0.83, 0.80, 0.87, "NDC") + pvalue.SetBorderSize(0) + pvalue.SetFillStyle(0) + pvalue.SetTextAlign(32) + pvalue.SetTextSize(0.04) + pvalue.SetTextColor(1) + pvalue.SetTextFont(62) + pvalue.AddText("p-value = %0.3f" % pValue) + pvalue.Draw() + + canv.Print(key + args.output + ".pdf") + canv.Print(key + args.output + ".png") + + # some cleaning up + del toy_hist + +else: + with open(args.input) as jsfile: + js = json.load(jsfile) + # graph_sets.append(plot.StandardLimitsFromJSONFile(file, args.show.split(','))) + toy_graph = plot.ToyTGraphFromJSON(js, [args.mass, "toy"]) + if args.percentile: + min_range = toy_graph.GetX()[int(toy_graph.GetN() * args.percentile[0])] + max_range = toy_graph.GetX()[int(toy_graph.GetN() * args.percentile[1])] + toy_hist = plot.makeHist1D("toys", args.bins, toy_graph, absoluteXrange=(min_range, max_range)) + elif args.range: + toy_hist = plot.makeHist1D("toys", args.bins, toy_graph, absoluteXrange=args.range) + else: + toy_hist = plot.makeHist1D("toys", args.bins, toy_graph, 1.15) + for i in range(toy_graph.GetN()): + toy_hist.Fill(toy_graph.GetX()[i]) + pValue = js[args.mass]["p"] + underflow_count = toy_hist.GetBinContent(0) + overflow_count = toy_hist.GetBinContent(args.bins + 1) + obs = plot.ToyTGraphFromJSON(js, [args.mass, "obs"]) + arr = ROOT.TArrow(obs.GetX()[0], 0.001, obs.GetX()[0], toy_hist.GetMaximum() / 8, 0.02, "<|") + # if axis is None: + # axis = plot.CreateAxisHists(1, graph_sets[-1].values()[0], True) + # DrawAxisHists(pads, axis, pads[0]) + arr.SetLineColor(ROOT.kBlue) 
+ arr.SetFillColor(ROOT.kBlue) + arr.SetFillStyle(1001) + arr.SetLineWidth(6) + arr.SetLineStyle(1) + arr.SetAngle(60) + toy_hist.Draw() + arr.Draw("<|same") + pads[0].RedrawAxis() + pads[0].RedrawAxis("g") + pads[0].GetFrame().Draw() + # axis[0].GetYaxis().SetTitle(args.y_title) + # axis[0].GetXaxis().SetTitle(args.x_title) + # axis[0].GetXaxis().SetLabelOffset(axis[0].GetXaxis().GetLabelOffset()*2) + + y_min, y_max = (plot.GetPadYMin(pads[0]), plot.GetPadYMax(pads[0])) + plot.FixBothRanges(pads[0], 0, 0, y_max, 0.25) + + ratio_graph_sets = [] + ratio_graphs = [] + + pads[0].cd() + if legend.GetNRows() == 1: + legend.SetY1(legend.GetY2() - 0.5 * (legend.GetY2() - legend.GetY1())) + legend.Draw() + + box = ROOT.TPave(pads[0].GetLeftMargin(), 0.81, 1 - pads[0].GetRightMargin(), 1 - pads[0].GetTopMargin(), 1, "NDC") + box.Draw() + + legend.Draw() + + plot.DrawCMSLogo(pads[0], "CMS", args.cms_sub, 11, 0.045, 0.035, 1.2, "", 0.8) + plot.DrawTitle(pads[0], args.title_right, 3) + plot.DrawTitle(pads[0], args.title_left, 1) + + textlabel = ROOT.TPaveText(0.68, 0.88, 0.80, 0.92, "NDC") + textlabel.SetBorderSize(0) + textlabel.SetFillStyle(0) + textlabel.SetTextAlign(32) + textlabel.SetTextSize(0.04) + textlabel.SetTextColor(1) + textlabel.SetTextFont(62) + textlabel.AddText(args.statistic + ", %s Toys" % (toy_graph.GetN())) + textlabel.Draw() + + pvalue = ROOT.TPaveText(0.68, 0.83, 0.80, 0.87, "NDC") + pvalue.SetBorderSize(0) + pvalue.SetFillStyle(0) + pvalue.SetTextAlign(32) + pvalue.SetTextSize(0.04) + pvalue.SetTextColor(1) + pvalue.SetTextFont(62) + pvalue.AddText("p-value = %0.3f" % pValue) + pvalue.Draw() + + arrow_not_in_range = (obs.GetX()[0] > toy_hist.GetBinLowEdge(args.bins + 1)) or (obs.GetX()[0] < toy_hist.GetBinLowEdge(0)) + + warningtext1 = ROOT.TPaveText(0.68, 0.78, 0.80, 0.82, "NDC") + warningtext1.SetBorderSize(0) + warningtext1.SetFillStyle(0) + warningtext1.SetTextAlign(32) + warningtext1.SetTextSize(0.04) + warningtext1.SetTextColor(2) + warningtext1.SetTextFont(62) + + if arrow_not_in_range and ((underflow_count != 0) or (overflow_count != 0)): + warningstrings = [] + if underflow_count != 0: + warningstrings.append("%d underflow" % underflow_count) + if overflow_count != 0: + warningstrings.append("%d overflow" % overflow_count) + warningtext1.AddText(", ".join(warningstrings)) + warningtext1.Draw() + + warningtext2 = ROOT.TPaveText(0.68, 0.73, 0.80, 0.77, "NDC") + warningtext2.SetBorderSize(0) + warningtext2.SetFillStyle(0) + warningtext2.SetTextAlign(32) + warningtext2.SetTextSize(0.04) + warningtext2.SetTextColor(2) + warningtext2.SetTextFont(62) + warningtext2.AddText("observed value not in range") + warningtext2.Draw() + else: + if (underflow_count != 0) or (overflow_count != 0): + warningstrings = [] + if underflow_count != 0: + warningstrings.append("%d underflow" % underflow_count) + if overflow_count != 0: + warningstrings.append("%d overflow" % overflow_count) + warningtext1.AddText(", ".join(warningstrings)) + elif arrow_not_in_range: + warningtext1.AddText("observed value not in range") + warningtext1.Draw() + + canv.Print(".pdf") + canv.Print(".png") diff --git a/scripts/plotImpacts.py b/scripts/plotImpacts.py new file mode 100755 index 00000000000..26bb7540506 --- /dev/null +++ b/scripts/plotImpacts.py @@ -0,0 +1,601 @@ +#!/usr/bin/env python3 +from __future__ import absolute_import +from __future__ import print_function +import ROOT +import math +import json +import argparse +import HiggsAnalysis.CombinedLimit.util.plotting as plot +import 
HiggsAnalysis.CombinedLimit.tool_base.rounding as rounding +import HiggsAnalysis.CombinedLimit.calculate_pulls as CP +import six +from six.moves import range + +ROOT.PyConfig.IgnoreCommandLineOptions = True +ROOT.gROOT.SetBatch(ROOT.kTRUE) +ROOT.TH1.AddDirectory(0) + + +def Translate(name, ndict): + return ndict[name] if name in ndict else name + + +def GetRounded(nom, e_hi, e_lo): + if e_hi < 0.0: + e_hi = 0.0 + if e_lo < 0.0: + e_lo = 0.0 + rounded = rounding.PDGRoundAsym(nom, e_hi if e_hi != 0.0 else 1.0, e_lo if e_lo != 0.0 else 1.0) + s_nom = rounding.downgradePrec(rounded[0], rounded[2]) + s_hi = rounding.downgradePrec(rounded[1][0][0], rounded[2]) if e_hi != 0.0 else "0" + s_lo = rounding.downgradePrec(rounded[1][0][1], rounded[2]) if e_lo != 0.0 else "0" + return (s_nom, s_hi, s_lo) + + +def IsConstrained(param_info): + return param_info["type"] != "Unconstrained" + + +parser = argparse.ArgumentParser() +parser.add_argument("--input", "-i", help="input json file") +parser.add_argument("--output", "-o", help="name of the output file to create") +parser.add_argument("--translate", "-t", help="JSON file for remapping of parameter names") +parser.add_argument("--units", default=None, help="Add units to the best-fit parameter value") +parser.add_argument("--per-page", type=int, default=30, help="Number of parameters to show per page") +parser.add_argument("--max-pages", type=int, default=None, help="Maximum number of pages to write") +parser.add_argument("--height", type=int, default=600, help="Canvas height, in pixels") +parser.add_argument("--left-margin", type=float, default=0.4, help="Left margin, expressed as a fraction") +parser.add_argument("--label-size", type=float, default=0.021, help="Parameter name label size") +parser.add_argument("--cms-label", default="Internal", help="Label next to the CMS logo") +parser.add_argument("--checkboxes", action="store_true", help="Draw an extra panel with filled checkboxes") +parser.add_argument("--blind", action="store_true", help="Do not print best fit signal strength") +parser.add_argument("--color-groups", default=None, help="Comma separated list of GROUP=COLOR") +parser.add_argument("--pullDef", default=None, help="Choose the definition of the pull, see HiggsAnalysis/CombinedLimit/python/calculate_pulls.py for options") +parser.add_argument("--POI", default=None, help="Specify a POI to draw") +parser.add_argument("--sort", "-s", choices=["impact", "constraint", "pull"], default="impact", help="The metric to sort the list of parameters") +parser.add_argument("--relative", "-r", action="store_true", help="Show impacts relative to the uncertainty on the POI") +parser.add_argument("--summary", action="store_true", help="Produce additional summary page, named [output]_summary.pdf") +args = parser.parse_args() + +externalPullDef = args.pullDef is not None + +# Dictionary to translate parameter names +translate = {} +if args.translate is not None: + with open(args.translate) as jsonfile: + translate = json.load(jsonfile) + +# Load the json output of combineTool.py -M Impacts +data = {} +with open(args.input) as jsonfile: + data = json.load(jsonfile) + +# Set the global plotting style +plot.ModTDRStyle(l=args.left_margin, b=0.10, width=(900 if args.checkboxes else 700), height=args.height) + +# We will assume the first POI is the one to plot +POIs = [ele["name"] for ele in data["POIs"]] +POI = POIs[0] +if args.POI: + POI = args.POI + +for ele in data["POIs"]: + if ele["name"] == POI: + POI_info = ele + break + +POI_fit = POI_info["fit"] + +# 
Pre-compute info for each parameter +params = data["params"] +for ele in params: + # Calculate impacts and relative impacts. Note that here the impacts are signed. + ele["impact_hi"] = ele[POI][2] - ele[POI][1] + ele["impact_lo"] = ele[POI][0] - ele[POI][1] + # Some care needed with the relative ones, since we don't know the signs of hi and lo. + # We want to divide any positive impact by the positive uncert. on the POI, and similar for negative. + # We also need to be careful in case the uncertainties on the POI came out as zero (shouldn't happen...) + if (POI_fit[2] - POI_fit[1]) > 0.0 and (POI_fit[1] - POI_fit[0]) > 0.0: + ele["impact_rel_hi"] = ele["impact_hi"] / ((POI_fit[2] - POI_fit[1]) if ele["impact_hi"] >= 0 else (POI_fit[1] - POI_fit[0])) + ele["impact_rel_lo"] = ele["impact_lo"] / ((POI_fit[2] - POI_fit[1]) if ele["impact_lo"] >= 0 else (POI_fit[1] - POI_fit[0])) + else: + ele["impact_rel_hi"] = 0.0 + ele["impact_rel_lo"] = 0.0 + if args.relative: + # Now we have a real problem, best throw: + raise RuntimeError("Relative impacts requested (--relative), but uncertainty on the POI is zero") + + if IsConstrained(ele): + pre = ele["prefit"] + fit = ele["fit"] + pre_err_hi = pre[2] - pre[1] + pre_err_lo = pre[1] - pre[0] + fit_err_hi = fit[2] - fit[1] + fit_err_lo = fit[1] - fit[0] + pull = CP.diffPullAsym(fit[1], pre[1], fit_err_hi, pre_err_hi, fit_err_lo, pre_err_lo) + ele["pull"] = pull[0] + # Under some conditions (very small constraint) the calculated pull is not reliable. + # In this case, pull[1] will have a non-zero value. + ele["pull_ok"] = pull[1] == 0 + if not ele["pull_ok"]: + print(">> Warning, the pull for {} could not be computed".format(ele["name"])) + ele["constraint"] = (fit[2] - fit[0]) / (pre[2] - pre[0]) + + if externalPullDef: + sc_fit, sc_fit_hi, sc_fit_lo = CP.returnPullAsym(args.pullDef, fit[1], pre[1], fit_err_hi, pre_err_hi, fit_err_lo, pre_err_lo) + else: + sc_fit = fit[1] - pre[1] + sc_fit = (sc_fit / pre_err_hi) if sc_fit >= 0 else (sc_fit / pre_err_lo) + sc_fit_hi = fit[2] - pre[1] + sc_fit_hi = (sc_fit_hi / pre_err_hi) if sc_fit_hi >= 0 else (sc_fit_hi / pre_err_lo) + sc_fit_hi = sc_fit_hi - sc_fit + sc_fit_lo = fit[0] - pre[1] + sc_fit_lo = (sc_fit_lo / pre_err_hi) if sc_fit_lo >= 0 else (sc_fit_lo / pre_err_lo) + sc_fit_lo = sc_fit - sc_fit_lo + ele["sc_fit"] = sc_fit + ele["sc_fit_hi"] = sc_fit_hi + ele["sc_fit_lo"] = sc_fit_lo + else: + # For unconstrained parameters there is no pull to define. 
For sorting purposes we + # still need to set a value, so will put it to zero + ele["pull"] = 0 + ele["pull_ok"] = False + ele["constraint"] = 9999.0 + +if args.sort == "pull": + data["params"].sort(key=lambda x: abs(x["pull"]), reverse=True) +elif args.sort == "impact": + data["params"].sort(key=lambda x: abs(x["impact_%s" % POI]), reverse=True) +elif args.sort == "constraint": + data["params"].sort(key=lambda x: abs(x["constraint"]), reverse=False) +else: + raise RuntimeError("This error should not have happened!") + +# Now compute each parameters ranking according to: largest pull, strongest constraint, and largest impact +ranking_pull = sorted([(i, abs(v["pull"])) for i, v in enumerate(params)], reverse=True, key=lambda X: X[1]) +ranking_constraint = sorted([(i, abs(v["constraint"])) for i, v in enumerate(params)], reverse=False, key=lambda X: X[1]) +ranking_impact = sorted([(i, abs(v["impact_{}".format(POI)])) for i, v in enumerate(params)], reverse=True, key=lambda X: X[1]) +for i in range(len(params)): + params[ranking_pull[i][0]]["rank_pull"] = i + 1 + params[ranking_impact[i][0]]["rank_impact"] = i + 1 + params[ranking_constraint[i][0]]["rank_constraint"] = i + 1 + + +if args.checkboxes: + cboxes = data["checkboxes"] + +# Set the number of parameters per page (show) and the number of pages (n) +show = args.per_page +n = int(math.ceil(float(len(data["params"])) / float(show))) +if args.max_pages is not None and args.max_pages > 0: + n = args.max_pages + +colors = {"Gaussian": 1, "Poisson": 8, "AsymmetricGaussian": 9, "Unconstrained": 39, "Unrecognised": 2} +color_hists = {} +color_group_hists = {} + +if args.color_groups is not None: + color_groups = {x.split("=")[0]: int(x.split("=")[1]) for x in args.color_groups.split(",")} + +seen_types = set() + +for name, col in six.iteritems(colors): + color_hists[name] = ROOT.TH1F() + plot.Set(color_hists[name], FillColor=col, Title=name) + +if args.color_groups is not None: + for name, col in six.iteritems(color_groups): + color_group_hists[name] = ROOT.TH1F() + plot.Set(color_group_hists[name], FillColor=col, Title=name) + + +def MakeSummaryPage(): + def FmtROOTNumber(val, digits=2): + if val >= 0.0: + fmt_str = "#plus{:." + str(digits) + "f}" + else: + fmt_str = "#minus{:." 
+ str(digits) + "f}" + return fmt_str.format(abs(val)) + + canv = ROOT.TCanvas(args.output, args.output) + canv.Divide(2, 2, 0.005, 0.005) + + latex = ROOT.TLatex() + + # min_pull = min([X['pull'] for X in params]) + # max_pull = max([X['pull'] for X in params]) + # TODO: Visualize larger pulls + h_pulls = ROOT.TH1F("pulls", "", 49, -5, 5) + + n_larger = [0, 0, 0] + n_entries = 0.0 + for par in params: + if par["pull_ok"]: + n_entries += 1.0 + h_pulls.Fill(par["pull"]) + a_pull = abs(par["pull"]) + if a_pull > 1.0: + n_larger[0] += 1 + if a_pull > 2.0: + n_larger[1] += 1 + if a_pull > 3.0: + n_larger[2] += 1 + f_normal = ROOT.TF1("normal", "[0]*exp(-0.5*((x-[1])/[2])**2)", -5, 5) + f_normal.SetParameter(0, 0.2 * h_pulls.Integral() / math.sqrt(2.0 * math.pi)) + f_normal.SetParameter(1, 0) + f_normal.SetParameter(2, 1) + canv.cd(1) + plot.Set(ROOT.gPad, TopMargin=0.12, LeftMargin=0.12, RightMargin=0.05, BottomMargin=0.10) + plot.Set(h_pulls.GetXaxis(), Title="Pull (s.d.)") + plot.Set(h_pulls.GetYaxis(), Title="Number of parameters", TitleOffset=0.9) + h_pulls.Draw("HIST") + f_normal.Draw("LSAME") + legend = ROOT.TLegend(0.62, 0.72, 0.92, 0.85, "", "NBNDC") + legend.AddEntry(h_pulls, "Pulls", "L") + legend.AddEntry(f_normal, "Gaussian(0,1)", "L") + legend.Draw() + plot.Set(latex, NDC=None, TextFont=42, TextSize=0.04, TextAlign=32) + latex.DrawLatex(0.3, 0.85, "N(> 1 s.d.)") + latex.DrawLatex(0.3, 0.8, "N(> 2 s.d.)") + latex.DrawLatex(0.3, 0.75, "N(> 3 s.d.)") + latex.DrawLatex(0.33, 0.85, "{}".format(n_larger[0])) + latex.DrawLatex(0.33, 0.8, "{}".format(n_larger[1])) + latex.DrawLatex(0.33, 0.75, "{}".format(n_larger[2])) + latex.DrawLatex(0.42, 0.85, "#color[2]{{{:.2f}}}".format(n_entries * 2.0 * ROOT.Math.normal_cdf_c(1.0))) + latex.DrawLatex(0.42, 0.8, "#color[2]{{{:.2f}}}".format(n_entries * 2.0 * ROOT.Math.normal_cdf_c(2.0))) + latex.DrawLatex(0.42, 0.75, "#color[2]{{{:.2f}}}".format(n_entries * 2.0 * ROOT.Math.normal_cdf_c(3.0))) + + plot.DrawCMSLogo(ROOT.gPad, "CMS", args.cms_label, 0, 0.20, 0.00, 0.00) + s_nom, s_hi, s_lo = GetRounded(POI_fit[1], POI_fit[2] - POI_fit[1], POI_fit[1] - POI_fit[0]) + if not args.blind: + plot.DrawTitle( + ROOT.gPad, + "#hat{%s} = %s^{#plus%s}_{#minus%s}%s" % (Translate(POI, translate), s_nom, s_hi, s_lo, "" if args.units is None else " " + args.units), + 3, + 0.27, + ) + + def SetTitleText(latex): + plot.Set(latex, NDC=None, TextFont=42, TextSize=0.06, TextAlign=12) + + def SetFormulaText(latex): + plot.Set(latex, NDC=None, TextFont=42, TextSize=0.06, TextAlign=32) + + def SetEntryText(latex): + plot.Set(latex, NDC=None, TextFont=42, TextSize=0.04, TextAlign=12) + + def SetEntryValue(latex): + plot.Set(latex, NDC=None, TextFont=42, TextSize=0.04, TextAlign=32) + + nDraw = 10 + max_y = 0.9 + min_y = 0.1 + # left_margin = 0.03 + # right_margin = 0.97 + row_h = (max_y - min_y) / float(nDraw) + boxes = [] + occurances = {} + + def DrawBoxes(color): + for i in range(nDraw): + box = ROOT.TPaveText(0.02, max_y - (float(i + 1) * row_h), 0.98, max_y - (float(i) * row_h), "NDC") + plot.Set(box, TextSize=0.02, BorderSize=0, FillColor=0, TextAlign=12, Margin=0.00) + if i % 2 == 1: + box.SetFillColor(color) + # box.AddText('%i' % (n_params - i + page * show)) + box.Draw() + boxes.append(box) + + canv.cd(2) + SetTitleText(latex) + latex.DrawLatex(0.03, 0.95, "Largest pulls") + SetFormulaText(latex) + latex.DrawLatex(0.97, 0.95, "(#hat{#theta}-#theta_{I})/#sqrt{#sigma_{I}^{2} - #sigma^{2}}") + + DrawBoxes(ROOT.kRed - 10) + for i in range(nDraw): + par = 
params[ranking_pull[i][0]] + if par["name"] not in occurances: + occurances[par["name"]] = [None, None, None] + occurances[par["name"]][0] = i + SetEntryText(latex) + latex.DrawLatex(0.03, max_y - ((float(i) + 0.5) * row_h), par["name"]) + SetEntryValue(latex) + latex.DrawLatex(0.97, max_y - ((float(i) + 0.5) * row_h), FmtROOTNumber(par["pull"])) + + canv.cd(3) + SetTitleText(latex) + latex.DrawLatex(0.03, 0.95, "Strongest constraints") + SetFormulaText(latex) + latex.DrawLatex(0.97, 0.95, "#sigma/#sigma_{I}") + DrawBoxes(ROOT.kGreen - 10) + for i in range(nDraw): + par = params[ranking_constraint[i][0]] + if par["name"] not in occurances: + occurances[par["name"]] = [None, None, None] + occurances[par["name"]][1] = i + SetEntryText(latex) + latex.DrawLatex(0.03, max_y - ((float(i) + 0.5) * ((0.9 - 0.1) / float(nDraw))), par["name"]) + SetEntryValue(latex) + latex.DrawLatex(0.97, max_y - ((float(i) + 0.5) * ((0.9 - 0.1) / float(nDraw))), "{:.2f}".format(par["constraint"])) + + canv.cd(4) + SetTitleText(latex) + latex.DrawLatex(0.03, 0.95, "Largest impacts") + SetFormulaText(latex) + latex.DrawLatex(0.97, 0.95, "#Delta{}(#pm#sigma_{{#theta}})/#sigma_{{{}}}".format(POI, POI)) + DrawBoxes(ROOT.kBlue - 10) + for i in range(nDraw): + par = params[ranking_impact[i][0]] + if par["name"] not in occurances: + occurances[par["name"]] = [None, None, None] + occurances[par["name"]][2] = i + SetEntryText(latex) + latex.DrawLatex(0.03, 0.9 - ((float(i) + 0.5) * ((0.9 - 0.1) / float(nDraw))), par["name"]) + SetEntryValue(latex) + latex.DrawLatex( + 0.97, + 0.9 - ((float(i) + 0.5) * ((0.9 - 0.1) / float(nDraw))), + "{{}}^{{{}}}_{{{}}}".format(FmtROOTNumber(par["impact_rel_hi"]), FmtROOTNumber(par["impact_rel_lo"])), + ) + + marker = ROOT.TMarker() + + marker_styles = [] + for style in [20, 23, 29, 34]: + for col in [1, 2, 3, 4, 6, 7, 15, ROOT.kOrange]: + marker_styles.append((style, col)) + curr_marker = 0 + for parname, entries in six.iteritems(occurances): + # print(parname, entries) + multiple = entries.count(None) <= 1 + if multiple: + plot.Set(marker, MarkerStyle=marker_styles[curr_marker][0], MarkerColor=marker_styles[curr_marker][1]) + if entries[0] is not None: + canv.cd(2) + marker.DrawMarker(0.01, 0.9 - ((float(entries[0]) + 0.5) * ((0.9 - 0.1) / float(nDraw)))) + if entries[1] is not None: + canv.cd(3) + marker.DrawMarker(0.01, 0.9 - ((float(entries[1]) + 0.5) * ((0.9 - 0.1) / float(nDraw)))) + if entries[2] is not None: + canv.cd(4) + marker.DrawMarker(0.01, 0.9 - ((float(entries[2]) + 0.5) * ((0.9 - 0.1) / float(nDraw)))) + curr_marker += 1 + if curr_marker >= len(marker_styles): + curr_marker = 0 + + canv.Print("{}_summary.pdf".format(args.output)) + + +if args.summary: + MakeSummaryPage() + +for page in range(n): + canv = ROOT.TCanvas(args.output, args.output) + n_params = len(data["params"][show * page : show * (page + 1)]) + pdata = data["params"][show * page : show * (page + 1)] + print(">> Doing page %i, have %i parameters" % (page, n_params)) + + boxes = [] + for i in range(n_params): + y1 = ROOT.gStyle.GetPadBottomMargin() + y2 = 1.0 - ROOT.gStyle.GetPadTopMargin() + h = (y2 - y1) / float(n_params) + y1 = y1 + float(i) * h + y2 = y1 + h + box = ROOT.TPaveText(0, y1, 1, y2, "NDC") + plot.Set(box, TextSize=0.02, BorderSize=0, FillColor=0, TextAlign=12, Margin=0.005) + if i % 2 == 0: + box.SetFillColor(18) + box.AddText("%i" % (n_params - i + page * show)) + box.Draw() + boxes.append(box) + + # Crate and style the pads + if args.checkboxes: + pads = plot.MultiRatioSplitColumns([0.54, 
0.24], [0.0, 0.0], [0.0, 0.0]) + pads[2].SetGrid(1, 0) + else: + pads = plot.MultiRatioSplitColumns([0.7], [0.0], [0.0]) + pads[0].SetGrid(1, 0) + pads[0].SetTickx(1) + pads[1].SetGrid(1, 0) + pads[1].SetTickx(1) + + min_pull = -0.9 + max_pull = +0.9 + + g_fit = ROOT.TGraphAsymmErrors(n_params) + g_pull = ROOT.TGraph(n_params) + g_impacts_hi = ROOT.TGraphAsymmErrors(n_params) + g_impacts_lo = ROOT.TGraphAsymmErrors(n_params) + g_check = ROOT.TGraphAsymmErrors() + g_check_i = 0 + + impt_prefix = "impact" + if args.relative: + impt_prefix = "impact_rel" + + max_impact = 0.0 + + text_entries = [] + redo_boxes = [] + y_bin_labels = [] + for p in range(n_params): + par = pdata[p] + i = n_params - (p + 1) + pre = par["prefit"] + fit = par["fit"] + tp = par["type"] + seen_types.add(tp) + if IsConstrained(par): + if par["pull"] < min_pull: + min_pull = float(int(par["pull"]) - 1) + if par["pull"] > max_pull: + max_pull = float(int(par["pull"]) + 1) + if (par["sc_fit"] - par["sc_fit_lo"]) < min_pull: + min_pull = float(int(par["sc_fit"] - par["sc_fit_lo"]) - 1) + if (par["sc_fit"] + par["sc_fit_hi"]) > max_pull: + max_pull = float(int(par["sc_fit"] + par["sc_fit_hi"]) + 1) + + g_fit.SetPoint(i, par["sc_fit"], float(i) + 0.7) + g_fit.SetPointError(i, par["sc_fit_lo"], par["sc_fit_hi"], 0.0, 0.0) + if par["pull_ok"]: + g_pull.SetPoint(i, par["pull"], float(i) + 0.3) + else: + # If pull not valid, hide it + g_pull.SetPoint(i, 0.0, 9999.0) + else: + # Hide this point + g_fit.SetPoint(i, 0.0, 9999.0) + g_pull.SetPoint(i, 0.0, 9999.0) + y1 = ROOT.gStyle.GetPadBottomMargin() + y2 = 1.0 - ROOT.gStyle.GetPadTopMargin() + x1 = ROOT.gStyle.GetPadLeftMargin() + h = (y2 - y1) / float(n_params) + y1 = y1 + ((float(i) + 0.5) * h) + x1 = x1 + (1 - pads[0].GetRightMargin() - x1) / 2.0 + s_nom, s_hi, s_lo = GetRounded(fit[1], fit[2] - fit[1], fit[1] - fit[0]) + text_entries.append((x1, y1, "%s^{#plus%s}_{#minus%s}" % (s_nom, s_hi, s_lo))) + redo_boxes.append(i) + g_impacts_hi.SetPoint(i, 0, float(i) + 0.5) + g_impacts_lo.SetPoint(i, 0, float(i) + 0.5) + if args.checkboxes: + pboxes = pdata[p]["checkboxes"] + for pbox in pboxes: + cboxes.index(pbox) + g_check.SetPoint(g_check_i, cboxes.index(pbox) + 0.5, float(i) + 0.5) + g_check_i += 1 + imp = pdata[p][POI] + g_impacts_hi.SetPointError(i, 0, par[impt_prefix + "_hi"], 0.5, 0.5) + g_impacts_lo.SetPointError(i, -1.0 * par[impt_prefix + "_lo"], 0, 0.5, 0.5) + max_impact = max(max_impact, abs(par[impt_prefix + "_hi"]), abs(par[impt_prefix + "_lo"])) + col = colors.get(tp, 2) + if args.color_groups is not None and len(pdata[p]["groups"]) >= 1: + for p_grp in pdata[p]["groups"]: + if p_grp in color_groups: + col = color_groups[p_grp] + break + y_bin_labels.append((i, col, pdata[p]["name"])) + + h_pulls = ROOT.TH2F("pulls", "pulls", 6, -2.9, +2.9, n_params, 0, n_params) + for i, col, name in y_bin_labels: + h_pulls.GetYaxis().SetBinLabel(i + 1, ("#color[%i]{%s}" % (col, Translate(name, translate)))) + + # Style and draw the pulls histo + if externalPullDef: + plot.Set(h_pulls.GetXaxis(), TitleSize=0.04, LabelSize=0.03, Title=CP.returnTitle(args.pullDef)) + else: + plot.Set( + h_pulls.GetXaxis(), + TitleSize=0.04, + LabelSize=0.03, + Title="#scale[0.7]{(#hat{#theta}-#theta_{I})/#sigma_{I} #color[4]{(#hat{#theta}-#theta_{I})/#sqrt{#sigma_{I}^{2} - #sigma^{2}}}}", + ) + + plot.Set(h_pulls.GetYaxis(), LabelSize=args.label_size, TickLength=0.0) + h_pulls.GetYaxis().LabelsOption("v") + h_pulls.Draw() + + for i in redo_boxes: + newbox = boxes[i].Clone() + newbox.Clear() + 
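+        # Shrink the cloned box slightly so the alternating shading does not
+        # paint over the pad frame; the hard-coded 0.7 corresponds to the
+        # default column split of the pulls pad created above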
+        newbox.SetY1(newbox.GetY1() + 0.005)
+        newbox.SetY2(newbox.GetY2() - 0.005)
+        newbox.SetX1(ROOT.gStyle.GetPadLeftMargin() + 0.001)
+        newbox.SetX2(0.7 - 0.001)
+        newbox.Draw()
+        boxes.append(newbox)
+    latex = ROOT.TLatex()
+    latex.SetNDC()
+    latex.SetTextFont(42)
+    latex.SetTextSize(0.02)
+    latex.SetTextAlign(22)
+    for entry in text_entries:
+        latex.DrawLatex(*entry)
+
+    # Go to the other pad and draw the impacts histo
+    pads[1].cd()
+    if max_impact == 0.0:
+        max_impact = 1e-6  # otherwise the plotting gets screwed up
+    h_impacts = ROOT.TH2F("impacts", "impacts", 6, -max_impact * 1.1, max_impact * 1.1, n_params, 0, n_params)
+    impt_x_title = "#Delta#hat{%s}" % (Translate(POI, translate))
+    if args.relative:
+        impt_x_title = "#Delta#hat{%s}/#sigma_{%s}" % (Translate(POI, translate), Translate(POI, translate))
+
+    plot.Set(h_impacts.GetXaxis(), LabelSize=0.03, TitleSize=0.04, Ndivisions=505, Title=impt_x_title)
+    plot.Set(h_impacts.GetYaxis(), LabelSize=0, TickLength=0.0)
+    h_impacts.Draw()
+
+    if args.checkboxes:
+        pads[2].cd()
+        h_checkboxes = ROOT.TH2F("checkboxes", "checkboxes", len(cboxes), 0, len(cboxes), n_params, 0, n_params)
+        for i, cbox in enumerate(cboxes):
+            h_checkboxes.GetXaxis().SetBinLabel(i + 1, Translate(cbox, translate))
+        plot.Set(h_checkboxes.GetXaxis(), LabelSize=0.03, LabelOffset=0.002)
+        h_checkboxes.GetXaxis().LabelsOption("v")
+        plot.Set(h_checkboxes.GetYaxis(), LabelSize=0, TickLength=0.0)
+        h_checkboxes.Draw()
+        # g_check.SetFillColor(ROOT.kGreen)
+        g_check.Draw("PSAME")
+
+    # Back to the first pad to draw the pulls graph
+    pads[0].cd()
+    plot.Set(g_fit, MarkerSize=0.7, LineWidth=2)
+    g_fit.Draw("PSAME")
+    plot.Set(g_pull, MarkerSize=0.8, LineWidth=2, MarkerStyle=5, MarkerColor=4)
+    g_pull.Draw("PSAME")
+    # And back to the second pad to draw the impacts graphs
+    pads[1].cd()
+    alpha = 0.7
+
+    lo_color = {"default": 38, "hesse": ROOT.kOrange - 3, "robust": ROOT.kGreen + 1}
+    hi_color = {"default": 46, "hesse": ROOT.kBlue, "robust": ROOT.kAzure - 5}
+    method = "default"
+    if "method" in data and data["method"] in lo_color:
+        method = data["method"]
+    g_impacts_hi.SetFillColor(plot.CreateTransparentColor(hi_color[method], alpha))
+    g_impacts_hi.Draw("2SAME")
+    g_impacts_lo.SetFillColor(plot.CreateTransparentColor(lo_color[method], alpha))
+    g_impacts_lo.Draw("2SAME")
+    pads[1].RedrawAxis()
+
+    legend = ROOT.TLegend(0.02, 0.02, 0.35, 0.09, "", "NBNDC")
+    legend.SetNColumns(2)
+    legend.AddEntry(g_fit, "Fit", "LP")
+    legend.AddEntry(g_impacts_hi, "+1#sigma Impact", "F")
+    legend.AddEntry(g_pull, "Pull", "P")
+    legend.AddEntry(g_impacts_lo, "-1#sigma Impact", "F")
+    legend.Draw()
+
+    leg_width = pads[0].GetLeftMargin() - 0.01
+    if args.color_groups is not None:
+        legend2 = ROOT.TLegend(0.01, 0.94, leg_width, 0.99, "", "NBNDC")
+        legend2.SetNColumns(2)
+        for name, h in six.iteritems(color_group_hists):
+            legend2.AddEntry(h, Translate(name, translate), "F")
+        legend2.Draw()
+    elif len(seen_types) > 1:
+        legend2 = ROOT.TLegend(0.01, 0.94, leg_width, 0.99, "", "NBNDC")
+        legend2.SetNColumns(2)
+        for name, h in six.iteritems(color_hists):
+            if name == "Unrecognised":
+                continue
+            legend2.AddEntry(h, name, "F")
+        legend2.Draw()
+
+    plot.DrawCMSLogo(pads[0], "CMS", args.cms_label, 0, 0.25, 0.00, 0.00)
+    s_nom, s_hi, s_lo = GetRounded(POI_fit[1], POI_fit[2] - POI_fit[1], POI_fit[1] - POI_fit[0])
+    if not args.blind:
+        plot.DrawTitle(
+            pads[1],
+            "#hat{%s} = %s^{#plus%s}_{#minus%s}%s" % (Translate(POI, translate), s_nom, s_hi, s_lo, "" if args.units is None else " " + args.units),
+            3,
+            0.27,
+        )
+    extra = ""
+    if page == 0:
+        extra = "("
+    if page == n - 1:
+        extra = ")"
+    canv.Print(".pdf%s" % extra)
diff --git a/scripts/plotLimitGrid.py b/scripts/plotLimitGrid.py
new file mode 100755
index 00000000000..b380830f74b
--- /dev/null
+++ b/scripts/plotLimitGrid.py
@@ -0,0 +1,369 @@
+#!/usr/bin/env python3
+from __future__ import absolute_import
+from __future__ import print_function
+import HiggsAnalysis.CombinedLimit.util.plotting as plot
+import ROOT
+import argparse
+from six.moves import range
+
+ROOT.PyConfig.IgnoreCommandLineOptions = True
+ROOT.gROOT.SetBatch(ROOT.kTRUE)
+
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument(
+    "input",
+    help="""ROOT file containing the output of the
+    combineTool.py AsymptoticGrid or HybridNewGrid methods""",
+)
+parser.add_argument(
+    "--output",
+    "-o",
+    default="limit_grid_output",
+    help="""Name of the output
+    plot without file extension""",
+)
+parser.add_argument(
+    "--contours",
+    default="exp-2,exp-1,exp0,exp+1,exp+2,obs",
+    help="""List of
+    contours to plot. These must correspond to the names of the TGraph2D
+    objects in the input file""",
+)
+parser.add_argument(
+    "--bin-method",
+    default="BinEdgeAligned",
+    help="""One of BinEdgeAligned or
+    BinCenterAligned. See plotting.py documentation for details.""",
+)
+parser.add_argument(
+    "--debug-output",
+    "-d",
+    help="""If specified, write the contour TH2s and
+    TGraphs into this output ROOT file""",
+)
+parser.add_argument("--CL", default=0.95, type=float, help="""Confidence level for contours""")
+parser.add_argument("--x-title", default="m_{A} (GeV)", help="""Title for the x-axis""")
+parser.add_argument("--y-title", default="tan#beta", help="""Title for the y-axis""")
+parser.add_argument("--x-range", default=None, type=str, help="""x-axis range""")
+parser.add_argument("--y-range", default=None, type=str, help="""y-axis range""")
+parser.add_argument("--cms-sub", default="Internal", help="""Text below the CMS logo""")
+parser.add_argument(
+    "--scenario-label",
+    default="",
+    help="""Scenario name to be drawn in top
+    left of plot""",
+)
+parser.add_argument("--title-right", default="", help="""Right header text above the frame""")
+parser.add_argument("--title-left", default="", help="""Left header text above the frame""")
+parser.add_argument("--logy", action="store_true", help="""Draw y-axis in log scale""")
+parser.add_argument("--logx", action="store_true", help="""Draw x-axis in log scale""")
+parser.add_argument(
+    "--force-x-width",
+    type=float,
+    default=None,
+    help="""Use this x bin width in
+    BinCenterAligned mode""",
+)
+parser.add_argument(
+    "--force-y-width",
+    type=float,
+    default=None,
+    help="""Use this y bin width in
+    BinCenterAligned mode""",
+)
+parser.add_argument(
+    "--hist",
+    default=None,
+    help="""Draw this TGraph2D as a histogram with
+    COLZ""",
+)
+parser.add_argument(
+    "--model-hist",
+    default=None,
+    help="""Draw this TGraph2D from model file as a
+    histogram with COLZ""",
+)
+parser.add_argument(
+    "--z-range",
+    default=None,
+    type=str,
+    help="""z-axis range of the COLZ
+    hist""",
+)
+parser.add_argument("--z-title", default=None, help="""z-axis title of the COLZ hist""")
+parser.add_argument(
+    "--extra_contour_file",
+    default=None,
+    help="""Root file containing graphs
+    to be superimposed on plots""",
+)
+parser.add_argument(
+    "--extra_contour_title",
+    default="",
+    help="""Legend label for extra
+    contours""",
+)
+parser.add_argument(
+    "--extra_contour_style",
+    default="",
+    help="""Line style for plotting
+    extra contours""",
+)
+parser.add_argument(
+    "--model_file",
+    default=None,
+    help="""Model file for drawing mh
+    exclusion""",
+)
+parser.add_argument(
+    "--mass_histogram",
+    default="m_h",
+    help="""Specify histogram to extract
+    mh exclusion from""",
+)
+args = parser.parse_args()
+
+
+plot.ModTDRStyle(r=0.06 if (args.hist or args.model_hist) is None else 0.17, l=0.12)
+ROOT.gStyle.SetNdivisions(510, "XYZ")
+plot.SetBirdPalette()
+
+file = ROOT.TFile(args.input)
+types = args.contours.split(",")
+CL = 1 - args.CL
+
+# Object storage
+graphs = {c: file.Get(c) for c in types}
+hists = {}
+contours = {}
+
+h_proto = plot.TH2FromTGraph2D(graphs[types[0]], method=args.bin_method, force_x_width=args.force_x_width, force_y_width=args.force_y_width)
+h_axis = h_proto
+h_axis = plot.TH2FromTGraph2D(graphs[types[0]])
+
+
+# Get histogram to plot m_h exclusion from the model file if provided
+if args.model_file is not None:
+    modelfile = ROOT.TFile(args.model_file)
+    h_mh = modelfile.Get(args.mass_histogram)
+else:
+    h_mh = None
+
+# Get extra contours from file, if provided:
+if args.extra_contour_file is not None:
+    contour_files = args.extra_contour_file.split(",")
+    extra_contours = []
+    for filename in contour_files:
+        extra_contour_file = ROOT.TFile(filename)
+        extra_contour_file_contents = extra_contour_file.GetListOfKeys()
+        extra_contour_names = []
+        for i in range(0, len(extra_contour_file_contents)):
+            extra_contour_names.append(extra_contour_file_contents[i].GetName())
+        extra_contours_per_index = [extra_contour_file.Get(c) for c in extra_contour_names]
+        extra_contours.append(extra_contours_per_index)
+else:
+    extra_contours = None
+
+# Create the debug output file if requested
+if args.debug_output is not None:
+    debug = ROOT.TFile(args.debug_output, "RECREATE")
+else:
+    debug = None
+
+# Fill TH2s by interpolating the TGraph2Ds, then extract contours
+for c in types:
+    print("Filling histo for %s" % c)
+    hists[c] = h_proto.Clone(c)
+    plot.fillTH2(hists[c], graphs[c])
+    contours[c] = plot.contourFromTH2(hists[c], CL, 5, frameValue=1)
+    if debug is not None:
+        debug.WriteTObject(hists[c], "hist_%s" % c)
+        for i, cont in enumerate(contours[c]):
+            debug.WriteTObject(cont, "cont_%s_%i" % (c, i))
+
+# Extract mh contours if mh histogram exists:
+if h_mh is not None:
+    h_mh_inverted = h_mh.Clone("mhInverted")
+    for i in range(1, h_mh.GetNbinsX() + 1):
+        for j in range(1, h_mh.GetNbinsY() + 1):
+            h_mh_inverted.SetBinContent(i, j, 1 - (1.0 / h_mh.GetBinContent(i, j)))
+    mh122_contours = plot.contourFromTH2(h_mh_inverted, (1 - 1.0 / 122), 5, frameValue=1)
+    mh128_contours = plot.contourFromTH2(h_mh, 128, 5, frameValue=1)
+else:
+    mh122_contours = None
+    mh128_contours = None
+
+# Setup the canvas: we'll use a two pad split, with a small top pad to contain
+# the CMS logo and the legend
+canv = ROOT.TCanvas(args.output, args.output)
+pads = plot.TwoPadSplit(0.8, 0, 0)
+pads[1].cd()
+h_axis.GetXaxis().SetTitle(args.x_title)
+h_axis.GetYaxis().SetTitle(args.y_title)
+if args.x_range is not None:
+    h_axis.GetXaxis().SetRangeUser(float(args.x_range.split(",")[0]), float(args.x_range.split(",")[1]))
+if args.y_range is not None:
+    h_axis.GetYaxis().SetRangeUser(float(args.y_range.split(",")[0]), float(args.y_range.split(",")[1]))
+h_axis.GetXaxis().SetNdivisions(5, 5, 0)
+h_axis.Draw()
+
+if args.hist is not None:
+    colzhist = h_proto.Clone(c)
+    plot.fillTH2(colzhist, file.Get(args.hist))
+    colzhist.SetContour(255)
+    colzhist.Draw("COLZSAME")
+    colzhist.GetZaxis().SetLabelSize(0.03)
+    if args.z_range is not None:
+        colzhist.SetMinimum(float(args.z_range.split(",")[0]))
+        colzhist.SetMaximum(float(args.z_range.split(",")[1]))
+    if args.z_title is not None:
+        colzhist.GetZaxis().SetTitle(args.z_title)
+
+if args.model_hist is not None:
+    colzhist = modelfile.Get(args.model_hist)
+    colzhist.SetContour(255)
+    colzhist.Draw("COLZSAME")
+    colzhist.GetZaxis().SetLabelSize(0.03)
+    if args.z_range is not None:
+        colzhist.SetMinimum(float(args.z_range.split(",")[0]))
+        colzhist.SetMaximum(float(args.z_range.split(",")[1]))
+    if args.z_title is not None:
+        colzhist.GetZaxis().SetTitle(args.z_title)
+
+pads[1].SetLogy(args.logy)
+pads[1].SetLogx(args.logx)
+pads[1].SetTickx()
+pads[1].SetTicky()
+# h_proto.GetXaxis().SetRangeUser(130,400)
+# h_proto.GetYaxis().SetRangeUser(1,20)
+
+fillstyle = "FSAME"
+if (args.hist or args.model_hist) is not None:
+    fillstyle = "LSAME"
+
+# Now we draw the actual contours
+if "exp-2" in contours and "exp+2" in contours:
+    for i, gr in enumerate(contours["exp-2"]):
+        plot.Set(gr, LineColor=0, FillColor=ROOT.kGray + 0, FillStyle=1001)
+        if (args.hist or args.model_hist) is not None:
+            plot.Set(gr, LineColor=ROOT.kGray + 0, LineWidth=2)
+        gr.Draw(fillstyle)
+if "exp-1" in contours and "exp+1" in contours:
+    for i, gr in enumerate(contours["exp-1"]):
+        plot.Set(gr, LineColor=0, FillColor=ROOT.kGray + 1, FillStyle=1001)
+        if (args.hist or args.model_hist) is not None:
+            plot.Set(gr, LineColor=ROOT.kGray + 1, LineWidth=2)
+        gr.Draw(fillstyle)
+    fill_col = ROOT.kGray + 0
+    # If we're only drawing the 1 sigma contours then we should fill with
+    # white here instead
+    if "exp-2" not in contours and "exp+2" not in contours:
+        fill_col = ROOT.kWhite
+    for i, gr in enumerate(contours["exp+1"]):
+        plot.Set(gr, LineColor=0, FillColor=fill_col, FillStyle=1001)
+        if (args.hist or args.model_hist) is not None:
+            plot.Set(gr, LineColor=ROOT.kGray + 1, LineWidth=2)
+        gr.Draw(fillstyle)
+if "exp-2" in contours and "exp+2" in contours:
+    for i, gr in enumerate(contours["exp+2"]):
+        plot.Set(gr, LineColor=0, FillColor=ROOT.kWhite, FillStyle=1001)
+        if (args.hist or args.model_hist) is not None:
+            plot.Set(gr, LineColor=ROOT.kGray + 0, LineWidth=2)
+        gr.Draw(fillstyle)
+if "exp0" in contours:
+    for i, gr in enumerate(contours["exp0"]):
+        if (args.hist or args.model_hist) is not None:
+            plot.Set(gr, LineWidth=2)
+        if "obs" in contours:
+            plot.Set(gr, LineColor=ROOT.kBlack, LineStyle=2)
+            gr.Draw("LSAME")
+        else:
+            plot.Set(gr, LineStyle=2, FillStyle=1001, FillColor=plot.CreateTransparentColor(ROOT.kSpring + 6, 0.5))
+            gr.Draw(fillstyle)
+            gr.Draw("LSAME")
+if "obs" in contours:
+    for i, gr in enumerate(contours["obs"]):
+        plot.Set(gr, FillStyle=1001, FillColor=plot.CreateTransparentColor(ROOT.kAzure + 6, 0.5))
+        if (args.hist or args.model_hist) is not None:
+            plot.Set(gr, LineWidth=2)
+        gr.Draw(fillstyle)
+        gr.Draw("LSAME")
+
+if mh122_contours is not None:
+    for i, gr in enumerate(mh122_contours):
+        plot.Set(gr, LineWidth=2, LineColor=ROOT.kRed, FillStyle=3004, FillColor=ROOT.kRed)
+        gr.Draw(fillstyle)
+        gr.Draw("LSAME")
+    for i, gr in enumerate(mh128_contours):
+        plot.Set(gr, LineWidth=2, LineColor=ROOT.kRed, FillStyle=3004, FillColor=ROOT.kRed)
+        gr.Draw(fillstyle)
+        gr.Draw("LSAME")
+
+if extra_contours is not None:
+    if args.extra_contour_style is not None:
+        contour_styles = args.extra_contour_style.split(",")
+    for i in range(0, len(extra_contours)):
+        for gr in extra_contours[i]:
+            plot.Set(gr, LineWidth=2, LineColor=ROOT.kBlue, LineStyle=int(contour_styles[i]))
+            gr.Draw("LSAME")
+
+
+# We just want the top pad to look like a box, so set all the text and tick
+# sizes to zero
+pads[0].cd()
+h_top = h_axis.Clone()
+plot.Set(h_top.GetXaxis(), LabelSize=0, TitleSize=0, TickLength=0)
+plot.Set(h_top.GetYaxis(), LabelSize=0, TitleSize=0, TickLength=0)
+h_top.Draw()
+
+# Draw the legend in the top TPad
+legend = plot.PositionedLegend(0.4, 0.11, 3, 0.015)
+plot.Set(legend, NColumns=2, Header="#bf{%.0f%% CL Excluded:}" % (args.CL * 100.0))
+if "obs" in contours:
+    legend.AddEntry(contours["obs"][0], "Observed", "F")
+if "exp-1" in contours and "exp+1" in contours:
+    legend.AddEntry(contours["exp-1"][0], "68% expected", "F")
+if "exp0" in contours:
+    if "obs" in contours:
+        legend.AddEntry(contours["exp0"][0], "Expected", "L")
+    else:
+        legend.AddEntry(contours["exp0"][0], "Expected", "F")
+if "exp-2" in contours and "exp+2" in contours:
+    legend.AddEntry(contours["exp-2"][0], "95% expected", "F")
+if extra_contours is not None:
+    if args.extra_contour_title is not None:
+        contour_title = args.extra_contour_title.split(",")
+        for i in range(0, len(contour_title)):
+            legend.AddEntry(extra_contours[i][0], contour_title[i], "L")
+legend.Draw()
+
+# Draw logos and titles
+plot.DrawCMSLogo(pads[0], "CMS", args.cms_sub, 11, 0.045, 0.15, 1.0, "", 1.0)
+plot.DrawTitle(pads[0], args.title_right, 3)
+plot.DrawTitle(pads[0], args.title_left, 1)
+
+
+# Redraw the frame because it usually gets covered by the filled areas
+pads[1].cd()
+pads[1].GetFrame().Draw()
+pads[1].RedrawAxis()
+
+if mh122_contours is not None and len(mh122_contours) > 0:
+    legend2 = ROOT.TLegend(0.6, 0.18, 0.92, 0.23, "", "NBNDC")
+    # legend2 = plot.PositionedLegend(0.4, 0.11, 3, 0.015)
+    legend2.AddEntry(mh122_contours[0], "m_{h}^{MSSM} #neq 125 #pm 3 GeV", "F")
+    legend2.Draw()
+
+
+# Draw the scenario label
+latex = ROOT.TLatex()
+latex.SetNDC()
+latex.SetTextSize(0.04)
+latex.DrawLatex(0.155, 0.75, args.scenario_label)
+
+canv.Print(".pdf")
+canv.Print(".png")
+canv.Close()
+
+if debug is not None:
+    debug.Close()
diff --git a/scripts/plotLimits.py b/scripts/plotLimits.py
new file mode 100755
index 00000000000..4956d5e1abe
--- /dev/null
+++ b/scripts/plotLimits.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+from __future__ import absolute_import
+from __future__ import print_function
+import ROOT
+import HiggsAnalysis.CombinedLimit.util.plotting as plot
+import argparse
+
+# import CombineHarvester.CombineTools.maketable as maketable
+
+parser = argparse.ArgumentParser()
+parser.add_argument("input", nargs="+", help="""Input json files""")
+parser.add_argument(
+    "--output",
+    "-o",
+    default="limit",
+    help="""Name of the output
+    plot without file extension""",
+)
+parser.add_argument("--show", default="exp,obs")
+# parser.add_argument(
+#     '--debug-output', '-d', help="""If specified, write the
+#     TGraphs into this output ROOT file""")
+parser.add_argument("--x-title", default="m_{H} (GeV)", help="""Title for the x-axis""")
+parser.add_argument("--y-title", default=None, help="""Title for the y-axis""")
+parser.add_argument("--limit-on", default="#sigma/#sigma_{SM}", help="""Shortcut for setting the y-axis label""")
+parser.add_argument("--cms-sub", default="Internal", help="""Text below the CMS logo""")
+parser.add_argument(
+    "--scenario-label",
+    default="",
+    help="""Scenario name to be drawn in top
+    left of plot""",
+)
+parser.add_argument("--title-right", default="", help="""Right header text above the frame""")
+parser.add_argument("--title-left", default="", help="""Left header text above the frame""")
+parser.add_argument("--logy", action="store_true", help="""Draw y-axis in log scale""")
+parser.add_argument("--logx", action="store_true", help="""Draw x-axis in log scale""")
+parser.add_argument("--ratio-to", default=None)
+parser.add_argument("--pad-style", default=None, help="""Extra style options for the pad, e.g. Grid=(1,1)""")
+parser.add_argument("--auto-style", nargs="?", const="", default=None, help="""Take line colors and styles from a pre-defined list""")
+parser.add_argument("--table_vals", help="Number of values to be written in a table for different masses", default=10)
+args = parser.parse_args()
+
+
+def DrawAxisHists(pads, axis_hists, def_pad=None):
+    for i, pad in enumerate(pads):
+        pad.cd()
+        axis_hists[i].Draw("AXIS")
+        axis_hists[i].Draw("AXIGSAME")
+    if def_pad is not None:
+        def_pad.cd()
+
+
+## Boilerplate
+ROOT.PyConfig.IgnoreCommandLineOptions = True
+ROOT.gROOT.SetBatch(ROOT.kTRUE)
+plot.ModTDRStyle()
+ROOT.gStyle.SetNdivisions(510, "XYZ")  # probably looks better
+
+canv = ROOT.TCanvas(args.output, args.output)
+
+if args.ratio_to is not None:
+    pads = plot.TwoPadSplit(0.30, 0.01, 0.01)
+else:
+    pads = plot.OnePad()
+
+# Set the style options of the pads
+for padx in pads:
+    # Use tick marks on opposite axis edges
+    plot.Set(padx, Tickx=1, Ticky=1, Logx=args.logx)
+    if args.pad_style is not None:
+        settings = {x.split("=")[0]: eval(x.split("=")[1]) for x in args.pad_style.split(",")}
+        print("Applying style options to the TPad(s):")
+        print(settings)
+        plot.Set(padx, **settings)
+
+graphs = []
+graph_sets = []
+
+legend = plot.PositionedLegend(0.45, 0.10, 3, 0.015)
+plot.Set(legend, NColumns=2)
+
+axis = None
+
+defcols = [
+    ROOT.kGreen + 3,
+    ROOT.kRed,
+    ROOT.kBlue,
+    ROOT.kBlack,
+    ROOT.kYellow + 2,
+    ROOT.kOrange + 10,
+    ROOT.kCyan + 3,
+    ROOT.kMagenta + 2,
+    ROOT.kViolet - 5,
+    ROOT.kGray,
+]
+
+deflines = [1, 2, 3]
+
+if args.auto_style is not None:
+    icol = {x: 0 for x in args.auto_style.split(",")}
+    icol["default"] = 0
+    iline = {}
+    iline["default"] = 1
+    for i, x in enumerate(args.auto_style.split(",")):
+        iline[x] = i + 1
+
+# Process each input argument
+for src in args.input:
+    splitsrc = src.split(":")
+    file = splitsrc[0]
+    # limit.json => Draw as full obs + exp limit band
+    if len(splitsrc) == 1:
+        graph_sets.append(plot.StandardLimitsFromJSONFile(file, args.show.split(",")))
+        if axis is None:
+            axis = plot.CreateAxisHists(len(pads), list(graph_sets[-1].values())[0], True)
+            DrawAxisHists(pads, axis, pads[0])
+        plot.StyleLimitBand(graph_sets[-1])
+        plot.DrawLimitBand(pads[0], graph_sets[-1], legend=legend)
+        pads[0].RedrawAxis()
+        pads[0].RedrawAxis("g")
+        pads[0].GetFrame().Draw()
+
+    # limit.json:X => Draw a single graph for entry X in the json file
+    # 'limit.json:X:Title="Blah",LineColor=4,...' =>
+    #     as before but also apply style options to TGraph
+    elif len(splitsrc) >= 2:
+        settings = {}
+        settings["Title"] = src
+        if args.auto_style is not None:
+            nm = "default"
+            for x in icol.keys():
+                if x in splitsrc[1]:
+                    nm = x
+            i = icol[nm]  # take the next default color...
+            j = iline[nm]  # take the next default line style...
+ settings["LineColor"] = defcols[i] + settings["MarkerColor"] = defcols[i] + settings["LineStyle"] = j + icol[nm] = (i + 1) if (i + 1) < len(defcols) else 0 + graphs.append(plot.LimitTGraphFromJSONFile(file, splitsrc[1])) + if len(splitsrc) >= 3: + settings.update({x.split("=")[0]: eval(x.split("=")[1]) for x in splitsrc[2].split(",")}) + plot.Set(graphs[-1], **settings) + if axis is None: + axis = plot.CreateAxisHists(len(pads), graphs[-1], True) + DrawAxisHists(pads, axis, pads[0]) + graphs[-1].Draw("PLSAME") + legend.AddEntry(graphs[-1], "", "PL") + + +axis[0].GetYaxis().SetTitle("95%% CL limit on %s" % args.limit_on) +if args.y_title is not None: + axis[0].GetYaxis().SetTitle(args.y_title) +axis[0].GetXaxis().SetTitle(args.x_title) +axis[0].GetXaxis().SetLabelOffset(axis[0].GetXaxis().GetLabelOffset() * 2) + +if args.logy: + axis[0].SetMinimum(0.1) # we'll fix this later + pads[0].SetLogy(True) + # axis[0].GetYaxis().SetMoreLogLabels() + # axis[0].SetNdivisions(50005, "X") + +y_min, y_max = (plot.GetPadYMin(pads[0]), plot.GetPadYMax(pads[0])) +plot.FixBothRanges(pads[0], y_min if args.logy else 0, 0.05 if args.logy else 0, y_max, 0.25) + +ratio_graph_sets = [] +ratio_graphs = [] + +if args.ratio_to is not None: + pads[1].cd() + plot.SetupTwoPadSplitAsRatio(pads, axis[0], axis[1], "Ratio_{}", True, 0.1, 2.4) + axis[1].SetNdivisions(506, "Y") + splitsrc = args.ratio_to.split(":") + ref = plot.LimitTGraphFromJSONFile(splitsrc[0], splitsrc[1]) + for gr_set in graph_sets: + ratio_set = {} + for key in gr_set: + ratio_set[key] = plot.GraphDivide(gr_set[key], ref) + ratio_graph_sets.append(ratio_set) + plot.DrawLimitBand(pads[1], ratio_graph_sets[-1]) + pads[1].RedrawAxis() + pads[1].RedrawAxis("g") + pads[1].GetFrame().Draw() + for gr in graphs: + ratio_graphs.append(plot.GraphDivide(gr, ref)) + ratio_graphs[-1].Draw("LP") + ry_min, ry_max = (plot.GetPadYMin(pads[1]), plot.GetPadYMax(pads[1])) + plot.FixBothRanges(pads[1], ry_min, 0.1, ry_max, 0.1) + + +pads[0].cd() +if legend.GetNRows() == 1: + legend.SetY1(legend.GetY2() - 0.5 * (legend.GetY2() - legend.GetY1())) +legend.Draw() + +# line = ROOT.TLine() +# line.SetLineColor(ROOT.kBlue) +# line.SetLineWidth(2) +# plot.DrawHorizontalLine(pads[0], line, 1) + +box = ROOT.TPave(pads[0].GetLeftMargin(), 0.81, 1 - pads[0].GetRightMargin(), 1 - pads[0].GetTopMargin(), 1, "NDC") +box.Draw() + +legend.Draw() + +plot.DrawCMSLogo(pads[0], "CMS", args.cms_sub, 11, 0.045, 0.035, 1.2, "", 0.8) +plot.DrawTitle(pads[0], args.title_right, 3) +plot.DrawTitle(pads[0], args.title_left, 1) + +canv.Print(".pdf") +canv.Print(".png") +# maketable.TablefromJson(args.table_vals, args.file, "TablefromJson.txt")