Skip to content

Commit

Permalink
Merge pull request #2371 from gartung/gartung-resources-diff
Browse files Browse the repository at this point in the history
Resources comparison: Compare absolute difference of fraction of total time per module
  • Loading branch information
smuzaffar authored Nov 20, 2024
2 parents a9e890c + 8a1b4e7 commit 0c07414
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 66 deletions.
2 changes: 1 addition & 1 deletion cmssw-pr-test-config
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ elif [ "$CMSSW_MAJOR" -ge 13 ] ; then
else
PR_TEST_MATRIX_EXTRAS_GPU=11634.586,11634.587
fi
PR_TEST_MATRIX_EXTRAS_PROFILING=29834.21,12634.21
PR_TEST_MATRIX_EXTRAS_PROFILING=29834.21,13034.21
PR_TEST_MATRIX_EXTRAS_HIGH_STATS=35034.0
PR_TEST_MATRIX_EXTRAS_NANO=all
MATRIX_OPTION_PROFILING=""
Expand Down
213 changes: 150 additions & 63 deletions comparisons/resources-diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,83 +5,120 @@
import os


def diff_from(metrics, data, data_total, dest, dest_total, res):
    """Store per-metric differences between two resource entries in ``res``.

    For every name in ``metrics`` the following keys are written:

    * ``res["<metric>_diff"]``: ``dest[metric] - data[metric]``.
    * ``res["<metric>_pdiff"]``: that difference multiplied by 100.
    * ``dest["<metric>_frac"]``: ``dest[metric]`` as a percentage of
      ``dest_total[metric]``.
    * ``data["<metric>_frac"]``: ``data[metric]`` as a percentage of
      ``data_total[metric]``.
    * ``res["<metric>_frac_diff"]``: ``dest`` fraction minus ``data`` fraction.
    * ``res["<metric>_frac_pdiff"]``: that fraction difference multiplied
      by 100.

    Args:
        metrics: iterable of metric names present in all four mappings.
        data: mapping of metric name to value; gains the ``_frac`` keys.
        data_total: mapping holding the totals that ``data`` is compared to.
        dest: mapping of metric name to value; gains the ``_frac`` keys.
        dest_total: mapping holding the totals that ``dest`` is compared to.
        res: mapping that receives the ``_diff``/``_pdiff`` keys.

    Returns:
        None. ``data``, ``dest`` and ``res`` are modified in place.

    A total of zero produces a fraction of 0.0 rather than raising
    ZeroDivisionError.
    """
    for metric in metrics:
        dmetric = dest[metric] - data[metric]
        res["%s_diff" % metric] = dmetric
        res["%s_pdiff" % metric] = 100 * dmetric

        # Guard the divisions: a metric whose total is 0 has no meaningful
        # share, so report 0.0 instead of crashing the whole comparison.
        total_dest = dest_total[metric]
        total_data = data_total[metric]
        fdest = 100 * dest[metric] / total_dest if total_dest else 0.0
        fdata = 100 * data[metric] / total_data if total_data else 0.0

        # Write order matters when ``data`` and ``dest`` are the same dict:
        # the ``data`` fraction is stored last, as in the original code.
        fkey = "%s_frac" % metric
        dest[fkey] = fdest
        data[fkey] = fdata

        # The difference uses the local values, so it is unaffected by any
        # aliasing between ``data`` and ``dest``.
        dfmetric = fdest - fdata
        res["%s_frac_diff" % metric] = dfmetric
        res["%s_frac_pdiff" % metric] = 100 * dfmetric


if len(sys.argv) == 1:
print(
"""Usage: resources-diff.py FILE1 FILE2
"""Usage: resources-diff.py IB_FILE PR_FILE
Diff the content of two "resources.json" files and print the result to standard output."""
)
sys.exit(1)

with open(sys.argv[1]) as f:
output = json.load(f)
ibdata = json.load(f)

metrics = [label for resource in output["resources"] for label in resource]
metrics = [label for resource in ibdata["resources"] for label in resource]

datamap = {module["type"] + "|" + module["label"]: module for module in output["modules"]}
datamapib = {module["type"] + "|" + module["label"]: module for module in ibdata["modules"]}

datacumulsib = {}
for module in ibdata["modules"]:
datacumul = datacumulsib.get(module["type"])
if datacumul:
datacumul["count"] += 1
for metric in metrics:
datacumul[metric] += module[metric]
else:
datacumul = {}
datacumul["count"] = 1
for metric in metrics:
datacumul[metric] = module[metric]
datacumulsib[module["type"]] = datacumul
# print(datacumulsib)

with open(sys.argv[2]) as f:
input = json.load(f)
if output["resources"] != input["resources"]:
prdata = json.load(f)
if ibdata["resources"] != prdata["resources"]:
print("Error: input files describe different metrics")
sys.exit(1)

datamap2 = {module["type"] + "|" + module["label"]: module for module in input["modules"]}
datamappr = {module["type"] + "|" + module["label"]: module for module in prdata["modules"]}

if output["total"]["label"] != input["total"]["label"]:
datacumulspr = {}
for module in prdata["modules"]:
datacumul = datacumulspr.get(module["type"])
if datacumul:
datacumul["count"] += 1
for metric in metrics:
datacumul[metric] += module[metric]
else:
datacumul = {}
datacumul["count"] = 1
for metric in metrics:
datacumul[metric] = module[metric]
datacumulspr[module["type"]] = datacumul
# print(datacumulspr)

if ibdata["total"]["label"] != prdata["total"]["label"]:
print("Warning: input files describe different process names")

results = {}
results["resources"] = []
for resource in input["resources"]:
for resource in prdata["resources"]:
for k, v in resource.items():
dkey = "%s_diff" % k
pdkey = "%s_pdiff" % k
results["resources"].append({k: "%s" % v})
results["resources"].append({dkey: "%s diff" % v})
results["resources"].append({pdkey: "%s percentage diff" % v})
results["total"] = {}
results["total"]["label"] = input["total"]["label"]
results["total"]["events"] = input["total"]["events"]
results["total"]["type"] = input["total"]["type"]
results["modules"] = []

diff_from(metrics, input["total"], output["total"], results["total"])
results["total"] = {}
results["total"]["type"] = prdata["total"]["type"]
results["total"]["label"] = prdata["total"]["label"]
results["total"]["events"] = prdata["total"]["events"]
diff_from(
metrics, prdata["total"], prdata["total"], ibdata["total"], ibdata["total"], results["total"]
)

for module in input["modules"]:
results["modules"] = []
for module in prdata["modules"]:
key = module["type"] + "|" + module["label"]
result = {}
result["type"] = module["type"]
result["label"] = module["label"]
result["events"] = module["events"]
if key in datamap:
diff_from(metrics, module, datamap[key], result)
if key in datamapib:
diff_from(metrics, module, prdata["total"], datamapib[key], ibdata["total"], result)
results["modules"].append(result)
else:
datamap[key] = module
diff_from(metrics, module, datamap[key], result)
datamapib[key] = module
diff_from(metrics, module, prdata["total"], datamapib[key], ibdata["total"], result)
results["modules"].append(result)

datamap3 = {module["type"] + "|" + module["label"]: module for module in results["modules"]}
datamapres = {module["type"] + "|" + module["label"]: module for module in results["modules"]}

threshold = 1.0
error_threshold = 10.0

threshold = 5.0
error_threshold = 20.0


summaryLines = []
Expand All @@ -95,57 +132,107 @@ def diff_from(metrics, data, dest, res):
+ "warn threshold %0.2f" % threshold
+ '%</td><td></td></tr><tr><td bgcolor="red">'
+ "error threshold %0.2f" % error_threshold
+ '%</td><td></td></tr><tr><td bgcolor="green">'
+ "warn threshold -%0.2f" % threshold
+ '%</td><td></td></tr><tr><td bgcolor="cyan">'
+ "warn threshold -%0.2f" % error_threshold
+ "%</td><td></td></tr>",
"<tr><td>metric:<BR>&lt;pull request &gt;<BR>&lt;baseline&gt;<BR>(PR - baseline)</td><td><br>&lt;100* (PR - baseline)/baseline&gt;<br></td></tr></table><table>",
'<tr><td align="center">Module type</td>'
"<tr><td>metric:<BR>&lt;pull request &gt;<BR>&lt;baseline&gt;<BR>(PR - baseline)</td><td><br>&lt;100* (PR - baseline)&gt;<br></td></tr></table>"
+ "<table>"
+ '<tr><td align="center">Type</td>'
+ '<td align="center">Label</td>'
+ '<td align="center">real time</td>'
+ '<td align="center">cpu time</td>'
+ '<td align="center">allocated memory </td>'
+ '<td align="center">deallocated memory </td>'
+ '<td align="center">events</td>'
+ "</tr>"
+ "<td>%s</td>" % prdata["total"]["type"]
+ "<td>%s</td>" % prdata["total"]["label"]
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
prdata["total"]["time_real"],
ibdata["total"]["time_real"],
results["total"]["time_real_diff"],
)
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
prdata["total"]["time_thread"],
ibdata["total"]["time_thread"],
results["total"]["time_thread_diff"],
)
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (
prdata["total"]["mem_alloc"],
ibdata["total"]["mem_alloc"],
results["total"]["mem_alloc_diff"],
)
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (prdata["total"]["mem_free"], ibdata["total"]["mem_free"], results["total"]["mem_free_diff"])
+ "<td>%i<br>%i<br>%i</td>"
% (prdata["total"]["events"], ibdata["total"]["events"], results["total"]["events"])
+ "</tr></table>"
+ '<table><tr><td align="center">Module type</td>'
+ '<td align="center">Module label</td>'
+ '<td align="center">real time diff</td>'
+ '<td align="center">real time percent diff</td>'
+ '<td align="center">cpu time diff</td>'
+ '<td align="center">cpu time percent diff</td>'
+ '<td align="center">real time fraction</td>'
+ '<td align="center">real time fraction diff percent</td>'
+ '<td align="center">cpu time fraction </td>'
+ '<td align="center">cpu time fraction diff percent</td>'
+ '<td align="center">allocated memory diff</td>'
+ '<td align="center">allocated memory percent diff</td>'
+ '<td align="center">deallocated memory diff</td>'
+ '<td align="center">deallocated memory percent diff</td>'
+ '<td align="center">events</td>'
+ "</tr>",
]


for key in sorted(datamap3.keys()):
for item in sorted(datamapres.items(), key=lambda x: x[1]["time_thread_frac_pdiff"], reverse=True):
key = item[1]["type"] + "|" + item[1]["label"]
if not key == "|":
module1 = datamap[key]
module2 = datamap2[key]
module3 = datamap3[key]
moduleib = datamapib[key]
modulepr = datamappr[key]
moduleres = datamapres[key]
cellString = '<td align="right" '
color = ""
if abs(module3["time_thread_pdiff"]) > threshold:
if moduleres["time_thread_frac_pdiff"] > threshold:
color = 'bgcolor="orange"'
if abs(module3["time_thread_pdiff"]) > error_threshold:
if moduleres["time_thread_frac_pdiff"] > error_threshold:
color = 'bgcolor="red"'
if moduleres["time_thread_frac_pdiff"] < -1.0 * threshold:
color = 'bgcolor="cyan"'
if moduleres["time_thread_frac_pdiff"] < -1.0 * error_threshold:
color = 'bgcolor="green"'
cellString += color
cellString += ">"
summaryLines += [
"<tr>"
+ "<td>%s</td>" % module3["type"]
+ "<td>%s</td>" % module3["label"]
+ '<td align="right">%0.4f<br>%0.4f<br>%0.4f</td>'
% (module1["time_real"], module2["time_real"], module3["time_real_diff"])
+ '<td align="right">%0.2f%%</td>' % module3["time_real_pdiff"]
+ '<td align="right">%0.4f<br>%0.4f<br>%0.4f</td>'
% (module1["time_thread"], module2["time_thread"], module3["time_thread_diff"])
+ "<td>%s</td>" % moduleres["type"]
+ "<td>%s</td>" % moduleres["label"]
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
moduleib["time_real_frac"],
modulepr["time_real_frac"],
moduleres["time_real_frac_diff"],
)
+ '<td align="right">%0.6f%%</td>' % moduleres["time_real_frac_pdiff"]
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
moduleib["time_thread_frac"],
modulepr["time_thread_frac"],
moduleres["time_thread_frac_diff"],
)
+ cellString
+ "%0.2f%%</td>" % module3["time_thread_pdiff"]
+ "%0.6f%%</td>" % moduleres["time_thread_frac_pdiff"]
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (module1["mem_alloc"], module2["mem_alloc"], module3["mem_alloc_diff"])
+ '<td align="right">%0.2f%%</td>' % module3["mem_alloc_pdiff"]
% (moduleib["mem_alloc"], modulepr["mem_alloc"], moduleres["mem_alloc_diff"])
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (module1["mem_free"], module2["mem_free"], module3["mem_free_diff"])
+ '<td align="right">%0.2f%%</td>' % module3["mem_free_pdiff"]
+ "<td>%i<br>%i<br>%i</td>" % (module1["events"], module2["events"], module3["events"])
% (moduleib["mem_free"], modulepr["mem_free"], moduleres["mem_free_diff"])
+ "<td>%i<br>%i<br>%i</td>"
% (moduleib["events"], modulepr["events"], moduleres["events"])
+ "</tr>"
]
summaryLines += ["</table></body></html>"]

summaryLines += []
summaryLines += ["</body></html>"]

summaryFile = os.path.dirname(sys.argv[1]) + "/diff-" + os.path.basename(sys.argv[1]) + ".html"
with open(summaryFile, "w") as g:
Expand Down
2 changes: 1 addition & 1 deletion pr_testing/run-pr-profiling.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ for PROFILING_WORKFLOW in $WORKFLOWS;do
ls -l $WORKSPACE/igprof/${CMSSW_VERSION}/${SCRAM_ARCH}/profiling/${PROFILING_WORKFLOW}/${UPLOAD_UNIQ_ID}/$BASENAME || true
echo "<li><a href=\"https://cmssdt.cern.ch/SDT/cgi-bin/igprof-navigator/${CMSSW_VERSION}/${SCRAM_ARCH}/profiling/${PROFILING_WORKFLOW}/${UPLOAD_UNIQ_ID}/${BASENAME//.sql3/}\"> $(basename $f)</a> </li>" >> $WORKSPACE/upload/profiling/index-$PROFILING_WORKFLOW.html
done
for f in $(find $PROFILING_WORKFLOW -type f -name '*.json' ) ; do
for f in $(find $PROFILING_WORKFLOW -type f -name 'step*.json' ) ; do
d=$(dirname $f)
mkdir -p $WORKSPACE/upload/profiling/$d || true
cp -p $f $WORKSPACE/upload/profiling/$d/ || true
Expand Down
2 changes: 1 addition & 1 deletion report-summary-merged-prs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,7 @@ def find_one_profiling_result(magic_command):
"""
Looks for one profiling result
"""
command_to_execute = magic_command.replace("WORKFLOW", "12634.21")
command_to_execute = magic_command.replace("WORKFLOW", "13034.21")
print("Running ", command_to_execute)
out, err, ret_code = get_output_command(command_to_execute)
print("Ran:", out, err, ret_code, command_to_execute)
Expand Down

0 comments on commit 0c07414

Please sign in to comment.