Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resources comparison: Compare absolute difference of fraction of total time per module #2371

Merged
merged 8 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmssw-pr-test-config
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ elif [ "$CMSSW_MAJOR" -ge 13 ] ; then
else
PR_TEST_MATRIX_EXTRAS_GPU=11634.586,11634.587
fi
PR_TEST_MATRIX_EXTRAS_PROFILING=29834.21,12634.21
PR_TEST_MATRIX_EXTRAS_PROFILING=29834.21,13034.21
PR_TEST_MATRIX_EXTRAS_HIGH_STATS=35034.0
PR_TEST_MATRIX_EXTRAS_NANO=all
MATRIX_OPTION_PROFILING=""
Expand Down
213 changes: 150 additions & 63 deletions comparisons/resources-diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,83 +5,120 @@
import os


def diff_from(metrics, data, dest, res):
# ratio = 0.0
# if not dest["events"] == 0:
# ratio = data["events"]/dest["events"]
# data["events"] = ratio * dest["events"]
def diff_from(metrics, data, data_total, dest, dest_total, res):
for metric in metrics:
dmetric = dest[metric] - data[metric]
dkey = "%s_diff" % metric
res[dkey] = dmetric
pdmetric = 0.0
if not data[metric] == 0.0:
pdmetric = 100 * dmetric / data[metric]
pdmetric = 100 * dmetric
pdkey = "%s_pdiff" % metric
res[pdkey] = pdmetric


# data[metric] = ratio * dest[metric]
fkey = "%s_frac" % metric
fdest = 100 * dest[metric] / dest_total[metric]
dest[fkey] = fdest
fdata = 100 * data[metric] / data_total[metric]
data[fkey] = fdata
dfmetric = fdest - fdata
dfkey = "%s_frac_diff" % metric
res[dfkey] = dfmetric
pdfmetric = 0.0
pdfmetric = 100 * dfmetric
dkpkey = "%s_frac_pdiff" % metric
res[dkpkey] = pdfmetric


if len(sys.argv) == 1:
print(
"""Usage: resources-diff.py FILE1 FILE2
"""Usage: resources-diff.py IB_FILE PR_FILE
Diff the content of two "resources.json" files and print the result to standard output."""
)
sys.exit(1)

with open(sys.argv[1]) as f:
output = json.load(f)
ibdata = json.load(f)

metrics = [label for resource in output["resources"] for label in resource]
metrics = [label for resource in ibdata["resources"] for label in resource]

datamap = {module["type"] + "|" + module["label"]: module for module in output["modules"]}
datamapib = {module["type"] + "|" + module["label"]: module for module in ibdata["modules"]}

datacumulsib = {}
for module in ibdata["modules"]:
datacumul = datacumulsib.get(module["type"])
if datacumul:
datacumul["count"] += 1
for metric in metrics:
datacumul[metric] += module[metric]
else:
datacumul = {}
datacumul["count"] = 1
for metric in metrics:
datacumul[metric] = module[metric]
datacumulsib[module["type"]] = datacumul
# print(datacumulsib)

with open(sys.argv[2]) as f:
input = json.load(f)
if output["resources"] != input["resources"]:
prdata = json.load(f)
if ibdata["resources"] != prdata["resources"]:
print("Error: input files describe different metrics")
sys.exit(1)

datamap2 = {module["type"] + "|" + module["label"]: module for module in input["modules"]}
datamappr = {module["type"] + "|" + module["label"]: module for module in prdata["modules"]}

if output["total"]["label"] != input["total"]["label"]:
datacumulspr = {}
for module in prdata["modules"]:
datacumul = datacumulspr.get(module["type"])
if datacumul:
datacumul["count"] += 1
for metric in metrics:
datacumul[metric] += module[metric]
else:
datacumul = {}
datacumul["count"] = 1
for metric in metrics:
datacumul[metric] = module[metric]
datacumulspr[module["type"]] = datacumul
# print(datacumulspr)

if ibdata["total"]["label"] != prdata["total"]["label"]:
print("Warning: input files describe different process names")

results = {}
results["resources"] = []
for resource in input["resources"]:
for resource in prdata["resources"]:
for k, v in resource.items():
dkey = "%s_diff" % k
pdkey = "%s_pdiff" % k
results["resources"].append({k: "%s" % v})
results["resources"].append({dkey: "%s diff" % v})
results["resources"].append({pdkey: "%s percentage diff" % v})
results["total"] = {}
results["total"]["label"] = input["total"]["label"]
results["total"]["events"] = input["total"]["events"]
results["total"]["type"] = input["total"]["type"]
results["modules"] = []

diff_from(metrics, input["total"], output["total"], results["total"])
results["total"] = {}
results["total"]["type"] = prdata["total"]["type"]
results["total"]["label"] = prdata["total"]["label"]
results["total"]["events"] = prdata["total"]["events"]
diff_from(
metrics, prdata["total"], prdata["total"], ibdata["total"], ibdata["total"], results["total"]
)

for module in input["modules"]:
results["modules"] = []
for module in prdata["modules"]:
key = module["type"] + "|" + module["label"]
result = {}
result["type"] = module["type"]
result["label"] = module["label"]
result["events"] = module["events"]
if key in datamap:
diff_from(metrics, module, datamap[key], result)
if key in datamapib:
diff_from(metrics, module, prdata["total"], datamapib[key], ibdata["total"], result)
results["modules"].append(result)
else:
datamap[key] = module
diff_from(metrics, module, datamap[key], result)
datamapib[key] = module
diff_from(metrics, module, prdata["total"], datamapib[key], ibdata["total"], result)
results["modules"].append(result)

datamap3 = {module["type"] + "|" + module["label"]: module for module in results["modules"]}
datamapres = {module["type"] + "|" + module["label"]: module for module in results["modules"]}

threshold = 1.0
error_threshold = 10.0

threshold = 5.0
error_threshold = 20.0


summaryLines = []
Expand All @@ -95,57 +132,107 @@ def diff_from(metrics, data, dest, res):
+ "warn threshold %0.2f" % threshold
+ '%</td><td></td></tr><tr><td bgcolor="red">'
+ "error threshold %0.2f" % error_threshold
+ '%</td><td></td></tr><tr><td bgcolor="green">'
+ "warn threshold -%0.2f" % threshold
+ '%</td><td></td></tr><tr><td bgcolor="cyan">'
+ "warn threshold -%0.2f" % error_threshold
+ "%</td><td></td></tr>",
"<tr><td>metric:<BR>&lt;pull request &gt;<BR>&lt;baseline&gt;<BR>(PR - baseline)</td><td><br>&lt;100* (PR - baseline)/baseline&gt;<br></td></tr></table><table>",
'<tr><td align="center">Module type</td>'
"<tr><td>metric:<BR>&lt;pull request &gt;<BR>&lt;baseline&gt;<BR>(PR - baseline)</td><td><br>&lt;100* (PR - baseline)&gt;<br></td></tr></table>"
+ "<table>"
+ '<tr><td align="center">Type</td>'
+ '<td align="center">Label</td>'
+ '<td align="center">real time</td>'
+ '<td align="center">cpu time</td>'
+ '<td align="center">allocated memory </td>'
+ '<td align="center">deallocated memory </td>'
+ '<td align="center">events</td>'
+ "</tr>"
+ "<td>%s</td>" % prdata["total"]["type"]
+ "<td>%s</td>" % prdata["total"]["label"]
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
prdata["total"]["time_real"],
ibdata["total"]["time_real"],
results["total"]["time_real_diff"],
)
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
prdata["total"]["time_thread"],
ibdata["total"]["time_thread"],
results["total"]["time_thread_diff"],
)
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (
prdata["total"]["mem_alloc"],
ibdata["total"]["mem_alloc"],
results["total"]["mem_alloc_diff"],
)
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (prdata["total"]["mem_free"], ibdata["total"]["mem_free"], results["total"]["mem_free_diff"])
+ "<td>%i<br>%i<br>%i</td>"
% (prdata["total"]["events"], ibdata["total"]["events"], results["total"]["events"])
+ "</tr></table>"
+ '<table><tr><td align="center">Module type</td>'
+ '<td align="center">Module label</td>'
+ '<td align="center">real time diff</td>'
+ '<td align="center">real time percent diff</td>'
+ '<td align="center">cpu time diff</td>'
+ '<td align="center">cpu time percent diff</td>'
+ '<td align="center">real time fraction</td>'
+ '<td align="center">real time fraction diff percent</td>'
+ '<td align="center">cpu time fraction </td>'
+ '<td align="center">cpu time fraction diff percent</td>'
+ '<td align="center">allocated memory diff</td>'
+ '<td align="center">allocated memory percent diff</td>'
+ '<td align="center">deallocated memory diff</td>'
+ '<td align="center">deallocated memory percent diff</td>'
+ '<td align="center">events</td>'
+ "</tr>",
]


for key in sorted(datamap3.keys()):
for item in sorted(datamapres.items(), key=lambda x: x[1]["time_thread_frac_pdiff"], reverse=True):
key = item[1]["type"] + "|" + item[1]["label"]
if not key == "|":
module1 = datamap[key]
module2 = datamap2[key]
module3 = datamap3[key]
moduleib = datamapib[key]
modulepr = datamappr[key]
moduleres = datamapres[key]
cellString = '<td align="right" '
color = ""
if abs(module3["time_thread_pdiff"]) > threshold:
if moduleres["time_thread_frac_pdiff"] > threshold:
color = 'bgcolor="orange"'
if abs(module3["time_thread_pdiff"]) > error_threshold:
if moduleres["time_thread_frac_pdiff"] > error_threshold:
color = 'bgcolor="red"'
if moduleres["time_thread_frac_pdiff"] < -1.0 * threshold:
color = 'bgcolor="cyan"'
if moduleres["time_thread_frac_pdiff"] < -1.0 * error_threshold:
color = 'bgcolor="green"'
cellString += color
cellString += ">"
summaryLines += [
"<tr>"
+ "<td>%s</td>" % module3["type"]
+ "<td>%s</td>" % module3["label"]
+ '<td align="right">%0.4f<br>%0.4f<br>%0.4f</td>'
% (module1["time_real"], module2["time_real"], module3["time_real_diff"])
+ '<td align="right">%0.2f%%</td>' % module3["time_real_pdiff"]
+ '<td align="right">%0.4f<br>%0.4f<br>%0.4f</td>'
% (module1["time_thread"], module2["time_thread"], module3["time_thread_diff"])
+ "<td>%s</td>" % moduleres["type"]
+ "<td>%s</td>" % moduleres["label"]
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
moduleib["time_real_frac"],
modulepr["time_real_frac"],
moduleres["time_real_frac_diff"],
)
+ '<td align="right">%0.6f%%</td>' % moduleres["time_real_frac_pdiff"]
+ '<td align="right">%0.6f<br>%0.6f<br>%0.6f</td>'
% (
moduleib["time_thread_frac"],
modulepr["time_thread_frac"],
moduleres["time_thread_frac_diff"],
)
+ cellString
+ "%0.2f%%</td>" % module3["time_thread_pdiff"]
+ "%0.6f%%</td>" % moduleres["time_thread_frac_pdiff"]
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (module1["mem_alloc"], module2["mem_alloc"], module3["mem_alloc_diff"])
+ '<td align="right">%0.2f%%</td>' % module3["mem_alloc_pdiff"]
% (moduleib["mem_alloc"], modulepr["mem_alloc"], moduleres["mem_alloc_diff"])
+ '<td align="right">%0.f<br>%0.f<br>%0.f</td>'
% (module1["mem_free"], module2["mem_free"], module3["mem_free_diff"])
+ '<td align="right">%0.2f%%</td>' % module3["mem_free_pdiff"]
+ "<td>%i<br>%i<br>%i</td>" % (module1["events"], module2["events"], module3["events"])
% (moduleib["mem_free"], modulepr["mem_free"], moduleres["mem_free_diff"])
+ "<td>%i<br>%i<br>%i</td>"
% (moduleib["events"], modulepr["events"], moduleres["events"])
+ "</tr>"
]
summaryLines += ["</table></body></html>"]

summaryLines += []
summaryLines += ["</body></html>"]

summaryFile = os.path.dirname(sys.argv[1]) + "/diff-" + os.path.basename(sys.argv[1]) + ".html"
with open(summaryFile, "w") as g:
Expand Down
2 changes: 1 addition & 1 deletion pr_testing/run-pr-profiling.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ for PROFILING_WORKFLOW in $WORKFLOWS;do
ls -l $WORKSPACE/igprof/${CMSSW_VERSION}/${SCRAM_ARCH}/profiling/${PROFILING_WORKFLOW}/${UPLOAD_UNIQ_ID}/$BASENAME || true
echo "<li><a href=\"https://cmssdt.cern.ch/SDT/cgi-bin/igprof-navigator/${CMSSW_VERSION}/${SCRAM_ARCH}/profiling/${PROFILING_WORKFLOW}/${UPLOAD_UNIQ_ID}/${BASENAME//.sql3/}\"> $(basename $f)</a> </li>" >> $WORKSPACE/upload/profiling/index-$PROFILING_WORKFLOW.html
done
for f in $(find $PROFILING_WORKFLOW -type f -name '*.json' ) ; do
for f in $(find $PROFILING_WORKFLOW -type f -name 'step*.json' ) ; do
d=$(dirname $f)
mkdir -p $WORKSPACE/upload/profiling/$d || true
cp -p $f $WORKSPACE/upload/profiling/$d/ || true
Expand Down
2 changes: 1 addition & 1 deletion report-summary-merged-prs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ def find_one_profiling_result(magic_command):
"""
Looks for one profiling result
"""
command_to_execute = magic_command.replace("WORKFLOW", "12634.21")
command_to_execute = magic_command.replace("WORKFLOW", "13034.21")
print("Running ", command_to_execute)
out, err, ret_code = get_output_command(command_to_execute)
print("Ran:", out, err, ret_code, command_to_execute)
Expand Down
Loading