Populate the benchmark metadata (#5918)

To ease the process of gathering the benchmark metadata before uploading
it to the database, I'm adding a script
`.github/scripts/benchmarks/gather_metadata.py` to gather this
information and pass it to the upload script. From
#5839, the benchmark metadata
includes the following required fields:

```
-- Metadata
`timestamp` UInt64,
`schema_version` String DEFAULT 'v3',
`name` String,
-- About the change
`repo` String DEFAULT 'pytorch/pytorch',
`head_branch` String,
`head_sha` String,
`workflow_id` UInt64,
`run_attempt` UInt32,
`job_id` UInt64,
-- The raw records on S3
`s3_path` String,
```

I'm going to test this out with the PT2 compiler instruction count benchmark
at pytorch/pytorch#140493

### Testing

https://github.com/pytorch/test-infra/actions/runs/11831746632/job/32967412160?pr=5918#step:5:105
gathers the metadata and uploads the benchmark results correctly.

Also, an actual upload was done at
https://github.com/pytorch/pytorch/actions/runs/11831781500/job/33006545698#step:24:138
huydhn authored Nov 15, 2024
1 parent 6cacc52 commit 5397347
Showing 7 changed files with 342 additions and 46 deletions.
78 changes: 75 additions & 3 deletions .github/actions/upload-benchmark-results/action.yml
@@ -9,6 +9,8 @@ inputs:
  # TODO (huydhn): Use this to gate the migration to oss_ci_benchmark_v3 on S3
  schema-version:
    default: 'v2'
  github-token:
    default: ''

runs:
  using: composite
@@ -19,22 +21,92 @@ runs:
        set -eux
        python3 -mpip install boto3==1.35.33

    - name: Check that GITHUB_TOKEN is defined
      if: ${{ inputs.schema-version != 'v2' }}
      env:
        GITHUB_TOKEN: ${{ inputs.github-token }}
      shell: bash
      run: |
        set -eux
        if [[ -z "${GITHUB_TOKEN}" ]]; then
          echo "Missing github-token input"
          exit 1
        fi

    - name: Get workflow job id
      if: ${{ inputs.github-token != '' }}
      id: get-job-id
      uses: pytorch/test-infra/.github/actions/get-workflow-job-id@main
      with:
        github-token: ${{ inputs.github-token }}

    - name: Gather the metadata
      id: gather-metadata
      shell: bash
      env:
        SCHEMA_VERSION: ${{ inputs.schema-version }}
        REPO: ${{ github.repository }}
        HEAD_BRANCH: ${{ github.head_ref }}
        HEAD_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        WORKFLOW_RUN_ID: ${{ github.run_id }}
        RUN_ATTEMPT: ${{ github.run_attempt }}
        JOB_ID: ${{ inputs.github-token != '' && steps.get-job-id.outputs.job-id || '0' }}
        JOB_NAME: ${{ inputs.github-token != '' && steps.get-job-id.outputs.job-name || '' }}
      run: |
        set -eux
        python3 "${GITHUB_ACTION_PATH}/../../scripts/benchmarks/gather_metadata.py" \
          --schema-version "${SCHEMA_VERSION}" \
          --repo "${REPO}" \
          --head-branch "${HEAD_BRANCH}" \
          --head-sha "${HEAD_SHA}" \
          --workflow-id "${WORKFLOW_RUN_ID}" \
          --run-attempt "${RUN_ATTEMPT}" \
          --job-id "${JOB_ID}" \
          --job-name "${JOB_NAME}"

    - name: Gather the runner information
      id: gather-runner-info
      shell: bash
      run: |
        set -eux
        # TODO (huydhn): Implement this part
        echo "runners=[]" >> "${GITHUB_OUTPUT}"

    - name: Gather the dependencies information
      id: gather-dependencies
      shell: bash
      run: |
        set -eux
        # TODO (huydhn): Implement this part
        echo "dependencies={}" >> "${GITHUB_OUTPUT}"

    - name: Upload benchmark results
      shell: bash
      env:
        BENCHMARK_RESULTS_DIR: ${{ inputs.benchmark-results-dir }}
        DRY_RUN: ${{ inputs.dry-run }}
        SCHEMA_VERSION: ${{ inputs.schema-version }}
        # Additional information about the benchmarks
        BENCHMARK_METADATA: ${{ steps.gather-metadata.outputs.metadata }}
        RUNNER_INFO: ${{ steps.gather-runner-info.outputs.runners }}
        DEPENDENCIES: ${{ steps.gather-dependencies.outputs.dependencies }}
      run: |
        set -eux
        if [[ "${DRY_RUN}" == "true" ]]; then
          python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \
            --benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \
            --schema-version "${SCHEMA_VERSION}" \
            --metadata "${BENCHMARK_METADATA}" \
            --runners "${RUNNER_INFO}" \
            --dependencies "${DEPENDENCIES}" \
            --dry-run
        else
          python3 "${GITHUB_ACTION_PATH}/../../scripts/upload_benchmark_results.py" \
            --benchmark-results-dir "${BENCHMARK_RESULTS_DIR}" \
            --schema-version "${SCHEMA_VERSION}" \
            --metadata "${BENCHMARK_METADATA}" \
            --runners "${RUNNER_INFO}" \
            --dependencies "${DEPENDENCIES}"
        fi
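
The action forwards the gathered metadata, runner info, and dependencies to `upload_benchmark_results.py` as JSON strings. As a rough sketch of how that script might parse these flags (the real script is not part of this diff, so the argument handling below is an assumption):

```python
# Hypothetical sketch of the flag parsing in upload_benchmark_results.py;
# the actual script may differ.
import json
from argparse import ArgumentParser


def parse_args():
    parser = ArgumentParser("upload benchmark results to the database")
    parser.add_argument("--benchmark-results-dir", type=str, required=True)
    parser.add_argument("--schema-version", choices=["v2", "v3"], required=True)
    # The gathered blobs arrive as JSON strings from the composite action
    parser.add_argument("--metadata", type=json.loads, default={})
    parser.add_argument("--runners", type=json.loads, default=[])
    parser.add_argument("--dependencies", type=json.loads, default={})
    parser.add_argument("--dry-run", action="store_true")
    return parser.parse_args()
```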
@@ -0,0 +1 @@
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": false, "device": "cpu", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_eager", "type": "add_loop", "backend": "eager"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [3086359081]}}]
@@ -0,0 +1 @@
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": true, "device": "cpu", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_eager_dynamic", "type": "add_loop", "backend": "eager"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [5712213247]}}]
@@ -0,0 +1 @@
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": true, "device": "cuda", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_inductor_dynamic_gpu", "type": "add_loop", "backend": "inductor"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [40859830085]}}]
98 changes: 98 additions & 0 deletions .github/scripts/benchmarks/gather_metadata.py
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import os
import time
from typing import Any


def parse_args() -> Any:
    from argparse import ArgumentParser

    parser = ArgumentParser("gather some metadata about the benchmark")
    # v3 is defined at torchci/clickhouse_queries/oss_ci_benchmark_v3/query.sql
    parser.add_argument(
        "--schema-version",
        choices=["v2", "v3"],
        required=True,
        help="the database schema to use",
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="the name of the repository where the benchmark is run",
    )
    parser.add_argument(
        "--head-branch",
        type=str,
        required=True,
        help="the name of the branch where the benchmark is run",
    )
    parser.add_argument(
        "--head-sha",
        type=str,
        required=True,
        help="the commit that the benchmark uses",
    )
    parser.add_argument(
        "--workflow-id",
        type=int,
        required=True,
        help="the benchmark workflow id",
    )
    parser.add_argument(
        "--run-attempt",
        type=int,
        default=1,
        help="the workflow run attempt",
    )
    parser.add_argument(
        "--job-id",
        type=int,
        required=True,
        help="the benchmark job id",
    )
    parser.add_argument(
        "--job-name",
        type=str,
        required=True,
        help="the benchmark job name",
    )

    return parser.parse_args()


def set_output(name: str, val: Any) -> None:
    # Prefer the GITHUB_OUTPUT file used by current GitHub Actions runners;
    # fall back to the deprecated ::set-output workflow command otherwise.
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)
    else:
        print(f"::set-output name={name}::{val}")


def main() -> None:
    args = parse_args()

    # From https://github.com/pytorch/test-infra/pull/5839
    metadata = {
        "timestamp": int(time.time()),
        "schema_version": args.schema_version,
        "name": args.job_name,
        "repo": args.repo,
        "head_branch": args.head_branch,
        "head_sha": args.head_sha,
        "workflow_id": args.workflow_id,
        "run_attempt": args.run_attempt,
        "job_id": args.job_id,
    }
    set_output("metadata", json.dumps(metadata))


if __name__ == "__main__":
    main()
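
For a quick local check, the script can be exercised end to end by pointing `GITHUB_OUTPUT` at a temporary file; every argument value below is made up:

```python
# Hypothetical local smoke test for gather_metadata.py.
import os
import subprocess
import tempfile

with tempfile.NamedTemporaryFile(mode="r", suffix=".txt") as out:
    subprocess.run(
        [
            "python3", ".github/scripts/benchmarks/gather_metadata.py",
            "--schema-version", "v3",
            "--repo", "pytorch/pytorch",
            "--head-branch", "main",
            "--head-sha", "0123456789abcdef",
            "--workflow-id", "1",
            "--run-attempt", "1",
            "--job-id", "1",
            "--job-name", "smoke-test",
        ],
        env={**os.environ, "GITHUB_OUTPUT": out.name},
        check=True,
    )
    print(out.read())  # e.g. metadata={"timestamp": ..., "schema_version": "v3", ...}
```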