# Skip to content
#
# 8558: fixes
#
# 8558: fixes #5

# Workflow identity and triggers for the T3000 (T3K) model perf test pipeline.
name: '[T3K] T3000 model perf tests'

on:
  # NOTE(review): the push trigger is limited to one feature branch, which
  # looks like a development aid; confirm whether it should remain.
  push:
    branches: ['tpatel/issue-8558']
  # Permit manual runs.
  workflow_dispatch:
  schedule:
    # Fires at minute 0 of every 12th hour (00:00 and 12:00; scheduled
    # workflows are evaluated in UTC).
    - cron: '0 */12 * * *'
# Jobs: run the reusable build workflow, then execute each T3000 model perf
# test group on its own runner and publish the resulting CSV report.
jobs:
  # Calls the repository's reusable build workflow, forwarding all secrets.
  build-artifact:
    uses: ./.github/workflows/build-artifact.yaml
    secrets: inherit

  # One job per matrix entry. Each entry carries its display name, model
  # type, architecture, runner labels, and the test command to run.
  # NOTE(review): this job waits on build-artifact but has no step that
  # downloads its output; it rebuilds locally in "Build tt-metal and libs".
  # Confirm whether that is intended.
  t3000-model-perf-tests:
    needs: build-artifact
    strategy:
      # Keep the remaining test group running when one of them fails.
      fail-fast: false
      matrix:
        test-group:
          - name: "T3000 LLM model perf tests"
            model-type: "LLM"
            arch: wormhole_b0
            runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"]
            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type llm_model_perf_t3000_device --dispatch-mode ""'
          - name: "T3000 CNN model perf tests"
            model-type: "CNN"
            arch: wormhole_b0
            runs-on: [arch-wormhole_b0, "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"]
            cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type cnn_model_perf_t3000_device --dispatch-mode ""'
    name: ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }}
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      LOGURU_LEVEL: INFO
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    environment: dev
    runs-on: ${{ matrix.test-group.runs-on }}
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0

      # Switch the CPU governor to "performance" for the run; it is set back
      # to "ondemand" by the final step.
      - name: Enable performance mode
        run: |
          sudo cpupower frequency-set -g performance

      # Restart the mount unit, then list a directory under it so the step
      # fails early if the mount is not usable.
      - name: Ensure weka mount is active
        run: |
          sudo systemctl restart mnt-MLPerf.mount
          sudo /etc/rc.local
          ls -al /mnt/MLPerf/bit_error_tests

      # Export the checkout directory to later steps via GITHUB_ENV.
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
          echo "PYTHONPATH=$(pwd)" >> "$GITHUB_ENV"

      # The virtualenv location is passed to the build script here; the test
      # step below activates the same path.
      - name: Build tt-metal and libs
        run: PYTHON_ENV_DIR=$(pwd)/build/python_env ./build_metal.sh

      - name: Run model perf regression tests
        timeout-minutes: 60
        run: |
          # Must match the PYTHON_ENV_DIR given to build_metal.sh above.
          # NOTE(review): assumes build_metal.sh creates the venv at
          # PYTHON_ENV_DIR - confirm against the script.
          source ${{ github.workspace }}/build/python_env/bin/activate
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          ${{ matrix.test-group.cmd }}

      # Runs even after a test failure (but not on cancellation). The second
      # ls fails when today's report is absent, which gates the upload step.
      # NOTE(review): the filename uses the date at check time; presumably the
      # test run names the report with the same date - verify for runs that
      # cross midnight.
      - name: Check perf report exists
        id: check-perf-report
        if: ${{ !cancelled() }}
        run: |
          ls -hal
          export PERF_REPORT_FILENAME=Models_Perf_$(date +%Y_%m_%d).csv
          ls -hal "$PERF_REPORT_FILENAME"
          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"

      # The artifact name is built from keys that exist under
      # matrix.test-group so each matrix job uploads under a distinct name;
      # upload-artifact@v4 rejects a second upload with the same name.
      - name: Upload perf report
        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
        uses: actions/upload-artifact@v4
        with:
          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}
          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"

      # Always restore the governor, including after failure or cancellation.
      - name: Disable performance mode
        if: always()
        run: |
          sudo cpupower frequency-set -g ondemand