Skip to content

Commit

Permalink
KAFKA-17767 Automatically quarantine new tests [5/n] (apache#17725)
Browse files Browse the repository at this point in the history
Reviewers: Chia-Ping Tsai <chia7712@gmail.com>
  • Loading branch information
mumrah authored Nov 19, 2024
1 parent 57299cf commit 5f4cbd4
Show file tree
Hide file tree
Showing 14 changed files with 698 additions and 60 deletions.
75 changes: 75 additions & 0 deletions .github/actions/run-gradle/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
---
# Composite action that runs one Gradle test task under a timeout and then
# archives the unpublished build scan data as a workflow artifact.
name: "Run Tests with Gradle"
description: "Run a Gradle test task with a timeout and a test catalog, then archive the build scan."
inputs:
  # Composite actions do not support typed parameters. Everything is treated as a string
  # See: https://github.com/actions/runner/issues/2238
  test-task:
    description: "The test suite to run. Either 'test' or 'quarantinedTest'."
    required: true
  timeout-minutes:
    description: "The timeout for the tests, in minutes."
    required: true
  test-catalog-path:
    description: "The file path of the test catalog file."
    required: true
  build-scan-artifact-name:
    description: "The name to use for archiving the build scan."
    required: true
outputs:
  gradle-exitcode:
    description: "The exit code of the Gradle test task. 124 means the 'timeout' command stopped the run."
    value: ${{ steps.run-tests.outputs.exitcode }}
runs:
  using: "composite"
  steps:
    - name: Run JUnit Tests (${{ inputs.test-task }})
      # Gradle flags
      # --build-cache:  Let Gradle restore the build cache
      # --no-scan:      Don't attempt to publish the scan yet. We want to archive it first.
      # --continue:     Keep running even if a test fails
      # -PcommitId      Prevent the Git SHA being written into the jar files (which breaks caching)
      shell: bash
      id: run-tests
      # Inputs are passed through the environment rather than interpolated into the script text.
      env:
        TIMEOUT_MINUTES: ${{ inputs.timeout-minutes }}
        TEST_CATALOG: ${{ inputs.test-catalog-path }}
        TEST_TASK: ${{ inputs.test-task }}
      run: |
        # Disable errexit so a failing Gradle run still reaches the lines below
        # that record its exit code in the step output.
        set +e
        # Started in the background so it runs alongside the Gradle invocation.
        ./.github/scripts/thread-dump.sh &
        # TEST_TASK is left unquoted, as in the original, so its word splitting is unchanged.
        # The duration, catalog path and output file are quoted to survive whitespace.
        timeout "${TIMEOUT_MINUTES}m" ./gradlew --build-cache --continue --no-scan \
          -PtestLoggingEvents=started,passed,skipped,failed \
          -PmaxParallelForks=2 \
          -PmaxTestRetries=1 -PmaxTestRetryFailures=3 \
          -PmaxQuarantineTestRetries=3 -PmaxQuarantineTestRetryFailures=0 \
          -Pkafka.test.catalog.file="$TEST_CATALOG" \
          -PcommitId=xxxxxxxxxxxxxxxx \
          $TEST_TASK
        exitcode="$?"
        echo "exitcode=$exitcode" >> "$GITHUB_OUTPUT"
    - name: Archive build scan (${{ inputs.test-task }})
      # always(): archive the scan even when the test step above failed.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: ${{ inputs.build-scan-artifact-name }}
        path: ~/.gradle/build-scan-data
        compression-level: 9
        if-no-files-found: ignore
52 changes: 38 additions & 14 deletions .github/scripts/junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def parse_report(workspace_path, report_path, fp) -> Iterable[TestSuite]:
cur_suite: Optional[TestSuite] = None
partial_test_case = None
test_case_failed = False
test_case_skipped = False
for (event, elem) in xml.etree.ElementTree.iterparse(fp, events=["start", "end"]):
if event == "start":
if elem.tag == "testsuite":
Expand Down Expand Up @@ -171,11 +172,12 @@ def parse_report(workspace_path, report_path, fp) -> Iterable[TestSuite]:
elif elem.tag == "skipped":
skipped = partial_test_case(None, None, None)
cur_suite.skipped_tests.append(skipped)
test_case_skipped = True
else:
pass
elif event == "end":
if elem.tag == "testcase":
if not test_case_failed:
if not test_case_failed and not test_case_skipped:
passed = partial_test_case(None, None, None)
cur_suite.passed_tests.append(passed)
partial_test_case = None
Expand Down Expand Up @@ -303,7 +305,7 @@ def split_report_path(base_path: str, report_path: str) -> Tuple[str, str]:
logger.debug(f"Found skipped test: {skipped_test}")
skipped_table.append((simple_class_name, skipped_test.test_name))

# Collect all tests that were run as part of quarantinedTest
# Only collect quarantined tests from the "quarantinedTest" task
if task == "quarantinedTest":
for test in all_suite_passed.values():
simple_class_name = test.class_name.split(".")[-1]
Expand All @@ -329,53 +331,75 @@ def split_report_path(base_path: str, report_path: str) -> Tuple[str, str]:
# The stdout (print) goes to the workflow step console output.
# The stderr (logger) is redirected to GITHUB_STEP_SUMMARY which becomes part of the HTML job summary.
report_url = get_env("JUNIT_REPORT_URL")
report_md = f"Download [HTML report]({report_url})."
summary = (f"{total_run} tests cases run in {duration}. "
if report_url:
report_md = f"Download [HTML report]({report_url})."
else:
report_md = "No report available. JUNIT_REPORT_URL was missing."
summary = (f"{total_run} tests cases run in {duration}.\n\n"
f"{total_success} {PASSED}, {total_failures} {FAILED}, "
f"{total_flaky} {FLAKY}, {total_skipped} {SKIPPED}, and {total_errors} errors.")
f"{total_flaky} {FLAKY}, {total_skipped} {SKIPPED}, {len(quarantined_table)} {QUARANTINED}, and {total_errors} errors.<br/>")
print("## Test Summary\n")
print(f"{summary} {report_md}\n")
print(f"{summary}\n\n{report_md}\n")

# Failed
if len(failed_table) > 0:
logger.info(f"Found {len(failed_table)} test failures:")
print("### Failed Tests\n")
print("<details open=\"true\">")
print(f"<summary>Failed Tests {FAILED} ({len(failed_table)})</summary>\n")
print(f"| Module | Test | Message | Time |")
print(f"| ------ | ---- | ------- | ---- |")
logger.info(f"Found {len(failed_table)} test failures:")
for row in failed_table:
logger.info(f"{FAILED} {row[0]} > {row[1]}")
row_joined = " | ".join(row)
print(f"| {row_joined} |")
print("\n</details>")
print("\n")

# Flaky
if len(flaky_table) > 0:
logger.info(f"Found {len(flaky_table)} flaky test failures:")
print("### Flaky Tests\n")
print("<details open=\"true\">")
print(f"<summary>Flaky Tests {FLAKY} ({len(flaky_table)})</summary>\n")
print(f"| Module | Test | Message | Time |")
print(f"| ------ | ---- | ------- | ---- |")
logger.info(f"Found {len(flaky_table)} flaky test failures:")
for row in flaky_table:
logger.info(f"{FLAKY} {row[0]} > {row[1]}")
row_joined = " | ".join(row)
print(f"| {row_joined} |")
print("\n</details>")
print("\n")

# Skipped
if len(skipped_table) > 0:
print("<details>")
print(f"<summary>{len(skipped_table)} Skipped Tests</summary>\n")
print(f"<summary>Skipped Tests {SKIPPED} ({len(skipped_table)})</summary>\n")
print(f"| Module | Test |")
print(f"| ------ | ---- |")
logger.debug(f"::group::Found {len(skipped_table)} skipped tests")
for row in skipped_table:
row_joined = " | ".join(row)
print(f"| {row_joined} |")
logger.debug(f"{row[0]} > {row[1]}")
print("\n</details>")
logger.debug("::endgroup::")
print("\n")

# Quarantined
if len(quarantined_table) > 0:
logger.info(f"Ran {len(quarantined_table)} quarantined test:")
print("<details>")
print(f"<summary>{len(quarantined_table)} Quarantined Tests</summary>\n")
print(f"<summary>Quarantined Tests {QUARANTINED} ({len(quarantined_table)})</summary>\n")
print(f"| Module | Test |")
print(f"| ------ | ---- |")
logger.debug(f"::group::Found {len(quarantined_table)} quarantined tests")
for row in quarantined_table:
logger.info(f"{QUARANTINED} {row[0]} > {row[1]}")
row_joined = " | ".join(row)
print(f"| {row_joined} |")
logger.debug(f"{row[0]} > {row[1]}")
print("\n</details>")
logger.debug("::endgroup::")

# Create a horizontal rule
print("-"*80)

# Print special message if there was a timeout
exit_code = get_env("GRADLE_EXIT_CODE", int)
Expand Down
54 changes: 24 additions & 30 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,32 +165,30 @@ jobs:
# If the load-catalog job failed, we won't be able to download the artifact. We don't want this to fail
# the overall workflow, so we'll continue here without a test catalog.
- name: Load Test Catalog
id: load-test-catalog
uses: actions/download-artifact@v4
continue-on-error: true
with:
name: combined-test-catalog

- name: Test
# Gradle flags
# --build-cache: Let Gradle restore the build cache
# --no-scan: Don't attempt to publish the scan yet. We want to archive it first.
# --continue: Keep running even if a test fails
# -PcommitId Prevent the Git SHA being written into the jar files (which breaks caching)
- name: JUnit Quarantined Tests
id: junit-quarantined-test
uses: ./.github/actions/run-gradle
with:
test-task: quarantinedTest
timeout-minutes: 30
test-catalog-path: ${{ steps.load-test-catalog.outputs.download-path }}/combined-test-catalog.txt
build-scan-artifact-name: build-scan-quarantined-test-${{ matrix.java }}

- name: JUnit Tests
id: junit-test
env:
TIMEOUT_MINUTES: 180 # 3 hours
run: |
set +e
./.github/scripts/thread-dump.sh &
timeout ${TIMEOUT_MINUTES}m ./gradlew --build-cache --continue --no-scan \
-PtestLoggingEvents=started,passed,skipped,failed \
-PmaxParallelForks=2 \
-PmaxTestRetries=1 -PmaxTestRetryFailures=3 \
-PmaxQuarantineTestRetries=3 -PmaxQuarantineTestRetryFailures=0 \
-PcommitId=xxxxxxxxxxxxxxxx \
quarantinedTest test
exitcode="$?"
echo "exitcode=$exitcode" >> $GITHUB_OUTPUT
uses: ./.github/actions/run-gradle
with:
test-task: test
timeout-minutes: 180 # 3 hours
test-catalog-path: ${{ steps.load-test-catalog.outputs.download-path }}/combined-test-catalog.txt
build-scan-artifact-name: build-scan-test-${{ matrix.java }}

- name: Archive JUnit HTML reports
uses: actions/upload-artifact@v4
id: junit-upload-artifact
Expand All @@ -200,6 +198,7 @@ jobs:
**/build/reports/tests/*
compression-level: 9
if-no-files-found: ignore

- name: Archive JUnit XML
uses: actions/upload-artifact@v4
with:
Expand All @@ -208,23 +207,26 @@ jobs:
build/junit-xml/**/*.xml
compression-level: 9
if-no-files-found: ignore

- name: Archive Thread Dumps
id: thread-dump-upload-artifact
if: always() && steps.junit-test.outputs.exitcode == '124'
if: always() && (steps.junit-test.outputs.gradle-exitcode == '124' || steps.junit-quarantined-test.outputs.gradle-exitcode == '124')
uses: actions/upload-artifact@v4
with:
name: junit-thread-dumps-${{ matrix.java }}
path: |
thread-dumps/*
compression-level: 9
if-no-files-found: ignore

- name: Parse JUnit tests
run: python .github/scripts/junit.py --export-test-catalog ./test-catalog >> $GITHUB_STEP_SUMMARY
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
JUNIT_REPORT_URL: ${{ steps.junit-upload-artifact.outputs.artifact-url }}
THREAD_DUMP_URL: ${{ steps.thread-dump-upload-artifact.outputs.artifact-url }}
GRADLE_EXIT_CODE: ${{ steps.junit-test.outputs.exitcode }}
GRADLE_EXIT_CODE: ${{ steps.junit-test.outputs.gradle-exitcode }}

- name: Archive Test Catalog
if: ${{ always() && matrix.java == '23' }}
uses: actions/upload-artifact@v4
Expand All @@ -233,14 +235,6 @@ jobs:
path: test-catalog
compression-level: 9
if-no-files-found: ignore
- name: Archive Build Scan
if: always()
uses: actions/upload-artifact@v4
with:
name: build-scan-test-${{ matrix.java }}
path: ~/.gradle/build-scan-data
compression-level: 9
if-no-files-found: ignore

update-test-catalog:
name: Update Test Catalog
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/ci-complete.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ jobs:
fail-fast: false
matrix:
java: [ 23, 11 ]
artifact-prefix: [ "build-scan-test-", "build-scan-quarantined-test-"]
steps:
- name: Env
run: printenv
Expand All @@ -66,7 +67,7 @@ jobs:
with:
github-token: ${{ github.token }}
run-id: ${{ github.event.workflow_run.id }}
name: build-scan-test-${{ matrix.java }}
# matrix.artifact-prefix already ends with "-", so no extra separator is added here;
# the result must match the uploaded names, e.g. "build-scan-test-23".
name: ${{ matrix.artifact-prefix }}${{ matrix.java }}
path: ~/.gradle/build-scan-data # This is where Gradle buffers unpublished build scan data when --no-scan is given
- name: Handle missing scan
if: ${{ steps.download-build-scan.outcome == 'failure' }}
Expand Down
Loading

0 comments on commit 5f4cbd4

Please sign in to comment.