diff --git a/.github/scripts/run-chaos-testing.sh b/.github/scripts/run-chaos-testing.sh
new file mode 100755
index 000000000..1a705bf89
--- /dev/null
+++ b/.github/scripts/run-chaos-testing.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Injects faults into the local YDB containers with pumba: container restarts in the
+# background, then network delay, packet loss and packet corruption in sequence.
+
+# -f makes curl exit non-zero on an HTTP error instead of saving the error page as the binary.
+curl -fsSL https://github.com/alexei-led/pumba/releases/download/0.10.1/pumba_linux_amd64 -o /tmp/pumba || exit 1
+chmod +x /tmp/pumba
+
+# Background job: restart a random container matching ^ydb-dynamic every 30s, capped at 3 minutes.
+timeout -k 5 3m /tmp/pumba --random --interval 30s \
+    restart --timeout 30s \
+    re2:^ydb-dynamic &
+
+# sudo tc qdisc add dev lo root netem delay 100ms
+# sudo tc qdisc change dev lo root netem loss 10%
+# sleep 60s
+
+# One minute of added latency (--time/--jitter are milliseconds per the pumba docs).
+/tmp/pumba --random \
+    netem --duration 1m --interface lo --tc-image gaiadocker/iproute2 \
+    delay --time 90 --jitter 30 --correlation 20 \
+    re2:^ydb
+
+# One minute of 20% packet loss.
+/tmp/pumba --random \
+    netem --duration 1m --interface lo --tc-image gaiadocker/iproute2 \
+    loss --percent 20 \
+    re2:^ydb
+
+# One minute of 20% packet corruption.
+/tmp/pumba --random \
+    netem --duration 1m --interface lo --tc-image gaiadocker/iproute2 \
+    corrupt --percent 20 \
+    re2:^ydb
+
+# Best-effort cleanup: `tc qdisc del` fails when no root qdisc is installed, which is not an error here.
+sudo tc qdisc del dev lo root 2>/dev/null || true
diff --git a/.github/workflows/slo.yml b/.github/workflows/slo.yml
index 483985d92..85f464158 100644
--- a/.github/workflows/slo.yml
+++ b/.github/workflows/slo.yml
@@ -13,7 +13,7 @@ on:
       github_pull_request_number:
         required: true
       slo_workload_duration_seconds:
-        default: '600'
+        default: '180'
         required: false
       slo_workload_read_max_rps:
         default: '1000'
@@ -54,7 +54,8 @@ jobs:
             label: xorm
 
     concurrency:
-      group: slo-${{ github.ref }}-${{matrix.sdk.name}}
+      group: slo-${{ github.ref }}-${{ matrix.sdk.name }}
+      cancel-in-progress: true
 
     steps:
       - name: Checkout repository
@@ -79,16 +80,37 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           sdk_name: ${{ matrix.sdk.name }}
 
+      - name: Prepare SLO Database
+        run: |
+          timeout -k 5 10 ./tests/slo/.bin/${{matrix.sdk.id}}_linux_amd64 create grpc://localhost:2135 /Root/testdb
+
+      # Started in the background so faults are injected while the workload step below runs.
+      - name: Run chaos testing
+        run: |
+          echo 'Performing chaos testing...'
+          bash ./.github/scripts/run-chaos-testing.sh &
+
       - name: Run SLO Tests
+        timeout-minutes: 11
         run: |
-          ./tests/slo/.bin/${{matrix.sdk.id}}_linux_amd64 create grpc://localhost:2135 /Root/testdb
-          ./tests/slo/.bin/${{matrix.sdk.id}}_linux_amd64 run grpc://localhost:2135 /Root/testdb \
+          # Bound the run by the requested duration plus 20s of slack, so a
+          # custom slo_workload_duration_seconds is not cut short by a fixed limit.
+          timeout -k 5 $(( ${{inputs.slo_workload_duration_seconds || 180}} + 20 )) ./tests/slo/.bin/${{matrix.sdk.id}}_linux_amd64 run grpc://localhost:2135 /Root/testdb \
             -prom-pgw localhost:9091 \
             -report-period 250 \
-            -time ${{inputs.slo_workload_duration_seconds || 600}} \
+            -time ${{inputs.slo_workload_duration_seconds || 180}} \
             -read-rps ${{inputs.slo_workload_read_max_rps || 1000}} \
             -write-rps ${{inputs.slo_workload_write_max_rps || 100}} \
             -read-timeout 10000 \
-            -write-timeout 10000 \
-            -shutdown-time 30
-          ./tests/slo/.bin/${{matrix.sdk.id}}_linux_amd64 cleanup grpc://localhost:2135 /Root/testdb
+            -write-timeout 10000
+
+      # `tc qdisc del` exits non-zero when no root qdisc is installed (for example
+      # after the chaos script already removed it), so the failure is ignored.
+      - name: Reset loopback traffic control
+        if: always()
+        run: sudo tc qdisc del dev lo root || true
+
+      - name: Cleanup SLO Database
+        continue-on-error: true
+        run: |
+          timeout -k 5 10 ./tests/slo/.bin/${{matrix.sdk.id}}_linux_amd64 cleanup grpc://localhost:2135 /Root/testdb