From d846a15bced1b5fd3030101116d6eb5af89d4a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Armando=20Ba=C3=B1uelos?= <32311654+abanuelo@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:58:41 -0700 Subject: [PATCH] feat: adding ci for simulator testing (#226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adding ci/cd for simulator testing * fix: Update run-simulators.yml * Adding version checks into CI * fix: addressing broken parts of CI * fix: adding manual pip install carla * fix: adding warning comment in workflow * fix: reformatting file to check for latest sims with black and isort * fix: adding more environment variable exports to workflow * feat: adding matrix and multiple environment locations * fix: adding GH access token * feat: dividing steps to independent jobs * feat: opening virtual display for webots * fix: add command to grab versions programmatically * fix: addressing feedback * fix: running webots and carla in parallel * removing pip installation * fix: readjusting nvidia-smi-test * fix: addressing feedback --------- Co-authored-by: Armando Banuelos Co-authored-by: Armando Bañuelos Co-authored-by: Armando Banuelos --- .github/check_latest_simulators.py | 49 +++++++ .github/workflows/run-simulators.yml | 201 +++++++++++++++++++++++++++ 2 files changed, 250 insertions(+) create mode 100644 .github/check_latest_simulators.py create mode 100644 .github/workflows/run-simulators.yml diff --git a/.github/check_latest_simulators.py b/.github/check_latest_simulators.py new file mode 100644 index 000000000..9864e5608 --- /dev/null +++ b/.github/check_latest_simulators.py @@ -0,0 +1,49 @@ +import os +import re + +import requests + +SIMULATORS = ["carla", "webots"] +SIMULATOR_URLS = { + "carla": "https://github.com/carla-simulator/carla/releases/latest", + "webots": "https://github.com/cyberbotics/webots/releases/latest", +} +SIMULATORS_REGEXES = { + "carla": 'carla-simulator/carla/releases/tag/([^"]+)', 
"webots": 'cyberbotics/webots/releases/tag/([^"]+)', +} + + +def check_path_exists(version, simulator): + path = f"/software/{simulator}{version}" + if os.path.exists(path): + print(f"Latest {simulator} version {version} already present on the machine.\n") + else: # raw string below: the banner is full of backslashes that are not escapes + s = r""" + _ + _ _____ ________ (_)__ ___ _ +| |/|/ / _ `/ __/ _ \/ / _ \/ _ `/ +|__,__/\_,_/_/ /_//_/_/_//_/\_, / + /___/ + """ + print(s) + print( + f"A new {simulator} version ({version}) needs to be installed and tested in CI.\n" + ) + + +def version_check(regex_match, sim_name): + try: + version = regex_match.group(1) + except AttributeError: + print(f"Error: Unable to find the latest {sim_name} version using regex.") + else: + check_path_exists(version, sim_name) + + +for sim in SIMULATORS: + print(f"Checking for {sim}...") + url = SIMULATOR_URLS[sim] + response = requests.get(url, timeout=30) # never hang CI on a stalled request + regex_match = re.search(SIMULATORS_REGEXES[sim], response.text) + version_check(regex_match, sim) diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml new file mode 100644 index 000000000..5b35cf85c --- /dev/null +++ b/.github/workflows/run-simulators.yml @@ -0,0 +1,201 @@ +name: run_simulators +on: + # IMPORTANT: this workflow should only be triggered manually via the Actions + # portal of the repo!!! Do not modify this workflow's trigger! 
+ workflow_dispatch: + +jobs: + start_ec2_instance: + name: start_ec2_instance + runs-on: ubuntu-latest + concurrency: + group: sim # NOTE(review): presumably serializes runs that share the one EC2 instance - confirm + steps: + - name: Start EC2 Instance + env: + INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }} + run: | + # Get the instance state + instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name') + + # If the machine is stopping wait for it to fully stop + while [ "$instance_state" == "stopping" ]; do + echo "Instance is stopping, waiting for it to fully stop..." + sleep 10 + instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name') + done + + # Check if instance state is "stopped" + if [[ "$instance_state" == "stopped" ]]; then + echo "Instance is stopped, starting it..." + aws ec2 start-instances --instance-ids $INSTANCE_ID + elif [[ "$instance_state" == "pending" ]]; then + echo "Instance startup is pending, continuing..." + elif [[ "$instance_state" == "running" ]]; then + echo "Instance is already running..." 
+ exit 0 # already running: end the step here, skipping the status-check wait below + else + echo "Unknown instance state: $instance_state" + exit 1 + fi + + # wait for status checks to pass + TIMEOUT=120 # Timeout in seconds + START_TIME=$(date +%s) + END_TIME=$((START_TIME + TIMEOUT)) + while true; do + response=$(aws ec2 describe-instance-status --instance-ids $INSTANCE_ID) + system_status=$(echo "$response" | jq -r '.InstanceStatuses[0].SystemStatus.Status') + instance_status=$(echo "$response" | jq -r '.InstanceStatuses[0].InstanceStatus.Status') + + if [[ "$system_status" == "ok" && "$instance_status" == "ok" ]]; then + echo "Both SystemStatus and InstanceStatus are 'ok'" + exit 0 + fi + + CURRENT_TIME=$(date +%s) + if [[ "$CURRENT_TIME" -ge "$END_TIME" ]]; then + echo "Timeout: Both SystemStatus and InstanceStatus have not reached 'ok' state within $TIMEOUT seconds." + exit 1 + fi + + sleep 10 # Check status every 10 seconds + done + + check_simulator_version_updates: + name: check_simulator_version_updates + runs-on: ubuntu-latest + needs: start_ec2_instance + steps: + - name: Check for Simulator Version Updates + env: + PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + HOSTNAME: ${{ secrets.SSH_HOST }} + USER_NAME: ${{ secrets.SSH_USERNAME }} + GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }} + run: | + echo "$PRIVATE_KEY" > private_key && chmod 600 private_key + ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} ' + cd /home/ubuntu/actions/ && + rm -rf Scenic && + git clone --branch $(basename "${{ github.ref }}") --single-branch https://$GH_ACCESS_TOKEN@github.com/BerkeleyLearnVerify/Scenic.git && + cd Scenic && + python3 -m venv venv && + source venv/bin/activate && + python3 -m pip install -e .[test-full] && + python3 .github/check_latest_simulators.py + ' + + check_nvidia_smi: + name: check_nvidia_smi + runs-on: ubuntu-latest + needs: start_ec2_instance + continue-on-error: true # a failed GPU check does not fail the workflow run + steps: + - name: Check NVIDIA SMI + env: + PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + HOSTNAME: ${{ secrets.SSH_HOST}} 
USER_NAME: ${{ secrets.SSH_USERNAME}} + run: | + echo "$PRIVATE_KEY" > private_key && chmod 600 private_key + ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} ' + output=$(nvidia-smi) + echo "$output" + if [ -z "$output" ]; then + echo "NVIDIA Driver is not set" + exit 1 + fi + ' + - name: NVIDIA Driver is not set + if: ${{ failure() }} + run: | + echo "NVIDIA SMI is not working, please run the steps here on the instance:" + echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers" + + run_carla_simulators: + name: run_carla_simulators + runs-on: ubuntu-latest + needs: [check_simulator_version_updates, check_nvidia_smi] + steps: + - name: Run CARLA Tests + env: + PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + HOSTNAME: ${{secrets.SSH_HOST}} + USER_NAME: ${{secrets.SSH_USERNAME}} + run: | + echo "$PRIVATE_KEY" > private_key && chmod 600 private_key + ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} ' + cd /home/ubuntu/actions/Scenic && + source venv/bin/activate && + carla_versions=($(find /software -maxdepth 1 -type d -name 'carla*')) && + for version in "${carla_versions[@]}"; do + echo "============================= CARLA $version =============================" + export CARLA_ROOT="$version" + pytest tests/simulators/carla/test_carla.py || failed=1 # record the failure but keep testing the remaining versions + done && exit ${failed:-0} + ' + + run_webots_simulators: + name: run_webots_simulators + runs-on: ubuntu-latest + needs: [check_simulator_version_updates, check_nvidia_smi] + steps: + - name: Run Webots Tests + env: + PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + HOSTNAME: ${{secrets.SSH_HOST}} + USER_NAME: ${{secrets.SSH_USERNAME}} + run: | + echo "$PRIVATE_KEY" > private_key && chmod 600 private_key 
+ ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} ' + Xvfb :99 -screen 0 1024x768x16 & + cd /home/ubuntu/actions/Scenic && + source venv/bin/activate && + webots_versions=($(find /software -maxdepth 1 -type d -name 'webots*')) && + export DISPLAY=:99 && + for version in "${webots_versions[@]}"; do + echo "============================= Webots $version =============================" + export WEBOTS_ROOT="$version" + pytest tests/simulators/webots/test_webots.py || failed=1 # record the failure but keep testing the remaining versions + done && exit ${failed:-0} + ' + + stop_ec2_instance: + name: stop_ec2_instance + runs-on: ubuntu-latest + needs: [run_carla_simulators, run_webots_simulators] + steps: + - name: Stop EC2 Instance + env: + INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }} + run: | + # Get the instance state + instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name') + + # If the machine is pending wait for it to fully start + while [ "$instance_state" == "pending" ]; do + echo "Instance is pending startup, waiting for it to fully start..." + sleep 10 + instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name') + done + + # Stop the instance only if it is currently "running" + if [[ "$instance_state" == "running" ]]; then + echo "Instance is running, stopping it..." + aws ec2 stop-instances --instance-ids $INSTANCE_ID + elif [[ "$instance_state" == "stopping" ]]; then + echo "Instance is stopping..." + elif [[ "$instance_state" == "stopped" ]]; then + echo "Instance is already stopped..." + exit 0 + else + echo "Unknown instance state: $instance_state" + exit 1 + fi