From 377a4b0af09f3f9483e8ed533edbf0a17124a360 Mon Sep 17 00:00:00 2001 From: "Wilkins, Emily (Counterpointe Solutions)" <80470879+ewilkins-csi@users.noreply.github.com> Date: Thu, 14 Nov 2024 09:18:43 -0600 Subject: [PATCH] [#467] document self-hosted runner setup The documentation for setting up self-hosted runners that can handle the aissemble build via the Actions Runner Controller project has been added to `devops`. In addition, the `helm install --dry-run` IT has been removed and the build action has been adjusted to skip cache-restore actions if the trigger was a schedule. The dry run was removed largely because it just requires too many permissions to function and isn't adding much more than our strict linting operation and unit tests. The cache restore skip was added as an alternative to the manual cache deletion logic we were doing in the build before. General cache cleanup is handled automatically by S3. --- .github/workflows/build.yml | 4 ++ devops/ARC_README.md | 67 ++++++++++++++++++++++++++ devops/arc-runner-service-account.yaml | 5 ++ devops/controller-values.yaml | 2 + devops/runnerset-values.yaml | 7 +++ extensions/extensions-helm/pom.xml | 22 --------- 6 files changed, 85 insertions(+), 22 deletions(-) create mode 100644 devops/ARC_README.md create mode 100644 devops/arc-runner-service-account.yaml create mode 100644 devops/controller-values.yaml create mode 100644 devops/runnerset-values.yaml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9d6f40ee8..c6831a012 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,6 +49,7 @@ jobs: - name: Load docker build cache id: cached-docker-build uses: runs-on/cache/restore@v4 + if: ${{ ! 
github.event.schedule }} with: path: ~/.docker/cache key: docker-cache-${{ hashFiles('**/Dockerfile') }} @@ -57,6 +58,7 @@ jobs: - name: Load m2 repository cache # Manually caching .m2 repo as the setup-java caching isn't falling back to older caches id: cached-m2-repo uses: runs-on/cache/restore@v4 + if: ${{ ! github.event.schedule }} with: path: ~/.m2/repository key: maven-${{ hashFiles('**/pom.xml') }} @@ -65,6 +67,7 @@ jobs: - name: Load m2 build cache id: cached-m2-build uses: runs-on/cache/restore@v4 + if: ${{ ! github.event.schedule }} with: path: ~/.m2/build-cache key: maven-build-cache-${{ hashFiles('**/pom.xml') }} @@ -74,6 +77,7 @@ jobs: - name: Poetry cache id: cached-poetry uses: runs-on/cache@v4 + if: ${{ ! github.event.schedule }} with: path: ~/.cache/pypoetry key: poetry-cache-${{ hashFiles('**/pyproject.toml') }} diff --git a/devops/ARC_README.md b/devops/ARC_README.md new file mode 100644 index 000000000..1bb3f5a54 --- /dev/null +++ b/devops/ARC_README.md @@ -0,0 +1,67 @@ +# Standing up ARC from scratch + +## Creating the Controller +The controller only needs to be created once for the whole cluster. The controller MUST be created before the runner +sets and the versions must match between the two. The controller cannot be upgraded in place according to the Github +documentation, so all runnersets in the cluster must be uninstalled, then the controller uninstalled, and then the new +controller version installed. A `helm upgrade` to simply update the values in the values file has not been tested, so +it is unclear if a full uninstall is needed for that case. 
+ +```sh +helm install arc-controller oci://ghcr.io/actions/actions-runner-controller-charts/gha-runner-scale-set-controller \ + --namespace gh-actions-controller \ + --create-namespace \ + -f controller-values.yaml +``` + +## Creating the Runner Scale Set + +Previously, we needed special permissions on the runner pod to execute `helm install --dry-run` as an integration test +for our baseline charts. However, this requires pretty expansive permissions (cluster-level access plus secrets retrieval). The +dry-run IT wasn't really adding much over the simple strict linting approach, so it has been dropped. The custom service +account setup has been left in place simply to demonstrate how this _would_ be done if we need permissions for some +other use case in the future. + +>[!NOTE] +>The namespace is currently hard-coded in the YAML file, so if you intend to deploy to a different namespace when +>installing the runner-set, you must update the YAML file as well. This is required because a RoleBinding object in +>Kubernetes must specify the ServiceAccount namespace explicitly. + +```sh +kubectl apply -f arc-runner-service-account.yaml +``` + +Finally, the runner-set can be created via Helm. The GitHub token is any Personal Access Token (Classic) that has Repo +permissions. In the future, this could be changed to use a GitHub App so that it isn't tied to a specific user. +Additionally, we could consider using SealedSecrets or a pre-defined secret for the App settings. + +>[!NOTE] +>The installation name (`arc-runner-set-aissemble`) will be the label used to select the runner set in a workflow file. 
+ +```sh +helm install arc-runner-set-aissemble oci://ghcr.io/actions/actions-runner-controller-charts/gha-runner-scale-set \ + --namespace gh-actions-aissemble \ + --create-namespace \ + --set githubConfigSecret.github_token="{TOKEN}" \ + -f runnerset-values.yaml +``` + +# Upgrading ARC or Updating Configuration + +## Controller +The controller cannot be upgraded in place according to the GitHub +documentation, so all runner sets in the cluster must be uninstalled, then the controller uninstalled, and then the new +controller version installed. A `helm upgrade` to simply update the values in the values file has not been tested, so +it is unclear if a full uninstall is needed for that case. + +## Runner Set +If the values file has been updated and simply needs to be re-applied, the `--reuse-values` flag can be used to preserve +the existing GH PAT. It is unclear whether `--reuse-values` would allow removal of values from the values file, and so +the upgrade may need to be run without `--reuse-values`, supplying the token again, to achieve this. 
+ +```sh +helm upgrade arc-runner-set-aissemble oci://ghcr.io/actions/actions-runner-controller-charts/gha-runner-scale-set \ + --namespace gh-actions-aissemble \ + --reuse-values \ + -f runnerset-values.yaml +``` diff --git a/devops/arc-runner-service-account.yaml b/devops/arc-runner-service-account.yaml new file mode 100644 index 000000000..34a30297c --- /dev/null +++ b/devops/arc-runner-service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: gh-actions-aissemble + name: arc-runner-set-aissemble-gha-rs-custom-permissions diff --git a/devops/controller-values.yaml b/devops/controller-values.yaml new file mode 100644 index 000000000..4f9abc51e --- /dev/null +++ b/devops/controller-values.yaml @@ -0,0 +1,2 @@ +flags: + logLevel: "info" diff --git a/devops/runnerset-values.yaml b/devops/runnerset-values.yaml new file mode 100644 index 000000000..b63a19ef6 --- /dev/null +++ b/devops/runnerset-values.yaml @@ -0,0 +1,7 @@ +containerMode: + type: "dind" +githubConfigUrl: "https://github.com/boozallen/aissemble" +maxRunners: 3 +template: + spec: + serviceAccountName: "arc-runner-set-aissemble-gha-rs-custom-permissions" diff --git a/extensions/extensions-helm/pom.xml b/extensions/extensions-helm/pom.xml index 028448534..4908bb70c 100644 --- a/extensions/extensions-helm/pom.xml +++ b/extensions/extensions-helm/pom.xml @@ -67,28 +67,6 @@ true - - integration-test - - - - - ${group.helm.plugin} - helm-maven-plugin - - - simulates-an-install - integration-test - - dry-run - - - - - - - -