# new-edgetest.yaml
# Deploys Galaxy on a throw-away GKE cluster and runs tool tests against it.
name: New Edge Deployment and Tool Tests
on:
  # Two scheduled runs per day (cron expressions are evaluated in UTC).
  schedule:
    - cron: "0 6 * * *"
    - cron: "0 18 * * *"
  # Manual runs must name the chunk of tool tests to execute.
  workflow_dispatch:
    inputs:
      chunk:
        description: 'Chunk to run. Valid range [0:13]. Out of range inputs will be ignored.'
        required: true
        # Quoted so the default stays a string rather than an integer.
        default: '3'
jobs:
  # Creates a single-node GKE cluster named after a date-stamped prefix and
  # publishes that prefix as a job output for the jobs that follow.
  deploygke:
    env:
      GKE_ZONE: us-east1-b
      GKE_VERSION: "1.24"
      GXY_TMP: /tmp/gxy
      PREFIX: edge
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    outputs:
      prefix: ${{ steps.prefix.outputs.prefix }}
    steps:
      - name: Set prefix with date
        id: prefix
        run: echo "prefix=$(echo $PREFIX-$(date +'%y-%m-%d-%H-%M'))" >> $GITHUB_OUTPUT
      - uses: 'actions/checkout@v3'
      - id: 'Auth'
        uses: 'google-github-actions/auth@v0'
        with:
          workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
          service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'
      - name: gcloud version
        run: gcloud --version
      - name: Create GKE cluster
        run: gcloud container clusters create "${{steps.prefix.outputs.prefix}}" --cluster-version="$GKE_VERSION" --disk-size=100 --num-nodes=1 --machine-type=e2-custom-20-81920 --zone "$GKE_ZONE"

  # Installs GalaxyKubeMan into the cluster created by `deploygke`, runs one
  # chunk of tool tests against it and commits the resulting reports.
  # Every resource name is derived from "<prefix>-1".
  testgalaxy1:
    needs: deploygke
    env:
      GKE_ZONE: us-east1-b
      GXY_TMP: /tmp/gxy
      GKM_VERSION: "2.7.0"
      # Quoted: an unquoted True is a YAML boolean, and the consumer reads
      # this environment variable as a string.
      USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3
      - id: 'Auth'
        uses: 'google-github-actions/auth@v0'
        with:
          workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
          service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'
      - name: Get cluster kubeconfig
        run: gcloud container clusters get-credentials "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE" --project ${{ secrets.GCP_PROJECT_ID }}
      - name: Create two GCP Disks
        run: gcloud compute disks create "${{needs.deploygke.outputs.prefix}}-1-postgres-pd" --size 10Gi --zone "$GKE_ZONE" && gcloud compute disks create "${{needs.deploygke.outputs.prefix}}-1-nfs-pd" --size 250Gi --zone "$GKE_ZONE"
      - name: Setup gcloud auth plugin for kubectl and fetch creds
        run: |
          gcloud components install gke-gcloud-auth-plugin
          gcloud container clusters get-credentials "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE"
      - name: Install Kubectl
        run: curl -LO "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" && chmod +x ./kubectl && sudo mv ./kubectl /usr/local/bin/kubectl && kubectl version
      - name: Install Helm
        run: curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash
      - name: Add anvil repository
        run: helm repo add anvil https://github.com/cloudve/helm-charts/raw/anvil
      # The timed install output (stdout and stderr) is captured in $GXY_TMP
      # and printed / reported by later steps; a failed install does not stop
      # the job.
      - name: Deploy GalaxyKubeMan
        continue-on-error: true
        # Literal block scalar: every backslash-newline below must reach the
        # shell intact, and the leading spaces on the continuation lines are
        # what separate the arguments once the shell joins the lines.
        run: |
          (time sh -c "helm install --create-namespace -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1 anvil/galaxykubeman\
            --wait\
            --wait-for-jobs\
            --timeout 720s\
            --version $GKM_VERSION\
            --set galaxy.service.type=LoadBalancer\
            --set galaxy.configs.\"galaxy\.yml\".galaxy.admin_users=\"tests@fake.org\"\
            --set galaxy.configs.\"galaxy\.yml\".galaxy.master_api_key=galaxypassword\
            --set galaxy.configs.\"galaxy\.yml\".galaxy.single_user=\"tests@fake.org\"\
            --set galaxy.configs.\"job_conf\.yml\".runners.k8s.k8s_node_selector=\"cloud.google.com/gke-nodepool: default-pool\"\
            --set 'galaxy.configs.job_conf\.yml.execution.environments.tpv_dispatcher.tpv_config_files={https://raw.githubusercontent.com/galaxyproject/tpv-shared-database/main/tools.yml,lib/galaxy/jobs/rules/tpv_rules_local.yml,https://gist.githubusercontent.com/afgane/68d1dbbe0af2468ba347dc74b6d3f7fa/raw/20edda50161bdcb74ff38935e7f76d79bfdaf303/tvp_rules_tests.yml}'\
            --set galaxy.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set galaxy.persistence.storageClass=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set galaxy.postgresql.galaxyDatabasePassword=${{secrets.COMMON_PASSWORD}}\
            --set galaxy.postgresql.master.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set galaxy.postgresql.persistence.existingClaim=\"${{needs.deploygke.outputs.prefix}}-1-postgres-disk-pvc\"\
            --set galaxy.rabbitmq.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set galaxy.rabbitmq.persistence.storageClassName=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set galaxy.ingress.ingressClassName=\"\"\
            --set galaxy.tusd.ingress.ingressClassName=\"\"\
            --set cvmfs.cvmfscsi.cache.alien.pvc.storageClass=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set nfs.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set nfs.persistence.existingClaim=\"${{needs.deploygke.outputs.prefix}}-1-nfs-disk-pvc\"\
            --set nfs.storageClass.name=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set persistence.nfs.name=\"${{needs.deploygke.outputs.prefix}}-1-nfs-disk\"\
            --set persistence.nfs.persistentVolume.extraSpec.gcePersistentDisk.pdName=\"${{needs.deploygke.outputs.prefix}}-1-nfs-pd\"\
            --set persistence.postgres.name=\"${{needs.deploygke.outputs.prefix}}-1-postgres-disk\"\
            --set persistence.postgres.persistentVolume.extraSpec.gcePersistentDisk.pdName=\"${{needs.deploygke.outputs.prefix}}-1-postgres-pd\"") 1> "$GXY_TMP" 2>> "$GXY_TMP"
      - name: Print output
        run: cat "$GXY_TMP"
      - name: Helm get values
        run: helm get values -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1
      - name: Get pods, pvc, and pv, sc
        run: kubectl get pods -n ${{needs.deploygke.outputs.prefix}}-1; kubectl get pvc -n ${{needs.deploygke.outputs.prefix}}-1; kubectl get pv -n ${{needs.deploygke.outputs.prefix}}-1; kubectl get sc -n ${{needs.deploygke.outputs.prefix}}-1
      - name: Get events
        run: kubectl get events -n ${{needs.deploygke.outputs.prefix}}-1
      - uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install required system packages
        run: sudo apt-get update && sudo apt-get install -y python3-h5py pkg-config libhdf5-dev
      - name: Install dependencies
        run: python -m pip install planemo ephemeris pysam "galaxy-tool-util>=21.1.0.dev6" bioblend Jinja2 matplotlib gxabm
      - uses: actions/checkout@v3
        with:
          persist-credentials: false
      - name: Add report from this run
        run: bash ./.github/scripts/report_deployment.sh "$GXY_TMP" anvil-edge ${{ secrets.GIT_TOKEN }}
      # The emitted url value is wrapped in literal double quotes, so later
      # steps that paste it into a command line get a quoted argument.
      - name: Get Galaxy IP and port
        id: galaxy
        run: echo "url=$(echo \"http://$(kubectl get svc -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy-nginx -o jsonpath='{.status.loadBalancer.ingress[0].ip}' | sed -e 's/\"//g'):$(kubectl get svc -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy-nginx -o jsonpath='{.spec.ports[0].port}')$(kubectl get ingress -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy -o jsonpath='{.spec.rules[0].http.paths[0].path}')/\")" >> $GITHUB_OUTPUT
      - name: Create remote single user
        run: curl ${{steps.galaxy.outputs.url}}
      - name: Create reports dir for this date
        id: reports
        run: |
          DIR=reports/anvil-edge/tool-tests/${{needs.deploygke.outputs.prefix}}-1
          mkdir -p $DIR
          echo "dir=$DIR" >> $GITHUB_OUTPUT
      # - name: Get tool list
      #   run: cp production/anvil/tools.yaml reports/anvil-edge/tools.yaml
      - name: Create and get API key
        id: api
        run: echo "key=$(python .github/scripts/create_api_key.py ${{steps.galaxy.outputs.url}} galaxypassword)" >> $GITHUB_OUTPUT
      - name: Configure ABM
        id: key
        run: |
          abm version
          cp -R .abm ~/.abm
          abm config url galaxy ${{ steps.galaxy.outputs.url }}
          # NOTE(review): this prints the API key to the job log; presumably
          # tolerated because the cluster is deleted by the cleanup job - confirm.
          echo "Key is ${{ steps.api.outputs.key }}"
          abm config key galaxy ${{ steps.api.outputs.key }}
          abm config show galaxy
          # Re-export the key created by the `api` step. No shell variable
          # named `key` is set in this script, so "$key" was always empty.
          echo "key=${{ steps.api.outputs.key }}" >> $GITHUB_OUTPUT
      - name: Split tool tests (workflow_dispatch)
        if: ${{ github.event_name == 'workflow_dispatch' }}
        env:
          # Passed through the environment so the user-supplied value is
          # never spliced into the shell script text.
          CHUNK: ${{ github.event.inputs.chunk }}
        run: python .github/scripts/get_chunk.py --chunk "$CHUNK" -o ${{ steps.reports.outputs.dir }} reports/anvil-edge/tools.yaml
      - name: Split tool tests (cron job)
        if: ${{ github.event_name != 'workflow_dispatch' }}
        run: python .github/scripts/get_chunk.py reports/anvil-edge/tools.yaml -o ${{ steps.reports.outputs.dir }}
      - name: Generate a history name
        id: history
        run: echo "history_name=anvil-test-$(date '+%F-%H-%M')" >> $GITHUB_OUTPUT
      - name: Run tool tests
        continue-on-error: true
        run: sleep 30 && .github/scripts/run-galaxy-tests.sh ${{ steps.history.outputs.history_name }} ${{ steps.reports.outputs.dir }}
      # - name: Write job results
      #   run: abm galaxy jobs list --history ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/jobs.txt
      # Saves the job list, one JSON file per errored or paused job, and the
      # history summary into the reports directory.
      - name: Generate Jobs reports
        continue-on-error: true
        shell: bash
        run: |
          abm galaxy jobs list --history ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/jobs.txt
          echo "Saving jobs that had errors."
          abm galaxy jobs list --state error
          for job in $(abm galaxy jobs list --state error | awk '{print $1}') ; do
            if [[ ! -e ${{ steps.reports.outputs.dir }}/errors ]] ; then
              mkdir -p ${{ steps.reports.outputs.dir }}/errors
            fi
            abm galaxy jobs show $job > ${{ steps.reports.outputs.dir }}/errors/$job.json
          done
          echo "Looking for paused jobs"
          abm galaxy jobs list --state paused
          for job in $(abm galaxy jobs list --state paused | awk '{print $1}') ; do
            echo "Job $job was paused"
            if [[ ! -e ${{ steps.reports.outputs.dir }}/paused ]] ; then
              mkdir -p ${{ steps.reports.outputs.dir }}/paused
            fi
            abm galaxy jobs show $job > ${{ steps.reports.outputs.dir }}/paused/$job.json
          done
          abm galaxy history summarize ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/job-metrics.csv
      - name: Save job handler logs and node information
        continue-on-error: true
        shell: bash
        run: |
          pod=$(kubectl get pods -n ${{needs.deploygke.outputs.prefix}}-1 | grep galaxy-job | awk '{print $1}')
          kubectl logs -n ${{needs.deploygke.outputs.prefix}}-1 $pod > ${{ steps.reports.outputs.dir }}/job.log
          kubectl describe nodes > ${{ steps.reports.outputs.dir }}/node-info.txt
      - name: Create HTML report
        run: planemo test_reports ${{ steps.reports.outputs.dir }}/results.json --test_output_xunit ${{ steps.reports.outputs.dir }}/results.xunit --test_output ${{ steps.reports.outputs.dir }}/results.html
      - name: Update Readme
        run: python .github/scripts/update_readme.py ${{ steps.reports.outputs.dir }}/chunk.json reports/anvil-edge/tool-tests/chunks.json reports/anvil-edge/README.md
      - name: Add updated README from this run
        run: bash ./.github/scripts/report_tests.sh ${{ secrets.GIT_TOKEN }} "Updating anvil-edge README for ${{needs.deploygke.outputs.prefix}}-1" reports/anvil-edge

  # Runs regardless of how the earlier jobs ended (`if: always()`) and deletes
  # the cluster and the two disks created for this run.
  cleanup:
    if: always()
    needs: [deploygke, testgalaxy1]
    env:
      GKE_ZONE: us-east1-b
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: 'actions/checkout@v3'
      - id: 'Auth'
        uses: 'google-github-actions/auth@v0'
        with:
          workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
          service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'
      - name: Delete the GKE cluster
        continue-on-error: true
        run: gcloud container clusters delete "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE" --quiet
      - name: Delete the two GCP Disks for first Galaxy
        run: |
          # Attempt both deletions even when the first one fails, so one
          # failure cannot leave the other disk behind; the step still fails
          # if either deletion failed.
          status=0
          gcloud compute disks delete "${{needs.deploygke.outputs.prefix}}-1-postgres-pd" --zone "$GKE_ZONE" --quiet || status=$?
          gcloud compute disks delete "${{needs.deploygke.outputs.prefix}}-1-nfs-pd" --zone "$GKE_ZONE" --quiet || status=$?
          exit "$status"