Elasticsearch 7 support (#143)

* Initial ES7 and ETL integration * Update gitops.json * Add test-connection pods * Add test-connection pods * Add chart install + test * Add chart install + test * Add chart install + test * Add chart install + test * Add chart install + test * Add agg-mds sync job * Bump versions * Update gh actions * Update gh actions * Fix linting issues * Update KIND doc * Update KIND doc * Update ETL and ES values * Delete local ES chart and update ETL job * move aggMDS to cron that can be triggered
uc-cdis · Nov 28, 2023 · 87a0726 · 87a0726
1 parent 8c6fa6a
commit 87a0726
Show file tree

Hide file tree

Showing 102 changed files with 1,245 additions and 747 deletions.
diff --git a/.github/ct.yaml b/.github/ct.yaml
@@ -4,7 +4,9 @@ chart-dirs:
   - helm
 chart-repos:
   - bitnami=https://charts.bitnami.com/bitnami
+  - elastic=https://helm.elastic.co
 helm-extra-args: --timeout 600s
 check-version-increment: true
 debug: false
-validate-maintainers: false
+validate-maintainers: false
+helm-dependency-extra-args: "--skip-refresh"
diff --git a/.github/workflows/lint_test.yaml b/.github/workflows/lint_test.yaml
@@ -22,7 +22,7 @@ jobs:
           check-latest: true
 
       - name: Set up chart-testing
-        uses: helm/chart-testing-action@v2.3.1
+        uses: helm/chart-testing-action@v2.6.1
 
       - name: Run chart-testing (list-changed)
         id: list-changed
@@ -34,17 +34,29 @@ jobs:
 
       - name: Run chart-testing (lint)
         run: ct lint --config .github/ct.yaml
-
-  # deploy-charts-to-kind:
-  #   name: ${{ matrix.environments }} - gen3 data portal build
+
+  # TODO: add back in when we have tests
+  # deploy-and-test-chart:
+  #   name: Deploy and Test Chart
   #   timeout-minutes: 20
   #   runs-on: ubuntu-latest
-  #   needs: [get-changes-for-envs]
-  #   if: ${{ needs.get-changes-for-envs.outputs.matrix != '[]' && needs.get-changes-for-envs.outputs.matrix != '' }}
   #   steps:
+
+  #     - name: Checkout
+  #       uses: actions/checkout@v2
+  #       with:
+  #         fetch-depth: 0
+
+  #     - name: Set up Helm
+  #       uses: azure/setup-helm@v3
+
+  #     - name: Set up chart-testing
+  #       uses: helm/chart-testing-action@v2.6.1
+
+
   #     - name: Create kind cluster
-  #       uses: helm/kind-action@v1.4.0
-  #       if: steps.list-changed.outputs.changed == 'true'
+  #       uses: helm/kind-action@v1.8.0
+
 
-  #     - name: Run chart-testing (install)
-  #       run: ct install
+  #     - name: Run chart install + testing
+  #       run: ct install --charts ./helm/gen3 --config .github/ct.yaml
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2023-07-27T21:47:16Z",
+  "generated_at": "2023-11-20T21:39:41Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -108,6 +108,15 @@
         "type": "Secret Keyword"
       }
     ],
+    "docs/kubernetes-in-docker.md": [
+      {
+        "hashed_secret": "5320294d100314ce19330d99abada8c26c4993a3",
+        "is_secret": false,
+        "is_verified": false,
+        "line_number": 96,
+        "type": "Secret Keyword"
+      }
+    ],
     "examples/gke_dev_values.yaml": [
       {
         "hashed_secret": "75cb4c02576c9abae38fadc84bc832f2af203f3e",
@@ -365,7 +374,7 @@
         "hashed_secret": "1740c48fa3141d4851b14f97e3bc0f46f7670672",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 119,
+        "line_number": 127,
         "type": "Secret Keyword"
       }
     ],
@@ -374,13 +383,13 @@
         "hashed_secret": "9b5925ea817163740dfb287a9894e8ab3aba2c18",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 212,
+        "line_number": 216,
         "type": "Secret Keyword"
       }
     ],
     "helm/guppy/README.md": [
       {
-        "hashed_secret": "0d5cd5f3caaaf8354a6c62816b97bcae006d4bcf",
+        "hashed_secret": "39e819806b607b544fec2ea49fa88a7ab81929ca",
         "is_secret": false,
         "is_verified": false,
         "line_number": 43,
@@ -509,28 +518,28 @@
         "hashed_secret": "8a10cd156f8f43ec303f885a7985b1cf90635e23",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 41,
+        "line_number": 49,
         "type": "Secret Keyword"
       },
       {
         "hashed_secret": "2546383b95bb44732e9be6a877fd476c0442fdab",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 58,
+        "line_number": 66,
         "type": "Secret Keyword"
       },
       {
         "hashed_secret": "d84ce25b0f9bc2cc263006ae39453efb22cc2900",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 60,
+        "line_number": 68,
         "type": "Secret Keyword"
       },
       {
         "hashed_secret": "f09dd6e359833a12f48c4c4255d6e87a6e55cfe9",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 79,
+        "line_number": 87,
         "type": "Secret Keyword"
       }
     ],

diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
@@ -205,7 +205,7 @@ global:
 guppy:
   # -- (int) Only relevant if tierAccessLevel is set to "regular".
   # The minimum amount of files unauthorized users can filter down to
-  tierAccessLimit: 1000
+  tierAccessLimit: "1000"
 
   # -- (list) Elasticsearch index configurations
   indices:
@@ -512,26 +512,97 @@ To configure sheepdog we require an entry in the versions block. It also require
 ``` -->
 
 ## Extra Information
-<!-- 
 ---
 # Sower
 
 ## What Does it Do
 
-Sower is a job dispatching service. Jobs are configured within the manifest and sower handles dispatching the jobs.
+Sower is a job dispatching service. Jobs are configured with .Values.sowerConfig and sower handles dispatching the jobs.
 
 ## How to Configure it
 
-<!-- To configure sower we require an entry in the versions block.
-
-```json
-{
-  "versions": {
-    "sower": "version"
-  }
-}
-``` -->
-
-<!-- From there you will also need to setup jobs for sower to dispatch within the sower block of a manifest. There are many jobs that can be configured but an example of some jobs configured can be found [here](https://github.com/uc-cdis/cdis-manifest/blob/master/gen3.biodatacatalyst.nhlbi.nih.gov/manifest.json#L48) -->
+```yaml
+sower:
+  enabled: true
+  sowerConfig:
+    - name: pelican-export
+      action: export
+      container:
+        name: job-task
+        image: quay.io/cdis/pelican-export:master
+        pull_policy: Always
+        env:
+        - name: DICTIONARY_URL
+          valueFrom:
+            configMapKeyRef:
+              name: manifest-global
+              key: dictionary_url
+        - name: GEN3_HOSTNAME
+          valueFrom:
+            configMapKeyRef:
+              name: manifest-global
+              key: hostname
+        - name: ROOT_NODE
+          value: subject
+        volumeMounts:
+        - name: pelican-creds-volume
+          readOnly: true
+          mountPath: "/pelican-creds.json"
+          subPath: config.json
+        - name: peregrine-creds-volume
+          readOnly: true
+          mountPath: "/peregrine-creds.json"
+          subPath: creds.json
+        cpu-limit: '1'
+        memory-limit: 12Gi
+      volumes:
+      - name: pelican-creds-volume
+        secret:
+          secretName: pelicanservice-g3auto
+      - name: peregrine-creds-volume
+        secret:
+          secretName: peregrine-creds
+      restart_policy: Never
+    - name: pelican-export-files
+      action: export-files
+      container:
+        name: job-task
+        image: quay.io/cdis/pelican-export:master
+        pull_policy: Always
+        env:
+        - name: DICTIONARY_URL
+          valueFrom:
+            configMapKeyRef:
+              name: manifest-global
+              key: dictionary_url
+        - name: GEN3_HOSTNAME
+          valueFrom:
+            configMapKeyRef:
+              name: manifest-global
+              key: hostname
+        - name: ROOT_NODE
+          value: file
+        - name: EXTRA_NODES
+          value: ''
+        volumeMounts:
+        - name: pelican-creds-volume
+          readOnly: true
+          mountPath: "/pelican-creds.json"
+          subPath: config.json
+        - name: peregrine-creds-volume
+          readOnly: true
+          mountPath: "/peregrine-creds.json"
+          subPath: creds.json
+        cpu-limit: '1'
+        memory-limit: 12Gi
+      volumes:
+      - name: pelican-creds-volume
+        secret:
+          secretName: pelicanservice-g3auto
+      - name: peregrine-creds-volume
+        secret:
+          secretName: peregrine-creds
+      restart_policy: Never
+```
 
 ## Extra Information
diff --git a/docs/etl.md b/docs/etl.md
@@ -0,0 +1,29 @@
+# ETL 
+
+The Gen3 Tube ETL is designed to translate data from a graph data model, stored in a PostgreSQL database, to indexed documents in ElasticSearch (ES), which supports efficient ways to query data from the front-end. The purpose of the Gen3 Tube ETL is to create indexed documents to reduce the response time of requests to query data. It is configured through an etlMapping.yaml configuration file, which describes which tables and fields to ETL to ElasticSearch.
+
+
+You can configure the ETL like this: 
+
+```yaml
+etl:
+  enabled: true
+  esEndpoint: ""
+  etlMapping:
+    <your etl mapping here>
+```
+
+To kick off the ETL job, run this command:
+
+```bash
+kubectl create job --from=cronjob/etl-cronjob etl
+```
+
+If you already have a job called `etl`, run the following. This deletes the old job and creates a new instance.
+
+```bash
+kubectl delete job etl
+kubectl create job --from=cronjob/etl-cronjob etl
+```
+
+For more information about our ETL, see [github.com/uc-cdis/tube](https://github.com/uc-cdis/tube).