diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 9f4de4de..caf93d5f 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -5,8 +5,6 @@ on:
     branches:
       - main
   pull_request:
-    branches:
-      - main

 jobs:
   test:
@@ -16,8 +14,20 @@ jobs:
       - name: Checkout code
        uses: actions/checkout@v4

+      - name: Start Redis
+        uses: supercharge/redis-github-action@1.7.0
+        with:
+          redis-version: 7
+
      - name: Build Docker image
        run: docker build -t glider-dac-build .

-      - name: Run tests
-        run: docker run --rm glider-dac-build pytest /glider-dac/tests
+      - name: Install testing requirements and run tests
+        # FIXME: Why is it necessary to set FLASK_ENV to TESTING here?
+        # pytest runs under GHA usually have this set properly already.
+        run: >
+          docker run --network host -e FLASK_ENV=TESTING
+          -e OVERRIDE_REDIS_HOST=localhost -e OVERRIDE_REDIS_URL=redis://localhost:6379/8
+          --rm -u root glider-dac-build bash -c
+          "pip install --no-cache -r /glider-dac/test_requirements.txt &&
+          pytest /glider-dac/glider_dac/tests"
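For context, FLASK_ENV names a top-level section of config.yml and is resolved by glider_dac/config.py (added later in this diff). A minimal sketch of that lookup, assuming the config.yml layout shown further down:

    # sketch of the section selection done in glider_dac.config.get_config()
    import os
    import yaml

    with open("config.yml") as f:
        config = yaml.load(f, Loader=yaml.Loader)

    # FLASK_ENV=TESTING selects the TESTING section (which pulls in COMMON via
    # the <<: *common YAML merge); unknown or unset values fall back to
    # DEVELOPMENT.
    section = os.environ.get("FLASK_ENV", "DEVELOPMENT")
    config.update(config.get(section, config["DEVELOPMENT"]))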
diff --git a/.github/workflows/sync_theme.yml b/.github/workflows/sync_theme.yml
index 038afa38..898faea5 100644
--- a/.github/workflows/sync_theme.yml
+++ b/.github/workflows/sync_theme.yml
@@ -7,7 +7,7 @@ on:
   schedule:
     - cron: "00 14 * * *"

-  workflow_dispatch:
+  workflow_dispatch: null

 jobs:

@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
     - name: Checkout repo
-      uses: actions/checkout@v4
+      uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4
       with: { ref: gh-pages }

     - name: submodule checkout
@@ -39,3 +39,13 @@ jobs:
         timestamp=$(date -u)
         git commit -m "Update theme on: ${timestamp}" || exit 0
         git push
+
+  keepalive-job:
+    name: Keepalive Workflow
+    runs-on: ubuntu-latest
+    if: github.event_name == 'schedule'
+    permissions:
+      actions: write
+    steps:
+      - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4
+      - uses: gautamkrishnar/keepalive-workflow@995aec69bb3f2b45b20f4e107907992c8715086d # 2.0.8
diff --git a/.gitignore b/.gitignore
index 58533f3f..a35c3a85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,11 +13,10 @@ config.local.yml
 ftp_temp
 tds_temp
 tds_catalogs
-data/
+glider_dac/tests/data/
+.pytest_cache
 secrets/
-# BerkeleyDB user database
-users.db
 .env
 .venv/
 activate
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 517af26e..148cb862 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,13 +1,13 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-added-large-files

   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
     hooks:
       - id: codespell
         exclude: >
diff --git a/Dockerfile b/Dockerfile
index e35a5aa8..8e048575 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.8
+FROM python:3.9
 ARG glider_gid_uid=1000

 RUN apt-get update && \
@@ -6,26 +6,25 @@ RUN apt-get update && \
     libnetcdf-dev netcdf-bin && \
     mkdir glider-dac && groupadd -g $glider_gid_uid glider && \
     useradd -u $glider_gid_uid -g $glider_gid_uid glider
+ENV UDUNITS2_XML_PATH=/usr/share/xml/udunits
 COPY . /glider-dac
 # TODO: move logs elsewhere
-VOLUME /glider-dac/logs/ /data
-WORKDIR glider-dac
+VOLUME /glider-dac/logs/ /data /usr/local/lib/python3.9/site-packages/compliance_checker/data
+WORKDIR /glider-dac
 # not clear why reinstalling Mongo-related dependencies is necessary under
 # Python 3, but this allows the service to run without import or runtime errors
-RUN pip install -U pip && \
-    pip install --no-cache Cython thredds_crawler numpy==1.19.5 pytest && \
-    pip install --no-cache -r requirements.txt && \
-    pip uninstall -y mongokit && \
-    pip install --no-cache --force-reinstall mongokit-py3==0.9.1.1 && \
-    pip install -U pymongo==2.8
+RUN cd /usr/local/src && pip install -U pip && \
+    pip install --no-cache Cython thredds_crawler numpy pytest && \
+    pip install --no-cache -r /glider-dac/requirements.txt
 RUN mkdir -p /data/submission /data/data/priv_erddap /data/data/pub_erddap \
-             /erddapData/flag /erddapData/hardFlag berkeleydb \
+             /erddapData/flag /erddapData/hardFlag \
              /data/catalog/priv_erddap && \
-    chown -R glider:glider /glider-dac /data && \
+    chown -R glider:glider /glider-dac /data /usr/local/lib/python3.9/site-packages/compliance_checker/data && \
     ln -sf /glider-dac/scripts/crontab /etc/crontab
 USER glider
-ENV PYTHONPATH="${PYTHONPATH}:/glider-dac"
+ENV PYTHONPATH="${PYTHONPATH:-}:/glider-dac"
+ENV FLASK_APP=glider_dac:create_app
 EXPOSE 5000
-CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "app:app"]
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "glider_dac:create_app()"]
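The new CMD uses gunicorn's application-factory syntax: "glider_dac:create_app()" tells gunicorn to import the glider_dac package and call create_app() to build the WSGI app. A sketch of the programmatic equivalent, using only names from this PR:

    # what the gunicorn CMD above resolves to at worker startup
    from glider_dac import create_app

    application = create_app()  # the WSGI callable each worker serves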
diff --git a/app.py b/app.py
index c21f9945..33555153 100644
--- a/app.py
+++ b/app.py
@@ -1,50 +1,49 @@
-from glider_dac import app
-from glider_dac.common import log_formatter
+from flask import current_app
 import os

-def initialize_logs(app):
-    '''
-    Initializes the Application Logger
-    '''
-    import logging
-    log_path = app.config.get('LOG_DIR', 'logs')
-    if not os.path.exists(log_path):
-        os.makedirs(log_path)
+#def initialize_logs(app):
+#    '''
+#    Initializes the Application Logger
+#    '''
+#    import logging
+#    log_path = app.config.get('LOG_DIR', 'logs')
+#    if not os.path.exists(log_path):
+#        os.makedirs(log_path)
+#
+#    file_handler = logging.FileHandler(os.path.join(log_path, 'application.log'))
+#    stream_handler = logging.StreamHandler()
+#    file_handler.setFormatter(log_formatter)
+#    stream_handler.setFormatter(log_formatter)
+#    app.logger.addHandler(file_handler)
+#    app.logger.addHandler(stream_handler)
+#    app.logger.setLevel(logging.DEBUG)
+#    app.logger.info('Utility Application Process Started')

-    file_handler = logging.FileHandler(os.path.join(log_path, 'application.log'))
-    stream_handler = logging.StreamHandler()
-    file_handler.setFormatter(log_formatter)
-    stream_handler.setFormatter(log_formatter)
-    app.logger.addHandler(file_handler)
-    app.logger.addHandler(stream_handler)
-    app.logger.setLevel(logging.DEBUG)
-    app.logger.info('Utility Application Process Started')
+#from flask import jsonify, url_for

-from flask import jsonify, url_for
-
-def has_no_empty_params(rule):
-    '''
-    Something to do with empty params?
-    '''
-    defaults = rule.defaults if rule.defaults is not None else ()
-    arguments = rule.arguments if rule.arguments is not None else ()
-    return len(defaults) >= len(arguments)
+#def has_no_empty_params(rule):
+#    '''
+#    Something to do with empty params?
+#    '''
+#    defaults = rule.defaults if rule.defaults is not None else ()
+#    arguments = rule.arguments if rule.arguments is not None else ()
+#    return len(defaults) >= len(arguments)

 #@app.route('/site-map', methods=['GET'])
-def site_map():
-    '''
-    Returns a json structure for the site routes and handlers
-    '''
-    links = []
-    for rule in app.url_map.iter_rules():
-        # Filter out rules we can't navigate to in a browser
-        # and rules that require parameters
-        if "GET" in rule.methods and has_no_empty_params(rule):
-            url = url_for(rule.endpoint)
-            links.append((url, rule.endpoint))
-    # links is now a list of url, endpoint tuples
-    return jsonify(rules=links)
-initialize_logs(app)
+#def site_map():
+#    '''
+#    Returns a json structure for the site routes and handlers
+#    '''
+#    links = []
+#    for rule in app.url_map.iter_rules():
+#        # Filter out rules we can't navigate to in a browser
+#        # and rules that require parameters
+#        if "GET" in rule.methods and has_no_empty_params(rule):
+#            url = url_for(rule.endpoint)
+#            links.append((url, rule.endpoint))
+#    # links is now a list of url, endpoint tuples
+#    return jsonify(rules=links)
+#initialize_logs(app)

 if __name__ == '__main__':
-    app.run(host=app.config['HOST'], port=app.config['PORT'], debug=app.config['DEBUG'])
+    # the module-level app object is gone; build one via the new factory
+    from glider_dac import create_app
+    app = create_app()
+    app.run(host=app.config['HOST'], port=app.config['PORT'], debug=app.config['DEBUG'])
diff --git a/config.yml b/config.yml
index 013be75b..891aa5f9 100644
--- a/config.yml
+++ b/config.yml
@@ -1,12 +1,12 @@
 COMMON: &common
+  FLASK_APP: "glider_dac:create_app()"
   HOST: localhost
   PORT: 5000
   DEBUG: False
-  MONGODB_HOST: mongo
-  MONGODB_PORT: 27017
-  MONGODB_DATABASE: gliderdac
-  APPLICATION_PREFIX: "/gliders/"
+  SERVER_NAME: "localhost:5000"
+  APPLICATION_ROOT: "/"
+  APPLICATION_PREFIX: "/"
   SECRET_KEY: thisisakey
   LOG_FILE: yes
   WEB_PASSWORD: password
@@ -25,6 +25,11 @@ COMMON: &common
   MAILER_DEBUG: 1
   MAIL_DEFAULT_TO: receiver@domain.com
   #MAIL_DEFAULT_LIST: YOUR_EMAIL
+
+  # IMPORTANT: Modification tracking must be set to True for certain parts of
+  # the deployment lifecycle to work properly!
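+  # (Presumably this refers to the models_committed signal from
+  # flask_sqlalchemy.track_modifications, which glider_dac/models/deployment.py
+  # imports for its on_models_committed hook; that signal only fires while
+  # modification tracking is enabled.)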
+  SQLALCHEMY_TRACK_MODIFICATIONS: True

   # Google Analytics
   GA_ENABLED: True
@@ -43,7 +48,6 @@ COMMON: &common
   INFLUXDB_PORT: 8086
   ADMINS:
     - admin
-  USER_DB_FILE: berkeleydb/users.db

   REDIS_HOST: redis
   REDIS_PORT: 6379
@@ -69,6 +73,34 @@ COMMON: &common
   PUBLIC_CATALOG: '/data/catalog/pub_erddap/datasets.xml'
   PRIVATE_CATALOG: '/data/catalog/priv_erddap/datasets.xml'

+  SQLALCHEMY_DATABASE_URI: mysql://username:password@mysql/gliderdac
+
 DEVELOPMENT: &development
   <<: *common
   DEBUG: True
+  PREFERRED_URL_SCHEME: http
+
+TESTING:
+  <<: *common
+  DEBUG: True
+  TESTING: True
+  PREFERRED_URL_SCHEME: http
+  # enable email for mock email testing
+  MAIL_ENABLED: True
+  # only use in-memory DB for testing
+  SQLALCHEMY_DATABASE_URI: 'sqlite:///:memory:'
+  WTF_CSRF_ENABLED: False
+  # TODO: these directories need to be made relative to project root
+  path2priv: 'tests/test_fs/data/priv_erddap/'
+  path2pub: 'tests/test_fs/data/pub_erddap/'
+  path2thredds: 'tests/test_fs/data/data/thredds/'
+  flags_private: 'tests/test_fs/scratch/tomcat-erddap-private/flag'
+  flags_public: 'tests/test_fs/scratch/tomcat-erddap-public/flag'
+  DATA_ROOT: tests/test_fs/submission
+  PRIV_DATA_ROOT: tests/test_fs/data/priv_erddap
+  PUBLIC_DATA_ROOT: tests/test_fs/data/pub_erddap
+  THREDDS_DATA_ROOT: tests/test_fs/data/thredds
+  ARCHIVE_PATH: tests/test_fs/data/archive
+  NCEI_DIR: tests/test_fs/data/archive
+  SERVER: "http://localhost:8080/erddap"
+  erddap_private: 'localhost:8080'
diff --git a/console b/console
index e7a4997f..0589570d 100755
--- a/console
+++ b/console
@@ -1,8 +1,9 @@
 #!/usr/bin/env python

 from IPython import embed
-from glider_dac import app, db
-from bson import ObjectId
+from glider_dac import create_app
+from glider_dac.extensions import db

-with app.app_context():
+# current_app has no context outside a running app; build one explicitly
+with create_app().app_context():
     embed()
diff --git a/data/qc_config.yml b/data/qc_config.yml
deleted file mode 100644
index cb2a95ce..00000000
--- a/data/qc_config.yml
+++ /dev/null
@@ -1,139 +0,0 @@
-contexts:
-  - streams:
-      conductivity:
-        qartod:
-          gross_range_test:
-            suspect_span: [0, 6]
-            fail_span: [0, 9]
-          spike_test:
-            suspect_threshold:
-            fail_threshold:
-          rate_of_change_test:
-            threshold: 0.1
-          flat_line_test:
-            tolerance: 1
-            suspect_threshold: 3600
-            fail_threshold: 9000
-      temperature:
-        qartod:
-          gross_range_test:
-            suspect_span: [0, 35]
-            fail_span: [-2, 40]
-          spike_test:
-            suspect_threshold:
-            fail_threshold:
-          rate_of_change_test:
-            threshold: 0.1
-          flat_line_test:
-            tolerance: 1
-            suspect_threshold: 3600
-            fail_threshold: 9000
-      pressure:
-        qartod:
-          gross_range_test:
-            suspect_span: [0, 1000]
-            fail_span: [0, 6000]
-          spike_test:
-            suspect_threshold:
-            fail_threshold:
-          rate_of_change_test:
-            threshold: 0.1
-          flat_line_test:
-            tolerance: 1
-            suspect_threshold: 3600
-            fail_threshold: 9000
-      salinity:
-        qartod:
-          gross_range_test:
-            fail_span: [0, 42]
-            suspect_span: [10, 38]
-          spike_test:
-            suspect_threshold:
-            fail_threshold:
-          rate_of_change_test:
-            threshold: 0.1
-          flat_line_test:
-            tolerance: 1
-            suspect_threshold: 3600
-            fail_threshold: 9000
-      density:
-        qartod:
-          gross_range_test:
-            fail_span: [1000, 1100]
-          spike_test:
-            suspect_threshold:
-            fail_threshold:
-          rate_of_change_test:
-            threshold: 0.1
-          flat_line_test:
-            tolerance: 1
-            suspect_threshold: 3600
-            fail_threshold: 9000
-
-# sea_water_temperature: # deg_C
-#     flat_line:
-#         low_reps: 4
-#         high_reps: 8
-#         eps: 1.1920929e-07
-#     gross_range:
-#         sensor_span:
-#             - -5.
-#             - 45.
-# spike: {} -# rate_of_change: {} -# sea_water_electrical_conductivity: # S m-1 -# flat_line: -# low_reps: 4 -# high_reps: 8 -# eps: 1.1920929e-07 -# gross_range: -# sensor_span: -# - 0 -# - 7 -# spike: {} -# rate_of_change: {} -# sea_water_practical_salinity: # unitless -# flat_line: -# low_reps: 4 -# high_reps: 8 -# eps: 1.1920929e-07 -# gross_range: -# sensor_span: -# - 0 -# - 50 -# spike: {} -# rate_of_change: {} -# sea_water_salinity: # unitless -# flat_line: -# low_reps: 4 -# high_reps: 8 -# eps: 1.1920929e-07 -# gross_range: -# sensor_span: -# - 0 -# - 50 -# spike: {} -# rate_of_change: {} -# sea_water_density: # kg m-3 -# flat_line: -# low_reps: 4 -# high_reps: 8 -# eps: 1.1920929e-07 -# gross_range: -# sensor_span: -# - 900 -# - 1050 -# spike: {} -# rate_of_change: {} -# sea_water_pressure: # ` -# flat_line: -# low_reps: 4 -# high_reps: 8 -# eps: 1.1920929e-07 -# gross_range: -# sensor_span: -# - 0 -# - 11000 -# pressure: {} -# rate_of_change: {} -# spike: {} diff --git a/docker-compose.yml b/docker-compose.yml index 054d43c9..36deedd1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,3 @@ -version: '3.3' - - #secrets: # S3_ACCESS_KEY: # file: secrets/access_key.txt @@ -13,26 +10,25 @@ services: restart: always build: . ports: - - 3000:5000 + - 5000:5000 volumes: - # - ./config.local.yml:/glider-dac/config.local.yml + #- ./config.local.yml:/glider-dac/config.local.yml - log_volume:/glider-dac/logs - - berkeleydb_mount:/glider-dac/berkeleydb - ${DATA_VOLUME:-data_volume}:/data - erddap_big_parent_directory:/erddapData - ./datasets.xml:/data/catalog/priv_erddap/datasets.xml - /data/catalog/priv_erddap - restart: always #secrets: # - S3_ACCESS_KEY # - S3_SECRET_KEY environment: + - FLASK_APP=glider_dac:create_app - DATA_ROOT=${DATA_ROOT} - DATA_VOLUME=${DATA_VOLUME} - FLAGS_DIR=${FLAGS_DIR} - REDIS_HOST=${REDIS_HOST} depends_on: - - mongo + - mysql - redis glider-dac-worker: @@ -59,7 +55,6 @@ services: command: python /glider-dac/scripts/glider_qartod.py -w volumes: - ${DATA_VOLUME:-data_volume}:/data - - berkeleydb_mount:/glider-dac/berkeleydb - log_volume:/glider-dac/logs restart: always environment: @@ -68,14 +63,6 @@ services: - FLAGS_DIR=${FLAGS_DIR} - REDIS_HOST=${REDIS_HOST} - mongo: - container_name: mongo - image: mongo:4 - volumes: - - mongo_storage:/data/db - - mongo_config:/data/configdb - restart: always - # For John Kerfoot's new status page application -- move elsewhere if needed mysql: restart: unless-stopped @@ -88,7 +75,6 @@ services: - 3306:3306 volumes: - status_mysql:/var/lib/mysql - restart: always # TODO: THREDDS and ERDDAP will need volume configurations thredds: @@ -108,7 +94,7 @@ services: erddap: container_name: erddap - image: axiom/docker-erddap:latest-jdk17-openjdk + image: axiom/docker-erddap:2.23-jdk17-openjdk ports: - 8080:8080 restart: always @@ -125,17 +111,11 @@ services: image: redis volumes: - redis_data:/data - deploy: - restart_policy: - condition: always volumes: data_volume: - berkeleydb_mount: thredds_cache: thredds_logs: - mongo_storage: - mongo_config: redis_data: status_mysql: log_volume: diff --git a/fabfile.py b/fabfile.py deleted file mode 100644 index 295dcdb8..00000000 --- a/fabfile.py +++ /dev/null @@ -1,179 +0,0 @@ -from fabric.api import * -from fabric.contrib.files import * -import os -from copy import copy -import time - -""" - Call this with fab -c .fab TASK to pick up deploy variables - Required variables in .fab file: - mail_server = x - mail_port = x - mail_username = x - mail_password = x - 
mail_default_sender = x - mailer_debug = x - mail_default_to = x - mail_default_list = x - webpass = x - secret_key = x - data_root = x - rsync_ssh_user = x - rsync_remote_host = x - rsync_remote_path = x - rsync_to_path = x - dev_catalog_root = x - prod_catalog_root = x - mongo_db = x - admins = x,y,z - user_db_file = x -""" - -code_dir = "/home/glider/glider-dac" - -def deploy_dap(): - crontab_file = "/home/glider/crontab.txt" - with settings(sudo_user='glider'): - stop_supervisord(conf="/home/glider/supervisord.conf") - with cd(code_dir): - update_supervisord(src_file="deploy/dap.supervisord.conf", dst_file="/home/glider/supervisord.conf", virtual_env="gliderdac") - sudo("git pull origin master") - update_libs(virtual_env="gliderdac") - update_full_sync() - update_crontab(src_file="deploy/glider_crontab.txt", dst_file=crontab_file) - start_supervisord(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - -def update_ioosngdac(): - with settings(sudo_user='glider'): - stop_supervisord(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - with cd('/home/glider/ioosngdac'): - sudo("git pull --ff-only origin master") - start_supervisord(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - -def deploy_ftp(): - with settings(sudo_user='glider'): - stop_supervisord(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - with cd(code_dir): - sudo("git pull origin master") - update_supervisord(src_file="deploy/supervisord.conf", dst_file="/home/glider/supervisord.conf", virtual_env="gliderdac") - update_libs(virtual_env="gliderdac") - start_supervisord(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - start_supervisor_processes(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - upload_template('deploy/env', "/tmp/env", context=copy(env), use_jinja=True, use_sudo=False, backup=False, mirror_local_mode=True) - sudo("cp /tmp/env /home/glider/env") - - stop_supervisord(conf="/root/supervisord-perms-monitor.conf", virtual_env="root-monitor") - update_supervisord(src_file="deploy/supervisord-perms-monitor.conf", dst_file="/root/supervisord-perms-monitor.conf", virtual_env="root-monitor") - update_libs(virtual_env="root-monitor") - start_supervisord(conf="/root/supervisord-perms-monitor.conf", virtual_env="root-monitor") - start_supervisor_processes(conf="/root/supervisord-perms-monitor.conf", virtual_env="root-monitor") - - restart_nginx() - -def deploy_supervisord_dap(): - with settings(sudo_user='glider'): - stop_supervisord(conf="/home/glider/supervisord.conf") - update_supervisord('deploy/dap.supervisord.conf', '/home/glider/supervisord.conf', 'gliderdac') - start_supervisord(conf="/home/glider/supervisord.conf", virtual_env="gliderdac") - -def update_full_sync(): - # @BUG: same as in update_supervisord, need to do to temp location - upload_template("scripts/full_sync.j2", "/tmp/full_sync", context=copy(env), use_jinja=True, use_sudo=False, backup=False, mirror_local_mode=True) - sudo("cp /tmp/full_sync /home/glider/full_sync") - -def update_crontab(src_file, dst_file): - # @BUG: same - upload_template(src_file, "/tmp/glider-crontab.txt", context=copy(env), use_jinja=True, use_sudo=False, backup=False, mirror_local_mode=True) - sudo("cp /tmp/glider-crontab.txt %s" % dst_file) - sudo("crontab %s" % dst_file) - -def update_supervisord(src_file, dst_file, virtual_env=None): - """ - Run from within with settings block setting sudo_user - """ - if virtual_env is not None: - with prefix("workon %s" % virtual_env): - sudo("pip 
install supervisor") - else: - sudo("pip install supervisor") - - # @BUG: Fabric won't let you specify temp_dir to the underlying put call here, so it doesn't have perms to copy it out of the default - # temp location which is ec2-user's home. see https://github.com/fabric/fabric/pull/932 - # this is a workaround - upload_template(src_file, "/tmp/sd.conf", context=copy(env), use_jinja=True, use_sudo=False, backup=False, mirror_local_mode=True) - sudo("cp /tmp/sd.conf %s" % dst_file) - -def update_libs(virtual_env=None): - """ - Run from within with settings block setting sudo_user - """ - with cd(code_dir): - with settings(warn_only=True): - if virtual_env is not None: - with prefix("workon %s" % virtual_env): - sudo("pip install -r requirements.txt") - else: - sudo("pip install -r requirements.txt") - -def restart_nginx(): - sudo("/etc/init.d/nginx restart") - -def stop_supervisord(conf, virtual_env=None): - """ - Run from within settings block setting sudo_user - """ - with cd(code_dir): - with settings(warn_only=True): - if virtual_env is not None: - with prefix("workon %s" % virtual_env): - sudo("supervisorctl -c %s stop all" % conf) - else: - sudo("supervisorctl -c %s stop all" % conf) - sudo("kill -QUIT $(ps aux | grep supervisord | grep %s | grep -v grep | awk '{print $2}')" % conf) - - #kill_pythons() - -def kill_pythons(): - with settings(warn_only=True): - sudo("kill -QUIT $(ps aux | grep python | grep -v supervisord | awk '{print $2}')") - -def start_supervisord(conf, virtual_env=None): - """ - Run from within with settings block setting sudo_user - """ - with cd(code_dir): - with settings(warn_only=True): - if virtual_env is not None: - with prefix("workon %s" % virtual_env): - sudo("supervisord -c %s" % conf) - else: - sudo("supervisord -c %s" % conf) - -def start_supervisor_processes(conf, virtual_env=None): - """ - Run from within with settings block setting sudo_user - """ - with cd(code_dir): - with settings(warn_only=True): - if virtual_env is not None: - with prefix("workon %s" % virtual_env): - sudo("supervisorctl -c %s start all" % conf) - else: - sudo("supervisorctl -c %s start all" % conf) - -def create_index(): - MONGO_URI = env.get('mongo_db') - url = urlparse.urlparse(MONGO_URI) - MONGODB_DATABASE = url.path[1:] - - run('mongo "%s" --eval "db.deployments.ensureIndex({\'name\':1}, {unique:true})"' % MONGODB_DATABASE) - -def full_sync(): - with settings(sudo_user='glider'): - with prefix("workon gliderdac"): - sudo("~/full_sync") - -def services(command="restart"): - sudo("service tomcat-erddap-private %s" % command) - sudo("service tomcat-erddap-public %s" % command) - sudo("service tomcat-thredds %s" % command) diff --git a/glider_dac/__init__.py b/glider_dac/__init__.py index 9302ba26..87f3df0a 100644 --- a/glider_dac/__init__.py +++ b/glider_dac/__init__.py @@ -1,197 +1,114 @@ import os import datetime +#from glider_dac.common import log_format_str +from glider_dac.extensions import db, get_redis_connection_other + from flasgger import Swagger, LazyString, LazyJSONEncoder from flask import Flask, request from flask_session import Session from flask_cors import CORS, cross_origin from flask_wtf import CSRFProtect +from flask_sqlalchemy import SQLAlchemy from simplekv.memory.redisstore import RedisStore from flask_login import LoginManager from glider_dac.reverse_proxy import ReverseProxied +from glider_dac.models.user import User +from sqlalchemy import event +import os +import os.path import redis import yaml -from glider_dac.common import log_formatter +import 
logging +from rq import Queue, Worker +from glider_dac.views.deployment import deployment_bp +from glider_dac.views.index import index_bp +from glider_dac.views.institution import institution_bp +from glider_dac.views.user import user_bp +from glider_dac.config import get_config +import glider_dac.utilities as util csrf = CSRFProtect() - -# Create application object -app = Flask(__name__) -app.url_map.strict_slashes = False -app.wsgi_app = ReverseProxied(app.wsgi_app) - -csrf.init_app(app) -app.config['SWAGGER'] = { - 'title': 'glider-dac', - 'uiversion': 3, - 'openapi': '3.0.2' -} -app.json_encoder = LazyJSONEncoder -template = dict(swaggerUiPrefix=LazyString(lambda : request.environ.get('HTTP_X_SCRIPT_NAME', ''))) -Swagger(app, template=template) - -cur_dir = os.path.dirname(__file__) -with open(os.path.join(cur_dir, '..', 'config.yml')) as base_config: - config_dict = yaml.load(base_config, Loader=yaml.Loader) - -extra_config_path = os.path.join(cur_dir, '..', 'config.local.yml') -# merge in settings from config.local.yml, if it exists -if os.path.exists(extra_config_path): - with open(extra_config_path) as extra_config: - config_dict = {**config_dict, **yaml.load(extra_config, - Loader=yaml.Loader)} - -try: - app.config.update(config_dict["PRODUCTION"]) -except KeyError: - app.config.update(config_dict["DEVELOPMENT"]) - -app.secret_key = app.config["SECRET_KEY"] -app.config["SESSION_TYPE"] = "redis" -app.config["SESSION_REDIS"] = redis.from_url(app.config["REDIS_URL"]) -Session(app) - -import redis -redis_pool = redis.ConnectionPool(host=app.config.get('REDIS_HOST'), - port=app.config.get('REDIS_PORT'), - db=app.config.get('REDIS_DB')) -redis_connection = redis.Redis(connection_pool=redis_pool) -strict_redis = redis.StrictRedis(connection_pool=redis_pool) - - - - -from rq import Queue -queue = Queue('default', connection=redis_connection) - -import sys - -from flask_mongokit import MongoKit -import os -db = MongoKit(app) - -# Mailer -from flask_mail import Mail -mail = Mail(app) - # Login manager for frontend login_manager = LoginManager() -login_manager.init_app(app) -login_manager.login_view = "login" - +login_manager.login_view = "index.login" +log_format_str = '%(asctime)s - %(process)d - %(name)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s' +log_formatter = logging.Formatter(log_format_str) -# User Auth DB file - create if not existing -if not os.path.exists(app.config.get('USER_DB_FILE')): - from glider_util.bdb import UserDB - UserDB.init_db(app.config.get('USER_DB_FILE')) - -# Create logging -if app.config.get('LOG_FILE') == True: +# Create application object +def create_app(): + app = Flask(__name__) + + # TODO: Move elsewhere? + app.url_map.strict_slashes = False + app.wsgi_app = ReverseProxied(app.wsgi_app) + + csrf.init_app(app) + app.config.update(get_config()) + # load REDIS prefixed environment variables + # this is mainly for test runners which may not be using the containerized versions + # of Redis + # TODO: Move elsewhere, perhaps in config module? 
+ app.config.from_prefixed_env("OVERRIDE") + app.config["SESSION_TYPE"] = "redis" + app.config["SESSION_REDIS"] = redis.from_url(app.config["REDIS_URL"]) + app.json_encoder = LazyJSONEncoder + template = dict(swaggerUiPrefix=LazyString(lambda: request.environ.get('HTTP_X_SCRIPT_NAME', ''))) + Swagger(app, template=template) + app.secret_key = app.config["SECRET_KEY"] + app.config["SESSION_TYPE"] = "redis" + app.config["SESSION_REDIS"] = redis.from_url(app.config["REDIS_URL"]) + Session(app) + + + redis_connection = get_redis_connection_other(app.config.get('REDIS_HOST'), + app.config.get('REDIS_PORT'), + app.config.get('REDIS_DB')) + app.queue = Queue('default', connection=redis_connection) + + + db.init_app(app) + + with app.app_context(): + db.create_all() + + # Mailer + from flask_mail import Mail + app.mail = Mail(app) + + login_manager.init_app(app) + + from .models.user import User + @login_manager.user_loader + def load_user(username): + return User.query.filter_by(username=username).one_or_none() + + app.jinja_env.filters['datetimeformat'] = util.datetimeformat + app.jinja_env.filters['timedeltaformat'] = util.timedeltaformat + app.jinja_env.filters['prettydate'] = util.prettydate + app.jinja_env.filters['pluralize'] = util.pluralize + app.jinja_env.filters['padfit'] = util.padfit + + # Create logging import logging - from logging import FileHandler - file_handler = FileHandler(os.path.join(os.path.dirname(__file__), - '../logs/glider_dac.txt')) - file_handler.setFormatter(log_formatter) - file_handler.setLevel(logging.INFO) - app.logger.addHandler(file_handler) - app.logger.info('Application Process Started') - -# Create datetime jinja2 filter -def datetimeformat(value, format='%a, %b %d %Y at %I:%M%p'): - if isinstance(value, datetime.datetime): - return value.strftime(format) - return value - -def timedeltaformat(starting, ending): - if isinstance(starting, datetime.datetime) and isinstance(ending, datetime.datetime): - return ending - starting - return "unknown" - -def prettydate(d): - if d is None: - return "never" - utc_dt = datetime.datetime.utcnow() - #app.logger.info(utc_dt) - #app.logger.info(d) - if utc_dt > d: - return prettypastdate(d, utc_dt - d) - else: - return prettyfuturedate(d, d - utc_dt) - -# from http://stackoverflow.com/a/5164027/84732 -def prettypastdate(d, diff): - s = diff.seconds - if diff.days > 7: - return d.strftime('%Y %b %d') - elif diff.days > 1: - return '{} days ago'.format(diff.days) - elif diff.days == 1: - return '1 day ago' - elif s <= 1: - return 'just now' - elif s < 60: - return '{} seconds ago'.format(s) - elif s < 120: - return '1 minute ago' - elif s < 3600: - return '{} minutes ago'.format(s//60) - elif s < 7200: - return '1 hour ago' - else: - return '{} hours ago'.format(s//3600) - -def prettyfuturedate(d, diff): - s = diff.seconds - if diff.days > 7: - return d.strftime('%Y %b %d') - elif diff.days > 1: - return '{} days from now'.format(diff.days) - elif diff.days == 1: - return '1 day from now' - elif s <= 1: - return 'just now' - elif s < 60: - return '{} seconds from now'.format(s) - elif s < 120: - return '1 minute from now' - elif s < 3600: - return '{} minutes from now'.format(s/60) - elif s < 7200: - return '1 hour from now' - else: - return '{} hours from now'.format(s/3600) - -def pluralize(number, singular = '', plural = 's'): - if number == 1: - return singular - else: - return plural - -# pad/truncate filter (for making text tables) -def padfit(value, size): - if len(value) <= size: - return value.ljust(size) - - return 
value[0:(size-3)] + "..."
-
-app.jinja_env.filters['datetimeformat'] = datetimeformat
-app.jinja_env.filters['timedeltaformat'] = timedeltaformat
-app.jinja_env.filters['prettydate'] = prettydate
-app.jinja_env.filters['pluralize'] = pluralize
-app.jinja_env.filters['padfit'] = padfit
-
-def slugify(value):
-    """
-    Normalizes string, removes non-alpha characters, and converts spaces to hyphens.
-    Pulled from Django
-    """
-    import unicodedata
-    import re
-    #value = str(value)
-    #value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
-    value = re.sub(r'[^\w\s-]', '', value).strip()
-    return re.sub(r'[-\s]+', '-', value)
+    log_format_str = '%(asctime)s - %(process)d - %(name)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s'
+    log_formatter = logging.Formatter(log_format_str)
+    if app.config.get('LOG_FILE') == True:
+        from logging import FileHandler
+        file_handler = FileHandler(os.path.join(os.path.dirname(__file__),
+                                                '../logs/glider_dac.txt'))
+        file_handler.setFormatter(log_formatter)
+        file_handler.setLevel(logging.INFO)
+        app.logger.addHandler(file_handler)
+        app.logger.info('Application Process Started')
+
+    app.register_blueprint(index_bp)
+    app.register_blueprint(deployment_bp)
+    app.register_blueprint(institution_bp)
+    app.register_blueprint(user_bp)
+
+    return app

 # Import everything
 import glider_dac.views
diff --git a/glider_dac/common.py b/glider_dac/common.py
deleted file mode 100644
index a08a4fe9..00000000
--- a/glider_dac/common.py
+++ /dev/null
@@ -1,4 +0,0 @@
-import logging
-
-log_format_str = '%(asctime)s - %(process)d - %(name)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s'
-log_formatter = logging.Formatter(log_format_str)
diff --git a/glider_dac/config.py b/glider_dac/config.py
new file mode 100644
index 00000000..0213d0df
--- /dev/null
+++ b/glider_dac/config.py
@@ -0,0 +1,37 @@
+"""
+Configuration module so that other DB connection, etc. can access configuration
+in addition to the main Flask application
+"""
+import os.path
+import logging
+import yaml
+
+def get_config() -> dict:
+    cur_dir = os.path.dirname(__file__)
+    with open(os.path.join(cur_dir, '..', 'config.yml')) as base_config:
+        config = yaml.load(base_config, Loader=yaml.Loader)
+
+    extra_config_path = os.path.join(cur_dir, '..', 'config.local.yml')
+    # merge in settings from config.local.yml, if it exists
+    # TODO: Why does this not recognize top-level import when run in gunicorn?
+    if os.environ.get("FLASK_ENV") != "TESTING" and os.path.exists(extra_config_path):
+        with open(extra_config_path) as extra_config:
+            config = {**config, **yaml.load(extra_config,
+                                            Loader=yaml.Loader)}
+    try:
+        config.update(config[os.environ["FLASK_ENV"]])
+    except KeyError:
+        logging.error(f"Cannot find config for {os.environ.get('FLASK_ENV', None)}, "
+                      "falling back to DEVELOPMENT")
+        config.update(config["DEVELOPMENT"])
+
+    # TODO: move to config YAML instead?
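+    # flasgger's Swagger(app, ...) call in glider_dac/__init__.py reads this
+    # 'SWAGGER' key and merges it over its defaults to serve the OpenAPI 3
+    # spec and UI.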
+ config['SWAGGER'] = { + 'title': 'glider-dac', + 'uiversion': 3, + 'openapi': '3.0.2' + } + + return config diff --git a/glider_dac/data/qc_config.yml b/glider_dac/data/qc_config.yml new file mode 100644 index 00000000..3218c703 --- /dev/null +++ b/glider_dac/data/qc_config.yml @@ -0,0 +1,72 @@ +contexts: + - streams: + conductivity: + qartod: + gross_range_test: + suspect_span: [0, 6] + fail_span: [0, 9] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + flat_line_test: + tolerance: 0.001 + suspect_threshold: 3000 + fail_threshold: 5000 + temperature: + qartod: + gross_range_test: + suspect_span: [0, 35] + fail_span: [-2, 40] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + flat_line_test: + tolerance: 0.001 + suspect_threshold: 3000 + fail_threshold: 5000 + pressure: + qartod: + gross_range_test: + suspect_span: [0, 1000] + fail_span: [0, 6000] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + flat_line_test: + tolerance: 0.001 + suspect_threshold: 3000 + fail_threshold: 5000 + salinity: + qartod: + gross_range_test: + fail_span: [0, 42] + suspect_span: [10, 38] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + flat_line_test: + tolerance: 0.001 + suspect_threshold: 3000 + fail_threshold: 5000 + density: + qartod: + gross_range_test: + fail_span: [1000, 1100] + suspect_span: null + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + flat_line_test: + tolerance: 0.001 + suspect_threshold: 3000 + fail_threshold: 5000 diff --git a/glider_dac/defaults.py b/glider_dac/defaults.py deleted file mode 100644 index 977b359f..00000000 --- a/glider_dac/defaults.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -import urllib.parse - -DEBUG = False -TESTING = False -LOG_FILE = True - -# This is the password for the admin user -WEB_PASSWORD = os.environ.get("WEB_PASSWORD") -# The application secret key, used for CSRF Protection -SECRET_KEY = os.environ.get("SECRET_KEY") -# Application context Server Name -SERVER_NAME = os.environ.get("SERVER_NAME", None) -# Application Root -APPLICATION_ROOT = os.environ.get("APPLICATION_ROOT", None) -# Specifies that the web server shouldn't interface with the filesystem -# this is mostly for development -NODATA = os.environ.get("NODATA", "False") == "True" - -# Location of the users.db berkleydb file -USER_DB_FILE = os.environ.get("USER_DB_FILE", "local-user.db") - -# A list of valid administrator user accounts -ADMINS = os.environ.get("ADMINS", "").split(",") - -# The root directory of where the data resides -DATA_ROOT = os.environ.get("DATA_ROOT") - -# Soon to be deprecated -ARCHIVE_PATH = os.environ.get("ARCHIVE_PATH") - -# Hosts -PRIVATE_ERDDAP = os.environ.get('PRIVATE_ERDDAP') -PUBLIC_ERDDAP = os.environ.get('PUBLIC_ERDDAP') -THREDDS = os.environ.get('THREDDS') - -# database -MONGO_URI = os.environ.get('MONGO_URI') -url = urllib.parse.urlparse(MONGO_URI) -MONGODB_HOST = url.hostname -MONGODB_PORT = url.port -MONGODB_USERNAME = url.username -MONGODB_PASSWORD = url.password -MONGODB_DATABASE = url.path[1:] - -# email -MAIL_ENABLED = os.environ.get('MAIL_ENABLED', None) == "True" -MAIL_SERVER = os.environ.get('MAIL_SERVER') -MAIL_PORT = os.environ.get('MAIL_PORT') -MAIL_USE_TLS = os.environ.get("MAIL_USE_TLS", "True") == "True" -MAIL_USE_SSL = os.environ.get("MAIL_USE_SSL", "False") == "True" -MAIL_USERNAME = os.environ.get('MAIL_USERNAME') 
-MAIL_PASSWORD = os.environ.get('MAIL_PASSWORD') - -MAIL_DEFAULT_SENDER = os.environ.get('MAIL_DEFAULT_SENDER') -MAIL_DEFAULT_TO = os.environ.get('MAIL_DEFAULT_TO') -MAIL_DEFAULT_LIST = os.environ.get('MAIL_DEFAULT_LIST', None) diff --git a/glider_dac/extensions.py b/glider_dac/extensions.py new file mode 100644 index 00000000..a1bfaf4a --- /dev/null +++ b/glider_dac/extensions.py @@ -0,0 +1,11 @@ +from redis import Redis +from flask_sqlalchemy import SQLAlchemy +from sqlalchemy.orm import DeclarativeBase + +class Base(DeclarativeBase): + pass + +db = SQLAlchemy(model_class=Base) + +def get_redis_connection_other(host, port, db): + return Redis(host, port, db) diff --git a/glider_dac/glider_emails.py b/glider_dac/glider_emails.py deleted file mode 100644 index a9ecd9ea..00000000 --- a/glider_dac/glider_emails.py +++ /dev/null @@ -1,208 +0,0 @@ -import os -from flask_mail import Message -from flask import render_template -from glider_dac import app, mail, db -from datetime import datetime -from compliance_checker.suite import CheckSuite -from compliance_checker.runner import ComplianceChecker -from urllib.parse import urljoin -import tempfile -import glob -import sys -import os -import json -import argparse -from collections import OrderedDict -import logging - - -root_logger = logging.getLogger() -root_logger.setLevel(logging.INFO) - -handler = logging.StreamHandler(sys.stderr) -handler.setLevel(logging.INFO) -root_logger.addHandler(handler) - - -def send_email_wrapper(message): - """ - Email sending function with exceptions to catch and log exceptions - """ - try: - mail.send(message) - except: - app.logger.exception("Exception occurred while attempting to send " - "email: ") - - -def send_registration_email(username, deployment): - if not app.config.get('MAIL_ENABLED', False): # Mail is disabled - app.logger.info("Email is disabled") - return - # sender comes from MAIL_DEFAULT_SENDER in env - app.logger.info("Sending email about new deployment to %s", app.config.get('MAIL_DEFAULT_TO')) - subject = "New Glider Deployment - %s" % deployment.name - recipients = [app.config.get('MAIL_DEFAULT_TO')] - cc_recipients = [] - if app.config.get('MAIL_DEFAULT_LIST') is not None: - cc_recipients.append(app.config.get('MAIL_DEFAULT_LIST')) - - msg = Message(subject, recipients=recipients, cc=cc_recipients) - msg.body = render_template( - 'deployment_registration.txt', - deployment=deployment, - username=username, - thredds_url=get_thredds_catalog_url(), - erddap_url=get_erddap_catalog_url()) - - send_email_wrapper(msg) - -def send_deployment_cchecker_email(user, failing_deployments, attachment_msgs): - if not app.config.get('MAIL_ENABLED', False): # Mail is disabled - app.logger.info("Email is disabled") - return - # sender comes from MAIL_DEFAULT_SENDER in env - - app.logger.info("Sending email about deployment compliance checker to {}".format(user['username'])) - subject = "Glider DAC Compliance Check on Deployments for user %s" % user['username'] - recipients = [user['email']] #app.config.get('MAIL_DEFAULT_TO')] - msg = Message(subject, recipients=recipients) - if len(failing_deployments) > 0: - message = ("The following glider deployments failed compliance check:" - "\n{}\n\nPlease see attached file for more details. " - "Valid CF standard names are required for NCEI archival." 
- .format("\n".join(d['name'] for d in failing_deployments))) - date_str_today = datetime.today().strftime("%Y-%m-%d") - attachment_filename = "failing_glider_md_{}".format(date_str_today) - msg.attach(attachment_filename, 'text/plain', data=attachment_msgs) - else: - return - msg.body = message - - send_email_wrapper(msg) - -def get_thredds_catalog_url(): - args = { - 'host' : app.config['THREDDS'] - } - url = 'http://%(host)s/thredds/catalog.xml' % args - return url - -def get_erddap_catalog_url(): - args = { - 'host' : app.config['PUBLIC_ERDDAP'] - } - url = 'http://%(host)s/erddap/metadata/iso19115/xml/' % args - return url - -def glider_deployment_check(data_type=None, completed=True, force=False, - deployment_dir=None, username=None): - """ - """ - # TODO: move this functionality to another module as compliance checks - # no longer send emails. - cs = CheckSuite() - cs.load_all_available_checkers() - with app.app_context(): - if data_type is not None: - is_delayed_mode = data_type == 'delayed' - if is_delayed_mode: - q_dict = {"delayed_mode": True, - "completed": completed} - else: - q_dict = {"$or": [{"delayed_mode": False}, - {"delayed_mode": {"$exists": False}}], - "completed": completed} - - if not force: - q_dict["compliance_check_passed"] = {"$ne": True} - - # TODO: combine username/deployment cases? - if username: - q_dict = {"username": username} - # a particular deployment has been specified - elif deployment_dir: - q_dict = {"deployment_dir": deployment_dir} - else: - q_dict = {} - - agg_pipeline = [{"$match": q_dict}, - {"$group": {"_id": "$user_id", - "deployments": {"$push": - {"_id": "$_id", - "name": "$name", - "deployment_dir": "$deployment_dir"} } } - }] - # if force is enabled, re-check the datasets no matter what - - # is this syntax still used? 
if the first fn call fails, use the - # second set of results - try: - agg_result_set = db.deployments.aggregate(agg_pipeline)['result'] - except: - agg_result_set = db.deployments.aggregate(agg_pipeline, cursor={}) - for res in agg_result_set: - user = db.users.find_one(res["_id"]) - all_messages = [] - failing_deployments = [] - for dep in res['deployments']: - root_logger.info("Running compliance check on glider " - "deployment: {}".format(dep)) - try: - dep_passed, dep_messages = process_deployment(dep) - all_messages.append(dep_messages) - if not dep_passed: - failing_deployments.append(dep) - except Exception as e: - root_logger.exception( - "Exception occurred while processing deployment {}".format(dep['name'])) - text_body = '' - # disable email for time being - #send_deployment_cchecker_email(user, failing_deployments, - # "\n".join(all_messages)) - -def process_deployment(dep): - deployment_issues = "Deployment {}".format(os.path.basename(dep['name'])) - groups = OrderedDict() - erddap_fmt_string = "erddap/tabledap/{}.nc?&time%3Emax(time)-1%20day" - base_url = app.config["PRIVATE_ERDDAP"] - # FIXME: determine a more robust way of getting scheme - if not base_url.startswith("http"): - base_url = "http://{}".format(base_url) - url_path = "/".join([base_url, - erddap_fmt_string.format(dep["name"])]) - # TODO: would be better if we didn't have to write to a temp file - outhandle, outfile = tempfile.mkstemp() - failures, _ = ComplianceChecker.run_checker(ds_loc=url_path, - checker_names=['gliderdac'], verbose=True, - criteria='lenient', output_format='json', - output_filename=outfile) - with open(outfile, 'r') as f: - errs = json.load(f)["gliderdac"] - - compliance_passed = errs['scored_points'] == errs['possible_points'] - - update_fields = {"compliance_check_passed": compliance_passed} - standard_name_errs = [] - if compliance_passed: - final_message = "All files passed compliance check on glider deployment {}".format(dep['name']) - else: - error_list = [err_msg for err_severity in ("high_priorities", - "medium_priorities", "low_priorities") for err_section in - errs[err_severity] for err_msg in err_section["msgs"]] - update_fields["compliance_check_report"] = errs - - for err in errs["high_priorities"]: - if err["name"] == "Standard Names": - standard_name_errs.extend(err["msgs"]) - - if not standard_name_errs: - final_message = "All files passed compliance check on glider deployment {}".format(dep['name']) - else: - root_logger.info(standard_name_errs) - final_message = ("Deployment {} has issues:\n{}".format(dep['name'], - "\n".join(standard_name_errs))) - - # Set fields. Don't use upsert as deployment ought to exist prior to write. 
- db.deployments.update({"_id": dep["_id"]}, {"$set": update_fields}) - return final_message.startswith("All files passed"), final_message diff --git a/glider_dac/models/__init__.py b/glider_dac/models/__init__.py deleted file mode 100644 index 35f8d8b0..00000000 --- a/glider_dac/models/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from glider_dac.models import deployment, user, institution diff --git a/glider_dac/models/deployment.py b/glider_dac/models/deployment.py index 41acc4b1..3b5f50c4 100644 --- a/glider_dac/models/deployment.py +++ b/glider_dac/models/deployment.py @@ -4,67 +4,70 @@ glider_dac/models/deployment.py Model definition for a Deployment ''' -from glider_dac import app, db, slugify, queue -from glider_dac.glider_emails import glider_deployment_check +from flask import current_app, render_template +from flask_mail import Message +from datetime import datetime, timedelta +from glider_dac.utilities import (slugify, slugify_sql, + email_exception_logging_wrapper, + get_thredds_catalog_url, + get_erddap_catalog_url) +from glider_dac.extensions import db +from glider_qc.glider_qc import get_redis_connection +import json +import geojson +from compliance_checker.suite import CheckSuite +from flask_sqlalchemy.track_modifications import models_committed +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.orm import Mapped, relationship +from marshmallow.fields import Field, Method +from marshmallow_sqlalchemy import SQLAlchemyAutoSchema +from marshmallow_sqlalchemy.convert import ModelConverter from datetime import datetime -from flask_mongokit import Document -from bson.objectid import ObjectId -from rq import Queue, Connection, Worker +from rq import Queue, Worker +from rq.job import Job +from rq.exceptions import NoSuchJobError +#from glider_dac.services.emails import glider_deployment_check from shutil import rmtree import os import glob import hashlib +from compliance_checker.runner import ComplianceChecker +from collections import OrderedDict +import tempfile + + + +class Deployment(db.Model): + id = db.Column(db.Integer, primary_key=True) + name = db.Column(db.String(255), unique=True, nullable=False) + user_id = db.Column(db.Integer, db.ForeignKey("user.id")) + user = db.relationship("User", lazy='joined', backref="deployment") + # The operator of this Glider. Shows up in TDS as the title. + operator = db.Column(db.String(255), nullable=True)#nullable=False) + deployment_dir = db.Column(db.String(255), nullable=False) + #estimated_deploy_location = db.Column(Geometry(geometry_type='POINT', + # srid=4326)) + # TODO: Add constraints for WMO IDs?? 
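+    # (Per the TODO above: WMO platform IDs are short numeric identifiers, so
+    # a length or digits-only CheckConstraint would be a natural tightening of
+    # this String(255) column.)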
+ wmo_id = db.Column(db.String(255)) + completed = db.Column(db.Boolean, nullable=False, default=False) + created = db.Column(db.DateTime(timezone=True), nullable=False, + default=datetime.utcnow) + updated = db.Column(db.DateTime(timezone=True), nullable=False) + glider_name = db.Column(db.String(255), nullable=False) + deployment_date = db.Column(db.DateTime(timezone=True), nullable=True) # nullable= + archive_safe = db.Column(db.Boolean, nullable=False, default=True) + checksum = db.Column(db.String(255)) + attribution = db.Column(db.Text) + delayed_mode = db.Column(db.Boolean, nullable=True, default=False) + latest_file = db.Column(db.String(255)) + latest_file_mtime = db.Column(db.DateTime(timezone=True)) + compliance_check_passed = db.Column(db.Boolean, nullable=False, + default=False) + compliance_check_report = db.Column(db.JSON, nullable=True) + cf_standard_names_valid = db.Column(db.Boolean, nullable=True) -@db.register -class Deployment(Document): - __collection__ = 'deployments' - use_dot_notation = True - use_schemaless = True - - structure = { - 'name': str, - 'user_id': ObjectId, - 'username': str, # The cached username to lightly DB load - # The operator of this Glider. Shows up in TDS as the title. - 'operator': str, - 'deployment_dir': str, - 'estimated_deploy_date': datetime, - 'estimated_deploy_location': str, # WKT text - 'wmo_id': str, - 'completed': bool, - 'created': datetime, - 'updated': datetime, - 'glider_name': str, - 'deployment_date': datetime, - 'archive_safe': bool, - 'checksum': str, - 'attribution': str, - 'delayed_mode': bool, - 'latest_file': str, - 'latest_file_mtime': datetime, - } - - default_values = { - 'created': datetime.utcnow, - 'completed': False, - 'archive_safe': True, - 'delayed_mode': False, - } - - indexes = [ - { - 'fields': 'name', - 'unique': True, - }, - ] - def save(self): - if self.username is None or self.username == '': - user = db.User.find_one({'_id': self.user_id}) - self.username = user.username - - # Update the stats on the latest profile file modtime = None latest_file = self.get_latest_nc_file() @@ -77,21 +80,21 @@ def save(self): self.sync() self.updated = datetime.utcnow() app.logger.info("Update time is %s", self.updated) - update_vals = dict(self) try: doc_id = update_vals.pop("_id") # if we get a KeyError, this is a new deployment that hasn't been entered into the database yet # so we need to save it. This is when you add "New deployment" while logged in -- files must # later be added except KeyError: - Document.save(self) + # TODO: Update for SQLAlchemy + pass # otherwise, need to use update/upsert via Pymongo in case of queued job for # compliance so that result does not get clobbered. 
# use $set instead of replacing document else: - db.deployments.update({"_id": doc_id}, {"$set": update_vals}, upsert=True) + db.session.commit() # HACK: special logic for Navy gliders deployment - if self.username == "navoceano" and self.glider_name.startswith("ng"): + if self.user.username == "navoceano" and self.glider_name.startswith("ng"): glob_path = os.path.join(app.config.get('DATA_ROOT'), "hurricanes-20230601T000", f"{self.glider_name}*") @@ -101,88 +104,103 @@ def save(self): try: os.symlink(deployment_file, symlink_dest) except OSError: - logger.exception(f"Could not symlink {symlink_dest}") + app.logger.exception(f"Could not symlink {symlink_dest}") - def delete(self): + def delete_deployment(self): + self.delete_files() + db.session.delete(self) + db.session.commit() + + def delete_files(self): if os.path.exists(self.full_path): rmtree(self.full_path) if os.path.exists(self.public_erddap_path): rmtree(self.public_erddap_path) if os.path.exists(self.thredds_path): rmtree(self.thredds_path) - Document.delete(self) - @property + @hybrid_property def dap(self): ''' Returns the THREDDS DAP URL to this deployment ''' - args = { - 'host': app.config['THREDDS'], - 'user': slugify(self.username), - 'deployment': slugify(self.name) - } - dap_url = "http://%(host)s/thredds/dodsC/deployments/%(user)s/%(deployment)s/%(deployment)s.nc3.nc" % args + host = current_app.config['THREDDS'] + user = self.user.username + deployment = self.name + dap_url = "http://" + host + "/thredds/dodsC/deployments/" + user + "/" + deployment + "/" + deployment + ".nc3.nc" return dap_url - @property + @dap.expression + def dap(cls): + return f"http://{current_app.config['THREDDS']}/thredds/dodsC/deployments/{self.user.username}/{self.name}/{self.name}.nc3.nc" + + @hybrid_property def sos(self): ''' Returns the URL to the NcSOS endpoint ''' - args = { - 'host': app.config['THREDDS'], - 'user': slugify(self.username), - 'deployment': slugify(self.name) - } - sos_url = "http://%(host)s/thredds/sos/deployments/%(user)s/%(deployment)s/%(deployment)s.nc3.nc?service=SOS&request=GetCapabilities&AcceptVersions=1.0.0" % args - return sos_url + host = current_app.config['THREDDS'] + user = self.user.username + deployment = self.name + return "http://" + host + "thredds/sos/deployments/" + user + "/" + deployment + "/" + deployment + ".nc3.nc?service=SOS&request=GetCapabilities&AcceptVersions=1.0.0" - @property + @sos.expression + def sos(cls): + return f"http://{current_app.config['THREDDS']}/thredds/sos/deployments/{self.user.username}/{self.name}/{self.name}.nc3.nc?service=SOS&request=GetCapabilities&AcceptVersions=1.0.0" + + @hybrid_property def iso(self): - name = slugify(self.name) - iso_url = 'http://%(host)s/erddap/tabledap/%(name)s.iso19115' % { - 'host': app.config['PUBLIC_ERDDAP'], 'name': name} - return iso_url + host = current_app.config['PRIVATE_ERDDAP'] + name = self.name + return "http://" + host + "/erddap/tabledap/" + name + ".iso19115" - @property + @iso.expression + def iso(cls): + return f"http://{current_app.config['PRIVATE_ERDDAP']}/erddap/tabledap/{self.name}.iso19115" + + @hybrid_property def thredds(self): - args = { - 'host': app.config['THREDDS'], - 'user': slugify(self.username), - 'deployment': slugify(self.name) - } - thredds_url = "http://%(host)s/thredds/catalog/deployments/%(user)s/%(deployment)s/catalog.html?dataset=deployments/%(user)s/%(deployment)s/%(deployment)s.nc3.nc" % args - return thredds_url + host = current_app.config['THREDDS'] + user = self.user.username + deployment = 
self.name + return "http://" + host + "/thredds/catalog/deployments/" + user + "/" + deployment + "/catalog.html?dataset=deployments/" + user + "/" + deployment + "/" + deployment + ".nc3.nc" - @property + @thredds.expression + def thredds(cls): + return f"http://{current_app.config['THREDDS']}/thredds/catalog/deployments/{self.user.username}/{self.name}/catalog.html?dataset=deployments/{self.user.username}/{self.name}/{self.name}.nc3.nc" + + @hybrid_property def erddap(self): - args = { - 'host': app.config['PUBLIC_ERDDAP'], - 'user': slugify(self.username), - 'deployment': slugify(self.name) - } - erddap_url = "http://%(host)s/erddap/tabledap/%(deployment)s.html" % args - return erddap_url + host = current_app.config['PRIVATE_ERDDAP'] + user = self.user.username + deployment = self.name + return "http://" + host + "/erddap/tabledap/" + deployment + ".html" + + @erddap.expression + def erddap(cls): + return f"http://{current_app.config['PRIVATE_ERDDAP']}/erddap/tabledap/{self.name}.html" @property def title(self): if self.operator is not None and self.operator != "": return self.operator else: - return self.username + return self.user.username @property def full_path(self): - return os.path.join(app.config.get('DATA_ROOT'), self.deployment_dir) + return os.path.join(current_app.config.get('DATA_ROOT'), + self.deployment_dir) @property def public_erddap_path(self): - return os.path.join(app.config.get('PUBLIC_DATA_ROOT'), self.deployment_dir) + return os.path.join(current_app.config.get('PUBLIC_DATA_ROOT'), + self.deployment_dir) @property def thredds_path(self): - return os.path.join(app.config.get('THREDDS_DATA_ROOT'), self.deployment_dir) + return os.path.join(current_app.config.get('THREDDS_DATA_ROOT'), + self.deployment_dir) def on_complete(self): """ @@ -204,28 +222,36 @@ def on_complete(self): # generate md5s of all data files on completion if self.completed: - for dirpath, dirnames, filenames in os.walk(self.full_path): - for f in filenames: - if (f in ["deployment.json", "wmoid.txt", "completed.txt"] - or f.endswith(".md5") or not f.endswith('.nc')): - continue - - full_file = os.path.join(dirpath, f) # schedule the checker job to kick off the compliance checker email # on the deployment when the deployment is completed # on_complete might be a misleading function name -- this section # can run any time there is a sync, so check if a checker run has already been executed # if compliance check failed or has not yet been run, go ahead to next section - if not getattr(self, "compliance_check_passed", None): + ccheck_job_id = f"{self.name}_compliance_check" + # rerun a compliance check if unrun or failed and there is no + # other scheduled job already queued up + if not getattr(self, "compliance_check_passed", False): + try: + job = Job.fetch(id=ccheck_job_id, connection=redis_connection) + # if no job exists, continue on + except NoSuchJobError: + pass + # if the job already exists, do nothing -- it will run later + else: + app.logger.info("Deferred compliance check job already scheduled " + f"for deployment {self.name}, skipping...") + return + app.logger.info("Scheduling compliance check for completed " "deployment {}".format(self.deployment_dir)) - queue.enqueue(glider_deployment_check, - kwargs={"deployment_dir": self.deployment_dir}, - job_timeout=800) + queue.enqueue_in(timedelta(minutes=30), glider_deployment_check, + kwargs={"deployment_dir": self.deployment_dir}, + job_id=ccheck_job_id, job_timeout=800) else: for dirpath, dirnames, filenames in os.walk(self.full_path): for f 
in filenames: if f.endswith(".md5"): + # FIXME? this doesn't create md5sums, it removes them os.unlink(os.path.join(dirpath, f)) def get_latest_nc_file(self): @@ -260,7 +286,7 @@ def calculate_checksum(self): self.checksum = md5.hexdigest() def sync(self): - if app.config.get('NODATA'): + if current_app.config.get('NODATA'): return if not os.path.exists(self.full_path): try: @@ -275,8 +301,9 @@ def sync(self): # Serialize Deployment model to disk json_file = os.path.join(self.full_path, "deployment.json") + schema = DeploymentSchema() with open(json_file, 'w') as f: - f.write(self.to_json()) + f.write(json.dumps(schema.dump(self))) def update_wmoid_file(self): # Keep the WMO file updated if it is edited via the web form @@ -292,11 +319,172 @@ def update_wmoid_file(self): # Write the new wmo_id to file if new with open(wmo_id_file, 'w') as f: f.write(self.wmo_id) - @classmethod - def get_deployment_count_by_operator(cls): - return [count for count in db.deployments.aggregate({'$group': {'_id': - '$operator', - 'count': - {'$sum': - 1}}}, - cursor={})] + + @email_exception_logging_wrapper + def send_deployment_cchecker_email(self, user, failing_deployments, attachment_msgs): + if not app.config.get('MAIL_ENABLED', False): # Mail is disabled + app.logger.info("Email is disabled") + return + # sender comes from MAIL_DEFAULT_SENDER in env + + app.logger.info("Sending email about deployment compliance checker to {}".format(user['username'])) + subject = "Glider DAC Compliance Check on Deployments for user %s" % user['username'] + recipients = [user['email']] #app.config.get('MAIL_DEFAULT_TO')] + msg = Message(subject, recipients=recipients) + if len(failing_deployments) > 0: + message = ("The following glider deployments failed compliance check:" + "\n{}\n\nPlease see attached file for more details. " + "Valid CF standard names are required for NCEI archival." + .format("\n".join(d['name'] for d in failing_deployments))) + date_str_today = datetime.today().strftime("%Y-%m-%d") + attachment_filename = "failing_glider_md_{}".format(date_str_today) + msg.attach(attachment_filename, 'text/plain', data=attachment_msgs) + else: + return + msg.body = message + + current_app.mail.send(msg) + + def glider_deployment_check(self, data_type=None, completed=True, force=False, + deployment_dir=None, username=None): + """ + """ + # TODO: move this functionality to another module as compliance checks + # no longer send emails. 
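+        # load_all_available_checkers() registers every checker plugin
+        # installed via entry points, including the 'gliderdac' checker that
+        # process_deployment() passes to ComplianceChecker.run_checker below.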
+
+    def glider_deployment_check(self, data_type=None, completed=True,
+                                force=False, deployment_dir=None,
+                                username=None):
+        """
+        """
+        # TODO: move this functionality to another module as compliance checks
+        #       no longer send emails.
+        cs = CheckSuite()
+        cs.load_all_available_checkers()
+        query = Deployment.query
+        with current_app.app_context():
+            if data_type is not None:
+                is_delayed_mode = data_type == 'delayed'
+                query = query.filter(Deployment.completed == completed,
+                                     func.coalesce(Deployment.delayed_mode,
+                                                   False) == is_delayed_mode)
+            # TODO: force not null constraints in model on this field
+            if not force:
+                query = query.filter(Deployment.compliance_check_passed != True)
+
+            if username:
+                query = query.filter_by(username=username)
+            # a particular deployment has been specified
+            elif deployment_dir:
+                query = query.filter_by(deployment_dir=deployment_dir)
+
+            user_errors = {}
+            for deployment in query.all():
+                user = deployment.user
+                user_errors.setdefault(user, {"messages": [], "failed_deployments": []})
+
+                try:
+                    dep_passed, dep_messages = deployment.process_deployment()
+                    if not dep_passed:
+                        user_errors[user]["failed_deployments"].append(deployment.name)
+                        user_errors[user]["messages"].append(dep_messages)
+                except Exception:
+                    current_app.logger.exception("Exception occurred while processing deployment {}".format(deployment.name))
+
+            # TODO: Allow for disabling of sending compliance checker emails
+            for user, results_dict in user_errors.items():
+                self.send_deployment_cchecker_email(user,
+                                                    results_dict["failed_deployments"],
+                                                    "\n".join(results_dict["messages"]))
+
+    def process_deployment(self):
+        deployment_issues = "Deployment {}".format(os.path.basename(self.name))
+        groups = OrderedDict()
+        erddap_fmt_string = "erddap/tabledap/{}.nc?&time%3Emax(time)-1%20day"
+        base_url = current_app.config["PRIVATE_ERDDAP"]
+        # FIXME: determine a more robust way of getting scheme
+        if not base_url.startswith("http"):
+            base_url = "http://{}".format(base_url)
+        url_path = "/".join([base_url,
+                             erddap_fmt_string.format(self.name)])
+        # TODO: would be better if we didn't have to write to a temp file
+        outhandle, outfile = tempfile.mkstemp()
+        failures, _ = ComplianceChecker.run_checker(ds_loc=url_path,
+                                                    checker_names=['gliderdac'], verbose=True,
+                                                    criteria='lenient', output_format='json',
+                                                    output_filename=outfile)
+        with open(outfile, 'r') as f:
+            errs = json.load(f)["gliderdac"]
+
+        compliance_passed = errs['scored_points'] == errs['possible_points']
+
+        self.compliance_check_passed = compliance_passed
+        standard_name_errs = []
+        if compliance_passed:
+            final_message = "All files passed compliance check on glider deployment {}".format(self.name)
+        else:
+            error_list = [err_msg for err_severity in ("high_priorities",
+                          "medium_priorities", "low_priorities") for err_section in
+                          errs[err_severity] for err_msg in err_section["msgs"]]
+            self.compliance_check_report = errs
+
+            for err in errs["high_priorities"]:
+                if err["name"] == "Standard Names":
+                    standard_name_errs.extend(err["msgs"])
+
+            if not standard_name_errs:
+                final_message = "All files passed compliance check on glider deployment {}".format(self.name)
+                self.cf_standard_names_valid = True
+            else:
+                current_app.logger.info(standard_name_errs)
+                final_message = ("Deployment {} has issues:\n{}".format(self.name,
+                                 "\n".join(standard_name_errs)))
+                self.cf_standard_names_valid = False
+
+        db.session.commit()
+        return final_message.startswith("All files passed"), final_message
+
+
+class GeoJSONField(Field):
+    def _serialize(self, value, attr, obj, **kwargs):
+        if value is None:
+            return None
+        # Convert GeoAlchemy geometry to GeoJSON format
+        return geojson.loads(db.session.scalar(value.ST_AsGeoJSON()))
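`GeoJSONField` pushes the geometry conversion down to the database with `ST_AsGeoJSON`. A sketch of the GeoAlchemy2 pattern underneath it; the `Spot` model is illustrative, and the commented query assumes a session bound to a PostGIS database:

```python
import geojson
from geoalchemy2 import Geometry
from sqlalchemy import Column, Integer
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Spot(Base):
    __tablename__ = "spot"
    id = Column(Integer, primary_key=True)
    # SRID 4326 is WGS84 lon/lat, the usual choice for deployment positions
    location = Column(Geometry(geometry_type="POINT", srid=4326))


# With a PostGIS-backed session, a stored geometry can be rendered to
# GeoJSON server-side and parsed back into Python objects:
# feature = geojson.loads(session.scalar(spot.location.ST_AsGeoJSON()))
```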
+
+class DeploymentModelConverter(ModelConverter):
+    SQLA_TYPE_MAPPING = {
+        **ModelConverter.SQLA_TYPE_MAPPING
+        #**{Geometry: Field}
+    }
+
+class DeploymentSchema(SQLAlchemyAutoSchema):
+    class Meta:
+        model = Deployment
+        model_converter = DeploymentModelConverter
+
+    estimated_deploy_location = GeoJSONField()
+
+    # TODO?: Aggressively Java-esque -- is there a better way to get these
+    # hybrid properties?
+    dap = Method("get_dap")
+    sos = Method("get_sos")
+    iso = Method("get_iso")
+    thredds = Method("get_thredds")
+    erddap = Method("get_erddap")
+
+    def get_dap(self, obj):
+        return obj.dap
+
+    def get_sos(self, obj):
+        return obj.sos
+
+    def get_iso(self, obj):
+        return obj.iso
+
+    def get_thredds(self, obj):
+        return obj.thredds
+
+    def get_erddap(self, obj):
+        return obj.erddap
+
+
+def on_models_committed(sender, changes):
+    for model, operation in changes:
+        if isinstance(model, (Deployment, User)):
+            if operation in ("insert", "update"):
+                model.save()
+            elif operation == "delete" and isinstance(model, Deployment):
+                model.delete()
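`on_models_committed()` has the signature of Flask-SQLAlchemy's `models_committed` signal handler, but the diff does not show where it gets connected. A registration sketch, assuming modification tracking is enabled; the import location varies by Flask-SQLAlchemy version and the handler here is a stand-in:

```python
from flask import Flask

try:  # Flask-SQLAlchemy 3.x module layout
    from flask_sqlalchemy.track_modifications import models_committed
except ImportError:  # 2.x exports the signal at the package top level
    from flask_sqlalchemy import models_committed

app = Flask(__name__)
# the commit signals only fire when modification tracking is turned on
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True


def log_commits(sender, changes):
    # changes is a list of (model_instance, operation) pairs, where the
    # operation is one of "insert", "update", or "delete"
    for model, operation in changes:
        print(type(model).__name__, operation)


models_committed.connect(log_commits, sender=app)
```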
diff --git a/glider_dac/models/institution.py b/glider_dac/models/institution.py
index 26ac6fb4..ca23cb5e 100644
--- a/glider_dac/models/institution.py
+++ b/glider_dac/models/institution.py
@@ -5,29 +5,11 @@
 Model definition for Institution
 '''
 from datetime import datetime
-from glider_dac import db
-from flask_mongokit import Document
+from glider_dac.extensions import db

-@db.register
-class Institution(Document):
-    __collection__ = 'institutions'
-    use_dot_notation = True
-    use_schemaless = True
-
-    structure = {
-        'name': str,
-        'created': datetime,
-        'updated': datetime
-    }
-
-    default_values = {
-        'created': datetime.utcnow
-    }
-
-    indexes = [
-        {
-            'fields': 'name',
-            'unique': True,
-        },
-    ]
+class Institution(db.Model):
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column(db.String(255), unique=True)
+    created = db.Column(db.DateTime(timezone=True), default=datetime.utcnow)
+    updated = db.Column(db.DateTime(timezone=True))
diff --git a/glider_dac/models/user.py b/glider_dac/models/user.py
index 07099f40..e41c8c3b 100644
--- a/glider_dac/models/user.py
+++ b/glider_dac/models/user.py
@@ -1,65 +1,49 @@
 import os
 import os.path
-from datetime import datetime
-from glider_dac import app, db
-from glider_util.bdb import UserDB
-from flask_mongokit import Document
-
-@db.register
-class User(Document):
-    __collection__ = 'users'
-    use_dot_notation = True
-    use_schemaless = True
-
-    structure = {
-        'username' : str,
-        'name' : str,
-        'email' : str,
-        'organization' : str,
-        'created' : datetime,
-        'updated' : datetime
-    }
-
-    default_values = {
-        'created': datetime.utcnow
-    }
-
-    indexes = [
-        {
-            'fields': 'username',
-            'unique': True,
-        },
-    ]
+from datetime import datetime, timedelta
+from glider_dac import db
+from glider_dac.utilities import email_exception_logging_wrapper
+from flask import current_app
+from flask_mail import Message
+from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
+from passlib.hash import sha512_crypt
+
+class User(db.Model):
+    __tablename__ = 'user'
+    id = db.Column(db.Integer, primary_key=True)
+    username = db.Column(db.String(255), nullable=False)
+    name = db.Column(db.String(255), nullable=False)
+    email = db.Column(db.String(255))
+    admin = db.Column(db.Boolean, nullable=False, default=False)
+    password = db.Column(db.String(255), nullable=False)
+    organization = db.Column(db.String(255))
+    created = db.Column(db.DateTime(timezone=True), default=datetime.utcnow)
+    updated = db.Column(db.DateTime(timezone=True))

     @classmethod
-    def _check_login(cls, username, password):
-        u = UserDB(app.config.get('USER_DB_FILE'))
-        return u.check(username.encode(), password.encode())
+    def check_login(cls, username, password):
+        user = User.query.filter_by(username=username).one_or_none()
+        if user is None:
+            return False
+        return sha512_crypt.verify(password, user.password)

     @classmethod
     def authenticate(cls, username, password):
-        if cls._check_login(username, password):
+        if cls.check_login(username, password):
             # Return the ID of the user
-            usr = db.User.find_one( { 'username' : username } )
-            if usr is None:
-                usr = db.User()
-                usr.username = username
-                usr.save()
-            return usr
+            current_user = User.query.filter_by(username=username).one_or_none()
+            if current_user is None:
+                current_user = User(username=username)
+                current_user.save()
+            return current_user
         return None

-    @classmethod
-    def update(cls, username, password):
-        u = UserDB(app.config.get('USER_DB_FILE'))
-        return u.set(username.encode(), password.encode())
-
     @property
     def data_root(self):
-        data_root = app.config.get('DATA_ROOT')
+        data_root = current_app.config.get('DATA_ROOT')
         return os.path.join(data_root, self.username)

     def save(self):
-        super().save()
         # on creation of user, ensure that a directory with user name is present
         self.ensure_dir("")

@@ -80,13 +64,67 @@ def is_anonymous(self):
     # This method is not provided by flask-login. Make a property to bring it
     # in line with the rest of the expected properties from flask-login, namely:
     # is_active, is_authenticated, and is_anonymous.
-    @property
-    def is_admin(self):
-        return self.username in app.config.get("ADMINS")

     def get_id(self):
-        return str(self._id)
-
-    @classmethod
-    def get_deployment_count_by_user(cls):
-        return [count for count in db.deployments.aggregate({ '$group': { '_id': '$user_id', 'count': { '$sum' : 1 }}}, cursor={})]
+        return str(self.username)
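`check_login()` replaces the BerkeleyDB lookup with passlib verification against the stored hash. A sketch of the hash/verify cycle it depends on, with throwaway credentials:

```python
from passlib.hash import sha512_crypt

# hash() salts and stretches the password; only the resulting string is stored
stored = sha512_crypt.hash("correct horse battery staple")

# verify() re-hashes the candidate with the stored salt/rounds and compares
assert sha512_crypt.verify("correct horse battery staple", stored)
assert not sha512_crypt.verify("wrong password", stored)
```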
+
+    @email_exception_logging_wrapper
+    def notify_user_incomplete_deployments(self):
+        """
+        Notify user via email of any deployments older than two weeks which
+        have not been marked as completed
+        """
+        # imported here to avoid a circular import with the deployment model
+        from glider_dac.models.deployment import Deployment
+
+        # Calculate the date two weeks ago
+        two_weeks_ago = datetime.now() - timedelta(weeks=2)
+
+        # Query for deployments that are not completed, were last updated more
+        # than two weeks ago, and belong to this user
+        # TODO: fix representation?
+        query = (Deployment.query.filter(Deployment.completed == False,
+                                         Deployment.updated < two_weeks_ago,
+                                         Deployment.username == self.username)
+                                 .order_by(Deployment.updated))
+        # Materialize the query results as a list
+        deployments = query.all()
+
+        # Check if there are any deployments to notify about
+        if not deployments:
+            return
+
+        # Prepare email content
+        subject = f"Reminder: Incomplete Deployments for {self.name}"
+
+        # Start building the HTML table
+        body = f"""
+        <p>User {self.name} has the following incomplete glider deployment(s)
+        on the IOOS Glider DAC that were last updated more than two weeks ago.
+        Please mark the following deployment(s) as complete if the associated
+        deployments have finished.</p>
+        <table>
+        <tr>
+            <th>Deployment Name</th>
+            <th>Last Updated</th>
+        </tr>
+        """
+
+        # Add a table row for each incomplete deployment
+        for deployment in deployments:
+            body += f"""
+        <tr>
+            <td>{deployment.name}</td>
+            <td>{deployment.updated.strftime('%Y-%m-%d %H:%M:%S')}</td>
+        </tr>
+        """