From 4a73defea4d0937d8bb89b92c937962595582142 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Wed, 3 May 2023 14:28:51 +0100 Subject: [PATCH 1/7] - fixed some typo for es_port (str in place of int conversion) - Changed query back to filter only on jsonfile_path (removed division: it's possibly non unique, and the path is sufficient since it's unique) --- src/ensembl/production/datacheck/app/main.py | 10 ++----- src/ensembl/production/datacheck/config.py | 22 +++++++-------- src/ensembl/production/datacheck/utils.py | 29 ++------------------ 3 files changed, 17 insertions(+), 44 deletions(-) diff --git a/src/ensembl/production/datacheck/app/main.py b/src/ensembl/production/datacheck/app/main.py index f55a479..368d6bb 100644 --- a/src/ensembl/production/datacheck/app/main.py +++ b/src/ensembl/production/datacheck/app/main.py @@ -61,7 +61,7 @@ # set es details es_host = app.config['ES_HOST'] -es_port = str(app.config['ES_PORT']) +es_port = int(app.config['ES_PORT']) es_index = app.config['ES_INDEX'] es_user = app.config['ES_USER'] es_password = app.config['ES_PASSWORD'] @@ -343,9 +343,7 @@ def job_details(): if jsonfile is None: raise Exception('jsonfile needed ') if app_es_data_source: - ensembl_division = f"Ensembl{DatacheckConfig.DATACHECK_TYPE.capitalize()}" - res = get_datacheck_results(division=ensembl_division, - jsonfile_path=jsonfile, + res = get_datacheck_results(jsonfile_path=jsonfile, es_host=es_host, es_port=es_port, es_index=es_index, @@ -391,9 +389,7 @@ def download_dc_outputs(job_id): if app_es_data_source: jsonfile_path = job['output']['json_output_file'] - ensembl_division = f"Ensembl{DatacheckConfig.DATACHECK_TYPE.capitalize()}" - res = get_datacheck_results(division=ensembl_division, - jsonfile_path=jsonfile_path, + res = get_datacheck_results(jsonfile_path=jsonfile_path, es_host=es_host, es_port=es_port, es_index=es_index, diff --git a/src/ensembl/production/datacheck/config.py b/src/ensembl/production/datacheck/config.py index 
3729013..2aacaf5 100644 --- a/src/ensembl/production/datacheck/config.py +++ b/src/ensembl/production/datacheck/config.py @@ -16,7 +16,7 @@ import pkg_resources import urllib3 import urllib -import json +import json import requests.exceptions from pathlib import Path @@ -45,18 +45,19 @@ def get_app_version(): def get_server_names(url, flag=0): try: - if flag : - url=urllib.parse.urljoin(url, '/api/dbcopy/dcservers') + if flag: + url = urllib.parse.urljoin(url, '/api/dbcopy/dcservers') loader = RemoteFileLoader('json') return loader.r_open(url) else: - server_file_path = os.environ.get("SERVER_NAMES", EnsemblConfig.file_config.get('server_names_file', - os.path.join( - os.path.dirname(__file__), - 'server_names.dev.json'))) + server_file_path = os.environ.get("SERVER_NAMES", + EnsemblConfig.file_config.get('server_names_file', + os.path.join(os.path.dirname(__file__), + 'server_names.dev.json'))) return json.load(open(server_file_path)) except Exception as e: - return {} + return {} + class DCConfigLoader: base_uri = 'https://raw.githubusercontent.com/Ensembl/ensembl-datacheck/' @@ -132,10 +133,9 @@ class DatacheckConfig(EnsemblConfig): ES_PORT = os.environ.get('ES_PORT', EnsemblConfig.file_config.get('es_port', '9200')) ES_SSL = os.environ.get('ES_SSL', EnsemblConfig.file_config.get('es_ssl', "f")).lower() in ['true', '1'] ES_INDEX = os.environ.get('ES_INDEX', EnsemblConfig.file_config.get('es_index', "datacheck_results")) - + GET_SERVER_NAMES = os.environ.get('GET_SERVER_NAMES', EnsemblConfig.file_config.get('get_server_names', 0)) SERVER_NAMES = get_server_names(COPY_URI_DROPDOWN, GET_SERVER_NAMES) - APP_VERSION = get_app_version() - + APP_VERSION = get_app_version() diff --git a/src/ensembl/production/datacheck/utils.py b/src/ensembl/production/datacheck/utils.py index 4051813..ea70152 100644 --- a/src/ensembl/production/datacheck/utils.py +++ b/src/ensembl/production/datacheck/utils.py @@ -15,8 +15,7 @@ from ensembl.production.datacheck.config import 
DatacheckConfig as dcg -def get_datacheck_results(division: str, - jsonfile_path: str, +def get_datacheck_results(jsonfile_path: str, es_host: str = dcg.ES_HOST, es_port: int = int(dcg.ES_PORT), es_index: str = dcg.ES_INDEX, @@ -26,7 +25,6 @@ def get_datacheck_results(division: str, """Get datacheck results stored in Elasticsearch Args: - division (str): Ensembl division to filter results jsonfile_path (str): unique file name to filter the results es_host (str): elastic search host to connect es_port (int): elastic search port @@ -38,33 +36,12 @@ def get_datacheck_results(division: str, Returns: dict: status with elasticsearch response """ - - if not all([division, jsonfile_path]): - raise Exception("Param division and jsonfile_path required") - with ElasticsearchConnectionManager(es_host, es_port, es_user, es_password, es_ssl) as es: try: res = es.client.search(index=es_index, body={ "query": { - "bool": { - "must": [ - { - "match": { - "division.keyword": { - "query": division, - "operator": "and" - } - } - }, - { - "match": { - "file.keyword": { - "query": jsonfile_path, - "operator": "and" - } - } - } - ] + "term": { + "file.keyword": jsonfile_path } }, "size": 1, From f8dcbdfe7fbf932c4f9d26d31ce779e424275cbc Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Wed, 3 May 2023 17:55:22 +0100 Subject: [PATCH 2/7] Fixed remaining value in logs from ES --- src/ensembl/production/datacheck/app/main.py | 4 +--- src/ensembl/production/datacheck/utils.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ensembl/production/datacheck/app/main.py b/src/ensembl/production/datacheck/app/main.py index 368d6bb..3907e3b 100644 --- a/src/ensembl/production/datacheck/app/main.py +++ b/src/ensembl/production/datacheck/app/main.py @@ -384,7 +384,6 @@ def job_result(job_id): def download_dc_outputs(job_id): try: job = get_hive().get_result_for_job_id(job_id, progress=False) - ensembl_division = f"Ensembl{DatacheckConfig.DATACHECK_TYPE.capitalize()}" if 
'output' in job: if app_es_data_source: @@ -395,8 +394,7 @@ def download_dc_outputs(job_id): es_index=es_index, es_user=es_user, es_password=es_password, - es_ssl=es_ssl - ) + es_ssl=es_ssl) if not res['status']: raise Exception(res['message']) diff --git a/src/ensembl/production/datacheck/utils.py b/src/ensembl/production/datacheck/utils.py index ea70152..0ab40c7 100644 --- a/src/ensembl/production/datacheck/utils.py +++ b/src/ensembl/production/datacheck/utils.py @@ -55,8 +55,7 @@ def get_datacheck_results(jsonfile_path: str, ] }) if len(res['hits']['hits']) == 0: - raise ElasticsearchException(f""" No Hits Found for given params division {division} - and jsonfile_path {jsonfile_path} """) + raise ElasticsearchException(f"""No Hits Found for given params jsonfile_path {jsonfile_path} """) return {"status": True, "message": "", "result": res['hits']['hits'][0]['_source']['content']} From 4c716a0dafdb6bf0b0330e0c1bc7d994f50551be Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 9 May 2023 09:12:01 +0100 Subject: [PATCH 3/7] Updated config to handle error loading remote files --- src/ensembl/production/datacheck/config.py | 44 ++++++++-------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/ensembl/production/datacheck/config.py b/src/ensembl/production/datacheck/config.py index 3729013..cfe4fb2 100644 --- a/src/ensembl/production/datacheck/config.py +++ b/src/ensembl/production/datacheck/config.py @@ -10,18 +10,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import json import logging import os import pathlib -import pkg_resources -import urllib3 -import urllib -import json import requests.exceptions +import urllib from pathlib import Path from ensembl.production.core.config import load_config_yaml - from ensembl.utils.rloader import RemoteFileLoader pathlib.Path(__file__).parent.absolute() @@ -29,7 +26,7 @@ config_file_path = os.environ.get('DATACHECK_CONFIG_PATH') from flask.logging import default_handler -logger = logging.getLogger() +logger = logging.getLogger(__name__) logger.addHandler(default_handler) @@ -50,45 +47,34 @@ def get_server_names(url, flag=0): loader = RemoteFileLoader('json') return loader.r_open(url) else: - server_file_path = os.environ.get("SERVER_NAMES", EnsemblConfig.file_config.get('server_names_file', - os.path.join( - os.path.dirname(__file__), - 'server_names.dev.json'))) + server_file_path = os.environ.get("SERVER_NAMES", EnsemblConfig.file_config.get('server_names_file')) return json.load(open(server_file_path)) except Exception as e: - return {} + raise RuntimeError(f"Unable to load the list of server names {e}") class DCConfigLoader: base_uri = 'https://raw.githubusercontent.com/Ensembl/ensembl-datacheck/' uri = base_uri + 'release/{}/lib/Bio/EnsEMBL/DataCheck/index.json' @classmethod - def load_config(cls, version=None): + def load_config(cls, version): loader = RemoteFileLoader('json') - if version is None: - logger.warning(f"No version specified, fall back on main") - uri = cls.base_uri + 'main/lib/Bio/EnsEMBL/DataCheck/index.json' - else: - uri = cls.uri.format(version) try: - return loader.r_open(uri) - except requests.exceptions.HTTPError as e: - logger.fatal(f"Load versioned index.json error: {version}") - return {} + return loader.r_open(cls.uri.format(version)) + except requests.HTTPError as e: + logger.warning(f"Loading {version} index.json from main") + return loader.r_open(f'{cls.base_uri}main/lib/Bio/EnsEMBL/DataCheck/index.json') class EnsemblConfig: file_config = 
load_config_yaml(config_file_path) - ENS_VERSION = os.environ.get("ENS_VERSION") - EG_VERSION = os.environ.get("EG_VERSION") + ENS_VERSION = os.environ.get("ENS_VERSION", file_config.get('ens_version')) + EG_VERSION = os.environ.get("EG_VERSION", file_config.get('eg_version')) SCRIPT_NAME = os.environ.get('SCRIPT_NAME', '') - BASE_DIR = os.environ.get('BASE_DIR', - file_config.get('base_dir')) - SECRET_KEY = os.environ.get('SECRET_KEY', - file_config.get('secret_key', os.urandom(32))) - SERVER_URIS_FILE = os.environ.get('SERVER_URIS_FILE', - file_config.get('server_uris_file', 'server_uris_list.json')) + BASE_DIR = os.environ.get('BASE_DIR', file_config.get('base_dir')) + SECRET_KEY = os.environ.get('SECRET_KEY', file_config.get('secret_key', os.urandom(32))) + SERVER_URIS_FILE = os.environ.get('SERVER_URIS_FILE', file_config.get('server_uris_file', 'server_uris_list.json')) SWAGGER = { 'title': 'Ensembl Datacheck Service', 'uiversion': 3, From 04fa792c69e244ad502f32118f68c5c595997d4c Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 9 May 2023 09:26:57 +0100 Subject: [PATCH 4/7] Updated tests accordingly. 
--- src/ensembl/production/datacheck/config.py | 21 +++++++++-------- .../datacheck/datachecks_config.dev.yaml | 7 ------ src/tests/conftest.py | 11 ++++++--- src/tests/datachecks_config.yaml | 7 ++++++ src/tests/test_config_loader.py | 2 +- src/tests/test_dc_results_from_es.py | 23 ++++++++++++++----- 6 files changed, 44 insertions(+), 27 deletions(-) delete mode 100644 src/ensembl/production/datacheck/datachecks_config.dev.yaml create mode 100644 src/tests/datachecks_config.yaml diff --git a/src/ensembl/production/datacheck/config.py b/src/ensembl/production/datacheck/config.py index cfe4fb2..7a0c3cd 100644 --- a/src/ensembl/production/datacheck/config.py +++ b/src/ensembl/production/datacheck/config.py @@ -42,31 +42,33 @@ def get_app_version(): def get_server_names(url, flag=0): try: - if flag : - url=urllib.parse.urljoin(url, '/api/dbcopy/dcservers') + if flag: + url = urllib.parse.urljoin(url, '/api/dbcopy/dcservers') loader = RemoteFileLoader('json') return loader.r_open(url) else: - server_file_path = os.environ.get("SERVER_NAMES", EnsemblConfig.file_config.get('server_names_file')) + server_file_path = os.environ.get("SERVER_NAMES", + EnsemblConfig.file_config.get('server_names_file')) return json.load(open(server_file_path)) except Exception as e: raise RuntimeError(f"Unable to load the list of server names {e}") + class DCConfigLoader: base_uri = 'https://raw.githubusercontent.com/Ensembl/ensembl-datacheck/' - uri = base_uri + 'release/{}/lib/Bio/EnsEMBL/DataCheck/index.json' @classmethod def load_config(cls, version): loader = RemoteFileLoader('json') try: - return loader.r_open(cls.uri.format(version)) - except requests.HTTPError as e: - logger.warning(f"Loading {version} index.json from main") + return loader.r_open(f'{cls.base_uri}release/{version}/lib/Bio/EnsEMBL/DataCheck/index.json') + except requests.exceptions.HTTPError as e: + logger.warning(f"Loading /{version}/ index.json from main") return 
loader.r_open(f'{cls.base_uri}main/lib/Bio/EnsEMBL/DataCheck/index.json') class EnsemblConfig: + logging.debug(f"Loading config from {config_file_path}") file_config = load_config_yaml(config_file_path) ENS_VERSION = os.environ.get("ENS_VERSION", file_config.get('ens_version')) @@ -118,10 +120,9 @@ class DatacheckConfig(EnsemblConfig): ES_PORT = os.environ.get('ES_PORT', EnsemblConfig.file_config.get('es_port', '9200')) ES_SSL = os.environ.get('ES_SSL', EnsemblConfig.file_config.get('es_ssl', "f")).lower() in ['true', '1'] ES_INDEX = os.environ.get('ES_INDEX', EnsemblConfig.file_config.get('es_index', "datacheck_results")) - + GET_SERVER_NAMES = os.environ.get('GET_SERVER_NAMES', EnsemblConfig.file_config.get('get_server_names', 0)) SERVER_NAMES = get_server_names(COPY_URI_DROPDOWN, GET_SERVER_NAMES) - APP_VERSION = get_app_version() - + APP_VERSION = get_app_version() diff --git a/src/ensembl/production/datacheck/datachecks_config.dev.yaml b/src/ensembl/production/datacheck/datachecks_config.dev.yaml deleted file mode 100644 index 3124f44..0000000 --- a/src/ensembl/production/datacheck/datachecks_config.dev.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- -datacheck_output_dir: !ENV '${HOME}/datachecks' -datacheck_common_dir: !ENV '${HOME}/datachecks' - -# need to be the same as mentioned in beekeeper datacheck-beekeeper (datacheck.ini) -hive_uri: !ENV mysql://ensembl@localhost:3306/${USER}_datachecks_test -server_names_file: !ENV './server_names.json' diff --git a/src/tests/conftest.py b/src/tests/conftest.py index 469ee08..048cf96 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -9,11 +9,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -import urllib3 +import os import pytest -from ensembl.production.datacheck.app.main import app +import urllib3 +from pathlib import Path + +os.environ['DATACHECK_CONFIG_PATH'] = f"{Path(__file__).parent}/datachecks_config.yaml" +os.environ['SERVER_NAMES'] = f"{Path(__file__).parents[2]}/server_names.json" + from ensembl.production.core.es import ElasticsearchConnectionManager +from ensembl.production.datacheck.app.main import app dc_success_result_es_doc = { diff --git a/src/tests/datachecks_config.yaml b/src/tests/datachecks_config.yaml new file mode 100644 index 0000000..86710bd --- /dev/null +++ b/src/tests/datachecks_config.yaml @@ -0,0 +1,7 @@ +--- +datacheck_output_dir: !ENV '${HOME}/datachecks' +datacheck_common_dir: !ENV '${HOME}/datachecks' +ens_version: 110 +eg_version: 57 +hive_uri: !ENV "mysql://ensembl@localhost:3306/${USER}_datachecks_test" + diff --git a/src/tests/test_config_loader.py b/src/tests/test_config_loader.py index af3df62..dcb5304 100644 --- a/src/tests/test_config_loader.py +++ b/src/tests/test_config_loader.py @@ -29,7 +29,7 @@ def test_config_load_106(self): self.assertIn('DuplicateComparaMemberXref', config.keys()) def test_config_load_fallback(self): - config = DCConfigLoader.load_config() + config = DCConfigLoader.load_config('999') # Load main instead self.assertIn('SpeciesCommonName', config.keys()) diff --git a/src/tests/test_dc_results_from_es.py b/src/tests/test_dc_results_from_es.py index 4a501cd..a51928a 100644 --- a/src/tests/test_dc_results_from_es.py +++ b/src/tests/test_dc_results_from_es.py @@ -1,26 +1,37 @@ -import urllib3 -import pytest +# .. See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json - def test_without_jsonfile_param(appclient): response = appclient.get('/jobs/details') data = json.loads(response.data) assert response.status_code == 404 assert data['error'] == 'Failed to retrieve the details : jsonfile needed ' + def test_get_dc_results_success(appclient, elastic_search, es_query): elastic_search(es_query) - response = appclient.get('/jobs/details?jsonfile=/homes/user/test_es_output/user_sL2nmrNTRkjE/results_by_species.json') + response = appclient.get( + '/jobs/details?jsonfile=/homes/user/test_es_output/user_sL2nmrNTRkjE/results_by_species.json') data = json.loads(response.data) assert response.status_code == 200 assert data == {} + def test_get_dc_results_failed(appclient, elastic_search, es_query): elastic_search(es_query) - response = appclient.get('/jobs/details?jsonfile=/homes/user/test_es_output/user_sL3mnrNTRrr1/results_by_species.json') + response = appclient.get( + '/jobs/details?jsonfile=/homes/user/test_es_output/user_sL3mnrNTRrr1/results_by_species.json') data = json.loads(response.data) assert response.status_code == 200 assert len(data.keys()) == 2 - From 307edc50470452b15c71db67c79b5ed983415daa Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 9 May 2023 09:30:56 +0100 Subject: [PATCH 5/7] Fixed test to: - expect error when release is set but doesn't exist - expect default from main if release is not set --- src/ensembl/production/datacheck/server_names.dev.json | 10 ---------- src/ensembl/production/datacheck/server_names.json | 10 ---------- src/tests/conftest.py | 9 ++++++--- 3 files changed, 6 
insertions(+), 23 deletions(-) delete mode 100755 src/ensembl/production/datacheck/server_names.dev.json delete mode 100755 src/ensembl/production/datacheck/server_names.json diff --git a/src/ensembl/production/datacheck/server_names.dev.json b/src/ensembl/production/datacheck/server_names.dev.json deleted file mode 100755 index 57892c4..0000000 --- a/src/ensembl/production/datacheck/server_names.dev.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "mysql://ensro@mysql-ens-test-1:4508/\n": { - "server_name": "mysql-ens-test-1", - "config_profile": "compara" - }, - "mysql://ensro@mysql-ens-test-2:4509/": { - "server_name": "mysql-ens-test-2", - "config_profile": "general" - } -} diff --git a/src/ensembl/production/datacheck/server_names.json b/src/ensembl/production/datacheck/server_names.json deleted file mode 100755 index 57892c4..0000000 --- a/src/ensembl/production/datacheck/server_names.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "mysql://ensro@mysql-ens-test-1:4508/\n": { - "server_name": "mysql-ens-test-1", - "config_profile": "compara" - }, - "mysql://ensro@mysql-ens-test-2:4509/": { - "server_name": "mysql-ens-test-2", - "config_profile": "general" - } -} diff --git a/src/tests/conftest.py b/src/tests/conftest.py index 048cf96..34f8d8f 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -9,6 +9,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import logging + import os import pytest import urllib3 @@ -115,9 +117,10 @@ def wait_for(url: str, retries: int = 2, backoff: float = 0.2): @pytest.fixture(scope="session") def elastic_search(): - wait_for(f"http://localhost:9200/") + es_host = 'http://localhost:9200/' + wait_for(es_host) with ElasticsearchConnectionManager("localhost", "9200", "", "", False) as es: - print("EsInfo", es.client.info()) + logging.info("EsInfo", es.client.info()) def search(body: dict) -> None: es.client.indices.flush() es.client.indices.refresh() @@ -127,7 +130,7 @@ def search(body: dict) -> None: #set mock es data es.client.index(index="datacheck_results", body=dc_success_result_es_doc, doc_type="report") es.client.index(index="datacheck_results", body=dc_failed_result_es_doc, doc_type="report") - print("Index created") + logging.info("Test Indexes created") yield search except: raise RuntimeWarning("Unable to create indexes!") From 83043f2c045ac337e88ded5539939d10678e898f Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 9 May 2023 17:09:05 +0100 Subject: [PATCH 6/7] Qualified name where needed --- src/ensembl/production/datacheck/app/main.py | 6 +++--- src/ensembl/production/datacheck/utils.py | 14 ++++++++++++++ src/tests/test_datacheck_app.py | 3 ++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/ensembl/production/datacheck/app/main.py b/src/ensembl/production/datacheck/app/main.py index f55a479..60de3ed 100644 --- a/src/ensembl/production/datacheck/app/main.py +++ b/src/ensembl/production/datacheck/app/main.py @@ -32,7 +32,7 @@ from ensembl.production.datacheck.config import DatacheckConfig from ensembl.production.datacheck.exceptions import MissingIndexException from ensembl.production.datacheck.forms import DatacheckSubmissionForm -from ensembl.production.datacheck.utils import get_datacheck_results +from ensembl.production.datacheck.utils import get_datacheck_results, qualified_name # Go up two levels to get to root, where we will find the 
static and template files app_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -148,7 +148,7 @@ def servers_dict(): def databases_list(): db_uri = request.args.get('db_uri') query = request.args.get('query') - return jsonify(get_databases_list(db_uri, query)) + return jsonify(get_databases_list(qualified_name(db_uri), query)) @app.route('/names/', methods=['GET']) @@ -515,7 +515,7 @@ def set_db_type(dbname, db_uri): if m is not None: db_type = m.group() else: - db_type = get_db_type(db_uri) + db_type = get_db_type(qualified_name(db_uri)) return db_type diff --git a/src/ensembl/production/datacheck/utils.py b/src/ensembl/production/datacheck/utils.py index 4051813..01938fe 100644 --- a/src/ensembl/production/datacheck/utils.py +++ b/src/ensembl/production/datacheck/utils.py @@ -11,9 +11,23 @@ # limitations under the License. from elasticsearch import ElasticsearchException +from sqlalchemy.engine import make_url + from ensembl.production.core.es import ElasticsearchConnectionManager from ensembl.production.datacheck.config import DatacheckConfig as dcg +def qualified_name(db_uri): + import re + db_url = make_url(db_uri) + if re.search('[a-z-]?(.ebi.ac.uk|.org)', db_url.host) or db_url.host in ('localhost', 'mysql'): + return db_uri + else: + host = f'{db_url.host}.ebi.ac.uk' + if db_url.password: + return f"{db_url.drivername}://{db_url.username}:{db_url.password}@{host}:{db_url.port}/{db_url.database}" + else: + return f"{db_url.drivername}://{db_url.username}@{host}:{db_url.port}/{db_url.database}" + def get_datacheck_results(division: str, jsonfile_path: str, diff --git a/src/tests/test_datacheck_app.py b/src/tests/test_datacheck_app.py index a346ba5..2eeff59 100644 --- a/src/tests/test_datacheck_app.py +++ b/src/tests/test_datacheck_app.py @@ -11,8 +11,9 @@ # limitations under the License. 
import unittest -from flask import Flask, render_template, jsonify, Request, request +from flask import Flask, Request, request from werkzeug.test import EnvironBuilder + from ensembl.production.datacheck.forms import DatacheckSubmissionForm valid_payload = { From 4f41b0ddf3f847b62ccc9b4975ca132ac75ad74e Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Tue, 9 May 2023 17:45:11 +0100 Subject: [PATCH 7/7] Retrieved updates from latest es-connexion (fix search docs in ES) --- src/ensembl/production/datacheck/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ensembl/production/datacheck/utils.py b/src/ensembl/production/datacheck/utils.py index 2a302a6..e2b29b7 100644 --- a/src/ensembl/production/datacheck/utils.py +++ b/src/ensembl/production/datacheck/utils.py @@ -16,6 +16,7 @@ from ensembl.production.core.es import ElasticsearchConnectionManager from ensembl.production.datacheck.config import DatacheckConfig as dcg + def qualified_name(db_uri): import re db_url = make_url(db_uri)