Skip to content

Commit

Permalink
Merge pull request #30 from Ensembl/features/release-2.0.0
Browse files Browse the repository at this point in the history
Features/release 2.0.0
  • Loading branch information
marcoooo authored May 21, 2023
2 parents 678a535 + 4f41b0d commit 4596e2f
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 115 deletions.
20 changes: 7 additions & 13 deletions src/ensembl/production/datacheck/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from ensembl.production.datacheck.config import DatacheckConfig
from ensembl.production.datacheck.exceptions import MissingIndexException
from ensembl.production.datacheck.forms import DatacheckSubmissionForm
from ensembl.production.datacheck.utils import get_datacheck_results
from ensembl.production.datacheck.utils import get_datacheck_results, qualified_name

# Go up two levels to get to root, where we will find the static and template files
app_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
Expand Down Expand Up @@ -61,7 +61,7 @@

# set es details
es_host = app.config['ES_HOST']
es_port = str(app.config['ES_PORT'])
es_port = int(app.config['ES_PORT'])
es_index = app.config['ES_INDEX']
es_user = app.config['ES_USER']
es_password = app.config['ES_PASSWORD']
Expand Down Expand Up @@ -148,7 +148,7 @@ def servers_dict():
def databases_list():
db_uri = request.args.get('db_uri')
query = request.args.get('query')
return jsonify(get_databases_list(db_uri, query))
return jsonify(get_databases_list(qualified_name(db_uri), query))


@app.route('/names/', methods=['GET'])
Expand Down Expand Up @@ -343,9 +343,7 @@ def job_details():
if jsonfile is None:
raise Exception('jsonfile needed ')
if app_es_data_source:
ensembl_division = f"Ensembl{DatacheckConfig.DATACHECK_TYPE.capitalize()}"
res = get_datacheck_results(division=ensembl_division,
jsonfile_path=jsonfile,
res = get_datacheck_results(jsonfile_path=jsonfile,
es_host=es_host,
es_port=es_port,
es_index=es_index,
Expand Down Expand Up @@ -386,21 +384,17 @@ def job_result(job_id):
def download_dc_outputs(job_id):
try:
job = get_hive().get_result_for_job_id(job_id, progress=False)
ensembl_division = f"Ensembl{DatacheckConfig.DATACHECK_TYPE.capitalize()}"
if 'output' in job:

if app_es_data_source:
jsonfile_path = job['output']['json_output_file']
ensembl_division = f"Ensembl{DatacheckConfig.DATACHECK_TYPE.capitalize()}"
res = get_datacheck_results(division=ensembl_division,
jsonfile_path=jsonfile_path,
res = get_datacheck_results(jsonfile_path=jsonfile_path,
es_host=es_host,
es_port=es_port,
es_index=es_index,
es_user=es_user,
es_password=es_password,
es_ssl=es_ssl
)
es_ssl=es_ssl)
if not res['status']:
raise Exception(res['message'])

Expand Down Expand Up @@ -515,7 +509,7 @@ def set_db_type(dbname, db_uri):
if m is not None:
db_type = m.group()
else:
db_type = get_db_type(db_uri)
db_type = get_db_type(qualified_name(db_uri))
return db_type


Expand Down
56 changes: 22 additions & 34 deletions src/ensembl/production/datacheck/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import os
import pathlib
import pkg_resources
import urllib3
import urllib
import json
import requests.exceptions
import urllib
from pathlib import Path

from ensembl.production.core.config import load_config_yaml

from ensembl.utils.rloader import RemoteFileLoader

pathlib.Path(__file__).parent.absolute()

config_file_path = os.environ.get('DATACHECK_CONFIG_PATH')
from flask.logging import default_handler

logger = logging.getLogger()
logger = logging.getLogger(__name__)
logger.addHandler(default_handler)


Expand All @@ -45,50 +42,42 @@ def get_app_version():

def get_server_names(url, flag=0):
try:
if flag :
url=urllib.parse.urljoin(url, '/api/dbcopy/dcservers')
if flag:
url = urllib.parse.urljoin(url, '/api/dbcopy/dcservers')
loader = RemoteFileLoader('json')
return loader.r_open(url)
else:
server_file_path = os.environ.get("SERVER_NAMES", EnsemblConfig.file_config.get('server_names_file',
os.path.join(
os.path.dirname(__file__),
'server_names.dev.json')))
server_file_path = os.environ.get("SERVER_NAMES",
EnsemblConfig.file_config.get('server_names_file'))
return json.load(open(server_file_path))
except Exception as e:
return {}
raise RuntimeError(f"Unable to load the list of server names {e}")



class DCConfigLoader:
base_uri = 'https://raw.githubusercontent.com/Ensembl/ensembl-datacheck/'
uri = base_uri + 'release/{}/lib/Bio/EnsEMBL/DataCheck/index.json'

@classmethod
def load_config(cls, version=None):
def load_config(cls, version):
loader = RemoteFileLoader('json')
if version is None:
logger.warning(f"No version specified, fall back on main")
uri = cls.base_uri + 'main/lib/Bio/EnsEMBL/DataCheck/index.json'
else:
uri = cls.uri.format(version)
try:
return loader.r_open(uri)
return loader.r_open(f'{cls.base_uri}release/{version}/lib/Bio/EnsEMBL/DataCheck/index.json')
except requests.exceptions.HTTPError as e:
logger.fatal(f"Load versioned index.json error: {version}")
return {}
logger.warning(f"Loading /{version}/ index.json from main")
return loader.r_open(f'{cls.base_uri}main/lib/Bio/EnsEMBL/DataCheck/index.json')


class EnsemblConfig:
logging.debug(f"Loading config from {config_file_path}")
file_config = load_config_yaml(config_file_path)

ENS_VERSION = os.environ.get("ENS_VERSION")
EG_VERSION = os.environ.get("EG_VERSION")
ENS_VERSION = os.environ.get("ENS_VERSION", file_config.get('ens_version'))
EG_VERSION = os.environ.get("EG_VERSION", file_config.get('eg_version'))
SCRIPT_NAME = os.environ.get('SCRIPT_NAME', '')
BASE_DIR = os.environ.get('BASE_DIR',
file_config.get('base_dir'))
SECRET_KEY = os.environ.get('SECRET_KEY',
file_config.get('secret_key', os.urandom(32)))
SERVER_URIS_FILE = os.environ.get('SERVER_URIS_FILE',
file_config.get('server_uris_file', 'server_uris_list.json'))
BASE_DIR = os.environ.get('BASE_DIR', file_config.get('base_dir'))
SECRET_KEY = os.environ.get('SECRET_KEY', file_config.get('secret_key', os.urandom(32)))
SERVER_URIS_FILE = os.environ.get('SERVER_URIS_FILE', file_config.get('server_uris_file', 'server_uris_list.json'))
SWAGGER = {
'title': 'Ensembl Datacheck Service',
'uiversion': 3,
Expand Down Expand Up @@ -132,10 +121,9 @@ class DatacheckConfig(EnsemblConfig):
ES_PORT = os.environ.get('ES_PORT', EnsemblConfig.file_config.get('es_port', '9200'))
ES_SSL = os.environ.get('ES_SSL', EnsemblConfig.file_config.get('es_ssl', "f")).lower() in ['true', '1']
ES_INDEX = os.environ.get('ES_INDEX', EnsemblConfig.file_config.get('es_index', "datacheck_results"))

GET_SERVER_NAMES = os.environ.get('GET_SERVER_NAMES', EnsemblConfig.file_config.get('get_server_names', 0))

SERVER_NAMES = get_server_names(COPY_URI_DROPDOWN, GET_SERVER_NAMES)

APP_VERSION = get_app_version()

APP_VERSION = get_app_version()
7 changes: 0 additions & 7 deletions src/ensembl/production/datacheck/datachecks_config.dev.yaml

This file was deleted.

10 changes: 0 additions & 10 deletions src/ensembl/production/datacheck/server_names.dev.json

This file was deleted.

10 changes: 0 additions & 10 deletions src/ensembl/production/datacheck/server_names.json

This file was deleted.

47 changes: 19 additions & 28 deletions src/ensembl/production/datacheck/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,26 @@
# limitations under the License.

from elasticsearch import ElasticsearchException
from sqlalchemy.engine import make_url

from ensembl.production.core.es import ElasticsearchConnectionManager
from ensembl.production.datacheck.config import DatacheckConfig as dcg


def get_datacheck_results(division: str,
jsonfile_path: str,
def qualified_name(db_uri):
    """Return ``db_uri`` with a short host name expanded to ``<host>.ebi.ac.uk``.

    The URI is returned unchanged when it is empty, has no host component,
    or when the host is ``localhost`` / ``mysql`` or already ends with
    ``.ebi.ac.uk`` or ``.org``. Otherwise the URI is rebuilt from its parsed
    components with ``.ebi.ac.uk`` appended to the host.

    Args:
        db_uri: Database URI string accepted by ``sqlalchemy.engine.make_url``
            (may be ``None`` or empty, in which case it is returned as is).

    Returns:
        The original ``db_uri`` or a rebuilt URI string with a qualified host.
        NOTE(review): as in the previous implementation, URL query parameters
        are not carried over into the rebuilt URI.
    """
    from urllib.parse import quote

    # Nothing to qualify: callers may pass a missing request argument through.
    if not db_uri:
        return db_uri

    db_url = make_url(db_uri)
    host = db_url.host
    if not host:
        # e.g. a URI without network location; there is no host to expand.
        return db_uri

    # Suffix comparison instead of an unanchored regex with unescaped dots,
    # which also matched short names that merely contained "<char>org".
    lowered = host.lower()
    if lowered in ('localhost', 'mysql') or lowered.endswith(('.ebi.ac.uk', '.org')):
        return db_uri

    # make_url() hands back decoded credentials; re-escape them so reserved
    # characters (':', '@', '/') cannot break the rebuilt URI.
    credentials = ''
    if db_url.username or db_url.password:
        credentials = quote(str(db_url.username or ''), safe='')
        if db_url.password:
            credentials += f":{quote(str(db_url.password), safe='')}"
        credentials += '@'

    # Only emit the parts that are actually present, never the text "None".
    location = f'{host}.ebi.ac.uk'
    if db_url.port is not None:
        location += f':{db_url.port}'
    database = db_url.database or ''

    return f'{db_url.drivername}://{credentials}{location}/{database}'


def get_datacheck_results(jsonfile_path: str,
es_host: str = dcg.ES_HOST,
es_port: int = int(dcg.ES_PORT),
es_index: str = dcg.ES_INDEX,
Expand All @@ -26,7 +40,6 @@ def get_datacheck_results(division: str,
"""Get datacheck results stored in Elasticsearch
Args:
division (str): Ensembl division to filter results
jsonfile_path (str): unique file name to filter the results
es_host (str): elastic search host to connect
es_port (int): elastic search port
Expand All @@ -38,33 +51,12 @@ def get_datacheck_results(division: str,
Returns:
dict: status with elasticsearch response
"""

if not all([division, jsonfile_path]):
raise Exception("Param division and jsonfile_path required")

with ElasticsearchConnectionManager(es_host, es_port, es_user, es_password, es_ssl) as es:
try:
res = es.client.search(index=es_index, body={
"query": {
"bool": {
"must": [
{
"match": {
"division.keyword": {
"query": division,
"operator": "and"
}
}
},
{
"match": {
"file.keyword": {
"query": jsonfile_path,
"operator": "and"
}
}
}
]
"term": {
"file.keyword": jsonfile_path
}
},
"size": 1,
Expand All @@ -78,8 +70,7 @@ def get_datacheck_results(division: str,
]
})
if len(res['hits']['hits']) == 0:
raise ElasticsearchException(f""" No Hits Found for given params division {division}
and jsonfile_path {jsonfile_path} """)
raise ElasticsearchException(f"""No Hits Found for given params jsonfile_path {jsonfile_path} """)

return {"status": True, "message": "", "result": res['hits']['hits'][0]['_source']['content']}

Expand Down
18 changes: 13 additions & 5 deletions src/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

import urllib3
import os
import pytest
from ensembl.production.datacheck.app.main import app
import urllib3
from pathlib import Path

os.environ['DATACHECK_CONFIG_PATH'] = f"{Path(__file__).parent}/datachecks_config.yaml"
os.environ['SERVER_NAMES'] = f"{Path(__file__).parents[2]}/server_names.json"

from ensembl.production.core.es import ElasticsearchConnectionManager
from ensembl.production.datacheck.app.main import app


dc_success_result_es_doc = {
Expand Down Expand Up @@ -110,9 +117,10 @@ def wait_for(url: str, retries: int = 2, backoff: float = 0.2):

@pytest.fixture(scope="session")
def elastic_search():
wait_for(f"http://localhost:9200/")
es_host = 'http://localhost:9200/'
wait_for(es_host)
with ElasticsearchConnectionManager("localhost", "9200", "", "", False) as es:
print("EsInfo", es.client.info())
logging.info("EsInfo", es.client.info())
def search(body: dict) -> None:
es.client.indices.flush()
es.client.indices.refresh()
Expand All @@ -122,7 +130,7 @@ def search(body: dict) -> None:
#set mock es data
es.client.index(index="datacheck_results", body=dc_success_result_es_doc, doc_type="report")
es.client.index(index="datacheck_results", body=dc_failed_result_es_doc, doc_type="report")
print("Index created")
logging.info("Test Indexes created")
yield search
except:
raise RuntimeWarning("Unable to create indexes!")
Expand Down
7 changes: 7 additions & 0 deletions src/tests/datachecks_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
datacheck_output_dir: !ENV '${HOME}/datachecks'
datacheck_common_dir: !ENV '${HOME}/datachecks'
ens_version: 110
eg_version: 57
hive_uri: !ENV "mysql://ensembl@localhost:3306/${USER}_datachecks_test"

2 changes: 1 addition & 1 deletion src/tests/test_config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_config_load_106(self):
self.assertIn('DuplicateComparaMemberXref', config.keys())

def test_config_load_fallback(self):
config = DCConfigLoader.load_config()
config = DCConfigLoader.load_config('999')
# Load main instead
self.assertIn('SpeciesCommonName', config.keys())

Expand Down
3 changes: 2 additions & 1 deletion src/tests/test_datacheck_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
# limitations under the License.

import unittest
from flask import Flask, render_template, jsonify, Request, request
from flask import Flask, Request, request
from werkzeug.test import EnvironBuilder

from ensembl.production.datacheck.forms import DatacheckSubmissionForm

valid_payload = {
Expand Down
Loading

0 comments on commit 4596e2f

Please sign in to comment.