Skip to content

Commit

Permalink
Merge pull request #192 from Clinical-Genomics/add-pubmlst-utils (patch)
Browse files Browse the repository at this point in the history
### Fixed

- Merged in intended changes
  • Loading branch information
karlnyr authored Jan 8, 2025
2 parents 1a93071 + 2d13f27 commit fa80486
Show file tree
Hide file tree
Showing 16 changed files with 1,023 additions and 750 deletions.
23 changes: 12 additions & 11 deletions configExample.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,21 @@
"project": "production",
"type": "core"
},

"regex": {
"regex": {
"mail_recipient": "username@suffix.com",
"_comment": "File finding patterns. Only single capture group accepted (for reverse/forward identifier)",
"file_pattern": "\\w{8,12}_\\w{8,10}(?:-\\d+)*_L\\d_(?:R)*(\\d{1}).fastq.gz",
"_comment": "Organisms recognized enough to be considered stable",
"verified_organisms": []
},

"_comment": "Folders",
"folders": {
"folders": {
"_comment": "Root folder for ALL output",
"results": "/tmp/MLST/results/",
"_comment": "Report collection folder",
"reports": "/tmp/MLST/reports/",
"_comment": "Log file position and name",
"log_file": "/tmp/microsalt.log",

"_comment": "Root folder for input fasta sequencing data",
"seqdata": "/tmp/projects/",
"_comment": "ST profiles. Each ST profile file under 'profiles' has an identical folder under references",
Expand All @@ -35,18 +32,18 @@
"_comment": "Resistances. Commonly from resFinder",
"resistances": "/tmp/MLST/references/resistances",
"_comment": "Download path for NCBI genomes, for alignment usage",
"genomes": "/tmp/MLST/references/genomes"
"genomes": "/tmp/MLST/references/genomes",
"_comment": "PubMLST credentials",
"pubmlst_credentials": "/tmp/MLST/credentials"
},

"_comment": "Database/Flask configuration",
"database": {
"SQLALCHEMY_DATABASE_URI": "sqlite:////tmp/microsalt.db",
"SQLALCHEMY_TRACK_MODIFICATIONS": "False",
"DEBUG": "True"
},

"_comment": "Thresholds for Displayed results",
"threshold": {
"threshold": {
"_comment": "Typing thresholds",
"mlst_id": 100,
"mlst_novel_id": 99.5,
Expand All @@ -72,11 +69,15 @@
"bp_50x_warn": 50,
"bp_100x_warn": 20
},

"_comment": "Genologics temporary configuration file",
"genologics": {
"baseuri": "https://lims.facility.se/",
"username": "limsuser",
"password": "mypassword"
},
"_comment": "PubMLST credentials",
"pubmlst": {
"client_id": "",
"client_secret": ""
}
}
}
7 changes: 7 additions & 0 deletions microSALT/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,15 @@
app.config["folders"] = preset_config.get("folders", {})

# Ensure PubMLST configuration is included

app.config["pubmlst"] = preset_config.get("pubmlst", {
"client_id": "",
"client_secret": ""
})

app.config["pubmlst"] = preset_config.get("pubmlst", {"client_id": "", "client_secret": ""})


# Add extrapaths to config
preset_config["folders"]["expec"] = os.path.abspath(
os.path.join(pathlib.Path(__file__).parent.parent, "unique_references/ExPEC.fsa")
Expand Down
Empty file.
106 changes: 106 additions & 0 deletions microSALT/utils/pubmlst/authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import json
import os
from datetime import datetime, timedelta
from dateutil import parser
from rauth import OAuth1Session
from microSALT import logger
from microSALT.utils.pubmlst.helpers import BASE_API, save_session_token, load_auth_credentials, get_path, folders_config, credentials_path_key, pubmlst_session_credentials_file_name
from microSALT.utils.pubmlst.exceptions import (
PUBMLSTError,
SessionTokenRequestError,
SessionTokenResponseError,
)

session_token_validity = 12 # 12-hour validity
session_expiration_buffer = 60 # 60-second buffer

def get_new_session_token(db: str):
    """Request a new session token using all credentials for a specific database.

    Args:
        db: Name of the PubMLST database (e.g. 'pubmlst_test_seqdef').

    Returns:
        tuple: ``(session_token, session_secret)`` for subsequent API calls.

    Raises:
        SessionTokenRequestError: If the API rejects the token request.
        SessionTokenResponseError: If the response is malformed or incomplete.
        PUBMLSTError: For any other unexpected failure.
    """
    # BUG FIX: original line lacked the f-prefix and logged the literal "{db}".
    logger.debug(f"Fetching a new session token for database '{db}'...")

    try:
        consumer_key, consumer_secret, access_token, access_secret = load_auth_credentials()

        url = f"{BASE_API}/db/{db}/oauth/get_session_token"

        session = OAuth1Session(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_secret,
        )

        response = session.get(url, headers={"User-Agent": "BIGSdb downloader"})
        # BUG FIX: original interpolated an undefined name without an f-prefix;
        # log the actual HTTP status code from the response.
        logger.debug(f"Response Status Code: {response.status_code}")

        if response.ok:
            try:
                token_data = response.json()
                session_token = token_data.get("oauth_token")
                session_secret = token_data.get("oauth_token_secret")

                if not session_token or not session_secret:
                    raise SessionTokenResponseError(
                        db, "Missing 'oauth_token' or 'oauth_token_secret' in response."
                    )

                # Persist the token with its expiry so later calls can reuse it
                # until it lapses (see load_session_credentials).
                expiration_time = datetime.now() + timedelta(hours=session_token_validity)

                save_session_token(db, session_token, session_secret, expiration_time)
                return session_token, session_secret

            except (ValueError, KeyError) as e:
                raise SessionTokenResponseError(db, f"Invalid response format: {str(e)}")
        else:
            raise SessionTokenRequestError(
                db, response.status_code, response.text
            )

    except PUBMLSTError as e:
        logger.error(f"Error during token fetching: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise PUBMLSTError(f"Unexpected error while fetching session token for database '{db}': {e}")

def load_session_credentials(db: str):
    """Load session token from file for a specific database."""
    try:
        session_file = os.path.join(
            get_path(folders_config, credentials_path_key),
            pubmlst_session_credentials_file_name,
        )

        # No cache on disk yet: go straight to the API for a fresh token.
        if not os.path.exists(session_file):
            logger.debug("Session file does not exist. Fetching a new session token.")
            return get_new_session_token(db)

        with open(session_file, "r") as handle:
            try:
                stored_sessions = json.load(handle)
            except json.JSONDecodeError as e:
                raise SessionTokenResponseError(db, f"Failed to parse session file: {str(e)}")

        record = stored_sessions.get("databases", {}).get(db)
        if not record:
            logger.debug(f"No session token found for database '{db}'. Fetching a new session token.")
            return get_new_session_token(db)

        # Treat a token as stale slightly before its real expiry to avoid
        # racing the server-side cutoff.
        valid_until = parser.parse(record.get("expiration", ""))
        if datetime.now() >= valid_until - timedelta(seconds=session_expiration_buffer):
            logger.debug(f"Session token for database '{db}' has expired. Fetching a new session token.")
            return get_new_session_token(db)

        logger.debug(f"Using existing session token for database '{db}'.")
        return record.get("token"), record.get("secret")

    except PUBMLSTError as e:
        logger.error(f"PUBMLST-specific error occurred: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise PUBMLSTError(f"Unexpected error while loading session token for database '{db}': {e}")

116 changes: 116 additions & 0 deletions microSALT/utils/pubmlst/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import requests
from urllib.parse import urlencode
from microSALT.utils.pubmlst.helpers import (
BASE_API,
generate_oauth_header,
load_auth_credentials,
parse_pubmlst_url
)
from microSALT.utils.pubmlst.constants import RequestType, HTTPMethod, ResponseHandler
from microSALT.utils.pubmlst.exceptions import PUBMLSTError, SessionTokenRequestError
from microSALT.utils.pubmlst.authentication import load_session_credentials
from microSALT import logger

class PubMLSTClient:
    """Client for interacting with the PubMLST authenticated API."""

    def __init__(self):
        """Initialize the PubMLST client."""
        try:
            credentials = load_auth_credentials()
            self.consumer_key, self.consumer_secret, self.access_token, self.access_secret = credentials
            self.database = "pubmlst_test_seqdef"
            self.session_token, self.session_secret = load_session_credentials(self.database)
        except PUBMLSTError as e:
            logger.error(f"Failed to initialize PubMLST client: {e}")
            raise

    @staticmethod
    def parse_pubmlst_url(url: str):
        """
        Wrapper for the parse_pubmlst_url function.
        """
        return parse_pubmlst_url(url)

    def _make_request(self, request_type: RequestType, method: HTTPMethod, url: str, db: str = None, response_handler: ResponseHandler = ResponseHandler.JSON):
        """ Handle API requests."""
        try:
            # A db-specific session token overrides the default one held by
            # the client instance.
            if db:
                session_token, session_secret = load_session_credentials(db)
            else:
                session_token, session_secret = self.session_token, self.session_secret

            # Auth handshakes sign with the access token; ordinary database
            # calls sign with the (possibly db-specific) session token.
            if request_type == RequestType.AUTH:
                oauth = generate_oauth_header(url, self.consumer_key, self.consumer_secret, self.access_token, self.access_secret)
            elif request_type == RequestType.DB:
                oauth = generate_oauth_header(url, self.consumer_key, self.consumer_secret, session_token, session_secret)
            else:
                raise ValueError(f"Unsupported request type: {request_type}")
            headers = {"Authorization": oauth}

            senders = {
                HTTPMethod.GET: requests.get,
                HTTPMethod.POST: requests.post,
                HTTPMethod.PUT: requests.put,
            }
            sender = senders.get(method)
            if sender is None:
                raise ValueError(f"Unsupported HTTP method: {method}")
            response = sender(url, headers=headers)

            response.raise_for_status()

            if response_handler == ResponseHandler.CONTENT:
                return response.content
            if response_handler == ResponseHandler.TEXT:
                return response.text
            if response_handler == ResponseHandler.JSON:
                return response.json()
            raise ValueError(f"Unsupported response handler: {response_handler}")

        except requests.exceptions.HTTPError as e:
            raise SessionTokenRequestError(db or self.database, e.response.status_code, e.response.text) from e
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed: {e}")
            raise PUBMLSTError(f"Request failed: {e}") from e
        except Exception as e:
            logger.error(f"Unexpected error during request: {e}")
            raise PUBMLSTError(f"An unexpected error occurred: {e}") from e

    def query_databases(self):
        """Query available PubMLST databases."""
        return self._make_request(
            RequestType.DB, HTTPMethod.GET, f"{BASE_API}/db",
            response_handler=ResponseHandler.JSON,
        )

    def download_locus(self, db: str, locus: str, **kwargs):
        """Download locus sequence files."""
        url = f"{BASE_API}/db/{db}/loci/{locus}/alleles_fasta"
        extra = urlencode(kwargs)
        if extra:
            url = f"{url}?{extra}"
        return self._make_request(RequestType.DB, HTTPMethod.GET, url, db=db, response_handler=ResponseHandler.TEXT)

    def download_profiles_csv(self, db: str, scheme_id: int):
        """Download MLST profiles in CSV format."""
        if not scheme_id:
            raise ValueError("Scheme ID is required to download profiles CSV.")
        return self._make_request(
            RequestType.DB, HTTPMethod.GET,
            f"{BASE_API}/db/{db}/schemes/{scheme_id}/profiles_csv",
            db=db, response_handler=ResponseHandler.TEXT,
        )

    def retrieve_scheme_info(self, db: str, scheme_id: int):
        """Retrieve information about a specific MLST scheme."""
        return self._make_request(
            RequestType.DB, HTTPMethod.GET,
            f"{BASE_API}/db/{db}/schemes/{scheme_id}",
            db=db, response_handler=ResponseHandler.JSON,
        )

    def list_schemes(self, db: str):
        """List available MLST schemes for a specific database."""
        return self._make_request(
            RequestType.DB, HTTPMethod.GET,
            f"{BASE_API}/db/{db}/schemes",
            db=db, response_handler=ResponseHandler.JSON,
        )
79 changes: 79 additions & 0 deletions microSALT/utils/pubmlst/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from enum import Enum
from werkzeug.routing import Map, Rule

class RequestType(Enum):
    """Kind of PubMLST API call: AUTH requests are signed with the access
    token, DB requests with a session token (see PubMLSTClient._make_request)."""
    AUTH = "auth"
    DB = "db"

class CredentialsFile(Enum):
    """Identifiers for the stored credential files — presumably MAIN holds the
    long-lived consumer/access credentials and SESSION the cached session
    tokens; confirm against the pubmlst helpers module."""
    MAIN = "main"
    SESSION = "session"

class Encoding(Enum):
    """Text encodings used when reading/writing PubMLST-related files."""
    UTF8 = "utf-8"

class HTTPMethod(Enum):
    """HTTP verbs accepted by PubMLSTClient._make_request (only GET, POST and
    PUT are dispatched there; the rest are listed for completeness)."""
    GET = "GET"
    POST = "POST"
    PUT = "PUT"
    DELETE = "DELETE"
    PATCH = "PATCH"
    HEAD = "HEAD"
    OPTIONS = "OPTIONS"

class ResponseHandler(Enum):
    """How an API response body is returned to the caller: raw bytes
    (CONTENT), decoded text (TEXT), or parsed JSON (JSON)."""
    CONTENT = "content"
    TEXT = "text"
    JSON = "json"

# (path, endpoint) pairs covering every PubMLST API route this package knows
# about; kept as plain data so the table is easy to scan and extend.
_route_specs = [
    ('/', 'root'),
    ('/db', 'db_root'),
    ('/db/<db>', 'database_root'),
    ('/db/<db>/classification_schemes', 'classification_schemes'),
    ('/db/<db>/classification_schemes/<int:classification_scheme_id>', 'classification_scheme'),
    ('/db/<db>/classification_schemes/<int:classification_scheme_id>/groups', 'classification_scheme_groups'),
    ('/db/<db>/classification_schemes/<int:classification_scheme_id>/groups/<int:group_id>', 'classification_scheme_group'),
    ('/db/<db>/loci', 'loci'),
    ('/db/<db>/loci/<locus>', 'locus'),
    ('/db/<db>/loci/<locus>/alleles', 'locus_alleles'),
    ('/db/<db>/loci/<locus>/alleles_fasta', 'locus_alleles_fasta'),
    ('/db/<db>/loci/<locus>/alleles/<int:allele_id>', 'locus_allele'),
    ('/db/<db>/loci/<locus>/sequence', 'locus_sequence_post'),
    ('/db/<db>/sequence', 'sequence_post'),
    ('/db/<db>/sequences', 'sequences'),
    ('/db/<db>/schemes', 'schemes'),
    ('/db/<db>/schemes/<int:scheme_id>', 'scheme'),
    ('/db/<db>/schemes/<int:scheme_id>/loci', 'scheme_loci'),
    ('/db/<db>/schemes/<int:scheme_id>/fields/<field>', 'scheme_field'),
    ('/db/<db>/schemes/<int:scheme_id>/profiles', 'scheme_profiles'),
    ('/db/<db>/schemes/<int:scheme_id>/profiles_csv', 'scheme_profiles_csv'),
    ('/db/<db>/schemes/<int:scheme_id>/profiles/<int:profile_id>', 'scheme_profile'),
    ('/db/<db>/schemes/<int:scheme_id>/sequence', 'scheme_sequence_post'),
    ('/db/<db>/schemes/<int:scheme_id>/designations', 'scheme_designations_post'),
    ('/db/<db>/isolates', 'isolates'),
    ('/db/<db>/genomes', 'genomes'),
    ('/db/<db>/isolates/search', 'isolates_search_post'),
    ('/db/<db>/isolates/<int:isolate_id>', 'isolate'),
    ('/db/<db>/isolates/<int:isolate_id>/allele_designations', 'isolate_allele_designations'),
    ('/db/<db>/isolates/<int:isolate_id>/allele_designations/<locus>', 'isolate_allele_designation_locus'),
    ('/db/<db>/isolates/<int:isolate_id>/allele_ids', 'isolate_allele_ids'),
    ('/db/<db>/isolates/<int:isolate_id>/schemes/<int:scheme_id>/allele_designations', 'isolate_scheme_allele_designations'),
    ('/db/<db>/isolates/<int:isolate_id>/schemes/<int:scheme_id>/allele_ids', 'isolate_scheme_allele_ids'),
    ('/db/<db>/isolates/<int:isolate_id>/contigs', 'isolate_contigs'),
    ('/db/<db>/isolates/<int:isolate_id>/contigs_fasta', 'isolate_contigs_fasta'),
    ('/db/<db>/isolates/<int:isolate_id>/history', 'isolate_history'),
    ('/db/<db>/contigs/<int:contig_id>', 'contig'),
    ('/db/<db>/fields', 'fields'),
    ('/db/<db>/fields/<field>', 'field'),
    ('/db/<db>/users/<int:user_id>', 'user'),
    ('/db/<db>/curators', 'curators'),
    ('/db/<db>/projects', 'projects'),
    ('/db/<db>/projects/<int:project_id>', 'project'),
    ('/db/<db>/projects/<int:project_id>/isolates', 'project_isolates'),
    ('/db/<db>/submissions', 'submissions'),
    ('/db/<db>/submissions/<int:submission_id>', 'submission'),
    ('/db/<db>/submissions/<int:submission_id>/messages', 'submission_messages'),
    ('/db/<db>/submissions/<int:submission_id>/files', 'submission_files'),
    ('/db/<db>/submissions/<int:submission_id>/files/<filename>', 'submission_file'),
]

url_map = Map([Rule(path, endpoint=endpoint) for path, endpoint in _route_specs])
Loading

0 comments on commit fa80486

Please sign in to comment.