Skip to content

Commit

Permalink
More tweaks to new logging implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin Armengol committed May 31, 2023
1 parent 8aa68ed commit 5ea754d
Show file tree
Hide file tree
Showing 41 changed files with 339 additions and 2,213 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
/dist/
/docs
/notebooks/.ipynb_checkpoints
/notebooks/logger.log
__pycache__/
/ddcuimap/**/logging.txt
/logging.txt
/ddcuimap/**/logger.log
/ddcuimap-workspace.code-workspace
/.vscode
3 changes: 0 additions & 3 deletions ddcuimap/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
# from get_version import get_version
# __version__ = get_version(__file__)

# import logging.config
# logging.config.fileConfig('/configs/logging/logging.yaml')
10 changes: 5 additions & 5 deletions ddcuimap/curation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import logging
from ddcuimap.utils.setup_logging import log_setup

from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log

# CREATE LOGGER
log_setup()
logger = logging.getLogger("curation_logger")
logger.propagate = False
setup_log()
cur_logger = logging.getLogger("curation_logger")
# logger.propagate = False
cur_logger.info("Initiating ddcuimap.curation logger.")
10 changes: 5 additions & 5 deletions ddcuimap/curation/check_cuis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
import numpy as np

from ddcuimap.utils import helper as helper
from ddcuimap.utils.decorators import log
from ddcuimap.curation import logger
from ddcuimap.curation import cur_logger, log, copy_log
from ddcuimap.curation.utils import dictionary_functions as dd


Expand Down Expand Up @@ -95,12 +94,13 @@ def check_cuis(cfg):
df_multi_cui = df_multi_cui.add_suffix("_multi_cui")
df_check = df_check.join(df_multi_cui, how="outer")

logger.info("Done checking CUIs for " + check)
cur_logger.info("Done checking CUIs for " + check)

# Save file
# SAVE FILE AND MOVE LOG
fp_check = os.path.join(dir_check, "dictionary-import-file-check.csv")
df_check.to_csv(fp_check, index=False)
logger.info("Saved file to " + fp_check)
cur_logger.info("Saved file to " + fp_check)
copy_log(cur_logger, dir_check, "dictionary-import-file-check.log")

return df_check

Expand Down
10 changes: 6 additions & 4 deletions ddcuimap/curation/create_dictionary_import_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from pathlib import Path

from ddcuimap.utils import helper as helper
from ddcuimap.utils.decorators import log
from ddcuimap.curation import logger
from ddcuimap.curation import cur_logger, log, copy_log
from ddcuimap.curation.utils import curation_functions as cur


Expand Down Expand Up @@ -70,12 +69,15 @@ def create_dd_file(cfg):
.pipe(cur.override_cols, cfg.custom.create_dictionary_import_settings.override)
)

# SAVE FINALIZED IMPORT TEMPLATE
# SAVE FINALIZED IMPORT
fp_step2 = f"{dir_step2}/{cfg.custom.curation_settings.file_settings.file_prefix}_Step-2_dictionary-import-file.csv"
cfg.custom.create_dictionary_import_settings.dict_file_path = fp_step2
df_final.to_csv(fp_step2, index=False) # output df_final dataframe to csv
logger.info(f"Saved {fp_step2}")
cur_logger.info(f"Saved {fp_step2}")

# SAVE CONFIG AND MOVE LOG
helper.save_config(cfg, dir_step2)
copy_log(cur_logger, dir_step2, "cur_logger.log")

return df_final

Expand Down
7 changes: 3 additions & 4 deletions ddcuimap/curation/utils/curation_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
import numpy as np
import pandas as pd

from ddcuimap.curation import cur_logger, log, copy_log
from ddcuimap.utils import helper as helper
from ddcuimap.utils.decorators import log
from ddcuimap.curation import logger
from ddcuimap.curation.utils import xlsx_formatting as xlsx


Expand Down Expand Up @@ -166,7 +165,7 @@ def filter_keep_col(df):
def order_keep_col(df):
"""Orders rows in keep column by number and letter e.g., 1a, 1b, 2a, 2b, 3a, 3b"""

# TODO: need to fix issue where 1a,1b,2,2c puts 2 first.
# TODO: need to fix issue where 1a,1b,2,2c puts 2 first and also treats 2 and 2b as separate (2|2b instead of 2/2b)
df["keep"] = df["keep"].astype(str)
df["keep_num"] = [x[0] for x in df["keep"]]
df["keep_letter"] = [x[1:] if len(x) > 1 else "" for x in df["keep"]]
Expand Down Expand Up @@ -241,7 +240,7 @@ def keep_existing_cols(df_cols, cols_to_check: list):
) # TODO: check why I wrote this
cols_excl = list(set(cols_to_check).difference(df_cols))
cols = [x for x in df_cols if x not in cols_excl]
logger.warning(
cur_logger.warning(
f"The following columns were not found and will be excluded: {cols_excl}"
)
return cols
Expand Down
12 changes: 5 additions & 7 deletions ddcuimap/curation/utils/process_data_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
"""


import pandas as pd

from ddcuimap.curation import cur_logger, log, copy_log
from ddcuimap.utils import helper as helper
from ddcuimap.utils.decorators import log
from ddcuimap.curation import logger
from ddcuimap.curation.utils import text_processing as tp


Expand All @@ -20,13 +18,13 @@ def load_data_dictionary(cfg):
if not cfg.custom.data_dictionary_settings.filepath:
fp_dd = helper.choose_file("Select data dictionary csv input file")
df_dd = pd.read_csv(fp_dd)
logger.info(f"Data Dictionary shape is: {df_dd.shape}")
cur_logger.info(f"Data Dictionary shape is: {df_dd.shape}")
cfg.custom.data_dictionary_settings.filepath = fp_dd
else:
fp_dd = cfg.custom.data_dictionary_settings.filepath
logger.warning(f"Loading data dictionary from filepath in configs.")
cur_logger.warning(f"Loading data dictionary from filepath in configs.")
df_dd = pd.read_csv(fp_dd)
logger.info(f"Data Dictionary shape is: {df_dd.shape}")
cur_logger.info(f"Data Dictionary shape is: {df_dd.shape}")
return df_dd, fp_dd


Expand Down Expand Up @@ -75,5 +73,5 @@ def process_data_dictionary(df_dd, cfg):
tp.remove_stopwords_cols, cols_extracted, cfg.custom.preprocessing_settings
)
)
logger.info(f"Processed Data Dictionary shape is: {df_dd_preprocessed.shape}")
cur_logger.info(f"Processed Data Dictionary shape is: {df_dd_preprocessed.shape}")
return df_dd_preprocessed
13 changes: 6 additions & 7 deletions ddcuimap/curation/utils/text_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
# import cchardet # TODO: may be useful in future

from ddcuimap.utils import helper
from ddcuimap.utils.decorators import log
from ddcuimap.curation import logger
from ddcuimap.curation import cur_logger, log, copy_log


# TEXT PROCESSING FUNCTIONS
Expand Down Expand Up @@ -74,10 +73,10 @@ def remove_stopwords_cols(df, columns, preprocessing_settings):
cols_query_terms = []
if preprocessing_settings.remove_stopwords:
if preprocessing_settings.stopwords_filepath:
logger.warning("Loading stopwords file from configs")
cur_logger.warning("Loading stopwords file from configs")
fp_stopwords = preprocessing_settings.stopwords_filepath
else:
logger.warning("Opening dialog box to choose stopwords file")
cur_logger.warning("Opening dialog box to choose stopwords file")
fp_stopwords = helper.choose_file("Select Stopwords csv file")
df_stopwords = pd.read_csv(fp_stopwords)
ls_stopwords = list(
Expand Down Expand Up @@ -106,18 +105,18 @@ def remove_vars_cheatsheet(df, preprocessing_settings): # TODO: not yet impleme

if preprocessing_settings.use_cheatsheet:
if preprocessing_settings.cheatsheet_filepath:
logger.warning("Loading cheatsheet file from configs")
cur_logger.warning("Loading cheatsheet file from configs")
fp_cheatsheet = preprocessing_settings.cheatsheet_filepath
else:
logger.warning("Opening dialog box to choose cheatsheet file")
cur_logger.warning("Opening dialog box to choose cheatsheet file")
fp_cheatsheet = helper.choose_file(title="Select Cheatsheet csv file")
df_cheatsheet = pd.read_csv(fp_cheatsheet)
curated_vars = df_cheatsheet[
"variable name"
] # TODO: need to add consistent formatting for use of a cheatsheet
df = df[~df["variable name"].isin(curated_vars)]
else:
logger.warning("Cheatsheet not used")
cur_logger.warning("Cheatsheet not used")
pass
return df

Expand Down
8 changes: 7 additions & 1 deletion ddcuimap/curation/utils/xlsx_formatting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
"""
Functions for formatting Excel curation file.
"""

from openpyxl.utils import get_column_letter

from ddcuimap.utils.decorators import log
from ddcuimap.curation import log


# EXCEL FORMATTING
Expand Down
10 changes: 4 additions & 6 deletions ddcuimap/hydra_search/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import logging
from ddcuimap.utils.setup_logging import log_setup

from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log

# CREATE LOGGER
log_setup()
logger = logging.getLogger("hydra_search_logger")
logger.propagate = False
logger.info("Initiating ddcuimap.hydra_search logging.")
setup_log()
hydra_logger = logging.getLogger("hydra_search_logger")
hydra_logger.info("Initiating ddcuimap.hydra_search logger.")
11 changes: 7 additions & 4 deletions ddcuimap/hydra_search/batch_hydra_query_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
import pandas as pd
from pathlib import Path

from ddcuimap.hydra_search import hydra_logger, log, copy_log
import ddcuimap.utils.helper as helper
from ddcuimap.utils.decorators import log
from ddcuimap.hydra_search import logger
import ddcuimap.curation.utils.process_data_dictionary as proc_dd
import ddcuimap.curation.utils.curation_functions as cur
import ddcuimap.umls.batch_query_pipeline as umls
Expand Down Expand Up @@ -102,8 +101,12 @@ def run_hydra_batch(cfg_hydra, **kwargs):
df_final = cur.create_curation_file(
dir_step1, df_dd, df_dd_preprocessed, df_curation, df_results, cfg_hydra
)
helper.save_config(cfg_hydra, dir_step1)
logger.info("FINISHED batch hydra search query pipeline!!!")

hydra_logger.info("FINISHED batch hydra search query pipeline!!!")

# SAVE CONFIG FILE AND MOVE LOG
helper.save_config(cfg_hydra, dir_step1, "config_query.yaml")
copy_log(hydra_logger, dir_step1, "hydra_logger.log")

return df_final, cfg_hydra

Expand Down
10 changes: 4 additions & 6 deletions ddcuimap/metamap/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import logging
from ddcuimap.utils.setup_logging import log_setup

from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log

# CREATE LOGGER
log_setup()
logger = logging.getLogger("metamap_logger")
logger.propagate = False
logger.info("Initiating ddcuimap.metamap logging.")
setup_log()
mm_logger = logging.getLogger("metamap_logger")
mm_logger.info("Initiating ddcuimap.metamap logger.")
15 changes: 9 additions & 6 deletions ddcuimap/metamap/batch_query_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
import sys
from pathlib import Path

from ddcuimap.metamap import mm_logger, log, copy_log
import ddcuimap.utils.helper as helper
from ddcuimap.utils.decorators import log
from ddcuimap.metamap import logger
import ddcuimap.curation.utils.process_data_dictionary as proc_dd

# MetaMap API
Expand Down Expand Up @@ -71,16 +70,20 @@ def run_mm_batch(cfg, **kwargs):
df_results = mm_qproc.process_mm_json_to_df(mm_json, cfg)
df_results = mm_qproc.rename_mm_columns(df_results, cfg)
else:
logger.warning(response.text)
logger.error("MetaMap batch query pipeline failed!!!")
mm_logger.warning(response.text)
mm_logger.error("MetaMap batch query pipeline failed!!!")
sys.exit()

# CREATE CURATION FILE
df_final = cur.create_curation_file(
dir_step1, df_dd, df_dd_preprocessed, df_curation, df_results, cfg
)
helper.save_config(cfg, dir_step1)
logger.info("FINISHED MetaMap batch query pipeline!!!")

mm_logger.info("FINISHED MetaMap batch query pipeline!!!")

# SAVE CONFIG FILE AND MOVE LOG
helper.save_config(cfg, dir_step1, "config_query.yaml")
# copy_log(mm_logger, dir_step1, "mm_logger.log")

return df_final, cfg

Expand Down
14 changes: 7 additions & 7 deletions ddcuimap/metamap/skr_web_api/casauth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
from requests_html import HTML

from ddcuimap.metamap import logger
from ddcuimap.metamap import mm_logger


def get_service_ticket(serverurl, ticket_granting_ticket, serviceurl):
Expand Down Expand Up @@ -35,12 +35,12 @@ def get_service_ticket(serverurl, ticket_granting_ticket, serviceurl):

def extract_tgt_ticket(htmlcontent):
"Extract ticket granting ticket from HTML."
# logger.info('htmlcontent: {}'.format(htmlcontent))
# mm_logger.info('htmlcontent: {}'.format(htmlcontent))
html = HTML(html=htmlcontent)
# get form element
elements = html.xpath("//form")
# logger.info('html response: {}'.format(etree.tostring(html.lxml).decode()))
# logger.info('action attribute: {}'.format(elements[0].attrs['action']))
# mm_logger.info('html response: {}'.format(etree.tostring(html.lxml).decode()))
# mm_logger.info('action attribute: {}'.format(elements[0].attrs['action']))
# extract ticket granting ticket out of 'action' attribute
if elements != []:
return elements[0].attrs["action"].split("/")[-1]
Expand Down Expand Up @@ -90,11 +90,11 @@ def get_ticket(cas_serverurl, apikey, serviceurl):
"""
if cas_serverurl is None:
logger.warning("cas server url must not be None")
mm_logger.warning("cas server url must not be None")
if apikey is None:
logger.warning("api key must not be null")
mm_logger.warning("api key must not be null")
if serviceurl is None:
logger.warning("service must not be null")
mm_logger.warning("service must not be null")
# set ticket granting ticket server url
tgtserverurl = cas_serverurl + "/api-key"
# set service ticket server url
Expand Down
21 changes: 12 additions & 9 deletions ddcuimap/metamap/utils/api_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,38 @@

from dotenv import load_dotenv

load_dotenv()
from ddcuimap.metamap import mm_logger, log

from ddcuimap.metamap import logger
from ddcuimap.utils.decorators import log
load_dotenv()


@log(msg="Checking MetaMap UMLS API credentials in config files or .env file")
def check_credentials(cfg):
"""Checks if api credentials exist in initialized config file or alternatively in an .env file"""

if not cfg.apis.metamap.user_info.apiKey:
logger.warning("No API_KEY_UMLS found in config files. Looking in .env file.")
mm_logger.warning(
"No API_KEY_UMLS found in config files. Looking in .env file."
)
try:
apiKey = os.getenv("API_KEY_UMLS")
logger.info("Using API_KEY_UMLS found in .env file.")
mm_logger.info("Using API_KEY_UMLS found in .env file.")
cfg.apis.metamap.user_info.apiKey = apiKey
except ValueError:
logger.error(
mm_logger.error(
"No API_KEY_UMLS in .env file. Please add your UMLS API key to configs.apis.config_umls_api.yaml OR .env file."
)
exit()
if not cfg.apis.metamap.user_info.email:
logger.warning("No API_EMAIL_UMLS found in config files. Looking in .env file.")
mm_logger.warning(
"No API_EMAIL_UMLS found in config files. Looking in .env file."
)
try:
email = os.getenv("API_EMAIL_UMLS")
logger.info("Using API_EMAIL_UMLS found in .env file.")
mm_logger.info("Using API_EMAIL_UMLS found in .env file.")
cfg.apis.metamap.user_info.email = email
except ValueError:
logger.error(
mm_logger.error(
"No API_EMAIL_UMLS in .env file. Please add your UMLS API email to configs.apis.config_metamap_api.yaml OR .env file."
)
exit()
Expand Down
Loading

0 comments on commit 5ea754d

Please sign in to comment.