From 5ea754df1d2f34f55a4edf5bcffbd7a25e9a0639 Mon Sep 17 00:00:00 2001 From: Kevin Armengol Date: Wed, 31 May 2023 08:23:23 -0400 Subject: [PATCH] More tweaks to new logging implementation. --- .gitignore | 6 +- ddcuimap/__init__.py | 3 - ddcuimap/curation/__init__.py | 10 +- ddcuimap/curation/check_cuis.py | 10 +- .../curation/create_dictionary_import_file.py | 10 +- ddcuimap/curation/utils/curation_functions.py | 7 +- .../curation/utils/process_data_dictionary.py | 12 +- ddcuimap/curation/utils/text_processing.py | 13 +- ddcuimap/curation/utils/xlsx_formatting.py | 8 +- ddcuimap/hydra_search/__init__.py | 10 +- .../batch_hydra_query_pipeline.py | 11 +- ddcuimap/metamap/__init__.py | 10 +- ddcuimap/metamap/batch_query_pipeline.py | 15 +- ddcuimap/metamap/skr_web_api/casauth.py | 14 +- ddcuimap/metamap/utils/api_connection.py | 21 +- .../metamap_query_processing_functions.py | 6 +- ddcuimap/semantic_search/__init__.py | 10 +- .../batch_hybrid_query_pipeline.py | 13 +- .../select_embed_upsert_pipeline.py | 5 +- .../step1_select_umls_subset.py | 27 +- .../step2_embed_umls_subset.py | 5 +- .../step3_upsert_umls_subset.py | 13 +- .../semantic_search/utils/api_connection.py | 15 +- ddcuimap/semantic_search/utils/builders.py | 11 +- ddcuimap/semantic_search/utils/checks.py | 2 +- ddcuimap/semantic_search/utils/runners.py | 2 +- ddcuimap/umls/__init__.py | 10 +- ddcuimap/umls/batch_query_pipeline.py | 9 +- ddcuimap/umls/utils/api_connection.py | 21 +- ddcuimap/umls/utils/runner.py | 15 +- .../utils/umls_query_processing_functions.py | 2 +- ddcuimap/utils/__init__.py | 8 +- ddcuimap/utils/decorators.py | 32 - ddcuimap/utils/helper.py | 14 +- ddcuimap/utils/logger/__init__.py | 1 + ddcuimap/utils/logger/config_logging.py | 123 ++ .../logger/config_logging.yaml} | 32 +- ddcuimap/utils/setup_logging.py | 55 - notebooks/cui_batch_query_pipeline.ipynb | 1871 +---------------- .../MetaMap_Settings_StopWords_2022-10-06.csv | 88 - pyproject.toml | 2 +- 41 files changed, 339 insertions(+), 2213 deletions(-) delete mode 100644 ddcuimap/utils/decorators.py create mode 100644 ddcuimap/utils/logger/__init__.py create mode 100644 ddcuimap/utils/logger/config_logging.py rename ddcuimap/{configs/logging/logging.yaml => utils/logger/config_logging.yaml} (73%) delete mode 100644 ddcuimap/utils/setup_logging.py delete mode 100644 notebooks/example_files/MetaMap_Settings_StopWords_2022-10-06.csv diff --git a/.gitignore b/.gitignore index 34cc2d9..69a3475 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ /dist/ /docs /notebooks/.ipynb_checkpoints +/notebooks/logger.log __pycache__/ -/ddcuimap/**/logging.txt -/logging.txt +/ddcuimap/**/logger.log +/ddcuimap-workspace.code-workspace +/.vscode diff --git a/ddcuimap/__init__.py b/ddcuimap/__init__.py index 82088e9..253cd48 100644 --- a/ddcuimap/__init__.py +++ b/ddcuimap/__init__.py @@ -1,5 +1,2 @@ # from get_version import get_version # __version__ = get_version(__file__) - -# import logging.config -# logging.config.fileConfig('/configs/logging/logging.yaml') diff --git a/ddcuimap/curation/__init__.py b/ddcuimap/curation/__init__.py index f8eaba8..c644e03 100644 --- a/ddcuimap/curation/__init__.py +++ b/ddcuimap/curation/__init__.py @@ -1,8 +1,8 @@ import logging -from ddcuimap.utils.setup_logging import log_setup - +from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log # CREATE LOGGER -log_setup() -logger = logging.getLogger("curation_logger") -logger.propagate = False +setup_log() +cur_logger = logging.getLogger("curation_logger") +# 
logger.propagate = False +cur_logger.info("Initiating ddcuimap.curation logger.") diff --git a/ddcuimap/curation/check_cuis.py b/ddcuimap/curation/check_cuis.py index 8945788..fc51e9b 100644 --- a/ddcuimap/curation/check_cuis.py +++ b/ddcuimap/curation/check_cuis.py @@ -8,8 +8,7 @@ import numpy as np from ddcuimap.utils import helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.curation import logger +from ddcuimap.curation import cur_logger, log, copy_log from ddcuimap.curation.utils import dictionary_functions as dd @@ -95,12 +94,13 @@ def check_cuis(cfg): df_multi_cui = df_multi_cui.add_suffix("_multi_cui") df_check = df_check.join(df_multi_cui, how="outer") - logger.info("Done checking CUIs for " + check) + cur_logger.info("Done checking CUIs for " + check) - # Save file + # SAVE FILE AND MOVE LOG fp_check = os.path.join(dir_check, "dictionary-import-file-check.csv") df_check.to_csv(fp_check, index=False) - logger.info("Saved file to " + fp_check) + cur_logger.info("Saved file to " + fp_check) + copy_log(cur_logger, dir_check, "dictionary-import-file-check.log") return df_check diff --git a/ddcuimap/curation/create_dictionary_import_file.py b/ddcuimap/curation/create_dictionary_import_file.py index 84e2b96..5680d31 100644 --- a/ddcuimap/curation/create_dictionary_import_file.py +++ b/ddcuimap/curation/create_dictionary_import_file.py @@ -7,8 +7,7 @@ from pathlib import Path from ddcuimap.utils import helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.curation import logger +from ddcuimap.curation import cur_logger, log, copy_log from ddcuimap.curation.utils import curation_functions as cur @@ -70,12 +69,15 @@ def create_dd_file(cfg): .pipe(cur.override_cols, cfg.custom.create_dictionary_import_settings.override) ) - # SAVE FINALIZED IMPORT TEMPLATE + # SAVE FINALIZED IMPORT fp_step2 = f"{dir_step2}/{cfg.custom.curation_settings.file_settings.file_prefix}_Step-2_dictionary-import-file.csv" cfg.custom.create_dictionary_import_settings.dict_file_path = fp_step2 df_final.to_csv(fp_step2, index=False) # output df_final dataframe to csv - logger.info(f"Saved {fp_step2}") + cur_logger.info(f"Saved {fp_step2}") + + # SAVE CONFIG AND MOVE LOG helper.save_config(cfg, dir_step2) + copy_log(cur_logger, dir_step2, "cur_logger.log") return df_final diff --git a/ddcuimap/curation/utils/curation_functions.py b/ddcuimap/curation/utils/curation_functions.py index 1b543f7..2ea9da6 100644 --- a/ddcuimap/curation/utils/curation_functions.py +++ b/ddcuimap/curation/utils/curation_functions.py @@ -9,9 +9,8 @@ import numpy as np import pandas as pd +from ddcuimap.curation import cur_logger, log, copy_log from ddcuimap.utils import helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.curation import logger from ddcuimap.curation.utils import xlsx_formatting as xlsx @@ -166,7 +165,7 @@ def filter_keep_col(df): def order_keep_col(df): """Orders rows in keep column by number and letter e.g., 1a, 1b, 2a, 2b, 3a, 3b""" - # TODO: need to fix issue where 1a,1b,2,2c puts 2 first. 
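# NOTE: a minimal sketch of a natural-sort key that would address the TODO below,
# assuming `keep` values look like "1a", "1b", "2", "2c"; `keep_sort_key` is a
# hypothetical helper for illustration, not part of this patch or the project's code.
import re

def keep_sort_key(value):
    match = re.match(r"(\d+)([a-z]*)", str(value))
    # sort numerically on the leading digits, then alphabetically on the letter suffix,
    # so a bare "2" groups with "2c" instead of sorting first as a string
    return (int(match.group(1)), match.group(2))

print(sorted(["1a", "1b", "2", "2c"], key=keep_sort_key))  # -> ['1a', '1b', '2', '2c']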
+ # TODO: need to fix issue where 1a,1b,2,2c puts 2 first and also treats 2 and 2b as separate (2|2b instead of 2/2b) df["keep"] = df["keep"].astype(str) df["keep_num"] = [x[0] for x in df["keep"]] df["keep_letter"] = [x[1:] if len(x) > 1 else "" for x in df["keep"]] @@ -241,7 +240,7 @@ def keep_existing_cols(df_cols, cols_to_check: list): ) # TODO: check why I wrote this cols_excl = list(set(cols_to_check).difference(df_cols)) cols = [x for x in df_cols if x not in cols_excl] - logger.warning( + cur_logger.warning( f"The following columns were not found and will be excluded: {cols_excl}" ) return cols diff --git a/ddcuimap/curation/utils/process_data_dictionary.py b/ddcuimap/curation/utils/process_data_dictionary.py index a5e72a4..0662580 100644 --- a/ddcuimap/curation/utils/process_data_dictionary.py +++ b/ddcuimap/curation/utils/process_data_dictionary.py @@ -4,12 +4,10 @@ """ - import pandas as pd +from ddcuimap.curation import cur_logger, log, copy_log from ddcuimap.utils import helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.curation import logger from ddcuimap.curation.utils import text_processing as tp @@ -20,13 +18,13 @@ def load_data_dictionary(cfg): if not cfg.custom.data_dictionary_settings.filepath: fp_dd = helper.choose_file("Select data dictionary csv input file") df_dd = pd.read_csv(fp_dd) - logger.info(f"Data Dictionary shape is: {df_dd.shape}") + cur_logger.info(f"Data Dictionary shape is: {df_dd.shape}") cfg.custom.data_dictionary_settings.filepath = fp_dd else: fp_dd = cfg.custom.data_dictionary_settings.filepath - logger.warning(f"Loading data dictionary from filepath in configs.") + cur_logger.warning(f"Loading data dictionary from filepath in configs.") df_dd = pd.read_csv(fp_dd) - logger.info(f"Data Dictionary shape is: {df_dd.shape}") + cur_logger.info(f"Data Dictionary shape is: {df_dd.shape}") return df_dd, fp_dd @@ -75,5 +73,5 @@ def process_data_dictionary(df_dd, cfg): tp.remove_stopwords_cols, cols_extracted, cfg.custom.preprocessing_settings ) ) - logger.info(f"Processed Data Dictionary shape is: {df_dd_preprocessed.shape}") + cur_logger.info(f"Processed Data Dictionary shape is: {df_dd_preprocessed.shape}") return df_dd_preprocessed diff --git a/ddcuimap/curation/utils/text_processing.py b/ddcuimap/curation/utils/text_processing.py index f604082..4eb5110 100644 --- a/ddcuimap/curation/utils/text_processing.py +++ b/ddcuimap/curation/utils/text_processing.py @@ -14,8 +14,7 @@ # import cchardet # TODO: may be useful in future from ddcuimap.utils import helper -from ddcuimap.utils.decorators import log -from ddcuimap.curation import logger +from ddcuimap.curation import cur_logger, log, copy_log # TEXT PROCESSING FUNCTIONS @@ -74,10 +73,10 @@ def remove_stopwords_cols(df, columns, preprocessing_settings): cols_query_terms = [] if preprocessing_settings.remove_stopwords: if preprocessing_settings.stopwords_filepath: - logger.warning("Loading stopwords file from configs") + cur_logger.warning("Loading stopwords file from configs") fp_stopwords = preprocessing_settings.stopwords_filepath else: - logger.warning("Opening dialog box to choose stopwords file") + cur_logger.warning("Opening dialog box to choose stopwords file") fp_stopwords = helper.choose_file("Select Stopwords csv file") df_stopwords = pd.read_csv(fp_stopwords) ls_stopwords = list( @@ -106,10 +105,10 @@ def remove_vars_cheatsheet(df, preprocessing_settings): # TODO: not yet impleme if preprocessing_settings.use_cheatsheet: if preprocessing_settings.cheatsheet_filepath: - 
logger.warning("Loading cheatsheet file from configs") + cur_logger.warning("Loading cheatsheet file from configs") fp_cheatsheet = preprocessing_settings.cheatsheet_filepath else: - logger.warning("Opening dialog box to choose cheatsheet file") + cur_logger.warning("Opening dialog box to choose cheatsheet file") fp_cheatsheet = helper.choose_file(title="Select Cheatsheet csv file") df_cheatsheet = pd.read_csv(fp_cheatsheet) curated_vars = df_cheatsheet[ @@ -117,7 +116,7 @@ def remove_vars_cheatsheet(df, preprocessing_settings): # TODO: not yet impleme ] # TODO: need to add consistent formatting for use of a cheatsheet df = df[~df["variable name"].isin(curated_vars)] else: - logger.warning("Cheatsheet not used") + cur_logger.warning("Cheatsheet not used") pass return df diff --git a/ddcuimap/curation/utils/xlsx_formatting.py b/ddcuimap/curation/utils/xlsx_formatting.py index 48b16f8..2c42e8c 100644 --- a/ddcuimap/curation/utils/xlsx_formatting.py +++ b/ddcuimap/curation/utils/xlsx_formatting.py @@ -1,6 +1,12 @@ +""" + +Functions for formatting Excel curation file. + +""" + from openpyxl.utils import get_column_letter -from ddcuimap.utils.decorators import log +from ddcuimap.curation import log # EXCEL FORMATTING diff --git a/ddcuimap/hydra_search/__init__.py b/ddcuimap/hydra_search/__init__.py index bbe75d3..96e663e 100644 --- a/ddcuimap/hydra_search/__init__.py +++ b/ddcuimap/hydra_search/__init__.py @@ -1,9 +1,7 @@ import logging -from ddcuimap.utils.setup_logging import log_setup - +from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log # CREATE LOGGER -log_setup() -logger = logging.getLogger("hydra_search_logger") -logger.propagate = False -logger.info("Initiating ddcuimap.hydra_search logging.") +setup_log() +hydra_logger = logging.getLogger("hydra_search_logger") +hydra_logger.info("Initiating ddcuimap.hydra_search logger.") diff --git a/ddcuimap/hydra_search/batch_hydra_query_pipeline.py b/ddcuimap/hydra_search/batch_hydra_query_pipeline.py index 3efc24c..826d0cf 100644 --- a/ddcuimap/hydra_search/batch_hydra_query_pipeline.py +++ b/ddcuimap/hydra_search/batch_hydra_query_pipeline.py @@ -7,9 +7,8 @@ import pandas as pd from pathlib import Path +from ddcuimap.hydra_search import hydra_logger, log, copy_log import ddcuimap.utils.helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.hydra_search import logger import ddcuimap.curation.utils.process_data_dictionary as proc_dd import ddcuimap.curation.utils.curation_functions as cur import ddcuimap.umls.batch_query_pipeline as umls @@ -102,8 +101,12 @@ def run_hydra_batch(cfg_hydra, **kwargs): df_final = cur.create_curation_file( dir_step1, df_dd, df_dd_preprocessed, df_curation, df_results, cfg_hydra ) - helper.save_config(cfg_hydra, dir_step1) - logger.info("FINISHED batch hydra search query pipeline!!!") + + hydra_logger.info("FINISHED batch hydra search query pipeline!!!") + + # SAVE CONFIG FILE AND MOVE LOG + helper.save_config(cfg_hydra, dir_step1, "config_query.yaml") + copy_log(hydra_logger, dir_step1, "hydra_logger.log") return df_final, cfg_hydra diff --git a/ddcuimap/metamap/__init__.py b/ddcuimap/metamap/__init__.py index c180d14..17d3afd 100644 --- a/ddcuimap/metamap/__init__.py +++ b/ddcuimap/metamap/__init__.py @@ -1,9 +1,7 @@ import logging -from ddcuimap.utils.setup_logging import log_setup - +from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log # CREATE LOGGER -log_setup() -logger = logging.getLogger("metamap_logger") -logger.propagate = False 
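# NOTE: a minimal sketch of the per-module logger convention this hunk switches to,
# mirroring the replacement lines below; it assumes config_logging.yaml defines a
# "metamap_logger" entry and is illustrative only, not part of the patch itself.
import logging
from ddcuimap.utils.logger.config_logging import setup_log

setup_log()  # load config_logging.yaml and install coloredlogs
mm_logger = logging.getLogger("metamap_logger")  # named logger with file + console handlers
mm_logger.info("Initiating ddcuimap.metamap logger.")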
-logger.info("Initiating ddcuimap.metamap logging.") +setup_log() +mm_logger = logging.getLogger("metamap_logger") +mm_logger.info("Initiating ddcuimap.metamap logger.") diff --git a/ddcuimap/metamap/batch_query_pipeline.py b/ddcuimap/metamap/batch_query_pipeline.py index f5db4bb..a857602 100644 --- a/ddcuimap/metamap/batch_query_pipeline.py +++ b/ddcuimap/metamap/batch_query_pipeline.py @@ -6,9 +6,8 @@ import sys from pathlib import Path +from ddcuimap.metamap import mm_logger, log, copy_log import ddcuimap.utils.helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.metamap import logger import ddcuimap.curation.utils.process_data_dictionary as proc_dd # MetaMap API @@ -71,16 +70,20 @@ def run_mm_batch(cfg, **kwargs): df_results = mm_qproc.process_mm_json_to_df(mm_json, cfg) df_results = mm_qproc.rename_mm_columns(df_results, cfg) else: - logger.warning(response.text) - logger.error("MetaMap batch query pipeline failed!!!") + mm_logger.warning(response.text) + mm_logger.error("MetaMap batch query pipeline failed!!!") sys.exit() # CREATE CURATION FILE df_final = cur.create_curation_file( dir_step1, df_dd, df_dd_preprocessed, df_curation, df_results, cfg ) - helper.save_config(cfg, dir_step1) - logger.info("FINISHED MetaMap batch query pipeline!!!") + + mm_logger.info("FINISHED MetaMap batch query pipeline!!!") + + # SAVE CONFIG FILE AND MOVE LOG + helper.save_config(cfg, dir_step1, "config_query.yaml") + # copy_log(mm_logger, dir_step1, "mm_logger.log") return df_final, cfg diff --git a/ddcuimap/metamap/skr_web_api/casauth.py b/ddcuimap/metamap/skr_web_api/casauth.py index 742197b..efd778c 100644 --- a/ddcuimap/metamap/skr_web_api/casauth.py +++ b/ddcuimap/metamap/skr_web_api/casauth.py @@ -3,7 +3,7 @@ import requests from requests_html import HTML -from ddcuimap.metamap import logger +from ddcuimap.metamap import mm_logger def get_service_ticket(serverurl, ticket_granting_ticket, serviceurl): @@ -35,12 +35,12 @@ def get_service_ticket(serverurl, ticket_granting_ticket, serviceurl): def extract_tgt_ticket(htmlcontent): "Extract ticket granting ticket from HTML." 
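# NOTE: for orientation, a hedged sketch of how the CAS helpers in this file
# compose: get_ticket() first obtains a ticket-granting ticket from the api-key
# endpoint, then trades it for a single-use service ticket via get_service_ticket().
# The URLs are taken from config_metamap_api.yaml and "YOUR_UMLS_API_KEY" is a
# placeholder; this is an untested illustration, not part of the patch.
from ddcuimap.metamap.skr_web_api.casauth import get_ticket

serviceurl = "https://ii.nlm.nih.gov/cgi-bin/II/UTS_Required/API_batchValidationII.pl"
ticket = get_ticket("https://utslogin.nlm.nih.gov/cas/v1", "YOUR_UMLS_API_KEY", serviceurl)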
-    # logger.info('htmlcontent: {}'.format(htmlcontent))
+    # mm_logger.info('htmlcontent: {}'.format(htmlcontent))
     html = HTML(html=htmlcontent)
     # get form element
     elements = html.xpath("//form")
-    # logger.info('html response: {}'.format(etree.tostring(html.lxml).decode()))
-    # logger.info('action attribure: {}'.format(elements[0].attrs['action']))
+    # mm_logger.info('html response: {}'.format(etree.tostring(html.lxml).decode()))
+    # mm_logger.info('action attribute: {}'.format(elements[0].attrs['action']))
     # extract ticket granting ticket out of 'action' attribute
     if elements != []:
         return elements[0].attrs["action"].split("/")[-1]
@@ -90,11 +90,11 @@ def get_ticket(cas_serverurl, apikey, serviceurl):
     """
     if cas_serverurl is None:
-        logger.warning("cas server url must not be None")
+        mm_logger.warning("cas server url must not be None")
     if apikey is None:
-        logger.warning("api key must not be null")
+        mm_logger.warning("api key must not be null")
     if serviceurl is None:
-        logger.warning("service must not be null")
+        mm_logger.warning("service must not be null")
     # set ticket granting ticket server url
     tgtserverurl = cas_serverurl + "/api-key"
     # set service ticket server url
diff --git a/ddcuimap/metamap/utils/api_connection.py b/ddcuimap/metamap/utils/api_connection.py
index 532309b..fa64470 100644
--- a/ddcuimap/metamap/utils/api_connection.py
+++ b/ddcuimap/metamap/utils/api_connection.py
@@ -2,10 +2,9 @@
 
 from dotenv import load_dotenv
 
-load_dotenv()
+from ddcuimap.metamap import mm_logger, log
 
-from ddcuimap.metamap import logger
-from ddcuimap.utils.decorators import log
+load_dotenv()
 
 
 @log(msg="Checking MetaMap UMLS API credentials in config files or .env file")
@@ -13,24 +12,28 @@ def check_credentials(cfg):
     """Checks if api credentials exist in initialized config file or alternatively in an .env file"""
 
     if not cfg.apis.metamap.user_info.apiKey:
-        logger.warning("No API_KEY_UMLS found in config files. Looking in .env file.")
+        mm_logger.warning(
+            "No API_KEY_UMLS found in config files. Looking in .env file."
+        )
         try:
             apiKey = os.getenv("API_KEY_UMLS")
-            logger.info("Using API_KEY_UMLS found in .env file.")
+            mm_logger.info("Using API_KEY_UMLS found in .env file.")
             cfg.apis.metamap.user_info.apiKey = apiKey
         except ValueError:
-            logger.error(
+            mm_logger.error(
                 "No API_KEY_UMLS in .env file. Please add your UMLS API key to configs.apis.config_umls_api.yaml OR .env file."
             )
             exit()
 
     if not cfg.apis.metamap.user_info.email:
-        logger.warning("No API_EMAIL_UMLS found in config files. Looking in .env file.")
+        mm_logger.warning(
+            "No API_EMAIL_UMLS found in config files. Looking in .env file."
+        )
         try:
             email = os.getenv("API_EMAIL_UMLS")
-            logger.info("Using API_EMAIL_UMLS found in .env file.")
+            mm_logger.info("Using API_EMAIL_UMLS found in .env file.")
             cfg.apis.metamap.user_info.email = email
         except ValueError:
-            logger.error(
+            mm_logger.error(
                 "No API_EMAIL_UMLS in .env file. Please add your UMLS API email to configs.apis.config_metamap_api.yaml OR .env file."
) exit() diff --git a/ddcuimap/metamap/utils/metamap_query_processing_functions.py b/ddcuimap/metamap/utils/metamap_query_processing_functions.py index af0a920..1735ac2 100644 --- a/ddcuimap/metamap/utils/metamap_query_processing_functions.py +++ b/ddcuimap/metamap/utils/metamap_query_processing_functions.py @@ -10,9 +10,7 @@ import pandas as pd -from ddcuimap.utils.decorators import log -from ddcuimap.metamap import logger - +from ddcuimap.metamap import mm_logger, log from ddcuimap.metamap.skr_web_api import Submission from ddcuimap.curation.utils.text_processing import ( check_query_terms_valid, @@ -76,7 +74,7 @@ def run_batch_metamap_api(fp_mm_inputfile, cfg): inst.set_batch_file(fp_mm_inputfile) inst.form["SingLinePMID"] = "yes" inst.form["Batch_Command"] = "{} {}".format(cmd, unescape_string(cmdargs)) - logger.info("MetaMap Batch in progress...") # TODO: put in progress bar here + mm_logger.info("MetaMap Batch in progress...") # TODO: put in progress bar here response = inst.submit() # print("response status: {}".format(response.status_code)) # print("content: {}".format(response.content.decode())) diff --git a/ddcuimap/semantic_search/__init__.py b/ddcuimap/semantic_search/__init__.py index b3ad149..6660320 100644 --- a/ddcuimap/semantic_search/__init__.py +++ b/ddcuimap/semantic_search/__init__.py @@ -1,9 +1,7 @@ import logging -from ddcuimap.utils.setup_logging import log_setup - +from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log # CREATE LOGGER -log_setup() -logger = logging.getLogger("semantic_search_logger") -logger.propagate = False -logger.info("Initiating ddcuimap.semantic_search logging.") +setup_log() +ss_logger = logging.getLogger("semantic_search_logger") +ss_logger.info("Initiating ddcuimap.semantic_search logger.") diff --git a/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py b/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py index eee0663..efe4935 100644 --- a/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py +++ b/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py @@ -10,9 +10,8 @@ import pandas as pd from pathlib import Path +from ddcuimap.semantic_search import ss_logger, log, copy_log import ddcuimap.utils.helper as helper -from ddcuimap.semantic_search import logger -from ddcuimap.utils.decorators import log import ddcuimap.curation.utils.process_data_dictionary as proc_dd import ddcuimap.curation.utils.curation_functions as cur @@ -39,11 +38,11 @@ def run_hybrid_ss_batch(cfg, **kwargs): # CONNECT TO PINECONE cfg = check_credentials(cfg) pinecone = connect_to_pinecone(cfg) - logger.info( + ss_logger.info( f"Pinecone indexes available: {pinecone.list_indexes()}" ) # List all indexes currently present for your key index = pinecone.Index(cfg.semantic_search.pinecone.index.index_name) - logger.info( + ss_logger.info( f"Stats for index '{cfg.semantic_search.pinecone.index.index_name}': {index.describe_index_stats()}" ) @@ -134,9 +133,11 @@ def run_hybrid_ss_batch(cfg, **kwargs): dir_step1, df_dd, df_dd_preprocessed, df_curation, df_results, cfg ) # TODO: may want to include sparse tokens and scoring in curation file - helper.save_config(cfg, dir_step1, "config_query.yaml") + ss_logger.info("FINISHED Pinecone Semantic Search batch query pipeline!!!") - logger.info("FINISHED Pinecone Semantic Search batch query pipeline!!!") + # SAVE CONFIG AND MOVE LOG FILE + helper.save_config(cfg, dir_step1, "config_query.yaml") + # copy_log(ss_logger, dir_step1, "ss_logger.log") return df_final, cfg diff --git 
a/ddcuimap/semantic_search/configure_umls_index/select_embed_upsert_pipeline.py b/ddcuimap/semantic_search/configure_umls_index/select_embed_upsert_pipeline.py index 3b5affe..0ef6d3f 100644 --- a/ddcuimap/semantic_search/configure_umls_index/select_embed_upsert_pipeline.py +++ b/ddcuimap/semantic_search/configure_umls_index/select_embed_upsert_pipeline.py @@ -9,8 +9,7 @@ """ import ddcuimap.utils.helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.semantic_search import logger +from ddcuimap.semantic_search import ss_logger, log from semantic_search.configure_umls_index import ( step1_select_umls_subset as step1, @@ -41,7 +40,7 @@ def main_flow(cfg): ) index, cfg_step3 = step3.upsert_umls(cfg, df_umls_embeddings) - logger.info("FINISHED UMLS SUBSET/EMBED/UPSERT batch query pipeline!!!") + ss_logger.info("FINISHED UMLS SUBSET/EMBED/UPSERT batch query pipeline!!!") return df_umls, df_umls_embeddings, index, cfg diff --git a/ddcuimap/semantic_search/configure_umls_index/step1_select_umls_subset.py b/ddcuimap/semantic_search/configure_umls_index/step1_select_umls_subset.py index 9698434..1bf13ae 100644 --- a/ddcuimap/semantic_search/configure_umls_index/step1_select_umls_subset.py +++ b/ddcuimap/semantic_search/configure_umls_index/step1_select_umls_subset.py @@ -12,8 +12,7 @@ from tqdm import tqdm import ddcuimap.utils.helper as helper -from ddcuimap.semantic_search import logger -from ddcuimap.utils.decorators import log +from ddcuimap.semantic_search import ss_logger, log from ddcuimap.curation.utils.text_processing import clean_text cfg = helper.compose_config( @@ -25,7 +24,7 @@ def subset_umls(cfg): # GET UMLS MTH FOLDER if cfg.umls_subset.mth_local.dirpath_mth: - logger.warning( + ss_logger.warning( "Using local MTH folder: {}".format(cfg.umls_subset.mth_local.dirpath_mth) ) else: @@ -36,7 +35,7 @@ def subset_umls(cfg): # GET OUTPUT FOLDER if cfg.umls_subset.settings.dirpath_output: - logger.warning( + ss_logger.warning( "Using local output folder: {}".format( cfg.umls_subset.settings.dirpath_output ) @@ -48,7 +47,7 @@ def subset_umls(cfg): dirpath_output = Path(cfg.umls_subset.settings.dirpath_output) # CREATE DATAFRAMES FOR CONSO, DEF, STY, RANK - logger.info("Creating dataframes for CONSO, DEF, STY, RANK") + ss_logger.info("Creating dataframes for CONSO, DEF, STY, RANK") df_CONSO = pd.read_csv( dirpath_mth / cfg.umls_subset.mth_local.RRF_files.concepts.filename, sep="|", @@ -82,7 +81,7 @@ def subset_umls(cfg): # TODO: keep track of removed rows and save to file # KEEP ONLY ENG LAT - logger.info(f"Applying filters for {cfg.umls_subset.filters.items()}") + ss_logger.info(f"Applying filters for {cfg.umls_subset.filters.items()}") for col, vals in cfg.umls_subset.filters.items(): df_CONSO = df_CONSO[df_CONSO[col].isin(vals)] df_CONSO = df_CONSO[cfg.umls_subset.mth_local.RRF_files.concepts.subset] @@ -91,7 +90,7 @@ def subset_umls(cfg): ) # TODO: make this a config option # REMOVE ROWS WITH NULL VALUE - logger.info("Removing rows with null values in DEF and CONSO") + ss_logger.info("Removing rows with null values in DEF and CONSO") df_DEF = df_DEF[df_DEF["DEF"].notnull()] df_CONSO = df_CONSO[df_CONSO["STR"].notnull()] df_DEF["DEF_unprocessed"] = df_DEF[ @@ -104,7 +103,7 @@ def subset_umls(cfg): ## TEXT CLEANING # REMOVE: **Definition:**, Definition:, Description:, WHAT:, **** - logger.info("Removing: **Definition:**, Definition:, Description:, WHAT:, ****") + ss_logger.info("Removing: **Definition:**, Definition:, Description:, WHAT:, ****") remove = 
r"(\*\*Definition:\*\*|Definition:|Description:|WHAT:|\*\*\*\*)" df_DEF["DEF"] = df_DEF["DEF"].str.replace(remove, "", regex=True) df_CONSO["STR"] = df_CONSO["STR"].str.replace(remove, "", regex=True) @@ -122,7 +121,7 @@ def subset_umls(cfg): ) # RANK CUIS BY RANK FILE - logger.info("Sorting CUIs by precedence based on rank in TTY file") + ss_logger.info("Sorting CUIs by precedence based on rank in TTY file") rank_sorter = CategoricalDtype( df_RANK["TTY"].unique(), ordered=True ) # sort by rank giving precedence to PN. some cuis don't have PN, so next in line will be chosen when duplicates dropped. @@ -131,7 +130,7 @@ def subset_umls(cfg): df_CONSO.sort_values("TTY", inplace=True) # RANK DEFINITION PRECEDENCE BY ORDER OF SAB ABUNDANCE - logger.info("Sorting definitions by precedence based on SAB abundance") + ss_logger.info("Sorting definitions by precedence based on SAB abundance") sab_order = CategoricalDtype( df_DEF["SAB"].value_counts().sort_values(ascending=False).index, ordered=True ) @@ -139,11 +138,11 @@ def subset_umls(cfg): df_DEF.sort_values("SAB", inplace=True) # MERGE SEMANTIC TYPES FOR EACH CUI - logger.info("Merging semantic types for each CUI") + ss_logger.info("Merging semantic types for each CUI") df_STY_flat = df_STY.groupby(["CUI"])["STY"].apply(";".join).reset_index() # MERGE SAB FOR EACH CUI - logger.info("Merging SAB for each CUI") + ss_logger.info("Merging SAB for each CUI") conso_sab_concat = ( df_CONSO[["CUI", "SAB"]] .groupby("CUI")["SAB"] @@ -158,12 +157,12 @@ def subset_umls(cfg): ) # DROP DUPLICATES - logger.info("Dropping duplicates") + ss_logger.info("Dropping duplicates") df_CONSO.drop_duplicates(subset="CUI", inplace=True, keep="first") df_DEF.drop_duplicates(subset="CUI", inplace=True, keep="first") # MERGE DATAFRAMES - logger.info("Merging dataframes") + ss_logger.info("Merging dataframes") df_CONSO = pd.merge( df_CONSO, conso_sab_concat, diff --git a/ddcuimap/semantic_search/configure_umls_index/step2_embed_umls_subset.py b/ddcuimap/semantic_search/configure_umls_index/step2_embed_umls_subset.py index ad5b9ea..de0e5cf 100644 --- a/ddcuimap/semantic_search/configure_umls_index/step2_embed_umls_subset.py +++ b/ddcuimap/semantic_search/configure_umls_index/step2_embed_umls_subset.py @@ -8,8 +8,7 @@ from pathlib import Path import ddcuimap.utils.helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.semantic_search import logger +from ddcuimap.semantic_search import ss_logger, log from ddcuimap.semantic_search.utils import builders cfg = helper.compose_config( @@ -24,7 +23,7 @@ def embed_umls(cfg, **kwargs): df_umls = kwargs.get("df_umls") if df_umls is None or df_umls.empty: if cfg.upsert.filepath_raw: - logger.warning( + ss_logger.warning( f"Using UMLS concept dataframe: {cfg.upsert.filepath_raw}" ) else: diff --git a/ddcuimap/semantic_search/configure_umls_index/step3_upsert_umls_subset.py b/ddcuimap/semantic_search/configure_umls_index/step3_upsert_umls_subset.py index 5fb37ff..2576f9a 100644 --- a/ddcuimap/semantic_search/configure_umls_index/step3_upsert_umls_subset.py +++ b/ddcuimap/semantic_search/configure_umls_index/step3_upsert_umls_subset.py @@ -9,8 +9,7 @@ from pathlib import Path import ddcuimap.utils.helper as helper -from ddcuimap.semantic_search import logger -from ddcuimap.utils.decorators import log +from ddcuimap.semantic_search import ss_logger, log from ddcuimap.utils.BatchGenerator import BatchGenerator from ddcuimap.semantic_search.utils.api_connection import ( check_credentials, @@ -38,7 +37,7 @@ def 
upsert_umls(cfg, **kwargs): "Choose df_UMLS_embeddings.pkl file" ) df_umls_embeddings = pd.read_pickle(fp_umls_embeddings) - logger.info( + ss_logger.info( f"UMLD embeddings dataframe size is: {df_umls_embeddings.shape}" ) cfg.semantic_search.upsert.filepath_processed = fp_umls_embeddings @@ -46,7 +45,7 @@ def upsert_umls(cfg, **kwargs): # CONNECT TO PINECONE cfg = check_credentials(cfg) pinecone = connect_to_pinecone(cfg) - logger.info( + ss_logger.info( f"Pinecone indexes available: {pinecone.list_indexes()}" ) # List all indexes currently present for your key @@ -61,14 +60,14 @@ def upsert_umls(cfg, **kwargs): # INSERT UMLS VECTOR SEMANTIC_SEARCH INTO PINECONE INDEX index = pinecone.Index(index_name=cfg.semantic_search.pinecone.index.index_name) - logger.info( + ss_logger.info( f"Stats for index '{cfg.semantic_search.pinecone.index.index_name}': {index.describe_index_stats()}" ) # UPSERT EMBEDDINGS AND METADATA df_batcher = BatchGenerator(100) for col in cfg.semantic_search.upsert.embed_columns: - logger.info(f"Uploading vectors to {col} namespace..") + ss_logger.info(f"Uploading vectors to {col} namespace..") for batch_df in tqdm(df_batcher(df_umls_embeddings)): vectors = [] for i in range(len(batch_df)): @@ -93,7 +92,7 @@ def upsert_umls(cfg, **kwargs): index.upsert(vectors=vectors, namespace=col) # CHECK INDEX SIZE FOR EACH NAMESPACE - logger.info("Index size after upsert:") + ss_logger.info("Index size after upsert:") index.describe_index_stats() # SAVE CONFIG diff --git a/ddcuimap/semantic_search/utils/api_connection.py b/ddcuimap/semantic_search/utils/api_connection.py index 3244221..d3d21fe 100644 --- a/ddcuimap/semantic_search/utils/api_connection.py +++ b/ddcuimap/semantic_search/utils/api_connection.py @@ -6,8 +6,7 @@ import pinecone -from ddcuimap.utils.decorators import log -from ddcuimap.semantic_search import logger +from ddcuimap.semantic_search import ss_logger, log @log(msg="Checking Pinecone credentials in config files or .env file") @@ -15,24 +14,24 @@ def check_credentials(cfg): """Checks if api credentials exist in initialized config file or alternatively in an .env file""" if not cfg.apis.pinecone.index_info.apiKey: - logger.warning("No apiKey found in config files. Looking in .env file.") + ss_logger.warning("No apiKey found in config files. Looking in .env file.") try: apiKey = os.getenv("API_KEY_PINECONE") - logger.info("Using API_KEY_PINECONE found in .env file.") + ss_logger.info("Using API_KEY_PINECONE found in .env file.") cfg.apis.pinecone.index_info.apiKey = apiKey except ValueError: - logger.error( + ss_logger.error( "No API_KEY_PINECONE in .env file. Please add your Pinecone API key to configs.apis.config_pinecone_api.yaml OR .env file." ) exit() if not cfg.apis.pinecone.index_info.environment: - logger.warning("No environment found in config files. Looking in .env file.") + ss_logger.warning("No environment found in config files. Looking in .env file.") try: environment = os.getenv("API_ENV_PINECONE") - logger.info("Using API_KEY_PINECONE found in .env file.") + ss_logger.info("Using API_KEY_PINECONE found in .env file.") cfg.apis.pinecone.index_info.environment = environment except ValueError: - logger.error( + ss_logger.error( "No API_ENV_PINECONE in .env file. Please add your Pinecone API environment to configs.apis.config_pinecone_api.yaml OR .env file." 
) exit() diff --git a/ddcuimap/semantic_search/utils/builders.py b/ddcuimap/semantic_search/utils/builders.py index 943cf8c..22dc059 100644 --- a/ddcuimap/semantic_search/utils/builders.py +++ b/ddcuimap/semantic_search/utils/builders.py @@ -9,8 +9,7 @@ from transformers import AutoTokenizer import torch -from ddcuimap.utils.decorators import log -from ddcuimap.semantic_search import logger +from ddcuimap.semantic_search import ss_logger, log from ddcuimap.semantic_search.utils.checks import ( normalize_unit_length, ) @@ -22,7 +21,7 @@ def check_set_device(cfg): device = "cuda" if torch.cuda.is_available() else "cpu" cfg.semantic_search_settings.device = device - logger.info(f"Running on {device}") + ss_logger.info(f"Running on {device}") @log(msg="Adding Vector ID to Dataframe") @@ -58,7 +57,7 @@ def tokenize_columns(df, columns, model_name): metadata = {} tokenizer = AutoTokenizer.from_pretrained(model_name, truncation=True) for col in columns: - logger.info(f"Tokenizing {col}") + ss_logger.info(f"Tokenizing {col}") batch = df[col].values.tolist() tokens = [tokenizer.tokenize(sentence.lower()) for sentence in batch] df[f"{col}_tokens"] = tokens @@ -91,7 +90,7 @@ def hybrid_builder( sparse_model.to(cfg.semantic_search_settings.device) # move to GPU if possible idx2token = {idx: token for token, idx in tokenizer.get_vocab().items()} for col in embed_columns: - logger.info(f"Embedding {col}") + ss_logger.info(f"Embedding {col}") batch = df[col].values.tolist() dense_vecs = dense_model.encode( batch, @@ -102,7 +101,7 @@ def hybrid_builder( sparse_upsert = [] sparse_idx2token = [] for i in range(0, len(batch), sparse_batch_size): - logger.info(f"Embedding {i} to {i + sparse_batch_size}") + ss_logger.info(f"Embedding {i} to {i + sparse_batch_size}") batch_splade = batch[i : i + sparse_batch_size] input_ids = tokenizer( batch_splade, return_tensors="pt", padding=True, truncation=True diff --git a/ddcuimap/semantic_search/utils/checks.py b/ddcuimap/semantic_search/utils/checks.py index 078fff2..08e1a67 100644 --- a/ddcuimap/semantic_search/utils/checks.py +++ b/ddcuimap/semantic_search/utils/checks.py @@ -7,7 +7,7 @@ import numpy as np from transformers import AutoTokenizer -from ddcuimap.utils.decorators import log +from ddcuimap.semantic_search import log # VECTOR OPERATIONS diff --git a/ddcuimap/semantic_search/utils/runners.py b/ddcuimap/semantic_search/utils/runners.py index 14eacfc..fa3e8f2 100644 --- a/ddcuimap/semantic_search/utils/runners.py +++ b/ddcuimap/semantic_search/utils/runners.py @@ -8,7 +8,7 @@ import pandas as pd from tqdm import tqdm -from ddcuimap.utils.decorators import log +from ddcuimap.semantic_search import log def fetch_id_metadata(index, cfg): diff --git a/ddcuimap/umls/__init__.py b/ddcuimap/umls/__init__.py index 3c9aec1..0e07892 100644 --- a/ddcuimap/umls/__init__.py +++ b/ddcuimap/umls/__init__.py @@ -1,9 +1,7 @@ import logging -from ddcuimap.utils.setup_logging import log_setup - +from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log # CREATE LOGGER -log_setup() -logger = logging.getLogger("umls_logger") -logger.propagate = False -logger.info("Initiating ddcuimap.umls logging.") +setup_log() +umls_logger = logging.getLogger("umls_logger") +umls_logger.info("Initiating ddcuimap.umls logger.") diff --git a/ddcuimap/umls/batch_query_pipeline.py b/ddcuimap/umls/batch_query_pipeline.py index 042dc8a..bcd5895 100644 --- a/ddcuimap/umls/batch_query_pipeline.py +++ b/ddcuimap/umls/batch_query_pipeline.py @@ -7,9 +7,8 @@ import pandas as pd from 
pathlib import Path +from ddcuimap.umls import umls_logger, log, copy_log import ddcuimap.utils.helper as helper -from ddcuimap.utils.decorators import log -from ddcuimap.umls import logger import ddcuimap.curation.utils.process_data_dictionary as proc_dd import ddcuimap.curation.utils.curation_functions as cur @@ -70,9 +69,11 @@ def run_umls_batch(cfg, **kwargs): dir_step1, df_dd, df_dd_preprocessed, df_curation, df_results, cfg ) - helper.save_config(cfg, dir_step1) + umls_logger.info("FINISHED UMLS API batch query pipeline!!!") - logger.info("FINISHED UMLS API batch query pipeline!!!") + # SAVE CONFIG FILE AND MOVE LOG + helper.save_config(cfg, dir_step1, "config_query.yaml") + # copy_log(umls_logger, dir_step1, "umls_logger.log") return df_final, cfg diff --git a/ddcuimap/umls/utils/api_connection.py b/ddcuimap/umls/utils/api_connection.py index 215e55d..6f001df 100644 --- a/ddcuimap/umls/utils/api_connection.py +++ b/ddcuimap/umls/utils/api_connection.py @@ -3,8 +3,7 @@ import requests from dotenv import load_dotenv -from ddcuimap.utils.decorators import log -from ddcuimap.umls import logger +from ddcuimap.umls import umls_logger, log load_dotenv() @@ -14,25 +13,29 @@ def check_credentials(cfg): """Checks if api credentials exist in initialized config file or alternatively in an .env file""" if not cfg.apis.umls.user_info.apiKey: - logger.warning("No API_KEY_UMLS found in config files. Looking in .env file.") + umls_logger.warning( + "No API_KEY_UMLS found in config files. Looking in .env file." + ) try: apiKey = os.getenv("API_KEY_UMLS") - logger.warning("Using API_KEY_UMLS found in .env file.") + umls_logger.warning("Using API_KEY_UMLS found in .env file.") cfg.apis.umls.user_info.apiKey = apiKey cfg.apis.umls.query_params.apiKey = apiKey except ValueError: - logger.error( + umls_logger.error( "No API_KEY_UMLS in .env file. Please add your UMLS API key to configs.apis.config_umls_api.yaml OR .env file." ) exit() if not cfg.apis.umls.user_info.email: - logger.warning("No API_EMAIL_UMLS found in config files. Looking in .env file.") + umls_logger.warning( + "No API_EMAIL_UMLS found in config files. Looking in .env file." + ) try: email = os.getenv("API_EMAIL_UMLS") - logger.warning("Using API_EMAIL_UMLS found in .env file.") + umls_logger.warning("Using API_EMAIL_UMLS found in .env file.") cfg.apis.umls.user_info.email = email except ValueError: - logger.error( + umls_logger.error( "No API_EMAIL_UMLS in .env file. Please add your UMLS API email to configs.apis.config_umls_api.yaml OR .env file." 
) exit() @@ -48,7 +51,7 @@ def connect_to_umls(cfg): response = requests.request( "POST", cfg.apis.umls.api_settings.url, headers=headers, data=payload ) - logger.info(f"Response status: {response.status_code}") + umls_logger.info(f"Response status: {response.status_code}") # TODO: write logic to handle response status codes, break if not 200 diff --git a/ddcuimap/umls/utils/runner.py b/ddcuimap/umls/utils/runner.py index 33e9553..140fb90 100644 --- a/ddcuimap/umls/utils/runner.py +++ b/ddcuimap/umls/utils/runner.py @@ -13,8 +13,7 @@ from ddcuimap.curation.utils.text_processing import check_query_terms_valid from ddcuimap.umls.utils import umls_query_processing_functions as uqproc -from ddcuimap.utils.decorators import log -from ddcuimap.umls import logger +from ddcuimap.umls import umls_logger, log @log(msg="Running UMLS Runner") @@ -30,7 +29,7 @@ def umls_runner(df_results, df_curation, cfg): """ search_ID += 1 vn = row[cfg.custom.data_dictionary_settings.variable_column] # variable name - logger.info(f"Querying search_ID [{search_ID}]: {vn}") + umls_logger.info(f"Querying search_ID [{search_ID}]: {vn}") query_terms_dict = { col: row[col] for col in df_curation.columns @@ -56,7 +55,7 @@ def umls_runner(df_results, df_curation, cfg): if ( recCount ): # if recCount is not 0, results were found with default exact search - logger.info( + umls_logger.info( f"({cnt_searchTerm}) {searchTerm}: {recCount} {searchType} matches." ) df_results_cols = uqproc.process_query_results( @@ -74,7 +73,7 @@ def umls_runner(df_results, df_curation, cfg): else: break # if search_all_cols is False, break out of loop and move to next row else: # for cases where the 'exact' search type results in an empty list - logger.warning( + umls_logger.warning( f"({cnt_searchTerm}) {searchTerm}: No exact match. Trying alternative searchType." ) temp_ls = uqproc.no_results_output( @@ -98,7 +97,7 @@ def umls_runner(df_results, df_curation, cfg): recCount ): # if recCount is not 0, results were found with approximate search cnt_searchTerm += 1 - logger.info( + umls_logger.info( f"({cnt_searchTerm}) {searchTerm}: {recCount} {searchType} matches." ) df_results_cols = uqproc.process_query_results( @@ -116,7 +115,7 @@ def umls_runner(df_results, df_curation, cfg): else: break else: # if approximate search still results in nothing, try next query_term if available - logger.warning( + umls_logger.warning( f"({cnt_searchTerm}) {searchTerm}: No alternative searchType match. Moving on to next query term option if available." ) temp_ls = uqproc.no_results_output( @@ -128,7 +127,7 @@ def umls_runner(df_results, df_curation, cfg): df_results = pd.concat([df_results, df_temp], ignore_index=True) continue else: # if query term is not valid, try next query term if available - logger.warning( + umls_logger.warning( f"({cnt_searchTerm}) {searchTerm}: Is nan or empty. Trying next query term option if available." 
) results_ls = uqproc.invalid_query_term_output( diff --git a/ddcuimap/umls/utils/umls_query_processing_functions.py b/ddcuimap/umls/utils/umls_query_processing_functions.py index 94ca00e..fc50166 100644 --- a/ddcuimap/umls/utils/umls_query_processing_functions.py +++ b/ddcuimap/umls/utils/umls_query_processing_functions.py @@ -10,7 +10,7 @@ import pandas as pd import requests -from ddcuimap.utils.decorators import log +from ddcuimap.umls import log @log(msg="Checking if query term is valid") diff --git a/ddcuimap/utils/__init__.py b/ddcuimap/utils/__init__.py index 07af0b8..a5bb046 100644 --- a/ddcuimap/utils/__init__.py +++ b/ddcuimap/utils/__init__.py @@ -1,8 +1,6 @@ import logging -from ddcuimap.utils.setup_logging import log_setup - +from ddcuimap.utils.logger.config_logging import setup_log, log, copy_log # CREATE LOGGER -log_setup() -logger = logging.getLogger("helper_logger") -logger.propagate = False +setup_log() +utils_logger = logging.getLogger("helper_logger") diff --git a/ddcuimap/utils/decorators.py b/ddcuimap/utils/decorators.py deleted file mode 100644 index 0da8fee..0000000 --- a/ddcuimap/utils/decorators.py +++ /dev/null @@ -1,32 +0,0 @@ -""" - -Various decorators for the project. - -""" - -import functools -import logging - - -# LOGGING - - -def log(msg=None): - def decorator(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - # Retrieve the logger dynamically - logger = logging.getLogger(func.__module__) - # logger = logging.getLogger(func.__module__.split('.')[0]) - # Log the custom message if provided - if msg is not None: - logger.info(msg) - - # Call the original function - result = func(*args, **kwargs) - - return result - - return wrapper - - return decorator diff --git a/ddcuimap/utils/helper.py b/ddcuimap/utils/helper.py index 4d870ae..235a1e8 100644 --- a/ddcuimap/utils/helper.py +++ b/ddcuimap/utils/helper.py @@ -3,7 +3,7 @@ General helper functions for UI and configuration components of scripts. 
""" -import logging + import os import tkinter as tk from tkinter import filedialog @@ -13,8 +13,7 @@ from omegaconf.dictconfig import DictConfig from typing import List, Optional -from ddcuimap.utils.decorators import log -from ddcuimap.utils import logger +from ddcuimap.utils import utils_logger, log # CONFIG FILE FUNCTIONS @@ -26,6 +25,7 @@ def compose_config( overrides: Optional[List[str]] = None, ) -> DictConfig: """Load configurations from the file `config.yaml` under the `config` directory and specify overrides""" + with initialize( config_path=config_path ): # removed version_base=None when upgrading to hydra 1.1.0 @@ -69,7 +69,7 @@ def create_folder(folder_path): adjusted_folder_path = folder_path + " (" + str(counter) + ")" folder_found = os.path.isdir(adjusted_folder_path) os.mkdir(adjusted_folder_path) - logger.info(f"Folder created: {adjusted_folder_path}") + utils_logger.info(f"Folder created: {adjusted_folder_path}") elif isinstance(folder_path, Path): # using pathlib and not os check if folder_path already exists, append numbers incrementally adjusted_folder_path = folder_path @@ -82,7 +82,7 @@ def create_folder(folder_path): ) folder_found = adjusted_folder_path.exists() adjusted_folder_path.mkdir(parents=True, exist_ok=True) - logger.info(f"Folder created: {adjusted_folder_path}") + utils_logger.info(f"Folder created: {adjusted_folder_path}") else: raise TypeError("folder_path must be a string or pathlib Path object") @@ -105,7 +105,7 @@ def choose_file(prompt: str): root = manage_tk_dialogbox(tk) fp = filedialog.askopenfilename(parent=root, title=prompt) - logger.info(f"File chosen: {fp}") + utils_logger.info(f"File chosen: {fp}") return fp @@ -115,5 +115,5 @@ def choose_dir(prompt: str): root = manage_tk_dialogbox(tk) dp = filedialog.askdirectory(parent=root, title=prompt) - logger.info(f"Directory chosen: {dp}") + utils_logger.info(f"Directory chosen: {dp}") return dp diff --git a/ddcuimap/utils/logger/__init__.py b/ddcuimap/utils/logger/__init__.py new file mode 100644 index 0000000..ade1461 --- /dev/null +++ b/ddcuimap/utils/logger/__init__.py @@ -0,0 +1 @@ +import logging diff --git a/ddcuimap/utils/logger/config_logging.py b/ddcuimap/utils/logger/config_logging.py new file mode 100644 index 0000000..c972780 --- /dev/null +++ b/ddcuimap/utils/logger/config_logging.py @@ -0,0 +1,123 @@ +""" + +Logging configuration for the ddcuimap package. 
+
+"""
+
+
+import functools
+import logging
+import logging.config
+import coloredlogs
+import os
+import yaml
+from pathlib import Path
+import shutil
+
+
+# DEFAULTS
+DEFAULT_LEVEL = logging.INFO
+
+# LOG DECORATOR
+
+
+def log(msg=None):
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # Retrieve the logger dynamically
+            logger = logging.getLogger(func.__module__)
+            # logger = logging.getLogger(func.__module__.split('.')[0])
+            # Log the custom message if provided
+            if msg is not None:
+                logger.info(msg)
+
+            # Call the original function
+            result = func(*args, **kwargs)
+
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+# MODULE LOGGING CONFIGURATION
+fp_cfg_logging = Path(__file__).parent / "config_logging.yaml"
+
+
+def setup_log(fp_cfg_logging=fp_cfg_logging):
+    if fp_cfg_logging.is_file():
+        with open(fp_cfg_logging, "rt") as cfg_logging:
+            try:
+                cfg_log = yaml.safe_load(cfg_logging.read())
+                logging.config.dictConfig(cfg_log)
+                coloredlogs.install(
+                    fmt=cfg_log["formatters"]["coloredlogs"]["format"],
+                    level_styles=cfg_log["formatters"]["coloredlogs"]["level_styles"],
+                    field_styles=cfg_log["formatters"]["coloredlogs"]["field_styles"],
+                )
+            except Exception as e:
+                print(f"Error with logging config file, using default logger: {e}")
+                logging.basicConfig(level=DEFAULT_LEVEL)
+                coloredlogs.install(
+                    level=DEFAULT_LEVEL,
+                    fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                    # NOTE: fall back to coloredlogs' default styles here, since
+                    # cfg_log may be undefined if yaml.safe_load itself failed
+                )
+    else:
+        logging.basicConfig(level=DEFAULT_LEVEL)
+        coloredlogs.install(
+            level=DEFAULT_LEVEL,
+            fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+            level_styles=dict(
+                debug=dict(color="white"),
+                info=dict(color="blue"),
+                warning=dict(color="yellow", bright=True),
+                error=dict(color="red", bold=True, bright=True),
+                critical=dict(color="black", bold=True, background="red"),
+            ),
+            field_styles=dict(
+                name=dict(color="white"),
+                asctime=dict(color="white"),
+                funcName=dict(color="white"),
+                lineno=dict(color="white"),
+            ),
+        )
+        print("Config file not found, using default logger")
+
+
+@log("Moving log file to new directory")
+def move_log(logger, new_dir):
+    """Moves log file to new directory"""
+
+    fp_log_orig = Path(logger.handlers[0].baseFilename)
+    fp_log_new = Path(new_dir) / Path(logger.handlers[0].baseFilename).name
+    logger.handlers[0].close()
+    shutil.move(fp_log_orig, fp_log_new)
+
+
+@log("Copying log file to new directory")
+def copy_log(logger, new_dir, new_filename=None):
+    """Copies log file to new directory"""
+
+    fp_log_orig = Path(logger.handlers[0].baseFilename)
+    if new_filename:
+        fp_log_new = Path(new_dir) / Path(new_filename)
+    else:
+        fp_log_new = Path(new_dir) / Path(logger.handlers[0].baseFilename).name
+    logger.handlers[0].close()
+    shutil.copy(fp_log_orig, fp_log_new)
+
+
+# def log_to_dir(logger, dir_log, name_log):
+#     """Adds a FileHandler for a user-defined directory to output script run log."""
+#     fp_log = Path(dir_log / f"{name_log}")
+#     logger.handlers[0].baseFilename = fp_log
+
+
+if __name__ == "__main__":
+    setup_log()
+    logger = logging.getLogger(__name__)
+    logger.info("Setting up logger.")
diff --git a/ddcuimap/configs/logging/logging.yaml b/ddcuimap/utils/logger/config_logging.yaml
similarity index 73%
rename from ddcuimap/configs/logging/logging.yaml
rename to ddcuimap/utils/logger/config_logging.yaml
index 75d2674..3816c66 100644
--- a/ddcuimap/configs/logging/logging.yaml
+++ 
b/ddcuimap/utils/logger/config_logging.yaml @@ -2,10 +2,10 @@ version: 1 disable_existing_loggers: True formatters: standard: - format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' coloredlogs: '()': 'coloredlogs.ColoredFormatter' - format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' level_styles: debug: color: white @@ -35,38 +35,40 @@ formatters: handlers: file: class: logging.FileHandler - filename: logging.txt - formatter: coloredlogs + level: INFO + filename: logger.log + formatter: standard + mode: w console: class: logging.StreamHandler level: INFO formatter: coloredlogs loggers: + hydra_search_logger: + level: INFO + handlers: [file, console] + propagate: False umls_logger: level: INFO handlers: [file, console] propagate: False metamap_logger: level: INFO - handlers: [ file, console ] + handlers: [file, console] propagate: False semantic_search_logger: level: INFO - handlers: [ file, console ] - propagate: False - hydra_search_logger: - level: INFO - handlers: [ file, console ] + handlers: [file, console] propagate: False helper_logger: level: INFO - handlers: [ file, console ] + handlers: [file, console] propagate: False curation_logger: level: INFO - handlers: [ file, console ] + handlers: [file, console] propagate: False root: - level: INFO - handlers: [file, console] - propagate: True + level: INFO + handlers: [file, console] + propagate: True diff --git a/ddcuimap/utils/setup_logging.py b/ddcuimap/utils/setup_logging.py deleted file mode 100644 index f70e919..0000000 --- a/ddcuimap/utils/setup_logging.py +++ /dev/null @@ -1,55 +0,0 @@ -import logging -import logging.config -import coloredlogs -import os -import yaml - -DEFAULT_LEVEL = logging.INFO - - -def log_setup(fp_cfg_logging="../configs/logging/logging.yaml"): - if os.path.exists(fp_cfg_logging): - with open(fp_cfg_logging, "rt") as cfg_logging: - try: - cfg_log = yaml.safe_load(cfg_logging.read()) - logging.config.dictConfig(cfg_log) - coloredlogs.install( - fmt=cfg_log["formatters"]["coloredlogs"]["format"], - level_styles=cfg_log["formatters"]["coloredlogs"]["level_styles"], - field_styles=cfg_log["formatters"]["coloredlogs"]["field_styles"], - ) - except Exception as e: - print("Error with file, using Default logging") - logging.basicConfig(level=DEFAULT_LEVEL) - coloredlogs.install( - level=DEFAULT_LEVEL, - fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - level_styles=cfg_log["formatters"]["coloredlogs"]["level_styles"], - field_styles=cfg_log["formatters"]["coloredlogs"]["field_styles"], - ) - else: - logging.basicConfig(level=DEFAULT_LEVEL) - coloredlogs.install( - level=DEFAULT_LEVEL, - fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - level_styles=dict( - debug=dict(color="white"), - info=dict(color="blue"), - warning=dict(color="yellow", bright=True), - error=dict(color="red", bold=True, bright=True), - critical=dict(color="black", bold=True, background="red"), - ), - field_styles=dict( - name=dict(color="white"), - asctime=dict(color="white"), - funcName=dict(color="white"), - lineno=dict(color="white"), - ), - ) - print("Config file not found, using Default logging") - - -if __name__ == "__main__": - log_setup() - logger = logging.getLogger(__name__) - logger.info("Setting up logging.") diff --git a/notebooks/cui_batch_query_pipeline.ipynb b/notebooks/cui_batch_query_pipeline.ipynb index d376f59..a33a62f 100644 --- 
a/notebooks/cui_batch_query_pipeline.ipynb +++ b/notebooks/cui_batch_query_pipeline.ipynb @@ -9,23 +9,17 @@ }, { "cell_type": "markdown", + "metadata": {}, "source": [ "###### INSTALL PACKAGE" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "jupyter": { "outputs_hidden": false - }, - "ExecuteTime": { - "end_time": "2023-05-17T15:29:08.450780500Z", - "start_time": "2023-05-17T15:29:08.441535900Z" } }, "outputs": [], @@ -37,31 +31,22 @@ }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### STEP-1A: RUN BATCH QUERY PIPELINE\n" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "###### IMPORT PACKAGES" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2023-05-17T16:22:35.991517600Z", - "start_time": "2023-05-17T16:22:35.977749700Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "from ddcuimap.umls import batch_query_pipeline as umls_bqp\n", @@ -82,317 +67,18 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "jupyter": { "outputs_hidden": false - }, - "ExecuteTime": { - "end_time": "2023-05-17T16:22:39.495474300Z", - "start_time": "2023-05-17T16:22:38.739120500Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "apis:\n", - " umls:\n", - " user_info:\n", - " email: null\n", - " apiKey: null\n", - " api_settings:\n", - " url: https://utslogin.nlm.nih.gov/cas/v1/api-key\n", - " uri: https://uts-ws.nlm.nih.gov\n", - " version: current\n", - " content_endpoint: /rest/search/current\n", - " fullpath: https://uts-ws.nlm.nih.gov/rest/search/current\n", - " payload: {}\n", - " headers: {}\n", - " sabs: []\n", - " searchType1: exact\n", - " searchType2: normalizedWords\n", - " pageSize: 20\n", - " pages_max: 1\n", - " query_params:\n", - " apiKey: null\n", - " string: {}\n", - " searchType: exact\n", - " sabs: []\n", - " pageSize: 20\n", - " pageNumber: {}\n", - " metamap:\n", - " user_info:\n", - " email: null\n", - " apiKey: null\n", - " api_settings:\n", - " serverurl: https://utslogin.nlm.nih.gov/cas/v1/tickets\n", - " tgtserverurl: https://utslogin.nlm.nih.gov/cas/v1/api-key\n", - " serviceurl: https://ii.nlm.nih.gov/cgi-bin/II/UTS_Required/API_batchValidationII.pl\n", - " cmd: metamap\n", - " cmdargs:\n", - " mm_data_year: -Z 2020AB\n", - " mm_data_version: -V USAbase\n", - " strict_model: -A\n", - " show_candidates: -c\n", - " show_cuis: -I\n", - " ignore_word_order: -i\n", - " restrict to sources: -R NCI\n", - " sources: -G\n", - " composite phrases: -Q 4\n", - " term_processing: -z\n", - " word_sense_disambiguation: -y\n", - " formatted_json_output: --JSONf 2\n", - " output_settings:\n", - " columns:\n", - " - CandidateScore\n", - " - CandidateCUI\n", - " - CandidateMatched\n", - " - CandidatePreferred\n", - " - MatchedWords\n", - " - SemTypes\n", - " - MatchMaps\n", - " - IsHead\n", - " - IsOverMatch\n", - " - Sources\n", - " - ConceptPIs\n", - " - Status\n", - " - Negated\n", - " pinecone:\n", - " index_info:\n", - " apiKey: null\n", - " environment: null\n", - "custom:\n", - " settings:\n", - " custom_config: hydra\n", - " pipeline_name: null\n", - " data_dictionary_settings:\n", - " filepath: null\n", - " variable_column: variable name\n", - " query_term_columns:\n", - " - title\n", - " - definition\n", - " explode: 
false\n", - " column_sep: '|'\n", - " search_all_query_terms: false\n", - " preprocessing_settings:\n", - " remove_stopwords: null\n", - " stopwords_filepath: null\n", - " use_cheatsheet: null\n", - " cheatsheet_filepath: null\n", - " curation_settings:\n", - " information_columns:\n", - " - variable name\n", - " - title\n", - " - definition\n", - " - permissible values\n", - " - permissible value descriptions\n", - " - preferred question text\n", - " query_columns:\n", - " - variable name\n", - " - search_ID\n", - " - query_term_used\n", - " - query_term_used_col\n", - " - searchType\n", - " result_columns:\n", - " - recCount\n", - " - data element concept names\n", - " - data element concept identifiers\n", - " - data element terminology sources\n", - " file_settings:\n", - " directory_prefix: DE\n", - " file_prefix: DE\n", - " excel:\n", - " sheet_names:\n", - " sheet1: UMLS_curation\n", - " sheet2: Data_Dictionary\n", - " sheet3: Data_Dictionary_extracted\n", - " hide_cols_curation:\n", - " - PMID\n", - " - MatchMaps\n", - " - IsHead\n", - " - IsOverMatch\n", - " - ConceptPIs\n", - " - Status\n", - " - Negated\n", - " - title_extracted_dense_vecs\n", - " - title_extracted_sparse_vecs_upsert\n", - " - title_extracted_sparse_vecs_idx2token\n", - " - definition_extracted_dense_vecs\n", - " - definition_extracted_sparse_vecs_upsert\n", - " - definition_extracted_sparse_vecs_idx2token\n", - " - title_extracted_tokens\n", - " - definition_extracted_tokens\n", - " order_cols_curation:\n", - " - variable name\n", - " - title\n", - " - definition\n", - " - permissible values\n", - " - permissible value descriptions\n", - " - preferred question text\n", - " - pipeline_name\n", - " - pipeline_name_alpha\n", - " - search_ID\n", - " - query_term_1\n", - " - query_term_stopwords_removed_1\n", - " - query_term_2\n", - " - query_term_stopwords_removed_2\n", - " - query_term_used\n", - " - query_term_used_col\n", - " - searchType\n", - " - MetaMap_input\n", - " - PMID\n", - " - CandidateScore\n", - " - CandidateMatched\n", - " - MatchedWords\n", - " - SemTypes\n", - " - MatchMaps\n", - " - IsHead\n", - " - IsOverMatch\n", - " - ConceptPIs\n", - " - Status\n", - " - Negated\n", - " - title_extracted\n", - " - definition_extracted\n", - " - recCount\n", - " - overall_rank\n", - " - data element concept names\n", - " - data element concept identifiers\n", - " - data element terminology sources\n", - " - title_extracted_dense_vecs\n", - " - title_extracted_sparse_vecs_upsert\n", - " - title_extracted_sparse_vecs_idx2token\n", - " - definition_extracted_dense_vecs\n", - " - definition_extracted_sparse_vecs_upsert\n", - " - definition_extracted_sparse_vecs_idx2token\n", - " - title_extracted_tokens\n", - " - definition_extracted_tokens\n", - " - metadata\n", - " - result_id\n", - " - semantic_type\n", - " - definition_source\n", - " - overall_count\n", - " - average_score\n", - " - title_str_rank\n", - " - title_str_score\n", - " - definition_def_rank\n", - " - definition_def_score\n", - " - keep\n", - " format_cols_curation: null\n", - " create_dictionary_import_settings:\n", - " curation_file_path: null\n", - " umls_columns:\n", - " - data element concept names\n", - " - data element concept identifiers\n", - " - data element terminology sources\n", - " join_on:\n", - " - variable name\n", - " - title_extracted\n", - " cui_sep: '|'\n", - " multi_cui_sep: /\n", - " override:\n", - " sep: '|'\n", - " columns:\n", - " - data element terminology sources\n", - " value: UMLS\n", - " dict_file_path: null\n", - " 
dict_file_type: csv\n", - " dictionary_columns:\n", - " - variable name\n", - " - title\n", - " - element type\n", - " - definition\n", - " - short description\n", - " - datatype\n", - " - maximum character quantity\n", - " - input restriction\n", - " - minimum value\n", - " - maximum value\n", - " - data element concept names\n", - " - data element concept identifiers\n", - " - data element terminology sources\n", - " - permissible values\n", - " - permissible value descriptions\n", - " - permissible value output codes\n", - " - permissible value concept names\n", - " - permissible value concept identifiers\n", - " - permissible value terminology sources\n", - " - unit of measure\n", - " - guidelines/instructions\n", - " - notes\n", - " - preferred question text\n", - " - keywords\n", - " - references\n", - " - historical notes\n", - " - see also\n", - " - effective date\n", - " - until date\n", - " - population.all\n", - " - domain.general (for all diseases)\n", - " - domain.traumatic brain injury\n", - " - domain.Parkinson's disease\n", - " - domain.Friedreich's ataxia\n", - " - domain.stroke\n", - " - domain.amyotrophic lateral sclerosis\n", - " - domain.Huntington's disease\n", - " - domain.multiple sclerosis\n", - " - domain.neuromuscular diseases\n", - " - domain.myasthenia gravis\n", - " - domain.spinal muscular atrophy\n", - " - domain.Duchenne muscular dystrophy/Becker muscular dystrophy\n", - " - domain.congenital muscular dystrophy\n", - " - domain.spinal cord injury\n", - " - domain.headache\n", - " - domain.epilepsy\n", - " - classification.general (for all diseases)\n", - " - classification.acute hospitalized\n", - " - classification.concussion/mild TBI\n", - " - classification.epidemiology\n", - " - 'classification.moderate/severe TBI: rehabilitation'\n", - " - classification.Parkinson's disease\n", - " - classification.Friedreich's ataxia\n", - " - classification.stroke\n", - " - classification.amyotrophic lateral sclerosis\n", - " - classification.Huntington's disease\n", - " - classification.multiple sclerosis\n", - " - classification.neuromuscular diseases\n", - " - classification.myasthenia gravis\n", - " - classification.spinal muscular atrophy\n", - " - classification.Duchenne muscular dystrophy/Becker muscular dystrophy\n", - " - classification.congenital muscular dystrophy\n", - " - classification.spinal cord injury\n", - " - classification.headache\n", - " - classification.epilepsy\n", - " - Label(s)\n", - " - submitting organization name\n", - " - submitting contact name\n", - " - submitting contact information\n", - " - steward organization name\n", - " - steward contact name\n", - " - steward contact information\n", - " check_cuis:\n", - " de:\n", - " reference_column: variable name\n", - " check_columns:\n", - " - data element concept names\n", - " - data element concept identifiers\n", - " - data element terminology sources\n", - " pvd:\n", - " reference_column: permissible value descriptions\n", - " check_columns:\n", - " - permissible value concept names\n", - " - permissible value concept identifiers\n", - " - permissible value terminology sources\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "cfg_hydra = helper.compose_config.fn(overrides=[\"custom=hydra_base\"])\n", - "# cfg_umls = helper.compose_config.fn(overrides=[\"custom=de\", \"apis=config_umls_api\"])\n", - "cfg_mm = helper.compose_config.fn(overrides=[\"custom=de\", \"apis=config_metamap_api\"])\n", - "cfg_ss = helper.compose_config.fn(\n", + "cfg_hydra = 
+    "# cfg_umls = helper.compose_config(overrides=[\"custom=de\", \"apis=config_umls_api\"])\n",
+    "cfg_mm = helper.compose_config(overrides=[\"custom=de\", \"apis=config_metamap_api\"])\n",
+    "cfg_ss = helper.compose_config(\n",
     "    overrides=[\n",
     "        \"custom=title_def\",\n",
     "        \"semantic_search=embeddings\",\n",
@@ -424,1516 +110,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {
     "jupyter": {
      "outputs_hidden": false
-    },
-    "ExecuteTime": {
-     "end_time": "2023-05-17T16:23:53.509772500Z",
-     "start_time": "2023-05-17T16:22:43.714034500Z"
     }
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": "12:22:43.890 | \u001B[36mINFO\u001B[0m | prefect.engine - Created flow run\u001B[35m 'elite-skunk'\u001B[0m for flow\u001B[1;35m 'run-hydra-batch'\u001B[0m\n",
-      "text/html": "
12:22:43.890 | INFO    | prefect.engine - Created flow run 'elite-skunk' for flow 'run-hydra-batch'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:44.180 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created task run 'Loading data dictionary file-0' for task 'Loading data dictionary file'\n", - "text/html": "
12:22:44.180 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created task run 'Loading data dictionary file-0' for task 'Loading data dictionary file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:44.182 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Executing 'Loading data dictionary file-0' immediately...\n", - "text/html": "
12:22:44.182 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Executing 'Loading data dictionary file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.281 | \u001B[36mINFO\u001B[0m | Task run 'Loading data dictionary file-0' - File chosen: C:/Users/armengolkm/Desktop/Full Pipeline Test v1.1.0/Hydra_Search/Main_FA_FITBIR_CDE_3-DE_2-PV.csv\n", - "text/html": "
12:22:48.281 | INFO    | Task run 'Loading data dictionary file-0' - File chosen: C:/Users/armengolkm/Desktop/Full Pipeline Test v1.1.0/Hydra_Search/Main_FA_FITBIR_CDE_3-DE_2-PV.csv\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.290 | \u001B[36mINFO\u001B[0m | Task run 'Loading data dictionary file-0' - Data Dictionary shape is: (3, 80)\n", - "text/html": "
12:22:48.290 | INFO    | Task run 'Loading data dictionary file-0' - Data Dictionary shape is: (3, 80)\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.319 | \u001B[36mINFO\u001B[0m | Task run 'Loading data dictionary file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:48.319 | INFO    | Task run 'Loading data dictionary file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.321 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Folder created: C:\\Users\\armengolkm\\Desktop\\Full Pipeline Test v1.1.0\\Hydra_Search\\DE_Step-1_Hydra-search (8)\n", - "text/html": "
12:22:48.321 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Folder created: C:\\Users\\armengolkm\\Desktop\\Full Pipeline Test v1.1.0\\Hydra_Search\\DE_Step-1_Hydra-search (8)\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.364 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created task run 'Creating new folder-0' for task 'Creating new folder'\n", - "text/html": "
12:22:48.364 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created task run 'Creating new folder-0' for task 'Creating new folder'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.367 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Executing 'Creating new folder-0' immediately...\n", - "text/html": "
12:22:48.367 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Executing 'Creating new folder-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.436 | \u001B[36mINFO\u001B[0m | Task run 'Creating new folder-0' - Folder created: C:\\Users\\armengolkm\\Desktop\\Full Pipeline Test v1.1.0\\Hydra_Search\\DE_Step-1_Hydra-search (8)\\DE_Step-1_metamap-search\n", - "text/html": "
12:22:48.436 | INFO    | Task run 'Creating new folder-0' - Folder created: C:\\Users\\armengolkm\\Desktop\\Full Pipeline Test v1.1.0\\Hydra_Search\\DE_Step-1_Hydra-search (8)\\DE_Step-1_metamap-search\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.466 | \u001B[36mINFO\u001B[0m | Task run 'Creating new folder-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:48.466 | INFO    | Task run 'Creating new folder-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.583 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created subflow run\u001B[35m 'curly-frigatebird'\u001B[0m for flow\u001B[1;35m 'run-mm-batch'\u001B[0m\n", - "text/html": "
12:22:48.583 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created subflow run 'curly-frigatebird' for flow 'run-mm-batch'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.706 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' for task 'Checking MetaMap UMLS API credentials in config files or .env file'\n", - "text/html": "
12:22:48.706 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' for task 'Checking MetaMap UMLS API credentials in config files or .env file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.707 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Checking MetaMap UMLS API credentials in config files or .env file-0' immediately...\n", - "text/html": "
12:22:48.707 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Checking MetaMap UMLS API credentials in config files or .env file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.765 | \u001B[36mINFO\u001B[0m | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - No API_KEY_UMLS found in config files. Looking in .env file.\n", - "text/html": "
12:22:48.765 | INFO    | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - No API_KEY_UMLS found in config files. Looking in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.769 | \u001B[36mINFO\u001B[0m | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - Using API_KEY_UMLS found in .env file.\n", - "text/html": "
12:22:48.769 | INFO    | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - Using API_KEY_UMLS found in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.771 | \u001B[36mINFO\u001B[0m | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - No API_EMAIL_UMLS found in config files. Looking in .env file.\n", - "text/html": "
12:22:48.771 | INFO    | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - No API_EMAIL_UMLS found in config files. Looking in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.773 | \u001B[36mINFO\u001B[0m | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - Using API_EMAIL_UMLS found in .env file.\n", - "text/html": "
12:22:48.773 | INFO    | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - Using API_EMAIL_UMLS found in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.808 | \u001B[36mINFO\u001B[0m | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:48.808 | INFO    | Task run 'Checking MetaMap UMLS API credentials in config files or .env file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:48.930 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created subflow run\u001B[35m 'inquisitive-binturong'\u001B[0m for flow\u001B[1;35m 'process-data-dictionary'\u001B[0m\n", - "text/html": "
12:22:48.930 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created subflow run 'inquisitive-binturong' for flow 'process-data-dictionary'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.046 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Cheatsheet not used\n", - "text/html": "
12:22:49.046 | INFO    | Flow run 'Preprocessing data dictionary' - Cheatsheet not used\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.079 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Exploding values in columns to query with-0' for task 'Exploding values in columns to query with'\n", - "text/html": "
12:22:49.079 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Exploding values in columns to query with-0' for task 'Exploding values in columns to query with'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.081 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Exploding values in columns to query with-0' immediately...\n", - "text/html": "
12:22:49.081 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Exploding values in columns to query with-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.176 | \u001B[36mINFO\u001B[0m | Task run 'Exploding values in columns to query with-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:49.176 | INFO    | Task run 'Exploding values in columns to query with-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.207 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Removing punctuation-0' for task 'Removing punctuation'\n", - "text/html": "
12:22:49.207 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Removing punctuation-0' for task 'Removing punctuation'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.208 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Removing punctuation-0' immediately...\n", - "text/html": "
12:22:49.208 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Removing punctuation-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.320 | \u001B[36mINFO\u001B[0m | Task run 'Removing punctuation-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:49.320 | INFO    | Task run 'Removing punctuation-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.357 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Removing stopwords from query columns-0' for task 'Removing stopwords from query columns'\n", - "text/html": "
12:22:49.357 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Removing stopwords from query columns-0' for task 'Removing stopwords from query columns'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.359 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Removing stopwords from query columns-0' immediately...\n", - "text/html": "
12:22:49.359 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Removing stopwords from query columns-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:49.431 | \u001B[36mINFO\u001B[0m | Task run 'Removing stopwords from query columns-0' - Opening dialog box to choose stopwords file\n", - "text/html": "
12:22:49.431 | INFO    | Task run 'Removing stopwords from query columns-0' - Opening dialog box to choose stopwords file\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.171 | \u001B[36mINFO\u001B[0m | Task run 'Removing stopwords from query columns-0' - File chosen: C:/Users/armengolkm/Desktop/Full Pipeline Test v1.1.0/MetaMap_Settings_StopWords_2022-10-06.csv\n", - "text/html": "
12:22:52.171 | INFO    | Task run 'Removing stopwords from query columns-0' - File chosen: C:/Users/armengolkm/Desktop/Full Pipeline Test v1.1.0/MetaMap_Settings_StopWords_2022-10-06.csv\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.207 | \u001B[36mINFO\u001B[0m | Task run 'Removing stopwords from query columns-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:52.207 | INFO    | Task run 'Removing stopwords from query columns-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.210 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Processed Data Dictionary shape is: (3, 83)\n", - "text/html": "
12:22:52.210 | INFO    | Flow run 'Preprocessing data dictionary' - Processed Data Dictionary shape is: (3, 83)\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.269 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'inquisitive-binturong'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:52.269 | INFO    | Flow run 'inquisitive-binturong' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.409 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created subflow run\u001B[35m 'hulking-trout'\u001B[0m for flow\u001B[1;35m 'format-curation-dataframe'\u001B[0m\n", - "text/html": "
12:22:52.409 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created subflow run 'hulking-trout' for flow 'format-curation-dataframe'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.536 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Subsetting dataframe with curation related columns-0' for task 'Subsetting dataframe with curation related columns'\n", - "text/html": "
12:22:52.536 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Subsetting dataframe with curation related columns-0' for task 'Subsetting dataframe with curation related columns'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.538 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Subsetting dataframe with curation related columns-0' immediately...\n", - "text/html": "
12:22:52.538 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Subsetting dataframe with curation related columns-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.640 | \u001B[36mINFO\u001B[0m | Task run 'Subsetting dataframe with curation related columns-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:52.640 | INFO    | Task run 'Subsetting dataframe with curation related columns-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.677 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Adding search_ID column-0' for task 'Adding search_ID column'\n", - "text/html": "
12:22:52.677 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Adding search_ID column-0' for task 'Adding search_ID column'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.679 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Adding search_ID column-0' immediately...\n", - "text/html": "
12:22:52.679 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Adding search_ID column-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.788 | \u001B[36mINFO\u001B[0m | Task run 'Adding search_ID column-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:52.788 | INFO    | Task run 'Adding search_ID column-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.829 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Adding search pipeline name column-0' for task 'Adding search pipeline name column'\n", - "text/html": "
12:22:52.829 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Adding search pipeline name column-0' for task 'Adding search pipeline name column'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.831 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Adding search pipeline name column-0' immediately...\n", - "text/html": "
12:22:52.831 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Adding search pipeline name column-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:52.958 | \u001B[36mINFO\u001B[0m | Task run 'Adding search pipeline name column-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:52.958 | INFO    | Task run 'Adding search pipeline name column-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.055 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'hulking-trout'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:53.055 | INFO    | Flow run 'hulking-trout' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.103 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Formatting query terms for MetaMap-0' for task 'Formatting query terms for MetaMap'\n", - "text/html": "
12:22:53.103 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Formatting query terms for MetaMap-0' for task 'Formatting query terms for MetaMap'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.105 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Formatting query terms for MetaMap-0' immediately...\n", - "text/html": "
12:22:53.105 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Formatting query terms for MetaMap-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.266 | \u001B[36mINFO\u001B[0m | Task run 'Formatting query terms for MetaMap-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:53.266 | INFO    | Task run 'Formatting query terms for MetaMap-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.321 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Creating MetaMap SinglinePMID.txt input file-0' for task 'Creating MetaMap SinglinePMID.txt input file'\n", - "text/html": "
12:22:53.321 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Creating MetaMap SinglinePMID.txt input file-0' for task 'Creating MetaMap SinglinePMID.txt input file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.325 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Creating MetaMap SinglinePMID.txt input file-0' immediately...\n", - "text/html": "
12:22:53.325 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Creating MetaMap SinglinePMID.txt input file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.464 | \u001B[36mINFO\u001B[0m | Task run 'Creating MetaMap SinglinePMID.txt input file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:22:53.464 | INFO    | Task run 'Creating MetaMap SinglinePMID.txt input file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.678 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created subflow run\u001B[35m 'enchanted-trout'\u001B[0m for flow\u001B[1;35m 'run-batch-metamap-api'\u001B[0m\n", - "text/html": "
12:22:53.678 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created subflow run 'enchanted-trout' for flow 'run-batch-metamap-api'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:22:53.898 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m MetaMap batch query'\u001B[0m - MetaMap Batch in progress...\n", - "text/html": "
12:22:53.898 | INFO    | Flow run 'Running MetaMap batch query' - MetaMap Batch in progress...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.582 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'enchanted-trout'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:25.582 | INFO    | Flow run 'enchanted-trout' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.616 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Converting MetaMap output to JSON-0' for task 'Converting MetaMap output to JSON'\n", - "text/html": "
12:23:25.616 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Converting MetaMap output to JSON-0' for task 'Converting MetaMap output to JSON'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.617 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Converting MetaMap output to JSON-0' immediately...\n", - "text/html": "
12:23:25.617 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Converting MetaMap output to JSON-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.735 | \u001B[36mINFO\u001B[0m | Task run 'Converting MetaMap output to JSON-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:25.735 | INFO    | Task run 'Converting MetaMap output to JSON-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.790 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Saving MetaMap output as JSON-0' for task 'Saving MetaMap output as JSON'\n", - "text/html": "
12:23:25.790 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Saving MetaMap output as JSON-0' for task 'Saving MetaMap output as JSON'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.792 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Saving MetaMap output as JSON-0' immediately...\n", - "text/html": "
12:23:25.792 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Saving MetaMap output as JSON-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:25.937 | \u001B[36mINFO\u001B[0m | Task run 'Saving MetaMap output as JSON-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:25.937 | INFO    | Task run 'Saving MetaMap output as JSON-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.014 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Processing JSON conversion to dataframe-0' for task 'Processing JSON conversion to dataframe'\n", - "text/html": "
12:23:26.014 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Processing JSON conversion to dataframe-0' for task 'Processing JSON conversion to dataframe'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.017 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Processing JSON conversion to dataframe-0' immediately...\n", - "text/html": "
12:23:26.017 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Processing JSON conversion to dataframe-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.143 | \u001B[36mINFO\u001B[0m | Task run 'Processing JSON conversion to dataframe-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:26.143 | INFO    | Task run 'Processing JSON conversion to dataframe-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.195 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Renaming MetaMap columns for curation file-0' for task 'Renaming MetaMap columns for curation file'\n", - "text/html": "
12:23:26.195 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Renaming MetaMap columns for curation file-0' for task 'Renaming MetaMap columns for curation file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.200 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Renaming MetaMap columns for curation file-0' immediately...\n", - "text/html": "
12:23:26.200 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Renaming MetaMap columns for curation file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.337 | \u001B[36mINFO\u001B[0m | Task run 'Renaming MetaMap columns for curation file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:26.337 | INFO    | Task run 'Renaming MetaMap columns for curation file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.394 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Creating curation file-0' for task 'Creating curation file'\n", - "text/html": "
12:23:26.394 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Creating curation file-0' for task 'Creating curation file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.397 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Creating curation file-0' immediately...\n", - "text/html": "
12:23:26.397 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Creating curation file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.671 | \u001B[36mINFO\u001B[0m | Task run 'Creating curation file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:26.671 | INFO    | Task run 'Creating curation file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.729 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Created task run 'Saving config file-0' for task 'Saving config file'\n", - "text/html": "
12:23:26.729 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Created task run 'Saving config file-0' for task 'Saving config file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.733 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - Executing 'Saving config file-0' immediately...\n", - "text/html": "
12:23:26.733 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - Executing 'Saving config file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.883 | \u001B[36mINFO\u001B[0m | Task run 'Saving config file-0' - Saving config file\n", - "text/html": "
12:23:26.883 | INFO    | Task run 'Saving config file-0' - Saving config file\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.976 | \u001B[36mINFO\u001B[0m | Task run 'Saving config file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:26.976 | INFO    | Task run 'Saving config file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:26.980 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'MetaMap search - batch_query_pipeline'\u001B[0m - FINISHED MetaMap batch query pipeline!!!\n", - "text/html": "
12:23:26.980 | INFO    | Flow run 'MetaMap search - batch_query_pipeline' - FINISHED MetaMap batch query pipeline!!!\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:27.067 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'curly-frigatebird'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:27.067 | INFO    | Flow run 'curly-frigatebird' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:27.143 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created task run 'Creating new folder-1' for task 'Creating new folder'\n", - "text/html": "
12:23:27.143 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created task run 'Creating new folder-1' for task 'Creating new folder'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:27.146 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Executing 'Creating new folder-1' immediately...\n", - "text/html": "
12:23:27.146 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Executing 'Creating new folder-1' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:27.368 | \u001B[36mINFO\u001B[0m | Task run 'Creating new folder-1' - Folder created: C:\\Users\\armengolkm\\Desktop\\Full Pipeline Test v1.1.0\\Hydra_Search\\DE_Step-1_Hydra-search (8)\\DE_Step-1_hybrid-semantic-search_alpha=[1.0, 0.0]\n", - "text/html": "
12:23:27.368 | INFO    | Task run 'Creating new folder-1' - Folder created: C:\\Users\\armengolkm\\Desktop\\Full Pipeline Test v1.1.0\\Hydra_Search\\DE_Step-1_Hydra-search (8)\\DE_Step-1_hybrid-semantic-search_alpha=[1.0, 0.0]\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:27.449 | \u001B[36mINFO\u001B[0m | Task run 'Creating new folder-1' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:27.449 | INFO    | Task run 'Creating new folder-1' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:27.729 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created subflow run\u001B[35m 'just-dragon'\u001B[0m for flow\u001B[1;35m 'run-hybrid-ss-batch'\u001B[0m\n", - "text/html": "
12:23:27.729 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created subflow run 'just-dragon' for flow 'run-hybrid-ss-batch'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.036 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created task run 'Checking Pinecone credentials in config files or .env file-0' for task 'Checking Pinecone credentials in config files or .env file'\n", - "text/html": "
12:23:28.036 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created task run 'Checking Pinecone credentials in config files or .env file-0' for task 'Checking Pinecone credentials in config files or .env file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.050 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Executing 'Checking Pinecone credentials in config files or .env file-0' immediately...\n", - "text/html": "
12:23:28.050 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Executing 'Checking Pinecone credentials in config files or .env file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.254 | \u001B[36mINFO\u001B[0m | Task run 'Checking Pinecone credentials in config files or .env file-0' - No apiKey found in config files. Looking in .env file.\n", - "text/html": "
12:23:28.254 | INFO    | Task run 'Checking Pinecone credentials in config files or .env file-0' - No apiKey found in config files. Looking in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.257 | \u001B[36mINFO\u001B[0m | Task run 'Checking Pinecone credentials in config files or .env file-0' - Using API_KEY_PINECONE found in .env file.\n", - "text/html": "
12:23:28.257 | INFO    | Task run 'Checking Pinecone credentials in config files or .env file-0' - Using API_KEY_PINECONE found in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.262 | \u001B[36mINFO\u001B[0m | Task run 'Checking Pinecone credentials in config files or .env file-0' - No environment found in config files. Looking in .env file.\n", - "text/html": "
12:23:28.262 | INFO    | Task run 'Checking Pinecone credentials in config files or .env file-0' - No environment found in config files. Looking in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.265 | \u001B[36mINFO\u001B[0m | Task run 'Checking Pinecone credentials in config files or .env file-0' - Using API_KEY_PINECONE found in .env file.\n", - "text/html": "
12:23:28.265 | INFO    | Task run 'Checking Pinecone credentials in config files or .env file-0' - Using API_KEY_PINECONE found in .env file.\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.347 | \u001B[36mINFO\u001B[0m | Task run 'Checking Pinecone credentials in config files or .env file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:28.347 | INFO    | Task run 'Checking Pinecone credentials in config files or .env file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.453 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created task run 'Connect to Pinecone index-0' for task 'Connect to Pinecone index'\n", - "text/html": "
12:23:28.453 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created task run 'Connect to Pinecone index-0' for task 'Connect to Pinecone index'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:28.458 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Executing 'Connect to Pinecone index-0' immediately...\n", - "text/html": "
12:23:28.458 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Executing 'Connect to Pinecone index-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:29.035 | \u001B[36mINFO\u001B[0m | Task run 'Connect to Pinecone index-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:29.035 | INFO    | Task run 'Connect to Pinecone index-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:30.185 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Pinecone indexes available: ['umls-cui-hybrid-semantic-search']\n", - "text/html": "
12:23:30.185 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Pinecone indexes available: ['umls-cui-hybrid-semantic-search']\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:30.541 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Stats for index 'umls-cui-hybrid-semantic-search': {'dimension': 768,\n 'index_fullness': 0.6,\n 'namespaces': {'DEF': {'vector_count': 259368},\n 'STR': {'vector_count': 259368}},\n 'total_vector_count': 518736}\n", - "text/html": "
12:23:30.541 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Stats for index 'umls-cui-hybrid-semantic-search': {'dimension': 768,\n 'index_fullness': 0.6,\n 'namespaces': {'DEF': {'vector_count': 259368},\n                'STR': {'vector_count': 259368}},\n 'total_vector_count': 518736}\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:30.736 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created subflow run\u001B[35m 'courageous-wrasse'\u001B[0m for flow\u001B[1;35m 'process-data-dictionary'\u001B[0m\n", - "text/html": "
12:23:30.736 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created subflow run 'courageous-wrasse' for flow 'process-data-dictionary'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:30.854 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Cheatsheet not used\n", - "text/html": "
12:23:30.854 | INFO    | Flow run 'Preprocessing data dictionary' - Cheatsheet not used\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:30.901 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Exploding values in columns to query with-0' for task 'Exploding values in columns to query with'\n", - "text/html": "
12:23:30.901 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Exploding values in columns to query with-0' for task 'Exploding values in columns to query with'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:30.904 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Exploding values in columns to query with-0' immediately...\n", - "text/html": "
12:23:30.904 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Exploding values in columns to query with-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.108 | \u001B[36mINFO\u001B[0m | Task run 'Exploding values in columns to query with-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:31.108 | INFO    | Task run 'Exploding values in columns to query with-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.201 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Removing punctuation-0' for task 'Removing punctuation'\n", - "text/html": "
12:23:31.201 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Removing punctuation-0' for task 'Removing punctuation'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.204 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Removing punctuation-0' immediately...\n", - "text/html": "
12:23:31.204 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Removing punctuation-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.374 | \u001B[36mINFO\u001B[0m | Task run 'Removing punctuation-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:31.374 | INFO    | Task run 'Removing punctuation-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.440 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Removing stopwords from query columns-0' for task 'Removing stopwords from query columns'\n", - "text/html": "
12:23:31.440 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Removing stopwords from query columns-0' for task 'Removing stopwords from query columns'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.444 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Removing stopwords from query columns-0' immediately...\n", - "text/html": "
12:23:31.444 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Removing stopwords from query columns-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.612 | \u001B[36mINFO\u001B[0m | Task run 'Removing stopwords from query columns-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:31.612 | INFO    | Task run 'Removing stopwords from query columns-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.617 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Processed Data Dictionary shape is: (3, 86)\n", - "text/html": "
12:23:31.617 | INFO    | Flow run 'Preprocessing data dictionary' - Processed Data Dictionary shape is: (3, 86)\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.701 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'courageous-wrasse'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:31.701 | INFO    | Flow run 'courageous-wrasse' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:31.964 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created subflow run\u001B[35m 'bright-dragon'\u001B[0m for flow\u001B[1;35m 'format-curation-dataframe'\u001B[0m\n", - "text/html": "
12:23:31.964 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created subflow run 'bright-dragon' for flow 'format-curation-dataframe'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.231 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Subsetting dataframe with curation related columns-0' for task 'Subsetting dataframe with curation related columns'\n", - "text/html": "
12:23:32.231 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Subsetting dataframe with curation related columns-0' for task 'Subsetting dataframe with curation related columns'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.236 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Subsetting dataframe with curation related columns-0' immediately...\n", - "text/html": "
12:23:32.236 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Subsetting dataframe with curation related columns-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.453 | \u001B[36mINFO\u001B[0m | Task run 'Subsetting dataframe with curation related columns-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:32.453 | INFO    | Task run 'Subsetting dataframe with curation related columns-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.538 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Adding search_ID column-0' for task 'Adding search_ID column'\n", - "text/html": "
12:23:32.538 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Adding search_ID column-0' for task 'Adding search_ID column'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.542 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Adding search_ID column-0' immediately...\n", - "text/html": "
12:23:32.542 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Adding search_ID column-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.719 | \u001B[36mINFO\u001B[0m | Task run 'Adding search_ID column-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:32.719 | INFO    | Task run 'Adding search_ID column-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.812 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Adding search pipeline name column-0' for task 'Adding search pipeline name column'\n", - "text/html": "
12:23:32.812 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Adding search pipeline name column-0' for task 'Adding search pipeline name column'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:32.815 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Adding search pipeline name column-0' immediately...\n", - "text/html": "
12:23:32.815 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Adding search pipeline name column-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.036 | \u001B[36mINFO\u001B[0m | Task run 'Adding search pipeline name column-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:33.036 | INFO    | Task run 'Adding search pipeline name column-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.133 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'bright-dragon'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:33.133 | INFO    | Flow run 'bright-dragon' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.326 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created task run 'Checking/Setting Device for embedding-0' for task 'Checking/Setting Device for embedding'\n", - "text/html": "
12:23:33.326 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created task run 'Checking/Setting Device for embedding-0' for task 'Checking/Setting Device for embedding'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.329 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Executing 'Checking/Setting Device for embedding-0' immediately...\n", - "text/html": "
12:23:33.329 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Executing 'Checking/Setting Device for embedding-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.519 | \u001B[36mINFO\u001B[0m | Task run 'Checking/Setting Device for embedding-0' - Running on cpu\n", - "text/html": "
12:23:33.519 | INFO    | Task run 'Checking/Setting Device for embedding-0' - Running on cpu\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.620 | \u001B[36mINFO\u001B[0m | Task run 'Checking/Setting Device for embedding-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:33.620 | INFO    | Task run 'Checking/Setting Device for embedding-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:33.962 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created subflow run\u001B[35m 'aquamarine-gibbon'\u001B[0m for flow\u001B[1;35m 'hybrid-builder'\u001B[0m\n", - "text/html": "
12:23:33.962 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created subflow run 'aquamarine-gibbon' for flow 'hybrid-builder'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:38.751 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Building dense and sparse embeddings and adding metadata for upsert into Pinecone'\u001B[0m - Embedding title_extracted\n", - "text/html": "
12:23:38.751 | INFO    | Flow run 'Building dense and sparse embeddings and adding metadata for upsert into Pinecone' - Embedding title_extracted\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "Batches: 0%| | 0/1 [00:0012:23:38.825 | INFO | Flow run 'Building dense and sparse embeddings and adding metadata for upsert into Pinecone' - Embedding 0 to 100\n\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\armengolkm\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\data-dictionary-cui-mapping-U7yrxD7_-py3.9\\lib\\site-packages\\torch\\amp\\autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", - " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n" - ] - }, - { - "data": { - "text/plain": "12:23:38.942 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Building dense and sparse embeddings and adding metadata for upsert into Pinecone'\u001B[0m - Embedding definition_extracted\n", - "text/html": "
12:23:38.942 | INFO    | Flow run 'Building dense and sparse embeddings and adding metadata for upsert into Pinecone' - Embedding definition_extracted\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "Batches: 0%| | 0/1 [00:0012:23:39.330 | INFO | Flow run 'Building dense and sparse embeddings and adding metadata for upsert into Pinecone' - Embedding 0 to 100\n\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\armengolkm\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\data-dictionary-cui-mapping-U7yrxD7_-py3.9\\lib\\site-packages\\torch\\amp\\autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", - " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n" - ] - }, - { - "data": { - "text/plain": "12:23:40.332 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'aquamarine-gibbon'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:40.332 | INFO    | Flow run 'aquamarine-gibbon' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:40.412 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created task run 'Tokenizing Columns and Adding to Metadata-0' for task 'Tokenizing Columns and Adding to Metadata'\n", - "text/html": "
12:23:40.412 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created task run 'Tokenizing Columns and Adding to Metadata-0' for task 'Tokenizing Columns and Adding to Metadata'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:40.416 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Executing 'Tokenizing Columns and Adding to Metadata-0' immediately...\n", - "text/html": "
12:23:40.416 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Executing 'Tokenizing Columns and Adding to Metadata-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:41.862 | \u001B[36mINFO\u001B[0m | Task run 'Tokenizing Columns and Adding to Metadata-0' - Tokenizing title_extracted\n", - "text/html": "
12:23:41.862 | INFO    | Task run 'Tokenizing Columns and Adding to Metadata-0' - Tokenizing title_extracted\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:41.866 | \u001B[36mINFO\u001B[0m | Task run 'Tokenizing Columns and Adding to Metadata-0' - Tokenizing definition_extracted\n", - "text/html": "
12:23:41.866 | INFO    | Task run 'Tokenizing Columns and Adding to Metadata-0' - Tokenizing definition_extracted\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:41.946 | \u001B[36mINFO\u001B[0m | Task run 'Tokenizing Columns and Adding to Metadata-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:41.946 | INFO    | Task run 'Tokenizing Columns and Adding to Metadata-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Semantic Search Runner: 100%|██████████| 3/3 [00:01<00:00, 2.02it/s]\n", - "Aggregating Results: 100%|██████████| 3/3 [00:00<00:00, 225.69it/s]\n", - "Semantic Search Runner: 100%|██████████| 3/3 [00:01<00:00, 2.63it/s]\n", - "Aggregating Results: 100%|██████████| 3/3 [00:00<00:00, 238.88it/s]\n" - ] - }, - { - "data": { - "text/plain": "12:23:45.141 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created task run 'Creating curation file-0' for task 'Creating curation file'\n", - "text/html": "
12:23:45.141 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created task run 'Creating curation file-0' for task 'Creating curation file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:45.143 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Executing 'Creating curation file-0' immediately...\n", - "text/html": "
12:23:45.143 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Executing 'Creating curation file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.578 | \u001B[36mINFO\u001B[0m | Task run 'Creating curation file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:46.578 | INFO    | Task run 'Creating curation file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.622 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Created task run 'Saving config file-0' for task 'Saving config file'\n", - "text/html": "
12:23:46.622 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Created task run 'Saving config file-0' for task 'Saving config file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.625 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - Executing 'Saving config file-0' immediately...\n", - "text/html": "
12:23:46.625 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - Executing 'Saving config file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.760 | \u001B[36mINFO\u001B[0m | Task run 'Saving config file-0' - Saving config file\n", - "text/html": "
12:23:46.760 | INFO    | Task run 'Saving config file-0' - Saving config file\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.812 | \u001B[36mINFO\u001B[0m | Task run 'Saving config file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:46.812 | INFO    | Task run 'Saving config file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.815 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Pinecone Semantic Search - batch_hybrid_query_pipeline'\u001B[0m - FINISHED Pinecone Semantic Search batch query pipeline!!!\n", - "text/html": "
12:23:46.815 | INFO    | Flow run 'Pinecone Semantic Search - batch_hybrid_query_pipeline' - FINISHED Pinecone Semantic Search batch query pipeline!!!\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:46.979 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'just-dragon'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:46.979 | INFO    | Flow run 'just-dragon' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:47.802 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created subflow run\u001B[35m 'flying-puffin'\u001B[0m for flow\u001B[1;35m 'process-data-dictionary'\u001B[0m\n", - "text/html": "
12:23:47.802 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created subflow run 'flying-puffin' for flow 'process-data-dictionary'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:47.947 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Cheatsheet not used\n", - "text/html": "
12:23:47.947 | INFO    | Flow run 'Preprocessing data dictionary' - Cheatsheet not used\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.003 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Exploding values in columns to query with-0' for task 'Exploding values in columns to query with'\n", - "text/html": "
12:23:48.003 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Exploding values in columns to query with-0' for task 'Exploding values in columns to query with'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.006 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Exploding values in columns to query with-0' immediately...\n", - "text/html": "
12:23:48.006 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Exploding values in columns to query with-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.187 | \u001B[36mINFO\u001B[0m | Task run 'Exploding values in columns to query with-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:48.187 | INFO    | Task run 'Exploding values in columns to query with-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.297 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Removing punctuation-0' for task 'Removing punctuation'\n", - "text/html": "
12:23:48.297 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Removing punctuation-0' for task 'Removing punctuation'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.302 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Removing punctuation-0' immediately...\n", - "text/html": "
12:23:48.302 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Removing punctuation-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.526 | \u001B[36mINFO\u001B[0m | Task run 'Removing punctuation-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:48.526 | INFO    | Task run 'Removing punctuation-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.601 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Created task run 'Removing stopwords from query columns-0' for task 'Removing stopwords from query columns'\n", - "text/html": "
12:23:48.601 | INFO    | Flow run 'Preprocessing data dictionary' - Created task run 'Removing stopwords from query columns-0' for task 'Removing stopwords from query columns'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.604 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Executing 'Removing stopwords from query columns-0' immediately...\n", - "text/html": "
12:23:48.604 | INFO    | Flow run 'Preprocessing data dictionary' - Executing 'Removing stopwords from query columns-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.885 | \u001B[36mINFO\u001B[0m | Task run 'Removing stopwords from query columns-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:48.885 | INFO    | Task run 'Removing stopwords from query columns-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.890 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Preprocessing data dictionary'\u001B[0m - Processed Data Dictionary shape is: (3, 86)\n", - "text/html": "
12:23:48.890 | INFO    | Flow run 'Preprocessing data dictionary' - Processed Data Dictionary shape is: (3, 86)\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:48.998 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'flying-puffin'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:48.998 | INFO    | Flow run 'flying-puffin' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.234 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created subflow run\u001B[35m 'strong-trout'\u001B[0m for flow\u001B[1;35m 'format-curation-dataframe'\u001B[0m\n", - "text/html": "
12:23:49.234 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created subflow run 'strong-trout' for flow 'format-curation-dataframe'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.438 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Subsetting dataframe with curation related columns-0' for task 'Subsetting dataframe with curation related columns'\n", - "text/html": "
12:23:49.438 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Subsetting dataframe with curation related columns-0' for task 'Subsetting dataframe with curation related columns'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.442 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Subsetting dataframe with curation related columns-0' immediately...\n", - "text/html": "
12:23:49.442 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Subsetting dataframe with curation related columns-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.639 | \u001B[36mINFO\u001B[0m | Task run 'Subsetting dataframe with curation related columns-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:49.639 | INFO    | Task run 'Subsetting dataframe with curation related columns-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.722 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Adding search_ID column-0' for task 'Adding search_ID column'\n", - "text/html": "
12:23:49.722 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Adding search_ID column-0' for task 'Adding search_ID column'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.725 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Adding search_ID column-0' immediately...\n", - "text/html": "
12:23:49.725 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Adding search_ID column-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:49.970 | \u001B[36mINFO\u001B[0m | Task run 'Adding search_ID column-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:49.970 | INFO    | Task run 'Adding search_ID column-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.028 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Created task run 'Adding search pipeline name column-0' for task 'Adding search pipeline name column'\n", - "text/html": "
12:23:50.028 | INFO    | Flow run 'Formatting dataframe for curation' - Created task run 'Adding search pipeline name column-0' for task 'Adding search pipeline name column'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.032 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'Formatting dataframe for curation'\u001B[0m - Executing 'Adding search pipeline name column-0' immediately...\n", - "text/html": "
12:23:50.032 | INFO    | Flow run 'Formatting dataframe for curation' - Executing 'Adding search pipeline name column-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.215 | \u001B[36mINFO\u001B[0m | Task run 'Adding search pipeline name column-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:50.215 | INFO    | Task run 'Adding search pipeline name column-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.321 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'strong-trout'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:50.321 | INFO    | Flow run 'strong-trout' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.391 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created task run 'Creating curation file-0' for task 'Creating curation file'\n", - "text/html": "
12:23:50.391 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created task run 'Creating curation file-0' for task 'Creating curation file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.394 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Executing 'Creating curation file-0' immediately...\n", - "text/html": "
12:23:50.394 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Executing 'Creating curation file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:50.582 | \u001B[36mINFO\u001B[0m | Task run 'Creating curation file-0' - The following columns were not found and will be excluded: ['query_term_used_col', 'searchType', 'query_term_used']\n", - "text/html": "
12:23:50.582 | INFO    | Task run 'Creating curation file-0' - The following columns were not found and will be excluded: ['query_term_used_col', 'searchType', 'query_term_used']\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:52.998 | \u001B[36mINFO\u001B[0m | Task run 'Creating curation file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:52.998 | INFO    | Task run 'Creating curation file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:53.047 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Created task run 'Saving config file-0' for task 'Saving config file'\n", - "text/html": "
12:23:53.047 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Created task run 'Saving config file-0' for task 'Saving config file'\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:53.050 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - Executing 'Saving config file-0' immediately...\n", - "text/html": "
12:23:53.050 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - Executing 'Saving config file-0' immediately...\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:53.206 | \u001B[36mINFO\u001B[0m | Task run 'Saving config file-0' - Saving config file\n", - "text/html": "
12:23:53.206 | INFO    | Task run 'Saving config file-0' - Saving config file\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:53.292 | \u001B[36mINFO\u001B[0m | Task run 'Saving config file-0' - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:53.292 | INFO    | Task run 'Saving config file-0' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:53.295 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m '\u001B[0m\u001B[35mRunning\u001B[0m\u001B[35m UMLS/MetaMap/Semantic Search hydra search pipeline'\u001B[0m - FINISHED batch hydra search query pipeline!!!\n", - "text/html": "
12:23:53.295 | INFO    | Flow run 'Running UMLS/MetaMap/Semantic Search hydra search pipeline' - FINISHED batch hydra search query pipeline!!!\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": "12:23:53.361 | \u001B[36mINFO\u001B[0m | Flow run\u001B[35m 'elite-skunk'\u001B[0m - Finished in state \u001B[32mCompleted\u001B[0m()\n", - "text/html": "
12:23:53.361 | INFO    | Flow run 'elite-skunk' - Finished in state Completed()\n
\n" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " variable name title \\\n", - "0 AgeYrs Age in years \n", - "1 AgeYrs Age in years \n", - "2 AgeYrs Age in years \n", - "3 AgeYrs Age in years \n", - "4 AgeYrs Age in years \n", - "\n", - " definition permissible values \\\n", - "0 Value for participant's subject age, calculate... NaN \n", - "1 Value for participant's subject age, calculate... NaN \n", - "2 Value for participant's subject age, calculate... NaN \n", - "3 Value for participant's subject age, calculate... NaN \n", - "4 Value for participant's subject age, calculate... NaN \n", - "\n", - " permissible value descriptions preferred question text \\\n", - "0 NaN Subject's age (recorded in years): \n", - "1 NaN Subject's age (recorded in years): \n", - "2 NaN Subject's age (recorded in years): \n", - "3 NaN Subject's age (recorded in years): \n", - "4 NaN Subject's age (recorded in years): \n", - "\n", - " search_ID pipeline_name title_extracted query_term_1 ... \\\n", - "0 1 metamap (custom=de) Age in years age in years ... \n", - "1 1 metamap (custom=de) Age in years age in years ... \n", - "2 1 metamap (custom=de) Age in years age in years ... \n", - "3 1 metamap (custom=de) Age in years age in years ... \n", - "4 1 metamap (custom=de) Age in years age in years ... \n", - "\n", - " pipeline_name_alpha result_id semantic_type definition_source \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", - "\n", - " overall_count average_score title_str_rank title_str_score \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", - "\n", - " definition_def_rank definition_def_score \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - "[5 rows x 51 columns]\n" - ] - } - ], + "outputs": [], "source": [ "# df_umls, cfg_umls = umls_bqp.run_umls_batch(cfg_umls)\n", "# df_mm, cfg_mm = mm_bqp.run_mm_batch(cfg_mm)\n", @@ -1953,12 +136,10 @@ }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### STEP-2A: CREATE DATA DICTIONARY IMPORT FILE" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", @@ -1969,13 +150,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { - "tags": [], - "ExecuteTime": { - "end_time": "2023-05-17T15:39:18.311517700Z", - "start_time": "2023-05-17T15:39:18.294743100Z" - } + "tags": [] }, "outputs": [], "source": [ @@ -1999,7 +176,7 @@ }, "outputs": [], "source": [ - "cfg_step1 = helper.load_config.fn(helper.choose_file(\"Load config file from Step 1\"))\n", + "cfg_step1 = helper.load_config(helper.choose_file(\"Load config file from Step 1\"))\n", "df_dd = create_dictionary_import_file.create_dd_file(cfg_step1)\n", "print(df_dd.head())" ] @@ -2017,7 +194,7 @@ "metadata": {}, "outputs": [], "source": [ - "cfg_step2 = helper.load_config.fn(helper.choose_file(\"Load config file from Step 2\"))\n", + "cfg_step2 = helper.load_config(helper.choose_file(\"Load config file from Step 2\"))\n", "df_check = check_cuis.check_cuis(cfg_step2)\n", "print(df_check.head())" ] @@ -2039,7 +216,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.1" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/notebooks/example_files/MetaMap_Settings_StopWords_2022-10-06.csv 
diff --git a/notebooks/example_files/MetaMap_Settings_StopWords_2022-10-06.csv b/notebooks/example_files/MetaMap_Settings_StopWords_2022-10-06.csv
deleted file mode 100644
index ea8b996..0000000
--- a/notebooks/example_files/MetaMap_Settings_StopWords_2022-10-06.csv
+++ /dev/null
@@ -1,88 +0,0 @@
-##,Word,Source,StopWord?,StopWord Comment,Actions,,,last updated ,10/31/2022
-1, has been ,mapping,Yes,,,,,,
-2, special setting ,mapping,Yes,,,,,,
-3,&,mapping,Yes,,,,,,
-4,"a, as a",mapping,Yes,,,,,,
-5,adopted ,mapping,Yes,"it often maps to ""Personal status - Adopted"" UMLS CUI: C0425382 which does not really help",,,,,
-6,Anatomic Site,caDSR rep terms,No,,,,,,
-7,apply,mapping,Yes,,,,,,
-8,approximately,mapping,Yes,,,,,,
-9,are,mapping,Yes,,,,,,
-10,at,mapping,Yes,,,,,,
-11,at home,mapping,Yes,"it often maps to ""At home"" UMLS CUI: C4534363, however this concept does not even have a proper definition and UMLS",,,,,
-12,Category,caDSR rep terms,Yes,,,,,,
-13,CMS,mapping,Yes,"use instead Centers for Medicare and Medicaid Services , UMLS CUI: C0041718","Add the replacement of ""CMS"" to ""Centers for Medicare and Medicaid Services "" to the preprocessing steps",,,,
-14,Code,caDSR rep terms,Yes,,,,,,
-15,Consent for General and Linkage Research,mapping,Yes,"Better to use ""Research study consent (UMLS CUI: C4034855)""","Add the replacement of ""Consent for General and Linkage Research"" to ""Research study consent "" to the preprocessing steps",,,,
-16,consequence ,mapping,Yes,,,,,,
-17,Count,caDSR rep terms,Yes,,,,,,
-18,current,mapping,Yes,"It often gets mapped to Electrical Current (C1705970), better to replace with ""now"" ","Add the replacement of ""current"" to ""now"" to the preprocessing steps. Create a cheat sheet and add mapping of ""current"" Current (present time) (C0521116)",,,,
-19,Date,caDSR rep terms,No,,,,,,
-20,Date and Time,caDSR rep terms,No,,,,,,
-21,diagnosis,mapping,No,,,,,,
-22,Dose,caDSR rep terms,No,,,,,,
-23,Due To ,mapping,Yes,,,,,,
-24,Duration,caDSR rep terms,Yes,,,,,,
-25,effect,mapping,Yes,,,,,,
-26,employes,mapping,Yes,"It often gets mapped to ""Employed (C0557351)"" which does not really help.",,,,,
-27,Float,caDSR rep terms,Yes,,,,,,
-28,For each item:,mapping,Yes,,,,,,
-29,form,mapping,Yes,,,,,,
-30,get,mapping,Yes,"Quite often MetaMap maps either to something genomics ""RESF1 gene (C1824356)"" or to ""GET complex (C1624050)"" or to ""Deglutition Disorders (C0011168)"". If absolutely necessary to use the word ""get"" use ""acquire"" instead.",,,,,
-31,Grade,caDSR rep terms,Yes,,,,,,
-32,Identifier,caDSR rep terms,Yes,,,,,,
-33,if you,mapping,Yes,,,,,,
-34,Impact,mapping,Yes,"Often MetaMap maps it to ""IMPACT (IMPACT gen)"" C1825598, which is wrong",,,,,
-35,In general,mapping,Yes,,,,,,
-36,"include, including",mapping,Yes,,,,,,
-37,Indicator,caDSR rep terms,Yes,,,,,,
-38,Integer,caDSR rep terms,Yes,,,,,,
-39,Interval,caDSR rep terms,Yes,,,,,,
-40,Item,mapping,Yes,,,,,,
-41,keep house,mapping,Yes,"Use ""housekeeping"", UMLS CUI: C0020053, instead","Add the replacement of ""keep house"" to ""housekeeping"" to the preprocessing steps",,,,
-42,"keep, keeping",mapping,Yes,,,,,,
-43,may be,mapping,Yes,,,,,,
-44,Measurement,caDSR rep terms,Yes,,,,,,
-45,Name,caDSR rep terms,No,,,,,,
-46,No,mapping,Yes,,,,,,
-47,now,mapping,Yes,"Only, if used along with ""current"" or ""present"" use one or another","Add the replacement of ""current"" to ""now"" to the preprocessing steps",,,,
-48,Number,caDSR rep terms,Yes,"It often gets mapped to ""Numbers (C0237753)"", which does not really help. One might want to replace ""number"" with ""count"" in the CDE attributes to make it mappable to ""Count (C0750480)""",,,,,
-49,on,mapping,Yes,,,,,,
-50,Or,mapping,Yes,,,,,,
-51,or what,mapping,Yes,,,,,,
-52,out,mapping,Yes,,,,,,
-53,Outcome,caDSR rep terms,Yes,,,,,,
-54,part,mapping,Yes,,,,,,
-55,person,mapping,Yes,"Although UMLS does have concept Persons (C0027361) Semantic Types: Population Group, ""person"" is getting often mapped to C2347489:Person (Person Observer , which is a wrong concept. ","Create a cheat sheet and add mapping of ""person, persons, individual"" to Persons (C0027361) ",,,,
-56,POLST,mapping,Yes,"Need to be replaced with ""Physician Orders for Life Sustaining Treatment"" UMLS (C5447535)""","Add the replacement of ""SDOH"" to ""Physician Orders for Life Sustaining Treatment"" to the preprocessing steps",,,,
-57,QOL,mapping,Yes,,,,,,
-58,Range,caDSR rep terms,Yes,,,,,,
-59,Rate,caDSR rep terms,Yes,,,,,,
-60,Reason,caDSR rep terms,Yes,,,,,,
-61,Scale,caDSR rep terms,Yes,,,,,,
-62,Score,caDSR rep terms,Yes,,,,,,
-63,SDOH,mapping,Yes,"Need to be replaced with ""Social Determinants of Health"" UMLS C3658315","Add the replacement of ""POLST"" to ""Social Determinants of Health"" to the preprocessing steps",,,,
-64,Source,caDSR rep terms,Yes,,,,,,
-65,"Specific , Specified , Specify",mapping,Yes,,,,,,
-66,Specify,caDSR rep terms,Yes,,,,,,
-67,Stage,caDSR rep terms,Yes,,,,,,
-68,Status,caDSR rep terms,Yes,,,,,,
-69,such as ,mapping,Yes,,,,,,
-70,Temporal Frequency,caDSR rep terms,Yes,,,,,,
-71,Text,caDSR rep terms,Yes,,,,,,
-72,the,mapping,Yes,,,,,,
-73,the,mapping,Yes,,,,,,
-74,the following ,mapping,Yes,,,,,,
-75,The name that describes ,mapping,Yes,,,,,,
-76,Time,caDSR rep terms,Yes,,,,,,
-77,To,mapping,Yes,,,,,,
-78,Type,caDSR rep terms,Yes,,,,,,
-79,Unit of Measure,caDSR rep terms,No,,,,,,
-80,"US, USA",mapping,Yes,"Should use ""United States"" UMLS C0041703, instead","Add the replacement of ""US"" or ""USA"" to ""United States"" to the preprocessing steps",,,,
-81,Value,caDSR rep terms,Yes,,,,,,
-82,was,mapping,Yes,This one drives MetaMap to really wrong directions,,,,,
-83,what,mapping,Yes,,,,,,
-84,would you,mapping,Yes,,,,,,
-85,Yes,mapping,Yes,,,,,,
-86,you,mapping,Yes,,,,,,
-87,you would ,mapping,Yes,,,,,,
diff --git a/pyproject.toml b/pyproject.toml
index bb24f43..4838b14 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "data-dictionary-cui-mapping"
-version = "1.1.4"
+version = "1.1.5"
 description = "This package allows you to load in a data dictionary and map cuis to defined fields using either the UMLS API or MetaMap API from NLM, or a Semantic Search pipeline using Pinecone vector database."
 authors = ["Kevin Armengol "]
 license = "MIT"
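The stop-word table deleted above encoded two kinds of query preprocessing guidance: rows flagged "StopWord? = Yes" listed terms to strip before querying MetaMap, and the Actions column recommended expanding ambiguous abbreviations (CMS, SDOH, POLST, US/USA) to their full UMLS-mappable forms. A minimal sketch of how such a table is typically applied, with hypothetical names (this is not the package's actual preprocessing code):

    import re

    # Expansion rules drawn from the table's Actions column.
    REPLACEMENTS = {
        "CMS": "Centers for Medicare and Medicaid Services",
        "SDOH": "Social Determinants of Health",
        "POLST": "Physician Orders for Life Sustaining Treatment",
    }
    # A few of the rows flagged as stop words.
    STOPWORDS = ["the following", "has been", "approximately", "the"]

    def preprocess_query(text):
        # Expand abbreviations first so MetaMap sees the unambiguous long form.
        for abbr, expansion in REPLACEMENTS.items():
            text = re.sub(rf"\b{re.escape(abbr)}\b", expansion, text)
        # Drop stop words, longest phrases first, so "the following" is
        # removed intact before the bare "the" rule can split it.
        for sw in sorted(STOPWORDS, key=len, reverse=True):
            text = re.sub(rf"\b{re.escape(sw)}\b", " ", text, flags=re.IGNORECASE)
        return " ".join(text.split())

    print(preprocess_query("The CMS number has been recorded"))
    # -> "Centers for Medicare and Medicaid Services number recorded"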