From 4bad23f600dfef3767a1d92da4d5767f9042b5c9 Mon Sep 17 00:00:00 2001 From: Kevin Armengol Date: Sun, 14 May 2023 12:40:43 -0400 Subject: [PATCH] Slight modifications to requirements and pyproject files. Changed curation file_settings excel sheet3 to _extracted. Fixed reference to hybrid query dictionary ids resource file. --- ddcuimap/configs/custom/de.yaml | 2 +- ddcuimap/configs/custom/hydra_base.yaml | 2 +- ddcuimap/configs/custom/pvd.yaml | 2 +- ddcuimap/configs/custom/title_def.yaml | 2 +- .../batch_hybrid_query_pipeline.py | 2 +- pyproject.toml | 36 +++++++++---------- requirements.txt | 7 ++-- 7 files changed, 26 insertions(+), 27 deletions(-) diff --git a/ddcuimap/configs/custom/de.yaml b/ddcuimap/configs/custom/de.yaml index 787a045..72aee78 100644 --- a/ddcuimap/configs/custom/de.yaml +++ b/ddcuimap/configs/custom/de.yaml @@ -45,7 +45,7 @@ curation_settings: sheet_names: sheet1 : 'UMLS_curation' sheet2 : 'Data_Dictionary' - sheet3 : 'Data_Dictionary_exploded' + sheet3 : 'Data_Dictionary_extracted' hide_cols_curation: order_cols_curation: diff --git a/ddcuimap/configs/custom/hydra_base.yaml b/ddcuimap/configs/custom/hydra_base.yaml index 2e93368..b6572f8 100644 --- a/ddcuimap/configs/custom/hydra_base.yaml +++ b/ddcuimap/configs/custom/hydra_base.yaml @@ -45,7 +45,7 @@ curation_settings: sheet_names: sheet1 : 'UMLS_curation' sheet2 : 'Data_Dictionary' - sheet3 : 'Data_Dictionary_exploded' + sheet3 : 'Data_Dictionary_extracted' hide_cols_curation: ['PMID', 'MatchMaps', 'IsHead', 'IsOverMatch', 'ConceptPIs', 'Status', 'Negated', 'title_extracted_dense_vecs', 'title_extracted_sparse_vecs_upsert', 'title_extracted_sparse_vecs_idx2token', 'definition_extracted_dense_vecs', 'definition_extracted_sparse_vecs_upsert', 'definition_extracted_sparse_vecs_idx2token', diff --git a/ddcuimap/configs/custom/pvd.yaml b/ddcuimap/configs/custom/pvd.yaml index 614bfb8..ba887a0 100644 --- a/ddcuimap/configs/custom/pvd.yaml +++ b/ddcuimap/configs/custom/pvd.yaml @@ -46,7 +46,7 @@ curation_settings: sheet_names: sheet1 : 'UMLS_curation' sheet2 : 'Data_Dictionary' - sheet3 : 'Data_Dictionary_exploded' + sheet3 : 'Data_Dictionary_extracted' hide_cols_curation: order_cols_curation: diff --git a/ddcuimap/configs/custom/title_def.yaml b/ddcuimap/configs/custom/title_def.yaml index 5eb62c3..1c7a8b7 100644 --- a/ddcuimap/configs/custom/title_def.yaml +++ b/ddcuimap/configs/custom/title_def.yaml @@ -44,7 +44,7 @@ curation_settings: sheet_names: sheet1 : 'UMLS_curation' sheet2 : 'Data_Dictionary' - sheet3 : 'Data_Dictionary_exploded' + sheet3 : 'Data_Dictionary_extracted' hide_cols_curation: order_cols_curation: diff --git a/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py b/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py index 6c809f7..4e94d23 100644 --- a/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py +++ b/ddcuimap/semantic_search/batch_hybrid_query_pipeline.py @@ -105,7 +105,7 @@ def run_hybrid_ss_batch(cfg, **kwargs): # RETRIEVE UMLS VECTOR ID AS DICTIONARY ids = importlib.resources.read_binary( - "semantic_search.resources", "dict_umls_upsert_ids.pkl" + "ddcuimap.semantic_search.resources", "dict_umls_upsert_ids.pkl" ) dict_umls_upsert_ids = pickle.loads(ids) # dict_umls_upsert_ids = run.fetch_id_metadata(index, cfg) #TODO: need to work on this diff --git a/pyproject.toml b/pyproject.toml index de168a6..9b9bfb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "data-dictionary-cui-mapping" -version = "1.1.2" +version = "1.1.3" description = "This package allows you to load in a data dictionary and map cuis to defined fields using either the UMLS API or MetaMap API from NLM, or a Semantic Search pipeline using Pinecone vector database." authors = ["Kevin Armengol "] license = "MIT" @@ -10,36 +10,36 @@ keywords = ["BRICS", "curation", "data dictionary", "UMLS", "MetaMap", "Metathes packages = [{include = "ddcuimap/**/*"}] [tool.poetry.dependencies] -python = "^3.8.1" -pandas = ">=1.5.2" -openpyxl = ">=3.0.10" -requests = ">=2.28.1" -python-dotenv = "0.21.1" -requests-html = ">=0.10.0" +cchardet = "^2.1.7" hydra-core = ">=1.1.0" omegaconf = "2.1.2" -prefect = {extras = ["viz"], version = "^2.8.3"} -transformers = "4.18.0" +openpyxl = ">=3.0.10" +pandas = ">=1.5.2" pathlib = "^1.0.1" pinecone-client = "^2.2.1" +prefect = {extras = ["viz"], version = "^2.8.3"} +python = "^3.8.1" +python-dotenv = "0.21.1" +requests = ">=2.28.1" +requests-html = ">=0.10.0" sentence-transformers = "^2.2.2" -cchardet = "^2.1.7" +transformers = "4.18.0" [tool.poetry.group.dev.dependencies] black = "^23.1.0" -radon = "^5.1.0" -vulture = "^2.7" +bs4 = "^0.0.1" +deptry = "^0.8.0" flake8 = "^6.0.0" -pre-commit = "^3.0.4" isort = "^5.11.3" -jupyterlab = "^3.6.1" jupyter = "^1.0.0" -wheel = "^0.38.4" -deptry = "^0.8.0" -bs4 = "^0.0.1" -splade = {git = "https://github.com/naver/splade.git"} +jupyterlab = "^3.6.1" monkeytype = "^23.3.0" +pre-commit = "^3.0.4" +radon = "^5.1.0" +splade = {git = "https://github.com/naver/splade.git"} +vulture = "^2.7" +wheel = "^0.38.4" [build-system] requires = ["poetry-core"] diff --git a/requirements.txt b/requirements.txt index 7d1cc75..5ebc93c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,12 +8,11 @@ omegaconf~=2.1.2 openpyxl~=3.1.1 pandas~=1.5.3 pathlib~=1.0.1 -pinecone-client = "^2.2.1" -prefect~=2.8.3 -python = "^3.8.1" +pinecone-client~=2.2.1 +prefect~=2.8.4 python-dotenv~=0.21.1 -requests-html = ">=0.10.0" requests~=2.28.2 +requests-html~=0.10.0 torch~=2.0.0 tqdm~=4.65.0 transformers~=4.18.0