Commit ddf83bd
disabling wos functionality due to bugs
J-A-Ha committed Aug 7, 2024
1 parent a1993b6 commit ddf83bd
Showing 4 changed files with 122 additions and 122 deletions.
2 changes: 1 addition & 1 deletion art/__init__.py
@@ -32,7 +32,7 @@

from .importers.crossref import lookup_doi, lookup_dois, lookup_journal, lookup_journals, search_journals, get_journal_entries, search_journal_entries, lookup_funder, lookup_funders, search_funders, get_funder_works, search_funder_works
from .importers.crossref import search_works as search_crossref
from .importers.wos import search as search_wos
# from .importers.wos import search as search_wos
from .importers.scopus import search as search_scopus, lookup as lookup_scopus
from .importers.orcid import lookup_orcid, search as search_orcid
from .importers.search import search as api_search
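With the export above commented out, "from art import search_wos" no longer works. Below is a minimal caller-side sketch (an assumption about downstream usage, not part of this commit) that degrades gracefully when the name is missing; the all_fields and limit parameter names are taken from the search_wos wrapper shown in the review.py diff further down.

try:
    from art import search_wos  # this export is removed by the commit, so ImportError is raised
except ImportError:
    search_wos = None  # Web of Science support is disabled

if search_wos is not None:
    # all_fields and limit are parameter names taken from the wos search wrapper below
    results = search_wos(all_fields='machine learning', limit=10)
else:
    print('Web of Science search is disabled; use search_scopus or search_crossref instead.')
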
174 changes: 87 additions & 87 deletions art/classes/review.py
@@ -6,7 +6,7 @@
from ..importers.crossref import search_works, lookup_doi, lookup_dois, lookup_journal, lookup_journals, search_journals, get_journal_entries, search_journal_entries, lookup_funder, lookup_funders, search_funders, get_funder_works, search_funder_works
from ..importers.crossref import query_builder as crossref_query_builder
from ..importers.scopus import query_builder as scopus_query_builder, search as search_scopus, lookup as lookup_scopus
from ..importers.wos import search as search_wos, query_builder as wos_query_builder
# from ..importers.wos import search as search_wos, query_builder as wos_query_builder
from ..importers.search import search as api_search

from ..internet.scrapers import scrape_article, scrape_doi, scrape_google_scholar, scrape_google_scholar_search
@@ -1778,92 +1778,92 @@ def search_scopus(self,

return df

def search_wos(self,
all_fields = None,
title = None,
year = None,
author = None,
author_identifier = None,
affiliation = None,
doctype = None,
doi = None,
issn = None,
isbn = None,
pubmed_id = None,
source_title = None,
volume = None,
page = None,
issue = None,
topics = None,
default_operator = 'AND',
database: str = 'WOK',
limit: int = 10,
page_limit: int = 1,
sort_field: str = 'RS+D',
modified_time_span = None,
tc_modified_time_span = None,
detail = None,
add_to_results = False,
drop_duplicates = False,
drop_empty_rows = False
):

df = search_wos(
all_fields = all_fields,
title = title,
year = year,
author = author,
author_identifier = author_identifier,
affiliation = affiliation,
doctype = doctype,
doi = doi,
issn = issn,
isbn = isbn,
pubmed_id = pubmed_id,
source_title = source_title,
volume = volume,
page = page,
issue = issue,
topics = topics,
default_operator = default_operator,
database = database,
limit = limit,
page_limit = page_limit,
sort_field = sort_field,
modified_time_span = modified_time_span,
tc_modified_time_span = tc_modified_time_span,
detail = detail
)

for c in df.columns:
if c not in self.results.columns:
df = df.drop(c, axis=1)

if add_to_results == True:
# def search_wos(self,
# all_fields = None,
# title = None,
# year = None,
# author = None,
# author_identifier = None,
# affiliation = None,
# doctype = None,
# doi = None,
# issn = None,
# isbn = None,
# pubmed_id = None,
# source_title = None,
# volume = None,
# page = None,
# issue = None,
# topics = None,
# default_operator = 'AND',
# database: str = 'WOK',
# limit: int = 10,
# page_limit: int = 1,
# sort_field: str = 'RS+D',
# modified_time_span = None,
# tc_modified_time_span = None,
# detail = None,
# add_to_results = False,
# drop_duplicates = False,
# drop_empty_rows = False
# ):

# df = search_wos(
# all_fields = all_fields,
# title = title,
# year = year,
# author = author,
# author_identifier = author_identifier,
# affiliation = affiliation,
# doctype = doctype,
# doi = doi,
# issn = issn,
# isbn = isbn,
# pubmed_id = pubmed_id,
# source_title = source_title,
# volume = volume,
# page = page,
# issue = issue,
# topics = topics,
# default_operator = default_operator,
# database = database,
# limit = limit,
# page_limit = page_limit,
# sort_field = sort_field,
# modified_time_span = modified_time_span,
# tc_modified_time_span = tc_modified_time_span,
# detail = detail
# )

# for c in df.columns:
# if c not in self.results.columns:
# df = df.drop(c, axis=1)

# if add_to_results == True:

query = wos_query_builder(all_fields = all_fields,
title = title,
year = year,
author = author,
author_identifier = author_identifier,
affiliation = affiliation,
doctype = doctype,
doi = doi,
issn = issn,
isbn = isbn,
pubmed_id = pubmed_id,
source_title = source_title,
volume = volume,
page = page,
issue = issue,
topics = topics,
default_operator = default_operator)

self.activity_log.add_activity(type='API search', activity='searched Web of Science and added to results', location=['results'], database=database, query=query)
self.results.add_dataframe(dataframe=df, drop_duplicates=drop_duplicates, drop_empty_rows=drop_empty_rows) # type: ignore


return df
# query = wos_query_builder(all_fields = all_fields,
# title = title,
# year = year,
# author = author,
# author_identifier = author_identifier,
# affiliation = affiliation,
# doctype = doctype,
# doi = doi,
# issn = issn,
# isbn = isbn,
# pubmed_id = pubmed_id,
# source_title = source_title,
# volume = volume,
# page = page,
# issue = issue,
# topics = topics,
# default_operator = default_operator)

# self.activity_log.add_activity(type='API search', activity='searched Web of Science and added to results', location=['results'], database=database, query=query)
# self.results.add_dataframe(dataframe=df, drop_duplicates=drop_duplicates, drop_empty_rows=drop_empty_rows) # type: ignore


# return df

def lookup_doi(self, doi = 'request_input', timeout = 60):
return lookup_doi(doi=doi, timeout=timeout)
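Because the whole search_wos method is commented out rather than replaced, existing code that calls it on a review object will now fail with an AttributeError. One alternative, sketched here as an assumption rather than something this commit does, is to leave a stub in the class that raises a clear error and points callers at the methods that still work (search_scopus and api_search):

def search_wos(self, *args, **kwargs):
    # Stub standing in for the disabled implementation: fail with an explicit
    # message instead of letting callers hit an AttributeError on a missing method.
    raise NotImplementedError(
        'Web of Science search is disabled due to bugs in the wos importer; '
        'use search_scopus() or api_search() instead.'
    )
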
@@ -2221,7 +2221,7 @@ def api_search(self,
timeout = 60,
crossref = True,
scopus = True,
wos = True,
wos = False,
add_to_results = False):

df = api_search(default_query = default_query,
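The second hunk in review.py flips the api_search default from wos=True to wos=False. After this commit the flag is effectively inert either way, because the corresponding branch in art/importers/search.py (the last file below) is commented out. So a call like the following sketch (review is assumed to be an instance of the class defined in review.py) returns results from the remaining APIs only, even though wos=True is passed explicitly:

# Hypothetical usage; 'review' is assumed to be an instance of the review class.
# The wos=True flag is ignored because the Web of Science branch is disabled.
df = review.api_search(default_query='machine learning', crossref=True, scopus=True, wos=True)
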
4 changes: 2 additions & 2 deletions art/datasets/stopwords/stopwords.py
@@ -4,11 +4,11 @@

# Importing Stopwords corpus as an NLTK text
try:
from nltk.corpus import stopwords as nltk_stopwords
from nltk.corpus import stopwords as nltk_stopwords # type: ignore
nltk_stopwords.words()
except:
download('stopwords')
from nltk.corpus import stopwords as nltk_stopwords
from nltk.corpus import stopwords as nltk_stopwords # type: ignore

nltk_stopwords = list(nltk_stopwords.words())

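The stopwords change only adds type-checker hints; the bare except: around the NLTK lookup is left as it was. If that block is revisited later, a narrower version (a sketch, not part of this commit) could catch only the LookupError that NLTK raises when the corpus has not been downloaded:

from nltk import download

try:
    from nltk.corpus import stopwords as nltk_stopwords  # type: ignore
    nltk_stopwords.words()
except LookupError:
    # NLTK raises LookupError when the 'stopwords' corpus is missing locally
    download('stopwords')
    from nltk.corpus import stopwords as nltk_stopwords  # type: ignore

nltk_stopwords = list(nltk_stopwords.words())
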
64 changes: 32 additions & 32 deletions art/importers/search.py
@@ -1,5 +1,5 @@
from .scopus import search as search_scopus
from .wos import search as search_wos
# from .wos import search as search_wos
from .crossref import search_works as search_crossref
from .orcid import search as search_orcid

@@ -107,41 +107,41 @@ def search(default_query = None,
print(f'Encountered Scopus search error: {e}')
pass

if wos == True:
# if wos == True:

if (all_fields is None) and (default_query is not None):
all_fields_updated = default_query
else:
all_fields_updated = all_fields
# if (all_fields is None) and (default_query is not None):
# all_fields_updated = default_query
# else:
# all_fields_updated = all_fields

try:
wos_result = search_wos(
all_fields = all_fields_updated,
title = title,
year = year,
author = author,
author_identifier = author_identifier,
affiliation = affiliation,
doctype = entry_type,
doi = doi,
issn = issn,
isbn = isbn,
pubmed_id = pubmed_id,
source_title = source_title,
volume = volume,
page = page,
issue = issue,
topics = topics,
default_operator = default_operator,
limit = limit_per_api)
# try:
# wos_result = search_wos(
# all_fields = all_fields_updated,
# title = title,
# year = year,
# author = author,
# author_identifier = author_identifier,
# affiliation = affiliation,
# doctype = entry_type,
# doi = doi,
# issn = issn,
# isbn = isbn,
# pubmed_id = pubmed_id,
# source_title = source_title,
# volume = volume,
# page = page,
# issue = issue,
# topics = topics,
# default_operator = default_operator,
# limit = limit_per_api)

wos_result['repository'] = 'WOK'
# wos_result['repository'] = 'WOK'

df = pd.concat([df, wos_result])
df = df.reset_index().drop('index',axis=1)
except Exception as e:
print(f'Encountered Web of Science search error: {e}')
pass
# df = pd.concat([df, wos_result])
# df = df.reset_index().drop('index',axis=1)
# except Exception as e:
# print(f'Encountered Web of Science search error: {e}')
# pass

if orcid == True:

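With the whole wos branch commented out, passing wos=True to search() now silently returns no Web of Science results. A lighter-touch follow-up (a sketch of one possible option, not part of this commit) would keep a minimal branch inside search(), in place of the commented-out block, and emit a warning so callers know why WoS rows are missing:

import warnings

if wos == True:
    # The wos importer is temporarily disabled due to bugs, so no WoS query is sent.
    warnings.warn(
        'Web of Science search is disabled; skipping WoS results.',
        RuntimeWarning,
    )
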
