diff --git a/examples/ja_example.py b/examples/ja_example.py index b5710c7..1815c2c 100644 --- a/examples/ja_example.py +++ b/examples/ja_example.py @@ -17,9 +17,9 @@ dict_type = 'neologd' path_mecab_config = '/usr/local/bin/' pos_condition = [('名詞', )] -mysql_username = '' +mysql_username = 'your-mysql-user-name-here' mysql_hostname = 'localhost' -mysql_password = '' +mysql_password = 'your-mysql-password-here' mysql_db_name = 'wikipedia' # ------------------------------------------------------------ entity_linking_model = load_entity_model(path_model_file) diff --git a/setup.py b/setup.py index 42ef857..34dc024 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages name='word2vec_wikification_py' -version='0.15' +version='0.16' description='A package to run wikification' author='Kensuke Mitsuzawa' author_email='kensuke.mit@gmail.com' @@ -12,7 +12,6 @@ install_requires = [ 'gensim', - 'mysqlclient', 'pymysql', 'typing' ] diff --git a/word2vec_wikification_py/init_logger.py b/word2vec_wikification_py/init_logger.py index 7fc7684..0da0458 100644 --- a/word2vec_wikification_py/init_logger.py +++ b/word2vec_wikification_py/init_logger.py @@ -15,7 +15,7 @@ # StreamHandler STREAM_LEVEL = logging.DEBUG STREAM_FORMATTER = custmoFormatter -STREAM = sys.stdout +STREAM = sys.stderr st_handler = StreamHandler(stream=STREAM) st_handler.setLevel(STREAM_LEVEL) diff --git a/word2vec_wikification_py/make_lattice.py b/word2vec_wikification_py/make_lattice.py index 88c7704..ac3e64c 100644 --- a/word2vec_wikification_py/make_lattice.py +++ b/word2vec_wikification_py/make_lattice.py @@ -2,7 +2,7 @@ from gensim.models import Word2Vec from word2vec_wikification_py import init_logger from word2vec_wikification_py.models import WikipediaArticleObject, PersistentDict, LatticeObject, IndexDictionaryObject, EdgeObject -from typing import List, Tuple, Union, Any, Dict +from typing import List, Tuple, Union, Any, Dict, Set from tempfile import mkdtemp from scipy.sparse import csr_matrix import os @@ -48,9 +48,9 @@ def make_state_transition_edge(state_t_word_tuple:Tuple[int,str], - tuple object whose element is (transition_element, row2index, column2index) - transition_element is (row_index, column_index, transition_score) """ - if not state_t_word_tuple[1] in entity_vector.vocab: + if not state_t_word_tuple[1] in entity_vector.wv.vocab: raise Exception('Element does not exist in entity_voctor model. element={}'.format(state_t_word_tuple)) - if not state_t_plus_word_tuple[1] in entity_vector.vocab: + if not state_t_plus_word_tuple[1] in entity_vector.wv.vocab: raise Exception('Element does not exist in entity_voctor model. element={}'.format(state_t_plus_word_tuple)) transition_score = entity_vector.similarity(state_t_word_tuple[1], state_t_plus_word_tuple[1]) # type: float @@ -134,7 +134,7 @@ def make_state_transition_sequence(seq_wiki_article_name:List[WikipediaArticleOb return (state2index_obj, seq_edge_group, transition_matrix) -def filter_out_of_vocabulary_word(wikipedia_article_obj: WikipediaArticleObject, vocabulary_words:set)->Union[bool, WikipediaArticleObject]: +def filter_out_of_vocabulary_word(wikipedia_article_obj: WikipediaArticleObject, vocabulary_words:Set)->Union[bool, WikipediaArticleObject]: """* What you can do - You remove out-of-vocabulary word from wikipedia_article_obj.candidate_article_name """ @@ -173,7 +173,7 @@ def make_lattice_object(seq_wiki_article_name:List[WikipediaArticleObject], state2index=persistent_state2index, index2state={}) - vocabulary_words = set(entity_vector_model.vocab.keys()) + vocabulary_words = set(entity_vector_model.wv.vocab.keys()) seq_wiki_article_name = [ wiki_article_name for wiki_article_name in seq_wiki_article_name