diff --git a/src/main/java/org/aksw/word2vecrestful/tool/ModelNormalizer.java b/src/main/java/org/aksw/word2vecrestful/tool/ModelNormalizer.java index e1d5d16..cccc21f 100644 --- a/src/main/java/org/aksw/word2vecrestful/tool/ModelNormalizer.java +++ b/src/main/java/org/aksw/word2vecrestful/tool/ModelNormalizer.java @@ -9,7 +9,6 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; -import java.sql.PreparedStatement; import java.sql.SQLException; import org.aksw.word2vecrestful.utils.Cfg; @@ -97,58 +96,6 @@ public void generateNormalizedModel(File inputFile, File outputFile) throws IOEx } } - /** - * Method to persist a normalized model for a word2vec bin model - * - * @param inputFile - * - word2vec file of the model to be normalized - * @param dbName - * - name of the database - * @param tablName - * - name of the table to store the data in - * @throws IOException - * @throws SQLException - */ - public void persistNormalizedModel(File inputFile, String dbName, String tblName) throws IOException, SQLException { - // intialize handler instance - NormalizedDBModelGenerator dbHandler = null; - - FileInputStream fin = null; - try { - // reads file header - fin = new FileInputStream(inputFile); - String word = Word2VecModelLoader.readWord(fin); - int words = Integer.parseInt(word); - word = Word2VecModelLoader.readWord(fin); - int vectorSize = Integer.parseInt(word); - dbHandler = new NormalizedDBModelGenerator(dbName, tblName, vectorSize); - // open connection - dbHandler.connect(); - LOG.info("Expecting " + words + " words with " + vectorSize + " values per vector."); - // create preparedstatement - PreparedStatement ps = dbHandler.generateMainTblInsrtStmnt(); - for (int w = 0; w < words; ++w) { - word = Word2VecModelLoader.readWord(fin); - // LOG.info(word); - float[] vector = Word2VecModelLoader.readVector(fin, vectorSize); - // dbHandler.insertMainTblRecord(word, vector); - dbHandler.addMainTblInsrtBatch(word, Word2VecMath.normalize(vector), ps); - if ((w + 1) % 50000 == 0) { - dbHandler.executeBatchCommit(ps); - LOG.info((w + 1) + " Records inserted."); - } - } - dbHandler.executeBatchCommit(ps); - // Generate Index on completion - dbHandler.makeIndex(); - } catch (final IOException e) { - LOG.error(e.getLocalizedMessage(), e); - } finally { - fin.close(); - dbHandler.disconnect(); - } - } - /** * Method to generate a normalized model for a word2vec bin model * @@ -199,26 +146,6 @@ public void generateNormalizedBinModel(File inputFile, File outputFile) throws I } } - /* - * public static void main(String[] args) throws IOException { String - * cfgKeyModel = Word2VecFactory.class.getName().concat(".model"); String model - * = (Cfg.get(cfgKeyModel)); ModelNormalizer modelNormalizer = new - * ModelNormalizer(); File inputFile = new File(model); File outputFile = new - * File( - * "D:\\Nikit\\DICE-Group\\Jword2vec\\data\\normal\\GoogleNews-vectors-negative300-normalized.txt" - * ); modelNormalizer.generateNormalizedModel(inputFile, outputFile); } - */ - - /* - * public static void main(String[] args) throws IOException, SQLException { - * String cfgKeyModel = Word2VecFactory.class.getName().concat(".model"); String - * model = (Cfg.get(cfgKeyModel)); ModelNormalizer modelNormalizer = new - * ModelNormalizer(); File inputFile = new File(model); // - * modelNormalizer.generateNormalizedModel(inputFile, outputFile); - * modelNormalizer.persistNormalizedModel(inputFile, - * "data/nrmldb/word2vecmodel", "wordtovec"); } - */ - public static void main(String[] args) throws IOException, SQLException { String cfgKeyModel = Word2VecFactory.class.getName().concat(".model"); String model = (Cfg.get(cfgKeyModel)); diff --git a/src/main/java/org/aksw/word2vecrestful/tool/NormalizedDBModelGenerator.java b/src/main/java/org/aksw/word2vecrestful/tool/NormalizedDBModelGenerator.java deleted file mode 100644 index 662324b..0000000 --- a/src/main/java/org/aksw/word2vecrestful/tool/NormalizedDBModelGenerator.java +++ /dev/null @@ -1,91 +0,0 @@ -package org.aksw.word2vecrestful.tool; - -import java.sql.PreparedStatement; -import java.sql.SQLException; - -import org.aksw.word2vecrestful.db.SQLiteDBHandler; - -public class NormalizedDBModelGenerator extends SQLiteDBHandler { - - private String mainTblName; - private int vectorSize; - private String insertQuery; - - public NormalizedDBModelGenerator(String dbName, String mainTblName, int vectorSize) { - super(dbName); - this.mainTblName = mainTblName; - this.vectorSize = vectorSize; - this.insertQuery = this.createInsertQuery(); - createMainTable(); - } - - public void createMainTable() { - StringBuilder sqlStr = new StringBuilder("CREATE TABLE IF NOT EXISTS "); - sqlStr.append(this.mainTblName); - sqlStr.append(" ( word text "); - for (int i = 0; i < vectorSize; i++) { - sqlStr.append(", val").append(i + 1).append(" float NOT NULL "); - } - sqlStr.append(");"); - executeStatement(sqlStr.toString()); - } - - private String createInsertQuery() { - StringBuilder insrtStr = new StringBuilder(); - insrtStr.append("insert into ").append(this.mainTblName).append(" values ( ?"); - for (int i = 0; i < vectorSize; i++) { - insrtStr.append(", ?"); - } - insrtStr.append(") ;"); - return insrtStr.toString(); - } - - public PreparedStatement generateMainTblInsrtStmnt() throws SQLException { - PreparedStatement prep = connection.prepareStatement(this.insertQuery); - connection.setAutoCommit(false); - return prep; - } - - /** - * Creates an index. - */ - public void makeIndex() { - final String sql = "CREATE INDEX Idx1 ON " + this.mainTblName + "(word)"; - if (connection != null) { - try { - final PreparedStatement prep = connection.prepareStatement(sql); - prep.execute(); - prep.close(); - commit(); - } catch (final SQLException e) { - LOG.error(e.getLocalizedMessage(), e); - } - } - } - - public void addMainTblInsrtBatch(String word, float[] vector, PreparedStatement ps) throws SQLException { - ps.setString(1, word); - for (int i = 0; i < this.vectorSize; i++) { - ps.setFloat(i + 2, vector[i]); - } - ps.addBatch(); - } - - public int[] executeBatchCommit(PreparedStatement ps) throws SQLException { - int[] res = ps.executeBatch(); - connection.commit(); - return res; - } - - public boolean insertMainTblRecord(String word, float[] vector) throws SQLException { - boolean recInserted = false; - PreparedStatement prep = connection.prepareStatement(this.insertQuery); - prep.setString(1, word); - for (int i = 0; i < this.vectorSize; i++) { - prep.setFloat(i + 2, vector[i]); - } - recInserted = prep.execute(); - prep.close(); - return recInserted; - } -}