diff --git a/config/cfg.properties b/config/cfg.properties index 1af9b6f..b5eb8bc 100644 --- a/config/cfg.properties +++ b/config/cfg.properties @@ -6,4 +6,5 @@ org.aksw.word2vecrestful.Application.inmemory: true org.aksw.word2vecrestful.Application.subsetfiledir: data/subset-files-1/ org.aksw.word2vecrestful.word2vec.normalizedbinmodel.bin: true org.aksw.word2vecrestful.word2vec.normalizedbinmodel.model: data/normalbinmodel/GoogleNews-vectors-negative300-normalized.bin -org.aksw.word2vecrestful.word2vec.stats.sdfile: data/normal/stat/normal-model-sd.csv \ No newline at end of file +org.aksw.word2vecrestful.word2vec.stats.sdfile: data/normal/stat/normal-model-sd.csv +org.aksw.word2vecrestful.word2vec.W2VNrmlMemModelKMeans.filepath: data/kmeans/comparison-vecs.csv \ No newline at end of file diff --git a/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelBinSrch.java b/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelBinSrch.java index 4634997..f50c8a8 100644 --- a/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelBinSrch.java +++ b/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelBinSrch.java @@ -38,6 +38,9 @@ public class W2VNrmlMemModelBinSrch implements GenWord2VecModel { protected int bucketCount = 10; protected BitSet[][] csBucketContainer; + protected W2VNrmlMemModelBinSrch() { + } + public W2VNrmlMemModelBinSrch(final Map word2vec, final int vectorSize) throws IOException { this.word2vec = word2vec; this.vectorSize = vectorSize; diff --git a/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelKMeans.java b/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelKMeans.java index d3dc9f8..d8ec809 100644 --- a/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelKMeans.java +++ b/src/main/java/org/aksw/word2vecrestful/word2vec/W2VNrmlMemModelKMeans.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Map; +import org.aksw.word2vecrestful.utils.Cfg; import org.aksw.word2vecrestful.utils.ClusterableVec; import org.aksw.word2vecrestful.utils.Word2VecMath; import org.apache.commons.math3.ml.clustering.CentroidCluster; @@ -38,8 +39,8 @@ public class W2VNrmlMemModelKMeans extends W2VNrmlMemModelBinSrch { public static Logger LOG = LogManager.getLogger(GenWord2VecModel.class); - protected int kMeansMaxItr = 5; - protected String vecFilePath = "data/kmeans/comparison-vecs.csv"; + private static final int KMEANS_MAX_ITR = 5; + private static final String VEC_FILEPATH = Cfg.get(W2VNrmlMemModelKMeans.class.getName().concat(".filepath")); public W2VNrmlMemModelKMeans(final Map word2vec, final int vectorSize) throws IOException { super(word2vec, vectorSize); @@ -54,7 +55,7 @@ protected void process() throws IOException { } private void fetchComparisonVectors() throws IOException { - File vecFile = new File(vecFilePath); + File vecFile = new File(VEC_FILEPATH); if (vecFile.exists()) { LOG.info("Reading Comparsion vectors from the file."); // read the persisted vectors @@ -72,7 +73,7 @@ private void fetchComparisonVectors() throws IOException { private void generateComparisonVectors() { KMeansPlusPlusClusterer clusterer = new KMeansPlusPlusClusterer<>(compareVecCount, - kMeansMaxItr); + KMEANS_MAX_ITR); List vecList = new ArrayList<>(); for (float[] vec : word2vec.values()) { vecList.add(getClusterablePoint(vec));