/**
 * Trains a word vector model from a local corpus file and prints the
 * similarity score of one word pair.
 *
 * <p>The corpus and model locations are hard-coded absolute Windows paths, so
 * this test only runs on a machine that has those files. It makes no
 * assertions; all results are written to standard output for manual
 * inspection.
 */
@Test
public void testWord2VectorTrain() {
    // Machine-specific locations: the first is passed to train() as the corpus,
    // the second as the model file (presumably where the trained model is
    // written -- confirm against Word2VecTrainer.train).
    String trainFile = "D:\\Develop\\hanlp\\data\\hanlp-wiki-vec-zh.txt";
    String modelFile = "D:\\Develop\\hanlp\\data\\model\\word2vec.txt";

    Word2VecTrainer trainer = new Word2VecTrainer();
    // Report progress on stdout. Parameter names follow the labels used in the
    // printed messages.
    trainer.setCallback(new TrainingCallback() {
        /** Prints the corpus loading progress, labelled as a percentage. */
        @Override
        public void corpusLoading(float percent) {
            System.out.println("语料加载中 = " + percent + "%");
        }

        /**
         * Prints the corpus statistics once loading has finished: vocabulary
         * size, total frequency of the words actually used for training, and
         * total frequency of all words.
         */
        @Override
        public void corpusLoaded(int vocabularySize, int trainedWordCount, int totalWordCount) {
            System.out.println("语料加载完毕 = 词表大小:" + vocabularySize + "\t实际训练用到的词的总词频:" + trainedWordCount + "\t全部词语的总词频:" + totalWordCount);
        }

        /** Prints the current learning rate and the training completion percentage. */
        @Override
        public void training(float learningRate, float progress) {
            // Fixed label: the original text dropped the first character of
            // "训练" (training).
            System.out.println("学习率:" + learningRate + "\t训练完成百分比:" + progress);
        }
    });

    WordVectorModel wordVectorModel = trainer.train(trainFile, modelFile);
    // Spot check: print the similarity score of two related words.
    System.out.println("wordVectorModel = " + wordVectorModel.similarity("医疗", "医院"));
}
// 版本 (version): portable-1.7.8
// JVM 参数 (JVM options): -Xmx5000m