fastText是Facebook开源的一个词向量与文本分类工具:
https://github.com/facebookresearch/fastText

fastText本身用C++实现,官方提供了C++和Python的接口。
通过Java使用fastText,搜到了这个项目 https://github.com/vinhkhuc/JFastText ,使用起来很方便,性能也不错;但是有一个问题,就是如果模型多次更新,就会产生僵尸进程导致程序挂掉,并引起机器的负载升高。

所以决定使用JNI的方式调用fastText

步骤:
1、编写Java程序

FastTextJNI.java:
/**
 * Thin JNI binding around the native fastText library (libfasttext.so).
 *
 * <p>Call {@link #init()} once before using any native method: it loads the
 * shared library and creates the shared {@link #ftJNI} instance. Models are
 * identified by the opaque {@code long} handle returned from
 * {@link #FastTextJNILoadModel(String)}; every handle must eventually be passed
 * to {@link #FastTextJNIReleaseModel(long)} (or {@link #finalize(Long)}) to
 * free the native memory.
 */
public class FastTextJNI {

    /** Shared instance created by {@link #init()}; {@code null} until init() has run. */
    public static FastTextJNI ftJNI;

    /** Absolute path of the native library passed to {@link System#load(String)}. */
    private static final String LIB_PATH = ConstantsConfig.CONFIG_BASE_PATH + "libfasttext.so";

    /**
     * Handle of the model loaded by {@link #FastTextJNI(String)}, or 0 when this
     * instance did not load a model itself. Kept so the model can be released
     * later instead of leaking.
     */
    private long ownedModelHandle;

    /**
     * Loads the model stored in {@code fileName}.
     *
     * @param fileName path of the fastText model file
     * @return opaque native handle of the loaded model
     */
    public native long FastTextJNILoadModel(String fileName);

    /**
     * Computes the sentence vector of {@code words} with the given model.
     *
     * @param modelHandle handle returned by {@link #FastTextJNILoadModel(String)}
     * @param words       input text
     * @return the sentence vector as a float array
     */
    public native float[] FastTextSentenceVector(long modelHandle, String words);

    /**
     * Frees the native model behind {@code modelHandle}.
     *
     * @param modelHandle handle returned by {@link #FastTextJNILoadModel(String)}
     */
    public native void FastTextJNIReleaseModel(long modelHandle);

    /** Loads the native library from {@code LIB_PATH} and creates the shared {@link #ftJNI} instance. */
    public static void init() {
        System.load(LIB_PATH);
        ftJNI = new FastTextJNI();
    }

    /** Creates a binding object that does not own any model. */
    public FastTextJNI() {
    }

    /**
     * Creates a binding object and loads the model in {@code fileName}.
     * The resulting handle is remembered and available through
     * {@link #getModelHandle()}; previously it was discarded, which made the
     * loaded model impossible to release.
     *
     * @param fileName path of the fastText model file
     */
    public FastTextJNI(String fileName) {
        ownedModelHandle = FastTextJNILoadModel(fileName);
    }

    /**
     * @return the handle of the model loaded by {@link #FastTextJNI(String)},
     *         or 0 if this instance has not loaded one (or it was released)
     */
    public long getModelHandle() {
        return ownedModelHandle;
    }

    /**
     * Convenience wrapper around {@link #FastTextSentenceVector(long, String)}.
     *
     * @param modelHandle handle returned by {@link #FastTextJNILoadModel(String)}
     * @param words       input text
     * @return the sentence vector as a float array
     */
    public float[] getSentenceVector(long modelHandle, String words) {
        return FastTextSentenceVector(modelHandle, words);
    }

    /**
     * Releases the native model behind {@code modelHandle}.
     *
     * <p>Note: this is an overload, not an override of {@link Object#finalize()},
     * so the garbage collector never calls it; it must be invoked explicitly.
     * A {@code null} or zero handle is ignored. The caller must reset its own
     * copy of the handle afterwards; Java passes the argument by value, so it
     * cannot be cleared from here.
     *
     * @param modelHandle handle to release; may be {@code null}
     */
    public void finalize(Long modelHandle) {
        if (modelHandle == null || modelHandle.longValue() == 0L) {
            return;
        }
        FastTextJNIReleaseModel(modelHandle.longValue());
        // Forget the handle if it was the one this instance loaded, so that
        // getModelHandle() does not hand out a dangling value.
        if (modelHandle.longValue() == ownedModelHandle) {
            ownedModelHandle = 0L;
        }
    }

}

2、编译Java程序 javac FastTextJNI.java

3、继续编译生成c的头文件(给c/c++程序用)
javah -jni -classpath . FastTextJNI,生成了头文件FastTextJNI.h

FastTextJNI.h:
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class FastTextJNI */

/*
 * Include guard. It must not be spelled "FastTextJNI": a macro with the same
 * name as the Java class would silently erase that identifier wherever it is
 * used after this header is included. "_Included_<class>" is the guard name
 * javah itself emits.
 */
#ifndef _Included_FastTextJNI
#define _Included_FastTextJNI
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class:     FastTextJNI
 * Method:    FastTextJNILoadModel
 * Signature: (Ljava/lang/String;)J
 */
JNIEXPORT jlong JNICALL Java_FastTextJNI_FastTextJNILoadModel
  (JNIEnv *, jobject, jstring);

/*
 * Class:     FastTextJNI
 * Method:    FastTextSentenceVector
 * Signature: (JLjava/lang/String;)[F
 */
JNIEXPORT jfloatArray JNICALL Java_FastTextJNI_FastTextSentenceVector
  (JNIEnv *, jobject, jlong, jstring);

/*
 * Class:     FastTextJNI
 * Method:    FastTextJNIReleaseModel
 * Signature: (J)V
 */
JNIEXPORT void JNICALL Java_FastTextJNI_FastTextJNIReleaseModel
  (JNIEnv *, jobject, jlong);

#ifdef __cplusplus
}
#endif
#endif

4、新建cpp文件,实现javah生成的头文件中声明的函数

cpp文件(下文编译命令中的 a.cpp):
#include <exception>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>

#include "FastTextJNI.h"
#include "fastText/src/fasttext.h"

using namespace fasttext;

/**
 * JNI implementation of FastTextJNI.FastTextJNILoadModel(String).
 *
 * Creates a fasttext::FastText object, loads the model file named by
 * jmodelName into it and returns the object's address as an opaque handle.
 * The handle must later be passed to FastTextJNIReleaseModel to free it.
 *
 * @param jenv       JNI environment of the calling thread
 * @param jobj       the calling FastTextJNI object (unused)
 * @param jmodelName path of the model file
 * @return the model handle, or 0 with a Java exception pending when the
 *         argument is null, the string cannot be converted, or loading throws
 */
JNIEXPORT jlong JNICALL Java_FastTextJNI_FastTextJNILoadModel
(JNIEnv *jenv, jobject jobj, jstring jmodelName)
{
    (void)jobj;

    if (jmodelName == nullptr) {
        jclass npe = jenv->FindClass("java/lang/NullPointerException");
        if (npe != nullptr) {
            jenv->ThrowNew(npe, "model file name is null");
        }
        return 0;
    }

    const char* modelName = jenv->GetStringUTFChars(jmodelName, nullptr);
    if (modelName == nullptr) {
        // GetStringUTFChars failed; the JVM has already raised OutOfMemoryError.
        return 0;
    }
    // Copy the path and give the JVM buffer back immediately, so it is released
    // on every path below.
    const std::string modelPath(modelName);
    jenv->ReleaseStringUTFChars(jmodelName, modelName);

    try {
        // unique_ptr frees the half-initialised model if loadModel throws.
        std::unique_ptr<fasttext::FastText> model(new fasttext::FastText());
        model->loadModel(modelPath);
        return reinterpret_cast<jlong>(model.release());
    } catch (const std::exception& e) {
        // A C++ exception must not propagate into the JVM; report it as a
        // Java exception instead.
        jclass rte = jenv->FindClass("java/lang/RuntimeException");
        if (rte != nullptr) {
            jenv->ThrowNew(rte, e.what());
        }
        return 0;
    }
}

/**
 * JNI implementation of FastTextJNI.FastTextSentenceVector(long, String).
 *
 * Runs FastText::getSentenceVector on the text in jwords and returns the
 * resulting vector as a new Java float[]. The array length is the model's
 * own dimension (FastText::getDimension) instead of a hard-coded 20, so
 * models trained with any vector size work.
 *
 * @param jenv         JNI environment of the calling thread
 * @param jobj         the calling FastTextJNI object (unused)
 * @param jmodelHandle handle returned by FastTextJNILoadModel
 * @param jwords       input text
 * @return a new float[] holding the sentence vector, or NULL with a Java
 *         exception pending when an argument is invalid, an allocation
 *         fails, or fastText throws
 */
JNIEXPORT jfloatArray JNICALL Java_FastTextJNI_FastTextSentenceVector
(JNIEnv *jenv, jobject jobj, jlong jmodelHandle, jstring jwords)
{
    (void)jobj;

    fasttext::FastText* fastText = reinterpret_cast<fasttext::FastText*>(jmodelHandle);
    if (fastText == nullptr || jwords == nullptr) {
        jclass iae = jenv->FindClass("java/lang/IllegalArgumentException");
        if (iae != nullptr) {
            jenv->ThrowNew(iae, "model handle is 0 or input text is null");
        }
        return nullptr;
    }

    const char* words = jenv->GetStringUTFChars(jwords, nullptr);
    if (words == nullptr) {
        // GetStringUTFChars failed; the JVM has already raised OutOfMemoryError.
        return nullptr;
    }
    // Copy the text, then release the JVM buffer right away. The original
    // code never released it and leaked one buffer per call.
    const std::string text(words);
    jenv->ReleaseStringUTFChars(jwords, words);

    try {
        std::istringstream in(text);
        const int dim = fastText->getDimension();
        fasttext::Vector vec(dim);
        fastText->getSentenceVector(in, vec);

        jfloatArray result = jenv->NewFloatArray(dim);
        if (result == nullptr) {
            // Allocation failed; OutOfMemoryError is already pending.
            return nullptr;
        }
        jenv->SetFloatArrayRegion(result, 0, dim, vec.data_);
        return result;
    } catch (const std::exception& e) {
        // Report the failure to Java instead of printing it and returning a
        // vector whose contents are not a valid result.
        jclass rte = jenv->FindClass("java/lang/RuntimeException");
        if (rte != nullptr) {
            jenv->ThrowNew(rte, e.what());
        }
        return nullptr;
    }
}

/**
 * JNI implementation of FastTextJNI.FastTextJNIReleaseModel(long).
 *
 * Destroys the FastText object whose address is stored in jmodelHandle.
 * A zero handle is ignored.
 *
 * @param jenv         JNI environment of the calling thread (unused)
 * @param jobj         the calling FastTextJNI object (unused)
 * @param jmodelHandle handle returned by FastTextJNILoadModel, or 0
 */
JNIEXPORT void JNICALL Java_FastTextJNI_FastTextJNIReleaseModel
(JNIEnv *jenv, jobject jobj, jlong jmodelHandle)
{
    // Nothing was loaded for a zero handle, so there is nothing to free.
    if (!jmodelHandle) {
        return;
    }

    fasttext::FastText* const model = reinterpret_cast<fasttext::FastText*>(jmodelHandle);
    delete model;
}

5、最后编译so库,生成 libfasttext.so 文件使用

编译命令:
# Build the JNI shared library libfasttext.so from a.cpp.
# -fpic and -shared produce a position-independent shared object; the -I flags
# add the current directory and the JDK include directories to the header
# search path.
# NOTE(review): only a.cpp is compiled here. The fastText sources (or a prebuilt
# fastText library) presumably have to be compiled/linked in as well; confirm,
# otherwise loading the .so may fail on undefined symbols.
g++ -fpic -shared a.cpp -o libfasttext.so -I./ -I/opt/soft/jdk/jdk1.6.0_45/include -I/opt/soft/jdk/jdk1.6.0_45/include/linux

生成so库之后,执行 ldd libfasttext.so 检查依赖:如果能正常列出它所依赖的动态库(没有出现 not found),说明生成的库没有问题