Skip to content

Instantly share code, notes, and snippets.

@perchouli
Last active December 14, 2015 09:08
Show Gist options
  • Save perchouli/5062386 to your computer and use it in GitHub Desktop.
Save perchouli/5062386 to your computer and use it in GitHub Desktop.
Java 调用 ICTCLAS分词(自定义词库)
package ICTCLAS.I3S.test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import ICTCLAS.I3S.AC.ICTCLAS50;
/**
 * Command-line demo that segments a fixed sample sentence with ICTCLAS twice:
 * once before and once after importing a user dictionary whose path is read
 * from standard input, printing both results so they can be compared.
 */
public class Test_ImportUsrDict {

    /** Charset name used for every byte[] handed to or received from ICTCLAS, and for console input. */
    private static final String ENCODING = "GB2312";

    /** POS tag set selected when no second command-line argument is given. */
    private static final int DEFAULT_POS_MAP = 1;

    /** Dictionary encoding code passed to ICTCLAS_ImportUserDictFile (2: GB2312, GBK, GB10380). */
    private static final int USER_DICT_ENCODING = 2;

    /**
     * Runs the demo.
     *
     * @param args {@code args[0]} is the path passed to {@code ICTCLAS_Init}
     *             (called {@code sPath} in the usage message); the optional
     *             {@code args[1]} is the integer POS tag set passed to
     *             {@code ICTCLAS_SetPOSmap} (defaults to 1)
     */
    public static void main(String[] args) {
        ICTCLAS50 ictclas = new ICTCLAS50();
        String usage = "Usage : java Test_ImportUsrDict sPath [nPOSmap]";
        if (args.length < 1) {
            System.err.println(usage);
            return;
        }
        try {
            if (!ictclas.ICTCLAS_Init(args[0].getBytes(ENCODING))) {
                System.err.println("Initial failed!");
                return;
            }
            System.out.println("Initial successed!");
            String input = "中国科学院计算技术研究所在多年研究工作积累的基础上,研制出了汉语词法分析系统ICTCLAS。";

            // Select the POS tag set: 0 = ICT second-level, 1 = ICT first-level,
            // 2 = PKU second-level, 3 = PKU first-level.
            int nPosmap = DEFAULT_POS_MAP;
            if (args.length >= 2) {
                try {
                    nPosmap = Integer.parseInt(args[1]);
                } catch (NumberFormatException e) {
                    // Report a malformed argument instead of dying with a stack trace.
                    System.err.println("Invalid nPOSmap value: " + args[1]);
                    System.err.println(usage);
                    return;
                }
            }
            ictclas.ICTCLAS_SetPOSmap(nPosmap);

            // Segmentation before the user dictionary is imported.
            System.out.println("未导入用户词典的分词结果:" + segment(ictclas, input));

            BufferedReader reader = new BufferedReader(new InputStreamReader(
                    System.in, ENCODING));
            System.out.print("input the src file:");
            String usrdir = reader.readLine();
            if (usrdir == null) {
                // readLine() yields null at end of stream (e.g. stdin closed or empty pipe).
                System.err.println("No user dictionary path was read from standard input.");
                return;
            }

            // First argument: user dictionary path. Second argument: dictionary encoding
            // (0: unknown; 1: ASCII; 2: GB2312, GBK, GB10380; 3: UTF-8; 4: BIG5).
            int nCount = ictclas.ICTCLAS_ImportUserDictFile(
                    usrdir.getBytes(ENCODING), USER_DICT_ENCODING);
            System.out.println("导入用户词个数" + nCount);

            // Segment the same sentence again now that the user dictionary is loaded.
            System.out.println("导入用户词典:" + segment(ictclas, input));

            // Save the user dictionary.
            ictclas.ICTCLAS_SaveTheUsrDic();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the segmenter's resources; this also runs on the early returns above.
            ictclas.ICTCLAS_Exit();
        }
    }

    /**
     * Segments {@code text} with {@code ICTCLAS_ParagraphProcess} and decodes the result.
     *
     * @param ictclas the segmenter instance to call
     * @param text    the sentence to segment
     * @return the returned bytes decoded as GB2312
     * @throws UnsupportedEncodingException if GB2312 is not available on this JVM
     */
    private static String segment(ICTCLAS50 ictclas, String text)
            throws UnsupportedEncodingException {
        // NOTE(review): the trailing (0, 1) arguments are carried over unchanged from the
        // original calls; presumably input-encoding and POS-tagging flags -- confirm
        // against the ICTCLAS50 API documentation.
        byte[] nativeBytes = ictclas.ICTCLAS_ParagraphProcess(
                text.getBytes(ENCODING), 0, 1);
        return new String(nativeBytes, 0, nativeBytes.length, ENCODING);
    }
}
@yaoziyu
Copy link

yaoziyu commented May 12, 2014

Hello, I am using the ICTCLAS tool now, but the result I get after loading my dictionary is the same as the result I get before loading it. Please help me! Thanks!

@raclen
Copy link

raclen commented Oct 29, 2015

好像缺少这个ICTCLAS50

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment