Last active
December 14, 2015 09:08
-
-
Save perchouli/5062386 to your computer and use it in GitHub Desktop.
Java 调用 ICTCLAS分词(自定义词库)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ICTCLAS.I3S.test; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.io.UnsupportedEncodingException; | |
import ICTCLAS.I3S.AC.ICTCLAS50; | |
public class Test_ImportUsrDict { | |
/** | |
* @param args | |
*/ | |
public static void main(String[] args) { | |
// TODO Auto-generated method stub | |
ICTCLAS50 ictclas = new ICTCLAS50(); | |
String useage = "Useage : java Test_ICTCLAS_ParagraphProcess sPath [nPOSmap]"; | |
if (args.length < 1) { | |
System.err.println(useage); | |
return; | |
} | |
try { | |
if (!ictclas.ICTCLAS_Init(args[0].getBytes("GB2312"))) { | |
System.err.println("Initial failed!"); | |
return; | |
} | |
System.out.println("Initial successed!"); | |
String input = "中国科学院计算技术研究所在多年研究工作积累的基础上,研制出了汉语词法分析系统ICTCLAS。"; | |
/* 设置词性标注集(0 计算所二级标注集,1 计算所一级标注集,2 北大二级标注集,3 北大一级标注集) */ | |
int nPosmap = args.length == 2 ? Integer.valueOf(args[1]) : 1; | |
ictclas.ICTCLAS_SetPOSmap(nPosmap); | |
/* 导入用户词典前 */ | |
byte nativeBytes[] = ictclas.ICTCLAS_ParagraphProcess(input | |
.getBytes("GB2312"), 0, 1); | |
String nativeStr = new String(nativeBytes, 0, nativeBytes.length, | |
"GB2312"); | |
System.out.println("未导入用户词典的分词结果:" + nativeStr); | |
int nCount = 0; | |
BufferedReader reader = new BufferedReader(new InputStreamReader( | |
System.in, "GB2312")); | |
System.out.print("input the src file:"); | |
String usrdir = reader.readLine(); | |
// 第一个参数为用户字典路径,第二个参数为用户字典的编码类型(0:type | |
// unknown;1:ASCII码;2:GB2312,GBK,GB10380;3:UTF-8;4:BIG5) | |
nCount = ictclas.ICTCLAS_ImportUserDictFile(usrdir | |
.getBytes("GB2312"), 2); | |
System.out.println("导入用户词个数" + nCount); | |
// 导入用户字典后再分词 | |
byte nativeBytes1[] = ictclas.ICTCLAS_ParagraphProcess(input | |
.getBytes("GB2312"), 0, 1); | |
String nativeStr1 = new String(nativeBytes1, 0, | |
nativeBytes1.length, "GB2312"); | |
System.out.println("导入用户词典:" + nativeStr1); | |
ictclas.ICTCLAS_SaveTheUsrDic(); /* 保存用户词典 */ | |
} catch (UnsupportedEncodingException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} catch (IOException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} finally { | |
// 释放分词组件资源 | |
ictclas.ICTCLAS_Exit(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello, I am using the ICTCLAS tool now, but the result I get after loading my dictionary is the same as the result I get before loading it. Please help me! Thanks!