分词器的测试

对一些分词器进行了简单的测试

package analyzer; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.cjk.CJKTokenizer; import org.apache.lucene.analysis.cn.ChineseTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.*; import org.wltea.analyzer.lucene.IKAnalyzer; import jeasy.analysis.*; public class Analyzertest { public static String s = "从分词准确性准确和效率两方面两方两方面方进行比较"; public static void StandardAnalyzer(String s) throws IOException { Analyzer analyzer = new StandardAnalyzer(); Reader r = new StringReader(s); StopFilter sf = (StopFilter) analyzer.tokenStream("", r); System.out.print("StandardAnalyzer:"); Token t; while ((t = sf.next()) != null) { System.out.print(t.termText() + "|"); } System.out.println(); } public static void CJKAnalyzer(String s) throws IOException { StringReader sr = new StringReader(s); CJKTokenizer CJK = new CJKTokenizer(sr); Token t = null; System.out.print("CJKAnalyzer:"); while ((t = CJK.next()) != null) { System.out.print(t.termText() + "|"); } System.out.println(); } public static void CNAnalyzer(String s) throws IOException { StringReader sr = new StringReader(s); ChineseTokenizer cn = new ChineseTokenizer(sr); Token t = null; System.out.print("CNAnalyzer:"); while ((t = cn.next()) != null) { System.out.print(t.termText() + "|"); } System.out.println(); } public static void JEAnalyzer(String s) throws IOException { MMAnalyzer mm = new MMAnalyzer(); System.out.print("JEAnalyzer:"); System.out.print(mm.segment(s, "|")); System.out.println(""); int i = MMAnalyzer.size(); System.out.println(i); } public static void IKAnalyzer(String s) throws IOException { StringReader sr = new StringReader(s); IKAnalyzer IK = new IKAnalyzer(); TokenStream ts = IK.tokenStream("*", sr); Token t = null; System.out.print("IKAnalyzer:"); while ((t = 
ts.next()) != null) { System.out.print(t.termText() + "|"); } System.out.println(); } public static void main(String[] args) throws IOException { StandardAnalyzer(s); CJKAnalyzer(s); CNAnalyzer(s); JEAnalyzer(s); IKAnalyzer(s); } }

 

测试结果(输入句子:从分词准确性和效率两方面进行比较):

StandardAnalyzer:从|分|词|准|确|性|和|效|率|两|方|面|进|行|比|较|
CJKAnalyzer:从分|分词|词准|准确|确性|性和|和效|效率|率两|两方|方面|面进|进行|行比|比较|
CNAnalyzer:从|分|词|准|确|性|和|效|率|两|方|面|进|行|比|较|
JEAnalyzer:从|分词|准确性|效率|两方面|进行|比较|
IKAnalyzer:从|分词|准确性|准确|和|效率|两方面|两方|两|方面|方|进行|比较|

你可能感兴趣的:(String,测试,null,Class,token,import)