solr拼音检索

 拼音检索的大致思路是这样的:

            ①将需要使用拼音检索的字段汇集到一个拼音分词字段里(我的拼音分词字段是用pinyin4j+NGram实现的);

                加入三个jar包:pinyin4j-2.5.0.jar、lucene-analyzers-smartcn-4.9.1.jar、pinyinAnalyzer.jar;将这些jar包拷入 Java\apache-tomcat-7.0.55\webapps\solr\WEB-INF\lib 目录下

                schema.xml文件设置如下(注意将拼音字段pinyin的stored设置为true):

<!-- Core document fields; every one of them is indexed and stored. -->
<!-- "id" is a required, single-valued string. -->
<field name="id"       type="string"       indexed="true" stored="true" required="true" multiValued="false"/>
<!-- "title" and "content" use the text_ik type; "author" may hold several values. -->
<field name="title"    type="text_ik"      indexed="true" stored="true"/>
<field name="author"   type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="keywords" type="text_general" indexed="true" stored="true"/>
<field name="content"  type="text_ik"      indexed="true" stored="true"/>






<!-- "tc" is a multi-valued text_ik field that is the destination of the two
     copyField rules below, so it collects the values of "title" and "content". -->
<field name="tc" type="text_ik" indexed="true" stored="true" multiValued="true"/>
<copyField source="title"   dest="tc"/>
<copyField source="content" dest="tc"/>




<!-- Book / resource metadata fields; every one of them is indexed and stored. -->
<field name="bookid"       type="text_general" indexed="true" stored="true"/>
<field name="bookname"     type="text_general" indexed="true" stored="true"/>
<field name="url"          type="text_general" indexed="true" stored="true"/>
<field name="resourcetype" type="int"          indexed="true" stored="true"/>
<field name="classifycode" type="text_general" indexed="true" stored="true"/>
<field name="price"        type="float"        indexed="true" stored="true"/>
<field name="updateTime"   type="date"         indexed="true" stored="true"/>






<!-- Pinyin search field: typed as "text_pinyin", stored, multi-valued,
     and filled with a copy of "title" by the copyField rule below. -->
<field name="pinyin" type="text_pinyin" indexed="true" stored="true" multiValued="true"/>
<copyField source="title" dest="pinyin"/>
<!-- by michael: pinyin -->
<!--
  Field type "text_pinyin", referenced by the "pinyin" field.
  The index-time and query-time analyzers are configured with the same chain:
  the SmartChinese sentence tokenizer, the SmartChinese word filter, then two
  custom filters from com.shentong.search.analyzers (presumably pinyin
  conversion followed by n-gram expansion; confirm against pinyinAnalyzer.jar).
-->
<fieldType name="text_pinyin" class="solr.TextField" positionIncrementGap="0">
  <analyzer type="index">
    <tokenizer class="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"/>
    <filter class="org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"/>
    <!-- NOTE(review): "minTermLenght" is kept exactly as spelled in the original;
         the custom factory may read this exact attribute name. Verify before renaming. -->
    <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory" minTermLenght="2"/>
    <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory" minGram="6" maxGram="20"/>
  </analyzer>
  <!-- The query analyzer mirrors the index analyzer so both sides produce comparable terms. -->
  <analyzer type="query">
    <tokenizer class="org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory"/>
    <filter class="org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory"/>
    <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory" minTermLenght="2"/>
    <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory" minGram="6" maxGram="20"/>
  </analyzer>
</fieldType>

你可能感兴趣的:(solr拼音检索)