测试版本Lucene 2.9.4
====================================
Lucene中的BooleanQuery用于实现“与”“或”的复合搜索。
BooleanClause是用于表示布尔查询子句关系的类,包括:BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.SHOULD,分别表示必须包含,不能包含,可以包含三种关系。它们有以下6种组合:
1.MUST和MUST:取得两个查询子句的交集。
2.MUST和MUST_NOT:表示查询结果中不能包含MUST_NOT所对应的查询子句的检索结果。
3.SHOULD与MUST_NOT:连用时,功能同MUST和MUST_NOT。
4.SHOULD与MUST连用时,结果为MUST子句的检索结果,但是SHOULD可影响排序。
5.SHOULD与SHOULD:表示“或”关系,最终检索结果为所有检索子句的并集。
6.MUST_NOT和MUST_NOT:无意义,检索无结果。
测试程序:
package org.apache.lucene.search;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Demonstrates how {@link BooleanClause.Occur} values combine inside a
 * {@link BooleanQuery}.
 *
 * <p>Each entry of {@link #DOC_TEXT_LINES} is indexed as its own document in
 * the {@code title} field using a whitespace tokenizer, so terms are
 * case-sensitive and keep any attached punctuation ({@code "text,"} is a
 * different term from {@code "text"}). Every test prints the ranked hits and
 * asserts the number of matching documents.
 */
public class TestBooleanClauseOccur extends LuceneTestCase {

  /** Name of the single indexed and stored field. */
  private static final String FIELD = "title";

  /** Maximum number of hits requested from the searcher. */
  private static final int MAX_HITS = 10;

  private static final String[] DOC_TEXT_LINES = {
    "Well, this is just some plain text we use for creating the ", // 0
    "test documents. It used to be a text from an online collection ", // 1
    "devoted to first aid, but if there was there an (online) lawyers ", // 2
    "first aid collection with legal advices, \"it\" might have quite ", // 3
    "probably advised one not to include \"it\"'s text or the text of ", // 4
    "any other online collection in one's code, unless one has money ", // 5
    "that one don't need and one is happy to donate for lawyers ", // 6
    "charity. Anyhow at some point, rechecking the usage of this text, ", // 7
    "it became uncertain that this text is free to use, because ", // 8
    "the web site in the disclaimer of he eBook containing that text ", // 9
    "was not responding anymore, and at the same time, in projGut, ", // 10
    "searching for first aid no longer found that eBook as well. ", // 11
    "So here we are, with a perhaps much less interesting ", // 12
    "text for the test, but oh much much safer. ", // 13
  };

  private IndexSearcher searcher;
  private IndexReader reader;
  private Directory directory;
  private Analyzer analyzer;

  /**
   * Builds an in-memory index with one document per entry of
   * {@link #DOC_TEXT_LINES} and opens a reader and a searcher on it.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    directory = new RAMDirectory();
    analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(reader);
      }

      @Override
      public int getPositionIncrementGap(String fieldName) {
        return 100;
      }
    };

    IndexWriter writer =
        new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    try {
      for (String line : DOC_TEXT_LINES) {
        Document doc = new Document();
        doc.add(new Field(FIELD, line, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
      reader = writer.getReader();
    } finally {
      // Close the writer even when indexing fails so it is not leaked.
      writer.close();
    }
    // IndexSearcher(Directory) is deprecated in 2.9; open read-only explicitly.
    searcher = new IndexSearcher(directory, true);
  }

  /** Releases the searcher, reader, directory and analyzer created by {@link #setUp()}. */
  @Override
  protected void tearDown() throws Exception {
    searcher.close();
    reader.close();
    directory.close();
    analyzer.close();
    super.tearDown();
  }

  /**
   * MUST combined with SHOULD: the result set is exactly the set of documents
   * matching the MUST clause; the SHOULD clauses only influence the ranking.
   */
  public void testOccurCombination1() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(titleTerm("text", 3.0f), BooleanClause.Occur.MUST);
    addOptionalTerms(query);

    ScoreDoc[] hits = searcher.search(query, null, MAX_HITS).scoreDocs;
    printHits("=== query {text(must) some(should) the(should) that(should) } ===",
        "docID : ", hits);

    // "text" occurs as a standalone token in documents 0, 1, 4, 8, 9 and 13.
    assertEquals("only documents containing the MUST term may match", 6, hits.length);
  }

  /**
   * MUST combined with a nested MUST sub-query made of SHOULD clauses: a
   * document has to contain "text" and at least one of the optional terms.
   */
  public void testOccurCombination2() throws Exception {
    BooleanQuery optionalTerms = new BooleanQuery();
    addOptionalTerms(optionalTerms);

    BooleanQuery query = new BooleanQuery();
    query.add(titleTerm("text", 3.0f), BooleanClause.Occur.MUST);
    query.add(optionalTerms, BooleanClause.Occur.MUST);

    TopDocs docs = searcher.search(query, null, MAX_HITS);
    printHits("=== query { text(must) { some(should) the(should) that(should) }(must) } ===",
        "docID: ", docs.scoreDocs);

    // Document 1 contains "text" but none of some/the/that, so it drops out.
    assertEquals("both MUST clauses have to match", 5, docs.totalHits);
  }

  /**
   * SHOULD combined with SHOULD: the result is the union of the documents
   * matched by the individual clauses.
   */
  public void testOccurCombination3() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(titleTerm("text", 3.0f), BooleanClause.Occur.SHOULD);
    addOptionalTerms(query);

    TopDocs docs = searcher.search(query, null, MAX_HITS);
    printHits("=== query { text(should) some(should) the(should) that(should) } ===",
        "docID: ", docs.scoreDocs);

    // Documents 2, 3, 5 and 12 contain none of the four terms.
    assertEquals("union of all SHOULD clauses", 10, docs.totalHits);
  }

  /** Adds the optional terms shared by all tests: some (1.2), the (1.3), that (default boost). */
  private static void addOptionalTerms(BooleanQuery query) {
    query.add(titleTerm("some", 1.2f), BooleanClause.Occur.SHOULD);
    query.add(titleTerm("the", 1.3f), BooleanClause.Occur.SHOULD);
    query.add(titleTerm("that"), BooleanClause.Occur.SHOULD);
  }

  /** Creates a term query on the title field with the default boost. */
  private static TermQuery titleTerm(String text) {
    return new TermQuery(new Term(FIELD, text));
  }

  /** Creates a term query on the title field with the given boost. */
  private static TermQuery titleTerm(String text, float boost) {
    TermQuery termQuery = titleTerm(text);
    termQuery.setBoost(boost);
    return termQuery;
  }

  /**
   * Prints a header line followed by one line per hit (doc id, score and the
   * stored title text).
   *
   * @param header  line printed before the hits
   * @param idLabel label printed in front of each document id
   * @param hits    hits to print, in ranking order
   * @throws IOException if a stored document cannot be read
   */
  private void printHits(String header, String idLabel, ScoreDoc[] hits) throws IOException {
    System.out.println(header);
    for (ScoreDoc hit : hits) {
      System.out.println(idLabel + hit.doc + ", score: " + hit.score
          + ", content :" + reader.document(hit.doc).get(FIELD));
    }
  }
}
=== query {text(must) some(should) the(should) that(should) } ===
docID : 0, score: 0.5669826, content :Well, this is just some plain text we use for creating the
docID : 9, score: 0.50818175, content :the web site in the disclaimer of he eBook containing that text
docID : 4, score: 0.29922757, content :probably advised one not to include "it"'s text or the text of
docID : 13, score: 0.29017726, content :text for the test, but oh much much safer.
docID : 8, score: 0.23953491, content :it became uncertain that this text is free to use, because
docID : 1, score: 0.0809797, content :test documents. It used to be a text from an online collection
=== query { text(must) { some(should) the(should) that(should) }(must) } ===
docID: 0, score: 0.6119575, content :Well, this is just some plain text we use for creating the
docID: 9, score: 0.55969, content :the web site in the disclaimer of he eBook containing that text
docID: 4, score: 0.5048786, content :probably advised one not to include "it"'s text or the text of
docID: 13, score: 0.46338382, content :text for the test, but oh much much safer.
docID: 8, score: 0.3756358, content :it became uncertain that this text is free to use, because
=== query { text(should) some(should) the(should) that(should) } ===
docID: 0, score: 0.5669826, content :Well, this is just some plain text we use for creating the
docID: 9, score: 0.50818175, content :the web site in the disclaimer of he eBook containing that text
docID: 4, score: 0.29922757, content :probably advised one not to include "it"'s text or the text of
docID: 13, score: 0.29017726, content :text for the test, but oh much much safer.
docID: 8, score: 0.23953491, content :it became uncertain that this text is free to use, because
docID: 7, score: 0.21602902, content :charity. Anyhow at some point, rechecking the usage of this text,
docID: 1, score: 0.0809797, content :test documents. It used to be a text from an online collection
docID: 6, score: 0.03878776, content :that one don't need and one is happy to donate for lawyers
docID: 11, score: 0.03878776, content :searching for first aid no longer found that eBook as well.
docID: 10, score: 0.035091203, content :was not responding anymore, and at the same time, in projGut,