Lucene之BooleanClause Occur MUST和SHOULD组合测试


测试版本Lucene 2.9.4

====================================

Lucene中的BooleanQuery用于实现“与”、“或”等逻辑组合的复合搜索。

BooleanClause是用于表示布尔查询子句关系的类,包括:BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.SHOULD,分别表示必须包含、不能包含、可以包含三种关系。它们有以下6种组合: 
 
1.MUST和MUST:取得两个查询子句的交集。 
2.MUST和MUST_NOT:表示查询结果中不能包含MUST_NOT所对应的查询子句的检索结果。 
3.SHOULD与MUST_NOT:连用时,功能同MUST和MUST_NOT。
4.SHOULD与MUST连用时,结果为MUST子句的检索结果,但是SHOULD可影响排序。
5.SHOULD与SHOULD:表示“或”关系,最终检索结果为所有检索子句的并集。
6.MUST_NOT和MUST_NOT:无意义,检索无结果。

测试程序:

package org.apache.lucene.search;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;


/**
 * Demonstrates how {@link BooleanClause.Occur#MUST} and
 * {@link BooleanClause.Occur#SHOULD} clauses combine inside a
 * {@link BooleanQuery}.
 *
 * <p>Every entry of {@link #DOC_TEXT_LINES} is indexed as its own document in a
 * single stored, analyzed field, split on whitespace only. Punctuation stays
 * attached to the token, so {@code "text,"} does not match the term
 * {@code "text"}. Each test prints the ranked hits and asserts the number of
 * matching documents that follows from that tokenization.
 */
public class TestBooleanClauseOccur extends LuceneTestCase {

  /** Name of the only field that is indexed and searched. */
  private static final String FIELD = "title";

  /** Maximum number of hits requested from every search. */
  private static final int MAX_HITS = 10;

  private IndexSearcher searcher;
  private IndexReader reader;
  private Directory directory;
  private Analyzer analyzer;

  private static final String[] DOC_TEXT_LINES = {
    "Well, this is just some plain text we use for creating the ", // 0
    "test documents. It used to be a text from an online collection ", // 1
    "devoted to first aid, but if there was there an (online) lawyers ", // 2
    "first aid collection with legal advices, \"it\" might have quite ", // 3
    "probably advised one not to include \"it\"'s text or the text of ", // 4
    "any other online collection in one's code, unless one has money ", // 5
    "that one don't need and one is happy to donate for lawyers ", // 6
    "charity. Anyhow at some point, rechecking the usage of this text, ", // 7
    "it became uncertain that this text is free to use, because ", // 8
    "the web site in the disclaimer of he eBook containing that text ", // 9
    "was not responding anymore, and at the same time, in projGut, ", // 10
    "searching for first aid no longer found that eBook as well. ", // 11
    "So here we are, with a perhaps much less interesting ", // 12
    "text for the test, but oh much much safer. ", // 13
  };

  /**
   * Builds an in-memory index with one document per line of
   * {@link #DOC_TEXT_LINES} and opens a searcher over it.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    directory = new RAMDirectory();
    analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        // Split on whitespace only: no lower-casing, no punctuation stripping.
        return new WhitespaceTokenizer(reader);
      }

      @Override
      public int getPositionIncrementGap(String fieldName) {
        return 100;
      }
    };

    IndexWriter writer =
        new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    try {
      for (int i = 0; i < DOC_TEXT_LINES.length; ++i) {
        Document doc = new Document();
        doc.add(new Field(FIELD, DOC_TEXT_LINES[i], Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
      reader = writer.getReader();
    } finally {
      // Close the writer even when indexing fails so it is not leaked.
      writer.close();
    }

    // Search through the same reader that is later used to load stored fields,
    // so the doc IDs in the hits always refer to that reader.
    searcher = new IndexSearcher(reader);
  }

  /** Releases the searcher, reader, directory and analyzer created by {@link #setUp()}. */
  @Override
  protected void tearDown() throws Exception {
    try {
      searcher.close();
      reader.close();
      directory.close();
      analyzer.close();
    } finally {
      // Always run the base-class cleanup, even if closing a resource fails.
      super.tearDown();
    }
  }

  /**
   * {@code text(MUST) some(SHOULD) the(SHOULD) that(SHOULD)}: only documents
   * containing "text" are returned; the SHOULD clauses are added as optional
   * terms.
   */
  public void testOccurCombination1() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(titleTerm("text", 3.0f), BooleanClause.Occur.MUST);
    addOptionalTerms(query);

    ScoreDoc[] hits = searcher.search(query, null, MAX_HITS).scoreDocs;

    System.out.println("=== query {text(must) some(should) the(should) that(should) } ===");
    printHits("docID : ", ", score: ", hits);

    // Documents 0, 1, 4, 8, 9 and 13 contain the bare token "text".
    assertEquals("documents containing 'text'", 6, hits.length);
  }

  /**
   * {@code text(MUST) {some(SHOULD) the(SHOULD) that(SHOULD)}(MUST)}: the
   * SHOULD clauses are wrapped in a nested query that is itself required, so a
   * hit must contain "text" and at least one of the optional terms.
   */
  public void testOccurCombination2() throws Exception {
    BooleanQuery optionalTerms = new BooleanQuery();
    addOptionalTerms(optionalTerms);

    BooleanQuery query = new BooleanQuery();
    query.add(titleTerm("text", 3.0f), BooleanClause.Occur.MUST);
    query.add(optionalTerms, BooleanClause.Occur.MUST);

    TopDocs docs = searcher.search(query, null, MAX_HITS);

    System.out.println("=== query { text(must) { some(should) the(should) that(should) }(must) } ===");
    printHits("docID: ", ",  score: ", docs.scoreDocs);

    // Document 1 contains "text" but none of some/the/that, so it drops out.
    assertEquals("documents containing 'text' and an optional term", 5, docs.scoreDocs.length);
  }

  /**
   * {@code text(SHOULD) some(SHOULD) the(SHOULD) that(SHOULD)}: a document
   * matches when it contains any one of the four terms.
   */
  public void testOccurCombination3() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(titleTerm("text", 3.0f), BooleanClause.Occur.SHOULD);
    addOptionalTerms(query);

    TopDocs docs = searcher.search(query, null, MAX_HITS);

    System.out.println("=== query { text(should) some(should) the(should) that(should) } ===");
    printHits("docID: ", ",  score: ", docs.scoreDocs);

    // Documents 0, 1, 4, 6, 7, 8, 9, 10, 11 and 13 contain at least one term.
    assertEquals("documents containing any of the terms", 10, docs.totalHits);
    assertEquals(MAX_HITS, docs.scoreDocs.length);
  }

  /** Creates a term query on the title field with the given boost. */
  private static TermQuery titleTerm(String text, float boost) {
    TermQuery termQuery = new TermQuery(new Term(FIELD, text));
    termQuery.setBoost(boost);
    return termQuery;
  }

  /** Adds the SHOULD clauses shared by all tests: some (1.2), the (1.3), that (1.0). */
  private static void addOptionalTerms(BooleanQuery query) {
    query.add(titleTerm("some", 1.2f), BooleanClause.Occur.SHOULD);
    query.add(titleTerm("the", 1.3f), BooleanClause.Occur.SHOULD);
    // 1.0 is the boost a query has when none is set explicitly.
    query.add(titleTerm("that", 1.0f), BooleanClause.Occur.SHOULD);
  }

  /**
   * Prints one line per hit: doc ID, score and the stored title text.
   *
   * @param idLabel text printed before the doc ID
   * @param scoreLabel text printed between the doc ID and the score
   * @param hits the hits to print, in the order given
   */
  private void printHits(String idLabel, String scoreLabel, ScoreDoc[] hits) throws Exception {
    for (ScoreDoc hit : hits) {
      System.out.println(idLabel + hit.doc + scoreLabel + hit.score
          + ", content :" + reader.document(hit.doc).get(FIELD));
    }
  }
}


结果输出
=== query {text(must) some(should) the(should) that(should) } ===
docID : 0, score: 0.5669826, content :Well, this is just some plain text we use for creating the 
docID : 9, score: 0.50818175, content :the web site in the disclaimer of he eBook containing that text 
docID : 4, score: 0.29922757, content :probably advised one not to include "it"'s text or the text of 
docID : 13, score: 0.29017726, content :text for the test, but oh much much safer. 
docID : 8, score: 0.23953491, content :it became uncertain that this text is free to use, because 
docID : 1, score: 0.0809797, content :test documents. It used to be a text from an online collection 
=== query { text(must) { some(should) the(should) that(should) }(must) } ===
docID: 0,  score: 0.6119575, content :Well, this is just some plain text we use for creating the 
docID: 9,  score: 0.55969, content :the web site in the disclaimer of he eBook containing that text 
docID: 4,  score: 0.5048786, content :probably advised one not to include "it"'s text or the text of 
docID: 13,  score: 0.46338382, content :text for the test, but oh much much safer. 
docID: 8,  score: 0.3756358, content :it became uncertain that this text is free to use, because 
=== query { text(should) some(should) the(should) that(should) } ===
docID: 0,  score: 0.5669826, content :Well, this is just some plain text we use for creating the 
docID: 9,  score: 0.50818175, content :the web site in the disclaimer of he eBook containing that text 
docID: 4,  score: 0.29922757, content :probably advised one not to include "it"'s text or the text of 
docID: 13,  score: 0.29017726, content :text for the test, but oh much much safer. 
docID: 8,  score: 0.23953491, content :it became uncertain that this text is free to use, because 
docID: 7,  score: 0.21602902, content :charity. Anyhow at some point, rechecking the usage of this text, 
docID: 1,  score: 0.0809797, content :test documents. It used to be a text from an online collection 
docID: 6,  score: 0.03878776, content :that one don't need and one is happy to donate for lawyers 
docID: 11,  score: 0.03878776, content :searching for first aid no longer found that eBook as well. 
docID: 10,  score: 0.035091203, content :was not responding anymore, and at the same time, in projGut, 


你可能感兴趣的:(Lucene,BooleanClause)