Lucene创建索引入门案例

最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。

项目jar包:


//索引关键类

package com.lucene.index;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.lucene.vo.User;

/**
 * Minimal Lucene 3.6 example: builds a small index of sample users in a
 * filesystem directory and runs keyword queries against the analyzed
 * {@code content} field.
 *
 * @author Administrator
 */
public class searchIndex {
	/** Filesystem location of the index. */
	private static final String INDEX_PATH = "H:/lucene/index";
	/** Maximum number of hits requested from a single search. */
	private static final int MAX_HITS = 10;

	// Parallel sample-data arrays: element i of every array belongs to document i.
	private String[] ids = { "1", "2", "3", "4", "5", "6" };
	private String[] emails = { "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]" };
	private String[] contents = { "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中", "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",
			"文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };
	private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };

	// Index directory. It is opened with FSDirectory, so the index lives on
	// disk rather than in memory.
	private Directory directory = null;
	// IK analyzer, shared by indexing and query parsing.
	IKAnalyzer analyzer = null;

	/**
	 * Opens the index directory and creates the analyzer.
	 *
	 * @throws IllegalStateException if the index directory cannot be opened;
	 *         failing here avoids a later NullPointerException in
	 *         {@link #index()} or {@link #search(String)}
	 */
	public searchIndex() {
		try {
			directory = FSDirectory.open(new File(INDEX_PATH));
		} catch (IOException e) {
			throw new IllegalStateException("Cannot open index directory " + INDEX_PATH, e);
		}
		analyzer = new IKAnalyzer(true);
	}

	/**
	 * Rebuilds the index from the sample data: deletes every existing
	 * document, then adds one document per entry of {@code ids}.
	 * I/O failures are printed to stderr and not rethrown.
	 */
	public void index() {
		IndexWriter writer = null;
		try {
			writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
			// Start from an empty index before adding the sample documents.
			writer.deleteAll();
			for (int i = 0; i < ids.length; i++) {
				Document doc = new Document();
				doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
				doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
				// Stored as well as analyzed: search() reads the content back from each hit.
				doc.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
				doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
				writer.addDocument(doc);
			}
			writer.close();
			// Mark the writer as cleanly closed so the finally block leaves it alone.
			writer = null;
		} catch (IOException e) {
			// Also covers CorruptIndexException and LockObtainFailedException.
			e.printStackTrace();
		} finally {
			if (writer != null) {
				// Something failed before a clean close: discard the partial
				// changes and release the write lock.
				try {
					writer.rollback();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Searches the {@code content} field for the given keyword.
	 *
	 * @param keyword query text in QueryParser syntax; {@code null} or blank
	 *        input yields an empty result without touching the index
	 * @return the matching users, at most {@code MAX_HITS}; never {@code null}.
	 *         The list is empty when nothing matches or when the query fails
	 *         (failures are printed to stderr).
	 */
	public List<User> search(String keyword) {
		long startTime = System.currentTimeMillis();
		System.out.println("*****************检索开始**********************");
		List<User> userList = new ArrayList<User>();
		if (keyword != null && keyword.trim().length() > 0) {
			collectHits(keyword, userList, startTime);
		}
		System.out.println("*****************检索结束**********************");
		return userList;
	}

	/** Runs the query and appends one User per hit to {@code userList}, always closing reader and searcher. */
	private void collectHits(String keyword, List<User> userList, long startTime) {
		IndexReader reader = null;
		IndexSearcher searcher = null;
		try {
			reader = IndexReader.open(directory);
			searcher = new IndexSearcher(reader);

			// Parse the keyword with the same analyzer that was used for indexing.
			QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
			Query query = queryParser.parse(keyword);

			TopDocs rs = searcher.search(query, null, MAX_HITS);
			long endTime = System.currentTimeMillis();
			System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");
			for (int i = 0; i < rs.scoreDocs.length; i++) {
				// scoreDocs[i].doc is the internal document number of the hit.
				userList.add(toUser(searcher.doc(rs.scoreDocs[i].doc)));
			}
		} catch (IOException e) {
			// Also covers CorruptIndexException.
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		} finally {
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/** Copies the stored fields of one hit into a User. */
	private User toUser(Document hit) {
		User user = new User();
		user.setId(Long.parseLong(hit.get("id")));
		user.setName(hit.get("name"));
		// index() writes no "sex" or "dosomething" field, so for an index
		// built by this class both lookups return null.
		user.setSex(hit.get("sex"));
		user.setDosomething(hit.get("dosomething"));
		user.setEmail(hit.get("email"));
		user.setContent(hit.get("content"));
		return user;
	}

}


 
 

package com.lucene;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.lucene.index.searchIndex;
import com.lucene.vo.User;

/**
 * Servlet implementation class searchServlet
 */
public class searchServlet extends HttpServlet {
	private static final long serialVersionUID = 1L;

    /**
     * Default constructor. 
     */
    public searchServlet() {
        // TODO Auto-generated constructor stub
    }

	/**
	 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// TODO Auto-generated method stub
	}

	/**
	 * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
	 */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		request.setCharacterEncoding("UTF-8");
		String keyword = request.getParameter("keyword");
		if("".equals(keyword)){
			keyword="0";
		}
		searchIndex si = new searchIndex();
		si.index();
		List<User> userList = si.search(keyword);
		request.setAttribute("userList", userList);
		request.getRequestDispatcher("search.jsp").forward(request, response);
	}

}

package com.lucene.vo;

/**
 * Plain value object for one search hit. Every property is a nullable
 * reference that stays {@code null} until its setter is called.
 */
public class User {

	/** Identifier of the user. */
	private Long id;

	/** Display name. */
	private String name;

	/** Sex of the user. */
	private String sex;

	/** Free-text "does something" value (shown under the hobby column on the result page). */
	private String dosomething;

	/** E-mail address. */
	private String email;

	/** Body text associated with the user. */
	private String content;

	// ---- id ----

	/** @return the identifier, or {@code null} if unset */
	public Long getId() {
		return this.id;
	}

	/** @param id the identifier to store */
	public void setId(Long id) {
		this.id = id;
	}

	// ---- name ----

	/** @return the name, or {@code null} if unset */
	public String getName() {
		return this.name;
	}

	/** @param name the name to store */
	public void setName(String name) {
		this.name = name;
	}

	// ---- sex ----

	/** @return the sex, or {@code null} if unset */
	public String getSex() {
		return this.sex;
	}

	/** @param sex the sex to store */
	public void setSex(String sex) {
		this.sex = sex;
	}

	// ---- dosomething ----

	/** @return the dosomething value, or {@code null} if unset */
	public String getDosomething() {
		return this.dosomething;
	}

	/** @param dosomething the dosomething value to store */
	public void setDosomething(String dosomething) {
		this.dosomething = dosomething;
	}

	// ---- email ----

	/** @return the e-mail address, or {@code null} if unset */
	public String getEmail() {
		return this.email;
	}

	/** @param email the e-mail address to store */
	public void setEmail(String email) {
		this.email = email;
	}

	// ---- content ----

	/** @return the content text, or {@code null} if unset */
	public String getContent() {
		return this.content;
	}

	/** @param content the content text to store */
	public void setContent(String content) {
		this.content = content;
	}

}


<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Search page: posts the keyword to searchServlet.do and, when the request
  carries a non-empty "userList" attribute, renders one table row per user.
--%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Must agree with the UTF-8 charset declared in the page directive above. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
	<form action="searchServlet.do" method="post">
		<input type="text" name="keyword" /> <input type="submit" value="搜索" />
	</form>
	<div style="height: 10px">
	</div>
	<%-- The result table is rendered only when the servlet supplied hits. --%>
	<c:if test="${not empty userList}">
		<div>相关信息:</div>
		<table border="1" align="center">
			<tr>
				<td>ID</td>
				<td>姓名</td>
				<td>性别</td>
				<td>邮箱</td>
				<td>爱好</td>
				<td>正文</td>
			</tr>
			<c:forEach items="${ userList}" var="user">
				<tr>
					<td>${user.id }</td>
					<td>${user.name }</td>
					<td>${user.sex }</td>
					<td>${user.email }</td>
					<td>${user.dosomething }</td>
					<td>${user.content }</td>
				</tr>
			</c:forEach>
		</table>
	</c:if>
</body>
</html>

代码测试:

Lucene创建索引入门案例_第1张图片



你可能感兴趣的:(Web,Lucene,全文检索)