最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。
项目jar包:
//索引关键类
<pre name="code" class="java">package com.lucene.index;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.lucene.vo.User;

/**
 * A very simple Lucene example: builds an index from a handful of hard-coded
 * sample records and runs keyword searches against the "content" field.
 *
 * The index lives on disk (see the FSDirectory opened in the constructor),
 * not in memory.
 *
 * @author Administrator
 */
public class searchIndex {

    // Sample data: element i of every array below belongs to the same record.
    private String[] ids = { "1", "2", "3", "4", "5", "6" };

    private String[] emails = { "[email protected]", "[email protected]", "[email protected]",
            "[email protected]", "[email protected]", "[email protected]" };

    // Earlier English sample contents, kept for reference:
    // private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",
    // "I like football and I like basketball too", "I like movie and swim" };
    private String[] contents = {
            "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中",
            "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中",
            "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一",
            "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",
            "文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询",
            "I like movie and swim" };

    private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };

    // Directory holding the index. It is a file-system directory, so the
    // index is written to disk rather than kept in memory.
    private Directory directory = null;

    // IK analyzer, shared by indexing and query parsing so both sides
    // tokenize text the same way.
    IKAnalyzer analyzer = null;

    /**
     * Opens the on-disk index directory and creates the analyzer.
     *
     * If the directory cannot be opened the stack trace is printed and
     * {@code directory} (and {@code analyzer}) stay null, in which case
     * {@link #index()} and {@link #search(String)} cannot work.
     */
    public searchIndex() {
        try {
            directory = FSDirectory.open(new File("H:/lucene/index"));
            // NOTE(review): the boolean presumably selects IK's "smart"
            // segmentation mode - confirm against the IKAnalyzer version in use.
            analyzer = new IKAnalyzer(true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index from scratch: removes every existing document and
     * adds one document per sample record.
     *
     * Failures are reported with a printed stack trace, as before. Unlike
     * before, a failed rebuild now rolls the writer back, which discards the
     * uncommitted changes and releases the index write lock instead of
     * leaking it.
     */
    public void index() {
        // The writer persists documents into the directory opened in the constructor.
        IndexWriter writer = null;
        boolean closed = false;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
            // Start from an empty index so repeated calls do not duplicate records.
            writer.deleteAll();

            // A Document is comparable to a row in a database table; each
            // Field is a column that can later be searched and/or read back.
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Stored as well as analyzed: search() reads "content" back
                // from the hit, which always yielded null with Store.NO.
                doc.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                writer.addDocument(doc);
            }
            writer.close();
            closed = true;
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null && !closed) {
                try {
                    // Abandon the partial rebuild and free the write lock.
                    writer.rollback();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the "content" field for the given keyword and returns at most
     * ten matching users.
     *
     * @param keyword query text in Lucene query-parser syntax; {@code null}
     *                yields an empty result instead of failing in the parser
     * @return the matching users, never {@code null}; empty when nothing
     *         matches or when an error occurred (the error is printed)
     */
    public List<User> search(String keyword) {
        List<User> userList = new ArrayList<User>();
        if (keyword == null) {
            // Nothing to parse; behave like a query without hits.
            return userList;
        }

        long startTime = System.currentTimeMillis();
        System.out.println("*****************检索开始**********************");

        IndexReader reader = null;
        try {
            reader = IndexReader.open(directory);
            // The searcher works on the same directory that index() wrote to.
            IndexSearcher searcher = new IndexSearcher(reader);

            // Turn the keyword into a Query against the "content" field,
            // tokenized with the same analyzer that was used for indexing.
            QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
            Query query = queryParser.parse(keyword);

            // Fetch the ten best hits.
            TopDocs rs = searcher.search(query, null, 10);
            long endTime = System.currentTimeMillis();
            System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");

            for (int i = 0; i < rs.scoreDocs.length; i++) {
                // scoreDocs[i].doc is Lucene's internal document number.
                Document hit = searcher.doc(rs.scoreDocs[i].doc);
                User user = new User();
                user.setId(Long.parseLong(hit.get("id")));
                user.setName(hit.get("name"));
                // index() writes no "sex" or "dosometing" field, so these two
                // lookups return null for documents created by this class.
                user.setSex(hit.get("sex"));
                user.setDosomething(hit.get("dosometing"));
                user.setEmail(hit.get("email"));
                user.setContent(hit.get("content"));
                userList.add(user);
            }
        } catch (CorruptIndexException e1) {
            e1.printStackTrace();
        } catch (IOException e1) {
            e1.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Close the reader on every path, not only after a successful search.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        System.out.println("*****************检索结束**********************");
        return userList;
    }
}
package com.lucene;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.lucene.index.searchIndex;
import com.lucene.vo.User;

/**
 * Servlet that receives the search form, rebuilds the demo index, runs the
 * query and forwards the result list to search.jsp.
 */
public class searchServlet extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /**
     * Default constructor.
     */
    public searchServlet() {
    }

    /**
     * GET requests are intentionally not handled; the search form posts.
     *
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
    }

    /**
     * Runs a search for the "keyword" request parameter and forwards to
     * search.jsp with the hits in the "userList" request attribute.
     *
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        // Must be set before the first getParameter call so the posted
        // keyword is decoded as UTF-8.
        request.setCharacterEncoding("UTF-8");

        String keyword = request.getParameter("keyword");
        // getParameter returns null when the parameter is absent; treat that
        // like an empty submission instead of handing null to the query parser.
        if (keyword == null || "".equals(keyword)) {
            keyword = "0";
        }

        searchIndex si = new searchIndex();
        // The demo rebuilds the whole index on every request before searching.
        si.index();
        List<User> userList = si.search(keyword);

        request.setAttribute("userList", userList);
        request.getRequestDispatcher("search.jsp").forward(request, response);
    }
}
package com.lucene.vo; public class User { private Long id; private String name; private String sex; private String dosomething; private String email; private String content; public Long getId() { return id; } public void setId(Long id) { this.id = id; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getSex() { return sex; } public void setSex(String sex) { this.sex = sex; } public String getDosomething() { return dosomething; } public void setDosomething(String dosomething) { this.dosomething = dosomething; } public String getEmail() { return email; } public void setEmail(String email) { this.email = email; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } }
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Search page: shows the keyword form and, when the request carries a
  non-empty "userList" attribute, a table with one row per hit.
--%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Charset must agree with the page directive above; the page contains Chinese text. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
    <form action="searchServlet.do" method="post">
        <input type="text" name="keyword" />
        <input type="submit" value="搜索" />
    </form>
    <div style="height: 10px"> </div>
    <c:if test="${not empty userList}">
        <div>相关信息:</div>
        <table border="1" align="center">
            <tr>
                <td>ID</td>
                <td>姓名</td>
                <td>性别</td>
                <td>邮箱</td>
                <td>爱好</td>
                <td>正文</td>
            </tr>
            <c:forEach items="${userList}" var="user">
                <%-- c:out HTML-escapes each value so indexed text cannot inject markup. --%>
                <tr>
                    <td><c:out value="${user.id}" /></td>
                    <td><c:out value="${user.name}" /></td>
                    <td><c:out value="${user.sex}" /></td>
                    <td><c:out value="${user.email}" /></td>
                    <td><c:out value="${user.dosomething}" /></td>
                    <td><c:out value="${user.content}" /></td>
                </tr>
            </c:forEach>
        </table>
    </c:if>
</body>
</html>