交换实体类: public class LuceneWrapper implements Serializable{ private String id; /** * 030 */ private String docType; /** * 创建时间 yyyy-MM-dd */ private String creationDate; /** * 标题 */ private String title; /** * 文号 */ private String wenhao; /** * 链接地址 */ private String url; /** * 正文 */ private Attachment zhengwen; /** * 附件 */ private Set<Attachment> attachments; public LuceneWrapper(){ } private static final long serialVersionUID = 6707156570710732532L; public static final String ID = "id"; public static final String TITLE = "title"; public static final String CREATIONDATE = "creationDate"; public static final String DOCTYPE = "docType"; public static final String ZHENGWEN = "zw_centent"; public static final String URL = "url"; public static final String WENHAO = "wenhao"; public static final String FUJIAN = "fj_content"; }
/**
 * Operations for maintaining and searching the full-text index of
 * {@link LuceneWrapper} documents.
 */
public interface LuceneManager {

    /**
     * Index directory path. As an interface field it is implicitly
     * {@code public static final}.
     * NOTE(review): Lucene49ManagerImpl opens OaConstants.INDEX_DIR rather than
     * this constant - confirm both point at the same directory.
     */
    String INDEX_DIR = "g:\\ztzx\\";

    /**
     * Adds the given document to the index.
     *
     * @param entity the document to index
     */
    void createIndex(LuceneWrapper entity) throws RuntimeException;

    /**
     * Replaces the indexed document that has the same id as {@code entity}.
     *
     * @param entity the new content of the document
     */
    void updateIndex(LuceneWrapper entity) throws RuntimeException;

    /**
     * Removes the document with the given id from the index.
     *
     * @param entityId id of the document to remove
     */
    void deleteIndex(String entityId) throws RuntimeException;

    /**
     * Searches the index and returns one page of results.
     *
     * @param pageNo      page number
     * @param pageSize    number of results per page
     * @param queryString the search expression
     * @return the requested page of matches
     */
    Page searchPage(int pageNo, int pageSize, String queryString) throws RuntimeException;
}
@Service("luc49Manager") public class Lucene49ManagerImpl implements LuceneManager { private Logger logger = LoggerFactory.getLogger(getClass()); public static Version LUCENE_VERSION = Version.LUCENE_46; static { com.aspose.words.Document.setLicence("sj_laokai"); } @Override public void createIndex(LuceneWrapper entity) throws RuntimeException { IndexWriter indexWriter = null; try { Directory director = FSDirectory.open(new File(OaConstants.INDEX_DIR));// 创建Directory关联源文件 Analyzer analyzer = new PaodingAnalyzer();// 创建 庖丁解牛 分词器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);// 创建索引的配置信息 indexWriter = new IndexWriter(director, indexWriterConfig); Document doc = new Document();// 创建文档 // 读取正文中内容 String zhengwen = ""; String filePath = AttachmentUtils.getFileBasePath() + entity.getZhengwen().getFilePath(); File file = new File(filePath); String fileExt = ""; if (file.isFile()) { fileExt = FilenameUtils.getExtension(file.getName()).toLowerCase(); if ("doc".equals(fileExt) || "docx".equals(fileExt) || "wps".equals(fileExt)) { com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath); zhengwen = zwdoc.getText(); } else { logger.error("===正文文件创建索引格式只支持word和wps==={}", filePath); } } else { logger.error("===正文创建索引文件不存在==={}", filePath); } doc.add(new TextField(LuceneWrapper.ZHENGWEN, zhengwen, Store.YES)); // 读取附件中内容 zhengwen = ""; for (Attachment a : entity.getAttachments()) { filePath = AttachmentUtils.getFileBasePath() + a.getFilePath(); file = new File(filePath); if (file.isFile()) { fileExt = FilenameUtils.getExtension(file.getName()).toLowerCase(); if ("doc".equals(fileExt) || "docx".equals(fileExt) || "wps".equals(fileExt)) { com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath); zhengwen = zwdoc.getText(); } else { logger.error("===附件文件创建索引格式只支持word和wps==={}", filePath); } } else { logger.error("===附件创建索引文件不存在==={}", filePath); } } doc.add(new TextField(LuceneWrapper.FUJIAN, zhengwen, 
Store.YES)); doc.add(new StringField(LuceneWrapper.ID, entity.getId(), Store.YES)); doc.add(new StringField(LuceneWrapper.DOCTYPE, entity.getDocType(), Store.YES)); doc.add(new StringField(LuceneWrapper.CREATIONDATE, entity.getCreationDate(), Store.YES)); doc.add(new TextField(LuceneWrapper.TITLE, entity.getTitle(), Store.YES)); doc.add(new TextField(LuceneWrapper.WENHAO, entity.getWenhao(), Store.YES)); doc.add(new StringField(LuceneWrapper.URL, entity.getUrl(), Store.YES)); doc.add(new DoubleField("version", 1.0, Store.YES)); // 添加文本到索引中 indexWriter.addDocument(doc); logger.debug("===添加索引成功==={}", entity.getTitle()); } catch (Exception e) { e.printStackTrace(); } finally { try { indexWriter.close();// 关闭索引 } catch (IOException e) { } } } @Override public void updateIndex(LuceneWrapper entity) throws RuntimeException { IndexWriter indexWriter = null; try { Directory director = FSDirectory.open(new File(OaConstants.INDEX_DIR));// 创建Directory关联源文件 Analyzer analyzer = new PaodingAnalyzer();// 创建 庖丁解牛 分词器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);// 创建索引的配置信息 indexWriter = new IndexWriter(director, indexWriterConfig); Document doc = new Document();// 创建文档 // 读取正文中内容 String zhengwen = ""; String filePath = AttachmentUtils.getFileBasePath() + entity.getZhengwen().getFilePath(); File file = new File(filePath); String fileExt = ""; if (file.isFile()) { fileExt = FilenameUtils.getExtension(file.getName()).toLowerCase(); if ("doc".equals(fileExt) || "docx".equals(fileExt) || "wps".equals(fileExt)) { com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath); zhengwen = zwdoc.getText(); } else { logger.error("===正文文件创建索引格式只支持word和wps==={}", filePath); } } else { logger.error("===正文创建索引文件不存在==={}", filePath); } doc.add(new TextField(LuceneWrapper.ZHENGWEN, zhengwen, Store.YES)); // 读取附件中内容 zhengwen = ""; for (Attachment a : entity.getAttachments()) { filePath = AttachmentUtils.getFileBasePath() + a.getFilePath(); file 
= new File(filePath); if (file.isFile()) { fileExt = FilenameUtils.getExtension(file.getName()).toLowerCase(); if ("doc".equals(fileExt) || "docx".equals(fileExt) || "wps".equals(fileExt)) { com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath); zhengwen = zwdoc.getText(); } else { logger.error("===附件文件创建索引格式只支持word和wps==={}", filePath); } } else { logger.error("===附件创建索引文件不存在==={}", filePath); } } doc.add(new TextField(LuceneWrapper.FUJIAN, zhengwen, Store.YES)); doc.add(new StringField(LuceneWrapper.ID, entity.getId(), Store.YES)); doc.add(new StringField(LuceneWrapper.DOCTYPE, entity.getDocType(), Store.YES)); doc.add(new StringField(LuceneWrapper.CREATIONDATE, entity.getCreationDate(), Store.YES)); doc.add(new TextField(LuceneWrapper.TITLE, entity.getTitle(), Store.YES)); doc.add(new TextField(LuceneWrapper.WENHAO, entity.getWenhao(), Store.YES)); doc.add(new StringField(LuceneWrapper.URL, entity.getUrl(), Store.YES)); doc.add(new DoubleField("version", 1.0, Store.YES)); // 更新索引 indexWriter.updateDocument(new Term(LuceneWrapper.ID, entity.getId()), doc); indexWriter.commit(); logger.debug("===更新索引成功==={}", entity.getTitle()); } catch (Exception e) { e.printStackTrace(); } finally { try { indexWriter.close();// 关闭索引 } catch (IOException e) { } } } @Override public void deleteIndex(String entityId) throws RuntimeException { IndexWriter indexWriter = null; try { Directory director = FSDirectory.open(new File(OaConstants.INDEX_DIR));// 创建Directory关联源文件 Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);// 创建一个分词器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);// 创建索引的配置信息 indexWriter = new IndexWriter(director, indexWriterConfig); indexWriter.deleteDocuments(new Term(LuceneWrapper.ID, entityId)); indexWriter.commit(); logger.debug("===删除索引成功==="); } catch (Exception e) { e.printStackTrace(); } finally { try { indexWriter.close();// 关闭索引 } catch (IOException e) { } } } @Override public Page 
searchPage(int pageNo, int pageSize, String queryString) throws RuntimeException { try { if (pageNo < 1) { pageNo = 1; } if (pageSize < 1) { pageSize = 1; } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(OaConstants.INDEX_DIR)));// 索引读取类 IndexSearcher search = new IndexSearcher(reader);// 搜索入口工具类 Analyzer analyzer = new PaodingAnalyzer(); // 按 标题 文号 正文内容 附件内容 多条件查询 String[] queryConditions = { queryString, queryString, queryString,queryString }; String[] fields = { LuceneWrapper.TITLE, LuceneWrapper.WENHAO, LuceneWrapper.ZHENGWEN,LuceneWrapper.FUJIAN }; BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD }; Query query = MultiFieldQueryParser.parse(LUCENE_VERSION, queryConditions, fields, flags, analyzer); // 按创建时间倒排序 Sort sort = new Sort(new SortField("creationDate", SortField.Type.STRING, false));// false升序true降序 TopDocs topdocs = search.search(query, 10000, sort);// 查询前100条 // 高亮 Scorer score = new QueryScorer(query);// 检索评份 LuceneWrapper lw = null; List<LuceneWrapper> list = new ArrayList<LuceneWrapper>(); ScoreDoc scores[] = topdocs.scoreDocs;// 得到所有结果集 for (int i = (pageNo - 1) * pageSize; i < scores.length; i++) { int num = scores[i].doc;// 得到文档id Document doc = search.doc(num);// 拿到指定的文档 lw = new LuceneWrapper(); lw.setId(doc.get("id")); // lw.setZhengwen(entity.getZhengwen()); // 高亮显示title lw.setTitle(highLighter("title", doc.get("title"), queryString, score)); if (StringUtils.isBlank(lw.getTitle())) { lw.setTitle(doc.get("title")); } lw.setCreationDate(doc.get("creationDate")); lw.setDocType(doc.get("docType")); // 高亮显示wenhao lw.setWenhao(highLighter("wenhao", doc.get("wenhao"), queryString, score)); if (StringUtils.isBlank(lw.getWenhao())) { lw.setWenhao(doc.get("wenhao")); } lw.setUrl(doc.get("url")); list.add(lw); } return new Page(list, pageNo, pageSize, topdocs.scoreDocs.length); } catch (Exception e) { e.printStackTrace(); } return null; } 
/** * 高亮显示 * * @param content * 内容 * @param queryString * 查询语句 * @param score * 评分 * @return * @throws IOException * @throws InvalidTokenOffsetsException */ public String highLighter(String fieldName, String content, String queryString, Scorer score) throws IOException, InvalidTokenOffsetsException { Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");// 高亮html格式 Fragmenter fragmenter = new SimpleFragmenter(content.length());// 设置最大片断为100 Highlighter highlighter = new Highlighter(formatter, score);// 高亮显示类 highlighter.setTextFragmenter(fragmenter);// 设置格式 TokenStream tokenStream = new PaodingAnalyzer().tokenStream(fieldName, new StringReader(content)); return highlighter.getBestFragment(tokenStream, content);// 得到高亮显示后的内容 } }
public class Lucene49ManagerTest extends CommSpringJunitTest { static { com.aspose.words.Document.setLicence("sj_laokai"); } @Autowired private WfEntityManager wfManager; @Autowired private LuceneManager luc49Manager; @Autowired private PropertyManager prManager; @Test public void testRun(){ //testReadTitle(); testLucene(); } public void testReadTitle() { try { String filePath = "g:\\tmp\\word1.doc"; File file = new File(filePath); if (file.isFile()) { com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath); System.out.println("===word===" + zwdoc.getText()); } filePath = "g:\\tmp\\wps1.wps"; file = new File(filePath); if (file.isFile()) { com.aspose.words.Document zwdoc = new com.aspose.words.Document(filePath); System.out.println("===wps===" + zwdoc.getText()); } } catch (Exception e) { e.printStackTrace(); } } public void testLucene() { CommConstant.PROPERTY_MAP = prManager.init(); WfEntity entity = wfManager.get("8a8a8985501157d10150115ac49e0000"); LuceneWrapper lw = new LuceneWrapper(); lw.setId(entity.getId()); lw.setZhengwen(entity.getZhengwen()); lw.setTitle("关于国庆放假的通知"); lw.setCreationDate(CommUtils.FormatDateToString(entity.getCreationDate(), "yyyy-MM-dd")); lw.setDocType("收文"); lw.setWenhao("中央财政【2015】第20号"); lw.setUrl("urlurlurlurlurlurlurlurlurlurl"); // luc49Manager.createIndex(lw); luc49Manager.updateIndex(lw); // luc49Manager.searchPage(1, 20, "国庆"); } public void testQuery() { try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(LuceneManager.INDEX_DIR)));// 索引读取类 IndexSearcher search = new IndexSearcher(reader);// 搜索入口工具类 String queryStr = "8a8a";// 搜索关键字 QueryParser queryParser = new QueryParser(Lucene49ManagerImpl.LUCENE_VERSION, "id", new PaodingAnalyzer());// 实例查询条件类 Query query = queryParser.parse(queryStr); TopDocs topdocs = search.search(query, 100);// 查询前100条 System.out.println("查询结果总数---" + topdocs.totalHits); ScoreDoc scores[] = topdocs.scoreDocs;// 得到所有结果集 for (int i = 0; i < scores.length; i++) { 
int num = scores[i].doc;// 得到文档id Document document = search.doc(num);// 拿到指定的文档 System.out.println("内容====" + document.get("zhengwen"));// 由于内容没有存储所以执行结果为null System.out.println("标题====" + document.get("title")); System.out.println("版本====" + document.get("version")); System.out.println("评分====" + document.get("score")); System.out.println("id--" + num + "---scors--" + scores[i].score + "---index--" + scores[i].shardIndex); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void testAllIndex() { try { Directory indexDirectory = FSDirectory.open(new File(LuceneManager.INDEX_DIR)); IndexReader indexReader = DirectoryReader.open(indexDirectory); Fields fields = MultiFields.getFields(indexReader); Iterator<String> fieldsIterator = fields.iterator(); while (fieldsIterator.hasNext()) { String field = fieldsIterator.next(); Terms terms = fields.terms(field); TermsEnum termsEnums = terms.iterator(null); BytesRef byteRef = null; System.out.println("field : " + field); while ((byteRef = termsEnums.next()) != null) { String term = new String(byteRef.bytes, byteRef.offset, byteRef.length); System.out.println("term is : " + term); } } } catch (IOException e) { e.printStackTrace(); } } }