1,添加poi mavne依赖:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
2,创建目录 directory
并且在目录directory
新建一个接口 BigReadExcelProcessData.java
再实现该接口,新建 BigReadExcelProcessimpl.java
。
① :接口BigReadExcelProcessData.java
内容:
package com.murdock.examples.kuxingseng.directory;
import java.util.Map;
public interface BigReadExcelProcessData {
void processData(Map map);
}
②:实现类BigReadExcelProcessimpl.java
内容:
package com.murdock.examples.kuxingseng.directory.imp;
import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
public class BigReadExcelProcessimpl implements BigReadExcelProcessData {
private final static Logger logger = LoggerFactory.getLogger(BigReadExcelProcessimpl.class);
@Override
public void processData(Map map) {
logger.info("map:" + map);
}
}
3,封装excel 工具类,新建BigReadExcelUtils.java。
package com.murdock.examples.kuxingseng.utils;
import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Pattern;
public class BigReadExcelUtils {
private final static Logger log = LoggerFactory.getLogger(BigReadExcelUtils.class);
private final int startRow;
private final int endRow;
private int currentRow = 0;
private final String filename;//文件的绝对路径
private BigReadExcelProcessData processData;
/**
* 构造方法
*/
public BigReadExcelUtils(String pathFileName, BigReadExcelProcessData process) throws Exception {
if (StringUtils.isEmpty(pathFileName)) throw new Exception("file is null");
processData = process;
this.filename = pathFileName;
this.startRow = 0;
this.endRow = 100_0000;//设置默认最大解析100W行数据
processSheet();
}
/**
* 指定获取第一个sheet
*/
private void processSheet() throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> it = r.getSheetsData();
while (it.hasNext()) {
InputStream sheet1 = it.next();
InputSource sheetSource = new InputSource(sheet1);
parser.parse(sheetSource);
sheet1.close();
}
}
/**
* 加载sax 解析器
*/
private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
String saxParser = "com.sun.org.apache.xerces.internal.parsers.SAXParser";
XMLReader parser = XMLReaderFactory.createXMLReader(saxParser);
ContentHandler handler = new PagingHandler(sst);
parser.setContentHandler(handler);
return parser;
}
private class PagingHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private String index = null;
private Object NULL = null;
private Map<String, String> map = new HashMap<>(100);//初始化内存空间,性能优化,10W数据能减少30秒左右(受列影响)
private Pattern pattern = Pattern.compile("^A[0-9]+$");
private PagingHandler(SharedStringsTable table) {
this.sst = table;
}
/**
* 获取key 值
*/
@Override
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if (name.equals("c")) {
index = attributes.getValue("r");
//判断是否是新的一行
if (pattern.matcher(index).find()) {
if (map != NULL && isAccess() && !map.isEmpty()) {
processData.processData(map);
map.clear();//使用完数据,清理数据
}
currentRow++;
}
if (isAccess()) {
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
}
lastContents = "";
}
/**
* 获取value
*/
@Override
public void endElement(String uri, String localName, String name)
throws SAXException {
if (isAccess()) {
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
if (name.equals("v")) {
map.put(index, lastContents);
}
}
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
if (isAccess()) {
lastContents += new String(ch, start, length);
}
}
@Override
public void endDocument() throws SAXException {
if (map != null && isAccess() && !map.isEmpty()) {
processData.processData(map);
map.clear();
}
}
}
private boolean isAccess() {
if (currentRow >= startRow && startRow <= endRow) {
return true;
}
return false;
}
}
以上代码可以直接使用亲测试通过,处理60M Excel占用内存260M 左右。
4,测试:
package com.murdock.examples.kuxingseng.directory.imp;
import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;
import com.murdock.examples.kuxingseng.utils.BigReadExcelUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.*;
public class BigReadExcelProcessimplTest {
private final static Logger logger = LoggerFactory.getLogger(BigReadExcelProcessimplTest.class);
@Test
public void processData() {
String filePath = "D:\\tmp\\demo.xlsx";
try {
BigReadExcelProcessData processData = new BigReadExcelProcessimpl();
new BigReadExcelUtils(filePath, processData);
} catch (Exception e) {
logger.error("process faile.", e);
}
}
}
结果:
18:58:43.416 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M1=测试13, I1=测试9, E1=测试5, A1=测试1, N1=测试14, J1=测试10, F1=测试6, B1=测试2, O1=测试15, K1=测试11, G1=测试7, C1=测试3, L1=测试12, H1=测试8, D1=测试4}
18:58:43.422 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M2=测试13, I2=测试9, E2=测试5, A2=测试1, N2=测试14, J2=测试10, F2=测试6, B2=测试2, O2=测试15, K2=测试11, G2=测试7, C2=测试3, L2=测试12, H2=测试8, D2=测试4}
18:58:43.424 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M3=测试13, I3=测试9, E3=测试5, A3=测试1, N3=测试14, J3=测试10, F3=测试6, B3=测试2, O3=测试15, K3=测试11, G3=测试7, C3=测试3, L3=测试12, H3=测试8, D3=测试4}