java处理excel,解决内存溢出(sax事件驱动解析excel),百万级数据处理,开箱即用。

1,添加 POI 的 Maven 依赖(下文代码还用到了 commons-lang、slf4j 和 junit,需自行引入):

<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>

2,创建包 directory,在其中新建接口 BigReadExcelProcessData.java;再在子包 directory.imp 中新建该接口的实现类 BigReadExcelProcessimpl.java
① :接口BigReadExcelProcessData.java内容:

package com.murdock.examples.kuxingseng.directory;

import java.util.Map;

/**
 * Callback that receives the data of one parsed spreadsheet row.
 */
public interface BigReadExcelProcessData {

    /**
     * Handles one row.
     *
     * @param map the cells of the row; the parameter is typed as string-to-string,
     *            which is what the parser in this project passes (cell reference such
     *            as {@code "A1"} mapped to the cell text)
     */
    void processData(Map<String, String> map);
}

②:实现类BigReadExcelProcessimpl.java内容:

package com.murdock.examples.kuxingseng.directory.imp;

import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

/**
 * {@link BigReadExcelProcessData} implementation that writes every row it receives
 * to the log at INFO level.
 */
public class BigReadExcelProcessimpl implements BigReadExcelProcessData {
    private static final Logger logger = LoggerFactory.getLogger(BigReadExcelProcessimpl.class);

    /**
     * Logs the given row as {@code map:<contents>}.
     *
     * @param map the row data to log
     */
    @Override
    public void processData(Map map) {
        // Parameterized message: the map is only rendered to text when INFO is enabled,
        // which matters when this is called once per row of a very large sheet.
        logger.info("map:{}", map);
    }

}

3,封装excel 工具类,新建BigReadExcelUtils.java。

package com.murdock.examples.kuxingseng.utils;

import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.InputStream;
import java.util.*;
import java.util.regex.Pattern;


/**
 * Streams the rows of an {@code .xlsx} workbook to a {@link BigReadExcelProcessData}
 * callback using POI's SAX-based {@link XSSFReader}, so the sheet XML is processed
 * element by element instead of being held in a workbook object.
 *
 * <p>All work happens inside the constructor: creating an instance opens the file,
 * parses every sheet returned by {@link XSSFReader#getSheetsData()} and invokes the
 * callback once for each row that contains at least one {@code <v>} value.
 *
 * <p>A row is delivered as a map from cell reference (the cell's {@code r} attribute,
 * e.g. {@code "B7"}) to the text of the cell's {@code <v>} element; cells of type
 * {@code t="s"} are resolved through the shared strings table. The same map instance
 * is reused and cleared after each callback, so a callback that needs to keep a row
 * must copy it.
 */
public class BigReadExcelUtils {
    private static final Logger log = LoggerFactory.getLogger(BigReadExcelUtils.class);

    /** Default cap on the number of rows handed to the callback: one million. */
    private static final int DEFAULT_END_ROW = 100_0000;

    private final int startRow;
    private final int endRow;
    /** Number of {@code <row>} elements started so far; not reset between sheets. */
    private int currentRow = 0;
    /** Path of the workbook file. */
    private final String filename;

    private final BigReadExcelProcessData processData;

    /**
     * Parses the workbook at {@code pathFileName} immediately, feeding every row to
     * {@code process}.
     *
     * @param pathFileName path of the {@code .xlsx} file; must not be null or empty
     * @param process      callback invoked once per row; must not be null
     * @throws IllegalArgumentException if {@code pathFileName} is null or empty
     * @throws NullPointerException     if {@code process} is null
     * @throws Exception                if the file cannot be opened or parsed
     */
    public BigReadExcelUtils(String pathFileName, BigReadExcelProcessData process) throws Exception {
        if (StringUtils.isEmpty(pathFileName)) {
            throw new IllegalArgumentException("file is null");
        }
        // Fail before any I/O rather than with an NPE at the first parsed row.
        this.processData = Objects.requireNonNull(process, "process");
        this.filename = pathFileName;
        this.startRow = 0;
        this.endRow = DEFAULT_END_ROW;
        processSheet();
    }

    /**
     * Opens the package and runs the SAX parser over every sheet in it.
     *
     * @throws Exception if the package cannot be opened or a sheet cannot be parsed
     */
    private void processSheet() throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader reader = new XSSFReader(pkg);
            SharedStringsTable sst = reader.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            Iterator<InputStream> sheets = reader.getSheetsData();
            while (sheets.hasNext()) {
                // try-with-resources: the stream is closed even when parsing throws.
                try (InputStream sheet = sheets.next()) {
                    parser.parse(new InputSource(sheet));
                }
            }
        } finally {
            // Nothing is modified here, so release the file without saving it back.
            pkg.revert();
        }
    }

    /**
     * Creates the SAX reader used for sheet XML and wires a {@link PagingHandler} to it.
     *
     * @param sst shared strings table used to resolve {@code t="s"} cells
     * @return a reader whose content handler is a new {@link PagingHandler}
     * @throws SAXException if no XML reader can be created
     */
    private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        // Use the platform's default reader instead of naming a JDK-internal class.
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // Sheet XML never needs a DTD or external entities; refuse them (XXE hardening).
        trySetFeature(parser, "http://apache.org/xml/features/disallow-doctype-decl", true);
        trySetFeature(parser, "http://xml.org/sax/features/external-general-entities", false);
        trySetFeature(parser, "http://xml.org/sax/features/external-parameter-entities", false);
        parser.setContentHandler(new PagingHandler(sst));
        return parser;
    }

    /** Sets a SAX feature, logging a warning instead of failing if the reader rejects it. */
    private static void trySetFeature(XMLReader reader, String feature, boolean value) {
        try {
            reader.setFeature(feature, value);
        } catch (SAXException e) {
            log.warn("SAX feature {} is not supported by {}", feature, reader.getClass().getName(), e);
        }
    }

    /**
     * SAX handler that collects the cells of the current row and hands the row to the
     * callback when its {@code </row>} tag is reached.
     */
    private class PagingHandler extends DefaultHandler {
        private final SharedStringsTable sst;
        /** Character data seen since the most recent start tag. */
        private final StringBuilder lastContents = new StringBuilder();
        /** True while the current cell's value is an index into the shared strings table. */
        private boolean nextIsString;
        /** Reference ({@code r} attribute) of the current cell, or null if it has none. */
        private String index = null;
        /** Cells of the current row; pre-sized to avoid rehashing on wide rows. */
        private final Map<String, String> map = new HashMap<>(100);

        private PagingHandler(SharedStringsTable table) {
            this.sst = table;
        }

        /**
         * Counts rows and records the reference and type of each cell.
         */
        @Override
        public void startElement(String uri, String localName, String name,
                                 Attributes attributes) throws SAXException {
            if ("row".equals(name)) {
                // Row boundaries come from the <row> element itself, so a row whose
                // column A is empty is still recognised as its own row.
                currentRow++;
            } else if ("c".equals(name)) {
                index = attributes.getValue("r");
                nextIsString = isAccess() && "s".equals(attributes.getValue("t"));
            }
            lastContents.setLength(0);
        }

        /**
         * Stores a finished cell value, or delivers the finished row.
         */
        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            if (!isAccess()) {
                return;
            }
            if ("v".equals(name)) {
                String value = lastContents.toString();
                // Resolve the shared-string index only for the <v> element; doing it on
                // any closing tag fails for a t="s" cell that has no <v>.
                if (nextIsString) {
                    int idx = Integer.parseInt(value);
                    value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
                // A cell without an r attribute cannot be keyed; skip it.
                if (index != null) {
                    map.put(index, value);
                }
            } else if ("row".equals(name)) {
                flushRow();
            }
        }

        /**
         * Accumulates character data; SAX may deliver one text node in several chunks.
         */
        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            if (isAccess()) {
                lastContents.append(ch, start, length);
            }
        }

        /** Passes the collected row to the callback, then clears the map for reuse. */
        private void flushRow() {
            if (!map.isEmpty()) {
                processData.processData(map);
                map.clear();
            }
        }

    }

    /**
     * Tells whether the current row lies inside the configured
     * {@code [startRow, endRow]} window.
     */
    private boolean isAccess() {
        return currentRow >= startRow && currentRow <= endRow;
    }

}

以上代码可以直接使用,亲测通过:处理 60MB 的 Excel 文件时约占用 260MB 内存。

4,测试:

package com.murdock.examples.kuxingseng.directory.imp;

import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;
import com.murdock.examples.kuxingseng.utils.BigReadExcelUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.junit.Assert.*;

/**
 * Runs {@link BigReadExcelUtils} over a sample workbook with the logging callback.
 */
public class BigReadExcelProcessimplTest {

    /**
     * Parses the sample file; any failure to open or parse it fails the test.
     *
     * @throws Exception if the workbook cannot be opened or parsed
     */
    @Test
    public void processData() throws Exception {
        String filePath = "D:\\tmp\\demo.xlsx";
        BigReadExcelProcessData processData = new BigReadExcelProcessimpl();
        // Let exceptions propagate: catching and logging them here would make the
        // test pass even when parsing fails.
        new BigReadExcelUtils(filePath, processData);
    }
}

结果:

18:58:43.416 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M1=测试13, I1=测试9, E1=测试5, A1=测试1, N1=测试14, J1=测试10, F1=测试6, B1=测试2, O1=测试15, K1=测试11, G1=测试7, C1=测试3, L1=测试12, H1=测试8, D1=测试4}
18:58:43.422 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M2=测试13, I2=测试9, E2=测试5, A2=测试1, N2=测试14, J2=测试10, F2=测试6, B2=测试2, O2=测试15, K2=测试11, G2=测试7, C2=测试3, L2=测试12, H2=测试8, D2=测试4}
18:58:43.424 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M3=测试13, I3=测试9, E3=测试5, A3=测试1, N3=测试14, J3=测试10, F3=测试6, B3=测试2, O3=测试15, K3=测试11, G3=测试7, C3=测试3, L3=测试12, H3=测试8, D3=测试4}

你可能感兴趣的:(Java)