springboot集成POI后续(记一次堆内存溢出问题)

使用POI(用户模式)导入大批量excel文件的情况下,会产生内存溢出的问题,如下:

image.png
1217276-20180120163349053-226959473.png

问题分析:

java.lang.OutOfMemoryError: GC overhead limit exceeded

问题排查:

  • java服务启动添加jvm参数:

-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=heapdump
heapdump是快照生成的路径

  • 使用jvisualvm.exe打开快照文件


    image.png
  • 定位到问题所在,确定为POI用户模式(usermodel)导致内存溢出

解决方案:改用POI事件模式(SAX)逐行读取excel,将每一行转为类似csv的字符串数组,再通过数组解析表中数据

package com.galaxyeye.icservice.utils;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.springframework.web.multipart.MultipartFile;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * @Author: geyingke
 * @Date: 2020/7/10
 * @Class: XLSXConvertCSVReader
 * @Discription: TODO
 **/
public class XLSXConvertCSVReader {


    /**
     * The type of the data value is indicated by an attribute on the cell. The
     * value is usually in a "v" element within the cell.
     */
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    /**
     * 使用xssf_sax_API处理Excel,请参考: http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
     * 

* Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at * http://www.ecma-international.org/publications/standards/Ecma-376.htm *

* A web-friendly version is http://openiso.org/Ecma/376/Part4 */ class MyXSSFSheetHandler extends DefaultHandler { /** * Table with styles */ private StylesTable stylesTable; /** * Table with unique strings */ private ReadOnlySharedStringsTable sharedStringsTable; /** * Destination for data */ private final PrintStream output; /** * Number of columns to read starting with leftmost */ private final int minColumnCount; // Set when V start element is seen private boolean vIsOpen; // Set when cell start element is seen; // used when cell close element is seen. private xssfDataType nextDataType; // Used to format numeric cell values. private short formatIndex; private String formatString; private final DataFormatter formatter; private int thisColumn = -1; // The last column printed to the output stream private int lastColumnNumber = -1; // Gathers characters as they are seen. private StringBuffer value; private String[] record; private List rows = new ArrayList(); private boolean isCellNull = false; /** * Accepts objects needed while parsing. 
* * @param styles Table of styles * @param strings Table of shared strings * @param cols Minimum number of columns to show * @param target Sink for output */ public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings, int cols, PrintStream target) { this.stylesTable = styles; this.sharedStringsTable = strings; this.minColumnCount = cols; this.output = target; this.value = new StringBuffer(); this.nextDataType = xssfDataType.NUMBER; this.formatter = new DataFormatter(); record = new String[this.minColumnCount]; rows.clear();// 每次读取都清空行集合 } /* * (non-Javadoc) * * @see * org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, * java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if ("inlineStr".equals(name) || "v".equals(name)) { vIsOpen = true; // Clear contents cache value.setLength(0); } // c => cell else if ("c".equals(name)) { // Get the cell reference String r = attributes.getValue("r"); int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } thisColumn = nameToColumn(r.substring(0, firstDigit)); // Set up defaults. 
this.nextDataType = xssfDataType.NUMBER; this.formatIndex = -1; this.formatString = null; String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); if ("b".equals(cellType)) nextDataType = xssfDataType.BOOL; else if ("e".equals(cellType)) nextDataType = xssfDataType.ERROR; else if ("inlineStr".equals(cellType)) nextDataType = xssfDataType.INLINESTR; else if ("s".equals(cellType)) nextDataType = xssfDataType.SSTINDEX; else if ("str".equals(cellType)) nextDataType = xssfDataType.FORMULA; else if (cellStyleStr != null) { // It's a number, but almost certainly one // with a special style or format int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); this.formatIndex = style.getDataFormat(); this.formatString = style.getDataFormatString(); if (this.formatString == null) this.formatString = BuiltinFormats .getBuiltinFormat(this.formatIndex); } } } /* * (non-Javadoc) * * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, * java.lang.String, java.lang.String) */ public void endElement(String uri, String localName, String name) throws SAXException { String thisStr = null; // v => contents of a cell if ("v".equals(name)) { // Process the value contents as required. // Do now, as characters() may be called more than once switch (nextDataType) { case BOOL: char first = value.charAt(0); thisStr = first == '0' ? "FALSE" : "TRUE"; break; case ERROR: thisStr = "\"ERROR:" + value.toString() + '"'; break; case FORMULA: // A formula could result in a string value, // so always add double-quote characters. thisStr = value.toString(); break; case INLINESTR: // TODO: have seen an example of this, so it's untested. 
XSSFRichTextString rtsi = new XSSFRichTextString( value.toString()); thisStr = rtsi.toString(); break; case SSTINDEX: String sstIndex = value.toString(); try { int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString( sharedStringsTable.getEntryAt(idx)); thisStr = rtss.toString(); } catch (NumberFormatException ex) { output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString()); } break; case NUMBER: String n = value.toString(); // 判断是否是日期格式 if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) { Double d = Double.parseDouble(n); Date date = HSSFDateUtil.getJavaDate(d); thisStr = formateDateToString(date); } else if (this.formatString != null) thisStr = formatter.formatRawCellContents( Double.parseDouble(n), this.formatIndex, this.formatString); else thisStr = n; break; default: thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; break; } // Output after we've seen the string contents // Emit commas for any fields that were missing on this row if (lastColumnNumber == -1) { lastColumnNumber = 0; } //判断单元格的值是否为空 if (thisStr == null || "".equals(isCellNull)) { isCellNull = true;// 设置单元格是否为空值 } record[thisColumn] = thisStr; // Update column if (thisColumn > -1) lastColumnNumber = thisColumn; } else if ("row".equals(name)) { // Print out any missing commas if needed if (minColumns > 0) { // Columns are 0 based if (lastColumnNumber == -1) { lastColumnNumber = 0; } if (isCellNull == false && record[0] != null && record[1] != null)// 判断是否空行 { rows.add(record.clone()); isCellNull = false; for (int i = 0; i < record.length; i++) { record[i] = null; } } } lastColumnNumber = -1; } } public List getRows() { return rows; } public void setRows(List rows) { this.rows = rows; } /** * Captures characters only if a suitable element is open. Originally * was just "v"; extended for inlineStr also. 
*/ public void characters(char[] ch, int start, int length) throws SAXException { if (vIsOpen) value.append(ch, start, length); } /** * Converts an Excel column name like "C" to a zero-based index. * * @param name * @return Index corresponding to the specified name */ private int nameToColumn(String name) { int column = -1; for (int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) * 26 + c - 'A'; } return column; } private String formateDateToString(Date date) { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//格式化日期 return sdf.format(date); } } // ///////////////////////////////////// private OPCPackage xlsxPackage; private int minColumns; private PrintStream output; private String sheetName; /** * Creates a new XLSX -> CSV converter * * @param pkg The XLSX package to process * @param output The PrintStream to output the CSV to * @param minColumns The minimum number of columns to output, or -1 for no minimum */ public XLSXConvertCSVReader(OPCPackage pkg, PrintStream output, String sheetName, int minColumns) { this.xlsxPackage = pkg; this.output = output; this.minColumns = minColumns; this.sheetName = sheetName; } /** * Parses and shows the content of one sheet using the specified styles and * shared-strings tables. 
* * @param styles * @param strings * @param sheetInputStream */ public List processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException { InputSource sheetSource = new InputSource(sheetInputStream); SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = ((SAXParser) saxParser).getXMLReader(); MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); return handler.getRows(); } /** * 初始化这个处理程序 将 * * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public List process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable( this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); Map> sheet2val = new HashMap<>(); List list = null; StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader .getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); String sheetNameTemp = iter.getSheetName(); if (this.sheetName.equals(sheetNameTemp)) { list = processSheet(styles, strings, stream); stream.close(); ++index; } } return list; } public Map> processAllSheet() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable( this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); Map> sheet2val = new HashMap<>(); List list = null; StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader .getSheetsData(); while (iter.hasNext()) { InputStream 
stream = iter.next(); String sheetNameTemp = iter.getSheetName(); sheet2val.put(sheetNameTemp, processSheet(styles, strings, stream)); } return sheet2val; } /** * 读取Excel * * @param path 文件路径 * @param sheetName sheet名称 * @param minColumns 列总数 * @return * @throws SAXException * @throws ParserConfigurationException * @throws OpenXML4JException * @throws IOException */ private static List readerExcel(String path, String sheetName, int minColumns) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { OPCPackage p = OPCPackage.open(path, PackageAccess.READ); XLSXConvertCSVReader xlsx2csv = new XLSXConvertCSVReader(p, System.out, sheetName, minColumns); List list = xlsx2csv.process(); p.close(); return list; } public static Map> readerExcel(MultipartFile file, int minColumns) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { OPCPackage p = OPCPackage.open(file.getInputStream()); XLSXConvertCSVReader xlsx2csv = new XLSXConvertCSVReader(p, System.out, null, minColumns); Map> stringListMap = xlsx2csv.processAllSheet(); p.close(); return stringListMap; } public static void main(String[] args) throws Exception { List list = XLSXConvertCSVReader .readerExcel( "E:\\平台知识库导入示例-温州社会保险.xlsx", "社会保险", 16); for (String[] record : list) { for (String cell : record) { System.out.print(cell + " "); } System.out.println(); } } }

数组解析关键代码

/**
     * 导入excel文件,并转换成指定类型的List集合
     *
     * @param 
     * @param file
     * @param fieldMap -- 标题和字段的映射,key为类中字段 |
     *                 value为标题字段匹配规则集合(支持正则)(可以存在多个)
     * @param tClass
     * @return
     * @throws Exception
     */
    public static  List importExcelSuper(MultipartFile file, Map> fieldMap, Class tClass, int titleRowIndex) throws Exception {
        List result = new ArrayList<>();
        Map> stringListMap = XLSXConvertCSVReader.readerExcel(file, 20);
        for (Map.Entry> entry : stringListMap.entrySet()) {
            int sheetIndex = 0;
            convertExcelToBeans(result, entry.getValue(), fieldMap, tClass, titleRowIndex, sheetIndex);
            sheetIndex++;
        }
        return result.size() > 0 ? result : null;
    }

private static  void convertExcelToBeans(List result, List sheet, Map> fieldMap, Class tClass, int titleRowIndex, int sheetIndex) throws Exception {
        if (sheet != null) {
            if (sheet.size() <= 1) {
                throw new MyExcelException("CHECK_ME: file is empty.");
            }
            //获取标题行,并创建标题的列映射
            Map> beanField2TitleMap = getBeanField2TitleMap(sheet.get(titleRowIndex), fieldMap);
            if (beanField2TitleMap != null && beanField2TitleMap.size() > 0) {
                //开始遍历除标题之外的行
                for (int rowIndex = 0; rowIndex < sheet.size(); rowIndex++){
                    if (rowIndex < titleRowIndex + 1) {
                        //越过标题行,及标题行之前的所有内容
                        continue;
                    }
                    T tInstance = tClass.newInstance();
                    //处理多表格的表格编号
                    checkHasSheetIndexSetVal(tInstance, tClass, sheetIndex);
                    //处理类中其它字段的赋值
                    for (Map.Entry> entry : beanField2TitleMap.entrySet()) {
                        String fieldName = entry.getKey();
                        List cellIndexList = entry.getValue();
                        Field field = getBeanInnerField(tClass, fieldName);
                        Class fieldType = field.getType();
                        checkFieldTypeAndSetVal(sheet.get(rowIndex), fieldType, cellIndexList, fieldName, tClass, tInstance);
                    }
                    if (!BeanUtil.allFieldIsNull(tInstance)) {
                        result.add(tInstance);
                    }
                }
            } else {
                throw new MyExcelException("字段匹配异常");
            }
        }
    }

/**
     * cvs格式,列头匹配
     * @param titleRow
     * @param fieldMap
     * @return
     */
    private static Map> getBeanField2TitleMap(String[] titleRow, Map> fieldMap) {
        Map> beanField2TitleMap = new HashMap<>();
        String[] fieldArr = fieldMap.keySet().toArray(new String[]{});
        for (String key : fieldArr) {
            List titleList = fieldMap.get(key);
            List titleIndexList = new ArrayList<>();
            for (String titleName : titleList) {
                for (int index = 0; ; index++) {
                    String cell = titleRow[index];
                    if (null != cell) {
                        if (StringUtils.isEmpty(cell)) {
                            break;
                        } else {
                            if (cell.matches(titleName)) {
                                titleIndexList.add(index);
                            }
                        }
                    } else {
                        break;
                    }
                }
            }
            if (titleIndexList != null && titleIndexList.size() > 0) {
                beanField2TitleMap.put(key, titleIndexList);
            }
        }
        return beanField2TitleMap;
    }

/**
     * Stores the sheet number in the bean when the bean class itself declares a field named
     * {@code sheetIndex}; otherwise does nothing. The value is written through the setter whose
     * name is produced by {@code generateSetMethodName("sheetIndex")}.
     *
     * @param tInstance  bean that receives the sheet number
     * @param tClass     class whose declared fields and methods are inspected
     * @param sheetIndex sheet number to store
     * @param <T>        bean type
     * @throws NoSuchFieldException
     * @throws NoSuchMethodException     when the class declares no matching setter
     * @throws InvocationTargetException when the setter itself throws
     * @throws IllegalAccessException    when the setter is not accessible
     */
    private static <T> void checkHasSheetIndexSetVal(T tInstance, Class<T> tClass, int sheetIndex) throws NoSuchFieldException, NoSuchMethodException, InvocationTargetException, IllegalAccessException {
        boolean declaresSheetIndex = false;
        for (Field declared : tClass.getDeclaredFields()) {
            if ("sheetIndex".equals(declared.getName())) {
                declaresSheetIndex = true;
                break;
            }
        }
        if (!declaresSheetIndex) {
            return;
        }
        Class<?> setterArgType = tClass.getDeclaredField("sheetIndex").getType();
        Method setter = tClass.getDeclaredMethod(generateSetMethodName("sheetIndex"), setterArgType);
        setter.invoke(tInstance, sheetIndex);
    }

/**
     * Finds a declared field by name in the given class or, failing that, in any of its
     * superclasses, searching from the class upwards.
     *
     * @param tClass    class at which the search starts
     * @param fieldName name of the field
     * @param <T>       bean type
     * @return the first field with that name found on the way up the hierarchy
     * @throws NoSuchFieldException when neither the class nor any superclass declares the field
     */
    private static <T> Field getBeanInnerField(Class<T> tClass, String fieldName) throws NoSuchFieldException {
        // Walk the whole hierarchy: looking only at the direct superclass missed fields declared
        // further up and failed with a NullPointerException when there was no superclass.
        for (Class<?> current = tClass; current != null; current = current.getSuperclass()) {
            try {
                return current.getDeclaredField(fieldName);
            } catch (NoSuchFieldException ignored) {
                // Not declared at this level; continue with the superclass.
            }
        }
        throw new NoSuchFieldException(fieldName);
    }

/**
     * Copies the cells selected by {@code cellIndexList} from one row into a single bean field.
     * For a field of type {@code List} all non-empty cells are gathered and the setter is
     * invoked once with the gathered list; for any other type the setter is invoked once per
     * non-empty cell. Nothing happens when {@code cellIndexList} is null or empty.
     *
     * @param row           cells of the current row
     * @param fieldType     declared type of the bean field
     * @param cellIndexList indexes of the columns mapped to the field
     * @param fieldName     name of the bean field
     * @param tClass        bean class
     * @param tInstance     bean being populated
     * @throws Exception when the setter cannot be resolved or invoked
     */
    private static <T> void checkFieldTypeAndSetVal(String[] row, Class<?> fieldType, List<Integer> cellIndexList, String fieldName, Class<T> tClass, T tInstance) throws Exception {
        if (cellIndexList == null || cellIndexList.size() == 0) {
            return;
        }
        boolean collectsIntoList = fieldType.equals(List.class);
        if (collectsIntoList) {
            List<String> gathered = new ArrayList<>();
            for (Integer position : cellIndexList) {
                String text = row[position];
                // Null and empty cells contribute nothing to the list.
                if (text != null && !StringUtils.isEmpty(text)) {
                    gathered.add(text);
                }
            }
            Method listSetter = getBeanInnerFieldMethod(tClass, fieldName, fieldType);
            setFieldValue(listSetter, tInstance, fieldType, gathered);
            return;
        }
        for (Integer position : cellIndexList) {
            String text = row[position];
            // Null and empty cells are skipped.
            if (text == null || StringUtils.isEmpty(text)) {
                continue;
            }
            Method setter = getBeanInnerFieldMethod(tClass, fieldName, fieldType);
            setFieldValue(setter, tInstance, fieldType, text);
        }
    }
/**
     * Tells whether every inspected field of a bean is empty according to
     * {@code ObjectUtils.isEmpty}. Fields declared by the bean's class and by all of its
     * superclasses are inspected; final fields and fields named in {@code skipFields} are
     * left out.
     *
     * @param bean       bean to inspect
     * @param skipFields names of fields that are not inspected
     * @param <T>        bean type
     * @return false as soon as one inspected field is not empty; true otherwise, including when
     *         reading a field fails with IllegalAccessException (which is logged)
     */
    public static <T> boolean allFieldIsNull(T bean, String... skipFields) {
        List<String> namesToSkip = Arrays.stream(skipFields).collect(Collectors.toList());
        try {
            // Gather the declared fields of the class and of every class above it.
            List<Field> inherited = new ArrayList<>();
            for (Class<?> level = bean.getClass(); level != null; level = level.getSuperclass()) {
                inherited.addAll(Arrays.asList(level.getDeclaredFields()));
            }
            for (Field candidate : inherited) {
                candidate.setAccessible(true);
                boolean ignorable = Modifier.isFinal(candidate.getModifiers()) || namesToSkip.contains(candidate.getName());
                if (ignorable) {
                    continue;
                }
                Object current = candidate.get(bean);
                if (!ObjectUtils.isEmpty(current)) {
                    return false;
                }
            }
        } catch (IllegalAccessException e) {
            logger.error("BeanUtil:allFieldIsNull =IllegalAccessException=>", e);
        }
        return true;
    }

你可能感兴趣的:(springboot集成POI后续(记一次堆内存溢出问题))