Java使用poi读取excel数据(excel可能很大,先转换为csv再读取)

  ————————————配置————————————
  jdbc.properties中加入: excelUrl=/……xlsx文件目录路径/ (excelUrl + “xxxx.xlsx” 为完整路径)
   
  导入poi-3.16下的6个jar包,poi-3.16/lib下的5个jar包,poi-3.16/ooxml-lib下的2个jar包
   
  将Excel_reader.java 和 XLSX2CSV.java 导入项目
   
  ————————————方法————————————
   
  Excel_reader类中的:
   
  xlsx_reader(String excel_name , ArrayList args)
   
  //excel_name为要读取的xlsx文件名(带后缀) , args为要获取的列号的列表
  //返回二维数组ArrayList> 第一维表示xlsx的行,第二维表示xlsx中该行的单元格
   
  //空单元格返回null,需要自己处理成—--或0
   
  //args可以填 int 或者 String ,若args[i]为int,那么返回的二维数组的第I列为xlsx中的第args[i]列
  //若args[i]为String,那么返回的二维数组的第i列为改字符常量
   
   
  //例如 xlsx_reader(“崇明县-表15:“夏淡”绿叶菜种植补贴-2014.xlsx”,args)
  // 其中 args=[7,8,9,”绿肥”]
  //那么返回的二维数组内容如下:
  [小明 , 350401219948383**** , null , 绿肥]
  [小红 , 645354354354323**** , null , 绿肥]
  [小兰 , 445353453425643**** , null , 绿肥]
  。。。。。。

XLSX2CSV.java

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;  
  
/** 
 * A rudimentary XLSX -> CSV processor modeled on the 
 * POI sample program XLS2CSVmra from the package 
 * org.apache.poi.hssf.eventusermodel.examples. 
 * As with the HSSF version, this tries to spot missing 
 * rows and cells, and output empty entries for them. 
 * 

* Data sheets are read using a SAX parser to keep the * memory footprint relatively small, so this should be * able to read enormous workbooks. The styles table and * the shared-string table must be kept in memory. The * standard POI styles table class is used, but a custom * (read-only) class is used for the shared string table * because the standard POI SharedStringsTable grows very * quickly with the number of unique strings. *

* For a more advanced implementation of SAX event parsing * of XLSX files, see {@link XSSFEventBasedExcelExtractor} * and {@link XSSFSheetXMLHandler}. Note that for many cases, * it may be possible to simply use those with a custom * {@link SheetContentsHandler} and no SAX code needed of * your own! */ public class XLSX2CSV { /** * Uses the XSSF Event SAX helpers to do most of the work * of parsing the Sheet XML, and outputs the contents * as a (basic) CSV. */ private class SheetToCSV implements SheetContentsHandler { private boolean firstCellOfRow = false; private int currentRow = -1; private int currentCol = -1; private void outputMissingRows(int number) { for (int i = 0; i < number; i++) { curstr = new ArrayList(); for (int j = 0; j < minColumns; j++) { curstr.add(null); } output.add(curstr); } } @Override public void startRow(int rowNum) { curstr = new ArrayList(); // If there were gaps, output the missing rows outputMissingRows(rowNum - currentRow - 1); // Prepare for this row firstCellOfRow = true; currentRow = rowNum; currentCol = -1; } @Override public void endRow(int rowNum) { // Ensure the minimum number of columns for (int i = currentCol; i < minColumns ; i++) { curstr.add(null); } output.add(curstr); } @Override public void cell(String cellReference, String formattedValue, XSSFComment comment) { // if (firstCellOfRow) { // firstCellOfRow = false; // } else { // curstr.append(','); // } // gracefully handle missing CellRef here in a similar way as XSSFCell does if (cellReference == null) { cellReference = new CellAddress(currentRow, currentCol).formatAsString(); } // Did we miss any cells? int thisCol = (new CellReference(cellReference)).getCol(); int missedCols = thisCol - currentCol - 1; for (int i = 0; i < missedCols; i++) { curstr.add(null); } currentCol = thisCol; // Number or string? try { Double.parseDouble(formattedValue); curstr.add(formattedValue); } catch (NumberFormatException e) { // output.append('"'); curstr.add(formattedValue); // output.append('"'); } } @Override public void headerFooter(String text, boolean isHeader, String tagName) { // Skip, no headers or footers in CSV } } /////////////////////////////////////// private final OPCPackage xlsxPackage; /** * Number of columns to read starting with leftmost */ private final int minColumns; /** * Destination for data */ private ArrayList> output; private ArrayList curstr; public ArrayList> get_output(){ return output; } /** * Creates a new XLSX -> CSV converter * * @param pkg The XLSX package to process * @param output The PrintStream to output the CSV to * @param minColumns The minimum number of columns to output, or -1 for no minimum */ public XLSX2CSV(OPCPackage pkg, int minColumns) { this.xlsxPackage = pkg; this.minColumns = minColumns; } /** * Parses and shows the content of one sheet * using the specified styles and shared-strings tables. * * @param styles * @param strings * @param sheetInputStream */ public void processSheet( StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException { DataFormatter formatter = new DataFormatter(); InputSource sheetSource = new InputSource(sheetInputStream); try { XMLReader sheetParser = SAXHelper.newXMLReader(); ContentHandler handler = new XSSFSheetXMLHandler( styles, null, strings, sheetHandler, formatter, false); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); } catch (ParserConfigurationException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); } } /** * Initiates the processing of the XLS workbook file to CSV. * * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); int index = 0; while (iter.hasNext()) { output = new ArrayList> (); InputStream stream = iter.next(); String sheetName = iter.getSheetName(); System.out.println("正在读取sheet: "+sheetName + " [index=" + index + "]:"); processSheet(styles, strings, new SheetToCSV(), stream); System.out.println("sheet 读取完成!"); stream.close(); ++index; } } // public static void main(String[] args) throws Exception { // /* if (args.length < 1) { // System.err.println("Use:"); // System.err.println(" XLSX2CSV [min columns]"); // return; // }*/ // // File xlsxFile = new File("F:\\8月数据.xlsx"); // if (!xlsxFile.exists()) { // System.err.println("Not found or not a file: " + xlsxFile.getPath()); // return; // } // // int minColumns = -1; // if (args.length >= 2) // minColumns = Integer.parseInt(args[1]); // // // The package open is instantaneous, as it should be. // OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ); // XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns); // xlsx2csv.process(); // p.close(); // } }


Excel_reader.java

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class Excel_reader {

	// *************xlsx文件读取函数************************
	// 在jdbc.properties上加上 excelUrl:xlsx文件的目录
	// excel_name为文件名,arg为需要查询的列号(输入数字则返回对应列 , 输入字符串则固定返回这个字符串)
	// 返回
	@SuppressWarnings({ "resource", "unused" })
	public static ArrayList> xlsx_reader(String excel_name, ArrayList args)
			throws IOException {
		// 读取excel文件夹url
		Properties properties = new Properties();
		InputStream inStream = JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties");
		properties.load(inStream);
		String excelUrl = properties.getProperty("excelUrl");

		File xlsxFile = new File(excelUrl + excel_name);
		if (!xlsxFile.exists()) {
			System.err.println("Not found or not a file: " + xlsxFile.getPath());
			return null;
		}
		ArrayList> excel_output = new ArrayList>();
		try {
			OPCPackage p;
			p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
			XLSX2CSV xlsx2csv = new XLSX2CSV(p, 20); // 20代表最大列数
			xlsx2csv.process();
			excel_output = xlsx2csv.get_output();
			p.close();   //释放
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}

		System.out.println(excel_name + " 读取完毕");

		// //读取xlsx文件
		// XSSFWorkbook xssfWorkbook = null;
		// //寻找目录读取文件
		// System.out.println("开始读取 "+excel_name);
		// File excelFile = new File(excelUrl+excel_name);
		// InputStream is = new FileInputStream(excelFile);
		// xssfWorkbook = new XSSFWorkbook(is);
		//
		// if(xssfWorkbook==null){
		// System.out.println("未读取到内容,请检查路径!");
		// return null;
		// }else{
		// System.out.println(excel_name+" 读取完毕");
		// }

		ArrayList> ans = new ArrayList>();
		// 遍历xlsx中的sheet

		// 对于每个sheet,读取其中的每一行
		for (int rowNum = 0; rowNum < excel_output.size(); rowNum++) {
			ArrayList cur_output = excel_output.get(rowNum);
			ArrayList curarr = new ArrayList();
			for (int columnNum = 0; columnNum < args.size(); columnNum++) {
				Object obj = args.get(columnNum);
				if (obj instanceof String) {
					curarr.add(obj.toString());
				} else if (obj instanceof Integer) {
					String cell = cur_output.get((int) obj);
					curarr.add(cell);
				} else {
					System.out.print("类型错误!");
					return null;
				}
			}
			ans.add(curarr);
		}

		return ans;
	}

//	// 判断后缀为xlsx的excel文件的数据类
//	@SuppressWarnings("deprecation")
//	private static String getValue(XSSFCell xssfRow) {
//		if (xssfRow == null) {
//			return null;
//		}
//		if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) {
//			return String.valueOf(xssfRow.getBooleanCellValue());
//		} else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) {
//			double cur = xssfRow.getNumericCellValue();
//			long longVal = Math.round(cur);
//			Object inputValue = null;
//			if (Double.parseDouble(longVal + ".0") == cur)
//				inputValue = longVal;
//			else
//				inputValue = cur;
//			return String.valueOf(inputValue);
//		} else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BLANK
//				|| xssfRow.getCellType() == xssfRow.CELL_TYPE_ERROR) {
//			return "";
//		} else {
//			return String.valueOf(xssfRow.getStringCellValue());
//		}
//	}

} 
  

你可能感兴趣的:(Java,知识整理)