解析CSV文件,最简单、最直接、最先想到的办法就是:
"a,b,c,d".split(",")
但是如果情况复杂一点儿,字段内容本身带有特殊字符,比如引号“"”或逗号“,”,那你就会发现split(",")不灵光了。于是今天闲着没事儿就自己写了一个,没有完全测试,但是应该没问题,哈哈。
import java.util.ArrayList;
import java.util.List;

/**
 * Splits a single CSV line into its fields.
 *
 * <p>Unlike {@code String.split(",")}, this parser understands quoted fields:
 * a field wrapped in double quotes may contain the separator character, and a
 * doubled quote ({@code ""}) inside a quoted field stands for one literal
 * quote.
 *
 * <p>The class keeps no mutable state, so every call is independent of the
 * calls made before it.
 *
 * <p>Package note: the original listing placed this class in
 * {@code com.javaeye.terrencexu.csv}; add the matching {@code package}
 * statement when copying it into a project.
 */
public final class CSVUtils {

    /** Separator used by {@link #split(String)}. */
    private static final char DEFAULT_SEPARATOR = ',';

    /** Character that opens and closes a quoted field. */
    private static final char QUOTE = '"';

    private CSVUtils() {}

    /**
     * Splits {@code line} on commas, honouring double-quoted fields.
     *
     * @param line one CSV record without its line terminator
     * @return the fields in order of appearance, or {@code null} when
     *         {@code line} is {@code null} or empty
     */
    public static List<String> split(String line) {
        return split(line, DEFAULT_SEPARATOR);
    }

    /**
     * Splits {@code line} on {@code separator}, honouring double-quoted fields.
     *
     * <p>The separator applies to this call only; it does not change what
     * {@link #split(String)} uses afterwards.
     *
     * @param line      one CSV record without its line terminator
     * @param separator the character that delimits fields
     * @return the fields in order of appearance, or {@code null} when
     *         {@code line} is {@code null} or empty
     */
    public static List<String> split(String line, char separator) {
        // An empty line has always been reported as null; keep that contract
        // and treat a null line the same way instead of throwing.
        if (line == null || line.length() == 0) {
            return null;
        }

        List<String> fields = new ArrayList<String>();
        int length = line.length();
        int curPos = 0;
        while (true) {
            if (curPos < length && line.charAt(curPos) == QUOTE) {
                curPos = parseQuoted(line, curPos + 1, separator, fields);
            } else {
                curPos = parsePlain(line, curPos, separator, fields);
            }
            // Both parsers return either the index of the separator that ended
            // the field or line.length() when the line is exhausted.
            if (curPos >= length) {
                break;
            }
            // Step over the separator. If it was the last character, the next
            // pass adds the trailing empty field ("a," -> [a, ""]).
            curPos++;
        }
        return fields;
    }

    /**
     * Reads one unquoted field starting at {@code curPos} and appends it to
     * {@code fields}.
     *
     * @param line      the line being parsed
     * @param curPos    index of the first character of the field
     * @param separator the field delimiter
     * @param fields    the list receiving the parsed field
     * @return the index of the separator that ended the field, or
     *         {@code line.length()} if the field ran to the end of the line
     */
    private static int parsePlain(String line, int curPos, char separator, List<String> fields) {
        int nextSepPos = line.indexOf(separator, curPos);
        if (nextSepPos == -1) {
            fields.add(line.substring(curPos));
            return line.length();
        }
        fields.add(line.substring(curPos, nextSepPos));
        return nextSepPos;
    }

    /**
     * Reads one quoted field and appends its unescaped content to
     * {@code fields}.
     *
     * <p>Parsing is lenient: a quote followed by anything other than a second
     * quote or the separator is kept as a literal character, and a field whose
     * closing quote is missing runs to the end of the line.
     *
     * @param line      the line being parsed
     * @param curPos    index of the first character after the opening quote
     * @param separator the field delimiter
     * @param fields    the list receiving the parsed field
     * @return the index of the separator following the closing quote, or
     *         {@code line.length()} if the field ran to the end of the line
     */
    private static int parseQuoted(String line, int curPos, char separator, List<String> fields) {
        StringBuilder field = new StringBuilder();
        int length = line.length();
        int pos = curPos;
        while (pos < length) {
            char current = line.charAt(pos);
            if (current == QUOTE) {
                if (pos + 1 == length) {
                    // Closing quote is the last character: report the line as
                    // fully consumed so no phantom empty field is added.
                    pos = length;
                    break;
                }
                char next = line.charAt(pos + 1);
                if (next == QUOTE) {
                    // Escaped quote ("") -> one literal quote.
                    field.append(QUOTE);
                    pos += 2;
                    continue;
                }
                if (next == separator) {
                    // Closing quote; stop on the separator that follows it.
                    pos++;
                    break;
                }
                // Stray quote inside the field: fall through and keep it.
            }
            field.append(current);
            pos++;
        }
        fields.add(field.toString());
        return pos;
    }
}
测试一把:
/**
 * Demo driver: splits one comma-separated row containing a quoted JSON field
 * and one pipe-separated row containing a quoted separator, printing each
 * resulting field list on its own line.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Row whose third field is quoted and contains both commas and escaped quotes.
    final String commaRow = "col_1,Test,\"{\"\"key\"\":\"\"date\"\",\"\"order\"\":\"\"desc\"\"}\",,,,application/xml";
    // Row using '|' as separator, with a quoted field that is itself a '|'.
    final String pipeRow = "a|b|\"|\"|d";

    Object commaFields = CSVUtils.split(commaRow);
    System.out.println(commaFields);

    Object pipeFields = CSVUtils.split(pipeRow, '|');
    System.out.println(pipeFields);
}
运行结果:
[col_1, Test, {"key":"date","order":"desc"}, , , , application/xml]
[a, b, |, d]
-- Done --