如下文件内容:
这里有特殊字符:\n 、\t 、\u4e0a 、\/
我要做的事,就是恢复其特殊字符的作用(而不是打印被转义后的效果)
直观的看,很容易:直接替换不就行了
line = line.replaceAll("\\n", "\r\n" ); line = line.replaceAll("\\t", "\t" ); line = line.replaceAll("\\/", "/" );但是,这是 错误的。新生成的文件没有任何改变。
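这里有个问题以前没弄清楚:文本文件中的 \n 读入后仍然只是"反斜杠 + 字母 n"两个字符(写成 Java 字符串字面量是 "\\n");但 replaceAll 的第一个参数是正则表达式,反斜杠在正则里还要再转义一次,所以正则应写成 \\n,对应的 Java 字面量就是 "\\\\n"。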
因而正确的替换方法为:
line = line.replaceAll("\\\\n", "\r\n" ); line = line.replaceAll("\\\\t", "\t" ); line = line.replaceAll("\\\\/", "/" );
接下来就是处理 Unicode码,将其还原
来源:http://www.cnblogs.com/yuxuan/archive/2011/08/02/2124904.html
/** *//***************************************************** * 功能介绍:将unicode字符串转为汉字 * 输入参数:源unicode字符串 * 输出参数:转换后的字符串 *****************************************************/ static String decodeUnicode( final String dataStr ) { int start = 0; int end = 0; final StringBuffer buffer = new StringBuffer(); while( start > -1 ) { end = dataStr.indexOf( "\\\\u", start + 2 ); String charStr = ""; if( end == -1 ) { charStr = dataStr.substring( start + 2, dataStr.length() ); } else { charStr = dataStr.substring( start + 2, end); } char letter = (char) Integer.parseInt( charStr, 16 ); // 16进制parse整形字符串。 buffer.append( new Character( letter ).toString() ); start = end; } return buffer.toString(); }
有了 decodeUnicode 方法,接下来只需要用正则找出文本中所有形如 \uxxxx(xxxx 为 4 位十六进制数)的片段,并逐个替换为解码后的字符即可:
/**
 * Replaces every backslash-u escape (backslash, 'u', four hexadecimal digits) found
 * in {@code s} with the character it denotes, using {@code decodeUnicode} to decode
 * each match. Text that does not match is copied through unchanged.
 *
 * @param s the text to scan
 * @return {@code s} with all well-formed escapes decoded
 * @throws RuntimeException wrapping any failure raised while scanning or decoding
 */
static String replace( String s ) {
    try {
        // Only hexadecimal digits may follow the prefix; accepting g-z here would
        // hand the decoder text that cannot be parsed as base 16.
        Pattern regex = Pattern.compile( "\\\\u[0-9a-f]{4}", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE );
        Matcher matcher = regex.matcher( s );
        StringBuffer sb = new StringBuffer();
        while( matcher.find() ) {
            // appendReplacement treats '$' and backslash in the replacement as syntax,
            // so a decoded dollar sign or backslash must be quoted to be inserted literally.
            String decoded = decodeUnicode( matcher.group() );
            matcher.appendReplacement( sb, Matcher.quoteReplacement( decoded ) );
        }
        matcher.appendTail( sb );
        return sb.toString();
    } catch (Exception ex) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new RuntimeException( "Something error.", ex );
    }
}
总的转换代码:
static void readToWrite( File file ) { BufferedReader bufReader = null; BufferedWriter bufWriter = null; try { bufReader = new BufferedReader( new FileReader(file) ); bufWriter = buildWriter( file ); String line = null; while( (line = bufReader.readLine()) != null ) { line = line.replaceAll("\\\\n", "\r\n" ); line = line.replaceAll("\\\\t", "\t" ); line = line.replaceAll("\\\\/", "/" ); line = replace( line ); bufWriter.write( line ); bufWriter.newLine(); } } catch (IOException e) { e.printStackTrace(); } finally{ if( bufReader != null ){ try { bufReader.close(); } catch (IOException e) { e.printStackTrace(); } bufReader = null; } if( bufWriter != null ){ try { bufWriter.close(); } catch (IOException e) { e.printStackTrace(); } bufWriter = null; } } } static BufferedWriter buildWriter( File file ){ BufferedWriter bufWriter = null; try { String fullName = file.getCanonicalPath(); int splitPath = fullName.lastIndexOf( "\\" ); String path = fullName.substring( 0, splitPath ); String name = file.getName().replaceAll("\\.txt", "@\\.txt" ); bufWriter = new BufferedWriter( new FileWriter( path + "\\" + name ) ); return bufWriter; } catch (IOException e) { e.printStackTrace(); } return null; } static String replace( String s ) { try { Pattern regex = Pattern.compile("\\\\u[0-9a-z]{4}", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); Matcher matcher = regex.matcher(s); StringBuffer sb = new StringBuffer(); while (matcher.find()) { matcher.appendReplacement(sb, decodeUnicode( matcher.group()) ); } matcher.appendTail(sb); return sb.toString(); } catch (Exception ex) { throw new RuntimeException( "Something error." 
); } } /** *//***************************************************** * 功能介绍:将unicode字符串转为汉字 * 输入参数:源unicode字符串 * 输出参数:转换后的字符串 *****************************************************/ static String decodeUnicode( final String dataStr ) { int start = 0; int end = 0; final StringBuffer buffer = new StringBuffer(); while( start > -1 ) { end = dataStr.indexOf( "\\\\u", start + 2 ); String charStr = ""; if( end == -1 ) { charStr = dataStr.substring( start + 2, dataStr.length() ); } else { charStr = dataStr.substring( start + 2, end); } char letter = (char) Integer.parseInt( charStr, 16 ); // 16进制parse整形字符串。 buffer.append( new Character( letter ).toString() ); start = end; } return buffer.toString(); }
============================================================
/**将中文转为unicode 及转回中文函数转为unicode */ public static void writeUnicode(final DataOutputStream out, final String value) { try { final String unicode = gbEncoding( value ); final byte[] data = unicode.getBytes(); final int dataLength = data.length; System.out.println( "Data Length is: " + dataLength ); System.out.println( "Data is: " + value ); out.writeInt( dataLength ); //先写出字符串的长度 out.write( data, 0, dataLength ); //然后写出转化后的字符串 } catch (IOException e) { } } public static String gbEncoding( final String gbString ) { char[] utfBytes = gbString.toCharArray(); String unicodeBytes = ""; for( int byteIndex = 0; byteIndex < utfBytes.length; byteIndex ++ ) { String hexB = Integer.toHexString( utfBytes[ byteIndex ] ); if( hexB.length() <= 2 ) { hexB = "00" + hexB; } unicodeBytes = unicodeBytes + "\\\\u" + hexB; } System.out.println( "unicodeBytes is: " + unicodeBytes ); return unicodeBytes; }