将文件内含有的特殊字符还原

如下文件内容:


这里有特殊字符:\n 、\t 、\u4e0a 、\/

我要做的事,就是恢复其特殊字符的作用(而不是打印被转义后的效果)


直观的看,很容易:直接替换不就行了

// Does NOT work: replaceAll takes a regular expression as its first argument.
// The regex \n (Java literal "\\n") matches a real line-feed character and \t a
// real tab, not the two characters backslash + letter stored in the file.
// The regex \/ matches only the slash, so the backslash in front of it stays.
line = line.replaceAll("\\n", "\r\n" );
line = line.replaceAll("\\t", "\t" );
line = line.replaceAll("\\/", "/" );
但是,这是错误的:新生成的文件没有任何改变。


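这里有个问题以前没弄清楚:文本文件中的 \n 读入后,在字符串里是两个字符(反斜杠和 n),写成 Java 字符串字面量就是 "\\n";而 replaceAll 的第一个参数是正则表达式,反斜杠在正则中同样是转义符,所以 "\\n" 作为正则匹配的是真正的换行符。要匹配字面的反斜杠,正则里必须写成 \\,对应的 Java 字面量就是 "\\\\"。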



因而正确的替换方法为:

// The regex needs an escaped backslash (\\) to match the literal backslash in
// the text; written as a Java string literal that becomes "\\\\".
// The replacement strings are the real control characters (CR LF, tab) and "/".
line = line.replaceAll("\\\\n", "\r\n" );
line = line.replaceAll("\\\\t", "\t" );
line = line.replaceAll("\\\\/", "/" );


接下来就是处理 Unicode码,将其还原

来源:http://www.cnblogs.com/yuxuan/archive/2011/08/02/2124904.html

	/**
	 * Decodes a run of Unicode escapes into the characters they denote.
	 *
	 * <p>The input is expected to consist only of escapes placed back to back,
	 * each one being a backslash, the letter u and the hexadecimal value of the
	 * character (for example the six characters backslash-u-4-e-0-a decode to
	 * U+4E0A). Every escape yields exactly one {@code char}. The first two
	 * characters of the input are skipped as the prefix without being checked.
	 *
	 * @param dataStr the escaped text
	 * @return the decoded characters, or an empty string for empty input
	 * @throws NumberFormatException if the text following a prefix is not a
	 *         hexadecimal number
	 */
	static String decodeUnicode( final String dataStr ) {
		if( dataStr.length() == 0 ) {
			return "";
		}
		// One backslash followed by 'u': the prefix that separates the escapes.
		final String prefix = "\\u";
		final StringBuilder buffer = new StringBuilder();
		int start = 0;
		while( start > -1 ) {
			// Skip the prefix of the current escape, then find where the next one begins.
			final int digitsFrom = start + prefix.length();
			final int end = dataStr.indexOf( prefix, digitsFrom );
			final String charStr = ( end == -1 )
					? dataStr.substring( digitsFrom )
					: dataStr.substring( digitsFrom, end );
			// Parse the hexadecimal digits and narrow the value to a UTF-16 code unit.
			buffer.append( (char) Integer.parseInt( charStr, 16 ) );
			start = end;
		}
		return buffer.toString();
	}


有了 decodeUnicode 方法,接下来只需要用正则表达式找出文件中所有形如 \uxxxx(xxxx 为四位十六进制数)的转义序列,并逐个交给它转换即可:

	/**
	 * Replaces every Unicode escape in {@code s} (a backslash, the letter u and
	 * exactly four hexadecimal digits) with the character it denotes. All other
	 * text is copied through unchanged.
	 *
	 * @param s the text that may contain escapes
	 * @return {@code s} with every escape decoded
	 * @throws RuntimeException if decoding fails; the underlying exception is
	 *         attached as the cause
	 */
	static String replace( String s )
	{
		try {
			// Hexadecimal digits only: a letter beyond f does not form a valid
			// escape and is left in the text instead of reaching the decoder.
			Pattern regex = Pattern.compile("\\\\u[0-9a-fA-F]{4}");
			Matcher matcher = regex.matcher(s);

			StringBuffer sb = new StringBuffer();
			while (matcher.find()) {
				// appendReplacement interprets '$' and '\' in its argument, so the
				// decoded character must be quoted before it is inserted.
				String decoded = decodeUnicode( matcher.group() );
				matcher.appendReplacement(sb, Matcher.quoteReplacement( decoded ) );
			}
			matcher.appendTail(sb);

			return sb.toString();
		} catch (Exception ex) {
			// Keep the original failure reachable through getCause().
			throw new RuntimeException( "Something error.", ex );
		}
	}


总的转换代码:

	/**
	 * Copies {@code file} line by line into the output file opened by
	 * {@code buildWriter}. On each line the textual escapes backslash-n,
	 * backslash-t and backslash-slash become CR LF, a tab and a plain slash,
	 * and Unicode escapes are decoded through {@code replace}.
	 *
	 * <p>I/O failures are reported with {@code printStackTrace()} and end the
	 * copy. Both streams are closed whether or not the copy succeeded.
	 *
	 * @param file the text file to convert
	 */
	static void readToWrite( File file )
	{
		BufferedReader bufReader = null;
		BufferedWriter bufWriter = null;
		try {
			bufReader = new BufferedReader( new FileReader(file) );
			bufWriter = buildWriter( file );
			if( bufWriter == null ) {
				// buildWriter has already reported why the output could not be
				// opened; stop here instead of failing on the first write.
				return;
			}
			String line;
			while( (line = bufReader.readLine()) != null )
			{
				// The patterns are regular expressions: four backslashes in the
				// literal are one literal backslash in the text being matched.
				line = line.replaceAll("\\\\n", "\r\n" );
				line = line.replaceAll("\\\\t", "\t" );
				line = line.replaceAll("\\\\/", "/" );
				line = replace( line );
				bufWriter.write( line );
				bufWriter.newLine();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		finally{
			// Close each stream in its own try so that a failure on the reader
			// does not prevent the writer from being flushed and closed.
			if( bufReader != null ){
				try {
					bufReader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if( bufWriter != null ){
				try {
					bufWriter.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	/**
	 * Opens the output file that belongs to {@code file}: it lives in the same
	 * directory and carries the same name with an {@code @} inserted before the
	 * last extension, so {@code notes.txt} becomes {@code notes@.txt}. A name
	 * without an extension gets the {@code @} appended.
	 *
	 * @param file the input file the output name is derived from
	 * @return a writer on the output file, or {@code null} if it could not be
	 *         opened (the failure is reported with {@code printStackTrace()})
	 */
	static BufferedWriter buildWriter( File file ){
		try {
			// Let File resolve the directory; searching the path for a backslash
			// only works for Windows-style paths.
			File directory = file.getCanonicalFile().getParentFile();
			String name = file.getName();
			int dot = name.lastIndexOf( '.' );
			// The output name must always differ from the input name, otherwise
			// the writer would truncate the very file that is being read.
			String outName = ( dot < 0 )
					? name + "@"
					: name.substring( 0, dot ) + "@" + name.substring( dot );
			return new BufferedWriter( new FileWriter( new File( directory, outName ) ) );
		} catch (IOException e) {
			e.printStackTrace();
		}

		return null;
	}
	
	/**
	 * Replaces every Unicode escape in {@code s} (a backslash, the letter u and
	 * exactly four hexadecimal digits) with the character it denotes. All other
	 * text is copied through unchanged.
	 *
	 * @param s the text that may contain escapes
	 * @return {@code s} with every escape decoded
	 * @throws RuntimeException if decoding fails; the underlying exception is
	 *         attached as the cause
	 */
	static String replace( String s )
	{
		try {
			// Hexadecimal digits only: a letter beyond f does not form a valid
			// escape and is left in the text instead of reaching the decoder.
			Pattern regex = Pattern.compile("\\\\u[0-9a-fA-F]{4}");
			Matcher matcher = regex.matcher(s);

			StringBuffer sb = new StringBuffer();
			while (matcher.find()) {
				// appendReplacement interprets '$' and '\' in its argument, so the
				// decoded character must be quoted before it is inserted.
				String decoded = decodeUnicode( matcher.group() );
				matcher.appendReplacement(sb, Matcher.quoteReplacement( decoded ) );
			}
			matcher.appendTail(sb);

			return sb.toString();
		} catch (Exception ex) {
			// Keep the original failure reachable through getCause().
			throw new RuntimeException( "Something error.", ex );
		}
	}
	
	/**
	 * Decodes a run of Unicode escapes into the characters they denote.
	 *
	 * <p>The input is expected to consist only of escapes placed back to back,
	 * each one being a backslash, the letter u and the hexadecimal value of the
	 * character (for example the six characters backslash-u-4-e-0-a decode to
	 * U+4E0A). Every escape yields exactly one {@code char}. The first two
	 * characters of the input are skipped as the prefix without being checked.
	 *
	 * @param dataStr the escaped text
	 * @return the decoded characters, or an empty string for empty input
	 * @throws NumberFormatException if the text following a prefix is not a
	 *         hexadecimal number
	 */
	static String decodeUnicode( final String dataStr ) {
		if( dataStr.length() == 0 ) {
			return "";
		}
		// One backslash followed by 'u': the prefix that separates the escapes.
		final String prefix = "\\u";
		final StringBuilder buffer = new StringBuilder();
		int start = 0;
		while( start > -1 ) {
			// Skip the prefix of the current escape, then find where the next one begins.
			final int digitsFrom = start + prefix.length();
			final int end = dataStr.indexOf( prefix, digitsFrom );
			final String charStr = ( end == -1 )
					? dataStr.substring( digitsFrom )
					: dataStr.substring( digitsFrom, end );
			// Parse the hexadecimal digits and narrow the value to a UTF-16 code unit.
			buffer.append( (char) Integer.parseInt( charStr, 16 ) );
			start = end;
		}
		return buffer.toString();
	}


============================================================


 

	 /**
	  * Writes {@code value} to {@code out} as length-prefixed escaped text:
	  * first the number of bytes via {@code writeInt}, then the bytes of the
	  * escaped form produced by {@code gbEncoding}. The length and the original
	  * value are also printed to standard output.
	  *
	  * <p>An I/O failure is reported with {@code printStackTrace()}; the stream
	  * may then hold a partially written record.
	  *
	  * @param out   the stream to write to
	  * @param value the text to escape and write
	  */
	public static void writeUnicode(final DataOutputStream out, final String value) {
		try {
			final String unicode = gbEncoding( value );
			// The escaped form holds only backslashes, 'u' and hex digits, so an
			// explicit ASCII encoding keeps the byte count platform-independent.
			final byte[] data = unicode.getBytes( "US-ASCII" );
			final int dataLength = data.length;

			System.out.println( "Data Length is: " + dataLength );
			System.out.println( "Data is: " + value );
			out.writeInt( dataLength ); // length prefix first
			out.write( data, 0, dataLength ); // then the escaped text itself
		} catch (IOException e) {
			// Report the failure instead of discarding it silently.
			e.printStackTrace();
		}
	}
	
	/**
	 * Encodes every UTF-16 code unit of {@code gbString} as a Unicode escape:
	 * one backslash, the letter u and the unit's value as exactly four
	 * lowercase hexadecimal digits. The result is also printed to standard
	 * output.
	 *
	 * @param gbString the text to escape
	 * @return the escaped text; empty when {@code gbString} is empty
	 */
	public static String gbEncoding( final String gbString ) {
		final StringBuilder unicodeBytes = new StringBuilder( gbString.length() * 6 );
		for( int index = 0; index < gbString.length(); index ++ ) {
			final String hex = Integer.toHexString( gbString.charAt( index ) );
			unicodeBytes.append( "\\u" );
			// Left-pad to four digits so that every escape has the same width,
			// whatever the numeric value of the character is.
			for( int pad = hex.length(); pad < 4; pad ++ ) {
				unicodeBytes.append( '0' );
			}
			unicodeBytes.append( hex );
		}
		System.out.println( "unicodeBytes is: " + unicodeBytes );
		return unicodeBytes.toString();
	}

 

你可能感兴趣的:(将文件内含有的特殊字符还原)