package com.dg.util;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * Small helper for downloading the body of a URL as text using Apache HttpClient.
 */
public class HttpUtil {

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7 360EE";

    /**
     * Performs an HTTP GET on {@code url} and returns the response body as a string.
     *
     * @param url      absolute URL to fetch
     * @param encoding charset name used to decode the body; when blank the decoding
     *                 is left to {@code EntityUtils.toString(HttpEntity)}
     * @return the decoded body, or {@code null} when the response carries no entity
     * @throws Exception if {@code url} is not a valid URI, the request fails, or the
     *                   server answers with a status code of 300 or above
     *                   (reported as {@link HttpResponseException})
     */
    public static String getContent(String url, String encoding) throws Exception {
        URI uri = new URI(url);
        HttpClient client = new DefaultHttpClient();
        try {
            HttpGet httpget = new HttpGet(uri);
            httpget.setHeader("User-Agent", USER_AGENT);
            String html = client.execute(httpget, new CharsetHandler(encoding));
            System.out.println("html:" + url);
            return html;
        } finally {
            // A new client is created per call, so its connection manager must be
            // shut down here or the underlying sockets are leaked.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Manual smoke test: fetches a fixed page decoded as gb2312.
     *
     * @param args unused
     * @throws Exception propagated from {@link #getContent(String, String)}
     */
    public static void main(String[] args) throws Exception {
        String url = "http://www.6vdy.com/dy/2011-12-20/16961.html";
        String html = getContent(url, "gb2312");
    }
}

/**
 * Response handler that converts a successful response into a string using a
 * caller-supplied charset.
 */
class CharsetHandler implements ResponseHandler<String> {

    /** Charset name used for decoding; may be blank or {@code null}. */
    private final String charset;

    /**
     * @param charset charset name to decode the body with; blank or {@code null}
     *                defers the choice to {@code EntityUtils.toString(HttpEntity)}
     */
    public CharsetHandler(String charset) {
        this.charset = charset;
    }

    /**
     * Decodes the response entity into a string.
     *
     * @param response the HTTP response to convert
     * @return the decoded body, or {@code null} when the response has no entity
     * @throws HttpResponseException if the status code is 300 or above
     * @throws IOException           if reading the entity fails
     */
    @Override
    public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
        StatusLine statusLine = response.getStatusLine();
        if (statusLine.getStatusCode() >= 300) {
            throw new HttpResponseException(statusLine.getStatusCode(), statusLine.getReasonPhrase());
        }

        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return null;
        }
        if (StringUtils.isBlank(charset)) {
            return EntityUtils.toString(entity);
        }
        return EntityUtils.toString(entity, charset);
    }
}
package com.dg.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.zip.GZIPInputStream;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.lang3.StringUtils;

/**
 * Small helper for downloading the body of a URL as text using Commons HttpClient,
 * with transparent handling of gzip-encoded responses.
 */
public class HttpUtil {

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7 360EE";

    /** Connection establishment timeout, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 5000;

    /** Socket read timeout for the GET request, in milliseconds. */
    private static final int SOCKET_TIMEOUT_MS = 10000;

    /**
     * Performs an HTTP GET on {@code htmlurl} and returns the response body as text.
     *
     * <p>This method is best-effort: a non-200 status yields an empty string, and an
     * I/O failure is printed to standard error and whatever was read so far is
     * returned. Every line of the body is terminated with {@code "\r\n"} in the
     * result, regardless of the line endings sent by the server.
     *
     * @param htmlurl URL to fetch
     * @param charset charset name used to decode the body; when blank or {@code null}
     *                the charset reported by the response is used instead
     * @return the decoded body, or an empty string when the status is not 200 or
     *         nothing could be read
     * @throws IOException declared for interface compatibility; I/O failures during
     *                     the request are caught and reported rather than propagated
     */
    public static String getContent(String htmlurl, String charset) throws IOException {
        StringBuilder sb = new StringBuilder();

        // 1. Create the HttpClient and configure its connection timeout.
        HttpClient httpClient = new HttpClient();
        httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(CONNECT_TIMEOUT_MS);

        GetMethod method = new GetMethod(htmlurl);
        method.addRequestHeader("User-Agent", USER_AGENT);
        // Actually apply the read timeout; the previous code called a getter whose
        // result was discarded, so no timeout was ever configured.
        method.getParams().setSoTimeout(SOCKET_TIMEOUT_MS);
        // Retry policy for the request.
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());

        try {
            int statusCode = httpClient.executeMethod(method);
            // Only a 200 response is read; anything else yields an empty result.
            if (statusCode == HttpStatus.SC_OK) {
                readBody(method, charset, sb);
            }
        } catch (IOException e) {
            // Best-effort contract: report the failure and return what was read.
            e.printStackTrace();
        } finally {
            // Always give the connection back, including on the non-200 path.
            method.abort();
            method.releaseConnection();
        }
        return sb.toString();
    }

    /**
     * Reads the response body of {@code method} line by line into {@code out},
     * decompressing it first when the Content-Encoding header mentions gzip.
     */
    private static void readBody(GetMethod method, String charset, StringBuilder out) throws IOException {
        InputStream body = method.getResponseBodyAsStream();
        if (body == null) {
            return;
        }

        InputStream in = body;
        try {
            Header encodingHeader = method.getResponseHeader("Content-Encoding");
            String contentEncoding = "";
            if (encodingHeader != null && encodingHeader.getValue() != null) {
                contentEncoding = encodingHeader.getValue();
            }
            // Locale.ROOT keeps the comparison stable under locales with special
            // case mappings for 'I' (e.g. Turkish).
            if (contentEncoding.toLowerCase(Locale.ROOT).indexOf("gzip") > -1) {
                in = new GZIPInputStream(body);
            }

            String effectiveCharset = StringUtils.isBlank(charset) ? method.getResponseCharSet() : charset;
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, effectiveCharset));
            String line;
            while ((line = reader.readLine()) != null) {
                out.append(line).append("\r\n");
            }
        } finally {
            // Closing the outermost stream also closes the wrapped response stream.
            in.close();
        }
    }

    /**
     * Manual smoke test: fetches a fixed page decoded as gb2312 and prints it.
     *
     * @param args unused
     * @throws Exception propagated from {@link #getContent(String, String)}
     */
    public static void main(String[] args) throws Exception {
        String url = "http://www.baidu.com";
        String html = getContent(url, "gb2312");
        System.out.println(html);
    }
}