commons-httpClient Helper

阅读更多
使用HttpClient来发送请求获取数据是最经典的做法。下面我们改用JDK自带的HttpURLConnection来操作,很简单:发个请求、取出数据,还可以跳过https的证书验证。

package org.ycl.commons.text;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.commons.io.LineIterator;

/**
 * Functions:
 * 
 * 1. getInputStream(String url)/getInputStream(String url, Proxy proxy)
 * 
  • - get InputStream from url with proxy(or not)
  • * 2. getString(String url) *
  • - get String from url with one line
  • *
  • - this is simple get html content, {@link HttpClient}
  • * 3. getStringList(String url) *
  • - get List from url with any lines
  • * 4. getStringToday(String urlstring) *
  • - get String from url and save copy in file.
  • * 5. writeFileToday(String urlstring) *
  • - wirte urlstring content to file
  • * 6. needWriteFileToday(String urlstring) *
  • - check this file is generator today, or will be re-write file
  • * 7. getURLFile(String urlstring) *
  • - via urlstring to generator file
  • * 8. writeFile(String url, File file) *
  • - write url content to file
  • * 9. htmlEscape(String input)/htmlUnescape(String input) *
  • - turn Html language to transferred meaning, or reverse.
  • * * * NOTE:this is from my tool box * * {@link org.springframework.web.util.HtmlUtils} * @author e557400 * */ public class HtmlUtils { public static String DEFAULT_CONNECTION_POST = "POST";// request in "POST" method public static String DEFAULT_CONNECTION_GET = "GET";// request in "POST" method public static boolean DEFAULT_CONNECTION_DOOUTPUT = false;// if you intend to use the URL connection for output public static boolean DEFAULT_CONNECTION_ALLOWUSERINTERACTION = false;// Don't need to interaction with user, exp:Applet public static boolean DEFAULT_CONNECTION_DOINPUT = true;// if you intend to use the URL connection for input public static boolean DEFAULT_CONNECTION_FOLLOWREDIRECTS = true;//default is true public static boolean DEFAULT_DEBUG = true;// if DEBUG is true, will be print error message public static boolean DEFAULT_SKIP_SSL = false;// if we vistor https, should be skip ssl validate? public static String DEFAULT_ENCODE = "UTF-8";// we read html use this encode. public static String DEFAULT_HTML_FOLDER = "/usr";// we read html use this encode. public static int DEFAULT_CONNECTION_CONN_TIMEOUT = 3;// timeout in minutes public static int DEFAULT_CONNECTION_READ_TIMEOUT = 3;// timeout in minutes public static Proxy DEFAULT_PROXY = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("proxy.statestr.com", 80)); public static boolean DEFAULT_PROXY_FLAG = false; /** * The number of second is 1000 milliseconds. */ public static final int ONE_SEC = 1000; /** * The number of minute is 60 second */ public static final int ONE_MIN = ONE_SEC * 60; // remove in product env. static { DEFAULT_PROXY_FLAG = true; } /** * override default proxy * * @param proxy */ public static void setDefaultProxy(Proxy proxy) { DEFAULT_PROXY = proxy; } /** * main set Connection attribute of * requestMethod,ConnectTimeout,ReadTimeout. 
* * @param urlstring * @return * @throws IOException */ private static HttpURLConnection initConnection(String urlstring) throws IOException { return initConnection(urlstring, null); } /** * main set Connection attribute of * requestMethod,ConnectTimeout,ReadTimeout. we can give Proxy, or use * default Proxy, or no Proxy. * * @param urlstring * @param proxy * @return * @throws IOException */ private static HttpURLConnection initConnection(String urlstring, Proxy proxy) throws IOException { URL url = new URL(urlstring); HttpURLConnection conn = null; if (proxy != null) { conn = (HttpURLConnection) url.openConnection(proxy); } else { if (DEFAULT_PROXY_FLAG) { conn = (HttpURLConnection) url.openConnection(DEFAULT_PROXY); } else { conn = (HttpURLConnection) url.openConnection(); } } //NOTE: SSL valid must be set first, or will be unusable. if(DEFAULT_SKIP_SSL){ try{ // Create a trust manager that does not validate certificate chains final TrustManager[] trustAllCerts = new TrustManager[] { new X509TrustManager() { @Override public void checkClientTrusted( final X509Certificate[] chain, final String authType ) { } @Override public void checkServerTrusted( final X509Certificate[] chain, final String authType ) { } @Override public X509Certificate[] getAcceptedIssuers() { return null; } } }; // Install the all-trusting trust manager final SSLContext sslContext = SSLContext.getInstance( "SSL" ); sslContext.init( null, trustAllCerts, new java.security.SecureRandom() ); // Create an ssl socket factory with our all-trusting manager final SSLSocketFactory sslSocketFactory = sslContext.getSocketFactory(); ( (HttpsURLConnection) conn ).setSSLSocketFactory(sslSocketFactory); ( (HttpsURLConnection) conn ).setHostnameVerifier(new HostnameVerifier(){ @Override public boolean verify(String arg0, SSLSession arg1) { // TODO Auto-generated method stub return true; } }); }catch(Exception e){ if (DEFAULT_DEBUG) { e.printStackTrace(); } } } conn.setRequestMethod(DEFAULT_CONNECTION_GET); 
conn.setConnectTimeout(DEFAULT_CONNECTION_CONN_TIMEOUT * ONE_MIN); conn.setReadTimeout(DEFAULT_CONNECTION_READ_TIMEOUT * ONE_MIN); // set request property // conn.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); // conn.setRequestProperty("Content-Type", "text/html; charset=utf-8"); // conn.setRequestProperty("Accept-Language", "en-US"); // conn.setRequestProperty("Accept", // "text/html, application/xhtml+xml, */*"); // conn.setRequestProperty("Accept-Encoding", "gzip, deflate"); // conn.setRequestProperty("User-Agent", // "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"); // conn.setRequestProperty("Content-Length","10"); conn.setAllowUserInteraction(DEFAULT_CONNECTION_ALLOWUSERINTERACTION); conn.setDoOutput(DEFAULT_CONNECTION_DOOUTPUT); conn.setDoInput(DEFAULT_CONNECTION_DOINPUT); if (DEFAULT_DEBUG) { Map> headers = conn.getHeaderFields(); if (headers != null) { System.out.println("begin header"); for (Map.Entry> header : headers .entrySet()) { System.out.println("key:" + header.getKey()); System.out.println("value:" + header.getValue()); } System.out.println("end header"); } } return conn; } /** * Unconditionally close a Closeable. *

    * Equivalent to {@link Closeable#close()}, except any exceptions will be * ignored. This is typically used in finally blocks. *

    * Example code: * *

    	 * Closeable closeable = null;
    	 * try {
    	 * 	closeable = new FileReader("foo.txt");
    	 * 	// process closeable
    	 * 	closeable.close();
    	 * } catch (Exception e) {
    	 * 	// error handling
    	 * } finally {
    	 * 	IOUtils.closeQuietly(closeable);
    	 * }
    	 * 
    * * @param closeable * the object to close, may be null or already closed * @since 2.0 */ private static void closeQuietly(Closeable closeable) { try { if (closeable != null) { closeable.close(); } } catch (IOException ioe) { // ignore } } /** * @{link org.apache.commons.io.IOUtils} Return an Iterator for the lines in * a Reader. *

    * LineIterator holds a reference to the open * Reader specified here. When you have finished with * the iterator you should close the reader to free internal * resources. This can be done by closing the reader directly, or by * calling {@link LineIterator#close()} or * {@link LineIterator#closeQuietly(LineIterator)}. *

    * The recommended usage pattern is: * *

    	 * try {
    	 * 	LineIterator it = IOUtils.lineIterator(reader);
    	 * 	while (it.hasNext()) {
    	 * 		String line = it.nextLine();
    	 * 		// / do something with line
    	 * 	}
    	 * } finally {
    	 * 	IOUtils.closeQuietly(reader);
    	 * }
    	 * 
    * * @param reader * the Reader to read from, not null * @return an Iterator of the lines in the reader, never null * @throws IllegalArgumentException * if the reader is null * @since 1.2 */ private static void closeQuietly(Reader input) { closeQuietly((Closeable) input); } /** * get URL content with InputStream * * @param url * @return * @throws IOException */ public static InputStream getInputStream(String url) throws IOException { HttpURLConnection conn = initConnection(url); InputStream is = null; if (conn.getResponseCode() >= 400) { is = conn.getErrorStream(); } else { is = conn.getInputStream(); } return is; } /** * get URL content with InputStream with Proxy * * @param url * @param proxy * @return * @throws IOException */ public static InputStream getInputStream(String url, Proxy proxy) throws IOException { HttpURLConnection conn = initConnection(url, proxy); InputStream is = conn.getInputStream(); return is; } /** * get URL content with String. 1. success return content. 2. fail null * * @param url * @return */ public static String getString(String url) { BufferedReader in = null; StringBuffer sb = new StringBuffer(); try { in = new BufferedReader(new InputStreamReader(getInputStream(url), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { sb.append(inputLine); } } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } return null; } finally { closeQuietly(in); } return sb.toString(); } public static List getStringList(String url) { BufferedReader in = null; List contents = new ArrayList(); try { in = new BufferedReader(new InputStreamReader(getInputStream(url), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { contents.add(inputLine); } } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } return null; } finally { closeQuietly(in); } return contents; } /** * get url to string, if this file is exist, then read it, or read from URL. 
* * @param urlstring * @return */ public static String getStringToday(String urlstring) { BufferedReader in = null; try { writeFileToday(urlstring); StringBuffer sb = new StringBuffer(); File file = getURLFile(urlstring); in = new BufferedReader(new InputStreamReader( FileUtils.openInputStream(file), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { sb.append(inputLine); sb.append(System.getProperty("line.separator")); } // Scanner scanner = new Scanner(new FileInputStream(file), // DEFAULT_ENCODE); // while (scanner.hasNextLine()){ // sb.append(scanner.nextLine()); // } // scanner.close(); return sb.toString(); } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } return null; } finally { closeQuietly(in); } } /** * we may be read URL content to file, if we have read, so next test we just * get from file. not EveryTime from URL, it can save so may times. * * @param url * @param fileName */ public static void writeFile(String url, File file) { BufferedReader in = null; BufferedWriter fw = null; try { in = new BufferedReader(new InputStreamReader(getInputStream(url), DEFAULT_ENCODE)); fw = new BufferedWriter(new OutputStreamWriter( FileUtils.openOutputStream(file), DEFAULT_ENCODE)); String inputLine; while ((inputLine = in.readLine()) != null) { fw.write(inputLine); fw.write(System.getProperty("line.separator")); } } catch (IOException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } } finally { closeQuietly(in); closeQuietly(fw); } } /** * add file in weather folder * * how to judge we have download today, every day file is difference * * @param url */ public static void writeFileToday(String urlstring) { if (needWriteFileToday(urlstring)) { writeFile(urlstring, getURLFile(urlstring)); } } /** * Path /weather/101210101.shtml so we will be generator file in this * derectory. 
* * @param urlstring */ public static boolean needWriteFileToday(String urlstring) { File file = getURLFile(urlstring); Long lastmodify = file.lastModified(); Long todaymodify = new Date().getTime(); if (todaymodify - lastmodify < DateUtils.MILLIS_PER_DAY) { return false; } return true; } /** * via url generator file * * @param urlstring * @return */ public static File getURLFile(String urlstring) { URL url = null; File file = null; try { url = new URL(urlstring); String path = url.getPath(); file = new File(DEFAULT_HTML_FOLDER + path+ DateUtils.getDateAsString(new Date(), "MM-dd-yyyy") .concat(".html")); } catch (MalformedURLException e) { if (DEFAULT_DEBUG) { e.printStackTrace(); } } return file; } /** * Turn special characters into HTML character references. * Handles complete character set defined in HTML 4.01 recommendation. *

    Escapes all special characters to their corresponding * entity reference (e.g. {@code <}). *

    Reference: * * http://www.w3.org/TR/html4/sgml/entities.html * * @param input the (unescaped) input string * @return the escaped string */ public static String htmlEscape(String input) { return org.springframework.web.util.HtmlUtils.htmlEscape(input); } /** * Turn HTML character references into their plain text UNICODE equivalent. *

    Handles complete character set defined in HTML 4.01 recommendation * and all reference types (decimal, hex, and entity). *

    Correctly converts the following formats: *

    * &#Entity; - (Example: &amp;) case sensitive * &#Decimal; - (Example: &#68;)
    * &#xHex; - (Example: &#xE5;) case insensitive
    *
    * Gracefully handles malformed character references by copying original * characters as is when encountered.

    *

    Reference: * * http://www.w3.org/TR/html4/sgml/entities.html * * @param input the (escaped) input string * @return the unescaped string */ public static String htmlUnescape(String input) { return org.springframework.web.util.HtmlUtils.htmlUnescape(input); } public static void main(String args[]) throws Exception { //Listcontexts=getStringList("http://www.weather.com.cn/weather/101210101.shtml");//101210101 //String context =getStringToday("http://weather.yahooapis.com/forecastrss?w=1940345"); //String context = getStringToday("http://weather.yahooapis.com/forecastrss?w=2502265"); String context = getStringToday("http://weather.yahooapis.com/forecastrss?p=CHXX0044&u=c"); //String context = getStringToday("http://m.weather.com.cn/data/101270803.html"); //String context = getStringToday("http://m.weather.com.cn/data/101210101.html"); //String context = getStringToday("http://www.google.com"); //String context = getStringToday("http://www.baidu.com"); //String context = getStringToday("http://www.weather.com.cn/weather/101210101.shtml"); //String context = getStringToday("https://aplmd5.it.statestr.com:9445/PALMSServiceWEB/cacheReset"); //String context = getStringToday("http://aplmd5.it.statestr.com:9080/PLM/login.do"); //String context = getStringToday("http://aplmd5.it.statestr.com:9080/PLM/unittest/testproperties.jsp"); // for(String context:contexts) String escape = htmlEscape(context); System.out.println(escape); System.out.println(htmlUnescape(escape)); } }

    你可能感兴趣的:(commons-httpClient Helper)