抓取网页
/**
 * Downloads the weather page for the given weather code.
 *
 * <p>The page URL is the configured {@code WEATHER_JSON} value followed by
 * {@code weatherCode} and the suffix {@code ".html"}. The download itself is
 * delegated to {@link #getPageContent(String, String, int)} with an empty POST
 * body (so no request body is written) and a limit of 50,000,000 characters.
 *
 * @param weatherCode code appended to the configured root to form the page URL
 * @return whatever {@code getPageContent} returns for that URL
 */
public String getPageWeather(String weatherCode) {
    String weatherRoot = sysConf.getValue("WEATHER_JSON");
    String weatherUrl = weatherRoot + weatherCode + ".html";
    logger.info("网页抓取开始,地址 【" + weatherUrl + " 】");

    // Upper bound on the number of characters read from the response.
    final int maxChars = 50000000;

    // The page body is intentionally not echoed to stdout: it can be very
    // large and this class already reports progress through its logger.
    return this.getPageContent(weatherUrl, "", maxChars);
}
/**
 * Fetches a URL over HTTP and returns the response body as text.
 *
 * <p>When {@code strPostRequest} is non-empty it is written as the request
 * body (which makes the request a POST); otherwise no body is sent. The
 * response is decoded as UTF-8 and surrounding whitespace is trimmed.
 * Connect and read timeouts are both taken from the {@code TIME_OUT}
 * configuration value (milliseconds).
 *
 * <p>This method is best-effort: any failure (bad URL, bad timeout value,
 * network error, HTTP error status) is logged and reported as an empty string.
 *
 * @param strUrl         address to fetch
 * @param strPostRequest request body to send; {@code null} or empty sends none
 * @param maxLength      maximum number of characters to read from the
 *                       response; zero or negative means no limit
 * @return the trimmed response text, or {@code ""} if anything goes wrong
 */
public String getPageContent(String strUrl, String strPostRequest, int maxLength) {
    StringBuilder content = new StringBuilder();
    HttpURLConnection connection = null;
    BufferedReader reader = null;
    try {
        int timeoutMillis = Integer.parseInt(sysConf.getValue("TIME_OUT"));
        connection = (HttpURLConnection) new URL(strUrl).openConnection();
        // Bound the connect phase as well, so an unreachable host cannot hang the caller.
        connection.setConnectTimeout(timeoutMillis);
        connection.setReadTimeout(timeoutMillis);

        // Extra data for a POST request.
        if (strPostRequest != null && strPostRequest.length() > 0) {
            connection.setDoOutput(true);
            // NOTE(review): the body is encoded with the platform default charset while
            // the response is decoded as UTF-8 - confirm which charset the server expects.
            OutputStreamWriter writer = new OutputStreamWriter(connection.getOutputStream());
            try {
                writer.write(strPostRequest);
                writer.flush();
            } finally {
                writer.close();
            }
        }

        // Read the response in chunks, never keeping more than maxLength characters.
        reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));
        char[] chunk = new char[4096];
        boolean unlimited = maxLength <= 0;
        int remaining = maxLength;
        while (unlimited || remaining > 0) {
            int wanted = unlimited ? chunk.length : Math.min(chunk.length, remaining);
            int got = reader.read(chunk, 0, wanted);
            if (got < 0) {
                break; // end of stream
            }
            content.append(chunk, 0, got);
            if (!unlimited) {
                remaining -= got;
            }
        }
        return content.toString().trim();
    } catch (Exception e) {
        // Deliberately best-effort, but record which URL failed and why so that
        // configuration errors are not mistaken for a dropped connection.
        logger.info("对方主动关闭socket连接,放弃抓取--" + strUrl + " : " + e);
        return "";
    } finally {
        // Release the stream and the connection on every path, including failures.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing fails; the result is already decided.
            }
        }
        if (connection != null) {
            connection.disconnect();
        }
    }
}