抓网页数据

抓取网页

 

 

/**
 * Fetches the weather page for the given weather code.
 *
 * <p>The URL is built as the configured {@code WEATHER_JSON} value, followed by
 * {@code weatherCode} and the suffix {@code ".html"}. The page is downloaded with
 * {@link #getPageContent(String, String, int)} using no POST data, so a plain GET
 * is issued.
 *
 * @param weatherCode code appended to the configured root to form the page URL
 * @return the value returned by {@code getPageContent} for that URL
 */
public String getPageWeather(String weatherCode) {
    // Upper bound on the number of characters read from the response.
    final int maxLength = 50000000;

    String weatherRoot = sysConf.getValue("WEATHER_JSON");
    String weatherUrl = weatherRoot + weatherCode + ".html";
    logger.info("网页抓取开始,地址 【" + weatherUrl + " 】");

    String sresult = this.getPageContent(weatherUrl, "", maxLength);

    // Route the fetched payload through the logger instead of stdout so it ends up
    // in the same place as the rest of this method's diagnostics.
    logger.info(sresult);
    return sresult;
}

 

 

 

 

public String getPageContent(String strUrl, String strPostRequest, int maxLength) {

// 读取结果网页

StringBuffer buffer = new StringBuffer();

try {

URL newUrl = new URL(strUrl);

HttpURLConnection hConnect = (HttpURLConnection) newUrl.openConnection();

hConnect.setReadTimeout(Integer.parseInt(sysConf.getValue("TIME_OUT")));

// POST方式的额外数据

if (strPostRequest.length() > 0) {

hConnect.setDoOutput(true);

OutputStreamWriter out = new OutputStreamWriter(hConnect.getOutputStream());

out.write(strPostRequest);

out.flush();

out.close();

}

// 读取内容

 

BufferedReader rd = new BufferedReader(new InputStreamReader(hConnect.getInputStream(), "utf-8"));

int ch;

for (int length = 0; (ch = rd.read()) > -1 && (maxLength <= 0 || length < maxLength); length++)

buffer.append((char) ch);

rd.close();

hConnect.disconnect();

return buffer.toString().trim();

} catch (Exception e) {

logger.info("对方主动关闭socket连接,放弃抓取--" );//+ e.getMessage(), e);

//e.printStackTrace();

return "";

 

}

}

你可能感兴趣的:(抓网页数据,HttpResult)