网络爬虫学习2

写了一个网络爬虫的小 Demo，有兴趣的可以试试：
package com.li.bean;

import java.util.HashMap;

import org.apache.http.Header;
import org.apache.http.HttpEntity;


/**
 * Holder for the outcome of one HTTP request: the cookie string, the status
 * code, the response headers keyed by header name, and the response entity.
 */
public class Result {

	private String cookie;
	private int statusCode;
	// Starts out empty so getHeaders() never hands a null map to callers.
	private HashMap<String, Header> headerAll = new HashMap<String, Header>();
	private HttpEntity httpEntity;

	/** @return the cookie string previously stored with {@link #setCookie(String)}, or null if none was set */
	public String getCookie() {
		return cookie;
	}

	/** @param cookie the cookie string to store */
	public void setCookie(String cookie) {
		this.cookie = cookie;
	}

	/** @return the stored HTTP status code (0 if never set) */
	public int getStatusCode() {
		return statusCode;
	}

	/** @param statusCode the HTTP status code to store */
	public void setStatusCode(int statusCode) {
		this.statusCode = statusCode;
	}

	/**
	 * @return the headers indexed by the exact name reported by
	 *         {@code Header.getName()}; never null, empty when no headers were stored
	 */
	public HashMap<String, Header> getHeaders() {
		return headerAll;
	}

	/**
	 * Replaces the stored headers with the given ones, indexed by header name.
	 * When several headers share a name, the last one in the array wins.
	 *
	 * @param headers the response headers; null is treated as "no headers"
	 */
	public void setHeaders(Header[] headers){
		HashMap<String, Header> byName = new HashMap<String, Header>();
		if (headers != null) {
			for (Header header : headers) {
				byName.put(header.getName(), header);
			}
		}
		headerAll = byName;
	}

	/** @return the stored response entity, or null if none was set */
	public HttpEntity getHttpEntity() {
		return httpEntity;
	}

	/** @param httpEntity the response entity to store */
	public void setHttpEntity(HttpEntity httpEntity) {
		this.httpEntity = httpEntity;
	}
}


package com.li.main;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;

import org.apache.http.client.ClientProtocolException;

import com.li.bean.Result;
import com.li.utli.SendRequest;

/**
 * Request sequences against the CSDN download, review and passport endpoints,
 * all issued through {@link SendRequest}.
 */
public class CSDN  {

	/** The download URL is split on "/" and indexes 7 and 8 are read, so at least 9 pieces are required. */
	private static final int MIN_URL_SEGMENTS = 9;

	/**
	 * Posts to the download URL, follows the returned {@code Location} header,
	 * then submits a review for the resource and calls the add-score endpoint.
	 *
	 * @param cookie       cookie string sent with every request
	 * @param downLoadUrl  download URL; its 8th and 9th "/"-separated pieces are reused
	 *                     to build the Referer header and the follow-up requests
	 * @param acommentLink page URL used as Referer and as the {@code url} review parameter
	 * @throws IllegalArgumentException if {@code downLoadUrl} has fewer than 9 "/"-separated pieces
	 * @throws IOException              if a request fails or the POST response has no {@code Location} header
	 */
	public static void downLoadAndAcomment(String cookie,String downLoadUrl,String acommentLink) throws ClientProtocolException, IOException {

		String referer[] = downLoadUrl.split("/");
		if (referer.length < MIN_URL_SEGMENTS) {
			// Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
			throw new IllegalArgumentException("Unexpected download URL (need at least "
					+ MIN_URL_SEGMENTS + " '/'-separated segments): " + downLoadUrl);
		}

		Map<String,String> headers = new HashMap<String,String>();
		headers.put("Cookie", cookie);
		headers.put("Referer", "http://d.download.csdn.net/down/"+referer[7]+"/"+referer[8]);
		headers.put("Host", "d.download.csdn.net");
		headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0");

		Map<String,String> parameters = new HashMap<String,String>();
		parameters.put("dx", "ds");

		Result res = SendRequest.sendPost(downLoadUrl, headers, parameters,"utf-8");
		if (res.getHeaders() == null || res.getHeaders().get("Location") == null) {
			// Without a redirect target there is nothing to fetch; report it instead of an NPE.
			throw new IOException("No Location header in response to " + downLoadUrl
					+ " (status " + res.getStatusCode() + ")");
		}
		String location = res.getHeaders().get("Location").getValue();
		// The trailing 'true' asks sendGet to abort the request right after the response arrives.
		SendRequest.sendGet(location, headers, null, "utf-8",true);

		// Reuse the same header map for the review request, retargeted at the review host.
		headers.put("Referer",acommentLink);
		headers.put("Host","review.csdn.net");
		Map<String,String> acommentParameters = new HashMap<String,String>();
		acommentParameters.put("rt", "3");
		// URLEncoder encodes spaces as '+'; they are rewritten to "%20" here.
		acommentParameters.put("title",URLEncoder.encode("CSDN 下载频道", "utf-8").replaceAll("\\+", "%20"));
		// This value is already percent-encoded; sendGet appends parameters verbatim.
		acommentParameters.put("description", "%E8%B5%84%E6%BA%90%E6%8C%BA%E4%B8%8D%E9%94%99%E7%9A%84%EF%BC%81%EF%BC%81");
		acommentParameters.put("rating", "4");
		acommentParameters.put("url", acommentLink);
		acommentParameters.put("extended", referer[8]);
		acommentParameters.put("jsoncallback", "jsonp1314023443336");
		acommentParameters.put("_", "1314023459837");

		SendRequest.sendGet("http://review.csdn.net/rest/v1/reviews/add", headers, acommentParameters, "utf-8");

		Map<String,String> acommentheaders = new HashMap<String,String>();
		acommentheaders.put("Cookie", cookie);
		// NOTE(review): '&' precedes '?' in this URL; kept verbatim — confirm the endpoint expects it.
		SendRequest.sendGet("http://download.csdn.net/index.php/rest/users/addscoreByratings/"+referer[7]+"&jsoncallback=jsonp1314023443336?_=1314023459837", acommentheaders, null, "utf-8");
	}

	/**
	 * Sends the login request and returns the cookies collected from it.
	 *
	 * @param name     value sent as the {@code u} parameter
	 * @param password value sent as the {@code p} parameter
	 * @param params   not used by this method
	 * @return the cookie string produced by the login request
	 * @throws IOException if the request fails
	 */
	public static String testAccount(String name, String password,
			Map<String, String> params) throws ClientProtocolException, IOException {

		Map<String,String> parameters = new HashMap<String,String>();
		parameters.put("t", "log");
		parameters.put("u", name);
		parameters.put("p", password);
		parameters.put("remember", "0");
		parameters.put("f", "http://passport.csdn.net/account/login");
		return SendRequest.sendGet("http://passport.csdn.net/ajax/accounthandler.ashx", null, parameters, "utf-8").getCookie();
	}
}


package com.li.main;

import javax.swing.JOptionPane;

import com.li.utli.CSDNUrlExtract;

/**
 * Entry point: asks for credentials, logs in, and starts the crawl.
 */
public class Main {

	/**
	 * Prompts for user name and password, obtains the login cookie, and hands it
	 * to {@code CSDNUrlExtract.cSDNShuFen}. Exits with status 1 when the returned
	 * cookie does not contain "UserInfo".
	 *
	 * @param args ignored
	 * @throws Exception propagated from the login request or the crawl
	 */
	public static void main(String[] args) throws Exception {
		String name = JOptionPane.showInputDialog("请输入你的用户名");
		if (name == null) {
			// Dialog was cancelled: stop instead of sending the literal text "null" as the user name.
			return;
		}
		String password = JOptionPane.showInputDialog("请输入你的密码");
		if (password == null) {
			return;
		}

		String cookie = CSDN.testAccount(name, password, null);
		if (cookie == null || !cookie.contains("UserInfo")) {
			JOptionPane.showMessageDialog(null, "用户名或密码错误");
			System.exit(1);
		}
		CSDNUrlExtract.cSDNShuFen(cookie);
	}
}


package com.li.utli;

import java.util.List;

import org.apache.http.util.EntityUtils;

import com.li.bean.Result;
import com.li.main.CSDN;

/**
 * Walks the category listings on download.csdn.net and runs
 * {@code CSDN.downLoadAndAcomment} for every resource link found.
 */
public class CSDNUrlExtract {

	/**
	 * Fetches the front page, extracts the category paths, and crawls every page
	 * of each category. A failure inside one category or one resource is reported
	 * on stderr and skipped; the crawl then continues. The elapsed time in
	 * milliseconds is printed at the end.
	 *
	 * @param cookie cookie string passed on to {@code CSDN.downLoadAndAcomment}
	 * @throws Exception if the front page cannot be fetched or read
	 */
	public static void cSDNShuFen(String cookie) throws Exception{

		long startt = System.currentTimeMillis();

		Result result = SendRequest.sendGet("http://download.csdn.net/", null, null,"utf-8" );
		String sort = EntityUtils.toString(result.getHttpEntity(),"utf-8");
		List<String> sortList = HtmlParse.prase(sort, "/sort/class/\\d{5}");

		// NOTE(review): iteration starts at index 10, skipping the first ten matches;
		// the reason is not visible here — confirm before changing.
		for (int i = 10; i < sortList.size(); i++) {
			String categoryPath = sortList.get(i);
			try {
				crawlCategory(cookie, categoryPath);
			} catch (Exception e) {
				// Best effort: give up on this category only, but say why.
				System.err.println("Skipping category " + categoryPath + ": " + e);
			}
		}

		long end = System.currentTimeMillis();
		System.out.println("耗时"+(end-startt));
	}

	/**
	 * Crawls every listing page of one category. Any failure outside the
	 * per-resource handling aborts the rest of this category.
	 */
	private static void crawlCategory(String cookie, String categoryPath) throws Exception {
		Result classResult = SendRequest.sendGet("http://download.csdn.net"+categoryPath+"/1", null, null, "utf-8");
		String classsAll = EntityUtils.toString(classResult.getHttpEntity(),"utf-8");

		// The "末页" (last page) link carries the page count as its final path segment.
		String page = HtmlParse.prase(classsAll,"<a href=\"http://download.csdn.net/sort/class/\\d+/\\d+\">末页</a>" , 1).get(0);
		page = page.split("\"")[1];
		page = page.substring(page.lastIndexOf("/")+1);
		// Parsed once instead of on every loop-condition check.
		int lastPage = Integer.parseInt(page);

		for (int j = 1; j <= lastPage; j++) {
			classResult = SendRequest.sendGet("http://download.csdn.net"+categoryPath+"/"+j, null, null, "utf-8");
			classsAll = EntityUtils.toString(classResult.getHttpEntity(),"utf-8");
			List<String> sorce = HtmlParse.prase(classsAll, "http://download.csdn.net/source/\\d+");
			for (String sourceUrl : sorce) {
				try {
					processSource(cookie, sourceUrl);
				} catch (Exception e) {
					// Best effort: one bad resource must not stop the page.
					System.err.println("Skipping resource " + sourceUrl + ": " + e);
				}
			}
		}
	}

	/** Resolves one resource page to its download address and comment link, then triggers download and comment. */
	private static void processSource(String cookie, String sourceUrl) throws Exception {
		Result downResult = SendRequest.sendGet(sourceUrl, null, null, "utf-8");
		String down = EntityUtils.toString(downResult.getHttpEntity(),"utf-8");
		down = HtmlParse.prase(down, "http://d.download.csdn.net/down/\\d+/.{3,21}>",1).get(0);
		// Drops the last two matched characters (the pattern's '>' and the character
		// before it — presumably a closing quote; verify against the page markup).
		down = down.substring(0,down.length()-2);

		Result addressResult = SendRequest.sendGet(down, null, null, "utf-8");
		String addressHtml = EntityUtils.toString(addressResult.getHttpEntity(),"utf-8");
		String address = HtmlParse.prase(addressHtml, "http://d.download.csdn.net/index.php/new/download/dodownload/\\d+/.{3,21}/\\w+",1).get(0);
		String acommentLink = HtmlParse.prase(addressHtml, "http://download.csdn.net/source/\\d+",1).get(0);
		CSDN.downLoadAndAcomment(cookie,address,acommentLink);
	}
}


package com.li.utli;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based extraction of matching substrings from a text.
 */
public class HtmlParse {

	/**
	 * Collects regex matches from {@code html} in the order they occur.
	 *
	 * @param html   text to search
	 * @param regex  regular expression to look for
	 * @param number maximum number of matches to collect; {@code -1} collects all of
	 *               them, and {@code 0} or any other negative value collects none
	 * @return the matched substrings, possibly empty
	 */
	public static List<String> prase(String html, String regex, int number) {
		Matcher matcher = Pattern.compile(regex).matcher(html);
		List<String> matches = new ArrayList<String>();
		boolean unlimited = (number == -1);
		int remaining = number;
		while ((unlimited || remaining > 0) && matcher.find()) {
			matches.add(matcher.group());
			if (!unlimited) {
				remaining--;
			}
		}
		return matches;
	}

	/**
	 * Collects every regex match from {@code html}.
	 *
	 * @param html  text to search
	 * @param regex regular expression to look for
	 * @return all matched substrings in order of occurrence
	 */
	public static List<String> prase(String html, String regex) {
		return prase(html, regex, -1);
	}

}


package com.li.utli;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;

import com.li.bean.Result;


/**
 * Static helpers that issue GET/POST requests with Apache HttpClient and wrap
 * the response in a {@link Result}.
 *
 * NOTE(review): every call creates its own DefaultHttpClient and nothing here
 * shuts it down; the entity is returned unread, so closing the client inside
 * these methods would need the body to be buffered first.
 */
public class SendRequest {

	/**
	 * Sends a GET request.
	 *
	 * @param url      target URL; the query string built from {@code params} is appended to it
	 * @param headers  request headers, or null for none
	 * @param params   query parameters appended verbatim (no URL encoding), or null for none
	 * @param encoding not used by this method
	 * @param duan     when true, the request is aborted right after the response is received
	 * @return cookies, status code, headers and entity of the response
	 * @throws IOException if the request fails
	 */
	public static Result sendGet(String url,Map<String,String> headers,Map<String,String>  params,String encoding,boolean duan) throws ClientProtocolException, IOException{
		DefaultHttpClient client = new DefaultHttpClient();
		String target = url + (null == params ? "" : assemblyParameter(params));
		HttpGet get = new HttpGet(target);
		if (null != headers) {
			get.setHeaders(assemblyHeader(headers));
		}
		HttpResponse response = client.execute(get);
		if (duan) {
			get.abort();
		}
		return toResult(client, response);
	}

	/**
	 * Sends a GET request without aborting it; see the five-argument overload.
	 */
	public static Result sendGet(String url,Map<String,String> headers,Map<String,String>  params,String encoding) throws ClientProtocolException, IOException{
		return sendGet(url, headers, params, encoding,false);
	}

	/**
	 * Sends a POST request with a URL-encoded form body.
	 *
	 * @param url      target URL
	 * @param headers  request headers, or null for none
	 * @param params   form fields; null is treated as an empty form, matching sendGet's null tolerance
	 * @param encoding charset name used to encode the form body
	 * @return cookies, status code, headers and entity of the response
	 * @throws IOException if the request fails
	 */
	public static Result sendPost(String url,Map<String,String> headers,Map<String,String>  params,String encoding) throws ClientProtocolException, IOException{
		DefaultHttpClient client = new DefaultHttpClient();
		HttpPost post = new HttpPost(url);
		List<NameValuePair> form = new ArrayList<NameValuePair>();
		if (null != params) {
			for (Map.Entry<String,String> field : params.entrySet()) {
				form.add(new BasicNameValuePair(field.getKey(), field.getValue()));
			}
		}
		post.setEntity(new UrlEncodedFormEntity(form,encoding));
		if (null != headers) {
			post.setHeaders(assemblyHeader(headers));
		}

		HttpResponse response = client.execute(post);
		return toResult(client, response);
	}

	/** Copies cookies (from the client's cookie store), status, headers and entity into a new Result. */
	private static Result toResult(DefaultHttpClient client, HttpResponse response) {
		HttpEntity entity = response.getEntity();
		Result result = new Result();
		result.setCookie(assemblyCookie(client.getCookieStore().getCookies()));
		result.setStatusCode(response.getStatusLine().getStatusCode());
		result.setHeaders(response.getAllHeaders());
		result.setHttpEntity(entity);
		return result;
	}

	/**
	 * Converts a name-to-value map into an array of headers.
	 *
	 * @param headers header names mapped to their values; must not be null
	 * @return one header per map entry, in the map's iteration order
	 */
	public static Header[] assemblyHeader(Map<String,String> headers){
		Header[] allHeader = new BasicHeader[headers.size()];
		int i = 0;
		for (Map.Entry<String,String> entry : headers.entrySet()) {
			allHeader[i++] = new BasicHeader(entry.getKey(), entry.getValue());
		}
		return allHeader;
	}

	/**
	 * Joins cookies into a single {@code name=value;name=value} string.
	 *
	 * @param cookies cookies to join; must not be null
	 * @return the joined string, empty when there are no cookies
	 */
	public static String assemblyCookie(List<Cookie> cookies){
		StringBuilder joined = new StringBuilder();
		for (Cookie cookie : cookies) {
			if (joined.length() > 0) {
				joined.append(";");
			}
			joined.append(cookie.getName()).append("=").append(cookie.getValue());
		}
		return joined.toString();
	}

	/**
	 * Builds a query string of the form {@code ?k1=v1&k2=v2}. Keys and values are
	 * appended as-is, so callers must pass values that are already URL-encoded.
	 *
	 * @param parameters query parameters; must not be null
	 * @return the query string including the leading '?', or an empty string for an empty map
	 */
	public static String assemblyParameter(Map<String,String> parameters){
		if (parameters.isEmpty()) {
			return "";
		}
		StringBuilder query = new StringBuilder("?");
		boolean first = true;
		for (Map.Entry<String,String> entry : parameters.entrySet()) {
			if (!first) {
				query.append("&");
			}
			query.append(entry.getKey()).append("=").append(entry.getValue());
			first = false;
		}
		return query.toString();
	}
}


网络爬虫学习2_第1张图片

你可能感兴趣的:(exception,String,header,null,Parameters,网络爬虫)