爬虫 蜘蛛 信息采集

HttpWebRequest
// Downloads a page with HttpWebRequest and reads the whole response body into `content`.
// The empty string passed to Create is a placeholder: replace it with the absolute URL to fetch.
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("");
// Present the request as a desktop browser (IE 8 user-agent string).
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
string content;
// NOTE(review): `encoding` is not declared in this snippet; it is assumed to be a
// System.Text.Encoding supplied by the surrounding code that matches the page's charset.
// The using statements release the connection, stream and reader even if reading fails.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
{
    content = sr.ReadToEnd();
}

WebRequest, WebClient
// Downloads a page with WebClient and decodes the raw bytes into `content`.
// The empty string passed to DownloadData is a placeholder: replace it with the URL to fetch.
byte[] pageData;
// WebClient is disposable; the using statement releases it once the download finishes.
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    // Authenticate with the credentials of the account running this code.
    wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
    pageData = wc.DownloadData("");
}
// NOTE(review): Encoding.Default is the platform's default encoding, which may not match the
// charset the page was served in; confirm before relying on the decoded text.
string content = System.Text.Encoding.Default.GetString(pageData);

     
       
/// <summary>
/// Cookie container attached to every login request so that cookies set by the server
/// (and therefore the session) are retained between calls.
/// </summary>
public static CookieContainer cc = new CookieContainer();

// Application-state key under which the login cookie header is stored. The padding spaces are
// kept exactly as the original code wrote them so any other reader of this entry still finds it.
private const string CookieHeaderKey = " cookieHeader ";

/// <summary>
/// Logs in by POSTing the supplied form fields to <paramref name="targetURL"/> and stores the
/// resulting cookie header in ASP.NET application state, where <see cref="ReGetHtml"/> reads it.
/// </summary>
/// <param name="targetURL">Absolute URL of the endpoint that performs the login.</param>
/// <param name="param">
/// Form fields to submit (for example user name and password). Keys and values are written as
/// given, joined as <c>key=value</c> pairs separated by <c>&amp;</c>.
/// </param>
/// <returns>The literal string <c>" OK "</c> once the server has responded.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="targetURL"/> or <paramref name="param"/> is null.
/// </exception>
public static string PostAndGetHTML(string targetURL, Hashtable param)
{
    if (targetURL == null)
    {
        throw new ArgumentNullException("targetURL");
    }
    if (param == null)
    {
        throw new ArgumentNullException("param");
    }

    // Build the form body. The separator is only written between pairs, so nothing has to be
    // trimmed afterwards.
    // NOTE(review): keys and values are not URL-encoded here; callers must pre-encode anything
    // containing reserved characters such as '&' or '='.
    StringBuilder formData = new StringBuilder();
    foreach (DictionaryEntry de in param)
    {
        if (formData.Length > 0)
        {
            formData.Append('&');
        }
        formData.Append(de.Key).Append('=').Append(de.Value);
    }

    // UTF-8 yields the same bytes as ASCII for ASCII input but does not replace other
    // characters with '?'.
    byte[] data = Encoding.UTF8.GetBytes(formData.ToString());

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";
    request.AllowAutoRedirect = true;
    request.KeepAlive = true;
    // Attach the shared container before any data is sent so it takes part in this request.
    request.CookieContainer = cc;

    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    // Only the cookies are needed from the response, so release the connection immediately.
    WebResponse response = request.GetResponse();
    response.Close();

    // Read the cookies that apply to the URL that was actually requested and publish them in
    // application state for later requests.
    string cookieHeader = cc.GetCookieHeader(request.RequestUri);
    HttpContext.Current.Application.Lock();
    HttpContext.Current.Application[CookieHeaderKey] = cookieHeader;
    HttpContext.Current.Application.UnLock();

    return " OK ";
}

/// <summary>
/// Fetches another page with a GET request, sending the cookie header that
/// <see cref="PostAndGetHTML"/> stored in application state.
/// </summary>
/// <param name="strUrl">Absolute URL of the page to download.</param>
/// <returns>The response body decoded as UTF-8 text.</returns>
/// <exception cref="InvalidOperationException">
/// No cookie header has been stored in application state yet.
/// </exception>
public static string ReGetHtml(string strUrl)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

    object stored = HttpContext.Current.Application[CookieHeaderKey];
    if (stored == null)
    {
        throw new InvalidOperationException("No login cookie header is stored; call PostAndGetHTML first.");
    }
    string cookieHeader = stored.ToString();

    request.Method = "GET";
    // Skip the header entirely when the login produced no cookies.
    if (cookieHeader.Length > 0)
    {
        request.Headers.Add("Cookie", cookieHeader);
    }
    request.KeepAlive = true;
    request.AllowAutoRedirect = true;

    // Dispose the response, its stream and the reader even if reading fails.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, System.Text.Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}


你可能感兴趣的:(信息采集)