简单的信息采集程序示例(小偷程序)

最近正准备做一个信息采集的程序,下面是一个简单的采集程序,提供给初学者入门参考。

aspx页面代码
       < asp:TextBox ID = " Txt_Url "  runat = " server "  Width = " 441px " ></ asp:TextBox >< br  />  
   
< asp:Button id = " Btn_GetUrlSource "  runat = " server "  Text = " 取得网页代码 "  OnClick = " Btn_GetUrlSource_Click " ></ asp:Button >
   
< br  />  
   
< asp:TextBox id = " Txt_UrlSource "  runat = " server "  Width = " 100% "  Height = " 195px "  TextMode = " MultiLine " ></ asp:TextBox >< br  />
      
< br  />
采集开始代码
      
< asp:TextBox ID = " Txt_First "  runat = " server "  Height = " 90px "  TextMode = " MultiLine "  Width = " 280px " ></ asp:TextBox >< br  />
      
< asp:Button ID = " Btn_ListCheck "  runat = " server "  OnClick = " Btn_ListCheck_Click "  Text = " 测试唯一性 "   />< br  />
采集结束代码      
< asp:TextBox ID = " Txt_Last "  runat = " server "  Height = " 90px "  TextMode = " MultiLine "
          Width
= " 280px " ></ asp:TextBox >< br  />
      
< br  />
      
< asp:Button ID = " Btn_Result "  runat = " server "  Text = " 取得采集结果 "  OnClick = " Btn_Result_Click "   />< br  />
      
< asp:TextBox ID = " Txt_Result "  runat = " server "  Height = " 134px "  TextMode = " MultiLine "  Width = " 579px " ></ asp:TextBox >

.cs页面代码
using  System;
using  System.Collections;
using  System.ComponentModel;
using  System.Data;
using  System.Drawing;
using  System.Web;
using  System.Web.SessionState;
using  System.Web.UI;
using  System.Web.UI.WebControls;
using  System.Web.UI.HtmlControls;
using  System.IO;
using  System.Net;
using  System.Text.RegularExpressions;
using  NetShuai.Database;


 
// URL of the page to download; starts out empty and is set from Txt_Url
// when the "fetch source" button is clicked.
private string PageUrl = string.Empty;

    
/// <summary>
/// Page load handler. Intentionally empty: this page needs no per-request setup.
/// </summary>
/// <param name="sender">Source of the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Page_Load(object sender, System.EventArgs e)
{
    // Place user code here to initialize the page.
}



    
/// <summary>
/// Downloads the resource at the URL typed into <c>Txt_Url</c> and puts its
/// text into <c>Txt_UrlSource</c>.
/// </summary>
/// <param name="sender">Source of the event (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// Nothing is caught here: an invalid URL or a failed request surfaces as the
/// exception thrown by <c>WebRequest.Create</c> / <c>GetResponse</c>.
/// </remarks>
protected void Btn_GetUrlSource_Click(object sender, EventArgs e)
{
    PageUrl = Txt_Url.Text;

    // NOTE(review): the URL comes straight from user input and is requested by the
    // server itself. WebRequest.Create accepts every registered scheme (including
    // file://), so consider restricting it to http/https before exposing this page.
    WebRequest request = WebRequest.Create(PageUrl);

    // The using blocks release the response, its stream and the reader in the right
    // order (reader first) even when reading throws; previously the response was
    // never closed and nothing was released on failure.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    // Encoding.Default is the server's ANSI code page; the charset announced by the
    // response is not consulted.
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        Txt_UrlSource.Text = sr.ReadToEnd();
    }
}

    
/// <summary>
/// Extracts every run of text in <c>Txt_UrlSource</c> that sits between the start
/// marker (<c>Txt_First</c>) and the end marker (<c>Txt_Last</c>) and appends the
/// extracted pieces to <c>Txt_Result</c>.
/// </summary>
/// <param name="sender">Source of the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Btn_Result_Click(object sender, EventArgs e)
{
    // The markers are literal snippets of page source, not regular expressions.
    // Regex.Escape keeps characters such as ? . ( [ | inside them from being read
    // as regex syntax, which would throw or match the wrong text.
    string startMarker = Regex.Escape(Server.HtmlEncode(Txt_First.Text));
    string endMarker = Regex.Escape(Server.HtmlEncode(Txt_Last.Text));

    // Look-behind / look-ahead keep the markers themselves out of each match;
    // [\w\W]*? lazily matches any characters, line breaks included.
    // Every literal is verbatim (@) so that \w and \W reach the regex engine.
    string strExp = @"(?<=" + startMarker + @")[\w\W]*?(?=" + endMarker + @")";

    // The source is HTML-encoded the same way as the markers, so both sides of the
    // comparison stay consistent; each match is decoded again below.
    MatchCollection mc = Regex.Matches(Server.HtmlEncode(Txt_UrlSource.Text), strExp);

    System.Text.StringBuilder collected = new System.Text.StringBuilder();
    foreach (Match match in mc)
    {
        collected.Append(Server.HtmlDecode(match.Value));
    }

    // Appended rather than assigned: text already present in the result box is kept.
    Txt_Result.Text += collected.ToString();
}

    
/// <summary>
/// Checks that the start marker (<c>Txt_First</c>) and the end marker
/// (<c>Txt_Last</c>) each occur at most once in <c>Txt_UrlSource</c>. Writes a
/// script alert to the response for the first marker found more than once; the
/// end marker is only checked when the start marker passes.
/// </summary>
/// <param name="sender">Source of the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Btn_ListCheck_Click(object sender, EventArgs e)
{
    // Encode the page source once; both markers are compared in encoded form.
    string encodedSource = Server.HtmlEncode(Txt_UrlSource.Text);

    if (CountLiteralOccurrences(encodedSource, Server.HtmlEncode(Txt_First.Text)) > 1)
    {
        Response.Write("<script>alert('列表开始代码有重复!')</script>");
        return;
    }

    if (CountLiteralOccurrences(encodedSource, Server.HtmlEncode(Txt_Last.Text)) > 1)
    {
        Response.Write("<script>alert('列表结束代码有重复!')</script>");
    }
}

/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="marker"/> in
/// <paramref name="text"/>, treating the marker as literal text.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="marker">Literal snippet to look for.</param>
/// <returns>Number of matches found by the regex engine.</returns>
private static int CountLiteralOccurrences(string text, string marker)
{
    // Regex.Escape stops regex metacharacters in the marker (common in HTML, e.g.
    // ? . ( [ ) from changing what is counted or making the pattern invalid.
    // An empty marker still yields the empty pattern, which matches at every
    // position of the text.
    return Regex.Matches(text, Regex.Escape(marker)).Count;
}

你可能感兴趣的:(信息采集)