C# HttpWebRequest 从google服务器获取google的PageRank PR值

首先,本文的实现参考了这篇:

http://www.codeproject.com/KB/aspnet/Google_Pagerank.aspx

 

简述一下原理:

获取PR值,通过向google服务器发送一个http请求来实现。

http://toolbarqueries.google.com.hk/search?client=navclient-auto&hl=en&ch=6771535612&ie=UTF-8&oe=UTF-8&features=Rank&q=info:http%3A%2F%2Fwww.codeproject.com%2F

 

在浏览器中输入上面的链接,Google服务器会返回一个字符串

 

上面的链接返回的是:Rank_1:1:6

 

要实现对任意链接的PR值的查询,关键是要正确构造请求的URL。其中ch=6771535612这一项很重要,它是请求网址的hash值,这个hash值的算法在上面的那篇博文中有详细的说明。

 

这里将主要的代码转贴在下面:

 

/// <summary>
/// Seed for the third state word (<c>c</c>) of the checksum computed by
/// <see cref="GoogleCH"/>.
/// </summary>
private const UInt32 GOOGLE_MAGIC = 0xE6359A60;

/// <summary>
/// Scrambles the three 32-bit state words in place using a fixed sequence of
/// subtract / xor-shift steps. The shift amounts (13, 8, 13, 12, 16, 5, 3, 10, 15)
/// appear to follow Bob Jenkins' "lookup2" mix function.
/// </summary>
/// <param name="a">First state word; updated in place.</param>
/// <param name="b">Second state word; updated in place.</param>
/// <param name="c">Third state word; updated in place.</param>
private static void _mix(ref UInt32 a, ref UInt32 b, ref UInt32 c)
{
    // The subtractions are expected to wrap modulo 2^32. Make that explicit so
    // the result does not change (or throw OverflowException) when the project
    // is compiled with overflow checking enabled.
    unchecked
    {
        a -= b; a -= c; a ^= c >> 13;
        b -= c; b -= a; b ^= a << 8;
        c -= a; c -= b; c ^= b >> 13;

        a -= b; a -= c; a ^= c >> 12;
        b -= c; b -= a; b ^= a << 16;
        c -= a; c -= b; c ^= b >> 5;

        a -= b; a -= c; a ^= c >> 3;
        b -= c; b -= a; b ^= a << 10;
        c -= a; c -= b; c ^= b >> 15;
    }
}

/// <summary>
/// Computes the value of the <c>ch</c> query parameter for a toolbar PageRank
/// request: the text <c>"info:" + url</c> is hashed to a 32-bit value and the
/// result is returned as the digit '6' followed by that value in decimal.
/// </summary>
/// <param name="url">
/// The URL to hash. A null reference is formatted as an empty string, so it
/// hashes the same as <c>""</c>.
/// </param>
/// <returns>The checksum string, e.g. <c>"6882688726"</c> for an empty URL.</returns>
/// <remarks>
/// NOTE(review): characters are consumed as UTF-16 code units, which matches a
/// byte-wise hash only while every character is &lt;= 0xFF. Presumably callers
/// pass ASCII URLs - confirm before using with internationalised URLs.
/// </remarks>
public static string GoogleCH(string url)
{
    url = string.Format("info:{0}", url);

    int length = url.Length;
    UInt32 a, b;
    UInt32 c = GOOGLE_MAGIC;
    int k = 0;
    int len = length;

    a = b = 0x9E3779B9;

    // All additions below rely on 32-bit wrap-around; see the note in _mix.
    unchecked
    {
        // Consume the input twelve characters at a time, packing four
        // characters little-endian into each of the three state words.
        while (len >= 12)
        {
            a += (UInt32)(url[k + 0] + (url[k + 1] << 8) + (url[k + 2] << 16) + (url[k + 3] << 24));
            b += (UInt32)(url[k + 4] + (url[k + 5] << 8) + (url[k + 6] << 16) + (url[k + 7] << 24));
            c += (UInt32)(url[k + 8] + (url[k + 9] << 8) + (url[k + 10] << 16) + (url[k + 11] << 24));
            _mix(ref a, ref b, ref c);
            k += 12;
            len -= 12;
        }

        // The low byte of c is reserved for the total length, which is why the
        // tail characters destined for c start at bit 8.
        c += (UInt32)length;

        // Fold in the remaining 0..11 characters. Every case deliberately
        // continues into the next lower one via "goto case".
        switch (len)
        {
            case 11: c += (UInt32)(url[k + 10] << 24); goto case 10;
            case 10: c += (UInt32)(url[k + 9] << 16); goto case 9;
            case 9: c += (UInt32)(url[k + 8] << 8); goto case 8;
            case 8: b += (UInt32)(url[k + 7] << 24); goto case 7;
            case 7: b += (UInt32)(url[k + 6] << 16); goto case 6;
            case 6: b += (UInt32)(url[k + 5] << 8); goto case 5;
            case 5: b += (UInt32)(url[k + 4]); goto case 4;
            case 4: a += (UInt32)(url[k + 3] << 24); goto case 3;
            case 3: a += (UInt32)(url[k + 2] << 16); goto case 2;
            case 2: a += (UInt32)(url[k + 1] << 8); goto case 1;
            case 1: a += (UInt32)(url[k + 0]); break;
            default: break; // case 0: nothing left to add
        }
    }

    _mix(ref a, ref b, ref c);

    return string.Format("6{0}", c);
}

 

再给出一个调用的用例供参考:

 

// Example caller: looks up the PageRank of the URL typed into txtUrl and shows
// it in txtPR. txtUrl, txtPR, txtResponse, request and response are declared
// by the surrounding form/class, which is not shown in this snippet.
try
{
    // Build the request URL. The checksum is computed over the raw URL text,
    // while the copy placed in the query string is percent-encoded so that
    // characters such as '&', '#' or ':' cannot break the query.
    string checksum = GoogleCH(txtUrl.Text);
    string query = string.Format(
        @"http://toolbarqueries.google.com/search?client=navclient-auto&ch={0}&features=Rank&q=info:{1}",
        checksum,
        Uri.EscapeDataString(txtUrl.Text));

    // Send the request and obtain the response.
    request = (HttpWebRequest)HttpWebRequest.Create(query);
    response = (HttpWebResponse)request.GetResponse();
    if (response == null)
    {
        txtResponse.Text = "response==NULL";
        return;
    }

    txtResponse.Text = "";

    // Dispose the response and its stream deterministically, and read the whole
    // body before matching so the "Rank_..." token cannot be split across two
    // buffer reads.
    string body;
    using (response)
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
    {
        body = reader.ReadToEnd();
    }

    // Extract and display the PR value.
    Match match = Regex.Match(body, "Rank_1:[0-9]:([0-9]+)");
    if (match.Success)
    {
        txtPR.Text = int.Parse(match.Groups[1].Value).ToString();
    }
    else
    {
        // No rank in the reply: show the raw reply instead of letting
        // int.Parse throw on an empty string.
        txtPR.Text = "";
        txtResponse.Text = body;
    }
}
catch (System.UriFormatException)
{
    txtResponse.Text = "无效的URL";
}
catch (WebException ex)
{
    // Network failure or non-success HTTP status reported by GetResponse().
    txtResponse.Text = "请求失败: " + ex.Message;
}

你可能感兴趣的:(c,String,Google,服务器,C#,url)