利用正则表达式过滤掉HTML字符及自己指定的字符

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.Text.RegularExpressions;

using System.Web;

namespace Utility

{

public class StringUtil

{

/// <summary>

/// 过滤掉HTML标签

/// </summary>

/// <param name="Htmlstring"></param>

/// <returns></returns>

public static string NoHTML(string Htmlstring)

{

//正则表达式&.*?; 可以去掉所有的&开头;结尾的

//正则表达式 <.*?> 可以去掉所有 <> </>之类的

//正则表达式 >(.*)</li> 获取<li></li>之间的内容

//正则表达式 <[^>]*> 获取网页文本内容

//删除脚本

Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",RegexOptions.IgnoreCase);

//删除HTML

Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",RegexOptions.IgnoreCase);

Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",RegexOptions.IgnoreCase);

Htmlstring.Replace("<", "");

Htmlstring.Replace(">", "");

Htmlstring.Replace("\r\n", "");

Htmlstring.Replace("|", "");

Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();

return Htmlstring;

}

}

}

你可能感兴趣的:(html,Web,正则表达式,脚本,LINQ)