C#使用正则表达式替换HTML标签

2018-07-20    来源:open-open

容器云强势上线!快速搭建集群,上万Linux镜像随意使用
using System.Text.RegularExpressions;     //包含正则表达式   

/// <summary>
/// Removes HTML markup from <paramref name="Htmlstring"/> and returns the remaining
/// text HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Steps, in order: script blocks and whole comments are removed (across line breaks),
/// then tags, then line breaks followed by whitespace, then stray comment delimiters.
/// A fixed set of entities (quot, lt, gt, nbsp, iexcl, cent, pound, copy) is decoded,
/// every other numeric entity is deleted, and <c>&amp;amp;</c> is decoded last so that
/// text such as <c>&amp;amp;lt;</c> is not decoded twice. The result is encoded with
/// <see cref="System.Net.WebUtility.HtmlEncode(string)"/>, which does not need an active
/// HTTP request, so any angle bracket left in the text is emitted as an entity.
/// </remarks>
/// <param name="Htmlstring">HTML fragment to clean. <c>null</c> or empty yields an empty string.</param>
/// <returns>The HTML-encoded, trimmed text content.</returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    const RegexOptions ignoreCase = RegexOptions.IgnoreCase;
    // Singleline lets '.' cross line breaks so multi-line scripts/comments are removed whole.
    const RegexOptions spanLines = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    // Script blocks first, so their source text never reaches the output.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", spanLines);
    // Whole comments next; otherwise a '>' inside a comment would end the "tag" early
    // and leak the rest of the comment.
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", spanLines);
    // Remaining tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<.[^>]*>", "", ignoreCase);
    // A line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"[\r\n]\s+", "", ignoreCase);
    // Leftover comment delimiters (closer without opener, opener without closer).
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", ignoreCase);

    // Decode the supported entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", ignoreCase);
    // Any other numeric entity is dropped.
    Htmlstring = Regex.Replace(Htmlstring, @"&#\d+;", "", ignoreCase);
    // '&amp;' must be decoded last: doing it earlier turns "&amp;lt;" into "&lt;" and
    // then into "<", which is a double decode.
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", ignoreCase);

    // The original called Htmlstring.Replace("<", "") etc. here and discarded the
    // results (strings are immutable), so those calls never had any effect. They are
    // omitted: the encoding below already neutralises any remaining '<' or '>'.
    return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}

/// <summary>
/// Replaces all HTML tags in <paramref name="strHtml"/> with the empty string and
/// returns the remaining plain text (not HTML-encoded).
/// </summary>
/// <remarks>
/// Rules are applied in table order: script blocks and whole comments are removed
/// (across line breaks), then tags, then line breaks followed by whitespace. A fixed
/// set of entities is decoded, other numeric entities are deleted, and
/// <c>&amp;amp;</c> is decoded last to avoid double decoding. Finally every remaining
/// <c>&lt;</c>, <c>&gt;</c> and CR LF pair is removed, so entity-encoded markup such as
/// <c>&amp;lt;script&amp;gt;</c> cannot come back as a live tag in the result.
/// </remarks>
/// <param name="strHtml">HTML fragment to clean. <c>null</c> or empty yields an empty string.</param>
/// <returns>The text content with markup removed.</returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Each row is { pattern, replacement }; rows are applied top to bottom.
    // "(?s)" turns on Singleline for that pattern only, so '.' can cross line breaks.
    string[,] rules =
    {
        // Script blocks, including multi-line ones.
        { @"(?s)<script[^>]*?>.*?</script>", "" },
        // Whole comments, so their content cannot leak.
        { @"(?s)<!--.*?-->", "" },
        // Tags: optional '/', optional '!', an (optionally prefixed) name, then any run
        // of non-quote characters or quoted strings up to '>'. The name is an atomic
        // group and the alternatives start with distinct characters; this avoids the
        // exponential backtracking that nested \w+ quantifiers cause on input like
        // "<aaaa...a" with no closing '>'.
        { @"<(?:/\s*)?!?(?>(?:\w+:)?\w+)(?:[^>""']|""[^""]*""|'[^']*')*>", "" },
        // A line break together with the whitespace that follows it.
        { @"[\r\n]\s+", "" },
        { @"&(quot|#34);", "\"" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        // Any other numeric entity is dropped.
        { @"&#\d+;", "" },
        // Decoded last: earlier decoding would turn "&amp;lt;" into "&lt;" and then "<".
        { @"&(amp|#38);", "&" },
        // Leftover comment delimiters: a stray closer becomes a line break so that the
        // next rule can delete from a stray opener to the end of that line.
        { @"-->", "\r\n" },
        { @"<!--.*\n", "" }
    };

    string strOutput = strHtml;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the original discarded these results, so the
    // final clean-up never ran. Chaining the calls applies it.
    return strOutput.Replace("<", "").Replace(">", "").Replace("\r\n", "");
}

标签: 脚本

版权声明:本站文章部分来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点!
本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。

上一篇:C#调用windows api关机代码

下一篇:C语言实现的链表结构