/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The input with script blocks, tags, comment remnants, line breaks followed by
/// whitespace, and a fixed set of character entities removed or decoded; the result is
/// then HTML-encoded and trimmed. Returns an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove whole script blocks. Singleline lets ".*?" cross line breaks, so the
    // body of a multi-line script is dropped together with its tags instead of
    // surviving as plain text.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Pattern / replacement pairs, applied in order (the order matters: tags are
    // stripped before entities are decoded).
    string[,] rules =
    {
        { @"<(.[^>]*)>", "" },            // any remaining tag
        { @"([\r\n])[\s]+", "" },         // a line break followed by whitespace
        { @"-->", "" },                   // dangling comment terminator
        { @"<!--.*", "" },                // unterminated comment, up to end of line
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", "" },         // non-breaking spaces are dropped, not kept
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        { @"&#(\d+);", "" }               // every other numeric entity is dropped
    };

    for (int i = 0; i < rules.GetLength(0); i++)
    {
        Htmlstring = Regex.Replace(Htmlstring, rules[i, 0], rules[i, 1],
            RegexOptions.IgnoreCase);
    }

    // String.Replace returns a new string, so the result must be assigned back.
    // This removes angle brackets that were unmatched or produced by decoding
    // &lt; / &gt; above, plus any CR/LF pair that is still present.
    Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // HttpUtility.HtmlEncode is the static method Server.HtmlEncode forwards to;
    // calling it directly means no active HttpContext is required.
    return System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
}
以上代码是从网上直接复制过来的,这个确实能过滤掉所有的HTML标签,但是这个不是我想要的,这个过滤得太干净了,我如果用textarea输入框的话,我是要保留空格跟换行的。
然后我就自己改了一下这个方法:textarea 的换行符是 \n(也可能是 \r\n),所以我把这些换行符重新匹配并替换成 <br>,这样从数据库读取内容显示到页面时就能正确换行;再把空格替换成 HTML 的空格符(&nbsp;),大功告成。
代码如下:
/// <summary>
/// Strips HTML markup from a string while keeping line breaks and spaces, so that
/// text typed into a textarea can be shown on a page with its layout intact.
/// </summary>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The input with script blocks, tags, comment remnants and a fixed set of character
/// entities removed or decoded, then HTML-encoded; afterwards every line break
/// (including input <c>&lt;br&gt;</c> tags) is emitted as <c>&lt;br&gt;</c> and every
/// other whitespace character as <c>&amp;nbsp;</c>. Returns an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string NewNoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove whole script blocks. Singleline lets ".*?" cross line breaks, so the
    // body of a multi-line script is dropped together with its tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Turn <br>, <br/> and <br /> into a newline before tags are stripped, so they
    // come back out as <br> at the end instead of being lost.
    Htmlstring = Regex.Replace(Htmlstring, @"<br\s*/?>", "\n", RegexOptions.IgnoreCase);

    // Pattern / replacement pairs, applied in order (tags are stripped before
    // entities are decoded).
    string[,] rules =
    {
        { @"<(.[^>]*)>", "" },            // any remaining tag
        { @"-->", "" },                   // dangling comment terminator
        { @"<!--.*", "" },                // unterminated comment, up to end of line
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", "" },         // entity-encoded non-breaking spaces are dropped
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        { @"&#(\d+);", "" }               // every other numeric entity is dropped
    };

    for (int i = 0; i < rules.GetLength(0); i++)
    {
        Htmlstring = Regex.Replace(Htmlstring, rules[i, 0], rules[i, 1],
            RegexOptions.IgnoreCase);
    }

    // String.Replace returns a new string, so the result must be assigned back.
    // This removes angle brackets that were unmatched or produced by decoding
    // &lt; / &gt; above.
    Htmlstring = Htmlstring.Replace("<", "").Replace(">", "");

    // Encode first, then add markup, so the <br> and &nbsp; inserted below are not
    // themselves escaped. HttpUtility.HtmlEncode is the static method
    // Server.HtmlEncode forwards to; it needs no active HttpContext.
    Htmlstring = System.Web.HttpUtility.HtmlEncode(Htmlstring);

    // CRLF must be tried before the single characters so it yields one <br>, not two.
    Htmlstring = Regex.Replace(Htmlstring, @"\r\n|\r|\n", "<br>");

    // Remaining whitespace becomes a non-breaking space entity so that runs of
    // spaces are not collapsed by the browser.
    Htmlstring = Regex.Replace(Htmlstring, @"\s", "&nbsp;");

    return Htmlstring;
}
这个方法可以用来过滤用户输入的待发布内容。
|