C# 替换去除HTML标记方法,正则表达式

[from] http://blog.csdn.net/sgear/article/details/6263848
/// <summary>
/// Strips HTML markup from a string and decodes a small set of common entities.
/// </summary>
/// <remarks>
/// The rules are applied in a fixed order: script blocks, tags, line breaks followed by
/// whitespace, named/numeric entities, then HTML comments. As a final safety net every
/// remaining '&lt;', '&gt;' and CR/LF pair is deleted, which means angle brackets that were
/// produced by decoding <c>&amp;lt;</c> / <c>&amp;gt;</c> are removed as well.
/// Numeric entities other than the ones listed explicitly are dropped, not decoded.
/// </remarks>
/// <param name="strHtml">The HTML text to clean. May be null or empty.</param>
/// <returns>The text with markup removed; an empty string when the input is null or empty.</returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Each row is { pattern, replacement }. Order matters: later rules rely on earlier ones.
    string[,] rules =
    {
        // (?s) lets '.' cross line breaks so multi-line script bodies are removed too.
        { @"(?s)<script[^>]*?>.*?</script>", "" },
        // Opening/closing tags with optional namespace prefix and attributes; \7 refers back
        // to the opening quote character of a quoted attribute value.
        // NOTE(review): nested lazy quantifiers here may backtrack heavily on malformed input.
        { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
        // A CR or LF followed by any run of whitespace (this also swallows CR+LF pairs).
        { @"([\r\n])[\s]+", "" },
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        // Any other numeric entity is discarded.
        { @"&#(\d+);", "" },
        // Turn a comment terminator into a line break so the next rule, which deletes from
        // "<!--" to the end of the line, also removes single-line comments.
        { @"-->", "\r\n" },
        { @"<!--.*\n", "" }
    };

    string output = strHtml;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        output = Regex.Replace(output, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the result must be assigned to take effect.
    output = output.Replace("<", "");
    output = output.Replace(">", "");
    output = output.Replace("\r\n", "");

    return output;
}