Home > Article > Backend Development > Efficient .NET dirty word filtering algorithm and application examples
This article presents an efficient dirty-word (bad-word) filtering algorithm for .NET, together with a usage example. It is shared here for reference; the details are as follows:
The BadWordsFilter class (BadWordsFilter.cs):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Collections;
using System.Data;

namespace WNF
{
    /// <summary>
    /// Detects whether a text contains any word from a configured keyword list.
    /// Per-character lookup tables are used to reject most candidate positions
    /// cheaply before a substring is allocated and looked up in the hash set.
    /// Matching is ordinal and case-sensitive.
    /// </summary>
    public class BadWordsFilter
    {
        // One slot per possible char value (0..char.MaxValue inclusive). Sizing the
        // tables with char.MaxValue alone leaves '\uFFFF' without a slot.
        private const int CharTableSize = char.MaxValue + 1;

        // Keywords of two or more characters.
        private readonly HashSet<string> hash = new HashSet<string>();

        // fastCheck[c]: bit i (0..6) is set when c occurs at position i of some keyword;
        // bit 7 is set when c occurs at position 7 or later.
        private readonly byte[] fastCheck = new byte[CharTableSize];

        // fastLength[c]: for keywords of length >= 2 starting with c,
        // bit min(7, length - 2) is set.
        private readonly byte[] fastLength = new byte[CharTableSize];

        // charCheck[c]: c alone is a (single-character) keyword.
        private readonly BitArray charCheck = new BitArray(CharTableSize);

        // endCheck[c]: c is the last character of some keyword of length >= 2.
        private readonly BitArray endCheck = new BitArray(CharTableSize);

        private int maxWordLength = 0;
        private int minWordLength = int.MaxValue;

        public BadWordsFilter()
        {
        }

        /// <summary>
        /// Loads keywords into the filter. May be called more than once; keywords accumulate.
        /// </summary>
        /// <param name="badwords">
        /// Table whose first column holds one keyword per row. Rows whose first
        /// column converts to an empty string (including DBNull) are ignored.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="badwords"/> is null.</exception>
        public void Init(DataTable badwords)
        {
            if (badwords == null)
            {
                throw new ArgumentNullException("badwords");
            }

            for (int j = 0; j < badwords.Rows.Count; j++)
            {
                string word = badwords.Rows[j][0].ToString();

                // An empty keyword can never match; it would also corrupt
                // minWordLength and fail on word[0] below.
                if (word.Length == 0)
                {
                    continue;
                }

                maxWordLength = Math.Max(maxWordLength, word.Length);
                minWordLength = Math.Min(minWordLength, word.Length);

                // Record at which positions each character may appear.
                for (int i = 0; i < 7 && i < word.Length; i++)
                {
                    fastCheck[word[i]] |= (byte)(1 << i);
                }

                // Positions 7 and beyond share the top bit.
                for (int i = 7; i < word.Length; i++)
                {
                    fastCheck[word[i]] |= 0x80;
                }

                if (word.Length == 1)
                {
                    charCheck[word[0]] = true;
                }
                else
                {
                    fastLength[word[0]] |= (byte)(1 << (Math.Min(7, word.Length - 2)));
                    endCheck[word[word.Length - 1]] = true;
                    hash.Add(word);
                }
            }
        }

        /// <summary>
        /// Not implemented.
        /// </summary>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public string Filter(string text, string mask)
        {
            throw new NotImplementedException();
        }

        /// <summary>
        /// Checks whether <paramref name="text"/> contains any loaded keyword as a substring.
        /// </summary>
        /// <param name="text">Text to scan. Null or empty text contains no keyword.</param>
        /// <returns>true if at least one keyword occurs in the text; otherwise false.</returns>
        public bool HasBadWord(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            // Every position is tried as a potential keyword start, so overlapping
            // candidates such as "aab" against keyword "ab" are not skipped.
            for (int start = 0; start < text.Length; start++)
            {
                char begin = text[start];

                // Bit 0 is set for the first character of every keyword,
                // single-character keywords included.
                if ((fastCheck[begin] & 1) == 0)
                {
                    continue;
                }

                if (charCheck[begin])
                {
                    return true;
                }

                // Largest offset j such that the candidate text[start..start+j] stays
                // inside the text and is no longer than the longest keyword.
                int maxOffset = Math.Min(maxWordLength, text.Length - start) - 1;

                for (int j = 1; j <= maxOffset; j++)
                {
                    char current = text[start + j];

                    // No keyword has this character at position j: no longer
                    // candidate starting here can match either.
                    if ((fastCheck[current] & (1 << Math.Min(j, 7))) == 0)
                    {
                        break;
                    }

                    // Only build the substring when the length, the first character's
                    // length mask, and the last character are all plausible.
                    if (j + 1 >= minWordLength
                        && (fastLength[begin] & (1 << Math.Min(j - 1, 7))) != 0
                        && endCheck[current]
                        && hash.Contains(text.Substring(start, j + 1)))
                    {
                        return true;
                    }
                }
            }

            return false;
        }
    }
}
Usage:
// Load the keyword list from the database and initialize the filter with it.
string sql = "select keywords from tb_keyword";
BadWordsFilter badwordfilter = new BadWordsFilter();
badwordfilter.Init(oEtb.GetDataSet(sql).Tables[0]);

// Check whether the submitted text contains any keyword.
bool hasBadWord = badwordfilter.HasBadWord(TextBox1.Text);
if (hasBadWord)
{
    // Page.RegisterClientScriptBlock(string, string) is obsolete; register through
    // ClientScriptManager instead. The script string already carries its own
    // <script> tags, so the overload that does not add tags is used.
    Page.ClientScript.RegisterClientScriptBlock(Page.GetType(), "a", "<script>alert('该评论含有不合法文字!')</script>");
}
else
{
    PingLun(); // write to the comment table
}
I hope this article is helpful to everyone doing ASP.NET programming.
For more articles on efficient .NET dirty-word filtering algorithms and application examples, please follow the PHP Chinese website!