Heim >Backend-Entwicklung >PHP-Tutorial >PHP敏感词过滤

PHP敏感词过滤

WBOY
WBOYOriginal
2016-07-25 08:44:14 · 1076 Aufrufe
  /**
   * Banned-word ("black word") filter.
   *
   * Banned words are loaded in id-range batches, each batch is turned into a
   * character trie, and the text is scanned against every trie in turn.
   * The original author reports a run time of about 0.05 seconds per article.
   *
   * NOTE(review): Rds and Model_BlackWord_BlackWord are project classes that are
   * not part of this listing; the comments below describe only how they are
   * called here. Rds looks like a Redis wrapper (get/setex) - confirm.
   *
   * @author liuxu
   */
  class Logic_BlackWord
  {
      // Application identifiers. They are not referenced inside this class;
      // presumably callers use them - verify against the call sites.
      const APP_FORUM = 1;
      const APP_BLOG = 2;
      const APP_VOTE = 3;

      /**
       * Collect every banned word that occurs in the given text.
       *
       * @param string $txt Text to check; HTML tags and punctuation are ignored.
       * @return array Map of word type => list of matched words (as strings).
       */
      public function getHitList($txt)
      {
          $hitList = array();

          // The word table is processed in batches of $size ids.
          $max = $this->getMax();
          if ($max > 0) {
              $size = 1000;
              $last = (int)ceil($max / $size);
              for ($page = 1; $page <= $last; $page++) {
                  $result = $this->getHitListByPage($txt, $page, $size);
                  if ($result) {
                      // array_replace instead of array_merge: an all-digit word
                      // such as "123" is stored under an integer key, and
                      // array_merge would renumber it and lose the word.
                      $hitList = array_replace($hitList, $result);
                  }
              }
          }

          // Regroup "word => type" into "type => list of words".
          $hitList2 = array();
          foreach ($hitList as $hit => $type) {
              $hitList2[$type][] = (string)$hit;
          }
          return $hitList2;
      }

      /**
       * Return the highest banned-word id, read through the cache.
       *
       * On a cache miss (get() returned false) the value is queried with
       * MAX(id) and written back with setex() using 300 as the expiry argument.
       *
       * @return int Highest id, or 0 when the query yields no value.
       */
      private function getMax()
      {
          $redis = Rds::factory();
          $memKey = 'blackWord_max';
          $max = $redis->get($memKey);
          if ($max === false) {
              $max = 0;
              $blackWord = new Model_BlackWord_BlackWord();
              $para = array('field' => 'MAX(id) AS max');
              $result = $blackWord->search($para);
              if (isset($result[0]['max'])) {
                  $max = $result[0]['max'];
              }
              $redis->setex($memKey, 300, $max);
          }
          // Both the cache and the database may hand back a numeric string.
          return (int)$max;
      }

      /**
       * Scan the text against one batch of banned words.
       *
       * The text is stripped of HTML tags and of every character that is not
       * an ASCII letter, a digit or in the range U+4E00..U+9FA5, so words
       * broken up by spaces or punctuation are still found.
       *
       * NOTE(review): matching is case-sensitive and the stored words are not
       * normalised the same way as the text - confirm this is intended.
       *
       * @param string $txt  Raw text to check.
       * @param int    $page 1-based batch number.
       * @param int    $size Number of ids per batch.
       * @return array Map of matched word => word type.
       */
      private function getHitListByPage($txt, $page = 1, $size = 1000)
      {
          $hitList = array();

          // Fetch the trie for this batch; an empty trie cannot match anything.
          $wordTree = $this->getWordTreeByPage($page, $size);
          if (!$wordTree) {
              return $hitList;
          }

          $txt = strip_tags((string)$txt);
          $txt = preg_replace('/[^a-zA-Z0-9\\x{4e00}-\\x{9fa5}]/iu', '', $txt);
          // preg_replace yields null when the input is not valid UTF-8.
          if ($txt === null || $txt === '') {
              return $hitList;
          }

          // Split into characters once instead of calling mb_substr for every
          // position of the text.
          $chars = preg_split('//u', $txt, -1, PREG_SPLIT_NO_EMPTY);
          if (!$chars) {
              return $hitList;
          }

          $len = count($chars);
          for ($i = 0; $i < $len; $i++) {
              if (!isset($wordTree[$chars[$i]])) {
                  continue;
              }
              // Only the next 50 characters are examined, so a banned word
              // longer than 50 characters is never reported.
              $window = implode('', array_slice($chars, $i, 50));
              $result = $this->getHitListByTree($window, $wordTree);
              foreach ($result as $hit => $type) {
                  $hitList[$hit] = $type;
              }
          }
          return $hitList;
      }

      /**
       * Find every banned word that is a prefix of the given text.
       *
       * Walks the trie one character at a time from the start of $txt and
       * records a hit at each node that carries a 'type' entry; the walk stops
       * at the first character that has no child node.
       *
       * @param string $txt      Text whose beginning is matched.
       * @param array  $wordTree Trie from getWordTreeByPage(); taken by
       *                         reference but not modified.
       * @return array Map of matched word => word type.
       */
      private function getHitListByTree($txt, &$wordTree)
      {
          $len = mb_strlen($txt, 'UTF-8');
          $point = & $wordTree;
          $hit = '';
          $hitList = array();
          for ($i = 0; $i < $len; $i++) {
              $char = mb_substr($txt, $i, 1, 'UTF-8');
              if (!isset($point[$char])) {
                  break;
              }
              $hit .= $char;
              $point = & $point[$char];
              if (isset($point['type'])) {
                  // A complete banned word ends at this node.
                  $hitList[$hit] = $point['type'];
              }
          }
          return $hitList;
      }

      /**
       * Build (or read from the cache) the trie for one batch of banned words.
       *
       * Each node is an array keyed by the next character; a node that ends a
       * word additionally has a 'type' entry holding the word's type.
       *
       * @param int $page 1-based batch number.
       * @param int $size Number of ids per batch.
       * @return array The trie; empty when the batch has no usable words.
       */
      private function getWordTreeByPage($page = 1, $size = 1000)
      {
          $redis = Rds::factory();
          $memKey = 'blackWord_tree_' . $page . '_' . $size;
          $wordTree = $redis->get($memKey);
          // Rebuild on a cache miss (false) and on any non-array value.
          if (!is_array($wordTree)) {
              $wordTree = array();
              $blackWord = new Model_BlackWord_BlackWord();
              $start = ((int)$page - 1) * (int)$size;
              $end = $start + (int)$size;
              // The batch covers the ids in ($start, $end].
              $para = array(
                  'where' => 'status=1 AND id>' . $start . ' AND id<=' . $end,
              );
              $result = $blackWord->search($para);
              if ($result) {
                  foreach ($result as $value) {
                      if (empty($value['word'])) {
                          continue;
                      }
                      // Split the word into single UTF-8 characters.
                      $chars = preg_split('//u', (string)$value['word'], -1, PREG_SPLIT_NO_EMPTY);
                      if (!$chars) {
                          // Not valid UTF-8: the word cannot be indexed.
                          continue;
                      }
                      // Descend from the root, creating missing nodes on the way.
                      $point = & $wordTree;
                      foreach ($chars as $char) {
                          $point = & $point[$char];
                      }
                      $point['type'] = $value['type'];
                  }
                  // Drop the dangling reference into the trie.
                  unset($point);
              }
              $redis->setex($memKey, 300, $wordTree);
          }
          return $wordTree;
      }
  }
复制代码

PHP


Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn