-
/**
 * Banned-word filter.
 *
 * Banned words are read in id-range batches, each batch is turned into a
 * character tree (trie) and the input text is scanned against every batch.
 * Cost noted by the original author: roughly 0.05 seconds per article.
 *
 * @author liuxu
 */
class Logic_BlackWord
{
    // Application identifiers. They are not referenced inside this class;
    // presumably callers use them to tag where a text came from - TODO confirm.
    const APP_FORUM = 1;
    const APP_BLOG = 2;
    const APP_VOTE = 3;

    /**
     * Longest stretch of text (in characters) inspected from any start position.
     * Banned words longer than this can never be reported.
     */
    const MAX_WORD_LENGTH = 50;

    /**
     * Finds every banned word contained in a text.
     *
     * The text is normalized once (HTML tags removed, then every character
     * other than ASCII letters, digits and U+4E00..U+9FA5 removed) and then
     * checked against the banned words batch by batch.
     *
     * @param string $txt Raw text to check.
     * @return array Map of word type => list of matched banned words (strings).
     *               Empty when nothing matched.
     */
    public function getHitList($txt)
    {
        $hitList = array();

        // Filter against the banned words in batches of $size ids.
        $max = (int) $this->getMax();
        if ($max > 0) {
            // Normalization does not depend on the batch, so do it only once.
            $clean = $this->normalizeText($txt);
            if ($clean !== '') {
                $size = 1000;
                $last = (int) ceil($max / $size);
                for ($page = 1; $page <= $last; $page++) {
                    // Merge key by key: array_merge() would renumber the
                    // integer keys PHP creates for purely numeric words
                    // ("123" becomes key 123) and the word would be lost.
                    foreach ($this->getHitListByPage($clean, $page, $size) as $hit => $type) {
                        $hitList[$hit] = $type;
                    }
                }
            }
        }

        // Regroup word => type into type => [words].
        $grouped = array();
        foreach ($hitList as $hit => $type) {
            // Cast back to string because numeric words come out as int keys.
            $grouped[$type][] = (string) $hit;
        }

        return $grouped;
    }

    /**
     * Returns the highest banned-word id, which determines how many batches
     * have to be scanned.
     *
     * The value is read from the cache key "blackWord_max"; on a miss
     * (cache returns false) it is queried through Model_BlackWord_BlackWord
     * and stored again with setex(..., 300, ...) - presumably a 300 second
     * lifetime, following the Redis SETEX convention.
     *
     * @return int|string Highest id, or 0 when the query yields no row.
     */
    private function getMax()
    {
        $redis = Rds::factory();
        $memKey = 'blackWord_max';
        $max = $redis->get($memKey);
        if ($max === false) {
            $max = 0;
            $blackWord = new Model_BlackWord_BlackWord();
            $para = array();
            $para['field'] = "MAX(id) AS max";
            $result = $blackWord->search($para);
            if (isset($result[0]['max'])) {
                $max = $result[0]['max'];
            }

            $redis->setex($memKey, 300, $max);
        }

        return $max;
    }

    /**
     * Prepares a text for matching: strips HTML tags, then removes every
     * character that is not an ASCII letter, a digit or in U+4E00..U+9FA5.
     *
     * @param string $txt Raw text.
     * @return string Normalized text; empty string when nothing usable remains.
     */
    private function normalizeText($txt)
    {
        $pattern = '/[^a-zA-Z0-9\x{4e00}-\x{9fa5}]/iu';

        $txt = strip_tags((string) $txt);
        $clean = preg_replace($pattern, '', $txt);
        if ($clean === null) {
            // preg_replace() fails on malformed UTF-8. Scrub the invalid
            // bytes and retry so that such input cannot bypass the filter.
            $clean = preg_replace($pattern, '', mb_convert_encoding($txt, 'UTF-8', 'UTF-8'));
        }

        return $clean === null ? '' : $clean;
    }

    /**
     * Scans a text against one batch of banned words.
     *
     * @param string $txt  Text already normalized by normalizeText().
     * @param int    $page 1-based batch number.
     * @param int    $size Number of ids per batch.
     * @return array Map of matched banned word => word type.
     */
    private function getHitListByPage($txt, $page = 1, $size = 1000)
    {
        $hitList = array();
        if ($txt === '') {
            return $hitList;
        }

        // Load the banned-word tree of this batch.
        $wordTree = $this->getWordTreeByPage($page, $size);
        if (!is_array($wordTree) || !$wordTree) {
            return $hitList;
        }

        // Split into characters once; calling mb_substr() for every position
        // would rescan the string from its start each time.
        $chars = preg_split('//u', $txt, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            return $hitList;
        }

        $len = count($chars);
        for ($i = 0; $i < $len; $i++) {
            // Only positions whose character starts some banned word matter.
            if (isset($wordTree[$chars[$i]])) {
                $window = implode('', array_slice($chars, $i, self::MAX_WORD_LENGTH));
                foreach ($this->getHitListByTree($window, $wordTree) as $hit => $type) {
                    $hitList[$hit] = $type;
                }
            }
        }

        return $hitList;
    }

    /**
     * Collects every banned word that is a prefix of the given text.
     *
     * Walks the tree one character at a time and stops at the first
     * character that has no branch.
     *
     * @param string $txt      Text whose beginning is tested.
     * @param array  $wordTree Banned-word tree; taken by reference only to
     *                         avoid copying, it is not modified.
     * @return array Map of matched banned word => word type.
     */
    private function getHitListByTree($txt, &$wordTree)
    {
        $hitList = array();

        $chars = preg_split('//u', $txt, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            return $hitList;
        }

        $point = &$wordTree;
        $hit = '';
        foreach ($chars as $char) {
            if (!isset($point[$char])) {
                break;
            }

            $hit .= $char;
            $point = &$point[$char];

            // A node carrying 'type' marks the end of a complete banned word.
            if (isset($point['type'])) {
                $hitList[$hit] = $point['type'];
            }
        }

        return $hitList;
    }

    /**
     * Returns the banned-word tree for one batch of ids.
     *
     * The batch covers rows with status=1 and (page-1)*size < id <= page*size.
     * Each word becomes a path of single-character keys; the final node holds
     * the word's type under the key 'type'. The tree is cached under
     * "blackWord_tree_{page}_{size}" via setex(..., 300, ...).
     *
     * @param int $page 1-based batch number.
     * @param int $size Number of ids per batch.
     * @return array Nested character tree; empty when the batch has no words.
     */
    private function getWordTreeByPage($page = 1, $size = 1000)
    {
        $page = (int) $page;
        $size = (int) $size;

        $redis = Rds::factory();
        $memKey = 'blackWord_tree_' . $page . '_' . $size;
        $wordTree = $redis->get($memKey);
        if ($wordTree === false) {
            $wordTree = array();
            $blackWord = new Model_BlackWord_BlackWord();
            $start = ($page - 1) * $size;
            $end = $start + $size;
            $para = array();
            // Both bounds are integers (cast above), so concatenation is safe.
            $para['where'] = "status=1 AND id>" . $start . " AND id<=" . $end;
            $result = $blackWord->search($para);
            if ($result) {
                foreach ($result as $value) {
                    // Compare against '' so that the word "0" is not skipped.
                    if (!isset($value['word']) || (string) $value['word'] === '') {
                        continue;
                    }

                    // Split the word into its UTF-8 characters.
                    $chars = preg_split('//u', (string) $value['word'], -1, PREG_SPLIT_NO_EMPTY);
                    if (!$chars) {
                        continue;
                    }

                    $point = &$wordTree;
                    foreach ($chars as $char) {
                        $point = &$point[$char];
                    }

                    $point['type'] = $value['type'];
                    // Drop the reference so no tree node stays aliased.
                    unset($point);
                }
            }

            $redis->setex($memKey, 300, $wordTree);
        }

        return $wordTree;
    }
}
-
-
复制發
|