Heim >php教程 >php手册 >敏感词过滤

敏感词过滤

WBOY
WBOYOriginal
2016-06-06 19:34:59 · 1238 Durchsuche

适用于规模较大的环境 无 /** * 禁词过滤 * 执行效率:每篇用时0.05秒 * @author liuxu * */class Logic_BlackWord{const APP_FORUM= 1;const APP_BLOG= 2;const APP_VOTE= 3;/** * 过滤得到禁词 * @param unknown $txt * @return Ambigous multitype:, unkno

适用于规模较大的环境
/**
 * Banned-word filter.
 *
 * Banned words are read from Model_BlackWord_BlackWord in id-range pages,
 * compiled into one character trie per page and cached through Rds
 * (presumably a Redis wrapper - confirm) with a TTL argument of 300.
 * The text to check is reduced to ASCII letters, digits and the ideographs
 * U+4E00-U+9FA5 before matching; matching is case-sensitive.
 *
 * Performance (original author's figure): about 0.05 s per article.
 *
 * NOTE(review): banned words themselves are NOT normalised the way the text
 * is, so a stored word containing spaces or punctuation can never match.
 *
 * @author liuxu
 */
class Logic_BlackWord
{

	const APP_FORUM	= 1;
	const APP_BLOG	= 2;
	const APP_VOTE	= 3;

	/**
	 * Find every banned word contained in a text.
	 *
	 * @param string $txt Raw text to check; HTML tags are stripped first.
	 * @return array Map of word type => list of matched banned words (always
	 *               strings). Empty array when nothing matched.
	 */
	public function getHitList($txt)
	{
		$hitList = array();

		// Banned words are filtered in batches of $size ids.
		$max = $this->getMax();
		if($max)
		{
			// Normalise and split once; every page scans the same characters.
			$chars = $this->splitChars($this->normalizeText($txt));
			if($chars)
			{
				$size = 1000;
				$last = ceil($max/$size);
				for($page=1;$page<=$last;$page++)
				{
					$result = $this->getHitListByPage($chars,$page,$size);
					// PHP stores a numeric word such as "123" under the integer
					// key 123. array_merge() would renumber such keys and lose
					// the word; array_replace() keeps them (later pages win).
					if($result) $hitList = array_replace($hitList,$result);
				}
			}
		}

		// Regroup word => type into type => list of words.
		$hitList2 = array();
		foreach($hitList as $hit=>$type)
		{
			// Cast back to string: numeric words come out of the array as ints.
			$hitList2[$type][] = (string)$hit;
		}

		return $hitList2;
	}

	/**
	 * Highest banned-word id, used to decide how many pages to scan.
	 *
	 * The value is read from the cache key "blackWord_max"; on a miss it is
	 * loaded with MAX(id) and written back with a TTL argument of 300.
	 *
	 * @return int 0 when no id could be determined.
	 */
	private function getMax()
	{
		$redis = Rds::factory();
		$memKey = 'blackWord_max';
		$max = $redis->get($memKey);
		// Treat null like false: some cache wrappers report a miss as null.
		if($max===false || $max===null)
		{
			$max = 0;
			$blackWord = new Model_BlackWord_BlackWord();
			$para = array('field' => "MAX(id) AS max");
			$result = $blackWord->search($para);
			if(isset($result[0]['max'])) $max = $result[0]['max'];

			$redis->setex($memKey,300,$max);
		}

		return (int)$max;
	}

	/**
	 * Reduce a text to the characters that take part in matching.
	 *
	 * @param string $txt Raw input text.
	 * @return string Text without HTML tags and without any character other
	 *                than a-z, A-Z, 0-9 and U+4E00-U+9FA5.
	 */
	private function normalizeText($txt)
	{
		// Replace malformed UTF-8 first. Otherwise a single invalid byte makes
		// the /u regex below fail and the whole text would escape the filter.
		$txt = mb_convert_encoding((string)$txt,'UTF-8','UTF-8');
		$txt = strip_tags($txt);
		$txt = preg_replace('/[^a-zA-Z0-9\x{4e00}-\x{9fa5}]/iu','',$txt);

		// preg_replace() returns null on a PCRE error.
		return is_string($txt) ? $txt : '';
	}

	/**
	 * Split a UTF-8 string into a list of single characters.
	 *
	 * @param string $txt
	 * @return array List of one-character strings; empty for '' or on a PCRE error.
	 */
	private function splitChars($txt)
	{
		if($txt==='') return array();

		$chars = preg_split('//u',$txt,-1,PREG_SPLIT_NO_EMPTY);

		return is_array($chars) ? $chars : array();
	}

	/**
	 * Scan the text against the banned words of one page.
	 *
	 * @param array $chars Normalised text as a list of single characters.
	 * @param int $page 1-based page number.
	 * @param int $size Number of ids per page.
	 * @return array Map of matched banned word => type.
	 */
	private function getHitListByPage($chars,$page=1,$size=1000)
	{
		$hitList = array();

		// Load the trie for this batch of banned words.
		$wordTree = $this->getWordTreeByPage($page,$size);
		if(!is_array($wordTree) || !$wordTree) return $hitList;

		$len = count($chars);
		for($i=0;$i<$len;$i++)
		{
			// Only positions whose character starts some banned word are walked.
			if(!isset($wordTree[$chars[$i]])) continue;

			foreach($this->getHitListByTree($chars,$i,$wordTree) as $hit=>$type)
			{
				$hitList[$hit] = $type;
			}
		}

		return $hitList;
	}

	/**
	 * Collect every banned word that starts at one position of the text.
	 *
	 * At most 50 characters are examined from $start, so a banned word longer
	 * than 50 characters is never reported.
	 *
	 * @param array $chars Normalised text as a list of single characters.
	 * @param int $start Index in $chars where matching begins.
	 * @param array $wordTree Trie: character => child node; a node carrying a
	 *                        'type' key ends a banned word.
	 * @return array Map of matched banned word => type.
	 */
	private function getHitListByTree($chars,$start,$wordTree)
	{
		$hitList = array();
		$hit = '';
		$node = $wordTree;
		$end = min(count($chars),$start+50);
		for($i=$start;$i<$end;$i++)
		{
			$char = $chars[$i];
			if(!isset($node[$char])) break;

			$hit .= $char;
			$node = $node[$char];

			// A 'type' entry marks the end of a banned word. Keep walking so
			// that longer words sharing this prefix are reported as well.
			if(isset($node['type']))
			{
				$hitList[$hit] = $node['type'];
			}
		}

		return $hitList;
	}

	/**
	 * Build (or fetch from cache) the trie for one page of banned words.
	 *
	 * A page covers the rows with status=1 and ($page-1)*$size < id <= $page*$size.
	 * The trie is cached under "blackWord_tree_{$page}_{$size}" with a TTL
	 * argument of 300.
	 *
	 * @param int $page 1-based page number.
	 * @param int $size Number of ids per page.
	 * @return array Trie: character => child node; 'type' on a node ends a word.
	 */
	private function getWordTreeByPage($page=1,$size=1000)
	{
		$redis = Rds::factory();
		$memKey = 'blackWord_tree_'.$page.'_'.$size;
		$wordTree = $redis->get($memKey);
		// Anything that is not an array (false, null, a stringified value)
		// counts as a cache miss rather than being used as a trie.
		if(!is_array($wordTree))
		{
			$wordTree = array();
			$blackWord = new Model_BlackWord_BlackWord();
			$start = ((int)$page-1)*(int)$size;
			$end = $start + (int)$size;
			$para = array('where' => "status=1 AND id>".$start." AND id<=".$end);
			$result = $blackWord->search($para);
			if($result)
			{
				foreach($result as $value)
				{
					// Compare against '' so that the word "0" is not dropped.
					if(!isset($value['word']) || (string)$value['word']==='') continue;

					$chars = $this->splitChars((string)$value['word']);
					if(!$chars) continue;

					// Walk down the trie, creating missing nodes on the way.
					$point = & $wordTree;
					foreach($chars as $char)
					{
						$point = & $point[$char];
					}

					$point['type'] = $value['type'];
					// Drop the reference so it cannot alias the trie later.
					unset($point);
				}
			}

			$redis->setex($memKey,300,$wordTree);
		}

		return $wordTree;
	}

}
Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn