Home  >  Article  >  Backend Development  >  php keyword filtering

php keyword filtering

不言
不言Original
2018-04-10 17:33:35 · 2033 views

This article shares a PHP keyword-filtering implementation built on a trie (dictionary tree). It may be a useful reference for anyone who needs this kind of filtering.

<?php

class TrieTree
{

    /**
     * Key that marks "a keyword ends at this node".
     *
     * Earlier versions of this class used null as the marker. PHP stores a
     * null array key as the empty string, so trees serialized by those
     * versions use exactly this key and remain loadable.
     */
    const END = '';

    /**
     * Root node of the trie: a nested map of character => child node.
     *
     * The root starts out empty. Seeding it with a plain list (integer keys
     * 0, 1, ...) would make the digit characters "0" and "1" look like
     * existing branches whose children are strings rather than nodes.
     *
     * @var array
     */
    public $tree = array();

    /**
     * Add a keyword to the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function add($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        // Append the end-of-keyword marker so it is inserted like a final character.
        $chars[] = self::END;
        $node = &$this->tree;
        foreach ($chars as $c) {
            if (! array_key_exists($c, $node)) {
                // First time this character is seen at this depth: create its node.
                $node[$c] = array();
            }
            $node = &$node[$c];
        }
        // Drop the reference so later assignments to $node cannot touch the tree.
        unset($node);
        return $this;
    }

    /**
     * Remove a keyword from the trie.
     *
     * Only the given keyword is removed. Keywords that share a prefix with it
     * (shorter or longer) are left intact. Removing a keyword that is not in
     * the trie is a no-op.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function remove($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $chars[] = self::END;
        // Make sure the keyword is really stored before touching the tree.
        if ($this->_find($chars)) {
            $this->_prune($this->tree, $chars, 0);
        }
        return $this;
    }

    /**
     * Delete the path $chars[$depth..] below $node, then remove every node on
     * the way back up that has been left without children.
     *
     * @param array $node  node to descend from (modified in place)
     * @param array $chars characters of the keyword, end marker included
     * @param int   $depth index into $chars handled by this call
     * @return boolean true when $node has no children left after the removal
     */
    private function _prune(array &$node, array $chars, $depth)
    {
        $c = $chars[$depth];
        if ($depth === count($chars) - 1) {
            // Last element is the end marker itself.
            unset($node[$c]);
        } elseif ($this->_prune($node[$c], $chars, $depth + 1)) {
            // The child became empty, so no other keyword runs through it.
            unset($node[$c]);
        }
        return count($node) === 0;
    }

    /**
     * Check whether the exact keyword is stored in the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return boolean
     */
    public function exists($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $chars[] = self::END;
        return $this->_find($chars);
    }

    /**
     * Follow $chars from the root and report whether the whole path exists.
     *
     * @param array $chars characters to follow, end marker included
     * @return boolean
     */
    private function _find(array $chars)
    {
        // Read-only walk: plain assignment is enough, no references needed.
        $node = $this->tree;
        foreach ($chars as $c) {
            if (! array_key_exists($c, $node)) {
                return false;
            }
            $node = $node[$c];
        }
        return true;
    }

    /**
     * Check whether the text contains any stored keyword.
     *
     * Every character position of the text is tried as the start of a match.
     * In counting mode, every keyword occurrence is counted, including
     * keywords that are prefixes of longer keywords matching at the same
     * position.
     *
     * @param string  $utf8_str text to scan, UTF-8 encoded
     * @param boolean $do_count when true, return the number of matches
     *                          instead of stopping at the first one
     * @return boolean|number true/false, or the match count when $do_count is true
     */
    public function contain($utf8_str, $do_count = false)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $len = count($chars);
        $count = 0;
        for ($i = 0; $i < $len; $i ++) {
            $node = $this->tree;
            for ($j = $i; $j < $len; $j ++) {
                $c = $chars[$j];
                if (! array_key_exists($c, $node)) {
                    break;
                }
                $node = $node[$c];
                // A keyword ends exactly at the character just consumed.
                if (array_key_exists(self::END, $node)) {
                    if (! $do_count) {
                        return true;
                    }
                    $count ++;
                }
            }
        }
        return $do_count ? $count : false;
    }

    /**
     * Check several texts at once; true as soon as one of them contains a keyword.
     *
     * @param array $str_array texts to scan; any non-array value yields false
     * @return boolean
     */
    public function containMulti($str_array)
    {
        if (\is_array($str_array)) {
            foreach ($str_array as $str) {
                if ($this->contain($str)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Export the trie as a serialized string.
     *
     * @return string
     */
    public function export()
    {
        return serialize($this->tree);
    }

    /**
     * Import a trie previously produced by export().
     *
     * NOTE(security): unserialize() can instantiate arbitrary objects. Only
     * pass data from a trusted source (for example the output of export()),
     * never user-supplied input.
     *
     * @param string $str serialized trie
     * @throws \InvalidArgumentException when $str does not unserialize to an array
     */
    public function import($str)
    {
        $tree = unserialize($str);
        if (! \is_array($tree)) {
            // unserialize() returns false on malformed input; storing that
            // would break every later lookup.
            throw new \InvalidArgumentException('Serialized trie data is invalid.');
        }
        $this->tree = $tree;
    }
}

class UTF8Util
{

    /**
     * Split a UTF-8 byte string into an array of characters.
     *
     * The width of each character is taken from its lead byte only;
     * continuation bytes are not validated. A lead byte whose sequence would
     * run past the end of the string, and any byte that is not a valid lead
     * byte, is skipped without producing an element.
     *
     * @param string $utf8_str
     * @return array list of single characters, each 1 to 4 bytes long
     */
    public static function getChars($utf8_str)
    {
        $result = array();
        $total = strlen($utf8_str);
        $pos = 0;
        while ($pos < $total) {
            $byte = $utf8_str[$pos];
            $lead = ord($byte);

            if ($lead < 0x80) {
                // 0xxx xxxx: single-byte (ASCII) character
                $result[] = $byte;
                $pos ++;
                continue;
            }

            if ($lead >= 0xF0) {
                // 1111 xxxx: lead byte of a four-byte character
                $width = 4;
            } elseif ($lead >= 0xE0) {
                // 1110 xxxx: lead byte of a three-byte character
                $width = 3;
            } elseif ($lead >= 0xC0) {
                // 110x xxxx: lead byte of a two-byte character
                $width = 2;
            } else {
                // 10xx xxxx: continuation byte with no lead byte before it
                $width = 0;
            }

            if ($width > 0 && $pos + $width <= $total) {
                $result[] = substr($utf8_str, $pos, $width);
                $pos += $width;
            } else {
                // Unusable byte: step over it and keep scanning.
                $pos ++;
            }
        }
        return $result;
    }
}

// Demo: split a sample UTF-8 string into its individual characters.
// getChars() is a static method; PHP also allows calling it through an instance.
$utf8_str = new UTF8Util();

$utf_char = $utf8_str->getChars('佛教飞机撒方式的回复回复后我我认为回复日无法核实的回复我五花肉覅福热火么光和热规划局狂热韩国关乎二后过过过过过过过群军过军若绿可我让我陪我二骗人富可敌国及时来构架了');

Related recommendations:

PHP class for highlighting keywords in red


#

The above is the detailed content of php keyword filtering. For more information, please follow other related articles on the PHP Chinese website!

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Previous article: PHP for tcp connection | Next article: PHP for tcp connection