百度百科的关键词链接是怎样实现的呢
百度百科的关键词带有链接。我在想少量关键词,只需要简单替换成链接就可以。可是百度的关键词是很多的,可能成万上千万个。如果替换上万次,那效率也太低了吧。请教这样的功能是怎样实现的呢?谢谢!
附截图:
include 'TTrie.php';

/**
 * Keyword highlighter built on TTrie: each registered keyword is tied to the
 * action method b(), which wraps a buffer entry in <b>...</b> tags.
 */
class wordkey extends TTrie {
    /**
     * Action registered for the keywords below via set($word, 'b').
     *
     * Removes the most recent entry from $this->buffer and appends it back
     * wrapped in <b>...</b>.
     *
     * NOTE(review): presumably TTrie::match() pushes the matched keyword onto
     * the buffer right before invoking this action — confirm against the full
     * TTrie source.
     */
    function b() {
        $t = array_pop($this->buffer);
        $this->buffer[] = "<b>$t</b>";
    }
}

$p = new wordkey;
// Register the keywords; the second argument names the action stored with each word.
$p->set('秦始皇', 'b');
$p->set('洛阳', 'b');
$t = $p->match('秦始皇东巡洛阳');
echo join('', $t);
// Output shown in the original post (its two hash placeholders restored as the
// <b> and </b> tags that b() emits):
// <b>秦始皇</b>东巡<b>洛阳</b>
class TTrie {<br> protected $buffer = array();<br> protected $dict = array( array() );<br> protected $input = 0; //字符串当前偏移<br> protected $backtracking = 0; //字符串回溯位置<br> public $debug = 0;<br> public $savematch = 1;<br><br> function set($word, $action='') {<br> if(is_array($word)) {<br> foreach($word as $k=>$v) $this->set($k, $v);<br> return;<br> }<br> $p = count($this->dict);<br> $cur = 0; //当前节点号<br> foreach(str_split($word) as $c) {<br> if (isset($this->dict[$cur][$c])) { //已存在就下移<br> $cur = $this->dict[$cur][$c];<br> continue;<br> }<br> $this->dict[$p]= array(); //创建新节点<br> $this->dict[$cur][$c] = $p; //在父节点记录子节点号<br> $cur = $p; //把当前节点设为新插入的<br> $p ;<br> }<br> $this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点<br> }<br> function getto($ch) {<br> $i =& $this->input; //字符串当前偏移<br> $p =& $this->backtracking; //字符串回溯位置<br> $len = strlen($this->doc);<br> $t = '';<br> $this->input ;<br>// while($this->input2687c40d05088d95bc6e4baae64267a2doc{$this->input} != $ch) $t .= $this->doc{$this->input };<br>// $t .= $this->doc{$this->input };<br> do {<br> if($this->input >= $len) break;<br> $t .= $this->doc{$this->input};<br> }while($this->doc{$this->input } != $ch);<br> return trim($t);<br> } <br> function match($s) {<br> $this->doc =& $s;<br> $this->buffer = array();<br> $ret = array();<br> $cur = 0; //当前节点,初始为根节点<br> $i =& $this->input; //字符串当前偏移<br> $p =& $this->backtracking; //字符串回溯位置<br> $i = $p = 0;<br> $s .= "