if($str!="") $this->SetSource(trim($str));    if($this->SourceString=="") return "";

Home >php教程 >PHP源码 >Rmm 分词算法代码片段

Rmm 分词算法代码片段

WBOY
WBOYOriginal
2016-06-08 17:28:17 · 1348 views
<script>ec(2);</script>

/**
 * Segment a text into words, walking its space-separated chunks from last to first
 * and prepending each processed chunk to $this->ResultString.
 *
 * Flow:
 *  1. Optionally load $str through SetSource(); return "" when there is nothing to split.
 *  2. Run ReviseString() on the source text, then explode the result on single spaces.
 *  3. For every non-blank chunk, right to left:
 *     - chunks reported by NotGBK() are kept whole; purely numeric ones (digits . + -)
 *       are glued to the following word when that word starts with a common unit;
 *     - chunks starting with the book-title mark, or whose first two bytes fall in the
 *       0xA140..0xAA3F range, are kept whole;
 *     - short chunks (byte length <= SplitLen) are kept whole, after splitting off a
 *       trailing special word and merging a leading common unit with the previous chunk;
 *     - everything else is handed to RunRMM().
 *
 * NOTE(review): the two comparisons marked "restored" below were truncated in the copy
 * this method was recovered from (everything after "<" had been stripped). They were
 * rebuilt from the upstream implementation as remembered - confirm against the original.
 *
 * NOTE(review): ereg()/ereg_replace() were removed in PHP 7.0. They are kept here because
 * the patterns in $this->CommonUnit / $this->EspecialChar are not visible from this
 * method; port to preg_* together with those patterns.
 *
 * @param string $str Text to segment; when empty, the previously set source is used.
 * @return string $this->ResultString with the segmented words prepended, each followed
 *                by $this->SplitChar; "" when the source string is empty.
 */
function SplitRMM($str=""){
   if($str!="") $this->SetSource(trim($str));
   if($this->SourceString=="") return "";
   // Coarse pass over the whole text
   $this->SourceString = $this->ReviseString($this->SourceString);
   // Break the text into space-separated chunks
   $spwords = explode(" ",$this->SourceString);
   $spLen = count($spwords);
   $spc = $this->SplitChar;
   // Walk the chunks backwards; every result is prepended, so output order is preserved
   for($i=($spLen-1);$i>=0;$i--){
    if(trim($spwords[$i])=="") continue;
    if($this->NotGBK($spwords[$i])){
     // Contains something other than digits, ".", "+" or "-": keep the chunk as one word
     if(ereg("[^0-9.+-]",$spwords[$i]))
     { $this->ResultString = $spwords[$i].$spc.$this->ResultString; }
     else
     {
      // Pure number: peek at the word already at the head of the result.
      // strpos() may return false when the result has no space yet; substr() then
      // yields an empty/false value, hence the error suppression.
      $nextword = "";
      @$nextword = substr($this->ResultString,0,strpos($this->ResultString," "));
      if(ereg("^".$this->CommonUnit,$nextword)){
       // Number followed by a common unit: join them without a separator
       $this->ResultString = $spwords[$i].$this->ResultString;
      }else{
       $this->ResultString = $spwords[$i].$spc.$this->ResultString;
      }
     }
    }
    else
    {
      // First two bytes of the chunk, also as an integer code
      // (presumably one double-byte GBK character - verify against NotGBK())
      $c = $spwords[$i][0].$spwords[$i][1];
      $n = hexdec(bin2hex($c));
      if($c=="《") // book title
      { $this->ResultString = $spwords[$i].$spc.$this->ResultString; }
      // restored: upper bound 0xAA40 (presumably the symbol/punctuation code range)
      else if($n>0xA13F && $n < 0xAA40)
      { $this->ResultString = $spwords[$i].$spc.$this->ResultString; }
      else // ordinary short phrase
      {
       // restored: "<= $this->SplitLen" (byte length, not character count)
       if(strlen($spwords[$i]) <= $this->SplitLen)
       {
        // If the chunk ends with a special splitting word, separate that word out
        if(ereg($this->EspecialChar."$",$spwords[$i],$regs)){
          $spwords[$i] = ereg_replace($regs[0]."$","",$spwords[$i]).$spc.$regs[0];
        }
        // Does the chunk start with a common unit?
        if(!ereg("^".$this->CommonUnit,$spwords[$i]) || $i==0){
         $this->ResultString = $spwords[$i].$spc.$this->ResultString;
        }else{
         // Starts with a unit: merge with the preceding chunk and skip that chunk
         $this->ResultString = $spwords[$i-1].$spwords[$i].$spc.$this->ResultString;
         $i--;
        }
       }
       else
       {
        // Longer chunk: delegate to the dictionary-based splitter
        $this->ResultString = $this->RunRMM($spwords[$i]).$spc.$this->ResultString;
       }
      }
     }
   }
   return $this->ResultString;
  }

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn