<?php
$mincipin = 5; // minimum number of occurrences (word frequency)
$minlen = 4; // minimum keyword length

// $tiqustr is not defined anywhere in this file; presumably the including
// script provides it (TODO confirm). Fall back to an empty string so the call
// does not hand an undefined variable to the function.
tiqukeyword(isset($tiqustr) ? $tiqustr : '', $minlen, $mincipin);

/**
 * Extract frequently repeated substrings ("keywords") from a text.
 *
 * Phase 1 scans every start offset and, for each one, grows the candidate
 * substring from $minlen upwards for as long as its non-overlapping occurrence
 * count (counted from that offset to the end of the text) does not drop.
 * Phase 2 flags candidates that duplicate or are positionally contained in
 * another candidate. Phase 3 keeps the unflagged ones.
 *
 * The three intermediate/final arrays are echoed with print_r(), separated by
 * "<br/>" markup, exactly as before; the final array is additionally returned.
 *
 * NOTE(review): offsets and lengths are in bytes (strlen/substr/strpos), so a
 * multibyte character may be split in the middle - confirm the expected
 * encoding of $tiqustr before relying on this for non-ASCII text.
 *
 * @param string $tiqustr  Text to analyse.
 * @param int    $minlen   Minimum keyword length; must be >= 1.
 * @param int    $mincipin Minimum number of occurrences; must be >= 1.
 *
 * @return array List of entries array(start, length, keyword, count, 0).
 *
 * @throws InvalidArgumentException When $minlen or $mincipin is below 1
 *                                  (the old code looped forever or divided by
 *                                  zero for such values).
 */
function tiqukeyword($tiqustr, $minlen, $mincipin)
{
    $tiqustr = (string) $tiqustr;
    $minlen = (int) $minlen;
    $mincipin = (int) $mincipin;
    if ($minlen < 1 || $mincipin < 1) {
        throw new InvalidArgumentException('$minlen and $mincipin must both be at least 1');
    }

    $strlong = strlen($tiqustr);
    $arr = array();
    $k = -1;
    $maxnum = 0;

    // Phase 1: collect candidates.
    // A keyword starting at $i needs room for $mincipin copies of at least
    // $minlen bytes, so the last usable start is inclusive (the previous "<"
    // skipped it).
    $lastStart = $strlong - $mincipin * $minlen;
    for ($i = 0; $i <= $lastStart; $i++) {
        // Longest length that can still occur $mincipin times without overlap
        // from offset $i onwards; inclusive bound, and never past the end of
        // the text.
        $maxLen = (int) floor(($strlong - $i) / $mincipin);
        for ($j = $minlen; $j <= $maxLen; $j++) {
            $guanjianzi = substr($tiqustr, $i, $j);

            // Count non-overlapping occurrences, the one at $i included.
            $num = 1;
            $wz = $i + $j;
            while ($wz < $strlong) {
                $wz = strpos($tiqustr, $guanjianzi, $wz);
                if ($wz === false) {
                    break;
                }
                $num++;
                $wz += $j;
            }

            if ($j === $minlen) {
                // Shortest candidate: must reach the minimum frequency,
                // otherwise this start offset yields nothing.
                if ($num < $mincipin) {
                    break;
                }
                $k++;
            } elseif ($num < $maxnum) {
                // Growing the candidate lowered its frequency: keep the
                // previous (shorter) entry and stop extending.
                break;
            }

            // New entry for this offset, or overwrite it with the longer match.
            $maxnum = $num;
            $arr[$k] = array($i, $j, $guanjianzi, $num, 0);
        }
    }
    echo '初步得到的数组:';
    print_r($arr);

    // Phase 2: flag (index 4 = 1) duplicates and contained candidates.
    $arrlong = count($arr);
    for ($i = 0; $i < $arrlong; $i++) {
        $bjarr = $arr[$i];
        $nowid = $i;
        if ($bjarr[4] === 1) {
            continue;
        }
        for ($j = $i + 1; $j < $arrlong; $j++) {
            if ($arr[$j][4] === 1) {
                continue;
            }
            // Inclusive byte interval of the current reference candidate ...
            $qujianks = $bjarr[0];
            $qujianjs = $bjarr[1] + $bjarr[0] - 1;
            // ... and of the candidate being compared against it.
            $a = $arr[$j][0];
            $b = $arr[$j][1] + $arr[$j][0] - 1;

            // Same keyword seen again with a lower count: drop the later one.
            // Strict comparison so numeric-looking keywords are not conflated.
            if ($bjarr[2] === $arr[$j][2] && $bjarr[3] > $arr[$j][3]) {
                $arr[$j][4] = 1;
            }

            if ($a <= $qujianks && $qujianks <= $b && $a <= $qujianjs && $qujianjs <= $b) {
                // Reference lies inside candidate $j: if $j is at least as
                // frequent, drop the reference and continue with $j instead.
                if ($bjarr[3] <= $arr[$j][3]) {
                    $arr[$nowid][4] = 1;
                    $nowid = $j;
                    $bjarr = $arr[$j];
                }
            } elseif ($qujianks <= $a && $a <= $qujianjs && $qujianks <= $b && $b <= $qujianjs) {
                // Candidate $j lies inside the reference: drop $j.
                $arr[$j][4] = 1;
            }
        }
    }
    echo '<br/><br/><br/><br/>重叠加标记后的数组:';
    print_r($arr);

    // Open-source code from phprm.com
    // Phase 3: keep only the unflagged candidates.
    $jieguoarr = array();
    for ($i = 0; $i < $arrlong; $i++) {
        if ($arr[$i][4] === 0) {
            $jieguoarr[] = $arr[$i];
        }
    }
    echo '<br/><br/><br/><br/>';
    echo '最后得到的数组:';
    print_r($jieguoarr);

    return $jieguoarr;
}
// Article link:
// Feel free to bookmark; please keep this article's URL.