首頁 >後端開發 >php教程 >權重計算,稍加修改亦可用於分詞,詞頻統計,全文及spam檢測等

權重計算,稍加修改亦可用於分詞,詞頻統計,全文及spam檢測等

WBOY
WBOY原創
2016-07-25 08:49:09 934瀏覽
效率非常可觀,你要是改成其他用途那效率我就不保證了
  1. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  2. // ------------ -------------------------------------------------- ----------
  3. // Name : 權重計算
  4. // Description: 稍加修改,亦可用於分詞,詞頻統計,全文檢索和垃圾檢測
  5. // Date : 2013/12/16 08:51
  6. // Authors : latel
  7. // ---------------------- --------------------------------------------------
  8. //
  9. /*外部呼叫範例*/
  10. /*
  11. $aItems = array(
  12. 'chinaisbig',
  13. 'whichisnot',
  14. 'chinaisbig',
  15. 'whichisnot',
  16. 'chinaisrightforme',
  17. );
  18. $aTable = array(
  19. 'china,is|small',
  20. 'china,big|me',
  21. 'china,is|big,which|not,me',
  22. 'totaly|right,for,me',
  23. );
  24. $oWeight = new weight;
  25. $oWeight->newItems($aItems);
  26. $oWeight->newTable($aTable); $aResult = $oWeight->getShow();
  27. */
  28. class weight {
  29. protected $aDict = array(array());
  30. protected $aItems = array( );
  31. protected $sLastRule;
  32. protected $aMatchs = array();
  33. protected $aShow = array();
  /**
   * Reset the cached results so that stale output is never returned.
   *
   * Invoked after the item list or the lookup table has been replaced.
   */
  private function init() {
      // Re-assign empty arrays instead of unset(): unset() removes the
      // declared property itself rather than just emptying it.
      $this->aShow = array();
      // The recorded match table belongs to the same computation as
      // $aShow, so it is cleared together with it.
      $this->aMatchs = array();
  }
  /**
   * Load the items to be evaluated, replacing any previously loaded ones.
   *
   * @param mixed $mItems An array of items, or a single item, which is
   *                      wrapped into a one-element array.
   */
  public function newItems($mItems) {
      if (!is_array($mItems)) {
          $mItems = array($mItems);
      }
      $this->aItems = $mItems;

      // Results computed for the previous items no longer apply.
      $this->init();
  }
  /**
   * Import a lookup table and add all of its keywords to the dictionary.
   *
   * Each table line is a string of keywords separated by ',' or '|'. Every
   * keyword is registered under the key of the line it came from.
   *
   * @param array $aTable Table lines, keyed by the identifier that is
   *                      recorded for each keyword of that line.
   */
  public function newTable(array $aTable) {
      foreach ($aTable as $iTableKey => $sTableLine) {
          // Treat '|' exactly like ','. The replacement must not contain
          // any whitespace, otherwise the keyword in front of each '|'
          // would be stored with a trailing space and never match.
          $aWords = explode(',', str_replace('|', ',', $sTableLine));

          foreach ($aWords as $sWord) {
              $sWord = trim($sWord);
              if ($sWord === '') {
                  // Produced by doubled or trailing separators; an empty
                  // keyword has nothing to match against.
                  continue;
              }
              $this->genDict($sWord, $iTableKey);
          }
      }

      // The dictionary changed, so earlier results are stale.
      $this->init();
  }
  /**
   * Return the final result for the loaded items.
   *
   * The result is regenerated through genShow() when nothing is cached or
   * when the requested rule differs from $this->sLastRule; otherwise the
   * stored result is returned.
   *
   * @param string $sRule Selection rule, passed on to genShow().
   *
   * @return array Empty array when no items or no dictionary are loaded,
   *               otherwise the result of genShow() for the rule.
   */
  public function getShow($sRule = 'max') {
      // NOTE(review): $aDict is declared as array(array()), which is not
      // "empty", so the dictionary half of this guard only triggers if
      // the property is emptied somewhere else.
      if (empty($this->aItems) || empty($this->aDict)) {
          return array();
      }

      if (empty($this->aShow) || $sRule !== $this->sLastRule) {
          return $this->genShow($sRule);
      }

      return $this->aShow;
  }
  /**
   * Compute the result for every loaded item and store it on the object.
   *
   * For each item the values returned by matchWord() are counted. With the
   * 'max' rule, the item's result is the list of values with the highest
   * count. Any other rule leaves the item without a result entry.
   *
   * @param string $sRule Selection rule; only 'max' is implemented.
   *
   * @return array Map of item key => list of the most frequent values.
   */
  public function genShow($sRule) {
      $aShow = array();
      $aMatchs = array();

      foreach ($this->aItems as $mItemKey => $sItem) {
          // Presumably matchWord() returns the table keys of all keywords
          // found in the item - confirm against its implementation.
          $aCounts = array_count_values($this->matchWord($sItem));
          $aMatchs[] = $aCounts;

          switch ($sRule) {
              case 'max':
                  // max() must not be called on an empty array (ValueError
                  // on PHP 8), so an item without matches gets an empty list.
                  $aShow[$mItemKey] = empty($aCounts)
                      ? array()
                      : array_keys($aCounts, max($aCounts), true);
                  break;
          }
      }

      $this->aShow = $aShow;
      $this->aMatchs = $aMatchs;
      // Record the rule that produced the stored result so a later request
      // for a different rule can be told apart from this one.
      $this->sLastRule = $sRule;

      return $aShow;
  }
  /**
   * Insert one keyword into the dictionary trie.
   *
   * $this->aDict is a list of nodes. Each node maps a single byte to the
   * index of its child node; node 0 is the root. The node reached by the
   * last byte of the keyword collects $iKey under its 'acc' entry.
   *
   * @param string $mWord Keyword to insert; split byte-wise via str_split().
   * @param mixed  $iKey  Identifier recorded for the keyword.
   */
  private function genDict($mWord, $iKey = '') {
      $mWord = (string) $mWord;
      if ($mWord === '') {
          // str_split('') yields array('') before PHP 8.2 and array() from
          // 8.2 on; either way an empty keyword would corrupt the trie.
          return;
      }

      // Node indexes are handed out consecutively, so the next free index
      // equals the current number of nodes.
      $iInsertPoint = count($this->aDict);
      $iCur = 0; // index of the node currently being visited

      foreach (str_split($mWord) as $sChar) {
          if (isset($this->aDict[$iCur][$sChar])) {
              // The prefix already exists: follow the existing edge.
              $iCur = $this->aDict[$iCur][$sChar];
              continue;
          }

          // Create a new node and link it from the current one.
          $this->aDict[$iInsertPoint] = array();
          $this->aDict[$iCur][$sChar] = $iInsertPoint;
          $iCur = $iInsertPoint;
          $iInsertPoint++;
      }

      // 'acc' is three bytes long and therefore cannot collide with the
      // single-byte edge keys of a node.
      $this->aDict[$iCur]['acc'][] = $iKey;
  }
  96. function matchWord($sLine) {
  97. $iCur = $iOffset = $iPosition = 0;
  98. $sLine .= "
  99. ?>
複製程式碼


陳述:
本文內容由網友自願投稿,版權歸原作者所有。本站不承擔相應的法律責任。如發現涉嫌抄襲或侵權的內容,請聯絡admin@php.cn