Home > Backend Development > PHP Tutorial > Weight calculation; with slight modifications, it can also be used for word segmentation, word frequency statistics, full-text retrieval, spam detection, etc.

Weight calculation; with slight modifications, it can also be used for word segmentation, word frequency statistics, full-text retrieval, spam detection, etc.

WBOY
WBOYOriginal
2016-07-25 08:49:09 · 937 views
The efficiency is quite good. If you adapt it to other uses, I cannot guarantee the efficiency.
  1. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  2. // +--------------------- --------------------------------------------------
  3. // Name: Weight calculation
  4. // Description: With slight modifications, it can also be used for word segmentation, word frequency statistics, full-text retrieval and spam detection
  5. // Date: 2013/12/16 08:51
  6. // Authors: late <latelx64@gmail.com>
  7. // +---------------------------------------- --------------------------------
  8. //
  9. /*external call example*/
  10. /*
  11. $aItems = array(
  12. 'chinaisbig',
  13. 'whichisnot',
  14. 'totalyrightforme',
  15. );
  16. $aTable = array(
  17. 'china,is|small',
  18. 'china,big|me',
  19. 'china,is |big,which|not,me',
  20. 'totaly|right,for,me',
  21. );
  22. $oWeight = new weight;
  23. $oWeight->newItems($aItems);
  24. $aResult = $oWeight ->newTable($aTable);
  25. */
  26. class weight {
  27. protected $aDict = array(array());
  28. protected $aItems = array();
  29. protected $sLastRule;
  30. protected $aMatchs = array( );
  31. protected $aShow = array();
  32. private function init() {
  33. //Clear the recorded matching table and output results
  34. unset($this->aShow);
  35. }
  36. public function newItems($mItems) {
  37. //Import new items
  38. $this->aItems = (is_array($mItems))? $mItems: array($mItems);
  39. $this->init();
  40. }
  41. public function newTable(array $aTable) {
  42. //Import a new comparison table and generate a dictionary
  43. foreach($aTable as $iTableKey=>$sTableLine) {
  44. $aTableLine = explode(',', str_replace('|' , ',', $sTableLine));
  45. $setter = function($v, $k, $paraMeter) {
  46. $k1 = $paraMeter[0]; $oWeight = $paraMeter[1];
  47. $oWeight->genDict($v, $k1);
  48. };
  49. array_walk($aTableLine, $setter, array($iTableKey, $this));
  50. }
  51. $this->init();
  52. }
  53. public function getShow($sRule = 'max') {
  54. //Get the final display result
  55. if(empty($this->aItems) || empty($this->aDict))
  56. return array();
  57. if (empty($this->aShow) || $sRule != $this->sLastRule)
  58. return $this->genShow($sRule);
  59. return $this->aShow;
  60. }
  61. public function genShow($sRule) {
  62. $aShow = array();
  63. $aMatchs = array();
  64. $getter = function($v, $k, $oWeight) use(&$aShow, &$aMatchs, $sRule ) {
  65. $t = array_count_values($oWeight->matchWord($v));
  66. $aMatchs[] = $t;
  67. switch ($sRule) {
  68. case 'max':
  69. $aShow[$k] = array_keys($t, max($t));
  70. break;
  71. }
  72. };
  73. array_walk($this->aItems, $getter, $this);
  74. $this->aShow = $aShow;
  75. $this->aMatchs = $aMatchs;
  76. return $aShow;
  77. }
  78. private function genDict($mWord, $iKey = '') {
  79. $iInsertPonit = count($this->aDict);
  80. $iCur = 0; //Current node number
  81. foreach (str_split($mWord) as $iChar) {
  82. if (isset($this->aDict[$iCur][$iChar])) {
  83. $iCur = $this->aDict[$iCur][$iChar];
  84. continue;
  85. }
  86. $this->aDict[$iInsertPonit] = array();
  87. $this->aDict[$iCur][$iChar] = $iInsertPonit ;
  88. $iCur = $iInsertPonit;
  89. $iInsertPonit++;
  90. }
  91. $this->aDict[$iCur]['acc'][] = $iKey;
  92. }
  93. function matchWord($sLine) {
  94. $iCur = $iOffset = $iPosition = 0;
  95. $sLine .= "
  96. ?>
Copy code


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn