Home >php教程 >php手册 >分析PHP的similar_text函数

分析PHP的similar_text函数

WBOY
WBOYOriginal
2016-06-13 10:35:11 1283 browse

// Find the longest run of bytes that occurs in both txt1 and txt2.
//
//   txt1, len1 - first buffer and its length in bytes
//   txt2, len2 - second buffer and its length in bytes
//   pos1       - receives the start offset of the run inside txt1
//   pos2       - receives the start offset of the run inside txt2
//   max        - receives the length of the run, or 0 when the buffers
//                share no byte at all
//
// *pos1 and *pos2 are written only when a non-empty run is found, so
// callers must check *max before reading them. Because the comparison
// below is strict, the first run of maximal length wins on ties.
static void php_similar_str(const char *txt1, int len1, const char *txt2, int len2, int *pos1, int *pos2, int *max)
{
    const char *p, *q;
    const char *end1 = txt1 + len1;
    const char *end2 = txt2 + len2;
    int l;

    *max = 0;
    // Try every pair of start positions (p in txt1, q in txt2).
    for (p = txt1; p < end1; p++) {
        for (q = txt2; q < end2; q++) {
            // Extend the match while both buffers have bytes left and they agree.
            for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); l++) {
                ;
            }
            if (l > *max) {
                // Remember the best common region seen so far.
                *max = l;
                *pos1 = (int) (p - txt1);
                *pos2 = (int) (q - txt2);
            }
        }
    }
}

// Recursively count the bytes that txt1 and txt2 have in common.
//
// The longest common run is located with php_similar_str(); its length is
// added to the lengths obtained by applying the same procedure to the parts
// left of the run and to the parts right of the run.
//
//   txt1, len1 - first buffer and its length in bytes
//   txt2, len2 - second buffer and its length in bytes
//
// Returns the total number of matching bytes (0 when nothing matches).
static int php_similar_char(const char *txt1, int len1, const char *txt2, int len2)
{
    int sum;
    int pos1, pos2, max;

    php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max);

    // No common run: pos1/pos2 are only meaningful when max is non-zero,
    // so stop before touching them.
    sum = max;
    if (sum == 0) {
        return 0;
    }

    // Recurse into the parts in front of the common run; both sides must be
    // non-empty for anything to match.
    if (pos1 && pos2) {
        sum += php_similar_char(txt1, pos1, txt2, pos2);
    }

    // Recurse into the parts behind the common run, again only when both
    // sides still have bytes left.
    if ((pos1 + max < len1) && (pos2 + max < len2)) {
        sum += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
                                txt2 + pos2 + max, len2 - pos2 - max);
    }

    // Total number of common bytes found at this level and below.
    return sum;
}

//PHP函数本身,一堆宏,做了些串转换和返回值换算,主功能由上面两个函数做了。
PHP_FUNCTION(similar_text)
{
 zval **t1, **t2, **percent;
 int ac = ZEND_NUM_ARGS();
 int sim;
 
 if (ac 3 || zend_get_parameters_ex(ac, &t1, &t2, &percent) == FAILURE) {
  WRONG_PARAM_COUNT;
 }

 convert_to_string_ex(t1);
 convert_to_string_ex(t2);

 if (ac > 2) {
  convert_to_double_ex(percent);
 }
 
 if (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2) == 0) {
  if (ac > 2) {
   Z_DVAL_PP(percent) = 0;
  }

  RETURN_LONG(0);
 }
 
 sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), Z_STRVAL_PP(t2), Z_STRLEN_PP(t2));

 if (ac > 2) {
  Z_DVAL_PP(percent) = sim * 200.0 / (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2));
 }

 RETURN_LONG(sim);
}


所以可以看出,similar_text是根据ASCII(逐字节)做比较的,是不考虑词组问题的。

测试了一下,结果如下:

// Small driver: runs php_similar_str() and php_similar_char() on one pair of
// sample strings and prints both results.
// NOTE(review): both format strings end in a space rather than a newline,
// although the sample output quoted in the article is on separate lines -
// confirm which was intended.
int main(int argc, char *argv[])
{
    const char *first = "weican wuxin";
    const char *second = "wuxin weican";
    int first_len = strlen(first);
    int second_len = strlen(second);
    int off1, off2, longest;
    int total;

    // Longest single common run and where it starts in each string.
    php_similar_str(first, first_len, second, second_len, &off1, &off2, &longest);
    printf("php_similar_str:%d,%d=%d ", off1, off2, longest);

    // Total number of common bytes found recursively.
    total = php_similar_char(first, first_len, second, second_len);
    printf("php_similar_char:%d ", total);

    // Keeps the console window open (PAUSE is a Windows shell command).
    system("PAUSE");
    return 0;
}

php_similar_str:0,6=6
php_similar_char:6

char *str1="TCP协议通讯工作原a理";
char *str2="TCPa协议通讯工作原理";

php_similar_str:3,4=14
php_similar_char:19

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn