Home >Backend Development >PHP Tutorial >Analyzing PHP's similar_text function_PHP tutorial
// php_similar_str: compare two strings and return the first longest region that both strings have in common.
// php_similar_char: recursive function that counts the identical characters in txt1 and txt2. It first calls
//   php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max);
// then recurses into the part in front of the common region, recurses into the part behind it, and returns the number of identical characters found by this comparison.
// The PHP function itself is a bunch of macros that does some string conversion and return-value conversion; the main work is done by the two functions above. Statements belonging to it: convert_to_string_ex(t1); if (ac > 2) { RETURN_LONG(0); if (ac > 2) { RETURN_LONG(sim);
// Tested it and the results are as follows: int main(int argc, char *argv[]) php_similar_str:0,6=6 char *str1="The working principle of TCP protocol communication"; php_similar_str:3,4=14
/*
 * Find the longest run of bytes that occurs in both txt1[0..len1) and
 * txt2[0..len2).
 *
 * On return *max holds the length of that run and *pos1 / *pos2 hold the
 * offsets at which it starts in txt1 and txt2. When several runs share the
 * maximum length, the first one met while scanning txt1 (outer loop) and
 * then txt2 (inner loop) wins, because only a strictly longer run replaces
 * the current best. When the inputs share no byte at all, *max, *pos1 and
 * *pos2 are all set to 0 so callers never read indeterminate values.
 *
 * The comparison is byte-wise; the strings need not be NUL-terminated.
 */
static void php_similar_str(const char *txt1, int len1, const char *txt2, int len2, int *pos1, int *pos2, int *max)
{
	const char *end1, *end2;
	const char *p, *q;
	int l;

	*max = 0;
	*pos1 = 0;
	*pos2 = 0;

	// Nothing to compare; also avoids forming pointers outside the buffers.
	if (len1 <= 0 || len2 <= 0) {
		return;
	}

	end1 = txt1 + len1;
	end2 = txt2 + len2;

	// Try every pair of start positions.
	for (p = txt1; p < end1; p++) {
		for (q = txt2; q < end2; q++) {
			// Measure how far the two strings agree from (p, q).
			for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); l++) {
				/* counting only */
			}
			if (l > *max) {
				// Remember the longest common region seen so far.
				*max = l;
				*pos1 = (int) (p - txt1);
				*pos2 = (int) (q - txt2);
			}
		}
	}
}

/*
 * Count the bytes txt1[0..len1) and txt2[0..len2) have in common.
 *
 * The longest common run is located with php_similar_str(); the function
 * then recurses into the text to the left of that run and into the text to
 * the right of it, and adds up the lengths of all runs found.
 *
 * Returns the total number of matching bytes (0 when nothing matches).
 */
static int php_similar_char(const char *txt1, int len1, const char *txt2, int len2)
{
	int sum;
	int pos1 = 0, pos2 = 0, max = 0;

	php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max);

	sum = max;
	if (sum == 0) {
		// No common byte: nothing to split around, stop recursing.
		return 0;
	}

	// Left of the common run: only worth a look when both sides are non-empty.
	if (pos1 && pos2) {
		sum += php_similar_char(txt1, pos1, txt2, pos2);
	}

	// Right of the common run: same condition, both remainders non-empty.
	if ((pos1 + max < len1) && (pos2 + max < len2)) {
		sum += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
		                        txt2 + pos2 + max, len2 - pos2 - max);
	}

	return sum;
}
PHP_FUNCTION(similar_text)
{
zval **t1, **t2, **percent;
int ac = ZEND_NUM_ARGS();
int sim;
if (ac < 2 || ac > 3 || zend_get_parameters_ex(ac, &t1, &t2, &percent) == FAILURE) {
WRONG_PARAM_COUNT;
}
convert_to_string_ex(t2);
convert_to_double_ex(percent);
}
if (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2) == 0) {
if ( ac > 2) {
Z_DVAL_PP(percent) = 0;
}
}
sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), Z_STRVAL_PP(t2), Z_STRLEN_PP(t2));
Z_DVAL_PP(percent) = sim * 200.0 / (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2));
}
}
It can be seen that similar_text compares the strings byte by byte (ASCII); it does not take words or phrases into account.
/*
 * Test driver: runs php_similar_str() and php_similar_char() on a pair of
 * sample strings and prints the results.
 */
int main(int argc, char *argv[])
{
	const char *str1 = "weican wuxin";
	const char *str2 = "wuxin weican";
	int pos1 = 0, pos2 = 0, max = 0;

	(void) argc;
	(void) argv;

	// Longest common region: start offset in each string and its length.
	php_similar_str(str1, (int) strlen(str1), str2, (int) strlen(str2), &pos1, &pos2, &max);
	printf("php_similar_str:%d,%d=%d\n", pos1, pos2, max);

	// Total number of matching bytes found by the recursive comparison.
	max = php_similar_char(str1, (int) strlen(str1), str2, (int) strlen(str2));
	printf("php_similar_char:%d\n", max);

	// Keeps the console window open; "PAUSE" is a Windows shell command.
	system("PAUSE");
	return 0;
}
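Output of php_similar_char for the test program above: php_similar_char:6
Second test string: char *str2="The working principle of TCP protocol communication";
Output of php_similar_char for the second test: php_similar_char:19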