Home > Backend Development > PHP Tutorial > Understanding PHP's explode() function from its source code (PHP tutorial)
When we need to split a string into an array based on a certain character or string, the explode() function is very useful, but do you know how explode() works? Extracting substrings cannot avoid the cost of allocating new memory, and explode() allocates memory too, no doubt.
//文件1:ext/standard/string.c //先来看下explode的源代码 PHP_FUNCTION(explode) { char *str, *delim; int str_len = 0, delim_len = 0; long limit = LONG_MAX; /* No limit */ zval zdelim, zstr; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &delim, &delim_len, &str, &str_len, &limit) == FAILURE) { return; } if (delim_len == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); RETURN_FALSE; } //这里会开辟一个数组,用来存放分割后的数据 array_init(return_value); //因为这个,我们用explode('|', '');成为了合法的 if (str_len == 0) { if (limit >= 0) { add_next_index_stringl(return_value, "", sizeof("") - 1, 1); } return; } //下面这两个是将原字串和分割符都构建成_zval_struct 结构, //ZVAL_STRINGL会分配空间哦~~源代码随后贴出 ZVAL_STRINGL(&zstr, str, str_len, 0); ZVAL_STRINGL(&zdelim, delim, delim_len, 0); //limit值是explode中允许传递的explode的第三个参数,它允许正负 if (limit > 1) { php_explode(&zdelim, &zstr, return_value, limit); } else if (limit < 0) { php_explode_negative_limit(&zdelim, &zstr, return_value, limit); } else { add_index_stringl(return_value, 0, str, str_len, 1); } }
Let's look at another piece of code:
//ZVAL_STRINGL的源代码: //文件2:zend/zend_API.c #define ZVAL_STRINGL(z, s, l, duplicate) { \ const char *__s=(s); int __l=l; \ Z_STRLEN_P(z) = __l; \ Z_STRVAL_P(z) = (duplicate?estrndup(__s, __l):(char*)__s);\ Z_TYPE_P(z) = IS_STRING; \ } .... //estrndup才是主菜: //文件3:zend/zend_alloc.h #define estrndup(s, length) _estrndup((s), (length) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC) .... //_estrndup的实现: zend/zend_alloc.c ZEND_API char *_estrndup(const char *s, uint length ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC) { char *p; p = (char *) _emalloc(length+1 ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC); if (UNEXPECTED(p == NULL)) { return p; } memcpy(p, s, length); //分配空间 p[length] = 0; return p; } //另外在substr和strrchr strstr中用到的ZVAL_STRING也是使用了上诉的实现
The following analyzes which code is called depending on explode's third parameter, limit. The three cases correspond to the three branches of the final if/else chain in explode: limit > 1, limit < 0, and everything else (limit equal to 0 or 1). Note: when limit is omitted, its default value is LONG_MAX, which falls under the first branch.
When limit > 1, the php_explode function is called. It can also be found in ext/standard/string.c, immediately above the explode implementation (which makes it very easy to locate: almost all of the helpers that a function in this file calls are listed right above that function ^_^).
/*
 * Splits str on delim and appends the pieces to return_value.
 * Used by explode() when limit > 1: once the limit is reached, the
 * unsplit remainder of the string becomes the last element.
 */
PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, long limit)
{
	char *p1, *p2, *endp;

	// First get a pointer to the end of the source string.
	endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);

	// Remember the start position.
	p1 = Z_STRVAL_P(str);
	// Find the position of the delimiter in str. The article author notes
	// that strrpos and strpos use php_memnstr for locating as well.
	p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);

	if (p2 == NULL) {
		// Because of this branch, explode('|', 'abc') is legal and yields
		// array(0 => 'abc').
		add_next_index_stringl(return_value, p1, Z_STRLEN_P(str), 1);
	} else {
		// Loop, finding the position of the next delimiter each time,
		// until there are no more or the limit is reached.
		do {
			// Store the substring between the previous position and this
			// delimiter (on the first pass the previous position is the
			// start of the string).
			add_next_index_stringl(return_value, p1, p2 - p1, 1);
			// Move to the delimiter position p2 plus the delimiter length.
			// E.g. delimiter '|', source 'ab|c': p2 is at offset 2, so p1
			// becomes offset 2 + 1 = 3.
			p1 = p2 + Z_STRLEN_P(delim);
			// Because of &&, --limit is only evaluated when another delimiter
			// was found; the loop ends once limit has dropped to 1.
		} while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL &&
				 --limit > 1);

		// Put whatever follows the last delimiter consumed by the loop into
		// the result array.
		// explode('|', 'avc|sdf'); => array(0 => 'avc', 1 => 'sdf')
		if (p1 <= endp)
			add_next_index_stringl(return_value, p1, endp-p1, 1);
	}
}
When limit < 0, the php_explode_negative_limit function is called:
/*
 * Splits str on delim for a negative limit: records where every piece
 * starts, then appends all pieces except the last -limit ones to
 * return_value. If the delimiter does not occur at all, nothing is added.
 */
PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, long limit)
{
#define EXPLODE_ALLOC_STEP 64
	char *p1, *p2, *endp;

	endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);

	p1 = Z_STRVAL_P(str);
	p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);

	if (p2 == NULL) {
		// Nothing is added here, so explode('|', 'abc', -1) produces no
		// values: return_value is left untouched.
		/*
		do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
		by doing nothing we return empty array
		*/
	} else {
		int allocated = EXPLODE_ALLOC_STEP, found = 0;
		long i, to_return;
		// Note the declaration of positions: this array stores the start
		// position of every substring.
		char **positions = emalloc(allocated * sizeof(char *));

		// The start of the string is of course the first position to store.
		positions[found++] = p1;
		// Two loops follow. The first walks over every delimiter occurrence
		// in the string and records where the next substring starts,
		// growing the array by EXPLODE_ALLOC_STEP entries whenever it is full.
		do {
			if (found >= allocated) {
				allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
				positions = erealloc(positions, allocated*sizeof(char *));
			}
			positions[found++] = p1 = p2 + Z_STRLEN_P(delim);
		} while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL);

		// found is now the number of pieces; adding the negative limit gives
		// how many pieces, counted from the start, are returned.
		to_return = limit + found;
		/* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
		for (i = 0;i < to_return;i++) { /* this checks also for to_return > 0 */
			// Piece i runs from positions[i] up to the delimiter that
			// precedes positions[i+1].
			add_next_index_stringl(return_value, positions[i],
					(positions[i+1] - Z_STRLEN_P(delim)) - positions[i],
					1
				);
		}
		efree(positions); // important: release the memory
	}
#undef EXPLODE_ALLOC_STEP
}
When neither the first nor the second condition is met (that is, limit is 0 or 1), the final else branch is entered. This branch simply puts the whole source string into the output array: explode('|', 'avc|sd', 1) and explode('|', 'avc|sd', 0) both return array(0 => 'avc|sd');
//add_index_stringl源代码 //文件4:zend/zend_API.c ZEND_API int add_next_index_stringl(zval *arg, const char *str, uint length, int duplicate) /* {{{ */ { zval *tmp; MAKE_STD_ZVAL(tmp); ZVAL_STRINGL(tmp, str, length, duplicate); return zend_hash_next_index_insert(Z_ARRVAL_P(arg), &tmp, sizeof(zval *), NULL); } //zend_hash_next_index_insert //zend/zend_hash.h #define zend_hash_next_index_insert(ht, pData, nDataSize, pDest) \ _zend_hash_index_update_or_next_insert(ht, 0, pData, nDataSize, pDest, HASH_NEXT_INSERT ZEND_FILE_LINE_CC) //zend/zend_hash.c ///太长了~~~~不贴了
It can be seen (excluding allocated space) that when limit>1, the efficiency is O(N) [N is the limit value]. When limit