search
Home > Backend Development > PHP Tutorial > Understand PHP's explode() function from source code (PHP tutorial)

When we need to split a string into an array based on a certain character or string, the explode() function is very useful, but do you know how explode() works? Extracting substrings cannot avoid the cost of allocating new memory, and explode() allocates memory too, no doubt.

//File 1: ext/standard/string.c
//First, the source of explode itself.
//Implements PHP's explode(delimiter, string [, limit]): parses the arguments,
//rejects an empty delimiter, and fills return_value with the resulting pieces.
PHP_FUNCTION(explode)
{
	char *str, *delim;
	int str_len = 0, delim_len = 0;
	long limit = LONG_MAX; /* No limit */
	zval zdelim, zstr;
   
	//"ss|l": two required strings (delimiter first, then the source string) and an optional long limit
	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &delim, &delim_len, &str, &str_len, &limit) == FAILURE) {
		return;
	}
	//An empty delimiter raises an E_WARNING and leaves through RETURN_FALSE
	if (delim_len == 0) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
		RETURN_FALSE;
	}
  
	//An array is created here to hold the pieces produced by the split
	array_init(return_value);
		//Because of this branch, explode('|', '') is a legal call:
		//an empty source string yields one empty element when limit >= 0, and an empty array otherwise
		if (str_len == 0) {
			if (limit >= 0) {
				add_next_index_stringl(return_value, "", sizeof("") - 1, 1);
			} 
			return;
		}
		
		//The next two lines wrap the source string and the delimiter in zval (_zval_struct) structures.
		//duplicate is passed as 0, so per the ZVAL_STRINGL macro shown later the pointers are stored as-is and nothing is copied here
		ZVAL_STRINGL(&zstr, str, str_len, 0);   
		ZVAL_STRINGL(&zdelim, delim, delim_len, 0);
		//limit is the optional third argument of explode; it may be positive or negative
		if (limit > 1) {
			php_explode(&zdelim, &zstr, return_value, limit);
		} else if (limit < 0) {
			php_explode_negative_limit(&zdelim, &zstr, return_value, limit);
		} else {
			//limit is 0 or 1: the whole source string becomes element 0
			add_index_stringl(return_value, 0, str, str_len, 1);
		}
	}

Now let's look at another piece of code:

//Source of ZVAL_STRINGL:
//File 2: zend/zend_API.c
//Initializes the zval pointed to by z as a string of length l taken from s.
//  z         - pointer to the zval to fill (evaluated exactly once)
//  s         - source character buffer
//  l         - string length (stored as int)
//  duplicate - non-zero: store an estrndup() copy of s; zero: store s itself without copying
//The body is wrapped in do { } while (0) so that `ZVAL_STRINGL(...);` is a single
//statement and stays correct inside an unbraced if/else.
#define ZVAL_STRINGL(z, s, l, duplicate) do {    \
	const char *zsl_src_ = (s); int zsl_len_ = (l);        \
	zval *zsl_zv_ = (z);                \
	Z_STRLEN_P(zsl_zv_) = zsl_len_;                \
	Z_STRVAL_P(zsl_zv_) = ((duplicate) ? estrndup(zsl_src_, zsl_len_) : (char *)zsl_src_);\
	Z_TYPE_P(zsl_zv_) = IS_STRING;            \
} while (0)
	....
//estrndup is the main course:
//File 3: zend/zend_alloc.h
//Forwards to _estrndup(), appending the ZEND_FILE_LINE_CC and ZEND_FILE_LINE_EMPTY_CC macros to the argument list.
#define estrndup(s, length)    _estrndup((s), (length) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
	....
//Implementation of _estrndup: zend/zend_alloc.c
//Returns a freshly allocated, NUL-terminated copy of the first `length` bytes of s.
//The buffer comes from _emalloc(length + 1); if that yields NULL, NULL is returned.
ZEND_API char *_estrndup(const char *s, uint length ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
{
	//The allocation happens here: one extra byte is reserved for the terminator
	char *copy = (char *) _emalloc(length+1 ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);

	if (UNEXPECTED(copy == NULL)) {
		return NULL;
	}

	//Terminate first, then fill in the payload; the two writes do not overlap
	copy[length] = 0;
	memcpy(copy, s, length);
	return copy;
}
//In addition, the ZVAL_STRING used in substr, strrchr and strstr is built on the implementation above

The following analyzes which call is made for each value of explode's third parameter, limit. The three cases correspond to the final if / else if / else chain in explode. Note: when limit is omitted (not passed), its default value is LONG_MAX, which falls under case 1.

1, limit > 1:

Call the php_explode function, which can also be found in ext/standard/string.c and appears immediately above the explode implementation (this makes lookups convenient: the functions called within this file are almost always listed immediately above their caller ^_^).

//Splits str on delim and appends each piece to return_value.
//  delim, str   - string zvals holding the delimiter and the source string
//  return_value - array that receives the pieces
//  limit        - the loop stops splitting once --limit reaches 1; whatever is left
//                 (including any further delimiters) is appended as the final piece
PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, long limit) 
{
	char *p1, *p2, *endp;
	//First obtain a pointer to the end of the source string
	endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);
	//Record the start position
	p1 = Z_STRVAL_P(str);
	//Locate the delimiter inside str (NULL when absent); per the article, strrpos and strpos use the same helper
	p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);
	
	if (p2 == NULL) {
		//Because of this branch, explode('|', 'abc') is legal and yields array(0 => 'abc')
		add_next_index_stringl(return_value, p1, Z_STRLEN_P(str), 1);
	} else {
		//Loop, finding the next delimiter each time, until none is left or the limit is reached
		do {
		//Append the substring between the previous position and this delimiter (the first time, the previous position is the start)
		add_next_index_stringl(return_value, p1, p2 - p1, 1);
		//Move p1 just past the delimiter: p2 + delimiter length
		//e.g. delimiter = '|', source = 'ab|c': p2 is at offset 2, so p1 = 2 + 1 = 3
		p1 = p2 + Z_STRLEN_P(delim);
	} while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL &&
                 --limit > 1);
	//Append whatever follows the last consumed delimiter to the result array
	//explode('|', 'avc|sdf');   => array(0 => 'avc', 1 => 'sdf')
		if (p1 <= endp)
			add_next_index_stringl(return_value, p1, endp-p1, 1);
	}
}

2. limit < 0:

Call the php_explode_negative_limit method:

//Splits str on delim but omits the last |limit| pieces from return_value.
//  delim, str   - string zvals holding the delimiter and the source string
//  return_value - array that receives the pieces
//  limit        - negative count (explode only calls this when limit < 0)
//If the delimiter does not occur at all, return_value is left untouched.
PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, long limit) 
{
	#define EXPLODE_ALLOC_STEP 64
	char *p1, *p2, *endp;
	
	endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);
	
	p1 = Z_STRVAL_P(str);
	p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);
	
	if (p2 == NULL) {
	//Nothing is added here, so explode('|', 'abc', -1) produces an empty array
		/*
		do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
		by doing nothing we return empty array
	*/
	} else {
		int allocated = EXPLODE_ALLOC_STEP, found = 0;
        long i, to_return;
		char **positions = emalloc(allocated * sizeof(char *));
		//positions records the start pointer of every piece
		positions[found++] = p1;   //the start of the string is the first piece's start
		//Two loops follow. The first walks every delimiter occurrence and stores where the next piece begins
		do {
			if (found >= allocated) {
				allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
				positions = erealloc(positions, allocated*sizeof(char *));
			}
			positions[found++] = p1 = p2 + Z_STRLEN_P(delim);
		} while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL);
		//Number of pieces to return: all found pieces minus the last |limit| ones
		to_return = limit + found;
			/* limit is at most -1 therefore no need of bounds checking : i will be always less than found */
			for (i = 0;i < to_return;i++) { /* this checks also for to_return > 0 */
				//Piece i ends where the delimiter in front of piece i+1 begins
				add_next_index_stringl(return_value, positions[i], 
					(positions[i+1] - Z_STRLEN_P(delim)) - positions[i],
						1
					);
			}
			efree(positions);//Important: release the positions array
		}
	#undef EXPLODE_ALLOC_STEP
}

3. limit = 1 or limit = 0 :

When neither the first nor the second condition is met, this branch is entered. It simply puts the whole source string into the output array: explode('|', 'avc|sd', 1) or explode('|', 'avc|sd', 0) will both return array(0 => 'avc|sd').

//Source of add_next_index_stringl
//File 4: zend/zend_API.c
//Builds a new string zval from (str, length) and appends it to the array zval arg.
//  duplicate - forwarded to ZVAL_STRINGL: non-zero copies str, zero stores the pointer as-is
//Returns the result of zend_hash_next_index_insert().
ZEND_API int add_next_index_stringl(zval *arg, const char *str, uint length, int duplicate) /* {{{ */
{
	zval *entry;
	int status;

	MAKE_STD_ZVAL(entry);
	ZVAL_STRINGL(entry, str, length, duplicate);

	//The hash table stores the zval pointer itself, hence the pointer-sized payload
	status = zend_hash_next_index_insert(Z_ARRVAL_P(arg), &entry, sizeof(entry), NULL);
	return status;
}
//zend_hash_next_index_insert
//zend/zend_hash.h
//Expands to _zend_hash_index_update_or_next_insert(), passing 0 as the second argument and HASH_NEXT_INSERT as the flag argument.
#define zend_hash_next_index_insert(ht, pData, nDataSize, pDest) \
         _zend_hash_index_update_or_next_insert(ht, 0, pData, nDataSize, pDest, HASH_NEXT_INSERT ZEND_FILE_LINE_CC)
//zend/zend_hash.c
///Too long ~~~~ not pasted here

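It can be seen (excluding allocated space) that when limit > 1, the cost is O(N) [N is the limit value]. When limit < 0 … [the rest of this sentence was lost when the page was extracted, most likely because the '<' was treated as markup; per the code above, that case first scans every delimiter occurrence in the string and then copies the selected pieces.]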

www.bkjia.comtruehttp: //www.bkjia.com/PHPjc/752366.htmlTechArticleWhen we need to split an array into arrays based on a certain character or string, the explode() function Very useful, but do you know how explode() works? Question about intercepting strings...
Statement
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Explain how load balancing affects session management and how to address it.Explain how load balancing affects session management and how to address it.Apr 29, 2025 am 12:42 AM

Load balancing affects session management, but can be resolved with session replication, session stickiness, and centralized session storage. 1. Session Replication Copy session data between servers. 2. Session stickiness directs user requests to the same server. 3. Centralized session storage uses independent servers such as Redis to store session data to ensure data sharing.

Explain the concept of session locking.Explain the concept of session locking.Apr 29, 2025 am 12:39 AM

Session locking is a technique used to ensure a user's session remains exclusive to one user at a time. It is crucial for preventing data corruption and security breaches in multi-user applications. Session locking is implemented using server-side locking mechanisms, such as ReentrantLock in J…

Are there any alternatives to PHP sessions?Are there any alternatives to PHP sessions?Apr 29, 2025 am 12:36 AM

Alternatives to PHP sessions include Cookies, Token-based Authentication, Database-based Sessions, and Redis/Memcached. 1. Cookies manage sessions by storing data on the client, which is simple but low in security. 2. Token-based Authentication uses tokens to verify users, which is highly secure but requires additional logic. 3. Database-based Sessions store data in the database, which has good scalability but may affect performance. 4. Redis/Memcached uses distributed cache to improve performance and scalability, but requires additional matching

Define the term 'session hijacking' in the context of PHP.Define the term 'session hijacking' in the context of PHP.Apr 29, 2025 am 12:33 AM

Session hijacking refers to an attacker impersonating a user by obtaining the user's session ID. Prevention methods include: 1) encrypting communication using HTTPS; 2) verifying the source of the session ID; 3) using a secure session ID generation algorithm; 4) regularly updating the session ID.

What is the full form of PHP?What is the full form of PHP?Apr 28, 2025 pm 04:58 PM

The article discusses PHP, detailing its full form, main uses in web development, comparison with Python and Java, and its ease of learning for beginners.

How does PHP handle form data?How does PHP handle form data?Apr 28, 2025 pm 04:57 PM

PHP handles form data using $\_POST and $\_GET superglobals, with security ensured through validation, sanitization, and secure database interactions.

What is the difference between PHP and ASP.NET?What is the difference between PHP and ASP.NET?Apr 28, 2025 pm 04:56 PM

The article compares PHP and ASP.NET, focusing on their suitability for large-scale web applications, performance differences, and security features. Both are viable for large projects, but PHP is open-source and platform-independent, while ASP.NET,

Is PHP a case-sensitive language?Is PHP a case-sensitive language?Apr 28, 2025 pm 04:55 PM

PHP's case sensitivity varies: functions are insensitive, while variables and classes are sensitive. Best practices include consistent naming and using case-insensitive functions for comparisons.

See all articles

Hot AI Tools

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Undress AI Tool

Undress AI Tool

Undress images for free

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Tools

SAP NetWeaver Server Adapter for Eclipse

SAP NetWeaver Server Adapter for Eclipse

Integrate Eclipse with SAP NetWeaver application server.

mPDF

mPDF

mPDF is a PHP library that can generate PDF files from UTF-8 encoded HTML. The original author, Ian Back, wrote mPDF to output PDF files "on the fly" from his website and handle different languages. It is slower than original scripts like HTML2FPDF and produces larger files when using Unicode fonts, but supports CSS styles etc. and has a lot of enhancements. Supports almost all languages, including RTL (Arabic and Hebrew) and CJK (Chinese, Japanese and Korean). Supports nested block-level elements (such as P, DIV),

SublimeText3 Mac version

SublimeText3 Mac version

God-level code editing software (SublimeText3)

Dreamweaver Mac version

Dreamweaver Mac version

Visual web development tools

EditPlus Chinese cracked version

EditPlus Chinese cracked version

Small size, syntax highlighting, does not support code prompt function