Home > Article > Backend Development > Truncating HTML articles in PHP_PHP tutorial
Truncating HTML in PHP while making sure the HTML tags stay properly closed
/**
 * Generate a summary of an article body.
 *
 * @param string $body   Article text; may contain HTML.
 * @param int    $size   Summary length, in characters.
 * @param int    $format Input format id (optional).
 */
function blog_summary($body, $size, $format = NULL){
$_size = mb_strlen($body, "utf-8');
if($_size <= $size) return $body;
// There is a PHP filter in the input format
/*
if(!isset($format) && filter_is_php($format)){
Return $body;
}
*/
$strlen_var = strlen($body);
//Does not contain html tags
if(strpos($body, '<') === false){
Return mb_substr($body, 0, $size);
}
// Contains truncation flag, priority
if($e = strpos($body, '')){
Return mb_substr($body, 0, $e);
}
// html code tag
$html_tag = 0;
// Summary string
$summary_string = '';
/**
* This array records the html tags that appear within the summary range.
* Opening and closing tags are saved under the 'left' and 'right' keys respectively.
* If the string is: <h3><p><b>a</b></h3>, assuming p is not closed,
* the array is: array('left' => array('h3', 'p', 'b'), 'right' => array('b', 'h3'));
* Only complete html tags are handled; <% and other language tags will produce unpredictable results.
*/
$html_array = array('left' => array(), 'right' => array());
for($i = 0; $i < $strlen_var; ++$i) {
If(!$size){
Break;
}
$current_var = substr($body, $i, 1);
If($current_var == '<'){
// html code starts
$html_tag = 1;
$html_array_str = '';
}else if($html_tag == 1){
// End of a piece of html code
If($current_var == '>'){
/**
* Remove leading and trailing spaces, such as
< img src="" / > etc. Leading and trailing spaces may appear
*/
$html_array_str = trim($html_array_str);
/**
* Determine whether the last character is /, if so, the label is closed and
is not recorded
*/
If(substr($html_array_str, -1) != '/'){
// Determine whether the first character is /, if so, place it in the right unit
$f = substr($html_array_str, 0, 1);
If($f == '/'){
// Remove /
$html_array['right'][] = str_replace('/', '', $html_array_str);
}else if($f != '?'){
// Determine whether it is ?, if so, it is PHP code, skip
/**
* Determine whether there is a half-width space. If so, separate it with spaces. The first unit is the html tag
* Such as
If(strpos($html_array_str, ' ') !== false){
// Divide into 2 units, there may be multiple spaces, such as:
etc.
* Convert to lowercase letters
*/
$html_array['left'][] = strtolower($html_array_str);
}
// String reset
$html_array_str = '';
$html_tag = 0;
}else{
/**
* Compose the characters between < > into a string
* Used to extract html tags
*/
$html_array_str .= $current_var;
}
}else{
// Only non-html codes are counted
--$size;
}
$ord_var_c = ord($body{$i});
switch (true) {
case (($ord_var_c & 0xE0) == 0xC0):
// 2 bytes
$summary_string .= substr($body, $i, 2);
$i += 1;
Break;
case (($ord_var_c & 0xF0) == 0xE0):
// 3 bytes
$summary_string .= substr($body, $i, 3);
$i += 2;
Break;
case (($ord_var_c & 0xF8) == 0xF0):
// 4 bytes
$summary_string .= substr($body, $i, 4);
$i += 3;
Break;
case (($ord_var_c & 0xFC) == 0xF8):
// 5 bytes
$summary_string .= substr($body, $i, 5);
$i += 4;
Break;
case (($ord_var_c & 0xFE) == 0xFC):
// 6 bytes
$summary_string .= substr($body, $i, 6);
$i += 5;
Break;
default:
// 1 byte
$summary_string .= $current_var;
}
}
if($html_array['left']){
/**
* Compare the left and right html tags and complete them if they are insufficient
*/
/**
* Exchange the order of left, and the order of supplementation should be opposite to the order in which html appears
* * If the string to be completed is:
abc
* * The supplementary order should be: