Home  >  Article  >  Backend Development  >  PHP intercepts HTML articles_PHP tutorial

Truncating HTML articles in PHP - PHP tutorial

WBOY
WBOYOriginal
2016-07-13 17:53:23 · 1111 views

Truncating HTML code in PHP while making sure that any HTML tags left open by the cut are properly closed.


/**
 * Generate a summary of an HTML fragment, limited to a number of visible
 * characters, and close any HTML tags that the cut leaves open.
 *
 * Characters inside tags and HTML comments are copied to the summary but are
 * not counted against $size; only text outside markup is counted. The body is
 * treated as UTF-8, so a multi-byte character is never split.
 *
 * Known limitations: a '>' inside an attribute value ends the tag early, and
 * character entities (e.g. "&amp;") are counted character by character and
 * may therefore be cut in the middle.
 *
 * @param string   $body   Text (HTML or plain) to summarise.
 * @param int      $size   Summary length, in visible characters.
 * @param int|null $format Input format id. Currently unused: the PHP-filter
 *                         check that consumed it was commented out in the
 *                         original code. Kept so existing callers still work.
 *
 * @return string The original body when it is short enough, otherwise the
 *                truncated text followed by the missing closing tags.
 */
function blog_summary($body, $size, $format = NULL)
{
    $encoding = 'UTF-8';
    $body = (string) $body;
    $size = (int) $size;

    // Already short enough (markup included): nothing to do.
    if (mb_strlen($body, $encoding) <= $size) {
        return $body;
    }

    // No HTML tags at all: a plain character-based cut is sufficient.
    if (strpos($body, '<') === false) {
        return mb_substr($body, 0, $size, $encoding);
    }

    // An explicit truncation marker takes priority over $size.
    // NOTE(review): the marker literal was lost from the published source
    // (it is an HTML comment and was stripped); '<!--break-->' is assumed
    // here, following the Drupal teaser convention - confirm.
    // strpos() yields a BYTE offset, so the cut uses substr(), not
    // mb_substr(). As in the original, a marker at offset 0 is ignored
    // because it would produce an empty summary.
    $marker_pos = strpos($body, '<!--break-->');
    if ($marker_pos !== false && $marker_pos > 0) {
        return substr($body, 0, $marker_pos);
    }

    if ($size <= 0) {
        return '';
    }

    // Elements that never take a closing tag; they must not be "completed".
    $void_tags = [
        'area' => true, 'base' => true, 'br' => true, 'col' => true,
        'embed' => true, 'hr' => true, 'img' => true, 'input' => true,
        'link' => true, 'meta' => true, 'param' => true, 'source' => true,
        'track' => true, 'wbr' => true,
    ];

    $length = strlen($body);
    $remaining = $size;      // visible characters still allowed
    $summary_string = '';
    $open_tags = [];         // lower-cased names of tags opened and not yet closed
    $i = 0;                  // byte offset into $body

    while ($i < $length && $remaining > 0) {
        if ($body[$i] === '<') {
            // HTML comment: copy verbatim, it is neither text nor a tag.
            if (substr($body, $i, 4) === '<!--') {
                $comment_end = strpos($body, '-->', $i + 4);
                if ($comment_end === false) {
                    $summary_string .= substr($body, $i);
                    break;
                }
                $summary_string .= substr($body, $i, $comment_end + 3 - $i);
                $i = $comment_end + 3;
                continue;
            }

            $tag_end = strpos($body, '>', $i + 1);
            if ($tag_end === false) {
                // Unterminated '<': like the original, copy the rest as-is.
                $summary_string .= substr($body, $i);
                break;
            }

            // Text between '<' and '>', without surrounding spaces
            // (handles forms such as "< img src='' / >").
            $inner = trim(substr($body, $i + 1, $tag_end - $i - 1));

            if ($inner !== '' && substr($inner, -1) !== '/') {
                if ($inner[0] === '/') {
                    // Closing tag: drop the most recently opened match.
                    $name = strtolower(trim(substr($inner, 1)));
                    for ($k = count($open_tags) - 1; $k >= 0; --$k) {
                        if ($open_tags[$k] === $name) {
                            array_splice($open_tags, $k, 1);
                            break;
                        }
                    }
                } elseif ($inner[0] !== '?' && $inner[0] !== '!') {
                    // Opening tag ('?' = PHP/XML instruction, '!' = doctype
                    // are skipped). The name ends at the first whitespace.
                    $name = strtolower(substr($inner, 0, strcspn($inner, " \t\r\n/")));
                    if ($name !== '' && !isset($void_tags[$name])) {
                        $open_tags[] = $name;
                    }
                }
            }

            $summary_string .= substr($body, $i, $tag_end + 1 - $i);
            $i = $tag_end + 1;
            continue;
        }

        // Visible text: copy one whole UTF-8 character, sized by its lead byte.
        $lead = ord($body[$i]);
        if (($lead & 0xE0) === 0xC0) {
            $width = 2;
        } elseif (($lead & 0xF0) === 0xE0) {
            $width = 3;
        } elseif (($lead & 0xF8) === 0xF0) {
            $width = 4;
        } else {
            $width = 1; // ASCII, or a stray/invalid byte copied unchanged
        }

        $summary_string .= substr($body, $i, $width);
        $i += $width;
        --$remaining;
    }

    // Close what is still open, innermost element first.
    for ($k = count($open_tags) - 1; $k >= 0; --$k) {
        $summary_string .= '</' . $open_tags[$k] . '>';
    }

    return $summary_string;
}

-->

Source: http://www.bkjia.com/PHPjc/478039.html (www.bkjia.com). Tech article: Truncating HTML code in PHP while making sure the HTML tags are closed. /* Generate summary * @param (string) $body Text * @param (int) $size Summary length * @param (int) $format Input format...

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn