-
// Send a Content-type header declaring the response as HTML encoded in UTF-8,
// matching the 'utf-8' encoding the string helpers in this file pass to mbstring.
- header('Content-type:text/html;charset=utf-8');
/**
 * Count the characters of a UTF-8 string (multi-byte aware).
 *
 * @param string     $str  The string to measure.
 * @param int|string $type Counting mode. 0 (default): every character counts as one.
 *                         1: every multi-byte character (for example a Chinese
 *                         character) counts as two, single-byte characters as one.
 * @return int The computed length; 0 for null, false, '' or an array.
 * @link http://bbs.it-home.org
 */
function abslength($str, $type = 0)
{
    // Test for "no text" explicitly: empty() would also treat the string "0" as empty.
    if ($str === null || $str === false || $str === '' || is_array($str)) {
        return 0;
    }
    $str = (string) $str;
    $doubleWidth = ((int) $type === 1);

    if (!$doubleWidth && function_exists('mb_strlen')) {
        return mb_strlen($str, 'utf-8');
    }

    // "u" makes "." match one UTF-8 character; "s" lets it match newlines as well,
    // so line breaks are not silently left out of the count.
    if (!preg_match_all('/./us', $str, $chars)) {
        return 0;
    }
    if (!$doubleWidth) {
        return count($chars[0]);
    }

    $width = 0;
    foreach ($chars[0] as $char) {
        // A character encoded in more than one byte counts as two units.
        $width += (strlen($char) > 1) ? 2 : 1;
    }
    return $width;
}
// Demo: measure a sample string twice, the second time passing '1' as an extra argument.
$str = 'Script Academy welcomes everyone, ye! ';

$len = abslength($str);
var_dump($len); // int(38): the sample holds 38 single-byte characters

$len = abslength($str, '1');
echo '
', $len; // prints 38 as well, because the sample contains no multi-byte characters
/**
 * Extract part of a UTF-8 string by character position; the arguments mirror substr().
 *
 * @param string   $str   The string to cut.
 * @param int      $start Starting character offset; a negative value counts from the end.
 * @param int|null $end   Number of characters to take (a length, as in substr(), despite
 *                        the name). A negative value stops that many characters before
 *                        the end; null (default) takes everything up to the end.
 * @return string|false The extracted text, or false when $str is null, false, '' or an array.
 */
function utf8_substr($str, $start = 0, $end = null)
{
    // Test for "no text" explicitly: empty() would also reject the string "0".
    if ($str === null || $str === false || $str === '' || is_array($str)) {
        return false;
    }
    $str = (string) $str;

    if (function_exists('mb_substr')) {
        // Pass the encoding on each call instead of changing the process-wide
        // mb_internal_encoding() setting as a side effect.
        return mb_substr($str, (int) $start, $end, 'utf-8');
    }

    // Fallback without mbstring: split into characters ("u" = UTF-8 aware,
    // "s" = "." also matches newlines) and slice the resulting list.
    if (!preg_match_all('/./us', $str, $chars)) {
        return '';
    }
    return implode('', array_slice($chars[0], (int) $start, $end));
}
// Demo: drop the last four characters of a sample string.
$str2 = 'wo wants to intercept zhongwen';
echo '
', utf8_substr($str2, 0, -4); // expected output: wo wants to intercept zhon
- ?>
Copy code
2. A Chinese substring function that supports the gb2312, gbk, utf-8 and big5 encodings
-
/**
 * Cut a substring out of a multi-byte (Chinese) string, counting characters, not bytes.
 * Supports gb2312, gbk, utf-8 and big5.
 *
 * Declared without a visibility keyword on purpose: inside a class that is
 * implicitly public, and the definition also remains valid as a plain function.
 *
 * @link bbs.it-home.org
 * @param string   $str     The string to cut.
 * @param int      $start   Starting character offset; negative counts from the end.
 * @param int|null $length  Number of characters to keep; negative stops that many
 *                          characters before the end; null keeps everything up to the end.
 * @param string   $charset utf-8|gb2312|gbk|big5 encoding of $str.
 * @param bool     $suffix  Whether to append "…" when text after the slice was cut off.
 * @return string The slice, followed by "…" only if $suffix is true and characters
 *                remain after it.
 * @throws \InvalidArgumentException When mbstring is unavailable and $charset is not
 *                                   one of the four supported encodings.
 */
function csubstr($str, $start = 0, $length = null, $charset = "utf-8", $suffix = true)
{
    $str = (string) $str;
    $chars = null;

    if (function_exists("mb_substr")) {
        $total = mb_strlen($str, $charset);
    } else {
        // Byte-range patterns matching one character per encoding. Single quotes
        // keep the \x escapes intact for PCRE, which runs in byte mode here.
        $re = array(
            'utf-8'  => '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/',
            'gb2312' => '/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/',
            'gbk'    => '/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/',
            'big5'   => '/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/',
        );
        $key = strtolower((string) $charset);
        if (!isset($re[$key])) {
            throw new \InvalidArgumentException('Unsupported charset: ' . $charset);
        }
        preg_match_all($re[$key], $str, $match);
        $chars = $match[0];
        $total = count($chars);
    }

    // Resolve $start/$length into absolute [from, to) character offsets using
    // substr() semantics, so both code paths slice identically and it is known
    // whether anything is left over after the slice.
    $from = (int) $start;
    if ($from < 0) {
        $from = max($total + $from, 0);
    }
    $from = min($from, $total);

    if ($length === null) {
        $to = $total;
    } elseif ($length < 0) {
        $to = max($total + (int) $length, $from);
    } else {
        $to = min($from + (int) $length, $total);
    }

    if ($chars === null) {
        $slice = mb_substr($str, $from, $to - $from, $charset);
    } else {
        $slice = implode("", array_slice($chars, $from, $to - $from));
    }

    // Mark truncation only when characters really were dropped after the slice.
    // NOTE(review): the suffix literal is stored in this file's own encoding; for
    // non-UTF-8 $charset values it may need converting — confirm with callers.
    if ($suffix && $to < $total) {
        return $slice . "…";
    }
    return $slice;
}
- ?>
Copy code
|