Home >php教程 >PHP源码 >汉字转Unicode编码,Unicode编码转汉字

汉字转Unicode编码,Unicode编码转汉字

PHP中文网
PHP中文网 (Original)
2016-05-25 17:01:50 · 2481 views

           

<?php

/**
 * Encode a string as a sequence of Unicode code-unit escapes.
 *
 * The input is converted to big-endian UCS-2 and every 16-bit unit is written
 * as $prefix . <number> . $postfix. With the defaults this yields decimal HTML
 * numeric entities ("&#27721;"); with $ishex = true, $prefix = '\u' and
 * $postfix = '' it yields "\u6c49"-style escapes.
 *
 * Only characters in the Basic Multilingual Plane can be represented, because
 * the intermediate form is UCS-2.
 *
 * @param string $str      Text to encode.
 * @param string $encoding Character encoding of $str.
 * @param bool   $ishex    true: 4-digit lowercase hex; false: decimal.
 * @param string $prefix   Text emitted before every number.
 * @param string $postfix  Text emitted after every number.
 * @return string The encoded text; '' when $str is empty or cannot be
 *                converted from $encoding (iconv reports the reason itself).
 */
function unicode_encode($str, $encoding = 'UTF-8', $ishex = false, $prefix = '&#', $postfix = ';') {
	// Ask for big-endian explicitly: a bare 'UCS-2' is platform-dependent
	// (host byte order on glibc), which would byte-swap every code unit on
	// little-endian machines.
	$ucs2 = iconv($encoding, 'UCS-2BE', $str);

	// Guard against failure and empty input; str_split('') returns [''] on
	// PHP < 8.2, which would otherwise produce a bogus "&#0;".
	if ($ucs2 === false || $ucs2 === '') {
		return '';
	}

	$unistr = '';
	foreach (str_split($ucs2, 2) as $unit) {
		$hex = bin2hex($unit);
		$unistr .= $prefix . ($ishex ? $hex : hexdec($hex)) . $postfix;
	}
	return $unistr;
}

/**
 * Decode a sequence of Unicode code-unit escapes back into text.
 *
 * The input is split on $prefix. For every piece after the first, the number
 * in front of $postfix is read, packed as one big-endian 16-bit unit, and the
 * resulting UCS-2 string is converted to $encoding. Literal text outside the
 * escapes is not carried over to the result.
 *
 * @param string $unistr   Escaped text, e.g. "&#27721;&#23383;" or "\u6c49\u5b57".
 * @param string $encoding Character encoding of the returned string.
 * @param bool   $ishex    true: numbers are hexadecimal; false: decimal.
 * @param string $prefix   Text that precedes every number.
 * @param string $postfix  Text that follows every number ('' for none).
 * @return string|false The decoded text, or false when iconv cannot convert
 *                      to $encoding.
 */
function unicode_decode($unistr, $encoding = 'UTF-8', $ishex = false, $prefix = '&#', $postfix = ';') {
	$pieces = explode($prefix, $unistr);
	$ucs2 = '';

	// Index 0 is whatever preceded the first prefix, so start at 1.
	for ($i = 1, $len = count($pieces); $i < $len; $i++) {
		$number = $pieces[$i];

		// Cut at the postfix itself rather than chopping a fixed number of
		// trailing bytes, so text following an escape cannot leak into the
		// number (e.g. "41;bc" must parse as 0x41, not 0x41b).
		if ($postfix !== '') {
			$end = strpos($number, $postfix);
			if ($end !== false) {
				$number = substr($number, 0, $end);
			}
		}

		$code = $ishex ? hexdec($number) : intval($number);

		// UCS-2 holds exactly one 16-bit unit per character; anything outside
		// that range cannot be represented and is skipped.
		if ($code < 0 || $code > 0xFFFF) {
			continue;
		}

		// pack('n') writes an unsigned 16-bit big-endian integer, avoiding
		// the float produced by "$code / 256".
		$ucs2 .= pack('n', (int) $code);
	}

	if ($ucs2 === '') {
		return '';
	}

	// Explicit byte order to match the packing above; a bare 'UCS-2' is
	// platform-dependent.
	return iconv('UCS-2BE', $encoding, $ucs2);
}

// Demo script: round-trips sample strings through unicode_encode() /
// unicode_decode() and dumps each intermediate value.
header('Content-Type: text/html; charset=UTF-8');

// --- UTF-8 input ---
$str = '龕龖龗龘龙龚龛龜龝龞龟龠龡龢龣龤龥';
var_dump($str);

// The simple way: let mbstring produce and consume HTML numeric entities.
// NOTE(review): the 'HTML-ENTITIES' pseudo-encoding is deprecated in mbstring
// as of PHP 8.2 — confirm the target PHP version before relying on it.
$uni_str = mb_convert_encoding($str, 'HTML-ENTITIES', 'UTF-8');
var_dump($uni_str);

$str3 = mb_convert_encoding($uni_str, 'UTF-8', 'HTML-ENTITIES');
var_dump($str3);

// Decimal entities (default prefix '&#' and postfix ';').
$uni_str = unicode_encode($str);
var_dump($uni_str); // intended: &#40853;&#40854;&#40855; ... &#40869; (U+9F95..U+9FA5)

$str2 = unicode_decode($uni_str);
var_dump($str2); // 龕龖龗龘龙龚龛龜龝龞龟龠龡龢龣龤龥

// Hexadecimal "\uXXXX" escapes.
$uni_str = unicode_encode($str, 'UTF-8', true, '\u', '');
var_dump($uni_str); // \u9f95\u9f96\u9f97\u9f98\u9f99\u9f9a\u9f9b\u9f9c\u9f9d\u9f9e\u9f9f\u9fa0\u9fa1\u9fa2\u9fa3\u9fa4\u9fa5

$str2 = unicode_decode($uni_str, 'UTF-8', true, '\u', '');
var_dump($str2); // 龕龖龗龘龙龚龛龜龝龞龟龠龡龢龣龤龥


// --- GBK input ---
$str = 'PHP汉字转UNICODE';

// This file is UTF-8, so convert the literal to GBK before testing.
$str = iconv('UTF-8', 'GBK//IGNORE', $str);
$uni_str = unicode_encode($str, 'GBK');
var_dump($uni_str); // intended: &#80;&#72;&#80;&#27721;&#23383;&#36716;&#85;&#78;&#73;&#67;&#79;&#68;&#69;

// Decode to GBK, then convert to UTF-8 so the dump matches the page charset.
$str2 = unicode_decode($uni_str, 'GBK');
$str2 = iconv('GBK', 'UTF-8', $str2);
var_dump($str2); // PHP汉字转UNICODE

$uni_str = unicode_encode($str, 'GBK', true, '\u', '');
var_dump($uni_str); // \u0050\u0048\u0050\u6c49\u5b57\u8f6c\u0055\u004e\u0049\u0043\u004f\u0044\u0045

$str2 = unicode_decode($uni_str, 'GBK', true, '\u', '');
$str2 = iconv('GBK', 'UTF-8', $str2);
var_dump($str2); // PHP汉字转UNICODE

                                   

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn