Heim >php教程 >PHP源码 >最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)

最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)

PHP中文网
PHP中文网 · Original
2016-05-25 17:01:46 · 3800 Durchsuche

1. [文件] pinyin.php      

最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)pinyin.rar

                   

2. [文件] pinyin_gbk.php 

最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)pinyin_gbk.rar

 

3. [文件] pinyin_ms.php

最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)pinyin_ms.rar


4. [文件] pinyin_mini.php 

<?php
 
// Wall-clock start (float seconds) used by the elapsed-time report at the end of the script.
$start_time = microtime(true);
 
/**
 * Hanzi-to-pinyin converter driven by GB2312 byte values.
 *
 * The conversion is a pure range lookup on the two-byte GB2312 code, so it is
 * fast but cannot disambiguate characters with several readings. GB2312
 * defines 6763 hanzi; this algorithm only covers the 3755 level-1 characters.
 * Anything outside that range (e.g. the character "镕") is not converted and
 * is rendered as "_" by to() / false by to_first().
 */
class pinyin{
    /**
     * Converts a UTF-8 string to pinyin.
     *
     * @param string $s       UTF-8 input.
     * @param bool   $isfirst When truthy, emit only the first letter of each syllable.
     * @return string Lower-case pinyin; see to() for the exact mapping rules.
     */
    public static function utf8_to($s, $isfirst = false) {
        $gb = self::utf8_to_gb2312($s);
        // iconv() reports failure with false; treat that as "nothing convertible".
        if($gb === false) {
            $gb = '';
        }
        return self::to($gb, $isfirst);
    }

    /**
     * Re-encodes a UTF-8 string as GB2312, asking iconv to drop unmappable characters.
     *
     * @param string $s UTF-8 input.
     * @return string|false The iconv() result, unchanged.
     */
    public static function utf8_to_gb2312($s) {
        return iconv('UTF-8', 'GB2312//IGNORE', $s);
    }

    /**
     * Converts a GB2312-encoded string to pinyin.
     *
     * Mapping rules, applied per character:
     *  - 0-9 and a-z are copied, A-Z are lower-cased;
     *  - any other single byte becomes "_";
     *  - a double-byte character inside the table range becomes its pinyin
     *    (or only its first letter when $isfirst is truthy);
     *  - a double-byte character outside the table range becomes a single "_".
     *
     * @param string $s       GB2312-encoded input (must not be UTF-8).
     * @param bool   $isfirst When truthy, emit only the first letter of each syllable.
     * @return string
     */
    public static function to($s, $isfirst = false) {
        $s = (string) $s;
        $res = '';
        $len = strlen($s);
        $pinyin_arr = self::get_pinyin_array();
        for($i = 0; $i < $len; $i++) {
            $ascii = ord($s[$i]);
            if($ascii > 0x80) {
                // Lead byte of a double-byte character: it needs a trail byte.
                if($i + 1 >= $len) {
                    // Truncated sequence at the end of the input; do not read
                    // past the string, just mark it as unconvertible.
                    $res .= '_';
                    break;
                }
                $ascii2 = ord($s[++$i]);
                // Shift the 16-bit code into the negative range used by the table.
                $ascii = $ascii * 256 + $ascii2 - 65536;
            }

            if($ascii < 255 && $ascii > 0) {
                // Single-byte character ($i still points at it).
                if(($ascii >= 48 && $ascii <= 57) || ($ascii >= 97 && $ascii <= 122)) {
                    $res .= $s[$i]; // 0-9 a-z
                }elseif($ascii >= 65 && $ascii <= 90) {
                    $res .= strtolower($s[$i]); // A-Z
                }else{
                    $res .= '_';
                }
            }elseif($ascii < -20319 || $ascii > -10247) {
                // Outside the range covered by the lookup table.
                $res .= '_';
            }else{
                // The table is sorted from the highest start code down, so the
                // first entry whose start code is <= the character wins.
                foreach($pinyin_arr as $py => $asc) {
                    if((int) $asc <= $ascii) {
                        $res .= $isfirst ? $py[0] : $py;
                        break;
                    }
                }
            }
        }
        return $res;
    }

    /**
     * Returns the lower-case initial letter for the first character of $s.
     *
     * A first byte above 0xE0 is taken as the start of a three-byte UTF-8
     * character and converted to GB2312 first; an ASCII letter is returned
     * lower-cased; any other first byte is treated as a GB2312 lead byte.
     *
     * @param string $s Input string (UTF-8 or GB2312, see above).
     * @return string|false One letter a-z, or false when no initial can be determined.
     */
    public static function to_first($s) {
        $s = (string) $s;
        if($s === '') {
            return false;
        }

        $ascii = ord($s[0]);
        if($ascii > 0xE0) {
            // Need all three bytes of the UTF-8 sequence.
            if(strlen($s) < 3) {
                return false;
            }
            $s = self::utf8_to_gb2312($s[0].$s[1].$s[2]);
            if($s === false) {
                return false;
            }
        }elseif($ascii < 0x80) {
            if($ascii >= 65 && $ascii <= 90) {
                return strtolower($s[0]);
            }elseif($ascii >= 97 && $ascii <= 122) {
                return $s[0];
            }else{
                return false;
            }
        }

        if(strlen($s) < 2) {
            return false;
        }

        $asc = ord($s[0]) * 256 + ord($s[1]) - 65536;
        if($asc < -20319 || $asc > -10247) {
            return false;
        }

        // Lowest code of each initial; the ranges are contiguous, so the last
        // entry whose lower bound is <= $asc identifies the letter.
        static $lower_bounds = array(
            'a' => -20319, 'b' => -20283, 'c' => -19775, 'd' => -19218,
            'e' => -18710, 'f' => -18526, 'g' => -18239, 'h' => -17922,
            'j' => -17417, 'k' => -16474, 'l' => -16212, 'm' => -15640,
            'n' => -15165, 'o' => -14922, 'p' => -14914, 'q' => -14630,
            'r' => -14149, 's' => -14090, 't' => -13318, 'w' => -12838,
            'x' => -12556, 'y' => -11847, 'z' => -11055,
        );

        $letter = false;
        foreach($lower_bounds as $initial => $lowest) {
            if($asc < $lowest) {
                break;
            }
            $letter = $initial;
        }
        return $letter;
    }

    /**
     * Builds (once) and returns the pinyin lookup table.
     *
     * @return array Map of pinyin syllable => start code (numeric string),
     *               ordered from the highest start code to the lowest.
     */
    public static function get_pinyin_array() {
        static $py_arr;
        if(isset($py_arr)) return $py_arr;

        $k = 'a|ai|an|ang|ao|ba|bai|ban|bang|bao|bei|ben|beng|bi|bian|biao|bie|bin|bing|bo|bu|ca|cai|can|cang|cao|ce|ceng|cha|chai|chan|chang|chao|che|chen|cheng|chi|chong|chou|chu|chuai|chuan|chuang|chui|chun|chuo|ci|cong|cou|cu|cuan|cui|cun|cuo|da|dai|dan|dang|dao|de|deng|di|dian|diao|die|ding|diu|dong|dou|du|duan|dui|dun|duo|e|en|er|fa|fan|fang|fei|fen|feng|fo|fou|fu|ga|gai|gan|gang|gao|ge|gei|gen|geng|gong|gou|gu|gua|guai|guan|guang|gui|gun|guo|ha|hai|han|hang|hao|he|hei|hen|heng|hong|hou|hu|hua|huai|huan|huang|hui|hun|huo|ji|jia|jian|jiang|jiao|jie|jin|jing|jiong|jiu|ju|juan|jue|jun|ka|kai|kan|kang|kao|ke|ken|keng|kong|kou|ku|kua|kuai|kuan|kuang|kui|kun|kuo|la|lai|lan|lang|lao|le|lei|leng|li|lia|lian|liang|liao|lie|lin|ling|liu|long|lou|lu|lv|luan|lue|lun|luo|ma|mai|man|mang|mao|me|mei|men|meng|mi|mian|miao|mie|min|ming|miu|mo|mou|mu|na|nai|nan|nang|nao|ne|nei|nen|neng|ni|nian|niang|niao|nie|nin|ning|niu|nong|nu|nv|nuan|nue|nuo|o|ou|pa|pai|pan|pang|pao|pei|pen|peng|pi|pian|piao|pie|pin|ping|po|pu|qi|qia|qian|qiang|qiao|qie|qin|qing|qiong|qiu|qu|quan|que|qun|ran|rang|rao|re|ren|reng|ri|rong|rou|ru|ruan|rui|run|ruo|sa|sai|san|sang|sao|se|sen|seng|sha|shai|shan|shang|shao|she|shen|sheng|shi|shou|shu|shua|shuai|shuan|shuang|shui|shun|shuo|si|song|sou|su|suan|sui|sun|suo|ta|tai|tan|tang|tao|te|teng|ti|tian|tiao|tie|ting|tong|tou|tu|tuan|tui|tun|tuo|wa|wai|wan|wang|wei|wen|weng|wo|wu|xi|xia|xian|xiang|xiao|xie|xin|xing|xiong|xiu|xu|xuan|xue|xun|ya|yan|yang|yao|ye|yi|yin|ying|yo|yong|you|yu|yuan|yue|yun|za|zai|zan|zang|zao|ze|zei|zen|zeng|zha|zhai|zhan|zhang|zhao|zhe|zhen|zheng|zhi|zhong|zhou|zhu|zhua|zhuai|zhuan|zhuang|zhui|zhun|zhuo|zi|zong|zou|zu|zuan|zui|zun|zuo';
        $v = '-20319|-20317|-20304|-20295|-20292|-20283|-20265|-20257|-20242|-20230|-20051|-20036|-20032|-20026|-20002|-19990|-19986|-19982|-19976|-19805|-19784|-19775|-19774|-19763|-19756|-19751|-19746|-19741|-19739|-19728|-19725|-19715|-19540|-19531|-19525|-19515|-19500|-19484|-19479|-19467|-19289|-19288|-19281|-19275|-19270|-19263|-19261|-19249|-19243|-19242|-19238|-19235|-19227|-19224|-19218|-19212|-19038|-19023|-19018|-19006|-19003|-18996|-18977|-18961|-18952|-18783|-18774|-18773|-18763|-18756|-18741|-18735|-18731|-18722|-18710|-18697|-18696|-18526|-18518|-18501|-18490|-18478|-18463|-18448|-18447|-18446|-18239|-18237|-18231|-18220|-18211|-18201|-18184|-18183|-18181|-18012|-17997|-17988|-17970|-17964|-17961|-17950|-17947|-17931|-17928|-17922|-17759|-17752|-17733|-17730|-17721|-17703|-17701|-17697|-17692|-17683|-17676|-17496|-17487|-17482|-17468|-17454|-17433|-17427|-17417|-17202|-17185|-16983|-16970|-16942|-16915|-16733|-16708|-16706|-16689|-16664|-16657|-16647|-16474|-16470|-16465|-16459|-16452|-16448|-16433|-16429|-16427|-16423|-16419|-16412|-16407|-16403|-16401|-16393|-16220|-16216|-16212|-16205|-16202|-16187|-16180|-16171|-16169|-16158|-16155|-15959|-15958|-15944|-15933|-15920|-15915|-15903|-15889|-15878|-15707|-15701|-15681|-15667|-15661|-15659|-15652|-15640|-15631|-15625|-15454|-15448|-15436|-15435|-15419|-15416|-15408|-15394|-15385|-15377|-15375|-15369|-15363|-15362|-15183|-15180|-15165|-15158|-15153|-15150|-15149|-15144|-15143|-15141|-15140|-15139|-15128|-15121|-15119|-15117|-15110|-15109|-14941|-14937|-14933|-14930|-14929|-14928|-14926|-14922|-14921|-14914|-14908|-14902|-14894|-14889|-14882|-14873|-14871|-14857|-14678|-14674|-14670|-14668|-14663|-14654|-14645|-14630|-14594|-14429|-14407|-14399|-14384|-14379|-14368|-14355|-14353|-14345|-14170|-14159|-14151|-14149|-14145|-14140|-14137|-14135|-14125|-14123|-14122|-14112|-14109|-14099|-14097|-14094|-14092|-14090|-14087|-14083|-13917|-13914|-13910|-13907|-13906|-13905|-13896|-13894|-13878|-13870|-13859|-13847|-13831|-13658|-13611|-13601|-13406|-13404|-13400|-13398|-13395|-13391|-13387|-13383|-13367|-13359|-13356|-13343|-13340|-13329|-13326|-13318|-13147|-13138|-13120|-13107|-13096|-13095|-13091|-13076|-13068|-13063|-13060|-12888|-12875|-12871|-12860|-12858|-12852|-12849|-12838|-12831|-12829|-12812|-12802|-12607|-12597|-12594|-12585|-12556|-12359|-12346|-12320|-12300|-12120|-12099|-12089|-12074|-12067|-12058|-12039|-11867|-11861|-11847|-11831|-11798|-11781|-11604|-11589|-11536|-11358|-11340|-11339|-11324|-11303|-11097|-11077|-11067|-11055|-11052|-11045|-11041|-11038|-11024|-11020|-11019|-11018|-11014|-10838|-10832|-10815|-10800|-10790|-10780|-10764|-10587|-10544|-10533|-10519|-10331|-10329|-10328|-10322|-10315|-10309|-10307|-10296|-10281|-10274|-10270|-10262|-10260|-10256|-10254';
        $key = explode('|', $k);
        $val = explode('|', $v);
        $py_arr = array_combine($key, $val);
        // Highest start code first, compared as numbers, so to() can stop at
        // the first entry that does not exceed the character code.
        arsort($py_arr, SORT_NUMERIC);

        return $py_arr;
    }
}
 
/*
var_dump(0xE0);
for($i=0; $i<=255; $i++) {
    var_dump("$i :". chr($i));
}
*/
 
// Demo: full pinyin for UTF-8 input.
var_dump(pinyin::utf8_to('朱镕基'));
var_dump(pinyin::utf8_to('我是中国人'));
var_dump(pinyin::utf8_to('PHP汉字转拼音类'));
var_dump(pinyin::utf8_to('GB2312标准共收录6763个汉字,不在范围内的汉字是无法转换,如:中国前总理朱镕基的“镕”字。'));
var_dump(pinyin::utf8_to('`1234567890-=QWERTYUIOP[]ASDFGHJKL;ZXCVBNM,./abcdefghijklmnopqrstuvwxyz'));

// Demo: first letter of every syllable.
var_dump(pinyin::utf8_to('朱镕基', 1));
var_dump(pinyin::utf8_to('我是中国人', 1));
var_dump(pinyin::utf8_to('PHP汉字转拼音类', 1));
var_dump(pinyin::utf8_to('GB2312标准共收录6763个汉字,不在范围内的汉字是无法转换,如:中国前总理朱镕基的“镕”字。', 1));
var_dump(pinyin::utf8_to('`1234567890-=QWERTYUIOP[]ASDFGHJKL;ZXCVBNM,./abcdefghijklmnopqrstuvwxyz', 1));

// Demo: initial letter of the first character only.
var_dump(pinyin::to_first('朱镕基'));
var_dump(pinyin::to_first('我是中国人'));
var_dump(pinyin::to_first('PHP汉字转拼音类'));
var_dump(pinyin::to_first('GB2312标准共收录6763个汉字,不在范围内的汉字是无法转换,如:中国前总理朱镕基的“镕”字。'));
var_dump(pinyin::to_first('▂▃▄▅▆▇█▉`1234567890-=QWERTYUIOP[]ASDFGHJKL;ZXCVBNM,./abcdefghijklmnopqrstuvwxyz'));

// Elapsed wall-clock time in seconds since $start_time was taken.
echo number_format(microtime(1) - $start_time, 6);
 
?>

                       

5. [文件] uniku.zip 

最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)uniku.zip

                           

           

6. [文件] TableTextServiceSimplifiedQuanPin.zip 

最全的PHP汉字转拼音函数(共25961字,包含20902个基本汉字+5059生僻字)TableTextServiceSimplifiedQuanPin.zip

  


       

Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn