Home  >  Article  >  Backend Development  >  PHP implementation code for converting Chinese characters to Pinyin

PHP implementation code for converting Chinese characters to Pinyin

WBOY
WBOY · Original
2016-07-25 08:55:55 · 1318 views
<?php
  /**
   * Pinyin.php
   * Example: Pinyin('张洪华', 1); // Pass any second argument other than 'gb2312' when the input is UTF-8; if it is omitted, the input is assumed to be GB2312-encoded.
   * edit: bbs.it-home.org
   */
  /**
   * Converts a string of Chinese characters to toneless Pinyin.
   *
   * The input is walked byte by byte. A byte above 160 is taken as the lead
   * byte of a double-byte GB2312 character; the pair is folded into a signed
   * 16-bit code and looked up in a threshold table via _Pinyin(). Other bytes
   * are handed to _Pinyin() on their own. The concatenated result is finally
   * stripped of everything except a-z and 0-9, so spaces, punctuation and
   * upper-case ASCII letters do not survive.
   *
   * @param string $_String Text to convert.
   * @param mixed  $_Code   'gb2312' (default) when $_String is already GB2312 bytes;
   *                        any other value means $_String is UTF-8 and it is passed
   *                        through _U2_Utf8_Gb() first.
   * @return string Pinyin syllables (plus any lower-case letters/digits from the input), concatenated.
   */
  function Pinyin($_String, $_Code='gb2312')
  {
      // Syllables in alphabetical order; position N pairs with position N of $_DataValue.
      $_DataKey = "a|ai|an|ang|ao|ba|bai|ban|bang|bao|bei|ben|beng|bi|bian|biao|bie|bin|bing|bo|bu|ca|cai|can|cang|cao|ce|ceng|cha" .
          "|chai|chan|chang|chao|che|chen|cheng|chi|chong|chou|chu|chuai|chuan|chuang|chui|chun|chuo|ci|cong|cou|cu" .
          "|cuan|cui|cun|cuo|da|dai|dan|dang|dao|de|deng|di|dian|diao|die|ding|diu|dong|dou|du|duan|dui|dun|duo|e|en|er" .
          "|fa|fan|fang|fei|fen|feng|fo|fou|fu|ga|gai|gan|gang|gao|ge|gei|gen|geng|gong|gou|gu|gua|guai|guan|guang|gui" .
          "|gun|guo|ha|hai|han|hang|hao|he|hei|hen|heng|hong|hou|hu|hua|huai|huan|huang|hui|hun|huo|ji|jia|jian|jiang" .
          "|jiao|jie|jin|jing|jiong|jiu|ju|juan|jue|jun|ka|kai|kan|kang|kao|ke|ken|keng|kong|kou|ku|kua|kuai|kuan|kuang" .
          "|kui|kun|kuo|la|lai|lan|lang|lao|le|lei|leng|li|lia|lian|liang|liao|lie|lin|ling|liu|long|lou|lu|lv|luan|lue" .
          "|lun|luo|ma|mai|man|mang|mao|me|mei|men|meng|mi|mian|miao|mie|min|ming|miu|mo|mou|mu|na|nai|nan|nang|nao|ne" .
          "|nei|nen|neng|ni|nian|niang|niao|nie|nin|ning|niu|nong|nu|nv|nuan|nue|nuo|o|ou|pa|pai|pan|pang|pao|pei|pen" .
          "|peng|pi|pian|piao|pie|pin|ping|po|pu|qi|qia|qian|qiang|qiao|qie|qin|qing|qiong|qiu|qu|quan|que|qun|ran|rang" .
          "|rao|re|ren|reng|ri|rong|rou|ru|ruan|rui|run|ruo|sa|sai|san|sang|sao|se|sen|seng|sha|shai|shan|shang|shao" .
          "|she|shen|sheng|shi|shou|shu|shua|shuai|shuan|shuang|shui|shun|shuo|si|song|sou|su|suan|sui|sun|suo|ta|tai" .
          "|tan|tang|tao|te|teng|ti|tian|tiao|tie|ting|tong|tou|tu|tuan|tui|tun|tuo|wa|wai|wan|wang|wei|wen|weng|wo|wu" .
          "|xi|xia|xian|xiang|xiao|xie|xin|xing|xiong|xiu|xu|xuan|xue|xun|ya|yan|yang|yao|ye|yi|yin|ying|yo|yong|you" .
          "|yu|yuan|yue|yun|za|zai|zan|zang|zao|ze|zei|zen|zeng|zha|zhai|zhan|zhang|zhao|zhe|zhen|zheng|zhi|zhong" .
          "|zhou|zhu|zhua|zhuai|zhuan|zhuang|zhui|zhun|zhuo|zi|zong|zou|zu|zuan|zui|zun|zuo";

      // Lowest signed 16-bit code (lead*256 + trail - 65536) that maps to each syllable.
      // The literals must contain no whitespace: a value such as "- 20319" is not numeric
      // and would corrupt both the sort and the threshold comparison.
      $_DataValue = "-20319|-20317|-20304|-20295|-20292|-20283|-20265|-20257|-20242|-20230|-20051|-20036|-20032|-20026|-20002|-19990" .
          "|-19986|-19982|-19976|-19805|-19784|-19775|-19774|-19763|-19756|-19751|-19746|-19741|-19739|-19728|-19725" .
          "|-19715|-19540|-19531|-19525|-19515|-19500|-19484|-19479|-19467|-19289|-19288|-19281|-19275|-19270|-19263" .
          "|-19261|-19249|-19243|-19242|-19238|-19235|-19227|-19224|-19218|-19212|-19038|-19023|-19018|-19006|-19003" .
          "|-18996|-18977|-18961|-18952|-18783|-18774|-18773|-18763|-18756|-18741|-18735|-18731|-18722|-18710|-18697" .
          "|-18696|-18526|-18518|-18501|-18490|-18478|-18463|-18448|-18447|-18446|-18239|-18237|-18231|-18220|-18211" .
          "|-18201|-18184|-18183|-18181|-18012|-17997|-17988|-17970|-17964|-17961|-17950|-17947|-17931|-17928|-17922" .
          "|-17759|-17752|-17733|-17730|-17721|-17703|-17701|-17697|-17692|-17683|-17676|-17496|-17487|-17482|-17468" .
          "|-17454|-17433|-17427|-17417|-17202|-17185|-16983|-16970|-16942|-16915|-16733|-16708|-16706|-16689|-16664" .
          "|-16657|-16647|-16474|-16470|-16465|-16459|-16452|-16448|-16433|-16429|-16427|-16423|-16419|-16412|-16407" .
          "|-16403|-16401|-16393|-16220|-16216|-16212|-16205|-16202|-16187|-16180|-16171|-16169|-16158|-16155|-15959" .
          "|-15958|-15944|-15933|-15920|-15915|-15903|-15889|-15878|-15707|-15701|-15681|-15667|-15661|-15659|-15652" .
          "|-15640|-15631|-15625|-15454|-15448|-15436|-15435|-15419|-15416|-15408|-15394|-15385|-15377|-15375|-15369" .
          "|-15363|-15362|-15183|-15180|-15165|-15158|-15153|-15150|-15149|-15144|-15143|-15141|-15140|-15139|-15128" .
          "|-15121|-15119|-15117|-15110|-15109|-14941|-14937|-14933|-14930|-14929|-14928|-14926|-14922|-14921|-14914" .
          "|-14908|-14902|-14894|-14889|-14882|-14873|-14871|-14857|-14678|-14674|-14670|-14668|-14663|-14654|-14645" .
          "|-14630|-14594|-14429|-14407|-14399|-14384|-14379|-14368|-14355|-14353|-14345|-14170|-14159|-14151|-14149" .
          "|-14145|-14140|-14137|-14135|-14125|-14123|-14122|-14112|-14109|-14099|-14097|-14094|-14092|-14090|-14087" .
          "|-14083|-13917|-13914|-13910|-13907|-13906|-13905|-13896|-13894|-13878|-13870|-13859|-13847|-13831|-13658" .
          "|-13611|-13601|-13406|-13404|-13400|-13398|-13395|-13391|-13387|-13383|-13367|-13359|-13356|-13343|-13340" .
          "|-13329|-13326|-13318|-13147|-13138|-13120|-13107|-13096|-13095|-13091|-13076|-13068|-13063|-13060|-12888" .
          "|-12875|-12871|-12860|-12858|-12852|-12849|-12838|-12831|-12829|-12812|-12802|-12607|-12597|-12594|-12585" .
          "|-12556|-12359|-12346|-12320|-12300|-12120|-12099|-12089|-12074|-12067|-12058|-12039|-11867|-11861|-11847" .
          "|-11831|-11798|-11781|-11604|-11589|-11536|-11358|-11340|-11339|-11324|-11303|-11097|-11077|-11067|-11055" .
          "|-11052|-11045|-11041|-11038|-11024|-11020|-11019|-11018|-11014|-10838|-10832|-10815|-10800|-10790|-10780" .
          "|-10764|-10587|-10544|-10533|-10519|-10331|-10329|-10328|-10322|-10315|-10309|-10307|-10296|-10281|-10274" .
          "|-10270|-10262|-10260|-10256|-10254";

      $_TDataKey = explode('|', $_DataKey);
      // Cast to int so sorting and the threshold test are always numeric.
      $_TDataValue = array_map('intval', explode('|', $_DataValue));

      // Feature-detect instead of comparing PHP_VERSION as a string
      // (a string comparison would misjudge "10.0" as older than "5.0").
      $_Data = function_exists('array_combine')
          ? array_combine($_TDataKey, $_TDataValue)
          : _Array_Combine($_TDataKey, $_TDataValue);

      // Highest threshold first: _Pinyin() stops at the first threshold <= the character code.
      arsort($_Data);

      // Strict string comparison so that values such as 0 are not loosely equal to 'gb2312'.
      if ((string) $_Code !== 'gb2312') {
          $_String = _U2_Utf8_Gb($_String);
      }
      // A failed conversion yields false; treat it as empty input.
      $_String = (string) $_String;

      $_Res = '';
      $_Len = strlen($_String);
      for ($i = 0; $i < $_Len; $i++) {
          $_P = ord(substr($_String, $i, 1));
          if ($_P > 160) {
              // Lead byte of a double-byte character: consume the trail byte too
              // (0 when the string ends on a dangling lead byte).
              $i++;
              $_Q = ($i < $_Len) ? ord(substr($_String, $i, 1)) : 0;
              $_P = $_P * 256 + $_Q - 65536;
          }
          $_Res .= _Pinyin($_P, $_Data);
      }

      // Keep only lower-case letters and digits.
      return preg_replace("/[^a-z0-9]*/", '', $_Res);
  }
  /**
   * Maps one character code to its output fragment.
   *
   * - Codes 1..159 are returned as the single byte with that value.
   * - Codes outside -20319..-10247 (other than the range above) yield ''.
   * - Otherwise $_Data is scanned in its given order and the key of the first
   *   entry whose value is <= $_Num is returned. If no entry qualifies, the
   *   last key scanned is returned; an empty map yields ''.
   *
   * @param int   $_Num  Single-byte value, or a signed 16-bit double-byte code.
   * @param array $_Data Map of syllable => threshold; the scan relies on the caller
   *                     having ordered it from highest to lowest threshold.
   * @return string
   */
  function _Pinyin($_Num, $_Data)
  {
      if ($_Num > 0 && $_Num < 160) {
          return chr($_Num);
      }
      if ($_Num < -20319 || $_Num > -10247) {
          return '';
      }

      // Pre-set so an empty map returns '' instead of reading an undefined variable.
      $_Match = '';
      foreach ($_Data as $_Key => $_Threshold) {
          $_Match = $_Key;
          if ($_Threshold <= $_Num) {
              break;
          }
      }
      return $_Match;
  }
  /**
   * Converts UTF-8 text, or a single Unicode code point, to GB2312 bytes.
   *
   * A string argument is treated as UTF-8 text and converted as a whole; this
   * includes numeric strings such as '2016', which are text, not code points.
   * Any other argument is cast to an integer code point, encoded as UTF-8
   * (1 to 4 bytes, up to 0x1FFFFF) and then converted. Code points outside
   * 0..0x1FFFFF produce an empty string.
   *
   * @param string|int $_C UTF-8 text or a Unicode code point.
   * @return string|false GB2312 bytes, or false when iconv() cannot convert the input.
   */
  function _U2_Utf8_Gb($_C)
  {
      if (is_string($_C)) {
          // Comparing a text string against integer limits is meaningless (and on
          // PHP 8 makes every range test fail), so text is converted directly.
          return iconv('UTF-8', 'GB2312', $_C);
      }

      $_C = (int) $_C;
      $_String = '';
      if ($_C >= 0 && $_C < 0x80) {
          // Emit the character itself, not the decimal digits of its code.
          $_String .= chr($_C);
      } elseif ($_C >= 0x80 && $_C < 0x800) {
          $_String .= chr(0xC0 | $_C >> 6);
          $_String .= chr(0x80 | $_C & 0x3F);
      } elseif ($_C >= 0x800 && $_C < 0x10000) {
          $_String .= chr(0xE0 | $_C >> 12);
          $_String .= chr(0x80 | $_C >> 6 & 0x3F);
          $_String .= chr(0x80 | $_C & 0x3F);
      } elseif ($_C >= 0x10000 && $_C < 0x200000) {
          $_String .= chr(0xF0 | $_C >> 18);
          $_String .= chr(0x80 | $_C >> 12 & 0x3F);
          $_String .= chr(0x80 | $_C >> 6 & 0x3F);
          $_String .= chr(0x80 | $_C & 0x3F);
      }
      return iconv('UTF-8', 'GB2312', $_String);
  }
  /**
   * Fallback for array_combine() on PHP versions that lack it.
   *
   * Pairs the elements of both arrays by position (their own keys are ignored):
   * the N-th element of $_Arr1 becomes the key for the N-th element of $_Arr2.
   *
   * @param array $_Arr1 Keys.
   * @param array $_Arr2 Values.
   * @return array|false Combined array, or false when the element counts differ.
   */
  function _Array_Combine($_Arr1, $_Arr2)
  {
      $_Keys = array_values($_Arr1);
      $_Values = array_values($_Arr2);
      $_Count = count($_Keys);
      if ($_Count !== count($_Values)) {
          return false;
      }

      $_Res = array();
      for ($i = 0; $i < $_Count; $i++) {
          $_Res[$_Keys[$i]] = $_Values[$i];
      }
      return $_Res;
  }
  // Demo 1: default mode, where the argument is treated as GB2312-encoded bytes.
  // NOTE(review): the literal is only GB2312 if this file is saved in that encoding — confirm.
  echo Pinyin('张洪华');
  // Demo 2: any second argument other than 'gb2312' marks the input as UTF-8.
  echo Pinyin('张洪华',1);
  ?>
复制代码

That is the Chinese-character-to-Pinyin code for this section. It is quite simple, and we hope it serves you well in your work.

Attached: download of the complete code.



Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn