In a recent project, PHP was used as the development language to provide data for the front end. In order to be able to sort by letters, it was necessary to extract the Chinese Pinyin. I used this project to write a script to convert Chinese characters to Pinyin. The script is relatively simple and the comments are relatively simple. I won’t go into details here, let’s go directly to the code.
Usage:
- $py = new PinYin();
- $all_py = $py->get_all_py("武国伟"); //Output ['wu','guo' ,'wei'], output the string and call the join method, join('',$all_py)
- $first_py = $py->get_first_py($all_py);//Output wgw
- $first_letter = $py->get_first_letter ($all_py);//Output w
Copy code
Source code:
- /**
- * +-------------------------------------------------- -------
- * PHP convert Chinese characters to Pinyin
- * +---------------------------------- --------------------
- * Usage:
- * $py = new PinYin();
- * $all_py = $py->get_all_py("武国伟" ); //Output ['wu','guo','wei'], output the string and call the join method, join('',$all_py)
- * $first_py = $py->get_first_py($all_py); //Output wgw
- * $first_letter = $py->get_first_letter($all_py);//Output w
- *
- * +--------------------- ----------------------------------
- */
- class PinYin
- {
-
- private $dict_list = array(
- 'a' => -20319, 'ai' => -20317, 'an' => -20304, 'ang' => -20295, 'ao' => -20292,
- 'ba' => -20283, 'bai' => -20265, 'ban' => -20257, 'bang' => -20242, 'bao' => -20230, 'bei' => -20051, 'ben' => -20036, 'beng' => -20032, 'bi' => -20026, 'bian' => -20002, 'biao' => -19990, 'bie' => -19986, 'bin' => -19982, 'bing' => -19976, 'bo' => -19805, 'bu' => -19784,
- 'ca' => -19775, 'cai' => -19774, 'can' => -19763, 'cang' => -19756, 'cao' => -19751, 'ce' => -19746, 'ceng' => -19741, 'cha' => -19739, 'chai' => -19728, 'chan' => -19725, 'chang' => -19715, 'chao' => -19540, 'che' => -19531, 'chen' => -19525, 'cheng' => -19515, 'chi' => -19500, 'chong' => -19484, 'chou' => -19479, 'chu' => -19467, 'chuai' => -19289, 'chuan' => -19288, 'chuang' => -19281, 'chui' => -19275, 'chun' => -19270, 'chuo' => -19263, 'ci' => -19261, 'cong' => -19249, 'cou' => -19243, 'cu' => -19242, 'cuan' => -19238, 'cui' => -19235, 'cun' => -19227, 'cuo' => -19224,
- 'da' => -19218, 'dai' => -19212, 'dan' => -19038, 'dang' => -19023, 'dao' => -19018, 'de' => -19006, 'deng' => -19003, 'di' => -18996, 'dian' => -18977, 'diao' => -18961, 'die' => -18952, 'ding' => -18783, 'diu' => -18774, 'dong' => -18773, 'dou' => -18763, 'du' => -18756, 'duan' => -18741, 'dui' => -18735, 'dun' => -18731, 'duo' => -18722,
- 'e' => -18710, 'en' => -18697, 'er' => -18696,
- 'fa' => -18526, 'fan' => -18518, 'fang' => -18501, 'fei' => -18490, 'fen' => -18478, 'feng' => -18463, 'fo' => -18448, 'fou' => -18447, 'fu' => -18446,
- 'ga' => -18239, 'gai' => -18237, 'gan' => -18231, 'gang' => -18220, 'gao' => -18211, 'ge' => -18201, 'gei' => -18184, 'gen' => -18183, 'geng' => -18181, 'gong' => -18012, 'gou' => -17997, 'gu' => -17988, 'gua' => -17970, 'guai' => -17964, 'guan' => -17961, 'guang' => -17950, 'gui' => -17947,
- 'gun' => -17931, 'guo' => -17928,
- 'ha' => -17922, 'hai' => -17759, 'han' => -17752, 'hang' => -17733, 'hao' => -17730, 'he' => -17721, 'hei' => -17703, 'hen' => -17701, 'heng' => -17697, 'hong' => -17692, 'hou' => -17683, 'hu' => -17676, 'hua' => -17496, 'huai' => -17487, 'huan' => -17482, 'huang' => -17468, 'hui' => -17454,
- 'hun' => -17433, 'huo' => -17427,
- 'ji' => -17417, 'jia' => -17202, 'jian' => -17185, 'jiang' => -16983, 'jiao' => -16970, 'jie' => -16942, 'jin' => -16915, 'jing' => -16733, 'jiong' => -16708, 'jiu' => -16706, 'ju' => -16689, 'juan' => -16664, 'jue' => -16657, 'jun' => -16647,
- 'ka' => -16474, 'kai' => -16470, 'kan' => -16465, 'kang' => -16459, 'kao' => -16452, 'ke' => -16448, 'ken' => -16433, 'keng' => -16429, 'kong' => -16427, 'kou' => -16423, 'ku' => -16419, 'kua' => -16412, 'kuai' => -16407, 'kuan' => -16403, 'kuang' => -16401, 'kui' => -16393, 'kun' => -16220, 'kuo' => -16216,
- 'la' => -16212, 'lai' => -16205, 'lan' => -16202, 'lang' => -16187, 'lao' => -16180, 'le' => -16171, 'lei' => -16169, 'leng' => -16158, 'li' => -16155, 'lia' => -15959, 'lian' => -15958, 'liang' => -15944, 'liao' => -15933, 'lie' => -15920, 'lin' => -15915, 'ling' => -15903, 'liu' => -15889,
- 'long' => -15878, 'lou' => -15707, 'lu' => -15701, 'lv' => -15681, 'luan' => -15667, 'lue' => -15661, 'lun' => -15659, 'luo' => -15652,
- 'ma' => -15640, 'mai' => -15631, 'man' => -15625, 'mang' => -15454, 'mao' => -15448, 'me' => -15436, 'mei' => -15435, 'men' => -15419, 'meng' => -15416, 'mi' => -15408, 'mian' => -15394, 'miao' => -15385, 'mie' => -15377, 'min' => -15375, 'ming' => -15369, 'miu' => -15363, 'mo' => -15362, 'mou' => -15183, 'mu' => -15180,
- 'na' => -15165, 'nai' => -15158, 'nan' => -15153, 'nang' => -15150, 'nao' => -15149, 'ne' => -15144, 'nei' => -15143, 'nen' => -15141, 'neng' => -15140, 'ni' => -15139, 'nian' => -15128, 'niang' => -15121, 'niao' => -15119, 'nie' => -15117, 'nin' => -15110, 'ning' => -15109, 'niu' => -14941,
- 'nong' => -14937, 'nu' => -14933, 'nv' => -14930, 'nuan' => -14929, 'nue' => -14928, 'nuo' => -14926,
- 'o' => -14922, 'ou' => -14921,
- 'pa' => -14914, 'pai' => -14908, 'pan' => -14902, 'pang' => -14894, 'pao' => -14889, 'pei' => -14882, 'pen' => -14873, 'peng' => -14871, 'pi' => -14857, 'pian' => -14678, 'piao' => -14674, 'pie' => -14670, 'pin' => -14668, 'ping' => -14663, 'po' => -14654, 'pu' => -14645,
- 'qi' => -14630, 'qia' => -14594, 'qian' => -14429, 'qiang' => -14407, 'qiao' => -14399, 'qie' => -14384, 'qin' => -14379, 'qing' => -14368, 'qiong' => -14355, 'qiu' => -14353, 'qu' => -14345, 'quan' => -14170, 'que' => -14159, 'qun' => -14151,
- 'ran' => -14149, 'rang' => -14145, 'rao' => -14140, 're' => -14137, 'ren' => -14135, 'reng' => -14125, 'ri' => -14123, 'rong' => -14122, 'rou' => -14112, 'ru' => -14109, 'ruan' => -14099, 'rui' => -14097, 'run' => -14094, 'ruo' => -14092,
- 'sa' => -14090, 'sai' => -14087, 'san' => -14083, 'sang' => -13917, 'sao' => -13914, 'se' => -13910, 'sen' => -13907, 'seng' => -13906, 'sha' => -13905, 'shai' => -13896, 'shan' => -13894, 'shang' => -13878, 'shao' => -13870, 'she' => -13859, 'shen' => -13847, 'sheng' => -13831, 'shi' => -13658, 'shou' => -13611, 'shu' => -13601, 'shua' => -13406, 'shuai' => -13404, 'shuan' => -13400, 'shuang' => -13398, 'shui' => -13395, 'shun' => -13391, 'shuo' => -13387, 'si' => -13383, 'song' => -13367, 'sou' => -13359, 'su' => -13356, 'suan' => -13343, 'sui' => -13340, 'sun' => -13329, 'suo' => -13326,
- 'ta' => -13318, 'tai' => -13147, 'tan' => -13138, 'tang' => -13120, 'tao' => -13107, 'te' => -13096, 'teng' => -13095, 'ti' => -13091, 'tian' => -13076, 'tiao' => -13068, 'tie' => -13063, 'ting' => -13060, 'tong' => -12888, 'tou' => -12875, 'tu' => -12871, 'tuan' => -12860, 'tui' => -12858, 'tun' => -12852, 'tuo' => -12849,
- 'wa' => -12838, 'wai' => -12831, 'wan' => -12829, 'wang' => -12812, 'wei' => -12802, 'wen' => -12607, 'weng' => -12597, 'wo' => -12594, 'wu' => -12585,
- 'xi' => -12556, 'xia' => -12359, 'xian' => -12346, 'xiang' => -12320, 'xiao' => -12300, 'xie' => -12120, 'xin' => -12099, 'xing' => -12089, 'xiong' => -12074, 'xiu' => -12067, 'xu' => -12058, 'xuan' => -12039, 'xue' => -11867, 'xun' => -11861,
- 'ya' => -11847, 'yan' => -11831, 'yang' => -11798, 'yao' => -11781, 'ye' => -11604, 'yi' => ; -11589, 'yin' => -11536, 'ying' => -11358, 'yo' => -11340, 'yong' => -11339, 'you' => -11324, ' yu' => -11303, 'yuan' => -11097, 'yue' => -11077, 'yun' => -11067,
- 'za' => -11055, 'zai' => ; -11052, 'zan' => -11045, 'zang' => -11041, 'zao' => -11038, 'ze' => -11024, 'zei' => -11020, ' zen' => -11019, 'zeng' => -11018, 'zha' => -11014, 'zhai' => -10838, 'zhan' => -10832, 'zhang' => -10815, 'zhao' => -10800, 'zhe' => -10790, 'zhen' => -10780, 'zheng' => -10764, 'zhi' => -10587, 'zhong ' => -10544, 'zhou' => -10533, 'zhu' => -10519, 'zhua' => -10331, 'zhuai' => -10329, 'zhuan' => - 10328, 'zhuang' => -10322, 'zhui' => -10315, 'zhun' => -10309, 'zhuo' => -10307, 'zi' => -10296, 'zong' => -10281, 'zou' => -10274, 'zu' => -10270, 'zuan' => -10262,
- 'zui' => -10260, 'zun' => - 10256, 'zuo' => -10254
- );
-
-
- /**
- * Get all pinyin and return the array of pinyin, such as 'Zhang Sanfeng' ==> ['zhang','san','feng']
- * @param $chinese
- * @param string $charset
- * @return array
- */
- public function get_all_py($chinese, $charset = 'utf-8')
- {
- if ($charset != 'gb2312') $chinese = $this->_U2_Utf8_Gb($chinese);
- $py = $this->zh_to_pys($chinese);
-
- return $py;
- }
-
- /**
- * Get the first letter of Pinyin, such as ['zhang','san','feng'] ==> zsf
- * @param $all_pys
- * @return string
- * /
- public function get_first_py($all_pys)
- {
- if (count($all_pys) return '';
- }
-
- $result = [];
- foreach ($all_pys as $one) {
- if (is_null($one) || strlen($one) continue;
- }
- $result[] = substr($one, 0, 1);
- }
-
- return join(' ', $result);
- }
-
- /**
- * Get the first letter of Pinyin, such as ['zhang','san','feng'] ==> z
- * @param $all_pys
- * @return string
- */
- public function get_first_letter($all_pys)
- {
- if (count($all_pys) return '';
- }
-
- foreach ($all_pys as $one) {
- if (is_null($one) || strlen($one) continue;
- }
- return substr($one, 0, 1);
- }
-
- return '';
- }
-
- private function _U2_Utf8_Gb($_C)
- {
- $_String = '';
- if ($_C elseif ($_C $_String .= chr(0xC0 | $_C >> 6);
- $_String .= chr(0x80 | $_C & 0x3F);
- } elseif ($_C $_String . = chr(0xE0 | $_C >> 12);
- $_String .= chr(0x80 | $_C >> 6 & 0x3F);
- $_String .= chr(0x80 | $_C & 0x3F);
- } elseif ($_C $_String .= chr(0xF0 | $_C >> 18);
- $_String .= chr(0x80 | $_C >> 12 & 0x3F);
- $ _String .= chr(0x80 | $_C >> 6 & 0x3F);
- $_String .= chr(0x80 | $_C & 0x3F);
- }
- return iconv('UTF-8', 'GB2312', $ _String);
- }
-
- private function zh_to_py($num, $blank = '')
- {
- if ($num > 0 && $num return chr($num);
- } elseif ( $num -10247) {
- return $blank;
- } else {
- foreach ($this->dict_list as $py => $code) {
- if ($code > ; $num) break;
- $result = $py;
- }
- return $result;
- }
- }
-
- private function zh_to_pys($chinese)
- {
- $result = array();
- for ($i = 0 ; $i $p = ord(substr($chinese, $i, 1));
- if ($p > 160) {
- $q = ord(substr ($chinese, ++$i, 1));
- $p = $p * 256 + $q - 65536;
- }
- $result[] = $this->zh_to_py($p);
- }
- return $result;
- }
- }
Copy code
|