최근 프로젝트에서 프론트엔드에 데이터를 제공하는 개발 언어로 PHP를 사용했는데, 알파벳(첫 글자) 순으로 정렬하려면 한자의 병음을 추출해야 했습니다. 그래서 한자를 병음으로 변환하는 스크립트를 작성했습니다. 스크립트가 비교적 간단하고 주석도 상세한 편이므로 여기서는 자세히 설명하지 않고 바로 코드를 살펴보겠습니다.
사용법:
- $py = new PinYin();
- $all_py = $py->get_all_py("武国伟"); // ['wu','guo','wei'] 출력. 문자열로 출력하려면 join('', $all_py)를 호출
- $first_py = $py->get_first_py($all_py); // wgw 출력
- $first_letter = $py->get_first_letter($all_py); // w 출력
코드 복사
소스코드:
/**
 * Converts Chinese characters to Hanyu Pinyin (no tone marks).
 *
 * The conversion works on GB2312 bytes: GB2312 level-1 characters are stored
 * in pinyin order, so every syllable owns one contiguous range of code values
 * and a character's pinyin can be found from the range its code falls into.
 * Characters outside GB2312 level 1 (rare characters, traditional forms,
 * full-width punctuation) have no entry and map to an empty string.
 *
 * Usage:
 *   $py = new PinYin();
 *   $all_py = $py->get_all_py("武国伟");               // ['wu', 'guo', 'wei']
 *   $first_py = $py->get_first_py($all_py);            // 'wgw'
 *   $first_letter = $py->get_first_letter($all_py);    // 'w'
 *
 * Call join('', $all_py) to get the pinyin as a single string.
 */
class PinYin
{
    /**
     * Pinyin syllable => code of the first GB2312 character with that reading.
     *
     * A code is (lead_byte * 256 + trail_byte - 65536). The entries MUST stay
     * in ascending code order because zh_to_py() scans them sequentially and
     * stops at the first code greater than the one being looked up.
     */
    private $dict_list = array(
        'a' => -20319, 'ai' => -20317, 'an' => -20304, 'ang' => -20295, 'ao' => -20292,
        'ba' => -20283, 'bai' => -20265, 'ban' => -20257, 'bang' => -20242, 'bao' => -20230,
        'bei' => -20051, 'ben' => -20036, 'beng' => -20032, 'bi' => -20026, 'bian' => -20002,
        'biao' => -19990, 'bie' => -19986, 'bin' => -19982, 'bing' => -19976, 'bo' => -19805,
        'bu' => -19784,
        'ca' => -19775, 'cai' => -19774, 'can' => -19763, 'cang' => -19756, 'cao' => -19751,
        'ce' => -19746, 'ceng' => -19741, 'cha' => -19739, 'chai' => -19728, 'chan' => -19725,
        'chang' => -19715, 'chao' => -19540, 'che' => -19531, 'chen' => -19525, 'cheng' => -19515,
        'chi' => -19500, 'chong' => -19484, 'chou' => -19479, 'chu' => -19467, 'chuai' => -19289,
        'chuan' => -19288, 'chuang' => -19281, 'chui' => -19275, 'chun' => -19270, 'chuo' => -19263,
        'ci' => -19261, 'cong' => -19249, 'cou' => -19243, 'cu' => -19242, 'cuan' => -19238,
        'cui' => -19235, 'cun' => -19227, 'cuo' => -19224,
        'da' => -19218, 'dai' => -19212, 'dan' => -19038, 'dang' => -19023, 'dao' => -19018,
        'de' => -19006, 'deng' => -19003, 'di' => -18996, 'dian' => -18977, 'diao' => -18961,
        'die' => -18952, 'ding' => -18783, 'diu' => -18774, 'dong' => -18773, 'dou' => -18763,
        'du' => -18756, 'duan' => -18741, 'dui' => -18735, 'dun' => -18731, 'duo' => -18722,
        'e' => -18710, 'en' => -18697, 'er' => -18696,
        'fa' => -18526, 'fan' => -18518, 'fang' => -18501, 'fei' => -18490, 'fen' => -18478,
        'feng' => -18463, 'fo' => -18448, 'fou' => -18447, 'fu' => -18446,
        'ga' => -18239, 'gai' => -18237, 'gan' => -18231, 'gang' => -18220, 'gao' => -18211,
        'ge' => -18201, 'gei' => -18184, 'gen' => -18183, 'geng' => -18181, 'gong' => -18012,
        'gou' => -17997, 'gu' => -17988, 'gua' => -17970, 'guai' => -17964, 'guan' => -17961,
        'guang' => -17950, 'gui' => -17947, 'gun' => -17931, 'guo' => -17928,
        'ha' => -17922, 'hai' => -17759, 'han' => -17752, 'hang' => -17733, 'hao' => -17730,
        'he' => -17721, 'hei' => -17703, 'hen' => -17701, 'heng' => -17697, 'hong' => -17692,
        'hou' => -17683, 'hu' => -17676, 'hua' => -17496, 'huai' => -17487, 'huan' => -17482,
        'huang' => -17468, 'hui' => -17454, 'hun' => -17433, 'huo' => -17427,
        'ji' => -17417, 'jia' => -17202, 'jian' => -17185, 'jiang' => -16983, 'jiao' => -16970,
        'jie' => -16942, 'jin' => -16915, 'jing' => -16733, 'jiong' => -16708, 'jiu' => -16706,
        'ju' => -16689, 'juan' => -16664, 'jue' => -16657, 'jun' => -16647,
        'ka' => -16474, 'kai' => -16470, 'kan' => -16465, 'kang' => -16459, 'kao' => -16452,
        'ke' => -16448, 'ken' => -16433, 'keng' => -16429, 'kong' => -16427, 'kou' => -16423,
        'ku' => -16419, 'kua' => -16412, 'kuai' => -16407, 'kuan' => -16403, 'kuang' => -16401,
        'kui' => -16393, 'kun' => -16220, 'kuo' => -16216,
        'la' => -16212, 'lai' => -16205, 'lan' => -16202, 'lang' => -16187, 'lao' => -16180,
        'le' => -16171, 'lei' => -16169, 'leng' => -16158, 'li' => -16155, 'lia' => -15959,
        'lian' => -15958, 'liang' => -15944, 'liao' => -15933, 'lie' => -15920, 'lin' => -15915,
        'ling' => -15903, 'liu' => -15889, 'long' => -15878, 'lou' => -15707, 'lu' => -15701,
        'lv' => -15681, 'luan' => -15667, 'lue' => -15661, 'lun' => -15659, 'luo' => -15652,
        'ma' => -15640, 'mai' => -15631, 'man' => -15625, 'mang' => -15454, 'mao' => -15448,
        'me' => -15436, 'mei' => -15435, 'men' => -15419, 'meng' => -15416, 'mi' => -15408,
        'mian' => -15394, 'miao' => -15385, 'mie' => -15377, 'min' => -15375, 'ming' => -15369,
        'miu' => -15363, 'mo' => -15362, 'mou' => -15183, 'mu' => -15180,
        'na' => -15165, 'nai' => -15158, 'nan' => -15153, 'nang' => -15150, 'nao' => -15149,
        'ne' => -15144, 'nei' => -15143, 'nen' => -15141, 'neng' => -15140, 'ni' => -15139,
        'nian' => -15128, 'niang' => -15121, 'niao' => -15119, 'nie' => -15117, 'nin' => -15110,
        'ning' => -15109, 'niu' => -14941, 'nong' => -14937, 'nu' => -14933, 'nv' => -14930,
        'nuan' => -14929, 'nue' => -14928, 'nuo' => -14926,
        'o' => -14922, 'ou' => -14921,
        'pa' => -14914, 'pai' => -14908, 'pan' => -14902, 'pang' => -14894, 'pao' => -14889,
        'pei' => -14882, 'pen' => -14873, 'peng' => -14871, 'pi' => -14857, 'pian' => -14678,
        'piao' => -14674, 'pie' => -14670, 'pin' => -14668, 'ping' => -14663, 'po' => -14654,
        'pu' => -14645,
        'qi' => -14630, 'qia' => -14594, 'qian' => -14429, 'qiang' => -14407, 'qiao' => -14399,
        'qie' => -14384, 'qin' => -14379, 'qing' => -14368, 'qiong' => -14355, 'qiu' => -14353,
        'qu' => -14345, 'quan' => -14170, 'que' => -14159, 'qun' => -14151,
        'ran' => -14149, 'rang' => -14145, 'rao' => -14140, 're' => -14137, 'ren' => -14135,
        'reng' => -14125, 'ri' => -14123, 'rong' => -14122, 'rou' => -14112, 'ru' => -14109,
        'ruan' => -14099, 'rui' => -14097, 'run' => -14094, 'ruo' => -14092,
        'sa' => -14090, 'sai' => -14087, 'san' => -14083, 'sang' => -13917, 'sao' => -13914,
        'se' => -13910, 'sen' => -13907, 'seng' => -13906, 'sha' => -13905, 'shai' => -13896,
        'shan' => -13894, 'shang' => -13878, 'shao' => -13870, 'she' => -13859, 'shen' => -13847,
        'sheng' => -13831, 'shi' => -13658, 'shou' => -13611, 'shu' => -13601, 'shua' => -13406,
        'shuai' => -13404, 'shuan' => -13400, 'shuang' => -13398, 'shui' => -13395, 'shun' => -13391,
        'shuo' => -13387, 'si' => -13383, 'song' => -13367, 'sou' => -13359, 'su' => -13356,
        'suan' => -13343, 'sui' => -13340, 'sun' => -13329, 'suo' => -13326,
        'ta' => -13318, 'tai' => -13147, 'tan' => -13138, 'tang' => -13120, 'tao' => -13107,
        'te' => -13096, 'teng' => -13095, 'ti' => -13091, 'tian' => -13076, 'tiao' => -13068,
        'tie' => -13063, 'ting' => -13060, 'tong' => -12888, 'tou' => -12875, 'tu' => -12871,
        'tuan' => -12860, 'tui' => -12858, 'tun' => -12852, 'tuo' => -12849,
        'wa' => -12838, 'wai' => -12831, 'wan' => -12829, 'wang' => -12812, 'wei' => -12802,
        'wen' => -12607, 'weng' => -12597, 'wo' => -12594, 'wu' => -12585,
        'xi' => -12556, 'xia' => -12359, 'xian' => -12346, 'xiang' => -12320, 'xiao' => -12300,
        'xie' => -12120, 'xin' => -12099, 'xing' => -12089, 'xiong' => -12074, 'xiu' => -12067,
        'xu' => -12058, 'xuan' => -12039, 'xue' => -11867, 'xun' => -11861,
        'ya' => -11847, 'yan' => -11831, 'yang' => -11798, 'yao' => -11781, 'ye' => -11604,
        'yi' => -11589, 'yin' => -11536, 'ying' => -11358, 'yo' => -11340, 'yong' => -11339,
        'you' => -11324, 'yu' => -11303, 'yuan' => -11097, 'yue' => -11077, 'yun' => -11067,
        'za' => -11055, 'zai' => -11052, 'zan' => -11045, 'zang' => -11041, 'zao' => -11038,
        'ze' => -11024, 'zei' => -11020, 'zen' => -11019, 'zeng' => -11018, 'zha' => -11014,
        'zhai' => -10838, 'zhan' => -10832, 'zhang' => -10815, 'zhao' => -10800, 'zhe' => -10790,
        'zhen' => -10780, 'zheng' => -10764, 'zhi' => -10587, 'zhong' => -10544, 'zhou' => -10533,
        'zhu' => -10519, 'zhua' => -10331, 'zhuai' => -10329, 'zhuan' => -10328, 'zhuang' => -10322,
        'zhui' => -10315, 'zhun' => -10309, 'zhuo' => -10307, 'zi' => -10296, 'zong' => -10281,
        'zou' => -10274, 'zu' => -10270, 'zuan' => -10262, 'zui' => -10260, 'zun' => -10256,
        'zuo' => -10254
    );

    /**
     * Returns the pinyin of every character, e.g. "张三丰" ==> ['zhang', 'san', 'feng'].
     *
     * ASCII characters are passed through unchanged as one-character entries;
     * characters without a table entry produce an empty-string entry.
     *
     * @param string $chinese text to convert
     * @param string $charset encoding of $chinese; anything other than
     *                        'gb2312' (case-insensitive) is treated as UTF-8
     * @return array one entry per input character
     */
    public function get_all_py($chinese, $charset = 'utf-8')
    {
        if (strtolower($charset) != 'gb2312') {
            $chinese = $this->_U2_Utf8_Gb($chinese);
        }

        return $this->zh_to_pys($chinese);
    }

    /**
     * Returns the initials of all syllables, e.g. ['zhang', 'san', 'feng'] ==> 'zsf'.
     *
     * Null and empty entries are skipped.
     *
     * @param array $all_pys syllables as returned by get_all_py()
     * @return string concatenated first letters, '' when there are none
     */
    public function get_first_py($all_pys)
    {
        // Guard non-arrays too: count() on them warns (PHP 7.2+) or throws (PHP 8).
        if (!is_array($all_pys) || count($all_pys) <= 0) {
            return '';
        }
        $result = [];
        foreach ($all_pys as $one) {
            if (is_null($one) || strlen($one) <= 0) {
                continue;
            }
            $result[] = substr($one, 0, 1);
        }
        return implode('', $result);
    }

    /**
     * Returns the first letter of the first non-empty syllable,
     * e.g. ['zhang', 'san', 'feng'] ==> 'z'.
     *
     * @param array $all_pys syllables as returned by get_all_py()
     * @return string a single letter, or '' when no usable syllable exists
     */
    public function get_first_letter($all_pys)
    {
        if (!is_array($all_pys) || count($all_pys) <= 0) {
            return '';
        }
        foreach ($all_pys as $one) {
            if (is_null($one) || strlen($one) <= 0) {
                continue;
            }
            return substr($one, 0, 1);
        }
        // Same "nothing found" value as the empty-input case above.
        return '';
    }

    /**
     * Transcodes a UTF-8 string to GB2312.
     *
     * The string is handed to iconv() as-is. Comparing it against code-point
     * limits first (as if it were a single integer) only passed the text
     * through by accident of PHP < 8 loose comparison and yields an empty
     * string on PHP 8.
     *
     * @param string $_C UTF-8 text
     * @return string GB2312 bytes, or '' when iconv() cannot convert the input
     *                (e.g. it contains a character GB2312 does not define)
     */
    private function _U2_Utf8_Gb($_C)
    {
        $_String = iconv('UTF-8', 'GB2312', (string) $_C);

        return $_String === false ? '' : $_String;
    }

    /**
     * Maps one code to its pinyin syllable.
     *
     * @param int    $num   a single-byte value (1..159), or a two-byte GB2312
     *                      code expressed as lead * 256 + trail - 65536
     * @param string $blank value returned for codes outside the table range
     * @return string the character itself for single-byte values, the pinyin
     *                syllable for GB2312 level-1 codes, otherwise $blank
     */
    private function zh_to_py($num, $blank = '')
    {
        if ($num > 0 && $num < 160) {
            return chr($num);
        } elseif ($num < -20319 || $num > -10247) {
            return $blank;
        } else {
            // The table is in ascending order: keep the last syllable whose
            // starting code is not greater than $num.
            $result = $blank;
            foreach ($this->dict_list as $py => $code) {
                if ($code > $num) break;
                $result = $py;
            }
            return $result;
        }
    }

    /**
     * Walks a GB2312 byte string and converts it character by character.
     *
     * @param string $chinese GB2312-encoded bytes
     * @return array one zh_to_py() result per character
     */
    private function zh_to_pys($chinese)
    {
        $result = array();
        $length = strlen($chinese);
        for ($i = 0; $i < $length; $i++) {
            $p = ord(substr($chinese, $i, 1));
            if ($p > 160) {
                // Lead byte of a two-byte character: consume the trail byte
                // and fold both into the signed code used by the table.
                $q = ord(substr($chinese, ++$i, 1));
                $p = $p * 256 + $q - 65536;
            }
            $result[] = $this->zh_to_py($p);
        }
        return $result;
    }
}
-
코드 복사
|