Home  >  Article  >  Java  >  Example tutorial of using pinyin4j

Example tutorial of using pinyin4j

PHP中文网
PHP中文网Original
2017-06-20 16:27:02 · 1337 views
Usage of pinyin4j
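pinyin4j is a Java toolkit for converting Chinese characters to pinyin. It can produce pinyin in a variety of output formats (upper or lower case, with or without tones, different spellings of "ü") and it handles characters that have more than one reading. The example class below shows how to use it.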
 

import java.util.HashSet;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

public class PingYingChange {

   /**
* Get the first letter of the Chinese pinyin string, the English characters remain unchanged
*
* @param chinese Chinese character string
* @return The first letter of the Chinese pinyin
*/
   public static String cn2FirstSpell(String chinese) {
           StringBuffer pybf = new StringBuffer();
           char[] arr = chinese.toCharArray();
           HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
           defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
           defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
           for (int i = 0; i < arr.length; i++) {
                   if (arr[i] > 128) {
                           try {
                                   String[] _t = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
                                   if (_t != null) {
                                           pybf.append(_t[0].charAt(0));
                                   }
                           } catch (BadHanyuPinyinOutputFormatCombination e) {
                                   e.printStackTrace();
                           }
                   } else {
                           pybf.append(arr[i]);
                   }
           }
           return pybf.toString().replaceAll("\\W", "").trim();
   }

/**
* Get the pinyin of the Chinese character string, the English characters remain unchanged
*
* @param chinese Chinese character string
* @return Chinese pinyin
*/
   public static String cn2Spell(String chinese) {
           StringBuffer pybf = new StringBuffer();
           char[] arr = chinese.toCharArray();
           HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
           defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
           defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
           for (int i = 0; i < arr.length; i++) {
                   if (arr[i] > 128) {
                           try {
                                   pybf.append(PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat)[0]);
                           } catch (BadHanyuPinyinOutputFormatCombination e) {
                                   e.printStackTrace();
                           }
                   } else {
                           pybf.append(arr[i]);
                   }
           }
           return pybf.toString();
   }
public static String cnToSpell(String chines) {

String pinyinName = "";
StringBuffer strbuf = new StringBuffer();
char[] nameChar = chines.toCharArray();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
for (int i = 0; i < nameChar.length; i++) {
char name = quanbianban(nameChar[i]);
nameChar[i] = name;
if (128 < nameChar[i]) {
try {
strbuf.append(PinyinHelper.toHanyuPinyinStringArray(nameChar[i], defaultFormat)[0].charAt(0));
} catch (BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
}
} else {
strbuf.append(nameChar[i]);
}
}

pinyinName = strbuf.toString();

return pinyinName;
}

public static char quanbianban(char quan) {
switch (quan) {

case '0':
return '0';

case '1':
return '1';

case '2':
return '2';

case '3':
return '3';

case '4':
return '4';

case '5':
return '5';

case '6':
return '6';

case '7':
return '7';

case '8':
return '8';

case '9':
return '9';

default:
return quan;

}
}
/**
* String set conversion string (comma separated)
* @author wyh
* @param stringSet
* @return
*/
public static String makeStringByStringSet(Set stringSet){
StringBuilder str = new StringBuilder();
int i =0;
for(String s : stringSet){
if(i == stringSet.size() - 1){
str.append(s);
}else{
str.append(s + ",");
}
i++;
}
return str.toString().toLowerCase();
}

/* *
* Get the pinyin set
* @author wyh
* @param src
* @return Set
*/
public static Set getPinyin(String src){
if(src!=null && !src.trim().equalsIgnoreCase("")){
char[ ] srcChar ;
srcChar=src.toCharArray();
//Hanyu Pinyin format output class
HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat();

//Output settings, upper and lower case, phonetic symbols Methods, etc.
hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
String[][] temp = new String[src.length()][];
for(int i=0;ichar c = srcChar[i];
//is Chinese Or convert pinyin from a-z or A-Z (my requirement is to keep Chinese or a-z or A-Z)
if(String.valueOf(c).matches("[\\u4E00-\\u9FA5]+")){
try{
temp[i] = PinyinHelper.toHanyuPinyinStringArray(srcChar[i], hanYuPinOutputFormat);
}catch(BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
}
} else if(((int)c>=65 && (int)c<=90) || ((int)c>=97 && (int)c<=122)){
temp[i] = new String[]{String.valueOf(srcChar[i])};
}else{
temp[i] = new String[]{""};
}
}
String [] pingyinArray = Exchange(temp);
Set pinyinSet = new HashSet();
for(int i=0;ipinyinSet. add(pingyinArray[i]);
}
return pinyinSet;
}
return null;
}

/**
* Recursion
* @author wyh
* @param strJaggedArray
* @return
*/
public static String[] Exchange(String[][] strJaggedArray){
String[][] temp = DoExchange(strJaggedArray);
return temp[0];
}

/* *
* Recursive
* @author wyh
* @param strJaggedArray
* @return
*/
private static String[][] DoExchange(String[][] strJaggedArray){
int len ​​= strJaggedArray.length;
if(len >= 2){            
int len1 = strJaggedArray[0].length;
int len2 = strJaggedArray[1].length;
int newlen = len1*len2;
String[] temp = new String[newlen];
int Index = 0;
for(int i=0;ifor(int j=0;jtemp[Index] = strJaggedArray[ 0][i] + strJaggedArray[1][j];
Index ++;
}
}
String[][] newArray = new String[len-1][];
for(int i=2;inewArray[i-1] = strJaggedArray[i]; (newArray);
}else{
return strJaggedArray;
}
}

public static void main(String[] args) throws Exception {
HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

// UPPERCASE: uppercase (ZHONG)
// LOWERCASE: lowercase ( zhong)
format.setCaseType(HanyuPinyinCaseType.LOWERCASE);

// WITHOUT_TONE: No phonetic symbols (zhong)
// WITH_TONE_NUMBER: 1-4 numbers represent British symbols (zhong4)
/ / WITH_TONE_MARK: Use phonetic symbols directly (must WITH_U_UNICODE otherwise exception) (zhòng)
format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK);

// WITH_V: Use v to represent ü (nv)
// WITH_U_AND_COLON: Use "u:" to represent ü (nu:)
// WITH_U_UNICODE: Use ü (nü) directly
format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);
String[] pinyin = PinyinHelper.toHanyuPinyinStringArray( 'heavy', format);
System.out.println(PinyinHelper.toHanyuPinyinStringArray('heavy', format)[1]);
System.out.println(PingYingChange.cnToSpell("Zhenjiang abc")) ;
System.out.println(PingYingChange.quanbianban('o'));
String x = "囧Who said bad students will send videos of their crotch";
System.out.println( cn2FirstSpell(x));
System.out.println(cn2Spell(x));
String str = "Shan Tianfang";
System.out.println(makeStringByStringSet(getPinyin(str))) ;
System.out.println("");
}
}

##

The above is the detailed content of Example tutorial of using pinyin4j. For more information, please follow other related articles on the PHP Chinese website!

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn