Warning: Can only detect less than 5000 characters
Recently I needed to implement search by first letter (pinyin initials). Code for extracting the first letter of a Chinese character's pinyin is already available in several languages, so I simply took it and used it directly. Without further ado, here is the code.
JavaScript: only the core pinyin-extraction code is kept here; if you need the complete version, leave your email address or send me a private message.
Import java.io.unsupportedEncodingexception;
/ **
*
* @Author yuki_ho
*
* /
Public class chinesechartoenutil {
Private final static int [] li_secposvalue = {1601, 1637, 1833, 2078, 2274,
2302, 2433, 2594, 2787, 3106, 3212, 3472, 3635, 3722, 3730, 3858,
4027, 4086, 4390, 4558, 4684, 4925, 5249, 5590};
Private final static string [] lc_firstletter = {“a”, “b”, “c”, “d”, “e”,
“f”, “g”, “h”, “j”, “k”, “l”, “m”, “n”, “o”, “p”, “q”, “r”, “s ”
“t”, “w”, “x”, “y”, “z”};
Be
/ **
* Acquisition of the first letter string of a given Chinese string, that is, sound mother strings
* @Param STR gives a Chinese character string
* @Return
* /
Public String getAllFirstletter (String Str) {
IF (str == null || Str.trim (). Length () == 0) {
Return “”
}
Be
String _str = “”;
For (int I = 0; i 1) // Judging is Chinese characters
{
INT Li_sectorcode = (int) Chinese.Charat (0); // Chinese character code
INT Li_PositionCode = (int) Chinese.Charat (1); // Chinese character code
Li_sectorcode = li_sectorcode – 160;
LI_POSITIONCODE = li_positioncode – 160;
INT Li_secposcode = li_sectorcode * 100 + li_positioncode; // Chinese character location code
IF (li_secposcode> 1600 && li_secposcode <5590) {
For (int i = 0; i <23; i ++) {
IF (li_secposcode> = li_secposvalue [i]
&&l_secposcode
Required package: net.sourceforge.pinyin4j (pinyin4j)
Import Net.SourceForge.pinyin4j.pinyinhelper;
Import net.sourceforge.pinyin4j.format.HanyupinyIncaseType;
Import net.sourceforge.pinyin4j.format.HanyupinyinOutputFormat;
Import net.sourceforge.pinyin4j.format.HanyupinyintonetyPE;
Import net.sourceforge.pinyin4j.format.Hanyupinyinvchartype;
Import net.sourceforge.pinyin4j.format.exception.badhanyupinyinoutputFormatCombination;
/ **
*
* @Author yuki_ho
* @time 2017-07-25
* /
Public class chinesechartoenutil {
/ **
* Transform Chinese in the string into pinyin, other characters unchanged
*
* @Param INPUTSTRING
* @Return
* /
Public static string getPingyin (String InputString) {
HanyupinyinoutputFormat Format = new hanyupinyinoutputFormat ();
Format.SetcaseType (HanyupinyIncaseType.LowerCase);
Format.SettonType (HanyupinyintONTYPE.without_tone);
Format.Setvchartype (Hanyupinyinvchartype.with_v);
CHAR [] INPUT = INPUTSTRING.TRIM (). TOCHARRAY ();
String Output = “”;
Try {
For (INT i = 0; I 128) {
Try {
String [] Temp = Pinyinhelper.tohanyupinyinstringArray (Arr [i], defaultformat;
IF (Temp! = NULL) {
Pybf.append (Temp [0] .Charat (0));
}
} catch (BadhanyupinyinoutputFormatcombination E) {
E.PrintStackTrace ();
}
Else {
Pybf.append (arr [i]);
}
}
Return pybf.tostring (). ReplaceAll (“\ w”, “”) .trim ();
}
/ **
* Get the pinyin of the Chinese character, the English character is constant
* @Param CHINESE Chinese character string
* @Return Chinese Pinyin
* /
Public static string getfullspell (string chinese) {
StringBuffer pybf = new stringbuffer ();
Char [] arr = Chinese.tochararray ();
HanyupinyinoutputFormat defaultformat = new hanyupinyinoutputFormat ();
DefaultFormat.SetcaseType (HanyupinyIncaseType.LowerCase);
DefaultFormat.SettonType (HanyupinyintonetyPE.without_tone);
For (INT i = 0; I 128) {
Try {
Pybf.Append (PinyinHelper.tohanyupinyinstringArray (Arr [i], defaultformat) [0]);
} catch (BadhanyupinyinoutputFormatcombination E) {
E.PrintStackTrace ();
}
Else {
Pybf.append (arr [i]);
}
}
Return pybf.tostring ();
}
Be
Public static void main (string [] args)
{
String CNSTR = “Coconut”;
System.out.println (“Coconut ->” + getPingyin (CNSTR));
String s = getFirstspell (“Coconut”);
System.out.println (“Working Coconut ->” + S);
StringBuffer SB = New StringBuffer (s);
IF (sb.length ()> 1)
{
String ss = sb.delete (1, sb.length ()). TOSTRING ();
System.out.println (“Working Coconut ->”
+ Character.touppercase (SS.TOCHARRAY () [0]) + “”
}
}
}
The first approach:
The code itself (note: some Chinese characters are not recognized by this method):
The second approach:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# A two-byte GBK character is mapped to the signed value
# ``byte0 * 256 + byte1 - 65536``.  Values below _FIRST_CODE are not covered
# by the table (single_get_first returns '' for them).
_FIRST_CODE = -20319

# (inclusive upper bound, letter) pairs in ascending order.  Each range starts
# right after the previous upper bound; the first one starts at _FIRST_CODE.
_LETTER_UPPER_BOUNDS = (
    (-20284, 'a'), (-19776, 'b'), (-19219, 'c'), (-18711, 'd'),
    (-18527, 'e'), (-18240, 'f'), (-17923, 'g'), (-17418, 'h'),
    (-16475, 'j'), (-16213, 'k'), (-15641, 'l'), (-15166, 'm'),
    (-14923, 'n'), (-14915, 'o'), (-14631, 'p'), (-14150, 'q'),
    (-14091, 'r'), (-13119, 's'), (-12839, 't'), (-12557, 'w'),
    (-11848, 'x'), (-11056, 'y'), (-10247, 'z'),
)


def multi_get_letter(str_input):
    """Return the pinyin first letter of every character in ``str_input``.

    :param str_input: text, or a byte string encoded as UTF-8 or GBK
        (UTF-8 is tried first, then GBK).
    :return: a list with one entry per character (see ``single_get_first``),
        or ``None`` after printing ``Unknown Coding`` when a byte string can
        be decoded with neither encoding.
    """
    if isinstance(str_input, bytes):  # ``bytes`` is ``str`` on Python 2
        unicode_str = None
        for encoding in ('utf8', 'gbk'):
            try:
                unicode_str = str_input.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if unicode_str is None:
            print('Unknown Coding')
            return None
    else:
        unicode_str = str_input
    return [single_get_first(one_unicode) for one_unicode in unicode_str]


def single_get_first(unicode1):
    """Return the lower-case pinyin first letter of one character.

    :param unicode1: a single text character.
    :return: the character itself when GBK encodes it as a single byte
        (ASCII); the matching letter when its two-byte GBK code falls inside
        the lookup table; ``''`` when the code is outside the table or the
        character cannot be encoded as GBK at all.
    """
    try:
        encoded = bytearray(unicode1.encode('gbk'))
    except UnicodeEncodeError:
        # Not representable in GBK, so there is no code to look up.
        return ''
    if len(encoded) < 2:
        return unicode1

    asc = encoded[0] * 256 + encoded[1] - 65536
    if asc < _FIRST_CODE:
        return ''
    for upper_bound, letter in _LETTER_UPPER_BOUNDS:
        if asc <= upper_bound:
            return letter
    return ''


def main(str_input):
    """Print the first-letter string of ``str_input``."""
    letters = multi_get_letter(str_input)
    if letters is None:
        # The input could not be decoded; the problem was already reported.
        return
    print(''.join(letters))


if __name__ == "__main__":
    str_input = 'Tryed Coconut'
    main(str_input)
Python