Python 处理各种编码的字符串
2018-07-20 来源:open-open
# file: Unicode2.py
# -*- coding: utf-8 -*-
#
# Demonstrates chilkat.CkString: appending text in a named encoding,
# reading it back in other encodings, comparing character counts with
# byte counts, and saving the same text to files in several encodings.
#
# NOTE(review): the byte counts quoted below assume the getters return
# byte strings, as under Python 2 -- confirm before relying on them
# under Python 3.
import chilkat

# The CkString object can handle any character encoding.
s1 = chilkat.CkString()

# The appendEnc method allows us to append a string in any encoding.
s1.appendEnc('èéêëabc', 'utf-8')

# If you're working with different encodings, you may wish
# to name your string variables to reflect the encoding.
strAnsi = s1.getAnsi()
strUtf8 = s1.getUtf8()

# Prints "7"
print(len(strAnsi))

# Prints "11" (four 2-byte accented characters + three 1-byte ASCII ones)
print(len(strUtf8))

# getNumChars returns the number of characters
print('Num Chars: ' + str(s1.getNumChars()))

# utf-8 chars do not have a constant number of bytes/char.
# A single utf-8 char is represented in 1 to 4 bytes (RFC 3629; the
# original utf-8 design allowed sequences of up to 6 bytes).
print('utf-8: ' + str(s1.getSizeUtf8()))

# ANSI is typically 1 byte per char, but for some languages
# such as Japanese, ANSI equates to a character encoding that may
# not be 1 byte/char. (Shift_JIS is the ANSI encoding for Japanese)
print('ANSI: ' + str(s1.getSizeAnsi()))

# Let's create an English/Japanese string.
s2 = chilkat.CkString()
s2.appendEnc('abc愛知県新城市の', 'utf-8')

# We can get the string in any multibyte encoding.
print('s2 num chars = ' + str(s2.getNumChars()))

strShiftJIS = s2.getEnc('shift_JIS')
print('Shift-JIS num bytes = ' + str(len(strShiftJIS)))

strIso2022JP = s2.getEnc('iso-2022-jp')
print('iso-2022-jp num bytes = ' + str(len(strIso2022JP)))

strEucJp = s2.getEnc('euc-jp')
print('euc-jp num bytes = ' + str(len(strEucJp)))

# We can save the string in any encoding
s2.saveToFile('out_shift_jis.txt', 'shift_JIS')
s2.saveToFile('out_iso_2022_jp.txt', 'iso-2022-jp')
s2.saveToFile('out_utf8.txt', 'utf-8')
s2.saveToFile('out_euc_jp.txt', 'euc-jp')

# You may mix any number of languages in a utf-8 string
# because utf-8 can encode characters in all languages.
# (utf-8 is the multi-byte encoding of Unicode)
#
# An ANSI string can generally hold us-ascii + the native language.
# For example, Shift_JIS can represent us-ascii characters
# in addition to Japanese characters.

# For example, this is OK
strShiftJis = 'abc123' + s2.getEnc('shift_JIS')

# This is not OK: the literal below is not us-ascii, so the result mixes
# this source file's encoding with Shift_JIS in a single string. The line
# is kept on purpose as the counter-example; do not copy the pattern.
strShiftJis2 = '?στυφ' + s2.getEnc('shift_JIS')

print("Done!")
标签:
版权声明:本站文章部分来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点!
本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。
上一篇:python文件操作
最新资讯
热门推荐