# Unicode string tests: indexing, slicing, escape sequences, UTF-8
# encode/decode round-trip, repr of non-BMP/non-character code points,
# and the exceptions raised for invalid input.
# The printed output is the test result, so format strings and the
# order of statements must not change.

# Test a UTF-8 encoded literal
s = "asdf©qwer"
# Index explicitly (rather than iterating the string) because per-index
# access s[i] is the behaviour being exercised.
for i in range(len(s)):
    print("s[%d]: %s %X"%(i, s[i], ord(s[i])))

# Test all three forms of Unicode escape (\x, \u, \U), and
# all blocks of UTF-8 byte patterns (1-, 2-, 3- and 4-byte sequences)
s = "a\xA9\xFF\u0123\u0800\uFFEE\U0001F44C"
# Walk every valid index, negative and non-negative, and for each one
# exercise single-character indexing plus prefix, range and suffix slices.
for i in range(-len(s), len(s)):
    print("s[%d]: %s %X"%(i, s[i], ord(s[i])))
    print("s[:%d]: %d chars, '%s'"%(i, len(s[:i]), s[:i]))
    for j in range(i, len(s)):
        print("s[%d:%d]: %d chars, '%s'"%(i, j, len(s[i:j]), s[i:j]))
    print("s[%d:]: %d chars, '%s'"%(i, len(s[i:]), s[i:]))

# Test UTF-8 encode and decode: the round trip must reproduce the string
enc = s.encode()
print(enc, enc.decode() == s)

# printing of unicode chars using repr
# NOTE: for some characters (eg \u10ff) we differ from CPython
print(repr('a\uffff'))
print(repr('a\U0001ffff'))

# test invalid escape code: 0x110000 is one past the last valid code point.
# eval() is used so the bad literal is compiled at run time and the error
# can be caught here instead of aborting compilation of this file.
try:
    eval('"\\U00110000"')
except SyntaxError:
    print('SyntaxError')

# test unicode string given to int: a non-digit character must be rejected
try:
    int('\u0200')
except ValueError:
    print('ValueError')

# test invalid UTF-8 string
# 0xa1 is a continuation byte with no preceding lead byte
try:
    str(b'ab\xa1', 'utf8')
except UnicodeError:
    print('UnicodeError')
# 0xf8 is not a valid UTF-8 lead byte
try:
    str(b'ab\xf8', 'utf8')
except UnicodeError:
    print('UnicodeError')
# 0xc0 is a 2-byte lead byte, but it is followed by ASCII 'a' instead of a
# continuation byte; also checks decoding from a bytearray
try:
    str(bytearray(b'ab\xc0a'), 'utf8')
except UnicodeError:
    print('UnicodeError')