# unicode.py (1.2 KB)

  1. # Test a UTF-8 encoded literal
  2. s = "asdf©qwer"
  3. for i in range(len(s)):
  4. print("s[%d]: %s %X"%(i, s[i], ord(s[i])))
  5. # Test all three forms of Unicode escape, and
  6. # all blocks of UTF-8 byte patterns
  7. s = "a\xA9\xFF\u0123\u0800\uFFEE\U0001F44C"
  8. for i in range(-len(s), len(s)):
  9. print("s[%d]: %s %X"%(i, s[i], ord(s[i])))
  10. print("s[:%d]: %d chars, '%s'"%(i, len(s[:i]), s[:i]))
  11. for j in range(i, len(s)):
  12. print("s[%d:%d]: %d chars, '%s'"%(i, j, len(s[i:j]), s[i:j]))
  13. print("s[%d:]: %d chars, '%s'"%(i, len(s[i:]), s[i:]))
  14. # Test UTF-8 encode and decode
  15. enc = s.encode()
  16. print(enc, enc.decode() == s)
  17. # printing of unicode chars using repr
  18. # NOTE: for some characters (eg \u10ff) we differ to CPython
  19. print(repr('a\uffff'))
  20. print(repr('a\U0001ffff'))
  21. # test invalid escape code
  22. try:
  23. eval('"\\U00110000"')
  24. except SyntaxError:
  25. print('SyntaxError')
  26. # test unicode string given to int
  27. try:
  28. int('\u0200')
  29. except ValueError:
  30. print('ValueError')
  31. # test invalid UTF-8 string
  32. try:
  33. str(b'ab\xa1', 'utf8')
  34. except UnicodeError:
  35. print('UnicodeError')
  36. try:
  37. str(b'ab\xf8', 'utf8')
  38. except UnicodeError:
  39. print('UnicodeError')
  40. try:
  41. str(bytearray(b'ab\xc0a'), 'utf8')
  42. except UnicodeError:
  43. print('UnicodeError')