makeqstrdata.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. """
  2. Process raw qstr file and output qstr data with length, hash and data bytes.
  3. This script works with Python 2.6, 2.7, 3.3 and 3.4.
  4. """
  5. from __future__ import print_function
  6. import re
  7. import sys
  8. # Python 2/3 compatibility:
  9. # - iterating through bytes is different
  10. # - codepoint2name lives in a different module
  11. import platform
  12. if platform.python_version_tuple()[0] == '2':
  13. bytes_cons = lambda val, enc=None: bytearray(val)
  14. from htmlentitydefs import codepoint2name
  15. elif platform.python_version_tuple()[0] == '3':
  16. bytes_cons = bytes
  17. from html.entities import codepoint2name
  18. # end compatibility code
  19. codepoint2name[ord('-')] = 'hyphen';
  20. # add some custom names to map characters that aren't in HTML
  21. codepoint2name[ord(' ')] = 'space'
  22. codepoint2name[ord('\'')] = 'squot'
  23. codepoint2name[ord(',')] = 'comma'
  24. codepoint2name[ord('.')] = 'dot'
  25. codepoint2name[ord(':')] = 'colon'
  26. codepoint2name[ord(';')] = 'semicolon'
  27. codepoint2name[ord('/')] = 'slash'
  28. codepoint2name[ord('%')] = 'percent'
  29. codepoint2name[ord('#')] = 'hash'
  30. codepoint2name[ord('(')] = 'paren_open'
  31. codepoint2name[ord(')')] = 'paren_close'
  32. codepoint2name[ord('[')] = 'bracket_open'
  33. codepoint2name[ord(']')] = 'bracket_close'
  34. codepoint2name[ord('{')] = 'brace_open'
  35. codepoint2name[ord('}')] = 'brace_close'
  36. codepoint2name[ord('*')] = 'star'
  37. codepoint2name[ord('!')] = 'bang'
  38. codepoint2name[ord('\\')] = 'backslash'
  39. codepoint2name[ord('+')] = 'plus'
  40. codepoint2name[ord('$')] = 'dollar'
  41. codepoint2name[ord('=')] = 'equals'
  42. codepoint2name[ord('?')] = 'question'
  43. codepoint2name[ord('@')] = 'at_sign'
  44. codepoint2name[ord('^')] = 'caret'
  45. codepoint2name[ord('|')] = 'pipe'
  46. codepoint2name[ord('~')] = 'tilde'
  47. # this must match the equivalent function in qstr.c
  48. def compute_hash(qstr, bytes_hash):
  49. hash = 5381
  50. for b in qstr:
  51. hash = (hash * 33) ^ b
  52. # Make sure that valid hash is never zero, zero means "hash not computed"
  53. return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
  54. def qstr_escape(qst):
  55. def esc_char(m):
  56. c = ord(m.group(0))
  57. try:
  58. name = codepoint2name[c]
  59. except KeyError:
  60. name = '0x%02x' % c
  61. return "_" + name + '_'
  62. return re.sub(r'[^A-Za-z0-9_]', esc_char, qst)
  63. def parse_input_headers(infiles):
  64. # read the qstrs in from the input files
  65. qcfgs = {}
  66. qstrs = {}
  67. for infile in infiles:
  68. with open(infile, 'rt') as f:
  69. for line in f:
  70. line = line.strip()
  71. # is this a config line?
  72. match = re.match(r'^QCFG\((.+), (.+)\)', line)
  73. if match:
  74. value = match.group(2)
  75. if value[0] == '(' and value[-1] == ')':
  76. # strip parenthesis from config value
  77. value = value[1:-1]
  78. qcfgs[match.group(1)] = value
  79. continue
  80. # is this a QSTR line?
  81. match = re.match(r'^Q\((.*)\)$', line)
  82. if not match:
  83. continue
  84. # get the qstr value
  85. qstr = match.group(1)
  86. # special case to specify control characters
  87. if qstr == '\\n':
  88. qstr = '\n'
  89. # work out the corresponding qstr name
  90. ident = qstr_escape(qstr)
  91. # don't add duplicates
  92. if ident in qstrs:
  93. continue
  94. # add the qstr to the list, with order number to retain original order in file
  95. order = len(qstrs)
  96. # but put special method names like __add__ at the top of list, so
  97. # that their id's fit into a byte
  98. if ident == "":
  99. # Sort empty qstr above all still
  100. order = -200000
  101. elif ident == "__dir__":
  102. # Put __dir__ after empty qstr for builtin dir() to work
  103. order = -190000
  104. elif ident.startswith("__"):
  105. order -= 100000
  106. qstrs[ident] = (order, ident, qstr)
  107. if not qcfgs:
  108. sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
  109. sys.exit(1)
  110. return qcfgs, qstrs
  111. def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
  112. qbytes = bytes_cons(qstr, 'utf8')
  113. qlen = len(qbytes)
  114. qhash = compute_hash(qbytes, cfg_bytes_hash)
  115. if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
  116. # qstr is all printable ASCII so render it as-is (for easier debugging)
  117. qdata = qstr
  118. else:
  119. # qstr contains non-printable codes so render entire thing as hex pairs
  120. qdata = ''.join(('\\x%02x' % b) for b in qbytes)
  121. if qlen >= (1 << (8 * cfg_bytes_len)):
  122. print('qstr is too long:', qstr)
  123. assert False
  124. qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
  125. qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
  126. return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)
  127. def print_qstr_data(qcfgs, qstrs):
  128. # get config variables
  129. cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
  130. cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
  131. # print out the starter of the generated C header file
  132. print('// This file was automatically generated by makeqstrdata.py')
  133. print('')
  134. # add NULL qstr with no hash or data
  135. print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len))
  136. # go through each qstr and print it out
  137. for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
  138. qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
  139. print('QDEF(MP_QSTR_%s, %s)' % (ident, qbytes))
  140. def do_work(infiles):
  141. qcfgs, qstrs = parse_input_headers(infiles)
  142. print_qstr_data(qcfgs, qstrs)
  143. if __name__ == "__main__":
  144. do_work(sys.argv[1:])