cc1 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. #!/usr/bin/env python3
  2. """
  3. This is a middle-processor for MicroPython source files. It takes the output
  4. of the C preprocessor, has the option to change it, then feeds this into the
  5. C compiler.
  6. It currently has the ability to reorder static hash tables so they are actually
  7. hashed, resulting in faster lookup times at runtime.
  8. To use, configure the Python variables below, and add the following line to the
  9. Makefile:
  10. CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
  11. """
  12. import sys
  13. import os
  14. import re
  15. ################################################################################
  16. # these are the configuration variables
  17. # TODO somehow make them externally configurable
  18. # this is the path to the true C compiler
  19. cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
  20. #cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'
  21. # this must be the same as MICROPY_QSTR_BYTES_IN_HASH
  22. bytes_in_qstr_hash = 2
  23. # this must be 1 or more (can be a decimal)
  24. # larger uses more code size but yields faster lookups
  25. table_size_mult = 1
  26. # these control output during processing
  27. print_stats = True
  28. print_debug = False
  29. # end configuration variables
  30. ################################################################################
  31. # precompile regexs
  32. re_preproc_line = re.compile(r'# [0-9]+ ')
  33. re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
  34. re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
  35. re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
  36. re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
  37. # this must match the equivalent function in qstr.c
  38. def compute_hash(qstr):
  39. hash = 5381
  40. for char in qstr:
  41. hash = (hash * 33) ^ ord(char)
  42. # Make sure that valid hash is never zero, zero means "hash not computed"
  43. return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
  44. # this algo must match the equivalent in map.c
  45. def hash_insert(map, key, value):
  46. hash = compute_hash(key)
  47. pos = hash % len(map)
  48. start_pos = pos
  49. if print_debug:
  50. print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
  51. while True:
  52. if map[pos] is None:
  53. # found empty slot, so key is not in table
  54. if print_debug:
  55. print('put at %u' % pos)
  56. map[pos] = (key, value)
  57. return
  58. else:
  59. # not yet found, keep searching
  60. if map[pos][0] == key:
  61. raise AssertionError("duplicate key '%s'" % (key,))
  62. pos = (pos + 1) % len(map)
  63. assert pos != start_pos
  64. def hash_find(map, key):
  65. hash = compute_hash(key)
  66. pos = hash % len(map)
  67. start_pos = pos
  68. attempts = 0
  69. while True:
  70. attempts += 1
  71. if map[pos] is None:
  72. return attempts, None
  73. elif map[pos][0] == key:
  74. return attempts, map[pos][1]
  75. else:
  76. pos = (pos + 1) % len(map)
  77. if pos == start_pos:
  78. return attempts, None
  79. def process_map_table(file, line, output):
  80. output.append(line)
  81. # consume all lines that are entries of the table and concat them
  82. # (we do it this way because there can be multiple entries on one line)
  83. table_contents = []
  84. while True:
  85. line = file.readline()
  86. if len(line) == 0:
  87. print('unexpected end of input')
  88. sys.exit(1)
  89. line = line.strip()
  90. if len(line) == 0:
  91. # empty line
  92. continue
  93. if re_preproc_line.match(line):
  94. # preprocessor line number comment
  95. continue
  96. if line == '};':
  97. # end of table (we assume it appears on a single line)
  98. break
  99. table_contents.append(line)
  100. # make combined string of entries
  101. entries_str = ''.join(table_contents)
  102. # split into individual entries
  103. entries = []
  104. while entries_str:
  105. # look for single entry, by matching nested braces
  106. match = None
  107. if entries_str[0] == '{':
  108. nested_braces = 0
  109. for i in range(len(entries_str)):
  110. if entries_str[i] == '{':
  111. nested_braces += 1
  112. elif entries_str[i] == '}':
  113. nested_braces -= 1
  114. if nested_braces == 0:
  115. match = re_map_entry.match(entries_str[:i + 2])
  116. break
  117. if not match:
  118. print('unknown line in table:', entries_str)
  119. sys.exit(1)
  120. # extract single entry
  121. line = match.group(0)
  122. qstr = match.group(1)
  123. entries_str = entries_str[len(line):].lstrip()
  124. # add the qstr and the whole line to list of all entries
  125. entries.append((qstr, line))
  126. # sort entries so hash table construction is deterministic
  127. entries.sort()
  128. # create hash table
  129. map = [None] * int(len(entries) * table_size_mult)
  130. for qstr, line in entries:
  131. # We assume that qstr does not have any escape sequences in it.
  132. # This is reasonably safe, since keys in a module or class dict
  133. # should be standard identifiers.
  134. # TODO verify this and raise an error if escape sequence found
  135. hash_insert(map, qstr, line)
  136. # compute statistics
  137. total_attempts = 0
  138. for qstr, _ in entries:
  139. attempts, line = hash_find(map, qstr)
  140. assert line is not None
  141. if print_debug:
  142. print(' %s lookup took %u attempts' % (qstr, attempts))
  143. total_attempts += attempts
  144. if len(entries):
  145. stats = len(map), len(entries) / len(map), total_attempts / len(entries)
  146. else:
  147. stats = 0, 0, 0
  148. if print_debug:
  149. print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)
  150. # output hash table
  151. for row in map:
  152. if row is None:
  153. output.append('{ 0, 0 },\n')
  154. else:
  155. output.append(row[1] + '\n')
  156. output.append('};\n')
  157. # skip to next non-blank line
  158. while True:
  159. line = file.readline()
  160. if len(line) == 0:
  161. print('unexpected end of input')
  162. sys.exit(1)
  163. line = line.strip()
  164. if len(line) == 0:
  165. continue
  166. break
  167. # transform the is_ordered param from 1 to 0
  168. match = re_mp_obj_dict_t.match(line)
  169. if match is None:
  170. match = re_mp_map_t.match(line)
  171. if match is None:
  172. print('expecting mp_obj_dict_t or mp_map_t definition')
  173. print(output[0])
  174. print(line)
  175. sys.exit(1)
  176. line = match.group('head') + '0' + match.group('tail') + '\n'
  177. output.append(line)
  178. return (match.group('id'),) + stats
  179. def process_file(filename):
  180. output = []
  181. file_changed = False
  182. with open(filename, 'rt') as f:
  183. while True:
  184. line = f.readline()
  185. if not line:
  186. break
  187. if re_mp_rom_map_elem_t.match(line):
  188. file_changed = True
  189. stats = process_map_table(f, line, output)
  190. if print_stats:
  191. print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
  192. else:
  193. output.append(line)
  194. if file_changed:
  195. if print_debug:
  196. print(' modifying static maps in', output[0].strip())
  197. with open(filename, 'wt') as f:
  198. for line in output:
  199. f.write(line)
  200. def main():
  201. # run actual C compiler
  202. # need to quote args that have special characters in them
  203. def quote(s):
  204. if s.find('<') != -1 or s.find('>') != -1:
  205. return "'" + s + "'"
  206. else:
  207. return s
  208. ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:]))
  209. if ret != 0:
  210. ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero
  211. sys.exit(ret)
  212. if sys.argv[1] == '-E':
  213. # CPP has been run, now do our processing stage
  214. for i, arg in enumerate(sys.argv):
  215. if arg == '-o':
  216. return process_file(sys.argv[i + 1])
  217. print('%s: could not find "-o" option' % (sys.argv[0],))
  218. sys.exit(1)
  219. elif sys.argv[1] == '-fpreprocessed':
  220. # compiler has been run, nothing more to do
  221. return
  222. else:
  223. # unknown processing stage
  224. print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
  225. sys.exit(1)
  226. if __name__ == '__main__':
  227. main()