#!/usr/bin/env python3
"""
This is a middle-processor for MicroPython source files.  It takes the output
of the C preprocessor, has the option to change it, then feeds this into the
C compiler.

It currently has the ability to reorder static hash tables so they are actually
hashed, resulting in faster lookup times at runtime.

To use, configure the Python variables below, and add the following line to the
Makefile:

    CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
"""
import os
import re
import subprocess
import sys
################################################################################
# these are the configuration variables
# TODO somehow make them externally configurable

# this is the path to the true C compiler
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'

# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
bytes_in_qstr_hash = 2

# this must be 1 or more (can be a decimal)
# larger uses more code size but yields faster lookups
table_size_mult = 1

# these control output during processing
print_stats = True
print_debug = False

# end configuration variables
################################################################################
# precompiled regexes

# line-number marker emitted by the C preprocessor, e.g. '# 12 "file.c"'
re_preproc_line = re.compile(r'# [0-9]+ ')

# one table entry; group 1 is the qstr name without the MP_QSTR_ prefix
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')

# dict/map definitions that follow a table; 'head' and 'tail' surround the
# value of .is_ordered so that it can be rewritten, 'id' is the variable name
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')

# opening line of a static table of map elements
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
- # this must match the equivalent function in qstr.c
- def compute_hash(qstr):
- hash = 5381
- for char in qstr:
- hash = (hash * 33) ^ ord(char)
- # Make sure that valid hash is never zero, zero means "hash not computed"
- return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
# this algo must match the equivalent in map.c
def hash_insert(map, key, value):
    """Store ``(key, value)`` in the open-addressing hash table *map*.

    *map* is a list whose slots are either ``None`` (free) or a
    ``(key, value)`` tuple.  Probing starts at ``compute_hash(key) % len(map)``
    and moves forward one slot at a time, wrapping around at the end.

    Raises AssertionError if *key* is already present or if there is no free
    slot left.  The error is raised explicitly, not with an ``assert``
    statement, so a full table is still detected under ``python -O``.
    """
    size = len(map)
    if size == 0:
        raise AssertionError("no free slot for key '%s'" % (key,))
    pos = compute_hash(key) % size
    if print_debug:
        print(' insert %s: start at %u/%u -- ' % (key, pos, size), end='')
    # every slot is visited at most once, so the loop always terminates
    for _ in range(size):
        slot = map[pos]
        if slot is None:
            # found empty slot, so key is not in table
            if print_debug:
                print('put at %u' % pos)
            map[pos] = (key, value)
            return
        if slot[0] == key:
            raise AssertionError("duplicate key '%s'" % (key,))
        # slot is taken by another key, keep searching
        pos = (pos + 1) % size
    raise AssertionError("no free slot for key '%s'" % (key,))
def hash_find(map, key):
    """Look up *key* in the open-addressing hash table *map*.

    Uses the same probe sequence as hash_insert().  Returns a tuple
    ``(attempts, value)`` where *attempts* is the number of slots inspected
    and *value* is the stored value, or ``None`` if the key is not present.
    An empty table yields ``(0, None)``.
    """
    size = len(map)
    if size == 0:
        # nothing to probe, and the modulo below would divide by zero
        return 0, None
    pos = compute_hash(key) % size
    for attempts in range(1, size + 1):
        slot = map[pos]
        if slot is None:
            # a free slot ends the probe sequence: the key is not in the table
            return attempts, None
        if slot[0] == key:
            return attempts, slot[1]
        pos = (pos + 1) % size
    # wrapped all the way around a completely full table without a hit
    return size, None
def _read_nonblank_line(file):
    """Return the next non-blank line of *file*, stripped of whitespace.

    Prints a message and exits with status 1 if the input ends first.
    """
    while True:
        line = file.readline()
        if not line:
            print('unexpected end of input')
            sys.exit(1)
        line = line.strip()
        if line:
            return line


def _read_table_body(file):
    """Consume the table body up to its closing '};' and return it as one string.

    The lines are concatenated because there can be multiple entries on one
    line.  Preprocessor line-number comments are dropped.
    """
    parts = []
    while True:
        line = _read_nonblank_line(file)
        if re_preproc_line.match(line):
            # preprocessor line number comment
            continue
        if line == '};':
            # end of table (we assume it appears on a single line)
            return ''.join(parts)
        parts.append(line)


def _split_table_entries(entries_str):
    """Split the concatenated table body into a list of ``(qstr, text)`` pairs.

    Each entry is found by matching nested braces.  Prints a message and exits
    with status 1 if something that is not a recognised entry is found.
    """
    entries = []
    while entries_str:
        match = None
        if entries_str[0] == '{':
            depth = 0
            for i, char in enumerate(entries_str):
                if char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        # take the closing brace plus the comma that follows it
                        candidate = entries_str[:i + 2]
                        if i + 1 == len(entries_str):
                            # C allows the last entry to omit its trailing
                            # comma; add one so the entry can be placed
                            # anywhere in the reordered table
                            candidate += ','
                        match = re_map_entry.match(candidate)
                        break
        if not match:
            print('unknown line in table:', entries_str)
            sys.exit(1)
        line = match.group(0)
        entries.append((match.group(1), line))
        entries_str = entries_str[len(line):].lstrip()
    return entries


def process_map_table(file, line, output):
    """Rewrite one static map table, read from *file*, as a real hash table.

    *line* is the already-read opening line of the table; the rest of the
    table and the dict/map definition that follows it are read from *file*.
    The rewritten lines are appended to the list *output*: the opening line,
    one line per table slot (``{ 0, 0 },`` for a free slot), the closing
    ``};`` and the following definition with ``.is_ordered`` changed from 1
    to 0.

    Returns ``(id, size, load, avg_lookups)``: the name of the dict/map
    variable, the number of slots, the fill ratio and the average number of
    probes needed to find an entry.  Exits with status 1 on malformed input.
    """
    output.append(line)

    entries = _split_table_entries(_read_table_body(file))

    # sort entries so hash table construction is deterministic
    entries.sort()

    # create hash table; never smaller than the number of entries, otherwise
    # they could not all be inserted
    table = [None] * max(len(entries), int(len(entries) * table_size_mult))
    for qstr, text in entries:
        # We assume that qstr does not have any escape sequences in it.
        # This is reasonably safe, since keys in a module or class dict
        # should be standard identifiers.
        # TODO verify this and raise an error if escape sequence found
        hash_insert(table, qstr, text)

    # compute statistics
    total_attempts = 0
    for qstr, _ in entries:
        attempts, found = hash_find(table, qstr)
        assert found is not None
        if print_debug:
            print(' %s lookup took %u attempts' % (qstr, attempts))
        total_attempts += attempts
    if entries:
        stats = len(table), len(entries) / len(table), total_attempts / len(entries)
    else:
        stats = 0, 0, 0
    if print_debug:
        print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)

    # output hash table
    for row in table:
        output.append('{ 0, 0 },\n' if row is None else row[1] + '\n')
    output.append('};\n')

    # the next non-blank line must be the dict/map that uses the table;
    # transform its is_ordered param from 1 to 0
    line = _read_nonblank_line(file)
    match = re_mp_obj_dict_t.match(line) or re_mp_map_t.match(line)
    if match is None:
        print('expecting mp_obj_dict_t or mp_map_t definition')
        print(output[0])
        print(line)
        sys.exit(1)
    output.append(match.group('head') + '0' + match.group('tail') + '\n')

    return (match.group('id'),) + stats
def process_file(filename):
    """Rewrite the static map tables of the preprocessed file *filename* in place.

    Every table whose opening line matches ``re_mp_rom_map_elem_t`` is handed
    to process_map_table(); all other lines are kept unchanged.  The file is
    only written back if at least one table was found.
    """
    # The file is C source in an unknown encoding and only ASCII text is ever
    # matched or changed, so decode with surrogateescape: any byte sequence
    # then survives the read/write round trip unchanged, whatever the locale.
    text_mode = {'encoding': 'utf-8', 'errors': 'surrogateescape'}

    output = []
    file_changed = False
    with open(filename, 'rt', **text_mode) as f:
        # use readline() and not iteration over f, because process_map_table()
        # reads further lines from the same file object
        for line in iter(f.readline, ''):
            if re_mp_rom_map_elem_t.match(line):
                file_changed = True
                stats = process_map_table(f, line, output)
                if print_stats:
                    print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
            else:
                output.append(line)

    if file_changed:
        if print_debug:
            print(' modifying static maps in', output[0].strip())
        with open(filename, 'wt', **text_mode) as f:
            f.writelines(output)
def main():
    """Run the real C compiler, then post-process its output if it was the
    preprocessing stage.

    All command-line arguments are passed on to ``cc1_path`` unchanged.  If
    the compiler fails, the script exits with a non-zero status in the range
    1-127.  When the first argument is ``-E`` the file named after ``-o`` is
    passed to process_file(); when it is ``-fpreprocessed`` nothing more is
    done; anything else is reported as an error.
    """
    if len(sys.argv) < 2:
        print('%s: missing compiler arguments' % (sys.argv[0],))
        sys.exit(1)

    # run actual C compiler
    # the arguments are passed as a list, without going through a shell, so
    # special characters in them need no quoting and cannot be misinterpreted
    try:
        ret = subprocess.call([cc1_path] + sys.argv[1:])
    except OSError as exc:
        print('%s: cannot run %s: %s' % (sys.argv[0], cc1_path, exc))
        sys.exit(127)
    if ret != 0:
        # a negative value means the compiler was killed by that signal
        ret = (abs(ret) & 0x7f) or 127 # make it in range 0-127, but non-zero
        sys.exit(ret)

    stage = sys.argv[1]
    if stage == '-E':
        # CPP has been run, now do our processing stage
        # (the last argument is skipped: '-o' needs a file name after it)
        for i, arg in enumerate(sys.argv[:-1]):
            if arg == '-o':
                return process_file(sys.argv[i + 1])
        print('%s: could not find "-o" option' % (sys.argv[0],))
        sys.exit(1)
    elif stage == '-fpreprocessed':
        # compiler has been run, nothing more to do
        return
    else:
        # unknown processing stage
        print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
        sys.exit(1)
# only run the compiler when executed as a script, not when imported
if __name__ == '__main__':
    main()