// map.c (16 KB)
  1. /*
  2. * This file is part of the MicroPython project, http://micropython.org/
  3. *
  4. * The MIT License (MIT)
  5. *
  6. * Copyright (c) 2013, 2014 Damien P. George
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #include <stdint.h>
  27. #include <stdlib.h>
  28. #include <string.h>
  29. #include <assert.h>
  30. #include "py/mpconfig.h"
  31. #include "py/misc.h"
  32. #include "py/runtime.h"
  // Debug output switch.  DEBUG_PRINT is 1 when MICROPY_DEBUG_VERBOSE is
  // enabled and 0 otherwise; in the latter case DEBUG_printf() is defined
  // here as a no-op expression.
  #if !MICROPY_DEBUG_VERBOSE
  #define DEBUG_PRINT (0)
  #define DEBUG_printf(...) (void)0
  #else
  #define DEBUG_PRINT (1)
  #endif
  39. // Fixed empty map. Useful when need to call kw-receiving functions
  40. // without any keywords from C, etc.
  41. const mp_map_t mp_const_empty_map = {
  42. .all_keys_are_qstrs = 0,
  43. .is_fixed = 1,
  44. .is_ordered = 1,
  45. .used = 0,
  46. .alloc = 0,
  47. .table = NULL,
  48. };
  49. // This table of sizes is used to control the growth of hash tables.
  50. // The first set of sizes are chosen so the allocation fits exactly in a
  51. // 4-word GC block, and it's not so important for these small values to be
  52. // prime. The latter sizes are prime and increase at an increasing rate.
  53. STATIC const uint16_t hash_allocation_sizes[] = {
  54. 0, 2, 4, 6, 8, 10, 12, // +2
  55. 17, 23, 29, 37, 47, 59, 73, // *1.25
  56. 97, 127, 167, 223, 293, 389, 521, 691, 919, 1223, 1627, 2161, // *1.33
  57. 3229, 4831, 7243, 10861, 16273, 24407, 36607, 54907, // *1.5
  58. };
  59. STATIC size_t get_hash_alloc_greater_or_equal_to(size_t x) {
  60. for (size_t i = 0; i < MP_ARRAY_SIZE(hash_allocation_sizes); i++) {
  61. if (hash_allocation_sizes[i] >= x) {
  62. return hash_allocation_sizes[i];
  63. }
  64. }
  65. // ran out of primes in the table!
  66. // return something sensible, at least make it odd
  67. return (x + x / 2) | 1;
  68. }
  69. /******************************************************************************/
  70. /* map */
  71. void mp_map_init(mp_map_t *map, size_t n) {
  72. if (n == 0) {
  73. map->alloc = 0;
  74. map->table = NULL;
  75. } else {
  76. map->alloc = n;
  77. map->table = m_new0(mp_map_elem_t, map->alloc);
  78. }
  79. map->used = 0;
  80. map->all_keys_are_qstrs = 1;
  81. map->is_fixed = 0;
  82. map->is_ordered = 0;
  83. }
  84. void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table) {
  85. map->alloc = n;
  86. map->used = n;
  87. map->all_keys_are_qstrs = 1;
  88. map->is_fixed = 1;
  89. map->is_ordered = 1;
  90. map->table = (mp_map_elem_t*)table;
  91. }
  92. // Differentiate from mp_map_clear() - semantics is different
  93. void mp_map_deinit(mp_map_t *map) {
  94. if (!map->is_fixed) {
  95. m_del(mp_map_elem_t, map->table, map->alloc);
  96. }
  97. map->used = map->alloc = 0;
  98. }
  99. void mp_map_clear(mp_map_t *map) {
  100. if (!map->is_fixed) {
  101. m_del(mp_map_elem_t, map->table, map->alloc);
  102. }
  103. map->alloc = 0;
  104. map->used = 0;
  105. map->all_keys_are_qstrs = 1;
  106. map->is_fixed = 0;
  107. map->table = NULL;
  108. }
  109. STATIC void mp_map_rehash(mp_map_t *map) {
  110. size_t old_alloc = map->alloc;
  111. size_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1);
  112. DEBUG_printf("mp_map_rehash(%p): " UINT_FMT " -> " UINT_FMT "\n", map, old_alloc, new_alloc);
  113. mp_map_elem_t *old_table = map->table;
  114. mp_map_elem_t *new_table = m_new0(mp_map_elem_t, new_alloc);
  115. // If we reach this point, table resizing succeeded, now we can edit the old map.
  116. map->alloc = new_alloc;
  117. map->used = 0;
  118. map->all_keys_are_qstrs = 1;
  119. map->table = new_table;
  120. for (size_t i = 0; i < old_alloc; i++) {
  121. if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) {
  122. mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value;
  123. }
  124. }
  125. m_del(mp_map_elem_t, old_table, old_alloc);
  126. }
  127. // MP_MAP_LOOKUP behaviour:
  128. // - returns NULL if not found, else the slot it was found in with key,value non-null
  129. // MP_MAP_LOOKUP_ADD_IF_NOT_FOUND behaviour:
  130. // - returns slot, with key non-null and value=MP_OBJ_NULL if it was added
  131. // MP_MAP_LOOKUP_REMOVE_IF_FOUND behaviour:
  132. // - returns NULL if not found, else the slot if was found in with key null and value non-null
  133. mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
  134. // If the map is a fixed array then we must only be called for a lookup
  135. assert(!map->is_fixed || lookup_kind == MP_MAP_LOOKUP);
  136. // Work out if we can compare just pointers
  137. bool compare_only_ptrs = map->all_keys_are_qstrs;
  138. if (compare_only_ptrs) {
  139. if (MP_OBJ_IS_QSTR(index)) {
  140. // Index is a qstr, so can just do ptr comparison.
  141. } else if (MP_OBJ_IS_TYPE(index, &mp_type_str)) {
  142. // Index is a non-interned string.
  143. // We can either intern the string, or force a full equality comparison.
  144. // We chose the latter, since interning costs time and potentially RAM,
  145. // and it won't necessarily benefit subsequent calls because these calls
  146. // most likely won't pass the newly-interned string.
  147. compare_only_ptrs = false;
  148. } else if (lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  149. // If we are not adding, then we can return straight away a failed
  150. // lookup because we know that the index will never be found.
  151. return NULL;
  152. }
  153. }
  154. // if the map is an ordered array then we must do a brute force linear search
  155. if (map->is_ordered) {
  156. for (mp_map_elem_t *elem = &map->table[0], *top = &map->table[map->used]; elem < top; elem++) {
  157. if (elem->key == index || (!compare_only_ptrs && mp_obj_equal(elem->key, index))) {
  158. #if MICROPY_PY_COLLECTIONS_ORDEREDDICT
  159. if (MP_UNLIKELY(lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND)) {
  160. // remove the found element by moving the rest of the array down
  161. mp_obj_t value = elem->value;
  162. --map->used;
  163. memmove(elem, elem + 1, (top - elem - 1) * sizeof(*elem));
  164. // put the found element after the end so the caller can access it if needed
  165. elem = &map->table[map->used];
  166. elem->key = MP_OBJ_NULL;
  167. elem->value = value;
  168. }
  169. #endif
  170. return elem;
  171. }
  172. }
  173. #if MICROPY_PY_COLLECTIONS_ORDEREDDICT
  174. if (MP_LIKELY(lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) {
  175. return NULL;
  176. }
  177. if (map->used == map->alloc) {
  178. // TODO: Alloc policy
  179. map->alloc += 4;
  180. map->table = m_renew(mp_map_elem_t, map->table, map->used, map->alloc);
  181. mp_seq_clear(map->table, map->used, map->alloc, sizeof(*map->table));
  182. }
  183. mp_map_elem_t *elem = map->table + map->used++;
  184. elem->key = index;
  185. if (!MP_OBJ_IS_QSTR(index)) {
  186. map->all_keys_are_qstrs = 0;
  187. }
  188. return elem;
  189. #else
  190. return NULL;
  191. #endif
  192. }
  193. // map is a hash table (not an ordered array), so do a hash lookup
  194. if (map->alloc == 0) {
  195. if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  196. mp_map_rehash(map);
  197. } else {
  198. return NULL;
  199. }
  200. }
  201. // get hash of index, with fast path for common case of qstr
  202. mp_uint_t hash;
  203. if (MP_OBJ_IS_QSTR(index)) {
  204. hash = qstr_hash(MP_OBJ_QSTR_VALUE(index));
  205. } else {
  206. hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
  207. }
  208. size_t pos = hash % map->alloc;
  209. size_t start_pos = pos;
  210. mp_map_elem_t *avail_slot = NULL;
  211. for (;;) {
  212. mp_map_elem_t *slot = &map->table[pos];
  213. if (slot->key == MP_OBJ_NULL) {
  214. // found NULL slot, so index is not in table
  215. if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  216. map->used += 1;
  217. if (avail_slot == NULL) {
  218. avail_slot = slot;
  219. }
  220. avail_slot->key = index;
  221. avail_slot->value = MP_OBJ_NULL;
  222. if (!MP_OBJ_IS_QSTR(index)) {
  223. map->all_keys_are_qstrs = 0;
  224. }
  225. return avail_slot;
  226. } else {
  227. return NULL;
  228. }
  229. } else if (slot->key == MP_OBJ_SENTINEL) {
  230. // found deleted slot, remember for later
  231. if (avail_slot == NULL) {
  232. avail_slot = slot;
  233. }
  234. } else if (slot->key == index || (!compare_only_ptrs && mp_obj_equal(slot->key, index))) {
  235. // found index
  236. // Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x
  237. if (lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
  238. // delete element in this slot
  239. map->used--;
  240. if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) {
  241. // optimisation if next slot is empty
  242. slot->key = MP_OBJ_NULL;
  243. } else {
  244. slot->key = MP_OBJ_SENTINEL;
  245. }
  246. // keep slot->value so that caller can access it if needed
  247. }
  248. return slot;
  249. }
  250. // not yet found, keep searching in this table
  251. pos = (pos + 1) % map->alloc;
  252. if (pos == start_pos) {
  253. // search got back to starting position, so index is not in table
  254. if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  255. if (avail_slot != NULL) {
  256. // there was an available slot, so use that
  257. map->used++;
  258. avail_slot->key = index;
  259. avail_slot->value = MP_OBJ_NULL;
  260. if (!MP_OBJ_IS_QSTR(index)) {
  261. map->all_keys_are_qstrs = 0;
  262. }
  263. return avail_slot;
  264. } else {
  265. // not enough room in table, rehash it
  266. mp_map_rehash(map);
  267. // restart the search for the new element
  268. start_pos = pos = hash % map->alloc;
  269. }
  270. } else {
  271. return NULL;
  272. }
  273. }
  274. }
  275. }
  276. /******************************************************************************/
  277. /* set */
  278. #if MICROPY_PY_BUILTINS_SET
  279. void mp_set_init(mp_set_t *set, size_t n) {
  280. set->alloc = n;
  281. set->used = 0;
  282. set->table = m_new0(mp_obj_t, set->alloc);
  283. }
  284. STATIC void mp_set_rehash(mp_set_t *set) {
  285. size_t old_alloc = set->alloc;
  286. mp_obj_t *old_table = set->table;
  287. set->alloc = get_hash_alloc_greater_or_equal_to(set->alloc + 1);
  288. set->used = 0;
  289. set->table = m_new0(mp_obj_t, set->alloc);
  290. for (size_t i = 0; i < old_alloc; i++) {
  291. if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) {
  292. mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
  293. }
  294. }
  295. m_del(mp_obj_t, old_table, old_alloc);
  296. }
  297. mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
  298. // Note: lookup_kind can be MP_MAP_LOOKUP_ADD_IF_NOT_FOUND_OR_REMOVE_IF_FOUND which
  299. // is handled by using bitwise operations.
  300. if (set->alloc == 0) {
  301. if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  302. mp_set_rehash(set);
  303. } else {
  304. return MP_OBJ_NULL;
  305. }
  306. }
  307. mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
  308. size_t pos = hash % set->alloc;
  309. size_t start_pos = pos;
  310. mp_obj_t *avail_slot = NULL;
  311. for (;;) {
  312. mp_obj_t elem = set->table[pos];
  313. if (elem == MP_OBJ_NULL) {
  314. // found NULL slot, so index is not in table
  315. if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  316. if (avail_slot == NULL) {
  317. avail_slot = &set->table[pos];
  318. }
  319. set->used++;
  320. *avail_slot = index;
  321. return index;
  322. } else {
  323. return MP_OBJ_NULL;
  324. }
  325. } else if (elem == MP_OBJ_SENTINEL) {
  326. // found deleted slot, remember for later
  327. if (avail_slot == NULL) {
  328. avail_slot = &set->table[pos];
  329. }
  330. } else if (mp_obj_equal(elem, index)) {
  331. // found index
  332. if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
  333. // delete element
  334. set->used--;
  335. if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
  336. // optimisation if next slot is empty
  337. set->table[pos] = MP_OBJ_NULL;
  338. } else {
  339. set->table[pos] = MP_OBJ_SENTINEL;
  340. }
  341. }
  342. return elem;
  343. }
  344. // not yet found, keep searching in this table
  345. pos = (pos + 1) % set->alloc;
  346. if (pos == start_pos) {
  347. // search got back to starting position, so index is not in table
  348. if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
  349. if (avail_slot != NULL) {
  350. // there was an available slot, so use that
  351. set->used++;
  352. *avail_slot = index;
  353. return index;
  354. } else {
  355. // not enough room in table, rehash it
  356. mp_set_rehash(set);
  357. // restart the search for the new element
  358. start_pos = pos = hash % set->alloc;
  359. }
  360. } else {
  361. return MP_OBJ_NULL;
  362. }
  363. }
  364. }
  365. }
  366. mp_obj_t mp_set_remove_first(mp_set_t *set) {
  367. for (size_t pos = 0; pos < set->alloc; pos++) {
  368. if (MP_SET_SLOT_IS_FILLED(set, pos)) {
  369. mp_obj_t elem = set->table[pos];
  370. // delete element
  371. set->used--;
  372. if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
  373. // optimisation if next slot is empty
  374. set->table[pos] = MP_OBJ_NULL;
  375. } else {
  376. set->table[pos] = MP_OBJ_SENTINEL;
  377. }
  378. return elem;
  379. }
  380. }
  381. return MP_OBJ_NULL;
  382. }
  383. void mp_set_clear(mp_set_t *set) {
  384. m_del(mp_obj_t, set->table, set->alloc);
  385. set->alloc = 0;
  386. set->used = 0;
  387. set->table = NULL;
  388. }
  389. #endif // MICROPY_PY_BUILTINS_SET
  390. #if defined(DEBUG_PRINT) && DEBUG_PRINT
  391. void mp_map_dump(mp_map_t *map) {
  392. for (size_t i = 0; i < map->alloc; i++) {
  393. if (map->table[i].key != NULL) {
  394. mp_obj_print(map->table[i].key, PRINT_REPR);
  395. } else {
  396. printf("(nil)");
  397. }
  398. printf(": %p\n", map->table[i].value);
  399. }
  400. printf("---\n");
  401. }
  402. #endif