stress_aes.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. # Stress test for threads using AES encryption routines.
  2. #
  3. # AES was chosen because it is integer based and inplace so doesn't use the
  4. # heap. It is therefore a good test of raw performance and correctness of the
  5. # VM/runtime. It can be used to measure threading performance (concurrency is
  6. # in principle possible) and correctness (it's non trivial for the encryption/
  7. # decryption to give the correct answer).
  8. #
  9. # The AES code comes first (code originates from a C version authored by D.P.George)
  10. # and then the test harness at the bottom. It can be tuned to be more/less
  11. # aggressive by changing the amount of data to encrypt, the number of loops and
  12. # the number of threads.
  13. #
  14. # MIT license; Copyright (c) 2016 Damien P. George on behalf of Pycom Ltd
  15. ##################################################################
  16. # discrete arithmetic routines, mostly from a precomputed table
# non-linear, invertible, substitution box
# Immutable 256-entry lookup table indexed by the input byte value; each of
# the 16 rows below holds the outputs for 16 consecutive inputs (row r covers
# inputs 0xr0..0xrf).
aes_s_box_table = bytes((
    0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
    0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
    0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
    0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
    0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
    0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
    0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
    0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
    0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
    0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
    0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
    0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
    0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
    0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
    0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
    0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16,
))
  36. # multiplication of polynomials modulo x^8 + x^4 + x^3 + x + 1 = 0x11b
  37. def aes_gf8_mul_2(x):
  38. if x & 0x80:
  39. return (x << 1) ^ 0x11b
  40. else:
  41. return x << 1
  42. def aes_gf8_mul_3(x):
  43. return x ^ aes_gf8_mul_2(x)
  44. # non-linear, invertible, substitution box
  45. def aes_s_box(a):
  46. return aes_s_box_table[a & 0xff]
  47. # return 0x02^(a-1) in GF(2^8)
  48. def aes_r_con(a):
  49. ans = 1
  50. while a > 1:
  51. ans <<= 1;
  52. if ans & 0x100:
  53. ans ^= 0x11b
  54. a -= 1
  55. return ans
  56. ##################################################################
  57. # basic AES algorithm; see FIPS-197
  58. #
  59. # Think of it as a pseudo random number generator, with each
  60. # symbol in the sequence being a 16 byte block (the state). The
  61. # key is a parameter of the algorithm and tells which particular
  62. # sequence of random symbols you want. The initial vector, IV,
  63. # sets the start of the sequence. The idea of a strong cipher
  64. # is that it's very difficult to guess the key even if you know
  65. # a large part of the sequence. The basic AES algorithm simply
  66. # provides such a sequence. En/de-cryption is implemented here
# using OFB (output feedback) mode, where the sequence is xored against the plaintext.
  68. # Care must be taken to (almost) always choose a different IV.
  69. # all inputs must be size 16
  70. def aes_add_round_key(state, w):
  71. for i in range(16):
  72. state[i] ^= w[i]
  73. # combined sub_bytes, shift_rows, mix_columns, add_round_key
  74. # all inputs must be size 16
  75. def aes_sb_sr_mc_ark(state, w, w_idx, temp):
  76. temp_idx = 0
  77. for i in range(4):
  78. x0 = aes_s_box_table[state[i * 4]]
  79. x1 = aes_s_box_table[state[1 + ((i + 1) & 3) * 4]]
  80. x2 = aes_s_box_table[state[2 + ((i + 2) & 3) * 4]]
  81. x3 = aes_s_box_table[state[3 + ((i + 3) & 3) * 4]]
  82. temp[temp_idx] = aes_gf8_mul_2(x0) ^ aes_gf8_mul_3(x1) ^ x2 ^ x3 ^ w[w_idx]
  83. temp[temp_idx + 1] = x0 ^ aes_gf8_mul_2(x1) ^ aes_gf8_mul_3(x2) ^ x3 ^ w[w_idx + 1]
  84. temp[temp_idx + 2] = x0 ^ x1 ^ aes_gf8_mul_2(x2) ^ aes_gf8_mul_3(x3) ^ w[w_idx + 2]
  85. temp[temp_idx + 3] = aes_gf8_mul_3(x0) ^ x1 ^ x2 ^ aes_gf8_mul_2(x3) ^ w[w_idx + 3]
  86. w_idx += 4
  87. temp_idx += 4
  88. for i in range(16):
  89. state[i] = temp[i]
  90. # combined sub_bytes, shift_rows, add_round_key
  91. # all inputs must be size 16
  92. def aes_sb_sr_ark(state, w, w_idx, temp):
  93. temp_idx = 0
  94. for i in range(4):
  95. x0 = aes_s_box_table[state[i * 4]]
  96. x1 = aes_s_box_table[state[1 + ((i + 1) & 3) * 4]]
  97. x2 = aes_s_box_table[state[2 + ((i + 2) & 3) * 4]]
  98. x3 = aes_s_box_table[state[3 + ((i + 3) & 3) * 4]]
  99. temp[temp_idx] = x0 ^ w[w_idx]
  100. temp[temp_idx + 1] = x1 ^ w[w_idx + 1]
  101. temp[temp_idx + 2] = x2 ^ w[w_idx + 2]
  102. temp[temp_idx + 3] = x3 ^ w[w_idx + 3]
  103. w_idx += 4
  104. temp_idx += 4
  105. for i in range(16):
  106. state[i] = temp[i]
  107. # take state as input and change it to the next state in the sequence
  108. # state and temp have size 16, w has size 16 * (Nr + 1), Nr >= 1
  109. def aes_state(state, w, temp, nr):
  110. aes_add_round_key(state, w)
  111. w_idx = 16
  112. for i in range(nr - 1):
  113. aes_sb_sr_mc_ark(state, w, w_idx, temp)
  114. w_idx += 16
  115. aes_sb_sr_ark(state, w, w_idx, temp)
  116. # expand 'key' to 'w' for use with aes_state
  117. # key has size 4 * Nk, w has size 16 * (Nr + 1), temp has size 16
  118. def aes_key_expansion(key, w, temp, nk, nr):
  119. for i in range(4 * nk):
  120. w[i] = key[i]
  121. w_idx = 4 * nk - 4
  122. for i in range(nk, 4 * (nr + 1)):
  123. t = temp
  124. t_idx = 0
  125. if i % nk == 0:
  126. t[0] = aes_s_box(w[w_idx + 1]) ^ aes_r_con(i // nk)
  127. for j in range(1, 4):
  128. t[j] = aes_s_box(w[w_idx + (j + 1) % 4])
  129. elif nk > 6 and i % nk == 4:
  130. for j in range(0, 4):
  131. t[j] = aes_s_box(w[w_idx + j])
  132. else:
  133. t = w
  134. t_idx = w_idx
  135. w_idx += 4
  136. for j in range(4):
  137. w[w_idx + j] = w[w_idx + j - 4 * nk] ^ t[t_idx + j]
  138. ##################################################################
  139. # simple use of AES algorithm, using output feedback (OFB) mode
  140. class AES:
  141. def __init__(self, keysize):
  142. if keysize == 128:
  143. self.nk = 4
  144. self.nr = 10
  145. elif keysize == 192:
  146. self.nk = 6
  147. self.nr = 12
  148. else:
  149. assert keysize == 256
  150. self.nk = 8
  151. self.nr = 14
  152. self.state = bytearray(16)
  153. self.w = bytearray(16 * (self.nr + 1))
  154. self.temp = bytearray(16)
  155. self.state_pos = 16
  156. def set_key(self, key):
  157. aes_key_expansion(key, self.w, self.temp, self.nk, self.nr)
  158. self.state_pos = 16
  159. def set_iv(self, iv):
  160. for i in range(16):
  161. self.state[i] = iv[i]
  162. self.state_pos = 16;
  163. def get_some_state(self, n_needed):
  164. if self.state_pos >= 16:
  165. aes_state(self.state, self.w, self.temp, self.nr)
  166. self.state_pos = 0
  167. n = 16 - self.state_pos
  168. if n > n_needed:
  169. n = n_needed
  170. return n
  171. def apply_to(self, data):
  172. idx = 0
  173. n = len(data)
  174. while n > 0:
  175. ln = self.get_some_state(n)
  176. n -= ln
  177. for i in range(ln):
  178. data[idx + i] ^= self.state[self.state_pos + i]
  179. idx += ln
  180. self.state_pos += n
  181. ##################################################################
  182. # test code
  183. try:
  184. import utime as time
  185. except ImportError:
  186. import time
  187. import _thread
  188. class LockedCounter:
  189. def __init__(self):
  190. self.lock = _thread.allocate_lock()
  191. self.value = 0
  192. def add(self, val):
  193. self.lock.acquire()
  194. self.value += val
  195. self.lock.release()
# shared tally of finished workers: thread_entry adds 1 when it completes,
# and the main block polls .value to know when every thread is done
count = LockedCounter()
  197. def thread_entry():
  198. global count
  199. aes = AES(256)
  200. key = bytearray(256 // 8)
  201. iv = bytearray(16)
  202. data = bytearray(128)
  203. # from now on we don't use the heap
  204. for loop in range(5):
  205. # encrypt
  206. aes.set_key(key)
  207. aes.set_iv(iv)
  208. for i in range(8):
  209. aes.apply_to(data)
  210. # decrypt
  211. aes.set_key(key)
  212. aes.set_iv(iv)
  213. for i in range(8):
  214. aes.apply_to(data)
  215. # verify
  216. for i in range(len(data)):
  217. assert data[i] == 0
  218. count.add(1)
  219. if __name__ == '__main__':
  220. n_thread = 20
  221. for i in range(n_thread):
  222. _thread.start_new_thread(thread_entry, ())
  223. while count.value < n_thread:
  224. time.sleep(1)