lexer.h 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  /*
   * This file is part of the MicroPython project, http://micropython.org/
   *
   * The MIT License (MIT)
   *
   * Copyright (c) 2013, 2014 Damien P. George
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy
   * of this software and associated documentation files (the "Software"), to deal
   * in the Software without restriction, including without limitation the rights
   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   * copies of the Software, and to permit persons to whom the Software is
   * furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be included in
   * all copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   * THE SOFTWARE.
   */
  26. #ifndef MICROPY_INCLUDED_PY_LEXER_H
  27. #define MICROPY_INCLUDED_PY_LEXER_H
  28. #include <stdint.h>
  29. #include "py/mpconfig.h"
  30. #include "py/qstr.h"
  31. #include "py/reader.h"
  /* lexer.h -- simple tokeniser for MicroPython
   *
   * Source text is handled using an explicit (byte) length instead of null
   * termination.  Token data is stored the same way: UTF-8 with an explicit
   * (byte) length.
   */
  // Kinds of token that the lexer reports in mp_lexer_t.tok_kind.
  //
  // Enumerators take consecutive values starting from 0 in the order listed.
  // NOTE(review): code outside this header may rely on that ordering (e.g.
  // lookup tables indexed by token kind) -- confirm before reordering.
  typedef enum _mp_token_kind_t {
      // Kinds that are not keywords, operators or delimiters.
      MP_TOKEN_END,
      MP_TOKEN_INVALID,
      MP_TOKEN_DEDENT_MISMATCH,
      MP_TOKEN_LONELY_STRING_OPEN,

      MP_TOKEN_NEWLINE,
      MP_TOKEN_INDENT,
      MP_TOKEN_DEDENT,

      MP_TOKEN_NAME,
      MP_TOKEN_INTEGER,
      MP_TOKEN_FLOAT_OR_IMAG,
      MP_TOKEN_STRING,
      MP_TOKEN_BYTES,

      MP_TOKEN_ELLIPSIS,

      // Keywords (MP_TOKEN_KW_*).
      MP_TOKEN_KW_FALSE,
      MP_TOKEN_KW_NONE,
      MP_TOKEN_KW_TRUE,
      MP_TOKEN_KW___DEBUG__,
      MP_TOKEN_KW_AND,
      MP_TOKEN_KW_AS,
      MP_TOKEN_KW_ASSERT,
      // The async/await kinds exist only when MICROPY_PY_ASYNC_AWAIT is
      // enabled; when present they shift every later enumerator's value by two.
  #if MICROPY_PY_ASYNC_AWAIT
      MP_TOKEN_KW_ASYNC,
      MP_TOKEN_KW_AWAIT,
  #endif
      MP_TOKEN_KW_BREAK,
      MP_TOKEN_KW_CLASS,
      MP_TOKEN_KW_CONTINUE,
      MP_TOKEN_KW_DEF,
      MP_TOKEN_KW_DEL,
      MP_TOKEN_KW_ELIF,
      MP_TOKEN_KW_ELSE,
      MP_TOKEN_KW_EXCEPT,
      MP_TOKEN_KW_FINALLY,
      MP_TOKEN_KW_FOR,
      MP_TOKEN_KW_FROM,
      MP_TOKEN_KW_GLOBAL,
      MP_TOKEN_KW_IF,
      MP_TOKEN_KW_IMPORT,
      MP_TOKEN_KW_IN,
      MP_TOKEN_KW_IS,
      MP_TOKEN_KW_LAMBDA,
      MP_TOKEN_KW_NONLOCAL,
      MP_TOKEN_KW_NOT,
      MP_TOKEN_KW_OR,
      MP_TOKEN_KW_PASS,
      MP_TOKEN_KW_RAISE,
      MP_TOKEN_KW_RETURN,
      MP_TOKEN_KW_TRY,
      MP_TOKEN_KW_WHILE,
      MP_TOKEN_KW_WITH,
      MP_TOKEN_KW_YIELD,

      // Operators (MP_TOKEN_OP_*).
      MP_TOKEN_OP_PLUS,
      MP_TOKEN_OP_MINUS,
      MP_TOKEN_OP_STAR,
      MP_TOKEN_OP_DBL_STAR,
      MP_TOKEN_OP_SLASH,
      MP_TOKEN_OP_DBL_SLASH,
      MP_TOKEN_OP_PERCENT,
      MP_TOKEN_OP_LESS,
      MP_TOKEN_OP_DBL_LESS,
      MP_TOKEN_OP_MORE,
      MP_TOKEN_OP_DBL_MORE,
      MP_TOKEN_OP_AMPERSAND,
      MP_TOKEN_OP_PIPE,
      MP_TOKEN_OP_CARET,
      MP_TOKEN_OP_TILDE,
      MP_TOKEN_OP_LESS_EQUAL,
      MP_TOKEN_OP_MORE_EQUAL,
      MP_TOKEN_OP_DBL_EQUAL,
      MP_TOKEN_OP_NOT_EQUAL,

      // Delimiters (MP_TOKEN_DEL_*).
      MP_TOKEN_DEL_PAREN_OPEN,
      MP_TOKEN_DEL_PAREN_CLOSE,
      MP_TOKEN_DEL_BRACKET_OPEN,
      MP_TOKEN_DEL_BRACKET_CLOSE,
      MP_TOKEN_DEL_BRACE_OPEN,
      MP_TOKEN_DEL_BRACE_CLOSE,
      MP_TOKEN_DEL_COMMA,
      MP_TOKEN_DEL_COLON,
      MP_TOKEN_DEL_PERIOD,
      MP_TOKEN_DEL_SEMICOLON,
      MP_TOKEN_DEL_AT,
      MP_TOKEN_DEL_EQUAL,
      MP_TOKEN_DEL_PLUS_EQUAL,
      MP_TOKEN_DEL_MINUS_EQUAL,
      MP_TOKEN_DEL_STAR_EQUAL,
      MP_TOKEN_DEL_SLASH_EQUAL,
      MP_TOKEN_DEL_DBL_SLASH_EQUAL,
      MP_TOKEN_DEL_PERCENT_EQUAL,
      MP_TOKEN_DEL_AMPERSAND_EQUAL,
      MP_TOKEN_DEL_PIPE_EQUAL,
      MP_TOKEN_DEL_CARET_EQUAL,
      MP_TOKEN_DEL_DBL_MORE_EQUAL,
      MP_TOKEN_DEL_DBL_LESS_EQUAL,
      MP_TOKEN_DEL_DBL_STAR_EQUAL,
      MP_TOKEN_DEL_MINUS_MORE,
  } mp_token_kind_t;
  134. // this data structure is exposed for efficiency
  135. // public members are: source_name, tok_line, tok_column, tok_kind, vstr
  136. typedef struct _mp_lexer_t {
  137. qstr source_name; // name of source
  138. mp_reader_t reader; // stream source
  139. unichar chr0, chr1, chr2; // current cached characters from source
  140. size_t line; // current source line
  141. size_t column; // current source column
  142. mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
  143. mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
  144. size_t alloc_indent_level;
  145. size_t num_indent_level;
  146. uint16_t *indent_level;
  147. size_t tok_line; // token source line
  148. size_t tok_column; // token source column
  149. mp_token_kind_t tok_kind; // token kind
  150. vstr_t vstr; // token data
  151. } mp_lexer_t;
  152. mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
  153. mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len);
  154. void mp_lexer_free(mp_lexer_t *lex);
  155. void mp_lexer_to_next(mp_lexer_t *lex);
  /******************************************************************/
  // Platform-specific import functions; these must be implemented by each port.
  // TODO tidy up, rename, or put elsewhere

  //mp_lexer_t *mp_import_open_file(qstr mod_name);
  // Result of mp_import_stat(): what, if anything, exists at a given path.
  typedef enum {
      MP_IMPORT_STAT_NO_EXIST,    // nothing exists at the path
      MP_IMPORT_STAT_DIR,         // the path is a directory
      MP_IMPORT_STAT_FILE,        // the path is a file
  } mp_import_stat_t;
  165. mp_import_stat_t mp_import_stat(const char *path);
  166. mp_lexer_t *mp_lexer_new_from_file(const char *filename);
  167. #if MICROPY_HELPER_LEXER_UNIX
  168. mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd);
  169. #endif
  170. #endif // MICROPY_INCLUDED_PY_LEXER_H