re1.5.h 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. // Copyright 2007-2009 Russ Cox. All Rights Reserved.
  2. // Copyright 2014 Paul Sokolovsky.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. #ifndef _RE1_5_REGEXP__H
  6. #define _RE1_5_REGEXP__H
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <stdarg.h>
  11. #include <assert.h>
  /* Plan 9 style spelling of the null pointer constant. */
  #define nil ((void *)0)

  /*
   * Element count of the array x.  Only meaningful when x is a real array;
   * applied to a pointer it divides the pointer size by the element size.
   */
  #define nelem(x) (sizeof(x) / sizeof((x)[0]))
  /*
   * Short type names for the struct tags used throughout this header, so
   * the declarations below can write `Regexp *` instead of `struct Regexp *`.
   */
  typedef struct ByteProg ByteProg;
  typedef struct Inst     Inst;
  typedef struct Prog     Prog;
  typedef struct Regexp   Regexp;
  typedef struct Subject  Subject;
  19. struct Regexp
  20. {
  21. int type;
  22. int n;
  23. int ch;
  24. Regexp *left;
  25. Regexp *right;
  26. };
  /* Values stored in Regexp.type.  Numbering starts at 1, so 0 is unused. */
  enum /* Regexp.type */
  {
      Alt   = 1,
      Cat   = 2,
      Lit   = 3,
      Dot   = 4,
      Paren = 5,
      Quest = 6,
      Star  = 7,
      Plus  = 8
  };
  38. Regexp *parse(char*);
  39. Regexp *reg(int type, Regexp *left, Regexp *right);
  40. void printre(Regexp*);
  /*
   * Hooks that may be overridden by defining a macro of the same name
   * before this header is included.
   *
   * re1_5_fatal:     when no macro is defined, declared as an ordinary
   *                  function taking a char* argument.
   * re1_5_stack_chk: when no macro is defined, expands to nothing.
   */
  #if !defined(re1_5_fatal)
  void re1_5_fatal(char *);
  #endif

  #if !defined(re1_5_stack_chk)
  #define re1_5_stack_chk()
  #endif
  /*
   * NOTE(review): presumably the project's allocation helper taking a byte
   * count; failure behaviour is not visible here -- confirm at the definition.
   */
  void *mal(int);
  48. struct Prog
  49. {
  50. Inst *start;
  51. int len;
  52. };
  /*
   * Header of a bytecode program; the bytecode itself trails the struct in
   * `insts`.
   * NOTE(review): presumably bytelen is the size of insts in bytes, len the
   * instruction count and sub the number of capture slots -- confirm against
   * the bytecode compiler.
   * `insts[0]` is a zero-length array (a GNU extension).  It is kept rather
   * than converted to a C99 flexible array member because code embedding
   * ByteProg inside another struct may depend on the current form.
   */
  struct ByteProg {
      int  bytelen;
      int  len;
      int  sub;
      char insts[0];   /* variable-length tail */
  };
  60. struct Inst
  61. {
  62. int opcode;
  63. int c;
  64. int n;
  65. Inst *x;
  66. Inst *y;
  67. int gen; // global state, oooh!
  68. };
  /*
   * Values stored in Inst.opcode.  Opcodes are grouped into numeric ranges,
   * each introduced by an upper-case marker (CONSUMERS, ASSERTS, JUMPS) that
   * aliases the first opcode of its group; the classification macros below
   * rely on this layout.
   */
  enum /* Inst.opcode */
  {
      // Instructions which consume input bytes (and thus fail if none left)
      CONSUMERS = 1,
      Char = CONSUMERS,
      Any,
      Class,
      ClassNot,
      NamedClass,

      // Opcodes in the ASSERTS group
      ASSERTS = 0x50,
      Bol = ASSERTS,
      Eol,

      // Instructions which take relative offset as arg
      JUMPS = 0x60,
      Jmp = JUMPS,
      Split,
      RSplit,

      // Other (special) instructions
      Save = 0x7e,
      Match = 0x7f,
  };

  /* Non-zero when the opcode value lies below the ASSERTS marker. */
  #define inst_is_consumer(inst) ((inst) < ASSERTS)

  /*
   * Non-zero when the opcode lies in the JUMPS range (0x60..0x6f).
   * The mask must be parenthesized explicitly: `==` binds tighter than `&`,
   * so `(inst) & 0x70 == JUMPS` would evaluate as `(inst) & 0` and always
   * yield 0.
   */
  #define inst_is_jump(inst) (((inst) & 0x70) == JUMPS)
  92. Prog *compile(Regexp*);
  93. void printprog(Prog*);
  94. extern int gen;
  /* Capacity of the Sub.sub pointer array. */
  enum { MAXSUB = 20 };

  typedef struct Sub Sub;

  /*
   * A set of up to MAXSUB saved subject pointers.
   * NOTE(review): presumably `ref` is a reference count managed by
   * incref()/decref() and `nsub` the number of slots in use -- confirm
   * against the implementation.
   */
  struct Sub {
      int         ref;
      int         nsub;
      const char *sub[MAXSUB];
  };
  105. Sub *newsub(int n);
  106. Sub *incref(Sub*);
  107. Sub *copy(Sub*);
  108. Sub *update(Sub*, int, const char*);
  109. void decref(Sub*);
  /*
   * The text being matched, described by a pair of pointers.
   * NOTE(review): presumably `end` points one past the last byte of the
   * subject -- confirm against the matchers.
   */
  struct Subject
  {
      const char *begin;
      const char *end;
  };
  /* Offset that HANDLE_ANCHORED adds to the bytecode pointer when anchored. */
  #define NON_ANCHORED_PREFIX 5

  /*
   * Yields `bytecode` advanced by NON_ANCHORED_PREFIX when `is_anchored` is
   * non-zero, and `bytecode` unchanged otherwise.
   * NOTE(review): presumably the first NON_ANCHORED_PREFIX bytes of a
   * compiled program implement the unanchored search -- confirm against the
   * bytecode compiler.
   */
  #define HANDLE_ANCHORED(bytecode, is_anchored) \
      ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode))
  116. int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int);
  117. int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int);
  118. int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int);
  119. int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int);
  120. int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int);
  121. int re1_5_sizecode(const char *re);
  122. int re1_5_compilecode(ByteProg *prog, const char *re);
  123. void re1_5_dumpcode(ByteProg *prog);
  124. void cleanmarks(ByteProg *prog);
  125. int _re1_5_classmatch(const char *pc, const char *sp);
  126. int _re1_5_namedclassmatch(const char *pc, const char *sp);
  127. #endif /*_RE1_5_REGEXP__H*/