fork download
  1. #include "regex.h"
  2.  
  3. #define connect_e(src_node, dst_node, i) \
  4.   { \
  5.   src_node->edges[i] = dst_node; \
  6.   src_node->noname[i] = true; \
  7.   }
  8.  
  9. #define connect(src_node, dst_node, c, i) \
  10.   { \
  11.   connect_e(src_node, dst_node, i); \
  12.   src_node->labels[(int)c] = i; \
  13.   src_node->noname[i] = false; \
  14.   }
  15.  
  16. #define check_char(c) (isprint(c) && !iscntrl(c))
  17.  
  18. typedef struct _ReturnValue {
  19. NFA head;
  20. NFA tail;
  21. int str_index;
  22. } ReturnValue;
  23.  
  24. static NFA new_node(void);
  25. static ReturnValue new_connect_node(NFA head, char c);
  26. static ReturnValue compile_class(const char *str, int begin);
  27. static ReturnValue nfa_compile(const char *str, int begin);
  28. static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file);
  29.  
  30. static int count_id = 1;
  31.  
  32. static NFA new_node(void)
  33. /* ノードを確保し値を初期化して返す
  34.   labelsの使わない添字には-1を入れておく */
  35. {
  36. NFA node = malloc(sizeof(struct _NFA));
  37. node->edges[0] = node->edges[1] = NULL;
  38. node->noname[0] = node->noname[1] = false;
  39. memset(node->labels, -1, LABELS_SIZE * sizeof(node->labels[0]));
  40. node->id = count_id++;
  41. return node;
  42. }
  43.  
  44. static ReturnValue new_connect_node(NFA head, char c)
  45. /* head --[c]--> newnode */
  46. {
  47. NFA tail = new_node();
  48. connect(head, tail, c, 0);
  49. ReturnValue rv = {.head = head, .tail = tail};
  50. return rv;
  51. }
  52.  
  53. static NFA reverse_labels(NFA node)
  54. {
  55. for (int i = 0; i < LABELS_SIZE; i++) {
  56. node->labels[i] = (node->labels[i] == -1 ? 0 : -1);
  57. if (!check_char(i)) {
  58. node->labels[i] = -1;
  59. }
  60. }
  61. return node;
  62. }
  63.  
  64. static ReturnValue compile_class(const char *str, int begin)
  65. {
  66. NFA head = new_node();
  67. NFA tail = new_node();
  68.  
  69. bool reverse_flag = false;
  70.  
  71. if (str[begin] == '^') {
  72. begin++;
  73. reverse_flag = true;
  74. }
  75.  
  76. for (int i = begin; str[i]; i++) {
  77. switch (str[i]) {
  78. case ']':
  79. {
  80. if (reverse_flag) {
  81. head = reverse_labels(head);
  82. }
  83. ReturnValue rv = {.head = head, .tail = tail, .str_index = i};
  84. return rv;
  85. }
  86. case '-':
  87. {
  88. char c1 = str[i-1];
  89. char c2 = str[i+1];
  90. for (; c1 <= c2; c1++)
  91. connect(head, tail, c1, 0);
  92. i++;
  93. break;
  94. }
  95. default:
  96. if (str[i+1] != '-') {
  97. connect(head, tail, str[i], 0);
  98. }
  99. break;
  100. }
  101. }
  102.  
  103. fputs("クラスの終わりがない\n", stderr);
  104. exit(EXIT_FAILURE);
  105. }
  106.  
  107. static ReturnValue compile_or(NFA head_node, NFA tail_node, const char *str, int i)
  108. {
  109. NFA head = new_node(), tail = new_node();
  110. connect_e(head, head_node, 0);
  111. connect_e(tail_node, tail, 0);
  112. ReturnValue rv = nfa_compile(str, i + 1);
  113. connect_e(head, rv.head, 1);
  114. connect_e(rv.tail, tail, 0);
  115.  
  116. ReturnValue rv2 = {.head = head, .tail = tail, .str_index = rv.str_index};
  117. return rv2;
  118. }
  119.  
  120. static ReturnValue compile_repeat(NFA head_node, NFA tail_node, NFA cur_node)
  121. {
  122. NFA node, head = new_node(), tail = new_node();
  123. bool flag = false;
  124.  
  125. if (head_node == cur_node) {
  126. flag = true;
  127. node = new_node();
  128. } else {
  129. for (node = head_node; node->edges[0] != cur_node; node = node->edges[0]);
  130. }
  131.  
  132. connect_e(head, cur_node, 0);
  133. connect_e(head, tail, 1);
  134. connect_e(tail_node, tail, 0);
  135. connect_e(tail_node, cur_node, 1);
  136. node->edges[0] = head;
  137.  
  138. if (flag) head_node = head;
  139. ReturnValue rv = {.head = head_node, .tail = tail};
  140. return rv;
  141. }
  142.  
  143. static ReturnValue nfa_compile(const char *str, int begin)
  144. {
  145. NFA head_node = new_node();
  146. NFA tail_node = head_node;
  147. NFA cur_node = head_node;
  148.  
  149. int i;
  150. for (i = begin; str[i]; i++) {
  151. switch (str[i]) {
  152. case '[':
  153. {
  154. ReturnValue rv = compile_class(str, i + 1);
  155. connect_e(tail_node, rv.head, 0);
  156. cur_node = rv.head;
  157. tail_node = rv.tail;
  158. i = rv.str_index;
  159. break;
  160. }
  161. case '(':
  162. {
  163. ReturnValue rv = nfa_compile(str, i + 1);
  164. connect_e(tail_node, rv.head, 0);
  165. cur_node = rv.head;
  166. tail_node = rv.tail;
  167. i = rv.str_index;
  168. break;
  169. }
  170. case ')':
  171. {
  172. ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i};
  173. return rv;
  174. }
  175. case '|':
  176. {
  177. ReturnValue rv = compile_or(head_node, tail_node, str, i);
  178. head_node = rv.head;
  179. tail_node = rv.tail;
  180. cur_node = rv.head;
  181. i = rv.str_index;
  182. break;
  183. }
  184. case '*':
  185. {
  186. ReturnValue rv = compile_repeat(head_node, tail_node, cur_node);
  187. head_node = rv.head;
  188. tail_node = rv.tail;
  189. break;
  190. }
  191. default:
  192. {
  193. ReturnValue rv = new_connect_node(tail_node, str[i]);
  194. cur_node = rv.head;
  195. tail_node = rv.tail;
  196. break;
  197. }
  198. }
  199. }
  200.  
  201. ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i - 1};
  202. return rv;
  203. }
  204.  
  205. static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file)
  206. {
  207. FILE *fp = fopen(dot_file, "w");
  208. if (fp == NULL) {
  209. fputs("ファイルを読み込めない\n", stderr);
  210. exit(EXIT_FAILURE);
  211. }
  212.  
  213. int id_array[100000];
  214. memset(id_array, 0, sizeof(id_array));
  215.  
  216. bool do_node(NFA node, int index)
  217. {
  218. if (node->edges[index]) {
  219. if (node->noname[index]) {
  220. fprintf(fp, "%d -> %d [label = \"%s\"];\n",
  221. node->id, node->edges[index]->id, "ε");
  222. } else {
  223. for (int i = 0; i < LABELS_SIZE; i++) {
  224. if ((node->labels[i] != -1) && (index == node->labels[i])) {
  225. if (i == '"' || i == '\\') {
  226. fprintf(fp, "%d -> %d [label = \"\\%c\"];\n",
  227. node->id, node->edges[index]->id, i);
  228. } else {
  229. fprintf(fp, "%d -> %d [label = \"%c\"];\n",
  230. node->id, node->edges[index]->id, i);
  231. }
  232. }
  233. }
  234. }
  235. return true;
  236. }
  237. return false;
  238. }
  239.  
  240. void recur(NFA node)
  241. {
  242. if (id_array[node->id]) {
  243. return;
  244. } else {
  245. id_array[node->id] = 1;
  246. }
  247.  
  248. if (do_node(node, 0)) recur(node->edges[0]);
  249. if (do_node(node, 1)) recur(node->edges[1]);
  250. }
  251.  
  252. fputs("digraph test {\n", fp);
  253. fputs("graph [rankdir = LR];\n", fp);
  254. recur(head);
  255. fputs("}\n", fp);
  256.  
  257. fclose(fp);
  258.  
  259. if (image_file != NULL) {
  260. char buf[512];
  261. sprintf(buf, "dot -Tgif %s -o %s", dot_file, image_file);
  262. system(buf);
  263. }
  264. }
  265.  
  266. NFA regex_compile(const char *regstr)
  267. {
  268. ReturnValue rv = nfa_compile(regstr, 0);
  269. return rv.head;
  270. }
  271.  
  272.  
  273. static char g_word[256];
  274. static int g_word_index;
  275.  
  276. #define branch(edge, str) \
  277.   { \
  278.   char *p = regex_recur(edge, str); \
  279.   if (p) return p; \
  280.   }
  281.  
  282. static char* regex_recur(NFA node, const char *str)
  283. {
  284. if (node->edges[0] == NULL && node->edges[1] == NULL) {
  285. if (g_word_index == 0) return NULL;
  286. g_word[g_word_index] = '\0';
  287. return g_word;
  288. }
  289.  
  290. for (int i = 0; i <= 1; i++) {
  291. if (node->edges[i] && node->noname[i] && node->id > node->edges[i]->id) {
  292. branch(node->edges[i], str);
  293. }
  294. }
  295.  
  296. for (int i = 0; i <= 1; i++) {
  297. if (node->edges[i] && node->noname[i]) {
  298. branch(node->edges[i], str);
  299. }
  300. }
  301.  
  302. int index = node->labels[(int)*str];
  303. if (index != -1) {
  304. g_word[g_word_index++] = *str;
  305. branch(node->edges[index], str + 1);
  306. }
  307.  
  308. return NULL;
  309. }
  310.  
  311. char* regex_search(NFA node, const char *str)
  312. {
  313. for (int i = 0; str[i]; i++) {
  314. g_word_index = 0;
  315. branch(node, str + i);
  316. }
  317. return NULL;
  318. }
  319.  
  320. int main(int argc, char *argv[])
  321. {
  322. if (argc == 1) return 1;
  323.  
  324. NFA head = regex_compile(argv[1]);
  325. nfa_to_graphviz(head, "test.dot", "test.gif");
  326.  
  327. char line[256];
  328. while (fgets(line, sizeof(line), stdin)) {
  329. char *p = regex_search(head, line);
  330. puts(p);
  331. }
  332. }
  333.  
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty