fork download
  1. #include "regex.h"
  2.  
  3. #define connect_e(src_node, dst_node, i) \
  4.   { \
  5.   src_node->edges[i] = dst_node; \
  6.   src_node->noname[i] = true; \
  7.   }
  8.  
  9. #define connect(src_node, dst_node, c, i) \
  10.   { \
  11.   connect_e(src_node, dst_node, i); \
  12.   src_node->labels[(int)c] = i; \
  13.   src_node->noname[i] = false; \
  14.   }
  15.  
  16. #define return_rv(headp, tailp, index) \
  17.   { \
  18.   ReturnValue rv = {.head = headp, \
  19. .tail = tailp, \
  20. .str_index = index}; \
  21.   return rv; \
  22.   }
  23.  
  24. typedef struct _ReturnValue {
  25. NFA head;
  26. NFA tail;
  27. int str_index;
  28. } ReturnValue;
  29.  
  30. static NFA new_node(void);
  31. static ReturnValue new_connect_node(NFA head, char c);
  32. static ReturnValue compile_class(const char *str, int begin);
  33. static ReturnValue nfa_compile(const char *str, int begin);
  34. static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file);
  35.  
  36. static int count_id = 1;
  37.  
  38. static inline bool check_char(char c)
  39. {
  40. return (isprint(c) && !iscntrl(c));
  41. }
  42.  
  43. static NFA new_node(void)
  44. /* ノードを確保し値を初期化して返す
  45.   labelsの使わない添字には-1を入れておく */
  46. {
  47. NFA node = malloc(sizeof(struct _NFA));
  48. node->edges[0] = node->edges[1] = NULL;
  49. node->noname[0] = node->noname[1] = false;
  50. memset(node->labels, -1, LABELS_SIZE * sizeof(node->labels[0]));
  51. node->id = count_id++;
  52. return node;
  53. }
  54.  
  55. static ReturnValue new_connect_node(NFA head, char c)
  56. /* head --[c]--> newnode */
  57. {
  58. NFA tail = new_node();
  59. connect(head, tail, c, 0);
  60. ReturnValue rv = {.head = head, .tail = tail};
  61. return rv;
  62. }
  63.  
  64. static NFA reverse_labels(NFA node)
  65. {
  66. for (int i = 0; i < LABELS_SIZE; i++) {
  67. node->labels[i] = (node->labels[i] == -1 ? 0 : -1);
  68. if (!check_char(i)) {
  69. node->labels[i] = -1;
  70. }
  71. }
  72. return node;
  73. }
  74.  
  75. static ReturnValue compile_class(const char *str, int begin)
  76. {
  77. NFA head = new_node();
  78. NFA tail = new_node();
  79.  
  80. bool reverse_flag = false;
  81.  
  82. if (str[begin] == '^') {
  83. begin++;
  84. reverse_flag = true;
  85. }
  86.  
  87. for (int i = begin; str[i]; i++) {
  88. switch (str[i]) {
  89. case ']':
  90. {
  91. if (reverse_flag) {
  92. head = reverse_labels(head);
  93. }
  94. ReturnValue rv = {.head = head, .tail = tail, .str_index = i};
  95. return rv;
  96. }
  97. case '-':
  98. {
  99. char c1 = str[i-1];
  100. char c2 = str[i+1];
  101.  
  102. for (; c1 <= c2; c1++) {
  103. connect(head, tail, c1, 0);
  104. }
  105.  
  106. i++;
  107. break;
  108. }
  109. default:
  110. if (str[i+1] != '-') {
  111. connect(head, tail, str[i], 0);
  112. }
  113. break;
  114. }
  115. }
  116.  
  117. fputs("クラスの終わりがない\n", stderr);
  118. exit(EXIT_FAILURE);
  119. }
  120.  
  121. static ReturnValue nfa_compile(const char *str, int begin)
  122. {
  123. NFA head_node = new_node();
  124. NFA tail_node = head_node;
  125.  
  126. int i;
  127. for (i = begin; str[i]; i++) {
  128. switch (str[i]) {
  129. case '[':
  130. {
  131. ReturnValue rv = compile_class(str, i + 1);
  132. connect_e(tail_node, rv.head, 0);
  133. tail_node = rv.tail;
  134. i = rv.str_index;
  135. break;
  136. }
  137. case '(':
  138. {
  139. ReturnValue rv = nfa_compile(str, i + 1);
  140. connect_e(tail_node, rv.head, 0);
  141. tail_node = rv.tail;
  142. i = rv.str_index;
  143. break;
  144. }
  145. case ')':
  146. {
  147. ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i};
  148. return rv;
  149. }
  150. case '|':
  151. {
  152. NFA head = new_node(), tail = new_node();
  153. connect_e(head, head_node, 0);
  154. connect_e(tail_node, tail, 0);
  155. ReturnValue rv = nfa_compile(str, i + 1);
  156. connect_e(head, rv.head, 1);
  157. connect_e(rv.tail, tail, 0);
  158. i = rv.str_index;
  159. head_node = head, tail_node = tail;
  160. break;
  161. }
  162. case '*':
  163. {
  164. NFA head = new_node(), tail = new_node();
  165. connect_e(head, head_node, 0);
  166. connect_e(tail_node, tail, 0);
  167. connect_e(head, tail, 1);
  168. connect_e(tail_node, head_node, 1);
  169. head_node = head;
  170. tail_node = tail;
  171. break;
  172. }
  173. default:
  174. {
  175. ReturnValue rv = new_connect_node(tail_node, str[i]);
  176. tail_node = rv.tail;
  177. break;
  178. }
  179. }
  180. }
  181.  
  182. ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i - 1};
  183. return rv;
  184. }
  185.  
  186. static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file)
  187. {
  188. FILE *fp = fopen(dot_file, "w");
  189. if (fp == NULL) {
  190. fputs("ファイルを読み込めない\n", stderr);
  191. exit(EXIT_FAILURE);
  192. }
  193.  
  194. int id_array[100000];
  195. memset(id_array, 0, sizeof(id_array));
  196.  
  197. bool do_node(NFA node, int index)
  198. {
  199. if (node->edges[index]) {
  200. if (node->noname[index]) {
  201. fprintf(fp, "%d -> %d [label = \"%s\"];\n",
  202. node->id, node->edges[index]->id, "ε");
  203. } else {
  204. for (int i = 0; i < LABELS_SIZE; i++) {
  205. if ((node->labels[i] != -1) && (index == node->labels[i])) {
  206. if (i == '"' || i == '\\') {
  207. fprintf(fp, "%d -> %d [label = \"\\%c\"];\n",
  208. node->id, node->edges[index]->id, i);
  209. } else {
  210. fprintf(fp, "%d -> %d [label = \"%c\"];\n",
  211. node->id, node->edges[index]->id, i);
  212. }
  213. }
  214. }
  215. }
  216. return true;
  217. }
  218. return false;
  219. }
  220.  
  221. void recur(NFA node)
  222. {
  223. if (id_array[node->id]) {
  224. return;
  225. } else {
  226. id_array[node->id] = 1;
  227. }
  228.  
  229. if (do_node(node, 0)) recur(node->edges[0]);
  230. if (do_node(node, 1)) recur(node->edges[1]);
  231. }
  232.  
  233. fputs("digraph test {\n", fp);
  234. fputs("graph [rankdir = LR];\n", fp);
  235. recur(head);
  236. fputs("}\n", fp);
  237.  
  238. fclose(fp);
  239.  
  240. if (image_file != NULL) {
  241. char buf[512];
  242. sprintf(buf, "dot -Tgif %s -o %s", dot_file, image_file);
  243. system(buf);
  244. }
  245. }
  246.  
  247. NFA regex_compile(const char *regstr)
  248. {
  249. ReturnValue rv = nfa_compile(regstr, 0);
  250. return rv.head;
  251. }
  252.  
  253.  
  254. static char g_word[256];
  255. static int g_word_index;
  256.  
  257. #define branch(edge, str) \
  258.   { \
  259.   char *p = regex_recur(edge, str); \
  260.   if (p) return p; \
  261.   }
  262.  
  263. static char* regex_recur(NFA node, const char *str)
  264. {
  265. if (node->edges[0] == NULL && node->edges[1] == NULL) {
  266. if (g_word_index == 0) return NULL;
  267. g_word[g_word_index] = '\0';
  268. return g_word;
  269. }
  270.  
  271. for (int i = 0; i <= 1; i++) {
  272. if (node->edges[i] && node->noname[i] && node->id > node->edges[i]->id) {
  273. branch(node->edges[i], str);
  274. }
  275. }
  276.  
  277. for (int i = 0; i <= 1; i++) {
  278. if (node->edges[i] && node->noname[i]) {
  279. branch(node->edges[i], str);
  280. }
  281. }
  282.  
  283. int index = node->labels[(int)*str];
  284. if (index != -1) {
  285. g_word[g_word_index++] = *str;
  286. branch(node->edges[index], str + 1);
  287. }
  288.  
  289. return NULL;
  290. }
  291.  
  292. char* regex_search(NFA node, const char *str)
  293. {
  294. for (int i = 0; str[i]; i++) {
  295. g_word_index = 0;
  296. branch(node, str + i);
  297. }
  298. return NULL;
  299. }
  300.  
  301. int main(void)
  302. {
  303. NFA head = regex_compile("[a-z]*");
  304. //nfa_to_graphviz(head, "test.dot", "test.gif");
  305. char *p = regex_search(head, "213abcdefgjf3");
  306. puts(p);
  307.  
  308. return 0;
  309. }
  310.  
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty