#include "regex.h"
#define connect_e(src_node, dst_node, i) \
{ \
src_node->edges[i] = dst_node; \
src_node->noname[i] = true; \
}
#define connect(src_node, dst_node, c, i) \
{ \
connect_e(src_node, dst_node, i); \
src_node->labels[(int)c] = i; \
src_node->noname[i] = false; \
}
#define check_char(c) (isprint(c) && !iscntrl(c))
typedef struct _ReturnValue {
NFA head;
NFA tail;
int str_index;
} ReturnValue;
static NFA new_node(void);
static ReturnValue new_connect_node(NFA head, char c);
static ReturnValue compile_class(const char *str, int begin);
static ReturnValue nfa_compile(const char *str, int begin);
static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file);
static int count_id = 1;
static NFA new_node(void)
/* ノードを確保し値を初期化して返す
labelsの使わない添字には-1を入れておく */
{
NFA node
= malloc(sizeof(struct _NFA
)); node->edges[0] = node->edges[1] = NULL;
node->noname[0] = node->noname[1] = false;
memset(node
->labels
, -1, LABELS_SIZE
* sizeof(node
->labels
[0])); node->id = count_id++;
return node;
}
static ReturnValue new_connect_node(NFA head, char c)
/* head --[c]--> newnode */
{
NFA tail = new_node();
connect(head, tail, c, 0);
ReturnValue rv = {.head = head, .tail = tail};
return rv;
}
static NFA reverse_labels(NFA node)
{
for (int i = 0; i < LABELS_SIZE; i++) {
node->labels[i] = (node->labels[i] == -1 ? 0 : -1);
if (!check_char(i)) {
node->labels[i] = -1;
}
}
return node;
}
static ReturnValue compile_class(const char *str, int begin)
{
NFA head = new_node();
NFA tail = new_node();
bool reverse_flag = false;
if (str[begin] == '^') {
begin++;
reverse_flag = true;
}
for (int i = begin; str[i]; i++) {
switch (str[i]) {
case ']':
{
if (reverse_flag) {
head = reverse_labels(head);
}
ReturnValue rv = {.head = head, .tail = tail, .str_index = i};
return rv;
}
case '-':
{
char c1 = str[i-1];
char c2 = str[i+1];
for (; c1 <= c2; c1++)
connect(head, tail, c1, 0);
i++;
break;
}
default:
if (str[i+1] != '-') {
connect(head, tail, str[i], 0);
}
break;
}
}
fputs("クラスの終わりがない\n", stderr
); }
static ReturnValue compile_or(NFA head_node, NFA tail_node, const char *str, int i)
{
NFA head = new_node(), tail = new_node();
connect_e(head, head_node, 0);
connect_e(tail_node, tail, 0);
ReturnValue rv = nfa_compile(str, i + 1);
connect_e(head, rv.head, 1);
connect_e(rv.tail, tail, 0);
ReturnValue rv2 = {.head = head, .tail = tail, .str_index = rv.str_index};
return rv2;
}
static ReturnValue compile_repeat(NFA head_node, NFA tail_node, NFA cur_node)
{
NFA node, head = new_node(), tail = new_node();
bool flag = false;
if (head_node == cur_node) {
flag = true;
node = new_node();
} else {
for (node = head_node; node->edges[0] != cur_node; node = node->edges[0]);
}
connect_e(head, cur_node, 0);
connect_e(head, tail, 1);
connect_e(tail_node, tail, 0);
connect_e(tail_node, cur_node, 1);
node->edges[0] = head;
if (flag) head_node = head;
ReturnValue rv = {.head = head_node, .tail = tail};
return rv;
}
static ReturnValue nfa_compile(const char *str, int begin)
{
NFA head_node = new_node();
NFA tail_node = head_node;
NFA cur_node = head_node;
int i;
for (i = begin; str[i]; i++) {
switch (str[i]) {
case '[':
{
ReturnValue rv = compile_class(str, i + 1);
connect_e(tail_node, rv.head, 0);
cur_node = rv.head;
tail_node = rv.tail;
i = rv.str_index;
break;
}
case '(':
{
ReturnValue rv = nfa_compile(str, i + 1);
connect_e(tail_node, rv.head, 0);
cur_node = rv.head;
tail_node = rv.tail;
i = rv.str_index;
break;
}
case ')':
{
ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i};
return rv;
}
case '|':
{
ReturnValue rv = compile_or(head_node, tail_node, str, i);
head_node = rv.head;
tail_node = rv.tail;
cur_node = rv.head;
i = rv.str_index;
break;
}
case '*':
{
ReturnValue rv = compile_repeat(head_node, tail_node, cur_node);
head_node = rv.head;
tail_node = rv.tail;
break;
}
default:
{
ReturnValue rv = new_connect_node(tail_node, str[i]);
cur_node = rv.head;
tail_node = rv.tail;
break;
}
}
}
ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i - 1};
return rv;
}
static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file)
{
FILE
*fp
= fopen(dot_file
, "w"); if (fp == NULL) {
fputs("ファイルを読み込めない\n", stderr
); }
int id_array[100000];
memset(id_array
, 0, sizeof(id_array
));
bool do_node(NFA node, int index)
{
if (node->edges[index]) {
if (node->noname[index]) {
fprintf(fp
, "%d -> %d [label = \"%s\"];\n", node->id, node->edges[index]->id, "ε");
} else {
for (int i = 0; i < LABELS_SIZE; i++) {
if ((node->labels[i] != -1) && (index == node->labels[i])) {
if (i == '"' || i == '\\') {
fprintf(fp
, "%d -> %d [label = \"\\%c\"];\n", node->id, node->edges[index]->id, i);
} else {
fprintf(fp
, "%d -> %d [label = \"%c\"];\n", node->id, node->edges[index]->id, i);
}
}
}
}
return true;
}
return false;
}
void recur(NFA node)
{
if (id_array[node->id]) {
return;
} else {
id_array[node->id] = 1;
}
if (do_node(node, 0)) recur(node->edges[0]);
if (do_node(node, 1)) recur(node->edges[1]);
}
fputs("digraph test {\n", fp
); fputs("graph [rankdir = LR];\n", fp
); recur(head);
if (image_file != NULL) {
char buf[512];
sprintf(buf
, "dot -Tgif %s -o %s", dot_file
, image_file
); }
}
NFA regex_compile(const char *regstr)
{
ReturnValue rv = nfa_compile(regstr, 0);
return rv.head;
}
static char g_word[256];
static int g_word_index;
#define branch(edge, str) \
{ \
char *p = regex_recur(edge, str); \
if (p) return p; \
}
static char* regex_recur(NFA node, const char *str)
{
if (node->edges[0] == NULL && node->edges[1] == NULL) {
if (g_word_index == 0) return NULL;
g_word[g_word_index] = '\0';
return g_word;
}
for (int i = 0; i <= 1; i++) {
if (node->edges[i] && node->noname[i] && node->id > node->edges[i]->id) {
branch(node->edges[i], str);
}
}
for (int i = 0; i <= 1; i++) {
if (node->edges[i] && node->noname[i]) {
branch(node->edges[i], str);
}
}
int index = node->labels[(int)*str];
if (index != -1) {
g_word[g_word_index++] = *str;
branch(node->edges[index], str + 1);
}
return NULL;
}
char* regex_search(NFA node, const char *str)
{
for (int i = 0; str[i]; i++) {
g_word_index = 0;
branch(node, str + i);
}
return NULL;
}
int main(int argc, char *argv[])
{
if (argc == 1) return 1;
NFA head = regex_compile(argv[1]);
nfa_to_graphviz(head, "test.dot", "test.gif");
char line[256];
while (fgets(line
, sizeof(line
), stdin
)) { char *p = regex_search(head, line);
}
}
#include "regex.h"

#define connect_e(src_node, dst_node, i)    \
  {						\
    src_node->edges[i] = dst_node;		\
    src_node->noname[i] = true;			\
  }

#define connect(src_node, dst_node, c, i)	\
  {						\
    connect_e(src_node, dst_node, i);		\
    src_node->labels[(int)c] = i;		\
    src_node->noname[i] = false;		\
  }

#define check_char(c) (isprint(c) && !iscntrl(c))

typedef struct _ReturnValue {
  NFA head;
  NFA tail;
  int str_index;
} ReturnValue;

static NFA new_node(void);
static ReturnValue new_connect_node(NFA head, char c);
static ReturnValue compile_class(const char *str, int begin);
static ReturnValue nfa_compile(const char *str, int begin);
static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file);

static int count_id = 1;

static NFA new_node(void)
/* ノードを確保し値を初期化して返す
   labelsの使わない添字には-1を入れておく */
{
  NFA node = malloc(sizeof(struct _NFA));
  node->edges[0] = node->edges[1] = NULL;
  node->noname[0] = node->noname[1] = false;
  memset(node->labels, -1, LABELS_SIZE * sizeof(node->labels[0]));
  node->id = count_id++;
  return node;
}

static ReturnValue new_connect_node(NFA head, char c)
/* head --[c]--> newnode */
{
  NFA tail = new_node();
  connect(head, tail, c, 0);
  ReturnValue rv = {.head = head, .tail = tail};
  return rv;
}

static NFA reverse_labels(NFA node)
{
  for (int i = 0; i < LABELS_SIZE; i++) {
    node->labels[i] = (node->labels[i] == -1 ? 0 : -1);
    if (!check_char(i)) {
      node->labels[i] = -1;
    }
  }
  return node;
}

static ReturnValue compile_class(const char *str, int begin)
{
  NFA head = new_node();
  NFA tail = new_node();

  bool reverse_flag = false;

  if (str[begin] == '^') {
    begin++;
    reverse_flag = true;
  }

  for (int i = begin; str[i]; i++) {
    switch (str[i]) {
    case ']':
      {
	if (reverse_flag) {
	  head = reverse_labels(head);
	}
	ReturnValue rv = {.head = head, .tail = tail, .str_index = i};
	return rv;
      }
    case '-':
      {
	char c1 = str[i-1];
	char c2 = str[i+1];
	for (; c1 <= c2; c1++)
	  connect(head, tail, c1, 0);
	i++;
	break;
      }
    default:
      if (str[i+1] != '-') {
	connect(head, tail, str[i], 0);
      }
      break;
    }
  }

  fputs("クラスの終わりがない\n", stderr);
  exit(EXIT_FAILURE);
}

static ReturnValue compile_or(NFA head_node, NFA tail_node, const char *str, int i)
{
  NFA head = new_node(), tail = new_node();
  connect_e(head, head_node, 0);
  connect_e(tail_node, tail, 0);
  ReturnValue rv = nfa_compile(str, i + 1);
  connect_e(head, rv.head, 1);
  connect_e(rv.tail, tail, 0);

  ReturnValue rv2 = {.head = head, .tail = tail, .str_index = rv.str_index};
  return rv2;
}

static ReturnValue compile_repeat(NFA head_node, NFA tail_node, NFA cur_node)
{
  NFA node, head = new_node(), tail = new_node();
  bool flag = false;

  if (head_node == cur_node) {
    flag = true;
    node = new_node();
  } else {
    for (node = head_node; node->edges[0] != cur_node; node = node->edges[0]);
  }

  connect_e(head, cur_node, 0);
  connect_e(head, tail, 1);
  connect_e(tail_node, tail, 0);
  connect_e(tail_node, cur_node, 1);
  node->edges[0] = head;

  if (flag) head_node = head;
  ReturnValue rv = {.head = head_node, .tail = tail};
  return rv;
}

static ReturnValue nfa_compile(const char *str, int begin)
{
  NFA head_node = new_node();
  NFA tail_node = head_node;
  NFA cur_node = head_node;

  int i;
  for (i = begin; str[i]; i++) {
    switch (str[i]) {
    case '[':
      {
      	ReturnValue rv = compile_class(str, i + 1);
	connect_e(tail_node, rv.head, 0);
	cur_node = rv.head;
      	tail_node = rv.tail;
	i = rv.str_index;
      	break;
      }
    case '(':
      {
	ReturnValue rv = nfa_compile(str, i + 1);
	connect_e(tail_node, rv.head, 0);
	cur_node = rv.head;
	tail_node = rv.tail;
	i = rv.str_index;
	break;
      }
    case ')':
      {
	ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i};
	return rv;
      }
    case '|':
      {
	ReturnValue rv = compile_or(head_node, tail_node, str, i);
	head_node = rv.head;
	tail_node = rv.tail;
	cur_node = rv.head;
	i = rv.str_index;
	break;
      }
    case '*':
      {
	ReturnValue rv = compile_repeat(head_node, tail_node, cur_node);
	head_node = rv.head;
	tail_node = rv.tail;
	break;
      }
    default:
      {
	ReturnValue rv = new_connect_node(tail_node, str[i]);
	cur_node = rv.head;
	tail_node = rv.tail;
	break;
      }
    }
  }

  ReturnValue rv = {.head = head_node, .tail = tail_node, .str_index = i - 1};
  return rv;
}

static void nfa_to_graphviz(NFA head, const char *dot_file, const char *image_file)
{
  FILE *fp = fopen(dot_file, "w");
  if (fp == NULL) {
    fputs("ファイルを読み込めない\n", stderr);
    exit(EXIT_FAILURE);
  }

  int id_array[100000];
  memset(id_array, 0, sizeof(id_array));

  bool do_node(NFA node, int index)
  {
    if (node->edges[index]) {
      if (node->noname[index]) {
	fprintf(fp, "%d -> %d [label = \"%s\"];\n",
		node->id, node->edges[index]->id, "ε");
      } else {
	for (int i = 0; i < LABELS_SIZE; i++) {
	  if ((node->labels[i] != -1) && (index == node->labels[i])) {
	    if (i == '"' || i == '\\') {
	      fprintf(fp, "%d -> %d [label = \"\\%c\"];\n",
		      node->id, node->edges[index]->id, i);
	    } else {
	      fprintf(fp, "%d -> %d [label = \"%c\"];\n",
		      node->id, node->edges[index]->id, i);
	    }
	  }
	}
      }
      return true;
    }
    return false;
  }

  void recur(NFA node)
  {
    if (id_array[node->id]) {
      return;
    } else {
      id_array[node->id] = 1;
    }

    if (do_node(node, 0)) recur(node->edges[0]);
    if (do_node(node, 1)) recur(node->edges[1]);
  }

  fputs("digraph test {\n", fp);
  fputs("graph [rankdir = LR];\n", fp);
  recur(head);
  fputs("}\n", fp);

  fclose(fp);

  if (image_file != NULL) {
    char buf[512];
    sprintf(buf, "dot -Tgif %s -o %s", dot_file, image_file);
    system(buf);
  }
}

NFA regex_compile(const char *regstr)
{
  ReturnValue rv = nfa_compile(regstr, 0);
  return rv.head;
}


static char g_word[256];
static int g_word_index;

#define branch(edge, str)			\
  {						\
    char *p = regex_recur(edge, str);		\
    if (p) return p;				\
  }

static char* regex_recur(NFA node, const char *str)
{
  if (node->edges[0] == NULL && node->edges[1] == NULL) {
    if (g_word_index == 0) return NULL;
    g_word[g_word_index] = '\0';
    return g_word;
  }

  for (int i = 0; i <= 1; i++) {
    if (node->edges[i] && node->noname[i] && node->id > node->edges[i]->id) {
      branch(node->edges[i], str);
    }
  }

  for (int i = 0; i <= 1; i++) {
    if (node->edges[i] && node->noname[i]) {
      branch(node->edges[i], str);
    }
  }

  int index = node->labels[(int)*str];
  if (index != -1) {
    g_word[g_word_index++] = *str;
    branch(node->edges[index], str + 1);
  }

  return NULL;
}

char* regex_search(NFA node, const char *str)
{
  for (int i = 0; str[i]; i++) {
    g_word_index = 0;
    branch(node, str + i);
  }
  return NULL;
}

int main(int argc, char *argv[])
{
  if (argc == 1) return 1;

  NFA head = regex_compile(argv[1]);
  nfa_to_graphviz(head, "test.dot", "test.gif");

  char line[256];
  while (fgets(line, sizeof(line), stdin)) {
    char *p = regex_search(head, line);
    puts(p);
  }
}
