fork download
  1. /* file: "tinyc.c" */
  2.  
  3. /* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */
  4.  
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8.  
  9. /*
  10.  * This is a compiler for the Tiny-C language. Tiny-C is a
  11.  * considerably stripped down version of C and it is meant as a
  12.  * pedagogical tool for learning about compilers. The integer global
  13.  * variables "a" to "z" are predefined and initialized to zero, and it
  14.  * is not possible to declare new variables. The compiler reads the
  15.  * program from standard input and prints out the value of the
  16.  * variables that are not zero. The grammar of Tiny-C in EBNF is:
  17.  *
  18.  * <program> ::= <statement>
  19.  * <statement> ::= "if" <paren_expr> <statement> |
  20.  * "if" <paren_expr> <statement> "else" <statement> |
  21.  * "while" <paren_expr> <statement> |
  22.  * "do" <statement> "while" <paren_expr> ";" |
  23.  * "{" { <statement> } "}" |
  24.  * <expr> ";" |
  25.  * ";"
  26.  * <paren_expr> ::= "(" <expr> ")"
  27.  * <expr> ::= <test> | <id> "=" <expr>
  28.  * <test> ::= <sum> | <sum> "<" <sum>
  29.  * <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term>
  30.  * <term> ::= <id> | <int> | <paren_expr>
  31.  * <id> ::= "a" | "b" | "c" | "d" | ... | "z"
  32.  * <int> ::= <an_unsigned_decimal_integer>
  33.  *
  34.  * Here are a few invocations of the compiler:
  35.  *
  36.  * % echo "a=b=c=2<3;" | ./a.out
  37.  * a = 1
  38.  * b = 1
  39.  * c = 1
  40.  * % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out
  41.  * i = 128
  42.  * % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out
  43.  * i = 25
  44.  * j = 25
  45.  * % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out
  46.  * i = 51
  47.  * % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out
  48.  * i = 51
  49.  * % echo "{ i=7; if (i<5) x=1; if (i<10) y=2; }" | ./a.out
  50.  * i = 7
  51.  * y = 2
  52.  *
  53.  * The compiler does a minimal amount of error checking to help
  54.  * highlight the structure of the compiler.
  55.  */
  56.  
  57.  
  58. /*---------------------------------------------------------------------------*/
  59.  
  60. /* Lexer. */
  61.  
  62. enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
  63. PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };
  64.  
  65. char *words[] = { "do", "else", "if", "while", NULL };
  66.  
  67. int ch = ' ';
  68. int sym;
  69. int int_val;
  70. char id_name[100];
  71.  
  72. void syntax_error() { fprintf(stderr, "syntax error\n"); exit(1); }
  73.  
  74. void next_ch() { ch = getchar(); }
  75.  
  76. void next_sym()
  77. { again: switch (ch)
  78. { case ' ': case '\n': next_ch(); goto again;
  79. case EOF: sym = EOI; break;
  80. case '{': next_ch(); sym = LBRA; break;
  81. case '}': next_ch(); sym = RBRA; break;
  82. case '(': next_ch(); sym = LPAR; break;
  83. case ')': next_ch(); sym = RPAR; break;
  84. case '+': next_ch(); sym = PLUS; break;
  85. case '-': next_ch(); sym = MINUS; break;
  86. case '<': next_ch(); sym = LESS; break;
  87. case ';': next_ch(); sym = SEMI; break;
  88. case '=': next_ch(); sym = EQUAL; break;
  89. default:
  90. if (ch >= '0' && ch <= '9')
  91. { int_val = 0; /* missing overflow check */
  92. while (ch >= '0' && ch <= '9')
  93. { int_val = int_val*10 + (ch - '0'); next_ch(); }
  94. sym = INT;
  95. }
  96. else if (ch >= 'a' && ch <= 'z')
  97. { int i = 0; /* missing overflow check */
  98. while ((ch >= 'a' && ch <= 'z') || ch == '_')
  99. { id_name[i++] = ch; next_ch(); }
  100. id_name[i] = '\0';
  101. sym = 0;
  102. while (words[sym] != NULL && strcmp(words[sym], id_name) != 0)
  103. sym++;
  104. if (words[sym] == NULL)
  105. if (id_name[1] == '\0') sym = ID; else syntax_error();
  106. }
  107. else
  108. syntax_error();
  109. }
  110. }
  111.  
  112. /*---------------------------------------------------------------------------*/
  113.  
  114. /* Parser. */
  115.  
  116. enum { VAR, CST, ADD, SUB, LT, SET,
  117. IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG };
  118.  
  119. struct node { int kind; struct node *o1, *o2, *o3; int val; };
  120. typedef struct node node;
  121.  
  122. node *new_node(int k)
  123. { node *x = (node*)malloc(sizeof(node)); x->kind = k; return x; }
  124.  
  125. node *paren_expr(); /* forward declaration */
  126.  
  127. node *term() /* <term> ::= <id> | <int> | <paren_expr> */
  128. { node *x;
  129. if (sym == ID) { x=new_node(VAR); x->val=id_name[0]-'a'; next_sym(); }
  130. else if (sym == INT) { x=new_node(CST); x->val=int_val; next_sym(); }
  131. else x = paren_expr();
  132. return x;
  133. }
  134.  
  135. node *sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */
  136. { node *t, *x = term();
  137. while (sym == PLUS || sym == MINUS)
  138. { t=x; x=new_node(sym==PLUS?ADD:SUB); next_sym(); x->o1=t; x->o2=term(); }
  139. return x;
  140. }
  141.  
  142. node *test() /* <test> ::= <sum> | <sum> "<" <sum> */
  143. { node *t, *x = sum();
  144. if (sym == LESS)
  145. { t=x; x=new_node(LT); next_sym(); x->o1=t; x->o2=sum(); }
  146. return x;
  147. }
  148.  
  149. node *expr() /* <expr> ::= <test> | <id> "=" <expr> */
  150. { node *t, *x;
  151. if (sym != ID) return test();
  152. x = test();
  153. if (x->kind == VAR && sym == EQUAL)
  154. { t=x; x=new_node(SET); next_sym(); x->o1=t; x->o2=expr(); }
  155. return x;
  156. }
  157.  
  158. node *paren_expr() /* <paren_expr> ::= "(" <expr> ")" */
  159. { node *x;
  160. if (sym == LPAR) next_sym(); else syntax_error();
  161. x = expr();
  162. if (sym == RPAR) next_sym(); else syntax_error();
  163. return x;
  164. }
  165.  
  166. node *statement()
  167. { node *t, *x;
  168. if (sym == IF_SYM) /* "if" <paren_expr> <statement> */
  169. { x = new_node(IF1);
  170. next_sym();
  171. x->o1 = paren_expr();
  172. x->o2 = statement();
  173. if (sym == ELSE_SYM) /* ... "else" <statement> */
  174. { x->kind = IF2;
  175. next_sym();
  176. x->o3 = statement();
  177. }
  178. }
  179. else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */
  180. { x = new_node(WHILE);
  181. next_sym();
  182. x->o1 = paren_expr();
  183. x->o2 = statement();
  184. }
  185. else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */
  186. { x = new_node(DO);
  187. next_sym();
  188. x->o1 = statement();
  189. if (sym == WHILE_SYM) next_sym(); else syntax_error();
  190. x->o2 = paren_expr();
  191. if (sym == SEMI) next_sym(); else syntax_error();
  192. }
  193. else if (sym == SEMI) /* ";" */
  194. { x = new_node(EMPTY); next_sym(); }
  195. else if (sym == LBRA) /* "{" { <statement> } "}" */
  196. { x = new_node(EMPTY);
  197. next_sym();
  198. while (sym != RBRA)
  199. { t=x; x=new_node(SEQ); x->o1=t; x->o2=statement(); }
  200. next_sym();
  201. }
  202. else /* <expr> ";" */
  203. { x = new_node(EXPR);
  204. x->o1 = expr();
  205. if (sym == SEMI) next_sym(); else syntax_error();
  206. }
  207. return x;
  208. }
  209.  
  210. node *program() /* <program> ::= <statement> */
  211. { node *x = new_node(PROG);
  212. next_sym(); x->o1 = statement(); if (sym != EOI) syntax_error();
  213. return x;
  214. }
  215.  
  216. /*---------------------------------------------------------------------------*/
  217.  
  218. /* Code generator. */
  219.  
  220. enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };
  221.  
  222. typedef char code;
  223. code object[1000], *here = object;
  224.  
  225. void g(code c) { *here++ = c; } /* missing overflow check */
  226. code *hole() { return here++; }
  227. void fix(code *src, code *dst) { *src = dst-src; } /* missing overflow check */
  228.  
  229. void c(node *x)
  230. { code *p1, *p2;
  231. switch (x->kind)
  232. { case VAR : g(IFETCH); g(x->val); break;
  233. case CST : g(IPUSH); g(x->val); break;
  234. case ADD : c(x->o1); c(x->o2); g(IADD); break;
  235. case SUB : c(x->o1); c(x->o2); g(ISUB); break;
  236. case LT : c(x->o1); c(x->o2); g(ILT); break;
  237. case SET : c(x->o2); g(ISTORE); g(x->o1->val); break;
  238. case IF1 : c(x->o1); g(JZ); p1=hole(); c(x->o2); fix(p1,here); break;
  239. case IF2 : c(x->o1); g(JZ); p1=hole(); c(x->o2); g(JMP); p2=hole();
  240. fix(p1,here); c(x->o3); fix(p2,here); break;
  241. case WHILE: p1=here; c(x->o1); g(JZ); p2=hole(); c(x->o2);
  242. g(JMP); fix(hole(),p1); fix(p2,here); break;
  243. case DO : p1=here; c(x->o1); c(x->o2); g(JNZ); fix(hole(),p1); break;
  244. case EMPTY: break;
  245. case SEQ : c(x->o1); c(x->o2); break;
  246. case EXPR : c(x->o1); g(IPOP); break;
  247. case PROG : c(x->o1); g(HALT); break;
  248. }
  249. }
  250.  
  251. /*---------------------------------------------------------------------------*/
  252.  
  253. /* Virtual machine. */
  254.  
  255. int globals[26];
  256.  
  257. void run()
  258. { int stack[1000], *sp = stack;
  259. code *pc = object;
  260. again: switch (*pc++)
  261. { case IFETCH: *sp++ = globals[*pc++]; goto again;
  262. case ISTORE: globals[*pc++] = sp[-1]; goto again;
  263. case IPUSH : *sp++ = *pc++; goto again;
  264. case IPOP : --sp; goto again;
  265. case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; goto again;
  266. case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; goto again;
  267. case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; goto again;
  268. case JMP : pc += *pc; goto again;
  269. case JZ : if (*--sp == 0) pc += *pc; else pc++; goto again;
  270. case JNZ : if (*--sp != 0) pc += *pc; else pc++; goto again;
  271. }
  272. }
  273.  
  274. /*---------------------------------------------------------------------------*/
  275.  
  276. /* Main program. */
  277.  
  278. int main()
  279. { int i;
  280.  
  281. c(program());
  282.  
  283. for (i=0; i<26; i++)
  284. globals[i] = 0;
  285. run();
  286. for (i=0; i<26; i++)
  287. if (globals[i] != 0)
  288. printf("%c = %d\n", 'a'+i, globals[i]);
  289.  
  290. return 0;
  291. }
  292.  
Runtime error #stdin #stdout #stderr 0s 2248KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
syntax error