/* Page controls captured together with the listing: fork, download */
  1. # include <stdio.h>
  2. # include <stdbool.h>
  3. # include <stdlib.h>
  4. # include <string.h>
  5.  
  6. # define NOT !
  7. # define AND &&
  8. # define OR ||
  9. # define EQ ==
  10.  
  11. enum TokenType { IDENTIFIER = 34313, CONSTANT = 87424, SPECIAL = 29535 };
  12. typedef char * CharPtr;
  13.  
  14. typedef struct tokenInfo {
  15. CharPtr token_str;
  16. enum TokenType token_type;
  17. int first_char_line;
  18. int first_char_column;
  19. }TokenInfo;
  20.  
  21. typedef TokenInfo * TokenInfoPtr;
  22.  
  23. typedef struct Column {
  24. int column;
  25. struct Column * next;
  26. }Column;
  27.  
  28. typedef Column * ColumnPtr;
  29.  
  30. typedef struct Line {
  31. int line;
  32. ColumnPtr first_appear_at;
  33. ColumnPtr last_appear_at;
  34. struct Line * next ;
  35. }Line;
  36.  
  37. typedef Line * LinePtr ;
  38.  
  39. typedef struct Token {
  40. CharPtr token_str ;
  41. enum TokenType type ;
  42. LinePtr first_appear_on ;
  43. LinePtr last_appear_on ;
  44. struct Token * next ;
  45. }Token;
  46.  
  47. typedef Token * TokenPtr ;
  48.  
  49. void GetNextChar(char *ch, int *line, int *column);
  50. bool IsDigit(char ch);
  51. bool IsLetter(char ch);
  52. bool IsWhiteSpace(char ch);
  53. bool IsSpecial(char ch);
  54. void SkipWhiteSpaces(char *ch, int *line, int *column);
  55. void SkipLineComment(char *ch, int *line, int *column);
  56. void SkipMultiLineComment(char *ch, int *line, int *column);
  57. CharPtr GetID(char *ch, int *line, int *column);
  58. CharPtr GetNum(char *ch, int *line, int *column);
  59. CharPtr GetCharConst(char *ch, int *line, int *column);
  60. CharPtr GetStringConst(char *ch, int *line, int *column);
  61. CharPtr GetSpecial(char *ch, int *line, int *column);
  62. TokenInfoPtr GetToken(char *ch, int *current_line, int *current_column);
  63. void PrintAllTokenInfo(TokenPtr head);
  64. TokenPtr OrderInsertToken(TokenPtr head, TokenInfoPtr info);
  65.  
  66.  
  67.  
  68. int main() {
  69. char ch = '\0';
  70. int current_line = -1; // line-no of current char
  71. int current_column = -1; // column-no of current char
  72. TokenInfoPtr info_ptr = NULL;
  73. TokenPtr list_head = NULL;
  74.  
  75. GetNextChar(&ch, &current_line, &current_column); // get the very first char
  76.  
  77. do{
  78. info_ptr = GetToken(&ch, &current_line, &current_column);
  79. if ( info_ptr != NULL )
  80. list_head = OrderInsertToken(list_head, info_ptr);
  81. } while ( info_ptr != NULL );
  82.  
  83. PrintAllTokenInfo(list_head);
  84.  
  85. return 0 ;
  86. }
  87. int g_next_char_line = 1;
  88. int g_next_char_column = 1;
  89. void GetNextChar(char *ch, int *line, int *column) {
  90. if (*line == -1 && *column == -1) {
  91. *line = 1;
  92. *column = 0;
  93. }
  94. if (scanf("%c", ch) == EOF) {
  95. *ch = EOF;
  96. return;
  97. }
  98. if (*ch == '\n') {
  99. *column = 0;
  100. *line = *line + 1;
  101. } else if (*ch != EOF) {
  102. *column = *column + 1;
  103. }
  104. }
  105.  
  106. bool IsDigit(char ch) {
  107. if (ch >= '0' && ch <= '9')
  108. return true;
  109. else
  110. return false;
  111. }
  112.  
  113. bool IsLetter(char ch) {
  114. if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
  115. return true;
  116. else
  117. return false;
  118. }
  119.  
  120. bool IsWhiteSpace(char ch) {
  121. if (ch == '\n' || ch == '\t' || ch == ' ')
  122. return true;
  123. else
  124. return false;
  125. }
  126.  
  127. bool IsSpecial(char ch) {
  128. char specialch[] = "^,()[]{}!:;#?+-*/><=%&|";
  129. for (int i = 0; i < strlen(specialch); i++) {
  130. if (ch == specialch[i])
  131. return true;
  132. }
  133. return false;
  134. }
  135.  
  136. void SkipWhiteSpaces(char *ch, int *line, int *column) {
  137. GetNextChar(ch, line, column);
  138. while (IsWhiteSpace(*ch)) {
  139. GetNextChar(ch, line, column);
  140. }
  141. }
  142.  
  143. void SkipLineComment(char *ch, int *line, int *column) {
  144. GetNextChar(ch, line, column);
  145. while (*ch != '\n' && *ch != EOF) {
  146. GetNextChar(ch, line, column);
  147. }
  148. if (*ch == '\n') {
  149. GetNextChar(ch, line, column);
  150. }
  151. }
  152.  
  153. void SkipMultiLineComment(char *ch, int *line, int *column) {
  154. while (*ch != '*' && *ch != EOF) {
  155. GetNextChar(ch, line, column);
  156. }
  157. if (*ch == '*') {
  158. GetNextChar(ch, line, column);
  159. while (*ch != '/' && *ch != EOF) {
  160. if (*ch == '*') {
  161. GetNextChar(ch, line, column);
  162. if (*ch == '/') {
  163. GetNextChar(ch, line, column);
  164. return;
  165. }
  166. }
  167. GetNextChar(ch, line, column);
  168. }
  169. }
  170. }
  171.  
  172. CharPtr GetID(char *ch, int *line, int *column) {
  173. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 100);
  174. int i = 0;
  175. newptr[i] = *ch;
  176. i++;
  177. GetNextChar(ch, line, column);
  178. while (i < 100 && !IsWhiteSpace(*ch) &&
  179. (IsLetter(*ch) || IsDigit(*ch) || *ch == '_') && *ch != EOF) {
  180. newptr[i] = *ch;
  181. i++;
  182. GetNextChar(ch, line, column);
  183. }
  184. newptr[i] = '\0';
  185. return newptr;
  186. }
  187.  
  188. CharPtr GetNum(char *ch, int *line, int *column) {
  189. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 100);
  190. int dot = 0;
  191. int i = 0;
  192. newptr[i] = *ch;
  193. i++;
  194. GetNextChar(ch, line, column);
  195. if (*ch == '.')
  196. dot++;
  197. else if (dot <= 1 && !IsDigit(*ch))
  198. dot = 10;
  199. while (!IsWhiteSpace(*ch) && (dot <= 1 || IsDigit(*ch)) && *ch != EOF) {
  200. newptr[i] = *ch;
  201. i++;
  202. GetNextChar(ch, line, column);
  203. if (*ch == '.')
  204. dot++;
  205. else if (dot <= 1 && !IsDigit(*ch))
  206. dot = 10;
  207. }
  208. newptr[i] = '\0';
  209. return newptr;
  210. }
  211.  
  212. CharPtr GetCharConst(char *ch, int *line, int *column) {
  213. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 10);
  214. bool escape = false;
  215. int i = 0;
  216. newptr[i] = *ch;
  217. i++;
  218. newptr[i] = '\0';
  219. GetNextChar(ch, line, column);
  220. if (*ch == '\\')
  221. escape = true;
  222. while (escape || *ch != '\'') {
  223. if (*ch != '\'') {
  224. newptr[i] = *ch;
  225. i++;
  226. newptr[i] = '\0';
  227. GetNextChar(ch, line, column);
  228. }
  229. newptr[i] = *ch;
  230. i++;
  231. newptr[i] = '\0';
  232. GetNextChar(ch, line, column);
  233. if (escape)
  234. escape = false;
  235. }
  236. newptr[i] = *ch;
  237. i++;
  238. newptr[i] = '\0';
  239. // printf("Check:%s:%c:%d:\n",newptr, *ch, *ch);
  240. if (strcmp(newptr, "'\\'") == 0) {
  241. GetNextChar(ch, line, column);
  242. newptr[i] = *ch;
  243. i++;
  244. newptr[i] = '\0';
  245. }
  246. // newptr[i] = '\0';
  247. GetNextChar(ch, line, column);
  248. return newptr;
  249. }
  250.  
  251. CharPtr GetStringConst(char *ch, int *line, int *column) {
  252. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 100);
  253. bool escape = false;
  254. int i = 0;
  255. newptr[i] = *ch;
  256. i++;
  257. GetNextChar(ch, line, column);
  258. if (*ch == '\\')
  259. escape = true;
  260. newptr[i] = *ch;
  261. i++;
  262. newptr[i] = '\0';
  263. GetNextChar(ch, line, column);
  264. if ( strcmp(newptr, "\"\"") == 0)
  265. return newptr;
  266. while (escape || (*ch != '\"' && *ch != EOF)) {
  267. if (escape)
  268. escape = false;
  269. else if (!escape && *ch == '\\')
  270. escape = true;
  271. newptr[i] = *ch;
  272. i++;
  273. GetNextChar(ch, line, column);
  274. }
  275. newptr[i] = *ch;
  276. i++;
  277. newptr[i] = '\0';
  278. GetNextChar(ch, line, column);
  279. return newptr;
  280. }
  281.  
  282. CharPtr GetSpecial(char *ch, int *line, int *column) {
  283. bool check = false;
  284. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 10);
  285. char ch1 = *ch;
  286. int i = 0;
  287. newptr[i] = *ch;
  288. i++;
  289. GetNextChar(ch, line, column);
  290. if (!IsSpecial(*ch)) {
  291. newptr[i] = '\0';
  292. return newptr;
  293. }
  294. if (*ch == '=')
  295. if (ch1 == '+' || ch1 == '-' || ch1 == '*' ||
  296. ch1 == '/' || ch1 == '>' || ch1 == '<' || ch1 == '=' ||
  297. ch1 == '!' || ch1 == '%')
  298. check = true;
  299.  
  300. if (ch1 == *ch)
  301. if (*ch == '&' || *ch == '|' || *ch == '+' || *ch == '-' ||
  302. *ch == '>' || *ch == '<')
  303. check = true;
  304.  
  305. if (*ch == '>' && ch1 == '-')
  306. check = true;
  307.  
  308. if (check) {
  309. newptr[i] = *ch;
  310. i++;
  311. GetNextChar(ch, line, column);
  312. }
  313. newptr[i] = '\0';
  314. return newptr;
  315. }
  316.  
  317. TokenInfoPtr GetToken(char *ch, int *current_line, int *current_column) {
  318. TokenInfoPtr token = NULL;
  319. char ch1;
  320. char *ch2;
  321. if (*ch == EOF || *ch == 26)
  322. return token;
  323. token = (TokenInfoPtr)malloc(sizeof(TokenInfo));
  324. bool slash_is_divide = false;
  325. int temp_line = *current_line;
  326. int temp_column = *current_column;
  327. while (IsWhiteSpace(*ch) || (*ch == '/' && !slash_is_divide)) {
  328. if ( IsWhiteSpace(*ch) ) {
  329. SkipWhiteSpaces(ch, current_line, current_column);
  330. } else if ( *ch == '/' ) {
  331. ch1 = *ch;
  332. GetNextChar(ch, current_line, current_column);
  333. if ( *ch == '/' )
  334. SkipLineComment(ch, current_line, current_column);
  335. else if (*ch == '*')
  336. SkipMultiLineComment(ch, current_line, current_column);
  337. else
  338. slash_is_divide = true;
  339. }
  340. if (*ch == EOF)
  341. return NULL;
  342. }
  343. if (slash_is_divide) {
  344. if (*ch == '=') {
  345. token->first_char_line = temp_line;
  346. token->first_char_column = temp_column;
  347. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 10);
  348. strcpy(newptr, "/=");
  349. token->token_str = newptr;
  350. token->token_type = SPECIAL;
  351. GetNextChar(ch, current_line, current_column);
  352. } else {
  353. CharPtr newptr = (CharPtr)malloc(sizeof(char) * 10);
  354. token->first_char_line = temp_line;
  355. token->first_char_column = temp_column;
  356. strcpy(newptr, "/");
  357. token->token_str = newptr;
  358. token->token_type = SPECIAL;
  359. }
  360. } else if (IsLetter(*ch) || *ch == '_') {
  361. token->first_char_line = *current_line;
  362. token->first_char_column = *current_column;
  363. token->token_str = GetID(ch, current_line, current_column);
  364. token->token_type = IDENTIFIER;
  365. } else if ( IsDigit(*ch) || *ch == '.' ) {
  366. token->first_char_line = *current_line;
  367. token->first_char_column = *current_column;
  368. token->token_str = GetNum(ch, current_line, current_column);
  369. token->token_type = CONSTANT;
  370. } else if ( *ch == '\'' ) {
  371. token->first_char_line = *current_line;
  372. token->first_char_column = *current_column;
  373. token->token_str = GetCharConst(ch, current_line, current_column);
  374. token->token_type = CONSTANT;
  375. } else if ( *ch == '\"' ) {
  376. token->first_char_line = *current_line;
  377. token->first_char_column = *current_column;
  378. token->token_str = GetStringConst(ch, current_line, current_column);
  379. token->token_type = CONSTANT;
  380. } else if ( IsSpecial(*ch) ) {
  381. token->first_char_line = *current_line;
  382. token->first_char_column = *current_column;
  383. token->token_str = GetSpecial(ch, current_line, current_column);
  384. token->token_type = SPECIAL;
  385. }
  386. return token;
  387. }
  388.  
  389. TokenPtr OrderInsertToken(TokenPtr head, TokenInfoPtr info) {
  390. TokenPtr walkhead = head;
  391. LinePtr walkline = NULL;
  392. ColumnPtr walkcolumn = NULL;
  393. TokenPtr newhead = NULL;
  394. LinePtr newline = NULL;
  395. ColumnPtr newcolumn = NULL;
  396. int temp;
  397. // printf("Check:%s:%d:%d:\n",info->token_str,info->first_char_line,info->first_char_column);
  398. if (head == NULL) {
  399. temp = -1;
  400. } else {
  401. temp = strcmp(info->token_str, head->token_str);
  402. }
  403. if (temp < 0) {
  404. newhead = (TokenPtr)malloc(sizeof(Token));
  405. newline = (LinePtr)malloc(sizeof(Line));
  406. newcolumn = (ColumnPtr)malloc(sizeof(Column));
  407. newhead->token_str = info->token_str;
  408. newhead->type = info->token_type;
  409. newhead->first_appear_on = newline;
  410. newhead->last_appear_on = newline;
  411. newhead->next = head;
  412. head = newhead;
  413. newline->line = info->first_char_line;
  414. newline->next = NULL;
  415. newline->first_appear_at = newcolumn;
  416. newline->last_appear_at = newcolumn;
  417. newcolumn->column = info->first_char_column;
  418. newcolumn->next = NULL;
  419. return head;
  420. }
  421. TokenPtr prehead = NULL;
  422. walkhead = head;
  423. while (walkhead != NULL && temp > 0) {
  424. prehead = walkhead;
  425. walkhead = walkhead->next;
  426. if (walkhead != NULL)
  427. temp = strcmp(info->token_str, walkhead->token_str);
  428. }
  429. if (walkhead == NULL) {
  430. newhead = (TokenPtr)malloc(sizeof(Token));
  431. newline = (LinePtr)malloc(sizeof(Line));
  432. newcolumn = (ColumnPtr)malloc(sizeof(Column));
  433. newhead->token_str = info->token_str;
  434. newhead->type = info->token_type;
  435. newline->line = info->first_char_line;
  436. newcolumn->column = info->first_char_column;
  437. newhead->first_appear_on = newline;
  438. newhead->last_appear_on = newline;
  439. newline->first_appear_at = newcolumn;
  440. newline->last_appear_at = newcolumn;
  441. newhead->next = NULL;
  442. prehead->next = newhead;
  443. newline->next = NULL;
  444. newcolumn->next = NULL;
  445. return head;
  446. }
  447. if (temp == 0) {
  448. if (walkhead->last_appear_on->line != info->first_char_line) {
  449. newline = (LinePtr)malloc(sizeof(Line));
  450. newline->line = info->first_char_line;
  451. walkhead->last_appear_on->next = newline;
  452. walkhead->last_appear_on = newline;
  453. newline->next = NULL;
  454. newcolumn = (ColumnPtr)malloc(sizeof(Column));
  455. newcolumn->column = info->first_char_column;
  456. newline->first_appear_at = newcolumn;
  457. newline->last_appear_at = newcolumn;
  458. newcolumn->next = NULL;
  459. } else {
  460. walkline = walkhead->last_appear_on;
  461. newcolumn = (ColumnPtr)malloc(sizeof(Column));
  462. newcolumn->column = info->first_char_column;
  463. walkline->last_appear_at->next = newcolumn;
  464. walkline->last_appear_at = newcolumn;
  465. newcolumn->next = NULL;
  466. }
  467. return head;
  468. }
  469.  
  470. if (temp < 0) {
  471. // printf("Check:\n");
  472. newhead = (TokenPtr)malloc(sizeof(Token));
  473. newline = (LinePtr)malloc(sizeof(Line));
  474. newcolumn = (ColumnPtr)malloc(sizeof(Column));
  475. newhead->token_str = info->token_str;
  476. newhead->type = info->token_type;
  477. newline->line = info->first_char_line;
  478. newcolumn->column = info->first_char_column;
  479. newhead->first_appear_on = newline;
  480. newhead->last_appear_on = newline;
  481. newline->first_appear_at = newcolumn;
  482. newline->last_appear_at = newcolumn;
  483. newhead->next = walkhead;
  484. prehead->next = newhead;
  485. newline->next = NULL;
  486. newcolumn->next = NULL;
  487. return head;
  488. }
  489. }
  490.  
  491. void PrintAllTokenInfo(TokenPtr head) {
  492. if (head == NULL) {
  493. return;
  494. }
  495. printf("%s ", head->token_str);
  496. if (head->type == IDENTIFIER) {
  497. printf("IDENTIFIER ");
  498. } else if (head->type == CONSTANT) {
  499. printf("CONSTANT ");
  500. } else {
  501. printf("SPECIAL ");
  502. }
  503. LinePtr t = head->first_appear_on;
  504. ColumnPtr h = t->first_appear_at;
  505. while (1) {
  506. printf("(%d,%d)", t->line, h->column);
  507. if (h->next != NULL) {
  508. h = h->next;
  509. } else {
  510. t = t->next;
  511. if (t == NULL) {
  512. printf("\n");
  513. break;
  514. }
  515. h = t->first_appear_at;
  516. }
  517. }
  518. PrintAllTokenInfo(head->next);
  519. return;
  520. }
  521.  
/*
 * Sample run captured with the listing.
 * Status: Success   time: 0.01s   memory: 5300KB
 *
 * stdin:
 *   /=/
 *
 * stdout:
 *   /= SPECIAL (1,1)
 */