# include <stdio.h>
# include <stdbool.h>
# include <stdlib.h>
# include <string.h>
/*
 * Word-style spellings of four C operators.  Each macro expands to exactly
 * one operator token, so `a AND NOT b` reads as `a && !b`.
 */
#define NOT !
#define AND &&
#define OR  ||
#define EQ  ==
/*
 * Category of a scanned token.  GetToken assigns IDENTIFIER to names,
 * CONSTANT to numbers, character constants and string constants, and SPECIAL
 * to operator/punctuation tokens.
 */
enum TokenType {
    IDENTIFIER = 34313,
    CONSTANT   = 87424,
    SPECIAL    = 29535
};

/* Heap-allocated, NUL-terminated token text. */
typedef char *CharPtr;

/* One occurrence of a token as produced by the scanner. */
struct tokenInfo {
    CharPtr token_str;          /* text of the token */
    enum TokenType token_type;  /* category of the token */
    int first_char_line;        /* line of the token's first character */
    int first_char_column;      /* column of the token's first character */
};
typedef struct tokenInfo TokenInfo;
typedef TokenInfo *TokenInfoPtr;

/* Singly linked list node: one column on which a token appears. */
struct Column {
    int column;
    struct Column *next;
};
typedef struct Column Column;
typedef Column *ColumnPtr;

/* Singly linked list node: one line on which a token appears, together with
 * the list of columns where it appears on that line. */
struct Line {
    int line;
    ColumnPtr first_appear_at;  /* head of the column list */
    ColumnPtr last_appear_at;   /* tail of the column list, for O(1) append */
    struct Line *next;
};
typedef struct Line Line;
typedef Line *LinePtr;

/* Singly linked list node: one distinct token text and every place where it
 * was seen. */
struct Token {
    CharPtr token_str;          /* text of the token */
    enum TokenType type;        /* category recorded when the node was created */
    LinePtr first_appear_on;    /* head of the line list */
    LinePtr last_appear_on;     /* tail of the line list, for O(1) append */
    struct Token *next;
};
typedef struct Token Token;
typedef Token *TokenPtr;
/*
 * Forward declarations, so that main can be defined before the routines it
 * calls.
 *
 * Every scanning routine takes the same trio of pointers: `ch` is the current
 * look-ahead character and `line` / `column` (or `current_line` /
 * `current_column`) are its position.  All three are updated in place as
 * input is consumed.
 */

/* Input: read the next character from standard input and track its position. */
void GetNextChar(char *ch, int *line, int *column);

/* Character classification. */
bool IsDigit(char ch);
bool IsLetter(char ch);
bool IsWhiteSpace(char ch);
bool IsSpecial(char ch);

/* Skipping of input that produces no token. */
void SkipWhiteSpaces(char *ch, int *line, int *column);
void SkipLineComment(char *ch, int *line, int *column);
void SkipMultiLineComment(char *ch, int *line, int *column);

/* Token text readers: each returns a newly malloc'ed, NUL-terminated string. */
CharPtr GetID(char *ch, int *line, int *column);
CharPtr GetNum(char *ch, int *line, int *column);
CharPtr GetCharConst(char *ch, int *line, int *column);
CharPtr GetStringConst(char *ch, int *line, int *column);
CharPtr GetSpecial(char *ch, int *line, int *column);

/* Scanner entry point: returns the next token, or NULL when input is exhausted. */
TokenInfoPtr GetToken(char *ch, int *current_line, int *current_column);

/* Cross-reference list: sorted insertion and printing. */
void PrintAllTokenInfo(TokenPtr head);
TokenPtr OrderInsertToken(TokenPtr head, TokenInfoPtr info);
/*
 * Program entry point.
 *
 * Reads the whole of standard input one token at a time, inserts every token
 * into a list kept sorted by token text, and finally prints the list with
 * all recorded positions.
 *
 * Returns 0 always.
 */
int main(void) {
    char ch = '\0';
    int current_line = -1;    // line-no of current char; -1 means "nothing read yet"
    int current_column = -1;  // column-no of current char; -1 means "nothing read yet"
    TokenInfoPtr info_ptr = NULL;
    TokenPtr list_head = NULL;

    GetNextChar(&ch, &current_line, &current_column);  // get the very first char

    // GetToken returns NULL once the input is exhausted.
    while ((info_ptr = GetToken(&ch, &current_line, &current_column)) != NULL) {
        list_head = OrderInsertToken(list_head, info_ptr);
        // OrderInsertToken copies the fields it needs and does not keep the
        // TokenInfo record itself, so the record can be released here.
        free(info_ptr);
    }

    PrintAllTokenInfo(list_head);
    return 0;
}
// File-scope position counters, both initialised to 1.
// NOTE(review): nothing in the visible part of this file reads or writes
// these two variables (GetNextChar tracks the position through its pointer
// parameters instead) - confirm they are unused before removing them.
int g_next_char_line = 1;
int g_next_char_column = 1;
/*
 * Reads one character from standard input into *ch and keeps the caller's
 * position counters in step with it.
 *
 *   ch     - receives the character read, or EOF when nothing could be read
 *   line   - line counter; incremented when a '\n' is read
 *   column - column counter; reset to 0 by '\n', otherwise incremented
 *
 * A caller that passes *line == -1 and *column == -1 (the "nothing read yet"
 * state) gets the counters initialised to line 1, column 0 before the read.
 * At end of input the counters are left untouched.
 */
void GetNextChar(char *ch, int *line, int *column) {
    if (*line == -1 && *column == -1) {
        *line = 1;
        *column = 0;
    }

    int got = getchar();
    if (got == EOF) {
        *ch = EOF;
        return;
    }
    *ch = (char)got;

    if (*ch == '\n') {
        /* the newline itself is reported as column 0 of the following line */
        *line += 1;
        *column = 0;
        return;
    }
    if (*ch != EOF) {
        *column += 1;
    }
}
/* Returns true when ch is one of the decimal digits '0' through '9'. */
bool IsDigit(char ch) {
    return '0' <= ch && ch <= '9';
}
/* Returns true when ch is an unaccented Latin letter, 'a'-'z' or 'A'-'Z'.
 * The underscore is not a letter here; callers test for it separately. */
bool IsLetter(char ch) {
    const bool is_lower = 'a' <= ch && ch <= 'z';
    const bool is_upper = 'A' <= ch && ch <= 'Z';
    return is_lower || is_upper;
}
/* Returns true for the three characters the scanner treats as white space:
 * newline, horizontal tab and space.  Any other character (including '\r')
 * yields false. */
bool IsWhiteSpace(char ch) {
    switch (ch) {
    case '\n':
    case '\t':
    case ' ':
        return true;
    default:
        return false;
    }
}
/*
 * Returns true when ch is one of the operator / punctuation characters that
 * can start a SPECIAL token:  ^ , ( ) [ ] { } ! : ; # ? + - * / > < = % & |
 *
 * The table is a static constant looked up with strchr, so it is neither
 * rebuilt nor re-measured on every call.
 */
bool IsSpecial(char ch) {
    static const char special_chars[] = "^,()[]{}!:;#?+-*/><=%&|";

    /* strchr would match the terminating NUL, which is not a special char */
    if (ch == '\0') {
        return false;
    }
    return strchr(special_chars, ch) != NULL;
}
/*
 * Moves past a run of white space.
 *
 * The current character is always discarded first (the caller is expected to
 * be positioned on white space), then reading continues until *ch holds a
 * character for which IsWhiteSpace is false.  That includes the EOF marker,
 * so the loop ends at end of input.
 */
void SkipWhiteSpaces(char *ch, int *line, int *column) {
    do {
        GetNextChar(ch, line, column);
    } while (IsWhiteSpace(*ch));
}
/*
 * Discards the remainder of a `//` comment.
 *
 * The current character is dropped unconditionally, then input is consumed
 * up to a newline or end of input.  When a newline was found it is consumed
 * as well, so on return *ch is the first character of the following line;
 * at end of input *ch is left holding EOF.
 */
void SkipLineComment(char *ch, int *line, int *column) {
    for (;;) {
        GetNextChar(ch, line, column);
        if (*ch == '\n' || *ch == EOF) {
            break;
        }
    }

    if (*ch != '\n') {
        return;
    }
    GetNextChar(ch, line, column);
}
/*
 * Discards a block comment.
 *
 * Expected to be called with *ch on the '*' of the opening slash-star.  If
 * *ch is something else, input is first advanced to the next '*'.  Input is
 * then consumed through the first star-slash pair that follows the opening
 * star.  On return *ch is the character right after the closing '/', or EOF
 * when the comment is unterminated.
 *
 * A lone '/' inside the comment body and runs of several '*' before the
 * closing '/' are handled: only a '/' that immediately follows a '*' from
 * the comment body ends the comment.  The opening star itself does not
 * count, so slash-star-slash is not a complete comment.
 */
void SkipMultiLineComment(char *ch, int *line, int *column) {
    bool previous_was_star = false;

    while (*ch != '*' && *ch != EOF) {
        GetNextChar(ch, line, column);
    }
    if (*ch == EOF) {
        return;
    }
    GetNextChar(ch, line, column);  /* consume the opening '*' */

    while (*ch != EOF) {
        if (previous_was_star && *ch == '/') {
            GetNextChar(ch, line, column);  /* consume the closing '/' */
            return;
        }
        previous_was_star = (*ch == '*');
        GetNextChar(ch, line, column);
    }
}
/*
 * Reads an identifier starting at the current character.
 *
 * The current character is taken as the first character without being
 * checked (the caller has already classified it).  Every following letter,
 * digit or underscore is appended.  On return *ch is the first character
 * that is not part of the identifier.
 *
 * Returns a newly malloc'ed, NUL-terminated string owned by the caller.
 * The buffer grows as needed, so identifiers of any length are returned
 * whole.  If memory cannot be obtained, a message is written to stderr and
 * the program exits.
 */
CharPtr GetID(char *ch, int *line, int *column) {
    size_t capacity = 100;
    size_t length = 0;
    char *text = malloc(capacity);

    if (text == NULL) {
        fprintf(stderr, "GetID: out of memory\n");
        exit(EXIT_FAILURE);
    }

    do {
        /* keep one byte in reserve for the terminating NUL */
        if (length + 1 >= capacity) {
            size_t bigger = capacity * 2;
            char *grown = realloc(text, bigger);
            if (grown == NULL) {
                free(text);
                fprintf(stderr, "GetID: out of memory\n");
                exit(EXIT_FAILURE);
            }
            text = grown;
            capacity = bigger;
        }
        text[length++] = *ch;
        GetNextChar(ch, line, column);
    } while (IsLetter(*ch) || IsDigit(*ch) || *ch == '_');

    text[length] = '\0';
    return text;
}
/*
 * Reads a numeric constant starting at the current character.
 *
 * The current character (a digit or '.', as classified by the caller) is
 * taken as the first character without being checked.  After it, digits are
 * appended, together with at most one '.'; a second '.' or any other
 * character ends the number.  On return *ch is the first character that is
 * not part of the number.
 *
 * NOTE(review): a '.' in the first position is not counted towards the
 * one-dot limit, so ".1.2" is returned as a single token - confirm whether
 * that is intended.
 *
 * Returns a newly malloc'ed, NUL-terminated string owned by the caller.
 * The buffer grows as needed.  If memory cannot be obtained, a message is
 * written to stderr and the program exits.
 */
CharPtr GetNum(char *ch, int *line, int *column) {
    size_t capacity = 100;
    size_t length = 0;
    int dots_after_first = 0;
    char *text = malloc(capacity);

    if (text == NULL) {
        fprintf(stderr, "GetNum: out of memory\n");
        exit(EXIT_FAILURE);
    }

    text[length++] = *ch;
    GetNextChar(ch, line, column);

    for (;;) {
        if (*ch == '.') {
            dots_after_first++;
            if (dots_after_first > 1) {
                break;
            }
        } else if (!IsDigit(*ch)) {
            break;  /* also covers white space and the EOF marker */
        }

        /* keep one byte in reserve for the terminating NUL */
        if (length + 1 >= capacity) {
            size_t bigger = capacity * 2;
            char *grown = realloc(text, bigger);
            if (grown == NULL) {
                free(text);
                fprintf(stderr, "GetNum: out of memory\n");
                exit(EXIT_FAILURE);
            }
            text = grown;
            capacity = bigger;
        }
        text[length++] = *ch;
        GetNextChar(ch, line, column);
    }

    text[length] = '\0';
    return text;
}
/* Appends c to the growing buffer *text, enlarging it when needed, and keeps
 * the buffer NUL-terminated.  Exits the program if memory runs out. */
static void AppendToCharConst(char **text, size_t *length, size_t *capacity,
                              char c) {
    if (*length + 1 >= *capacity) {
        size_t bigger = *capacity * 2;
        char *grown = realloc(*text, bigger);
        if (grown == NULL) {
            free(*text);
            fprintf(stderr, "GetCharConst: out of memory\n");
            exit(EXIT_FAILURE);
        }
        *text = grown;
        *capacity = bigger;
    }
    (*text)[(*length)++] = c;
    (*text)[*length] = '\0';
}

/*
 * Reads a character constant starting at its opening single quote.
 *
 * The returned text includes both quotes, e.g. 'a' or '\n'.  A backslash
 * makes the following character part of the constant unconditionally, so an
 * escaped quote does not end it.  Scanning stops after the first unescaped
 * closing quote; on return *ch is the character that follows it.  If end of
 * input is reached first, the text collected so far is returned without a
 * closing quote and *ch is left holding EOF.
 *
 * Returns a newly malloc'ed, NUL-terminated string owned by the caller.
 * If memory cannot be obtained, a message is written to stderr and the
 * program exits.
 */
CharPtr GetCharConst(char *ch, int *line, int *column) {
    size_t capacity = 10;
    size_t length = 0;
    char *text = malloc(capacity);

    if (text == NULL) {
        fprintf(stderr, "GetCharConst: out of memory\n");
        exit(EXIT_FAILURE);
    }
    text[0] = '\0';

    AppendToCharConst(&text, &length, &capacity, *ch);  /* opening quote */
    GetNextChar(ch, line, column);

    while (*ch != '\'' && *ch != EOF) {
        if (*ch == '\\') {
            /* keep the backslash, then take the escaped character as-is */
            AppendToCharConst(&text, &length, &capacity, *ch);
            GetNextChar(ch, line, column);
            if (*ch == EOF) {
                break;
            }
        }
        AppendToCharConst(&text, &length, &capacity, *ch);
        GetNextChar(ch, line, column);
    }

    if (*ch == '\'') {
        AppendToCharConst(&text, &length, &capacity, *ch);  /* closing quote */
        GetNextChar(ch, line, column);
    }
    return text;
}
/* Appends c to the growing buffer *text, enlarging it when needed, and keeps
 * the buffer NUL-terminated.  Exits the program if memory runs out. */
static void AppendToStringConst(char **text, size_t *length, size_t *capacity,
                                char c) {
    if (*length + 1 >= *capacity) {
        size_t bigger = *capacity * 2;
        char *grown = realloc(*text, bigger);
        if (grown == NULL) {
            free(*text);
            fprintf(stderr, "GetStringConst: out of memory\n");
            exit(EXIT_FAILURE);
        }
        *text = grown;
        *capacity = bigger;
    }
    (*text)[(*length)++] = c;
    (*text)[*length] = '\0';
}

/*
 * Reads a string constant starting at its opening double quote.
 *
 * The returned text includes both quotes.  A backslash makes the following
 * character part of the string unconditionally, so an escaped double quote
 * does not end it.  Scanning stops after the first unescaped closing quote;
 * on return *ch is the character that follows it.  If end of input is
 * reached first, the text collected so far is returned without a closing
 * quote and *ch is left holding EOF.
 *
 * Returns a newly malloc'ed, NUL-terminated string owned by the caller.
 * The buffer grows as needed, so strings of any length are accepted.  If
 * memory cannot be obtained, a message is written to stderr and the program
 * exits.
 */
CharPtr GetStringConst(char *ch, int *line, int *column) {
    size_t capacity = 100;
    size_t length = 0;
    char *text = malloc(capacity);

    if (text == NULL) {
        fprintf(stderr, "GetStringConst: out of memory\n");
        exit(EXIT_FAILURE);
    }
    text[0] = '\0';

    AppendToStringConst(&text, &length, &capacity, *ch);  /* opening quote */
    GetNextChar(ch, line, column);

    while (*ch != '\"' && *ch != EOF) {
        if (*ch == '\\') {
            /* keep the backslash, then take the escaped character as-is */
            AppendToStringConst(&text, &length, &capacity, *ch);
            GetNextChar(ch, line, column);
            if (*ch == EOF) {
                break;
            }
        }
        AppendToStringConst(&text, &length, &capacity, *ch);
        GetNextChar(ch, line, column);
    }

    if (*ch == '\"') {
        AppendToStringConst(&text, &length, &capacity, *ch);  /* closing quote */
        GetNextChar(ch, line, column);
    }
    return text;
}
/*
 * Reads an operator / punctuation token of one or two characters.
 *
 * The current character is always taken.  The character after it is taken
 * too when the pair forms one of:
 *   - an operator followed by '=':  += -= *= /= >= <= == != %=
 *   - a doubled character:          && || ++ -- >> <<
 *   - the arrow:                    ->
 * On return *ch is the first character after the token.
 *
 * Returns a newly malloc'ed, NUL-terminated string owned by the caller.
 */
CharPtr GetSpecial(char *ch, int *line, int *column) {
    const char first = *ch;
    CharPtr text = (CharPtr)malloc(10 * sizeof(char));

    text[0] = first;
    text[1] = '\0';
    GetNextChar(ch, line, column);

    const char second = *ch;
    if (!IsSpecial(second)) {
        return text;
    }

    bool is_pair = false;

    if (second == '=') {
        switch (first) {
        case '+': case '-': case '*': case '/':
        case '>': case '<': case '=': case '!': case '%':
            is_pair = true;
            break;
        default:
            break;
        }
    }

    if (!is_pair && first == second) {
        switch (second) {
        case '&': case '|': case '+': case '-': case '>': case '<':
            is_pair = true;
            break;
        default:
            break;
        }
    }

    if (!is_pair && first == '-' && second == '>') {
        is_pair = true;
    }

    if (is_pair) {
        text[1] = second;
        text[2] = '\0';
        GetNextChar(ch, line, column);
    }
    return text;
}
TokenInfoPtr GetToken(char *ch, int *current_line, int *current_column) {
TokenInfoPtr token = NULL;
char ch1;
char *ch2;
if (*ch == EOF || *ch == 26)
return token;
token
= (TokenInfoPtr
)malloc(sizeof(TokenInfo
)); bool slash_is_divide = false;
int temp_line = *current_line;
int temp_column = *current_column;
while (IsWhiteSpace(*ch) || (*ch == '/' && !slash_is_divide)) {
if ( IsWhiteSpace(*ch) ) {
SkipWhiteSpaces(ch, current_line, current_column);
} else if ( *ch == '/' ) {
ch1 = *ch;
GetNextChar(ch, current_line, current_column);
if ( *ch == '/' )
SkipLineComment(ch, current_line, current_column);
else if (*ch == '*')
SkipMultiLineComment(ch, current_line, current_column);
else
slash_is_divide = true;
}
if (*ch == EOF)
return NULL;
}
if (slash_is_divide) {
if (*ch == '=') {
token->first_char_line = temp_line;
token->first_char_column = temp_column;
CharPtr newptr
= (CharPtr
)malloc(sizeof(char) * 10); token->token_str = newptr;
token->token_type = SPECIAL;
GetNextChar(ch, current_line, current_column);
} else {
CharPtr newptr
= (CharPtr
)malloc(sizeof(char) * 10); token->first_char_line = temp_line;
token->first_char_column = temp_column;
token->token_str = newptr;
token->token_type = SPECIAL;
}
} else if (IsLetter(*ch) || *ch == '_') {
token->first_char_line = *current_line;
token->first_char_column = *current_column;
token->token_str = GetID(ch, current_line, current_column);
token->token_type = IDENTIFIER;
} else if ( IsDigit(*ch) || *ch == '.' ) {
token->first_char_line = *current_line;
token->first_char_column = *current_column;
token->token_str = GetNum(ch, current_line, current_column);
token->token_type = CONSTANT;
} else if ( *ch == '\'' ) {
token->first_char_line = *current_line;
token->first_char_column = *current_column;
token->token_str = GetCharConst(ch, current_line, current_column);
token->token_type = CONSTANT;
} else if ( *ch == '\"' ) {
token->first_char_line = *current_line;
token->first_char_column = *current_column;
token->token_str = GetStringConst(ch, current_line, current_column);
token->token_type = CONSTANT;
} else if ( IsSpecial(*ch) ) {
token->first_char_line = *current_line;
token->first_char_column = *current_column;
token->token_str = GetSpecial(ch, current_line, current_column);
token->token_type = SPECIAL;
}
return token;
}
TokenPtr OrderInsertToken(TokenPtr head, TokenInfoPtr info) {
TokenPtr walkhead = head;
LinePtr walkline = NULL;
ColumnPtr walkcolumn = NULL;
TokenPtr newhead = NULL;
LinePtr newline = NULL;
ColumnPtr newcolumn = NULL;
int temp;
// printf("Check:%s:%d:%d:\n",info->token_str,info->first_char_line,info->first_char_column);
if (head == NULL) {
temp = -1;
} else {
temp
= strcmp(info
->token_str
, head
->token_str
); }
if (temp < 0) {
newhead
= (TokenPtr
)malloc(sizeof(Token
)); newline
= (LinePtr
)malloc(sizeof(Line
)); newcolumn
= (ColumnPtr
)malloc(sizeof(Column
)); newhead->token_str = info->token_str;
newhead->type = info->token_type;
newhead->first_appear_on = newline;
newhead->last_appear_on = newline;
newhead->next = head;
head = newhead;
newline->line = info->first_char_line;
newline->next = NULL;
newline->first_appear_at = newcolumn;
newline->last_appear_at = newcolumn;
newcolumn->column = info->first_char_column;
newcolumn->next = NULL;
return head;
}
TokenPtr prehead = NULL;
walkhead = head;
while (walkhead != NULL && temp > 0) {
prehead = walkhead;
walkhead = walkhead->next;
if (walkhead != NULL)
temp
= strcmp(info
->token_str
, walkhead
->token_str
); }
if (walkhead == NULL) {
newhead
= (TokenPtr
)malloc(sizeof(Token
)); newline
= (LinePtr
)malloc(sizeof(Line
)); newcolumn
= (ColumnPtr
)malloc(sizeof(Column
)); newhead->token_str = info->token_str;
newhead->type = info->token_type;
newline->line = info->first_char_line;
newcolumn->column = info->first_char_column;
newhead->first_appear_on = newline;
newhead->last_appear_on = newline;
newline->first_appear_at = newcolumn;
newline->last_appear_at = newcolumn;
newhead->next = NULL;
prehead->next = newhead;
newline->next = NULL;
newcolumn->next = NULL;
return head;
}
if (temp == 0) {
if (walkhead->last_appear_on->line != info->first_char_line) {
newline
= (LinePtr
)malloc(sizeof(Line
)); newline->line = info->first_char_line;
walkhead->last_appear_on->next = newline;
walkhead->last_appear_on = newline;
newline->next = NULL;
newcolumn
= (ColumnPtr
)malloc(sizeof(Column
)); newcolumn->column = info->first_char_column;
newline->first_appear_at = newcolumn;
newline->last_appear_at = newcolumn;
newcolumn->next = NULL;
} else {
walkline = walkhead->last_appear_on;
newcolumn
= (ColumnPtr
)malloc(sizeof(Column
)); newcolumn->column = info->first_char_column;
walkline->last_appear_at->next = newcolumn;
walkline->last_appear_at = newcolumn;
newcolumn->next = NULL;
}
return head;
}
if (temp < 0) {
// printf("Check:\n");
newhead
= (TokenPtr
)malloc(sizeof(Token
)); newline
= (LinePtr
)malloc(sizeof(Line
)); newcolumn
= (ColumnPtr
)malloc(sizeof(Column
)); newhead->token_str = info->token_str;
newhead->type = info->token_type;
newline->line = info->first_char_line;
newcolumn->column = info->first_char_column;
newhead->first_appear_on = newline;
newhead->last_appear_on = newline;
newline->first_appear_at = newcolumn;
newline->last_appear_at = newcolumn;
newhead->next = walkhead;
prehead->next = newhead;
newline->next = NULL;
newcolumn->next = NULL;
return head;
}
}
/*
 * Writes the whole token list to standard output.
 *
 * For each node, in list order, the token text and one space are printed,
 * followed by a "(line,column)" pair for every recorded occurrence, lines
 * first and columns within each line.
 *
 * NOTE(review): no newline or other separator is written between one
 * token's entry and the next - confirm that this is the expected format.
 */
void PrintAllTokenInfo(TokenPtr head) {
    for (TokenPtr entry = head; entry != NULL; entry = entry->next) {
        printf("%s ", entry->token_str);

        for (LinePtr on_line = entry->first_appear_on; on_line != NULL;
             on_line = on_line->next) {
            for (ColumnPtr at_column = on_line->first_appear_at;
                 at_column != NULL; at_column = at_column->next) {
                printf("(%d,%d)", on_line->line, at_column->column);
            }
        }
    }
}