/* token.c */ #include #include #include #include #include #include "token.h" #include "hash_table.h" #include "util.h" /* Token Data Structure */ #define TOK_MAGIC_1 0x544F4B454E544F4Bul // "TOKENTOK" #define TOK_MAGIC_2 0x544F4B544F4B454Eul // "TOKTOKEN" struct token { long magic; int line; int column; short kind; long opt_data[0]; }; typedef struct token token_t; struct token_data { union { int64_t i; double f; const char *s; char c; } data; }; typedef struct token_data token_data_t; int column = 1; int line = 1; /* Token Data Access */ #define token_data(token) ((struct token_data *)((token)->opt_data)) c_token_types token_type(token_t *token) { assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); return token->kind; } int64_t token_int(token_t *token) { assert(token->kind == TOK_INTEGER_U32 || token->kind == TOK_INTEGER_U64 || token->kind == TOK_INTEGER_S32 || token->kind == TOK_INTEGER_S64); assert(token->magic == TOK_MAGIC_1); return token_data(token)->data.i; } double token_float(token_t *token) { assert(token->kind == TOK_FLOAT_32 || token->kind == TOK_FLOAT_64); assert(token->magic == TOK_MAGIC_1); return token_data(token)->data.f; } const char *token_string(token_t *token) { assert(token->kind == TOK_STRING_ASCII || token->kind == TOK_ID || token->kind == TOK_TYPEDEF_NAME); assert(token->magic == TOK_MAGIC_1); return token_data(token)->data.s; } char token_char(token_t *token) { assert(token->kind == TOK_CHAR_CONST); assert(token->magic == TOK_MAGIC_1); return token_data(token)->data.c; } int token_line(token_t *token) { assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); return token->line; } int token_column(token_t *token) { assert(token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2); return token->column; } /* Token Creation and Destruction */ token_t *token_data_create(c_token_types kind, int lin, int col, int len) { token_t *token = malloc(sizeof(token_t) + sizeof(struct token_data)); if (!token) { fputs("Out of memory\n", stderr); exit(1); } token->magic = TOK_MAGIC_1; token->line = lin; token->column = col; column += len; token->kind = kind; return token; } token_t *token_create(c_token_types kind, int lin, int col, int len) { token_t *token = malloc(sizeof(token_t)); if (!token) { fputs("Out of memory\n", stderr); exit(1); } token->magic = TOK_MAGIC_2; token->line = lin; token->column = col; column += len; token->kind = kind; return token; } token_t *token_create_int(c_token_types kind, int lin, int col, int64_t i, int len) { token_t *token = token_data_create(kind, lin, col, len); token_data(token)->data.i = i; return token; } token_t *token_create_float(c_token_types kind, int lin, int col, double f, int len) { token_t *token = token_data_create(kind, lin, col, len); token_data(token)->data.f = f; return token; } token_t *token_create_char(c_token_types kind, int lin, int col, char c, int len) { token_t *token = token_data_create(kind, lin, col, len); token_data(token)->data.c = c; return token; } void token_destroy(token_t *token) { if (token->magic == TOK_MAGIC_1 || token->magic == TOK_MAGIC_2) { free(token); } else { fputs("Corrupt token\n", stderr); exit(1); } } /* Token Create String */ hash_table_t *string_table; token_t *token_create_string(c_token_types kind, int lin, int col, const char *s, int len) { if (string_table == NULL) { string_table = hash_table_create(2048, cmp_string, hash_string, dtor_string); } token_t *token = token_data_create(kind, lin, col, len); char *key = hash_table_get(string_table, (void *)s); if (key == NULL) { key = strdup(s); hash_table_put(string_table, key, key); } token_data(token)->data.s = key; return token; } /* Token Debugging */ /* Token Type Enum to String */ const char *token_name_from_type(c_token_types type) { switch (type) { case TOK_IF: return "TOK_IF"; case TOK_ELSE: return "TOK_ELSE"; case TOK_SWITCH: return "TOK_SWITCH"; case TOK_CASE: return "TOK_CASE"; case TOK_DEFAULT: return "TOK_DEFAULT"; case TOK_WHILE: return "TOK_WHILE"; case TOK_DO: return "TOK_DO"; case TOK_FOR: return "TOK_FOR"; case TOK_CONTINUE: return "TOK_CONTINUE"; case TOK_BREAK: return "TOK_BREAK"; case TOK_RETURN: return "TOK_RETURN"; case TOK_GOTO: return "TOK_GOTO"; case TOK_VOID: return "TOK_VOID"; case TOK_CHAR: return "TOK_CHAR"; case TOK_SHORT: return "TOK_SHORT"; case TOK_INT: return "TOK_INT"; case TOK_LONG: return "TOK_LONG"; case TOK_FLOAT: return "TOK_FLOAT"; case TOK_DOUBLE: return "TOK_DOUBLE"; case TOK_SIGNED: return "TOK_SIGNED"; case TOK_UNSIGNED: return "TOK_UNSIGNED"; case TOK_STRUCT: return "TOK_STRUCT"; case TOK_UNION: return "TOK_UNION"; case TOK_ENUM: return "TOK_ENUM"; case TOK_TYPEDEF: return "TOK_TYPEDEF"; case TOK_AUTO: return "TOK_AUTO"; case TOK_REGISTER: return "TOK_REGISTER"; case TOK_STATIC: return "TOK_STATIC"; case TOK_EXTERN: return "TOK_EXTERN"; case TOK_CONST: return "TOK_CONST"; case TOK_VOLATILE: return "TOK_VOLATILE"; case TOK_SIZEOF: return "TOK_SIZEOF"; case TOK_ADD: return "TOK_ADD"; case TOK_SUB: return "TOK_SUB"; case TOK_MUL: return "TOK_MUL"; case TOK_DIV: return "TOK_DIV"; case TOK_MOD: return "TOK_MOD"; case TOK_BIT_AND: return "TOK_BIT_AND"; case TOK_BIT_OR: return "TOK_BIT_OR"; case TOK_BIT_XOR: return "TOK_BIT_XOR"; case TOK_BIT_NOT: return "TOK_BIT_NOT"; case TOK_LSHIFT: return "TOK_LSHIFT"; case TOK_RSHIFT: return "TOK_RSHIFT"; case TOK_NOT: return "TOK_NOT"; case TOK_ASSIGN: return "TOK_ASSIGN"; case TOK_LT: return "TOK_LT"; case TOK_GT: return "TOK_GT"; case TOK_INC: return "TOK_INC"; case TOK_DEC: return "TOK_DEC"; case TOK_EQ: return "TOK_EQ"; case TOK_NE: return "TOK_NE"; case TOK_LE: return "TOK_LE"; case TOK_GE: return "TOK_GE"; case TOK_AND: return "TOK_AND"; case TOK_OR: return "TOK_OR"; case TOK_MEMBER_POINTER: return "TOK_MEMBER_POINTER"; case TOK_MEMBER: return "TOK_MEMBER"; case TOK_COND_DECISION: return "TOK_COND_DECISION"; case TOK_COND: return "TOK_COND"; case TOK_ASSIGN_ADD: return "TOK_ASSIGN_ADD"; case TOK_ASSIGN_SUB: return "TOK_ASSIGN_SUB"; case TOK_ASSIGN_MUL: return "TOK_ASSIGN_MUL"; case TOK_ASSIGN_DIV: return "TOK_ASSIGN_DIV"; case TOK_ASSIGN_MOD: return "TOK_ASSIGN_MOD"; case TOK_ASSIGN_BITAND: return "TOK_ASSIGN_BITAND"; case TOK_ASSIGN_BITOR: return "TOK_ASSIGN_BITOR"; case TOK_ASSIGN_BITXOR: return "TOK_ASSIGN_BITXOR"; case TOK_ASSIGN_LSHIFT: return "TOK_ASSIGN_LSHIFT"; case TOK_ASSIGN_RSHIFT: return "TOK_ASSIGN_RSHIFT"; case TOK_HASH: return "TOK_HASH"; case TOK_ID: return "TOK_ID"; case TOK_TYPEDEF_NAME: return "TOK_TYPEDEF_NAME"; case TOK_INTEGER_U32: return "TOK_INTEGER_U32"; case TOK_INTEGER_U64: return "TOK_INTEGER_U64"; case TOK_INTEGER_S32: return "TOK_INTEGER_S32"; case TOK_INTEGER_S64: return "TOK_INTEGER_S64"; case TOK_FLOAT_32: return "TOK_FLOAT_32"; case TOK_FLOAT_64: return "TOK_FLOAT_64"; case TOK_CHAR_CONST: return "TOK_CHAR_CONST"; case TOK_STRING_ASCII: return "TOK_STRING_ASCII"; case TOK_EOF: return "TOK_EOF"; case TOK_ERROR: return "TOK_ERROR"; case TOK_LEFT_PAREN: return "TOK_LEFT_PAREN"; case TOK_RIGHT_PAREN: return "TOK_RIGHT_PAREN"; case TOK_LEFT_BRACKET: return "TOK_LEFT_BRACKET"; case TOK_RIGHT_BRACKET: return "TOK_RIGHT_BRACKET"; case TOK_LEFT_BRACE: return "TOK_LEFT_BRACE"; case TOK_RIGHT_BRACE: return "TOK_RIGHT_BRACE"; case TOK_COMMA: return "TOK_COMMA"; case TOK_SEMICOLON: return "TOK_SEMICOLON"; case TOK_DOT: return "TOK_DOT"; case TOK_ELLIPSIS: return "TOK_ELLIPSIS"; } return "UNKNOWN"; } /* Unescape String */ #define clamp(x, min, max) ((x) < (min) ? (min) : (x) > (max) ? (max) : (x)) char *re_escape_string(const char *str) { int len = strlen(str); char *buf = malloc(len * 2 + 1); if (!buf) { fprintf(stderr, "Out of memory. Cannot escape string\n"); exit(1); } int i = 0; for (int j = 0; j < len; j++) { switch (str[j]) { case '\a': buf[i++] = '\\'; buf[i++] = 'a'; break; case '\b': buf[i++] = '\\'; buf[i++] = 'b'; break; case '\f': buf[i++] = '\\'; buf[i++] = 'f'; break; case '\n': buf[i++] = '\\'; buf[i++] = 'n'; break; case '\r': buf[i++] = '\\'; buf[i++] = 'r'; break; case '\t': buf[i++] = '\\'; buf[i++] = 't'; break; case '\v': buf[i++] = '\\'; buf[i++] = 'v'; break; case '\\': buf[i++] = '\\'; buf[i++] = '\\'; break; case '\'': buf[i++] = '\\'; buf[i++] = '\''; break; case '"': buf[i++] = '\\'; buf[i++] = '"'; break; default: { if (isprint(str[j])) { buf[i++] = str[j]; } else { buf[i++] = '\\'; buf[i++] = 'x'; buf[i++] = "0123456789abcdef"[clamp(str[j] >> 4, 0, 0xf)]; buf[i++] = "0123456789abcdef"[clamp(str[j] & 0xf, 0, 0xf)]; } } } } buf[i] = '\0'; return buf; } /* Print Token */ void print_token(token_t *tok) { if (!tok) { printf("NULL\n"); return; } const char *name = token_name_from_type(tok->kind); switch (tok->kind) { case TOK_ID: case TOK_STRING_ASCII: { char *escaped = re_escape_string(token_string(tok)); printf("%s: \"%s\"@%d:%d\n", name, escaped, tok->line, tok->column); free(escaped); break; } case TOK_TYPEDEF_NAME: { char *escaped = re_escape_string(token_string(tok)); printf("%s: %s@%d:%d\n", name, escaped, tok->line, tok->column); free(escaped); break; } case TOK_CHAR_CONST: printf("%s: '%c'@%d:%d\n", name, token_char(tok), tok->line, tok->column); break; case TOK_INTEGER_S32: case TOK_INTEGER_U32: case TOK_INTEGER_S64: case TOK_INTEGER_U64: printf("%s: %ld@%d:%d\n", name, token_int(tok), tok->line, tok->column); break; case TOK_FLOAT_32: case TOK_FLOAT_64: printf("%s: %f@%d:%d\n", name, token_float(tok), tok->line, tok->column); break; default: printf("%s@%d:%d\n", name, tok->line, tok->column); break; } }