/* Simple expression to x86_64 compiler. * * I made it to test out an idea that I had for reaching * the design goals of the C compiler that I am building * which are: very low memory usage, no reliance on * complex memory allocators, no reliance on the C * standard library and no operating system-specific * requirements. * * The first expression parser and generator I made for * my compiler was based on an Abstract Syntax Tree (AST), * where it would parse the source code into a tree * structure and then recursively descent the tree and * generate code for each branch. This works fine, but * trees are often quite wasteful on memory (even in * my case where the tree is fully static) and in the * cast of ASTs they waste stack during recursion * because there ends up being a lot of "empty" nodes * that only check a single parameter before descending. * * What follows is an implementation of "interpreter-based * compilation" (someone has probably already done this * and called it something else, but I haven't seen * it). The idea is instead of building a tree, the * compiler builds a buffer of bytecode. That bytecode * is then run inside a stack-based virtual machine in * a similar way to how the Lua interpreter works. The * difference here is that instead of producing a result, * the VM outputs assembly code. The advantages of it * is that the compiler can be made very lightweight and * fast because a lot of assumptions can be made about * the state of the VM stack at any given time which * makes operator precedence simpler. * * This implementation supports operator-precedence * and two operators (+ and *) and 64 bit signed * integers. It outputs x86_64 assembly in AT&T * syntax. It's very primitive and does almost zero * error checking; the goal was to verify that the * concept works. */ #include #include #define max_ops 32 #define max_stack 32 static const char* regs[] = { "rax", "r9" }; typedef enum { op_pushc, op_add, op_mul, op_ret } Op; typedef enum { tok_number, tok_plus, tok_star, tok_error, tok_end } Tok_Type; typedef struct { Tok_Type type; int len; const char* start; } Tok; typedef enum { stack_type_constexpr, stack_type_expr } Stack_Type; typedef struct { Stack_Type type; unsigned char reg; unsigned char idx; } Stack_Value; typedef struct { const char* src; const char* cur; const char* start; Tok tok; Stack_Value* stack_top; int op_count, num_count, r; unsigned char ops[max_ops]; int numbers[(unsigned char)-1]; Stack_Value stack[max_stack]; } Compiler; int get_prec(Tok_Type type) { switch (type) { case tok_number: return 0; case tok_plus: return 1; case tok_star: return 2; default: return -1; } } int is_digit(char c) { return c >= '0' && c <= '9'; } char peek(Compiler* c) { return *c->cur; } char advance(Compiler* c) { c->cur++; return c->cur[-1]; } void skip_whitespace(Compiler* c) { while (1) { switch (peek(c)) { case ' ': case '\t': case '\n': advance(c); break; default: return; } } } Tok new_tok(Compiler* c, Tok_Type type) { Tok t; t.type = type; t.start = c->start - 1; t.len = (c->cur - c->start) + 1; return t; } Tok new_err_tok(Compiler* c, const char* msg) { Tok t; t.type = tok_error; t.start = msg; t.len = -1; return t; } Tok next_tok(Compiler* c) { char ch; skip_whitespace(c); ch = advance(c); c->start = c->cur; if (is_digit(ch)) { while (is_digit(peek(c))) { advance(c); } return new_tok(c, tok_number); } switch (ch) { case '+': return new_tok(c, tok_plus); case '*': return new_tok(c, tok_star); case 0: return new_tok(c, tok_end); default: return new_err_tok(c, "Unexpected character."); } } void emit_byte(Compiler* c, unsigned char b) { c->ops[c->op_count++] = b; } void emit_op(Compiler* c, Op op) { emit_byte(c, (unsigned char)op); } unsigned char emit_number(Compiler* c, int n) { c->numbers[c->num_count] = n; return (unsigned char)c->num_count++; } void push(Compiler* c, Stack_Value v) { *c->stack_top = v; c->stack_top++; } Stack_Value pop(Compiler* c) { c->stack_top--; return *c->stack_top; } void gen_val(Compiler* c, Stack_Value v, unsigned char r) { if (v.type == stack_type_constexpr) { printf( "mov $%d, %%%s\n", c->numbers[v.idx], regs[r] ); } else if (v.reg != r) { /*printf( "mov %%rax, %%%s\n", regs[r] );*/ } } void generate(Compiler* c) { Op op; int i; Stack_Value v, a, b; i = 0; printf(".globl eval\n"); printf(".text\n"); printf("eval:\n"); while (1) { op = c->ops[i]; switch (op) { case op_pushc: v.type = stack_type_constexpr; v.reg = c->ops[i + 1]; v.idx = c->ops[i + 2]; push(c, v); i += 2; break; case op_add: b = pop(c); a = pop(c); gen_val(c, a, 1); gen_val(c, b, 0); printf( "add %%%s, %%%s\n", regs[1], regs[0] ); v.type = stack_type_expr; v.reg = 0; v.idx = -1; push(c, v); break; case op_mul: b = pop(c); a = pop(c); gen_val(c, a, 1); gen_val(c, b, 0); printf( "imul %%%s, %%%s\n", regs[1], regs[0] ); v.type = stack_type_expr; v.reg = 0; v.idx = -1; push(c, v); break; break; case op_ret: printf("ret\n"); default: return; } i++; } } const Tok* cadvance(Compiler* c) { c->tok = next_tok(c); return &c->tok; } void cnumber(Compiler* c) { int n; n = (int)strtol(c->tok.start, 0, 10); emit_op(c, op_pushc); emit_op(c, c->r); emit_op(c, emit_number(c, n)); c->r = !c->r; } void bin_op(Compiler* c, Tok_Type t) { int p, np; const Tok* rhs, * nxt; Tok_Type nt; p = get_prec(t); rhs = cadvance(c); cnumber(c); /* rhs */ nxt = cadvance(c); nt = nxt->type; np = get_prec(nt); if (nt == tok_star || nt == tok_plus) { if (p <= np) { bin_op(c, nt); emit_op(c, t); } else { emit_op(c, t); bin_op(c, nt); } } else { emit_op(c, t); } } void compile(const char* src) { const Tok* t; Compiler c; c.src = src; c.cur = src; c.op_count = 0; c.num_count = 0; c.r = 0; c.stack_top = c.stack; do { t = cadvance(&c); if (t->type == tok_error) { fprintf(stderr, "Error: %s\n", t->start); return; } switch (t->type) { case tok_number: cnumber(&c); break; case tok_plus: case tok_star: bin_op(&c, t->type); break; default: break; } } while (t->type != tok_end); emit_op(&c, op_ret); generate(&c); } int main() { const char* expr = "6 + 2 + 5 * 10"; compile(expr); return 0; }