#include #include #include #include #include #include "ply.h" struct providers { provider_t **prov; size_t len; } providers; provider_t *provider_get(const char *name) { size_t i; for (i = 0; i < providers.len; i++) { if (strstr(providers.prov[i]->name, name) == providers.prov[i]->name) return providers.prov[i]; } return NULL; } void provider_register(provider_t *prov) { assert(prov); assert(prov->probe); assert(prov->resolve); providers.prov = realloc(providers.prov, ++providers.len * sizeof(*providers.prov)); providers.prov[providers.len - 1] = prov; } typedef struct pass pass_t; struct pass { int (*run)(pass_t *, ctx_t *); walk_fn pre; walk_fn post; }; symtab_t globals = { .sym = NULL, .len = 0 }; symtab_t locals = { .sym = NULL, .len = 0 }; ctx_t *ctx_get(void) { ctx_t *ctx; prog_t *prog; ctx = calloc(1, sizeof(*ctx)); ctx->globals = calloc(1, sizeof(*ctx->globals)); ctx->progs = calloc(3, sizeof(*ctx->progs)); /* PROBE0 */ prog = calloc(1, sizeof(*prog)); prog->locals = calloc(1, sizeof(*prog->locals)); prog->globals = ctx->globals; prog->probe = "k:SyS_read"; /* { @t[0] = time(); @reads[pid()] = quantize(arg2) } */ prog->ast = node_vlist( node_vlist(node_keyword('=', 0), node_vlist(node_keyword('[', 0), node_ident("@t"), node_num(0), NULL), node_list(node_ident("time")), NULL), node_vlist(node_keyword('=', 0), node_vlist(node_keyword('[', 0), node_ident("@reads"), node_list(node_ident("pid")), NULL), node_vlist(node_ident("quantize"), node_ident("arg2"), NULL), NULL), NULL); prog->provider = provider_get("k"); prog->provider->probe(prog); prog->ir = ir_new(); ctx->progs[0] = prog; /* PROBE1 */ prog = calloc(1, sizeof(*prog)); prog->locals = calloc(1, sizeof(*prog->locals)); prog->globals = ctx->globals; /* TODO: k -> kret */ prog->probe = "k:SyS_read2"; /* { @times[pid()] = quantize(time() - t0) } */ prog->ast = node_list( node_vlist(node_keyword('=', 0), node_vlist(node_keyword('[', 0), node_ident("@times"), node_list(node_ident("pid")), NULL), node_vlist(node_ident("quantize"), node_vlist(node_keyword('b', '-'), node_list(node_ident("time")), node_ident("t0"), NULL), NULL), NULL) ); prog->provider = provider_get("k"); prog->provider->probe(prog); prog->ir = ir_new(); ctx->progs[1] = prog; return ctx; } int pass_resolve_symbols(node_t *n, void *_prog) { prog_t *prog = _prog; provider_t *global = provider_get(":"); node_t *op; int err; if (n->ntype != N_IDENT) return 0; err = prog->provider->resolve(prog, n); if (!err || (err != -ENOENT)) return err; err = global->resolve(prog, n); if (!err || (err != -ENOENT)) return err; /* neither provider identifier nor global ditto => user * variable, add it as a global symbol of unknown type. */ return sym_add(prog->globals, n->ident, NULL, &n->sym); } int infer_type_list(prog_t *prog, node_t *n) { type_t *t; /* list of lists (code block) => void */ if (n->list->ntype == N_LIST) { n->type = &t_void; return 0; } t = n->list->type; if (!t) return 0; switch (t->ttype) { case T_FUNC: n->type = t->t.func.type; break; default: n->type = t; } return 0; } int infer_type_keyword(prog_t *prog, node_t *n) { node_t *dst, *src; switch (n->keyword.class) { case KW_ASSIGN: dst = node_next(n); src = node_next(dst); assert(dst && src); if (!src->type) return 0; /* TODO: assignment is statement for now. do we need * c-style assignment expressions? e.g `a = b = 2;` */ n->type = &t_void; if (dst->type) return 0; dst->type = src->type; if (dst->ntype != N_IDENT) return 0; return sym_add(dst->sym->st, dst->ident, dst->type, NULL); case KW_BINOP: dst = node_next(n); src = node_next(dst); assert(dst && src); if (!(src->type && dst->type && type_equal(src->type, dst->type))) return 0; n->type = dst->type; return 0; default: n->type = &t_void; return 0; } return -ENOSYS; } int infer_type_sym(prog_t *prog, node_t *n) { node_t *parent, *key; if (n->sym->type) { /* the symbol type could have been inferred in another * probe, in that case copy the type to this node. */ if (!n->type) n->type = n->sym->type; return 0; } parent = node_up(n); key = node_next(n); /* match `somemap[somekey]` where the type of the entire * expression and the type of the key is known, since that * means the type of the map itself is also known. */ if (parent && parent->type && (parent->list->ntype == N_KEYWORD) && (parent->list->keyword.class == KW_SUBSCRIPT) && key && key->type) { n->type = type_map_of(key->type, parent->type); return sym_add(n->sym->st, n->ident, n->type, NULL); } return 0; } int pass_infer_types(node_t *n, void *_prog) { prog_t *prog = _prog; if (n->type) return 0; switch (n->ntype) { case N_LIST: return infer_type_list(prog, n); case N_KEYWORD: return infer_type_keyword(prog, n); case N_IDENT: return infer_type_sym(prog, n); default: break; } return 0; } int validate_func(node_t *n) { node_t *arg; field_t *f; int i; for (arg = node_next(n), f = n->type->t.func.args, i = 1; arg && f && f->type; arg = node_next(arg), f++, i++) { if (type_compatible(arg->type, f->type)) continue; node_print(n, stderr); fprintf(stderr, ": incompatible type of argument %d (", i); type_dump(arg->type, stderr); fputs("), expected ", stderr); type_dump(f->type, stderr); fputs("\n", stderr); return -EINVAL; } if (!arg && (!f || !f->type)) return 0; if (arg) { node_print(n, stderr); fprintf(stderr, ": too many arguments, expected %d", i); return -EINVAL; } if (f->optional) return 0; node_print(n, stderr); fputs(": too few arguments", stderr); return -EINVAL; } int pass_validate_types(node_t *n, void *_prog) { prog_t *prog = _prog; node_print(n, stdout); putchar('\n'); if (!n->type) { node_print(n, stderr); fputs(": type unknown\n", stderr); return -EINVAL; } if (n->ntype != N_LIST) return 0; if (n->list->ntype != N_IDENT) return 0; assert(n->list->type->ttype == T_FUNC); return validate_func(n->list); } int validate_syms(prog_t *prog) { return 0; } int run_validate_types(pass_t *pass, ctx_t *ctx) { prog_t **prog; int err; for (prog = ctx->progs; *prog; prog++) { /* check syms first to give better error messages. * e.g. "i: type unknown", not "b-: type unknown" */ err = validate_syms(*prog); if (err) return err; err = node_walk((*prog)->ast, pass->pre, pass->post, *prog); if (err) return err; } return 0; } int rewrite_const_math(node_t *n) { int64_t result; node_t *a, *b, *new; int op; /* TODO: handle L/UL/ULL correctly */ op = n->list->keyword.op; a = node_next(n->list); b = node_next(a); switch (op) { case '*': result = a->num * b->num; break; case '/': result = a->num / b->num; break; case '%': result = a->num % b->num; break; case '+': result = a->num + b->num; break; case '-': result = a->num - b->num; break; case '<': result = a->num << b->num; break; case '>': result = a->num >> b->num; break; default: return 0; } new = node_num(result); new->type = n->type; return node_replace(n, new); } int pass_rewrite_ast(node_t *n, void *_prog) { prog_t *prog = _prog; provider_t *global = provider_get(":"); int err; if (prog->provider->rewrite_node) { err = prog->provider->rewrite_node(prog, n); if (err) return err; } if (global->rewrite_node) { err = global->rewrite_node(prog, n); if (err) return err; } /* pre-compute binops where both sides are constants */ if ((n->ntype == N_LIST) && (n->list->ntype == N_KEYWORD) && (n->list->keyword.class == KW_BINOP) && (node_next(n->list)->ntype == N_NUM) && (node_next(node_next(n->list))->ntype == N_NUM)) return rewrite_const_math(n); return 0; } int generate_ir_ident(prog_t *prog, node_t *n) { switch (n->sym->type->ttype) { case T_FUNC: return n->sym->type->t.func.generate_ir(prog, n); case T_MAP: ir_emit_ldmap(prog->ir, BPF_REG_0, n->sym); return 0; default: break; } return 0; } int pass_generate_ir(node_t *n, void *_prog) { prog_t *prog = _prog; switch (n->ntype) { case N_LIST: return 0; case N_IDENT: return generate_ir_ident(prog, n); default: break; } return 0; } int run_generate_ir(pass_t *pass, ctx_t *ctx) { prog_t **progp; int err; for (progp = ctx->progs; *progp; progp++) { prog_t *prog = *progp; int return_label = ir_alloc_label(prog->ir); err = prog->provider->ir_prologue ? prog->provider->ir_prologue(prog) : 0; if (err) return err; err = node_walk(prog->ast, NULL, pass_generate_ir, prog); if (err) return err; err = prog->provider->ir_epilogue ? prog->provider->ir_epilogue(prog) : 0; if (err) return err; ir_emit_label(prog->ir, return_label); ir_emit_insn(prog->ir, EXIT, 0, 0); } return 0; } int run_walk(pass_t *pass, ctx_t *ctx) { prog_t **prog; int err; for (prog = ctx->progs; *prog; prog++) { err = node_walk((*prog)->ast, pass->pre, pass->post, *prog); if (err) return err; } return 0; } pass_t passes[] = { { .run = run_walk, .post = pass_resolve_symbols }, { .run = run_walk, .post = pass_infer_types }, { .run = run_walk, .post = pass_infer_types }, { .run = run_walk, .post = pass_infer_types }, { .run = run_validate_types, .post = pass_validate_types }, { .run = run_walk, .post = pass_rewrite_ast }, { .run = run_generate_ir }, { NULL } }; int main(void) { ctx_t *ctx = ctx_get(); prog_t **prog; pass_t *pass; int err; for (pass = passes; pass->run; pass++) { err = pass->run(pass, ctx); if (err) break; } for (prog = ctx->progs; *prog; prog++) { printf("\n\e[34m%s\e[0m\n", (*prog)->probe); node_dump((*prog)->ast, stdout); printf("\n-- locals\n"); symtab_dump((*prog)->locals, stdout); printf("-- ir\n"); ir_dump((*prog)->ir, stdout); } printf("\n\n-- globals\n"); symtab_dump(ctx->globals, stdout); if (err) printf("ERR: %d\n", err); return err; }