A dynamic tracer for Linux

ply.c 9.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. #include <assert.h>
  2. #include <errno.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "ply.h"
  7. struct providers {
  8. provider_t **prov;
  9. size_t len;
  10. } providers;
  11. provider_t *provider_get(const char *name)
  12. {
  13. size_t i;
  14. for (i = 0; i < providers.len; i++) {
  15. if (strstr(providers.prov[i]->name, name)
  16. == providers.prov[i]->name)
  17. return providers.prov[i];
  18. }
  19. return NULL;
  20. }
  21. void provider_register(provider_t *prov)
  22. {
  23. assert(prov);
  24. assert(prov->probe);
  25. assert(prov->resolve);
  26. providers.prov = realloc(providers.prov,
  27. ++providers.len * sizeof(*providers.prov));
  28. providers.prov[providers.len - 1] = prov;
  29. }
  30. typedef struct pass pass_t;
  31. struct pass {
  32. int (*run)(pass_t *, ctx_t *);
  33. walk_fn pre;
  34. walk_fn post;
  35. };
  36. symtab_t globals = { .sym = NULL, .len = 0 };
  37. symtab_t locals = { .sym = NULL, .len = 0 };
  38. ctx_t *ctx_get(void)
  39. {
  40. ctx_t *ctx;
  41. prog_t *prog;
  42. ctx = calloc(1, sizeof(*ctx));
  43. ctx->globals = calloc(1, sizeof(*ctx->globals));
  44. ctx->progs = calloc(3, sizeof(*ctx->progs));
  45. /* PROBE0 */
  46. prog = calloc(1, sizeof(*prog));
  47. prog->locals = calloc(1, sizeof(*prog->locals));
  48. prog->globals = ctx->globals;
  49. prog->probe = "k:SyS_read";
  50. /* { @t[0] = time(); @reads[pid()] = quantize(arg2) } */
  51. prog->ast =
  52. node_expr("",
  53. node_expr("=",
  54. node_expr("[]",
  55. node_ident("@t"),
  56. node_num(0),
  57. NULL),
  58. node_expr("time"),
  59. NULL),
  60. node_expr("=",
  61. node_expr("[]",
  62. node_ident("@reads"),
  63. node_expr("pid", NULL),
  64. NULL),
  65. node_expr("quantize", node_ident("arg2"), NULL),
  66. NULL),
  67. NULL);
  68. prog->provider = provider_get("k");
  69. prog->provider->probe(prog);
  70. prog->ir = ir_new();
  71. ctx->progs[0] = prog;
  72. /* PROBE1 */
  73. prog = calloc(1, sizeof(*prog));
  74. prog->locals = calloc(1, sizeof(*prog->locals));
  75. prog->globals = ctx->globals;
  76. /* TODO: k -> kret */
  77. prog->probe = "k:SyS_read2";
  78. /* { @times[pid()] = quantize(time() - t0) } */
  79. prog->ast =
  80. node_expr("=",
  81. node_expr("[]",
  82. node_ident("@times"),
  83. node_expr("pid", NULL),
  84. NULL),
  85. node_expr("quantize",
  86. node_expr("-",
  87. node_expr("time", NULL),
  88. node_ident("t0"),
  89. NULL),
  90. NULL),
  91. NULL);
  92. prog->provider = provider_get("k");
  93. prog->provider->probe(prog);
  94. prog->ir = ir_new();
  95. ctx->progs[1] = prog;
  96. return ctx;
  97. }
  98. int pass_resolve_symbols(node_t *n, void *_prog)
  99. {
  100. prog_t *prog = _prog;
  101. provider_t *global = provider_get(":");
  102. node_t *op;
  103. int err;
  104. if (n->ntype != N_IDENT)
  105. return 0;
  106. err = prog->provider->resolve(prog, n);
  107. if (!err || (err != -ENOENT))
  108. return err;
  109. err = global->resolve(prog, n);
  110. if (!err || (err != -ENOENT))
  111. return err;
  112. /* neither provider identifier nor global ditto => user
  113. * variable, add it as a global symbol of unknown type. */
  114. return sym_add(prog->globals, n->ident, NULL, &n->sym);
  115. }
  116. int infer_type_list(prog_t *prog, node_t *n)
  117. {
  118. type_t *t;
  119. /* list of lists (code block) => void */
  120. if (n->list->ntype == N_LIST) {
  121. n->type = &t_void;
  122. return 0;
  123. }
  124. t = n->list->type;
  125. if (!t)
  126. return 0;
  127. switch (t->ttype) {
  128. case T_FUNC:
  129. n->type = t->t.func.type;
  130. break;
  131. default:
  132. n->type = t;
  133. }
  134. return 0;
  135. }
  136. int infer_type_keyword(prog_t *prog, node_t *n)
  137. {
  138. node_t *dst, *src;
  139. switch (n->keyword.class) {
  140. case KW_ASSIGN:
  141. dst = node_next(n);
  142. src = node_next(dst);
  143. assert(dst && src);
  144. if (!src->type)
  145. return 0;
  146. /* TODO: assignment is statement for now. do we need
  147. * c-style assignment expressions? e.g `a = b = 2;` */
  148. n->type = &t_void;
  149. if (dst->type)
  150. return 0;
  151. dst->type = src->type;
  152. if (dst->ntype != N_IDENT)
  153. return 0;
  154. return sym_add(dst->sym->st, dst->ident, dst->type, NULL);
  155. case KW_BINOP:
  156. dst = node_next(n);
  157. src = node_next(dst);
  158. assert(dst && src);
  159. if (!(src->type && dst->type && type_equal(src->type, dst->type)))
  160. return 0;
  161. n->type = dst->type;
  162. return 0;
  163. default:
  164. n->type = &t_void;
  165. return 0;
  166. }
  167. return -ENOSYS;
  168. }
  169. int infer_type_sym(prog_t *prog, node_t *n)
  170. {
  171. node_t *parent, *key;
  172. if (n->sym->type) {
  173. /* the symbol type could have been inferred in another
  174. * probe, in that case copy the type to this node. */
  175. if (!n->type)
  176. n->type = n->sym->type;
  177. return 0;
  178. }
  179. parent = node_up(n);
  180. key = node_next(n);
  181. /* match `somemap[somekey]` where the type of the entire
  182. * expression and the type of the key is known, since that
  183. * means the type of the map itself is also known. */
  184. if (parent && parent->type
  185. && (parent->list->ntype == N_KEYWORD)
  186. && (parent->list->keyword.class == KW_SUBSCRIPT)
  187. && key && key->type) {
  188. n->type = type_map_of(key->type, parent->type);
  189. return sym_add(n->sym->st, n->ident, n->type, NULL);
  190. }
  191. return 0;
  192. }
  193. int pass_infer_types(node_t *n, void *_prog)
  194. {
  195. prog_t *prog = _prog;
  196. if (n->type)
  197. return 0;
  198. switch (n->ntype) {
  199. case N_LIST:
  200. return infer_type_list(prog, n);
  201. case N_KEYWORD:
  202. return infer_type_keyword(prog, n);
  203. case N_IDENT:
  204. return infer_type_sym(prog, n);
  205. default:
  206. break;
  207. }
  208. return 0;
  209. }
  210. int validate_func(node_t *n)
  211. {
  212. node_t *arg;
  213. field_t *f;
  214. int i;
  215. for (arg = node_next(n), f = n->type->t.func.args, i = 1;
  216. arg && f && f->type; arg = node_next(arg), f++, i++) {
  217. if (type_compatible(arg->type, f->type))
  218. continue;
  219. node_print(n, stderr);
  220. fprintf(stderr, ": incompatible type of argument %d (", i);
  221. type_dump(arg->type, stderr);
  222. fputs("), expected ", stderr);
  223. type_dump(f->type, stderr);
  224. fputs("\n", stderr);
  225. return -EINVAL;
  226. }
  227. if (!arg && (!f || !f->type))
  228. return 0;
  229. if (arg) {
  230. node_print(n, stderr);
  231. fprintf(stderr, ": too many arguments, expected %d", i);
  232. return -EINVAL;
  233. }
  234. if (f->optional)
  235. return 0;
  236. node_print(n, stderr);
  237. fputs(": too few arguments", stderr);
  238. return -EINVAL;
  239. }
  240. int pass_validate_types(node_t *n, void *_prog)
  241. {
  242. prog_t *prog = _prog;
  243. node_print(n, stdout); putchar('\n');
  244. if (!n->type) {
  245. node_print(n, stderr);
  246. fputs(": type unknown\n", stderr);
  247. return -EINVAL;
  248. }
  249. if (n->ntype != N_LIST)
  250. return 0;
  251. if (n->list->ntype != N_IDENT)
  252. return 0;
  253. assert(n->list->type->ttype == T_FUNC);
  254. return validate_func(n->list);
  255. }
  256. int validate_syms(prog_t *prog)
  257. {
  258. return 0;
  259. }
  260. int run_validate_types(pass_t *pass, ctx_t *ctx)
  261. {
  262. prog_t **prog;
  263. int err;
  264. for (prog = ctx->progs; *prog; prog++) {
  265. /* check syms first to give better error messages.
  266. * e.g. "i: type unknown", not "b-: type unknown" */
  267. err = validate_syms(*prog);
  268. if (err)
  269. return err;
  270. err = node_walk((*prog)->ast, pass->pre, pass->post, *prog);
  271. if (err)
  272. return err;
  273. }
  274. return 0;
  275. }
  276. int rewrite_const_math(node_t *n)
  277. {
  278. int64_t result;
  279. node_t *a, *b, *new;
  280. int op;
  281. /* TODO: handle L/UL/ULL correctly */
  282. op = n->list->keyword.op;
  283. a = node_next(n->list);
  284. b = node_next(a);
  285. switch (op) {
  286. case '*': result = a->num * b->num; break;
  287. case '/': result = a->num / b->num; break;
  288. case '%': result = a->num % b->num; break;
  289. case '+': result = a->num + b->num; break;
  290. case '-': result = a->num - b->num; break;
  291. case '<': result = a->num << b->num; break;
  292. case '>': result = a->num >> b->num; break;
  293. default: return 0;
  294. }
  295. new = node_num(result);
  296. new->type = n->type;
  297. return node_replace(n, new);
  298. }
  299. int pass_rewrite_ast(node_t *n, void *_prog)
  300. {
  301. prog_t *prog = _prog;
  302. provider_t *global = provider_get(":");
  303. int err;
  304. if (prog->provider->rewrite_node) {
  305. err = prog->provider->rewrite_node(prog, n);
  306. if (err)
  307. return err;
  308. }
  309. if (global->rewrite_node) {
  310. err = global->rewrite_node(prog, n);
  311. if (err)
  312. return err;
  313. }
  314. /* pre-compute binops where both sides are constants */
  315. if ((n->ntype == N_LIST)
  316. && (n->list->ntype == N_KEYWORD)
  317. && (n->list->keyword.class == KW_BINOP)
  318. && (node_next(n->list)->ntype == N_NUM)
  319. && (node_next(node_next(n->list))->ntype == N_NUM))
  320. return rewrite_const_math(n);
  321. return 0;
  322. }
  323. int generate_ir_ident(prog_t *prog, node_t *n)
  324. {
  325. switch (n->sym->type->ttype) {
  326. case T_FUNC:
  327. return n->sym->type->t.func.generate_ir(prog, n);
  328. case T_MAP:
  329. ir_emit_ldmap(prog->ir, BPF_REG_0, n->sym);
  330. return 0;
  331. default:
  332. break;
  333. }
  334. return 0;
  335. }
  336. int pass_generate_ir(node_t *n, void *_prog)
  337. {
  338. prog_t *prog = _prog;
  339. switch (n->ntype) {
  340. case N_LIST:
  341. return 0;
  342. case N_IDENT:
  343. return generate_ir_ident(prog, n);
  344. default:
  345. break;
  346. }
  347. return 0;
  348. }
  349. int run_generate_ir(pass_t *pass, ctx_t *ctx)
  350. {
  351. prog_t **progp;
  352. int err;
  353. for (progp = ctx->progs; *progp; progp++) {
  354. prog_t *prog = *progp;
  355. int return_label = ir_alloc_label(prog->ir);
  356. err = prog->provider->ir_prologue ?
  357. prog->provider->ir_prologue(prog) : 0;
  358. if (err)
  359. return err;
  360. err = node_walk(prog->ast, NULL, pass_generate_ir, prog);
  361. if (err)
  362. return err;
  363. err = prog->provider->ir_epilogue ?
  364. prog->provider->ir_epilogue(prog) : 0;
  365. if (err)
  366. return err;
  367. ir_emit_label(prog->ir, return_label);
  368. ir_emit_insn(prog->ir, EXIT, 0, 0);
  369. }
  370. return 0;
  371. }
  372. int run_walk(pass_t *pass, ctx_t *ctx)
  373. {
  374. prog_t **prog;
  375. int err;
  376. for (prog = ctx->progs; *prog; prog++) {
  377. err = node_walk((*prog)->ast, pass->pre, pass->post, *prog);
  378. if (err)
  379. return err;
  380. }
  381. return 0;
  382. }
  383. pass_t passes[] = {
  384. { .run = run_walk, .post = pass_resolve_symbols },
  385. { .run = run_walk, .post = pass_infer_types },
  386. { .run = run_walk, .post = pass_infer_types },
  387. { .run = run_walk, .post = pass_infer_types },
  388. { .run = run_validate_types, .post = pass_validate_types },
  389. { .run = run_walk, .post = pass_rewrite_ast },
  390. { .run = run_generate_ir },
  391. { NULL }
  392. };
  393. int main(void)
  394. {
  395. ctx_t *ctx = ctx_get();
  396. prog_t **prog;
  397. pass_t *pass;
  398. int err;
  399. for (pass = passes; pass->run; pass++) {
  400. err = pass->run(pass, ctx);
  401. if (err)
  402. break;
  403. }
  404. for (prog = ctx->progs; *prog; prog++) {
  405. printf("\n\e[34m%s\e[0m\n", (*prog)->probe);
  406. node_dump((*prog)->ast, stdout);
  407. printf("\n-- locals\n");
  408. symtab_dump((*prog)->locals, stdout);
  409. printf("-- ir\n");
  410. ir_dump((*prog)->ir, stdout);
  411. }
  412. printf("\n\n-- globals\n");
  413. symtab_dump(ctx->globals, stdout);
  414. if (err)
  415. printf("ERR: %d\n", err);
  416. return err;
  417. }