A dynamic tracer for Linux

ply.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. #include <assert.h>
  2. #include <errno.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "ply.h"
  7. struct providers {
  8. provider_t **prov;
  9. size_t len;
  10. } providers;
  11. provider_t *provider_get(const char *name)
  12. {
  13. size_t i;
  14. for (i = 0; i < providers.len; i++) {
  15. if (strstr(providers.prov[i]->name, name)
  16. == providers.prov[i]->name)
  17. return providers.prov[i];
  18. }
  19. return NULL;
  20. }
  21. void provider_register(provider_t *prov)
  22. {
  23. assert(prov);
  24. assert(prov->probe);
  25. assert(prov->resolve);
  26. providers.prov = realloc(providers.prov,
  27. ++providers.len * sizeof(*providers.prov));
  28. providers.prov[providers.len - 1] = prov;
  29. }
  30. typedef struct pass pass_t;
  31. struct pass {
  32. int (*run)(pass_t *, ctx_t *);
  33. walk_fn pre;
  34. walk_fn post;
  35. };
  36. symtab_t globals = { .sym = NULL, .len = 0 };
  37. symtab_t locals = { .sym = NULL, .len = 0 };
  38. ctx_t *ctx_get(void)
  39. {
  40. ctx_t *ctx;
  41. prog_t *prog;
  42. ctx = calloc(1, sizeof(*ctx));
  43. ctx->globals = calloc(1, sizeof(*ctx->globals));
  44. ctx->progs = calloc(3, sizeof(*ctx->progs));
  45. /* PROBE0 */
  46. prog = calloc(1, sizeof(*prog));
  47. prog->locals = calloc(1, sizeof(*prog->locals));
  48. prog->globals = ctx->globals;
  49. prog->probe = "k:SyS_read";
  50. /* { @t[0] = time(); @reads[pid()] = quantize(arg2) } */
  51. prog->ast =
  52. node_vlist(
  53. node_vlist(node_keyword('=', 0),
  54. node_vlist(node_keyword('[', 0),
  55. node_ident("@t"),
  56. node_num(0),
  57. NULL),
  58. node_list(node_ident("time")),
  59. NULL),
  60. node_vlist(node_keyword('=', 0),
  61. node_vlist(node_keyword('[', 0),
  62. node_ident("@reads"),
  63. node_list(node_ident("pid")),
  64. NULL),
  65. node_vlist(node_ident("quantize"),
  66. node_ident("arg2"),
  67. NULL),
  68. NULL),
  69. NULL);
  70. prog->provider = provider_get("k");
  71. prog->provider->probe(prog);
  72. prog->ir = ir_new();
  73. ctx->progs[0] = prog;
  74. /* PROBE1 */
  75. prog = calloc(1, sizeof(*prog));
  76. prog->locals = calloc(1, sizeof(*prog->locals));
  77. prog->globals = ctx->globals;
  78. /* TODO: k -> kret */
  79. prog->probe = "k:SyS_read2";
  80. /* { @times[pid()] = quantize(time() - t0) } */
  81. prog->ast =
  82. node_list(
  83. node_vlist(node_keyword('=', 0),
  84. node_vlist(node_keyword('[', 0),
  85. node_ident("@times"),
  86. node_list(node_ident("pid")),
  87. NULL),
  88. node_vlist(node_ident("quantize"),
  89. node_vlist(node_keyword('b', '-'),
  90. node_list(node_ident("time")),
  91. node_ident("t0"),
  92. NULL),
  93. NULL),
  94. NULL)
  95. );
  96. prog->provider = provider_get("k");
  97. prog->provider->probe(prog);
  98. prog->ir = ir_new();
  99. ctx->progs[1] = prog;
  100. return ctx;
  101. }
  102. int pass_resolve_symbols(node_t *n, void *_prog)
  103. {
  104. prog_t *prog = _prog;
  105. provider_t *global = provider_get(":");
  106. node_t *op;
  107. int err;
  108. if (n->ntype != N_IDENT)
  109. return 0;
  110. err = prog->provider->resolve(prog, n);
  111. if (!err || (err != -ENOENT))
  112. return err;
  113. err = global->resolve(prog, n);
  114. if (!err || (err != -ENOENT))
  115. return err;
  116. /* neither provider identifier nor global ditto => user
  117. * variable, add it as a global symbol of unknown type. */
  118. return sym_add(prog->globals, n->ident, NULL, &n->sym);
  119. }
  120. int infer_type_list(prog_t *prog, node_t *n)
  121. {
  122. type_t *t;
  123. /* list of lists (code block) => void */
  124. if (n->list->ntype == N_LIST) {
  125. n->type = &t_void;
  126. return 0;
  127. }
  128. t = n->list->type;
  129. if (!t)
  130. return 0;
  131. switch (t->ttype) {
  132. case T_FUNC:
  133. n->type = t->t.func.type;
  134. break;
  135. default:
  136. n->type = t;
  137. }
  138. return 0;
  139. }
  140. int infer_type_keyword(prog_t *prog, node_t *n)
  141. {
  142. node_t *dst, *src;
  143. switch (n->keyword.class) {
  144. case KW_ASSIGN:
  145. dst = node_next(n);
  146. src = node_next(dst);
  147. assert(dst && src);
  148. if (!src->type)
  149. return 0;
  150. /* TODO: assignment is statement for now. do we need
  151. * c-style assignment expressions? e.g `a = b = 2;` */
  152. n->type = &t_void;
  153. if (dst->type)
  154. return 0;
  155. dst->type = src->type;
  156. if (dst->ntype != N_IDENT)
  157. return 0;
  158. return sym_add(dst->sym->st, dst->ident, dst->type, NULL);
  159. case KW_BINOP:
  160. dst = node_next(n);
  161. src = node_next(dst);
  162. assert(dst && src);
  163. if (!(src->type && dst->type && type_equal(src->type, dst->type)))
  164. return 0;
  165. n->type = dst->type;
  166. return 0;
  167. default:
  168. n->type = &t_void;
  169. return 0;
  170. }
  171. return -ENOSYS;
  172. }
  173. int infer_type_sym(prog_t *prog, node_t *n)
  174. {
  175. node_t *parent, *key;
  176. if (n->sym->type) {
  177. /* the symbol type could have been inferred in another
  178. * probe, in that case copy the type to this node. */
  179. if (!n->type)
  180. n->type = n->sym->type;
  181. return 0;
  182. }
  183. parent = node_up(n);
  184. key = node_next(n);
  185. /* match `somemap[somekey]` where the type of the entire
  186. * expression and the type of the key is known, since that
  187. * means the type of the map itself is also known. */
  188. if (parent && parent->type
  189. && (parent->list->ntype == N_KEYWORD)
  190. && (parent->list->keyword.class == KW_SUBSCRIPT)
  191. && key && key->type) {
  192. n->type = type_map_of(key->type, parent->type);
  193. return sym_add(n->sym->st, n->ident, n->type, NULL);
  194. }
  195. return 0;
  196. }
  197. int pass_infer_types(node_t *n, void *_prog)
  198. {
  199. prog_t *prog = _prog;
  200. if (n->type)
  201. return 0;
  202. switch (n->ntype) {
  203. case N_LIST:
  204. return infer_type_list(prog, n);
  205. case N_KEYWORD:
  206. return infer_type_keyword(prog, n);
  207. case N_IDENT:
  208. return infer_type_sym(prog, n);
  209. default:
  210. break;
  211. }
  212. return 0;
  213. }
  214. int validate_func(node_t *n)
  215. {
  216. node_t *arg;
  217. field_t *f;
  218. int i;
  219. for (arg = node_next(n), f = n->type->t.func.args, i = 1;
  220. arg && f && f->type; arg = node_next(arg), f++, i++) {
  221. if (type_compatible(arg->type, f->type))
  222. continue;
  223. node_print(n, stderr);
  224. fprintf(stderr, ": incompatible type of argument %d (", i);
  225. type_dump(arg->type, stderr);
  226. fputs("), expected ", stderr);
  227. type_dump(f->type, stderr);
  228. fputs("\n", stderr);
  229. return -EINVAL;
  230. }
  231. if (!arg && (!f || !f->type))
  232. return 0;
  233. if (arg) {
  234. node_print(n, stderr);
  235. fprintf(stderr, ": too many arguments, expected %d", i);
  236. return -EINVAL;
  237. }
  238. if (f->optional)
  239. return 0;
  240. node_print(n, stderr);
  241. fputs(": too few arguments", stderr);
  242. return -EINVAL;
  243. }
  244. int pass_validate_types(node_t *n, void *_prog)
  245. {
  246. prog_t *prog = _prog;
  247. node_print(n, stdout); putchar('\n');
  248. if (!n->type) {
  249. node_print(n, stderr);
  250. fputs(": type unknown\n", stderr);
  251. return -EINVAL;
  252. }
  253. if (n->ntype != N_LIST)
  254. return 0;
  255. if (n->list->ntype != N_IDENT)
  256. return 0;
  257. assert(n->list->type->ttype == T_FUNC);
  258. return validate_func(n->list);
  259. }
  260. int validate_syms(prog_t *prog)
  261. {
  262. return 0;
  263. }
  264. int run_validate_types(pass_t *pass, ctx_t *ctx)
  265. {
  266. prog_t **prog;
  267. int err;
  268. for (prog = ctx->progs; *prog; prog++) {
  269. /* check syms first to give better error messages.
  270. * e.g. "i: type unknown", not "b-: type unknown" */
  271. err = validate_syms(*prog);
  272. if (err)
  273. return err;
  274. err = node_walk((*prog)->ast, pass->pre, pass->post, *prog);
  275. if (err)
  276. return err;
  277. }
  278. return 0;
  279. }
  280. int rewrite_const_math(node_t *n)
  281. {
  282. int64_t result;
  283. node_t *a, *b, *new;
  284. int op;
  285. /* TODO: handle L/UL/ULL correctly */
  286. op = n->list->keyword.op;
  287. a = node_next(n->list);
  288. b = node_next(a);
  289. switch (op) {
  290. case '*': result = a->num * b->num; break;
  291. case '/': result = a->num / b->num; break;
  292. case '%': result = a->num % b->num; break;
  293. case '+': result = a->num + b->num; break;
  294. case '-': result = a->num - b->num; break;
  295. case '<': result = a->num << b->num; break;
  296. case '>': result = a->num >> b->num; break;
  297. default: return 0;
  298. }
  299. new = node_num(result);
  300. new->type = n->type;
  301. return node_replace(n, new);
  302. }
  303. int pass_rewrite_ast(node_t *n, void *_prog)
  304. {
  305. prog_t *prog = _prog;
  306. provider_t *global = provider_get(":");
  307. int err;
  308. if (prog->provider->rewrite_node) {
  309. err = prog->provider->rewrite_node(prog, n);
  310. if (err)
  311. return err;
  312. }
  313. if (global->rewrite_node) {
  314. err = global->rewrite_node(prog, n);
  315. if (err)
  316. return err;
  317. }
  318. /* pre-compute binops where both sides are constants */
  319. if ((n->ntype == N_LIST)
  320. && (n->list->ntype == N_KEYWORD)
  321. && (n->list->keyword.class == KW_BINOP)
  322. && (node_next(n->list)->ntype == N_NUM)
  323. && (node_next(node_next(n->list))->ntype == N_NUM))
  324. return rewrite_const_math(n);
  325. return 0;
  326. }
  327. int generate_ir_ident(prog_t *prog, node_t *n)
  328. {
  329. switch (n->sym->type->ttype) {
  330. case T_FUNC:
  331. return n->sym->type->t.func.generate_ir(prog, n);
  332. case T_MAP:
  333. ir_emit_ldmap(prog->ir, BPF_REG_0, n->sym);
  334. return 0;
  335. default:
  336. break;
  337. }
  338. return 0;
  339. }
  340. int pass_generate_ir(node_t *n, void *_prog)
  341. {
  342. prog_t *prog = _prog;
  343. switch (n->ntype) {
  344. case N_LIST:
  345. return 0;
  346. case N_IDENT:
  347. return generate_ir_ident(prog, n);
  348. default:
  349. break;
  350. }
  351. return 0;
  352. }
  353. int run_generate_ir(pass_t *pass, ctx_t *ctx)
  354. {
  355. prog_t **progp;
  356. int err;
  357. for (progp = ctx->progs; *progp; progp++) {
  358. prog_t *prog = *progp;
  359. int return_label = ir_alloc_label(prog->ir);
  360. err = prog->provider->ir_prologue ?
  361. prog->provider->ir_prologue(prog) : 0;
  362. if (err)
  363. return err;
  364. err = node_walk(prog->ast, NULL, pass_generate_ir, prog);
  365. if (err)
  366. return err;
  367. err = prog->provider->ir_epilogue ?
  368. prog->provider->ir_epilogue(prog) : 0;
  369. if (err)
  370. return err;
  371. ir_emit_label(prog->ir, return_label);
  372. ir_emit_insn(prog->ir, EXIT, 0, 0);
  373. }
  374. return 0;
  375. }
  376. int run_walk(pass_t *pass, ctx_t *ctx)
  377. {
  378. prog_t **prog;
  379. int err;
  380. for (prog = ctx->progs; *prog; prog++) {
  381. err = node_walk((*prog)->ast, pass->pre, pass->post, *prog);
  382. if (err)
  383. return err;
  384. }
  385. return 0;
  386. }
  387. pass_t passes[] = {
  388. { .run = run_walk, .post = pass_resolve_symbols },
  389. { .run = run_walk, .post = pass_infer_types },
  390. { .run = run_walk, .post = pass_infer_types },
  391. { .run = run_walk, .post = pass_infer_types },
  392. { .run = run_validate_types, .post = pass_validate_types },
  393. { .run = run_walk, .post = pass_rewrite_ast },
  394. { .run = run_generate_ir },
  395. { NULL }
  396. };
  397. int main(void)
  398. {
  399. ctx_t *ctx = ctx_get();
  400. prog_t **prog;
  401. pass_t *pass;
  402. int err;
  403. for (pass = passes; pass->run; pass++) {
  404. err = pass->run(pass, ctx);
  405. if (err)
  406. break;
  407. }
  408. for (prog = ctx->progs; *prog; prog++) {
  409. printf("\n\e[34m%s\e[0m\n", (*prog)->probe);
  410. node_dump((*prog)->ast, stdout);
  411. printf("\n-- locals\n");
  412. symtab_dump((*prog)->locals, stdout);
  413. printf("-- ir\n");
  414. ir_dump((*prog)->ir, stdout);
  415. }
  416. printf("\n\n-- globals\n");
  417. symtab_dump(ctx->globals, stdout);
  418. if (err)
  419. printf("ERR: %d\n", err);
  420. return err;
  421. }