A dynamic tracer for Linux

ply.c 9.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. #include <assert.h>
  2. #include <errno.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "ply.h"
  7. struct providers {
  8. provider_t **prov;
  9. size_t len;
  10. } providers;
  11. provider_t *provider_get(const char *name)
  12. {
  13. size_t i;
  14. for (i = 0; i < providers.len; i++) {
  15. if (strstr(providers.prov[i]->name, name)
  16. == providers.prov[i]->name)
  17. return providers.prov[i];
  18. }
  19. return NULL;
  20. }
  21. void provider_register(provider_t *prov)
  22. {
  23. assert(prov);
  24. assert(prov->probe);
  25. assert(prov->resolve);
  26. providers.prov = realloc(providers.prov,
  27. ++providers.len * sizeof(*providers.prov));
  28. providers.prov[providers.len - 1] = prov;
  29. }
  30. typedef struct pass pass_t;
  31. struct pass {
  32. int (*run)(pass_t *, ctx_t *);
  33. walk_fn pre;
  34. walk_fn post;
  35. };
  36. symtab_t globals = { .sym = NULL, .len = 0 };
  37. symtab_t locals = { .sym = NULL, .len = 0 };
  38. ctx_t *ctx_get(void)
  39. {
  40. ctx_t *ctx;
  41. prog_t *prog;
  42. ctx = calloc(1, sizeof(*ctx));
  43. ctx->globals = calloc(1, sizeof(*ctx->globals));
  44. ctx->progs = calloc(3, sizeof(*ctx->progs));
  45. /* PROBE0 */
  46. prog = calloc(1, sizeof(*prog));
  47. prog->locals = calloc(1, sizeof(*prog->locals));
  48. prog->globals = ctx->globals;
  49. prog->probe = "k:SyS_read";
  50. /* { t0 = time(); @reads[pid()] = quantize(arg2) } */
  51. prog->ast =
  52. node_vlist(
  53. node_vlist(node_keyword('=', 0),
  54. node_ident("t0"),
  55. node_list(node_ident("time")),
  56. NULL),
  57. node_vlist(node_keyword('=', 0),
  58. node_vlist(node_keyword('[', 0),
  59. node_ident("@reads"),
  60. node_list(node_ident("pid")),
  61. NULL),
  62. node_vlist(node_ident("quantize"),
  63. node_ident("arg2"),
  64. NULL),
  65. NULL),
  66. NULL);
  67. prog->provider = provider_get("k");
  68. prog->provider->probe(prog);
  69. prog->ir = ir_new();
  70. ctx->progs[0] = prog;
  71. /* PROBE1 */
  72. prog = calloc(1, sizeof(*prog));
  73. prog->locals = calloc(1, sizeof(*prog->locals));
  74. prog->globals = ctx->globals;
  75. /* TODO: k -> kret */
  76. prog->probe = "k:SyS_read2";
  77. /* { @times[pid()] = quantize(time() - t0) } */
  78. prog->ast =
  79. node_list(
  80. node_vlist(node_keyword('=', 0),
  81. node_vlist(node_keyword('[', 0),
  82. node_ident("@times"),
  83. node_list(node_ident("pid")),
  84. NULL),
  85. node_vlist(node_ident("quantize"),
  86. node_vlist(node_keyword('b', '-'),
  87. node_list(node_ident("time")),
  88. node_ident("t0"),
  89. NULL),
  90. NULL),
  91. NULL)
  92. );
  93. prog->provider = provider_get("k");
  94. prog->provider->probe(prog);
  95. prog->ir = ir_new();
  96. ctx->progs[1] = prog;
  97. return ctx;
  98. }
  99. int pass_resolve_symbols(node_t *n, void *_prog)
  100. {
  101. prog_t *prog = _prog;
  102. provider_t *global = provider_get(":");
  103. node_t *op;
  104. int err;
  105. if (n->ntype != N_IDENT)
  106. return 0;
  107. err = prog->provider->resolve(prog, n);
  108. if (!err || (err != -ENOENT))
  109. return err;
  110. err = global->resolve(prog, n);
  111. if (!err || (err != -ENOENT))
  112. return err;
  113. /* neither provider identifier nor global ditto => user
  114. * variable, add it as a global symbol of unknown type. */
  115. return sym_add(prog->globals, n->ident, NULL, &n->sym);
  116. }
  117. int infer_type_list(prog_t *prog, node_t *n)
  118. {
  119. type_t *t;
  120. /* list of lists (code block) => void */
  121. if (n->list->ntype == N_LIST) {
  122. n->type = &t_void;
  123. return 0;
  124. }
  125. t = n->list->type;
  126. if (!t)
  127. return 0;
  128. switch (t->ttype) {
  129. case T_FUNC:
  130. n->type = t->t.func.type;
  131. break;
  132. default:
  133. n->type = t;
  134. }
  135. return 0;
  136. }
  137. int infer_type_keyword(prog_t *prog, node_t *n)
  138. {
  139. node_t *dst, *src;
  140. switch (n->keyword.class) {
  141. case KW_ASSIGN:
  142. dst = node_next(n);
  143. src = node_next(dst);
  144. assert(dst && src);
  145. if (!src->type)
  146. return 0;
  147. /* TODO: assignment is statement for now. do we need
  148. * c-style assignment expressions? e.g `a = b = 2;` */
  149. n->type = &t_void;
  150. if (dst->type)
  151. return 0;
  152. dst->type = src->type;
  153. if (dst->ntype != N_IDENT)
  154. return 0;
  155. return sym_add(dst->sym->st, dst->ident, dst->type, NULL);
  156. case KW_BINOP:
  157. dst = node_next(n);
  158. src = node_next(dst);
  159. assert(dst && src);
  160. if (!(src->type && dst->type && type_equal(src->type, dst->type)))
  161. return 0;
  162. n->type = dst->type;
  163. return 0;
  164. default:
  165. n->type = &t_void;
  166. return 0;
  167. }
  168. return -ENOSYS;
  169. }
  170. int infer_type_sym(prog_t *prog, node_t *n)
  171. {
  172. node_t *parent, *key;
  173. if (n->sym->type) {
  174. /* the symbol type could have been inferred in another
  175. * probe, in that case copy the type to this node. */
  176. if (!n->type)
  177. n->type = n->sym->type;
  178. return 0;
  179. }
  180. parent = node_up(n);
  181. key = node_next(n);
  182. /* match `somemap[somekey]` where the type of the entire
  183. * expression and the type of the key is known, since that
  184. * means the type of the map itself is also known. */
  185. if (parent && parent->type
  186. && (parent->list->ntype == N_KEYWORD)
  187. && (parent->list->keyword.class == KW_SUBSCRIPT)
  188. && key && key->type) {
  189. n->type = type_map_of(key->type, parent->type);
  190. return sym_add(n->sym->st, n->ident, n->type, NULL);
  191. }
  192. return 0;
  193. }
  194. int pass_infer_types(node_t *n, void *_prog)
  195. {
  196. prog_t *prog = _prog;
  197. if (n->type)
  198. return 0;
  199. switch (n->ntype) {
  200. case N_LIST:
  201. return infer_type_list(prog, n);
  202. case N_KEYWORD:
  203. return infer_type_keyword(prog, n);
  204. case N_IDENT:
  205. return infer_type_sym(prog, n);
  206. default:
  207. break;
  208. }
  209. return 0;
  210. }
  211. int validate_func(node_t *n)
  212. {
  213. node_t *arg;
  214. field_t *f;
  215. int i;
  216. for (arg = node_next(n), f = n->type->t.func.args, i = 1;
  217. arg && f && f->type; arg = node_next(arg), f++, i++) {
  218. if (type_compatible(arg->type, f->type))
  219. continue;
  220. node_print(n, stderr);
  221. fprintf(stderr, ": incompatible type of argument %d (", i);
  222. type_dump(arg->type, stderr);
  223. fputs("), expected ", stderr);
  224. type_dump(f->type, stderr);
  225. fputs("\n", stderr);
  226. return -EINVAL;
  227. }
  228. if (!arg && (!f || !f->type))
  229. return 0;
  230. if (arg) {
  231. node_print(n, stderr);
  232. fprintf(stderr, ": too many arguments, expected %d", i);
  233. return -EINVAL;
  234. }
  235. if (f->optional)
  236. return 0;
  237. node_print(n, stderr);
  238. fputs(": too few arguments", stderr);
  239. return -EINVAL;
  240. }
  241. int pass_validate_types(node_t *n, void *_prog)
  242. {
  243. prog_t *prog = _prog;
  244. if (!n->type) {
  245. node_print(n, stderr);
  246. fputs(": is of unknown type\n", stderr);
  247. return -EINVAL;
  248. }
  249. if (n->ntype != N_LIST)
  250. return 0;
  251. if (n->list->ntype != N_IDENT)
  252. return 0;
  253. assert(n->list->type->ttype == T_FUNC);
  254. return validate_func(n->list);
  255. }
  256. int rewrite_const_math(node_t *n)
  257. {
  258. int64_t result;
  259. node_t *a, *b, *new;
  260. int op;
  261. op = n->list->keyword.op;
  262. a = node_next(n->list);
  263. b = node_next(a);
  264. switch (op) {
  265. case '*': result = a->num * b->num; break;
  266. case '/': result = a->num / b->num; break;
  267. case '%': result = a->num % b->num; break;
  268. case '+': result = a->num + b->num; break;
  269. case '-': result = a->num - b->num; break;
  270. case '<': result = a->num << b->num; break;
  271. case '>': result = a->num >> b->num; break;
  272. default: return 0;
  273. }
  274. new = node_num(result);
  275. new->type = n->type;
  276. return node_replace(n, new);
  277. }
  278. int pass_rewrite_ast(node_t *n, void *_prog)
  279. {
  280. prog_t *prog = _prog;
  281. provider_t *global = provider_get(":");
  282. int err;
  283. if (prog->provider->rewrite_node) {
  284. err = prog->provider->rewrite_node(prog, n);
  285. if (err)
  286. return err;
  287. }
  288. if (global->rewrite_node) {
  289. err = global->rewrite_node(prog, n);
  290. if (err)
  291. return err;
  292. }
  293. /* pre-compute binops where both sides are constants */
  294. if ((n->ntype == N_LIST)
  295. && (n->list->ntype == N_KEYWORD)
  296. && (n->list->keyword.class == KW_BINOP)
  297. && (node_next(n->list)->ntype == N_NUM)
  298. && (node_next(node_next(n->list))->ntype == N_NUM))
  299. return rewrite_const_math(n);
  300. return 0;
  301. }
  302. int pass_generate_ir(node_t *n, void *_prog)
  303. {
  304. prog_t *prog = _prog;
  305. return 0;
  306. }
  307. int run_generate_ir(pass_t *pass, ctx_t *ctx)
  308. {
  309. prog_t **progp;
  310. int err;
  311. for (progp = ctx->progs; *progp; progp++) {
  312. prog_t *prog = *progp;
  313. int return_label = ir_alloc_label(prog->ir);
  314. err = prog->provider->ir_prologue ?
  315. prog->provider->ir_prologue(prog) : 0;
  316. if (err)
  317. return err;
  318. err = node_walk(prog->ast, pass->pre, pass->post, prog);
  319. if (err)
  320. return err;
  321. err = prog->provider->ir_epilogue ?
  322. prog->provider->ir_epilogue(prog) : 0;
  323. if (err)
  324. return err;
  325. ir_emit_label(prog->ir, return_label);
  326. ir_emit_insn(prog->ir, EXIT, 0, 0);
  327. }
  328. return 0;
  329. }
  330. int run_walk(pass_t *pass, ctx_t *ctx)
  331. {
  332. prog_t **prog;
  333. int err;
  334. for (prog = ctx->progs; *prog; prog++) {
  335. err = node_walk((*prog)->ast, pass->pre, pass->post, *prog);
  336. if (err)
  337. return err;
  338. }
  339. return 0;
  340. }
  341. pass_t passes[] = {
  342. { .run = run_walk, .post = pass_resolve_symbols },
  343. { .run = run_walk, .post = pass_infer_types },
  344. { .run = run_walk, .post = pass_infer_types },
  345. { .run = run_walk, .post = pass_infer_types },
  346. { .run = run_walk, .post = pass_validate_types },
  347. { .run = run_walk, .post = pass_rewrite_ast },
  348. { .run = run_generate_ir },
  349. { NULL }
  350. };
  351. int main(void)
  352. {
  353. ctx_t *ctx = ctx_get();
  354. prog_t **prog;
  355. pass_t *pass;
  356. int err;
  357. for (pass = passes; pass->run; pass++) {
  358. err = pass->run(pass, ctx);
  359. if (err)
  360. break;
  361. }
  362. for (prog = ctx->progs; *prog; prog++) {
  363. printf("\n\e[34m%s\e[0m\n", (*prog)->probe);
  364. node_dump((*prog)->ast, stdout);
  365. printf("\n-- locals\n");
  366. symtab_dump((*prog)->locals, stdout);
  367. printf("-- ir\n");
  368. ir_dump((*prog)->ir, stdout);
  369. }
  370. printf("\n\n-- globals\n");
  371. symtab_dump(ctx->globals, stdout);
  372. if (err)
  373. printf("ERR: %d\n", err);
  374. return err;
  375. }