A dynamic tracer for Linux

ply.c 8.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. #include <assert.h>
  2. #include <errno.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "ply.h"
  7. struct providers {
  8. provider_t **prov;
  9. size_t len;
  10. } providers;
  11. provider_t *provider_get(const char *name)
  12. {
  13. size_t i;
  14. for (i = 0; i < providers.len; i++) {
  15. if (strstr(providers.prov[i]->name, name)
  16. == providers.prov[i]->name)
  17. return providers.prov[i];
  18. }
  19. return NULL;
  20. }
  21. void provider_register(provider_t *prov)
  22. {
  23. assert(prov);
  24. assert(prov->probe);
  25. assert(prov->resolve);
  26. providers.prov = realloc(providers.prov,
  27. ++providers.len * sizeof(*providers.prov));
  28. providers.prov[providers.len - 1] = prov;
  29. }
  30. typedef struct pass pass_t;
  31. struct pass {
  32. int (*run)(pass_t *, ctx_t *);
  33. walk_fn pre;
  34. walk_fn post;
  35. };
  36. symtab_t globals = { .sym = NULL, .len = 0 };
  37. symtab_t locals = { .sym = NULL, .len = 0 };
  38. ctx_t *ctx_get(void)
  39. {
  40. ctx_t *ctx;
  41. prog_t *prog;
  42. ctx = calloc(1, sizeof(*ctx));
  43. ctx->globals = calloc(1, sizeof(*ctx->globals));
  44. ctx->progs = calloc(3, sizeof(*ctx->progs));
  45. /* PROBE0 */
  46. prog = calloc(1, sizeof(*prog));
  47. prog->locals = calloc(1, sizeof(*prog->locals));
  48. prog->globals = ctx->globals;
  49. prog->probe = "k:SyS_read";
  50. /* { t0 = time(); @reads[pid()] = quantize(arg2) } */
  51. prog->ast =
  52. node_vlist(
  53. node_vlist(node_keyword('=', 0),
  54. node_ident("t0"),
  55. node_list(node_ident("time")),
  56. NULL),
  57. node_vlist(node_keyword('=', 0),
  58. node_vlist(node_keyword('[', 0),
  59. node_ident("@reads"),
  60. node_list(node_ident("pid")),
  61. NULL),
  62. node_vlist(node_ident("quantize"),
  63. node_ident("arg2"),
  64. NULL),
  65. NULL),
  66. NULL);
  67. prog->provider = provider_get("k");
  68. prog->provider->probe(prog);
  69. ctx->progs[0] = prog;
  70. /* PROBE1 */
  71. prog = calloc(1, sizeof(*prog));
  72. prog->locals = calloc(1, sizeof(*prog->locals));
  73. prog->globals = ctx->globals;
  74. /* TODO: k -> kret */
  75. prog->probe = "k:SyS_read2";
  76. /* { @times[pid()] = quantize(time() - t0) } */
  77. prog->ast =
  78. node_list(
  79. node_vlist(node_keyword('=', 0),
  80. node_vlist(node_keyword('[', 0),
  81. node_ident("@times"),
  82. node_list(node_ident("pid")),
  83. NULL),
  84. node_vlist(node_ident("quantize"),
  85. node_vlist(node_keyword('b', '-'),
  86. node_list(node_ident("time")),
  87. node_ident("t0"),
  88. NULL),
  89. NULL),
  90. NULL)
  91. );
  92. prog->provider = provider_get("k");
  93. prog->provider->probe(prog);
  94. ctx->progs[1] = prog;
  95. return ctx;
  96. }
  97. int pass_resolve_symbols(node_t *n, void *_prog)
  98. {
  99. prog_t *prog = _prog;
  100. provider_t *global = provider_get(":");
  101. node_t *op;
  102. int err;
  103. if (n->ntype != N_IDENT)
  104. return 0;
  105. err = prog->provider->resolve(prog, n);
  106. if (!err || (err != -ENOENT))
  107. return err;
  108. err = global->resolve(prog, n);
  109. if (!err || (err != -ENOENT))
  110. return err;
  111. /* neither provider identifier nor global ditto => user
  112. * variable, add it as a global symbol of unknown type. */
  113. return sym_add(prog->globals, n->ident, NULL, &n->sym);
  114. }
  115. int infer_type_list(prog_t *prog, node_t *n)
  116. {
  117. type_t *t;
  118. /* list of lists (code block) => void */
  119. if (n->list->ntype == N_LIST) {
  120. n->type = &t_void;
  121. return 0;
  122. }
  123. t = n->list->type;
  124. if (!t)
  125. return 0;
  126. switch (t->ttype) {
  127. case T_FUNC:
  128. n->type = t->t.func.type;
  129. break;
  130. default:
  131. n->type = t;
  132. }
  133. return 0;
  134. }
  135. int infer_type_keyword(prog_t *prog, node_t *n)
  136. {
  137. node_t *dst, *src;
  138. switch (n->keyword.class) {
  139. case KW_ASSIGN:
  140. dst = node_next(n);
  141. src = node_next(dst);
  142. assert(dst && src);
  143. if (!src->type)
  144. return 0;
  145. /* TODO: assignment is statement for now. do we need
  146. * c-style assignment expressions? e.g `a = b = 2;` */
  147. n->type = &t_void;
  148. if (dst->type)
  149. return 0;
  150. dst->type = src->type;
  151. if (dst->ntype != N_IDENT)
  152. return 0;
  153. return sym_add(dst->sym->st, dst->ident, dst->type, NULL);
  154. case KW_BINOP:
  155. dst = node_next(n);
  156. src = node_next(dst);
  157. assert(dst && src);
  158. if (!(src->type && dst->type && type_equal(src->type, dst->type)))
  159. return 0;
  160. n->type = dst->type;
  161. return 0;
  162. default:
  163. n->type = &t_void;
  164. return 0;
  165. }
  166. return -ENOSYS;
  167. }
  168. int infer_type_sym(prog_t *prog, node_t *n)
  169. {
  170. node_t *parent, *key;
  171. if (n->sym->type) {
  172. /* the symbol type could have been inferred in another
  173. * probe, in that case copy the type to this node. */
  174. if (!n->type)
  175. n->type = n->sym->type;
  176. return 0;
  177. }
  178. parent = node_up(n);
  179. key = node_next(n);
  180. /* match `somemap[somekey]` where the type of the entire
  181. * expression and the type of the key is known, since that
  182. * means the type of the map itself is also known. */
  183. if (parent && parent->type
  184. && (parent->list->ntype == N_KEYWORD)
  185. && (parent->list->keyword.class == KW_SUBSCRIPT)
  186. && key && key->type) {
  187. n->type = type_map_of(key->type, parent->type);
  188. return sym_add(n->sym->st, n->ident, n->type, NULL);
  189. }
  190. return 0;
  191. }
  192. int pass_infer_types(node_t *n, void *_prog)
  193. {
  194. prog_t *prog = _prog;
  195. if (n->type)
  196. return 0;
  197. switch (n->ntype) {
  198. case N_LIST:
  199. return infer_type_list(prog, n);
  200. case N_KEYWORD:
  201. return infer_type_keyword(prog, n);
  202. case N_IDENT:
  203. return infer_type_sym(prog, n);
  204. default:
  205. break;
  206. }
  207. return 0;
  208. }
  209. int validate_func(node_t *n)
  210. {
  211. node_t *arg;
  212. field_t *f;
  213. int i;
  214. for (arg = node_next(n), f = n->type->t.func.args, i = 1;
  215. arg && f && f->type; arg = node_next(arg), f++, i++) {
  216. if (type_compatible(arg->type, f->type))
  217. continue;
  218. node_print(n, stderr);
  219. fprintf(stderr, ": incompatible type of argument %d (", i);
  220. type_dump(arg->type, stderr);
  221. fputs("), expected ", stderr);
  222. type_dump(f->type, stderr);
  223. fputs("\n", stderr);
  224. return -EINVAL;
  225. }
  226. if (!arg && (!f || !f->type))
  227. return 0;
  228. if (arg) {
  229. node_print(n, stderr);
  230. fprintf(stderr, ": too many arguments, expected %d", i);
  231. return -EINVAL;
  232. }
  233. if (f->optional)
  234. return 0;
  235. node_print(n, stderr);
  236. fputs(": too few arguments", stderr);
  237. return -EINVAL;
  238. }
  239. int pass_validate_types(node_t *n, void *_prog)
  240. {
  241. prog_t *prog = _prog;
  242. if (!n->type) {
  243. node_print(n, stderr);
  244. fputs(": is of unknown type\n", stderr);
  245. return -EINVAL;
  246. }
  247. if (n->ntype != N_LIST)
  248. return 0;
  249. if (n->list->ntype != N_IDENT)
  250. return 0;
  251. assert(n->list->type->ttype == T_FUNC);
  252. return validate_func(n->list);
  253. }
  254. int rewrite_const_math(node_t *n)
  255. {
  256. int64_t result;
  257. node_t *a, *b, *new;
  258. int op;
  259. op = n->list->keyword.op;
  260. a = node_next(n->list);
  261. b = node_next(a);
  262. switch (op) {
  263. case '*': result = a->num * b->num; break;
  264. case '/': result = a->num / b->num; break;
  265. case '%': result = a->num % b->num; break;
  266. case '+': result = a->num + b->num; break;
  267. case '-': result = a->num - b->num; break;
  268. case '<': result = a->num << b->num; break;
  269. case '>': result = a->num >> b->num; break;
  270. default: return 0;
  271. }
  272. new = node_num(result);
  273. new->type = n->type;
  274. return node_replace(n, new);
  275. }
  276. int pass_rewrite_ast(node_t *n, void *_prog)
  277. {
  278. prog_t *prog = _prog;
  279. provider_t *global = provider_get(":");
  280. int err;
  281. if (prog->provider->rewrite_node) {
  282. err = prog->provider->rewrite_node(prog, n);
  283. if (err)
  284. return err;
  285. }
  286. if (global->rewrite_node) {
  287. err = global->rewrite_node(prog, n);
  288. if (err)
  289. return err;
  290. }
  291. /* pre-compute binops where both sides are constants */
  292. if ((n->ntype == N_LIST)
  293. && (n->list->ntype == N_KEYWORD)
  294. && (n->list->keyword.class == KW_BINOP)
  295. && (node_next(n->list)->ntype == N_NUM)
  296. && (node_next(node_next(n->list))->ntype == N_NUM))
  297. return rewrite_const_math(n);
  298. return 0;
  299. }
  300. int pass_walk(pass_t *pass, ctx_t *ctx)
  301. {
  302. prog_t **prog;
  303. int err;
  304. for (prog = ctx->progs; *prog; prog++) {
  305. err = node_walk((*prog)->ast, pass->pre, pass->post, *prog);
  306. if (err)
  307. return err;
  308. }
  309. return 0;
  310. }
  311. pass_t passes[] = {
  312. { .run = pass_walk, .post = pass_resolve_symbols },
  313. { .run = pass_walk, .post = pass_infer_types },
  314. { .run = pass_walk, .post = pass_infer_types },
  315. { .run = pass_walk, .post = pass_infer_types },
  316. { .run = pass_walk, .post = pass_validate_types },
  317. { .run = pass_walk, .post = pass_rewrite_ast },
  318. { NULL }
  319. };
  320. int main(void)
  321. {
  322. ctx_t *ctx = ctx_get();
  323. prog_t **prog;
  324. pass_t *pass;
  325. int err;
  326. for (pass = passes; pass->run; pass++) {
  327. err = pass->run(pass, ctx);
  328. if (err)
  329. break;
  330. }
  331. for (prog = ctx->progs; *prog; prog++) {
  332. printf("\n\e[34m%s\e[0m\n", (*prog)->probe);
  333. node_dump((*prog)->ast, stdout);
  334. printf("\n-- locals\n");
  335. symtab_dump((*prog)->locals, stdout);
  336. }
  337. printf("\n\n-- globals\n");
  338. symtab_dump(ctx->globals, stdout);
  339. if (err)
  340. printf("ERR: %d\n", err);
  341. return err;
  342. }