A dynamic tracer for Linux

global.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. #define _GNU_SOURCE /* asprintf */
  2. #include <assert.h>
  3. #include <errno.h>
  4. #include <limits.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "arch.h"
  8. #include "func.h"
  9. #include "node.h"
  10. #include "ply.h"
  11. #include "sym.h"
  12. #include "type.h"
  13. /* . */
  14. static int global_dot_ir_pre(const struct func *func, struct node *n,
  15. struct prog *prog)
  16. {
  17. struct node *sou = n->expr.args;
  18. if (node_is(sou, ":deref")) {
  19. /* (*ptr).member, if *ptr is not already loaded let it
  20. * know that we're only interested in one member */
  21. sou->sym->irs.hint.dot = 1;
  22. /* this also means we need to put ourselves on the
  23. * stack since data will be loaded via probe_read */
  24. n->sym->irs.hint.stack = 1;
  25. }
  26. return 0;
  27. }
  28. static int global_dot_ir_post(const struct func *func, struct node *n,
  29. struct prog *prog)
  30. {
  31. struct node *sou, *member;
  32. struct irstate *dst;
  33. ssize_t offset;
  34. sou = n->expr.args;
  35. member = sou->next;
  36. dst = &n->sym->irs;
  37. ir_init_sym(prog->ir, n->sym);
  38. offset = type_offsetof(type_base(sou->sym->type), member->string.data);
  39. assert(offset >= 0);
  40. if (!sou->sym->irs.loc) {
  41. /* sou is a :deref which wasn't loaded by child, just
  42. * read the member we're interested in. */
  43. struct node *ptr = sou->expr.args;
  44. ir_emit_sym_to_reg(prog->ir, BPF_REG_3, ptr->sym);
  45. ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, 0, offset), BPF_REG_3, 0);
  46. goto probe_read;
  47. }
  48. offset += sou->sym->irs.stack;
  49. if (dst->loc == LOC_REG) {
  50. switch (dst->size) {
  51. case 1:
  52. ir_emit_insn(prog->ir, LDXB(0, offset, 0),
  53. dst->reg, BPF_REG_BP);
  54. break;
  55. case 2:
  56. ir_emit_insn(prog->ir, LDXH(0, offset, 0),
  57. dst->reg, BPF_REG_BP);
  58. break;
  59. case 4:
  60. ir_emit_insn(prog->ir, LDXW(0, offset, 0),
  61. dst->reg, BPF_REG_BP);
  62. break;
  63. case 8:
  64. ir_emit_insn(prog->ir, LDXDW(0, offset, 0),
  65. dst->reg, BPF_REG_BP);
  66. break;
  67. default:
  68. assert(0);
  69. }
  70. return 0;
  71. }
  72. ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, 0, offset), BPF_REG_3, 0);
  73. probe_read:
  74. ir_emit_insn(prog->ir, MOV_IMM(0, (int32_t)dst->size), BPF_REG_2, 0);
  75. ir_emit_insn(prog->ir, MOV(0, 0), BPF_REG_1, BPF_REG_BP);
  76. ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, 0, dst->stack), BPF_REG_1, 0);
  77. ir_emit_insn(prog->ir, CALL(BPF_FUNC_probe_read), 0, 0);
  78. /* TODO if (r0) exit(r0); */
  79. return 0;
  80. }
  81. static int global_dot_type_infer(const struct func *func, struct node *n)
  82. {
  83. struct node *sou, *member;
  84. struct type *t;
  85. struct tfield *f;
  86. if (n->sym->type)
  87. return 0;
  88. sou = n->expr.args;
  89. member = sou->next;
  90. if (!sou->sym->type)
  91. return 0;
  92. t = type_base(sou->sym->type);
  93. /* TODO: add union */
  94. if (t->ttype != T_STRUCT) {
  95. _e("%#N: %N is neither struct nor union (type '%T').\n",
  96. n, sou, sou->sym->type);
  97. return -EINVAL;
  98. }
  99. f = tfields_get(t->sou.fields, member->string.data);
  100. if (!f) {
  101. _e("%#N: type '%T' has no member named %N.\n", n, t, member);
  102. return -EINVAL;
  103. }
  104. /* given `sou.member` where sou is a struct/union, infer that
  105. * the expression's type is equal to member's type. */
  106. n->sym->type = f->type;
  107. return 0;
  108. }
  109. /* :deref */
  110. static int global_deref_ir_post(const struct func *func, struct node *n,
  111. struct prog *prog)
  112. {
  113. struct node *ptr = n->expr.args;
  114. struct irstate *dst;
  115. size_t size;
  116. dst = &n->sym->irs;
  117. if (dst->hint.dot)
  118. /* (*ptr).member, ptr points to a struct and our
  119. * parent is only interested in one member. don't load
  120. * the struct, let the dot operaton steal the address
  121. * from our argument */
  122. return 0;
  123. ir_init_sym(prog->ir, n->sym);
  124. if (dst->hint.lval)
  125. /* *ptr = val, whatever is in our storage now it will
  126. be overwritten, so skip the load. */
  127. return 0;
  128. ir_emit_insn(prog->ir, MOV(0, 0), BPF_REG_1, BPF_REG_BP);
  129. ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, 0, dst->stack), BPF_REG_1, 0);
  130. ir_emit_insn(prog->ir, MOV_IMM(0, (int32_t)dst->size), BPF_REG_2, 0);
  131. ir_emit_sym_to_reg(prog->ir, BPF_REG_3, ptr->sym);
  132. ir_emit_insn(prog->ir, CALL(BPF_FUNC_probe_read), 0, 0);
  133. /* TODO if (r0) exit(r0); */
  134. return 0;
  135. }
  136. static int global_deref_type_infer(const struct func *func, struct node *n)
  137. {
  138. struct node *ptr = n->expr.args;
  139. struct type *t;
  140. if (n->sym->type || !ptr->sym->type)
  141. return 0;
  142. t = type_base(ptr->sym->type);
  143. if (t->ttype != T_POINTER) {
  144. _e("%#N: can't dereference %N (type '%T').\n",
  145. n, ptr, ptr->sym->type);
  146. return -EINVAL;
  147. }
  148. /* given `*p` where p is a pointer, infer that the
  149. * expression's type is equal to p's concrete type. */
  150. n->sym->type = t->ptr.type;
  151. return 0;
  152. }
  153. /* :map */
  154. static struct type *global_map_ktype(struct node *n)
  155. {
  156. struct node *map, *key;
  157. struct type *ktype;
  158. struct tfield *kfields, *f;
  159. int i, nargs = node_nargs(n);
  160. char *kname;
  161. map = n->expr.args;
  162. if (nargs == 2)
  163. return map->next->sym->type;
  164. ktype = calloc(1, sizeof(*ktype));
  165. assert(ktype);
  166. kfields = calloc(nargs, sizeof(*kfields));
  167. assert(kfields);
  168. for (key = map->next, f = kfields, i = 0; key; key = key->next, f++, i++) {
  169. asprintf(&f->name, "k%d", i);
  170. f->type = key->sym->type;
  171. }
  172. asprintf(&ktype->sou.name, ":%s_key", map->ident.name);
  173. ktype->ttype = T_STRUCT;
  174. ktype->sou.fields = kfields;
  175. type_add(ktype);
  176. return ktype;
  177. }
  178. static int global_map_type_infer(const struct func *func, struct node *n)
  179. {
  180. struct node *map = n->expr.args;
  181. if (!map->sym->type)
  182. return 0;
  183. /* TODO validate key against known type */
  184. /* given `m[key]` where m's type is known, infer that the
  185. * expression's type is equal to m's value type. */
  186. n->sym->type = map->sym->type->map.vtype;
  187. return 0;
  188. }
  189. static int global_map_static_validate(const struct func *func, struct node *n)
  190. {
  191. if (n->expr.args->ntype != N_IDENT) {
  192. _e("%#N: can't lookup a key in %N, which is not a map.\n",
  193. n, n);
  194. return -EINVAL;
  195. }
  196. return 0;
  197. }
  198. /* :assign */
  199. static int global_assign_type_infer_map(struct node *n)
  200. {
  201. struct node *map, *key;
  202. struct type *ktype;
  203. map = n->expr.args;
  204. for (key = map->next; key; key = key->next) {
  205. if (type_sizeof(key->sym->type) < 0)
  206. return 0;
  207. }
  208. map->sym->type = type_map_of(global_map_ktype(n), n->sym->type);
  209. return 0;
  210. }
  211. static int global_assign_type_infer(const struct func *func, struct node *n)
  212. {
  213. struct node *lval, *rval;
  214. if (n->sym->type)
  215. return 0;
  216. lval = n->expr.args;
  217. rval = lval->next;
  218. if (!rval->sym->type)
  219. return 0;
  220. if (!lval->sym->type) {
  221. /* given `a = b` where b's type is known but not a's,
  222. * infer that a's type must be equal to b's */
  223. lval->sym->type = rval->sym->type;
  224. /* TODO do we need assignment expressions? */
  225. n->sym->type = &t_void;
  226. if (node_is(lval, "{}"))
  227. return global_assign_type_infer_map(lval);
  228. return 0;
  229. }
  230. if (type_compatible(lval->sym->type, rval->sym->type))
  231. return 0;
  232. _e("%#N: can't assign %N (type '%T'), to %N (type '%T').\n",
  233. n, rval, rval->sym->type, lval, lval->sym->type);
  234. return -EINVAL;
  235. }
  236. static int global_assign_static_validate(const struct func *func, struct node *n)
  237. {
  238. struct node *lval;
  239. lval = n->expr.args;
  240. if (node_is(lval, "{}") || (lval->ntype == N_IDENT))
  241. return 0;
  242. _e("%#N: can't assign a value to %N.\n", n, lval);
  243. return -EINVAL;
  244. }
  245. /* :binop */
  246. static int global_binop_type_infer(const struct func *func, struct node *n)
  247. {
  248. struct node *lval, *rval;
  249. if (n->sym->type)
  250. return 0;
  251. lval = n->expr.args;
  252. rval = lval->next;
  253. if (!lval->sym->type || !rval->sym->type)
  254. return 0;
  255. if (type_equal(lval->sym->type, rval->sym->type)) {
  256. n->sym->type = lval->sym->type;
  257. return 0;
  258. }
  259. /* TODO handle integer promotion */
  260. return 0;
  261. }
/* quantize */
  263. static int global_quantize_type_infer(const struct func *func, struct node *n)
  264. {
  265. struct node *arg;
  266. struct type *t;
  267. arg = n->expr.args;
  268. if (n->sym->type || !arg->sym->type)
  269. return 0;
  270. t = type_base(arg->sym->type);
  271. if (t->ttype != T_SCALAR) {
  272. _e("%#N: can't quantize non-scalar value %N (type '%T').\n",
  273. n, arg, arg->sym->type);
  274. return -EINVAL;
  275. }
  276. n->sym->type = type_array_of(arg->sym->type, type_sizeof(t) * 8);
  277. return 0;
  278. }
  279. /* pid */
  280. static int global_pid_ir_post(const struct func *func, struct node *n,
  281. struct prog *prog)
  282. {
  283. struct node *ptr = n->expr.args;
  284. ir_init_sym(prog->ir, n->sym);
  285. ir_emit_insn(prog->ir, CALL(BPF_FUNC_get_current_pid_tgid), 0, 0);
  286. ir_emit_insn(prog->ir, ALU64_IMM(BPF_RSH, 0, 32), BPF_REG_0, 0);
  287. ir_emit_reg_to_sym(prog->ir, n->sym, BPF_REG_0);
  288. return 0;
  289. }
  290. struct type t_pid = {
  291. .ttype = T_TYPEDEF,
  292. .tdef = { .name = ":pid", .type = &t_u32 },
  293. };
  294. struct type t_pid_func = {
  295. .ttype = T_FUNC,
  296. .func = { .type = &t_pid },
  297. };
  298. /* time */
  299. static int global_time_ir_post(const struct func *func, struct node *n,
  300. struct prog *prog)
  301. {
  302. struct node *ptr = n->expr.args;
  303. ir_init_sym(prog->ir, n->sym);
  304. ir_emit_insn(prog->ir, CALL(BPF_FUNC_ktime_get_ns), 0, 0);
  305. ir_emit_reg_to_sym(prog->ir, n->sym, BPF_REG_0);
  306. return 0;
  307. }
  308. struct type t_time = {
  309. .ttype = T_TYPEDEF, /* TODO: should be a T_FUNC with a static
  310. * signature */
  311. .tdef = { .name = ":time", .type = &t_s64 },
  312. };
  313. struct type t_time_func = {
  314. .ttype = T_FUNC,
  315. .func = { .type = &t_time },
  316. };
  317. struct type t_block_func = {
  318. .ttype = T_FUNC,
  319. .func = { .type = &t_void, .vargs = 1 },
  320. };
  321. struct type t_string_array = {
  322. .ttype = T_ARRAY,
  323. .array = { .type = &t_char, .len = 64 }, /* TODO: tunable */
  324. };
  325. struct type t_string = {
  326. .ttype = T_TYPEDEF,
  327. .tdef = { .name = ":string", .type = &t_string_array },
  328. };
  329. struct tfield f_dot[] = {
  330. { .type = &t_void },
  331. { .type = &t_string },
  332. { .type = NULL }
  333. };
  334. struct type t_dot_func = {
  335. .ttype = T_FUNC,
  336. .func = { .type = &t_void, .args = f_dot },
  337. };
  338. struct tfield f_2args[] = {
  339. { .type = &t_void },
  340. { .type = &t_void },
  341. { .type = NULL }
  342. };
  343. struct type t_2args_func = {
  344. .ttype = T_FUNC,
  345. .func = { .type = &t_void, .args = f_2args },
  346. };
  347. struct tfield f_1arg[] = {
  348. { .type = &t_void },
  349. { .type = NULL }
  350. };
  351. struct type t_1arg_func = {
  352. .ttype = T_FUNC,
  353. .func = { .type = &t_void, .args = f_1arg },
  354. };
  355. static const struct func global_funcs[] = {
  356. {
  357. .name = ":block",
  358. .type = &t_block_func,
  359. .static_ret = 1,
  360. },
  361. {
  362. .name = ".",
  363. .type = &t_dot_func,
  364. .type_infer = global_dot_type_infer,
  365. .ir_pre = global_dot_ir_pre,
  366. .ir_post = global_dot_ir_post,
  367. },
  368. {
  369. .name = ":deref",
  370. .type = &t_1arg_func,
  371. .type_infer = global_deref_type_infer,
  372. .ir_post = global_deref_ir_post,
  373. },
  374. {
  375. .name = "+",
  376. .type = &t_2args_func,
  377. .type_infer = global_binop_type_infer,
  378. },
  379. {
  380. .name = "-",
  381. .type = &t_2args_func,
  382. .type_infer = global_binop_type_infer,
  383. },
  384. {
  385. .name = "=",
  386. .type = &t_2args_func,
  387. .type_infer = global_assign_type_infer,
  388. .static_validate = global_assign_static_validate,
  389. },
  390. {
  391. .name = "{}",
  392. /* .type = t_map_func, */
  393. .type_infer = global_map_type_infer,
  394. .static_validate = global_map_static_validate,
  395. },
  396. {
  397. .name = "pid",
  398. .type = &t_pid_func,
  399. .static_ret = 1,
  400. .ir_post = global_pid_ir_post,
  401. },
  402. {
  403. .name = "time",
  404. .type = &t_time_func,
  405. .static_ret = 1,
  406. .ir_post = global_time_ir_post,
  407. },
  408. {
  409. .name = "quantize",
  410. .type = &t_1arg_func,
  411. .type_infer = global_quantize_type_infer,
  412. },
  413. { .name = NULL }
  414. };
  415. static struct type *global_num_type(struct node *n)
  416. {
  417. if (n->num.unsignd) {
  418. if (n->num.u64 <= INT_MAX)
  419. return &t_int;
  420. else if (n->num.u64 <= UINT_MAX)
  421. return &t_uint;
  422. else if (n->num.u64 <= LONG_MAX)
  423. return &t_long;
  424. else if (n->num.u64 <= ULONG_MAX)
  425. return &t_ulong;
  426. else if (n->num.u64 <= LLONG_MAX)
  427. return &t_llong;
  428. else if (n->num.u64 <= ULLONG_MAX)
  429. return &t_ullong;
  430. } else {
  431. if (n->num.s64 >= INT_MIN && n->num.s64 <= INT_MAX)
  432. return &t_int;
  433. else if (n->num.s64 >= LONG_MIN && n->num.s64 <= LONG_MAX)
  434. return &t_long;
  435. else if (n->num.s64 >= LLONG_MIN && n->num.s64 <= LLONG_MAX)
  436. return &t_llong;
  437. }
  438. assert(0);
  439. return NULL;
  440. }
  441. static const struct func global_num_func = {
  442. .name = ":num",
  443. };
  444. static const struct func global_string_func = {
  445. .name = ":string",
  446. .type = &t_string,
  447. .static_ret = 1,
  448. };
  449. static const struct func global_ident_func = {
  450. .name = ":ident",
  451. };
  452. static const struct func *global_sym_alloc_expr(struct node *n)
  453. {
  454. const struct func *func;
  455. int err;
  456. for (func = global_funcs; func->name; func++) {
  457. if (strcmp(func->name, n->expr.func))
  458. continue;
  459. return func;
  460. }
  461. return NULL;
  462. }
  463. int global_sym_alloc(struct prog *prog, struct node *n)
  464. {
  465. const struct func *func;
  466. struct symtab *st = prog->locals;
  467. int err;
  468. switch (n->ntype) {
  469. case N_EXPR:
  470. func = global_sym_alloc_expr(n);
  471. break;
  472. case N_IDENT:
  473. st = prog->globals;
  474. func = &global_ident_func;
  475. break;
  476. case N_NUM:
  477. func = &global_num_func;
  478. break;
  479. case N_STRING:
  480. func = &global_string_func;
  481. break;
  482. }
  483. if (!func)
  484. return -ENOENT;
  485. err = func_static_validate(func, n);
  486. if (err)
  487. return err;
  488. n->sym = sym_alloc(st, n, func);
  489. if (n->ntype == N_NUM)
  490. n->sym->type = global_num_type(n);
  491. else if (func->static_ret)
  492. n->sym->type = func_return_type(func);
  493. return 0;
  494. }
  495. int global_probe(struct prog *prog)
  496. {
  497. return 0;
  498. }
  499. struct provider global = {
  500. .name = ":",
  501. .sym_alloc = global_sym_alloc,
  502. .probe = global_probe,
  503. };
  504. __attribute__((constructor))
  505. static void global_init(void)
  506. {
  507. provider_register(&global);
  508. }