A dynamic tracer for Linux

global.c — global provider: builtin operators, type inference and IR generation.
  1. #define _GNU_SOURCE /* asprintf */
  2. #include <assert.h>
  3. #include <errno.h>
  4. #include <limits.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "arch.h"
  8. #include "func.h"
  9. #include "node.h"
  10. #include "ply.h"
  11. #include "sym.h"
  12. #include "type.h"
  13. /* . */
  14. static int global_dot_ir_pre(const struct func *func, struct node *n,
  15. struct prog *prog)
  16. {
  17. struct node *sou = n->expr.args;
  18. if (node_is(sou, ":deref")) {
  19. /* (*ptr).member, if *ptr is not already loaded let it
  20. * know that we're only interested in one member */
  21. sou->sym->irs.hint.dot = 1;
  22. /* this also means we need to put ourselves on the
  23. * stack since data will be loaded via probe_read */
  24. n->sym->irs.hint.stack = 1;
  25. }
  26. return 0;
  27. }
/* Emit IR for `sou.member`: compute the member's byte offset within
 * the base struct/union type, then load it either directly from the
 * stack or through bpf_probe_read() when the struct sits behind a
 * pointer that was never loaded. */
static int global_dot_ir_post(const struct func *func, struct node *n,
			      struct prog *prog)
{
	struct node *sou, *member;
	struct irstate *dst;
	ssize_t offset;

	sou = n->expr.args;
	member = sou->next;
	dst = &n->sym->irs;

	ir_init_sym(prog->ir, n->sym);

	/* byte offset of the named member inside sou's base type */
	offset = type_offsetof(type_base(sou->sym->type), member->string.data);
	assert(offset >= 0);

	if (!sou->sym->irs.loc) {
		/* sou is a :deref which wasn't loaded by child, just
		 * read the member we're interested in. */
		struct node *ptr = sou->expr.args;

		/* r3 = ptr + offset, i.e. the address to probe_read */
		ir_emit_sym_to_reg(prog->ir, BPF_REG_3, ptr->sym);
		ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, offset), BPF_REG_3, 0);
		goto probe_read;
	}

	/* the struct is already on our stack; fold its frame offset in */
	offset += sou->sym->irs.stack;

	if (dst->loc == LOC_REG) {
		/* register destination: a plain stack load suffices */
		ir_emit_insn(prog->ir, LDX(bpf_width(dst->size), offset),
			     dst->reg, BPF_REG_BP);
		return 0;
	}

	/* NOTE(review): r3 is adjusted by offset here without first
	 * being based on BPF_REG_BP — presumably an invariant upheld
	 * by an earlier emission; confirm against ir callers. */
	ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, offset), BPF_REG_3, 0);

probe_read:
	/* bpf_probe_read(r1 = bp + dst->stack, r2 = size, r3 = src) */
	ir_emit_insn(prog->ir, MOV_IMM((int32_t)dst->size), BPF_REG_2, 0);
	ir_emit_insn(prog->ir, MOV, BPF_REG_1, BPF_REG_BP);
	ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, dst->stack), BPF_REG_1, 0);
	ir_emit_insn(prog->ir, CALL(BPF_FUNC_probe_read), 0, 0);
	/* TODO if (r0) exit(r0); */
	return 0;
}
  63. static int global_dot_type_infer(const struct func *func, struct node *n)
  64. {
  65. struct node *sou, *member;
  66. struct type *t;
  67. struct tfield *f;
  68. if (n->sym->type)
  69. return 0;
  70. sou = n->expr.args;
  71. member = sou->next;
  72. if (!sou->sym->type)
  73. return 0;
  74. t = type_base(sou->sym->type);
  75. /* TODO: add union */
  76. if (t->ttype != T_STRUCT) {
  77. _e("%#N: %N is neither struct nor union (type '%T').\n",
  78. n, sou, sou->sym->type);
  79. return -EINVAL;
  80. }
  81. f = tfields_get(t->sou.fields, member->string.data);
  82. if (!f) {
  83. _e("%#N: type '%T' has no member named %N.\n", n, t, member);
  84. return -EINVAL;
  85. }
  86. /* given `sou.member` where sou is a struct/union, infer that
  87. * the expression's type is equal to member's type. */
  88. n->sym->type = f->type;
  89. return 0;
  90. }
  91. /* :deref */
  92. static int global_deref_ir_post(const struct func *func, struct node *n,
  93. struct prog *prog)
  94. {
  95. struct node *ptr = n->expr.args;
  96. struct irstate *dst;
  97. size_t size;
  98. dst = &n->sym->irs;
  99. if (dst->hint.dot)
  100. /* (*ptr).member, ptr points to a struct and our
  101. * parent is only interested in one member. don't load
  102. * the struct, let the dot operaton steal the address
  103. * from our argument */
  104. return 0;
  105. ir_init_sym(prog->ir, n->sym);
  106. if (dst->hint.lval)
  107. /* *ptr = val, whatever is in our storage now it will
  108. be overwritten, so skip the load. */
  109. return 0;
  110. ir_emit_insn(prog->ir, MOV, BPF_REG_1, BPF_REG_BP);
  111. ir_emit_insn(prog->ir, ALU_IMM(BPF_ADD, dst->stack), BPF_REG_1, 0);
  112. ir_emit_insn(prog->ir, MOV_IMM((int32_t)dst->size), BPF_REG_2, 0);
  113. ir_emit_sym_to_reg(prog->ir, BPF_REG_3, ptr->sym);
  114. ir_emit_insn(prog->ir, CALL(BPF_FUNC_probe_read), 0, 0);
  115. /* TODO if (r0) exit(r0); */
  116. return 0;
  117. }
  118. static int global_deref_type_infer(const struct func *func, struct node *n)
  119. {
  120. struct node *ptr = n->expr.args;
  121. struct type *t;
  122. if (n->sym->type || !ptr->sym->type)
  123. return 0;
  124. t = type_base(ptr->sym->type);
  125. if (t->ttype != T_POINTER) {
  126. _e("%#N: can't dereference %N (type '%T').\n",
  127. n, ptr, ptr->sym->type);
  128. return -EINVAL;
  129. }
  130. /* given `*p` where p is a pointer, infer that the
  131. * expression's type is equal to p's concrete type. */
  132. n->sym->type = t->ptr.type;
  133. return 0;
  134. }
  135. /* :map */
  136. static int global_map_ir_pre_key(struct node *n, struct prog *prog)
  137. {
  138. struct node *map = n->expr.args, *arg;
  139. struct type *ktype = type_base(map->sym->type->map.ktype);
  140. ssize_t stack = map->sym->irs.stack;
  141. size_t offset, size, pad;
  142. struct tfield *f;
  143. arg = map->next;
  144. tfields_foreach(f, ktype->sou.fields) {
  145. offset = type_offsetof(ktype, f->name);
  146. size = type_sizeof(f->type);
  147. if (!arg->sym->irs.loc) {
  148. arg->sym->irs.hint.stack = 1;
  149. arg->sym->irs.stack = stack + offset;
  150. }
  151. if (arg->next) {
  152. pad = type_offsetof(ktype, f[1].name) - (offset + size);
  153. if (pad)
  154. ir_emit_bzero(prog->ir,
  155. stack + offset + size, pad);
  156. }
  157. arg = arg->next;
  158. }
  159. pad = type_sizeof(ktype) - (offset + size);
  160. if (pad)
  161. ir_emit_bzero(prog->ir, stack + offset + size, pad);
  162. return 0;
  163. }
  164. static int global_map_ir_pre(const struct func *func, struct node *n,
  165. struct prog *prog)
  166. {
  167. struct irstate *kirs;
  168. struct node *map = n->expr.args;
  169. struct type *ktype = type_base(map->sym->type->map.ktype);
  170. map->sym->irs.hint.stack = 1;
  171. ir_init_irs(prog->ir, &map->sym->irs, ktype);
  172. if (ktype->ttype == T_STRUCT)
  173. return global_map_ir_pre_key(n, prog);
  174. kirs = &map->next->sym->irs;
  175. if (!kirs->loc) {
  176. kirs->hint.stack = 1;
  177. kirs->stack = map->sym->irs.stack;
  178. }
  179. return 0;
  180. }
/* After the key arguments have been evaluated, copy each one to its
 * field's offset within the key area on the stack. */
static int global_map_ir_post(const struct func *func, struct node *n,
			      struct prog *prog)
{
	struct node *map = n->expr.args, *arg;
	struct type *ktype = type_base(map->sym->type->map.ktype);
	ssize_t stack = map->sym->irs.stack;
	size_t offset;
	struct tfield *f;

	arg = map->next;

	/* NOTE(review): ktype->sou.fields is read unconditionally, but
	 * global_map_ir_pre() only treats T_STRUCT keys this way —
	 * confirm scalar ktypes are safe through this union access. */
	tfields_foreach(f, ktype->sou.fields) {
		offset = type_offsetof(ktype, f->name);
		/* materialize this key component at its struct offset */
		ir_emit_sym_to_stack(prog->ir, stack + offset, arg->sym);
		arg = arg->next;
	}
	return 0;
}
  197. static struct type *global_map_ktype(struct node *n)
  198. {
  199. struct node *map, *key;
  200. struct type *ktype;
  201. struct tfield *kfields, *f;
  202. int i, nargs = node_nargs(n);
  203. char *kname;
  204. map = n->expr.args;
  205. if (nargs == 2)
  206. return map->next->sym->type;
  207. ktype = calloc(1, sizeof(*ktype));
  208. assert(ktype);
  209. kfields = calloc(nargs, sizeof(*kfields));
  210. assert(kfields);
  211. for (key = map->next, f = kfields, i = 0; key; key = key->next, f++, i++) {
  212. asprintf(&f->name, "k%d", i);
  213. f->type = key->sym->type;
  214. }
  215. asprintf(&ktype->sou.name, ":%s_key", map->ident.name);
  216. ktype->ttype = T_STRUCT;
  217. ktype->sou.fields = kfields;
  218. type_add(ktype);
  219. return ktype;
  220. }
  221. static int global_map_type_infer(const struct func *func, struct node *n)
  222. {
  223. struct node *map = n->expr.args;
  224. if (!map->sym->type)
  225. return 0;
  226. /* TODO validate key against known type */
  227. /* given `m[key]` where m's type is known, infer that the
  228. * expression's type is equal to m's value type. */
  229. n->sym->type = map->sym->type->map.vtype;
  230. return 0;
  231. }
  232. static int global_map_static_validate(const struct func *func, struct node *n)
  233. {
  234. if (n->expr.args->ntype != N_IDENT) {
  235. _e("%#N: can't lookup a key in %N, which is not a map.\n",
  236. n, n);
  237. return -EINVAL;
  238. }
  239. return 0;
  240. }
  241. /* :assign */
  242. static int global_assign_type_infer_map(struct node *n)
  243. {
  244. struct node *map, *key;
  245. struct type *ktype;
  246. map = n->expr.args;
  247. for (key = map->next; key; key = key->next) {
  248. if (type_sizeof(key->sym->type) < 0)
  249. return 0;
  250. }
  251. map->sym->type = type_map_of(global_map_ktype(n), n->sym->type);
  252. return 0;
  253. }
  254. static int global_assign_type_infer(const struct func *func, struct node *n)
  255. {
  256. struct node *lval, *rval;
  257. if (n->sym->type)
  258. return 0;
  259. lval = n->expr.args;
  260. rval = lval->next;
  261. if (!rval->sym->type)
  262. return 0;
  263. if (!lval->sym->type) {
  264. /* given `a = b` where b's type is known but not a's,
  265. * infer that a's type must be equal to b's */
  266. lval->sym->type = rval->sym->type;
  267. /* TODO do we need assignment expressions? */
  268. n->sym->type = &t_void;
  269. if (node_is(lval, "{}"))
  270. return global_assign_type_infer_map(lval);
  271. return 0;
  272. }
  273. if (type_compatible(lval->sym->type, rval->sym->type))
  274. return 0;
  275. _e("%#N: can't assign %N (type '%T'), to %N (type '%T').\n",
  276. n, rval, rval->sym->type, lval, lval->sym->type);
  277. return -EINVAL;
  278. }
  279. static int global_assign_static_validate(const struct func *func, struct node *n)
  280. {
  281. struct node *lval;
  282. lval = n->expr.args;
  283. if (node_is(lval, "{}") || (lval->ntype == N_IDENT))
  284. return 0;
  285. _e("%#N: can't assign a value to %N.\n", n, lval);
  286. return -EINVAL;
  287. }
  288. /* :binop */
  289. static int global_binop_type_infer(const struct func *func, struct node *n)
  290. {
  291. struct node *lval, *rval;
  292. if (n->sym->type)
  293. return 0;
  294. lval = n->expr.args;
  295. rval = lval->next;
  296. if (!lval->sym->type || !rval->sym->type)
  297. return 0;
  298. if (type_equal(lval->sym->type, rval->sym->type)) {
  299. n->sym->type = lval->sym->type;
  300. return 0;
  301. }
  302. /* TODO handle integer promotion */
  303. return 0;
  304. }
/* quantize */
  306. static int global_quantize_type_infer(const struct func *func, struct node *n)
  307. {
  308. struct node *arg;
  309. struct type *t;
  310. arg = n->expr.args;
  311. if (n->sym->type || !arg->sym->type)
  312. return 0;
  313. t = type_base(arg->sym->type);
  314. if (t->ttype != T_SCALAR) {
  315. _e("%#N: can't quantize non-scalar value %N (type '%T').\n",
  316. n, arg, arg->sym->type);
  317. return -EINVAL;
  318. }
  319. n->sym->type = type_array_of(arg->sym->type, type_sizeof(t) * 8);
  320. return 0;
  321. }
  322. /* pid */
  323. static int global_pid_ir_post(const struct func *func, struct node *n,
  324. struct prog *prog)
  325. {
  326. struct node *ptr = n->expr.args;
  327. ir_init_sym(prog->ir, n->sym);
  328. ir_emit_insn(prog->ir, CALL(BPF_FUNC_get_current_pid_tgid), 0, 0);
  329. ir_emit_insn(prog->ir, ALU64_IMM(BPF_RSH, 32), BPF_REG_0, 0);
  330. ir_emit_reg_to_sym(prog->ir, n->sym, BPF_REG_0);
  331. return 0;
  332. }
/* result type of the pid builtin: a u32 behind a named typedef so
 * output columns are labeled ":pid" */
struct type t_pid = {
	.ttype = T_TYPEDEF,
	.tdef = { .name = ":pid", .type = &t_u32 },
};

/* signature of the pid builtin: () -> :pid */
struct type t_pid_func = {
	.ttype = T_FUNC,
	.func = { .type = &t_pid },
};
  341. /* time */
  342. static int global_time_ir_post(const struct func *func, struct node *n,
  343. struct prog *prog)
  344. {
  345. struct node *ptr = n->expr.args;
  346. ir_init_sym(prog->ir, n->sym);
  347. ir_emit_insn(prog->ir, CALL(BPF_FUNC_ktime_get_ns), 0, 0);
  348. ir_emit_reg_to_sym(prog->ir, n->sym, BPF_REG_0);
  349. return 0;
  350. }
/* result type of the time builtin: nanoseconds as a signed 64-bit */
struct type t_time = {
	.ttype = T_TYPEDEF, /* TODO: should be a T_FUNC with a static
			     * signature */
	.tdef = { .name = ":time", .type = &t_s64 },
};

/* signature of the time builtin: () -> :time */
struct type t_time_func = {
	.ttype = T_FUNC,
	.func = { .type = &t_time },
};
/* generic builtin signatures */
/* statement blocks take any number of arguments, yield nothing */
struct type t_block_func = {
	.ttype = T_FUNC,
	.func = { .type = &t_void, .vargs = 1 },
};

/* backing storage for :string, a fixed-size char array */
struct type t_string_array = {
	.ttype = T_ARRAY,
	.array = { .type = &t_char, .len = 64 }, /* TODO: tunable */
};

struct type t_string = {
	.ttype = T_TYPEDEF,
	.tdef = { .name = ":string", .type = &t_string_array },
};

/* `.` takes an operand of any type plus a member name (string) */
struct tfield f_dot[] = {
	{ .type = &t_void },
	{ .type = &t_string },
	{ .type = NULL }
};

struct type t_dot_func = {
	.ttype = T_FUNC,
	.func = { .type = &t_void, .args = f_dot },
};

/* generic binary signature: (any, any) -> any */
struct tfield f_2args[] = {
	{ .type = &t_void },
	{ .type = &t_void },
	{ .type = NULL }
};

struct type t_2args_func = {
	.ttype = T_FUNC,
	.func = { .type = &t_void, .args = f_2args },
};

/* generic unary signature: (any) -> any */
struct tfield f_1arg[] = {
	{ .type = &t_void },
	{ .type = NULL }
};

struct type t_1arg_func = {
	.ttype = T_FUNC,
	.func = { .type = &t_void, .args = f_1arg },
};
/* Builtin operators and functions available in every probe context.
 * Entries with .static_ret have a fixed return type; the rest infer
 * their type via .type_infer as operand types become known. */
static const struct func global_funcs[] = {
	{
		/* statement block `{ ... }` */
		.name = ":block",
		.type = &t_block_func,
		.static_ret = 1,
	},
	{
		/* struct/union member access */
		.name = ".",
		.type = &t_dot_func,
		.type_infer = global_dot_type_infer,
		.ir_pre	 = global_dot_ir_pre,
		.ir_post = global_dot_ir_post,
	},
	{
		/* pointer dereference `*p` */
		.name = ":deref",
		.type = &t_1arg_func,
		.type_infer = global_deref_type_infer,
		.ir_post = global_deref_ir_post,
	},
	{
		.name = "+",
		.type = &t_2args_func,
		.type_infer = global_binop_type_infer,
	},
	{
		.name = "-",
		.type = &t_2args_func,
		.type_infer = global_binop_type_infer,
	},
	{
		.name = "=",
		.type = &t_2args_func,
		.type_infer = global_assign_type_infer,
		.static_validate = global_assign_static_validate,
	},
	{
		/* map lookup `m[key]` */
		.name = "{}",
		/* .type = t_map_func, */
		.type_infer = global_map_type_infer,
		.static_validate = global_map_static_validate,
		.ir_pre	 = global_map_ir_pre,
		.ir_post = global_map_ir_post,
	},
	{
		.name = "pid",
		.type = &t_pid_func,
		.static_ret = 1,
		.ir_post = global_pid_ir_post,
	},
	{
		.name = "time",
		.type = &t_time_func,
		.static_ret = 1,
		.ir_post = global_time_ir_post,
	},
	{
		.name = "quantize",
		.type = &t_1arg_func,
		.type_infer = global_quantize_type_infer,
	},
	{ .name = NULL }	/* sentinel */
};
  461. static struct type *global_num_type(struct node *n)
  462. {
  463. if (n->num.unsignd) {
  464. if (n->num.u64 <= INT_MAX)
  465. return &t_int;
  466. else if (n->num.u64 <= UINT_MAX)
  467. return &t_uint;
  468. else if (n->num.u64 <= LONG_MAX)
  469. return &t_long;
  470. else if (n->num.u64 <= ULONG_MAX)
  471. return &t_ulong;
  472. else if (n->num.u64 <= LLONG_MAX)
  473. return &t_llong;
  474. else if (n->num.u64 <= ULLONG_MAX)
  475. return &t_ullong;
  476. } else {
  477. if (n->num.s64 >= INT_MIN && n->num.s64 <= INT_MAX)
  478. return &t_int;
  479. else if (n->num.s64 >= LONG_MIN && n->num.s64 <= LONG_MAX)
  480. return &t_long;
  481. else if (n->num.s64 >= LLONG_MIN && n->num.s64 <= LLONG_MAX)
  482. return &t_llong;
  483. }
  484. assert(0);
  485. return NULL;
  486. }
/* Materialize a numeric literal. Values representable as a 32-bit
 * sign-extended immediate are left for consuming instructions to
 * encode inline; anything wider is built up in a register through
 * shift/or sequences, since BPF ALU immediates are only 32 bits and
 * are sign-extended. */
static int global_num_ir_post(const struct func *func, struct node *n,
			      struct prog *prog)
{
	struct irstate *irs = &n->sym->irs;

	if ((n->num.unsignd && (n->num.u64 <= INT32_MAX)) ||
	    (n->num.s64 >= INT32_MIN && n->num.s64 <= INT32_MAX)) {
		/* fits in an instruction immediate, no code needed */
		irs->loc = LOC_IMM;
		irs->imm = n->num.s64;
		irs->size = 4;
		return 0;
	}

	/* we need to build the constant in a register, so ignore any
	 * advise about stack allocation. */
	irs->hint.stack = 0;
	ir_init_sym(prog->ir, n->sym);

	if (n->num.u64 > 0x3fffffffffffffff) {
		/* top two bits in use: assemble in 31/31/2-bit chunks
		 * so every immediate stays non-negative */
		ir_emit_insn(prog->ir, MOV64_IMM(n->num.u64 >> 33), irs->reg, 0);
		ir_emit_insn(prog->ir, ALU64_IMM(BPF_LSH, 31), irs->reg, 0);
		if ((n->num.u64 >> 2) & 0x7fffffff)
			ir_emit_insn(prog->ir,
				     ALU64_IMM(BPF_OR, (n->num.u64 >> 2) & 0x7fffffff),
				     irs->reg, 0);
		ir_emit_insn(prog->ir, ALU64_IMM(BPF_LSH, 2), irs->reg, 0);
		if (n->num.u64 & 0x3)
			ir_emit_insn(prog->ir, ALU64_IMM(BPF_OR, n->num.u64 & 0x3),
				     irs->reg, 0);
	} else if (n->num.u64 > 0x7fffffff) {
		/* two chunks of at most 31 bits each suffice */
		ir_emit_insn(prog->ir, MOV64_IMM(n->num.u64 >> 31), irs->reg, 0);
		ir_emit_insn(prog->ir, ALU64_IMM(BPF_LSH, 31), irs->reg, 0);
		if (n->num.u64 & 0x7fffffff)
			ir_emit_insn(prog->ir,
				     ALU64_IMM(BPF_OR, n->num.u64 & 0x7fffffff),
				     irs->reg, 0);
	}
	return 0;
}
/* numeric literals: type chosen by global_num_type(), value emitted
 * by global_num_ir_post() */
static const struct func global_num_func = {
	.name = ":num",
	.ir_post = global_num_ir_post,
};

/* string literals always carry the fixed :string type */
static const struct func global_string_func = {
	.name = ":string",
	.type = &t_string,
	.static_ret = 1,
};

/* bare identifiers (e.g. map names); type is inferred from use */
static const struct func global_ident_func = {
	.name = ":ident",
};
  535. static const struct func *global_sym_alloc_expr(struct node *n)
  536. {
  537. const struct func *func;
  538. int err;
  539. for (func = global_funcs; func->name; func++) {
  540. if (strcmp(func->name, n->expr.func))
  541. continue;
  542. return func;
  543. }
  544. return NULL;
  545. }
  546. int global_sym_alloc(struct prog *prog, struct node *n)
  547. {
  548. const struct func *func;
  549. struct symtab *st = prog->locals;
  550. int err;
  551. switch (n->ntype) {
  552. case N_EXPR:
  553. func = global_sym_alloc_expr(n);
  554. break;
  555. case N_IDENT:
  556. st = prog->globals;
  557. func = &global_ident_func;
  558. break;
  559. case N_NUM:
  560. func = &global_num_func;
  561. break;
  562. case N_STRING:
  563. func = &global_string_func;
  564. break;
  565. }
  566. if (!func)
  567. return -ENOENT;
  568. err = func_static_validate(func, n);
  569. if (err)
  570. return err;
  571. n->sym = sym_alloc(st, n, func);
  572. if (n->ntype == N_NUM)
  573. n->sym->type = global_num_type(n);
  574. else if (func->static_ret)
  575. n->sym->type = func_return_type(func);
  576. return 0;
  577. }
/* The global provider supplies builtins only; it attaches no probes. */
int global_probe(struct prog *prog)
{
	return 0;
}
/* Fallback provider handling symbols common to every probe type. */
struct provider global = {
	.name = ":",
	.sym_alloc = global_sym_alloc,
	.probe = global_probe,
};
/* Register the global provider before main() runs. */
__attribute__((constructor))
static void global_init(void)
{
	provider_register(&global);
}