/* ir.c - BPF intermediate-representation emission and dumping,
 * part of ply, a dynamic tracer for Linux. */
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <linux/bpf.h>

#include "ir.h"
#include "ply.h"
#include "sym.h"
#include "type.h"
/* Bit marking a register number as virtual (not yet allocated).  Real
 * BPF registers are 0-10, so a number with this bit set can never
 * collide with one; ir_bpf_registerize() later maps virtuals to real
 * registers. */
const uint16_t vreg_base = 0x8000;
  12. static const char *bpf_func_name(enum bpf_func_id id)
  13. {
  14. switch (id) {
  15. case BPF_FUNC_get_current_comm:
  16. return "get_current_comm";
  17. case BPF_FUNC_get_current_pid_tgid:
  18. return "get_current_pid_tgid";
  19. case BPF_FUNC_get_current_uid_gid:
  20. return "get_current_uid_gid";
  21. case BPF_FUNC_get_stackid:
  22. return "get_stackid";
  23. case BPF_FUNC_ktime_get_ns:
  24. return "ktime_get_ns";
  25. case BPF_FUNC_map_delete_elem:
  26. return "map_delete_elem";
  27. case BPF_FUNC_map_lookup_elem:
  28. return "map_lookup_elem";
  29. case BPF_FUNC_map_update_elem:
  30. return "map_update_elem";
  31. case BPF_FUNC_perf_event_output:
  32. return "perf_event_output";
  33. case BPF_FUNC_probe_read:
  34. return "probe_read";
  35. case BPF_FUNC_trace_printk:
  36. return "trace_printk";
  37. default:
  38. return NULL;
  39. }
  40. }
  41. static void reg_name(uint16_t reg, char *name)
  42. {
  43. if (reg & vreg_base) {
  44. sprintf(name, "v%u", reg & ~vreg_base);
  45. } else if (reg == BPF_REG_10) {
  46. strcpy(name, "bp");
  47. } else {
  48. sprintf(name, "r%u", reg);
  49. }
  50. }
  51. static void reg_dump(uint16_t reg, int16_t off, FILE *fp)
  52. {
  53. char name[8];
  54. reg_name(reg, name);
  55. if (off < 0)
  56. fprintf(fp, "[%s - 0x%x]", name, -off);
  57. else if (off > 0)
  58. fprintf(fp, "[%s + 0x%x]", name, off);
  59. else
  60. fprintf(fp, "%s", name);
  61. }
  62. static char size_name(uint8_t code)
  63. {
  64. switch (BPF_SIZE(code)) {
  65. case BPF_B: return 'b';
  66. case BPF_H: return 'h';
  67. case BPF_W: return 'w';
  68. case BPF_DW: return 'q';
  69. }
  70. return '?';
  71. }
  72. static void alu_dump(uint8_t code, FILE *fp)
  73. {
  74. switch (BPF_OP(code)) {
  75. case BPF_MOV: fputs("mov", fp); break;
  76. case BPF_ADD: fputs("add", fp); break;
  77. case BPF_SUB: fputs("sub", fp); break;
  78. case BPF_MUL: fputs("mul", fp); break;
  79. case BPF_DIV: fputs("div", fp); break;
  80. case BPF_OR : fputs("or", fp); break;
  81. case BPF_AND: fputs("and", fp); break;
  82. case BPF_LSH: fputs("lsh", fp); break;
  83. case BPF_RSH: fputs("rsh", fp); break;
  84. case BPF_NEG: fputs("neg", fp); break;
  85. case BPF_MOD: fputs("mod", fp); break;
  86. case BPF_XOR: fputs("xor", fp); break;
  87. }
  88. switch (BPF_CLASS(code)) {
  89. case BPF_ALU: fputc(size_name(BPF_W), fp);
  90. case BPF_ALU64: fputc(size_name(BPF_DW), fp);
  91. }
  92. }
  93. static void offset_dump(int16_t off, FILE *fp)
  94. {
  95. if (off < 0)
  96. fprintf(fp, "L%d", -off);
  97. else
  98. fprintf(fp, "+%d", off);
  99. }
/* Core disassembler: print one instruction to `fp`.
 *
 * `dst` and `src` are passed separately from the encoded instruction so
 * callers can substitute virtual register numbers (vreg_base bit set)
 * before register allocation; insn.dst_reg/src_reg are ignored here. */
static void __insn_dump(const struct bpf_insn insn, uint16_t dst, uint16_t src,
			FILE *fp)
{
	const char *name;
	/* Where insn.off belongs for this class of instruction. */
	enum {
		OFF_NONE,	/* off unused */
		OFF_DST,	/* memory offset on dst (stores) */
		OFF_SRC,	/* memory offset on src (loads) */
		OFF_EXP,	/* jump target, printed last */
	} off = OFF_NONE;

	switch (BPF_CLASS(insn.code)) {
	case BPF_LD:
	case BPF_LDX:
		off = OFF_SRC;
		fprintf(fp, "ld%c", size_name(insn.code));
		break;
	case BPF_ST:
	case BPF_STX:
		off = OFF_DST;
		fprintf(fp, "st%c", size_name(insn.code));
		break;
	case BPF_ALU:
	case BPF_ALU64:
		alu_dump(insn.code, fp);
		break;
	case BPF_JMP:
		off = OFF_EXP;
		switch (BPF_OP(insn.code)) {
		case BPF_EXIT:
			fputs("exit", fp);
			return;
		case BPF_CALL:
			fputs("call\t", fp);
			/* Symbolic helper name when known, raw id otherwise. */
			name = bpf_func_name(insn.imm);
			if (name)
				fputs(name, fp);
			else
				fprintf(fp, "%d", insn.imm);
			return;
		case BPF_JA:
			fputs("ja\t", fp);
			offset_dump(insn.off, fp);
			return;
		case BPF_JEQ: fputs("jeq", fp); break;
		case BPF_JNE: fputs("jne", fp); break;
		case BPF_JGT: fputs("jgt", fp); break;
		case BPF_JGE: fputs("jge", fp); break;
		case BPF_JSGE: fputs("jsge", fp); break;
		case BPF_JSGT: fputs("jsgt", fp); break;
		default:
			goto unknown;
		}
		break;
	default:
		goto unknown;
	}

	fputc('\t', fp);
	reg_dump(dst, off == OFF_DST ? insn.off : 0, fp);
	fputs(", ", fp);

	/* LDX/STX always take a register source and ST always takes an
	 * immediate, regardless of the BPF_SRC bit; other classes consult
	 * BPF_SRC.  The gotos below jump into the matching switch case. */
	if (BPF_CLASS(insn.code) == BPF_LDX || BPF_CLASS(insn.code) == BPF_STX)
		goto reg_src;
	else if (BPF_CLASS(insn.code) == BPF_ST)
		goto imm_src;

	switch (BPF_SRC(insn.code)) {
	case BPF_K:
	imm_src:
		fprintf(fp, "#%s0x%x", insn.imm < 0 ? "-" : "",
			insn.imm < 0 ? -insn.imm : insn.imm);
		break;
	case BPF_X:
	reg_src:
		reg_dump(src, off == OFF_SRC ? insn.off : 0, fp);
		break;
	}

	/* Conditional jumps print their target last. */
	if (off == OFF_EXP) {
		fputs(", ", fp);
		offset_dump(insn.off, fp);
	}

	return;
unknown:
	/* Unrecognized encoding: dump the raw 64-bit word.
	 * NOTE(review): this path emits a trailing '\n' while every other
	 * path leaves the newline to the caller — confirm intended. */
	fprintf(fp, "data\t0x%16.16" PRIx64 "\n", *((uint64_t *)&insn));
}
/* Dump a concrete BPF instruction, taking the register numbers from
 * the instruction's own dst_reg/src_reg fields. */
void insn_dump(struct bpf_insn insn, FILE *fp)
{
	__insn_dump(insn, insn.dst_reg, insn.src_reg, fp);
}
  186. void vinsn_dump(struct vinsn *vi, FILE *fp)
  187. {
  188. switch (vi->vitype) {
  189. case VI_INSN:
  190. __insn_dump(vi->insn.bpf, vi->insn.dst, vi->insn.src, fp);
  191. return;
  192. case VI_LDMAP:
  193. fputs("ldmap\t", fp); reg_dump(vi->map.reg, 0, fp);
  194. fprintf(fp, ", %s", vi->map.sym->name);
  195. return;
  196. case VI_LABEL:
  197. offset_dump(vi->label, fp);
  198. fputc(':', fp);
  199. return;
  200. }
  201. }
  202. void ir_dump(struct ir *ir, FILE *fp)
  203. {
  204. size_t i;
  205. for (i = 0; i < ir->len; i++) {
  206. struct vinsn *vi = &ir->vi[i];
  207. switch (vi->vitype) {
  208. case VI_INSN:
  209. case VI_LDMAP:
  210. fputc('\t', fp);
  211. break;
  212. case VI_LABEL:
  213. default:
  214. break;
  215. }
  216. vinsn_dump(vi, fp);
  217. fputc('\n', fp);
  218. }
  219. }
  220. static void ir_emit(struct ir *ir, struct vinsn *vi)
  221. {
  222. ir->vi = realloc(ir->vi, (++ir->len)*sizeof(*vi));
  223. assert(ir->vi);
  224. ir->vi[ir->len - 1] = *vi;
  225. }
  226. void ir_emit_insn(struct ir *ir, struct bpf_insn bpf, uint16_t dst, uint16_t src)
  227. {
  228. struct vinsn vi;
  229. vi.vitype = VI_INSN;
  230. vi.insn.bpf = bpf;
  231. vi.insn.dst = dst;
  232. vi.insn.src = src;
  233. ir_emit(ir, &vi);
  234. }
  235. void ir_emit_ldmap(struct ir *ir, uint16_t dst, struct sym *map)
  236. {
  237. struct vinsn vi;
  238. vi.vitype = VI_LDMAP;
  239. vi.map.reg = dst;
  240. vi.map.sym = map;
  241. ir_emit(ir, &vi);
  242. }
  243. void ir_emit_label (struct ir *ir, int16_t label)
  244. {
  245. struct vinsn vi;
  246. vi.vitype = VI_LABEL;
  247. vi.label = label;
  248. ir_emit(ir, &vi);
  249. }
  250. void ir_emit_sym_to_reg(struct ir *ir, uint16_t dst, struct sym *src)
  251. {
  252. struct irstate *irs = &src->irs;
  253. switch (irs->loc) {
  254. case LOC_IMM:
  255. ir_emit_insn(ir, MOV_IMM(irs->imm), dst, 0);
  256. break;
  257. case LOC_REG:
  258. if (dst == irs->reg)
  259. break;
  260. if (irs->size == 8)
  261. ir_emit_insn(ir, MOV64, dst, irs->reg);
  262. else
  263. ir_emit_insn(ir, MOV, dst, irs->reg);
  264. break;
  265. case LOC_STACK:
  266. ir_emit_insn(ir, LDX(bpf_width(irs->size), irs->stack),
  267. dst, BPF_REG_BP);
  268. break;
  269. default:
  270. assert(0);
  271. }
  272. }
  273. void ir_emit_reg_to_sym(struct ir *ir, struct sym *dst, uint16_t src)
  274. {
  275. struct irstate *irs = &dst->irs;
  276. switch (irs->loc) {
  277. case LOC_REG:
  278. if (irs->reg == src)
  279. break;
  280. if (irs->size == 8)
  281. ir_emit_insn(ir, MOV64, irs->reg, src);
  282. else
  283. ir_emit_insn(ir, MOV, irs->reg, src);
  284. break;
  285. case LOC_STACK:
  286. ir_emit_insn(ir, STX(bpf_width(irs->size), irs->stack),
  287. BPF_REG_BP, src);
  288. break;
  289. default:
  290. assert(0);
  291. }
  292. }
  293. void ir_emit_sym_to_stack(struct ir *ir, ssize_t offset, struct sym *src)
  294. {
  295. struct irstate *irs = &src->irs;
  296. switch (irs->loc) {
  297. case LOC_IMM:
  298. ir_emit_insn(ir, ST_IMM(bpf_width(irs->size), offset, irs->imm),
  299. BPF_REG_BP, 0);
  300. break;
  301. case LOC_REG:
  302. ir_emit_insn(ir, STX(bpf_width(irs->size), offset),
  303. BPF_REG_BP, irs->reg);
  304. case LOC_STACK:
  305. ir_emit_memcpy(ir, offset, irs->stack, irs->size);
  306. break;
  307. default:
  308. assert(0);
  309. }
  310. }
  311. void ir_emit_sym_to_sym(struct ir *ir, struct sym *dst, struct sym *src)
  312. {
  313. switch (dst->irs.loc) {
  314. case LOC_REG:
  315. ir_emit_sym_to_reg(ir, dst->irs.reg, src);
  316. break;
  317. case LOC_STACK:
  318. ir_emit_sym_to_stack(ir, dst->irs.stack, src);
  319. break;
  320. default:
  321. assert(0);
  322. break;
  323. }
  324. }
/* Emit a bpf_probe_read() call reading irs->size bytes from the address
 * held in register `src` into dst's stack slot:
 *   r1 = bp + stack offset of dst (destination buffer)
 *   r2 = size
 *   r3 = source address
 */
void ir_emit_read_to_sym(struct ir *ir, struct sym *dst, uint16_t src)
{
	struct irstate *irs = &dst->irs;

	/* probe_read writes to memory, so dst must be stack-resident. */
	assert(irs->loc == LOC_STACK);
	ir_emit_insn(ir, MOV, BPF_REG_1, BPF_REG_BP);
	ir_emit_insn(ir, ALU_IMM(BPF_ADD, irs->stack), BPF_REG_1, 0);
	ir_emit_insn(ir, MOV_IMM((int32_t)irs->size), BPF_REG_2, 0);
	if (src != BPF_REG_3)
		ir_emit_insn(ir, MOV, BPF_REG_3, src);
	ir_emit_insn(ir, CALL(BPF_FUNC_probe_read), 0, 0);
	/* TODO if (r0) exit(r0); */
}
  337. void ir_emit_memcpy(struct ir *ir, ssize_t dst, ssize_t src, size_t size)
  338. {
  339. if (dst == src)
  340. return;
  341. for (; size >= 8; size -= 8, dst += 8, src += 8) {
  342. ir_emit_insn(ir, LDX(BPF_DW, src), BPF_REG_0, BPF_REG_BP);
  343. ir_emit_insn(ir, STX(BPF_DW, dst), BPF_REG_BP, BPF_REG_0);
  344. }
  345. if (size >= 4) {
  346. ir_emit_insn(ir, LDX(BPF_W, src), BPF_REG_0, BPF_REG_BP);
  347. ir_emit_insn(ir, STX(BPF_W, dst), BPF_REG_BP, BPF_REG_0);
  348. size -= 4, dst += 4, src += 4;
  349. }
  350. if (size >= 2) {
  351. ir_emit_insn(ir, LDX(BPF_H, src), BPF_REG_0, BPF_REG_BP);
  352. ir_emit_insn(ir, STX(BPF_H, dst), BPF_REG_BP, BPF_REG_0);
  353. size -= 2, dst += 2, src += 2;
  354. }
  355. if (size >= 1) {
  356. ir_emit_insn(ir, LDX(BPF_B, src), BPF_REG_0, BPF_REG_BP);
  357. ir_emit_insn(ir, STX(BPF_B, dst), BPF_REG_BP, BPF_REG_0);
  358. size -= 1, dst += 1, src += 1;
  359. }
  360. assert(size == 0);
  361. }
  362. void ir_emit_bzero(struct ir *ir, ssize_t offset, size_t size)
  363. {
  364. for (; size >= 8; size -= 8)
  365. ir_emit_insn(ir, ST_IMM(BPF_DW, offset, 0), BPF_REG_BP, 0);
  366. if (size >= 4) {
  367. ir_emit_insn(ir, ST_IMM(BPF_W, offset, 0), BPF_REG_BP, 0);
  368. size -= 4;
  369. }
  370. if (size >= 2) {
  371. ir_emit_insn(ir, ST_IMM(BPF_H, offset, 0), BPF_REG_BP, 0);
  372. size -= 2;
  373. }
  374. if (size >= 1) {
  375. ir_emit_insn(ir, ST_IMM(BPF_B, offset, 0), BPF_REG_BP, 0);
  376. size -= 1;
  377. }
  378. assert(size == 0);
  379. }
/* Hand out a fresh label id.  Labels are negative so they can share
 * bpf_insn.off with resolved (non-negative) jump offsets; see
 * ir_bpf_jmp_resolve(). */
int16_t ir_alloc_label (struct ir *ir)
{
	return ir->next_label--;
}
/* Hand out a fresh virtual register.  The vreg_base bit marks it as
 * virtual until ir_bpf_registerize() maps it to a real register. */
uint16_t ir_alloc_register(struct ir *ir)
{
	return ir->next_reg++;
}
  388. ssize_t ir_alloc_stack(struct ir *ir, size_t size, size_t align)
  389. {
  390. ir->sp -= size;
  391. if (ir->sp % align)
  392. ir->sp -= align - (ir->sp & align);
  393. assert(ir->sp > INT16_MIN);
  394. return ir->sp;
  395. }
/* Decide where a value of type `t` will live.  Scalars and pointers go
 * in a fresh virtual register unless a stack slot was hinted;
 * everything else gets stack storage. */
void ir_init_irs(struct ir *ir, struct irstate *irs, struct type *t)
{
	t = type_base(t);

	/* Non-zero loc means this symbol was already placed. */
	if (irs->loc)
		return;

	irs->size = type_sizeof(t);
	if ((!irs->hint.stack)
	    && ((t->ttype == T_SCALAR) || (t->ttype == T_POINTER))) {
		irs->loc = LOC_REG;
		irs->reg = ir_alloc_register(ir);
		return;
	}

	irs->loc = LOC_STACK;
	/* a parent may already have filled in a stack position.
	 * usually this is when we're part of a map key. */
	if (!irs->stack)
		irs->stack = ir_alloc_stack(ir, irs->size, type_alignof(t));
}
  414. void ir_init_sym(struct ir *ir, struct sym *sym)
  415. {
  416. return ir_init_irs(ir, &sym->irs, sym->type);
  417. }
  418. struct ir *ir_new(void)
  419. {
  420. struct ir *ir;
  421. ir = calloc(1, sizeof(*ir));
  422. assert(ir);
  423. ir->next_reg = vreg_base;
  424. ir->next_label = -1;
  425. return ir;
  426. }
  427. /* ir->bpf generation */
  428. static void ir_bpf_vreg_replace(struct ir *ir, struct vinsn *last, int reg)
  429. {
  430. struct vinsn *vi;
  431. _d("ir_bpf_generate: v%d -> r%d\n", last->insn.src & ~vreg_base, reg);
  432. for (vi = ir->vi; vi <= last; vi++) {
  433. if (vi->vitype != VI_INSN)
  434. continue;
  435. if (vi->insn.dst == last->insn.src)
  436. vi->insn.dst = reg;
  437. if (vi->insn.src == last->insn.src)
  438. vi->insn.src = reg;
  439. }
  440. }
/* Find a real BPF register for the virtual register used as the source
 * of `last`.
 *
 * `clean` is a bitmask over r0-r9 (0x3ff) of registers never touched
 * before `last`: any real register mentioned by an earlier instruction
 * (read or written) is removed, as are the caller-save registers at
 * each helper call.  Returns 0 on success, -1 when no clean register
 * remains (spilling is not implemented yet). */
static int ir_bpf_registerize_one(struct ir *ir, struct vinsn *last)
{
	struct vinsn *vi;
	uint16_t clean = 0x3ff;

	/* Stop early once every register is dirty. */
	for (vi = ir->vi; clean && (vi < last); vi++) {
		if (vi->vitype != VI_INSN)
			continue;

		if (!(vi->insn.src & vreg_base))
			clean &= ~(1 << vi->insn.src);
		if (!(vi->insn.dst & vreg_base))
			clean &= ~(1 << vi->insn.dst);

		/* Helper calls clobber the caller-save set. */
		if ((BPF_CLASS(vi->insn.bpf.code) == BPF_JMP)
		    && (BPF_OP(vi->insn.bpf.code) == BPF_CALL))
			clean &= ~BPF_REG_CALLER_SAVE;
	}

	if (clean) {
		int reg;

		/* Pick the lowest-numbered clean register. */
		for (reg = BPF_REG_0; !(clean & 1); clean >>= 1, reg++);
		ir_bpf_vreg_replace(ir, last, reg);
		return 0;
	}

	/* TODO ir_bpf_vreg_spill(ir, last); */
	return -1;
}
  465. static int ir_bpf_registerize(struct ir *ir)
  466. {
  467. struct vinsn *vi;
  468. int err = 0;
  469. if (!ir->len)
  470. return 0;
  471. for (vi = &ir->vi[ir->len - 1]; vi >= ir->vi; vi--) {
  472. if (vi->vitype != VI_INSN)
  473. continue;
  474. if (vi->insn.src & vreg_base) {
  475. err = ir_bpf_registerize_one(ir, vi);
  476. if (err)
  477. return err;
  478. }
  479. }
  480. return err;
  481. }
  482. static int ir_bpf_jmp_resolve_one(struct ir *ir, struct vinsn *jmp)
  483. {
  484. struct vinsn *vi;
  485. int off = 0;
  486. for (vi = jmp + 1; vi < &ir->vi[ir->len - 1]; vi++) {
  487. switch (vi->vitype) {
  488. case VI_INSN:
  489. off++;
  490. break;
  491. case VI_LABEL:
  492. if (vi->label != jmp->insn.bpf.off)
  493. break;
  494. jmp->insn.bpf.off = off;
  495. return 0;
  496. default:
  497. break;
  498. }
  499. }
  500. return -ENOENT;
  501. }
  502. static int ir_bpf_jmp_resolve(struct ir *ir)
  503. {
  504. struct vinsn *vi;
  505. int err;
  506. for (vi = ir->vi; vi < &ir->vi[ir->len - 1]; vi++) {
  507. if (vi->vitype != VI_INSN)
  508. continue;
  509. if ((BPF_CLASS(vi->insn.bpf.code) == BPF_JMP)
  510. && (vi->insn.bpf.off < 0)) {
  511. err = ir_bpf_jmp_resolve_one(ir, vi);
  512. if (err)
  513. return err;
  514. }
  515. }
  516. return 0;
  517. }
  518. int ir_bpf_generate(struct ir *ir)
  519. {
  520. int err;
  521. err = ir_bpf_registerize(ir);
  522. if (err)
  523. return err;
  524. /* no instructions will be added/removed to the program after
  525. * this point, thus it is now safe to convert labeled jumps to
  526. * fixed offsets. */
  527. err = ir_bpf_jmp_resolve(ir);
  528. if (err)
  529. return err;
  530. return 0;
  531. }