ソースを参照

started on ir

Tobias Waldekranz.com 8 年 前
コミット
9f42acefdd
共有6 個のファイルを変更した355 個の追加12 個の削除を含む
  1. 237 1
      ir.c
  2. 35 0
      ir.h
  3. 24 4
      kprobe.c
  4. 50 7
      ply.c
  5. 5 0
      ply.h
  6. 4 0
      sym.h

+ 237 - 1
ir.c

@@ -1,12 +1,245 @@
1 1
 #include <assert.h>
2
+#include <inttypes.h>
3
+#include <stdio.h>
4
+#include <string.h>
5
+
6
+#include <linux/bpf.h>
2 7
 
3 8
 #include "ir.h"
4 9
 
5 10
 const uint16_t vreg_base = 0x8000;
6 11
 
12
+static const char *bpf_func_name(enum bpf_func_id id)
13
+{	/* Return the printable name of BPF helper id, or NULL if it is not listed here. */
14
+	switch (id) {
15
+	case BPF_FUNC_get_current_comm:
16
+		return "get_current_comm";
17
+	case BPF_FUNC_get_current_pid_tgid:
18
+		return "get_current_pid_tgid";
19
+	case BPF_FUNC_get_current_uid_gid:
20
+		return "get_current_uid_gid";
21
+	case BPF_FUNC_get_stackid:
22
+		return "get_stackid";
23
+	case BPF_FUNC_ktime_get_ns:
24
+		return "ktime_get_ns";
25
+	case BPF_FUNC_map_delete_elem:
26
+		return "map_delete_elem";
27
+	case BPF_FUNC_map_lookup_elem:
28
+		return "map_lookup_elem";
29
+	case BPF_FUNC_map_update_elem:
30
+		return "map_update_elem";
31
+	case BPF_FUNC_perf_event_output:
32
+		return "perf_event_output";
33
+	case BPF_FUNC_probe_read:
34
+		return "probe_read";
35
+	case BPF_FUNC_trace_printk:
36
+		return "trace_printk";
37
+	default:	/* any other helper id: the caller gets NULL and must cope */
38
+		return NULL;
39
+	}
40
+}
41
+
42
+static void reg_name(uint16_t reg, char *name)
43
+{	/* Write the display name of reg into name. NOTE(review): no size is passed; the longest result is 6 chars plus NUL. */
44
+        if (reg & vreg_base) {	/* vreg_base bit set: shown as v<N>, N being reg with that bit cleared */
45
+		sprintf(name, "v%u", reg & ~vreg_base);		
46
+	} else if (reg == BPF_REG_10) {	/* BPF_REG_10 is shown as "bp" */
47
+		strcpy(name, "bp");
48
+	} else {	/* every other register: r<N> */
49
+		sprintf(name, "r%u", reg);
50
+	}
51
+}
52
+
53
+static void reg_dump(uint16_t reg, int16_t off, FILE *fp)
54
+{	/* Print reg on fp, either bare or as "[reg +/- 0xOFF]" when off is non-zero. */
55
+	char name[8];
56
+
57
+	reg_name(reg, name);
58
+	/* The sign is printed separately so the hex digits always show the magnitude. */
59
+	if (off < 0)
60
+		fprintf(fp, "[%s - 0x%x]", name, -off);
61
+	else if (off > 0)
62
+		fprintf(fp, "[%s + 0x%x]", name, off);
63
+	else
64
+		fprintf(fp, "%s", name);
65
+}
66
+
67
+static char size_name(uint8_t code)
68
+{	/* Map the BPF_SIZE() field of code to a one-letter width suffix. */
69
+	switch (BPF_SIZE(code)) {
70
+	case BPF_B:  return 'b';
71
+	case BPF_H:  return 'h';
72
+	case BPF_W:  return 'w';
73
+	case BPF_DW: return 'q';
74
+	}
75
+	/* Reached only when BPF_SIZE() yields a value not handled above. */
76
+	return '?';
77
+}		
78
+
79
+static void alu_dump(uint8_t code, FILE *fp)
80
+{	/* Print the ALU mnemonic for code plus one width suffix: 'w' for BPF_ALU, 'q' for BPF_ALU64. */
81
+	switch (BPF_OP(code)) {
82
+	case BPF_MOV: fputs("mov", fp); break;
83
+	case BPF_ADD: fputs("add", fp); break;
84
+	case BPF_SUB: fputs("sub", fp); break;
85
+	case BPF_MUL: fputs("mul", fp); break;
86
+	case BPF_DIV: fputs("div", fp); break;
87
+	case BPF_OR : fputs("or",  fp); break;
88
+	case BPF_AND: fputs("and", fp); break;
89
+	case BPF_LSH: fputs("lsh", fp); break;
90
+	case BPF_RSH: fputs("rsh", fp); break;
91
+	case BPF_NEG: fputs("neg", fp); break;
92
+	case BPF_MOD: fputs("mod", fp); break;
93
+	case BPF_XOR: fputs("xor", fp); break;
94
+	}
95
+	/* Each case must break: falling through made BPF_ALU print both suffixes ("wq"). */
96
+	switch (BPF_CLASS(code)) {
97
+	case BPF_ALU:   fputc(size_name(BPF_W), fp); break;
98
+	case BPF_ALU64: fputc(size_name(BPF_DW), fp); break;
99
+	}
100
+}
101
+
102
+static void offset_dump(int16_t off, FILE *fp)
103
+{	/* Print off as "L<n>" (n = -off) when negative, otherwise as "+<n>". */
104
+	if (off < 0)	/* NOTE(review): negative values appear to be label ids rather than distances - confirm */
105
+		fprintf(fp, "L%d", -off);
106
+	else
107
+		fprintf(fp, "+%d", off);
108
+}
109
+
110
+static void __insn_dump(const struct bpf_insn insn, uint16_t dst, uint16_t src,
111
+			FILE *fp)
112
+{	/* Print insn as assembler-like text on fp; dst and src are the register numbers to show. */
113
+	const char *name;
114
+	enum {	/* which operand, if any, insn.off belongs to */
115
+		OFF_NONE,
116
+		OFF_DST,
117
+		OFF_SRC,
118
+		OFF_EXP,
119
+	} off = OFF_NONE;
120
+	uint64_t raw;	/* bit image of insn, used only by the "data" fallback */
121
+	switch (BPF_CLASS(insn.code)) {
122
+	case BPF_LD:
123
+	case BPF_LDX:
124
+		off = OFF_SRC;
125
+		fprintf(fp, "ld%c", size_name(insn.code));
126
+		break;
127
+
128
+	case BPF_ST:
129
+	case BPF_STX:
130
+		off = OFF_DST;
131
+		fprintf(fp, "st%c", size_name(insn.code));
132
+		break;
133
+
134
+	case BPF_ALU:
135
+	case BPF_ALU64:
136
+		alu_dump(insn.code, fp);
137
+		break;
138
+
139
+	case BPF_JMP:
140
+		off = OFF_EXP;
141
+
142
+		switch (BPF_OP(insn.code)) {
143
+		case BPF_EXIT:
144
+			fputs("exit", fp);
145
+			return;
146
+		case BPF_CALL:
147
+			fputs("call\t", fp);
148
+
149
+			name = bpf_func_name(insn.imm);
150
+			if (name)
151
+				fputs(name, fp);
152
+			else
153
+				fprintf(fp, "%d", insn.imm);
154
+			return;
155
+		case BPF_JA:
156
+			fputs("ja\t", fp);
157
+			offset_dump(insn.off, fp);
158
+			return;
159
+
160
+		case BPF_JEQ:  fputs("jeq", fp); break;
161
+		case BPF_JNE:  fputs("jne", fp); break;
162
+		case BPF_JGT:  fputs("jgt", fp); break;
163
+		case BPF_JGE:  fputs("jge", fp); break;
164
+		case BPF_JSGE: fputs("jsge", fp); break;
165
+		case BPF_JSGT: fputs("jsgt", fp); break;
166
+		default:
167
+			goto unknown;
168
+		}
169
+		break;
170
+
171
+	default:
172
+		goto unknown;
173
+	}
174
+	/* Operands: destination first, then an immediate or the source register. */
175
+	fputc('\t', fp);
176
+	reg_dump(dst, off == OFF_DST ? insn.off : 0, fp);
177
+	fputs(", ", fp);
178
+
179
+	if (BPF_CLASS(insn.code) == BPF_LDX || BPF_CLASS(insn.code) == BPF_STX)
180
+		goto reg_src;
181
+	/* BPF_ST always stores an immediate; its size bits overlap the BPF_SRC bit, so BPF_SRC() cannot be trusted there. */
182
+	switch (BPF_CLASS(insn.code) == BPF_ST ? BPF_K : BPF_SRC(insn.code)) {
183
+	case BPF_K:
184
+		fprintf(fp, "#%s0x%x", insn.imm < 0 ? "-" : "",	/* magnitude in unsigned: no overflow for INT_MIN */
185
+			insn.imm < 0 ? -(unsigned)insn.imm : (unsigned)insn.imm);
186
+		break;
187
+	case BPF_X:
188
+	reg_src:
189
+		reg_dump(src, off == OFF_SRC ? insn.off : 0, fp);
190
+		break;
191
+	}
192
+	/* Jumps that reach this point print their target as a third operand. */
193
+	if (off == OFF_EXP) {
194
+		fputs(", ", fp);
195
+		offset_dump(insn.off, fp);
196
+	}
197
+
198
+	return;
199
+unknown:
200
+	memcpy(&raw, &insn, sizeof(raw));	/* copy the bytes instead of casting &insn to uint64_t * */
201
+	fprintf(fp, "data\t0x%16.16" PRIx64, raw);	/* no trailing newline, like every other path */
202
+}
203
+
204
+void insn_dump(struct bpf_insn insn, FILE *fp)
205
+{	/* Print insn on fp using the register numbers stored in the instruction itself. */
206
+	__insn_dump(insn, insn.dst_reg, insn.src_reg, fp);
207
+}
208
+
209
+void vinsn_dump(vinsn_t *vi, FILE *fp)
210
+{	/* Print one IR entry on fp, without a trailing newline; unhandled vitype values print nothing. */
211
+	switch (vi->vitype) {
212
+	case VI_INSN:	/* instruction: registers come from vi->insn.dst/src, not from the bpf_insn fields */
213
+		__insn_dump(vi->insn.bpf, vi->insn.dst, vi->insn.src, fp);
214
+		return;
215
+	case VI_LABEL:	/* label: printed via offset_dump() followed by ':' */
216
+		offset_dump(vi->label, fp);
217
+		fputc(':', fp);
218
+		return;
219
+	case VI_REG_GET:
220
+	case VI_REG_PUT:	/* register get/put markers: "+ reg" for get, "- reg" for put */
221
+		fputs((vi->vitype == VI_REG_GET) ? "+ " : "- ", fp);
222
+		reg_dump(vi->reg, 0, fp);
223
+		return;
224
+	}
225
+}
226
+
227
+void ir_dump(ir_t *ir, FILE *fp)
228
+{	/* Print all ir->len entries of ir->vi on fp, one per line. */
229
+	size_t i;
230
+
231
+	for (i = 0; i < ir->len; i++) {
232
+		if (ir->vi[i].vitype == VI_INSN)	/* only instructions are indented with a tab */
233
+			fputc('\t', fp);
234
+
235
+		vinsn_dump(&ir->vi[i], fp);
236
+		fputc('\n', fp);
237
+	}
238
+}
239
+
7 240
 static void ir_emit(ir_t *ir, vinsn_t *vi)
8 241
 {
9
-	ir->vi = realloc(ir->vi, ++ir->len);
242
+	ir->vi = realloc(ir->vi, (++ir->len)*sizeof(*vi));
10 243
 	assert(ir->vi);
11 244
 
12 245
 	ir->vi[ir->len - 1] = *vi;
@@ -29,6 +262,7 @@ void ir_emit_label  (ir_t *ir, int16_t label)
29 262
 
30 263
 	vi.vitype = VI_LABEL;
31 264
 	vi.label = label;
265
+	ir_emit(ir, &vi);
32 266
 }
33 267
 
34 268
 void ir_emit_reg_get(ir_t *ir, uint16_t reg)
@@ -37,6 +271,7 @@ void ir_emit_reg_get(ir_t *ir, uint16_t reg)
37 271
 
38 272
 	vi.vitype = VI_REG_GET;
39 273
 	vi.reg = reg;
274
+	ir_emit(ir, &vi);
40 275
 }
41 276
 
42 277
 void ir_emit_reg_put(ir_t *ir, uint16_t reg)
@@ -45,6 +280,7 @@ void ir_emit_reg_put(ir_t *ir, uint16_t reg)
45 280
 
46 281
 	vi.vitype = VI_REG_PUT;
47 282
 	vi.reg = reg;
283
+	ir_emit(ir, &vi);
48 284
 }
49 285
 
50 286
 int16_t ir_alloc_label (ir_t *ir)

+ 35 - 0
ir.h

@@ -6,6 +6,37 @@
6 6
 
7 7
 #include <linux/bpf.h>
8 8
 
9
+#define INSN(_code, _dst, _src, _off, _imm)	\
10
+	((struct bpf_insn) {			\
11
+		.code  = _code,			\
12
+		.dst_reg = _dst,		\
13
+		.src_reg = _src,		\
14
+		.off   = _off,			\
15
+		.imm   = _imm			\
16
+	})
17
+/* INSN() above builds a struct bpf_insn compound literal; the wrappers below only choose the opcode bits. */
18
+#define MOV(_dst, _src)     INSN(BPF_ALU64 | BPF_MOV | BPF_X, _dst, _src, 0, 0)
19
+#define MOV_IMM(_dst, _imm) INSN(BPF_ALU64 | BPF_MOV | BPF_K, _dst, 0, 0, _imm)
20
+/* BPF_JMP class: exit, helper call, and jumps comparing _dst with a register (JMP) or an immediate (JMP_IMM). */
21
+#define EXIT INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0)
22
+#define CALL(_imm) INSN(BPF_JMP | BPF_CALL, 0, 0, 0, _imm)
23
+#define JMP(_op, _dst, _src, _off)     INSN(BPF_JMP | BPF_OP((_op)) | BPF_X, _dst, _src, _off, 0)
24
+#define JMP_IMM(_op, _dst, _imm, _off) INSN(BPF_JMP | BPF_OP((_op)) | BPF_K, _dst, 0, _off, _imm)
25
+/* BPF_ALU64 class operations with a register (ALU) or immediate (ALU_IMM) source. */
26
+#define ALU(_op, _dst, _src)     INSN(BPF_ALU64 | BPF_OP((_op)) | BPF_X, _dst, _src, 0, 0)
27
+#define ALU_IMM(_op, _dst, _imm) INSN(BPF_ALU64 | BPF_OP((_op)) | BPF_K, _dst, 0, 0, _imm)
28
+/* BPF_MEM stores: one immediate word store (BPF_ST), then register stores (BPF_STX) for each width. */
29
+#define STW_IMM(_dst, _off, _imm) INSN(BPF_ST  | BPF_SIZE(BPF_W)  | BPF_MEM, _dst, 0, _off, _imm)
30
+#define STXB(_dst, _off, _src)   INSN(BPF_STX | BPF_SIZE(BPF_B) | BPF_MEM, _dst, _src, _off, 0)
31
+#define STXH(_dst, _off, _src)   INSN(BPF_STX | BPF_SIZE(BPF_H) | BPF_MEM, _dst, _src, _off, 0)
32
+#define STXW(_dst, _off, _src)   INSN(BPF_STX | BPF_SIZE(BPF_W) | BPF_MEM, _dst, _src, _off, 0)
33
+#define STXDW(_dst, _off, _src)   INSN(BPF_STX | BPF_SIZE(BPF_DW) | BPF_MEM, _dst, _src, _off, 0)
34
+/* BPF_MEM loads (BPF_LDX) for each width; note the argument order is (_dst, _off, _src). */
35
+#define LDXB(_dst, _off, _src)  INSN(BPF_LDX | BPF_SIZE(BPF_B)  | BPF_MEM, _dst, _src, _off, 0)
36
+#define LDXH(_dst, _off, _src)  INSN(BPF_LDX | BPF_SIZE(BPF_H)  | BPF_MEM, _dst, _src, _off, 0)
37
+#define LDXW(_dst, _off, _src)  INSN(BPF_LDX | BPF_SIZE(BPF_W)  | BPF_MEM, _dst, _src, _off, 0)
38
+#define LDXDW(_dst, _off, _src) INSN(BPF_LDX | BPF_SIZE(BPF_DW) | BPF_MEM, _dst, _src, _off, 0)
39
+
9 40
 typedef enum vitype {
10 41
 	VI_INSN,
11 42
 	VI_LABEL,
@@ -36,6 +67,10 @@ typedef struct ir {
36 67
 	uint16_t next_reg;
37 68
 } ir_t;
38 69
 
70
+void insn_dump(struct bpf_insn insn, FILE *fp);
71
+void vinsn_dump(vinsn_t *vi, FILE *fp);
72
+void ir_dump(ir_t *ir, FILE *fp);
73
+
39 74
 int16_t  ir_alloc_label   (ir_t *ir);
40 75
 uint16_t ir_alloc_register(ir_t *ir);
41 76
 

+ 24 - 4
kprobe.c

@@ -10,6 +10,21 @@
10 10
 struct kprobe {
11 11
 };
12 12
 
13
+static int kprobe_ir_prologue(prog_t *prog)
14
+{	/* Emit IR that moves r1 into a register allocated for the "ctx" local; always returns 0. */
15
+	sym_t *ctx = sym_get(prog->locals, "ctx");
16
+
17
+	if (!ctx)	/* no "ctx" symbol was returned for this program: nothing to emit */
18
+		return 0;
19
+
20
+	ctx->reg = ir_alloc_register(prog->ir);
21
+	ir_emit_reg_get(prog->ir, ctx->reg);
22
+	/* MOV(0, 0) leaves the register fields zero; the real dst/src are passed as separate arguments. */
23
+	/* kernel sets r1 to the address of the context */
24
+	ir_emit_insn(prog->ir, MOV(0, 0), ctx->reg, BPF_REG_1);
25
+	return 0;
26
+}
27
+
13 28
 static inline int is_arg(const char *name)
14 29
 {
15 30
 	return (strstr(name, "arg") == name)
@@ -21,7 +36,7 @@ static int kprobe_rewrite_arg(prog_t *prog, node_t *n)
21 36
 {
22 37
 	const char *reg;
23 38
 	int arg = n->ident[3] - '0';
24
-	node_t *new;
39
+	node_t *new, *ctx;
25 40
 
26 41
 	reg = arch_register_argument(arg);
27 42
 	if (!reg) {
@@ -32,20 +47,24 @@ static int kprobe_rewrite_arg(prog_t *prog, node_t *n)
32 47
 		return -EINVAL;
33 48
 	}
34 49
 
50
+	ctx = node_ident("ctx");
51
+
35 52
 	/* argN => (*ctx).REG */
36 53
 	new = node_vlist(node_keyword('.', 0),
37 54
 			 node_vlist(node_keyword('*', 0),
38
-				    node_ident("ctx"),
55
+				    ctx,
39 56
 				    NULL),
40 57
 			 node_string(reg),
41 58
 			 NULL);
42 59
 
60
+	ctx->type = type_ptr_of(&t_pt_regs);
43 61
 	new->type = n->type;
44 62
 	new->list->type = &t_void;
45 63
 	new->list->next->type = &t_pt_regs;
46 64
 	new->list->next->list->type = &t_void;
47
-	new->list->next->list->next->type = type_ptr_of(&t_pt_regs);
48
-	return node_replace(n, new);
65
+	node_replace(n, new);
66
+
67
+	return sym_add(prog->locals, ctx->ident, ctx->type, &ctx->sym);
49 68
 }
50 69
 
51 70
 static int kprobe_rewrite_node(prog_t *prog, node_t *n)
@@ -85,6 +104,7 @@ static int kprobe_probe(prog_t *prog)
85 104
 provider_t kprobe = {
86 105
 	.name = "kprobe",
87 106
 
107
+	.ir_prologue = kprobe_ir_prologue,
88 108
 	.rewrite_node = kprobe_rewrite_node,
89 109
 	.resolve = kprobe_resolve,
90 110
 	.probe = kprobe_probe,

+ 50 - 7
ply.c

@@ -83,6 +83,7 @@ ctx_t *ctx_get(void)
83 83
 
84 84
 	prog->provider = provider_get("k");
85 85
 	prog->provider->probe(prog);
86
+	prog->ir = ir_new();
86 87
 	ctx->progs[0] = prog;
87 88
 
88 89
 	/* PROBE1 */
@@ -111,6 +112,7 @@ ctx_t *ctx_get(void)
111 112
 
112 113
 	prog->provider = provider_get("k");
113 114
 	prog->provider->probe(prog);
115
+	prog->ir = ir_new();
114 116
 	ctx->progs[1] = prog;
115 117
 
116 118
 	return ctx;
@@ -374,7 +376,45 @@ int pass_rewrite_ast(node_t *n, void *_prog)
374 376
 	return 0;
375 377
 }
376 378
 
377
-int pass_walk(pass_t *pass, ctx_t *ctx)
379
+int pass_generate_ir(node_t *n, void *_prog)
380
+{	/* Per-node callback for IR generation; currently a stub that emits nothing and returns 0. */
381
+	prog_t *prog = _prog;	/* not used yet */
382
+
383
+	return 0;
384
+}
385
+
386
+int run_generate_ir(pass_t *pass, ctx_t *ctx)
387
+{	/* Build the IR of every program in ctx->progs; returns 0 or the first non-zero error from a step. */
388
+	prog_t **progp;
389
+	int err;
390
+
391
+	for (progp = ctx->progs; *progp; progp++) {	/* the array ends at the first NULL entry */
392
+		prog_t *prog = *progp;
393
+		/* Allocated before any code is emitted, but only placed just before the final EXIT below. */
394
+		int return_label = ir_alloc_label(prog->ir);
395
+		/* The provider hooks are optional; a missing hook counts as success. */
396
+		err = prog->provider->ir_prologue ?
397
+			prog->provider->ir_prologue(prog) : 0;
398
+		if (err)
399
+			return err;
400
+
401
+		err = node_walk(prog->ast, pass->pre, pass->post, prog);
402
+		if (err)
403
+			return err;
404
+
405
+		err = prog->provider->ir_epilogue ?
406
+			prog->provider->ir_epilogue(prog) : 0;
407
+		if (err)
408
+			return err;
409
+		/* Every program ends with the return label followed by a single EXIT. */
410
+		ir_emit_label(prog->ir, return_label);
411
+		ir_emit_insn(prog->ir, EXIT, 0, 0);
412
+	}
413
+
414
+	return 0;
415
+}
416
+
417
+int run_walk(pass_t *pass, ctx_t *ctx)
378 418
 {
379 419
 	prog_t **prog;
380 420
 	int err;
@@ -389,12 +429,13 @@ int pass_walk(pass_t *pass, ctx_t *ctx)
389 429
 }
390 430
 
391 431
 pass_t passes[] = {
392
-	{ .run = pass_walk, .post = pass_resolve_symbols },
393
-	{ .run = pass_walk, .post = pass_infer_types },
394
-	{ .run = pass_walk, .post = pass_infer_types },
395
-	{ .run = pass_walk, .post = pass_infer_types },
396
-	{ .run = pass_walk, .post = pass_validate_types },
397
-	{ .run = pass_walk, .post = pass_rewrite_ast },
432
+	{ .run = run_walk, .post = pass_resolve_symbols },
433
+	{ .run = run_walk, .post = pass_infer_types },
434
+	{ .run = run_walk, .post = pass_infer_types },
435
+	{ .run = run_walk, .post = pass_infer_types },
436
+	{ .run = run_walk, .post = pass_validate_types },
437
+	{ .run = run_walk, .post = pass_rewrite_ast },
438
+	{ .run = run_generate_ir },
398 439
 
399 440
 	{ NULL }
400 441
 };
@@ -417,6 +458,8 @@ int main(void)
417 458
 		node_dump((*prog)->ast, stdout);
418 459
 		printf("\n-- locals\n");
419 460
 		symtab_dump((*prog)->locals, stdout);
461
+		printf("-- ir\n");
462
+		ir_dump((*prog)->ir, stdout);
420 463
 	}
421 464
 
422 465
 	printf("\n\n-- globals\n");

+ 5 - 0
ply.h

@@ -5,6 +5,7 @@
5 5
 #include "sym.h"
6 6
 #include "type.h"
7 7
 #include "arch.h"
8
+#include "ir.h"
8 9
 
9 10
 typedef struct prog prog_t;
10 11
 typedef struct provider provider_t;
@@ -18,6 +19,8 @@ struct prog {
18 19
 
19 20
 	provider_t *provider;
20 21
 	void *provider_data;
22
+
23
+	ir_t *ir;
21 24
 };
22 25
 
23 26
 typedef struct ctx {
@@ -32,6 +35,8 @@ struct provider {
32 35
 	int (*probe)(prog_t *);
33 36
 	int (*resolve)(prog_t *, node_t *);
34 37
 	int (*rewrite_node)(prog_t *, node_t *);
38
+	int (*ir_prologue)(prog_t *);
39
+	int (*ir_epilogue)(prog_t *);
35 40
 };
36 41
 
37 42
 void provider_register(provider_t *prov);

+ 4 - 0
sym.h

@@ -1,6 +1,8 @@
1 1
 #ifndef _PLY_SYM_H
2 2
 #define _PLY_SYM_H
3 3
 
4
+#include <stdint.h>
5
+
4 6
 #include "type.h"
5 7
 
6 8
 typedef struct symtab symtab_t;
@@ -9,6 +11,8 @@ typedef struct sym {
9 11
 	symtab_t *st;
10 12
 	const char *name;
11 13
 	type_t *type;
14
+
15
+	uint16_t reg;
12 16
 } sym_t;
13 17
 
14 18
 struct symtab {