Browse Source

started on ir

Tobias Waldekranz.com 8 years ago
parent
commit
9f42acefdd
6 changed files with 355 additions and 12 deletions
  1. 237 1
      ir.c
  2. 35 0
      ir.h
  3. 24 4
      kprobe.c
  4. 50 7
      ply.c
  5. 5 0
      ply.h
  6. 4 0
      sym.h

+ 237 - 1
ir.c

1
 #include <assert.h>
1
 #include <assert.h>
2
+#include <inttypes.h>
3
+#include <stdio.h>
4
+#include <string.h>
5
+
6
+#include <linux/bpf.h>
2
 
7
 
3
 #include "ir.h"
8
 #include "ir.h"
4
 
9
 
5
 const uint16_t vreg_base = 0x8000;
10
 const uint16_t vreg_base = 0x8000;
6
 
11
 
12
/*
 * Map a BPF helper function id to its short name, i.e. the identifier
 * without the "BPF_FUNC_" prefix.
 *
 * Returns NULL for any id that is not in the table below, so the caller
 * can fall back to printing the numeric id.
 */
static const char *bpf_func_name(enum bpf_func_id id)
{
	static const struct {
		enum bpf_func_id id;
		const char *name;
	} known[] = {
		{ BPF_FUNC_get_current_comm,     "get_current_comm"     },
		{ BPF_FUNC_get_current_pid_tgid, "get_current_pid_tgid" },
		{ BPF_FUNC_get_current_uid_gid,  "get_current_uid_gid"  },
		{ BPF_FUNC_get_stackid,          "get_stackid"          },
		{ BPF_FUNC_ktime_get_ns,         "ktime_get_ns"         },
		{ BPF_FUNC_map_delete_elem,      "map_delete_elem"      },
		{ BPF_FUNC_map_lookup_elem,      "map_lookup_elem"      },
		{ BPF_FUNC_map_update_elem,      "map_update_elem"      },
		{ BPF_FUNC_perf_event_output,    "perf_event_output"    },
		{ BPF_FUNC_probe_read,           "probe_read"           },
		{ BPF_FUNC_trace_printk,         "trace_printk"         },
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (known[i].id == id)
			return known[i].name;
	}

	return NULL;
}
41
+
42
+static void reg_name(uint16_t reg, char *name)
43
+{
44
+        if (reg & vreg_base) {
45
+		sprintf(name, "v%u", reg & ~vreg_base);		
46
+	} else if (reg == BPF_REG_10) {
47
+		strcpy(name, "bp");
48
+	} else {
49
+		sprintf(name, "r%u", reg);
50
+	}
51
+}
52
+
53
/*
 * Print a register operand to `fp`.
 *
 * With a zero offset only the register name is printed. A non-zero
 * offset is printed as a bracketed expression: "[name + 0x<off>]" for
 * positive offsets and "[name - 0x<-off>]" for negative ones.
 */
static void reg_dump(uint16_t reg, int16_t off, FILE *fp)
{
	char name[8];

	reg_name(reg, name);

	if (off == 0) {
		fprintf(fp, "%s", name);
		return;
	}

	if (off > 0)
		fprintf(fp, "[%s + 0x%x]", name, off);
	else
		fprintf(fp, "[%s - 0x%x]", name, -off);
}
66
+
67
/*
 * Return the one-letter width suffix for the size field of `code`:
 * 'b' (BPF_B), 'h' (BPF_H), 'w' (BPF_W) or 'q' (BPF_DW).
 * Any other value of BPF_SIZE(code) yields '?'.
 */
static char size_name(uint8_t code)
{
	const int size = BPF_SIZE(code);

	if (size == BPF_B)
		return 'b';
	if (size == BPF_H)
		return 'h';
	if (size == BPF_W)
		return 'w';
	if (size == BPF_DW)
		return 'q';

	return '?';
}
78
+
79
/*
 * Print the mnemonic of an ALU instruction to `fp`.
 *
 * The mnemonic is the operation name selected by BPF_OP(code), followed
 * by a single width suffix chosen by the instruction class: the suffix
 * for BPF_W when the class is BPF_ALU, the suffix for BPF_DW when it is
 * BPF_ALU64. Operations that are not listed print no name; classes other
 * than the two ALU classes print no suffix.
 */
static void alu_dump(uint8_t code, FILE *fp)
{
	switch (BPF_OP(code)) {
	case BPF_MOV: fputs("mov", fp); break;
	case BPF_ADD: fputs("add", fp); break;
	case BPF_SUB: fputs("sub", fp); break;
	case BPF_MUL: fputs("mul", fp); break;
	case BPF_DIV: fputs("div", fp); break;
	case BPF_OR:  fputs("or",  fp); break;
	case BPF_AND: fputs("and", fp); break;
	case BPF_LSH: fputs("lsh", fp); break;
	case BPF_RSH: fputs("rsh", fp); break;
	case BPF_NEG: fputs("neg", fp); break;
	case BPF_MOD: fputs("mod", fp); break;
	case BPF_XOR: fputs("xor", fp); break;
	default:
		break;
	}

	/*
	 * Each class must terminate with a break: without it BPF_ALU falls
	 * through into BPF_ALU64 and both suffixes are printed.
	 */
	switch (BPF_CLASS(code)) {
	case BPF_ALU:
		fputc(size_name(BPF_W), fp);
		break;
	case BPF_ALU64:
		fputc(size_name(BPF_DW), fp);
		break;
	default:
		break;
	}
}
101
+
102
/*
 * Print a jump target or label id to `fp`.
 *
 * Negative values are printed as "L<n>" with n = -off; zero and positive
 * values are printed as "+<n>".
 * NOTE(review): negative values presumably denote labels and
 * non-negative ones relative offsets -- confirm against ir_alloc_label().
 */
static void offset_dump(int16_t off, FILE *fp)
{
	if (off >= 0) {
		fprintf(fp, "+%d", off);
		return;
	}

	fprintf(fp, "L%d", -off);
}
109
+
110
/*
 * Print one BPF instruction to `fp` in an assembler-like syntax.
 *
 * insn: the instruction; opcode, offset and immediate are read from it.
 * dst:  register number printed as the destination operand.
 * src:  register number printed as the source operand (when the source
 *       is a register).
 * fp:   output stream.
 *
 * dst/src are passed separately from insn so that callers can supply
 * register numbers other than insn.dst_reg/insn.src_reg.
 *
 * Output forms:
 *   loads        "ld<sz>\t<dst>, <src or [src +/- off]>"
 *   stores       "st<sz>\t<dst or [dst +/- off]>, <src or #imm>"
 *   ALU          "<op><sz>\t<dst>, <src or #imm>"
 *   cond. jumps  "<jop>\t<dst>, <src or #imm>, <target>"
 *   "exit", "call\t<helper name or number>", "ja\t<target>"
 * Anything else is printed as "data" followed by the raw 64-bit encoding.
 * No trailing newline is written on any path; the caller adds it.
 */
static void __insn_dump(const struct bpf_insn insn, uint16_t dst, uint16_t src,
			FILE *fp)
{
	const char *name;
	uint64_t raw;
	uint32_t magnitude;
	int src_is_imm;
	enum {
		OFF_NONE,	/* insn.off is not printed */
		OFF_DST,	/* insn.off belongs to the destination operand */
		OFF_SRC,	/* insn.off belongs to the source operand */
		OFF_EXP,	/* insn.off is printed as a separate jump target */
	} off = OFF_NONE;

	switch (BPF_CLASS(insn.code)) {
	case BPF_LD:
	case BPF_LDX:
		off = OFF_SRC;
		fprintf(fp, "ld%c", size_name(insn.code));
		break;

	case BPF_ST:
	case BPF_STX:
		off = OFF_DST;
		fprintf(fp, "st%c", size_name(insn.code));
		break;

	case BPF_ALU:
	case BPF_ALU64:
		alu_dump(insn.code, fp);
		break;

	case BPF_JMP:
		off = OFF_EXP;

		switch (BPF_OP(insn.code)) {
		case BPF_EXIT:
			fputs("exit", fp);
			return;
		case BPF_CALL:
			fputs("call\t", fp);

			/* prefer the helper's name, fall back to its number */
			name = bpf_func_name(insn.imm);
			if (name)
				fputs(name, fp);
			else
				fprintf(fp, "%d", insn.imm);
			return;
		case BPF_JA:
			fputs("ja\t", fp);
			offset_dump(insn.off, fp);
			return;

		case BPF_JEQ:  fputs("jeq", fp); break;
		case BPF_JNE:  fputs("jne", fp); break;
		case BPF_JGT:  fputs("jgt", fp); break;
		case BPF_JGE:  fputs("jge", fp); break;
		case BPF_JSGE: fputs("jsge", fp); break;
		case BPF_JSGT: fputs("jsgt", fp); break;
		default:
			goto unknown;
		}
		break;

	default:
		goto unknown;
	}

	fputc('\t', fp);
	reg_dump(dst, off == OFF_DST ? insn.off : 0, fp);
	fputs(", ", fp);

	/*
	 * Decide whether the source operand is a register or an immediate.
	 * For load/store classes the bit tested by BPF_SRC() is part of the
	 * size field, so it must not be consulted there: LDX/STX always take
	 * a register and ST always takes an immediate.
	 * NOTE(review): BPF_LD keeps the BPF_SRC()-based choice it had
	 * before; confirm what is wanted for that class.
	 */
	switch (BPF_CLASS(insn.code)) {
	case BPF_LDX:
	case BPF_STX:
		src_is_imm = 0;
		break;
	case BPF_ST:
		src_is_imm = 1;
		break;
	default:
		src_is_imm = (BPF_SRC(insn.code) == BPF_K);
		break;
	}

	if (src_is_imm) {
		/* negate in unsigned arithmetic so INT32_MIN does not overflow */
		magnitude = insn.imm < 0 ? 0u - (uint32_t)insn.imm
					 : (uint32_t)insn.imm;
		fprintf(fp, "#%s0x%" PRIx32, insn.imm < 0 ? "-" : "", magnitude);
	} else {
		reg_dump(src, off == OFF_SRC ? insn.off : 0, fp);
	}

	if (off == OFF_EXP) {
		fputs(", ", fp);
		offset_dump(insn.off, fp);
	}

	return;

unknown:
	/*
	 * Copy the encoding out instead of reading the struct through a
	 * uint64_t pointer, which would violate the aliasing rules.
	 * struct bpf_insn is 8 bytes wide.
	 */
	memcpy(&raw, &insn, sizeof(raw));
	fprintf(fp, "data\t0x%16.16" PRIx64, raw);
}
203
+
204
/*
 * Print a plain BPF instruction to `fp`, using the destination and
 * source register numbers stored in the instruction itself.
 */
void insn_dump(struct bpf_insn insn, FILE *fp)
{
	const uint16_t dst = insn.dst_reg;
	const uint16_t src = insn.src_reg;

	__insn_dump(insn, dst, src, fp);
}
208
+
209
+void vinsn_dump(vinsn_t *vi, FILE *fp)
210
+{
211
+	switch (vi->vitype) {
212
+	case VI_INSN:
213
+		__insn_dump(vi->insn.bpf, vi->insn.dst, vi->insn.src, fp);
214
+		return;
215
+	case VI_LABEL:
216
+		offset_dump(vi->label, fp);
217
+		fputc(':', fp);
218
+		return;
219
+	case VI_REG_GET:
220
+	case VI_REG_PUT:
221
+		fputs((vi->vitype == VI_REG_GET) ? "+ " : "- ", fp);
222
+		reg_dump(vi->reg, 0, fp);
223
+		return;
224
+	}
225
+}
226
+
227
+void ir_dump(ir_t *ir, FILE *fp)
228
+{
229
+	size_t i;
230
+
231
+	for (i = 0; i < ir->len; i++) {
232
+		if (ir->vi[i].vitype == VI_INSN)
233
+			fputc('\t', fp);
234
+
235
+		vinsn_dump(&ir->vi[i], fp);
236
+		fputc('\n', fp);
237
+	}
238
+}
239
+
7
 static void ir_emit(ir_t *ir, vinsn_t *vi)
240
 static void ir_emit(ir_t *ir, vinsn_t *vi)
8
 {
241
 {
9
-	ir->vi = realloc(ir->vi, ++ir->len);
242
+	ir->vi = realloc(ir->vi, (++ir->len)*sizeof(*vi));
10
 	assert(ir->vi);
243
 	assert(ir->vi);
11
 
244
 
12
 	ir->vi[ir->len - 1] = *vi;
245
 	ir->vi[ir->len - 1] = *vi;
29
 
262
 
30
 	vi.vitype = VI_LABEL;
263
 	vi.vitype = VI_LABEL;
31
 	vi.label = label;
264
 	vi.label = label;
265
+	ir_emit(ir, &vi);
32
 }
266
 }
33
 
267
 
34
 void ir_emit_reg_get(ir_t *ir, uint16_t reg)
268
 void ir_emit_reg_get(ir_t *ir, uint16_t reg)
37
 
271
 
38
 	vi.vitype = VI_REG_GET;
272
 	vi.vitype = VI_REG_GET;
39
 	vi.reg = reg;
273
 	vi.reg = reg;
274
+	ir_emit(ir, &vi);
40
 }
275
 }
41
 
276
 
42
 void ir_emit_reg_put(ir_t *ir, uint16_t reg)
277
 void ir_emit_reg_put(ir_t *ir, uint16_t reg)
45
 
280
 
46
 	vi.vitype = VI_REG_PUT;
281
 	vi.vitype = VI_REG_PUT;
47
 	vi.reg = reg;
282
 	vi.reg = reg;
283
+	ir_emit(ir, &vi);
48
 }
284
 }
49
 
285
 
50
 int16_t ir_alloc_label (ir_t *ir)
286
 int16_t ir_alloc_label (ir_t *ir)

+ 35 - 0
ir.h

6
 
6
 
7
 #include <linux/bpf.h>
7
 #include <linux/bpf.h>
8
 
8
 
9
/*
 * Build a struct bpf_insn value (a compound literal) from its five
 * fields. Every other macro below is a shorthand for one INSN() shape.
 */
#define INSN(_code, _dst, _src, _off, _imm)	\
	((struct bpf_insn) {			\
		.code    = (_code),		\
		.dst_reg = (_dst),		\
		.src_reg = (_src),		\
		.off     = (_off),		\
		.imm     = (_imm)		\
	})

/* 64-bit moves: register source and immediate source */
#define MOV(_dst, _src) \
	INSN(BPF_ALU64 | BPF_MOV | BPF_X, (_dst), (_src), 0, 0)
#define MOV_IMM(_dst, _imm) \
	INSN(BPF_ALU64 | BPF_MOV | BPF_K, (_dst), 0, 0, (_imm))

/* Program exit, helper call and conditional jumps */
#define EXIT \
	INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0)
#define CALL(_imm) \
	INSN(BPF_JMP | BPF_CALL, 0, 0, 0, (_imm))
#define JMP(_op, _dst, _src, _off) \
	INSN(BPF_JMP | BPF_OP(_op) | BPF_X, (_dst), (_src), (_off), 0)
#define JMP_IMM(_op, _dst, _imm, _off) \
	INSN(BPF_JMP | BPF_OP(_op) | BPF_K, (_dst), 0, (_off), (_imm))

/* 64-bit ALU operations: register source and immediate source */
#define ALU(_op, _dst, _src) \
	INSN(BPF_ALU64 | BPF_OP(_op) | BPF_X, (_dst), (_src), 0, 0)
#define ALU_IMM(_op, _dst, _imm) \
	INSN(BPF_ALU64 | BPF_OP(_op) | BPF_K, (_dst), 0, 0, (_imm))

/* Stores: the offset goes with _dst; the value is an immediate (ST) or _src (STX) */
#define STW_IMM(_dst, _off, _imm) \
	INSN(BPF_ST | BPF_SIZE(BPF_W) | BPF_MEM, (_dst), 0, (_off), (_imm))
#define STXB(_dst, _off, _src) \
	INSN(BPF_STX | BPF_SIZE(BPF_B) | BPF_MEM, (_dst), (_src), (_off), 0)
#define STXH(_dst, _off, _src) \
	INSN(BPF_STX | BPF_SIZE(BPF_H) | BPF_MEM, (_dst), (_src), (_off), 0)
#define STXW(_dst, _off, _src) \
	INSN(BPF_STX | BPF_SIZE(BPF_W) | BPF_MEM, (_dst), (_src), (_off), 0)
#define STXDW(_dst, _off, _src) \
	INSN(BPF_STX | BPF_SIZE(BPF_DW) | BPF_MEM, (_dst), (_src), (_off), 0)

/* Loads: the offset goes with _src; the result is written to _dst */
#define LDXB(_dst, _off, _src) \
	INSN(BPF_LDX | BPF_SIZE(BPF_B) | BPF_MEM, (_dst), (_src), (_off), 0)
#define LDXH(_dst, _off, _src) \
	INSN(BPF_LDX | BPF_SIZE(BPF_H) | BPF_MEM, (_dst), (_src), (_off), 0)
#define LDXW(_dst, _off, _src) \
	INSN(BPF_LDX | BPF_SIZE(BPF_W) | BPF_MEM, (_dst), (_src), (_off), 0)
#define LDXDW(_dst, _off, _src) \
	INSN(BPF_LDX | BPF_SIZE(BPF_DW) | BPF_MEM, (_dst), (_src), (_off), 0)
39
+
9
 typedef enum vitype {
40
 typedef enum vitype {
10
 	VI_INSN,
41
 	VI_INSN,
11
 	VI_LABEL,
42
 	VI_LABEL,
36
 	uint16_t next_reg;
67
 	uint16_t next_reg;
37
 } ir_t;
68
 } ir_t;
38
 
69
 
70
+void insn_dump(struct bpf_insn insn, FILE *fp);
71
+void vinsn_dump(vinsn_t *vi, FILE *fp);
72
+void ir_dump(ir_t *ir, FILE *fp);
73
+
39
 int16_t  ir_alloc_label   (ir_t *ir);
74
 int16_t  ir_alloc_label   (ir_t *ir);
40
 uint16_t ir_alloc_register(ir_t *ir);
75
 uint16_t ir_alloc_register(ir_t *ir);
41
 
76
 

+ 24 - 4
kprobe.c

10
 struct kprobe {
10
 struct kprobe {
11
 };
11
 };
12
 
12
 
13
+static int kprobe_ir_prologue(prog_t *prog)
14
+{
15
+	sym_t *ctx = sym_get(prog->locals, "ctx");
16
+
17
+	if (!ctx)
18
+		return 0;
19
+
20
+	ctx->reg = ir_alloc_register(prog->ir);
21
+	ir_emit_reg_get(prog->ir, ctx->reg);
22
+
23
+	/* kernel sets r1 to the address of the context */
24
+	ir_emit_insn(prog->ir, MOV(0, 0), ctx->reg, BPF_REG_1);
25
+	return 0;
26
+}
27
+
13
 static inline int is_arg(const char *name)
28
 static inline int is_arg(const char *name)
14
 {
29
 {
15
 	return (strstr(name, "arg") == name)
30
 	return (strstr(name, "arg") == name)
21
 {
36
 {
22
 	const char *reg;
37
 	const char *reg;
23
 	int arg = n->ident[3] - '0';
38
 	int arg = n->ident[3] - '0';
24
-	node_t *new;
39
+	node_t *new, *ctx;
25
 
40
 
26
 	reg = arch_register_argument(arg);
41
 	reg = arch_register_argument(arg);
27
 	if (!reg) {
42
 	if (!reg) {
32
 		return -EINVAL;
47
 		return -EINVAL;
33
 	}
48
 	}
34
 
49
 
50
+	ctx = node_ident("ctx");
51
+
35
 	/* argN => (*ctx).REG */
52
 	/* argN => (*ctx).REG */
36
 	new = node_vlist(node_keyword('.', 0),
53
 	new = node_vlist(node_keyword('.', 0),
37
 			 node_vlist(node_keyword('*', 0),
54
 			 node_vlist(node_keyword('*', 0),
38
-				    node_ident("ctx"),
55
+				    ctx,
39
 				    NULL),
56
 				    NULL),
40
 			 node_string(reg),
57
 			 node_string(reg),
41
 			 NULL);
58
 			 NULL);
42
 
59
 
60
+	ctx->type = type_ptr_of(&t_pt_regs);
43
 	new->type = n->type;
61
 	new->type = n->type;
44
 	new->list->type = &t_void;
62
 	new->list->type = &t_void;
45
 	new->list->next->type = &t_pt_regs;
63
 	new->list->next->type = &t_pt_regs;
46
 	new->list->next->list->type = &t_void;
64
 	new->list->next->list->type = &t_void;
47
-	new->list->next->list->next->type = type_ptr_of(&t_pt_regs);
48
-	return node_replace(n, new);
65
+	node_replace(n, new);
66
+
67
+	return sym_add(prog->locals, ctx->ident, ctx->type, &ctx->sym);
49
 }
68
 }
50
 
69
 
51
 static int kprobe_rewrite_node(prog_t *prog, node_t *n)
70
 static int kprobe_rewrite_node(prog_t *prog, node_t *n)
85
 provider_t kprobe = {
104
 provider_t kprobe = {
86
 	.name = "kprobe",
105
 	.name = "kprobe",
87
 
106
 
107
+	.ir_prologue = kprobe_ir_prologue,
88
 	.rewrite_node = kprobe_rewrite_node,
108
 	.rewrite_node = kprobe_rewrite_node,
89
 	.resolve = kprobe_resolve,
109
 	.resolve = kprobe_resolve,
90
 	.probe = kprobe_probe,
110
 	.probe = kprobe_probe,

+ 50 - 7
ply.c

83
 
83
 
84
 	prog->provider = provider_get("k");
84
 	prog->provider = provider_get("k");
85
 	prog->provider->probe(prog);
85
 	prog->provider->probe(prog);
86
+	prog->ir = ir_new();
86
 	ctx->progs[0] = prog;
87
 	ctx->progs[0] = prog;
87
 
88
 
88
 	/* PROBE1 */
89
 	/* PROBE1 */
111
 
112
 
112
 	prog->provider = provider_get("k");
113
 	prog->provider = provider_get("k");
113
 	prog->provider->probe(prog);
114
 	prog->provider->probe(prog);
115
+	prog->ir = ir_new();
114
 	ctx->progs[1] = prog;
116
 	ctx->progs[1] = prog;
115
 
117
 
116
 	return ctx;
118
 	return ctx;
374
 	return 0;
376
 	return 0;
375
 }
377
 }
376
 
378
 
377
-int pass_walk(pass_t *pass, ctx_t *ctx)
379
+int pass_generate_ir(node_t *n, void *_prog)
380
+{
381
+	prog_t *prog = _prog;
382
+
383
+	return 0;
384
+}
385
+
386
+int run_generate_ir(pass_t *pass, ctx_t *ctx)
387
+{
388
+	prog_t **progp;
389
+	int err;
390
+
391
+	for (progp = ctx->progs; *progp; progp++) {
392
+		prog_t *prog = *progp;
393
+
394
+		int return_label = ir_alloc_label(prog->ir);
395
+
396
+		err = prog->provider->ir_prologue ?
397
+			prog->provider->ir_prologue(prog) : 0;
398
+		if (err)
399
+			return err;
400
+
401
+		err = node_walk(prog->ast, pass->pre, pass->post, prog);
402
+		if (err)
403
+			return err;
404
+
405
+		err = prog->provider->ir_epilogue ?
406
+			prog->provider->ir_epilogue(prog) : 0;
407
+		if (err)
408
+			return err;
409
+
410
+		ir_emit_label(prog->ir, return_label);
411
+		ir_emit_insn(prog->ir, EXIT, 0, 0);
412
+	}
413
+
414
+	return 0;
415
+}
416
+
417
+int run_walk(pass_t *pass, ctx_t *ctx)
378
 {
418
 {
379
 	prog_t **prog;
419
 	prog_t **prog;
380
 	int err;
420
 	int err;
389
 }
429
 }
390
 
430
 
391
 pass_t passes[] = {
431
 pass_t passes[] = {
392
-	{ .run = pass_walk, .post = pass_resolve_symbols },
393
-	{ .run = pass_walk, .post = pass_infer_types },
394
-	{ .run = pass_walk, .post = pass_infer_types },
395
-	{ .run = pass_walk, .post = pass_infer_types },
396
-	{ .run = pass_walk, .post = pass_validate_types },
397
-	{ .run = pass_walk, .post = pass_rewrite_ast },
432
+	{ .run = run_walk, .post = pass_resolve_symbols },
433
+	{ .run = run_walk, .post = pass_infer_types },
434
+	{ .run = run_walk, .post = pass_infer_types },
435
+	{ .run = run_walk, .post = pass_infer_types },
436
+	{ .run = run_walk, .post = pass_validate_types },
437
+	{ .run = run_walk, .post = pass_rewrite_ast },
438
+	{ .run = run_generate_ir },
398
 
439
 
399
 	{ NULL }
440
 	{ NULL }
400
 };
441
 };
417
 		node_dump((*prog)->ast, stdout);
458
 		node_dump((*prog)->ast, stdout);
418
 		printf("\n-- locals\n");
459
 		printf("\n-- locals\n");
419
 		symtab_dump((*prog)->locals, stdout);
460
 		symtab_dump((*prog)->locals, stdout);
461
+		printf("-- ir\n");
462
+		ir_dump((*prog)->ir, stdout);
420
 	}
463
 	}
421
 
464
 
422
 	printf("\n\n-- globals\n");
465
 	printf("\n\n-- globals\n");

+ 5 - 0
ply.h

5
 #include "sym.h"
5
 #include "sym.h"
6
 #include "type.h"
6
 #include "type.h"
7
 #include "arch.h"
7
 #include "arch.h"
8
+#include "ir.h"
8
 
9
 
9
 typedef struct prog prog_t;
10
 typedef struct prog prog_t;
10
 typedef struct provider provider_t;
11
 typedef struct provider provider_t;
18
 
19
 
19
 	provider_t *provider;
20
 	provider_t *provider;
20
 	void *provider_data;
21
 	void *provider_data;
22
+
23
+	ir_t *ir;
21
 };
24
 };
22
 
25
 
23
 typedef struct ctx {
26
 typedef struct ctx {
32
 	int (*probe)(prog_t *);
35
 	int (*probe)(prog_t *);
33
 	int (*resolve)(prog_t *, node_t *);
36
 	int (*resolve)(prog_t *, node_t *);
34
 	int (*rewrite_node)(prog_t *, node_t *);
37
 	int (*rewrite_node)(prog_t *, node_t *);
38
+	int (*ir_prologue)(prog_t *);
39
+	int (*ir_epilogue)(prog_t *);
35
 };
40
 };
36
 
41
 
37
 void provider_register(provider_t *prov);
42
 void provider_register(provider_t *prov);

+ 4 - 0
sym.h

1
 #ifndef _PLY_SYM_H
1
 #ifndef _PLY_SYM_H
2
 #define _PLY_SYM_H
2
 #define _PLY_SYM_H
3
 
3
 
4
+#include <stdint.h>
5
+
4
 #include "type.h"
6
 #include "type.h"
5
 
7
 
6
 typedef struct symtab symtab_t;
8
 typedef struct symtab symtab_t;
9
 	symtab_t *st;
11
 	symtab_t *st;
10
 	const char *name;
12
 	const char *name;
11
 	type_t *type;
13
 	type_t *type;
14
+
15
+	uint16_t reg;
12
 } sym_t;
16
 } sym_t;
13
 
17
 
14
 struct symtab {
18
 struct symtab {