-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsyscall_entrypoint.c
463 lines (405 loc) · 16.4 KB
/
syscall_entrypoint.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
#include "syscall_entrypoint.h"
#include "debug.h"
#include "sandbox.h"
#include "trusted_thread.h"
// TODO(markus): change this into a function that returns the address of the
// assembly code. If that isn't possible for sandbox_clone, then move that
// function into a *.S file
// Assembly entry points that untrusted code jumps to instead of executing a
// system call instruction directly.
//
//   syscall_enter_without_frame: duplicates the return address on the stack
//     (so that the stack looks the same as in the "with frame" case) and
//     falls through into syscall_enter_with_frame.
//   syscall_enter_with_frame: saves the CPU registers, converts the system
//     call arguments to C calling conventions, calls pad_request(), restores
//     the registers (except %rax, which carries the result), drops the fake
//     return address and returns to the caller.
//
// Only the x86-64 path is currently functional; the i386 body of
// syscall_enter_with_frame is commented out, so an i386 build stops at the
// #error below.
asm(
    ".pushsection .text, \"ax\", @progbits\n"
    // This is the special wrapper for the clone() system call. The code
    // relies on the stack layout of the system call entrypoint (c.f. below).
    // It passes the stack pointer as an additional argument to
    /* // syscall__clone(), so that upon starting the child, register values can*/
    /*// be restored and the child can start executing at the correct IP,*/
    /*// instead of trying to run in the trusted thread.*/
    /*"syscall_clone:"*/
    /*".internal syscall_clone\n"*/
    /*".globl syscall_clone\n"*/
    /*".type syscall_clone, @function\n"*/
    /*#if defined(__x86_64__)*/
    /*// Skip the 8 byte return address into the system call entrypoint. The*/
    /*// following bytes are the saved register values that we need to restore*/
    /*// upon return from clone() in the new thread.*/
    /*"lea 8(%rsp), %r9\n"*/
    /*"jmp sandbox__clone\n"*/
    /*#elif defined(__i386__)*/
    /*// As i386 passes function arguments on the stack, we need to skip a few*/
    /*// more values before we can get to the saved registers.*/
    /*"mov 28(%esp), %eax\n"*/
    /*"mov %eax, 24(%esp)\n"*/
    /*"jmp sandbox__clone\n"*/
    /*#else*/
    /*#error Unsupported target platform*/
    /*#endif*/
    /*".size syscall_clone, .-syscall_clone\n"*/
    // This is the entrypoint which is called by the untrusted code, trying to
    // make a system call.
    "syscall_enter_without_frame:"
    ".internal syscall_enter_without_frame\n"
    ".globl syscall_enter_without_frame\n"
    ".type syscall_enter_without_frame, @function\n"
#if defined(__x86_64__)
    "mov 0(%rsp), %r11\n" // add fake return address by duplicating
    "push %r11\n" // real return address
    /* fall through */
#elif defined(__i386__)
    "push %eax\n" // add fake return address, which in this
    "mov 4(%esp), %eax\n" // case is identical to the real return
    "xchg %eax, 0(%esp)\n" // address
    /* fall through */
#else
#error Unsupported target platform
#endif
    ".size syscall_enter_without_frame, .-syscall_enter_without_frame\n"
    "syscall_enter_with_frame:"
    ".internal syscall_enter_with_frame\n"
    ".globl syscall_enter_with_frame\n"
    ".type syscall_enter_with_frame, @function\n"
#if defined(__x86_64__)
    // Check for rt_sigreturn(). It needs to be handled specially.
    // (Currently disabled.)
    /* "cmp $15, %rax\n" // NR_rt_sigreturn*/
    /*"jnz 1f\n"*/
    /*"add $0x90, %rsp\n" // pop return addresses and red zone*/
    /*"0:syscall\n" // rt_sigreturn() is unrestricted*/
    /*"mov $66, %edi\n" // rt_sigreturn() should never return*/
    /*"mov $231, %eax\n" // NR_exit_group*/
    /*"jmp 0b\n"*/
    // Save all registers. The pops further down mirror these pushes in
    // reverse order.
    "1:push %rbp\n"
    "movq $0xDEADBEEFDEADBEEF, %rbp\n" // marker used by breakpad to remove
    "push %rbp\n" // seccomp-sandbox's stack frame from dumps
    "mov %rsp, %rbp\n"
    "push %rbx\n"
    "push %rcx\n"
    "push %rdx\n"
    "push %rsi\n"
    "push %rdi\n"
    "push %r8\n"
    "push %r9\n"
    "push %r10\n"
    "push %r11\n"
    "push %r12\n"
    "push %r13\n"
    "push %r14\n"
    "push %r15\n"
    // This code is only x86_64 compatible.
    // Translate from the syscall calling convention to the C function
    // calling convention. The kernel ABI carries the fourth argument in %r10
    // (the syscall instruction itself overwrites %rcx), whereas a C function
    // expects its fourth argument in %rcx. All other argument registers
    // (%rdi, %rsi, %rdx, %r8, %r9) are shared by both conventions. The
    // caller's %rcx was saved above and is restored below, so overwriting it
    // here is invisible to the caller.
    "mov %r10, %rcx\n"
    // The system call number is the 7th argument of pad_request(), which is
    // passed on the stack.
    "push %rax\n"
    // NOTE(review): pad_request() is declared to return int, so only %eax is
    // defined on return; confirm that callers never need a full 64-bit
    // result in %rax.
    "call pad_request\n"
    // Clean the stack (drop the 7th argument).
    "add $8, %rsp\n"
    // Restore CPU registers, except for %rax which was set by the system call.
    "pop %r15\n"
    "pop %r14\n"
    "pop %r13\n"
    "pop %r12\n"
    "pop %r11\n"
    "pop %r10\n"
    "pop %r9\n"
    "pop %r8\n"
    "pop %rdi\n"
    "pop %rsi\n"
    "pop %rdx\n"
    "pop %rcx\n"
    "pop %rbx\n"
    "pop %rbp\n" // 0xDEADBEEF marker
    "pop %rbp\n"
    // Remove fake return address. This is added in the patching code in
    // library.cc and it makes stack traces a little cleaner.
    "add $8, %rsp\n"
    // Return to caller
    "ret\n"
    /*"3:"*/
    /*// If we end up calling a specific handler, we don't need to know the*/
    /*// system call number. However, in the generic case, we do. Shift*/
    /*// registers so that the system call number becomes visible as the first*/
    /*// function argument.*/
    /*"push %r9\n"*/
    /*"mov %r8, %r9\n"*/
    /*"mov %r10, %r8\n"*/
    /*"mov %rdx, %rcx\n"*/
    /*"mov %rsi, %rdx\n"*/
    /*"mov %rdi, %rsi\n"*/
    /*"mov %rax, %rdi\n"*/
    /*// Call default handler*/
    /*"call syscall_default_handler\n"*/
    /*"pop %r9\n"*/
    /*"jmp 2b\n"*/
    /*#elif defined(__i386__)*/
    /*"cmp $119, %eax\n" // NR_sigreturn*/
    /*"jnz 1f\n"*/
    /*"add $0x8, %esp\n" // pop return address*/
    /*"0:int $0x80\n" // sigreturn() is unrestricted*/
    /*"mov $66, %ebx\n" // sigreturn() should never return*/
    /*"mov %ebx, %eax\n" // NR_exit*/
    /*"jmp 0b\n"*/
    /*"1:cmp $173, %eax\n" // NR_rt_sigreturn*/
    /*"jnz 4f\n"*/
    /*// Convert rt_sigframe into sigframe, allowing us to call sigreturn().*/
    /*// This is possible since the first part of signal stack frames have*/
    /*// stayed very stable since the earliest kernel versions. While never*/
    /*// officially documented, lots of user space applications rely on this*/
    /*// part of the ABI, and kernel developers have been careful to maintain*/
    /*// backwards compatibility.*/
    /*// In general, the rt_sigframe includes a lot of extra information that*/
    /*// the signal handler can look at. Most notably, this means a complete*/
    /*// siginfo record.*/
    /*// Fortunately though, the kernel doesn't look at any of this extra data*/
    /*// when returning from a signal handler. So, we can safely convert an*/
    /*// rt_sigframe to a legacy sigframe, discarding the extra data in the*/
    /*// process. Interestingly, the legacy signal frame is actually larger than*/
    /*// the rt signal frame, as it includes a lot more padding.*/
    /*"sub $0x1C4, %esp\n" // a legacy signal stack is much larger*/
    /*"mov 0x1CC(%esp), %eax\n" // push signal number*/
    /*"push %eax\n"*/
    /*"lea 0x270(%esp), %esi\n" // copy siginfo register values*/
    /*"lea 0x4(%esp), %edi\n" // into new location*/
    /*"mov $0x16, %ecx\n"*/
    /*"cld\n"*/
    /*"rep movsl\n"*/
    /*"mov 0x2C8(%esp), %ebx\n" // copy first half of signal mask*/
    /*"mov %ebx, 0x54(%esp)\n"*/
    /*"call 2f\n"*/
    /*"2:pop %esi\n"*/
    /*"add $(3f-2b), %esi\n"*/
    /*"push %esi\n" // push restorer function*/
    /*"lea 0x2D4(%esp), %edi\n" // patch up retcode magic numbers*/
    /*"movb $2, %cl\n"*/
    /*"rep movsl\n"*/
    /*"ret\n" // return to restorer function*/
    /*"3:pop %eax\n" // remove dummy argument (signo)*/
    /*"mov $119, %eax\n" // NR_sigaction*/
    /*"int $0x80\n"*/
    /*// Preserve all registers*/
    /*"4:push %ebp\n"*/
    /*"push $0xDEADBEEF\n" // marker used by breakpad*/
    /*"push %ebx\n"*/
    /*"push %ecx\n"*/
    /*"push %edx\n"*/
    /*"push %esi\n"*/
    /*"push %edi\n"*/
    /*// Align stack pointer, so that called functions can push SSE registers*/
    /*// onto stack. This apparently is a requirement of the x86-32 ABI.*/
    /*"mov %esp, %ebp\n"*/
    /*"and $-16, %esp\n"*/
    /*"sub $4, %esp\n"*/
    /*"push %ebp\n" // push old un-aligned stack pointer*/
    /*"lea 0x14(%ebp), %ebp\n" // frame pointer points to 0xDEADBEEF*/
    /*"push %eax\n"*/
    /*"mov 4(%ebp), %eax\n" // push original value of %ebp*/
    /*"xchg %eax, 0(%esp)\n"*/
    /*// Convert from syscall calling conventions to C calling conventions*/
    /*"push %edi\n"*/
    /*"push %esi\n"*/
    /*"push %edx\n"*/
    /*"push %ecx\n"*/
    /*"push %ebx\n"*/
    /*"push %eax\n"*/
    /*// Check range of system call*/
    /*"call 5f\n"*/
    /*"5:pop %edx\n"*/
    /*"add $(_GLOBAL_OFFSET_TABLE_+(.-5b)), %edx\n"*/
    /*"mov syscall_table_size@GOT(%edx), %edx\n"*/
    /*"cmp 0(%edx), %eax\n"*/
    /*"ja 14f\n"*/
    /*// We often have long sequences of calls to gettimeofday(). This is*/
    /*// needlessly expensive. Coalesce them into a single call.*/
    /*//*/
    /*// We keep track of state in TLS storage that we can access through the*/
    /*// %fs segment register. See trusted_thread.cc for the exact memory*/
    /*// layout.*/
    /*//*/
    /*// TODO(markus): maybe, we should proactively call gettimeofday() and*/
    /*// clock_gettime(), whenever we talk to the trusted thread?*/
    /*// or maybe, if we have recently seen requests to compute*/
    /*// the time. There might be a repeated pattern of those.*/
    /*"cmp $78, %eax\n" // __NR_gettimeofday*/
    /*"jnz 10f\n"*/
    /*"cmp %eax, %fs:0x102C-0x58\n" // last system call*/
    /*"jnz 7f\n"*/
    /*// This system call and the last system call prior to this one both are*/
    /*// calls to gettimeofday(). Try to avoid making the new call and just*/
    /*// return the same result as in the previous call. Just in case the*/
    /*// caller is spinning on the result from gettimeofday(), every so often,*/
    /*// call the actual system call.*/
    /*"decl %fs:0x1030-0x58\n" // countdown calls to gettimofday()*/
    /*"jz 7f\n"*/
    /*// Atomically read the 64bit word representing last-known timestamp and*/
    /*// return it to the caller. On x86-32 this is a little more complicated*/
    /*// and requires the use of the cmpxchg8b instruction.*/
    /*"mov %ebx, %eax\n"*/
    /*"mov %ecx, %edx\n"*/
    /*"call 6f\n"*/
    /*"6:pop %ebp\n"*/
    /*"add $(100f-6b), %ebp\n"*/
    /*"lock; cmpxchg8b 0(%ebp)\n"*/
    /*"mov %eax, 0(%ebx)\n"*/
    /*"mov %edx, 4(%ebx)\n"*/
    /*"xor %eax, %eax\n"*/
    /*"add $28, %esp\n"*/
    /*"jmp 13f\n"*/
    /*// This is a call to gettimeofday(), but we don't have a valid cached*/
    /*// result, yet.*/
    /*"7:mov %eax, %fs:0x102C-0x58\n" // remember syscall number*/
    /*"movl $500, %fs:0x1030-0x58\n" // make system call, each 500 invocations*/
    /*"call syscall_default_handler@PLT\n"*/
    /*// Returned from gettimeofday(). Remember return value, in case the*/
    /*// application calls us again right away.*/
    /*// Again, this has to happen atomically and requires cmpxchg8b.*/
    /*"mov 4(%ebx), %ecx\n"*/
    /*"mov 0(%ebx), %ebx\n"*/
    /*"call 8f\n"*/
    /*"8:pop %ebp\n"*/
    /*"add $(100f-8b), %ebp\n"*/
    /*"mov 0(%ebp), %eax\n"*/
    /*"mov 4(%ebp), %edx\n"*/
    /*"9:lock; cmpxchg8b 0(%ebp)\n"*/
    /*"jnz 9b\n"*/
    /*"xor %eax, %eax\n"*/
    /*"jmp 15f\n"*/
    /*// Remember the number of the last system call made. We deliberately do*/
    /*// not remember calls to gettid(), as we have often seen long sequences of*/
    /*// calls to just gettimeofday() and gettid(). In that situation, we would*/
    /*// still like to coalesce the gettimeofday() calls.*/
    /*"10:cmp $224, %eax\n" // __NR_gettid*/
    /*"jz 11f\n"*/
    /*"mov %eax, %fs:0x102C-0x58\n" // remember syscall number*/
    /*// Retrieve function call from system call table (c.f.syscall_table.c)*/
    /*// We have three different types of entries; zero for denied system calls,*/
    /*// that should be handled by the default_syscall_handler(); minus one*/
    /*// for unrestricted system calls that need to be forwarded to the trusted*/
    /*// thread; and function pointers to specific handler functions.*/
    /*"11:shl $3, %eax\n"*/
    /*"call 12f\n"*/
    /*"12:pop %ebx\n"*/
    /*"add $(_GLOBAL_OFFSET_TABLE_+(.-12b)), %ebx\n"*/
    /*"mov syscall_table@GOT(%ebx), %ebx\n"*/
    /*"add 0(%ebx), %eax\n"*/
    /*"mov 0(%eax), %eax\n"*/
    /*// Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise*/
    /*// jump to fallback handler.*/
    /*"cmp $1, %eax\n"*/
    /*"jbe 14f\n"*/
    /*"add $4, %esp\n"*/
    /*"call *%eax\n"*/
    /*"add $24, %esp\n"*/
    /*// Restore CPU registers, except for %eax which was set by the system call.*/
    /*"13:pop %esp\n"*/
    /*"pop %edi\n"*/
    /*"pop %esi\n"*/
    /*"pop %edx\n"*/
    /*"pop %ecx\n"*/
    /*"pop %ebx\n"*/
    /*"pop %ebp\n" // 0xDEADBEEF marker*/
    /*"pop %ebp\n"*/
    /*// Remove fake return address. This is added in the patching code in*/
    /*// library.cc and it makes stack traces a little cleaner.*/
    /*"add $4, %esp\n"*/
    /*// Return to the caller*/
    /*"ret\n"*/
    /*// Call the default handler*/
    /*"14:call syscall_default_handler@PLT\n"*/
    /*"15:add $28, %esp\n"*/
    /*"jmp 13b\n"*/
    /*".pushsection \".bss\"\n"*/
    /*".balign 8\n"*/
    /*"100:.byte 0, 0, 0, 0, 0, 0, 0, 0\n"*/
    /*".popsection\n"*/
#else
#error Unsupported target platform
#endif
    ".size syscall_enter_with_frame, .-syscall_enter_with_frame\n"
    ".popsection\n"
);
// Internal-visibility prototype: pad_request() is called by name from the
// assembly entry point in this file.
int pad_request(unsigned long arg0,
                unsigned long arg1,
                unsigned long arg2,
                unsigned long arg3,
                unsigned long arg4,
                unsigned long arg5,
                unsigned long sysnum) __attribute__ ((visibility ("internal")));

// Packs one intercepted system call into a request structure, sends it to
// the trusted thread and returns the answer.
//
//   arg0..arg5  the six system call arguments, in order
//   sysnum      the system call number
//
// Returns the `result` field of the reply. The assembly caller leaves this
// value in %rax for the code that made the system call.
//
// NOTE(review): the return type is int; if syscall_result.result is wider
// than int (e.g. for pointer-sized results), the value is truncated here --
// confirm against the definition of syscall_result.
int pad_request(unsigned long arg0,
                unsigned long arg1,
                unsigned long arg2,
                unsigned long arg3,
                unsigned long arg4,
                unsigned long arg5,
                unsigned long sysnum)
{
    syscall_request req;
    syscall_result reply;

    // Zero the whole request (padding included) before filling it in.
    memset(&req, 0, sizeof req);
    req.arg0 = arg0;
    req.arg1 = arg1;
    req.arg2 = arg2;
    req.arg3 = arg3;
    req.arg4 = arg4;
    req.arg5 = arg5;
    req.syscall_identifier = sysnum;

    // Debug output for the request.
    print_syscall_info(&req);

    // Send the request ...
    send_syscall_request(&req);

    // ... and collect the reply into a zeroed result structure.
    memset(&reply, 0, sizeof reply);
    get_syscall_result(&reply);

    // The caller picks the result up from the return register.
    return reply.result;
}
/*#include "syscall_names.h"*/
/*void *syscall_default_handler(int sysno, void *arg0, void *arg1, void *arg2, void *arg3, void *arg4, void *arg5) {*/
/*// TODO: The following comment is currently not true, we do intercept these system calls. Try to fix that.*/
/*// We try to avoid intercepting mmap(), and munmap(), ... as these system*/
/*// calls are not restricted. But depending on the exact instruction sequence*/
/*// in libc, we might not be able to reliably filter out these system calls*/
/*// at the time when we instrument the code.*/
/*long ret;*/
/*//long long tm;*/
/*const char __attribute__((unused)) *sysname = syscall_names[sysno];*/
/*sys_write(2, sysname, strlen(sysname));*/
/*sys_write(2, "\n", 1);*/
/*switch (sysno) {*/
/*case __NR_mmap:*/
/*//DPRINTF(DLVL_INFO, "Allowing unrestricted system call\n");*/
/*ret = (long) sys_mmap(arg0, (size_t) arg1, (intptr_t) arg2, (intptr_t) arg3, (intptr_t) arg4, (off_t) arg5);*/
/*break;*/
/*case __NR_mprotect:*/
/*ret = sys_mprotect(arg0, (size_t) arg1, (intptr_t) arg2);*/
/*break;*/
/*case __NR_munmap:*/
/*ret = sys_munmap(arg0, (size_t) arg1);*/
/*break;*/
/*case __NR_brk:*/
/*ret = (long) sys_brk(arg0);*/
/*break;*/
/*default:*/
/*ret = __syscall(sysno, arg0, arg1, arg2, arg3, arg4, arg5);*/
/*return (void *)ret;*/
/*#ifdef DEBUG*/
/*// Prevent stderr from being closed in debug mode*/
/*if (sysno == __NR_close && arg0 == (void *)2)*/
/*return 0;*/
/*#endif*/
/*//if ((unsigned int)sysno > syscall_table_size || !syscall_table[sysno].handler)*/
/*// return (void *)-ENOSYS;*/
/*//struct {*/
/*// int sysno;*/
/*// void *args[6];*/
/*//} __attribute__((packed)) req = { sysno, { arg0, arg1, arg2, arg3, arg4, arg5 } };*/
/*//int thread = thread_fd_pub();*/
/*//void *ret;*/
/*//if (__write(thread, &req, sizeof(req)) != sizeof(req) ||*/
/*// __read(thread, &ret, sizeof(ret)) != sizeof(ret)) {*/
/*// die("Failed to forward unrestricted system call");*/
/*//}*/
/*//return (void *)ret;*/
/*}*/
/*if (ret < 0) {*/
/*ret = -errno;*/
/*}*/
/*return (void *)ret;*/
/*}*/