diff --git a/libsgxstep/apic.c b/libsgxstep/apic.c
index 6c3bdca..4cb61b2 100644
--- a/libsgxstep/apic.c
+++ b/libsgxstep/apic.c
@@ -21,41 +21,65 @@
 #include "apic.h"
 #include "pt.h"
 #include "cpu.h"
+#include "idt.h"
 #include "sched.h"
 #include
 #include
 #include "../kernel/sgxstep_ioctl.h"

-extern void *apic_base;
-void *dummy_pt = NULL;
+int g_apic_setup = 0;
+uint64_t g_apic_deadline_tsc_begin = -1;
+
+#if !X2APIC
+    extern void *apic_base;
+
+    /*
+     * Code below maps APIC timer MMIO registers in user space.
+     *
+     * NOTE: we require xAPIC mode, since "In x2APIC mode, the memory mapped
+     * interface is not available and any access to the MMIO interface will behave
+     * similar to that of a legacy xAPIC in globally disabled state" (Intel SDM
+     * 10.12.2).
+     *
+     * Advised Linux kernel parameters are: "nox2apic iomem=relaxed no_timer_check"
+     */
+    static void do_apic_init(void)
+    {
+        uintptr_t apic_base_addr = 0x0;
+        #if APIC_CONFIG_MSR
+            uint64_t apic_base_msr = 0x0;
+            rdmsr_on_cpu(IA32_APIC_BASE_MSR, get_cpu(), &apic_base_msr);
+            ASSERT( (apic_base_msr & APIC_BASE_MSR_ENABLE) );
+            ASSERT( !(apic_base_msr & APIC_BASE_MSR_X2APIC) );
+            apic_base_addr = apic_base_msr & ~APIC_BASE_ADDR_MASK;
+        #else
+            apic_base_addr = APIC_BASE;
+        #endif
+
+        apic_base = remap(apic_base_addr);
+        libsgxstep_info("established local memory mapping for APIC_BASE=%p at %p", (void*) apic_base_addr, apic_base);
+    }
+#else /* X2APIC */
+    /* See irq_entry.S to see how these are used. */
+    void __wrmsr_gate(void);
+    void __rdmsr_gate(void);
+
+    /*
+     * Install custom ring-0 IRQ gates to read/write privileged X2APIC MSR registers.
+     */
+    static void do_apic_init(void)
+    {
+        install_priv_gate(__wrmsr_gate, WRMSR_GATE_VECTOR);
+        install_priv_gate(__rdmsr_gate, RDMSR_GATE_VECTOR);
+    }
+#endif /* X2APIC */

-/*
- * Code below maps APIC timer MMIO registers in user space.
- *
- * NOTE: we require xAPIC mode, since "In x2APIC mode, the memory mapped
- * interface is not available and any access to the MMIO interface will behave
- * similar to that of a legacy xAPIC in globally disabled state" (Intel SDM
- * 10.12.2).
- *
- * Advised Linux kernel parameters are: "nox2apic iomem=relaxed no_timer_check"
- */
 void apic_init(void)
 {
-    if (apic_base) return;
+    if (g_apic_setup) return;

-    uintptr_t apic_base_addr = 0x0;
-    #if APIC_CONFIG_MSR
-        uint64_t apic_base_msr = 0x0;
-        rdmsr_on_cpu(IA32_APIC_BASE_MSR, get_cpu(), &apic_base_msr);
-        ASSERT( (apic_base_msr & APIC_BASE_MSR_ENABLE) );
-        ASSERT( !(apic_base_msr & APIC_BASE_MSR_X2APIC) );
-        apic_base_addr = apic_base_msr & ~APIC_BASE_ADDR_MASK;
-    #else
-        apic_base_addr = APIC_BASE;
-    #endif
-
-    apic_base = remap(apic_base_addr);
-    libsgxstep_info("established local memory mapping for APIC_BASE=%p at %p", (void*) apic_base_addr, apic_base);
+    do_apic_init();
+    g_apic_setup = 1;

     libsgxstep_info("APIC_ID=%#x; LVTT=%#x; TDCR=%#x",
         apic_id(), apic_read(APIC_LVTT), apic_read(APIC_TDCR));
@@ -65,7 +89,10 @@ void apic_init(void)
 uint8_t apic_id(void)
 {
     uint32_t id = apic_read(APIC_ID);
-    id = (id & APIC_ID_MASK) >> APIC_ID_SHIFT;
+    /* SDM: Figure 11-6. Local APIC ID Register */
+    #if !X2APIC
+        id = (id & APIC_ID_MASK) >> APIC_ID_SHIFT;
+    #endif

     return (uint8_t) id;
 }
@@ -88,10 +115,14 @@ int apic_timer_deadline(uint8_t vector)
     /* In xAPIC mode the memory-mapped write to LVTT needs to be serialized.
      */
     asm volatile("mfence" : : : "memory");
+
+    libsgxstep_info("APIC timer tsc-deadline mode (lvtt=%x/tdcr=%x)",
+        apic_read(APIC_LVTT), apic_read(APIC_TDCR));
 }

-void apic_timer_deadline_irq(int tsc_diff)
+void apic_timer_deadline_irq(int tsc_offset)
 {
-    uint64_t now = rdtsc_begin();
-    wrmsr_on_cpu(IA32_TSC_DEADLINE_MSR, get_cpu(), now + tsc_diff);
+    g_apic_deadline_tsc_begin = rdtsc_begin();
+    /* NOTE: don't use apic_write here as this is a 64-bit MSR */
+    wrmsr(IA32_TSC_DEADLINE_MSR, g_apic_deadline_tsc_begin + tsc_offset);
 }
diff --git a/libsgxstep/apic.h b/libsgxstep/apic.h
index 2a896e1..c2cc1bd 100644
--- a/libsgxstep/apic.h
+++ b/libsgxstep/apic.h
@@ -22,84 +22,104 @@
 #define SGX_STEP_APIC_H

 #include "debug.h"
+#include "sched.h"
+#include "cpu.h"
 #include "config.h"
 #include <stdint.h>

-#if APIC_CONFIG_MSR
-    #define APIC_BASE_MSR_X2APIC 0x400
-    #define APIC_BASE_MSR_ENABLE 0x800
-    #define APIC_BASE_ADDR_MASK 0xfff
-#else
-    #define APIC_BASE 0xfee00000
-#endif
-
-#define APIC_ICR_LOW 0x300
-#define APIC_ICR_HIGH 0x310
-
-#define APIC_LVTT 0x320
-#define APIC_TDCR 0x3e0
-#define APIC_TMICT 0x380
-#define APIC_TMCCT 0x390
-
-#define APIC_ID 0x20
-#define APIC_ID_SHIFT 24
-#define APIC_ID_MASK (0xff << APIC_ID_SHIFT)
-
-#define APIC_EOI 0xb0
-
-#define APIC_TPR 0x80
-#define APIC_PPR 0xa0
-
-#define APIC_TDR_DIV_1 0xb
-#define APIC_TDR_DIV_2 0x0
-#define APIC_LVTT_ONESHOT (0 << 17)
-#define APIC_LVTT_DEADLINE (2 << 17)
-
-#define APIC_IPI_CFG 0xc08f1
-
-#define APIC_ICR_VECTOR(n) (n & 0xFF)
-#define APIC_ICR_DELIVERY_FIXED (0x0 << 8)
-#define APIC_ICR_LEVEL_ASSERT (0x1 << 14)
-#define APIC_ICR_DEST_SELF (0x1 << 18)
-#define APIC_ICR_DEST_PHYSICAL (0x0 << 11)
-#define APIC_ICR_DEST_LOGICAL (0x1 << 11)
-#define APIC_ICR_DEST_MASK 0xff000000
-
-extern void* apic_base;
-extern uint32_t apic_lvtt;
+/* APIC MMIO/MSR register address map (cf. SDM Table 11.6) */
+#if !X2APIC
+    #if APIC_CONFIG_MSR
+        #define APIC_BASE_MSR_X2APIC 0x400
+        #define APIC_BASE_MSR_ENABLE 0x800
+        #define APIC_BASE_ADDR_MASK 0xfff
+    #else
+        #define APIC_BASE 0xfee00000
+    #endif
+
+    #define APIC_ID 0x20
+    #define APIC_EOI 0xb0
+    #define APIC_ICR_LOW 0x300
+    #define APIC_ICR_HIGH 0x310
+    #define APIC_LVTT 0x320
+    #define APIC_TMICT 0x380
+    #define APIC_TDCR 0x3e0
+#else /* X2APIC */
+    #define APIC_ID 0x802
+    #define APIC_EOI 0x80b
+    #define APIC_ICR 0x830
+    #define APIC_LVTT 0x832
+    #define APIC_TMICT 0x838
+    #define APIC_TDCR 0x83e
+#endif /* X2APIC */
+
+#define APIC_ID_SHIFT 24
+#define APIC_ID_MASK (0xff << APIC_ID_SHIFT)
+#define APIC_TDR_DIV_1 0xb
+#define APIC_TDR_DIV_2 0x0
+#define APIC_LVTT_ONESHOT (0 << 17)
+#define APIC_LVTT_DEADLINE (2 << 17)
+
+#define APIC_ICR_VECTOR(n) (n & 0xFF)
+#define APIC_ICR_DELIVERY_FIXED (0x0 << 8)
+#define APIC_ICR_LEVEL_ASSERT (0x1 << 14)
+#define APIC_ICR_DEST_SELF (0x1 << 18)
+#define APIC_ICR_DEST_PHYSICAL (0x0 << 11)
+#define APIC_ICR_DEST_LOGICAL (0x1 << 11)
+#define APIC_ICR_DEST_MASK 0xff000000
+
+extern int g_apic_setup;
+extern uint64_t g_apic_deadline_tsc_begin;

 void apic_init(void);
-
-/*
- * From Linux kernel source: /arch/x86/include/asm/apic.h
- * NOTE: Intel SDM: "any access that touches bytes 4 through 15 of an APIC
- * register may cause undefined behavior and must not be executed."
- */
-static inline int apic_write(uint32_t reg, uint32_t v)
-{
-    volatile uint32_t *addr;
-    if (!apic_base) apic_init();
-
-    addr = (volatile uint32_t *)(apic_base + reg);
-    __asm__ volatile ("movl %1, %0\n\t"
-        :"=m"(*addr):"r"(v):);
-
-    return 0;
-}
-
-static inline uint32_t apic_read(uint32_t reg)
-{
-    if (!apic_base) apic_init();
-
-    return *((volatile uint32_t *)(apic_base + reg));
-}
-
-//#define apic_send_ipi() apic_write(APIC_ICR_LOW, APIC_IPI_CFG)
-#define apic_timer_irq(tsc) apic_write(APIC_TMICT, tsc);
-#define apic_send_ipi_self(n) apic_write(APIC_ICR_LOW, APIC_ICR_VECTOR(n) | APIC_ICR_DELIVERY_FIXED | APIC_ICR_LEVEL_ASSERT | APIC_ICR_DEST_SELF)
+
+/* read/write functions for XAPIC in MMIO access mode */
+#if !X2APIC
+    /*
+     * From Linux kernel source: /arch/x86/include/asm/apic.h
+     * NOTE: Intel SDM: "any access that touches bytes 4 through 15 of an APIC
+     * register may cause undefined behavior and must not be executed."
+     */
+    static inline int apic_write(uint32_t reg, uint32_t v)
+    {
+        volatile uint32_t *addr;
+        if (!g_apic_setup) apic_init();
+
+        addr = (volatile uint32_t *)(apic_base + reg);
+        __asm__ volatile ("movl %1, %0\n\t"
+            :"=m"(*addr):"r"(v):);
+
+        return 0;
+    }
+
+    static inline uint32_t apic_read(uint32_t reg)
+    {
+        if (!g_apic_setup) apic_init();
+
+        return *((volatile uint32_t *)(apic_base + reg));
+    }
+#else /* X2APIC */
+    static inline int apic_write(uint32_t reg, uint32_t v)
+    {
+        if (!g_apic_setup) apic_init();
+
+        wrmsr(reg, v);
+        return 0;
+    }
+
+    static inline uint32_t apic_read(uint32_t reg)
+    {
+        if (!g_apic_setup) apic_init();
+
+        return rdmsr(reg);
+    }
+#endif /* X2APIC */
+
+#define apic_timer_irq(interval) apic_write(APIC_TMICT, interval);
+#define apic_send_ipi_self(n) apic_write(APIC_ICR_LOW, APIC_ICR_VECTOR(n) | APIC_ICR_DELIVERY_FIXED | APIC_ICR_LEVEL_ASSERT | APIC_ICR_DEST_SELF)

 uint8_t apic_id(void);
 int apic_timer_oneshot(uint8_t vector);
 int apic_timer_deadline(uint8_t vector);
-void apic_timer_deadline_irq(int tsc_diff);
+void apic_timer_deadline_irq(int tsc_offset);
 #endif
diff --git a/libsgxstep/config.h b/libsgxstep/config.h
index f111152..86842b3 100644
--- a/libsgxstep/config.h
+++ b/libsgxstep/config.h
@@ -29,11 +29,21 @@
 #define USER_IDT_ENABLE 1
 #define IRQ_VECTOR 45
 #define IRQ_PRIV_VECTOR 49
+#define RDMSR_GATE_VECTOR 50
+#define WRMSR_GATE_VECTOR 51
 #define GDT_VECTOR 13
-#if (M32 != 1)
-  #define APIC_CONFIG_MSR 1
+
+/*
+ * Some recent CPUs with LEGACY_XAPIC_DISABLED only support x2apic mode with
+ * SGX. In x2apic mode all APIC configuration needs to go through ring-0
+ * RD/WRMSR instructions, which is also the only access mode supported for
+ * IA32_TSC_DEADLINE.
+ */
+#define X2APIC 1
+#if (!X2APIC && !M32)
+    #define APIC_CONFIG_MSR 1
 #else
-  #define APIC_CONFIG_MSR 0
+    #define APIC_CONFIG_MSR 0
 #endif

 #define VICTIM_CPU 1
diff --git a/libsgxstep/cpu.c b/libsgxstep/cpu.c
index 79795fc..313dc3c 100644
--- a/libsgxstep/cpu.c
+++ b/libsgxstep/cpu.c
@@ -104,17 +104,3 @@ void clflush(void *p)
         : "c" (p)
         : "rax");
 }
-
-uint64_t rdmsr(uint32_t msr)
-{
-    uint64_t lo, hi;
-    asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
-    return ((hi<<32) | lo);
-}
-
-void wrmsr(uint32_t msr, uint64_t val)
-{
-    uint32_t hi = (uint32_t) (val>>32);
-    uint32_t lo = (uint32_t) val;
-    asm volatile("wrmsr" : : "a"(lo), "d"(hi), "c"(msr));
-}
diff --git a/libsgxstep/idt.c b/libsgxstep/idt.c
index c121632..dad2dac 100644
--- a/libsgxstep/idt.c
+++ b/libsgxstep/idt.c
@@ -8,7 +8,7 @@
 /* See irq_entry.S to see how these are used.
  */
 void __ss_irq_gate(void);
-exec_priv_cb_t __ss_irq_gate_cb = NULL;
+extern exec_priv_cb_t __ss_irq_gate_cb;

 uint64_t sgx_step_isr_kernel_map_offset = 0;

@@ -146,52 +146,31 @@ void install_kernel_irq_handler(idt_t *idt, void *asm_handler, int vector)
     install_irq_handler(idt, asm_handler, vector, KERNEL_CS, GATE_INTERRUPT);
 }

-void __attribute__((noinline)) trigger_sw_irq(void)
+void install_priv_gate(void *asm_handler, int vector)
 {
+    idt_t idt;
+    libsgxstep_info("locking user-space IRQ gate handler page at %p", __ss_irq_gate);
+    ASSERT( !mlock(__ss_irq_gate, 0x1000) );
+
+    libsgxstep_info("installing ring-0 IRQ gate");
+    ASSERT( !claim_cpu(VICTIM_CPU) );
+    map_idt(&idt);
     /*
-     * NOTE: separate C function to make sure caller-save registers are
-     * properly stored and restored by the compiler.
+     * In principle, we could use a trap gate to make the exec_priv code
+     * interruptible, but it seems the Linux kernel does not expect and
+     * freezes when interrupting ring-0 code. So we use an interrupt gate
+     * here to make the exec_priv code uninterruptible.
      */
-    asm("int %0\n\t" ::"i"(IRQ_PRIV_VECTOR):);
+    install_kernel_irq_handler(&idt, asm_handler, vector);
+    free_map(idt.base);
 }

-/*
- * Executes the provided callback function with ring-0 privileges by installing
- * a custom interrupt gate.
- *
- * NOTE: Calling `exec_priv` may lead to unpredictable system freezes when
- * passing larger or complex functions. Keep in mind the following for the
- * callback function:
- *
- * 1. Executes with interrupts disabled, best to keep it short.
- * 2. Don't use any system calls (e.g., libc).
- * 3. Avoid page-fault exceptions: no illegal accesses and preferably
- *    `mlock` all code/data pages.
- *
- * While `exec_priv` may be greatly useful to quickly test out some privileged
- * functionality in ring-0 C code without recompiling the kernel, if long-term
- * stability is desired it may be best to pass a carefully crafted asm callback
- * function.
- */
 void exec_priv(exec_priv_cb_t cb)
 {
-    idt_t idt;
     if (!__ss_irq_gate_cb)
     {
-        libsgxstep_info("locking user-space IRQ gate handler page at %p", __ss_irq_gate);
-        ASSERT( !mlock(__ss_irq_gate, 0x1000) );
-
-        libsgxstep_info("installing and calling ring-0 IRQ gate");
-        ASSERT( !claim_cpu(VICTIM_CPU) );
-        map_idt(&idt);
-        /*
-         * In principle, we could use a trap gate to make the exec_priv code
-         * interruptible, but it seems the Linux kernel does not expect and
-         * freezes when interrupting ring-0 code. So we use an interrupt gate
-         * here to make the exec_priv code uninterruptible.
-         */
-        install_irq_handler(&idt, __ss_irq_gate, IRQ_PRIV_VECTOR, KERNEL_CS, GATE_INTERRUPT);
-        free_map(idt.base);
+        install_priv_gate(__ss_irq_gate, IRQ_PRIV_VECTOR);
+        ASSERT( !mlock(&__ss_irq_gate_cb, 0x1000) );
     }

     __ss_irq_gate_cb = cb;
diff --git a/libsgxstep/idt.h b/libsgxstep/idt.h
index 5d63bc2..632a786 100644
--- a/libsgxstep/idt.h
+++ b/libsgxstep/idt.h
@@ -32,7 +32,26 @@ typedef struct {
 typedef void (*irq_cb_t)(uint8_t *rsp);
 typedef void (*exec_priv_cb_t)(void);

+/*
+ * Executes the provided callback function with ring-0 privileges by installing
+ * a custom interrupt gate.
+ *
+ * NOTE: Calling `exec_priv` may lead to unpredictable system freezes when
+ * passing larger or complex functions. Keep in mind the following for the
+ * callback function:
+ *
+ * 1. Executes with interrupts disabled, best to keep it short.
+ * 2. Don't use any system calls (e.g., libc).
+ * 3. Avoid page-fault exceptions: no illegal accesses and preferably
+ *    `mlock` all code/data pages.
+ *
+ * While `exec_priv` may be greatly useful to quickly test out some privileged
+ * functionality in ring-0 C code without recompiling the kernel, if long-term
+ * stability is desired it may be best to pass a carefully crafted asm callback
+ * function.
+ */
 void exec_priv(exec_priv_cb_t cb);
+void trigger_sw_irq();

 void map_idt(idt_t *idt);
 void dump_idt(idt_t *idt);
@@ -40,6 +59,7 @@ void dump_gate(gate_desc_t *gate, int idx);

 void install_user_irq_handler(idt_t *idt, void* asm_handler, int vector);
 void install_kernel_irq_handler(idt_t *idt, void *asm_handler, int vector);
+void install_priv_gate(void *asm_handler, int vector);

 void __ss_irq_handler(void);
 extern int volatile __ss_irq_fired, __ss_irq_count, __ss_irq_cpl;
diff --git a/libsgxstep/irq_entry.S b/libsgxstep/irq_entry.S
index 08b3ff6..93de73e 100644
--- a/libsgxstep/irq_entry.S
+++ b/libsgxstep/irq_entry.S
@@ -12,17 +12,21 @@
  *
  */
 /* ********************************************************************** */
+#include "config.h"
+
     .section isr_section,"awx",@progbits
     .align 0x1000
-    .global __ss_irq_fired, __ss_irq_count, __ss_irq_cpl, apic_base, nemesis_tsc_aex, __ss_irq_rip
+    .global __ss_irq_fired, __ss_irq_count, __ss_irq_cpl, apic_base, nemesis_tsc_aex, __ss_irq_rip, __ss_irq_rax
 __ss_irq_fired:
     .int 0x0
 __ss_irq_count:
     .int 0x0
 __ss_irq_cpl:
     .int 0xff
+#if !X2APIC
 apic_base:
     .quad 0x0
+#endif
 nemesis_tsc_aex:
     .quad 0x0;
 /* not sure there's a kernel stack we can use(?) */
@@ -30,6 +34,8 @@ __ss_irq_rax:
     .quad 0x0
 __ss_irq_rdx:
     .quad 0x0
+__ss_irq_rcx:
+    .quad 0x0
 __ss_irq_rip:
     .quad 0x0

@@ -44,6 +50,7 @@ __ss_irq_handler:
     rdtsc
     mov %eax, nemesis_tsc_aex(%rip)
     mov %edx, nemesis_tsc_aex+4(%rip)
+    mov %rcx, __ss_irq_rcx(%rip)

     /* IRQ bookkeeping */
     mov 0(%rsp), %rax
@@ -54,6 +61,7 @@ __ss_irq_handler:
     incl __ss_irq_fired(%rip)

     /* apic_write(APIC_EOI, 0x0); */
+#if !X2APIC
     lea apic_base(%rip), %rax
     mov (%rax),%rax
     test %rax, %rax
@@ -61,8 +69,15 @@ __ss_irq_handler:
     add $0xb0, %rax
     movl $0x0, (%rax)
 1:
+#else
+    xor %eax,%eax
+    xor %edx,%edx
+    mov $0x80b, %ecx
+    wrmsr
+#endif
     mov __ss_irq_rax(%rip), %rax
     mov __ss_irq_rdx(%rip), %rdx
+    mov __ss_irq_rcx(%rip), %rcx
     iretq

 /* ********************************************************************** */
@@ -72,9 +87,69 @@ __ss_irq_handler:
  * interrupt `int` instruction. Hence, we don't have to map it into the global
  * kernel virtual address range.
  */
+    .data
+    .align 0x1000
+    .global __ss_irq_gate_cb
+__ss_irq_gate_cb:
+    .quad 0x0
+
     .text
     .align 0x1000
     .global __ss_irq_gate
 __ss_irq_gate:
     call *__ss_irq_gate_cb(%rip)
     iretq
+
+    /*
+     * void trigger_sw_irq(void);
+     *
+     * \note Separate asm function to make sure caller-save
+     * registers are properly stored and restored.
+     */
+    .global trigger_sw_irq
+trigger_sw_irq:
+    int $IRQ_PRIV_VECTOR
+    retq
+
+    /*
+     * void wrmsr(uint32_t reg, uint64_t val);
+     *
+     * \arg rdi MSR address (32-bit)
+     * \arg rsi value (64-bit)
+     *
+     * \note clobbers RDI, RSI, RAX, RCX, RDX (all caller-save)
+     */
+    .global wrmsr
+wrmsr:
+    int $WRMSR_GATE_VECTOR
+    retq
+
+    .global __wrmsr_gate
+__wrmsr_gate:
+    mov %edi, %ecx
+    mov %esi, %eax
+    shr $32, %rsi
+    mov %esi, %edx
+    wrmsr
+    iretq
+
+    /*
+     * uint64_t rdmsr(uint32_t reg);
+     *
+     * \arg rdi MSR address (32-bit)
+     * \ret rax value (64-bit)
+     *
+     * \note clobbers RDI, RAX, RCX, RDX (all caller-save)
+     */
+    .global rdmsr
+rdmsr:
+    int $RDMSR_GATE_VECTOR
+    retq
+
+    .global __rdmsr_gate
+__rdmsr_gate:
+    mov %edi, %ecx
+    rdmsr
+    shl $32, %rdx
+    or %rdx, %rax
+    iretq
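
A few notes and usage sketches to go with the patch.

First, the register constants introduced in apic.h: in x2APIC mode every xAPIC MMIO register at offset 0xXXX is reachable as MSR 0x800 + (0xXXX >> 4) (Intel SDM, x2APIC MSR address space), which is where the 0x802/0x80b/0x830/0x832/0x838/0x83e values come from. A standalone compile-time check of that relationship (not taken from the patch files; assumes a C11 compiler):

#include <assert.h>

/* x2APIC exposes each xAPIC MMIO register (offset off) as MSR 0x800 + (off >> 4). */
#define X2APIC_MSR(xapic_mmio_off) (0x800 + ((xapic_mmio_off) >> 4))

static_assert(X2APIC_MSR(0x020) == 0x802, "APIC_ID");
static_assert(X2APIC_MSR(0x0b0) == 0x80b, "APIC_EOI");
static_assert(X2APIC_MSR(0x300) == 0x830, "APIC_ICR");
static_assert(X2APIC_MSR(0x320) == 0x832, "APIC_LVTT");
static_assert(X2APIC_MSR(0x380) == 0x838, "APIC_TMICT");
static_assert(X2APIC_MSR(0x3e0) == 0x83e, "APIC_TDCR");

The same mapping explains the hard-coded 0x80b in the EOI path of __ss_irq_handler, and why apic_id() no longer shifts by APIC_ID_SHIFT when X2APIC is set: the x2APIC ID register returns the full 32-bit APIC ID instead of packing it into bits 31:24.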
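Second, the __wrmsr_gate/__rdmsr_gate stubs bridge the System V AMD64 calling convention used by the new user-space wrmsr()/rdmsr() wrappers (MSR index in %rdi, 64-bit value in %rsi, return value in %rax) to the fixed register interface of the privileged instructions (index in %ecx, value split across %edx:%eax). A C sketch of what the write path does once `int $WRMSR_GATE_VECTOR` has switched to ring 0; wrmsr_gate_body is an illustrative name, the real code is the asm stub in irq_entry.S:

#include <stdint.h>

/* Ring-0 equivalent of the __wrmsr_gate stub (sketch only). */
static inline void wrmsr_gate_body(uint32_t msr, uint64_t val)
{
    uint32_t lo = (uint32_t) val;            /* mov %esi, %eax                */
    uint32_t hi = (uint32_t) (val >> 32);    /* shr $32, %rsi; mov %esi, %edx */
    asm volatile("wrmsr" : : "c"(msr), "a"(lo), "d"(hi));  /* mov %edi, %ecx; wrmsr */
}

This is essentially the helper that the patch deletes from cpu.c, now reachable from ring 3 through the interrupt gate. apic_timer_deadline_irq(), for example, ends up writing IA32_TSC_DEADLINE (MSR 0x6e0) with the low half of the target TSC in %eax and the high half in %edx, which is why it calls wrmsr() directly rather than the 32-bit apic_write().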
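Third, the exec_priv() contract that moved into idt.h (short callback, no syscalls, no page faults) is easiest to see with a concrete callback. A minimal sketch that reads CR4 from ring 0, assuming an application built inside the sgx-step tree; read_cr4_cb, cr4_val and dump_cr4 are illustrative names only:

#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>
#include "libsgxstep/idt.h"
#include "libsgxstep/debug.h"

static volatile uint64_t cr4_val;

/* Runs in ring 0 with interrupts disabled: tiny body, no libc, no page faults. */
static void read_cr4_cb(void)
{
    asm volatile("mov %%cr4, %0" : "=r"(cr4_val));
}

void dump_cr4(void)
{
    /* Pin the callback's code and data pages, as the idt.h comment advises. */
    ASSERT( !mlock((void*) read_cr4_cb, 0x1000) );
    ASSERT( !mlock((void*) &cr4_val, sizeof(cr4_val)) );

    exec_priv(read_cr4_cb);
    printf("CR4 = %#lx\n", (unsigned long) cr4_val);
}

The first exec_priv() call goes through the new install_priv_gate() helper, exactly like the rdmsr/wrmsr gates installed by apic_init() when X2APIC is enabled.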
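Finally, a rough sketch of how the TSC-deadline timer path is meant to be driven with X2APIC=1: apic_init() installs the MSR gates, apic_timer_deadline() programs LVTT through apic_write() (now a wrmsr() behind a gate), and apic_timer_deadline_irq() arms IA32_TSC_DEADLINE relative to the TSC value it records in g_apic_deadline_tsc_begin. The CPU-claiming and enclave setup from the existing sgx-step examples is omitted; DEADLINE_TSC_OFFSET and step_victim() are placeholders, and the choice of install_kernel_irq_handler() is illustrative:

#include <stdint.h>
#include "libsgxstep/apic.h"
#include "libsgxstep/idt.h"
#include "libsgxstep/config.h"

#define DEADLINE_TSC_OFFSET 50          /* placeholder: needs per-CPU tuning     */

extern void step_victim(void);          /* placeholder for ERESUME / victim code */

void timer_demo(void)
{
    idt_t idt;

    apic_init();                        /* X2APIC=1: installs the rd/wrmsr gates */
    map_idt(&idt);
    install_kernel_irq_handler(&idt, __ss_irq_handler, IRQ_VECTOR);
    apic_timer_deadline(IRQ_VECTOR);    /* LVTT := IRQ_VECTOR | TSC-deadline mode */

    for (int i = 0; i < 100; i++)
    {
        __ss_irq_fired = 0;
        apic_timer_deadline_irq(DEADLINE_TSC_OFFSET);
        step_victim();
        while (!__ss_irq_fired);        /* handler records TSC, signals EOI      */
    }
}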