From 5ed4ec1a607a179bd5a0a4d3d39a8459595a0f19 Mon Sep 17 00:00:00 2001 From: Andreas Kogler Date: Mon, 9 Jan 2023 22:27:26 +0100 Subject: [PATCH 1/7] Map IRQ handlers to the kernel address space Introduced a new ISR section containing the IRQ handlers, which is mapped into the kernel address space. The mapping points to the same physical pages, removing crashes if a hooked IRQ is triggered while another address space is active. This even allows handling IRQs triggered from other processes or cores. --- app/idt_isr_map/.gitignore | 13 +++++ app/idt_isr_map/Makefile | 64 ++++++++++++++++++++++ app/idt_isr_map/README.md | 41 ++++++++++++++ app/idt_isr_map/main.c | 94 ++++++++++++++++++++++++++++++++ app/idt_isr_map/trigger.c | 0 kernel/sgxstep.c | 109 +++++++++++++++++++++++++++++++++++-- kernel/sgxstep_internal.h | 9 +++ kernel/sgxstep_ioctl.h | 7 +++ libsgxstep/idt.c | 66 +++++++++++++++++++++- libsgxstep/irq_entry.S | 9 ++- 10 files changed, 402 insertions(+), 10 deletions(-) create mode 100644 app/idt_isr_map/.gitignore create mode 100644 app/idt_isr_map/Makefile create mode 100644 app/idt_isr_map/README.md create mode 100644 app/idt_isr_map/main.c create mode 100644 app/idt_isr_map/trigger.c diff --git a/app/idt_isr_map/.gitignore b/app/idt_isr_map/.gitignore new file mode 100644 index 0000000..f1aeaf0 --- /dev/null +++ b/app/idt_isr_map/.gitignore @@ -0,0 +1,13 @@ +app +measurements.txt +measurements_raw.txt +outlier_idx.txt +plot.pdf +xlabels.gp + +*.swp + +out.txt +parsed.txt +parsed_zz.txt +parsed_strlen.txt diff --git a/app/idt_isr_map/Makefile b/app/idt_isr_map/Makefile new file mode 100644 index 0000000..45ba47a --- /dev/null +++ b/app/idt_isr_map/Makefile @@ -0,0 +1,64 @@ +LIBSGXSTEP_DIR = ../.. +LIBSGXSTEP = $(LIBSGXSTEP_DIR)/libsgxstep +-include $(LIBSGXSTEP)/Makefile.config + +URTS_LIB_PATH = $(LIBSGXSTEP_DIR)/linux-sgx/psw/urts/linux + +ifeq ($(SGX_SDK),) + SGX_SDK = /opt/intel/sgxsdk +endif +export SGX_SDK +ifneq ($(SGX_SDK), /opt/intel/sgxsdk) + URTS_LD_LIBRARY_PATH = LD_LIBRARY_PATH=$(LIBSGXSTEP_DIR)/linux-sgx/psw/urts/linux +endif + +SUBDIRS = $(LIBSGXSTEP) + +CC = gcc +AS = gcc +LD = gcc + +CFLAGS += -fPIC -fno-stack-protector -fno-builtin -fno-jump-tables \ + -fno-common -Wno-attributes -g -D_GNU_SOURCE -O0 +INCLUDE = -I$(SGX_SDK)/include/ -I$(LIBSGXSTEP_DIR) +LDFLAGS += -lsgx-step -lsgx_urts \ + -lsgx_uae_service -pthread $(SUBDIRS:%=-L %) -L$(SGX_SDK)/lib64/ + +SOURCES = $(shell ls *.c) +OBJECTS = $(SOURCES:.c=.o) +OUTPUT = app + +BUILDDIRS = $(SUBDIRS:%=build-%) +CLEANDIRS = $(SUBDIRS:%=clean-%) + + +MAKEFLAGS += --silent + +all: $(OUTPUT) + +run: clean all + sudo $(URTS_LD_LIBRARY_PATH) ./app + +$(OUTPUT): $(BUILDDIRS) $(OBJECTS) + echo "$(INDENT)[LD]" $(OBJECTS) $(LIBS) -o $(OUTPUT) + $(LD) $(OBJECTS) $(LDFLAGS) -o $(OUTPUT) + +%.o : %.c + echo "$(INDENT)[CC] " $< + $(CC) $(CFLAGS) $(INCLUDE) -c $< + +%.o : %.S + echo "$(INDENT)[AS] " $< + $(AS) $(INCLUDE) -c $< -o $@ + +clean: $(CLEANDIRS) + echo "$(INDENT)[RM]" $(OBJECTS) $(OUTPUT) + rm -f $(OBJECTS) $(OUTPUT) + +$(BUILDDIRS): + echo "$(INDENT)[===] $(@:build-%=%) [===]" + $(MAKE) -C $(@:build-%=%) INDENT+="$(INDENT_STEP)" curr-dir=$(curr-dir)/$(@:build-%=%) + +$(CLEANDIRS): + echo "$(INDENT)[===] $(@:clean-%=%) [===]" + $(MAKE) clean -C $(@:clean-%=%) INDENT+="$(INDENT_STEP)" curr-dir=$(curr-dir)/$(@:build-%=%) diff --git a/app/idt_isr_map/README.md b/app/idt_isr_map/README.md new file mode 100644 index 0000000..5c0cf02 --- /dev/null +++ b/app/idt_isr_map/README.md @@ -0,0 +1,41 @@ +# Test for the kernel mapped 
interrupt handlers
+
+This test uses the kernel mapped ISR region to demonstrate sw IRQ handling from other processes and cores.
+
+## Usage:
+Start the listener:
+```
+sudo ./app
+```
+
+Trigger the SW IRQ from another core and process:
+```
+taskset -c 1 ./app trigger
+```
+
+
+## Sample output:
+
+
+```
+[idt.c] locking IRQ handler pages 0x564b69f22000/0x564b69f21000
+[idt.c] setting up isr mapping: from 0x564b69f21000 to 0x564b69f2205b
+[pt.c] /dev/sgx-step opened!
+[idt.c] we received the base address from kernel 0xffffc900201ea000
+[idt.c] the offset to the kernel mapped ISR region is 0xffff72b4b62c9000
+[pt.c] /dev/mem opened!
+[sched.c] continuing on CPU 1
+[idt.c] DTR.base=0xfffffe0000000000/size=4095 (256 entries)
+[idt.c] established user space IDT mapping at 0x7f4302014000
+
+--------------------------------------------------------------------------------
+[main.c] Installing and testing ring0 IDT handler
+--------------------------------------------------------------------------------
+
+[idt.c] using kernel mapped ISR handler: 0x564b69f22000 -> 0xffffc900201eb000
+[idt.c] installed asm IRQ handler at 10:0x564b69f22000
+[idt.c] IDT[ 45] @0x7f43020142d0 = 0xffffc900201eb000 (seg sel 0x10); p=1; dpl=3; type=14; ist=0
+[main.c] wating for sw IRQ on vector 45 ...
+[main.c] returned from IRQ: my_cpl=3; irq_cpl=0; count=01; flags=0x246; nemesis=835151372
+[main.c] all is well; irq_count=1; exiting..
+```
diff --git a/app/idt_isr_map/main.c b/app/idt_isr_map/main.c
new file mode 100644
index 0000000..6324e69
--- /dev/null
+++ b/app/idt_isr_map/main.c
@@ -0,0 +1,94 @@
+/*
+ * This file is part of the SGX-Step enclave execution control framework.
+ *
+ * Copyright (C) 2017 Jo Van Bulck ,
+ * Raoul Strackx
+ *
+ * SGX-Step is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SGX-Step is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with SGX-Step. If not, see .
+ */ + +#include "libsgxstep/idt.h" +#include "libsgxstep/gdt.h" +#include "libsgxstep/apic.h" +#include "libsgxstep/cpu.h" +#include "libsgxstep/sched.h" +#include "libsgxstep/config.h" +#include + +#define DO_APIC_SW_IRQ 1 +#define DO_APIC_TMR_IRQ 0 +#define DO_EXEC_PRIV 0 +#define NUM 1000 +#define NEMESIS_HIGH 1 + +/* ------------------------------------------------------------ */ +/* This code may execute with ring0 privileges */ +int my_cpl = -1; +extern uint64_t nemesis_tsc_aex, nemesis_tsc_eresume; + +void pre_irq(void) +{ + my_cpl = get_cpl(); + __ss_irq_fired = 0; + nemesis_tsc_eresume = rdtsc_begin(); +} + +void do_irq_sw(void) +{ + asm("int %0\n\t" ::"i"(IRQ_VECTOR):); +} + + +/* ------------------------------------------------------------ */ + +void post_irq(void) +{ + ASSERT(__ss_irq_fired); + info("returned from IRQ: my_cpl=%d; irq_cpl=%d; count=%02d; flags=%p; nemesis=%d", + my_cpl, __ss_irq_cpl, __ss_irq_count, read_flags(), nemesis_tsc_aex - nemesis_tsc_eresume); +} + +void sgx_get_aep() {} +void sgx_set_aep() {} +void sgx_get_tcs() {} + +int main( int argc, char **argv ) +{ + if (argc == 2) { + info("triggering sw IRQ on vector %d!", IRQ_VECTOR); + do_irq_sw(); + return 0; + } + + idt_t idt = {0}; + ASSERT( !claim_cpu(VICTIM_CPU) ); + map_idt(&idt); + + info_event("Installing and testing ring0 IDT handler"); + install_kernel_irq_handler(&idt, __ss_irq_handler, IRQ_VECTOR); + + info("wating for sw IRQ on vector %d ...", IRQ_VECTOR); + + pre_irq(); + + while (__ss_irq_fired == 0) { + sleep(1); + } + + post_irq(); + + info("all is well; irq_count=%d; exiting..", __ss_irq_count); + return 0; + +} diff --git a/app/idt_isr_map/trigger.c b/app/idt_isr_map/trigger.c new file mode 100644 index 0000000..e69de29 diff --git a/kernel/sgxstep.c b/kernel/sgxstep.c index 293a730..9cd5654 100644 --- a/kernel/sgxstep.c +++ b/kernel/sgxstep.c @@ -31,6 +31,9 @@ #include #include #include +#include +#include +#include #include #include @@ -39,24 +42,78 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jo Van Bulck , Raoul Strackx "); MODULE_DESCRIPTION("SGX-Step: A Practical Attack Framework for Precise Enclave Execution Control"); -int target_cpu = -1; +static struct page **isr_pages = NULL; +static uint64_t isr_nr_pages = 0; +static void *isr_kernel_vbase = NULL; + +static int in_use = 0; + +typedef struct { + uint16_t size; + uint64_t base; +} __attribute__((packed)) dtr_t; + +static dtr_t original_dtr = {0}; + +static void *original_idt_copy = NULL; int step_open(struct inode *inode, struct file *file) { - if (target_cpu != -1) + if (in_use) { err("Device is already opened"); return -EBUSY; } - target_cpu = smp_processor_id(); + asm volatile ("sidt %0\n\t" + :"=m"(original_dtr) :: ); + + log("read idt: 0x%llx with size %u", original_dtr.base, original_dtr.size+1); + + original_idt_copy = kmalloc(original_dtr.size+1, GFP_KERNEL); + RET_ASSERT(original_idt_copy); + + memcpy(original_idt_copy, (void*)original_dtr.base, original_dtr.size+1); + log("copied original idt"); + + in_use = 1; return 0; } int step_release(struct inode *inode, struct file *file) { - target_cpu = -1; + uint64_t cr0; + asm volatile("mov %%cr0, %0" : "=r"(cr0)); + + // disable write protection + asm volatile("mov %0, %%cr0" : : "r"(cr0 & ~(1llu << 16))); + + // restore original idt to ensure no user pointers are left + memcpy((void*)original_dtr.base, original_idt_copy, original_dtr.size+1); + log("restored original idt at 0x%llx with size %u", original_dtr.base, original_dtr.size+1); + + // restore original cr0 + asm 
volatile("mov %0, %%cr0" : : "r"(cr0)); + + kfree(original_idt_copy); + original_idt_copy = NULL; + + if (isr_kernel_vbase) { + // freeup the kernel vbase mapping for isrs + vunmap(isr_kernel_vbase); + + // unpin the isr user physical pages + unpin_user_pages(isr_pages, isr_nr_pages); + + // free the isr user physical page structure + kfree(isr_pages); + + isr_kernel_vbase = NULL; + isr_pages = NULL; + isr_nr_pages = 0; + } + in_use = 0; return 0; } @@ -131,6 +188,47 @@ long sgx_step_get_pt_mapping(struct file *filep, unsigned int cmd, unsigned long return 0; } + +long sgx_step_ioctl_setup_isr_map(struct file *filep, unsigned int cmd, unsigned long arg) +{ + uint64_t nr_pinned_pages; + + setup_isr_map_t *data = (setup_isr_map_t*) arg; + + isr_nr_pages = (data->isr_stop - data->isr_start + PAGE_SIZE - 1) / PAGE_SIZE; + + isr_pages = kmalloc(isr_nr_pages * sizeof(struct page *), GFP_KERNEL); + RET_ASSERT_GOTO(isr_pages, "cannot allocate memory", out); + + nr_pinned_pages = pin_user_pages(data->isr_start & ~(PAGE_SIZE - 1), isr_nr_pages, FOLL_LONGTERM | FOLL_WRITE, isr_pages, NULL); + log("nr_pinned_pages = %llu should be %llu", nr_pinned_pages, isr_nr_pages); + + RET_ASSERT_GOTO(nr_pinned_pages == isr_nr_pages, "could not pin all isr pages", cleanup_pages); + + isr_kernel_vbase = vmap(isr_pages, isr_nr_pages, VM_READ | VM_EXEC | VM_SHARED, PAGE_SHARED_EXEC); + RET_ASSERT_GOTO(isr_kernel_vbase, "could not vmap isr", cleanup_pin); + + log("mapped isr to kernel virtual address 0x%llx", (uint64_t)isr_kernel_vbase); + log("first qword of the isr p[0]=0x%llx", *(uint64_t*)isr_kernel_vbase); + + data->isr_kernel_base = isr_kernel_vbase; + + return 0; + +cleanup_pin: + unpin_user_pages(isr_pages, isr_nr_pages); + +cleanup_pages: + kfree(isr_pages); + +out: + isr_kernel_vbase = NULL; + isr_pages = NULL; + isr_nr_pages = 0; + return -EINVAL; +} + + typedef long (*ioctl_t)(struct file *filep, unsigned int cmd, unsigned long arg); long step_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -147,6 +245,9 @@ long step_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) case SGX_STEP_IOCTL_INVPG: handler = sgx_step_ioctl_invpg; break; + case SGX_STEP_IOCTL_SETUP_ISR_MAP: + handler = sgx_step_ioctl_setup_isr_map; + break; default: return -EINVAL; } diff --git a/kernel/sgxstep_internal.h b/kernel/sgxstep_internal.h index bcd531d..fedc2b9 100644 --- a/kernel/sgxstep_internal.h +++ b/kernel/sgxstep_internal.h @@ -37,4 +37,13 @@ } \ } while(0) +#define RET_ASSERT_GOTO(cond, message, label) \ + do { \ + if (!(cond)) \ + { \ + err("assertion '" #cond "' failed: %s", message); \ + goto label; \ + } \ + } while(0) + #endif diff --git a/kernel/sgxstep_ioctl.h b/kernel/sgxstep_ioctl.h index 86dcb9c..da1c440 100644 --- a/kernel/sgxstep_ioctl.h +++ b/kernel/sgxstep_ioctl.h @@ -26,6 +26,7 @@ #define SGX_STEP_IOCTL_MAGIC 'L' #define SGX_STEP_IOCTL_GET_PT_MAPPING _IOWR(SGX_STEP_IOCTL_MAGIC, 0, address_mapping_t) #define SGX_STEP_IOCTL_INVPG _IOWR(SGX_STEP_IOCTL_MAGIC, 1, invpg_t) +#define SGX_STEP_IOCTL_SETUP_ISR_MAP _IOWR(SGX_STEP_IOCTL_MAGIC, 2, setup_isr_map_t) typedef struct { uint64_t virt; @@ -41,4 +42,10 @@ typedef struct { uint64_t adrs; } invpg_t; +typedef struct { + uint64_t isr_start; // in + uint64_t isr_stop; // in + uint64_t isr_kernel_base; // out +} setup_isr_map_t; + #endif diff --git a/libsgxstep/idt.c b/libsgxstep/idt.c index 2fba4f9..4d3c946 100644 --- a/libsgxstep/idt.c +++ b/libsgxstep/idt.c @@ -3,11 +3,56 @@ #include "apic.h" #include "sched.h" #include +#include + 
#include /* See irq_entry.S to see how these are used. */ void sgx_step_irq_gate_func(void); exec_priv_cb_t sgx_step_irq_gate_cb = NULL; +uint64_t sgx_step_isr_kernel_map_offset = 0; + +// this externs will be created by the compiler pointing to the ISR section. +extern void *__start_isr_section; +extern void *__stop_isr_section; + +extern int fd_step; + +static int is_in_isr_section(void *handler) { + return (void*)(&__start_isr_section) <= handler && handler < (void*)(&__stop_isr_section); +} + +static void setup_isr_map() +{ + setup_isr_map_t param; + + param.isr_start = (uint64_t)&__start_isr_section; + param.isr_stop = (uint64_t)&__stop_isr_section; + + info("setting up isr mapping: from 0x%lx to 0x%lx", param.isr_start, param.isr_stop); + + step_open(); + ASSERT( ioctl(fd_step, SGX_STEP_IOCTL_SETUP_ISR_MAP, ¶m) >= 0 ); + + info("we received the base address from kernel 0x%lx", param.isr_kernel_base); + + // calculate the virtual address space offset + sgx_step_isr_kernel_map_offset = param.isr_kernel_base - param.isr_start; + + info("the offset to the kernel mapped ISR region is 0x%lx", sgx_step_isr_kernel_map_offset); + + // This is currently a workaround: We are currently use one section for both data and code. + // Therefore, we need executable and writable pages which are per definition a security risk. + // Linux detects this and removes one of the flags, therefore we directly modify the page tables. + for (uint64_t address = param.isr_start; address < param.isr_stop; address += 0x1000) { + void *page = (void*)address + sgx_step_isr_kernel_map_offset; + uint64_t* pte = remap_page_table_level(page, PTE); + *pte = MARK_EXECUTABLE(*pte); + *pte = MARK_WRITABLE(*pte); + flush_tlb(page); + } +} + void dump_gate(gate_desc_t *gate, int idx) { info("IDT[%3d] @%p = %p (seg sel 0x%x); p=%d; dpl=%d; type=%02d; ist=%d", @@ -48,10 +93,23 @@ void install_irq_handler(idt_t *idt, void* asm_handler, int vector, cs_t seg, ga { ASSERT(vector >= 0 && vector < idt->entries); + uint64_t handler = (uint64_t)asm_handler; + + // check if the ISR section is mapped + if (sgx_step_isr_kernel_map_offset == 0) { + setup_isr_map(); + } + + // only if the handler is within the ISR section we use the kernel mapped handler + if (is_in_isr_section(asm_handler)) { + handler += sgx_step_isr_kernel_map_offset; + libsgxstep_info("using kernel mapped ISR handler: %p -> %p", asm_handler, (void*)handler); + } + gate_desc_t *gate = gate_ptr(idt->base, vector); - gate->offset_low = PTR_LOW(asm_handler); - gate->offset_middle = PTR_MIDDLE(asm_handler); - gate->offset_high = PTR_HIGH(asm_handler); + gate->offset_low = PTR_LOW(handler); + gate->offset_middle = PTR_MIDDLE(handler); + gate->offset_high = PTR_HIGH(handler); gate->p = 1; gate->segment = seg; @@ -65,6 +123,7 @@ void install_irq_handler(idt_t *idt, void* asm_handler, int vector, cs_t seg, ga #endif } + void install_user_irq_handler(idt_t *idt, void* asm_handler, int vector) { /* @@ -104,4 +163,5 @@ void __attribute__((constructor)) init_sgx_step( void ) info("locking IRQ handler pages %p/%p", &__ss_irq_handler, &__ss_irq_fired); ASSERT( !mlock(&__ss_irq_handler, 0x1000) ); ASSERT( !mlock((void*) &__ss_irq_fired, 0x1000) ); + // TODO: maybe call setup_isr_map here? this would require to open sgx_step in the constructor. 
} diff --git a/libsgxstep/irq_entry.S b/libsgxstep/irq_entry.S index f4b2ed8..c83a5a0 100644 --- a/libsgxstep/irq_entry.S +++ b/libsgxstep/irq_entry.S @@ -1,5 +1,5 @@ /* ********************************************************************** */ - .data + .section isr_section,"awx",@progbits .align 0x1000 .global __ss_irq_fired, __ss_irq_count, __ss_irq_cpl, apic_base, nemesis_tsc_aex __ss_irq_fired: @@ -19,7 +19,7 @@ __ss_irq_rdx: .quad 0x0 /* ********************************************************************** */ - .text + .section isr_section,"awx",@progbits .align 0x1000 .global __ss_irq_handler __ss_irq_handler: @@ -46,8 +46,11 @@ __ss_irq_handler: mov __ss_irq_rax(%rip), %rax mov __ss_irq_rdx(%rip), %rdx iretq - /* ********************************************************************** */ +/* don't put the custom user handler in the mapped ISR region (yet)! */ +/* the pointer could point to data outside the ISR region */ + .text + .align 0x1000 .global sgx_step_irq_gate_func sgx_step_irq_gate_func: call *sgx_step_irq_gate_cb(%rip) From 56689330cfa8f5121e9f6b7ac623d469e052f3c7 Mon Sep 17 00:00:00 2001 From: Andreas Kogler Date: Mon, 9 Jan 2023 22:30:04 +0100 Subject: [PATCH 2/7] Removed left over testing code and fixed indentation error --- app/idt_isr_map/README.md | 2 +- app/idt_isr_map/main.c | 4 ---- libsgxstep/idt.c | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/app/idt_isr_map/README.md b/app/idt_isr_map/README.md index 5c0cf02..8a4e791 100644 --- a/app/idt_isr_map/README.md +++ b/app/idt_isr_map/README.md @@ -8,7 +8,7 @@ Start the listener: sudo ./app ``` -Trigger the SW IRQ from another core and process: +Trigger the SW IRQ from another process: ``` taskset -c 1 ./app trigger ``` diff --git a/app/idt_isr_map/main.c b/app/idt_isr_map/main.c index 6324e69..a08faf3 100644 --- a/app/idt_isr_map/main.c +++ b/app/idt_isr_map/main.c @@ -59,10 +59,6 @@ void post_irq(void) my_cpl, __ss_irq_cpl, __ss_irq_count, read_flags(), nemesis_tsc_aex - nemesis_tsc_eresume); } -void sgx_get_aep() {} -void sgx_set_aep() {} -void sgx_get_tcs() {} - int main( int argc, char **argv ) { if (argc == 2) { diff --git a/libsgxstep/idt.c b/libsgxstep/idt.c index 4d3c946..b836a23 100644 --- a/libsgxstep/idt.c +++ b/libsgxstep/idt.c @@ -4,7 +4,7 @@ #include "sched.h" #include #include - #include +#include /* See irq_entry.S to see how these are used. 
*/ void sgx_step_irq_gate_func(void); From 1ffcc3400555083f94b6d2e2806245615c81365c Mon Sep 17 00:00:00 2001 From: Andreas Kogler Date: Mon, 9 Jan 2023 22:38:47 +0100 Subject: [PATCH 3/7] Removed left over debug message --- kernel/sgxstep.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sgxstep.c b/kernel/sgxstep.c index 9cd5654..78d3f5f 100644 --- a/kernel/sgxstep.c +++ b/kernel/sgxstep.c @@ -209,8 +209,7 @@ long sgx_step_ioctl_setup_isr_map(struct file *filep, unsigned int cmd, unsigned RET_ASSERT_GOTO(isr_kernel_vbase, "could not vmap isr", cleanup_pin); log("mapped isr to kernel virtual address 0x%llx", (uint64_t)isr_kernel_vbase); - log("first qword of the isr p[0]=0x%llx", *(uint64_t*)isr_kernel_vbase); - + data->isr_kernel_base = isr_kernel_vbase; return 0; From 543aeab81e58bd54edbf2b6ee5cbf5ed9f8cc5f0 Mon Sep 17 00:00:00 2001 From: Andreas Kogler Date: Tue, 10 Jan 2023 10:54:25 +0100 Subject: [PATCH 4/7] removed unused file trigger.c --- app/idt_isr_map/trigger.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 app/idt_isr_map/trigger.c diff --git a/app/idt_isr_map/trigger.c b/app/idt_isr_map/trigger.c deleted file mode 100644 index e69de29..0000000 From df721a76d3515079d125a9e034baf6195392afb0 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Tue, 10 Jan 2023 17:03:57 +0100 Subject: [PATCH 5/7] doc --- README.md | 4 ++-- kernel/sgxstep.c | 2 +- libsgxstep/idt.c | 26 +++++++++++++------------- libsgxstep/irq_entry.S | 2 ++ 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index d1c298e..84e1994 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ below. | `isolcpus=1` | Affinitize the victim process to an isolated CPU core. | | `nosmap nosmep` | Disable Supervisor Mode Access/Execution Prevention (to allow SGX-Step to execute ring-0 IRQ handlers on user pages). | | `clearcpuid=514` | Disable User-Mode Instruction Prevention (on newer CPUs). | -| `kpti=0` | Disable Kernel Page-Table Isolation (to avoid kernel panics with user IRQ handlers). | +| `pti=off` | Disable Kernel Page-Table Isolation (to avoid kernel panics with user IRQ handlers). | | `rcuupdate.rcu_cpu_stall_suppress=1` | Disable the kernel's read-copy update (RCU) CPU stall detector (to avoid warnings when single-stepping for a long time without calling the kernel's timer interrupt handler.) | | `msr.allow_writes=on` | Suppress kernel warning messages for model-specific register (MSR) writes by SGX-Step. | | `vdso=0` | Only on recent Linux kernels: disable vdso_sgx_enter_enclave library (not compatible with AEP interception patches). 
| @@ -125,7 +125,7 @@ Pass the desired boot parameters to the kernel as follows: ```bash $ sudo vim /etc/default/grub - # Add the following line: GRUB_CMDLINE_LINUX_DEFAULT="quiet splash nox2apic iomem=relaxed no_timer_check nosmep nosmap clearcpuid=514 kpti=0 isolcpus=1 nmi_watchdog=0 rcupdate.rcu_cpu_stall_suppress=1 msr.allow_writes=on vdso=0" + # Add the following line: GRUB_CMDLINE_LINUX_DEFAULT="quiet splash nox2apic iomem=relaxed no_timer_check nosmep nosmap clearcpuid=514 pti=off isolcpus=1 nmi_watchdog=0 rcupdate.rcu_cpu_stall_suppress=1 msr.allow_writes=on vdso=0" $ sudo update-grub && reboot # to inspect the boot parameters of the currently running kernel, execute: $ cat /proc/cmdline diff --git a/kernel/sgxstep.c b/kernel/sgxstep.c index 78d3f5f..d4e23a2 100644 --- a/kernel/sgxstep.c +++ b/kernel/sgxstep.c @@ -210,7 +210,7 @@ long sgx_step_ioctl_setup_isr_map(struct file *filep, unsigned int cmd, unsigned log("mapped isr to kernel virtual address 0x%llx", (uint64_t)isr_kernel_vbase); - data->isr_kernel_base = isr_kernel_vbase; + data->isr_kernel_base = (uint64_t) isr_kernel_vbase; return 0; diff --git a/libsgxstep/idt.c b/libsgxstep/idt.c index b836a23..f9ee2b3 100644 --- a/libsgxstep/idt.c +++ b/libsgxstep/idt.c @@ -29,21 +29,22 @@ static void setup_isr_map() param.isr_start = (uint64_t)&__start_isr_section; param.isr_stop = (uint64_t)&__stop_isr_section; - info("setting up isr mapping: from 0x%lx to 0x%lx", param.isr_start, param.isr_stop); + libsgxstep_info("setting up isr mapping: from 0x%lx to 0x%lx", param.isr_start, param.isr_stop); step_open(); ASSERT( ioctl(fd_step, SGX_STEP_IOCTL_SETUP_ISR_MAP, ¶m) >= 0 ); - info("we received the base address from kernel 0x%lx", param.isr_kernel_base); + libsgxstep_info("we received the base address from kernel 0x%lx", param.isr_kernel_base); // calculate the virtual address space offset sgx_step_isr_kernel_map_offset = param.isr_kernel_base - param.isr_start; - info("the offset to the kernel mapped ISR region is 0x%lx", sgx_step_isr_kernel_map_offset); + libsgxstep_info("the offset to the kernel mapped ISR region is 0x%lx", sgx_step_isr_kernel_map_offset); // This is currently a workaround: We are currently use one section for both data and code. // Therefore, we need executable and writable pages which are per definition a security risk. // Linux detects this and removes one of the flags, therefore we directly modify the page tables. + // TODO split into isr.code and isr.data sections for (uint64_t address = param.isr_start; address < param.isr_stop; address += 0x1000) { void *page = (void*)address + sgx_step_isr_kernel_map_offset; uint64_t* pte = remap_page_table_level(page, PTE); @@ -75,6 +76,11 @@ void map_idt(idt_t *idt) void *idt_base = NULL; int entries; + /* + * NOTE: Linux uses the same physical memory for every IDT on all CPUs (set + * by `lidt` by Linux at boot time). Hence, any change made in the IDT will + * be reflected globally across all CPUs. 
+ */ asm volatile ("sidt %0\n\t" :"=m"(idtr) :: ); entries = (idtr.size+1)/sizeof(gate_desc_t); @@ -101,7 +107,10 @@ void install_irq_handler(idt_t *idt, void* asm_handler, int vector, cs_t seg, ga } // only if the handler is within the ISR section we use the kernel mapped handler - if (is_in_isr_section(asm_handler)) { + // TODO caller should indicate if this should be asserted: not a + // problem for `exec_priv` call gates (as no C3 switch can occur in between) + if (is_in_isr_section(asm_handler)) + { handler += sgx_step_isr_kernel_map_offset; libsgxstep_info("using kernel mapped ISR handler: %p -> %p", asm_handler, (void*)handler); } @@ -156,12 +165,3 @@ void exec_priv(exec_priv_cb_t cb) sgx_step_irq_gate_cb = cb; asm("int %0\n\t" ::"i"(IRQ_VECTOR+4):); } - -void __attribute__((constructor)) init_sgx_step( void ) -{ - /* Ensure IRQ handler asm code is not subject to demand-paging */ - info("locking IRQ handler pages %p/%p", &__ss_irq_handler, &__ss_irq_fired); - ASSERT( !mlock(&__ss_irq_handler, 0x1000) ); - ASSERT( !mlock((void*) &__ss_irq_fired, 0x1000) ); - // TODO: maybe call setup_isr_map here? this would require to open sgx_step in the constructor. -} diff --git a/libsgxstep/irq_entry.S b/libsgxstep/irq_entry.S index c83a5a0..4c5e465 100644 --- a/libsgxstep/irq_entry.S +++ b/libsgxstep/irq_entry.S @@ -49,6 +49,8 @@ __ss_irq_handler: /* ********************************************************************** */ /* don't put the custom user handler in the mapped ISR region (yet)! */ /* the pointer could point to data outside the ISR region */ +/* TODO this is okay, as IRQ/Call gates always triggered from within the calling process CR3 */ +/* as long as teh irq num is unused and never triggered from outside the process */ .text .align 0x1000 .global sgx_step_irq_gate_func From 05676f598b517940b2664feeb07f7490a805b397 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Wed, 11 Jan 2023 18:59:31 +0100 Subject: [PATCH 6/7] driver: refactor + auto restore APIC on close. --- kernel/sgxstep.c | 220 ++++++++++++++++++++++++++------------ kernel/sgxstep_internal.h | 2 +- libsgxstep/apic.c | 2 + 3 files changed, 157 insertions(+), 67 deletions(-) diff --git a/kernel/sgxstep.c b/kernel/sgxstep.c index d4e23a2..7207a7c 100644 --- a/kernel/sgxstep.c +++ b/kernel/sgxstep.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -42,81 +43,169 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jo Van Bulck , Raoul Strackx "); MODULE_DESCRIPTION("SGX-Step: A Practical Attack Framework for Precise Enclave Execution Control"); -static struct page **isr_pages = NULL; -static uint64_t isr_nr_pages = 0; -static void *isr_kernel_vbase = NULL; +static struct page **g_isr_pages = NULL; +static uint64_t g_isr_nr_pages = 0; +static void *g_isr_kernel_vbase = NULL; -static int in_use = 0; +static int g_in_use = 0; typedef struct { uint16_t size; uint64_t base; -} __attribute__((packed)) dtr_t; +} __attribute__((packed)) idtr_t; +static idtr_t g_idtr = {0}; +static void *g_idt_copy = NULL; -static dtr_t original_dtr = {0}; +static uint32_t g_apic_lvtt_copy = 0x0, g_apic_tdcr_copy = 0x0; -static void *original_idt_copy = NULL; +/* ********************** UTIL FUNCTIONS ******************************* */ -int step_open(struct inode *inode, struct file *file) +/* + * NOTE: Linux's default `write_cr0` does not allow to change protected bits, + * so we include our own here. 
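+ * (CR0.WP is bit 16, toggled via the enable/disable_write_protection helpers below.)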
+ */ +inline void do_write_cr0(unsigned long val) { + asm volatile("mov %0, %%cr0" : : "r"(val)); +} + +void enable_write_protection(void) { - if (in_use) - { - err("Device is already opened"); - return -EBUSY; - } + unsigned long cr0 = read_cr0(); + set_bit(16, &cr0); + do_write_cr0(cr0); +} + +void disable_write_protection(void) +{ + unsigned long cr0 = read_cr0(); + clear_bit(16, &cr0); + do_write_cr0(cr0); +} +/* ********************** DEVICE OPEN ******************************* */ + +/* + * Save original interrupt descriptor table register (IDTR) -- remains + * unmodified by libsgxstep; addresses the single global IDT memory structure + * setup by Linux at boot time and shared accross all CPUs. + * + * Copy original interrupt descriptor table (IDT) -- may be modified by + * libsgxstep; will be auto restored when closing /dev/sgx-step. + */ +int save_idt(void) +{ asm volatile ("sidt %0\n\t" - :"=m"(original_dtr) :: ); + :"=m"(g_idtr) :: ); + log("original IDT: %#llx with size %u", g_idtr.base, g_idtr.size+1); + RET_ASSERT(g_idtr.base); - log("read idt: 0x%llx with size %u", original_dtr.base, original_dtr.size+1); + g_idt_copy = kmalloc(g_idtr.size+1, GFP_KERNEL); + RET_ASSERT(g_idt_copy); + memcpy(g_idt_copy, (void*)g_idtr.base, g_idtr.size+1); - original_idt_copy = kmalloc(original_dtr.size+1, GFP_KERNEL); - RET_ASSERT(original_idt_copy); + return 0; +} - memcpy(original_idt_copy, (void*)original_dtr.base, original_dtr.size+1); - log("copied original idt"); +/* + * Save APIC timer configuration registers that may be modified by libsgxstep; + * will be auto restored when closing /dev/sgx-step. + */ +int save_apic(void) +{ + g_apic_lvtt_copy = apic_read(APIC_LVTT); + g_apic_tdcr_copy = apic_read(APIC_TDCR); + log("original APIC_LVTT=%#x/TDCR=%#x)", g_apic_lvtt_copy, g_apic_tdcr_copy); - in_use = 1; return 0; } -int step_release(struct inode *inode, struct file *file) +int step_open(struct inode *inode, struct file *file) { - uint64_t cr0; - asm volatile("mov %%cr0, %0" : "=r"(cr0)); + if (g_in_use) + { + err("Device is already opened"); + return -EBUSY; + } + + RET_ASSERT( !save_idt() ); + RET_ASSERT( !save_apic() ); - // disable write protection - asm volatile("mov %0, %%cr0" : : "r"(cr0 & ~(1llu << 16))); + g_in_use = 1; + return 0; +} - // restore original idt to ensure no user pointers are left - memcpy((void*)original_dtr.base, original_idt_copy, original_dtr.size+1); - log("restored original idt at 0x%llx with size %u", original_dtr.base, original_dtr.size+1); +/* ********************** DEVICE CLOSE ******************************* */ - // restore original cr0 - asm volatile("mov %0, %%cr0" : : "r"(cr0)); +/* + * Restore original IDT to ensure no user pointers are left. Free kernel vbase + * mappings and pinned user physical pages for any registered user ISRs. + * + * NOTE: the IDT virtual memory page is mapped write-protected by Linux, so we + * have to disable CR0.WP temporarily here. 
+ */ +void restore_idt(void) +{ + disable_write_protection(); + memcpy((void*)g_idtr.base, g_idt_copy, g_idtr.size+1); + enable_write_protection(); + log("restored IDT: %#llx with size %u", g_idtr.base, g_idtr.size+1); - kfree(original_idt_copy); - original_idt_copy = NULL; + kfree(g_idt_copy); + g_idt_copy = NULL; - if (isr_kernel_vbase) { - // freeup the kernel vbase mapping for isrs - vunmap(isr_kernel_vbase); + if (g_isr_kernel_vbase) { + vunmap(g_isr_kernel_vbase); + g_isr_kernel_vbase = NULL; - // unpin the isr user physical pages - unpin_user_pages(isr_pages, isr_nr_pages); + unpin_user_pages(g_isr_pages, g_isr_nr_pages); + g_isr_nr_pages = 0; - // free the isr user physical page structure - kfree(isr_pages); + kfree(g_isr_pages); + g_isr_pages = NULL; + } +} - isr_kernel_vbase = NULL; - isr_pages = NULL; - isr_nr_pages = 0; +void restore_apic(void) +{ + int delta = 100; + + apic_write(APIC_LVTT, g_apic_lvtt_copy); + apic_write(APIC_TDCR, g_apic_tdcr_copy); + log("restored APIC_LVTT=%#x/TDCR=%#x)", g_apic_lvtt_copy, g_apic_tdcr_copy); + + /* In xAPIC mode the memory-mapped write to LVTT needs to be serialized. */ + asm volatile("mfence" : : : "memory"); + + /* Re-arm the timer so Linux's original handler should take over again. */ + if (g_apic_lvtt_copy & APIC_LVT_TIMER_TSCDEADLINE) + { + log("restoring APIC timer tsc-deadline operation"); + wrmsrl(MSR_IA32_TSC_DEADLINE, rdtsc() + delta); } + else + { + log("restoring APIC timer one-shot/periodic operation"); + apic_write(APIC_TMICT, delta); + } +} - in_use = 0; +/* + * Called when /dev/sgx-step is closed, also when the application that + * originally opened it crashed. We take care to restore any IDT and APIC + * modifications made by user-space libsgxstep here to their original values, + * such that everything runs again normally and Linux does not panic. 
+ */ +int step_release(struct inode *inode, struct file *file) +{ + restore_idt(); + restore_apic(); + + g_in_use = 0; return 0; } +/* ********************** IOCTL FUNCTIONS ******************************* */ + /* Convenience function when editing PTEs from user space (but normally not * needed, since SGX already flushes the TLB on enclave entry/exit) */ long sgx_step_ioctl_invpg(struct file *filep, unsigned int cmd, unsigned long arg) @@ -188,46 +277,44 @@ long sgx_step_get_pt_mapping(struct file *filep, unsigned int cmd, unsigned long return 0; } - long sgx_step_ioctl_setup_isr_map(struct file *filep, unsigned int cmd, unsigned long arg) { uint64_t nr_pinned_pages; - setup_isr_map_t *data = (setup_isr_map_t*) arg; - - isr_nr_pages = (data->isr_stop - data->isr_start + PAGE_SIZE - 1) / PAGE_SIZE; - - isr_pages = kmalloc(isr_nr_pages * sizeof(struct page *), GFP_KERNEL); - RET_ASSERT_GOTO(isr_pages, "cannot allocate memory", out); - nr_pinned_pages = pin_user_pages(data->isr_start & ~(PAGE_SIZE - 1), isr_nr_pages, FOLL_LONGTERM | FOLL_WRITE, isr_pages, NULL); - log("nr_pinned_pages = %llu should be %llu", nr_pinned_pages, isr_nr_pages); + /* allocate space to hold Linux struct page pointers */ + g_isr_nr_pages = (data->isr_stop - data->isr_start + PAGE_SIZE - 1) / PAGE_SIZE; + g_isr_pages = kmalloc(g_isr_nr_pages * sizeof(struct page *), GFP_KERNEL); + GOTO_ASSERT(g_isr_pages, "cannot allocate memory", out); - RET_ASSERT_GOTO(nr_pinned_pages == isr_nr_pages, "could not pin all isr pages", cleanup_pages); + /* pin user physical memory so it cannot be swapped out by the kernel */ + nr_pinned_pages = pin_user_pages(data->isr_start & ~(PAGE_SIZE - 1), + g_isr_nr_pages, FOLL_LONGTERM | FOLL_WRITE, + g_isr_pages, NULL); + GOTO_ASSERT(nr_pinned_pages == g_isr_nr_pages, "cannot pin all ISR pages", cleanup_pages); - isr_kernel_vbase = vmap(isr_pages, isr_nr_pages, VM_READ | VM_EXEC | VM_SHARED, PAGE_SHARED_EXEC); - RET_ASSERT_GOTO(isr_kernel_vbase, "could not vmap isr", cleanup_pin); + /* map pinned physical memory into the kernel virtual address range */ + g_isr_kernel_vbase = vmap(g_isr_pages, g_isr_nr_pages, + VM_READ | VM_EXEC | VM_SHARED, PAGE_SHARED_EXEC); + GOTO_ASSERT(g_isr_kernel_vbase, "cannot vmap ISR pages", cleanup_pin); - log("mapped isr to kernel virtual address 0x%llx", (uint64_t)isr_kernel_vbase); - - data->isr_kernel_base = (uint64_t) isr_kernel_vbase; - + data->isr_kernel_base = (uint64_t) g_isr_kernel_vbase; + log("mapped pinned user ISR memory to kernel virtual address %#llx", data->isr_kernel_base); return 0; cleanup_pin: - unpin_user_pages(isr_pages, isr_nr_pages); + unpin_user_pages(g_isr_pages, g_isr_nr_pages); cleanup_pages: - kfree(isr_pages); + kfree(g_isr_pages); out: - isr_kernel_vbase = NULL; - isr_pages = NULL; - isr_nr_pages = 0; + g_isr_kernel_vbase = NULL; + g_isr_pages = NULL; + g_isr_nr_pages = 0; return -EINVAL; } - typedef long (*ioctl_t)(struct file *filep, unsigned int cmd, unsigned long arg); long step_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -265,6 +352,8 @@ long step_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) return 0; } +/* ********************** FILE OPERATIONS ******************************* */ + static const struct file_operations step_fops = { .owner = THIS_MODULE, .compat_ioctl = step_ioctl, @@ -323,7 +412,6 @@ void cleanup_module(void) if (step_dev.this_device) misc_deregister(&step_dev); - unregister_kretprobe(&krp); log("kernel module unloaded"); } diff --git a/kernel/sgxstep_internal.h 
b/kernel/sgxstep_internal.h index fedc2b9..0b70a2a 100644 --- a/kernel/sgxstep_internal.h +++ b/kernel/sgxstep_internal.h @@ -37,7 +37,7 @@ } \ } while(0) -#define RET_ASSERT_GOTO(cond, message, label) \ +#define GOTO_ASSERT(cond, message, label) \ do { \ if (!(cond)) \ { \ diff --git a/libsgxstep/apic.c b/libsgxstep/apic.c index 67e4f5e..dcd6b92 100644 --- a/libsgxstep/apic.c +++ b/libsgxstep/apic.c @@ -89,7 +89,9 @@ int apic_timer_deadline(void) } /* writing a non-zero value to the TSC_DEADLINE MSR will arm the timer */ + /* In xAPIC mode the memory-mapped write to LVTT needs to be serialized. */ #if APIC_CONFIG_MSR + asm volatile("mfence" : : : "memory"); wrmsr_on_cpu(IA32_TSC_DEADLINE_MSR, get_cpu(), 1); #endif } From 08168e46c7aeb141214637a7615c2bb72b9e0ff4 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 12 Jan 2023 18:07:53 +0100 Subject: [PATCH 7/7] libsgxstep: doc + improve `exec_priv` support. --- .gitignore | 1 + app/idt/main.c | 52 +++++++++++++------ kernel/sgxstep.c | 3 +- libsgxstep/config.h | 1 + libsgxstep/cpu.c | 8 +-- libsgxstep/idt.c | 114 ++++++++++++++++++++++++++--------------- libsgxstep/irq_entry.S | 30 ++++++++--- 7 files changed, 139 insertions(+), 70 deletions(-) diff --git a/.gitignore b/.gitignore index ef9083a..0ef7b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ *.o.* *.mk *.mod +*.swn diff --git a/app/idt/main.c b/app/idt/main.c index 82bdd02..5f53da6 100644 --- a/app/idt/main.c +++ b/app/idt/main.c @@ -29,16 +29,19 @@ #define DO_APIC_TMR_IRQ 1 #define DO_EXEC_PRIV 1 #define NUM 100 +#define INFINITE_LOOP 1 #define NEMESIS_HIGH 1 /* ------------------------------------------------------------ */ /* This code may execute with ring0 privileges */ int my_cpl = -1; +uint64_t my_flags = 0; extern uint64_t nemesis_tsc_aex, nemesis_tsc_eresume; void pre_irq(void) { my_cpl = get_cpl(); + my_flags = read_flags(); __ss_irq_fired = 0; nemesis_tsc_eresume = rdtsc_begin(); } @@ -49,12 +52,20 @@ void do_irq_sw(void) asm("int %0\n\t" ::"i"(IRQ_VECTOR):); } - void do_irq_tmr(void) { pre_irq(); apic_timer_irq(10); - while(!__ss_irq_fired) + + /* + * Ring-0 `exec_priv` handler executes with interrupts disabled FLAGS.IF=0 + * (as the kernel may freeze when interrupting a ring-0 trap gate), so the + * APIC timer IRQ should be handled after returning. 
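+     * (FLAGS.IF is bit 9, hence the `(0x1 << 9)` test below.)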
+ */ + if (!(my_flags & (0x1 << 9))) + return; + + while (!__ss_irq_fired) { #if NEMESIS_HIGH asm("rdrand %rax\n\t"); @@ -66,58 +77,60 @@ void do_irq_tmr(void) /* ------------------------------------------------------------ */ -void post_irq(void) +void post_irq(char *s) { ASSERT(__ss_irq_fired); - info("returned from IRQ: my_cpl=%d; irq_cpl=%d; count=%02d; flags=%p; nemesis=%d", - my_cpl, __ss_irq_cpl, __ss_irq_count, read_flags(), nemesis_tsc_aex - nemesis_tsc_eresume); + info("returned from %s IRQ: my_cpl=%d; irq_cpl=%d; my_flags=%p; count=%02d; nemesis=%d", s, + my_cpl, __ss_irq_cpl, my_flags, __ss_irq_count, nemesis_tsc_aex - nemesis_tsc_eresume); } void do_irq_test(int do_exec_priv) { #if DO_APIC_SW_IRQ printf("\n"); - info("Triggering ring3 software interrupts.."); + info_event("Triggering ring-3 software interrupts.."); for (int i=0; i < NUM; i++) { do_irq_sw(); - post_irq(); + post_irq("software"); } if (do_exec_priv) { printf("\n"); - info("Triggering ring0 software interrupts.."); + info_event("Triggering ring-0 software interrupts.."); for (int i=0; i < NUM; i++) { + my_cpl = -1; exec_priv(do_irq_sw); - post_irq(); + post_irq("software"); } } #endif #if DO_APIC_TMR_IRQ printf("\n"); - info("Triggering ring3 APIC timer interrupts.."); + info_event("Triggering ring-3 APIC timer interrupts.."); apic_timer_oneshot(IRQ_VECTOR); for (int i=0; i < NUM; i++) { do_irq_tmr(); - post_irq(); + post_irq("APIC timer"); } if (do_exec_priv) { printf("\n"); - info("Triggering ring0 APIC timer interrupts.."); + info_event("Triggering ring-0 APIC timer interrupts.."); for (int i=0; i < NUM; i++) { + my_cpl = -1; exec_priv(do_irq_tmr); - post_irq(); + while (!__ss_irq_fired); + post_irq("APIC timer"); } } - apic_timer_deadline(); #endif } @@ -129,19 +142,24 @@ int main( int argc, char **argv ) map_idt(&idt); #if 0 - // ring 0 timer irq handlers may #GP when interrupting the kernel.. + /* ring 3 timer IRQ handlers may #GP when interrupting the kernel.. 
*/ info_event("Installing and testing ring3 IDT handler"); install_user_irq_handler(&idt, __ss_irq_handler, IRQ_VECTOR); do_irq_test(/*do_exec_priv=*/ 0); #endif - info_event("Installing and testing ring0 IDT handler"); + info_event("Installing and testing ring-0 IDT handler"); install_kernel_irq_handler(&idt, __ss_irq_handler, IRQ_VECTOR); + #if DO_EXEC_PRIV exec_priv(pre_irq); info("back from exec_priv(pre_irq) with CPL=%d", my_cpl); #endif - do_irq_test(/*do_exec_priv=*/ DO_EXEC_PRIV); + + #if INFINITE_LOOP + while(1) + #endif + do_irq_test(/*do_exec_priv=*/ DO_EXEC_PRIV); info("all is well; irq_count=%d; exiting..", __ss_irq_count); return 0; diff --git a/kernel/sgxstep.c b/kernel/sgxstep.c index 7207a7c..8a6b12c 100644 --- a/kernel/sgxstep.c +++ b/kernel/sgxstep.c @@ -299,7 +299,8 @@ long sgx_step_ioctl_setup_isr_map(struct file *filep, unsigned int cmd, unsigned GOTO_ASSERT(g_isr_kernel_vbase, "cannot vmap ISR pages", cleanup_pin); data->isr_kernel_base = (uint64_t) g_isr_kernel_vbase; - log("mapped pinned user ISR memory to kernel virtual address %#llx", data->isr_kernel_base); + log("mapped %lld pinned user ISR memory pages to kernel virtual address %#llx", + g_isr_nr_pages, data->isr_kernel_base); return 0; cleanup_pin: diff --git a/libsgxstep/config.h b/libsgxstep/config.h index cad3ae3..c1e1c17 100644 --- a/libsgxstep/config.h +++ b/libsgxstep/config.h @@ -28,6 +28,7 @@ #define SINGLE_STEP_ENABLE 1 #define USER_IDT_ENABLE 1 #define IRQ_VECTOR 45 +#define IRQ_PRIV_VECTOR 49 #define GDT_VECTOR 13 #if (M32 != 1) #define APIC_CONFIG_MSR 1 diff --git a/libsgxstep/cpu.c b/libsgxstep/cpu.c index e338150..79795fc 100644 --- a/libsgxstep/cpu.c +++ b/libsgxstep/cpu.c @@ -88,11 +88,11 @@ int wrmsr_on_cpu(uint32_t reg, int cpu, uint64_t data) uint64_t read_flags() { - uint64_t flags = 0x0; + uint64_t flags = 0x0; - asm("pushfq\n\t" - "popq %0\n\t" - :"=m"(flags)::); + asm("pushfq\n\t" + "popq %0\n\t" + :"=m"(flags)::); return flags; } diff --git a/libsgxstep/idt.c b/libsgxstep/idt.c index f9ee2b3..c121632 100644 --- a/libsgxstep/idt.c +++ b/libsgxstep/idt.c @@ -7,46 +7,45 @@ #include /* See irq_entry.S to see how these are used. */ -void sgx_step_irq_gate_func(void); -exec_priv_cb_t sgx_step_irq_gate_cb = NULL; +void __ss_irq_gate(void); +exec_priv_cb_t __ss_irq_gate_cb = NULL; uint64_t sgx_step_isr_kernel_map_offset = 0; -// this externs will be created by the compiler pointing to the ISR section. 
+/* this externs will be created by the compiler pointing to the ISR section */ extern void *__start_isr_section; extern void *__stop_isr_section; extern int fd_step; -static int is_in_isr_section(void *handler) { - return (void*)(&__start_isr_section) <= handler && handler < (void*)(&__stop_isr_section); +static int is_in_isr_section(void *handler) +{ + return (void*) (&__start_isr_section) <= handler && handler < (void*)(&__stop_isr_section); } -static void setup_isr_map() +static void setup_isr_map(void) { setup_isr_map_t param; - param.isr_start = (uint64_t)&__start_isr_section; - param.isr_stop = (uint64_t)&__stop_isr_section; - - libsgxstep_info("setting up isr mapping: from 0x%lx to 0x%lx", param.isr_start, param.isr_stop); + param.isr_start = (uint64_t) &__start_isr_section; + param.isr_stop = (uint64_t) &__stop_isr_section; + libsgxstep_info("setting up ISR kernel mapping: %#lx-%#lx", + param.isr_start, param.isr_stop); step_open(); ASSERT( ioctl(fd_step, SGX_STEP_IOCTL_SETUP_ISR_MAP, ¶m) >= 0 ); - - libsgxstep_info("we received the base address from kernel 0x%lx", param.isr_kernel_base); - - // calculate the virtual address space offset sgx_step_isr_kernel_map_offset = param.isr_kernel_base - param.isr_start; + libsgxstep_info("received kernel base address %#lx with offset %#lx", + param.isr_kernel_base, sgx_step_isr_kernel_map_offset); - libsgxstep_info("the offset to the kernel mapped ISR region is 0x%lx", sgx_step_isr_kernel_map_offset); - - // This is currently a workaround: We are currently use one section for both data and code. - // Therefore, we need executable and writable pages which are per definition a security risk. - // Linux detects this and removes one of the flags, therefore we directly modify the page tables. - // TODO split into isr.code and isr.data sections - for (uint64_t address = param.isr_start; address < param.isr_stop; address += 0x1000) { - void *page = (void*)address + sgx_step_isr_kernel_map_offset; + /* + * We currently use one section for both data and code. Therefore, we need + * executable and writable pages which are per definition a security risk. + * Linux detects this and removes one of the flags, therefore we directly + * modify the page table entries here. + */ + for (uint64_t addr = param.isr_start; addr < param.isr_stop; addr += 0x1000) { + void *page = (void*) addr + sgx_step_isr_kernel_map_offset; uint64_t* pte = remap_page_table_level(page, PTE); *pte = MARK_EXECUTABLE(*pte); *pte = MARK_WRITABLE(*pte); @@ -98,21 +97,20 @@ void map_idt(idt_t *idt) void install_irq_handler(idt_t *idt, void* asm_handler, int vector, cs_t seg, gate_type_t type) { ASSERT(vector >= 0 && vector < idt->entries); + void *handler = asm_handler; - uint64_t handler = (uint64_t)asm_handler; - - // check if the ISR section is mapped - if (sgx_step_isr_kernel_map_offset == 0) { - setup_isr_map(); - } - - // only if the handler is within the ISR section we use the kernel mapped handler - // TODO caller should indicate if this should be asserted: not a - // problem for `exec_priv` call gates (as no C3 switch can occur in between) + /* + * If the handler is within the special ISR section, we rebase it to the + * virtual shadow mapping in the global kernel address range. 
+ */ if (is_in_isr_section(asm_handler)) { + if (sgx_step_isr_kernel_map_offset == 0) + { + setup_isr_map(); + } handler += sgx_step_isr_kernel_map_offset; - libsgxstep_info("using kernel mapped ISR handler: %p -> %p", asm_handler, (void*)handler); + libsgxstep_info("using kernel-mapped ISR handler: %p -> %p", asm_handler, handler); } gate_desc_t *gate = gate_ptr(idt->base, vector); @@ -126,13 +124,12 @@ void install_irq_handler(idt_t *idt, void* asm_handler, int vector, cs_t seg, ga gate->type = type; gate->ist = 0; - libsgxstep_info("installed asm IRQ handler at %x:%p", seg, asm_handler); + libsgxstep_info("installed asm IRQ handler at %x:%p", seg, handler); #if !LIBSGXSTEP_SILENT dump_gate(gate, vector); #endif } - void install_user_irq_handler(idt_t *idt, void* asm_handler, int vector) { /* @@ -149,19 +146,54 @@ void install_kernel_irq_handler(idt_t *idt, void *asm_handler, int vector) install_irq_handler(idt, asm_handler, vector, KERNEL_CS, GATE_INTERRUPT); } +void __attribute__((noinline)) trigger_sw_irq(void) +{ + /* + * NOTE: separate C function to make sure caller-save registers are + * properly stored and restored by the compiler. + */ + asm("int %0\n\t" ::"i"(IRQ_PRIV_VECTOR):); +} + +/* + * Executes the provided callback function with ring-0 privileges by installing + * a custom interrupt gate. + * + * NOTE: Calling `exec_priv` may lead to unpredictable system freezes when + * passing larger or complex functions. Keep in mind the following for the + * callback function: + * + * 1. Executes with interrupts disabled, best to keep it short. + * 2. Don't use any system calls (e.g., libc). + * 3. Avoid page-fault exceptions: no illegal accesses and preferably + * `mlock` all code/data pages. + * + * While `exec_priv` may be greatly useful to quickly test out some privileged + * functionality in ring-0 C code without recompiling the kernel, if long-term + * stability is desired it may be best to pass a carefully crafted asm callback + * function. + */ void exec_priv(exec_priv_cb_t cb) { idt_t idt; - if (!sgx_step_irq_gate_cb) + if (!__ss_irq_gate_cb) { - libsgxstep_info("installing and calling ring0 irq gate"); + libsgxstep_info("locking user-space IRQ gate handler page at %p", __ss_irq_gate); + ASSERT( !mlock(__ss_irq_gate, 0x1000) ); + + libsgxstep_info("installing and calling ring-0 IRQ gate"); ASSERT( !claim_cpu(VICTIM_CPU) ); map_idt(&idt); - /* We use a trap gate to make the exec_priv code interruptible. */ - install_irq_handler(&idt, sgx_step_irq_gate_func, IRQ_VECTOR+4, KERNEL_CS, GATE_TRAP); + /* + * In principle, we could use a trap gate to make the exec_priv code + * interruptible, but it seems the Linux kernel does not expect and + * freezes when interrupting ring-0 code. So we use an interrupt gate + * here to make the exec_priv code uninterruptible. + */ + install_irq_handler(&idt, __ss_irq_gate, IRQ_PRIV_VECTOR, KERNEL_CS, GATE_INTERRUPT); free_map(idt.base); } - sgx_step_irq_gate_cb = cb; - asm("int %0\n\t" ::"i"(IRQ_VECTOR+4):); + __ss_irq_gate_cb = cb; + trigger_sw_irq(); } diff --git a/libsgxstep/irq_entry.S b/libsgxstep/irq_entry.S index 4c5e465..05d20ca 100644 --- a/libsgxstep/irq_entry.S +++ b/libsgxstep/irq_entry.S @@ -1,3 +1,16 @@ +/* + * NOTE: code/data in the dedicated `isr_section` will be mapped by libsgxstep + * into the global kernel address range. This ensures that the handler is always + * available across CR3 switches (as the APIC timer may sometimes fire after + * the libsgxstep process has been context switched). 
+ * + * Important considerations when programming APIC interrupt handlers: + * + * 1. All code and global data referenced in the handler should be placed + * in the special `isr_section`. + * 2. All code should be written position-independent. + * + */ /* ********************************************************************** */ .section isr_section,"awx",@progbits .align 0x1000 @@ -46,14 +59,17 @@ __ss_irq_handler: mov __ss_irq_rax(%rip), %rax mov __ss_irq_rdx(%rip), %rdx iretq + /* ********************************************************************** */ -/* don't put the custom user handler in the mapped ISR region (yet)! */ -/* the pointer could point to data outside the ISR region */ -/* TODO this is okay, as IRQ/Call gates always triggered from within the calling process CR3 */ -/* as long as teh irq num is unused and never triggered from outside the process */ +/* + * NOTE: The following handler is only ever supposed to be triggered from + * within the libsgxstep process (CR3) by means of a dedicated software + * interrupt `int` instruction. Hence, we don't have to map it into the global + * kernel virtual address range. + */ .text .align 0x1000 - .global sgx_step_irq_gate_func -sgx_step_irq_gate_func: - call *sgx_step_irq_gate_cb(%rip) + .global __ss_irq_gate +__ss_irq_gate: + call *__ss_irq_gate_cb(%rip) iretq