From 4996de7d5e086f894fa49d2f2b6e35a7d9c44aee Mon Sep 17 00:00:00 2001
From: Matthias Rosenfelder
Date: Wed, 23 Aug 2023 19:25:51 +0200
Subject: [PATCH] elfloader: setup pagetables as needed

This change sets up pagetables individually for:
- The ELFloader image (Normal memory)
- The DTB, whether supplied by EFI, cpio or u-boot (Normal memory)
- The UART MMIO range (Strongly-Ordered memory)

It thereby removes the bulk 512 GiB 1:1 mapping that existed before. That
mapping caused problems, since the kernel image was mapped with Normal
memory while the same physical memory was also part of the 1:1
Strongly-Ordered mapping. This fulfills the definition of "Mismatched
memory attributes" from the ARM Architecture specification (ARM DDI
0487I.a, section B2.8). Even though I am currently unable to see where
such a mismatched access would actually occur, such a mapping situation is
certainly not desirable and should be avoided. Moreover, it is unclear
whether problems could arise from establishing the Strongly-Ordered
mapping for physical addresses that have nothing behind them (which is
certainly true for some parts of the 512 GiB range).

This commit fixes the sporadic hangs during boot after the "Enabling MMU
and ..." message. Tests on several different Orins (Muc and SJ) show
promising results, i.e. no hangs occurred anymore.

Note: The code in arm_switch_to_hyp_tables() still disables and re-enables
both MMU & caches, but that section has been engineered to be as short as
possible, and no memory accesses happen in between. Several barriers and
instruction-cache invalidation code have been added as well, to be on the
safe side. However, tests with only *that* code added still showed the
problem; the only change that altered the behavior was the switch of
translation tables. Thus, the new tables *are* the actual solution to the
instability problems.

Moreover, we need to support crossing a 1 GiB boundary with the placement
of the ELFloader. This is due to the latest firmware on Orin0 in MUC,
named "Jetson UEFI firmware (version 4.1-33958178)", which puts our image
closely below a 1 GiB boundary; only for tiny image sizes will the
boundary not be crossed. We therefore do not hard-code the pagetable
entries, because the logic for doing so while crossing a 1 GiB boundary is
too complicated. Instead, we use a fully dynamic approach that walks the
pagetables in software for a given VA and inserts missing levels on demand
from a preallocated pool of pages. Only the two top-level pagetables are
fixed. This allows re-use of all pagetable code, and we only need to
distinguish in one (!) place between hypervisor and non-hyp (or VHE).
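For illustration, every 2 MiB mapping in the new code boils down to the same
idiom (abridged from the mmu.c changes below; error handling omitted):

    pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2); /* walk; allocate missing levels */
    if (pte) {
        pte_set(pte, make_pte(pa, MT_NORMAL));        /* install 2 MiB block mapping */
    }

fill_pt_tree() starts from one of the two fixed top-level tables, pulls
intermediate tables from the preallocated pool whenever a level is missing,
and returns the level-2 slot. The same path therefore serves the ELFloader
image, the DTB and the kernel window (MT_NORMAL) as well as the UART
(MT_DEVICE_nGnRnE).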
Signed-off-by: Matthias Rosenfelder --- .../include/arch-arm/64/mode/structures.h | 10 +- elfloader-tool/include/arch-arm/elfloader.h | 15 + elfloader-tool/include/drivers/uart.h | 5 + elfloader-tool/src/arch-arm/64/mmu.c | 441 +++++++++++++++--- elfloader-tool/src/arch-arm/64/structures.c | 7 +- .../src/arch-arm/armv/armv8-a/64/mmu-hyp.S | 27 +- elfloader-tool/src/arch-arm/smp_boot.c | 11 +- elfloader-tool/src/arch-arm/sys_boot.c | 20 +- elfloader-tool/src/drivers/uart/common.c | 3 + 9 files changed, 444 insertions(+), 95 deletions(-) diff --git a/elfloader-tool/include/arch-arm/64/mode/structures.h b/elfloader-tool/include/arch-arm/64/mode/structures.h index aaa8bced..dbc7a49f 100644 --- a/elfloader-tool/include/arch-arm/64/mode/structures.h +++ b/elfloader-tool/include/arch-arm/64/mode/structures.h @@ -6,6 +6,12 @@ #pragma once +/* ARM VMSAv8-64 (with a fully populated last level) has the same number of PTEs + * in all levels (we don't use concatenated pagetables in ELFloader) and each + * table entry is always eight bytes large. + */ +#define BITS_PER_LEVEL (PAGE_BITS - 3) + #define ARM_1GB_BLOCK_BITS 30 #define ARM_2MB_BLOCK_BITS 21 @@ -26,9 +32,5 @@ #define GET_PMD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS)) extern uint64_t _boot_pgd_up[BIT(PGD_BITS)]; -extern uint64_t _boot_pud_up[BIT(PUD_BITS)]; -extern uint64_t _boot_pmd_up[BIT(PMD_BITS)]; - extern uint64_t _boot_pgd_down[BIT(PGD_BITS)]; -extern uint64_t _boot_pud_down[BIT(PUD_BITS)]; diff --git a/elfloader-tool/include/arch-arm/elfloader.h b/elfloader-tool/include/arch-arm/elfloader.h index 93293a75..ceab4796 100644 --- a/elfloader-tool/include/arch-arm/elfloader.h +++ b/elfloader-tool/include/arch-arm/elfloader.h @@ -22,7 +22,22 @@ typedef void (*init_arm_kernel_t)(word_t ui_p_reg_start, /* Enable the mmu. */ extern void arm_enable_mmu(void); + +/* These functions are very similar however, there are some small differences + * between the ARMv8 and legacy implementation. + * + * New ARMv8 implementation: + * - Does the MMU disabling. This is to keep the time spent with MMU off low. + * - Is only meant if seL4 runs in EL2. + */ +#if defined(CONFIG_ARCH_AARCH64) +/* Switches MMU-related stuff: pagetables, MAIR & TCR etc. Works also if the MMU + * was off initially. EL2 translation regime only. + */ +extern void arm_switch_to_hyp_tables(void); +#else extern void arm_enable_hyp_mmu(void); +#endif /* Setup boot VSpace. */ diff --git a/elfloader-tool/include/drivers/uart.h b/elfloader-tool/include/drivers/uart.h index 1fa9f970..74ce4b16 100644 --- a/elfloader-tool/include/drivers/uart.h +++ b/elfloader-tool/include/drivers/uart.h @@ -6,6 +6,7 @@ #pragma once +#include #include #define dev_get_uart(dev) ((struct elfloader_uart_ops *)(dev->drv->ops)) @@ -16,3 +17,7 @@ struct elfloader_uart_ops { volatile void *uart_get_mmio(void); void uart_set_out(struct elfloader_device *out); +#if defined(CONFIG_ARCH_AARCH64) +/* Implemented in mmu.c */ +void mmu_set_uart_base(volatile void *base); +#endif diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index e927f3a6..c70b5466 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -11,88 +11,415 @@ #include #include #include +#include #include -#include +#include /* dsb() */ +#include -/* -* Create a "boot" page table, which contains a 1:1 mapping below -* the kernel's first vaddr, and a virtual-to-physical mapping above the -* kernel's first vaddr. 
-*/ -void init_boot_vspace(struct image_info *kernel_info) +/* Note: "typeof()" is a GCC extension that is supported by Clang, too. */ +#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x)) +#define WRITE_ONCE(var, value) \ + *((volatile typeof(var) *)(&(var))) = (value); + + +//#define DEBUG_PAGETABLES + +#ifndef DEBUG_PAGETABLES +#define dbg_printf(...) /* empty */ +static void dgb_print_2M_mapping_details(const char *map_name UNUSED, + paddr_t pa UNUSED, size_t size UNUSED) {} +#else +#define dbg_printf(...) printf(__VA_ARGS__) + +static int dgb_print_2M_mapping_indices(paddr_t pa) { - word_t i; + return printf("%u.%u.%u.X", + GET_PGD_INDEX(pa), + GET_PUD_INDEX(pa), + GET_PMD_INDEX(pa)); +} - vaddr_t first_vaddr = kernel_info->virt_region_start; - vaddr_t last_vaddr = kernel_info->virt_region_end; - paddr_t first_paddr = kernel_info->phys_region_start; +static void dgb_print_2M_mapping_details(const char *map_name, paddr_t pa, size_t size) +{ + int cnt = 0; + paddr_t pa_start = pa; + size_t orig_sz = size; - _boot_pgd_down[0] = ((uintptr_t)_boot_pud_down) | BIT(1) | BIT(0); /* its a page table */ + pa = ROUND_DOWN(pa, ARM_2MB_BLOCK_BITS); + size += (pa_start - pa); + size = ROUND_UP(size, ARM_2MB_BLOCK_BITS); - for (i = 0; i < BIT(PUD_BITS); i++) { - _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) - | BIT(10) /* access flag */ - | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ - | BIT(0); /* 1G block */ + cnt += dgb_print_2M_mapping_indices(pa); + if (orig_sz) { + while (cnt < 11) { + printf(" "); + cnt++; + } + cnt += printf("--"); + while (cnt < 16) { + printf(" "); + cnt++; + } + cnt += dgb_print_2M_mapping_indices(pa + size - 1); + } + while (cnt < 27) { + printf(" "); + cnt++; } + if (orig_sz) { + printf("PA 0x%lx - 0x%lx (size: %lu MiB): %s\n", pa, pa + size - 1, size / 1024u / 1024, map_name); + } else { + /* No range given, just a single 2 MiB page */ + printf("PA 0x%lx: %s\n", pa, map_name); + } +} +#endif /* DEBUG_PAGETABLES */ - _boot_pgd_up[GET_PGD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pud_up) | BIT(1) | BIT(0); /* its a page table */ +/* Page allocator. Contains a fixed number of pages. All page-aligned. No returning possible. */ - _boot_pud_up[GET_PUD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pmd_up) | BIT(1) | BIT(0); /* its a page table */ +#define NUM_PAGES 7 +static char pages[BIT(PAGE_BITS) * NUM_PAGES] ALIGN(BIT(PGD_SIZE_BITS)); +static unsigned page_cnt; - /* We only map in 1 GiB, so check that the kernel doesn't cross 1GiB boundary. */ - if ((first_vaddr & ~MASK(ARM_1GB_BLOCK_BITS)) != (last_vaddr & ~MASK(ARM_1GB_BLOCK_BITS))) { - printf("We only map 1GiB, but kernel vaddr range covers multiple GiB.\n"); - abort(); +static void *get_page(void) +{ + void *ret = NULL; + + if (page_cnt == 0) { + dbg_printf("get_page(): pages @ 0x%p\n", pages); } - for (i = GET_PMD_INDEX(first_vaddr); i < BIT(PMD_BITS); i++) { - _boot_pmd_up[i] = first_paddr - | BIT(10) /* access flag */ + + if (page_cnt < NUM_PAGES) { + ret = &pages[BIT(PAGE_BITS) * page_cnt]; + dbg_printf("get_page(): ret: 0x%p (%u->%u)\n", ret, page_cnt, page_cnt + 1); + page_cnt ++; + } + + return ret; +} + +/* Translate a PA to a VA such that when accessing the VA we end up at that PA. + * Usually done in OS kernels via a physical memory map which has a constant + * virt-to-phys offset. Here this is the same, since either the MMU is off or + * we're running on the identity mapping. 
+ */ +static inline uint64_t pa_to_va(uint64_t pa) +{ + return pa; +} + +static inline uint64_t va_to_pa(uint64_t va) +{ + return va; +} + +typedef uint64_t pte_t; + +/* This can be used to clear unwanted bits from a PA that is supposed to be put + * into a PTE/PDE; or it can be used to extract the PA from a PTE/PDE. + */ +static inline uint64_t mask_pa(uint64_t pa) +{ + /* Mask out the upper 16 bits and lower 12 bits. Only 48-bit OA for now. */ + return (pa & 0x0000FFFFFFFFF000); +} + +static inline uintptr_t pde_to_paddr(uint64_t pde_val) +{ + /* ARM DDI ARM DDI 0487I.a, page D8-5124 */ + return mask_pa(pde_val); +} + +static inline uint64_t make_pde(uintptr_t pa) +{ + /* For now we set all (upper) attributes to zero */ + return (mask_pa(pa) | BIT(1) | BIT(0)); +} + +/* Accept a pointer, otherwise same as make_pde() */ +static inline uint64_t make_pde_from_ptr(pte_t *pagetable_target) +{ + return make_pde(va_to_pa((uintptr_t)pagetable_target)); +} + +/* ARM DDI 0487I.a, section D8.5.2 */ +#define INNER_SHAREABLE 3 +static inline uint64_t make_pte(paddr_t pa, uint8_t mem_attr_index) +{ + /* Note: As per R_PYFVQ from the ARM spec, we can always safely set the + * shareability to inner, even for device-type memory. + */ + return mask_pa(pa) + | BIT(10) /* access flag */ #if CONFIG_MAX_NUM_NODES > 1 - | (3 << 8) /* make sure the shareability is the same as the kernel's */ + | (INNER_SHAREABLE << 8) #endif - | (MT_NORMAL << 2) /* MT_NORMAL memory */ - | BIT(0); /* 2M block */ - first_paddr += BIT(ARM_2MB_BLOCK_BITS); + | (mem_attr_index << 2) + | BIT(0); /* valid page/block mapping */ +} + +static inline _Bool pte_is_valid(pte_t pte) +{ + return (pte & 1); +} + +static inline _Bool pte_is_block(pte_t pte) +{ + return ((pte & 3) == 1); +} + +/* Take care about atomicity */ +static inline void pte_set(pte_t *ptep, pte_t val) +{ + WRITE_ONCE(*ptep, val); +} + +static inline pte_t pte_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} + +static_assert(PGD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size"); +static_assert(PUD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size"); +static_assert(PMD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size"); +/* ARM VMSAv8-64: Each table entry is always eight bytes large */ +static_assert(PAGE_BITS == (BITS_PER_LEVEL + 3), "Mismatch in expected page size"); + +/* A valid PA can be maximum 48 or 52 bit large, so upper bits are always zero */ +#define INVALID_PA ((uint64_t)-1) +static paddr_t walk_pagetables(vaddr_t va, uint64_t *l0_table, + unsigned *level, pte_t **fault_pde) +{ + paddr_t ret = INVALID_PA; + /* All levels have the same size and therefore number of index bits + * (9 for 4kiB Translation Granule) on ARMv8. + */ + uint64_t index_mask_bits = PGD_BITS + PUD_BITS + PMD_BITS + PAGE_BITS; + uint64_t *tbl = l0_table; + + unsigned idx, lvl; + paddr_t pa; + pte_t pte; + + /* Walk up to four levels */ + for (lvl = 0; lvl <= 3; lvl++) { + idx = (va >> index_mask_bits) & MASK(BITS_PER_LEVEL); + pte = pte_get(&tbl[idx]); + + if (!pte_is_valid(pte)) { + goto err_out; + } else if (pte_is_block(pte)) { + /* L0 giant pages (512 GiB) are not allowed by the architecture for + * 4kiB Granule size and 48 bit OA. We don't support 52 bit OA. + */ + if (lvl == 0) { + goto err_out; + } + break; + } + if (lvl == 3) { + /* ARM DDI 0487I.a, page D8-5126 (I_WYRBP), D8-5131 (I_VKPKF): + * If the PTE in the last level is valid, it is interpreted as a page + * table, irrespectively of bit 1. 
This allows for the "loopback + * trick" - described in every (good) OS lecture at university :-) + * Other architectures like RISC-V have screwed this up with their + * pagetable format. + */ + break; + } + /* We have a table descriptor. Descent to the next lower level */ + pa = pde_to_paddr(pte); + vaddr_t va_next = pa_to_va(pa); + tbl = (uint64_t *)va_next; + + index_mask_bits -= BITS_PER_LEVEL; } - /* Architecturally required barrier to make all writes to pagetable memories - * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6. + ret = (pa | (va & (MASK(index_mask_bits)))); + +err_out: + *level = lvl; + *fault_pde = &tbl[idx]; + return ret; +} + +/* Returns NULL if there is already something mappped at the requested VA. Fills + * in page tables if needed until the desired level is reached. + */ +static pte_t *fill_pt_tree(vaddr_t va, uint64_t *l0_table, unsigned target_lvl) +{ + paddr_t pa; + unsigned lvl; + pte_t *fault_pde; + + pa = walk_pagetables(va, l0_table, &lvl, &fault_pde); + + while ((lvl < target_lvl) && (pa == INVALID_PA)) { + /* fault_pde points to the entry to write. Add a new pagetable */ + pte_set(fault_pde, make_pde_from_ptr(get_page())); + + pa = walk_pagetables(va, l0_table, &lvl, &fault_pde); + } + + if ((lvl == target_lvl) && fault_pde && !pte_is_valid(pte_get(fault_pde))) { + return fault_pde; + } + return NULL; +} + +extern char _text[]; +extern char _end[]; + +extern size_t dtb_size; + +static inline void clean_inval_cl(void *addr) +{ + asm volatile("dc civac, %0\n\t" :: "r"(addr)); +} + +static void clean_inval_pagetables(void) +{ + dsb(); + /* Whole image for now; EFI case: Maybe our image is loaded on the boot + * CPU with caches enabled (and still being dirty), but the secondary CPUs + * start with caches disabled. Further, assume CL size is >= 64 Bytes. + * Maybe this is too cautious. Can we relax this? */ + for (vaddr_t va = (vaddr_t)_text; va < (vaddr_t)(_end); va += 64) { + clean_inval_cl((void *)va); + } dsb(); } -void init_hyp_boot_vspace(struct image_info *kernel_info) +static void map_uart(paddr_t base) +{ + pte_t *pte; + + base = ROUND_DOWN(base, ARM_2MB_BLOCK_BITS); + pte = fill_pt_tree(base, _boot_pgd_down, 2); + if (pte) { + pte_set(pte, make_pte(base, MT_DEVICE_nGnRnE)); + } else { + printf("Unable to map the UART at PA 0x%lx\n", base); + abort(); + } + dbg_printf("Done mapping UART at PA: 0x%lx\n", base); +} + + +static paddr_t uart_base_mmio; +void mmu_set_uart_base(volatile void *base) +{ + uart_base_mmio = (paddr_t)base; +} + +/* + * Create a "boot" page table, which contains a 1:1 mapping for the ELFloader and + * the DTB. Moreover create a mapping for the kernel image at the desired VA with the + * physical memory that was used when extracting the kernel from the elfloader + * image previously. + */ +static void init_boot_vspace_impl(const struct image_info *kernel_info, _Bool has_one_va_range) { - word_t i; - word_t pmd_index; + /* We may be running with MMU & caches off. Before we write new values + * make sure to clean & invalidate all previous data in those locations. 
+ */ + clean_inval_pagetables(); + + /* Map UART, using strongly ordered memory; one 2 MiB page; 1:1 VA/PA */ + paddr_t uart_base = ROUND_DOWN(uart_base_mmio, ARM_2MB_BLOCK_BITS); + map_uart(uart_base); + + /* Map Elfloader image, using NORMAL memory; 1:1 VA/PA */ + paddr_t start_paddr = ROUND_DOWN(((paddr_t)_text), ARM_2MB_BLOCK_BITS); + paddr_t end_paddr = ROUND_UP(((paddr_t)_end), ARM_2MB_BLOCK_BITS); + + for (paddr_t pa = start_paddr; pa < end_paddr; pa += BIT(ARM_2MB_BLOCK_BITS)) { + pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map ELFloader at PA: 0x%lx\n", pa); + abort(); + } + dbg_printf("Map Elfloader PA: 0x%lx\n", pa); + } + dbg_printf("Done mapping Elfloader\n"); + + paddr_t dtb_map_start, dtb_map_end; + if (dtb && (dtb_size > 0)) { + /* Device Tree Blob (DTB): + * An UEFI-supplied DTB lies outside of the image memory => Add mapping. + * For other DTBs the ELFloader of course saves the *target* address of + * the copied DTB in "dtb". + * So we also need to add a mapping here in those cases. + */ + paddr_t dtb_end = (paddr_t)dtb + dtb_size; + + dtb_map_start = ROUND_DOWN((paddr_t)dtb, ARM_2MB_BLOCK_BITS); + dtb_map_end = ROUND_UP(dtb_end, ARM_2MB_BLOCK_BITS); + for (paddr_t pa = dtb_map_start; pa < dtb_map_end; pa += BIT(ARM_2MB_BLOCK_BITS)) { + pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map DTB at PA: 0x%lx\n", pa); + } + dbg_printf("Map DTB PA: 0x%lx\n", pa); + } + dbg_printf("Done mapping DTB\n"); + } + + /* Map the kernel */ vaddr_t first_vaddr = kernel_info->virt_region_start; + vaddr_t last_vaddr = kernel_info->virt_region_end; paddr_t first_paddr = kernel_info->phys_region_start; - _boot_pgd_down[0] = ((uintptr_t)_boot_pud_down) | BIT(1) | BIT(0); - for (i = 0; i < BIT(PUD_BITS); i++) { - _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) - | BIT(10) /* access flag */ - | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ - | BIT(0); /* 1G block */ + uint64_t *l0_table = has_one_va_range ? 
_boot_pgd_down : _boot_pgd_up; + paddr_t pa = first_paddr; + for (vaddr_t va = first_vaddr; va < last_vaddr; + va += BIT(ARM_2MB_BLOCK_BITS), + pa += BIT(ARM_2MB_BLOCK_BITS)) { + + pte_t *pte = fill_pt_tree(va, l0_table, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map kernel at VA/PA: 0x%lx / 0x%lx\n", va, pa); + } + dbg_printf("Map kernel VA -> PA: 0x%lx -> 0x%lx\n", va, pa); } + dbg_printf("Done mapping kernel\n"); - _boot_pgd_down[GET_PGD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pud_up) | BIT(1) | BIT(0); /* its a page table */ + dbg_printf("Mapping indices:\n"); + dgb_print_2M_mapping_details("UART", uart_base, /* one 2 MiB page */ 2u * 1024 * 1024); + dgb_print_2M_mapping_details("ELFloader image", (paddr_t)_text, (paddr_t)_end - (paddr_t)_text); + if (dtb && (dtb_size > 0)) { + dgb_print_2M_mapping_details("dtb", dtb_map_start, dtb_map_end - dtb_map_start - 1); + } - _boot_pud_up[GET_PUD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pmd_up) | BIT(1) | BIT(0); /* its a page table */ - pmd_index = GET_PMD_INDEX(first_vaddr); - for (i = pmd_index; i < BIT(PMD_BITS); i++) { - _boot_pmd_up[i] = (((i - pmd_index) << ARM_2MB_BLOCK_BITS) + first_paddr) - | BIT(10) /* access flag */ -#if CONFIG_MAX_NUM_NODES > 1 - | (3 << 8) -#endif - | (MT_NORMAL << 2) /* MT_NORMAL memory */ - | BIT(0); /* 2M block */ - } + /* Architecturally required barrier to make all writes to pagetable memories + * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6. + */ + dsb(); + + /* Maintenance again, just to be sure. This is only necessary for the secondary + * CPUs; they may come up with caches & MMU disabled. What they should usually + * do is enable caches & MMU together! The following code is only necessary + * if they enable ONLY the MMU first and after that they enable the cache. + * That would be totally ... well ... suboptimal, but we play "better safe + * than sorry" here. + */ + clean_inval_pagetables(); +} + +void init_boot_vspace(struct image_info *kernel_info) +{ + init_boot_vspace_impl(kernel_info, 0); +} + +void init_hyp_boot_vspace(struct image_info *kernel_info) +{ + init_boot_vspace_impl(kernel_info, 1); } diff --git a/elfloader-tool/src/arch-arm/64/structures.c b/elfloader-tool/src/arch-arm/64/structures.c index 654fc7dc..e24680f4 100644 --- a/elfloader-tool/src/arch-arm/64/structures.c +++ b/elfloader-tool/src/arch-arm/64/structures.c @@ -8,11 +8,6 @@ #include #include -/* Paging structures for kernel mapping */ +/* Top-level paging structures for kernel and identity mapping */ uint64_t _boot_pgd_up[BIT(PGD_BITS)] ALIGN(BIT(PGD_SIZE_BITS)); -uint64_t _boot_pud_up[BIT(PUD_BITS)] ALIGN(BIT(PUD_SIZE_BITS)); -uint64_t _boot_pmd_up[BIT(PMD_BITS)] ALIGN(BIT(PMD_SIZE_BITS)); - -/* Paging structures for identity mapping */ uint64_t _boot_pgd_down[BIT(PGD_BITS)] ALIGN(BIT(PGD_SIZE_BITS)); -uint64_t _boot_pud_down[BIT(PUD_BITS)] ALIGN(BIT(PUD_SIZE_BITS)); diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S index 8b9fd216..eb091f81 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S @@ -21,12 +21,6 @@ .extern invalidate_icache .extern _boot_pgd_down -BEGIN_FUNC(disable_mmu_caches_hyp) - /* Assume D-cache already cleaned to PoC */ - disable_mmu sctlr_el2, x9 - ret -END_FUNC(disable_mmu_caches_hyp) - BEGIN_FUNC(clean_dcache_by_range) /* Ordering needed for strongly-ordered mem, not needed for NORMAL mem. 
* See ARM DDI 0487I.a, page D7-5063. @@ -76,7 +70,10 @@ BEGIN_FUNC(leave_hyp) eret END_FUNC(leave_hyp) -BEGIN_FUNC(arm_enable_hyp_mmu) +BEGIN_FUNC(arm_switch_to_hyp_tables) + /* Load MAIR & TCR values; construct TTBR address before disabling and re- + * enabling the MMU & caches. + */ /* * DEVICE_nGnRnE 000 00000000 * DEVICE_nGnRE 001 00000100 @@ -91,16 +88,28 @@ BEGIN_FUNC(arm_enable_hyp_mmu) MAIR(0x44, MT_NORMAL_NC) | \ MAIR(0xff, MT_NORMAL) | \ MAIR(0xaa, MT_NORMAL_WT) - msr mair_el2, x5 + ldr x8, =TCR_T0SZ(48) | TCR_IRGN0_WBWC | TCR_ORGN0_WBWC | TCR_SH0_ISH | TCR_TG0_4K | TCR_PS | TCR_EL2_RES1 + + /* Use x16 as temp register */ + disable_mmu sctlr_el2, x16 + + msr mair_el2, x5 msr tcr_el2, x8 isb + /* For non-VHE the "down" contains both the the kernel mapping and 1:1 mapping. */ adrp x8, _boot_pgd_down msr ttbr0_el2, x8 isb + /* Invalidate TLBs */ + dsb sy tlbi alle2is + tlbi vmalls12e1 + dsb sy + + tlbi vmalle1is dsb ish isb @@ -113,4 +122,4 @@ BEGIN_FUNC(arm_enable_hyp_mmu) /* NOTE: enable_mmu already contains an isb after enabling. */ ret -END_FUNC(arm_enable_hyp_mmu) +END_FUNC(arm_switch_to_hyp_tables) diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c index 704d90d2..f795f3a7 100644 --- a/elfloader-tool/src/arch-arm/smp_boot.c +++ b/elfloader-tool/src/arch-arm/smp_boot.c @@ -51,20 +51,17 @@ void non_boot_main(void) if (is_hyp_mode()) { extern void leave_hyp(void); - extern void disable_mmu_caches_hyp(void); -#ifdef CONFIG_ARCH_AARCH64 - /* Disable the MMU and cacheability unconditionally on ARM64. - * The 32 bit ARM platforms do not expect the MMU to be turned - * off, so we leave them alone. */ - disable_mmu_caches_hyp(); -#endif #ifndef CONFIG_ARM_HYPERVISOR_SUPPORT leave_hyp(); #endif } /* Enable the MMU, and enter the kernel. */ if (is_hyp_mode()) { +#if defined(CONFIG_ARCH_AARCH64) + arm_switch_to_hyp_tables(); +#else arm_enable_hyp_mmu(); +#endif } else { arm_enable_mmu(); } diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index f63e9824..5b49d2a5 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -191,34 +191,31 @@ void continue_boot(int was_relocated) init_boot_vspace(&kernel_info); #endif - /* If in EL2, disable MMU and I/D cacheability unconditionally */ if (is_hyp_mode()) { - extern void disable_mmu_caches_hyp(void); extern void clean_dcache_by_range(paddr_t start, paddr_t end); paddr_t start = kernel_info.phys_region_start; paddr_t end = kernel_info.phys_region_end; clean_dcache_by_range(start, end); + start = (paddr_t)user_info.phys_region_start; end = (paddr_t)user_info.phys_region_end; clean_dcache_by_range(start, end); + start = (paddr_t)_text; end = (paddr_t)_end; clean_dcache_by_range(start, end); + if (dtb) { start = (paddr_t)dtb; end = start + dtb_size; clean_dcache_by_range(start, end); } -#if defined(CONFIG_ARCH_AARCH64) - /* Disable the MMU and cacheability unconditionally on ARM64. - * The 32 bit ARM platforms do not expect the MMU to be turned - * off, so we leave them alone. */ - disable_mmu_caches_hyp(); -#endif - -#if (defined(CONFIG_ARCH_ARM_V7A) || defined(CONFIG_ARCH_ARM_V8A)) && !defined(CONFIG_ARM_HYPERVISOR_SUPPORT) +#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT) + printf("Switch to hypervisor mapping\n"); + arm_switch_to_hyp_tables(); +#else extern void leave_hyp(void); /* Switch to EL1, assume EL2 MMU already disabled for ARMv8. 
*/ leave_hyp(); @@ -230,8 +227,7 @@ void continue_boot(int was_relocated) #endif /* CONFIG_MAX_NUM_NODES */ if (is_hyp_mode()) { - printf("Enabling hypervisor MMU and paging\n"); - arm_enable_hyp_mmu(); + /* Nothing to be done here, we already switched above */ } else { printf("Enabling MMU and paging\n"); arm_enable_mmu(); diff --git a/elfloader-tool/src/drivers/uart/common.c b/elfloader-tool/src/drivers/uart/common.c index a5c914ff..b6ec30af 100644 --- a/elfloader-tool/src/drivers/uart/common.c +++ b/elfloader-tool/src/drivers/uart/common.c @@ -18,6 +18,9 @@ void uart_set_out(struct elfloader_device *out) return; } uart_out = out; +#if defined(CONFIG_ARCH_AARCH64) + mmu_set_uart_base(out->region_bases[0]); +#endif } volatile void *uart_get_mmio(void)
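As a sanity check of the index arithmetic that walk_pagetables() relies on
(BITS_PER_LEVEL index bits per level on top of PAGE_BITS offset bits), the
following standalone sketch computes the level 0-3 table indices for a
hypothetical VA. The constants mirror the elfloader's 4 KiB-granule
configuration; the example VA and the program itself are illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_BITS      12                 /* 4 KiB translation granule */
    #define BITS_PER_LEVEL (PAGE_BITS - 3)    /* 9 index bits per level, 8-byte PTEs */

    int main(void)
    {
        uint64_t va = 0x0000008040201000ULL;  /* hypothetical example VA (48-bit VA space) */

        /* Levels 0..3 consume VA bits [47:39], [38:30], [29:21] and [20:12]. */
        for (unsigned lvl = 0; lvl <= 3; lvl++) {
            unsigned shift = PAGE_BITS + (3 - lvl) * BITS_PER_LEVEL;
            uint64_t idx = (va >> shift) & ((1u << BITS_PER_LEVEL) - 1);
            printf("level %u index: %llu\n", lvl, (unsigned long long)idx);
        }
        return 0;
    }

For this example VA the walk yields index 1 at every level, i.e. it would
descend through entry 1 of each table before reaching the final mapping.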