From a12093a7b5590de006cbaeda09e4faf326398c5c Mon Sep 17 00:00:00 2001 From: Axel Heider Date: Thu, 11 Nov 2021 03:59:23 +0100 Subject: [PATCH] elfloader/risc-v: fix multicore hart starting - stop wrong boot hart, it will be started alter again. - abort if a secondary hart cannot be started. - check hsm_exists only once. - log message if HSM extension is missing. - improve comments. Signed-off-by: Axel Heider --- elfloader-tool/include/arch-riscv/sbi.h | 60 ++++++++++++++------- elfloader-tool/src/arch-riscv/boot.c | 70 +++++++++++++++++++------ elfloader-tool/src/arch-riscv/crt0.S | 14 ++--- 3 files changed, 100 insertions(+), 44 deletions(-) diff --git a/elfloader-tool/include/arch-riscv/sbi.h b/elfloader-tool/include/arch-riscv/sbi.h index 9c87057d..3af5f6b5 100644 --- a/elfloader-tool/include/arch-riscv/sbi.h +++ b/elfloader-tool/include/arch-riscv/sbi.h @@ -30,24 +30,43 @@ a0; \ }) +typedef enum { + SBI_SUCCESS = 0, + SBI_ERR_FAILED = -1, + SBI_ERR_NOT_SUPPORTED = -2, + SBI_ERR_INVALID_PARAM = -3, + SBI_ERR_DENIED = -4, + SBI_ERR_INVALID_ADDRESS = -5, + SBI_ERR_ALREADY_AVAILABLE = -6, + SBI_ERR_ALREADY_STARTED = -7, + SBI_ERR_ALREADY_STOPPED = -8 +} sbi_call_ret_t; + #define SBI_HSM 0x48534DULL #define SBI_HSM_HART_START 0 -#define SBI_EXT_CALL(extension, which, arg0, arg1, arg2) ({ \ - register word_t a0 asm ("a0") = (word_t)(arg0); \ - register word_t a1 asm ("a1") = (word_t)(arg1); \ - register word_t a2 asm ("a2") = (word_t)(arg2); \ - register word_t a6 asm ("a6") = (word_t)(which); \ - register word_t a7 asm ("a7") = (word_t)(extension); \ - asm volatile ("ecall" \ - : "+r" (a0) \ - : "r" (a1), "r" (a2), "r" (a6), "r" (a7) \ - : "memory"); \ - a0; \ -}) - -#define SBI_HSM_CALL(which, arg0, arg1, arg2) \ - SBI_EXT_CALL(SBI_HSM, (which), (arg0), (arg1), (arg2)) +typedef struct { + sbi_call_ret_t code; + word_t data; +} sbi_hsm_ret_t; + +#define SBI_EXT_CALL(extension, which, arg0, arg1, arg2, var_sbi_hsm_ret) \ + do { \ + register word_t a0 asm ("a0") = (word_t)(arg0); \ + register word_t a1 asm ("a1") = (word_t)(arg1); \ + register word_t a2 asm ("a2") = (word_t)(arg2); \ + register word_t a6 asm ("a6") = (word_t)(which); \ + register word_t a7 asm ("a7") = (word_t)(extension); \ + asm volatile ("ecall" \ + : "+r" (a0), "+r" (a1) \ + : "r" (a2), "r" (a6), "r" (a7) \ + : "memory"); \ + (var_sbi_hsm_ret).code = a0; \ + (var_sbi_hsm_ret).data = a1; \ + } while(0) + +#define SBI_HSM_CALL(which, arg0, arg1, arg2, var_sbi_hsm_ret) \ + SBI_EXT_CALL(SBI_HSM, (which), (arg0), (arg1), (arg2), var_sbi_hsm_ret) /* Lazy implementations until SBI is finalized */ #define SBI_CALL_0(which) SBI_CALL(which, 0, 0, 0) @@ -111,9 +130,12 @@ static inline void sbi_remote_sfence_vma_asid(const word_t *hart_mask, SBI_CALL_1(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask); } -static inline void sbi_hart_start(const word_t hart_id, - void (*start)(word_t hart_id, word_t arg), - word_t arg) +static inline sbi_hsm_ret_t sbi_hart_start(const word_t hart_id, + void (*start)(word_t hart_id, + word_t arg), + word_t arg) { - SBI_HSM_CALL(SBI_HSM_HART_START, hart_id, start, arg); + sbi_hsm_ret_t ret = { 0 }; + SBI_HSM_CALL(SBI_HSM_HART_START, hart_id, start, arg, ret); + return ret; } diff --git a/elfloader-tool/src/arch-riscv/boot.c b/elfloader-tool/src/arch-riscv/boot.c index 872dac7a..ff851f84 100644 --- a/elfloader-tool/src/arch-riscv/boot.c +++ b/elfloader-tool/src/arch-riscv/boot.c @@ -171,6 +171,7 @@ int hsm_exists = 0; /* assembly startup code will initialise this */ #if CONFIG_MAX_NUM_NODES > 1 extern void secondary_harts(word_t hart_id, word_t core_id); +extern void hsm_entry_on_secondary_hart(void); int secondary_go = 0; int next_logical_core_id = 1; /* incremented by assembly code */ @@ -211,6 +212,58 @@ static int is_core_ready(int core_id) return (0 != __atomic_load_n(&core_ready[core_id], __ATOMIC_RELAXED)); } +static void start_secondary_harts(word_t primary_hart_id) +{ + /* Take the multicore lock first, then start all secondary cores. This + * ensures the boot process on the primary core can continue without running + * into concurrency issues, until things can really run in parallel. The + * main use case for this currently is printing nicely serialized boot + * messages, + */ + acquire_multicore_lock(); + set_secondary_cores_go(); + /* Start all cores */ + if (!hsm_exists) { + /* Without the HSM extension, we can't start the cores explicitly. But + * they might be running already, so we do nothing here and just hope + * things work out. If the secondary cores don't start we are stuck. + */ + printf("no HSM extension, let's hope secondary cores have been started\n"); + return; + } + + /* If we are running on a platform with SBI HSM extension support, no other + * hart is running. The system starts harts in a random hart, but the + * assembly startup code has done the migration to the designated primary + * hart already and stopped the others. The global variable logical_core_id + * must still be untouched here, otherwise something is badly wrong. + */ + if (1 != next_logical_core_id) { + printf("ERROR: logical core IDs have been assigned already\n"); + abort(); + UNREACHABLE(); + } + /* Start all harts */ + for (int i = 0; i < CONFIG_MAX_NUM_NODES; i++) { + word_t remote_hart_id = i + 1; /* hart IDs start at 1 */ + if (remote_hart_id == CONFIG_FIRST_HART_ID) { + assert(remote_hart_id == primary_hart_id); + continue; /* this is the current hart */ + } + /* Start secondary hart, there is nothing to pass as custom + * parameter thus it's 0. + */ + sbi_hsm_ret_t ret = sbi_hart_start(remote_hart_id, + hsm_entry_on_secondary_hart, + 0); + if (SBI_SUCCESS != ret.code) { + printf("ERROR: could not start hart %"PRIu_word", failure" + " (%d, %d)\n", remote_hart_id, ret.code, ret.data); + abort(); + UNREACHABLE(); + } + } +} #endif /* CONFIG_MAX_NUM_NODES > 1 */ @@ -332,22 +385,7 @@ void main(word_t hart_id, void *bootloader_dtb) } #if CONFIG_MAX_NUM_NODES > 1 - /* Take the multicore lock first, then start all secondary cores. This - * ensures the boot process on the primary core can continue without running - * into concurrency issues, until things can really run in parallel. The - * main use case for this currently is printing nicely serialized boot - * messages, - */ - acquire_multicore_lock(); - set_secondary_cores_go(); - word_t i = 0; - while (i < CONFIG_MAX_NUM_NODES && hsm_exists) { - i++; - if (i != hart_id) { - sbi_hart_start(i, secondary_harts, i); - } - } -} + start_secondary_harts(hart_id); #endif /* CONFIG_MAX_NUM_NODES > 1 */ boot_hart(hart_id, 0); diff --git a/elfloader-tool/src/arch-riscv/crt0.S b/elfloader-tool/src/arch-riscv/crt0.S index 105b88be..3169f2d4 100644 --- a/elfloader-tool/src/arch-riscv/crt0.S +++ b/elfloader-tool/src/arch-riscv/crt0.S @@ -101,12 +101,8 @@ hsm_switch_hart: mv a2, s2 /* dtb address to be passed in a1 when new hart starts is 3rd parameter */ la a1, _start1 /* where to start the hart */ ecall /* call SBI to start hart FIRST_HART_ID */ - - /* Since we are not the designated primary hart, continue the boot process as - * secondary hart - */ - mv a0, s0 /* restore a0 to hold hart ID passed by OpenSBI */ - j secondary_harts + /* Stop current hart, the boot code may bring it up again when needed. */ + j hsm_suspend_hart /*----------------------------------------------------------------------------*/ _start1: @@ -117,7 +113,7 @@ _start1: * All that is left to be done here is setting up the registers gp and sp to * have a proper C environment. The hart we are running on now could be a * different HART to the one that we have been on in _start. The original hart - * we came from will get a different stack in secondary_harts. + * we came from will get a different stack in hsm_entry_on_secondary_hart. */ .option push .option norelax @@ -130,8 +126,8 @@ _start1: jr s0 /*----------------------------------------------------------------------------*/ -.global secondary_harts -secondary_harts: +.global hsm_entry_on_secondary_hart +hsm_entry_on_secondary_hart: .option push .option norelax