diff --git a/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S b/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S index aeed310..20120c9 100644 --- a/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S +++ b/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S @@ -7,6 +7,8 @@ #include #include #include +#include "../fpga_private.h" + #include .globl plat_get_my_entrypoint @@ -14,10 +16,10 @@ .globl plat_is_my_cpu_primary .globl platform_mem_init .globl plat_my_core_pos - .globl plat_fpga_calc_core_pos .globl plat_crash_console_init .globl plat_crash_console_putc .globl plat_crash_console_flush + .globl plat_fpga_calc_core_pos /* ----------------------------------------------------------------------- * Indicate a cold boot for every CPU - warm boot is unsupported for the @@ -34,23 +36,59 @@ * ----------------------------------------------------------------------- */ func plat_secondary_cold_boot_setup + + /* + * Wait for the primary processor to initialise the .BSS segment + * to avoid a race condition that would erase fpga_valid_mpids + * if it is populated before the C runtime is ready. + * + * We cannot use the current spin-lock implementation until the + * runtime is up and we should not rely on sevl/wfe instructions as + * it is optional whether they are implemented or not, so we use + * a global variable as lock and wait for the primary processor to + * finish the C runtime bring-up. + */ + + ldr w0, =C_RUNTIME_READY_KEY + adrp x1, secondary_core_spinlock + add x1, x1, :lo12:secondary_core_spinlock +1: + wfe + ldr w2, [x1] + cmp w2, w0 + b.ne 1b + /* Prevent reordering of the store into fpga_valid_mpids below */ + dmb ish + + mov x10, x30 + bl plat_my_core_pos + mov x30, x10 + + adrp x4, fpga_valid_mpids + add x4, x4, :lo12:fpga_valid_mpids + mov x5, #VALID_MPID + strb w5, [x4, x0] + /* * Poll the CPU's hold entry until it indicates to jump * to the entrypoint address. */ - bl plat_my_core_pos - lsl x0, x0, #PLAT_FPGA_HOLD_ENTRY_SHIFT - ldr x1, =hold_base - ldr x2, =fpga_sec_entrypoint + + adrp x1, hold_base + add x1, x1, :lo12:hold_base poll_hold_entry: - ldr x3, [x1, x0] + ldr x3, [x1, x0, LSL #PLAT_FPGA_HOLD_ENTRY_SHIFT] cmp x3, #PLAT_FPGA_HOLD_STATE_GO b.ne 1f + + adrp x2, fpga_sec_entrypoint + add x2, x2, :lo12:fpga_sec_entrypoint ldr x3, [x2] br x3 1: wfe b poll_hold_entry + endfunc plat_secondary_cold_boot_setup /* ----------------------------------------------------------------------- @@ -73,12 +111,16 @@ endfunc platform_mem_init func plat_my_core_pos + ldr x1, =(MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT)) mrs x0, mpidr_el1 + and x0, x0, x1 b plat_fpga_calc_core_pos + endfunc plat_my_core_pos /* ----------------------------------------------------------------------- - * unsigned int plat_fpga_calc_core_pos(u_register_t mpidr) + * unsigned int plat_fpga_calc_core_pos (uint32_t mpid) + * Clobber registers: x0 to x5 * ----------------------------------------------------------------------- */ func plat_fpga_calc_core_pos @@ -88,6 +130,7 @@ * * If not set, shift MPIDR to left to make it look as if in a * multi-threaded implementation. + * */ tst x0, #MPIDR_MT_MASK lsl x3, x0, #MPIDR_AFFINITY_BITS @@ -98,11 +141,13 @@ ubfx x1, x3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS ubfx x2, x3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS - /* Compute linear position */ mov x4, #FPGA_MAX_CPUS_PER_CLUSTER - madd x1, x2, x4, x1 mov x5, #FPGA_MAX_PE_PER_CPU + + /* Compute linear position */ + madd x1, x2, x4, x1 madd x0, x1, x5, x0 + ret endfunc plat_fpga_calc_core_pos diff --git a/plat/arm/board/arm_fpga/fpga_bl31_setup.c b/plat/arm/board/arm_fpga/fpga_bl31_setup.c index e4b9767..6eeff45 100644 --- a/plat/arm/board/arm_fpga/fpga_bl31_setup.c +++ b/plat/arm/board/arm_fpga/fpga_bl31_setup.c @@ -7,23 +7,23 @@ #include #include +#include #include -#include #include +#include "fpga_private.h" #include #include -#include "fpga_private.h" - static entry_point_info_t bl33_image_ep_info; +volatile uint32_t secondary_core_spinlock; uintptr_t plat_get_ns_image_entrypoint(void) { #ifdef PRELOADED_BL33_BASE return PRELOADED_BL33_BASE; #else - return 0; + return 0ULL; #endif } @@ -35,6 +35,17 @@ void bl31_early_platform_setup2(u_register_t arg0, u_register_t arg1, u_register_t arg2, u_register_t arg3) { + /* Add this core to the VALID mpids list */ + fpga_valid_mpids[plat_my_core_pos()] = VALID_MPID; + + /* + * Notify the secondary CPUs that the C runtime is ready + * so they can announce themselves. + */ + secondary_core_spinlock = C_RUNTIME_READY_KEY; + dsbish(); + sev(); + fpga_console_init(); bl33_image_ep_info.pc = plat_get_ns_image_entrypoint(); @@ -54,11 +65,37 @@ void bl31_platform_setup(void) { - /* Initialize the GIC driver, cpu and distributor interfaces */ - plat_fpga_gic_init(); - /* Write frequency to CNTCRL and initialize timer */ generic_delay_timer_init(); + + /* + * Before doing anything else, wait for some time to ensure that + * the secondary CPUs have populated the fpga_valid_mpids array. + * As the number of secondary cores is unknown and can even be 0, + * it is not possible to rely on any signal from them, so use a + * delay instead. + */ + mdelay(5); + + /* + * On the event of a cold reset issued by, for instance, a reset pin + * assertion, we cannot guarantee memory to be initialized to zero. + * In such scenario, if the secondary cores reached + * plat_secondary_cold_boot_setup before the primary one initialized + * .BSS, we could end up having a race condition if the spinlock + * was not cleared before. + * + * Similarly, if there were a reset before the spinlock had been + * cleared, the secondary cores would find the lock opened before + * .BSS is cleared, causing another race condition. + * + * So clean the spinlock as soon as we think it is safe to reduce the + * chances of any race condition on a reset. + */ + secondary_core_spinlock = 0UL; + + /* Initialize the GIC driver, cpu and distributor interfaces */ + plat_fpga_gic_init(); } entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type) diff --git a/plat/arm/board/arm_fpga/fpga_def.h b/plat/arm/board/arm_fpga/fpga_def.h index 5f1951f..2884ea6 100644 --- a/plat/arm/board/arm_fpga/fpga_def.h +++ b/plat/arm/board/arm_fpga/fpga_def.h @@ -18,6 +18,7 @@ * that are present will still be indexed appropriately regardless of any empty * entries in the array used to represent the topology. */ + #define FPGA_MAX_CLUSTER_COUNT 4 #define FPGA_MAX_CPUS_PER_CLUSTER 8 #define FPGA_MAX_PE_PER_CPU 4 diff --git a/plat/arm/board/arm_fpga/fpga_private.h b/plat/arm/board/arm_fpga/fpga_private.h index 7545bd1..46287ad 100644 --- a/plat/arm/board/arm_fpga/fpga_private.h +++ b/plat/arm/board/arm_fpga/fpga_private.h @@ -7,12 +7,23 @@ #ifndef FPGA_PRIVATE_H #define FPGA_PRIVATE_H -unsigned int plat_fpga_calc_core_pos(u_register_t mpidr); +#include "../fpga_def.h" +#include + +#define C_RUNTIME_READY_KEY (0xaa55aa55) +#define VALID_MPID (1U) + +#ifndef __ASSEMBLER__ + +extern unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT]; void fpga_console_init(void); void plat_fpga_gic_init(void); void fpga_pwr_gic_on_finish(void); void fpga_pwr_gic_off(void); +unsigned int plat_fpga_calc_core_pos(uint32_t mpid); -#endif +#endif /* __ASSEMBLER__ */ + +#endif /* FPGA_PRIVATE_H */ diff --git a/plat/arm/board/arm_fpga/fpga_topology.c b/plat/arm/board/arm_fpga/fpga_topology.c index a2908d7..7fead86 100644 --- a/plat/arm/board/arm_fpga/fpga_topology.c +++ b/plat/arm/board/arm_fpga/fpga_topology.c @@ -5,15 +5,20 @@ */ #include +#include +#include #include "fpga_private.h" +#include #include -static unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2]; +unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2]; +unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT]; const unsigned char *plat_get_power_domain_tree_desc(void) { - int i; + unsigned int i; + /* * The highest level is the system level. The next level is constituted * by clusters and then cores in clusters. @@ -21,12 +26,26 @@ * This description of the power domain topology is aligned with the CPU * indices returned by the plat_core_pos_by_mpidr() and plat_my_core_pos() * APIs. + * + * A description of the topology tree can be found at + * https://trustedfirmware-a.readthedocs.io/en/latest/design/psci-pd-tree.html#design */ - fpga_power_domain_tree_desc[0] = 1; - fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT; - for (i = 0; i < FPGA_MAX_CLUSTER_COUNT; i++) { - fpga_power_domain_tree_desc[i + 2] = FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU; + if (fpga_power_domain_tree_desc[0] == 0U) { + /* + * As fpga_power_domain_tree_desc[0] == 0, assume that the + * Power Domain Topology Tree has not been initialized, so + * perform the initialization here. + */ + + fpga_power_domain_tree_desc[0] = 1U; + fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT; + + for (i = 0U; i < FPGA_MAX_CLUSTER_COUNT; i++) { + fpga_power_domain_tree_desc[2 + i] = + (FPGA_MAX_CPUS_PER_CLUSTER * + FPGA_MAX_PE_PER_CPU); + } } return fpga_power_domain_tree_desc; @@ -34,40 +53,25 @@ int plat_core_pos_by_mpidr(u_register_t mpidr) { - unsigned int cluster_id, cpu_id, thread_id; + unsigned int core_pos; - /* - * The image running on the FPGA may or may not implement - * multithreading, and it shouldn't be assumed this is consistent - * across all CPUs. - * This ensures that any passed mpidr values reflect the status of the - * primary CPU's MT bit. - */ + mpidr &= (MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT)); mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK); - mpidr &= MPID_MASK; - if (mpidr & MPIDR_MT_MASK) { - thread_id = MPIDR_AFFLVL0_VAL(mpidr); - cpu_id = MPIDR_AFFLVL1_VAL(mpidr); - cluster_id = MPIDR_AFFLVL2_VAL(mpidr); - } else { - thread_id = 0; - cpu_id = MPIDR_AFFLVL0_VAL(mpidr); - cluster_id = MPIDR_AFFLVL1_VAL(mpidr); + if ((MPIDR_AFFLVL2_VAL(mpidr) >= FPGA_MAX_CLUSTER_COUNT) || + (MPIDR_AFFLVL1_VAL(mpidr) >= FPGA_MAX_CPUS_PER_CLUSTER) || + (MPIDR_AFFLVL0_VAL(mpidr) >= FPGA_MAX_PE_PER_CPU)) { + ERROR ("Invalid mpidr: 0x%08x\n", (uint32_t)mpidr); + panic(); } - if (cluster_id >= FPGA_MAX_CLUSTER_COUNT) { + /* Calculate the core position, based on the maximum topology. */ + core_pos = plat_fpga_calc_core_pos(mpidr); + + /* Check whether this core is actually present. */ + if (fpga_valid_mpids[core_pos] != VALID_MPID) { return -1; } - if (cpu_id >= FPGA_MAX_CPUS_PER_CLUSTER) { - return -1; - } - - if (thread_id >= FPGA_MAX_PE_PER_CPU) { - return -1; - } - - /* Calculate the correct core, catering for multi-threaded images */ - return (int) plat_fpga_calc_core_pos(mpidr); + return core_pos; } diff --git a/plat/arm/board/arm_fpga/include/platform_def.h b/plat/arm/board/arm_fpga/include/platform_def.h index 31fc987..411b936 100644 --- a/plat/arm/board/arm_fpga/include/platform_def.h +++ b/plat/arm/board/arm_fpga/include/platform_def.h @@ -21,11 +21,12 @@ #define CACHE_WRITEBACK_SHIFT U(6) #define CACHE_WRITEBACK_GRANULE (U(1) << CACHE_WRITEBACK_SHIFT) -#define PLATFORM_CORE_COUNT \ - (FPGA_MAX_CLUSTER_COUNT * FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU) +#define PLATFORM_CORE_COUNT \ + (FPGA_MAX_CLUSTER_COUNT * \ + FPGA_MAX_CPUS_PER_CLUSTER * \ + FPGA_MAX_PE_PER_CPU) -#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + \ - PLATFORM_CORE_COUNT) + 1 +#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + PLATFORM_CORE_COUNT + 1) #if !ENABLE_PIE #define BL31_BASE UL(0x80000000)