diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S index 82330c1..cfc6292 100644 --- a/bl1/aarch64/bl1_entrypoint.S +++ b/bl1/aarch64/bl1_entrypoint.S @@ -131,9 +131,11 @@ ldr x1, =__BSS_SIZE__ bl zeromem16 +#if USE_COHERENT_MEM ldr x0, =__COHERENT_RAM_START__ ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ bl zeromem16 +#endif ldr x0, =__DATA_RAM_START__ ldr x1, =__DATA_ROM_START__ diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S index 007149b..d682384 100644 --- a/bl1/bl1.ld.S +++ b/bl1/bl1.ld.S @@ -107,6 +107,7 @@ *(xlat_table) } >RAM +#if USE_COHERENT_MEM /* * The base address of the coherent memory section must be page-aligned (4K) * to guarantee that the coherent data are stored on their own pages and @@ -125,6 +126,7 @@ . = NEXT(4096); __COHERENT_RAM_END__ = .; } >RAM +#endif __BL1_RAM_START__ = ADDR(.data); __BL1_RAM_END__ = .; @@ -140,8 +142,10 @@ __BSS_SIZE__ = SIZEOF(.bss); +#if USE_COHERENT_MEM __COHERENT_RAM_UNALIGNED_SIZE__ = __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__; +#endif ASSERT(. <= BL1_RW_LIMIT, "BL1's RW section has exceeded its limit.") } diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S index 2f058da..499dc37 100644 --- a/bl2/aarch64/bl2_entrypoint.S +++ b/bl2/aarch64/bl2_entrypoint.S @@ -91,9 +91,11 @@ ldr x1, =__BSS_SIZE__ bl zeromem16 +#if USE_COHERENT_MEM ldr x0, =__COHERENT_RAM_START__ ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ bl zeromem16 +#endif /* -------------------------------------------- * Allocate a stack whose memory will be marked diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S index 65304de..9933339 100644 --- a/bl2/bl2.ld.S +++ b/bl2/bl2.ld.S @@ -93,6 +93,7 @@ *(xlat_table) } >RAM +#if USE_COHERENT_MEM /* * The base address of the coherent memory section must be page-aligned (4K) * to guarantee that the coherent data are stored on their own pages and @@ -111,12 +112,16 @@ . 
= NEXT(4096); __COHERENT_RAM_END__ = .; } >RAM +#endif __BL2_END__ = .; __BSS_SIZE__ = SIZEOF(.bss); + +#if USE_COHERENT_MEM __COHERENT_RAM_UNALIGNED_SIZE__ = __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__; +#endif ASSERT(. <= BL2_LIMIT, "BL2 image has exceeded its limit.") } diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S index 04063e1..b786b29 100644 --- a/bl31/aarch64/bl31_entrypoint.S +++ b/bl31/aarch64/bl31_entrypoint.S @@ -149,9 +149,11 @@ ldr x1, =__BSS_SIZE__ bl zeromem16 +#if USE_COHERENT_MEM ldr x0, =__COHERENT_RAM_START__ ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ bl zeromem16 +#endif /* --------------------------------------------- * Initialize the cpu_ops pointer. diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S index 124be85..3327f31 100644 --- a/bl31/bl31.ld.S +++ b/bl31/bl31.ld.S @@ -117,6 +117,7 @@ *(xlat_table) } >RAM +#if USE_COHERENT_MEM /* * The base address of the coherent memory section must be page-aligned (4K) * to guarantee that the coherent data are stored on their own pages and @@ -135,12 +136,15 @@ . = NEXT(4096); __COHERENT_RAM_END__ = .; } >RAM +#endif __BL31_END__ = .; __BSS_SIZE__ = SIZEOF(.bss); +#if USE_COHERENT_MEM __COHERENT_RAM_UNALIGNED_SIZE__ = __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__; +#endif ASSERT(. 
<= BL31_LIMIT, "BL3-1 image has exceeded its limit.") } diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S index 1cda165..2714282 100644 --- a/bl32/tsp/aarch64/tsp_entrypoint.S +++ b/bl32/tsp/aarch64/tsp_entrypoint.S @@ -108,9 +108,11 @@ ldr x1, =__BSS_SIZE__ bl zeromem16 +#if USE_COHERENT_MEM ldr x0, =__COHERENT_RAM_START__ ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ bl zeromem16 +#endif /* -------------------------------------------- * Allocate a stack whose memory will be marked diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S index 5d7ffa1..d411ad0 100644 --- a/bl32/tsp/tsp.ld.S +++ b/bl32/tsp/tsp.ld.S @@ -98,6 +98,7 @@ *(xlat_table) } >RAM +#if USE_COHERENT_MEM /* * The base address of the coherent memory section must be page-aligned (4K) * to guarantee that the coherent data are stored on their own pages and @@ -116,12 +117,15 @@ . = NEXT(4096); __COHERENT_RAM_END__ = .; } >RAM +#endif __BL32_END__ = .; __BSS_SIZE__ = SIZEOF(.bss); +#if USE_COHERENT_MEM __COHERENT_RAM_UNALIGNED_SIZE__ = __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__; +#endif ASSERT(. <= BL32_LIMIT, "BL3-2 image has exceeded its limit.") } diff --git a/bl32/tsp/tsp_main.c b/bl32/tsp/tsp_main.c index 193ba29..2eaca7c 100644 --- a/bl32/tsp/tsp_main.c +++ b/bl32/tsp/tsp_main.c @@ -43,7 +43,7 @@ * of trusted SRAM ******************************************************************************/ extern unsigned long __RO_START__; -extern unsigned long __COHERENT_RAM_END__; +extern unsigned long __BL32_END__; /******************************************************************************* * Lock to control access to the console @@ -63,11 +63,11 @@ /******************************************************************************* * The BL32 memory footprint starts with an RO sections and ends - * with a section for coherent RAM. Use it to find the memory size + * with the linker symbol __BL32_END__. 
Use it to find the memory size ******************************************************************************/ #define BL32_TOTAL_BASE (unsigned long)(&__RO_START__) -#define BL32_TOTAL_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#define BL32_TOTAL_LIMIT (unsigned long)(&__BL32_END__) static tsp_args_t *set_smc_args(uint64_t arg0, uint64_t arg1, diff --git a/docs/firmware-design.md b/docs/firmware-design.md index 41aaf7f..774ea43 100644 --- a/docs/firmware-design.md +++ b/docs/firmware-design.md @@ -12,8 +12,9 @@ 7. [CPU specific operations framework](#7--cpu-specific-operations-framework) 8. [Memory layout of BL images](#8-memory-layout-of-bl-images) 9. [Firmware Image Package (FIP)](#9--firmware-image-package-fip) -10. [Code Structure](#10--code-structure) -11. [References](#11--references) +10. [Use of coherent memory in Trusted Firmware](#10--use-of-coherent-memory-in-trusted-firmware) +11. [Code Structure](#11--code-structure) +12. [References](#12--references) 1. Introduction @@ -368,10 +369,10 @@ `ON`; any other cluster is `OFF`. BL3-1 initializes the data structures that implement the state machine, including the locks that protect them. BL3-1 accesses the state of a CPU or cluster immediately after reset and before - the MMU is enabled in the warm boot path. It is not currently possible to - use 'exclusive' based spinlocks, therefore BL3-1 uses locks based on - Lamport's Bakery algorithm instead. BL3-1 allocates these locks in device - memory. They are accessible irrespective of MMU state. + the data cache is enabled in the warm boot path. It is not currently + possible to use 'exclusive' based spinlocks, therefore BL3-1 uses locks + based on Lamport's Bakery algorithm instead. BL3-1 allocates these locks in + device memory by default. * Runtime services initialization: @@ -1127,9 +1128,10 @@ * `__BSS_START__` This address must be aligned on a 16-byte boundary. * `__BSS_SIZE__` -Similarly, the coherent memory section must be zero-initialised. 
Also, the MMU -setup code needs to know the extents of this section to set the right memory -attributes for it. The following linker symbols are defined for this purpose: +Similarly, the coherent memory section (if enabled) must be zero-initialised. +Also, the MMU setup code needs to know the extents of this section to set the +right memory attributes for it. The following linker symbols are defined for +this purpose: * `__COHERENT_RAM_START__` This address must be aligned on a page-size boundary. * `__COHERENT_RAM_END__` This address must be aligned on a page-size boundary. @@ -1443,7 +1445,208 @@ platform policy can be modified to allow additional images. -10. Code Structure +10. Use of coherent memory in Trusted Firmware +---------------------------------------------- + +There might be loss of coherency when physical memory with mismatched +shareability, cacheability and memory attributes is accessed by multiple CPUs +(refer to section B2.9 of [ARM ARM] for more details). This possibility occurs +in Trusted Firmware during power up/down sequences when coherency, MMU and +caches are turned on/off incrementally. + +Trusted Firmware defines coherent memory as a region of memory with Device +nGnRE attributes in the translation tables. The translation granule size in +Trusted Firmware is 4KB. This is the smallest possible size of the coherent +memory region. + +By default, all data structures which are susceptible to accesses with +mismatched attributes from various CPUs are allocated in a coherent memory +region (refer to section 2.1 of [Porting Guide]). The coherent memory region +accesses are Outer Shareable, non-cacheable and they can be accessed +with the Device nGnRE attributes when the MMU is turned on. Hence, at the +expense of at least an extra page of memory, Trusted Firmware is able to work +around coherency issues due to mismatched memory attributes. 
+ +The alternative to the above approach is to allocate the susceptible data +structures in Normal WriteBack WriteAllocate Inner shareable memory. This +approach requires the data structures to be designed so that it is possible to +work around the issue of mismatched memory attributes by performing software +cache maintenance on them. + +### Disabling the use of coherent memory in Trusted Firmware + +It might be desirable to avoid the cost of allocating coherent memory on +platforms which are memory constrained. Trusted Firmware enables inclusion of +coherent memory in firmware images through the build flag `USE_COHERENT_MEM`. +This flag is enabled by default. It can be disabled to choose the second +approach described above. + +The below sections analyze the data structures allocated in the coherent memory +region and the changes required to allocate them in normal memory. + +### PSCI Affinity map nodes + +The `psci_aff_map` data structure stores the hierarchical node information for +each affinity level in the system including the PSCI states associated with them. +By default, this data structure is allocated in the coherent memory region in +the Trusted Firmware because it can be accessed by multiple CPUs, either with +their caches enabled or disabled. + + typedef struct aff_map_node { + unsigned long mpidr; + unsigned char ref_count; + unsigned char state; + unsigned char level; + #if USE_COHERENT_MEM + bakery_lock_t lock; + #else + unsigned char aff_map_index; + #endif + } aff_map_node_t; + +In order to move this data structure to normal memory, the use of each of its +fields must be analyzed. Fields like `mpidr` and `level` are only written once +during cold boot. Hence removing them from coherent memory involves only doing +a clean and invalidate of the cache lines after these fields are written. + +The fields `state` and `ref_count` can be concurrently accessed by multiple +CPUs in different cache states. 
A Lamport's Bakery lock is used to ensure mutual +exclusion to these fields. As a result, it is possible to move these fields out +of coherent memory by performing software cache maintenance on them. The field +`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled. +The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM` +is disabled. + +### Bakery lock data + +The bakery lock data structure `bakery_lock_t` is allocated in coherent memory +and is accessed by multiple CPUs with mismatched attributes. `bakery_lock_t` is +defined as follows: + + typedef struct bakery_lock { + int owner; + volatile char entering[BAKERY_LOCK_MAX_CPUS]; + volatile unsigned number[BAKERY_LOCK_MAX_CPUS]; + } bakery_lock_t; + +It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU +fields can be read by all CPUs but only written to by the owning CPU. + +Depending upon the data cache line size, the per-CPU fields of the +`bakery_lock_t` structure for multiple CPUs may exist on a single cache line. +These per-CPU fields can be read and written during lock contention by multiple +CPUs with mismatched memory attributes. Since these fields are a part of the +lock implementation, they do not have access to any other locking primitive to +safeguard against the resulting coherency issues. As a result, simple software +cache maintenance is not enough to allocate them in normal memory. Consider +the following example. + +CPU0 updates its per-CPU field with data cache enabled. This write updates a +local cache line which contains a copy of the fields for other CPUs as well. Now +CPU1 updates its per-CPU field of the `bakery_lock_t` structure with data cache +disabled. CPU1 then issues a DCIVAC operation to invalidate any stale copies of +its field in any other cache line in the system. This operation will invalidate +the update made by CPU0 as well. 
+ +To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure +has been redesigned. The changes utilise the characteristic of Lamport's Bakery +algorithm mentioned earlier. The per-CPU fields of the new lock structure are +aligned such that they are allocated on separate cache lines. The per-CPU data +framework in Trusted Firmware is used to achieve this. This enables software to +perform software cache maintenance on the lock data structure without running +into coherency issues associated with mismatched attributes. + +The per-CPU data framework enables consolidation of data structures on the +fewest cache lines possible. This saves memory as compared to the scenario where +each data structure is separately aligned to the cache line boundary to achieve +the same effect. + +The bakery lock data structure `bakery_info_t` is defined for use when +`USE_COHERENT_MEM` is disabled as follows: + + typedef struct bakery_info { + /* + * The lock_data is a bit-field of 2 members: + * Bit[0] : choosing. This field is set when the CPU is + * choosing its bakery number. + * Bits[1 - 15] : number. This is the bakery number allocated. + */ + volatile uint16_t lock_data; + } bakery_info_t; + +The `bakery_info_t` represents a single per-CPU field of one lock and +the combination of corresponding `bakery_info_t` structures for all CPUs in the +system represents the complete bakery lock. It is embedded in the per-CPU +data framework `cpu_data` as shown below: + + CPU0 cpu_data + ------------------ + | .... | + |----------------| + | `bakery_info_t`| <-- Lock_0 per-CPU field + | Lock_0 | for CPU0 + |----------------| + | `bakery_info_t`| <-- Lock_1 per-CPU field + | Lock_1 | for CPU0 + |----------------| + | .... | + |----------------| + | `bakery_info_t`| <-- Lock_N per-CPU field + | Lock_N | for CPU0 + ------------------ + + + CPU1 cpu_data + ------------------ + | .... 
| + |----------------| + | `bakery_info_t`| <-- Lock_0 per-CPU field + | Lock_0 | for CPU1 + |----------------| + | `bakery_info_t`| <-- Lock_1 per-CPU field + | Lock_1 | for CPU1 + |----------------| + | .... | + |----------------| + | `bakery_info_t`| <-- Lock_N per-CPU field + | Lock_N | for CPU1 + ------------------ + +Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an +operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1 +`cpu_data` need to be fetched and appropriate cache operations need to be +performed for each access. + +For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data` +and each lock is given an `id` to identify it in the array. + +### Non Functional Impact of removing coherent memory + +Removal of the coherent memory region leads to the additional software overhead +of performing cache maintenance for the affected data structures. However, since +the memory where the data structures are allocated is cacheable, the overhead is +mostly mitigated by an increase in performance. + +There is however a performance impact for bakery locks, due to: +* Additional cache maintenance operations, and +* Multiple cache line reads for each lock operation, since the bakery locks + for each CPU are distributed across different cache lines. + +The implementation has been optimized to minimize this additional overhead. +Measurements indicate that when bakery locks are allocated in Normal memory, the +minimum latency of acquiring a lock is on an average 3-4 micro seconds whereas +in Device memory the same is 2 micro seconds. The measurements were done on the +Juno ARM development platform. + +As mentioned earlier, almost a page of memory can be saved by disabling +`USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide +whether coherent memory should be used. 
If a platform disables +`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should +reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see +the [Porting Guide]). Refer to the reference platform code for examples. + + +11. Code Structure ------------------- Trusted Firmware code is logically divided between the three boot loader @@ -1488,7 +1691,7 @@ kernel at boot time. These can be found in the `fdts` directory. -11. References +12. References --------------- 1. Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available @@ -1504,7 +1707,7 @@ _Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved._ - +[ARM ARM]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.e/index.html "ARMv8-A Reference Manual (ARM DDI0487A.E)" [PSCI]: http://infocenter.arm.com/help/topic/com.arm.doc.den0022b/index.html "Power State Coordination Interface PDD (ARM DEN 0022B.b)" [SMCCC]: http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html "SMC Calling Convention PDD (ARM DEN 0028A)" [UUID]: https://tools.ietf.org/rfc/rfc4122.txt "A Universally Unique IDentifier (UUID) URN Namespace" diff --git a/docs/porting-guide.md b/docs/porting-guide.md index 3855ca7..3d5e66f 100644 --- a/docs/porting-guide.md +++ b/docs/porting-guide.md @@ -63,11 +63,11 @@ stage. In the ARM FVP port, each BL stage configures the MMU in its platform- specific architecture setup function, for example `blX_plat_arch_setup()`. -Each platform must allocate a block of identity mapped secure memory with -Device-nGnRE attributes aligned to page boundary (4K) for each BL stage. 
This -memory is identified by the section name `tzfw_coherent_mem` so that its -possible for the firmware to place variables in it using the following C code -directive: +If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a +block of identity mapped secure memory with Device-nGnRE attributes aligned to +page boundary (4K) for each BL stage. This memory is identified by the section +name `tzfw_coherent_mem` so that it's possible for the firmware to place +variables in it using the following C code directive: __attribute__ ((section("tzfw_coherent_mem"))) @@ -246,6 +246,17 @@ entities than this value using `io_open()` will fail with IO_RESOURCES_EXHAUSTED. +If the platform needs to allocate data within the per-cpu data framework in +BL3-1, it should define the following macro. Currently this is only required if +the platform decides not to use the coherent memory section by disabling the +USE_COHERENT_MEM build flag. In this case, the framework allocates the required +memory within the per-cpu data to minimize wastage. + +* **#define : PLAT_PCPU_DATA_SIZE** + + Defines the memory (in bytes) to be reserved within the per-cpu data + structure for use by the platform layer. + The following constants are optional. They should be defined when the platform memory layout implies some image overlaying like on FVP. diff --git a/docs/user-guide.md b/docs/user-guide.md index b33c4c0..5ad44a8 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -245,6 +245,12 @@ synchronous method) or 1 (BL3-2 is initialized using asynchronous method). Default is 0. +* `USE_COHERENT_MEM`: This flag determines whether to include the coherent + memory region in the BL memory map or not (see "Use of Coherent memory in + Trusted Firmware" section in [Firmware Design]). It can take the value 1 + (Coherent memory region is included) or 0 (Coherent memory region is + excluded). Default is 1. 
+ #### FVP specific build options * `FVP_TSP_RAM_LOCATION`: location of the TSP binary. Options: diff --git a/plat/fvp/aarch64/fvp_common.c b/plat/fvp/aarch64/fvp_common.c index 987f48f..e20fe7d 100644 --- a/plat/fvp/aarch64/fvp_common.c +++ b/plat/fvp/aarch64/fvp_common.c @@ -136,7 +136,8 @@ * Macro generating the code for the function setting up the pagetables as per * the platform memory map & initialize the mmu, for the given exception level ******************************************************************************/ -#define DEFINE_CONFIGURE_MMU_EL(_el) \ +#if USE_COHERENT_MEM +#define DEFINE_CONFIGURE_MMU_EL(_el) \ void fvp_configure_mmu_el##_el(unsigned long total_base, \ unsigned long total_size, \ unsigned long ro_start, \ @@ -158,6 +159,25 @@ \ enable_mmu_el##_el(0); \ } +#else +#define DEFINE_CONFIGURE_MMU_EL(_el) \ + void fvp_configure_mmu_el##_el(unsigned long total_base, \ + unsigned long total_size, \ + unsigned long ro_start, \ + unsigned long ro_limit) \ + { \ + mmap_add_region(total_base, total_base, \ + total_size, \ + MT_MEMORY | MT_RW | MT_SECURE); \ + mmap_add_region(ro_start, ro_start, \ + ro_limit - ro_start, \ + MT_MEMORY | MT_RO | MT_SECURE); \ + mmap_add(fvp_mmap); \ + init_xlat_tables(); \ + \ + enable_mmu_el##_el(0); \ + } +#endif /* Define EL1 and EL3 variants of the function initialising the MMU */ DEFINE_CONFIGURE_MMU_EL(1) diff --git a/plat/fvp/bl1_fvp_setup.c b/plat/fvp/bl1_fvp_setup.c index b1205d4..4b421d7 100644 --- a/plat/fvp/bl1_fvp_setup.c +++ b/plat/fvp/bl1_fvp_setup.c @@ -40,6 +40,7 @@ #include "fvp_def.h" #include "fvp_private.h" +#if USE_COHERENT_MEM /******************************************************************************* * Declarations of linker defined symbols which will help us find the layout * of trusted SRAM @@ -56,6 +57,7 @@ */ #define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /* Data structure which holds 
the extents of the trusted SRAM for BL1*/ static meminfo_t bl1_tzram_layout; @@ -116,9 +118,12 @@ fvp_configure_mmu_el3(bl1_tzram_layout.total_base, bl1_tzram_layout.total_size, BL1_RO_BASE, - BL1_RO_LIMIT, - BL1_COHERENT_RAM_BASE, - BL1_COHERENT_RAM_LIMIT); + BL1_RO_LIMIT +#if USE_COHERENT_MEM + , BL1_COHERENT_RAM_BASE, + BL1_COHERENT_RAM_LIMIT +#endif + ); } diff --git a/plat/fvp/bl2_fvp_setup.c b/plat/fvp/bl2_fvp_setup.c index 67f89bc..71bd8c2 100644 --- a/plat/fvp/bl2_fvp_setup.c +++ b/plat/fvp/bl2_fvp_setup.c @@ -45,8 +45,10 @@ extern unsigned long __RO_START__; extern unsigned long __RO_END__; +#if USE_COHERENT_MEM extern unsigned long __COHERENT_RAM_START__; extern unsigned long __COHERENT_RAM_END__; +#endif /* * The next 2 constants identify the extents of the code & RO data region. @@ -57,6 +59,7 @@ #define BL2_RO_BASE (unsigned long)(&__RO_START__) #define BL2_RO_LIMIT (unsigned long)(&__RO_END__) +#if USE_COHERENT_MEM /* * The next 2 constants identify the extents of the coherent memory region. 
* These addresses are used by the MMU setup code and therefore they must be @@ -66,11 +69,11 @@ */ #define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /* Data structure which holds the extents of the trusted SRAM for BL2 */ static meminfo_t bl2_tzram_layout -__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE), - section("tzfw_coherent_mem"))); +__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE))); /* Assert that BL3-1 parameters fit in shared memory */ CASSERT((PARAMS_BASE + sizeof(bl2_to_bl31_params_mem_t)) < @@ -209,9 +212,12 @@ fvp_configure_mmu_el1(bl2_tzram_layout.total_base, bl2_tzram_layout.total_size, BL2_RO_BASE, - BL2_RO_LIMIT, - BL2_COHERENT_RAM_BASE, - BL2_COHERENT_RAM_LIMIT); + BL2_RO_LIMIT +#if USE_COHERENT_MEM + , BL2_COHERENT_RAM_BASE, + BL2_COHERENT_RAM_LIMIT +#endif + ); } /******************************************************************************* diff --git a/plat/fvp/bl31_fvp_setup.c b/plat/fvp/bl31_fvp_setup.c index 69efc9c..3874413 100644 --- a/plat/fvp/bl31_fvp_setup.c +++ b/plat/fvp/bl31_fvp_setup.c @@ -48,19 +48,25 @@ ******************************************************************************/ extern unsigned long __RO_START__; extern unsigned long __RO_END__; +extern unsigned long __BL31_END__; +#if USE_COHERENT_MEM extern unsigned long __COHERENT_RAM_START__; extern unsigned long __COHERENT_RAM_END__; +#endif /* - * The next 2 constants identify the extents of the code & RO data region. - * These addresses are used by the MMU setup code and therefore they must be - * page-aligned. It is the responsibility of the linker script to ensure that - * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses. + * The next 3 constants identify the extents of the code, RO data region and the + * limit of the BL3-1 image. These addresses are used by the MMU setup code and + * therefore they must be page-aligned. 
It is the responsibility of the linker + * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols + * refer to page-aligned addresses. */ #define BL31_RO_BASE (unsigned long)(&__RO_START__) #define BL31_RO_LIMIT (unsigned long)(&__RO_END__) +#define BL31_END (unsigned long)(&__BL31_END__) +#if USE_COHERENT_MEM /* * The next 2 constants identify the extents of the coherent memory region. * These addresses are used by the MMU setup code and therefore they must be @@ -70,7 +76,7 @@ */ #define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) - +#endif #if RESET_TO_BL31 static entry_point_info_t bl32_image_ep_info; @@ -235,9 +241,12 @@ fvp_cci_enable(); #endif fvp_configure_mmu_el3(BL31_RO_BASE, - (BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE), + (BL31_END - BL31_RO_BASE), BL31_RO_BASE, - BL31_RO_LIMIT, - BL31_COHERENT_RAM_BASE, - BL31_COHERENT_RAM_LIMIT); + BL31_RO_LIMIT +#if USE_COHERENT_MEM + , BL31_COHERENT_RAM_BASE, + BL31_COHERENT_RAM_LIMIT +#endif + ); } diff --git a/plat/fvp/fvp_private.h b/plat/fvp/fvp_private.h index 6f1a637..3949754 100644 --- a/plat/fvp/fvp_private.h +++ b/plat/fvp/fvp_private.h @@ -118,15 +118,21 @@ void fvp_configure_mmu_el1(unsigned long total_base, unsigned long total_size, unsigned long, - unsigned long, - unsigned long, - unsigned long); + unsigned long +#if USE_COHERENT_MEM + , unsigned long, + unsigned long +#endif + ); void fvp_configure_mmu_el3(unsigned long total_base, unsigned long total_size, unsigned long, - unsigned long, - unsigned long, - unsigned long); + unsigned long +#if USE_COHERENT_MEM + , unsigned long, + unsigned long +#endif + ); int fvp_config_setup(void); diff --git a/plat/fvp/tsp/tsp_fvp_setup.c b/plat/fvp/tsp/tsp_fvp_setup.c index 301f669..d8f46bd 100644 --- a/plat/fvp/tsp/tsp_fvp_setup.c +++ b/plat/fvp/tsp/tsp_fvp_setup.c @@ -40,19 +40,25 @@ 
******************************************************************************/ extern unsigned long __RO_START__; extern unsigned long __RO_END__; +extern unsigned long __BL32_END__; +#if USE_COHERENT_MEM extern unsigned long __COHERENT_RAM_START__; extern unsigned long __COHERENT_RAM_END__; +#endif /* - * The next 2 constants identify the extents of the code & RO data region. - * These addresses are used by the MMU setup code and therefore they must be - * page-aligned. It is the responsibility of the linker script to ensure that - * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses. + * The next 3 constants identify the extents of the code & RO data region and + * the limit of the BL3-2 image. These addresses are used by the MMU setup code + * and therefore they must be page-aligned. It is the responsibility of the + * linker script to ensure that __RO_START__, __RO_END__ & __BL32_END__ + * linker symbols refer to page-aligned addresses. */ #define BL32_RO_BASE (unsigned long)(&__RO_START__) #define BL32_RO_LIMIT (unsigned long)(&__RO_END__) +#define BL32_END (unsigned long)(&__BL32_END__) +#if USE_COHERENT_MEM /* * The next 2 constants identify the extents of the coherent memory region. 
* These addresses are used by the MMU setup code and therefore they must be @@ -62,6 +68,7 @@ */ #define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /******************************************************************************* * Initialize the UART @@ -93,9 +100,12 @@ void tsp_plat_arch_setup(void) { fvp_configure_mmu_el1(BL32_RO_BASE, - (BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE), + (BL32_END - BL32_RO_BASE), BL32_RO_BASE, - BL32_RO_LIMIT, - BL32_COHERENT_RAM_BASE, - BL32_COHERENT_RAM_LIMIT); + BL32_RO_LIMIT +#if USE_COHERENT_MEM + , BL32_COHERENT_RAM_BASE, + BL32_COHERENT_RAM_LIMIT +#endif + ); } diff --git a/plat/juno/aarch64/juno_common.c b/plat/juno/aarch64/juno_common.c index 8129b05..7ad40d0 100644 --- a/plat/juno/aarch64/juno_common.c +++ b/plat/juno/aarch64/juno_common.c @@ -140,6 +140,7 @@ * Macro generating the code for the function setting up the pagetables as per * the platform memory map & initialize the mmu, for the given exception level ******************************************************************************/ +#if USE_COHERENT_MEM #define DEFINE_CONFIGURE_MMU_EL(_el) \ void configure_mmu_el##_el(unsigned long total_base, \ unsigned long total_size, \ @@ -162,7 +163,25 @@ \ enable_mmu_el##_el(0); \ } - +#else +#define DEFINE_CONFIGURE_MMU_EL(_el) \ + void configure_mmu_el##_el(unsigned long total_base, \ + unsigned long total_size, \ + unsigned long ro_start, \ + unsigned long ro_limit) \ + { \ + mmap_add_region(total_base, total_base, \ + total_size, \ + MT_MEMORY | MT_RW | MT_SECURE); \ + mmap_add_region(ro_start, ro_start, \ + ro_limit - ro_start, \ + MT_MEMORY | MT_RO | MT_SECURE); \ + mmap_add(juno_mmap); \ + init_xlat_tables(); \ + \ + enable_mmu_el##_el(0); \ + } +#endif /* Define EL1 and EL3 variants of the function initialising the MMU */ DEFINE_CONFIGURE_MMU_EL(1) DEFINE_CONFIGURE_MMU_EL(3) diff --git a/plat/juno/bl1_plat_setup.c 
b/plat/juno/bl1_plat_setup.c index e27e394..23e8592 100644 --- a/plat/juno/bl1_plat_setup.c +++ b/plat/juno/bl1_plat_setup.c @@ -41,6 +41,7 @@ #include "juno_def.h" #include "juno_private.h" +#if USE_COHERENT_MEM /******************************************************************************* * Declarations of linker defined symbols which will help us find the layout * of trusted RAM @@ -57,6 +58,7 @@ */ #define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /* Data structure which holds the extents of the trusted RAM for BL1 */ static meminfo_t bl1_tzram_layout; @@ -189,9 +191,12 @@ configure_mmu_el3(bl1_tzram_layout.total_base, bl1_tzram_layout.total_size, TZROM_BASE, - TZROM_BASE + TZROM_SIZE, - BL1_COHERENT_RAM_BASE, - BL1_COHERENT_RAM_LIMIT); + TZROM_BASE + TZROM_SIZE +#if USE_COHERENT_MEM + , BL1_COHERENT_RAM_BASE, + BL1_COHERENT_RAM_LIMIT +#endif + ); } /******************************************************************************* diff --git a/plat/juno/bl2_plat_setup.c b/plat/juno/bl2_plat_setup.c index 900a587..8e7b2a0 100644 --- a/plat/juno/bl2_plat_setup.c +++ b/plat/juno/bl2_plat_setup.c @@ -47,8 +47,10 @@ extern unsigned long __RO_START__; extern unsigned long __RO_END__; +#if USE_COHERENT_MEM extern unsigned long __COHERENT_RAM_START__; extern unsigned long __COHERENT_RAM_END__; +#endif /* * The next 2 constants identify the extents of the code & RO data region. @@ -59,6 +61,7 @@ #define BL2_RO_BASE (unsigned long)(&__RO_START__) #define BL2_RO_LIMIT (unsigned long)(&__RO_END__) +#if USE_COHERENT_MEM /* * The next 2 constants identify the extents of the coherent memory region. 
* These addresses are used by the MMU setup code and therefore they must be @@ -68,11 +71,11 @@ */ #define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /* Data structure which holds the extents of the trusted RAM for BL2 */ static meminfo_t bl2_tzram_layout -__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE), - section("tzfw_coherent_mem"))); +__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE))); /******************************************************************************* * Structure which holds the arguments which need to be passed to BL3-1 @@ -194,9 +197,12 @@ configure_mmu_el1(bl2_tzram_layout.total_base, bl2_tzram_layout.total_size, BL2_RO_BASE, - BL2_RO_LIMIT, - BL2_COHERENT_RAM_BASE, - BL2_COHERENT_RAM_LIMIT); + BL2_RO_LIMIT +#if USE_COHERENT_MEM + , BL2_COHERENT_RAM_BASE, + BL2_COHERENT_RAM_LIMIT +#endif + ); } /******************************************************************************* diff --git a/plat/juno/bl31_plat_setup.c b/plat/juno/bl31_plat_setup.c index c450462..ad8ea43 100644 --- a/plat/juno/bl31_plat_setup.c +++ b/plat/juno/bl31_plat_setup.c @@ -48,19 +48,25 @@ ******************************************************************************/ extern unsigned long __RO_START__; extern unsigned long __RO_END__; +extern unsigned long __BL31_END__; +#if USE_COHERENT_MEM extern unsigned long __COHERENT_RAM_START__; extern unsigned long __COHERENT_RAM_END__; +#endif /* - * The next 2 constants identify the extents of the code & RO data region. - * These addresses are used by the MMU setup code and therefore they must be - * page-aligned. It is the responsibility of the linker script to ensure that - * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses. + * The next 3 constants identify the extents of the code, RO data region and the + * limit of the BL3-1 image. 
These addresses are used by the MMU setup code and + * therefore they must be page-aligned. It is the responsibility of the linker + * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols + * refer to page-aligned addresses. */ #define BL31_RO_BASE (unsigned long)(&__RO_START__) #define BL31_RO_LIMIT (unsigned long)(&__RO_END__) +#define BL31_END (unsigned long)(&__BL31_END__) +#if USE_COHERENT_MEM /* * The next 2 constants identify the extents of the coherent memory region. * These addresses are used by the MMU setup code and therefore they must be @@ -70,6 +76,7 @@ */ #define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /****************************************************************************** * Placeholder variables for copying the arguments that have been passed to @@ -178,9 +185,13 @@ void bl31_plat_arch_setup() { configure_mmu_el3(BL31_RO_BASE, - BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE, + (BL31_END - BL31_RO_BASE), BL31_RO_BASE, - BL31_RO_LIMIT, + BL31_RO_LIMIT +#if USE_COHERENT_MEM + , BL31_COHERENT_RAM_BASE, - BL31_COHERENT_RAM_LIMIT); + BL31_COHERENT_RAM_LIMIT +#endif + ); } diff --git a/plat/juno/juno_private.h b/plat/juno/juno_private.h index b7ef448..70439e8 100644 --- a/plat/juno/juno_private.h +++ b/plat/juno/juno_private.h @@ -134,15 +134,21 @@ void configure_mmu_el1(unsigned long total_base, unsigned long total_size, unsigned long ro_start, - unsigned long ro_limit, - unsigned long coh_start, - unsigned long coh_limit); + unsigned long ro_limit +#if USE_COHERENT_MEM + , unsigned long coh_start, + unsigned long coh_limit +#endif + ); void configure_mmu_el3(unsigned long total_base, unsigned long total_size, unsigned long ro_start, - unsigned long ro_limit, - unsigned long coh_start, - unsigned long coh_limit); + unsigned long ro_limit +#if USE_COHERENT_MEM + , unsigned long coh_start, + unsigned long coh_limit +#endif + ); 
void plat_report_exception(unsigned long type); unsigned long plat_get_ns_image_entrypoint(void); unsigned long platform_get_stack(unsigned long mpidr); diff --git a/plat/juno/tsp/tsp_plat_setup.c b/plat/juno/tsp/tsp_plat_setup.c index 0a9d4cb..8293a13 100644 --- a/plat/juno/tsp/tsp_plat_setup.c +++ b/plat/juno/tsp/tsp_plat_setup.c @@ -40,19 +40,25 @@ ******************************************************************************/ extern unsigned long __RO_START__; extern unsigned long __RO_END__; +extern unsigned long __BL32_END__; +#if USE_COHERENT_MEM extern unsigned long __COHERENT_RAM_START__; extern unsigned long __COHERENT_RAM_END__; +#endif /* - * The next 2 constants identify the extents of the code & RO data region. - * These addresses are used by the MMU setup code and therefore they must be - * page-aligned. It is the responsibility of the linker script to ensure that - * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses. + * The next 3 constants identify the extents of the code, RO data region and the + * limit of the BL3-2 image. These addresses are used by the MMU setup code and + * therefore they must be page-aligned. It is the responsibility of the linker + * script to ensure that __RO_START__, __RO_END__ & __BL32_END__ linker symbols + * refer to page-aligned addresses. */ #define BL32_RO_BASE (unsigned long)(&__RO_START__) #define BL32_RO_LIMIT (unsigned long)(&__RO_END__) +#define BL32_END (unsigned long)(&__BL32_END__) +#if USE_COHERENT_MEM /* * The next 2 constants identify the extents of the coherent memory region. 
* These addresses are used by the MMU setup code and therefore they must be @@ -62,6 +68,7 @@ */ #define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__) #define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__) +#endif /******************************************************************************* * Initialize the UART @@ -90,9 +97,12 @@ void tsp_plat_arch_setup(void) { configure_mmu_el1(BL32_RO_BASE, - BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE, + (BL32_END - BL32_RO_BASE), BL32_RO_BASE, - BL32_RO_LIMIT, - BL32_COHERENT_RAM_BASE, - BL32_COHERENT_RAM_LIMIT); + BL32_RO_LIMIT +#if USE_COHERENT_MEM + , BL32_COHERENT_RAM_BASE, + BL32_COHERENT_RAM_LIMIT +#endif + ); } diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c index c984e9e..0a1cdf9 100644 --- a/services/std_svc/psci/psci_common.c +++ b/services/std_svc/psci/psci_common.c @@ -51,7 +51,10 @@ * corresponds to an affinity instance e.g. cluster, cpu within an mpidr ******************************************************************************/ aff_map_node_t psci_aff_map[PSCI_NUM_AFFS] -__attribute__ ((section("tzfw_coherent_mem"))); +#if USE_COHERENT_MEM +__attribute__ ((section("tzfw_coherent_mem"))) +#endif +; /******************************************************************************* * Pointer to functions exported by the platform to complete power mgmt. 
ops @@ -352,6 +355,10 @@ ******************************************************************************/ unsigned short psci_get_state(aff_map_node_t *node) { +#if !USE_COHERENT_MEM + flush_dcache_range((uint64_t) node, sizeof(*node)); +#endif + assert(node->level >= MPIDR_AFFLVL0 && node->level <= MPIDR_MAX_AFFLVL); /* A cpu node just contains the state which can be directly returned */ @@ -409,6 +416,10 @@ node->state &= ~(PSCI_STATE_MASK << PSCI_STATE_SHIFT); node->state |= (state & PSCI_STATE_MASK) << PSCI_STATE_SHIFT; } + +#if !USE_COHERENT_MEM + flush_dcache_range((uint64_t) node, sizeof(*node)); +#endif } /******************************************************************************* diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c index a750256..be504e8 100644 --- a/services/std_svc/psci/psci_setup.c +++ b/services/std_svc/psci/psci_setup.c @@ -331,13 +331,20 @@ afflvl); } +#if !USE_COHERENT_MEM + /* + * The psci_aff_map only needs flushing when it's not allocated in + * coherent memory. + */ + flush_dcache_range((uint64_t) &psci_aff_map, sizeof(psci_aff_map)); +#endif + /* * Set the bounds for the affinity counts of each level in the map. Also * flush out the entire array so that it's visible to subsequent power - * management operations. The 'psci_aff_map' array is allocated in - * coherent memory so does not need flushing. The 'psci_aff_limits' - * array is allocated in normal memory. It will be accessed when the mmu - * is off e.g. after reset. Hence it needs to be flushed. + * management operations. The 'psci_aff_limits' array is allocated in + * normal memory. It will be accessed when the mmu is off e.g. after + * reset. Hence it needs to be flushed. */ for (afflvl = MPIDR_AFFLVL0; afflvl < max_afflvl; afflvl++) { psci_aff_limits[afflvl].min =