diff --git a/arch/arm/cpu/cache-armv7.S b/arch/arm/cpu/cache-armv7.S
index 7a1c5c0..6a8aff8 100644
--- a/arch/arm/cpu/cache-armv7.S
+++ b/arch/arm/cpu/cache-armv7.S
@@ -83,7 +83,10 @@
 	ands	r3, r0, #0x7000000	@ extract loc from clidr
 	mov	r3, r3, lsr #23		@ left align loc bit field
 	beq	finished		@ if loc is 0, then no need to clean
-	mov	r12, #0			@ start clean at cache level 0
+	cmp	r8, #0
+THUMB(	ite	eq			)
+	moveq	r12, #0
+	subne	r12, r3, #2		@ start invalidate at outermost cache level
 loop1:
 	add	r2, r12, r12, lsr #1	@ work out 3x current cache level
 	mov	r1, r0, lsr r2		@ extract cache type bits from clidr
@@ -118,8 +121,16 @@
 	subs	r7, r7, #1		@ decrement the index
 	bge	loop2
 skip:
+	cmp	r8, #0
+	bne	inval_check
 	add	r12, r12, #2		@ increment cache number
 	cmp	r3, r12
+	b	loop_end_check
+inval_check:
+	cmp	r12, #0
+	sub	r12, r12, #2		@ decrement cache number
+loop_end_check:
+	dsb				@ work-around Cortex-A7 erratum 814220
 	bgt	loop1
 finished:
 	ldmfd	sp!, {r4-r11}
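
The hunks above keep the clean path walking from cache level 0 outward, but make the invalidate-only path (r8 != 0 in this routine) start at the outermost level below the Level of Coherency and walk inward, with a dsb between levels as the workaround for Cortex-A7 erratum 814220. A minimal C model of the traversal order encoded in r12, which holds twice the current level; the function and variable names are illustrative, not part of the patch:

#include <stdio.h>

/* Illustrative model: r3 in the assembly holds 2*loc, r12 holds 2*level. */
static void walk_cache_levels(int loc, int invalidate_only)
{
	int r12;	/* 2 * cache level, as in the assembly */

	if (invalidate_only) {
		/* subne r12, r3, #2: begin at the outermost level, go inward */
		for (r12 = 2 * (loc - 1); r12 >= 0; r12 -= 2)
			printf("invalidate by set/way, level %d\n", r12 / 2);
	} else {
		/* moveq r12, #0: begin at level 0, work outward */
		for (r12 = 0; r12 < 2 * loc; r12 += 2)
			printf("clean by set/way, level %d\n", r12 / 2);
	}
	/* the assembly issues a dsb after each level (erratum 814220) */
}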
diff --git a/arch/arm/cpu/lowlevel_64.S b/arch/arm/cpu/lowlevel_64.S
index af1cd8b..6a23132 100644
--- a/arch/arm/cpu/lowlevel_64.S
+++ b/arch/arm/cpu/lowlevel_64.S
@@ -12,6 +12,13 @@
 	orr	x0, x0, #(1 << 10)	/* 64-bit EL2 */
 	msr	scr_el3, x0
 	msr	cptr_el3, xzr
+
+	mrs	x0, sctlr_el3
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1
+	msr	sctlr_el3, x0
+	isb
+
 	b	done
 2:
diff --git a/arch/arm/cpu/mmu-early.c b/arch/arm/cpu/mmu-early.c
index d39a03e..2f5876f 100644
--- a/arch/arm/cpu/mmu-early.c
+++ b/arch/arm/cpu/mmu-early.c
@@ -5,17 +5,19 @@
 #include <asm/memory.h>
 #include <asm/system.h>
 #include <asm/cache.h>
+#include <asm-generic/sections.h>
 
 #include "mmu.h"
 
 static uint32_t *ttb;
 
-static void map_cachable(unsigned long start, unsigned long size)
+static inline void map_region(unsigned long start, unsigned long size,
+			      uint64_t flags)
 {
 	start = ALIGN_DOWN(start, SZ_1M);
 	size = ALIGN(size, SZ_1M);
 
-	create_sections(ttb, start, start + size - 1, PMD_SECT_DEF_CACHED);
+	create_sections(ttb, start, start + size - 1, flags);
 }
 
 void mmu_early_enable(unsigned long membase, unsigned long memsize,
@@ -28,9 +31,27 @@
 	set_ttbr(ttb);
 	set_domain(DOMAIN_MANAGER);
 
+	/*
+	 * This marks the whole address space as uncacheable and, where
+	 * possible, non-executable.
+	 */
 	create_flat_mapping(ttb);
 
-	map_cachable(membase, memsize);
+	/*
+	 * There can be SoCs that have a section shared between device memory
+	 * and the on-chip RAM hosting the PBL. Thus mark this section
+	 * uncacheable, but executable.
+	 * On such SoCs, executing from OCRAM could cause the instruction
+	 * prefetcher to speculatively access that device memory, triggering
+	 * potentially errant behavior.
+	 *
+	 * If your SoC has such a memory layout, you should rewrite the code
+	 * here to map the OCRAM page-wise.
+	 */
+	map_region((unsigned long)_stext, _etext - _stext, PMD_SECT_DEF_UNCACHED);
+
+	/* map main memory as cacheable */
+	map_region(membase, memsize, PMD_SECT_DEF_CACHED);
 
 	__mmu_cache_on();
 }
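
map_region() above delegates to create_sections(), which is defined in arch/arm/cpu/mmu.h and not shown in this diff. For reference, a sketch of what such a helper does, assuming barebox's 1 MiB first-level sections; the real implementation may differ in detail:

#include <stdint.h>

#define PGDIR_SHIFT	20		/* one first-level entry per 1 MiB */
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define pgd_index(addr)	((addr) >> PGDIR_SHIFT)

/* Sketch (assumption): fill one section descriptor per 1 MiB of the
 * [first, last] range, tagging every entry with the given attribute bits. */
static inline void create_sections(uint32_t *ttb, unsigned long first,
				   unsigned long last, unsigned int flags)
{
	unsigned long i = pgd_index(first);
	unsigned long end = pgd_index(last) + 1;
	unsigned long addr = first;

	for (; i < end; i++) {
		ttb[i] = addr | flags;	/* section base | attribute bits */
		addr += PGDIR_SIZE;
	}
}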
diff --git a/arch/arm/cpu/mmu.c b/arch/arm/cpu/mmu.c
index 29816ad..123e19e 100644
--- a/arch/arm/cpu/mmu.c
+++ b/arch/arm/cpu/mmu.c
@@ -34,7 +34,6 @@
 
 #include "mmu.h"
 
-#define PMD_SECT_DEF_CACHED (PMD_SECT_WB | PMD_SECT_DEF_UNCACHED)
 #define PTRS_PER_PTE		(PGDIR_SIZE / PAGE_SIZE)
 #define ARCH_MAP_WRITECOMBINE	((unsigned)-1)
 
@@ -58,11 +57,13 @@
 }
 
 #define PTE_FLAGS_CACHED_V7	(PTE_EXT_TEX(1) | PTE_BUFFERABLE | PTE_CACHEABLE)
-#define PTE_FLAGS_WC_V7		PTE_EXT_TEX(1)
-#define PTE_FLAGS_UNCACHED_V7	(0)
+#define PTE_FLAGS_WC_V7		(PTE_EXT_TEX(1) | PTE_EXT_XN)
+#define PTE_FLAGS_UNCACHED_V7	PTE_EXT_XN
 #define PTE_FLAGS_CACHED_V4	(PTE_SMALL_AP_UNO_SRW | PTE_BUFFERABLE | PTE_CACHEABLE)
 #define PTE_FLAGS_UNCACHED_V4	PTE_SMALL_AP_UNO_SRW
-#define PGD_FLAGS_WC_V7		(PMD_SECT_TEX(1) | PMD_TYPE_SECT | PMD_SECT_BUFFERABLE)
+#define PGD_FLAGS_WC_V7		(PMD_SECT_TEX(1) | PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
+				 PMD_SECT_XN)
+#define PGD_FLAGS_UNCACHED_V7	(PMD_SECT_DEF_UNCACHED | PMD_SECT_XN)
 /*
  * PTE flags to set cached and uncached areas.
  */
@@ -72,6 +73,7 @@
 static uint32_t pte_flags_wc;
 static uint32_t pte_flags_uncached;
 static uint32_t pgd_flags_wc;
+static uint32_t pgd_flags_uncached;
 
 #define PTE_MASK ((1 << 12) - 1)
 
@@ -164,7 +166,7 @@
 		break;
 	case MAP_UNCACHED:
 		pte_flags = pte_flags_uncached;
-		pgd_flags = PMD_SECT_DEF_UNCACHED;
+		pgd_flags = pgd_flags_uncached;
 		break;
 	case ARCH_MAP_WRITECOMBINE:
 		pte_flags = pte_flags_wc;
@@ -247,7 +249,7 @@
 	unsigned long start = (unsigned long)_start, sec;
 
 	for (sec = start; sec < start + size; sec += PGDIR_SIZE, phys += PGDIR_SIZE)
-		ttb[pgd_index(sec)] = phys | PMD_SECT_DEF_UNCACHED;
+		ttb[pgd_index(sec)] = phys | pgd_flags_uncached;
 
 	dma_flush_range(ttb, 0x4000);
 	tlb_invalidate();
@@ -411,11 +413,13 @@
 		pte_flags_cached = PTE_FLAGS_CACHED_V7;
 		pte_flags_wc = PTE_FLAGS_WC_V7;
 		pgd_flags_wc = PGD_FLAGS_WC_V7;
+		pgd_flags_uncached = PGD_FLAGS_UNCACHED_V7;
 		pte_flags_uncached = PTE_FLAGS_UNCACHED_V7;
 	} else {
 		pte_flags_cached = PTE_FLAGS_CACHED_V4;
 		pte_flags_wc = PTE_FLAGS_UNCACHED_V4;
 		pgd_flags_wc = PMD_SECT_DEF_UNCACHED;
+		pgd_flags_uncached = PMD_SECT_DEF_UNCACHED;
 		pte_flags_uncached = PTE_FLAGS_UNCACHED_V4;
 	}
 
diff --git a/arch/arm/cpu/mmu.h b/arch/arm/cpu/mmu.h
index 338728a..c911ee2 100644
--- a/arch/arm/cpu/mmu.h
+++ b/arch/arm/cpu/mmu.h
@@ -3,6 +3,7 @@
 
 #include <asm/pgtable.h>
 #include <linux/sizes.h>
+#include <asm/system_info.h>
 
 #include "mmu-common.h"
 
@@ -62,8 +63,13 @@
 
 static inline void create_flat_mapping(uint32_t *ttb)
 {
+	unsigned int flags = PMD_SECT_DEF_UNCACHED;
+
+	if (cpu_architecture() >= CPU_ARCH_ARMv7)
+		flags |= PMD_SECT_XN;
+
 	/* create a flat mapping using 1MiB sections */
-	create_sections(ttb, 0, 0xffffffff, PMD_SECT_DEF_UNCACHED);
+	create_sections(ttb, 0, 0xffffffff, flags);
 }
 
 #endif /* __ARM_MMU_H */
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index ef9cb98..a0180f2 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -1,6 +1,26 @@
 #ifndef __ASM_ARM_SYSTEM_H
 #define __ASM_ARM_SYSTEM_H
 
+#include <linux/const.h>
+
+/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_DSSBS	(_BITUL(44))
+#define SCTLR_ELx_ENIA	(_BITUL(31))
+#define SCTLR_ELx_ENIB	(_BITUL(30))
+#define SCTLR_ELx_ENDA	(_BITUL(27))
+#define SCTLR_ELx_EE	(_BITUL(25))
+#define SCTLR_ELx_IESB	(_BITUL(21))
+#define SCTLR_ELx_WXN	(_BITUL(19))
+#define SCTLR_ELx_ENDB	(_BITUL(13))
+#define SCTLR_ELx_I	(_BITUL(12))
+#define SCTLR_ELx_SA	(_BITUL(3))
+#define SCTLR_ELx_C	(_BITUL(2))
+#define SCTLR_ELx_A	(_BITUL(1))
+#define SCTLR_ELx_M	(_BITUL(0))
+
+#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+			 SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
+
 #if __LINUX_ARM_ARCH__ >= 7
 #define isb() __asm__ __volatile__ ("isb" : : : "memory")
 #ifdef CONFIG_CPU_64v8
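
The new PMD_SECT_XN/PTE_EXT_XN usage relies on the ARMv7 short-descriptor format, where eXecute Never is bit 4 of a 1 MiB section entry and bit 0 of a 4 KiB small-page entry. A small self-checking sketch with the bit positions spelled out (values per the ARMv7 ARM; these are local copies for illustration, not the asm/pgtable.h originals):

#include <assert.h>
#include <stdint.h>

#define PMD_TYPE_SECT	(2 << 0)	/* [1:0] = 0b10: 1 MiB section */
#define PMD_SECT_XN	(1 << 4)	/* section descriptor XN bit */
#define PTE_EXT_XN	(1 << 0)	/* small-page descriptor XN bit */

int main(void)
{
	/* an uncached section at 0x10000000, now also execute-never */
	uint32_t sect = 0x10000000 | PMD_TYPE_SECT | PMD_SECT_XN;

	assert((sect & 3) == PMD_TYPE_SECT);	/* still decodes as a section */
	assert(sect & PMD_SECT_XN);		/* instruction fetch will fault */
	return 0;
}

Pre-v6 descriptors have no XN bit, which is why the V4 fallbacks in mmu.c keep using PMD_SECT_DEF_UNCACHED unchanged.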
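
One visible effect of routing MAP_UNCACHED through pgd_flags_uncached: anything remapped uncached at runtime is now also execute-never on v7. A hypothetical call site, assuming barebox's remap_range()/MAP_UNCACHED API; the buffer, size and header paths here are made up for illustration:

#include <linux/sizes.h>	/* SZ_1M */
#include <mmu.h>		/* remap_range(), MAP_UNCACHED (assumption) */

/* Hypothetical: 'buf' is a 1 MiB-aligned scratch buffer. After this patch,
 * MAP_UNCACHED implies XN on ARMv7, so a stray jump into the buffer faults
 * instead of executing stale or speculatively fetched data. */
static void make_buffer_uncached(void *buf)
{
	remap_range(buf, SZ_1M, MAP_UNCACHED);
}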
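
SCTLR_ELx_FLAGS collects exactly the bits that the lowlevel_64.S hunk clears in SCTLR_EL3 before continuing: MMU (M), alignment check (A), data cache (C), SP alignment check (SA), instruction cache (I) and implicit error synchronization (IESB). A compile-time sanity check of the composite value, using a local stand-in for _BITUL (assumed to be 1UL << n, as in linux/const.h):

#define _BITUL(n)	(1UL << (n))	/* local stand-in, see linux/const.h */

#define SCTLR_ELx_IESB	(_BITUL(21))
#define SCTLR_ELx_I	(_BITUL(12))
#define SCTLR_ELx_SA	(_BITUL(3))
#define SCTLR_ELx_C	(_BITUL(2))
#define SCTLR_ELx_A	(_BITUL(1))
#define SCTLR_ELx_M	(_BITUL(0))

#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
			 SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)

/* the bic in lowlevel_64.S clears these six bits: 0x0020100f */
_Static_assert(SCTLR_ELx_FLAGS == 0x20100fUL, "unexpected SCTLR mask");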