@@ -1095,6 +1095,18 @@ config ARM64_RAS_EXTN
and access the new registers if the system supports the extension.
Platform RAS features may additionally depend on firmware support.
+config ARM64_SKIP_CACHE_POU
+ bool "Enable support to skip cache POU operations"
+ default y
+ help
+ Explicit point of unification (PoU) cache operations can be eliminated
+ in software if the hardware handles them transparently. The new
+ CTR_EL0 bits, CTR_EL0.DIC and CTR_EL0.IDC, advertise whether the
+ hardware requires I-cache and D-cache maintenance to the PoU.
+
+ Selecting this feature allows the kernel to skip the PoU cache
+ maintenance operations 'DC CVAU' and 'IC IVAU' on such hardware.
+
endmenu
config ARM64_SVE
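For context (not part of the patch): userspace, e.g. a JIT, can make the
same decision before flushing freshly written code. A minimal sketch,
assuming the kernel traps and emulates the CTR_EL0 read with the
sanitised value (as advertised via HWCAP_CPUID); the helper names here
are illustrative:

	#include <stdbool.h>
	#include <stdint.h>

	static inline uint64_t read_ctr_el0(void)
	{
		uint64_t ctr;

		/* Trapped and emulated by the kernel with the sanitised value */
		asm volatile("mrs %0, ctr_el0" : "=r" (ctr));
		return ctr;
	}

	static bool icache_invalidate_needed(void)
	{
		/* CTR_EL0.DIC (bit 29): set when 'IC IVAU' is not required */
		return !(read_ctr_el0() & (1ULL << 29));
	}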
@@ -444,6 +444,11 @@
* Corrupts: tmp1, tmp2
*/
.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+#ifdef CONFIG_ARM64_SKIP_CACHE_POU
+alternative_if ARM64_HAS_CACHE_DIC
+ b 9996f
+alternative_else_nop_endif
+#endif
icache_line_size \tmp1, \tmp2
sub \tmp2, \tmp1, #1
bic \tmp2, \start, \tmp2
@@ -453,6 +458,7 @@
cmp \tmp2, \end
b.lo 9997b
dsb ish
+9996:
isb
.endm
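At boot, the alternatives framework patches the NOPs above into a branch
when ARM64_HAS_CACHE_DIC is detected. In C terms the patched macro then
behaves roughly like the sketch below (the loop helper name is
illustrative; the real loop stays in assembly):

	if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) {
		isb();				/* branch to 9996: skips loop and DSB */
	} else {
		__ic_ivau_loop(start, end);	/* 'IC IVAU' per I-cache line */
		dsb(ish);
		isb();
	}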
@@ -20,8 +20,13 @@
#define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3
+#define CTR_DMINLINE_SHIFT 16
+#define CTR_ERG_SHIFT 20
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
+#define CTR_IDC_SHIFT 28
+#define CTR_DIC_SHIFT 29
+#define CTR_B31_SHIFT 31
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
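For reference, the CTR_EL0 fields these shifts describe (per the Arm ARM)
are: IminLine [3:0], L1Ip [15:14], DminLine [19:16], ERG [23:20],
CWG [27:24], IDC [28], DIC [29], and RES1 [31].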
@@ -45,7 +45,9 @@
#define ARM64_HARDEN_BRANCH_PREDICTOR 24
#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
#define ARM64_HAS_RAS_EXTN 26
+#define ARM64_HAS_CACHE_IDC 27
+#define ARM64_HAS_CACHE_DIC 28
-#define ARM64_NCAPS 27
+#define ARM64_NCAPS 29
#endif /* __ASM_CPUCAPS_H */
@@ -199,12 +199,12 @@ static int __init register_cpu_hwcaps_dumper(void)
};
static const struct arm64_ftr_bits ftr_ctr[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, CTR_B31_SHIFT, 1, 1), /* RES1 */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), /* DIC */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), /* IDC */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_CWG_SHIFT, 4, 0), /* CWG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_ERG_SHIFT, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine.
@@ -864,6 +864,20 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
ID_AA64PFR0_FP_SHIFT) < 0;
}
+#ifdef CONFIG_ARM64_SKIP_CACHE_POU
+static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
+ int __unused)
+{
+ return (read_sanitised_ftr_reg(SYS_CTR_EL0) & (1UL << CTR_IDC_SHIFT));
+}
+
+static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
+ int __unused)
+{
+ return (read_sanitised_ftr_reg(SYS_CTR_EL0) & (1UL << CTR_DIC_SHIFT));
+}
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -1100,6 +1114,20 @@ static int cpu_copy_el2regs(void *__unused)
.enable = cpu_clear_disr,
},
#endif /* CONFIG_ARM64_RAS_EXTN */
+#ifdef CONFIG_ARM64_SKIP_CACHE_POU
+ {
+ .desc = "Skip D-Cache maintenance 'DC CVAU' (CTR_EL0.IDC=1)",
+ .capability = ARM64_HAS_CACHE_IDC,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cache_idc,
+ },
+ {
+ .desc = "Skip I-Cache maintenance 'IC IVAU' (CTR_EL0.DIC=1)",
+ .capability = ARM64_HAS_CACHE_DIC,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cache_dic,
+ },
+#endif /* CONFIG_ARM64_SKIP_CACHE_POU */
{},
};
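Once registered, the capabilities can also be consulted from C. A hedged
sketch of a kernel-side consumer, assuming the __clean_dcache_area_pou()
and invalidate_icache_range() helpers from <asm/cacheflush.h> are
available (this patch itself only wires up the asm alternatives):

	#include <asm/cacheflush.h>
	#include <asm/cpufeature.h>

	static void sync_for_execution(unsigned long start, unsigned long end)
	{
		if (!cpus_have_const_cap(ARM64_HAS_CACHE_IDC))
			__clean_dcache_area_pou((void *)start, end - start);

		if (!cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
			invalidate_icache_range(start, end);	/* 'IC IVAU' + barriers */
		else
			isb();
	}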
@@ -50,6 +50,12 @@ ENTRY(flush_icache_range)
*/
ENTRY(__flush_cache_user_range)
uaccess_ttbr0_enable x2, x3, x4
+#ifdef CONFIG_ARM64_SKIP_CACHE_POU
+alternative_if ARM64_HAS_CACHE_IDC
+ dsb ishst
+ b 8f
+alternative_else_nop_endif
+#endif
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
@@ -60,6 +66,7 @@ user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
b.lo 1b
dsb ish
+8:
invalidate_icache_by_line x0, x1, x2, x3, 9f
mov x0, #0
1:
@@ -116,6 +123,12 @@ ENDPIPROC(__flush_dcache_area)
* - size - size in question
*/
ENTRY(__clean_dcache_area_pou)
+#ifdef CONFIG_ARM64_SKIP_CACHE_POU
+alternative_if ARM64_HAS_CACHE_IDC
+ dsb ishst
+ ret
+alternative_else_nop_endif
+#endif
dcache_by_line_op cvau, ish, x0, x1, x2, x3
ret
ENDPROC(__clean_dcache_area_pou)
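For reference, a main in-kernel caller of __clean_dcache_area_pou() is
the sync_icache_aliases() path in arch/arm64/mm/flush.c, shown here
lightly simplified; on a system with both IDC and DIC set, the whole
sequence now reduces to barriers:

	static void sync_icache_aliases(void *kaddr, unsigned long len)
	{
		unsigned long addr = (unsigned long)kaddr;

		if (icache_is_aliasing()) {
			__clean_dcache_area_pou(kaddr, len);	/* 'dsb ishst' when IDC=1 */
			__flush_icache_all();
		} else {
			flush_icache_range(addr, addr + len);
		}
	}

Note that the fast paths added above still issue 'dsb ishst': IDC=1
removes the need for 'DC CVAU', but the stores that wrote the new
instructions must still be ordered before any subsequent I-cache
invalidation or instruction fetch.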