Message ID | 20241022092734.59984-4-yangyicong@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support Armv8.9/v9.4 FEAT_HAFT | expand |
On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote: > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 3e29b44d2d7b..029d7ad89de5 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -2176,6 +2176,21 @@ config ARCH_PKEY_BITS > int > default 3 > > +config ARM64_HAFT > + bool "Support for Hardware managed Access Flag for Table Descriptor" Super nit: s/Descriptor/Descriptors/ > + depends on ARM64_HW_AFDBM > + default y > + help > + The ARMv8.9/ARMv9.5 introduces the feature Hardware managed Access > + Flag for Table descriptors. When enabled an architectural executed > + memory access will update the Access Flag in each Table descriptor > + which is accessed during the translation table walk and for which > + the Access Flag is 0. The Access Flag of the Table descriptor use > + the same bit of PTE_AF. > + > + The feature will only be enabled if all the CPUs in the system > + support this feature. If unsure, say Y. > + > endmenu # "ARMv8.9 architectural features" > > config ARM64_SVE > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h > index 3d261cc123c1..fba2347c0aa6 100644 > --- a/arch/arm64/include/asm/cpufeature.h > +++ b/arch/arm64/include/asm/cpufeature.h > @@ -879,6 +879,30 @@ static inline bool cpu_has_hw_af(void) > ID_AA64MMFR1_EL1_HAFDBS_SHIFT); > } > > +/* > + * Contrary to the page/block access flag, the table access flag > + * cannot be emulated in software (no access fault will occur). > + * So users should use this feature only if it's supported system > + * wide. > + */ > +static inline bool system_support_haft(void) > +{ > + unsigned int hafdbs; > + u64 mmfr1; > + > + if (!IS_ENABLED(CONFIG_ARM64_HAFT)) > + return false; > + > + /* > + * Check the sanitised registers to see this feature is supported > + * on all the CPUs. 
> + */ > + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); > + hafdbs = cpuid_feature_extract_unsigned_field(mmfr1, > + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); > + return hafdbs >= ID_AA64MMFR1_EL1_HAFDBS_HAFT; > +} Can we not have just an entry in the arm64_features array with the type ARM64_CPUCAP_SYSTEM_FEATURE and avoid the explicit checks here? > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h > index 8ff5f2a2579e..bc1051d65125 100644 > --- a/arch/arm64/include/asm/pgalloc.h > +++ b/arch/arm64/include/asm/pgalloc.h > @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) > { > pudval_t pudval = PUD_TYPE_TABLE; > > - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; > + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN; > __pud_populate(pudp, __pa(pmdp), pudval); > } Why not set the table AF for the task entries? I haven't checked the core code but normally when we map a pte it's mapped as young. While for table AF we wouldn't get a fault, I would have thought the core code follows the same logic. 
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index 718728a85430..6eeaaa80f6fe 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -2046,6 +2046,18 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, > > #endif > > +#if CONFIG_ARM64_HAFT > + > +static struct cpumask haft_cpus; > + > +static void cpu_enable_haft(struct arm64_cpu_capabilities const *cap) > +{ > + if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) > + cpumask_set_cpu(smp_processor_id(), &haft_cpus); > +} > + > +#endif /* CONFIG_ARM64_HAFT */ > + > #ifdef CONFIG_ARM64_AMU_EXTN > > /* > @@ -2590,6 +2602,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { > .cpus = &dbm_cpus, > ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) > }, > +#endif > +#ifdef CONFIG_ARM64_HAFT > + { > + .desc = "Hardware managed Access Flag for Table Descriptor", > + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something similar for HW DBM but there we get a fault and set the pte dirty. You combined it with a system_support_haft() that checks the sanitised regs but I'd rather have a static branch check via cpus_have_cap(). Even with your approach we can have a race with a late CPU hot-plugged that doesn't have the feature in the middle of some core code walking the page tables. With a system feature type, late CPUs not having the feature won't be brought online (if feature enabled) but in general I don't have much sympathy for SoC vendors combining CPUs with incompatible features ;). > + .capability = ARM64_HAFT, > + .matches = has_cpuid_feature, > + .cpu_enable = cpu_enable_haft, > + .cpus = &haft_cpus, > + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT) > + }, [...] 
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S > index ccbae4525891..4a58b9b36eb6 100644 > --- a/arch/arm64/mm/proc.S > +++ b/arch/arm64/mm/proc.S > @@ -495,9 +495,15 @@ alternative_else_nop_endif > * via capabilities. > */ > mrs x9, ID_AA64MMFR1_EL1 > - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK > + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 > cbz x9, 1f > orr tcr, tcr, #TCR_HA // hardware Access flag update > + > +#ifdef CONFIG_ARM64_HAFT > + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT > + b.lt 1f > + orr tcr2, tcr2, TCR2_EL1x_HAFT > +#endif /* CONFIG_ARM64_HAFT */ I think we can skip the ID check here and always set the HAFT bit. We do something similar with MTE (not for TCR_HA though, don't remember why).
On 2024/10/23 2:30, Catalin Marinas wrote: > On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote: >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 3e29b44d2d7b..029d7ad89de5 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -2176,6 +2176,21 @@ config ARCH_PKEY_BITS >> int >> default 3 >> >> +config ARM64_HAFT >> + bool "Support for Hardware managed Access Flag for Table Descriptor" > > Super nit: s/Descriptor/Descriptors/ > sure. >> + depends on ARM64_HW_AFDBM >> + default y >> + help >> + The ARMv8.9/ARMv9.5 introduces the feature Hardware managed Access >> + Flag for Table descriptors. When enabled an architectural executed >> + memory access will update the Access Flag in each Table descriptor >> + which is accessed during the translation table walk and for which >> + the Access Flag is 0. The Access Flag of the Table descriptor use >> + the same bit of PTE_AF. >> + >> + The feature will only be enabled if all the CPUs in the system >> + support this feature. If unsure, say Y. >> + >> endmenu # "ARMv8.9 architectural features" >> >> config ARM64_SVE >> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h >> index 3d261cc123c1..fba2347c0aa6 100644 >> --- a/arch/arm64/include/asm/cpufeature.h >> +++ b/arch/arm64/include/asm/cpufeature.h >> @@ -879,6 +879,30 @@ static inline bool cpu_has_hw_af(void) >> ID_AA64MMFR1_EL1_HAFDBS_SHIFT); >> } >> >> +/* >> + * Contrary to the page/block access flag, the table access flag >> + * cannot be emulated in software (no access fault will occur). >> + * So users should use this feature only if it's supported system >> + * wide. >> + */ >> +static inline bool system_support_haft(void) >> +{ >> + unsigned int hafdbs; >> + u64 mmfr1; >> + >> + if (!IS_ENABLED(CONFIG_ARM64_HAFT)) >> + return false; >> + >> + /* >> + * Check the sanitised registers to see this feature is supported >> + * on all the CPUs. 
>> + */ >> + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); >> + hafdbs = cpuid_feature_extract_unsigned_field(mmfr1, >> + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); >> + return hafdbs >= ID_AA64MMFR1_EL1_HAFDBS_HAFT; >> +} > > Can we not have just an entry in the arm64_features array with the type > ARM64_CPUCAP_SYSTEM_FEATURE and avoid the explicit checks here? > reply below... >> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h >> index 8ff5f2a2579e..bc1051d65125 100644 >> --- a/arch/arm64/include/asm/pgalloc.h >> +++ b/arch/arm64/include/asm/pgalloc.h >> @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) >> { >> pudval_t pudval = PUD_TYPE_TABLE; >> >> - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; >> + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN; >> __pud_populate(pudp, __pa(pmdp), pudval); >> } > > Why not set the table AF for the task entries? I haven't checked the > core code but normally when we map a pte it's mapped as young. While for > table AF we wouldn't get a fault, I would have thought the core code > follows the same logic. > I may need to check. If I understand it correctly, for most case (e.g. a read fault) we should make pte young if the hardware AF update is not supported. Otherwise hardware will help to update. 
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c >> index 718728a85430..6eeaaa80f6fe 100644 >> --- a/arch/arm64/kernel/cpufeature.c >> +++ b/arch/arm64/kernel/cpufeature.c >> @@ -2046,6 +2046,18 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, >> >> #endif >> >> +#if CONFIG_ARM64_HAFT >> + >> +static struct cpumask haft_cpus; >> + >> +static void cpu_enable_haft(struct arm64_cpu_capabilities const *cap) >> +{ >> + if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) >> + cpumask_set_cpu(smp_processor_id(), &haft_cpus); >> +} >> + >> +#endif /* CONFIG_ARM64_HAFT */ >> + >> #ifdef CONFIG_ARM64_AMU_EXTN >> >> /* >> @@ -2590,6 +2602,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { >> .cpus = &dbm_cpus, >> ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) >> }, >> +#endif >> +#ifdef CONFIG_ARM64_HAFT >> + { >> + .desc = "Hardware managed Access Flag for Table Descriptor", >> + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, > > I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something > similar for HW DBM but there we get a fault and set the pte dirty. You > combined it with a system_support_haft() that checks the sanitised regs > but I'd rather have a static branch check via cpus_have_cap(). Even with > your approach we can have a race with a late CPU hot-plugged that > doesn't have the feature in the middle of some core code walking the > page tables. > > With a system feature type, late CPUs not having the feature won't be > brought online (if feature enabled) but in general I don't have much > sympathy for SoC vendors combining CPUs with incompatible features ;). > ok. If we make it a system feature, we can using cpus_have_cap() then and drop the system_support_haft() which is checking with the sanitised registers. It's fine for me. 
Will ask to not refuse online a CPU due to mismatch of this feature in [1], hope we have an agreement :) [1] https://lore.kernel.org/linux-arm-kernel/20240820161822.GC28750@willie-the-truck/ >> + .capability = ARM64_HAFT, >> + .matches = has_cpuid_feature, >> + .cpu_enable = cpu_enable_haft, >> + .cpus = &haft_cpus, >> + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT) >> + }, > [...] >> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S >> index ccbae4525891..4a58b9b36eb6 100644 >> --- a/arch/arm64/mm/proc.S >> +++ b/arch/arm64/mm/proc.S >> @@ -495,9 +495,15 @@ alternative_else_nop_endif >> * via capabilities. >> */ >> mrs x9, ID_AA64MMFR1_EL1 >> - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK >> + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 >> cbz x9, 1f >> orr tcr, tcr, #TCR_HA // hardware Access flag update >> + >> +#ifdef CONFIG_ARM64_HAFT >> + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT >> + b.lt 1f >> + orr tcr2, tcr2, TCR2_EL1x_HAFT >> +#endif /* CONFIG_ARM64_HAFT */ > > I think we can skip the ID check here and always set the HAFT bit. We do > something similar with MTE (not for TCR_HA though, don't remember why). > Thanks for the reference to MTE. Will see and have a test. But a check here may seem more reasonable as we usually detect a feature first then enable it? Thanks.
On Wed, Oct 23, 2024 at 06:30:18PM +0800, Yicong Yang wrote: > On 2024/10/23 2:30, Catalin Marinas wrote: > > On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote: > >> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h > >> index 8ff5f2a2579e..bc1051d65125 100644 > >> --- a/arch/arm64/include/asm/pgalloc.h > >> +++ b/arch/arm64/include/asm/pgalloc.h > >> @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) > >> { > >> pudval_t pudval = PUD_TYPE_TABLE; > >> > >> - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; > >> + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN; > >> __pud_populate(pudp, __pa(pmdp), pudval); > >> } > > > > Why not set the table AF for the task entries? I haven't checked the > > core code but normally when we map a pte it's mapped as young. While for > > table AF we wouldn't get a fault, I would have thought the core code > > follows the same logic. > > I may need to check. If I understand it correctly, for most case (e.g. > a read fault) we should make pte young if the hardware AF update is > not supported. Otherwsie hardware will help to update. On arm64 at least, _PROT_DEFAULT has PTE_AF set. So all accessible entries in protection_map[] will have it set. I'm not sure how the core code clears PTE_AF in the table entries. I'd have thought it goes together with some pte_mkold(). > >> +#ifdef CONFIG_ARM64_HAFT > >> + { > >> + .desc = "Hardware managed Access Flag for Table Descriptor", > >> + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, > > > > I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something > > similar for HW DBM but there we get a fault and set the pte dirty. You > > combined it with a system_support_haft() that checks the sanitised regs > > but I'd rather have a static branch check via cpus_have_cap(). 
Even with > > your approach we can have a race with a late CPU hot-plugged that > > doesn't have the feature in the middle of some core code walking the > > page tables. > > > > With a system feature type, late CPUs not having the feature won't be > > brought online (if feature enabled) but in general I don't have much > > sympathy for SoC vendors combining CPUs with incompatible features ;). > > ok. If we make it a system feature, we can using cpus_have_cap() then and > drop the system_support_haft() which is checking with the sanitised registers. > It's fine for me. > > Will ask to not refuse online a CPU due to mismatch of this feature in [1], > hope we have an agreement :) > > [1] https://lore.kernel.org/linux-arm-kernel/20240820161822.GC28750@willie-the-truck/ I initially thought this would work but I don't feel easy about having should_clear_pmd_young() change its polarity at runtime while user space is running. If that's not a problem, we can go with your current approach. > >> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S > >> index ccbae4525891..4a58b9b36eb6 100644 > >> --- a/arch/arm64/mm/proc.S > >> +++ b/arch/arm64/mm/proc.S > >> @@ -495,9 +495,15 @@ alternative_else_nop_endif > >> * via capabilities. > >> */ > >> mrs x9, ID_AA64MMFR1_EL1 > >> - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK > >> + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 > >> cbz x9, 1f > >> orr tcr, tcr, #TCR_HA // hardware Access flag update > >> + > >> +#ifdef CONFIG_ARM64_HAFT > >> + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT > >> + b.lt 1f > >> + orr tcr2, tcr2, TCR2_EL1x_HAFT > >> +#endif /* CONFIG_ARM64_HAFT */ > > > > I think we can skip the ID check here and always set the HAFT bit. We do > > something similar with MTE (not for TCR_HA though, don't remember why). > > Thanks for the reference to MTE. Will see and have a test. But a check > here may seem more reasonable as we usually detect a feature first > then enable it? 
The behaviour of these RES0 bits is that we can write them and if the feature is present, it will be enabled, otherwise it won't have any effect, so it's not necessary to check the ID bits, the result would be the same. We do this in other places as well. Of course, we need to check the presence of TCR2_EL1, otherwise it would undef. Just a bit less code since we want the feature on anyway.
On 2024/10/23 20:36, Catalin Marinas wrote: > On Wed, Oct 23, 2024 at 06:30:18PM +0800, Yicong Yang wrote: >> On 2024/10/23 2:30, Catalin Marinas wrote: >>> On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote: >>>> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h >>>> index 8ff5f2a2579e..bc1051d65125 100644 >>>> --- a/arch/arm64/include/asm/pgalloc.h >>>> +++ b/arch/arm64/include/asm/pgalloc.h >>>> @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) >>>> { >>>> pudval_t pudval = PUD_TYPE_TABLE; >>>> >>>> - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; >>>> + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN; >>>> __pud_populate(pudp, __pa(pmdp), pudval); >>>> } >>> >>> Why not set the table AF for the task entries? I haven't checked the >>> core code but normally when we map a pte it's mapped as young. While for >>> table AF we wouldn't get a fault, I would have thought the core code >>> follows the same logic. >> >> I may need to check. If I understand it correctly, for most case (e.g. >> a read fault) we should make pte young if the hardware AF update is >> not supported. Otherwsie hardware will help to update. > > On arm64 at least, _PROT_DEFAULT has PTE_AF set. So all accessible > entries in protection_map[] will have it set. I'm not sure how the core > code clears PTE_AF in the table entries. I'd have thought it goes > together with some pte_mkold(). > you're right. Checked that x86 will set the AF bit for the table entry [1][2]. Will set AF for task entries. 
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/pgalloc.h#n84 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/pgtable_types.h#n221 >>>> +#ifdef CONFIG_ARM64_HAFT >>>> + { >>>> + .desc = "Hardware managed Access Flag for Table Descriptor", >>>> + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, >>> >>> I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something >>> similar for HW DBM but there we get a fault and set the pte dirty. You >>> combined it with a system_support_haft() that checks the sanitised regs >>> but I'd rather have a static branch check via cpus_have_cap(). Even with >>> your approach we can have a race with a late CPU hot-plugged that >>> doesn't have the feature in the middle of some core code walking the >>> page tables. >>> >>> With a system feature type, late CPUs not having the feature won't be >>> brought online (if feature enabled) but in general I don't have much >>> sympathy for SoC vendors combining CPUs with incompatible features ;). >> >> ok. If we make it a system feature, we can using cpus_have_cap() then and >> drop the system_support_haft() which is checking with the sanitised registers. >> It's fine for me. >> >> Will ask to not refuse online a CPU due to mismatch of this feature in [1], >> hope we have an agreement :) >> >> [1] https://lore.kernel.org/linux-arm-kernel/20240820161822.GC28750@willie-the-truck/ > > I initially thought this would work but I don't feel easy about having > should_clear_pmd_young() change its polarity at runtime while user space > is running. If that's not a problem, we can go with your current > approach. this should be ok as I image. after online a CPU without HAFT the system won't advertise HAFT support but we don't disable the HAFT update on the supported CPUs, the ongoing page aging process can still use the updated table AF information and later process will fallback to use the PTE's AF bit. 
efficiency may be reduced but the function should be correct. the user setting will be changed after online a CPU without HAFT. the user won't know this unless they read /sys/kernel/mm/lru_gen/enabled again (LRU_GEN_NONLEAF_YOUNG bit 0x4 will be cleared after online a CPU without HAFT since system would not declare non-leaf PMD young support). >>>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S >>>> index ccbae4525891..4a58b9b36eb6 100644 >>>> --- a/arch/arm64/mm/proc.S >>>> +++ b/arch/arm64/mm/proc.S >>>> @@ -495,9 +495,15 @@ alternative_else_nop_endif >>>> * via capabilities. >>>> */ >>>> mrs x9, ID_AA64MMFR1_EL1 >>>> - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK >>>> + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 >>>> cbz x9, 1f >>>> orr tcr, tcr, #TCR_HA // hardware Access flag update >>>> + >>>> +#ifdef CONFIG_ARM64_HAFT >>>> + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT >>>> + b.lt 1f >>>> + orr tcr2, tcr2, TCR2_EL1x_HAFT >>>> +#endif /* CONFIG_ARM64_HAFT */ >>> >>> I think we can skip the ID check here and always set the HAFT bit. We do >>> something similar with MTE (not for TCR_HA though, don't remember why). >> >> Thanks for the reference to MTE. Will see and have a test. But a check >> here may seem more reasonable as we usually detect a feature first >> then enable it? > > The behaviour of these RES0 bits is that we can write them and if the > feature is present, it will be enabled, otherwise it won't have any > effect, so it's not necessary to check the ID bits, the result would be > the same. We do this in other places as well. > > Of course, we need to check the presence of TCR2_EL1, otherwise it would > undef. Just a bit less code since we want the feature on anyway. sure. will drop the check and set the HAFT bit unconditionally. Thanks.
On 2024/10/24 22:45, Yicong Yang wrote: > On 2024/10/23 20:36, Catalin Marinas wrote: >> On Wed, Oct 23, 2024 at 06:30:18PM +0800, Yicong Yang wrote: >>> On 2024/10/23 2:30, Catalin Marinas wrote: >>>> On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote: >>>>> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h >>>>> index 8ff5f2a2579e..bc1051d65125 100644 >>>>> --- a/arch/arm64/include/asm/pgalloc.h >>>>> +++ b/arch/arm64/include/asm/pgalloc.h >>>>> @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) >>>>> { >>>>> pudval_t pudval = PUD_TYPE_TABLE; >>>>> >>>>> - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; >>>>> + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN; >>>>> __pud_populate(pudp, __pa(pmdp), pudval); >>>>> } >>>> >>>> Why not set the table AF for the task entries? I haven't checked the >>>> core code but normally when we map a pte it's mapped as young. While for >>>> table AF we wouldn't get a fault, I would have thought the core code >>>> follows the same logic. >>> >>> I may need to check. If I understand it correctly, for most case (e.g. >>> a read fault) we should make pte young if the hardware AF update is >>> not supported. Otherwsie hardware will help to update. >> >> On arm64 at least, _PROT_DEFAULT has PTE_AF set. So all accessible >> entries in protection_map[] will have it set. I'm not sure how the core >> code clears PTE_AF in the table entries. I'd have thought it goes >> together with some pte_mkold(). >> > > you're right. Checked that x86 will set the AF bit for the table entry [1][2]. > Will set AF for task entries. 
> > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/pgalloc.h#n84 > [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/pgtable_types.h#n221 > >>>>> +#ifdef CONFIG_ARM64_HAFT >>>>> + { >>>>> + .desc = "Hardware managed Access Flag for Table Descriptor", >>>>> + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, >>>> >>>> I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something >>>> similar for HW DBM but there we get a fault and set the pte dirty. You >>>> combined it with a system_support_haft() that checks the sanitised regs >>>> but I'd rather have a static branch check via cpus_have_cap(). Even with >>>> your approach we can have a race with a late CPU hot-plugged that >>>> doesn't have the feature in the middle of some core code walking the >>>> page tables. >>>> >>>> With a system feature type, late CPUs not having the feature won't be >>>> brought online (if feature enabled) but in general I don't have much >>>> sympathy for SoC vendors combining CPUs with incompatible features ;). >>> >>> ok. If we make it a system feature, we can using cpus_have_cap() then and >>> drop the system_support_haft() which is checking with the sanitised registers. >>> It's fine for me. >>> >>> Will ask to not refuse online a CPU due to mismatch of this feature in [1], >>> hope we have an agreement :) >>> >>> [1] https://lore.kernel.org/linux-arm-kernel/20240820161822.GC28750@willie-the-truck/ >> >> I initially thought this would work but I don't feel easy about having >> should_clear_pmd_young() change its polarity at runtime while user space >> is running. If that's not a problem, we can go with your current >> approach. > > this should be ok as I image. 
after online a CPU without HAFT the system won't > advertise HAFT support but we don't disable the HAFT update on the supported > CPUs, the ongoing page aging process can still use the updated table AF information > and later process will fallback to use the PTE's AF bit. efficiency maybe reduced > but the function should be correct. > But we may have a chance to hit the VM_WARN_ON() in pmdp_test_and_clear_young() introduced in Patch 5/5: CPUx CPUy (without HAFT) if (should_clear_pmd_yound()) online and make system_support_haft() == false; pmdp_test_and_clear_young() VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_support_haft()); we may need to drop the VM_WARN_ON() with current approach or make this feature ARM64_CPUCAP_SYSTEM_FEATURE. > the user setting will be changed after online a CPU without HAFT. the user will > know this unless read /sys/kernel/mm/lru_gen/enabled again (LRU_GEN_NONLEAF_YOUNG > bit 0x4 will be cleared after online a CPU without HAFT since system would > not declare non-leaf PMD young support). > >>>>> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S >>>>> index ccbae4525891..4a58b9b36eb6 100644 >>>>> --- a/arch/arm64/mm/proc.S >>>>> +++ b/arch/arm64/mm/proc.S >>>>> @@ -495,9 +495,15 @@ alternative_else_nop_endif >>>>> * via capabilities. >>>>> */ >>>>> mrs x9, ID_AA64MMFR1_EL1 >>>>> - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK >>>>> + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 >>>>> cbz x9, 1f >>>>> orr tcr, tcr, #TCR_HA // hardware Access flag update >>>>> + >>>>> +#ifdef CONFIG_ARM64_HAFT >>>>> + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT >>>>> + b.lt 1f >>>>> + orr tcr2, tcr2, TCR2_EL1x_HAFT >>>>> +#endif /* CONFIG_ARM64_HAFT */ >>>> >>>> I think we can skip the ID check here and always set the HAFT bit. We do >>>> something similar with MTE (not for TCR_HA though, don't remember why). >>> >>> Thanks for the reference to MTE. Will see and have a test. 
But a check >>> here may seem more reasonable as we usually detect a feature first >>> then enable it? >> >> The behaviour of these RES0 bits is that we can write them and if the >> feature is present, it will be enabled, otherwise it won't have any >> effect, so it's not necessary to check the ID bits, the result would be >> the same. We do this in other places as well. >> >> Of course, we need to check the presence of TCR2_EL1, otherwise it would >> undef. Just a bit less code since we want the feature on anyway. > > sure. will drop the check and set the HAFT bit unconditionally. > > Thanks. > > > . >
On Thu, Oct 24, 2024 at 10:45:51PM +0800, Yicong Yang wrote: > On 2024/10/23 20:36, Catalin Marinas wrote: > > On Wed, Oct 23, 2024 at 06:30:18PM +0800, Yicong Yang wrote: > >> On 2024/10/23 2:30, Catalin Marinas wrote: > >>> On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote: > >>>> +#ifdef CONFIG_ARM64_HAFT > >>>> + { > >>>> + .desc = "Hardware managed Access Flag for Table Descriptor", > >>>> + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, > >>> > >>> I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something > >>> similar for HW DBM but there we get a fault and set the pte dirty. You > >>> combined it with a system_support_haft() that checks the sanitised regs > >>> but I'd rather have a static branch check via cpus_have_cap(). Even with > >>> your approach we can have a race with a late CPU hot-plugged that > >>> doesn't have the feature in the middle of some core code walking the > >>> page tables. > >>> > >>> With a system feature type, late CPUs not having the feature won't be > >>> brought online (if feature enabled) but in general I don't have much > >>> sympathy for SoC vendors combining CPUs with incompatible features ;). > >> > >> ok. If we make it a system feature, we can using cpus_have_cap() then and > >> drop the system_support_haft() which is checking with the sanitised registers. > >> It's fine for me. > >> > >> Will ask to not refuse online a CPU due to mismatch of this feature in [1], > >> hope we have an agreement :) > >> > >> [1] https://lore.kernel.org/linux-arm-kernel/20240820161822.GC28750@willie-the-truck/ > > > > I initially thought this would work but I don't feel easy about having > > should_clear_pmd_young() change its polarity at runtime while user space > > is running. If that's not a problem, we can go with your current > > approach. > > this should be ok as I image. 
after online a CPU without HAFT the system won't > advertise HAFT support but we don't disable the HAFT update on the supported > CPUs, the ongoing page aging process can still use the updated table AF information > and later process will fallback to use the PTE's AF bit. efficiency maybe reduced > but the function should be correct. It's more of a theoretical case - walk_pmd_range() for example checks should_clear_pmd_young() followed by !pmd_young(). Between these two checks, should_clear_pmd_young() becomes false but the pmd may have been accessed by a CPU without HAFT. We'd miss this. However, such race is benign I think, only used for page aging so it shouldn't matter. The other thing with your approach is the cost of checking (load, mask, compare) vs just a static branch. Given that it's only done for pmds, it's probably lost in the noise but you could check it to be sure.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3e29b44d2d7b..029d7ad89de5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2176,6 +2176,21 @@ config ARCH_PKEY_BITS int default 3 +config ARM64_HAFT + bool "Support for Hardware managed Access Flag for Table Descriptor" + depends on ARM64_HW_AFDBM + default y + help + The ARMv8.9/ARMv9.5 introduces the feature Hardware managed Access + Flag for Table descriptors. When enabled an architectural executed + memory access will update the Access Flag in each Table descriptor + which is accessed during the translation table walk and for which + the Access Flag is 0. The Access Flag of the Table descriptor use + the same bit of PTE_AF. + + The feature will only be enabled if all the CPUs in the system + support this feature. If unsure, say Y. + endmenu # "ARMv8.9 architectural features" config ARM64_SVE diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 3d261cc123c1..fba2347c0aa6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -879,6 +879,30 @@ static inline bool cpu_has_hw_af(void) ID_AA64MMFR1_EL1_HAFDBS_SHIFT); } +/* + * Contrary to the page/block access flag, the table access flag + * cannot be emulated in software (no access fault will occur). + * So users should use this feature only if it's supported system + * wide. + */ +static inline bool system_support_haft(void) +{ + unsigned int hafdbs; + u64 mmfr1; + + if (!IS_ENABLED(CONFIG_ARM64_HAFT)) + return false; + + /* + * Check the sanitised registers to see this feature is supported + * on all the CPUs. 
+ */ + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + hafdbs = cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); + return hafdbs >= ID_AA64MMFR1_EL1_HAFDBS_HAFT; +} + static inline bool cpu_has_pan(void) { u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 8ff5f2a2579e..bc1051d65125 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { pudval_t pudval = PUD_TYPE_TABLE; - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN; __pud_populate(pudp, __pa(pmdp), pudval); } #else @@ -52,7 +52,7 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { p4dval_t p4dval = P4D_TYPE_TABLE; - p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; + p4dval |= (mm == &init_mm) ? P4D_TABLE_AF | P4D_TABLE_UXN : P4D_TABLE_PXN; __p4d_populate(p4dp, __pa(pudp), p4dval); } @@ -81,7 +81,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp) { pgdval_t pgdval = PGD_TYPE_TABLE; - pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN; + pgdval |= (mm == &init_mm) ? 
PGD_TABLE_AF | PGD_TABLE_UXN : PGD_TABLE_PXN; __pgd_populate(pgdp, __pa(p4dp), pgdval); } @@ -127,7 +127,8 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { VM_BUG_ON(mm && mm != &init_mm); - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); + __pmd_populate(pmdp, __pa(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN); } static inline void diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd330c1db289..c78a988cca93 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -99,6 +99,7 @@ #define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0) #define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0) +#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59) #define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60) @@ -110,6 +111,7 @@ #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) #define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) @@ -121,6 +123,7 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) #define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) @@ -131,6 +134,7 @@ #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) +#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* * Section diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 
718728a85430..6eeaaa80f6fe 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2046,6 +2046,18 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, #endif +#ifdef CONFIG_ARM64_HAFT + +static struct cpumask haft_cpus; + +static void cpu_enable_haft(struct arm64_cpu_capabilities const *cap) +{ + if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) + cpumask_set_cpu(smp_processor_id(), &haft_cpus); +} + +#endif /* CONFIG_ARM64_HAFT */ + #ifdef CONFIG_ARM64_AMU_EXTN /* @@ -2590,6 +2602,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpus = &dbm_cpus, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, +#endif +#ifdef CONFIG_ARM64_HAFT + { + .desc = "Hardware managed Access Flag for Table Descriptor", + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .capability = ARM64_HAFT, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_haft, + .cpus = &haft_cpus, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT) + }, #endif { .desc = "CRC32 instructions", diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index de1e09d986ad..c5c5425791da 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -47,7 +47,8 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) if (pmd_none(pmd)) { ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; - __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); + __pmd_populate(pmdp, __pa_symbol(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF); } } @@ -59,7 +60,8 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, pmd_t *pmdp; if (pud_none(pud)) - __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), + PUD_TYPE_TABLE | PUD_TABLE_AF); pmdp = pmd_offset_kimg(pudp, addr); do { @@ -86,7 +88,8 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, } if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), + 
P4D_TYPE_TABLE | P4D_TABLE_AF); pudp = pud_offset_kimg(p4dp, addr); early_fixmap_init_pmd(pudp, addr, end); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e55b02fbddc8..6441a45eaeda 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -201,7 +201,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { - pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF; phys_addr_t pte_phys; if (flags & NO_EXEC_MAPPINGS) @@ -288,7 +288,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { - pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF; phys_addr_t pmd_phys; if (flags & NO_EXEC_MAPPINGS) @@ -333,7 +333,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, pud_t *pudp; if (p4d_none(p4d)) { - p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN | P4D_TABLE_AF; phys_addr_t pud_phys; if (flags & NO_EXEC_MAPPINGS) @@ -391,7 +391,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t *p4dp; if (pgd_none(pgd)) { - pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN; + pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN | PGD_TABLE_AF; phys_addr_t p4d_phys; if (flags & NO_EXEC_MAPPINGS) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index ccbae4525891..4a58b9b36eb6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -495,9 +495,15 @@ alternative_else_nop_endif * via capabilities. 
*/ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update + +#ifdef CONFIG_ARM64_HAFT + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT + b.lt 1f + orr tcr2, tcr2, TCR2_EL1x_HAFT +#endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index eedb5acc21ed..b35004fa8313 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -56,6 +56,7 @@ HAS_TLB_RANGE HAS_VA52 HAS_VIRT_HOST_EXTN HAS_WFXT +HAFT HW_DBM KVM_HVHE KVM_PROTECTED_MODE