| Message ID | 20201023091437.8225-4-miles.chen@mediatek.com (mailing list archive) |
| --- | --- |
| State | New, archived |
| Series | arm: support get_user_pages_fast |
On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> From: Minchan Kim <minchan@kernel.org>
>
> This patch introduces L_PTE_SPECIAL and pte functions for supporting
> get_user_pages_fast.
>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Steve Capper <steve.capper@linaro.org>
> Cc: Minchan Kim <minchan@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> Signed-off-by: Miles Chen <miles.chen@mediatek.com>
> ---
>  arch/arm/Kconfig                      |  4 ++--
>  arch/arm/include/asm/pgtable-2level.h |  1 +
>  arch/arm/include/asm/pgtable-3level.h |  6 ------
>  arch/arm/include/asm/pgtable.h        | 13 +++++++++++++
>  4 files changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index c18fa9d382b7..1f75864b7c7a 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -13,7 +13,7 @@ config ARM
>  	select ARCH_HAS_KCOV
>  	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>  	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> -	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> +	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
>  	select ARCH_HAS_PHYS_TO_DMA
>  	select ARCH_HAS_SETUP_DMA_OPS
>  	select ARCH_HAS_SET_MEMORY
> @@ -82,7 +82,7 @@ config ARM
>  	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
>  	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
>  	select HAVE_EXIT_THREAD
> -	select HAVE_FAST_GUP if ARM_LPAE
> +	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
>  	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
>  	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
>  	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> index cdcd55cca37d..385e7a32394e 100644
> --- a/arch/arm/include/asm/pgtable-2level.h
> +++ b/arch/arm/include/asm/pgtable-2level.h
> @@ -117,6 +117,7 @@
>  #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
>  #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
>  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
> +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)

How does this work? Bits 2 through 5 are already in use for the memory
type.

Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
it.
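[Editor's note: a minimal, self-contained sketch of the clash described above. The L_PTE_MT_* field values are reproduced from mainline arch/arm/include/asm/pgtable-2level.h as it stood around this time (verify against your tree); the program itself is only an illustration, not kernel code.]

#include <stdint.h>
#include <stdio.h>

typedef uint32_t pteval_t;

/* The Linux-view memory type occupies PTE bits 2..5 on 2-level ARM.
 * Field values reproduced from pgtable-2level.h for illustration. */
#define L_PTE_MT_DEV_SHARED	((pteval_t)0x04 << 2)	/* 0100 */
#define L_PTE_MT_DEV_WC		((pteval_t)0x09 << 2)	/* 1001 */
#define L_PTE_MT_DEV_CACHED	((pteval_t)0x0b << 2)	/* 1011 */
#define L_PTE_MT_DEV_NONSHARED	((pteval_t)0x0c << 2)	/* 1100 */
#define L_PTE_MT_VECTORS	((pteval_t)0x0f << 2)	/* 1111 */

#define PROPOSED_L_PTE_SPECIAL	((pteval_t)1 << 5)	/* bit the patch repurposes */

int main(void)
{
	const struct { const char *name; pteval_t val; } mt[] = {
		{ "DEV_SHARED",    L_PTE_MT_DEV_SHARED },
		{ "DEV_WC",        L_PTE_MT_DEV_WC },
		{ "DEV_CACHED",    L_PTE_MT_DEV_CACHED },
		{ "DEV_NONSHARED", L_PTE_MT_DEV_NONSHARED },
		{ "VECTORS",       L_PTE_MT_VECTORS },
	};

	/* Any memory type whose top field bit is set would read back as
	 * "special" if bit 5 were reused: the last four entries above. */
	for (size_t i = 0; i < sizeof(mt) / sizeof(mt[0]); i++)
		printf("L_PTE_MT_%-13s -> bit 5 %s\n", mt[i].name,
		       (mt[i].val & PROPOSED_L_PTE_SPECIAL) ? "set (clash)" : "clear");
	return 0;
}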
On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > From: Minchan Kim <minchan@kernel.org>
> >
> > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > get_user_pages_fast.
> >
> > +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
>
> How does this work? Bits 2 through 5 are already in use for the memory
> type.
>
> Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> it.

Thanks for the comment.
The idea is to re-order the memory type table in [1] (patch v2/4) and
use bit 5 for L_PTE_SPECIAL.

[1] https://lore.kernel.org/patchwork/patch/1323893/

Miles
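[Editor's note: one way to picture the difficulty with such a re-ordering. This is an illustration only, not the argument made in this thread and not the encoding chosen in [1]: if bit 5 is reserved for L_PTE_SPECIAL, every memory type must encode into bits 2..4, i.e. into eight values, while the 2-level header of this era defines eleven distinct types. Field values below are as I recall them from mainline pgtable-2level.h; verify before relying on them.]

#include <stdio.h>

/* Field values (bits 2..5, shifted down) of the L_PTE_MT_* definitions in
 * mainline pgtable-2level.h around v5.9; reproduced only to illustrate the
 * constraint a re-ordered table would have to satisfy. */
static const unsigned int mt_field[] = {
	0x0 /* UNCACHED */,      0x1 /* BUFFERABLE */,   0x2 /* WRITETHROUGH */,
	0x3 /* WRITEBACK */,     0x4 /* DEV_SHARED */,   0x6 /* MINICACHE */,
	0x7 /* WRITEALLOC */,    0x9 /* DEV_WC */,       0xb /* DEV_CACHED */,
	0xc /* DEV_NONSHARED */, 0xf /* VECTORS */,
};

int main(void)
{
	const unsigned int ntypes = sizeof(mt_field) / sizeof(mt_field[0]);
	unsigned int over = 0;

	/* With bit 5 handed to L_PTE_SPECIAL, only bits 2..4 remain for the
	 * type, so field values must stay within 0x0..0x7. */
	for (unsigned int i = 0; i < ntypes; i++)
		if (mt_field[i] > 0x7)
			over++;

	printf("%u memory types defined, %u of them use field values above 0x7\n",
	       ntypes, over);
	printf("encodings available once bit 5 is freed: 8 -> table %s fit as-is\n",
	       ntypes <= 8 ? "can" : "cannot");
	return 0;
}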
On Tue, Oct 27, 2020 at 03:45:12PM +0800, Miles Chen wrote:
> On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> > On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > > From: Minchan Kim <minchan@kernel.org>
> > >
> > > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > > get_user_pages_fast.
> > >
> > > +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
> >
> > How does this work? Bits 2 through 5 are already in use for the memory
> > type.
> >
> > Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> > L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> > it.
>
> Thanks for the comment.
> The idea is to re-order the memory type table in [1] (patch v2/4) and
> use bit 5 for L_PTE_SPECIAL.

Thanks, I know what you are trying to achieve. I don't think it's
possible without breaking the kernel on some CPUs and configurations.
On Tue, 2020-10-27 at 09:11 +0000, Russell King - ARM Linux admin wrote:
> On Tue, Oct 27, 2020 at 03:45:12PM +0800, Miles Chen wrote:
> > Thanks for the comment.
> > The idea is to re-order the memory type table in [1] (patch v2/4) and
> > use bit 5 for L_PTE_SPECIAL.
>
> Thanks, I know what you are trying to achieve. I don't think it's
> possible without breaking the kernel on some CPUs and configurations.

Got it. Thanks for your review.

Miles
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c18fa9d382b7..1f75864b7c7a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -13,7 +13,7 @@ config ARM
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
-	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
+	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SET_MEMORY
@@ -82,7 +82,7 @@ config ARM
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
 	select HAVE_EXIT_THREAD
-	select HAVE_FAST_GUP if ARM_LPAE
+	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
 	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
 	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
 	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index cdcd55cca37d..385e7a32394e 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -117,6 +117,7 @@
 #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
 #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
+#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
 #define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
 #define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
 #define L_PTE_USER		(_AT(pteval_t, 1) << 8)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index fbb6693c3352..46fcc6725d3e 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -175,12 +175,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud)
 #define pmd_present(pmd)	(pmd_isset((pmd), L_PMD_SECT_VALID))
 #define pmd_young(pmd)		(pmd_isset((pmd), PMD_SECT_AF))
 
-#define pte_special(pte)	(pte_isset((pte), L_PTE_SPECIAL))
-static inline pte_t pte_mkspecial(pte_t pte)
-{
-	pte_val(pte) |= L_PTE_SPECIAL;
-	return pte;
-}
 
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index c02f24400369..4092154ca779 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -195,6 +195,11 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_dirty(pte)		(pte_isset((pte), L_PTE_DIRTY))
 #define pte_young(pte)		(pte_isset((pte), L_PTE_YOUNG))
 #define pte_exec(pte)		(pte_isclear((pte), L_PTE_XN))
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+#define pte_special(pte)	(pte_isset((pte), L_PTE_SPECIAL))
+#else
+#define pte_special(pte)	(0)
+#endif
 
 #define pte_valid_user(pte)	\
 	(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
@@ -274,6 +279,14 @@ static inline pte_t pte_mknexec(pte_t pte)
 	return set_pte_bit(pte, __pgprot(L_PTE_XN));
 }
 
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_SPECIAL));
+}
+#else
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+#endif
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
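[Editor's note: for context on what the new helpers feed into, the generic fast-GUP walk refuses to pin pages mapped by a special PTE, since mappings inserted with pte_mkspecial() (e.g. via vmf_insert_pfn()) may have no struct page that can safely be referenced without mmap_lock. Below is a condensed sketch of that per-PTE check, loosely modelled on mm/gup.c's gup_pte_range(); it is not the literal kernel code, and the helper name gup_pte_step() is made up for illustration.]

/* Condensed sketch of the lockless per-PTE step in fast GUP.
 * Loosely based on mm/gup.c:gup_pte_range(); hugepage, devmap and error
 * handling are omitted, and gup_pte_step() is an invented name. */
static int gup_pte_step(pte_t pte, unsigned int flags,
			struct page **pages, int *nr)
{
	struct page *page;

	/* Not present, or write requested on a read-only PTE: give up and
	 * let the caller fall back to the slow, mmap_lock-taking path. */
	if (!pte_present(pte))
		return 0;
	if ((flags & FOLL_WRITE) && !pte_write(pte))
		return 0;

	/* This is where the new pte_special() matters: special PTEs carry
	 * no refcountable struct page, so fast GUP must bail out here. */
	if (pte_special(pte))
		return 0;

	page = pte_page(pte);
	if (!try_get_page(page))	/* speculative reference */
		return 0;

	pages[(*nr)++] = page;
	return 1;
}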