diff mbox

[6/6] arm64: mm: Enable RCU fast_gup

Message ID 1403710824-24340-7-git-send-email-steve.capper@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Capper June 25, 2014, 3:40 p.m. UTC
Activate the RCU fast_gup for ARM64. We also need to force THP splits
to broadcast an IPI so that we block in the fast_gup page walker. As THP
splits are comparatively rare, this should not lead to a noticeable
performance degradation.

Some prerequisite functions, pud_write() and pud_page(), are also added.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
---
 arch/arm64/Kconfig               |  3 +++
 arch/arm64/include/asm/pgtable.h | 11 ++++++++++-
 arch/arm64/mm/flush.c            | 19 +++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

Comments

Mark Rutland June 25, 2014, 4:50 p.m. UTC | #1
Hi Steve,

On Wed, Jun 25, 2014 at 04:40:24PM +0100, Steve Capper wrote:
> Activate the RCU fast_gup for ARM64. We also need to force THP splits
> to broadcast an IPI s.t. we block in the fast_gup page walker. As THP
> splits are comparatively rare, this should not lead to a noticeable
> performance degradation.
> 
> Some pre-requisite functions pud_write and pud_page are also added.
> 
> Signed-off-by: Steve Capper <steve.capper@linaro.org>
> ---
>  arch/arm64/Kconfig               |  3 +++
>  arch/arm64/include/asm/pgtable.h | 11 ++++++++++-
>  arch/arm64/mm/flush.c            | 19 +++++++++++++++++++
>  3 files changed, 32 insertions(+), 1 deletion(-)

[...]

> diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
> index e4193e3..ddf96c1 100644
> --- a/arch/arm64/mm/flush.c
> +++ b/arch/arm64/mm/flush.c
> @@ -103,3 +103,22 @@ EXPORT_SYMBOL(flush_dcache_page);
>   */
>  EXPORT_SYMBOL(flush_cache_all);
>  EXPORT_SYMBOL(flush_icache_range);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +#ifdef CONFIG_HAVE_RCU_TABLE_FREE
> +static void thp_splitting_flush_sync(void *arg)
> +{
> +}
> +
> +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
> +			  pmd_t *pmdp)
> +{
> +	pmd_t pmd = pmd_mksplitting(*pmdp);
> +	VM_BUG_ON(address & ~PMD_MASK);
> +	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
> +
> +	/* dummy IPI to serialise against fast_gup */
> +	smp_call_function(thp_splitting_flush_sync, NULL, 1);

Is there some reason we can't use kick_all_cpus_sync()?

From a glance it seems that powerpc does just that.

Mark.
Peter Zijlstra June 26, 2014, 7:56 a.m. UTC | #2
On Wed, Jun 25, 2014 at 05:50:03PM +0100, Mark Rutland wrote:
> Hi Steve,
> 
> On Wed, Jun 25, 2014 at 04:40:24PM +0100, Steve Capper wrote:
> > Activate the RCU fast_gup for ARM64. We also need to force THP splits
> > to broadcast an IPI s.t. we block in the fast_gup page walker. As THP
> > splits are comparatively rare, this should not lead to a noticeable
> > performance degradation.
> > 
> > Some pre-requisite functions pud_write and pud_page are also added.
> > 
> > Signed-off-by: Steve Capper <steve.capper@linaro.org>
> > ---
> >  arch/arm64/Kconfig               |  3 +++
> >  arch/arm64/include/asm/pgtable.h | 11 ++++++++++-
> >  arch/arm64/mm/flush.c            | 19 +++++++++++++++++++
> >  3 files changed, 32 insertions(+), 1 deletion(-)
> 
> [...]
> 
> > diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
> > index e4193e3..ddf96c1 100644
> > --- a/arch/arm64/mm/flush.c
> > +++ b/arch/arm64/mm/flush.c
> > @@ -103,3 +103,22 @@ EXPORT_SYMBOL(flush_dcache_page);
> >   */
> >  EXPORT_SYMBOL(flush_cache_all);
> >  EXPORT_SYMBOL(flush_icache_range);
> > +
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +#ifdef CONFIG_HAVE_RCU_TABLE_FREE
> > +static void thp_splitting_flush_sync(void *arg)
> > +{
> > +}
> > +
> > +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
> > +			  pmd_t *pmdp)
> > +{
> > +	pmd_t pmd = pmd_mksplitting(*pmdp);
> > +	VM_BUG_ON(address & ~PMD_MASK);
> > +	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
> > +
> > +	/* dummy IPI to serialise against fast_gup */
> > +	smp_call_function(thp_splitting_flush_sync, NULL, 1);
> 
> Is there some reason we can't use kick_all_cpus_sync()?

Yes that would be equivalent. But looking at that, I worry about the
smp_mb(); archs are supposed to make sure IPIs are serializing.
Will Deacon June 27, 2014, 12:20 p.m. UTC | #3
On Thu, Jun 26, 2014 at 08:56:05AM +0100, Peter Zijlstra wrote:
> On Wed, Jun 25, 2014 at 05:50:03PM +0100, Mark Rutland wrote:
> > Hi Steve,
> > 
> > On Wed, Jun 25, 2014 at 04:40:24PM +0100, Steve Capper wrote:
> > > Activate the RCU fast_gup for ARM64. We also need to force THP splits
> > > to broadcast an IPI s.t. we block in the fast_gup page walker. As THP
> > > splits are comparatively rare, this should not lead to a noticeable
> > > performance degradation.
> > > 
> > > Some pre-requisite functions pud_write and pud_page are also added.
> > > 
> > > Signed-off-by: Steve Capper <steve.capper@linaro.org>
> > > ---
> > >  arch/arm64/Kconfig               |  3 +++
> > >  arch/arm64/include/asm/pgtable.h | 11 ++++++++++-
> > >  arch/arm64/mm/flush.c            | 19 +++++++++++++++++++
> > >  3 files changed, 32 insertions(+), 1 deletion(-)
> > 
> > [...]
> > 
> > > diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
> > > index e4193e3..ddf96c1 100644
> > > --- a/arch/arm64/mm/flush.c
> > > +++ b/arch/arm64/mm/flush.c
> > > @@ -103,3 +103,22 @@ EXPORT_SYMBOL(flush_dcache_page);
> > >   */
> > >  EXPORT_SYMBOL(flush_cache_all);
> > >  EXPORT_SYMBOL(flush_icache_range);
> > > +
> > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > +#ifdef CONFIG_HAVE_RCU_TABLE_FREE
> > > +static void thp_splitting_flush_sync(void *arg)
> > > +{
> > > +}
> > > +
> > > +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
> > > +			  pmd_t *pmdp)
> > > +{
> > > +	pmd_t pmd = pmd_mksplitting(*pmdp);
> > > +	VM_BUG_ON(address & ~PMD_MASK);
> > > +	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
> > > +
> > > +	/* dummy IPI to serialise against fast_gup */
> > > +	smp_call_function(thp_splitting_flush_sync, NULL, 1);
> > 
> > Is there some reason we can't use kick_all_cpus_sync()?
> 
> Yes that would be equivalent. But looking at that, I worry about the
> smp_mb(); archs are supposed to make sure IPIs are serializing.

Agreed; smp_call_function would be hopelessly broken if that wasn't true
(at least, everywhere I've used it ;)

Will
diff mbox

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e1d2eef..d6fcb8e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -102,6 +102,9 @@  config GENERIC_CALIBRATE_DELAY
 config ZONE_DMA
 	def_bool y
 
+config HAVE_RCU_GUP
+	def_bool y
+
 config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e0ccceb..62510f7 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -237,7 +237,13 @@  static inline pmd_t pte_pmd(pte_t pte)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
 #define pmd_trans_splitting(pmd)	pte_special(pmd_pte(pmd))
-#endif
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+struct vm_area_struct;
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp);
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
@@ -258,6 +264,7 @@  static inline pmd_t pte_pmd(pte_t pte)
 #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
 
 #define pmd_page(pmd)           pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+#define pud_write(pud)		pmd_write(__pmd(pud_val(pud)))
 #define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
 
 #define set_pmd_at(mm, addr, pmdp, pmd)	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
@@ -345,6 +352,8 @@  static inline pmd_t *pud_page_vaddr(pud_t pud)
 	return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
 }
 
+#define pud_page(pud)           pmd_page(__pmd(pud_val(pud)))
+
 #endif	/* CONFIG_ARM64_64K_PAGES */
 
 /* to find an entry in a page-table-directory */
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index e4193e3..ddf96c1 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -103,3 +103,22 @@  EXPORT_SYMBOL(flush_dcache_page);
  */
 EXPORT_SYMBOL(flush_cache_all);
 EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+static void thp_splitting_flush_sync(void *arg)
+{
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	pmd_t pmd = pmd_mksplitting(*pmdp);
+	VM_BUG_ON(address & ~PMD_MASK);
+	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+
+	/* dummy IPI to serialise against fast_gup */
+	smp_call_function(thp_splitting_flush_sync, NULL, 1);
+}
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */