diff mbox

[RFC,1/6] ARM: mm: define LoUIS API for cache maintenance ops

Message ID 1347531651-28218-2-git-send-email-lorenzo.pieralisi@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lorenzo Pieralisi Sept. 13, 2012, 10:20 a.m. UTC
ARM v7 architecture introduced the concept of cache levels and related
coherency requirements. New processors like A7 and A15 embed an
L2 unified cache controller that becomes part of the cache level
hierarchy. Some operations in the kernel like cpu_suspend and __cpu_disable
does not require a flush of the entire cache hierarchy to DRAM but just the
cache levels belonging to the Level of Unification Inner Shareable (LoUIS),
which in most of ARM v7 systems correspond to L1.

The current cache flushing API used in cpu_suspend and __cpu_disable,
flush_cache_all(), ends up flushing the whole cache hierarchy since for
v7 it cleans and invalidates all cache levels up to Level of Coherency
(LoC) which cripples system performance when used in hot paths like hotplug
and cpuidle.

Therefore a new kernel cache maintenance API must be added to the
cpu_cache_fns structure of pointers to cope with latest ARM system requirements.
This patch adds flush_cache_louis() to the ARM kernel cache maintenance API.

This function cleans and invalidates all data cache levels up to the
level of unification inner shareable (LoUIS) and invalidates the instruction
cache.

The cpu_cache_fns struct reflects this change by adding a new function pointer
that is initialized by arch specific assembly files.

By default, all existing ARM archs do not instantiate any cache LoUIS function
pointer, and flush_dcache_louis just falls back to flush_kern_all.

Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 arch/arm/include/asm/cacheflush.h | 17 +++++++++++++++++
 arch/arm/mm/proc-macros.S         |  7 ++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

Comments

tip-bot for Dave Martin Sept. 13, 2012, 11:39 a.m. UTC | #1
On Thu, Sep 13, 2012 at 11:20:46AM +0100, Lorenzo Pieralisi wrote:
> ARM v7 architecture introduced the concept of cache levels and related
> coherency requirements. New processors like A7 and A15 embed an
> L2 unified cache controller that becomes part of the cache level
> hierarchy. Some operations in the kernel like cpu_suspend and __cpu_disable
> does not require a flush of the entire cache hierarchy to DRAM but just the
> cache levels belonging to the Level of Unification Inner Shareable (LoUIS),
> which in most of ARM v7 systems correspond to L1.
> 
> The current cache flushing API used in cpu_suspend and __cpu_disable,
> flush_cache_all(), ends up flushing the whole cache hierarchy since for
> v7 it cleans and invalidates all cache levels up to Level of Coherency
> (LoC) which cripples system performance when used in hot paths like hotplug
> and cpuidle.
> 
> Therefore a new kernel cache maintenance API must be added to the
> cpu_cache_fns structure of pointers to cope with latest ARM system requirements.
> This patch adds flush_cache_louis() to the ARM kernel cache maintenance API.
> 
> This function cleans and invalidates all data cache levels up to the
> level of unification inner shareable (LoUIS) and invalidates the instruction
> cache.
> 
> The cpu_cache_fns struct reflects this change by adding a new function pointer
> that is initialized by arch specific assembly files.
> 
> By default, all existing ARM archs do not instantiate any cache LoUIS function
> pointer, and flush_dcache_louis just falls back to flush_kern_all.
> 
> Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
> Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> ---
>  arch/arm/include/asm/cacheflush.h | 17 +++++++++++++++++
>  arch/arm/mm/proc-macros.S         |  7 ++++++-
>  2 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
> index c6e2ed9..7683316 100644
> --- a/arch/arm/include/asm/cacheflush.h
> +++ b/arch/arm/include/asm/cacheflush.h
> @@ -50,6 +50,13 @@
>   *
>   *		Unconditionally clean and invalidate the entire cache.
>   *
> + *     flush_kern_cache_louis()
> + *
> + *             Flush data cache levels up to the level of unification
> + *             inner shareable and invalidate the I-cache.
> + *             Only needed from v7 onwards, falls back to flush_cache_all()
> + *             for all other processor versions.
> + *
>   *	flush_user_all()
>   *
>   *		Clean and invalidate all user space cache entries
> @@ -98,6 +105,7 @@
>  struct cpu_cache_fns {
>  	void (*flush_icache_all)(void);
>  	void (*flush_kern_all)(void);
> +	void (*flush_kern_cache_louis)(void);
>  	void (*flush_user_all)(void);
>  	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
>  
> @@ -200,6 +208,15 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
>  #define __flush_icache_preferred	__flush_icache_all_generic
>  #endif
>  
> +/*
> + * Flush caches up to Level of Unification Inner Shareable
> + */
> +#ifdef MULTI_CACHE
> +#define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
> +#else
> +#define flush_cache_louis()   __cpuc_flush_kern_all()
> +#endif

So, without MULTI_CACHE, we always fall back to flush_kern_all.

I'm guessing this is done because CPUs can't be relied on to provide
flush_kern_cache_louis.  Shouldn't this be handled directly? 

We could introduce something like CONFIG_ARM_HAVE_CACHEFLUSH_LOUIS, and
do:

<asm/glue-cache.h>
#ifndef MULTI_CACHE
#ifdef CONFIG_HAVE_ARM_CACHEFLUSH_LOUIS
#define __cpuc_flush_kern_cache_louis __glue(_CACHE,_flush_kern_cache_louis)
#else
#define __cpuc_flush_kern_cache_louis __glue(_CACHE,_flush_kern_all)
#endif
#endif

<asm/cacheflush.h>
#ifdef MULTI_CACHE
#define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
#else
#define flush_cache_louis()   __cpuc_flush_kern_cache_louis()
#endif


Any good?

Then the only question is whether this is worth the complexity.

This only works if the __cpuc_ aliases are not used from assembler.
That seems wrong anyway, since on a MULTI_CACHE kernel those would turn
into C struct member references which wouldn't be valid in assembler
anyway.

Cheers
---Dave
Russell King - ARM Linux Sept. 13, 2012, 12:36 p.m. UTC | #2
On Thu, Sep 13, 2012 at 11:20:46AM +0100, Lorenzo Pieralisi wrote:
> +/*
> + * Flush caches up to Level of Unification Inner Shareable
> + */
> +#ifdef MULTI_CACHE
> +#define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
> +#else
> +#define flush_cache_louis()   __cpuc_flush_kern_all()
> +#endif

NAK.  This is broken as you don't seem to understand what MULTI_CACHE
actually means.  MULTI_CACHE means that we _may_ support more than one
type of cache, so it not being selected means nothing as far as whether
to use flush_kern_all() or not.

Follow the pattern in the rest of the file - that's the _only_ way to do
this.

Note that ARMv6 only and ARMv7 only kernels will not have MULTI_CACHE
defined (I mentioned this on Monday in our call, though not explicitly
by that name.)

> diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> index 2d8ff3a..28e91f8 100644
> --- a/arch/arm/mm/proc-macros.S
> +++ b/arch/arm/mm/proc-macros.S
> @@ -293,12 +293,17 @@ ENTRY(\name\()_processor_functions)
>  	.size	\name\()_processor_functions, . - \name\()_processor_functions
>  .endm
>  
> -.macro define_cache_functions name:req
> +.macro define_cache_functions name:req, cachelouis=0
>  	.align 2
>  	.type	\name\()_cache_fns, #object
>  ENTRY(\name\()_cache_fns)
>  	.long	\name\()_flush_icache_all
>  	.long	\name\()_flush_kern_cache_all
> +	.if \cachelouis
> +	.long   \name\()_flush_kern_cache_louis
> +	.else
> +	.long   \name\()_flush_kern_cache_all
> +	.endif
>  	.long	\name\()_flush_user_cache_all
>  	.long	\name\()_flush_user_cache_range
>  	.long	\name\()_coherent_kern_range

And what the above means is that _every_ cache support file must supply a
XXX_flush_kern_cache_louis function name.  If there is no separate
implementation, then name it an alias for the XXX_flush_cache_all function.
Lorenzo Pieralisi Sept. 13, 2012, 12:57 p.m. UTC | #3
On Thu, Sep 13, 2012 at 01:36:04PM +0100, Russell King - ARM Linux wrote:
> On Thu, Sep 13, 2012 at 11:20:46AM +0100, Lorenzo Pieralisi wrote:
> > +/*
> > + * Flush caches up to Level of Unification Inner Shareable
> > + */
> > +#ifdef MULTI_CACHE
> > +#define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
> > +#else
> > +#define flush_cache_louis()   __cpuc_flush_kern_all()
> > +#endif
> 
> NAK.  This is broken as you don't seem to understand what MULTI_CACHE
> actually means.  MULTI_CACHE means that we _may_ support more than one
> type of cache, so it not being selected means nothing as far as whether
> to use flush_kern_all() or not.
> 
> Follow the pattern in the rest of the file - that's the _only_ way to do
> this.
> 
> Note that ARMv6 only and ARMv7 only kernels will not have MULTI_CACHE
> defined (I mentioned this on Monday in our call, though not explicitly
> by that name.)

Point taken, I will fix it.

> > diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> > index 2d8ff3a..28e91f8 100644
> > --- a/arch/arm/mm/proc-macros.S
> > +++ b/arch/arm/mm/proc-macros.S
> > @@ -293,12 +293,17 @@ ENTRY(\name\()_processor_functions)
> >  	.size	\name\()_processor_functions, . - \name\()_processor_functions
> >  .endm
> >  
> > -.macro define_cache_functions name:req
> > +.macro define_cache_functions name:req, cachelouis=0
> >  	.align 2
> >  	.type	\name\()_cache_fns, #object
> >  ENTRY(\name\()_cache_fns)
> >  	.long	\name\()_flush_icache_all
> >  	.long	\name\()_flush_kern_cache_all
> > +	.if \cachelouis
> > +	.long   \name\()_flush_kern_cache_louis
> > +	.else
> > +	.long   \name\()_flush_kern_cache_all
> > +	.endif
> >  	.long	\name\()_flush_user_cache_all
> >  	.long	\name\()_flush_user_cache_range
> >  	.long	\name\()_coherent_kern_range
> 
> And what the above means is that _every_ cache support file must supply a
> XXX_flush_kern_cache_louis function name.  If there is no separate
> implementation, then name it an alias for the XXX_flush_cache_all function.

I will do that, thanks for the review.

Lorenzo
Russell King - ARM Linux Sept. 13, 2012, 1:03 p.m. UTC | #4
On Thu, Sep 13, 2012 at 12:39:49PM +0100, Dave Martin wrote:
> We could introduce something like CONFIG_ARM_HAVE_CACHEFLUSH_LOUIS, and
> do:
> 
> <asm/glue-cache.h>
> #ifndef MULTI_CACHE
> #ifdef CONFIG_HAVE_ARM_CACHEFLUSH_LOUIS
> #define __cpuc_flush_kern_cache_louis __glue(_CACHE,_flush_kern_cache_louis)
> #else
> #define __cpuc_flush_kern_cache_louis __glue(_CACHE,_flush_kern_all)
> #endif
> #endif
> 
> <asm/cacheflush.h>
> #ifdef MULTI_CACHE
> #define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
> #else
> #define flush_cache_louis()   __cpuc_flush_kern_cache_louis()
> #endif

No, this is stupidly complicated and is fragile.  Just alias the
functions, like we do in cache-v4wt.S:

	.globl	v4wt_dma_flush_range
	.equ	v4wt_dma_flush_range, v4wt_dma_inv_range

except, you'll need:

	.globl	v4wt_flush_kern_cache_louis
	.equ	v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all

You can do it automatically, using the attached sedscript and this bit
of shell:

$ for f in $(grep -l define_cache_functions arch/arm/mm/*.S ); do
    sed -if sedscript $f
    git add $f
done
$ git commit -s

Do that first, and then go over those which you need to add a real
flush_kern_cache_louis function to.
1,/__INITDATA\|define struct cpu_cache_fns/ {
	/ENTRY.*flush_kern_cache_all/ {
        	h
	        s/.*(\([^_]*\)_.*/\t.globl\t\1_flush_kern_cache_louis\n\t.equ\t\1_flush_kern_cache_louis, \1_flush_kern_cache_all\n/
        	x
	}
	/__INITDATA\|define struct cpu_cache_fns/ {
        	H
	        g
	}
}
tip-bot for Dave Martin Sept. 13, 2012, 2:02 p.m. UTC | #5
On Thu, Sep 13, 2012 at 02:03:34PM +0100, Russell King - ARM Linux wrote:
> On Thu, Sep 13, 2012 at 12:39:49PM +0100, Dave Martin wrote:
> > We could introduce something like CONFIG_ARM_HAVE_CACHEFLUSH_LOUIS, and
> > do:
> > 
> > <asm/glue-cache.h>
> > #ifndef MULTI_CACHE
> > #ifdef CONFIG_HAVE_ARM_CACHEFLUSH_LOUIS
> > #define __cpuc_flush_kern_cache_louis __glue(_CACHE,_flush_kern_cache_louis)
> > #else
> > #define __cpuc_flush_kern_cache_louis __glue(_CACHE,_flush_kern_all)
> > #endif
> > #endif
> > 
> > <asm/cacheflush.h>
> > #ifdef MULTI_CACHE
> > #define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
> > #else
> > #define flush_cache_louis()   __cpuc_flush_kern_cache_louis()
> > #endif
> 
> No, this is stupidly complicated and is fragile.  Just alias the
> functions, like we do in cache-v4wt.S:
> 
> 	.globl	v4wt_dma_flush_range
> 	.equ	v4wt_dma_flush_range, v4wt_dma_inv_range
> 
> except, you'll need:
> 
> 	.globl	v4wt_flush_kern_cache_louis
> 	.equ	v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all
> 
> You can do it automatically, using the attached sedscript and this bit
> of shell:
> 
> $ for f in $(grep -l define_cache_functions arch/arm/mm/*.S ); do
>     sed -if sedscript $f
>     git add $f
> done
> $ git commit -s
> 
> Do that first, and then go over those which you need to add a real
> flush_kern_cache_louis function to.

Sure, that works better.

I was trying to think of a more localised way to do it, but the result
was admittedly rather ugly (and not that localised once we select
HAVE_ARM_CACHEFLUSH_LOUIS all over the place).

Cheers
---Dave

> 1,/__INITDATA\|define struct cpu_cache_fns/ {
> 	/ENTRY.*flush_kern_cache_all/ {
>         	h
> 	        s/.*(\([^_]*\)_.*/\t.globl\t\1_flush_kern_cache_louis\n\t.equ\t\1_flush_kern_cache_louis, \1_flush_kern_cache_all\n/
>         	x
> 	}
> 	/__INITDATA\|define struct cpu_cache_fns/ {
>         	H
> 	        g
> 	}
> }
diff mbox

Patch

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index c6e2ed9..7683316 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -50,6 +50,13 @@ 
  *
  *		Unconditionally clean and invalidate the entire cache.
  *
+ *     flush_kern_cache_louis()
+ *
+ *             Flush data cache levels up to the level of unification
+ *             inner shareable and invalidate the I-cache.
+ *             Only needed from v7 onwards, falls back to flush_cache_all()
+ *             for all other processor versions.
+ *
  *	flush_user_all()
  *
  *		Clean and invalidate all user space cache entries
@@ -98,6 +105,7 @@ 
 struct cpu_cache_fns {
 	void (*flush_icache_all)(void);
 	void (*flush_kern_all)(void);
+	void (*flush_kern_cache_louis)(void);
 	void (*flush_user_all)(void);
 	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
 
@@ -200,6 +208,15 @@  extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define __flush_icache_preferred	__flush_icache_all_generic
 #endif
 
+/*
+ * Flush caches up to Level of Unification Inner Shareable
+ */
+#ifdef MULTI_CACHE
+#define flush_cache_louis()   cpu_cache.flush_kern_cache_louis()
+#else
+#define flush_cache_louis()   __cpuc_flush_kern_all()
+#endif
+
 static inline void __flush_icache_all(void)
 {
 	__flush_icache_preferred();
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 2d8ff3a..28e91f8 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -293,12 +293,17 @@  ENTRY(\name\()_processor_functions)
 	.size	\name\()_processor_functions, . - \name\()_processor_functions
 .endm
 
-.macro define_cache_functions name:req
+.macro define_cache_functions name:req, cachelouis=0
 	.align 2
 	.type	\name\()_cache_fns, #object
 ENTRY(\name\()_cache_fns)
 	.long	\name\()_flush_icache_all
 	.long	\name\()_flush_kern_cache_all
+	.if \cachelouis
+	.long   \name\()_flush_kern_cache_louis
+	.else
+	.long   \name\()_flush_kern_cache_all
+	.endif
 	.long	\name\()_flush_user_cache_all
 	.long	\name\()_flush_user_cache_range
 	.long	\name\()_coherent_kern_range