Message ID | 20130114170851.GA1967@linaro.org (mailing list archive) |
---|---|
State | New, archived |
On Mon, Jan 14, 2013 at 05:08:51PM +0000, Dave Martin wrote:
> From b64f305c90e7ea585992df2d710f62ec6a7b5395 Mon Sep 17 00:00:00 2001
> From: Dave Martin <dave.martin@linaro.org>
> Date: Mon, 14 Jan 2013 16:25:47 +0000
> Subject: [PATCH] ARM: b.L: Fix outer cache handling for coherency setup/exit helpers
>
> This patch addresses the following issues:
>
>   * When invalidating stale data from the cache before a read,
>     outer caches must be invalidated _before_ inner caches, not
>     after, otherwise stale data may be re-filled from outer to
>     inner after the inner cache is flushed.
>
>     We still retain an inner clean before touching the outer cache,
>     to avoid stale data being rewritten from there into the outer
>     cache after the outer cache is flushed.
>
>   * All the sync_mem() calls synchronise either reads or writes,
>     but not both.  This patch splits sync_mem() into separate
>     functions for reads and writes, to avoid excessive inner
>     flushes in the write case.
>
>     The two functions are different from the original sync_mem(),
>     to fix the above issues.
>
> Signed-off-by: Dave Martin <dave.martin@linaro.org>
> ---
> NOTE: This patch is build-tested only.
>
>  arch/arm/common/bL_entry.c | 57 ++++++++++++++++++++++++++++++++++----------
>  1 files changed, 44 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm/common/bL_entry.c b/arch/arm/common/bL_entry.c
> index 1ea4ec9..3e1a404 100644
> --- a/arch/arm/common/bL_entry.c
> +++ b/arch/arm/common/bL_entry.c
> @@ -119,16 +119,47 @@ int bL_cpu_powered_up(void)
>
>  struct bL_sync_struct bL_sync;
>
> -static void __sync_range(volatile void *p, size_t size)
> +/*
> + * Ensure preceding writes to *p by this CPU are visible to
> + * subsequent reads by other CPUs:
> + */
> +static void __sync_range_w(volatile void *p, size_t size)
>  {
>  	char *_p = (char *)p;
>
>  	__cpuc_flush_dcache_area(_p, size);
> -	outer_flush_range(__pa(_p), __pa(_p + size));
> +	outer_clean_range(__pa(_p), __pa(_p + size));
>  	outer_sync();

It's not part of your patch but I thought about commenting here. The
outer_clean_range() already has a cache_sync() operation, so no need for
the additional outer_sync().

>  }
>
> -#define sync_mem(ptr) __sync_range(ptr, sizeof *(ptr))
> +/*
> + * Ensure preceding writes to *p by other CPUs are visible to
> + * subsequent reads by this CPU:
> + */
> +static void __sync_range_r(volatile void *p, size_t size)
> +{
> +	char *_p = (char *)p;
> +
> +#ifdef CONFIG_OUTER_CACHE
> +	if (outer_cache.flush_range) {
> +		/*
> +		 * Ensure dirty data migrated from other CPUs into our cache
> +		 * are cleaned out safely before the outer cache is cleaned:
> +		 */
> +		__cpuc_flush_dcache_area(_p, size);
> +
> +		/* Clean and invalidate stale data for *p from outer ... */
> +		outer_flush_range(__pa(_p), __pa(_p + size));
> +		outer_sync();

Same here.
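For context on that remark: the outer-cache range helpers dispatch through the outer_cache function pointer table, and the PL310 (l2x0) range operations already finish with a cache sync. A simplified sketch of the call path, assuming the usual arch/arm/include/asm/outercache.h and arch/arm/mm/cache-l2x0.c layout of that era rather than quoting either file verbatim:

	/* Sketch only, not verbatim kernel source. */
	static inline void outer_clean_range(phys_addr_t start, phys_addr_t end)
	{
		if (outer_cache.clean_range)
			outer_cache.clean_range(start, end);	/* l2x0_clean_range() on a PL310 */
	}

	static void l2x0_clean_range(unsigned long start, unsigned long end)
	{
		/* ... clean each line in [start, end) by physical address ... */
		cache_sync();	/* drains the PL310 before returning */
	}

So the explicit outer_sync() after outer_clean_range() or outer_flush_range() only repeats the sync that the range operation has just performed.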
On Mon, Jan 14, 2013 at 05:15:28PM +0000, Catalin Marinas wrote:
> On Mon, Jan 14, 2013 at 05:08:51PM +0000, Dave Martin wrote:
> > From b64f305c90e7ea585992df2d710f62ec6a7b5395 Mon Sep 17 00:00:00 2001
> > From: Dave Martin <dave.martin@linaro.org>
> > Date: Mon, 14 Jan 2013 16:25:47 +0000
> > Subject: [PATCH] ARM: b.L: Fix outer cache handling for coherency setup/exit helpers
> >
> > [...]
> >
> > -static void __sync_range(volatile void *p, size_t size)
> > +/*
> > + * Ensure preceding writes to *p by this CPU are visible to
> > + * subsequent reads by other CPUs:
> > + */
> > +static void __sync_range_w(volatile void *p, size_t size)
> >  {
> >  	char *_p = (char *)p;
> >
> >  	__cpuc_flush_dcache_area(_p, size);
> > -	outer_flush_range(__pa(_p), __pa(_p + size));
> > +	outer_clean_range(__pa(_p), __pa(_p + size));
> >  	outer_sync();
>
> It's not part of your patch but I thought about commenting here. The
> outer_clean_range() already has a cache_sync() operation, so no need for
> the additional outer_sync().
>
> > [...]
> > +		/* Clean and invalidate stale data for *p from outer ... */
> > +		outer_flush_range(__pa(_p), __pa(_p + size));
> > +		outer_sync();
>
> Same here.

Ah, right.  I've seen code do this in various places, and just
copy-pasted it under the assumption that it is needed.  Our discussion
about ensuring that outer_sync() really does guarantee completion of
its effects on return still applies.

Are there any situations when outer_sync() should be called explicitly?

Cheers
---Dave
On Mon, Jan 14, 2013 at 06:10:06PM +0000, Dave Martin wrote:
> On Mon, Jan 14, 2013 at 05:15:28PM +0000, Catalin Marinas wrote:
> > On Mon, Jan 14, 2013 at 05:08:51PM +0000, Dave Martin wrote:
> > > From b64f305c90e7ea585992df2d710f62ec6a7b5395 Mon Sep 17 00:00:00 2001
> > > From: Dave Martin <dave.martin@linaro.org>
> > > Date: Mon, 14 Jan 2013 16:25:47 +0000
> > > Subject: [PATCH] ARM: b.L: Fix outer cache handling for coherency setup/exit helpers
> > >
> > > [...]
> > >
> > >  	__cpuc_flush_dcache_area(_p, size);
> > > -	outer_flush_range(__pa(_p), __pa(_p + size));
> > > +	outer_clean_range(__pa(_p), __pa(_p + size));
> > >  	outer_sync();
> >
> > It's not part of your patch but I thought about commenting here. The
> > outer_clean_range() already has a cache_sync() operation, so no need for
> > the additional outer_sync().
> >
> > > [...]
> > > +		/* Clean and invalidate stale data for *p from outer ... */
> > > +		outer_flush_range(__pa(_p), __pa(_p + size));
> > > +		outer_sync();
> >
> > Same here.
>
> Ah, right.  I've seen code do this in various places, and just
> copy-pasted it under the assumption that it is needed.  Our discussion
> about ensuring that outer_sync() really does guarantee completion of
> its effects on return still applies.
>
> Are there any situations when outer_sync() should be called explicitly?

outer_sync() on its own ensures the draining of the PL310 write buffer.
DSB drains the CPU write buffers but the PL310 doesn't detect it, so a
separate outer_sync() is needed.  In general this is required when you
write to a Normal Non-cacheable buffer (but bufferable, e.g. DMA
coherent) and you want to ensure data visibility (DSB + outer_sync(),
which is what the mb() macro does).
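To make that last point concrete, here is a minimal sketch of the pattern being described: a write to a DMA coherent (Normal Non-cacheable but bufferable) descriptor, followed by DSB plus outer_sync(), i.e. roughly what mb() does when an outer cache is configured. The my_mb() helper, struct my_desc, DESC_READY and the doorbell register are made-up names for illustration only:

	/* Sketch only; names below are hypothetical. */
	#define my_mb()	do { dsb(); outer_sync(); } while (0)

	static void publish_descriptor(struct my_desc *desc, void __iomem *doorbell)
	{
		desc->status = DESC_READY;	/* store to the coherent (bufferable) buffer */
		my_mb();			/* dsb() drains the CPU write buffer,
						 * outer_sync() drains the PL310 write buffer */
		writel_relaxed(1, doorbell);	/* only now may the device see the descriptor */
	}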
diff --git a/arch/arm/common/bL_entry.c b/arch/arm/common/bL_entry.c
index 1ea4ec9..3e1a404 100644
--- a/arch/arm/common/bL_entry.c
+++ b/arch/arm/common/bL_entry.c
@@ -119,16 +119,47 @@ int bL_cpu_powered_up(void)
 
 struct bL_sync_struct bL_sync;
 
-static void __sync_range(volatile void *p, size_t size)
+/*
+ * Ensure preceding writes to *p by this CPU are visible to
+ * subsequent reads by other CPUs:
+ */
+static void __sync_range_w(volatile void *p, size_t size)
 {
 	char *_p = (char *)p;
 
 	__cpuc_flush_dcache_area(_p, size);
-	outer_flush_range(__pa(_p), __pa(_p + size));
+	outer_clean_range(__pa(_p), __pa(_p + size));
 	outer_sync();
 }
 
-#define sync_mem(ptr) __sync_range(ptr, sizeof *(ptr))
+/*
+ * Ensure preceding writes to *p by other CPUs are visible to
+ * subsequent reads by this CPU:
+ */
+static void __sync_range_r(volatile void *p, size_t size)
+{
+	char *_p = (char *)p;
+
+#ifdef CONFIG_OUTER_CACHE
+	if (outer_cache.flush_range) {
+		/*
+		 * Ensure dirty data migrated from other CPUs into our cache
+		 * are cleaned out safely before the outer cache is cleaned:
+		 */
+		__cpuc_flush_dcache_area(_p, size);
+
+		/* Clean and invalidate stale data for *p from outer ... */
+		outer_flush_range(__pa(_p), __pa(_p + size));
+		outer_sync();
+	}
+#endif
+
+	/* ... and inner cache: */
+	__cpuc_flush_dcache_area(_p, size);
+}
+
+#define sync_w(ptr) __sync_range_w(ptr, sizeof *(ptr))
+#define sync_r(ptr) __sync_range_r(ptr, sizeof *(ptr))
 
 /*
  * __bL_cpu_going_down: Indicates that the cpu is being torn down.
@@ -138,7 +169,7 @@ static void __sync_range(volatile void *p, size_t size)
 void __bL_cpu_going_down(unsigned int cpu, unsigned int cluster)
 {
 	bL_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
-	sync_mem(&bL_sync.clusters[cluster].cpus[cpu].cpu);
+	sync_w(&bL_sync.clusters[cluster].cpus[cpu].cpu);
 }
 
 /*
@@ -151,7 +182,7 @@ void __bL_cpu_down(unsigned int cpu, unsigned int cluster)
 {
 	dsb();
 	bL_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
-	sync_mem(&bL_sync.clusters[cluster].cpus[cpu].cpu);
+	sync_w(&bL_sync.clusters[cluster].cpus[cpu].cpu);
 	sev();
 }
 
@@ -167,7 +198,7 @@ void __bL_outbound_leave_critical(unsigned int cluster, int state)
 {
 	dsb();
 	bL_sync.clusters[cluster].cluster = state;
-	sync_mem(&bL_sync.clusters[cluster].cluster);
+	sync_w(&bL_sync.clusters[cluster].cluster);
 	sev();
 }
 
@@ -189,10 +220,10 @@ bool __bL_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
 
 	/* Warn inbound CPUs that the cluster is being torn down: */
 	c->cluster = CLUSTER_GOING_DOWN;
-	sync_mem(&c->cluster);
+	sync_w(&c->cluster);
 
 	/* Back out if the inbound cluster is already in the critical region: */
-	sync_mem(&c->inbound);
+	sync_r(&c->inbound);
 	if (c->inbound == INBOUND_COMING_UP)
 		goto abort;
 
@@ -203,7 +234,7 @@ bool __bL_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
 	 * If any CPU has been woken up again from the DOWN state, then we
 	 * shouldn't be taking the cluster down at all: abort in that case.
 	 */
-	sync_mem(&c->cpus);
+	sync_r(&c->cpus);
 	for (i = 0; i < BL_CPUS_PER_CLUSTER; i++) {
 		int cpustate;
 
@@ -216,7 +247,7 @@ bool __bL_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
 			break;
 
 		wfe();
-		sync_mem(&c->cpus[i].cpu);
+		sync_r(&c->cpus[i].cpu);
 	}
 
 	switch (cpustate) {
@@ -239,7 +270,7 @@ abort:
 
 int __bL_cluster_state(unsigned int cluster)
 {
-	sync_mem(&bL_sync.clusters[cluster].cluster);
+	sync_r(&bL_sync.clusters[cluster].cluster);
 	return bL_sync.clusters[cluster].cluster;
 }
 
@@ -267,11 +298,11 @@ int __init bL_cluster_sync_init(void (*power_up_setup)(void))
 	for_each_online_cpu(i)
 		bL_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP;
 	bL_sync.clusters[this_cluster].cluster = CLUSTER_UP;
-	sync_mem(&bL_sync);
+	sync_w(&bL_sync);
 
 	if (power_up_setup) {
 		bL_power_up_setup_phys = virt_to_phys(power_up_setup);
-		sync_mem(&bL_power_up_setup_phys);
+		sync_w(&bL_power_up_setup_phys);
 	}
 
 	return 0;
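Condensed from the hunks above, the intended pairing of the two new helpers is: the writer publishes its state with sync_w() before sev(), and a waiter discards its stale copy with sync_r() after each wfe() so the next read observes the update. For example (taken from __bL_cpu_down() and the wait loop in __bL_outbound_enter_critical(), not new code):

	/* Writer: make the store visible to other CPUs, then wake them. */
	dsb();
	bL_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
	sync_w(&bL_sync.clusters[cluster].cpus[cpu].cpu);
	sev();

	/* Reader: wait for an event, then invalidate the stale copy so the
	 * next read of c->cpus[i].cpu sees the writer's update. */
	wfe();
	sync_r(&c->cpus[i].cpu);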