diff mbox series

[1/1] ARM: LPAE: use phys_addr_t instead of unsigned long in outercache hooks

Message ID 20201225114458.1334-1-thunder.leizhen@huawei.com (mailing list archive)
State New, archived
Headers show
Series [1/1] ARM: LPAE: use phys_addr_t instead of unsigned long in outercache hooks | expand

Commit Message

Leizhen (ThunderTown) Dec. 25, 2020, 11:44 a.m. UTC
The outercache of some Hisilicon SoCs supports physical addresses wider
than 32 bits. The unsigned long datatype is not sufficient for mapping
physical addresses >= 4GB. Commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
use phys_addr_t instead of unsigned long in outercache functions") has
already modified the outercache functions, but the parameters of the
outercache hooks were not changed. This patch uses phys_addr_t instead of
unsigned long in the outercache hooks: inv_range, clean_range, flush_range.

To ensure that outercaches which do not support LPAE keep working properly,
cast phys_addr_t to unsigned long by adding a middle-tier function.
For example:
-static void l2c220_inv_range(unsigned long start, unsigned long end)
+static void __l2c220_inv_range(unsigned long start, unsigned long end)
 {
 	...
 }
+static void l2c220_inv_range(phys_addr_t start, phys_addr_t end)
+{
+  __l2c220_inv_range(start, end);
+}

Note that the outercache functions have been doing this cast before this
patch. So now, the cast is just moved to the middle-tier function.

No functional change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
 arch/arm/include/asm/outercache.h |  6 +--
 arch/arm/mm/cache-feroceon-l2.c   | 21 ++++++++--
 arch/arm/mm/cache-l2x0.c          | 83 ++++++++++++++++++++++++++++++++-------
 arch/arm/mm/cache-tauros2.c       | 21 ++++++++--
 arch/arm/mm/cache-uniphier.c      |  6 +--
 arch/arm/mm/cache-xsc3l2.c        | 21 ++++++++--
 6 files changed, 129 insertions(+), 29 deletions(-)

Comments

Leizhen (ThunderTown) Dec. 26, 2020, 2:18 a.m. UTC | #1
On 2020/12/25 19:44, Zhen Lei wrote:
> The outercache of some Hisilicon SOCs support physical addresses wider
> than 32-bits. The unsigned long datatype is not sufficient for mapping
> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
> use phys_addr_t instead of unsigned long in outercache functions") has
> already modified the outercache functions. But the parameters of the
> outercache hooks are not changed. This patch use phys_addr_t instead of
> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
> 
> To ensure the outercache that does not support LPAE works properly, do
> cast phys_addr_t to unsigned long by adding a middle-tier function.
> For example:
> -static void l2c220_inv_range(unsigned long start, unsigned long end)
> +static void __l2c220_inv_range(unsigned long start, unsigned long end)
>  {
>  	...
>  }
> +static void l2c220_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +  __l2c220_inv_range(start, end);
> +}
> 
> Note that the outercache functions have been doing this cast before this
> patch. So now, the cast is just moved to the middle-tier function.
> 
> No functional change.

This patch will impact the outercache drivers that have not been merged into
the kernel. They should also update the datatype of the outercache hooks.

Another compatible solution is to add three new outercache hooks, as follows:

diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 3364637755e86aa..83344d0428fa5b6 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -17,6 +17,9 @@ struct outer_cache_fns {
         void (*inv_range)(unsigned long, unsigned long);
         void (*clean_range)(unsigned long, unsigned long);
         void (*flush_range)(unsigned long, unsigned long);
+  void (*lpae_inv_range)(phys_addr_t, phys_addr_t);
+  void (*lpae_clean_range)(phys_addr_t, phys_addr_t);
+  void (*lpae_flush_range)(phys_addr_t, phys_addr_t);
         void (*flush_all)(void);
         void (*disable)(void);
 #ifdef CONFIG_OUTER_CACHE_SYNC
@@ -41,6 +44,8 @@ static inline void outer_inv_range(phys_addr_t start, phys_addr_t end)
 {
         if (outer_cache.inv_range)
                 outer_cache.inv_range(start, end);
+  else if (outer_cache.lpae_inv_range)
+          outer_cache.lpae_inv_range(start, end);
 }

 /**
@@ -52,6 +57,8 @@ static inline void outer_clean_range(phys_addr_t start, phys_addr_t end)
 {
         if (outer_cache.clean_range)
                 outer_cache.clean_range(start, end);
+  else if (outer_cache.lpae_clean_range)
+          outer_cache.lpae_clean_range(start, end);
 }

 /**
@@ -63,6 +70,8 @@ static inline void outer_flush_range(phys_addr_t start, phys_addr_t end)
 {
         if (outer_cache.flush_range)
                 outer_cache.flush_range(start, end);
+  else if (outer_cache.lpae_flush_range)
+          outer_cache.lpae_flush_range(start, end);
 }

 /**



> 
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> ---
>  arch/arm/include/asm/outercache.h |  6 +--
>  arch/arm/mm/cache-feroceon-l2.c   | 21 ++++++++--
>  arch/arm/mm/cache-l2x0.c          | 83 ++++++++++++++++++++++++++++++++-------
>  arch/arm/mm/cache-tauros2.c       | 21 ++++++++--
>  arch/arm/mm/cache-uniphier.c      |  6 +--
>  arch/arm/mm/cache-xsc3l2.c        | 21 ++++++++--
>  6 files changed, 129 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
> index 3364637755e86aa..4cee1ea0c15449a 100644
> --- a/arch/arm/include/asm/outercache.h
> +++ b/arch/arm/include/asm/outercache.h
> @@ -14,9 +14,9 @@
>  struct l2x0_regs;
>  
>  struct outer_cache_fns {
> -	void (*inv_range)(unsigned long, unsigned long);
> -	void (*clean_range)(unsigned long, unsigned long);
> -	void (*flush_range)(unsigned long, unsigned long);
> +	void (*inv_range)(phys_addr_t, phys_addr_t);
> +	void (*clean_range)(phys_addr_t, phys_addr_t);
> +	void (*flush_range)(phys_addr_t, phys_addr_t);
>  	void (*flush_all)(void);
>  	void (*disable)(void);
>  #ifdef CONFIG_OUTER_CACHE_SYNC
> diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
> index 5c1b7a7b9af6300..ab1d8051bf832c9 100644
> --- a/arch/arm/mm/cache-feroceon-l2.c
> +++ b/arch/arm/mm/cache-feroceon-l2.c
> @@ -168,7 +168,7 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
>  	return range_end;
>  }
>  
> -static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
> +static void __feroceon_l2_inv_range(unsigned long start, unsigned long end)
>  {
>  	/*
>  	 * Clean and invalidate partial first cache line.
> @@ -198,7 +198,12 @@ static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> -static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
> +static void feroceon_l2_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__feroceon_l2_inv_range(start, end);
> +}
> +
> +static void __feroceon_l2_clean_range(unsigned long start, unsigned long end)
>  {
>  	/*
>  	 * If L2 is forced to WT, the L2 will always be clean and we
> @@ -217,7 +222,12 @@ static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> -static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
> +static void feroceon_l2_clean_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__feroceon_l2_clean_range(start, end);
> +}
> +
> +static void __feroceon_l2_flush_range(unsigned long start, unsigned long end)
>  {
>  	start &= ~(CACHE_LINE_SIZE - 1);
>  	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
> @@ -232,6 +242,11 @@ static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> +static void feroceon_l2_flush_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__feroceon_l2_flush_range(start, end);
> +}
> +
>  
>  /*
>   * Routines to disable and re-enable the D-cache and I-cache at run
> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
> index 43d91bfd2360086..644d857dcbd6bf0 100644
> --- a/arch/arm/mm/cache-l2x0.c
> +++ b/arch/arm/mm/cache-l2x0.c
> @@ -184,7 +184,7 @@ static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
>  	}
>  }
>  
> -static void l2c210_inv_range(unsigned long start, unsigned long end)
> +static void __l2c210_inv_range(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  
> @@ -203,7 +203,12 @@ static void l2c210_inv_range(unsigned long start, unsigned long end)
>  	__l2c210_cache_sync(base);
>  }
>  
> -static void l2c210_clean_range(unsigned long start, unsigned long end)
> +static void l2c210_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c210_inv_range(start, end);
> +}
> +
> +static void __l2c210_clean_range(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  
> @@ -212,7 +217,12 @@ static void l2c210_clean_range(unsigned long start, unsigned long end)
>  	__l2c210_cache_sync(base);
>  }
>  
> -static void l2c210_flush_range(unsigned long start, unsigned long end)
> +static void l2c210_clean_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c210_clean_range(start, end);
> +}
> +
> +static void __l2c210_flush_range(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  
> @@ -221,6 +231,11 @@ static void l2c210_flush_range(unsigned long start, unsigned long end)
>  	__l2c210_cache_sync(base);
>  }
>  
> +static void l2c210_flush_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c210_flush_range(start, end);
> +}
> +
>  static void l2c210_flush_all(void)
>  {
>  	void __iomem *base = l2x0_base;
> @@ -304,7 +319,7 @@ static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
>  	return flags;
>  }
>  
> -static void l2c220_inv_range(unsigned long start, unsigned long end)
> +static void __l2c220_inv_range(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  	unsigned long flags;
> @@ -331,7 +346,12 @@ static void l2c220_inv_range(unsigned long start, unsigned long end)
>  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
>  }
>  
> -static void l2c220_clean_range(unsigned long start, unsigned long end)
> +static void l2c220_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c220_inv_range(start, end);
> +}
> +
> +static void __l2c220_clean_range(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  	unsigned long flags;
> @@ -350,7 +370,12 @@ static void l2c220_clean_range(unsigned long start, unsigned long end)
>  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
>  }
>  
> -static void l2c220_flush_range(unsigned long start, unsigned long end)
> +static void l2c220_clean_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c220_clean_range(start, end);
> +}
> +
> +static void __l2c220_flush_range(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  	unsigned long flags;
> @@ -369,6 +394,11 @@ static void l2c220_flush_range(unsigned long start, unsigned long end)
>  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
>  }
>  
> +static void l2c220_flush_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c220_flush_range(start, end);
> +}
> +
>  static void l2c220_flush_all(void)
>  {
>  	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
> @@ -464,7 +494,7 @@ static void l2c220_unlock(void __iomem *base, unsigned num_lock)
>   *	Affects: store buffer
>   *	store buffer is not automatically drained.
>   */
> -static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
> +static void __l2c310_inv_range_erratum(unsigned long start, unsigned long end)
>  {
>  	void __iomem *base = l2x0_base;
>  
> @@ -496,7 +526,12 @@ static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
>  	__l2c210_cache_sync(base);
>  }
>  
> -static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
> +static void l2c310_inv_range_erratum(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c310_inv_range_erratum(start, end);
> +}
> +
> +static void __l2c310_flush_range_erratum(unsigned long start, unsigned long end)
>  {
>  	raw_spinlock_t *lock = &l2x0_lock;
>  	unsigned long flags;
> @@ -523,6 +558,11 @@ static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
>  	__l2c210_cache_sync(base);
>  }
>  
> +static void l2c310_flush_range_erratum(phys_addr_t start, phys_addr_t end)
> +{
> +	__l2c310_flush_range_erratum(start, end);
> +}
> +
>  static void l2c310_flush_all_erratum(void)
>  {
>  	void __iomem *base = l2x0_base;
> @@ -1400,12 +1440,12 @@ static void aurora_pa_range(unsigned long start, unsigned long end,
>  		start = range_end;
>  	}
>  }
> -static void aurora_inv_range(unsigned long start, unsigned long end)
> +static void aurora_inv_range(phys_addr_t start, phys_addr_t end)
>  {
>  	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
>  }
>  
> -static void aurora_clean_range(unsigned long start, unsigned long end)
> +static void aurora_clean_range(phys_addr_t start, phys_addr_t end)
>  {
>  	/*
>  	 * If L2 is forced to WT, the L2 will always be clean and we
> @@ -1415,7 +1455,7 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
>  		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
>  }
>  
> -static void aurora_flush_range(unsigned long start, unsigned long end)
> +static void aurora_flush_range(phys_addr_t start, phys_addr_t end)
>  {
>  	if (l2_wt_override)
>  		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
> @@ -1604,7 +1644,7 @@ static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
>  		return addr + BCM_VC_EMI_OFFSET;
>  }
>  
> -static void bcm_inv_range(unsigned long start, unsigned long end)
> +static void __bcm_inv_range(unsigned long start, unsigned long end)
>  {
>  	unsigned long new_start, new_end;
>  
> @@ -1631,7 +1671,12 @@ static void bcm_inv_range(unsigned long start, unsigned long end)
>  		new_end);
>  }
>  
> -static void bcm_clean_range(unsigned long start, unsigned long end)
> +static void bcm_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__bcm_inv_range(start, end);
> +}
> +
> +static void __bcm_clean_range(unsigned long start, unsigned long end)
>  {
>  	unsigned long new_start, new_end;
>  
> @@ -1658,7 +1703,12 @@ static void bcm_clean_range(unsigned long start, unsigned long end)
>  		new_end);
>  }
>  
> -static void bcm_flush_range(unsigned long start, unsigned long end)
> +static void bcm_clean_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__bcm_clean_range(start, end);
> +}
> +
> +static void __bcm_flush_range(unsigned long start, unsigned long end)
>  {
>  	unsigned long new_start, new_end;
>  
> @@ -1690,6 +1740,11 @@ static void bcm_flush_range(unsigned long start, unsigned long end)
>  		new_end);
>  }
>  
> +static void bcm_flush_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__bcm_flush_range(start, end);
> +}
> +
>  /* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */
>  static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
>  	.type = "BCM-L2C-310",
> diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
> index 88255bea65e41e6..145008d9f92690c 100644
> --- a/arch/arm/mm/cache-tauros2.c
> +++ b/arch/arm/mm/cache-tauros2.c
> @@ -66,7 +66,7 @@ static inline void tauros2_inv_pa(unsigned long addr)
>   */
>  #define CACHE_LINE_SIZE		32
>  
> -static void tauros2_inv_range(unsigned long start, unsigned long end)
> +static void __tauros2_inv_range(unsigned long start, unsigned long end)
>  {
>  	/*
>  	 * Clean and invalidate partial first cache line.
> @@ -95,7 +95,12 @@ static void tauros2_inv_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> -static void tauros2_clean_range(unsigned long start, unsigned long end)
> +static void tauros2_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__tauros2_inv_range(start, end);
> +}
> +
> +static void __tauros2_clean_range(unsigned long start, unsigned long end)
>  {
>  	start &= ~(CACHE_LINE_SIZE - 1);
>  	while (start < end) {
> @@ -106,7 +111,12 @@ static void tauros2_clean_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> -static void tauros2_flush_range(unsigned long start, unsigned long end)
> +static void tauros2_clean_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__tauros2_clean_range(start, end);
> +}
> +
> +static void __tauros2_flush_range(unsigned long start, unsigned long end)
>  {
>  	start &= ~(CACHE_LINE_SIZE - 1);
>  	while (start < end) {
> @@ -117,6 +127,11 @@ static void tauros2_flush_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> +static void tauros2_flush_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__tauros2_flush_range(start, end);
> +}
> +
>  static void tauros2_disable(void)
>  {
>  	__asm__ __volatile__ (
> diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c
> index ff2881458504329..e2508358e9f4082 100644
> --- a/arch/arm/mm/cache-uniphier.c
> +++ b/arch/arm/mm/cache-uniphier.c
> @@ -250,17 +250,17 @@ static void uniphier_cache_maint_all(u32 operation)
>  		__uniphier_cache_maint_all(data, operation);
>  }
>  
> -static void uniphier_cache_inv_range(unsigned long start, unsigned long end)
> +static void uniphier_cache_inv_range(phys_addr_t start, phys_addr_t end)
>  {
>  	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_INV);
>  }
>  
> -static void uniphier_cache_clean_range(unsigned long start, unsigned long end)
> +static void uniphier_cache_clean_range(phys_addr_t start, phys_addr_t end)
>  {
>  	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_CLEAN);
>  }
>  
> -static void uniphier_cache_flush_range(unsigned long start, unsigned long end)
> +static void uniphier_cache_flush_range(phys_addr_t start, phys_addr_t end)
>  {
>  	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_FLUSH);
>  }
> diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
> index d20d7af02d10fc0..095a9a125174502 100644
> --- a/arch/arm/mm/cache-xsc3l2.c
> +++ b/arch/arm/mm/cache-xsc3l2.c
> @@ -83,7 +83,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
>  #endif
>  }
>  
> -static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
> +static void __xsc3_l2_inv_range(unsigned long start, unsigned long end)
>  {
>  	unsigned long vaddr;
>  
> @@ -127,7 +127,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> -static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
> +static void xsc3_l2_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__xsc3_l2_inv_range(start, end);
> +}
> +
> +static void __xsc3_l2_clean_range(unsigned long start, unsigned long end)
>  {
>  	unsigned long vaddr;
>  
> @@ -145,6 +150,11 @@ static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> +static void xsc3_l2_clean_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__xsc3_l2_clean_range(start, end);
> +}
> +
>  /*
>   * optimize L2 flush all operation by set/way format
>   */
> @@ -165,7 +175,7 @@ static inline void xsc3_l2_flush_all(void)
>  	dsb();
>  }
>  
> -static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
> +static void __xsc3_l2_flush_range(unsigned long start, unsigned long end)
>  {
>  	unsigned long vaddr;
>  
> @@ -189,6 +199,11 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
>  	dsb();
>  }
>  
> +static void xsc3_l2_flush_range(phys_addr_t start, phys_addr_t end)
> +{
> +	__xsc3_l2_flush_range(start, end);
> +}
> +
>  static int __init xsc3_l2_init(void)
>  {
>  	if (!cpu_is_xsc3() || !xsc3_l2_present())
>
Russell King (Oracle) Dec. 26, 2020, 12:13 p.m. UTC | #2
On Fri, Dec 25, 2020 at 07:44:58PM +0800, Zhen Lei wrote:
> The outercache of some Hisilicon SOCs support physical addresses wider
> than 32-bits. The unsigned long datatype is not sufficient for mapping
> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
> use phys_addr_t instead of unsigned long in outercache functions") has
> already modified the outercache functions. But the parameters of the
> outercache hooks are not changed. This patch use phys_addr_t instead of
> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
> 
> To ensure the outercache that does not support LPAE works properly, do
> cast phys_addr_t to unsigned long by adding a middle-tier function.

Please don't do that. The cast can be done inside the L2 functions
themselves without needing all these additional functions.

We probably ought to also add some protection against addresses > 4GB,
although these are hot paths, so we don't want to add tests in these
functions. Maybe instead checking whether the system has memory above
4GB while the L2 cache is being initialised would be a good idea?
Russell King (Oracle) Dec. 26, 2020, 12:15 p.m. UTC | #3
On Sat, Dec 26, 2020 at 10:18:08AM +0800, Leizhen (ThunderTown) wrote:
> On 2020/12/25 19:44, Zhen Lei wrote:
> > The outercache of some Hisilicon SOCs support physical addresses wider
> > than 32-bits. The unsigned long datatype is not sufficient for mapping
> > physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
> > use phys_addr_t instead of unsigned long in outercache functions") has
> > already modified the outercache functions. But the parameters of the
> > outercache hooks are not changed. This patch use phys_addr_t instead of
> > unsigned long in outercache hooks: inv_range, clean_range, flush_range.
> > 
> > To ensure the outercache that does not support LPAE works properly, do
> > cast phys_addr_t to unsigned long by adding a middle-tier function.
> 
> This patch will impact the outercache drivers that have not been merged into
> the kernel. They should also update the datatype of the outercache hooks.

This isn't much of a concern to mainline. If it's that big a problem
for you, then please consider merging your code into mainline so that
everyone can benefit from it.
Arnd Bergmann Dec. 28, 2020, 7 a.m. UTC | #4
On Fri, Dec 25, 2020 at 12:48 PM Zhen Lei <thunder.leizhen@huawei.com> wrote:
>
> The outercache of some Hisilicon SOCs support physical addresses wider
> than 32-bits. The unsigned long datatype is not sufficient for mapping
> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
> use phys_addr_t instead of unsigned long in outercache functions") has
> already modified the outercache functions. But the parameters of the
> outercache hooks are not changed. This patch use phys_addr_t instead of
> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
>
> To ensure the outercache that does not support LPAE works properly, do
> cast phys_addr_t to unsigned long by adding a middle-tier function.
> For example:
> -static void l2c220_inv_range(unsigned long start, unsigned long end)
> +static void __l2c220_inv_range(unsigned long start, unsigned long end)
>  {
>         ...
>  }
> +static void l2c220_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +  __l2c220_inv_range(start, end);
> +}
>
> Note that the outercache functions have been doing this cast before this
> patch. So now, the cast is just moved to the middle-tier function.
>
> No functional change.
>
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>

This looks reasonable in principle, but it would be helpful to
understand better which SoCs are affected. In which way is
this specific to Hisilicon implementations, and why would others
not need this?

Wouldn't this also be needed by an Armada XP that supports
more than 4GB of RAM but has an outer cache?

I suppose those SoCs using off-the-shelf Arm cores are either
pre-LPAE and cannot address memory above 4GB, or they do
not need the outer_cache interfaces.

> diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
> index 5c1b7a7b9af6300..ab1d8051bf832c9 100644
> --- a/arch/arm/mm/cache-feroceon-l2.c
> +++ b/arch/arm/mm/cache-feroceon-l2.c
> @@ -168,7 +168,7 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
>         return range_end;
>  }
>
> -static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
> +static void __feroceon_l2_inv_range(unsigned long start, unsigned long end)
>  {
>         /*
>          * Clean and invalidate partial first cache line.
> @@ -198,7 +198,12 @@ static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
>         dsb();
>  }
>
> -static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
> +static void feroceon_l2_inv_range(phys_addr_t start, phys_addr_t end)
> +{
> +       __feroceon_l2_inv_range(start, end);
> +}
> +

What is this indirection for? It looks like you do this for all implementations,
so the actual address gets truncated here.

       Arnd
Leizhen (ThunderTown) Dec. 29, 2020, 6:30 a.m. UTC | #5
On 2020/12/26 20:13, Russell King - ARM Linux admin wrote:
> On Fri, Dec 25, 2020 at 07:44:58PM +0800, Zhen Lei wrote:
>> The outercache of some Hisilicon SOCs support physical addresses wider
>> than 32-bits. The unsigned long datatype is not sufficient for mapping
>> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
>> use phys_addr_t instead of unsigned long in outercache functions") has
>> already modified the outercache functions. But the parameters of the
>> outercache hooks are not changed. This patch use phys_addr_t instead of
>> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
>>
>> To ensure the outercache that does not support LPAE works properly, do
>> cast phys_addr_t to unsigned long by adding a middle-tier function.
> 
> Please don't do that. The cast can be done inside the L2 functions
> themselves without needing all these additional functions.

OK. At first, I wanted to fit in like this:

-static void l2c220_inv_range(unsigned long start, unsigned long end)
+static void l2c220_inv_range(phys_addr_t lpae_start, phys_addr_t lpae_end)
 {
+  unsigned long start = lpae_start;
+  unsigned long end = lpae_end;


> 
> We probably ought to also add some protection against addresses > 4GB,
> although these are hot paths, so we don't want to add tests in these
> functions. Maybe instead checking whether the system has memory above
> 4GB while the L2 cache is being initialised would be a good idea?
> 

I'm sorry, I didn't quite understand what you meant. Currently, the
biggest problem is a compilation problem: unsigned long may be only
32 bits wide, so a 64-bit physical address cannot be passed from the
outercache functions to the outercache hooks.
Leizhen (ThunderTown) Dec. 29, 2020, 6:31 a.m. UTC | #6
On 2020/12/26 20:15, Russell King - ARM Linux admin wrote:
> On Sat, Dec 26, 2020 at 10:18:08AM +0800, Leizhen (ThunderTown) wrote:
>> On 2020/12/25 19:44, Zhen Lei wrote:
>>> The outercache of some Hisilicon SOCs support physical addresses wider
>>> than 32-bits. The unsigned long datatype is not sufficient for mapping
>>> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
>>> use phys_addr_t instead of unsigned long in outercache functions") has
>>> already modified the outercache functions. But the parameters of the
>>> outercache hooks are not changed. This patch use phys_addr_t instead of
>>> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
>>>
>>> To ensure the outercache that does not support LPAE works properly, do
>>> cast phys_addr_t to unsigned long by adding a middle-tier function.
>>
>> This patch will impact the outercache drivers that have not been merged into
>> the kernel. They should also update the datatype of the outercache hooks.
> 
> This isn't much of a concern to mainline. If it's that big a problem
> for you, then please consider merging your code into mainline so that
> everyone can benefit from it.

All right, I got it.

>
Leizhen (ThunderTown) Dec. 29, 2020, 6:45 a.m. UTC | #7
On 2020/12/28 15:00, Arnd Bergmann wrote:
> On Fri, Dec 25, 2020 at 12:48 PM Zhen Lei <thunder.leizhen@huawei.com> wrote:
>>
>> The outercache of some Hisilicon SOCs support physical addresses wider
>> than 32-bits. The unsigned long datatype is not sufficient for mapping
>> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
>> use phys_addr_t instead of unsigned long in outercache functions") has
>> already modified the outercache functions. But the parameters of the
>> outercache hooks are not changed. This patch use phys_addr_t instead of
>> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
>>
>> To ensure the outercache that does not support LPAE works properly, do
>> cast phys_addr_t to unsigned long by adding a middle-tier function.
>> For example:
>> -static void l2c220_inv_range(unsigned long start, unsigned long end)
>> +static void __l2c220_inv_range(unsigned long start, unsigned long end)
>>  {
>>         ...
>>  }
>> +static void l2c220_inv_range(phys_addr_t start, phys_addr_t end)
>> +{
>> +  __l2c220_inv_range(start, end);
>> +}
>>
>> Note that the outercache functions have been doing this cast before this
>> patch. So now, the cast is just moved to the middle-tier function.
>>
>> No functional change.
>>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> 
> This looks reasonable in principle, but it would be helpful to
> understand better which SoCs are affected. In which way is
> this specific to Hisilicon implementations, and why would others
> not need this?

I answered at the end.

> 
> Wouldn't this also be needed by an Armada XP that supports
> more than 4GB of RAM but has an outer cache?

I don't know about the armada XP environment.

> 
> I suppose those SoCs using off-the-shelf Arm cores are either
> pre-LPAE and cannot address memory above 4GB, or they do
> not need the outer_cache interfaces.

I think so.

> 
>> diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
>> index 5c1b7a7b9af6300..ab1d8051bf832c9 100644
>> --- a/arch/arm/mm/cache-feroceon-l2.c
>> +++ b/arch/arm/mm/cache-feroceon-l2.c
>> @@ -168,7 +168,7 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
>>         return range_end;
>>  }
>>
>> -static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
>> +static void __feroceon_l2_inv_range(unsigned long start, unsigned long end)
>>  {
>>         /*
>>          * Clean and invalidate partial first cache line.
>> @@ -198,7 +198,12 @@ static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
>>         dsb();
>>  }
>>
>> -static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
>> +static void feroceon_l2_inv_range(phys_addr_t start, phys_addr_t end)
>> +{
>> +       __feroceon_l2_inv_range(start, end);
>> +}
>> +
> 
> What is this indirection for? It looks like you do this for all implementations,
> so the actual address gets truncated here.

Because all of these environments either have 32-bit physical addresses or
only need to operate on the lower 32 bits of the physical address. But my
environment operates on 64-bit physical addresses while unsigned long is only
32 bits wide, so the datatype of the outercache hooks needs to change.

 struct outer_cache_fns {
-	void (*inv_range)(unsigned long, unsigned long);
-	void (*clean_range)(unsigned long, unsigned long);
-	void (*flush_range)(unsigned long, unsigned long);
+	void (*inv_range)(phys_addr_t, phys_addr_t);
+	void (*clean_range)(phys_addr_t, phys_addr_t);
+	void (*flush_range)(phys_addr_t, phys_addr_t);
 	void (*flush_all)(void);

I added a middle-tier function for every implementation, just to ensure that
the above change has no side effects on them.

> 
>        Arnd
> 
> .
>
Russell King (Oracle) Dec. 29, 2020, 10:51 a.m. UTC | #8
On Tue, Dec 29, 2020 at 02:30:56PM +0800, Leizhen (ThunderTown) wrote:
> 
> 
> On 2020/12/26 20:13, Russell King - ARM Linux admin wrote:
> > On Fri, Dec 25, 2020 at 07:44:58PM +0800, Zhen Lei wrote:
> >> The outercache of some Hisilicon SOCs supports physical addresses wider
> >> than 32 bits. The unsigned long datatype is not sufficient for mapping
> >> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
> >> use phys_addr_t instead of unsigned long in outercache functions") has
> >> already modified the outercache functions. But the parameters of the
> >> outercache hooks are not changed. This patch uses phys_addr_t instead of
> >> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
> >>
> >> To ensure the outercache that does not support LPAE works properly, do
> >> cast phys_addr_t to unsigned long by adding a middle-tier function.
> > 
> > Please don't do that. The cast can be done inside the L2 functions
> > themselves without needing all these additional functions.
> 
> OK. At first, I wanted to fit in like this:
> 
> -static void l2c220_inv_range(unsigned long start, unsigned long end)
> +static void l2c220_inv_range(phys_addr_t lpae_start, phys_addr_t lpae_end)
>  {
> +  unsigned long start = lpae_start;
> +  unsigned long end = lpae_end;

It sounds like there should be a "but..." clause here. This is exactly
what I'm suggesting you should be doing. Currently, there's a silent
narrowing cast in every single caller of the outer_.*_range() functions
and you're only moving it from the callsites to inside the called
functions.

> > We probably ought to also add some protection against addresses > 4GB,
> > although these are hot paths, so we don't want to add tests in these
> > functions. Maybe instead checking whether the system has memory above
> > 4GB while the L2 cache is being initialised would be a good idea?
> 
> I'm sorry, I didn't quite understand what you meant. Currently, the
> biggest problem is the compilation problem. A long may be only 32 bits
> wide, so a 64-bit physical address cannot be passed from the outercache
> functions to the outercache hooks.

What I mean is that we really ought to warn if the L2C310 code tries to
initialise on a system where memory is above 4GB. However, it's very
unlikely that such a system exists, so it's probably fine not to implement
a check, but it just feels fragile to be truncating the 64-bit address
to 32-bit on a kernel that _could_ support higher addresses, even though
that's exactly what is happening today (kind of by accident - I don't
think anyone realised.)
Russell King (Oracle) Dec. 29, 2020, 10:54 a.m. UTC | #9
On Mon, Dec 28, 2020 at 08:00:00AM +0100, Arnd Bergmann wrote:
> Wouldn't this also be needed by an Armada XP that supports
> more than 4GB of RAM but has an outer cache?

While Armada XP has an outer cache, it requires no maintenance; the
only support the kernel has is for configuring it at boot and resume:

 * For Aurora cache in no outer mode, enable via the CP15 coprocessor
 * broadcasting of cache commands to L2.
Leizhen (ThunderTown) Dec. 30, 2020, 8:08 a.m. UTC | #10
On 2020/12/29 18:51, Russell King - ARM Linux admin wrote:
> On Tue, Dec 29, 2020 at 02:30:56PM +0800, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2020/12/26 20:13, Russell King - ARM Linux admin wrote:
>>> On Fri, Dec 25, 2020 at 07:44:58PM +0800, Zhen Lei wrote:
>>>> The outercache of some Hisilicon SOCs supports physical addresses wider
>>>> than 32 bits. The unsigned long datatype is not sufficient for mapping
>>>> physical addresses >= 4GB. The commit ad6b9c9d78b9 ("ARM: 6671/1: LPAE:
>>>> use phys_addr_t instead of unsigned long in outercache functions") has
>>>> already modified the outercache functions. But the parameters of the
>>>> outercache hooks are not changed. This patch uses phys_addr_t instead of
>>>> unsigned long in outercache hooks: inv_range, clean_range, flush_range.
>>>>
>>>> To ensure the outercache that does not support LPAE works properly, do
>>>> cast phys_addr_t to unsigned long by adding a middle-tier function.
>>>
>>> Please don't do that. The cast can be done inside the L2 functions
>>> themselves without needing all these additional functions.
>>
>> OK. At first, I wanted to fit in like this:
>>
>> -static void l2c220_inv_range(unsigned long start, unsigned long end)
>> +static void l2c220_inv_range(phys_addr_t lpae_start, phys_addr_t lpae_end)
>>  {
>> +  unsigned long start = lpae_start;
>> +  unsigned long end = lpae_end;
> 
> It sounds like there should be a "but..." clause here. This is exactly
> what I'm suggesting you should be doing. Currently, there's a silent
> narrowing cast in every single caller of the outer_.*_range() functions
> and you're only moving it from the callsites to inside the called
> functions.

Okay, I will send v2 based on this idea.

> 
>>> We probably ought to also add some protection against addresses > 4GB,
>>> although these are hot paths, so we don't want to add tests in these
>>> functions. Maybe instead checking whether the system has memory above
>>> 4GB while the L2 cache is being initialised would be a good idea?
>>
>> I'm sorry, I didn't quite understand what you meant. Currently, the
>> biggest problem is the compilation problem. A long may be only 32 bits
>> wide, so a 64-bit physical address cannot be passed from the outercache
>> functions to the outercache hooks.
> 
> What I mean is that we really ought to warn if the L2C310 code tries to
> initialise on a system where memory is above 4GB. However, it's very
> unlikely that such a system exists, so it's probably fine not to implement
> a check, but it just feels fragile to be truncating the 64-bit address
> to 32-bit on a kernel that _could_ support higher addresses, even though
> that's exactly what is happening today (kind of by accident - I don't
> think anyone realised.)
>
diff mbox series

Patch

diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 3364637755e86aa..4cee1ea0c15449a 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -14,9 +14,9 @@ 
 struct l2x0_regs;
 
 struct outer_cache_fns {
-	void (*inv_range)(unsigned long, unsigned long);
-	void (*clean_range)(unsigned long, unsigned long);
-	void (*flush_range)(unsigned long, unsigned long);
+	void (*inv_range)(phys_addr_t, phys_addr_t);
+	void (*clean_range)(phys_addr_t, phys_addr_t);
+	void (*flush_range)(phys_addr_t, phys_addr_t);
 	void (*flush_all)(void);
 	void (*disable)(void);
 #ifdef CONFIG_OUTER_CACHE_SYNC
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 5c1b7a7b9af6300..ab1d8051bf832c9 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -168,7 +168,7 @@  static unsigned long calc_range_end(unsigned long start, unsigned long end)
 	return range_end;
 }
 
-static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
+static void __feroceon_l2_inv_range(unsigned long start, unsigned long end)
 {
 	/*
 	 * Clean and invalidate partial first cache line.
@@ -198,7 +198,12 @@  static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
-static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
+static void feroceon_l2_inv_range(phys_addr_t start, phys_addr_t end)
+{
+	__feroceon_l2_inv_range(start, end);
+}
+
+static void __feroceon_l2_clean_range(unsigned long start, unsigned long end)
 {
 	/*
 	 * If L2 is forced to WT, the L2 will always be clean and we
@@ -217,7 +222,12 @@  static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
-static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
+static void feroceon_l2_clean_range(phys_addr_t start, phys_addr_t end)
+{
+	__feroceon_l2_clean_range(start, end);
+}
+
+static void __feroceon_l2_flush_range(unsigned long start, unsigned long end)
 {
 	start &= ~(CACHE_LINE_SIZE - 1);
 	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
@@ -232,6 +242,11 @@  static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
+static void feroceon_l2_flush_range(phys_addr_t start, phys_addr_t end)
+{
+	__feroceon_l2_flush_range(start, end);
+}
+
 
 /*
  * Routines to disable and re-enable the D-cache and I-cache at run
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 43d91bfd2360086..644d857dcbd6bf0 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -184,7 +184,7 @@  static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
 	}
 }
 
-static void l2c210_inv_range(unsigned long start, unsigned long end)
+static void __l2c210_inv_range(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 
@@ -203,7 +203,12 @@  static void l2c210_inv_range(unsigned long start, unsigned long end)
 	__l2c210_cache_sync(base);
 }
 
-static void l2c210_clean_range(unsigned long start, unsigned long end)
+static void l2c210_inv_range(phys_addr_t start, phys_addr_t end)
+{
+	__l2c210_inv_range(start, end);
+}
+
+static void __l2c210_clean_range(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 
@@ -212,7 +217,12 @@  static void l2c210_clean_range(unsigned long start, unsigned long end)
 	__l2c210_cache_sync(base);
 }
 
-static void l2c210_flush_range(unsigned long start, unsigned long end)
+static void l2c210_clean_range(phys_addr_t start, phys_addr_t end)
+{
+	__l2c210_clean_range(start, end);
+}
+
+static void __l2c210_flush_range(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 
@@ -221,6 +231,11 @@  static void l2c210_flush_range(unsigned long start, unsigned long end)
 	__l2c210_cache_sync(base);
 }
 
+static void l2c210_flush_range(phys_addr_t start, phys_addr_t end)
+{
+	__l2c210_flush_range(start, end);
+}
+
 static void l2c210_flush_all(void)
 {
 	void __iomem *base = l2x0_base;
@@ -304,7 +319,7 @@  static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
 	return flags;
 }
 
-static void l2c220_inv_range(unsigned long start, unsigned long end)
+static void __l2c220_inv_range(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 	unsigned long flags;
@@ -331,7 +346,12 @@  static void l2c220_inv_range(unsigned long start, unsigned long end)
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void l2c220_clean_range(unsigned long start, unsigned long end)
+static void l2c220_inv_range(phys_addr_t start, phys_addr_t end)
+{
+	__l2c220_inv_range(start, end);
+}
+
+static void __l2c220_clean_range(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 	unsigned long flags;
@@ -350,7 +370,12 @@  static void l2c220_clean_range(unsigned long start, unsigned long end)
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void l2c220_flush_range(unsigned long start, unsigned long end)
+static void l2c220_clean_range(phys_addr_t start, phys_addr_t end)
+{
+	__l2c220_clean_range(start, end);
+}
+
+static void __l2c220_flush_range(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 	unsigned long flags;
@@ -369,6 +394,11 @@  static void l2c220_flush_range(unsigned long start, unsigned long end)
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
+static void l2c220_flush_range(phys_addr_t start, phys_addr_t end)
+{
+	__l2c220_flush_range(start, end);
+}
+
 static void l2c220_flush_all(void)
 {
 	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
@@ -464,7 +494,7 @@  static void l2c220_unlock(void __iomem *base, unsigned num_lock)
  *	Affects: store buffer
  *	store buffer is not automatically drained.
  */
-static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
+static void __l2c310_inv_range_erratum(unsigned long start, unsigned long end)
 {
 	void __iomem *base = l2x0_base;
 
@@ -496,7 +526,12 @@  static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
 	__l2c210_cache_sync(base);
 }
 
-static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
+static void l2c310_inv_range_erratum(phys_addr_t start, phys_addr_t end)
+{
+	__l2c310_inv_range_erratum(start, end);
+}
+
+static void __l2c310_flush_range_erratum(unsigned long start, unsigned long end)
 {
 	raw_spinlock_t *lock = &l2x0_lock;
 	unsigned long flags;
@@ -523,6 +558,11 @@  static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
 	__l2c210_cache_sync(base);
 }
 
+static void l2c310_flush_range_erratum(phys_addr_t start, phys_addr_t end)
+{
+	__l2c310_flush_range_erratum(start, end);
+}
+
 static void l2c310_flush_all_erratum(void)
 {
 	void __iomem *base = l2x0_base;
@@ -1400,12 +1440,12 @@  static void aurora_pa_range(unsigned long start, unsigned long end,
 		start = range_end;
 	}
 }
-static void aurora_inv_range(unsigned long start, unsigned long end)
+static void aurora_inv_range(phys_addr_t start, phys_addr_t end)
 {
 	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
 }
 
-static void aurora_clean_range(unsigned long start, unsigned long end)
+static void aurora_clean_range(phys_addr_t start, phys_addr_t end)
 {
 	/*
 	 * If L2 is forced to WT, the L2 will always be clean and we
@@ -1415,7 +1455,7 @@  static void aurora_clean_range(unsigned long start, unsigned long end)
 		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
 }
 
-static void aurora_flush_range(unsigned long start, unsigned long end)
+static void aurora_flush_range(phys_addr_t start, phys_addr_t end)
 {
 	if (l2_wt_override)
 		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
@@ -1604,7 +1644,7 @@  static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
 		return addr + BCM_VC_EMI_OFFSET;
 }
 
-static void bcm_inv_range(unsigned long start, unsigned long end)
+static void __bcm_inv_range(unsigned long start, unsigned long end)
 {
 	unsigned long new_start, new_end;
 
@@ -1631,7 +1671,12 @@  static void bcm_inv_range(unsigned long start, unsigned long end)
 		new_end);
 }
 
-static void bcm_clean_range(unsigned long start, unsigned long end)
+static void bcm_inv_range(phys_addr_t start, phys_addr_t end)
+{
+	__bcm_inv_range(start, end);
+}
+
+static void __bcm_clean_range(unsigned long start, unsigned long end)
 {
 	unsigned long new_start, new_end;
 
@@ -1658,7 +1703,12 @@  static void bcm_clean_range(unsigned long start, unsigned long end)
 		new_end);
 }
 
-static void bcm_flush_range(unsigned long start, unsigned long end)
+static void bcm_clean_range(phys_addr_t start, phys_addr_t end)
+{
+	__bcm_clean_range(start, end);
+}
+
+static void __bcm_flush_range(unsigned long start, unsigned long end)
 {
 	unsigned long new_start, new_end;
 
@@ -1690,6 +1740,11 @@  static void bcm_flush_range(unsigned long start, unsigned long end)
 		new_end);
 }
 
+static void bcm_flush_range(phys_addr_t start, phys_addr_t end)
+{
+	__bcm_flush_range(start, end);
+}
+
 /* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */
 static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
 	.type = "BCM-L2C-310",
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
index 88255bea65e41e6..145008d9f92690c 100644
--- a/arch/arm/mm/cache-tauros2.c
+++ b/arch/arm/mm/cache-tauros2.c
@@ -66,7 +66,7 @@  static inline void tauros2_inv_pa(unsigned long addr)
  */
 #define CACHE_LINE_SIZE		32
 
-static void tauros2_inv_range(unsigned long start, unsigned long end)
+static void __tauros2_inv_range(unsigned long start, unsigned long end)
 {
 	/*
 	 * Clean and invalidate partial first cache line.
@@ -95,7 +95,12 @@  static void tauros2_inv_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
-static void tauros2_clean_range(unsigned long start, unsigned long end)
+static void tauros2_inv_range(phys_addr_t start, phys_addr_t end)
+{
+	__tauros2_inv_range(start, end);
+}
+
+static void __tauros2_clean_range(unsigned long start, unsigned long end)
 {
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
@@ -106,7 +111,12 @@  static void tauros2_clean_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
-static void tauros2_flush_range(unsigned long start, unsigned long end)
+static void tauros2_clean_range(phys_addr_t start, phys_addr_t end)
+{
+	__tauros2_clean_range(start, end);
+}
+
+static void __tauros2_flush_range(unsigned long start, unsigned long end)
 {
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
@@ -117,6 +127,11 @@  static void tauros2_flush_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
+static void tauros2_flush_range(phys_addr_t start, phys_addr_t end)
+{
+	__tauros2_flush_range(start, end);
+}
+
 static void tauros2_disable(void)
 {
 	__asm__ __volatile__ (
diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c
index ff2881458504329..e2508358e9f4082 100644
--- a/arch/arm/mm/cache-uniphier.c
+++ b/arch/arm/mm/cache-uniphier.c
@@ -250,17 +250,17 @@  static void uniphier_cache_maint_all(u32 operation)
 		__uniphier_cache_maint_all(data, operation);
 }
 
-static void uniphier_cache_inv_range(unsigned long start, unsigned long end)
+static void uniphier_cache_inv_range(phys_addr_t start, phys_addr_t end)
 {
 	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_INV);
 }
 
-static void uniphier_cache_clean_range(unsigned long start, unsigned long end)
+static void uniphier_cache_clean_range(phys_addr_t start, phys_addr_t end)
 {
 	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_CLEAN);
 }
 
-static void uniphier_cache_flush_range(unsigned long start, unsigned long end)
+static void uniphier_cache_flush_range(phys_addr_t start, phys_addr_t end)
 {
 	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_FLUSH);
 }
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index d20d7af02d10fc0..095a9a125174502 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -83,7 +83,7 @@  static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
 #endif
 }
 
-static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
+static void __xsc3_l2_inv_range(unsigned long start, unsigned long end)
 {
 	unsigned long vaddr;
 
@@ -127,7 +127,12 @@  static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
-static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
+static void xsc3_l2_inv_range(phys_addr_t start, phys_addr_t end)
+{
+	__xsc3_l2_inv_range(start, end);
+}
+
+static void __xsc3_l2_clean_range(unsigned long start, unsigned long end)
 {
 	unsigned long vaddr;
 
@@ -145,6 +150,11 @@  static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
+static void xsc3_l2_clean_range(phys_addr_t start, phys_addr_t end)
+{
+	__xsc3_l2_clean_range(start, end);
+}
+
 /*
  * optimize L2 flush all operation by set/way format
  */
@@ -165,7 +175,7 @@  static inline void xsc3_l2_flush_all(void)
 	dsb();
 }
 
-static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
+static void __xsc3_l2_flush_range(unsigned long start, unsigned long end)
 {
 	unsigned long vaddr;
 
@@ -189,6 +199,11 @@  static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 	dsb();
 }
 
+static void xsc3_l2_flush_range(phys_addr_t start, phys_addr_t end)
+{
+	__xsc3_l2_flush_range(start, end);
+}
+
 static int __init xsc3_l2_init(void)
 {
 	if (!cpu_is_xsc3() || !xsc3_l2_present())