diff mbox

[V4,6/7] iommu/msm: Use writel_relaxed and add a barrier

Message ID 1463381341-30498-7-git-send-email-sricharan@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Sricharan Ramabadhran May 16, 2016, 6:49 a.m. UTC
While using the generic pagetable ops the tlb maintenance
operation gets completed in the sync callback. So use writel_relaxed
for all register access and add a mb() at appropriate places.

Signed-off-by: Sricharan R <sricharan@codeaurora.org>
---
 drivers/iommu/msm_iommu.c         | 11 ++++++++---
 drivers/iommu/msm_iommu_hw-8xxx.h |  7 ++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

Comments

Arnd Bergmann May 17, 2016, 1:52 p.m. UTC | #1
On Monday 16 May 2016 12:19:00 Sricharan R wrote:
> diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
> index f7b4c11..1240a5a 100644
> --- a/drivers/iommu/msm_iommu.c
> +++ b/drivers/iommu/msm_iommu.c
> @@ -124,6 +124,7 @@ static void msm_iommu_reset(void __iomem *base, int ncb)
>                 SET_TLBLKCR(base, ctx, 0);
>                 SET_CONTEXTIDR(base, ctx, 0);
>         }
> +       mb(); /* sync */
>  }
>  
>  static void __flush_iotlb(void *cookie)
> @@ -143,6 +144,7 @@ static void __flush_iotlb(void *cookie)
>  
>                 __disable_clocks(iommu);
>         }
> +       mb(); /* sync */
>  fail:
>         return;
>  }

These comments are completely useless. What is the specific race
that you are protecting against, and why are the implicit barriers
not sufficient here? Please find a better way to document what
is going on.

> diff --git a/drivers/iommu/msm_iommu_hw-8xxx.h b/drivers/iommu/msm_iommu_hw-8xxx.h
> index 84ba5739..161036c 100644
> --- a/drivers/iommu/msm_iommu_hw-8xxx.h
> +++ b/drivers/iommu/msm_iommu_hw-8xxx.h
> @@ -24,10 +24,10 @@
>  #define GET_CTX_REG(reg, base, ctx) \
>                                 (readl((base) + (reg) + ((ctx) << CTX_SHIFT)))
>  
> -#define SET_GLOBAL_REG(reg, base, val) writel((val), ((base) + (reg)))
> +#define SET_GLOBAL_REG(reg, base, val) writel_relaxed((val), ((base) + (reg)))
>  
>  #define SET_CTX_REG(reg, base, ctx, val) \
> -                       writel((val), ((base) + (reg) + ((ctx) << CTX_SHIFT)))
> +               writel_relaxed((val), ((base) + (reg) + ((ctx) << CTX_SHIFT)))
>  
>  /* Wrappers for numbered registers */
>  #define SET_GLOBAL_REG_N(b, n, r, v) SET_GLOBAL_REG(b, ((r) + (n << 2)), (v))
> @@ -48,7 +48,8 @@
>  #define SET_FIELD(addr, mask, shift, v) \
>  do { \
>         int t = readl(addr); \
> -       writel((t & ~((mask) << (shift))) + (((v) & (mask)) << (shift)), addr);\
> +       writel_relaxed((t & ~((mask) << (shift))) + \
> +                      (((v) & (mask)) << (shift)), addr);\
>  } while (0)

No, please add a new macro for the relaxed accessors and then add comments
in the places where those are used, to document why you can take shortcuts
in those places.

	Arnd
Sricharan Ramabadhran May 18, 2016, 12:07 p.m. UTC | #2
Hi Arnd,

>> diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
>> index f7b4c11..1240a5a 100644
>> --- a/drivers/iommu/msm_iommu.c
>> +++ b/drivers/iommu/msm_iommu.c
>> @@ -124,6 +124,7 @@ static void msm_iommu_reset(void __iomem *base, int ncb)
>>                 SET_TLBLKCR(base, ctx, 0);
>>                 SET_CONTEXTIDR(base, ctx, 0);
>>         }
>> +       mb(); /* sync */
>>  }
>>
>>  static void __flush_iotlb(void *cookie)
>> @@ -143,6 +144,7 @@ static void __flush_iotlb(void *cookie)
>>
>>                 __disable_clocks(iommu);
>>         }
>> +       mb(); /* sync */
>>  fail:
>>         return;
>>  }
>
>These comments are completely useless. What is the specific race
>that you are protecting against, and why are the implicit barriers
>not sufficient here? Please find a better way to document what
>is going on.
>

The reason for doing this was, when the tlb maintenance ops are called
by  io-pgtable functions, it expects that the tlb_range ops is complete
only after the tlb_sync callback is called. Previously we were using
writel and the sync in that case was dummy. Also previously every register
configuration write was done using writel,  which was an overkill. So now
we do all the writes with writel_relaxed  and a barrier in the end. I will
change the documentation for this.

>> diff --git a/drivers/iommu/msm_iommu_hw-8xxx.h b/drivers/iommu/msm_iommu_hw-8xxx.h
>> index 84ba5739..161036c 100644
>> --- a/drivers/iommu/msm_iommu_hw-8xxx.h
>> +++ b/drivers/iommu/msm_iommu_hw-8xxx.h
>> @@ -24,10 +24,10 @@
>>  #define GET_CTX_REG(reg, base, ctx) \
>>                                 (readl((base) + (reg) + ((ctx) << CTX_SHIFT)))
>>
>> -#define SET_GLOBAL_REG(reg, base, val) writel((val), ((base) + (reg)))
>> +#define SET_GLOBAL_REG(reg, base, val) writel_relaxed((val), ((base) + (reg)))
>>
>>  #define SET_CTX_REG(reg, base, ctx, val) \
>> -                       writel((val), ((base) + (reg) + ((ctx) << CTX_SHIFT)))
>> +               writel_relaxed((val), ((base) + (reg) + ((ctx) << CTX_SHIFT)))
>>
>>  /* Wrappers for numbered registers */
>>  #define SET_GLOBAL_REG_N(b, n, r, v) SET_GLOBAL_REG(b, ((r) + (n << 2)), (v))
>> @@ -48,7 +48,8 @@
>>  #define SET_FIELD(addr, mask, shift, v) \
>>  do { \
>>         int t = readl(addr); \
>> -       writel((t & ~((mask) << (shift))) + (((v) & (mask)) << (shift)), addr);\
>> +       writel_relaxed((t & ~((mask) << (shift))) + \
>> +                      (((v) & (mask)) << (shift)), addr);\
>>  } while (0)
>
>No, please add a new macro for the relaxed accessors and then add comments
>in the places where those are used, to document why you can take shortcuts
>in those places.

Ok, will add a new accessor for this and comment them.

Regards,
 Sricharan
Arnd Bergmann May 18, 2016, 12:15 p.m. UTC | #3
On Wednesday 18 May 2016 17:37:40 Sricharan wrote:
> >
> >These comments are completely useless. What is the specific race
> >that you are protecting against, and why are the implicit barriers
> >not sufficient here? Please find a better way to document what
> >is going on.
> >
> 
> The reason for doing this was, when the tlb maintenance ops are called
> by  io-pgtable functions, it expects that the tlb_range ops is complete
> only after the tlb_sync callback is called. Previously we were using
> writel and the sync in that case was dummy. Also previously every register
> configuration write was done using writel,  which was an overkill. So now
> we do all the writes with writel_relaxed  and a barrier in the end. I will
> change the documentation for this.

If you need the barrier after the write, it probably was already faulty
before, because writel only implies a barrier before the store, not
after. Of course all the barriers likely made the whole process so
slow that you never hit that race in the end.

	Arnd
Sricharan Ramabadhran May 18, 2016, 12:33 p.m. UTC | #4
Hi Arnd,
>> >
>> >These comments are completely useless. What is the specific race
>> >that you are protecting against, and why are the implicit barriers
>> >not sufficient here? Please find a better way to document what
>> >is going on.
>> >
>>
>> The reason for doing this was, when the tlb maintenance ops are called
>> by  io-pgtable functions, it expects that the tlb_range ops is complete
>> only after the tlb_sync callback is called. Previously we were using
>> writel and the sync in that case was dummy. Also previously every register
>> configuration write was done using writel,  which was an overkill. So now
>> we do all the writes with writel_relaxed  and a barrier in the end. I will
>> change the documentation for this.
>
>If you need the barrier after the write, it probably was already faulty
>before, because writel only implies a barrier before the store, not
>after. Of course all the barriers likely made the whole process so
>slow that you never hit that race in the end.

ya, it could have worked in this way and i never saw a race issue before this.
The only reason for changing this was to optimise out the additonal barriers
that were happening. I do not see any issue now as well, only that the writes would
be faster.

Regards,
 Sricharan
Sricharan Ramabadhran May 20, 2016, 11:18 a.m. UTC | #5
Hi Arnd,

>>
>>If you need the barrier after the write, it probably was already faulty
>>before, because writel only implies a barrier before the store, not
>>after. Of course all the barriers likely made the whole process so
>>slow that you never hit that race in the end.
>
>ya, it could have worked in this way and i never saw a race issue before this.
>The only reason for changing this was to optimise out the additonal barriers
>that were happening. I do not see any issue now as well, only that the writes would
>be faster.
>
 I reposted a patch here [1] with comments, i had to delete the
old accessors though as it became unused now.

http://www.spinics.net/lists/arm-kernel/msg505448.html

Regards,
 Sricharan
diff mbox

Patch

diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f7b4c11..1240a5a 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -124,6 +124,7 @@  static void msm_iommu_reset(void __iomem *base, int ncb)
 		SET_TLBLKCR(base, ctx, 0);
 		SET_CONTEXTIDR(base, ctx, 0);
 	}
+	mb(); /* sync */
 }
 
 static void __flush_iotlb(void *cookie)
@@ -143,6 +144,7 @@  static void __flush_iotlb(void *cookie)
 
 		__disable_clocks(iommu);
 	}
+	mb(); /* sync */
 fail:
 	return;
 }
@@ -181,7 +183,7 @@  fail:
 
 static void __flush_iotlb_sync(void *cookie)
 {
-	/* To avoid a null function pointer */
+	mb(); /* sync */
 }
 
 static const struct iommu_gather_ops msm_iommu_gather_ops = {
@@ -235,6 +237,7 @@  static void config_mids(struct msm_iommu_dev *iommu,
 		/* Set security bit override to be Non-secure */
 		SET_NSCFG(iommu->base, mid, 3);
 	}
+	mb(); /* sync */
 }
 
 static void __reset_context(void __iomem *base, int ctx)
@@ -257,6 +260,7 @@  static void __reset_context(void __iomem *base, int ctx)
 	SET_TLBFLPTER(base, ctx, 0);
 	SET_TLBSLPTER(base, ctx, 0);
 	SET_TLBLKCR(base, ctx, 0);
+	mb(); /* sync */
 }
 
 static void __program_context(void __iomem *base, int ctx,
@@ -305,6 +309,7 @@  static void __program_context(void __iomem *base, int ctx,
 
 	/* Enable the MMU */
 	SET_M(base, ctx, 1);
+	mb(); /* sync */
 }
 
 static struct iommu_domain *msm_iommu_domain_alloc(unsigned type)
@@ -500,7 +505,7 @@  static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
 	/* Invalidate context TLB */
 	SET_CTX_TLBIALL(iommu->base, master->num, 0);
 	SET_V2PPR(iommu->base, master->num, va & V2Pxx_VA);
-
+	mb(); /* sync */
 	par = GET_PAR(iommu->base, master->num);
 
 	/* We are dealing with a supersection */
@@ -714,7 +719,7 @@  static int msm_iommu_probe(struct platform_device *pdev)
 	par = GET_PAR(iommu->base, 0);
 	SET_V2PCFG(iommu->base, 0, 0);
 	SET_M(iommu->base, 0, 0);
-
+	mb(); /* sync */
 	if (!par) {
 		pr_err("Invalid PAR value detected\n");
 		ret = -ENODEV;
diff --git a/drivers/iommu/msm_iommu_hw-8xxx.h b/drivers/iommu/msm_iommu_hw-8xxx.h
index 84ba5739..161036c 100644
--- a/drivers/iommu/msm_iommu_hw-8xxx.h
+++ b/drivers/iommu/msm_iommu_hw-8xxx.h
@@ -24,10 +24,10 @@ 
 #define GET_CTX_REG(reg, base, ctx) \
 				(readl((base) + (reg) + ((ctx) << CTX_SHIFT)))
 
-#define SET_GLOBAL_REG(reg, base, val)	writel((val), ((base) + (reg)))
+#define SET_GLOBAL_REG(reg, base, val)	writel_relaxed((val), ((base) + (reg)))
 
 #define SET_CTX_REG(reg, base, ctx, val) \
-			writel((val), ((base) + (reg) + ((ctx) << CTX_SHIFT)))
+		writel_relaxed((val), ((base) + (reg) + ((ctx) << CTX_SHIFT)))
 
 /* Wrappers for numbered registers */
 #define SET_GLOBAL_REG_N(b, n, r, v) SET_GLOBAL_REG(b, ((r) + (n << 2)), (v))
@@ -48,7 +48,8 @@ 
 #define SET_FIELD(addr, mask, shift, v) \
 do { \
 	int t = readl(addr); \
-	writel((t & ~((mask) << (shift))) + (((v) & (mask)) << (shift)), addr);\
+	writel_relaxed((t & ~((mask) << (shift))) + \
+		       (((v) & (mask)) << (shift)), addr);\
 } while (0)