Message ID | 20190614131141.4428-1-msys.mizuma@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] arm64/mm: Correct the cache line size warning with non coherent device | expand |
Hi Masayoshi, A few trivial comments inline. On 2019/6/14 21:11, Masayoshi Mizuma wrote: > From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> > > If the cache line size is greater than ARCH_DMA_MINALIGN (128), > the warning shows and it's tainted as TAINT_CPU_OUT_OF_SPEC. > > However, it's not good because as discussed in the thread [1], the cpu > cache line size will be problem only on non-coherent devices. > > Since the coherent flag is already introduced to struct device, > show the warning only if the device is non-coherent device and > ARCH_DMA_MINALIGN is smaller than the cpu cache size. > > [1] https://lore.kernel.org/linux-arm-kernel/20180514145703.celnlobzn3uh5tc2@localhost/ > > Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> > Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> > Tested-by: Zhang Lei <zhang.lei@jp.fujitsu.com> > --- > arch/arm64/include/asm/cache.h | 7 +++++++ > arch/arm64/kernel/cacheinfo.c | 4 +--- > arch/arm64/mm/dma-mapping.c | 14 ++++++++++---- > 3 files changed, 18 insertions(+), 7 deletions(-) > > diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h > index 758af6340314..d24b7c1ecd9b 100644 > --- a/arch/arm64/include/asm/cache.h > +++ b/arch/arm64/include/asm/cache.h > @@ -91,6 +91,13 @@ static inline u32 cache_type_cwg(void) > > #define __read_mostly __attribute__((__section__(".data..read_mostly"))) > > +static inline int cache_line_size_of_cpu(void) > +{ > + u32 cwg = cache_type_cwg(); > + > + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; > +} > + > int cache_line_size(void); > > /* > diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c > index 6eaf1c07aa4e..7fa6828bb488 100644 > --- a/arch/arm64/kernel/cacheinfo.c > +++ b/arch/arm64/kernel/cacheinfo.c > @@ -19,12 +19,10 @@ > > int cache_line_size(void) > { > - u32 cwg = cache_type_cwg(); > - > if (coherency_max_size != 0) > return coherency_max_size; > > - return cwg ? 
4 << cwg : ARCH_DMA_MINALIGN; > + return cache_line_size_of_cpu(); > } How about simplify it as this? int cache_line_size(void) { return coherency_max_size ? coherency_max_size : cache_line_size_of_cpu(); } > EXPORT_SYMBOL_GPL(cache_line_size); > > diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c > index 1669618db08a..379589dc7113 100644 > --- a/arch/arm64/mm/dma-mapping.c > +++ b/arch/arm64/mm/dma-mapping.c > @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) > > static int __init arm64_dma_init(void) > { > - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), > - TAINT_CPU_OUT_OF_SPEC, > - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", > - ARCH_DMA_MINALIGN, cache_line_size()); > return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); > } > arch_initcall(arm64_dma_init); > @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev) > void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, > const struct iommu_ops *iommu, bool coherent) > { > + int cls = cache_line_size_of_cpu(); whether we need this local variable, how about use cache_line_size_of_cpu directly in WARN_TAINT just like before. Thanks, Shaokun > + > dev->dma_coherent = coherent; > + > + if (!coherent) > + WARN_TAINT(cls > ARCH_DMA_MINALIGN, > + TAINT_CPU_OUT_OF_SPEC, > + "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", > + dev_driver_string(dev), dev_name(dev), > + ARCH_DMA_MINALIGN, cls); > + > if (iommu) > iommu_setup_dma_ops(dev, dma_base, size); > >
On Sat, Jun 15, 2019 at 10:44:33AM +0800, Zhangshaokun wrote: > On 2019/6/14 21:11, Masayoshi Mizuma wrote: > > diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c > > index 6eaf1c07aa4e..7fa6828bb488 100644 > > --- a/arch/arm64/kernel/cacheinfo.c > > +++ b/arch/arm64/kernel/cacheinfo.c > > @@ -19,12 +19,10 @@ > > > > int cache_line_size(void) > > { > > - u32 cwg = cache_type_cwg(); > > - > > if (coherency_max_size != 0) > > return coherency_max_size; > > > > - return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; > > + return cache_line_size_of_cpu(); > > } > > How about simplify it as this? > > int cache_line_size(void) > { > return coherency_max_size ? coherency_max_size : > cache_line_size_of_cpu(); > } I don't see this as a simplification, easier to read with explicit 'if'. > > EXPORT_SYMBOL_GPL(cache_line_size); > > > > diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c > > index 1669618db08a..379589dc7113 100644 > > --- a/arch/arm64/mm/dma-mapping.c > > +++ b/arch/arm64/mm/dma-mapping.c > > @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) > > > > static int __init arm64_dma_init(void) > > { > > - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), > > - TAINT_CPU_OUT_OF_SPEC, > > - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", > > - ARCH_DMA_MINALIGN, cache_line_size()); > > return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); > > } > > arch_initcall(arm64_dma_init); > > @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev) > > void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, > > const struct iommu_ops *iommu, bool coherent) > > { > > + int cls = cache_line_size_of_cpu(); > > whether we need this local variable, how about use cache_line_size_of_cpu > directly in WARN_TAINT just like before. The reason being? Anyway, I'll queue v2 of this patch as is for 5.3. Thanks.
Hi Catalin, On 2019/6/17 18:45, Catalin Marinas wrote: > On Sat, Jun 15, 2019 at 10:44:33AM +0800, Zhangshaokun wrote: >> On 2019/6/14 21:11, Masayoshi Mizuma wrote: >>> diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c >>> index 6eaf1c07aa4e..7fa6828bb488 100644 >>> --- a/arch/arm64/kernel/cacheinfo.c >>> +++ b/arch/arm64/kernel/cacheinfo.c >>> @@ -19,12 +19,10 @@ >>> >>> int cache_line_size(void) >>> { >>> - u32 cwg = cache_type_cwg(); >>> - >>> if (coherency_max_size != 0) >>> return coherency_max_size; >>> >>> - return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; >>> + return cache_line_size_of_cpu(); >>> } >> >> How about simplify it as this? >> >> int cache_line_size(void) >> { >> return coherency_max_size ? coherency_max_size : >> cache_line_size_of_cpu(); >> } > > I don't see this as a simplification, easier to read with explicit 'if'. > Okay, I thought it can save some unnecessary lines :-). >>> EXPORT_SYMBOL_GPL(cache_line_size); >>> >>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c >>> index 1669618db08a..379589dc7113 100644 >>> --- a/arch/arm64/mm/dma-mapping.c >>> +++ b/arch/arm64/mm/dma-mapping.c >>> @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) >>> >>> static int __init arm64_dma_init(void) >>> { >>> - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), >>> - TAINT_CPU_OUT_OF_SPEC, >>> - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", >>> - ARCH_DMA_MINALIGN, cache_line_size()); >>> return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); >>> } >>> arch_initcall(arm64_dma_init); >>> @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev) >>> void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, >>> const struct iommu_ops *iommu, bool coherent) >>> { >>> + int cls = cache_line_size_of_cpu(); >> >> whether we need this local variable, how about use cache_line_size_of_cpu >> directly in WARN_TAINT just like before. > > The reason being? 
> Since it is an inline function, maybe the local variable is unnecessary; it is a trivial point. > Anyway, I'll queue v2 of this patch as is for 5.3. Thanks. > It's fine. Thanks, Shaokun
On Mon, Jun 17, 2019 at 07:00:34PM +0800, Zhangshaokun wrote: > On 2019/6/17 18:45, Catalin Marinas wrote: > > On Sat, Jun 15, 2019 at 10:44:33AM +0800, Zhangshaokun wrote: > >> On 2019/6/14 21:11, Masayoshi Mizuma wrote: > >>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c > >>> index 1669618db08a..379589dc7113 100644 > >>> --- a/arch/arm64/mm/dma-mapping.c > >>> +++ b/arch/arm64/mm/dma-mapping.c > >>> @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) > >>> > >>> static int __init arm64_dma_init(void) > >>> { > >>> - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), > >>> - TAINT_CPU_OUT_OF_SPEC, > >>> - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", > >>> - ARCH_DMA_MINALIGN, cache_line_size()); > >>> return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); > >>> } > >>> arch_initcall(arm64_dma_init); > >>> @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev) > >>> void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, > >>> const struct iommu_ops *iommu, bool coherent) > >>> { > >>> + int cls = cache_line_size_of_cpu(); > >> > >> whether we need this local variable, how about use cache_line_size_of_cpu > >> directly in WARN_TAINT just like before. > > > > The reason being? > > Since it is inline function, maybe it is unnecessary, it is trivial. OTOH, you end up with two reads from the CTR_EL0 register.
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 758af6340314..d24b7c1ecd9b 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -91,6 +91,13 @@ static inline u32 cache_type_cwg(void) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +static inline int cache_line_size_of_cpu(void) +{ + u32 cwg = cache_type_cwg(); + + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; +} + int cache_line_size(void); /* diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 6eaf1c07aa4e..7fa6828bb488 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -19,12 +19,10 @@ int cache_line_size(void) { - u32 cwg = cache_type_cwg(); - if (coherency_max_size != 0) return coherency_max_size; - return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; + return cache_line_size_of_cpu(); } EXPORT_SYMBOL_GPL(cache_line_size); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1669618db08a..379589dc7113 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -38,10 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) static int __init arm64_dma_init(void) { - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), - TAINT_CPU_OUT_OF_SPEC, - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", - ARCH_DMA_MINALIGN, cache_line_size()); return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); @@ -56,7 +52,17 @@ void arch_teardown_dma_ops(struct device *dev) void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { + int cls = cache_line_size_of_cpu(); + dev->dma_coherent = coherent; + + if (!coherent) + WARN_TAINT(cls > ARCH_DMA_MINALIGN, + TAINT_CPU_OUT_OF_SPEC, + "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + dev_driver_string(dev), dev_name(dev), + ARCH_DMA_MINALIGN, cls); + if (iommu) 
iommu_setup_dma_ops(dev, dma_base, size);