Message ID | 20220303180013.512219-6-balasubramani.vivekanandan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Use the memcpy_from_wc function from drm | expand |
+Thomas Zimmermann and +Daniel Vetter Could you take a look below regarding the I/O to I/O memory access? On Thu, Mar 03, 2022 at 11:30:11PM +0530, Balasubramani Vivekanandan wrote: >memcpy_from_wc functions in i915_memcpy.c will be removed and replaced >by the implementation in drm_cache.c. >Updated to use the functions provided by drm_cache.c. > >v2: check if the source and destination memory address is from local > memory or system memory and initialize the iosys_map accordingly > (Lucas) > >Cc: Lucas De Marchi <lucas.demarchi@intel.com> >Cc: Matthew Auld <matthew.auld@intel.com> >Cc: Thomas Hellstr_m <thomas.hellstrom@linux.intel.com> > >Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> >--- > .../drm/i915/selftests/intel_memory_region.c | 41 +++++++++++++------ > 1 file changed, 28 insertions(+), 13 deletions(-) > >diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c >index ba32893e0873..d16ecb905f3b 100644 >--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c >+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c >@@ -7,6 +7,7 @@ > #include <linux/sort.h> > > #include <drm/drm_buddy.h> >+#include <drm/drm_cache.h> > > #include "../i915_selftest.h" > >@@ -1133,7 +1134,7 @@ static const char *repr_type(u32 type) > > static struct drm_i915_gem_object * > create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, >- void **out_addr) >+ struct iosys_map *out_addr) > { > struct drm_i915_gem_object *obj; > void *addr; >@@ -1153,7 +1154,11 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, > return addr; > } > >- *out_addr = addr; >+ if (i915_gem_object_is_lmem(obj)) >+ iosys_map_set_vaddr_iomem(out_addr, (void __iomem *)addr); >+ else >+ iosys_map_set_vaddr(out_addr, addr); >+ > return obj; > } > >@@ -1164,24 +1169,33 @@ static int wrap_ktime_compare(const void *A, const void *B) > return ktime_compare(*a, *b); > } > >-static void igt_memcpy_long(void *dst, const void *src, size_t size) >+static void igt_memcpy_long(struct iosys_map *dst, struct iosys_map *src, >+ size_t size) > { >- unsigned long *tmp = dst; >- const unsigned long *s = src; >+ unsigned long *tmp = dst->is_iomem ? >+ (unsigned long __force *)dst->vaddr_iomem : >+ dst->vaddr; if we access vaddr_iomem/vaddr we basically break the promise of abstracting system and I/O memory. There is no point in receiving struct iosys_map as argument and then break the abstraction. >+ const unsigned long *s = src->is_iomem ? >+ (unsigned long __force *)src->vaddr_iomem : >+ src->vaddr; > > size = size / sizeof(unsigned long); > while (size--) > *tmp++ = *s++; so we basically want to copy from one place to the other on a word boundary. And it may be a) I/O -> I/O or b) system -> I/O or c) I/O -> system (b) and (c) should work, but AFAICS (a) is not possible with the current iosys-map API. Not even the underlying APIs have that abstracted. Both memcpy_fromio() and memcpy_toio() expect one of them to be RAM (system memory) I remember seeing people using a temporary in buffer in system memory for proxying the copy. But maybe we need an abstraction for that? Also adding Thomas Zimmermann here for that question. and since this is a selftest testing the performance of the memcpy from one memory region to the other, it would be good to have this test executed to a) make sure it still works and b) record in the commit message any possible slow down we are incurring. thanks Lucas De Marchi > } > >-static inline void igt_memcpy(void *dst, const void *src, size_t size) >+static inline void igt_memcpy(struct iosys_map *dst, struct iosys_map *src, >+ size_t size) > { >- memcpy(dst, src, size); >+ memcpy(dst->is_iomem ? (void __force *)dst->vaddr_iomem : dst->vaddr, >+ src->is_iomem ? (void __force *)src->vaddr_iomem : src->vaddr, >+ size); > } > >-static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size) >+static inline void igt_memcpy_from_wc(struct iosys_map *dst, struct iosys_map *src, >+ size_t size) > { >- i915_memcpy_from_wc(dst, src, size); >+ drm_memcpy_from_wc(dst, src, size); > } > > static int _perf_memcpy(struct intel_memory_region *src_mr, >@@ -1191,7 +1205,8 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, > struct drm_i915_private *i915 = src_mr->i915; > const struct { > const char *name; >- void (*copy)(void *dst, const void *src, size_t size); >+ void (*copy)(struct iosys_map *dst, struct iosys_map *src, >+ size_t size); > bool skip; > } tests[] = { > { >@@ -1205,11 +1220,11 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, > { > "memcpy_from_wc", > igt_memcpy_from_wc, >- !i915_has_memcpy_from_wc(), >+ !drm_memcpy_fastcopy_supported(), > }, > }; > struct drm_i915_gem_object *src, *dst; >- void *src_addr, *dst_addr; >+ struct iosys_map src_addr, dst_addr; > int ret = 0; > int i; > >@@ -1237,7 +1252,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, > > t0 = ktime_get(); > >- tests[i].copy(dst_addr, src_addr, size); >+ tests[i].copy(&dst_addr, &src_addr, size); > > t1 = ktime_get(); > t[pass] = ktime_sub(t1, t0); >-- >2.25.1 >
Now Cc'ing Daniel properly Lucas De Marchi On Mon, Mar 21, 2022 at 04:00:56PM -0700, Lucas De Marchi wrote: >+Thomas Zimmermann and +Daniel Vetter > >Could you take a look below regarding the I/O to I/O memory access? > >On Thu, Mar 03, 2022 at 11:30:11PM +0530, Balasubramani Vivekanandan wrote: >>memcpy_from_wc functions in i915_memcpy.c will be removed and replaced >>by the implementation in drm_cache.c. >>Updated to use the functions provided by drm_cache.c. >> >>v2: check if the source and destination memory address is from local >> memory or system memory and initialize the iosys_map accordingly >> (Lucas) >> >>Cc: Lucas De Marchi <lucas.demarchi@intel.com> >>Cc: Matthew Auld <matthew.auld@intel.com> >>Cc: Thomas Hellstr_m <thomas.hellstrom@linux.intel.com> >> >>Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> >>--- >>.../drm/i915/selftests/intel_memory_region.c | 41 +++++++++++++------ >>1 file changed, 28 insertions(+), 13 deletions(-) >> >>diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c >>index ba32893e0873..d16ecb905f3b 100644 >>--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c >>+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c >>@@ -7,6 +7,7 @@ >>#include <linux/sort.h> >> >>#include <drm/drm_buddy.h> >>+#include <drm/drm_cache.h> >> >>#include "../i915_selftest.h" >> >>@@ -1133,7 +1134,7 @@ static const char *repr_type(u32 type) >> >>static struct drm_i915_gem_object * >>create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, >>- void **out_addr) >>+ struct iosys_map *out_addr) >>{ >> struct drm_i915_gem_object *obj; >> void *addr; >>@@ -1153,7 +1154,11 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, >> return addr; >> } >> >>- *out_addr = addr; >>+ if (i915_gem_object_is_lmem(obj)) >>+ iosys_map_set_vaddr_iomem(out_addr, (void __iomem *)addr); >>+ else >>+ iosys_map_set_vaddr(out_addr, addr); >>+ >> return obj; >>} >> >>@@ -1164,24 +1169,33 @@ static int wrap_ktime_compare(const void *A, const void *B) >> return ktime_compare(*a, *b); >>} >> >>-static void igt_memcpy_long(void *dst, const void *src, size_t size) >>+static void igt_memcpy_long(struct iosys_map *dst, struct iosys_map *src, >>+ size_t size) >>{ >>- unsigned long *tmp = dst; >>- const unsigned long *s = src; >>+ unsigned long *tmp = dst->is_iomem ? >>+ (unsigned long __force *)dst->vaddr_iomem : >>+ dst->vaddr; > >if we access vaddr_iomem/vaddr we basically break the promise of >abstracting system and I/O memory. There is no point in receiving >struct iosys_map as argument and then break the abstraction. > >>+ const unsigned long *s = src->is_iomem ? >>+ (unsigned long __force *)src->vaddr_iomem : >>+ src->vaddr; >> >> size = size / sizeof(unsigned long); >> while (size--) >> *tmp++ = *s++; > > >so we basically want to copy from one place to the other on a word >boundary. And it may be > > a) I/O -> I/O or > b) system -> I/O or > c) I/O -> system > >(b) and (c) should work, but AFAICS (a) is not possible with the current >iosys-map API. Not even the underlying APIs have that abstracted. Both >memcpy_fromio() and memcpy_toio() expect one of them to be RAM (system >memory) > >I remember seeing people using a temporary in buffer in system memory >for proxying the copy. But maybe we need an abstraction for that? >Also adding Thomas Zimmermann here for that question. > >and since this is a selftest testing the performance of the memcpy from >one memory region to the other, it would be good to have this test >executed to a) make sure it still works and b) record in the commit >message any possible slow down we are incurring. > >thanks >Lucas De Marchi > > >>} >> >>-static inline void igt_memcpy(void *dst, const void *src, size_t size) >>+static inline void igt_memcpy(struct iosys_map *dst, struct iosys_map *src, >>+ size_t size) >>{ >>- memcpy(dst, src, size); >>+ memcpy(dst->is_iomem ? (void __force *)dst->vaddr_iomem : dst->vaddr, >>+ src->is_iomem ? (void __force *)src->vaddr_iomem : src->vaddr, >>+ size); >>} >> >>-static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size) >>+static inline void igt_memcpy_from_wc(struct iosys_map *dst, struct iosys_map *src, >>+ size_t size) >>{ >>- i915_memcpy_from_wc(dst, src, size); >>+ drm_memcpy_from_wc(dst, src, size); >>} >> >>static int _perf_memcpy(struct intel_memory_region *src_mr, >>@@ -1191,7 +1205,8 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, >> struct drm_i915_private *i915 = src_mr->i915; >> const struct { >> const char *name; >>- void (*copy)(void *dst, const void *src, size_t size); >>+ void (*copy)(struct iosys_map *dst, struct iosys_map *src, >>+ size_t size); >> bool skip; >> } tests[] = { >> { >>@@ -1205,11 +1220,11 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, >> { >> "memcpy_from_wc", >> igt_memcpy_from_wc, >>- !i915_has_memcpy_from_wc(), >>+ !drm_memcpy_fastcopy_supported(), >> }, >> }; >> struct drm_i915_gem_object *src, *dst; >>- void *src_addr, *dst_addr; >>+ struct iosys_map src_addr, dst_addr; >> int ret = 0; >> int i; >> >>@@ -1237,7 +1252,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, >> >> t0 = ktime_get(); >> >>- tests[i].copy(dst_addr, src_addr, size); >>+ tests[i].copy(&dst_addr, &src_addr, size); >> >> t1 = ktime_get(); >> t[pass] = ktime_sub(t1, t0); >>-- >>2.25.1 >>
On 21.03.2022 16:07, Lucas De Marchi wrote: > Now Cc'ing Daniel properly > > Lucas De Marchi > > On Mon, Mar 21, 2022 at 04:00:56PM -0700, Lucas De Marchi wrote: > > +Thomas Zimmermann and +Daniel Vetter > > > > Could you take a look below regarding the I/O to I/O memory access? > > > > On Thu, Mar 03, 2022 at 11:30:11PM +0530, Balasubramani Vivekanandan wrote: > > > memcpy_from_wc functions in i915_memcpy.c will be removed and replaced > > > by the implementation in drm_cache.c. > > > Updated to use the functions provided by drm_cache.c. > > > > > > v2: check if the source and destination memory address is from local > > > memory or system memory and initialize the iosys_map accordingly > > > (Lucas) > > > > > > Cc: Lucas De Marchi <lucas.demarchi@intel.com> > > > Cc: Matthew Auld <matthew.auld@intel.com> > > > Cc: Thomas Hellstr_m <thomas.hellstrom@linux.intel.com> > > > > > > Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> > > > --- > > > .../drm/i915/selftests/intel_memory_region.c | 41 +++++++++++++------ > > > 1 file changed, 28 insertions(+), 13 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c > > > index ba32893e0873..d16ecb905f3b 100644 > > > --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c > > > +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c > > > @@ -7,6 +7,7 @@ > > > #include <linux/sort.h> > > > > > > #include <drm/drm_buddy.h> > > > +#include <drm/drm_cache.h> > > > > > > #include "../i915_selftest.h" > > > > > > @@ -1133,7 +1134,7 @@ static const char *repr_type(u32 type) > > > > > > static struct drm_i915_gem_object * > > > create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, > > > - void **out_addr) > > > + struct iosys_map *out_addr) > > > { > > > struct drm_i915_gem_object *obj; > > > void *addr; > > > @@ -1153,7 +1154,11 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, > > > return addr; > > > } > > > > > > - *out_addr = addr; > > > + if (i915_gem_object_is_lmem(obj)) > > > + iosys_map_set_vaddr_iomem(out_addr, (void __iomem *)addr); > > > + else > > > + iosys_map_set_vaddr(out_addr, addr); > > > + > > > return obj; > > > } > > > > > > @@ -1164,24 +1169,33 @@ static int wrap_ktime_compare(const void *A, const void *B) > > > return ktime_compare(*a, *b); > > > } > > > > > > -static void igt_memcpy_long(void *dst, const void *src, size_t size) > > > +static void igt_memcpy_long(struct iosys_map *dst, struct iosys_map *src, > > > + size_t size) > > > { > > > - unsigned long *tmp = dst; > > > - const unsigned long *s = src; > > > + unsigned long *tmp = dst->is_iomem ? > > > + (unsigned long __force *)dst->vaddr_iomem : > > > + dst->vaddr; > > > > if we access vaddr_iomem/vaddr we basically break the promise of > > abstracting system and I/O memory. There is no point in receiving > > struct iosys_map as argument and then break the abstraction. > > Hi Lucas, I didn't attempt to convert the memory access using iosys_map interfaces to abstract system and I/O memory, in this patch. The intention of passing iosys_map structures instead of raw pointers in the test functions is for the benefit of igt_memcpy_from_wc() test function. igt_memcpy_from_wc() requires iosys_map variables for passing it to drm_memcpy_from_wc(). In the other test functions, though it receives iosys_map structures I have retained the behavior same as earlier by converting back the iosys_map structures to pointers. I made a short try to use iosys_map structures to perform the memory copy inside other test functions, but I dropped it after I realized that their is support lacking for (a) mentioned below in your comment. Since it requires some discussion to bring in the support for (a), I did not proceed with it. Regards, Bala > > > + const unsigned long *s = src->is_iomem ? > > > + (unsigned long __force *)src->vaddr_iomem : > > > + src->vaddr; > > > > > > size = size / sizeof(unsigned long); > > > while (size--) > > > *tmp++ = *s++; > > > > > > so we basically want to copy from one place to the other on a word > > boundary. And it may be > > > > a) I/O -> I/O or > > b) system -> I/O or > > c) I/O -> system > > > > (b) and (c) should work, but AFAICS (a) is not possible with the current > > iosys-map API. Not even the underlying APIs have that abstracted. Both > > memcpy_fromio() and memcpy_toio() expect one of them to be RAM (system > > memory) > > > > I remember seeing people using a temporary in buffer in system memory > > for proxying the copy. But maybe we need an abstraction for that? > > Also adding Thomas Zimmermann here for that question. > > > > and since this is a selftest testing the performance of the memcpy from > > one memory region to the other, it would be good to have this test > > executed to a) make sure it still works and b) record in the commit > > message any possible slow down we are incurring. > > > > thanks > > Lucas De Marchi > > > > > > > } > > > > > > -static inline void igt_memcpy(void *dst, const void *src, size_t size) > > > +static inline void igt_memcpy(struct iosys_map *dst, struct iosys_map *src, > > > + size_t size) > > > { > > > - memcpy(dst, src, size); > > > + memcpy(dst->is_iomem ? (void __force *)dst->vaddr_iomem : dst->vaddr, > > > + src->is_iomem ? (void __force *)src->vaddr_iomem : src->vaddr, > > > + size); > > > } > > > > > > -static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size) > > > +static inline void igt_memcpy_from_wc(struct iosys_map *dst, struct iosys_map *src, > > > + size_t size) > > > { > > > - i915_memcpy_from_wc(dst, src, size); > > > + drm_memcpy_from_wc(dst, src, size); > > > } > > > > > > static int _perf_memcpy(struct intel_memory_region *src_mr, > > > @@ -1191,7 +1205,8 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, > > > struct drm_i915_private *i915 = src_mr->i915; > > > const struct { > > > const char *name; > > > - void (*copy)(void *dst, const void *src, size_t size); > > > + void (*copy)(struct iosys_map *dst, struct iosys_map *src, > > > + size_t size); > > > bool skip; > > > } tests[] = { > > > { > > > @@ -1205,11 +1220,11 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, > > > { > > > "memcpy_from_wc", > > > igt_memcpy_from_wc, > > > - !i915_has_memcpy_from_wc(), > > > + !drm_memcpy_fastcopy_supported(), > > > }, > > > }; > > > struct drm_i915_gem_object *src, *dst; > > > - void *src_addr, *dst_addr; > > > + struct iosys_map src_addr, dst_addr; > > > int ret = 0; > > > int i; > > > > > > @@ -1237,7 +1252,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, > > > > > > t0 = ktime_get(); > > > > > > - tests[i].copy(dst_addr, src_addr, size); > > > + tests[i].copy(&dst_addr, &src_addr, size); > > > > > > t1 = ktime_get(); > > > t[pass] = ktime_sub(t1, t0); > > > -- > > > 2.25.1 > > >
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ba32893e0873..d16ecb905f3b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -7,6 +7,7 @@ #include <linux/sort.h> #include <drm/drm_buddy.h> +#include <drm/drm_cache.h> #include "../i915_selftest.h" @@ -1133,7 +1134,7 @@ static const char *repr_type(u32 type) static struct drm_i915_gem_object * create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, - void **out_addr) + struct iosys_map *out_addr) { struct drm_i915_gem_object *obj; void *addr; @@ -1153,7 +1154,11 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, return addr; } - *out_addr = addr; + if (i915_gem_object_is_lmem(obj)) + iosys_map_set_vaddr_iomem(out_addr, (void __iomem *)addr); + else + iosys_map_set_vaddr(out_addr, addr); + return obj; } @@ -1164,24 +1169,33 @@ static int wrap_ktime_compare(const void *A, const void *B) return ktime_compare(*a, *b); } -static void igt_memcpy_long(void *dst, const void *src, size_t size) +static void igt_memcpy_long(struct iosys_map *dst, struct iosys_map *src, + size_t size) { - unsigned long *tmp = dst; - const unsigned long *s = src; + unsigned long *tmp = dst->is_iomem ? + (unsigned long __force *)dst->vaddr_iomem : + dst->vaddr; + const unsigned long *s = src->is_iomem ? + (unsigned long __force *)src->vaddr_iomem : + src->vaddr; size = size / sizeof(unsigned long); while (size--) *tmp++ = *s++; } -static inline void igt_memcpy(void *dst, const void *src, size_t size) +static inline void igt_memcpy(struct iosys_map *dst, struct iosys_map *src, + size_t size) { - memcpy(dst, src, size); + memcpy(dst->is_iomem ? (void __force *)dst->vaddr_iomem : dst->vaddr, + src->is_iomem ? (void __force *)src->vaddr_iomem : src->vaddr, + size); } -static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size) +static inline void igt_memcpy_from_wc(struct iosys_map *dst, struct iosys_map *src, + size_t size) { - i915_memcpy_from_wc(dst, src, size); + drm_memcpy_from_wc(dst, src, size); } static int _perf_memcpy(struct intel_memory_region *src_mr, @@ -1191,7 +1205,8 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, struct drm_i915_private *i915 = src_mr->i915; const struct { const char *name; - void (*copy)(void *dst, const void *src, size_t size); + void (*copy)(struct iosys_map *dst, struct iosys_map *src, + size_t size); bool skip; } tests[] = { { @@ -1205,11 +1220,11 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, { "memcpy_from_wc", igt_memcpy_from_wc, - !i915_has_memcpy_from_wc(), + !drm_memcpy_fastcopy_supported(), }, }; struct drm_i915_gem_object *src, *dst; - void *src_addr, *dst_addr; + struct iosys_map src_addr, dst_addr; int ret = 0; int i; @@ -1237,7 +1252,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, t0 = ktime_get(); - tests[i].copy(dst_addr, src_addr, size); + tests[i].copy(&dst_addr, &src_addr, size); t1 = ktime_get(); t[pass] = ktime_sub(t1, t0);
memcpy_from_wc functions in i915_memcpy.c will be removed and replaced by the implementation in drm_cache.c. Updated to use the functions provided by drm_cache.c. v2: check if the source and destination memory address is from local memory or system memory and initialize the iosys_map accordingly (Lucas) Cc: Lucas De Marchi <lucas.demarchi@intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Thomas Hellstr_m <thomas.hellstrom@linux.intel.com> Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> --- .../drm/i915/selftests/intel_memory_region.c | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-)