Message ID | 20250325133744.23805-1-jacek.lawrynowicz@linux.intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | drm/gem-shmem: Optimize DMA mapping for exported buffers | expand |
Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz: > Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. > The same flag is already used by drm_gem_map_dma_buf() for imported > buffers. > > Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> > --- > drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- > include/drm/drm_gem.h | 12 ++++++++++++ > 2 files changed, 18 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c > index d99dee67353a1..8938d8e3de52f 100644 > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c > @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); > static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) > { > struct drm_gem_object *obj = &shmem->base; > - int ret; > + int ret, flags = 0; > struct sg_table *sgt; > > if (shmem->sgt) > @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ > ret = PTR_ERR(sgt); > goto err_put_pages; > } > + > + if (drm_gem_is_exported()) > + flags |= DMA_ATTR_SKIP_CPU_SYNC; We should probably just unconditionally set this flag or not at all. Otherwise we could run into quite some surprises. Regards, Christian. > + > /* Map the pages for use by the h/w. 
*/ > - ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0); > + ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags); > if (ret) > goto err_free_sgt; > > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > index 2bf893eabb4b2..7c355a44d0a49 100644 > --- a/include/drm/drm_gem.h > +++ b/include/drm/drm_gem.h > @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) > return obj->dma_buf && (obj->dma_buf->priv != obj); > } > > +/** > + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported > + * @obj: the GEM object > + * > + * Returns: > + * True if the GEM object's buffer has been exported, false otherwise > + */ > +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj) > +{ > + return obj->dma_buf && (obj->dma_buf->priv == obj); > +} > + > #ifdef CONFIG_LOCKDEP > /** > * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
Hi Am 25.03.25 um 14:53 schrieb Christian König: > Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz: >> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. >> The same flag is already used by drm_gem_map_dma_buf() for imported >> buffers. >> >> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> >> --- >> drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- >> include/drm/drm_gem.h | 12 ++++++++++++ >> 2 files changed, 18 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c >> index d99dee67353a1..8938d8e3de52f 100644 >> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c >> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c >> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); >> static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) >> { >> struct drm_gem_object *obj = &shmem->base; >> - int ret; >> + int ret, flags = 0; >> struct sg_table *sgt; >> >> if (shmem->sgt) >> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ >> ret = PTR_ERR(sgt); >> goto err_put_pages; >> } >> + >> + if (drm_gem_is_exported()) >> + flags |= DMA_ATTR_SKIP_CPU_SYNC; > We should probably just unconditionally set this flag or not at all. A question to both of you: does this have an effect on performance? I'm asking because i have a bug report where someone exports a buffer from gem-shmem with miserable performance. Would this flag make a difference? Best regards Thomas > > Otherwise we could run into quite some surprises. > > Regards, > Christian. > >> + >> /* Map the pages for use by the h/w. 
*/ >> - ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0); >> + ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags); >> if (ret) >> goto err_free_sgt; >> >> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >> index 2bf893eabb4b2..7c355a44d0a49 100644 >> --- a/include/drm/drm_gem.h >> +++ b/include/drm/drm_gem.h >> @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) >> return obj->dma_buf && (obj->dma_buf->priv != obj); >> } >> >> +/** >> + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported >> + * @obj: the GEM object >> + * >> + * Returns: >> + * True if the GEM object's buffer has been exported, false otherwise >> + */ >> +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj) >> +{ >> + return obj->dma_buf && (obj->dma_buf->priv == obj); >> +} >> + >> #ifdef CONFIG_LOCKDEP >> /** >> * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
Am 25.03.25 um 15:14 schrieb Thomas Zimmermann: > Hi > > Am 25.03.25 um 14:53 schrieb Christian König: >> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz: >>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. >>> The same flag is already used by drm_gem_map_dma_buf() for imported >>> buffers. >>> >>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> >>> --- >>> drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- >>> include/drm/drm_gem.h | 12 ++++++++++++ >>> 2 files changed, 18 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c >>> index d99dee67353a1..8938d8e3de52f 100644 >>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c >>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c >>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); >>> static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) >>> { >>> struct drm_gem_object *obj = &shmem->base; >>> - int ret; >>> + int ret, flags = 0; >>> struct sg_table *sgt; >>> if (shmem->sgt) >>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ >>> ret = PTR_ERR(sgt); >>> goto err_put_pages; >>> } >>> + >>> + if (drm_gem_is_exported()) >>> + flags |= DMA_ATTR_SKIP_CPU_SYNC; >> We should probably just unconditionally set this flag or not at all. > > A question to both of you: does this have an effect on performance? I'm asking because i have a bug report where someone exports a buffer from gem-shmem with miserable performance. Would this flag make a difference? You can usually skip the CPU sync when you are sure your device is coherent to the CPU cache. And that saves time since you don't need to wait for cache flushes to complete. But that is completely independent of if a buffer is exported or not. Regards, Christian. > > Best regards > Thomas > >> >> Otherwise we could run into quite some surprises. 
>> >> Regards, >> Christian. >> >>> + >>> /* Map the pages for use by the h/w. */ >>> - ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0); >>> + ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags); >>> if (ret) >>> goto err_free_sgt; >>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >>> index 2bf893eabb4b2..7c355a44d0a49 100644 >>> --- a/include/drm/drm_gem.h >>> +++ b/include/drm/drm_gem.h >>> @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) >>> return obj->dma_buf && (obj->dma_buf->priv != obj); >>> } >>> +/** >>> + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported >>> + * @obj: the GEM object >>> + * >>> + * Returns: >>> + * True if the GEM object's buffer has been exported, false otherwise >>> + */ >>> +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj) >>> +{ >>> + return obj->dma_buf && (obj->dma_buf->priv == obj); >>> +} >>> + >>> #ifdef CONFIG_LOCKDEP >>> /** >>> * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list. >
Hi, On 3/25/2025 2:53 PM, Christian König wrote: > Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz: >> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. >> The same flag is already used by drm_gem_map_dma_buf() for imported >> buffers. >> >> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> >> --- >> drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- >> include/drm/drm_gem.h | 12 ++++++++++++ >> 2 files changed, 18 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c >> index d99dee67353a1..8938d8e3de52f 100644 >> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c >> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c >> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); >> static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) >> { >> struct drm_gem_object *obj = &shmem->base; >> - int ret; >> + int ret, flags = 0; >> struct sg_table *sgt; >> >> if (shmem->sgt) >> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ >> ret = PTR_ERR(sgt); >> goto err_put_pages; >> } >> + >> + if (drm_gem_is_exported()) >> + flags |= DMA_ATTR_SKIP_CPU_SYNC; > > We should probably just unconditionally set this flag or not at all. > > Otherwise we could run into quite some surprises. I see that this flag is usually set in drm_gem_map_dma_buf() and similar callbacks across drm drivers. Shouldn't it be the default on x86? Regards, Jacek
Hi, On 3/25/2025 3:14 PM, Thomas Zimmermann wrote: > Hi > > Am 25.03.25 um 14:53 schrieb Christian König: >> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz: >>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. >>> The same flag is already used by drm_gem_map_dma_buf() for imported >>> buffers. >>> >>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> >>> --- >>> drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- >>> include/drm/drm_gem.h | 12 ++++++++++++ >>> 2 files changed, 18 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c >>> index d99dee67353a1..8938d8e3de52f 100644 >>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c >>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c >>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); >>> static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) >>> { >>> struct drm_gem_object *obj = &shmem->base; >>> - int ret; >>> + int ret, flags = 0; >>> struct sg_table *sgt; >>> if (shmem->sgt) >>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ >>> ret = PTR_ERR(sgt); >>> goto err_put_pages; >>> } >>> + >>> + if (drm_gem_is_exported()) >>> + flags |= DMA_ATTR_SKIP_CPU_SYNC; >> We should probably just unconditionally set this flag or not at all. > > A question to both of you: does this have an effect on performance? I'm asking because i have a bug report where someone exports a buffer from gem-shmem with miserable performance. Would this flag make a difference? On x86 this has no effect on performance. I'm not sure about other archs. Regards, Jacek
Am 25.03.25 um 16:33 schrieb Jacek Lawrynowicz: > Hi, > > On 3/25/2025 2:53 PM, Christian König wrote: >> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz: >>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. >>> The same flag is already used by drm_gem_map_dma_buf() for imported >>> buffers. >>> >>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> >>> --- >>> drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- >>> include/drm/drm_gem.h | 12 ++++++++++++ >>> 2 files changed, 18 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c >>> index d99dee67353a1..8938d8e3de52f 100644 >>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c >>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c >>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); >>> static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) >>> { >>> struct drm_gem_object *obj = &shmem->base; >>> - int ret; >>> + int ret, flags = 0; >>> struct sg_table *sgt; >>> >>> if (shmem->sgt) >>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ >>> ret = PTR_ERR(sgt); >>> goto err_put_pages; >>> } >>> + >>> + if (drm_gem_is_exported()) >>> + flags |= DMA_ATTR_SKIP_CPU_SYNC; >> We should probably just unconditionally set this flag or not at all. >> >> Otherwise we could run into quite some surprises. > I see that this flag is usually set in drm_gem_map_dma_buf() and similar callbacks across drm drivers. > Shouldn't it be the default on x86? Not really, no. If you can skip CPU sync depends on the platform. On x86 it's correct that almost all devices are coherent with the CPU cache, but that isn't always the case. The device driver using the GEM shmem helper needs to decide if that is ok or not. Regards, Christian. > > Regards, > Jacek
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index d99dee67353a1..8938d8e3de52f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; - int ret; + int ret, flags = 0; struct sg_table *sgt; if (shmem->sgt) @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ ret = PTR_ERR(sgt); goto err_put_pages; } + + if (drm_gem_is_exported(obj)) + flags |= DMA_ATTR_SKIP_CPU_SYNC; + /* Map the pages for use by the h/w. */ - ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags); if (ret) goto err_free_sgt; diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2bf893eabb4b2..7c355a44d0a49 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) return obj->dma_buf && (obj->dma_buf->priv != obj); } +/** + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported + * @obj: the GEM object + * + * Returns: + * True if the GEM object's buffer has been exported, false otherwise + */ +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj) +{ + return obj->dma_buf && (obj->dma_buf->priv == obj); +} + #ifdef CONFIG_LOCKDEP /** * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping. The same flag is already used by drm_gem_map_dma_buf() for imported buffers. Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> --- drivers/gpu/drm/drm_gem_shmem_helper.c | 8 ++++++-- include/drm/drm_gem.h | 12 ++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-)