Message ID | 20191211081810.20079-2-kraxel@redhat.com (mailing list archive)
---|---
State | New, archived
Series | drm/virtio: fix mmap page attributes
Hi Gerd

Am 11.12.19 um 09:18 schrieb Gerd Hoffmann:
> Add caching field to drm_gem_shmem_object to specify the caching
> attributes for mappings. Add helper function to tweak pgprot
> accordingly. Switch vmap and mmap functions to the new helper.
>
> Set caching to write-combine when creating the object so behavior
> doesn't change by default. Drivers can override that later if
> needed.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

If you want to merge this patch, you have my

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>

Please see my comment below.

[...]

> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot)
> +{
> +	switch (shmem->caching) {
> +	case DRM_GEM_SHMEM_CACHED:
> +		return prot;
> +	case DRM_GEM_SHMEM_WC:
> +		return pgprot_writecombine(prot);
> +	default:
> +		WARN_ON_ONCE(1);
> +		return prot;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(drm_gem_shmem_caching);

Two reasons why I'd reconsider this design.

I don't like switch statements near the bottom of the call graph. The
code ends up with warnings in the default case, such as this one.

Udl has different caching flags for imported and 'native' buffers. This
would require a new constant and additional code here.

What do you think about turning this function into a callback in struct
shmem_funcs? The default implementation would be for WC, virtio would
use CACHED. The individual implementations could still be located in
the shmem code. Udl would later provide its own code.

Best regards
Thomas
Am 11.12.19 um 10:58 schrieb Thomas Zimmermann:
>
> What do you think about turning this function into a callback in struct
> shmem_funcs? The default implementation would be for WC, virtio would

s/shmem_funcs/drm_gem_object_funcs

> use CACHED. The individual implementations could still be located in
> the shmem code. Udl would later provide its own code.
>
> Best regards
> Thomas
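A rough sketch of the callback design being floated here — the mmap_prot
hook and both implementations below are hypothetical and not part of the
actual drm_gem_object_funcs API:

/* Hypothetical new hook in struct drm_gem_object_funcs:
 *
 *	pgprot_t (*mmap_prot)(struct drm_gem_object *obj, pgprot_t prot);
 */

/* Default implementation, kept in the shmem helpers: write-combine. */
static pgprot_t drm_gem_shmem_mmap_prot_wc(struct drm_gem_object *obj,
					   pgprot_t prot)
{
	return pgprot_writecombine(prot);
}

/* Cached variant that virtio would select instead. */
static pgprot_t drm_gem_shmem_mmap_prot_cached(struct drm_gem_object *obj,
					       pgprot_t prot)
{
	return prot;
}

/* The mmap/vmap paths would then call, e.g.:
 *
 *	vma->vm_page_prot = obj->funcs->mmap_prot(obj, vma->vm_page_prot);
 */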
Am 11.12.19 um 10:58 schrieb Thomas Zimmermann:
> Hi Gerd
>
> Am 11.12.19 um 09:18 schrieb Gerd Hoffmann:
>> Add caching field to drm_gem_shmem_object to specify the caching
>> attributes for mappings. Add helper function to tweak pgprot
>> accordingly. Switch vmap and mmap functions to the new helper.
[...]
> Two reasons why I'd reconsider this design.
>
> I don't like switch statements near the bottom of the call graph. The
> code ends up with warnings in the default case, such as this one.
>
> Udl has different caching flags for imported and 'native' buffers. This
> would require a new constant and additional code here.
>
> What do you think about turning this function into a callback in struct
> shmem_funcs? The default implementation would be for WC, virtio would
> use CACHED. The individual implementations could still be located in
> the shmem code. Udl would later provide its own code.

On a second thought, all this might be over-engineered and v1 of the
patchset was the correct approach. You can add my

Acked-by: Thomas Zimmermann <tzimmermann@suse.de>

if you prefer to merge v1.

> Best regards
> Thomas
On Wed, Dec 11, 2019 at 11:07:25AM +0100, Thomas Zimmermann wrote:
> Am 11.12.19 um 10:58 schrieb Thomas Zimmermann:
[...]
> > Udl has different caching flags for imported and 'native' buffers. This
> > would require a new constant and additional code here.
> >
> > What do you think about turning this function into a callback in struct
> > shmem_funcs? The default implementation would be for WC, virtio would
> > use CACHED. The individual implementations could still be located in
> > the shmem code. Udl would later provide its own code.
>
> On a second thought, all this might be over-engineered and v1 of the
> patchset was the correct approach. You can add my

The udl use-case should be covered already, simply set the flag correctly
at create/import time? It's per-object ...

btw on why udl does this: Imported bo are usually rendered by real hw, and
reading it uncached/wc is the more defensive setting. It would be kinda
nice if dma-buf would expose this, but I fear dma-api maintainers would
murder us if we even just propose that ... so it's a mess right now.

btw the issue extends to dma access by devices too, e.g. both i915 and
amdgpu can select the coherency mode at runtime (using e.g. the pcie
no-snoop transaction mode), and we have similar uncoordinated hacks in
there too, like in udl.
-Daniel

> Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
>
> if you prefer to merge v1.
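A minimal sketch of the per-object approach Daniel describes, built on
this patch's caching field; the helper below is made up for illustration
and is not udl's actual code:

/* Hypothetical udl-style policy: imported buffers are rendered by real
 * hardware, so write-combine is the defensive mapping attribute, while
 * udl's native, CPU-rendered buffers could stay cached. */
static void udl_set_caching(struct drm_gem_shmem_object *shmem)
{
	if (shmem->base.import_attach)
		shmem->caching = DRM_GEM_SHMEM_WC;
	else
		shmem->caching = DRM_GEM_SHMEM_CACHED;
}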
Hi

Am 11.12.19 um 13:36 schrieb Daniel Vetter:
>
> The udl use-case should be covered already, simply set the flag correctly
> at create/import time? It's per-object ...

The original udl gem code did this. It was additional state for
something that was detectable from the value of import_attach. So udl
now overrides vmap and mmap.

> btw on why udl does this: Imported bo are usually rendered by real hw, and
> reading it uncached/wc is the more defensive setting. It would be kinda
> nice if dma-buf would expose this, but I fear dma-api maintainers would
> murder us if we even just propose that ... so it's a mess right now.

Yeah, in some way it's a variation of the discussion around fbdev
memory access that we had before.

Best regards
Thomas
Hi,

> btw on why udl does this: Imported bo are usually rendered by real hw, and
> reading it uncached/wc is the more defensive setting. It would be kinda
> nice if dma-buf would expose this, but I fear dma-api maintainers would
> murder us if we even just propose that ... so it's a mess right now.

I suspect for imported dma-bufs we should leave the mmap() to the
exporter instead of pulling the pages out of the sgt and mapping them
ourselves.

> btw the issue extends to dma access by devices too, e.g. both i915 and
> amdgpu can select the coherency mode at runtime (using e.g. the pcie
> no-snoop transaction mode), and we have similar uncoordinated hacks in
> there too, like in udl.

Hmm. Ok. I guess I'm not going to try to solve all that properly just
for the little virtio fix.

Just curious: How do you tell your hardware? Are there bits for that
in the gtt, similar to the caching bits in the x86 page tables?

cheers,
  Gerd
On Wed, Dec 11, 2019 at 02:18:30PM +0100, Gerd Hoffmann wrote:
> Hi,
>
> > btw on why udl does this: Imported bo are usually rendered by real hw, and
> > reading it uncached/wc is the more defensive setting. It would be kinda
> > nice if dma-buf would expose this, but I fear dma-api maintainers would
> > murder us if we even just propose that ... so it's a mess right now.
>
> I suspect for imported dma-bufs we should leave the mmap() to the
> exporter instead of pulling the pages out of the sgt and mapping them
> ourselves.

Uh yes. If we still do that, then yes very much we shouldn't. Even
better would be to just not do that, because the semantics of dumb gem
mmap and dma-buf mmap differ (the latter has the begin/end access
ioctl). If we can't ditch the mmap I think we should at least improve
the helpers to do the redirect to dma_buf_mmap directly and stop
drivers from doing silly stuff.

> > Hmm. Ok. I guess I'm not going to try to solve all that properly just
> > for the little virtio fix.
> >
> > Just curious: How do you tell your hardware? Are there bits for that
> > in the gtt, similar to the caching bits in the x86 page tables?

Brace for contact ...

- on amdgpu it's a bit in the gpu pagetable. I think (but not sure, not
  an expert on these chips) that's the only way.

- on i915 it's also a bit in the gpu pagetables, but userspace can
  override the caching/coherency mode on a per-texture/render target/*BO
  level in the command stream.

This is why gpus and dma-api don't mix well, since dma-api's goal is to
hide all this even from the driver. gpus otoh leak it all the way to
userspace. The trouble is as old as AGP from 1999 or so, I've become
somewhat cynical at trying to fix this for real and not just with
hacks. The disconnect between what we need and what dma-api kernel
people want to give us is too big to bridge, it seems.
-Daniel
Hi,

> > I suspect for imported dma-bufs we should leave the mmap() to the
> > exporter instead of pulling the pages out of the sgt and mapping them
> > ourselves.
>
> Uh yes. If we still do that, then yes very much we shouldn't.

Looking again: drm_gem_dumb_map_offset() throws an error in case
obj->import_attach is not NULL. So the udl mmap code should not see
imported buffers in the first place, unless I missed some detail ...

> > Hmm. Ok. I guess I'm not going to try to solve all that properly just
> > for the little virtio fix.
> >
> > Just curious: How do you tell your hardware? Are there bits for that
> > in the gtt, similar to the caching bits in the x86 page tables?
>
> Brace for contact ...
>
> - on amdgpu it's a bit in the gpu pagetable. I think (but not sure, not
>   an expert on these chips) that's the only way.
>
> - on i915 it's also a bit in the gpu pagetables, but userspace can
>   override the caching/coherency mode on a per-texture/render target/*BO
>   level in the command stream.
>
> This is why gpus and dma-api don't mix well, since dma-api's goal is to
> hide all this even from the driver. gpus otoh leak it all the way to
> userspace. The trouble is as old as AGP from 1999 or so, I've become
> somewhat cynical at trying to fix this for real and not just with
> hacks. The disconnect between what we need and what dma-api kernel
> people want to give us is too big to bridge, it seems.

Phew. For vulkan support in virtio-gpu we are going to throw virtual
machines into that mix. Fun ahead I guess ...

cheers,
  Gerd
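For illustration, a minimal sketch of the dma_buf_mmap() redirect
discussed above — dma_buf_mmap() is the real dma-buf helper, but the
wrapper function, its name, and its integration point are assumptions,
not code from this series:

#include <linux/dma-buf.h>
#include <drm/drm_gem.h>

/* Forward mmap() of imported objects to the exporter, which knows the
 * correct caching attributes for its backing storage, instead of
 * remapping the sgt pages with locally chosen pgprot bits. */
static int shmem_mmap_redirect(struct drm_gem_object *obj,
			       struct vm_area_struct *vma)
{
	if (obj->import_attach)
		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);

	/* Native objects would take the normal drm_gem_shmem_mmap() path. */
	return -ENOSYS;
}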
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 6748379a0b44..9d6e02c6205f 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -17,6 +17,11 @@ struct drm_mode_create_dumb;
 struct drm_printer;
 struct sg_table;
 
+enum drm_gem_shmem_caching {
+	DRM_GEM_SHMEM_CACHED = 1,
+	DRM_GEM_SHMEM_WC,
+};
+
 /**
  * struct drm_gem_shmem_object - GEM object backed by shmem
  */
@@ -83,6 +88,11 @@ struct drm_gem_shmem_object {
 	 * The address are un-mapped when the count reaches zero.
 	 */
 	unsigned int vmap_use_count;
+
+	/**
+	 * @caching: caching attributes for mappings.
+	 */
+	enum drm_gem_shmem_caching caching;
 };
 
 #define to_drm_gem_shmem_obj(obj) \
@@ -130,6 +140,8 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 
 struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
 
+pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot);
+
 /**
  * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
  *
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index a421a2eed48a..5bb94e130a50 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -76,6 +76,7 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
 	mutex_init(&shmem->pages_lock);
 	mutex_init(&shmem->vmap_lock);
 	INIT_LIST_HEAD(&shmem->madv_list);
+	shmem->caching = DRM_GEM_SHMEM_WC;
 
 	/*
 	 * Our buffers are kept pinned, so allocating them
@@ -256,9 +257,11 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
 
 	if (obj->import_attach)
 		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
-	else
+	else {
+		pgprot_t prot = drm_gem_shmem_caching(shmem, PAGE_KERNEL);
 		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
-				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+				    VM_MAP, prot);
+	}
 
 	if (!shmem->vaddr) {
 		DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,7 +543,8 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	}
 
 	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
-	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	vma->vm_page_prot = drm_gem_shmem_caching(shmem, vma->vm_page_prot);
 	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 	vma->vm_ops = &drm_gem_shmem_vm_ops;
 
@@ -683,3 +687,17 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table);
+
+pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot)
+{
+	switch (shmem->caching) {
+	case DRM_GEM_SHMEM_CACHED:
+		return prot;
+	case DRM_GEM_SHMEM_WC:
+		return pgprot_writecombine(prot);
+	default:
+		WARN_ON_ONCE(1);
+		return prot;
+	}
+}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_caching);
Add caching field to drm_gem_shmem_object to specify the caching
attributes for mappings. Add helper function to tweak pgprot
accordingly. Switch vmap and mmap functions to the new helper.

Set caching to write-combine when creating the object so behavior
doesn't change by default. Drivers can override that later if
needed.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/drm/drm_gem_shmem_helper.h     | 12 ++++++++++++
 drivers/gpu/drm/drm_gem_shmem_helper.c | 24 +++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)
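As a usage note, a short sketch of how a driver would override the
write-combine default this patch establishes. The call site is
illustrative only; virtio-gpu's actual switch to cached mappings is done
in a separate patch of this series.

struct drm_gem_shmem_object *shmem;

shmem = drm_gem_shmem_create(dev, size);
if (IS_ERR(shmem))
	return PTR_ERR(shmem);

/* Illustrative assumption: this device's buffers stay coherent with
 * CPU caches, so plain cached mappings are safe here. */
shmem->caching = DRM_GEM_SHMEM_CACHED;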