
[v2,1/2] drm/shmem: add support for per object caching attributes

Message ID 20191211081810.20079-2-kraxel@redhat.com (mailing list archive)
State New, archived
Series drm/virtio: fix mmap page attributes

Commit Message

Gerd Hoffmann Dec. 11, 2019, 8:18 a.m. UTC
Add a caching field to drm_gem_shmem_object to specify the caching
attributes for mappings.  Add a helper function to tweak pgprot
accordingly.  Switch the vmap and mmap functions to the new helper.

Set caching to write-combine when creating the object so behavior
doesn't change by default.  Drivers can override that later if
needed.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/drm/drm_gem_shmem_helper.h     | 12 ++++++++++++
 drivers/gpu/drm/drm_gem_shmem_helper.c | 24 +++++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

Comments

Thomas Zimmermann Dec. 11, 2019, 9:58 a.m. UTC | #1
Hi Gerd

Am 11.12.19 um 09:18 schrieb Gerd Hoffmann:
> Add a caching field to drm_gem_shmem_object to specify the caching
> attributes for mappings.  Add a helper function to tweak pgprot
> accordingly.  Switch the vmap and mmap functions to the new helper.
> 
> Set caching to write-combine when creating the object so behavior
> doesn't change by default.  Drivers can override that later if
> needed.
> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

If you want to merge this patch, you have my

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>

Please see my comment below.

> ---
>  include/drm/drm_gem_shmem_helper.h     | 12 ++++++++++++
>  drivers/gpu/drm/drm_gem_shmem_helper.c | 24 +++++++++++++++++++++---
>  2 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> index 6748379a0b44..9d6e02c6205f 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -17,6 +17,11 @@ struct drm_mode_create_dumb;
>  struct drm_printer;
>  struct sg_table;
>  
> +enum drm_gem_shmem_caching {
> +	DRM_GEM_SHMEM_CACHED = 1,
> +	DRM_GEM_SHMEM_WC,
> +};
> +
>  /**
>   * struct drm_gem_shmem_object - GEM object backed by shmem
>   */
> @@ -83,6 +88,11 @@ struct drm_gem_shmem_object {
>  	 * The address are un-mapped when the count reaches zero.
>  	 */
>  	unsigned int vmap_use_count;
> +
> +	/**
> +	 * @caching: caching attributes for mappings.
> +	 */
> +	enum drm_gem_shmem_caching caching;
>  };
>  
>  #define to_drm_gem_shmem_obj(obj) \
> @@ -130,6 +140,8 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
>  
>  struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
>  
> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot);
> +
>  /**
>   * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
>   *
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index a421a2eed48a..5bb94e130a50 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -76,6 +76,7 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
>  	mutex_init(&shmem->pages_lock);
>  	mutex_init(&shmem->vmap_lock);
>  	INIT_LIST_HEAD(&shmem->madv_list);
> +	shmem->caching = DRM_GEM_SHMEM_WC;
>  
>  	/*
>  	 * Our buffers are kept pinned, so allocating them
> @@ -256,9 +257,11 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
>  
>  	if (obj->import_attach)
>  		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
> -	else
> +	else {
> +		pgprot_t prot = drm_gem_shmem_caching(shmem, PAGE_KERNEL);
>  		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
> -				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> +				    VM_MAP, prot);
> +	}
>  
>  	if (!shmem->vaddr) {
>  		DRM_DEBUG_KMS("Failed to vmap pages\n");
> @@ -540,7 +543,8 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>  	}
>  
>  	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> -	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
> +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> +	vma->vm_page_prot = drm_gem_shmem_caching(shmem, vma->vm_page_prot);
>  	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
>  	vma->vm_ops = &drm_gem_shmem_vm_ops;
>  
> @@ -683,3 +687,17 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
>  	return ERR_PTR(ret);
>  }
>  EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table);
> +
> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot)
> +{
> +	switch (shmem->caching) {
> +	case DRM_GEM_SHMEM_CACHED:
> +		return prot;
> +	case DRM_GEM_SHMEM_WC:
> +		return pgprot_writecombine(prot);
> +	default:
> +		WARN_ON_ONCE(1);
> +		return prot;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(drm_gem_shmem_caching);

Two reasons why I'd reconsider this design.

I don't like switch statements near the bottom of the call graph. The
code ends up with warning default branches, such as the one here.

Udl has different caching flags for imported and 'native' buffers. This
would require a new constant and additional code here.

What do you think about turning this function into a callback in struct
shmem_funcs? The default implementation would be for WC, virtio would
use CACHED. The individual implementations could still be located in the
shmem code. Udl would later provide its own code.

Best regards
Thomas

>
Thomas Zimmermann Dec. 11, 2019, 10:01 a.m. UTC | #2
Am 11.12.19 um 10:58 schrieb Thomas Zimmermann:
> 
> What do you think about turning this function into a callback in struct
> shmem_funcs? The default implementation would be for WC, virtio would

s/shmem_funcs/drm_gem_object_funcs

> use CACHED. The individual implementations could still be located in the
> shmem code. Udl would later provide its own code.
> 
> Best regards
> Thomas
> 
>>
>
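
For illustration, here is a minimal sketch of what the suggested callback
could look like, using the corrected struct name. The mmap_prot hook, its
wiring, and the function names below are hypothetical; none of this is part
of the patch or of today's drm_gem_object_funcs.

	/* Hypothetical sketch only, not part of this patch. */

	struct drm_gem_object_funcs {
		/* ... existing callbacks (vmap, mmap, ...) ... */

		/* Hypothetical hook: adjust @prot for this object's
		 * caching mode. */
		pgprot_t (*mmap_prot)(struct drm_gem_object *obj, pgprot_t prot);
	};

	/* Default implementation, kept in the shmem code: write-combine. */
	static pgprot_t drm_gem_shmem_mmap_prot_wc(struct drm_gem_object *obj,
						   pgprot_t prot)
	{
		return pgprot_writecombine(prot);
	}

	/* virtio-gpu would install a cached variant instead. */
	static pgprot_t drm_gem_shmem_mmap_prot_cached(struct drm_gem_object *obj,
						       pgprot_t prot)
	{
		return prot;
	}

	/* drm_gem_shmem_mmap() would then consult the hook instead of a
	 * switch statement:
	 *
	 *	if (obj->funcs && obj->funcs->mmap_prot)
	 *		vma->vm_page_prot =
	 *			obj->funcs->mmap_prot(obj, vma->vm_page_prot);
	 */
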
Thomas Zimmermann Dec. 11, 2019, 10:07 a.m. UTC | #3
Am 11.12.19 um 10:58 schrieb Thomas Zimmermann:
> Hi Gerd
> 
> Am 11.12.19 um 09:18 schrieb Gerd Hoffmann:
>> Add a caching field to drm_gem_shmem_object to specify the caching
>> attributes for mappings.  Add a helper function to tweak pgprot
>> accordingly.  Switch the vmap and mmap functions to the new helper.
>>
>> Set caching to write-combine when creating the object so behavior
>> doesn't change by default.  Drivers can override that later if
>> needed.
>>
>> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> 
> If you want to merge this patch, you have my
> 
> Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
> 
> Please see my comment below.
> 
>> ---
>>  include/drm/drm_gem_shmem_helper.h     | 12 ++++++++++++
>>  drivers/gpu/drm/drm_gem_shmem_helper.c | 24 +++++++++++++++++++++---
>>  2 files changed, 33 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
>> index 6748379a0b44..9d6e02c6205f 100644
>> --- a/include/drm/drm_gem_shmem_helper.h
>> +++ b/include/drm/drm_gem_shmem_helper.h
>> @@ -17,6 +17,11 @@ struct drm_mode_create_dumb;
>>  struct drm_printer;
>>  struct sg_table;
>>  
>> +enum drm_gem_shmem_caching {
>> +	DRM_GEM_SHMEM_CACHED = 1,
>> +	DRM_GEM_SHMEM_WC,
>> +};
>> +
>>  /**
>>   * struct drm_gem_shmem_object - GEM object backed by shmem
>>   */
>> @@ -83,6 +88,11 @@ struct drm_gem_shmem_object {
>>  	 * The address are un-mapped when the count reaches zero.
>>  	 */
>>  	unsigned int vmap_use_count;
>> +
>> +	/**
>> +	 * @caching: caching attributes for mappings.
>> +	 */
>> +	enum drm_gem_shmem_caching caching;
>>  };
>>  
>>  #define to_drm_gem_shmem_obj(obj) \
>> @@ -130,6 +140,8 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
>>  
>>  struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
>>  
>> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot);
>> +
>>  /**
>>   * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
>>   *
>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> index a421a2eed48a..5bb94e130a50 100644
>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> @@ -76,6 +76,7 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
>>  	mutex_init(&shmem->pages_lock);
>>  	mutex_init(&shmem->vmap_lock);
>>  	INIT_LIST_HEAD(&shmem->madv_list);
>> +	shmem->caching = DRM_GEM_SHMEM_WC;
>>  
>>  	/*
>>  	 * Our buffers are kept pinned, so allocating them
>> @@ -256,9 +257,11 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
>>  
>>  	if (obj->import_attach)
>>  		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
>> -	else
>> +	else {
>> +		pgprot_t prot = drm_gem_shmem_caching(shmem, PAGE_KERNEL);
>>  		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
>> -				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
>> +				    VM_MAP, prot);
>> +	}
>>  
>>  	if (!shmem->vaddr) {
>>  		DRM_DEBUG_KMS("Failed to vmap pages\n");
>> @@ -540,7 +543,8 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>>  	}
>>  
>>  	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
>> -	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
>> +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
>> +	vma->vm_page_prot = drm_gem_shmem_caching(shmem, vma->vm_page_prot);
>>  	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
>>  	vma->vm_ops = &drm_gem_shmem_vm_ops;
>>  
>> @@ -683,3 +687,17 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
>>  	return ERR_PTR(ret);
>>  }
>>  EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table);
>> +
>> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot)
>> +{
>> +	switch (shmem->caching) {
>> +	case DRM_GEM_SHMEM_CACHED:
>> +		return prot;
>> +	case DRM_GEM_SHMEM_WC:
>> +		return pgprot_writecombine(prot);
>> +	default:
>> +		WARN_ON_ONCE(1);
>> +		return prot;
>> +	}
>> +}
>> +EXPORT_SYMBOL_GPL(drm_gem_shmem_caching);
> 
> Two reasons why I'd reconsider this design.
> 
> I don't like switch statements near the bottom of the call graph. The
> code ends up with warning default branches, such as the one here.
> 
> Udl has different caching flags for imported and 'native' buffers. This
> would require a new constant and additional code here.
> 
> What do you think about turning this function into a callback in struct
> shmem_funcs? The default implementation would be for WC, virtio would
> use CACHED. The individual implementations could still be located in the
> shmem code. Udl would later provide its own code.

On second thought, all this might be over-engineered and v1 of the
patchset was the correct approach. You can add my

Acked-by: Thomas Zimmermann <tzimmermann@suse.de>

if you prefer to merge v1.

> 
> Best regards
> Thomas
> 
>>
>
Daniel Vetter Dec. 11, 2019, 12:36 p.m. UTC | #4
On Wed, Dec 11, 2019 at 11:07:25AM +0100, Thomas Zimmermann wrote:
> 
> 
> Am 11.12.19 um 10:58 schrieb Thomas Zimmermann:
> > Hi Gerd
> > 
> > Am 11.12.19 um 09:18 schrieb Gerd Hoffmann:
> >> Add a caching field to drm_gem_shmem_object to specify the caching
> >> attributes for mappings.  Add a helper function to tweak pgprot
> >> accordingly.  Switch the vmap and mmap functions to the new helper.
> >>
> >> Set caching to write-combine when creating the object so behavior
> >> doesn't change by default.  Drivers can override that later if
> >> needed.
> >>
> >> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> > 
> > If you want to merge this patch, you have my
> > 
> > Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
> > 
> > Please see my comment below.
> > 
> >> ---
> >>  include/drm/drm_gem_shmem_helper.h     | 12 ++++++++++++
> >>  drivers/gpu/drm/drm_gem_shmem_helper.c | 24 +++++++++++++++++++++---
> >>  2 files changed, 33 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> >> index 6748379a0b44..9d6e02c6205f 100644
> >> --- a/include/drm/drm_gem_shmem_helper.h
> >> +++ b/include/drm/drm_gem_shmem_helper.h
> >> @@ -17,6 +17,11 @@ struct drm_mode_create_dumb;
> >>  struct drm_printer;
> >>  struct sg_table;
> >>  
> >> +enum drm_gem_shmem_caching {
> >> +	DRM_GEM_SHMEM_CACHED = 1,
> >> +	DRM_GEM_SHMEM_WC,
> >> +};
> >> +
> >>  /**
> >>   * struct drm_gem_shmem_object - GEM object backed by shmem
> >>   */
> >> @@ -83,6 +88,11 @@ struct drm_gem_shmem_object {
> >>  	 * The address are un-mapped when the count reaches zero.
> >>  	 */
> >>  	unsigned int vmap_use_count;
> >> +
> >> +	/**
> >> +	 * @caching: caching attributes for mappings.
> >> +	 */
> >> +	enum drm_gem_shmem_caching caching;
> >>  };
> >>  
> >>  #define to_drm_gem_shmem_obj(obj) \
> >> @@ -130,6 +140,8 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
> >>  
> >>  struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
> >>  
> >> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot);
> >> +
> >>  /**
> >>   * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
> >>   *
> >> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> >> index a421a2eed48a..5bb94e130a50 100644
> >> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> >> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> >> @@ -76,6 +76,7 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
> >>  	mutex_init(&shmem->pages_lock);
> >>  	mutex_init(&shmem->vmap_lock);
> >>  	INIT_LIST_HEAD(&shmem->madv_list);
> >> +	shmem->caching = DRM_GEM_SHMEM_WC;
> >>  
> >>  	/*
> >>  	 * Our buffers are kept pinned, so allocating them
> >> @@ -256,9 +257,11 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
> >>  
> >>  	if (obj->import_attach)
> >>  		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
> >> -	else
> >> +	else {
> >> +		pgprot_t prot = drm_gem_shmem_caching(shmem, PAGE_KERNEL);
> >>  		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
> >> -				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> >> +				    VM_MAP, prot);
> >> +	}
> >>  
> >>  	if (!shmem->vaddr) {
> >>  		DRM_DEBUG_KMS("Failed to vmap pages\n");
> >> @@ -540,7 +543,8 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> >>  	}
> >>  
> >>  	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> >> -	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
> >> +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> >> +	vma->vm_page_prot = drm_gem_shmem_caching(shmem, vma->vm_page_prot);
> >>  	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
> >>  	vma->vm_ops = &drm_gem_shmem_vm_ops;
> >>  
> >> @@ -683,3 +687,17 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
> >>  	return ERR_PTR(ret);
> >>  }
> >>  EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table);
> >> +
> >> +pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot)
> >> +{
> >> +	switch (shmem->caching) {
> >> +	case DRM_GEM_SHMEM_CACHED:
> >> +		return prot;
> >> +	case DRM_GEM_SHMEM_WC:
> >> +		return pgprot_writecombine(prot);
> >> +	default:
> >> +		WARN_ON_ONCE(1);
> >> +		return prot;
> >> +	}
> >> +}
> >> +EXPORT_SYMBOL_GPL(drm_gem_shmem_caching);
> > 
> > Two reasons why I'd reconsider this design.
> > 
> > I don't like switch statements near the bottom of the call graph. The
> > code ends up with warning default branches, such as the one here.
> > 
> > Udl has different caching flags for imported and 'native' buffers. This
> > would require a new constant and additional code here.
> > 
> > What do you think about turning this function into a callback in struct
> > shmem_funcs? The default implementation would be for WC, virtio would
> > use CACHED. The individual implementations could still be located in the
> > shmem code. Udl would later provide its own code.
> 
> On second thought, all this might be over-engineered and v1 of the
> patchset was the correct approach. You can add my

The udl use-case should be covered already; simply set the flag correctly
at create/import time? It's per-object ...
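
As a sketch of that idea (a hypothetical wrapper; udl does not actually use
the shmem helpers this way, and the chosen caching mode here is purely
illustrative):

	/* Hypothetical sketch: pick a caching mode per object at
	 * import time. */
	static struct drm_gem_object *
	udl_gem_prime_import_sg_table(struct drm_device *dev,
				      struct dma_buf_attachment *attach,
				      struct sg_table *sgt)
	{
		struct drm_gem_object *obj;

		obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
		if (IS_ERR(obj))
			return obj;

		/* Imported buffers are rendered by real hw; whether udl
		 * wants WC or cached here is the driver's call. */
		to_drm_gem_shmem_obj(obj)->caching = DRM_GEM_SHMEM_WC;

		return obj;
	}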

btw on why udl does this: imported BOs are usually rendered by real hw, and
reading them uncached/wc is the more defensive setting. It would be kinda
nice if dma-buf would expose this, but I fear the dma-api maintainers would
murder us if we even just proposed that ... so it's a mess right now.

btw the issue extends to dma access by devices too, e.g. both i915 and
amdgpu can select the coherency mode at runtime (using e.g. the pcie
no-snoop transaction mode), and we have similar uncoordinated hacks in
there too, like in udl.
-Daniel

> 
> Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
> 
> if you prefer to merge v1.
> 
> > 
> > Best regards
> > Thomas
> > 
> >>
> > 
> 
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
> 




Thomas Zimmermann Dec. 11, 2019, 1:09 p.m. UTC | #5
Hi

Am 11.12.19 um 13:36 schrieb Daniel Vetter:
> 
> The udl use-case should be covered already, simply set the flag correctly
> at create/import time? It's per-object ...

The original udl gem code did this. It was additional state for
something that was detectable from the value of import_attach. So udl
now overrides vmap and mmap.


> btw on why udl does this: imported BOs are usually rendered by real hw, and
> reading them uncached/wc is the more defensive setting. It would be kinda
> nice if dma-buf would expose this, but I fear the dma-api maintainers would
> murder us if we even just proposed that ... so it's a mess right now.

Yeah, in some way it's a variation of the discussion around fbdev memory
access that we had before.

Best regards
Thomas

> 
> btw the issue extends to dma access by devices too, e.g. both i915 and
> amdgpu can select the coherency mode at runtime (using e.g. the pcie
> no-snoop transaction mode), and we have similar uncoordinated hacks in
> there too, like in udl.
> -Daniel
> 
>>
>> Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
>>
>> if you prefer to merge v1.
>>
>>>
>>> Best regards
>>> Thomas
>>>
>>>>
>>>
>>
>> -- 
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Maxfeldstr. 5, 90409 Nürnberg, Germany
>> (HRB 36809, AG Nürnberg)
>> Geschäftsführer: Felix Imendörffer
>>
> 
> 
> 
> 
> 
>
Gerd Hoffmann Dec. 11, 2019, 1:18 p.m. UTC | #6
Hi,

> btw on why udl does this: imported BOs are usually rendered by real hw, and
> reading them uncached/wc is the more defensive setting. It would be kinda
> nice if dma-buf would expose this, but I fear the dma-api maintainers would
> murder us if we even just proposed that ... so it's a mess right now.

I suspect for imported dma-bufs we should leave the mmap() to the
exporter instead of pulling the pages out of the sgt and mapping them
ourselves.

> btw the issue extends to dma access by devices too, e.g. both i915 and
> amdgpu can select the coherency mode at runtime (using e.g. the pcie
> no-snoop transaction mode), and we have similar uncoordinated hacks in
> there too, like in udl.

Hmm.  Ok.  I guess I'm not going to try solve all that properly just for
the little virtio fix.

Just curious:  How do you tell your hardware?  Are there bits for that
in the gtt, similar to the caching bits in the x86 page tables?

cheers,
  Gerd
Daniel Vetter Dec. 13, 2019, 9:59 a.m. UTC | #7
On Wed, Dec 11, 2019 at 02:18:30PM +0100, Gerd Hoffmann wrote:
>   Hi,
> 
> > btw on why udl does this: imported BOs are usually rendered by real hw, and
> > reading them uncached/wc is the more defensive setting. It would be kinda
> > nice if dma-buf would expose this, but I fear the dma-api maintainers would
> > murder us if we even just proposed that ... so it's a mess right now.
> 
> I suspect for imported dma-bufs we should leave the mmap() to the
> exporter instead of pulling the pages out of the sgt and mapping them
> ourselves.

Uh yes, if we still do that then we very much shouldn't. Even better
would be to just not do that at all, because the semantics of dumb gem mmap
and dma-buf mmap differ (the latter has the begin/end access ioctl).

If we can't ditch the mmap I think we should at least improve the helpers
to do the redirect to dma_buf_mmap directly and stop drivers from doing
silly stuff.
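
A sketch of such a redirect (not part of this patch; the helper name is
hypothetical, and it assumes the DRM fake offset has already been validated
and stripped by the caller):

	/* Sketch only: hand imported buffers back to the exporter's mmap. */
	static int drm_gem_shmem_mmap_import(struct drm_gem_object *obj,
					     struct vm_area_struct *vma)
	{
		/* The exporter applies its own caching attributes and keeps
		 * the dma-buf begin/end access semantics intact. */
		return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
	}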

> > btw the issue extends to dma access by devices too, e.g. both i915 and
> > amdgpu can select the coherency mode at runtime (using e.g. the pcie
> > no-snoop transaction mode), and we have similar uncoordinated hacks in
> > there too, like in udl.
> 
> Hmm.  Ok.  I guess I'm not going to try solve all that properly just for
> the little virtio fix.
> 
> Just curious:  How do you tell your hardware?  Are there bits for that
> in the gtt, similar to the caching bits in the x86 page tables?

Brace for contact ...

- on amdgpu it's a bit in the gpu pagetable. I think (but not sure, not an
  expert on these chips) that's the only way.

- on i915 it's also a bit in the gpu pagetables, but userspace can
  override the caching/coherency mode on a per-texture/render target/*BO
  level in the command stream.

This is why gpus and dma-api don't mix well, since dma-api's goal is to
hide all this even from the driver. gpus otoh leak it all the way to
userspace. The trouble is as old as AGP from 1999 or so; I've become
somewhat cynical at trying to fix this for real and not just with hacks. The
disconnect between what we need and what dma-api kernel people want to
give us is too big to bridge it seems.
-Daniel
Gerd Hoffmann Dec. 16, 2019, 8:07 a.m. UTC | #8
Hi,

> > I suspect for imported dma-bufs we should leave the mmap() to the
> > exporter instead of pulling the pages out of the sgt and mapping them
> > ourselves.
> 
> Uh yes, if we still do that then we very much shouldn't.

Looking again: drm_gem_dumb_map_offset() returns an error in case
obj->import_attach is not NULL.  So the udl mmap code should not see
imported buffers in the first place, unless I missed some detail ...
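
That check sits in drm_gem_dumb_map_offset() in drivers/gpu/drm/drm_gem.c;
roughly:

	obj = drm_gem_object_lookup(file, handle);
	if (!obj)
		return -ENOENT;

	/* Don't allow imported objects to be mapped */
	if (obj->import_attach) {
		ret = -EINVAL;
		goto out;
	}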

> > Hmm.  Ok.  I guess I'm not going to try solve all that properly just for
> > the little virtio fix.
> > 
> > Just curious:  How do you tell your hardware?  Are there bits for that
> > in the gtt, similar to the caching bits in the x86 page tables?
> 
> Brace for contact ...
> 
> - on amdgpu it's a bit in the gpu pagetable. I think (but not sure, not an
>   expert on these chips) that's the only way.
> 
> - on i915 it's also a bit in the gpu pagetables, but userspace can
>   override the caching/coherency mode on a per-texture/render target/*BO
>   level in the command stream.
> 
> This is why gpus and dma-api don't mix well, since dma-api's goal is to
> hide all this even from the driver. gpus otoh leak it all the way to
> userspace. The trouble is as old as AGP from 1999 or so; I've become
> somewhat cynical at trying to fix this for real and not just with hacks. The
> disconnect between what we need and what dma-api kernel people want to
> give us is too big to bridge it seems.

Phew.  For Vulkan support in virtio-gpu we are going to throw virtual
machines into that mix.  Fun ahead I guess ...

cheers,
  Gerd

Patch

diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 6748379a0b44..9d6e02c6205f 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -17,6 +17,11 @@  struct drm_mode_create_dumb;
 struct drm_printer;
 struct sg_table;
 
+enum drm_gem_shmem_caching {
+	DRM_GEM_SHMEM_CACHED = 1,
+	DRM_GEM_SHMEM_WC,
+};
+
 /**
  * struct drm_gem_shmem_object - GEM object backed by shmem
  */
@@ -83,6 +88,11 @@  struct drm_gem_shmem_object {
 	 * The address are un-mapped when the count reaches zero.
 	 */
 	unsigned int vmap_use_count;
+
+	/**
+	 * @caching: caching attributes for mappings.
+	 */
+	enum drm_gem_shmem_caching caching;
 };
 
 #define to_drm_gem_shmem_obj(obj) \
@@ -130,6 +140,8 @@  drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 
 struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj);
 
+pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot);
+
 /**
  * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations
  *
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index a421a2eed48a..5bb94e130a50 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -76,6 +76,7 @@  struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
 	mutex_init(&shmem->pages_lock);
 	mutex_init(&shmem->vmap_lock);
 	INIT_LIST_HEAD(&shmem->madv_list);
+	shmem->caching = DRM_GEM_SHMEM_WC;
 
 	/*
 	 * Our buffers are kept pinned, so allocating them
@@ -256,9 +257,11 @@  static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
 
 	if (obj->import_attach)
 		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
-	else
+	else {
+		pgprot_t prot = drm_gem_shmem_caching(shmem, PAGE_KERNEL);
 		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
-				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+				    VM_MAP, prot);
+	}
 
 	if (!shmem->vaddr) {
 		DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,7 +543,8 @@  int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	}
 
 	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
-	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	vma->vm_page_prot = drm_gem_shmem_caching(shmem, vma->vm_page_prot);
 	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 	vma->vm_ops = &drm_gem_shmem_vm_ops;
 
@@ -683,3 +687,17 @@  drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table);
+
+pgprot_t drm_gem_shmem_caching(struct drm_gem_shmem_object *shmem, pgprot_t prot)
+{
+	switch (shmem->caching) {
+	case DRM_GEM_SHMEM_CACHED:
+		return prot;
+	case DRM_GEM_SHMEM_WC:
+		return pgprot_writecombine(prot);
+	default:
+		WARN_ON_ONCE(1);
+		return prot;
+	}
+}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_caching);
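
For completeness, a minimal sketch of how a driver such as virtio-gpu could
opt into cached mappings with the new field (the driver-side code below is
hypothetical; the actual virtio-gpu change is in patch 2/2):

	struct drm_gem_shmem_object *shmem;

	shmem = drm_gem_shmem_create(dev, size);
	if (IS_ERR(shmem))
		return PTR_ERR(shmem);

	/* Guest and host access this memory cache-coherently, so plain
	 * cached mappings are fine; override the WC default. */
	shmem->caching = DRM_GEM_SHMEM_CACHED;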