diff mbox

[16/20] drm/i915: Support to create write combined type vmaps

Message ID 1470983123-22127-17-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Aug. 12, 2016, 6:25 a.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

vmaps has a provision for controlling the page protection bits, with which
we can use to control the mapping type, e.g. WB, WC, UC or even WT.
To allow the caller to choose their mapping type, we add a parameter to
i915_gem_object_pin_map - but we still only allow one vmap to be cached
per object. If the object is currently not pinned, then we recreate the
previous vmap with the new access type, but if it was pinned we report an
error. This effectively limits the access via i915_gem_object_pin_map to a
single mapping type for the lifetime of the object. Not usually a problem,
but something to be aware of when setting up the object's vmap.

We will want to vary the access type to enable WC mappings of ringbuffer
and context objects on !llc platforms, as well as other objects where we
need coherent access to the GPU's pages without going through the GTT

v2: Remove the redundant braces around pin count check and fix the marker
    in documentation (Chris)

v3:
- Add a new enum for the vmalloc mapping type & pass that as an argument to
  i915_object_pin_map. (Tvrtko)
- Use PAGE_MASK to extract or filter the mapping type info and remove a
  superfluous BUG_ON.(Tvrtko)

v4:
- Rename the enums and clean up the pin_map function. (Chris)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  9 ++++-
 drivers/gpu/drm/i915/i915_gem.c            | 58 +++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  8 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +-
 6 files changed, 60 insertions(+), 21 deletions(-)

Comments

Tvrtko Ursulin Aug. 12, 2016, 10:49 a.m. UTC | #1
On 12/08/16 07:25, akash.goel@intel.com wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> vmaps has a provision for controlling the page protection bits, with which
> we can use to control the mapping type, e.g. WB, WC, UC or even WT.
> To allow the caller to choose their mapping type, we add a parameter to
> i915_gem_object_pin_map - but we still only allow one vmap to be cached
> per object. If the object is currently not pinned, then we recreate the
> previous vmap with the new access type, but if it was pinned we report an
> error. This effectively limits the access via i915_gem_object_pin_map to a
> single mapping type for the lifetime of the object. Not usually a problem,
> but something to be aware of when setting up the object's vmap.
>
> We will want to vary the access type to enable WC mappings of ringbuffer
> and context objects on !llc platforms, as well as other objects where we
> need coherent access to the GPU's pages without going through the GTT
>
> v2: Remove the redundant braces around pin count check and fix the marker
>      in documentation (Chris)
>
> v3:
> - Add a new enum for the vmalloc mapping type & pass that as an argument to
>    i915_object_pin_map. (Tvrtko)
> - Use PAGE_MASK to extract or filter the mapping type info and remove a
>    superfluous BUG_ON.(Tvrtko)
>
> v4:
> - Rename the enums and clean up the pin_map function. (Chris)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            |  9 ++++-
>   drivers/gpu/drm/i915/i915_gem.c            | 58 +++++++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  2 +-
>   drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
>   drivers/gpu/drm/i915/intel_lrc.c           |  8 ++---
>   drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +-
>   6 files changed, 60 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4bd3790..6603812 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -834,6 +834,11 @@ enum i915_cache_level {
>   	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
>   };
>
> +enum i915_map_type {
> +	I915_MAP_WB = 0,
> +	I915_MAP_WC,
> +};
> +
>   struct i915_ctx_hang_stats {
>   	/* This context had batch pending when hang was declared */
>   	unsigned batch_pending;
> @@ -3150,6 +3155,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   /**
>    * i915_gem_object_pin_map - return a contiguous mapping of the entire object
>    * @obj - the object to map into kernel address space
> + * @map_type - whether the vmalloc mapping should be using WC or WB pgprot_t
>    *
>    * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
>    * pages and then returns a contiguous mapping of the backing storage into
> @@ -3161,7 +3167,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>    * Returns the pointer through which to access the mapped object, or an
>    * ERR_PTR() on error.
>    */
> -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj);
> +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
> +					enum i915_map_type map_type);
>
>   /**
>    * i915_gem_object_unpin_map - releases an earlier mapping
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 03548db..7dabbc3f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2077,10 +2077,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   	list_del(&obj->global_list);
>
>   	if (obj->mapping) {
> -		if (is_vmalloc_addr(obj->mapping))
> -			vunmap(obj->mapping);
> +		void *ptr = (void *)((uintptr_t)obj->mapping & PAGE_MASK);
> +		if (is_vmalloc_addr(ptr))
> +			vunmap(ptr);
>   		else
> -			kunmap(kmap_to_page(obj->mapping));
> +			kunmap(kmap_to_page(ptr));
>   		obj->mapping = NULL;
>   	}
>
> @@ -2253,7 +2254,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   }
>
>   /* The 'mapping' part of i915_gem_object_pin_map() below */
> -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
> +				 enum i915_map_type type)
>   {
>   	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
>   	struct sg_table *sgt = obj->pages;
> @@ -2263,9 +2265,10 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>   	struct page **pages = stack_pages;
>   	unsigned long i = 0;
>   	void *addr;
> +	bool use_wc = (type == I915_MAP_WC);
>
>   	/* A single page can always be kmapped */
> -	if (n_pages == 1)
> +	if (n_pages == 1 && !use_wc)
>   		return kmap(sg_page(sgt->sgl));
>
>   	if (n_pages > ARRAY_SIZE(stack_pages)) {
> @@ -2281,7 +2284,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>   	/* Check that we have the expected number of pages */
>   	GEM_BUG_ON(i != n_pages);
>
> -	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
> +	addr = vmap(pages, n_pages, VM_NO_GUARD,

Unreleated and unmentioned change to no guard page. Best to remove IMHO. 
Can keep the RB in that case.

> +		    use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL);
>
>   	if (pages != stack_pages)
>   		drm_free_large(pages);
> @@ -2290,11 +2294,16 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>   }
>
>   /* get, pin, and map the pages of the object into kernel space */
> -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
> +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
> +			      enum i915_map_type type)
>   {
> +	enum i915_map_type has_type;
> +	bool pinned;
> +	void *ptr;
>   	int ret;
>
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0);
>
>   	ret = i915_gem_object_get_pages(obj);
>   	if (ret)
> @@ -2302,15 +2311,38 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
>
>   	i915_gem_object_pin_pages(obj);
>
> -	if (!obj->mapping) {
> -		obj->mapping = i915_gem_object_map(obj);
> -		if (!obj->mapping) {
> -			i915_gem_object_unpin_pages(obj);
> -			return ERR_PTR(-ENOMEM);
> +	pinned = obj->pages_pin_count > 1;
> +	ptr = (void *)((uintptr_t)obj->mapping & PAGE_MASK);
> +	has_type = (uintptr_t)obj->mapping & ~PAGE_MASK;
> +
> +	if (ptr && has_type != type) {
> +		if (pinned) {
> +			ret = -EBUSY;
> +			goto err;
> +		}
> +
> +		if (is_vmalloc_addr(ptr))
> +			vunmap(ptr);
> +		else
> +			kunmap(kmap_to_page(ptr));
> +		ptr = obj->mapping = NULL;
> +	}
> +
> +	if (!ptr) {
> +		ptr = i915_gem_object_map(obj, type);
> +		if (!ptr) {
> +			ret = -ENOMEM;
> +			goto err;
>   		}
> +
> +		obj->mapping = (void *)((uintptr_t)ptr | type);
>   	}
>
> -	return obj->mapping;
> +	return ptr;
> +
> +err:
> +	i915_gem_object_unpin_pages(obj);
> +	return ERR_PTR(ret);
>   }
>
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index c60a8d5b..10265bb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>   	if (ret)
>   		return ERR_PTR(ret);
>
> -	addr = i915_gem_object_pin_map(obj);
> +	addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>   	mutex_unlock(&dev->struct_mutex);
>
>   	return addr;
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 041cf68..1d58d36 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -1178,7 +1178,7 @@ static int guc_create_log_extras(struct intel_guc *guc)
>
>   	if (!guc->log.buf_addr) {
>   		/* Create a vmalloc mapping of log buffer pages */
> -		vaddr = i915_gem_object_pin_map(guc->log.obj);
> +		vaddr = i915_gem_object_pin_map(guc->log.obj, I915_MAP_WB);
>   		if (IS_ERR(vaddr)) {
>   			ret = PTR_ERR(vaddr);
>   			DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index c7f4b64..c24ac39 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -780,7 +780,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>   	if (ret)
>   		goto err;
>
> -	vaddr = i915_gem_object_pin_map(ce->state);
> +	vaddr = i915_gem_object_pin_map(ce->state, I915_MAP_WB);
>   	if (IS_ERR(vaddr)) {
>   		ret = PTR_ERR(vaddr);
>   		goto unpin_ctx_obj;
> @@ -1755,7 +1755,7 @@ lrc_setup_hws(struct intel_engine_cs *engine,
>   	/* The HWSP is part of the default context object in LRC mode. */
>   	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
>   				       LRC_PPHWSP_PN * PAGE_SIZE;
> -	hws = i915_gem_object_pin_map(dctx_obj);
> +	hws = i915_gem_object_pin_map(dctx_obj, I915_MAP_WB);
>   	if (IS_ERR(hws))
>   		return PTR_ERR(hws);
>   	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
> @@ -1968,7 +1968,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>   		return ret;
>   	}
>
> -	vaddr = i915_gem_object_pin_map(ctx_obj);
> +	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>   	if (IS_ERR(vaddr)) {
>   		ret = PTR_ERR(vaddr);
>   		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
> @@ -2189,7 +2189,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
>   		if (!ctx_obj)
>   			continue;
>
> -		vaddr = i915_gem_object_pin_map(ctx_obj);
> +		vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>   		if (WARN_ON(IS_ERR(vaddr)))
>   			continue;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index e8fa26c..69ec5da 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1951,7 +1951,7 @@ int intel_ring_pin(struct intel_ring *ring)
>   		if (ret)
>   			goto err_unpin;
>
> -		addr = i915_gem_object_pin_map(obj);
> +		addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>   		if (IS_ERR(addr)) {
>   			ret = PTR_ERR(addr);
>   			goto err_unpin;
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
akash.goel@intel.com Aug. 12, 2016, 3:13 p.m. UTC | #2
On 8/12/2016 4:19 PM, Tvrtko Ursulin wrote:
>
> On 12/08/16 07:25, akash.goel@intel.com wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> vmaps has a provision for controlling the page protection bits, with
>> which
>> we can use to control the mapping type, e.g. WB, WC, UC or even WT.
>> To allow the caller to choose their mapping type, we add a parameter to
>> i915_gem_object_pin_map - but we still only allow one vmap to be cached
>> per object. If the object is currently not pinned, then we recreate the
>> previous vmap with the new access type, but if it was pinned we report an
>> error. This effectively limits the access via i915_gem_object_pin_map
>> to a
>> single mapping type for the lifetime of the object. Not usually a
>> problem,
>> but something to be aware of when setting up the object's vmap.
>>
>> We will want to vary the access type to enable WC mappings of ringbuffer
>> and context objects on !llc platforms, as well as other objects where we
>> need coherent access to the GPU's pages without going through the GTT
>>
>> v2: Remove the redundant braces around pin count check and fix the marker
>>      in documentation (Chris)
>>
>> v3:
>> - Add a new enum for the vmalloc mapping type & pass that as an
>> argument to
>>    i915_object_pin_map. (Tvrtko)
>> - Use PAGE_MASK to extract or filter the mapping type info and remove a
>>    superfluous BUG_ON.(Tvrtko)
>>
>> v4:
>> - Rename the enums and clean up the pin_map function. (Chris)
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h            |  9 ++++-
>>   drivers/gpu/drm/i915/i915_gem.c            | 58
>> +++++++++++++++++++++++-------
>>   drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  2 +-
>>   drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
>>   drivers/gpu/drm/i915/intel_lrc.c           |  8 ++---
>>   drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +-
>>   6 files changed, 60 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 4bd3790..6603812 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -834,6 +834,11 @@ enum i915_cache_level {
>>       I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
>>   };
>>
>> +enum i915_map_type {
>> +    I915_MAP_WB = 0,
>> +    I915_MAP_WC,
>> +};
>> +
>>   struct i915_ctx_hang_stats {
>>       /* This context had batch pending when hang was declared */
>>       unsigned batch_pending;
>> @@ -3150,6 +3155,7 @@ static inline void
>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>>   /**
>>    * i915_gem_object_pin_map - return a contiguous mapping of the
>> entire object
>>    * @obj - the object to map into kernel address space
>> + * @map_type - whether the vmalloc mapping should be using WC or WB
>> pgprot_t
>>    *
>>    * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
>>    * pages and then returns a contiguous mapping of the backing
>> storage into
>> @@ -3161,7 +3167,8 @@ static inline void
>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>>    * Returns the pointer through which to access the mapped object, or an
>>    * ERR_PTR() on error.
>>    */
>> -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object
>> *obj);
>> +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object
>> *obj,
>> +                    enum i915_map_type map_type);
>>
>>   /**
>>    * i915_gem_object_unpin_map - releases an earlier mapping
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 03548db..7dabbc3f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2077,10 +2077,11 @@ i915_gem_object_put_pages(struct
>> drm_i915_gem_object *obj)
>>       list_del(&obj->global_list);
>>
>>       if (obj->mapping) {
>> -        if (is_vmalloc_addr(obj->mapping))
>> -            vunmap(obj->mapping);
>> +        void *ptr = (void *)((uintptr_t)obj->mapping & PAGE_MASK);
>> +        if (is_vmalloc_addr(ptr))
>> +            vunmap(ptr);
>>           else
>> -            kunmap(kmap_to_page(obj->mapping));
>> +            kunmap(kmap_to_page(ptr));
>>           obj->mapping = NULL;
>>       }
>>
>> @@ -2253,7 +2254,8 @@ i915_gem_object_get_pages(struct
>> drm_i915_gem_object *obj)
>>   }
>>
>>   /* The 'mapping' part of i915_gem_object_pin_map() below */
>> -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
>> +                 enum i915_map_type type)
>>   {
>>       unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
>>       struct sg_table *sgt = obj->pages;
>> @@ -2263,9 +2265,10 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>>       struct page **pages = stack_pages;
>>       unsigned long i = 0;
>>       void *addr;
>> +    bool use_wc = (type == I915_MAP_WC);
>>
>>       /* A single page can always be kmapped */
>> -    if (n_pages == 1)
>> +    if (n_pages == 1 && !use_wc)
>>           return kmap(sg_page(sgt->sgl));
>>
>>       if (n_pages > ARRAY_SIZE(stack_pages)) {
>> @@ -2281,7 +2284,8 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>>       /* Check that we have the expected number of pages */
>>       GEM_BUG_ON(i != n_pages);
>>
>> -    addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
>> +    addr = vmap(pages, n_pages, VM_NO_GUARD,
>
> Unreleated and unmentioned change to no guard page. Best to remove IMHO.
> Can keep the RB in that case.

Though its not called out, sorry for that, but isn't it better to avoid 
using the guard page, which will save 4KB of vmalloc virtual space 
(which is scarce) for every mapping created by Driver.

Updating the commit message would be fine to mention about this ?.

Best regards
Akash

>
>> +            use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL);
>>
>>       if (pages != stack_pages)
>>           drm_free_large(pages);
>> @@ -2290,11 +2294,16 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>>   }
>>
>>   /* get, pin, and map the pages of the object into kernel space */
>> -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
>> +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>> +                  enum i915_map_type type)
>>   {
>> +    enum i915_map_type has_type;
>> +    bool pinned;
>> +    void *ptr;
>>       int ret;
>>
>>       lockdep_assert_held(&obj->base.dev->struct_mutex);
>> +    GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) ==
>> 0);
>>
>>       ret = i915_gem_object_get_pages(obj);
>>       if (ret)
>> @@ -2302,15 +2311,38 @@ void *i915_gem_object_pin_map(struct
>> drm_i915_gem_object *obj)
>>
>>       i915_gem_object_pin_pages(obj);
>>
>> -    if (!obj->mapping) {
>> -        obj->mapping = i915_gem_object_map(obj);
>> -        if (!obj->mapping) {
>> -            i915_gem_object_unpin_pages(obj);
>> -            return ERR_PTR(-ENOMEM);
>> +    pinned = obj->pages_pin_count > 1;
>> +    ptr = (void *)((uintptr_t)obj->mapping & PAGE_MASK);
>> +    has_type = (uintptr_t)obj->mapping & ~PAGE_MASK;
>> +
>> +    if (ptr && has_type != type) {
>> +        if (pinned) {
>> +            ret = -EBUSY;
>> +            goto err;
>> +        }
>> +
>> +        if (is_vmalloc_addr(ptr))
>> +            vunmap(ptr);
>> +        else
>> +            kunmap(kmap_to_page(ptr));
>> +        ptr = obj->mapping = NULL;
>> +    }
>> +
>> +    if (!ptr) {
>> +        ptr = i915_gem_object_map(obj, type);
>> +        if (!ptr) {
>> +            ret = -ENOMEM;
>> +            goto err;
>>           }
>> +
>> +        obj->mapping = (void *)((uintptr_t)ptr | type);
>>       }
>>
>> -    return obj->mapping;
>> +    return ptr;
>> +
>> +err:
>> +    i915_gem_object_unpin_pages(obj);
>> +    return ERR_PTR(ret);
>>   }
>>
>>   static void
>> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> index c60a8d5b..10265bb 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> @@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf
>> *dma_buf)
>>       if (ret)
>>           return ERR_PTR(ret);
>>
>> -    addr = i915_gem_object_pin_map(obj);
>> +    addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>>       mutex_unlock(&dev->struct_mutex);
>>
>>       return addr;
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 041cf68..1d58d36 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -1178,7 +1178,7 @@ static int guc_create_log_extras(struct
>> intel_guc *guc)
>>
>>       if (!guc->log.buf_addr) {
>>           /* Create a vmalloc mapping of log buffer pages */
>> -        vaddr = i915_gem_object_pin_map(guc->log.obj);
>> +        vaddr = i915_gem_object_pin_map(guc->log.obj, I915_MAP_WB);
>>           if (IS_ERR(vaddr)) {
>>               ret = PTR_ERR(vaddr);
>>               DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index c7f4b64..c24ac39 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -780,7 +780,7 @@ static int intel_lr_context_pin(struct
>> i915_gem_context *ctx,
>>       if (ret)
>>           goto err;
>>
>> -    vaddr = i915_gem_object_pin_map(ce->state);
>> +    vaddr = i915_gem_object_pin_map(ce->state, I915_MAP_WB);
>>       if (IS_ERR(vaddr)) {
>>           ret = PTR_ERR(vaddr);
>>           goto unpin_ctx_obj;
>> @@ -1755,7 +1755,7 @@ lrc_setup_hws(struct intel_engine_cs *engine,
>>       /* The HWSP is part of the default context object in LRC mode. */
>>       engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
>>                          LRC_PPHWSP_PN * PAGE_SIZE;
>> -    hws = i915_gem_object_pin_map(dctx_obj);
>> +    hws = i915_gem_object_pin_map(dctx_obj, I915_MAP_WB);
>>       if (IS_ERR(hws))
>>           return PTR_ERR(hws);
>>       engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
>> @@ -1968,7 +1968,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>>           return ret;
>>       }
>>
>> -    vaddr = i915_gem_object_pin_map(ctx_obj);
>> +    vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>>       if (IS_ERR(vaddr)) {
>>           ret = PTR_ERR(vaddr);
>>           DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
>> @@ -2189,7 +2189,7 @@ void intel_lr_context_reset(struct
>> drm_i915_private *dev_priv,
>>           if (!ctx_obj)
>>               continue;
>>
>> -        vaddr = i915_gem_object_pin_map(ctx_obj);
>> +        vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
>>           if (WARN_ON(IS_ERR(vaddr)))
>>               continue;
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index e8fa26c..69ec5da 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -1951,7 +1951,7 @@ int intel_ring_pin(struct intel_ring *ring)
>>           if (ret)
>>               goto err_unpin;
>>
>> -        addr = i915_gem_object_pin_map(obj);
>> +        addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
>>           if (IS_ERR(addr)) {
>>               ret = PTR_ERR(addr);
>>               goto err_unpin;
>>
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
>
> Tvrtko
Chris Wilson Aug. 12, 2016, 3:16 p.m. UTC | #3
On Fri, Aug 12, 2016 at 08:43:58PM +0530, Goel, Akash wrote:
> On 8/12/2016 4:19 PM, Tvrtko Ursulin wrote:
> >Unreleated and unmentioned change to no guard page. Best to remove IMHO.
> >Can keep the RB in that case.
> 
> Though its not called out, sorry for that, but isn't it better to
> avoid using the guard page, which will save 4KB of vmalloc virtual
> space (which is scarce) for every mapping created by Driver.
> 
> Updating the commit message would be fine to mention about this ?.

Too late, already applied without the new flag.

Yes, that's why I dropped the guard page when I found out it was being
added. Send a patch to add the flag and we can discuss whether we think
our code is adequate to not require the protection.
-Chris
akash.goel@intel.com Aug. 12, 2016, 4:46 p.m. UTC | #4
On 8/12/2016 8:46 PM, Chris Wilson wrote:
> On Fri, Aug 12, 2016 at 08:43:58PM +0530, Goel, Akash wrote:
>> On 8/12/2016 4:19 PM, Tvrtko Ursulin wrote:
>>> Unreleated and unmentioned change to no guard page. Best to remove IMHO.
>>> Can keep the RB in that case.
>>
>> Though its not called out, sorry for that, but isn't it better to
>> avoid using the guard page, which will save 4KB of vmalloc virtual
>> space (which is scarce) for every mapping created by Driver.
>>
>> Updating the commit message would be fine to mention about this ?.
>
> Too late, already applied without the new flag.
>
ohh, the patch is already queued for merge ?

> Yes, that's why I dropped the guard page when I found out it was being
> added. Send a patch to add the flag and we can discuss whether we think
> our code is adequate to not require the protection.

Fine, will prepare a separate patch to avoid using the guard page.

Best regards
Akash

> -Chris
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4bd3790..6603812 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -834,6 +834,11 @@  enum i915_cache_level {
 	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
 };
 
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+};
+
 struct i915_ctx_hang_stats {
 	/* This context had batch pending when hang was declared */
 	unsigned batch_pending;
@@ -3150,6 +3155,7 @@  static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 /**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
  * @obj - the object to map into kernel address space
+ * @map_type - whether the vmalloc mapping should be using WC or WB pgprot_t
  *
  * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
  * pages and then returns a contiguous mapping of the backing storage into
@@ -3161,7 +3167,8 @@  static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
  * Returns the pointer through which to access the mapped object, or an
  * ERR_PTR() on error.
  */
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj);
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					enum i915_map_type map_type);
 
 /**
  * i915_gem_object_unpin_map - releases an earlier mapping
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 03548db..7dabbc3f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2077,10 +2077,11 @@  i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	list_del(&obj->global_list);
 
 	if (obj->mapping) {
-		if (is_vmalloc_addr(obj->mapping))
-			vunmap(obj->mapping);
+		void *ptr = (void *)((uintptr_t)obj->mapping & PAGE_MASK);
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
 		else
-			kunmap(kmap_to_page(obj->mapping));
+			kunmap(kmap_to_page(ptr));
 		obj->mapping = NULL;
 	}
 
@@ -2253,7 +2254,8 @@  i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 }
 
 /* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
 {
 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->pages;
@@ -2263,9 +2265,10 @@  static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	struct page **pages = stack_pages;
 	unsigned long i = 0;
 	void *addr;
+	bool use_wc = (type == I915_MAP_WC);
 
 	/* A single page can always be kmapped */
-	if (n_pages == 1)
+	if (n_pages == 1 && !use_wc)
 		return kmap(sg_page(sgt->sgl));
 
 	if (n_pages > ARRAY_SIZE(stack_pages)) {
@@ -2281,7 +2284,8 @@  static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	/* Check that we have the expected number of pages */
 	GEM_BUG_ON(i != n_pages);
 
-	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+	addr = vmap(pages, n_pages, VM_NO_GUARD,
+		    use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL);
 
 	if (pages != stack_pages)
 		drm_free_large(pages);
@@ -2290,11 +2294,16 @@  static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 }
 
 /* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
 {
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0);
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
@@ -2302,15 +2311,38 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 
 	i915_gem_object_pin_pages(obj);
 
-	if (!obj->mapping) {
-		obj->mapping = i915_gem_object_map(obj);
-		if (!obj->mapping) {
-			i915_gem_object_unpin_pages(obj);
-			return ERR_PTR(-ENOMEM);
+	pinned = obj->pages_pin_count > 1;
+	ptr = (void *)((uintptr_t)obj->mapping & PAGE_MASK);
+	has_type = (uintptr_t)obj->mapping & ~PAGE_MASK;
+
+	if (ptr && has_type != type) {
+		if (pinned) {
+			ret = -EBUSY;
+			goto err;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+		ptr = obj->mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			ret = -ENOMEM;
+			goto err;
 		}
+
+		obj->mapping = (void *)((uintptr_t)ptr | type);
 	}
 
-	return obj->mapping;
+	return ptr;
+
+err:
+	i915_gem_object_unpin_pages(obj);
+	return ERR_PTR(ret);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index c60a8d5b..10265bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -119,7 +119,7 @@  static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 	if (ret)
 		return ERR_PTR(ret);
 
-	addr = i915_gem_object_pin_map(obj);
+	addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	mutex_unlock(&dev->struct_mutex);
 
 	return addr;
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 041cf68..1d58d36 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1178,7 +1178,7 @@  static int guc_create_log_extras(struct intel_guc *guc)
 
 	if (!guc->log.buf_addr) {
 		/* Create a vmalloc mapping of log buffer pages */
-		vaddr = i915_gem_object_pin_map(guc->log.obj);
+		vaddr = i915_gem_object_pin_map(guc->log.obj, I915_MAP_WB);
 		if (IS_ERR(vaddr)) {
 			ret = PTR_ERR(vaddr);
 			DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c7f4b64..c24ac39 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -780,7 +780,7 @@  static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ret)
 		goto err;
 
-	vaddr = i915_gem_object_pin_map(ce->state);
+	vaddr = i915_gem_object_pin_map(ce->state, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
 		goto unpin_ctx_obj;
@@ -1755,7 +1755,7 @@  lrc_setup_hws(struct intel_engine_cs *engine,
 	/* The HWSP is part of the default context object in LRC mode. */
 	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
 				       LRC_PPHWSP_PN * PAGE_SIZE;
-	hws = i915_gem_object_pin_map(dctx_obj);
+	hws = i915_gem_object_pin_map(dctx_obj, I915_MAP_WB);
 	if (IS_ERR(hws))
 		return PTR_ERR(hws);
 	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
@@ -1968,7 +1968,7 @@  populate_lr_context(struct i915_gem_context *ctx,
 		return ret;
 	}
 
-	vaddr = i915_gem_object_pin_map(ctx_obj);
+	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
@@ -2189,7 +2189,7 @@  void intel_lr_context_reset(struct drm_i915_private *dev_priv,
 		if (!ctx_obj)
 			continue;
 
-		vaddr = i915_gem_object_pin_map(ctx_obj);
+		vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
 		if (WARN_ON(IS_ERR(vaddr)))
 			continue;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e8fa26c..69ec5da 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1951,7 +1951,7 @@  int intel_ring_pin(struct intel_ring *ring)
 		if (ret)
 			goto err_unpin;
 
-		addr = i915_gem_object_pin_map(obj);
+		addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 		if (IS_ERR(addr)) {
 			ret = PTR_ERR(addr);
 			goto err_unpin;