Message ID | 1468158084-22028-12-git-send-email-akash.goel@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 10/07/16 14:41, akash.goel@intel.com wrote: > From: Chris Wilson <chris@chris-wilson.co.uk> > > vmaps has a provision for controlling the page protection bits, with which > we can use to control the mapping type, e.g. WB, WC, UC or even WT. > To allow the caller to choose their mapping type, we add a parameter to > i915_gem_object_pin_map - but we still only allow one vmap to be cached > per object. If the object is currently not pinned, then we recreate the > previous vmap with the new access type, but if it was pinned we report an > error. This effectively limits the access via i915_gem_object_pin_map to a > single mapping type for the lifetime of the object. Not usually a problem, > but something to be aware of when setting up the object's vmap. > > We will want to vary the access type to enable WC mappings of ringbuffer > and context objects on !llc platforms, as well as other objects where we > need coherent access to the GPU's pages without going through the GTT > > v2: Remove the redundant braces around pin count check and fix the marker > in documentation (Chris) > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Akash Goel <akash.goel@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 ++- > drivers/gpu/drm/i915/i915_gem.c | 57 +++++++++++++++++++++++------- > drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- > drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- > drivers/gpu/drm/i915/intel_lrc.c | 8 ++--- > drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- > 6 files changed, 54 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 6e2ddfa..84afa17 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3248,6 +3248,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) > /** > * i915_gem_object_pin_map - return a contiguous mapping of the entire object > * @obj - the object to map into kernel address 
space > + * @use_wc - whether the mapping should be using WC or WB pgprot_t > * > * Calls i915_gem_object_pin_pages() to prevent reaping of the object's > * pages and then returns a contiguous mapping of the backing storage into > @@ -3259,7 +3260,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) > * Returns the pointer through which to access the mapped object, or an > * ERR_PTR() on error. > */ > -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj); > +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, > + bool use_wc); Could you make it an enum instead of a bool? Commit message suggests more modes will potentially be added and if so, and we start with an enum straight away, it will make for less churn in the future. func(something, true) is always also quite unreadable in the code because one has to remember or remind himself what it really means. Something like func(something, MAP_WC) would be simply self-documenting. > > /** > * i915_gem_object_unpin_map - releases an earlier mapping > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 8f50919..c431b40 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2471,10 +2471,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) > list_del(&obj->global_list); > > if (obj->mapping) { > - if (is_vmalloc_addr(obj->mapping)) > - vunmap(obj->mapping); > + void *ptr = (void *)((uintptr_t)obj->mapping & ~1); How many bits we have to play with here? Is there a suitable define somewhere we could use for a mask instead of hardcoded "1" or we could add one if you think that would be better? 
> + if (is_vmalloc_addr(ptr)) > + vunmap(ptr); > else > - kunmap(kmap_to_page(obj->mapping)); > + kunmap(kmap_to_page(ptr)); > obj->mapping = NULL; > } > > @@ -2647,7 +2648,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > } > > /* The 'mapping' part of i915_gem_object_pin_map() below */ > -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, > + bool use_wc) > { > unsigned long n_pages = obj->base.size >> PAGE_SHIFT; > struct sg_table *sgt = obj->pages; > @@ -2659,7 +2661,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > void *addr; > > /* A single page can always be kmapped */ > - if (n_pages == 1) > + if (n_pages == 1 && !use_wc) > return kmap(sg_page(sgt->sgl)); > > if (n_pages > ARRAY_SIZE(stack_pages)) { > @@ -2675,7 +2677,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > /* Check that we have the expected number of pages */ > GEM_BUG_ON(i != n_pages); > > - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); > + addr = vmap(pages, n_pages, VM_NO_GUARD, > + use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL); For educational benefit, what is the importance and difference between PAGE_KERNEL and PAGE_KERNEL_IO here? 
> > if (pages != stack_pages) > drm_free_large(pages); > @@ -2684,27 +2687,55 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > } > > /* get, pin, and map the pages of the object into kernel space */ > -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) > +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool use_wc) > { > + void *ptr; > + bool has_wc; > + bool pinned; > int ret; > > lockdep_assert_held(&obj->base.dev->struct_mutex); > + GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0); > > ret = i915_gem_object_get_pages(obj); > if (ret) > return ERR_PTR(ret); > > + GEM_BUG_ON(obj->pages == NULL); Looks like belongs to i915_gem_object_get_pages and not to callers. > i915_gem_object_pin_pages(obj); > > - if (!obj->mapping) { > - obj->mapping = i915_gem_object_map(obj); > - if (!obj->mapping) { > - i915_gem_object_unpin_pages(obj); > - return ERR_PTR(-ENOMEM); > + pinned = obj->pages_pin_count > 1; > + ptr = (void *)((uintptr_t)obj->mapping & ~1); > + has_wc = (uintptr_t)obj->mapping & 1; > + > + if (ptr && has_wc != use_wc) { > + if (pinned) { > + ret = -EBUSY; > + goto err; > + } > + > + if (is_vmalloc_addr(ptr)) > + vunmap(ptr); > + else > + kunmap(kmap_to_page(ptr)); > + ptr = obj->mapping = NULL; > + } > + > + if (!ptr) { > + ptr = i915_gem_object_map(obj, use_wc); > + if (!ptr) { > + ret = -ENOMEM; > + goto err; > } > + > + obj->mapping = (void *)((uintptr_t)ptr | use_wc); > } > > - return obj->mapping; > + return ptr; > + > +err: > + i915_gem_object_unpin_pages(obj); > + return ERR_PTR(ret); > } > > void i915_vma_move_to_active(struct i915_vma *vma, > diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c > index 80bbe43..edcadce 100644 > --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c > +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c > @@ -115,7 +115,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) > if (ret) > return ERR_PTR(ret); > > - 
addr = i915_gem_object_pin_map(obj); > + addr = i915_gem_object_pin_map(obj, false); > mutex_unlock(&dev->struct_mutex); > > return addr; > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index 009d7c0..c468619 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -1096,7 +1096,7 @@ static int guc_create_log_extras(struct intel_guc *guc) > > if (!guc->log.buf_addr) { > /* Create a vmalloc mapping of log buffer pages */ > - vaddr = i915_gem_object_pin_map(guc->log.obj); > + vaddr = i915_gem_object_pin_map(guc->log.obj, false); > if (IS_ERR(vaddr)) { > ret = PTR_ERR(vaddr); > DRM_ERROR("Couldn't map log buffer pages %d\n", ret); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 70c6990..0d41047 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -971,7 +971,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, > if (ret) > goto err; > > - vaddr = i915_gem_object_pin_map(ce->state); > + vaddr = i915_gem_object_pin_map(ce->state, false); > if (IS_ERR(vaddr)) { > ret = PTR_ERR(vaddr); > goto unpin_ctx_obj; > @@ -1993,7 +1993,7 @@ lrc_setup_hws(struct intel_engine_cs *engine, > /* The HWSP is part of the default context object in LRC mode. */ > engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + > LRC_PPHWSP_PN * PAGE_SIZE; > - hws = i915_gem_object_pin_map(dctx_obj); > + hws = i915_gem_object_pin_map(dctx_obj, false); > if (IS_ERR(hws)) > return PTR_ERR(hws); > engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; > @@ -2324,7 +2324,7 @@ populate_lr_context(struct i915_gem_context *ctx, > return ret; > } > > - vaddr = i915_gem_object_pin_map(ctx_obj); > + vaddr = i915_gem_object_pin_map(ctx_obj, false); > if (IS_ERR(vaddr)) { > ret = PTR_ERR(vaddr); > DRM_DEBUG_DRIVER("Could not map object pages! 
(%d)\n", ret); > @@ -2558,7 +2558,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, > if (!ctx_obj) > continue; > > - vaddr = i915_gem_object_pin_map(ctx_obj); > + vaddr = i915_gem_object_pin_map(ctx_obj, false); > if (WARN_ON(IS_ERR(vaddr))) > continue; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 736ddba..a195f65 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -2007,7 +2007,7 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, > if (ret) > goto err_unpin; > > - addr = i915_gem_object_pin_map(obj); > + addr = i915_gem_object_pin_map(obj, false); > if (IS_ERR(addr)) { > ret = PTR_ERR(addr); > goto err_unpin; > The rest looks fine to me. Regards, Tvrtko
On Fri, Jul 15, 2016 at 12:31:23PM +0100, Tvrtko Ursulin wrote: [snip good ideas, leaving the questions] > > /** > > * i915_gem_object_unpin_map - releases an earlier mapping > >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > >index 8f50919..c431b40 100644 > >--- a/drivers/gpu/drm/i915/i915_gem.c > >+++ b/drivers/gpu/drm/i915/i915_gem.c > >@@ -2471,10 +2471,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) > > list_del(&obj->global_list); > > > > if (obj->mapping) { > >- if (is_vmalloc_addr(obj->mapping)) > >- vunmap(obj->mapping); > >+ void *ptr = (void *)((uintptr_t)obj->mapping & ~1); > > How many bits we have to play with here? Is there a suitable define > somewhere we could use for a mask instead of hardcoded "1" or we > could add one if you think that would be better? The mapping is always page-aligned, so bits 0-11 are available. PAGE_MASK should do the trick > >- addr = vmap(pages, n_pages, 0, PAGE_KERNEL); > >+ addr = vmap(pages, n_pages, VM_NO_GUARD, > >+ use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL); > > For educational benefit, what is the importance and difference > between PAGE_KERNEL and PAGE_KERNEL_IO here? One day I'll find out. We've always tagged our WC mmappings as PAGE_KERNEL_IO, so cargo-culted it in. 
> > > > if (pages != stack_pages) > > drm_free_large(pages); > >@@ -2684,27 +2687,55 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > > } > > > > /* get, pin, and map the pages of the object into kernel space */ > >-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) > >+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool use_wc) > > { > >+ void *ptr; > >+ bool has_wc; > >+ bool pinned; > > int ret; > > > > lockdep_assert_held(&obj->base.dev->struct_mutex); > >+ GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0); > > > > ret = i915_gem_object_get_pages(obj); > > if (ret) > > return ERR_PTR(ret); > > > >+ GEM_BUG_ON(obj->pages == NULL); > > Looks like belongs to i915_gem_object_get_pages and not to callers. This is from later where it did: pinned = true; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { ret = ____i915_gem_object_get_pages(obj); if (ret) goto err_unlock; smp_mb__before_atomic(); GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count)); atomic_set(&obj->mm.pages_pin_count, 1); pinned = false; } GEM_BUG_ON(obj->mm.pages == NULL); Right now the BUG_ON is superfluous. -Chris
On 7/15/2016 5:01 PM, Tvrtko Ursulin wrote: > > On 10/07/16 14:41, akash.goel@intel.com wrote: >> From: Chris Wilson <chris@chris-wilson.co.uk> >> >> vmaps has a provision for controlling the page protection bits, with >> which >> we can use to control the mapping type, e.g. WB, WC, UC or even WT. >> To allow the caller to choose their mapping type, we add a parameter to >> i915_gem_object_pin_map - but we still only allow one vmap to be cached >> per object. If the object is currently not pinned, then we recreate the >> previous vmap with the new access type, but if it was pinned we report an >> error. This effectively limits the access via i915_gem_object_pin_map >> to a >> single mapping type for the lifetime of the object. Not usually a >> problem, >> but something to be aware of when setting up the object's vmap. >> >> We will want to vary the access type to enable WC mappings of ringbuffer >> and context objects on !llc platforms, as well as other objects where we >> need coherent access to the GPU's pages without going through the GTT >> >> v2: Remove the redundant braces around pin count check and fix the marker >> in documentation (Chris) >> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> Signed-off-by: Akash Goel <akash.goel@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 4 ++- >> drivers/gpu/drm/i915/i915_gem.c | 57 >> +++++++++++++++++++++++------- >> drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- >> drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- >> drivers/gpu/drm/i915/intel_lrc.c | 8 ++--- >> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- >> 6 files changed, 54 insertions(+), 21 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 6e2ddfa..84afa17 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -3248,6 +3248,7 @@ static inline void >> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) >> /** >> * 
i915_gem_object_pin_map - return a contiguous mapping of the >> entire object >> * @obj - the object to map into kernel address space >> + * @use_wc - whether the mapping should be using WC or WB pgprot_t >> * >> * Calls i915_gem_object_pin_pages() to prevent reaping of the object's >> * pages and then returns a contiguous mapping of the backing >> storage into >> @@ -3259,7 +3260,8 @@ static inline void >> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) >> * Returns the pointer through which to access the mapped object, or an >> * ERR_PTR() on error. >> */ >> -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object >> *obj); >> +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object >> *obj, >> + bool use_wc); > > Could you make it an enum instead of a bool? Commit message suggests > more modes will potentially be added and if so, and we start with an > enum straight away, it will make for less churn in the future. > > func(something, true) is always also quite unreadabe in the code because > one has to remember or remind himself what it really means. > > Something like func(something, MAP_WC) would be simply self-documenting. > Thanks nice suggestion, will do that. enum only or macros also will do ? #define MAP_CACHED 0x1 #define MAP_WC 0x2 >> >> /** >> * i915_gem_object_unpin_map - releases an earlier mapping >> diff --git a/drivers/gpu/drm/i915/i915_gem.c >> b/drivers/gpu/drm/i915/i915_gem.c >> index 8f50919..c431b40 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -2471,10 +2471,11 @@ i915_gem_object_put_pages(struct >> drm_i915_gem_object *obj) >> list_del(&obj->global_list); >> >> if (obj->mapping) { >> - if (is_vmalloc_addr(obj->mapping)) >> - vunmap(obj->mapping); >> + void *ptr = (void *)((uintptr_t)obj->mapping & ~1); > > How many bits we have to play with here? 
Is there a suitable define > somewhere we could use for a mask instead of hardcoded "1" or we could > add one if you think that would be better? As Chris said, will use PAGE_MASK. > >> + if (is_vmalloc_addr(ptr)) >> + vunmap(ptr); >> else >> - kunmap(kmap_to_page(obj->mapping)); >> + kunmap(kmap_to_page(ptr)); >> obj->mapping = NULL; >> } >> >> @@ -2647,7 +2648,8 @@ i915_gem_object_get_pages(struct >> drm_i915_gem_object *obj) >> } >> >> /* The 'mapping' part of i915_gem_object_pin_map() below */ >> -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) >> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, >> + bool use_wc) >> { >> unsigned long n_pages = obj->base.size >> PAGE_SHIFT; >> struct sg_table *sgt = obj->pages; >> @@ -2659,7 +2661,7 @@ static void *i915_gem_object_map(const struct >> drm_i915_gem_object *obj) >> void *addr; >> >> /* A single page can always be kmapped */ >> - if (n_pages == 1) >> + if (n_pages == 1 && !use_wc) >> return kmap(sg_page(sgt->sgl)); >> >> if (n_pages > ARRAY_SIZE(stack_pages)) { >> @@ -2675,7 +2677,8 @@ static void *i915_gem_object_map(const struct >> drm_i915_gem_object *obj) >> /* Check that we have the expected number of pages */ >> GEM_BUG_ON(i != n_pages); >> >> - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); >> + addr = vmap(pages, n_pages, VM_NO_GUARD, >> + use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL); > > For educational benefit, what is the importance and difference between > PAGE_KERNEL and PAGE_KERNEL_IO here? 
> >> >> if (pages != stack_pages) >> drm_free_large(pages); >> @@ -2684,27 +2687,55 @@ static void *i915_gem_object_map(const struct >> drm_i915_gem_object *obj) >> } >> >> /* get, pin, and map the pages of the object into kernel space */ >> -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) >> +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool >> use_wc) >> { >> + void *ptr; >> + bool has_wc; >> + bool pinned; >> int ret; >> >> lockdep_assert_held(&obj->base.dev->struct_mutex); >> + GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == >> 0); >> >> ret = i915_gem_object_get_pages(obj); >> if (ret) >> return ERR_PTR(ret); >> >> + GEM_BUG_ON(obj->pages == NULL); > > Looks like belongs to i915_gem_object_get_pages and not to callers. > As Chris said, this is superfluous, so will remove it. Best regards Akash >> i915_gem_object_pin_pages(obj); >> >> - if (!obj->mapping) { >> - obj->mapping = i915_gem_object_map(obj); >> - if (!obj->mapping) { >> - i915_gem_object_unpin_pages(obj); >> - return ERR_PTR(-ENOMEM); >> + pinned = obj->pages_pin_count > 1; >> + ptr = (void *)((uintptr_t)obj->mapping & ~1); >> + has_wc = (uintptr_t)obj->mapping & 1; >> + >> + if (ptr && has_wc != use_wc) { >> + if (pinned) { >> + ret = -EBUSY; >> + goto err; >> + } >> + >> + if (is_vmalloc_addr(ptr)) >> + vunmap(ptr); >> + else >> + kunmap(kmap_to_page(ptr)); >> + ptr = obj->mapping = NULL; >> + } >> + >> + if (!ptr) { >> + ptr = i915_gem_object_map(obj, use_wc); >> + if (!ptr) { >> + ret = -ENOMEM; >> + goto err; >> } >> + >> + obj->mapping = (void *)((uintptr_t)ptr | use_wc); >> } >> >> - return obj->mapping; >> + return ptr; >> + >> +err: >> + i915_gem_object_unpin_pages(obj); >> + return ERR_PTR(ret); >> } >> >> void i915_vma_move_to_active(struct i915_vma *vma, >> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c >> b/drivers/gpu/drm/i915/i915_gem_dmabuf.c >> index 80bbe43..edcadce 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c >> +++ 
b/drivers/gpu/drm/i915/i915_gem_dmabuf.c >> @@ -115,7 +115,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf >> *dma_buf) >> if (ret) >> return ERR_PTR(ret); >> >> - addr = i915_gem_object_pin_map(obj); >> + addr = i915_gem_object_pin_map(obj, false); >> mutex_unlock(&dev->struct_mutex); >> >> return addr; >> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c >> b/drivers/gpu/drm/i915/i915_guc_submission.c >> index 009d7c0..c468619 100644 >> --- a/drivers/gpu/drm/i915/i915_guc_submission.c >> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c >> @@ -1096,7 +1096,7 @@ static int guc_create_log_extras(struct >> intel_guc *guc) >> >> if (!guc->log.buf_addr) { >> /* Create a vmalloc mapping of log buffer pages */ >> - vaddr = i915_gem_object_pin_map(guc->log.obj); >> + vaddr = i915_gem_object_pin_map(guc->log.obj, false); >> if (IS_ERR(vaddr)) { >> ret = PTR_ERR(vaddr); >> DRM_ERROR("Couldn't map log buffer pages %d\n", ret); >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index 70c6990..0d41047 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -971,7 +971,7 @@ static int intel_lr_context_pin(struct >> i915_gem_context *ctx, >> if (ret) >> goto err; >> >> - vaddr = i915_gem_object_pin_map(ce->state); >> + vaddr = i915_gem_object_pin_map(ce->state, false); >> if (IS_ERR(vaddr)) { >> ret = PTR_ERR(vaddr); >> goto unpin_ctx_obj; >> @@ -1993,7 +1993,7 @@ lrc_setup_hws(struct intel_engine_cs *engine, >> /* The HWSP is part of the default context object in LRC mode. 
*/ >> engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + >> LRC_PPHWSP_PN * PAGE_SIZE; >> - hws = i915_gem_object_pin_map(dctx_obj); >> + hws = i915_gem_object_pin_map(dctx_obj, false); >> if (IS_ERR(hws)) >> return PTR_ERR(hws); >> engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; >> @@ -2324,7 +2324,7 @@ populate_lr_context(struct i915_gem_context *ctx, >> return ret; >> } >> >> - vaddr = i915_gem_object_pin_map(ctx_obj); >> + vaddr = i915_gem_object_pin_map(ctx_obj, false); >> if (IS_ERR(vaddr)) { >> ret = PTR_ERR(vaddr); >> DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); >> @@ -2558,7 +2558,7 @@ void intel_lr_context_reset(struct >> drm_i915_private *dev_priv, >> if (!ctx_obj) >> continue; >> >> - vaddr = i915_gem_object_pin_map(ctx_obj); >> + vaddr = i915_gem_object_pin_map(ctx_obj, false); >> if (WARN_ON(IS_ERR(vaddr))) >> continue; >> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c >> b/drivers/gpu/drm/i915/intel_ringbuffer.c >> index 736ddba..a195f65 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c >> @@ -2007,7 +2007,7 @@ int intel_pin_and_map_ringbuffer_obj(struct >> drm_i915_private *dev_priv, >> if (ret) >> goto err_unpin; >> >> - addr = i915_gem_object_pin_map(obj); >> + addr = i915_gem_object_pin_map(obj, false); >> if (IS_ERR(addr)) { >> ret = PTR_ERR(addr); >> goto err_unpin; >> > > The rest looks fine to me. > > Regards, > > Tvrtko
On 15/07/16 17:30, Goel, Akash wrote: > On 7/15/2016 5:01 PM, Tvrtko Ursulin wrote: >> On 10/07/16 14:41, akash.goel@intel.com wrote: >>> From: Chris Wilson <chris@chris-wilson.co.uk> >>> >>> vmaps has a provision for controlling the page protection bits, with >>> which >>> we can use to control the mapping type, e.g. WB, WC, UC or even WT. >>> To allow the caller to choose their mapping type, we add a parameter to >>> i915_gem_object_pin_map - but we still only allow one vmap to be cached >>> per object. If the object is currently not pinned, then we recreate the >>> previous vmap with the new access type, but if it was pinned we >>> report an >>> error. This effectively limits the access via i915_gem_object_pin_map >>> to a >>> single mapping type for the lifetime of the object. Not usually a >>> problem, >>> but something to be aware of when setting up the object's vmap. >>> >>> We will want to vary the access type to enable WC mappings of ringbuffer >>> and context objects on !llc platforms, as well as other objects where we >>> need coherent access to the GPU's pages without going through the GTT >>> >>> v2: Remove the redundant braces around pin count check and fix the >>> marker >>> in documentation (Chris) >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> Signed-off-by: Akash Goel <akash.goel@intel.com> >>> --- >>> drivers/gpu/drm/i915/i915_drv.h | 4 ++- >>> drivers/gpu/drm/i915/i915_gem.c | 57 >>> +++++++++++++++++++++++------- >>> drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- >>> drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- >>> drivers/gpu/drm/i915/intel_lrc.c | 8 ++--- >>> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- >>> 6 files changed, 54 insertions(+), 21 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h >>> b/drivers/gpu/drm/i915/i915_drv.h >>> index 6e2ddfa..84afa17 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -3248,6 +3248,7 @@ static inline void >>> 
i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) >>> /** >>> * i915_gem_object_pin_map - return a contiguous mapping of the >>> entire object >>> * @obj - the object to map into kernel address space >>> + * @use_wc - whether the mapping should be using WC or WB pgprot_t >>> * >>> * Calls i915_gem_object_pin_pages() to prevent reaping of the >>> object's >>> * pages and then returns a contiguous mapping of the backing >>> storage into >>> @@ -3259,7 +3260,8 @@ static inline void >>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) >>> * Returns the pointer through which to access the mapped object, >>> or an >>> * ERR_PTR() on error. >>> */ >>> -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object >>> *obj); >>> +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object >>> *obj, >>> + bool use_wc); >> >> Could you make it an enum instead of a bool? Commit message suggests >> more modes will potentially be added and if so, and we start with an >> enum straight away, it will make for less churn in the future. >> >> func(something, true) is always also quite unreadabe in the code because >> one has to remember or remind himself what it really means. >> >> Something like func(something, MAP_WC) would be simply self-documenting. >> > Thanks nice suggestion, will do that. > enum only or macros also will do ? > #define MAP_CACHED 0x1 > #define MAP_WC 0x2 I think enum is better since you then have type checking on the arguments. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6e2ddfa..84afa17 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3248,6 +3248,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj - the object to map into kernel address space + * @use_wc - whether the mapping should be using WC or WB pgprot_t * * Calls i915_gem_object_pin_pages() to prevent reaping of the object's * pages and then returns a contiguous mapping of the backing storage into @@ -3259,7 +3260,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) * Returns the pointer through which to access the mapped object, or an * ERR_PTR() on error. */ -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj); +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + bool use_wc); /** * i915_gem_object_unpin_map - releases an earlier mapping diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8f50919..c431b40 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2471,10 +2471,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) list_del(&obj->global_list); if (obj->mapping) { - if (is_vmalloc_addr(obj->mapping)) - vunmap(obj->mapping); + void *ptr = (void *)((uintptr_t)obj->mapping & ~1); + if (is_vmalloc_addr(ptr)) + vunmap(ptr); else - kunmap(kmap_to_page(obj->mapping)); + kunmap(kmap_to_page(ptr)); obj->mapping = NULL; } @@ -2647,7 +2648,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, + bool use_wc) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 
struct sg_table *sgt = obj->pages; @@ -2659,7 +2661,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) void *addr; /* A single page can always be kmapped */ - if (n_pages == 1) + if (n_pages == 1 && !use_wc) return kmap(sg_page(sgt->sgl)); if (n_pages > ARRAY_SIZE(stack_pages)) { @@ -2675,7 +2677,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) /* Check that we have the expected number of pages */ GEM_BUG_ON(i != n_pages); - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); + addr = vmap(pages, n_pages, VM_NO_GUARD, + use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL); if (pages != stack_pages) drm_free_large(pages); @@ -2684,27 +2687,55 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) } /* get, pin, and map the pages of the object into kernel space */ -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool use_wc) { + void *ptr; + bool has_wc; + bool pinned; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0); ret = i915_gem_object_get_pages(obj); if (ret) return ERR_PTR(ret); + GEM_BUG_ON(obj->pages == NULL); i915_gem_object_pin_pages(obj); - if (!obj->mapping) { - obj->mapping = i915_gem_object_map(obj); - if (!obj->mapping) { - i915_gem_object_unpin_pages(obj); - return ERR_PTR(-ENOMEM); + pinned = obj->pages_pin_count > 1; + ptr = (void *)((uintptr_t)obj->mapping & ~1); + has_wc = (uintptr_t)obj->mapping & 1; + + if (ptr && has_wc != use_wc) { + if (pinned) { + ret = -EBUSY; + goto err; + } + + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); + ptr = obj->mapping = NULL; + } + + if (!ptr) { + ptr = i915_gem_object_map(obj, use_wc); + if (!ptr) { + ret = -ENOMEM; + goto err; } + + obj->mapping = (void *)((uintptr_t)ptr | use_wc); } - return obj->mapping; + return ptr; + +err: + 
i915_gem_object_unpin_pages(obj); + return ERR_PTR(ret); } void i915_vma_move_to_active(struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 80bbe43..edcadce 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -115,7 +115,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) if (ret) return ERR_PTR(ret); - addr = i915_gem_object_pin_map(obj); + addr = i915_gem_object_pin_map(obj, false); mutex_unlock(&dev->struct_mutex); return addr; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 009d7c0..c468619 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1096,7 +1096,7 @@ static int guc_create_log_extras(struct intel_guc *guc) if (!guc->log.buf_addr) { /* Create a vmalloc mapping of log buffer pages */ - vaddr = i915_gem_object_pin_map(guc->log.obj); + vaddr = i915_gem_object_pin_map(guc->log.obj, false); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_ERROR("Couldn't map log buffer pages %d\n", ret); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 70c6990..0d41047 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -971,7 +971,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state); + vaddr = i915_gem_object_pin_map(ce->state, false); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto unpin_ctx_obj; @@ -1993,7 +1993,7 @@ lrc_setup_hws(struct intel_engine_cs *engine, /* The HWSP is part of the default context object in LRC mode. 
*/ engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE; - hws = i915_gem_object_pin_map(dctx_obj); + hws = i915_gem_object_pin_map(dctx_obj, false); if (IS_ERR(hws)) return PTR_ERR(hws); engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; @@ -2324,7 +2324,7 @@ populate_lr_context(struct i915_gem_context *ctx, return ret; } - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ctx_obj, false); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); @@ -2558,7 +2558,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, if (!ctx_obj) continue; - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ctx_obj, false); if (WARN_ON(IS_ERR(vaddr))) continue; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 736ddba..a195f65 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2007,7 +2007,7 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, if (ret) goto err_unpin; - addr = i915_gem_object_pin_map(obj); + addr = i915_gem_object_pin_map(obj, false); if (IS_ERR(addr)) { ret = PTR_ERR(addr); goto err_unpin;