Message ID | 1372375867-1003-62-git-send-email-ben@bwidawsk.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Jun 27, 2013 at 04:31:02PM -0700, Ben Widawsky wrote: > This requires doing an actual switch of the page tables during the > context switch/execbuf. > > Along the way, cut away as much "aliasing" ppgtt as possible > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > --- > drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++--------- > drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++++++------------ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++------- > 3 files changed, 50 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index af0150e..f05d585 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2170,7 +2170,10 @@ request_to_vm(struct drm_i915_gem_request *request) > struct drm_i915_private *dev_priv = request->ring->dev->dev_private; > struct i915_address_space *vm; > > - vm = &dev_priv->gtt.base; > + if (request->ctx) > + vm = &request->ctx->ppgtt.base; > + else > + vm = &dev_priv->gtt.base; > > return vm; > } > @@ -2676,10 +2679,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj, > > if (obj->has_global_gtt_mapping && is_i915_ggtt(vm)) > i915_gem_gtt_unbind_object(obj); > - if (obj->has_aliasing_ppgtt_mapping) { > - i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj); > - obj->has_aliasing_ppgtt_mapping = 0; > - } > + > + vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > + obj->base.size >> PAGE_SHIFT); > + > i915_gem_gtt_finish_object(obj); > i915_gem_object_unpin_pages(obj); > > @@ -3444,11 +3447,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > return ret; > } > > - if (obj->has_global_gtt_mapping) > + if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping) > i915_gem_gtt_bind_object(obj, cache_level); > - if (obj->has_aliasing_ppgtt_mapping) > - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, > - obj, cache_level); > + > + vm->insert_entries(vm, obj->pages, > + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > + cache_level); > > i915_gem_obj_set_color(obj, vm, cache_level); > } > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 37ebfa2..cea036e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -526,10 +526,14 @@ static int do_switch(struct intel_ring_buffer *ring, > if (from == to && from->last_ring == ring) > return 0; > > + ret = to->ppgtt.switch_mm(&to->ppgtt, ring); > + if (ret) > + return ret; > + > if (ring != &dev_priv->ring[RCS] && from) { > ret = i915_add_request(ring, NULL); > if (ret) > - return ret; > + goto err_out; > i915_gem_context_unreference(from); > } > > @@ -538,7 +542,7 @@ static int do_switch(struct intel_ring_buffer *ring, > > ret = i915_gem_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false); > if (ret) > - return ret; > + goto err_out; > > /* Clear this page out of any CPU caches for coherent swap-in/out. Note > * that thanks to write = false in this call and us not setting any gpu > @@ -546,10 +550,8 @@ static int do_switch(struct intel_ring_buffer *ring, > * (when switching away from it), this won't block. > * XXX: We need a real interface to do this instead of trickery. */ > ret = i915_gem_object_set_to_gtt_domain(to->obj, false); > - if (ret) { > - i915_gem_object_unpin(to->obj); > - return ret; > - } > + if (ret) > + goto unpin_out; > > if (!to->obj->has_global_gtt_mapping) > i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); > @@ -560,10 +562,8 @@ static int do_switch(struct intel_ring_buffer *ring, > hw_flags |= MI_FORCE_RESTORE; > > ret = mi_set_context(ring, to, hw_flags); > - if (ret) { > - i915_gem_object_unpin(to->obj); > - return ret; > - } > + if (ret) > + goto unpin_out; > > /* The backing object for the context is done after switching to the > * *next* context. Therefore we cannot retire the previous context until > @@ -593,7 +593,7 @@ static int do_switch(struct intel_ring_buffer *ring, > * scream. > */ > WARN_ON(mi_set_context(ring, from, MI_RESTORE_INHIBIT)); > - return ret; > + goto err_out; > } > > i915_gem_object_unpin(from->obj); > @@ -605,8 +605,13 @@ done: > ring->last_context = to; > to->is_initialized = true; > to->last_ring = ring; > - > return 0; > + > +unpin_out: > + i915_gem_object_unpin(to->obj); > +err_out: > + WARN_ON(from->ppgtt.switch_mm(&from->ppgtt, ring)); > + return ret; > } > > /** > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index aeec8c0..0f6bf3c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -437,11 +437,21 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, > } > > /* Ensure ppgtt mapping exists if needed */ > - if (dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { > - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, > - obj, obj->cache_level); > - > + if (is_i915_ggtt(vm) && > + dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { > + /* FIXME: remove this later */ > + struct i915_address_space *appgtt = > + &dev_priv->gtt.aliasing_ppgtt->base; > + unsigned long obj_offset = i915_gem_obj_offset(obj, appgtt); > + > + appgtt->insert_entries(appgtt, obj->pages, > + obj_offset >> PAGE_SHIFT, > + obj->cache_level); > I meant to remove this, but I missed it. In theory I don't ever want to insert Aliasing PPGTT PTEs. Will remove it locally and test it now. > > obj->has_aliasing_ppgtt_mapping = 1; > + } else { > + vm->insert_entries(vm, obj->pages, > + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > + obj->cache_level); > } > > if (entry->offset != i915_gem_obj_offset(obj, vm)) { > @@ -864,7 +874,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > struct i915_hw_context *ctx; > struct i915_address_space *vm; > u32 ctx_id = i915_execbuffer2_get_context_id(*args); > - u32 exec_start, exec_len; > + u32 exec_start = args->batch_start_offset, exec_len; > u32 mask, flags; > int ret, mode, i; > bool need_relocs; > @@ -1085,8 +1095,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > goto err; > } > > - exec_start = i915_gem_obj_offset(batch_obj, vm) + > - args->batch_start_offset; > + if (batch_obj->has_global_gtt_mapping) > + exec_start += i915_gem_ggtt_offset(batch_obj); > + else > + exec_start += i915_gem_obj_offset(batch_obj, vm); > + > exec_len = args->batch_len; > if (cliprects) { > for (i = 0; i < args->num_cliprects; i++) { > -- > 1.8.3.1 >
On Thu, Jun 27, 2013 at 04:43:40PM -0700, Ben Widawsky wrote: > On Thu, Jun 27, 2013 at 04:31:02PM -0700, Ben Widawsky wrote: > > This requires doing an actual switch of the page tables during the > > context switch/execbuf. > > > > Along the way, cut away as much "aliasing" ppgtt as possible > > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > --- > > drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++--------- > > drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++++++------------ > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++------- > > 3 files changed, 50 insertions(+), 28 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index af0150e..f05d585 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -2170,7 +2170,10 @@ request_to_vm(struct drm_i915_gem_request *request) > > struct drm_i915_private *dev_priv = request->ring->dev->dev_private; > > struct i915_address_space *vm; > > > > - vm = &dev_priv->gtt.base; > > + if (request->ctx) > > + vm = &request->ctx->ppgtt.base; > > + else > > + vm = &dev_priv->gtt.base; > > > > return vm; > > } > > @@ -2676,10 +2679,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj, > > > > if (obj->has_global_gtt_mapping && is_i915_ggtt(vm)) > > i915_gem_gtt_unbind_object(obj); > > - if (obj->has_aliasing_ppgtt_mapping) { > > - i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj); > > - obj->has_aliasing_ppgtt_mapping = 0; > > - } > > + > > + vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > > + obj->base.size >> PAGE_SHIFT); > > + > > i915_gem_gtt_finish_object(obj); > > i915_gem_object_unpin_pages(obj); > > > > @@ -3444,11 +3447,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > > return ret; > > } > > > > - if (obj->has_global_gtt_mapping) > > + if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping) > > i915_gem_gtt_bind_object(obj, cache_level); Are you planning to kill i915_gem_gtt_(un)bind_object? In many cases you seem to end up writing the global GTT PTEs twice because of it. I guess the only catch is that obj->has_gtt_mapping must be kept in sync w/ reality if you kill it. > > - if (obj->has_aliasing_ppgtt_mapping) > > - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, > > - obj, cache_level); > > + > > + vm->insert_entries(vm, obj->pages, > > + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > > + cache_level); > > > > i915_gem_obj_set_color(obj, vm, cache_level); This cache level stuff ends up looking a bit wonky, but I guess you didn't spend much time on it yet. I'll just note that I don't think we can allow per address space cache levels through this interface since that would break the case where the client renders to the buffer, then the server/compositor sets it to uncached and does a page flip. After this the client has to also use uncached mappings or the server/compositor won't realize that it would need to clflush again before page flipping. So I think you can just eliminate the vm argument from this function and then the function could look something like this (pardon my pseudo code): ... if (bound_any(obj)) { finish_gpu finish_gtt put_fence ... } for_each_safe(obj->vma_list) { vm = vma->vm; node = vma->node; if (verify_gtt(node, cache_level)) { unbind(ovj, vm); continue; } vm->insert_range(); vma->color = cacle_level; } ... This also got me thinking about MOCS. That think seems a bit dangerous in this case since the client can easily override the cache level w/o the server/compositor noticing. I guess we just have to make a rule that clients aren't allowed to override cache level w/ MOCS for window system buffers. Also IIRC someone told me that w/ uncached mappings the caches aren't snooped even on LLC platforms. If that's true, MOCS seems even more dangerous since the client could easily mix cached and uncached accesses. I don't really understand why uncached mappings wouldn't snoop on LLC platforms since the snooping should be more or less free.
On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote: > Also IIRC someone told me that w/ uncached mappings the caches aren't > snooped even on LLC platforms. If that's true, MOCS seems even more > dangerous since the client could easily mix cached and uncached > accesses. I don't really understand why uncached mappings wouldn't > snoop on LLC platforms since the snooping should be more or less free. Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I can tell. -Chris
On Tue, Jul 02, 2013 at 12:07:10PM +0100, Chris Wilson wrote: > On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote: > > Also IIRC someone told me that w/ uncached mappings the caches aren't > > snooped even on LLC platforms. If that's true, MOCS seems even more > > dangerous since the client could easily mix cached and uncached > > accesses. I don't really understand why uncached mappings wouldn't > > snoop on LLC platforms since the snooping should be more or less free. > > Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I > can tell. Can't remember now who said it, or could be I just misunderstood. But if it's not true, then MOCS seems actually useful. Otherwise it's going to be a clflush fest when you want to change from cached to uncached.
On Tue, Jul 02, 2013 at 02:34:59PM +0300, Ville Syrjälä wrote: > On Tue, Jul 02, 2013 at 12:07:10PM +0100, Chris Wilson wrote: > > On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote: > > > Also IIRC someone told me that w/ uncached mappings the caches aren't > > > snooped even on LLC platforms. If that's true, MOCS seems even more > > > dangerous since the client could easily mix cached and uncached > > > accesses. I don't really understand why uncached mappings wouldn't > > > snoop on LLC platforms since the snooping should be more or less free. > > > > Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I > > can tell. > > Can't remember now who said it, or could be I just misunderstood. But if > it's not true, then MOCS seems actually useful. Otherwise it's going to > be a clflush fest when you want to change from cached to uncached. Well MOCS doesn't apply for GTT access by the CPU, so there you only need to be concerned about whether you are rendering to a scanout. That is tracked in the ddx, but as you point out mesa would have to assume that any winsys buffer is a potential scanout unless we add an interface for it to ask the display server. -Chris
On Tue, Jul 02, 2013 at 12:38:33PM +0100, Chris Wilson wrote: > On Tue, Jul 02, 2013 at 02:34:59PM +0300, Ville Syrjälä wrote: > > On Tue, Jul 02, 2013 at 12:07:10PM +0100, Chris Wilson wrote: > > > On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote: > > > > Also IIRC someone told me that w/ uncached mappings the caches aren't > > > > snooped even on LLC platforms. If that's true, MOCS seems even more > > > > dangerous since the client could easily mix cached and uncached > > > > accesses. I don't really understand why uncached mappings wouldn't > > > > snoop on LLC platforms since the snooping should be more or less free. > > > > > > Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I > > > can tell. > > > > Can't remember now who said it, or could be I just misunderstood. But if > > it's not true, then MOCS seems actually useful. Otherwise it's going to > > be a clflush fest when you want to change from cached to uncached. > > Well MOCS doesn't apply for GTT access by the CPU, so there you only > need to be concerned about whether you are rendering to a scanout. That > is tracked in the ddx, but as you point out mesa would have to assume > that any winsys buffer is a potential scanout unless we add an interface > for it to ask the display server. I think userspace should make damn sure that it doesn't change the cache level (i.e. snooped vs. non-snooped or that special write-back mode we have on some hsw machines). Otherwise we'd be indeed screwed up since the clflush tracking done by the kernel would be screwed up. But thus far all the MOCS stuff seems to only be used to select in which caches and at what age we should allocate cachelines ... -Daniel
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index af0150e..f05d585 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2170,7 +2170,10 @@ request_to_vm(struct drm_i915_gem_request *request) struct drm_i915_private *dev_priv = request->ring->dev->dev_private; struct i915_address_space *vm; - vm = &dev_priv->gtt.base; + if (request->ctx) + vm = &request->ctx->ppgtt.base; + else + vm = &dev_priv->gtt.base; return vm; } @@ -2676,10 +2679,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj, if (obj->has_global_gtt_mapping && is_i915_ggtt(vm)) i915_gem_gtt_unbind_object(obj); - if (obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj); - obj->has_aliasing_ppgtt_mapping = 0; - } + + vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT); + i915_gem_gtt_finish_object(obj); i915_gem_object_unpin_pages(obj); @@ -3444,11 +3447,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return ret; } - if (obj->has_global_gtt_mapping) + if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(obj, cache_level); - if (obj->has_aliasing_ppgtt_mapping) - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, - obj, cache_level); + + vm->insert_entries(vm, obj->pages, + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, + cache_level); i915_gem_obj_set_color(obj, vm, cache_level); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 37ebfa2..cea036e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -526,10 +526,14 @@ static int do_switch(struct intel_ring_buffer *ring, if (from == to && from->last_ring == ring) return 0; + ret = to->ppgtt.switch_mm(&to->ppgtt, ring); + if (ret) + return ret; + if (ring != &dev_priv->ring[RCS] && from) { ret = i915_add_request(ring, NULL); if (ret) - return ret; + goto err_out; i915_gem_context_unreference(from); } @@ -538,7 +542,7 @@ static int do_switch(struct intel_ring_buffer *ring, ret = i915_gem_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false); if (ret) - return ret; + goto err_out; /* Clear this page out of any CPU caches for coherent swap-in/out. Note * that thanks to write = false in this call and us not setting any gpu @@ -546,10 +550,8 @@ static int do_switch(struct intel_ring_buffer *ring, * (when switching away from it), this won't block. * XXX: We need a real interface to do this instead of trickery. */ ret = i915_gem_object_set_to_gtt_domain(to->obj, false); - if (ret) { - i915_gem_object_unpin(to->obj); - return ret; - } + if (ret) + goto unpin_out; if (!to->obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); @@ -560,10 +562,8 @@ static int do_switch(struct intel_ring_buffer *ring, hw_flags |= MI_FORCE_RESTORE; ret = mi_set_context(ring, to, hw_flags); - if (ret) { - i915_gem_object_unpin(to->obj); - return ret; - } + if (ret) + goto unpin_out; /* The backing object for the context is done after switching to the * *next* context. Therefore we cannot retire the previous context until @@ -593,7 +593,7 @@ static int do_switch(struct intel_ring_buffer *ring, * scream. */ WARN_ON(mi_set_context(ring, from, MI_RESTORE_INHIBIT)); - return ret; + goto err_out; } i915_gem_object_unpin(from->obj); @@ -605,8 +605,13 @@ done: ring->last_context = to; to->is_initialized = true; to->last_ring = ring; - return 0; + +unpin_out: + i915_gem_object_unpin(to->obj); +err_out: + WARN_ON(from->ppgtt.switch_mm(&from->ppgtt, ring)); + return ret; } /** diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index aeec8c0..0f6bf3c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -437,11 +437,21 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, } /* Ensure ppgtt mapping exists if needed */ - if (dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, - obj, obj->cache_level); - + if (is_i915_ggtt(vm) && + dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { + /* FIXME: remove this later */ + struct i915_address_space *appgtt = + &dev_priv->gtt.aliasing_ppgtt->base; + unsigned long obj_offset = i915_gem_obj_offset(obj, appgtt); + + appgtt->insert_entries(appgtt, obj->pages, + obj_offset >> PAGE_SHIFT, + obj->cache_level); obj->has_aliasing_ppgtt_mapping = 1; + } else { + vm->insert_entries(vm, obj->pages, + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, + obj->cache_level); } if (entry->offset != i915_gem_obj_offset(obj, vm)) { @@ -864,7 +874,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_hw_context *ctx; struct i915_address_space *vm; u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 exec_start, exec_len; + u32 exec_start = args->batch_start_offset, exec_len; u32 mask, flags; int ret, mode, i; bool need_relocs; @@ -1085,8 +1095,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - exec_start = i915_gem_obj_offset(batch_obj, vm) + - args->batch_start_offset; + if (batch_obj->has_global_gtt_mapping) + exec_start += i915_gem_ggtt_offset(batch_obj); + else + exec_start += i915_gem_obj_offset(batch_obj, vm); + exec_len = args->batch_len; if (cliprects) { for (i = 0; i < args->num_cliprects; i++) {
This requires doing an actual switch of the page tables during the context switch/execbuf. Along the way, cut away as much "aliasing" ppgtt as possible Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++--------- drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++------- 3 files changed, 50 insertions(+), 28 deletions(-)