Message ID | 1443623779-10640-2-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Sep 30, 2015 at 03:36:18PM +0100, Michel Thierry wrote: I made one more change based on profiling: > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 67ef118..6ca39c1 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > flags |= PIN_GLOBAL; > > + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > + * limit address to the first 4GBs for unflagged objects. > + */ > + flags |= PIN_ZONE_4G; > + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) > + flags &= ~PIN_ZONE_4G; > + > if (!drm_mm_node_allocated(&vma->node)) { /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, * limit address to the first 4GBs for unflagged objects. */ if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0) flags |= PIN_ZONE_4G; > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if ((flags & PIN_MAPPABLE) == 0) > + flags |= PIN_HIGH; > } It saves me a patch if it we do it now. -Chris
Hi, On 30/09/15 15:36, Michel Thierry wrote: > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 67ef118..6ca39c1 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > flags |= PIN_GLOBAL; > > + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > + * limit address to the first 4GBs for unflagged objects. > + */ > + flags |= PIN_ZONE_4G; > + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) > + flags &= ~PIN_ZONE_4G; I spotted this patch purely accidentally since it was probably the reason pad_to_size IGT started failing in the Android tree - given how there is mention of changing the allocation order. Anyway beside the point.. Point is when I spotted it by accident, I also spotted this unusual handling of flags - set then conditionally clear. Why not conditionally set for one fewer line of code? Regards, Tvrtko
On Wed, Sep 30, 2015 at 04:45:18PM +0100, Tvrtko Ursulin wrote: > > Hi, > > On 30/09/15 15:36, Michel Thierry wrote: > >diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > >index 67ef118..6ca39c1 100644 > >--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > >+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > >@@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > > flags |= PIN_GLOBAL; > > > >+ /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > >+ * limit address to the first 4GBs for unflagged objects. > >+ */ > >+ flags |= PIN_ZONE_4G; > >+ if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) > >+ flags &= ~PIN_ZONE_4G; > > I spotted this patch purely accidentally since it was probably the > reason pad_to_size IGT started failing in the Android tree - given > how there is mention of changing the allocation order. Anyway beside > the point.. > > Point is when I spotted it by accident, I also spotted this unusual > handling of flags - set then conditionally clear. Why not > conditionally set for one fewer line of code? Because for the next few years entry->flags & 48B will be in the minority. The other reason is that is emphasizes that we limit everything by default and only allow the special objects to use highmem. The idiom of: x = default/expected; if (unlikely) x = something else; is fairly commonplace in the kernel, at least before gcc had likely(). -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 04d710f..ecc56fa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2826,6 +2826,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_OFFSET_BIAS (1<<3) #define PIN_USER (1<<4) #define PIN_UPDATE (1<<5) +#define PIN_ZONE_4G (1<<6) +#define PIN_HIGH (1<<7) #define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf5ef7a..f0cfbb9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3354,11 +3354,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 fence_alignment, unfenced_alignment; + u32 search_flag, alloc_flag; + u64 start, end; u64 size, fence_size; - u64 start = - flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - u64 end = - flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3398,6 +3396,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; } + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + end = vm->total; + if (flags & PIN_MAPPABLE) + end = min_t(u64, end, dev_priv->gtt.mappable_end); + if (flags & PIN_ZONE_4G) + end = min_t(u64, end, (1ULL << 32)); + if (alignment == 0) alignment = flags & PIN_MAPPABLE ? fence_alignment : unfenced_alignment; @@ -3433,13 +3438,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) goto err_unpin; + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } + search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, obj->cache_level, start, end, - DRM_MM_SEARCH_DEFAULT, - DRM_MM_CREATE_DEFAULT); + search_flag, + alloc_flag); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, obj->cache_level, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 67ef118..6ca39c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. + */ + flags |= PIN_ZONE_4G; + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) + flags &= ~PIN_ZONE_4G; + if (!drm_mm_node_allocated(&vma->node)) { if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_GLOBAL | PIN_MAPPABLE; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + if ((flags & PIN_MAPPABLE) == 0) + flags |= PIN_HIGH; } ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma) if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return !only_mappable_for_reloc(entry->flags); + if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && + (vma->node.start + vma->node.size - 1) >> 32) + return true; + return false; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index fd5aa47..484a9fb 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -690,7 +690,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) __u64 flags; __u64 rsvd1;