Message ID | 1437039211-2507-18-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 7/16/2015 3:03 PM, Michel Thierry wrote: > There are some allocations that must be only referenced by 32-bit > offsets. To limit the chances of having the first 4GB already full, > objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ > DRM_MM_CREATE_TOP flags > > In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > General State Heap (GSH) or Instruction State Heap (ISH) must be in a > 32-bit range, because the General State Offset and Instruction State > Offset are limited to 32-bits. > > Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if > they can be allocated above the 32-bit address range. To limit the > chances of having the first 4GB already full, objects will use > DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. > > v2: Changed flag logic from neeeds_32b, to supports_48b. > v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) > v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK > to use last PIN_ defined instead of hard-coded value; use correct limit > check in eb_vma_misplaced. 
(Chris) > v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) > v6: Apply pin-high for ggtt too (Chris) > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) > Signed-off-by: Michel Thierry <michel.thierry@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++-- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++ > include/uapi/drm/i915_drm.h | 3 ++- > 4 files changed, 29 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 1dbbbf0..f79cc7b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2771,6 +2771,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_OFFSET_BIAS (1<<3) > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > +#define PIN_ZONE_4G (1<<6) > +#define PIN_HIGH (1<<7) > #define PIN_OFFSET_MASK (~4095) > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 76b7612..cd7e4b6 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3728,6 +3728,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct drm_i915_private *dev_priv = dev->dev_private; > u32 fence_alignment, unfenced_alignment; > u64 size, fence_size; > + u32 search_flag = DRM_MM_SEARCH_DEFAULT; > + u32 alloc_flag = DRM_MM_CREATE_DEFAULT; > u64 start = > flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > u64 end = > @@ -3771,6 +3773,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > size = flags & PIN_MAPPABLE ? 
fence_size : obj->base.size; > } > > + if (flags & PIN_HIGH) { > + search_flag = DRM_MM_SEARCH_BELOW; > + alloc_flag = DRM_MM_CREATE_TOP; > + } > + > + if (flags & PIN_ZONE_4G) > + end = (1ULL << 32); Would this be fine for platforms where only 2 GB of GGTT space is available? For GEN7 & older platforms, only GGTT would be used. Shouldn't this check for the PIN_ZONE_4G flag be done only for PPGTT vm? For GGTT we have to obey the PIN_MAPPABLE flag; if set, then 'end' will be 256 MB. If both PIN_MAPPABLE & PIN_ZONE_4G flags are set, the 'end' should still be 256 MB for GGTT vm. So we need to be mindful in defining the 'end' for platforms where only GGTT would be used. Best Regards Akash > + > if (alignment == 0) > alignment = flags & PIN_MAPPABLE ? fence_alignment : > unfenced_alignment; > @@ -3811,8 +3821,8 @@ search_free: > size, alignment, > obj->cache_level, > start, end, > - DRM_MM_SEARCH_DEFAULT, > - DRM_MM_CREATE_DEFAULT); > + search_flag, > + alloc_flag); > if (ret) { > ret = i915_gem_evict_something(dev, vm, size, alignment, > obj->cache_level, > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 923a3c4..209e8e2 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > flags |= PIN_GLOBAL; > > + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > + * limit address to the first 4GBs for unflagged objects. 
> + */ > + flags |= PIN_ZONE_4G; > + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) > + flags &= ~PIN_ZONE_4G; > + > if (!drm_mm_node_allocated(&vma->node)) { > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if ((flags & PIN_MAPPABLE) == 0) > + flags |= PIN_HIGH; > } > > ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma) > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) > return !only_mappable_for_reloc(entry->flags); > > + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && > + (vma->node.start + vma->node.size) >= (1ULL << 32)) > + return true; > + > return false; > } > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index e7c29f1..e4471e8 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -686,7 +686,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_FENCE (1<<0) > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) > +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) > __u64 flags; > > __u64 rsvd1; >
On Mon, Jul 27, 2015 at 08:04:50PM +0530, Goel, Akash wrote: > > > On 7/16/2015 3:03 PM, Michel Thierry wrote: > >There are some allocations that must be only referenced by 32-bit > >offsets. To limit the chances of having the first 4GB already full, > >objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ > >DRM_MM_CREATE_TOP flags > > > >In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > >General State Heap (GSH) or Instruction State Heap (ISH) must be in a > >32-bit range, because the General State Offset and Instruction State > >Offset are limited to 32-bits. > > > >Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if > >they can be allocated above the 32-bit address range. To limit the > >chances of having the first 4GB already full, objects will use > >DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. > > > >v2: Changed flag logic from neeeds_32b, to supports_48b. > >v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) > >v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK > >to use last PIN_ defined instead of hard-coded value; use correct limit > >check in eb_vma_misplaced. 
(Chris) > >v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) > >v6: Apply pin-high for ggtt too (Chris) > > > >Cc: Chris Wilson <chris@chris-wilson.co.uk> > >Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) > >Signed-off-by: Michel Thierry <michel.thierry@intel.com> > >--- > > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > > drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++-- > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++ > > include/uapi/drm/i915_drm.h | 3 ++- > > 4 files changed, 29 insertions(+), 3 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > >index 1dbbbf0..f79cc7b 100644 > >--- a/drivers/gpu/drm/i915/i915_drv.h > >+++ b/drivers/gpu/drm/i915/i915_drv.h > >@@ -2771,6 +2771,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > > #define PIN_OFFSET_BIAS (1<<3) > > #define PIN_USER (1<<4) > > #define PIN_UPDATE (1<<5) > >+#define PIN_ZONE_4G (1<<6) > >+#define PIN_HIGH (1<<7) > > #define PIN_OFFSET_MASK (~4095) > > int __must_check > > i915_gem_object_pin(struct drm_i915_gem_object *obj, > >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > >index 76b7612..cd7e4b6 100644 > >--- a/drivers/gpu/drm/i915/i915_gem.c > >+++ b/drivers/gpu/drm/i915/i915_gem.c > >@@ -3728,6 +3728,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > > struct drm_i915_private *dev_priv = dev->dev_private; > > u32 fence_alignment, unfenced_alignment; > > u64 size, fence_size; > >+ u32 search_flag = DRM_MM_SEARCH_DEFAULT; > >+ u32 alloc_flag = DRM_MM_CREATE_DEFAULT; > > u64 start = > > flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > > u64 end = > >@@ -3771,6 +3773,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > > size = flags & PIN_MAPPABLE ? 
fence_size : obj->base.size; > > } > > > >+ if (flags & PIN_HIGH) { > >+ search_flag = DRM_MM_SEARCH_BELOW; > >+ alloc_flag = DRM_MM_CREATE_TOP; > >+ } > >+ > >+ if (flags & PIN_ZONE_4G) > >+ end = (1ULL << 32); > > Would this be fine for platforms, where only 2 GB of GGTT space is > available ? For GEN7 & older platforms, only GGTT would be used. > Shouldn't this check for PIN_ZONE_4G flag, be done only for PPGTT vm ? > For GGTT we have to obey the PIN_MAPPABLE flag, if set then 'end' > will be 256 MB. > If both PIN_MAPPABLE & PIN_ZONE_4G flags are set, the 'end' should > still be 256 MB, for GGTT vm. > So we need to mindful in defining the 'end' for platforms where > only GGTT would be used. Ah, indeed. I didn't notice since this is end = min(end, 1<<32) in my tree. -Chris
On 7/27/2015 3:46 PM, Chris Wilson wrote: > On Mon, Jul 27, 2015 at 08:04:50PM +0530, Goel, Akash wrote: >> >> >> On 7/16/2015 3:03 PM, Michel Thierry wrote: >>> There are some allocations that must be only referenced by 32-bit >>> offsets. To limit the chances of having the first 4GB already full, >>> objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ >>> DRM_MM_CREATE_TOP flags >>> >>> In specific, any resource used with flat/heapless (0x00000000-0xfffff000) >>> General State Heap (GSH) or Instruction State Heap (ISH) must be in a >>> 32-bit range, because the General State Offset and Instruction State >>> Offset are limited to 32-bits. >>> >>> Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if >>> they can be allocated above the 32-bit address range. To limit the >>> chances of having the first 4GB already full, objects will use >>> DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. >>> >>> v2: Changed flag logic from neeeds_32b, to supports_48b. >>> v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) >>> v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK >>> to use last PIN_ defined instead of hard-coded value; use correct limit >>> check in eb_vma_misplaced. 
(Chris) >>> v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) >>> v6: Apply pin-high for ggtt too (Chris) >>> >>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) >>> Signed-off-by: Michel Thierry <michel.thierry@intel.com> >>> --- >>> drivers/gpu/drm/i915/i915_drv.h | 2 ++ >>> drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++-- >>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++ >>> include/uapi/drm/i915_drm.h | 3 ++- >>> 4 files changed, 29 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >>> index 1dbbbf0..f79cc7b 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -2771,6 +2771,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); >>> #define PIN_OFFSET_BIAS (1<<3) >>> #define PIN_USER (1<<4) >>> #define PIN_UPDATE (1<<5) >>> +#define PIN_ZONE_4G (1<<6) >>> +#define PIN_HIGH (1<<7) >>> #define PIN_OFFSET_MASK (~4095) >>> int __must_check >>> i915_gem_object_pin(struct drm_i915_gem_object *obj, >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >>> index 76b7612..cd7e4b6 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem.c >>> +++ b/drivers/gpu/drm/i915/i915_gem.c >>> @@ -3728,6 +3728,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, >>> struct drm_i915_private *dev_priv = dev->dev_private; >>> u32 fence_alignment, unfenced_alignment; >>> u64 size, fence_size; >>> + u32 search_flag = DRM_MM_SEARCH_DEFAULT; >>> + u32 alloc_flag = DRM_MM_CREATE_DEFAULT; >>> u64 start = >>> flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; >>> u64 end = >>> @@ -3771,6 +3773,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, >>> size = flags & PIN_MAPPABLE ? 
fence_size : obj->base.size; >>> } >>> >>> + if (flags & PIN_HIGH) { >>> + search_flag = DRM_MM_SEARCH_BELOW; >>> + alloc_flag = DRM_MM_CREATE_TOP; >>> + } >>> + >>> + if (flags & PIN_ZONE_4G) >>> + end = (1ULL << 32); >> >> Would this be fine for platforms, where only 2 GB of GGTT space is >> available ? For GEN7 & older platforms, only GGTT would be used. >> Shouldn't this check for PIN_ZONE_4G flag, be done only for PPGTT vm ? >> For GGTT we have to obey the PIN_MAPPABLE flag, if set then 'end' >> will be 256 MB. >> If both PIN_MAPPABLE & PIN_ZONE_4G flags are set, the 'end' should >> still be 256 MB, for GGTT vm. >> So we need to mindful in defining the 'end' for platforms where >> only GGTT would be used. > > Ah, indeed. I didn't notice since this is end = min(end, 1<<32) in my > tree. Thanks, I'll resend with this fix.
On Thu, Jul 16, 2015 at 10:33:29AM +0100, Michel Thierry wrote: > + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && > + (vma->node.start + vma->node.size) >= (1ULL << 32)) > + return true; gcc completely screwed this up here and used 0 for 1ULL<<32. Note that we can allow start + size == 4G (since the end is exclusive), so I went with if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && (vma->node.start + vma->node.size - 1) >> 32) return true; instead. -Chris
On 7/27/2015 10:11 PM, Chris Wilson wrote: > On Thu, Jul 16, 2015 at 10:33:29AM +0100, Michel Thierry wrote: >> + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && >> + (vma->node.start + vma->node.size) >= (1ULL << 32)) >> + return true; > > gcc completely screwed this up here and used 0 for 1ULL<<32. > > Note that we can allow start + size == 4G (since the end is exclusive), > so I went with > > if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && > (vma->node.start + vma->node.size - 1) >> 32) > return true; > > instead. > -Chris > Thanks, I'll include this change in the next patch version.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1dbbbf0..f79cc7b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2771,6 +2771,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_OFFSET_BIAS (1<<3) #define PIN_USER (1<<4) #define PIN_UPDATE (1<<5) +#define PIN_ZONE_4G (1<<6) +#define PIN_HIGH (1<<7) #define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 76b7612..cd7e4b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3728,6 +3728,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = dev->dev_private; u32 fence_alignment, unfenced_alignment; u64 size, fence_size; + u32 search_flag = DRM_MM_SEARCH_DEFAULT; + u32 alloc_flag = DRM_MM_CREATE_DEFAULT; u64 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; u64 end = @@ -3771,6 +3773,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; } + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } + + if (flags & PIN_ZONE_4G) + end = (1ULL << 32); + if (alignment == 0) alignment = flags & PIN_MAPPABLE ? 
fence_alignment : unfenced_alignment; @@ -3811,8 +3821,8 @@ search_free: size, alignment, obj->cache_level, start, end, - DRM_MM_SEARCH_DEFAULT, - DRM_MM_CREATE_DEFAULT); + search_flag, + alloc_flag); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, obj->cache_level, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 923a3c4..209e8e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. + */ + flags |= PIN_ZONE_4G; + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) + flags &= ~PIN_ZONE_4G; + if (!drm_mm_node_allocated(&vma->node)) { if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_GLOBAL | PIN_MAPPABLE; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + if ((flags & PIN_MAPPABLE) == 0) + flags |= PIN_HIGH; } ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma) if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return !only_mappable_for_reloc(entry->flags); + if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && + (vma->node.start + vma->node.size) >= (1ULL << 32)) + return true; + return false; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e7c29f1..e4471e8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -686,7 +686,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) 
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) __u64 flags; __u64 rsvd1;