Message ID | 1438187043-34267-18-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Reviewed the patch & it looks fine. Reviewed-by: "Akash Goel <akash.goel@intel.com>" On 7/29/2015 9:54 PM, Michel Thierry wrote: > There are some allocations that must be only referenced by 32-bit > offsets. To limit the chances of having the first 4GB already full, > objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ > DRM_MM_CREATE_TOP flags > > In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > General State Heap (GSH) or Instruction State Heap (ISH) must be in a > 32-bit range, because the General State Offset and Instruction State > Offset are limited to 32-bits. > > Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if > they can be allocated above the 32-bit address range. To limit the > chances of having the first 4GB already full, objects will use > DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. > > v2: Changed flag logic from neeeds_32b, to supports_48b. > v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) > v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK > to use last PIN_ defined instead of hard-coded value; use correct limit > check in eb_vma_misplaced. 
(Chris) > v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) > v6: Apply pin-high for ggtt too (Chris) > v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) > Fix check for entries currently using +4GB addresses, use min_t and > other polish in object_bind_to_vm (Chris) > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Akash Goel <akash.goel@intel.com> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) > Signed-off-by: Michel Thierry <michel.thierry@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_gem.c | 25 +++++++++++++++++++------ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++ > include/uapi/drm/i915_drm.h | 3 ++- > 4 files changed, 36 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index ed2fbcd..c344805 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2775,6 +2775,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_OFFSET_BIAS (1<<3) > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > +#define PIN_ZONE_4G (1<<6) > +#define PIN_HIGH (1<<7) > #define PIN_OFFSET_MASK (~4095) > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 80f5d97..e1ca63f 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3349,11 +3349,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct drm_device *dev = obj->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > u32 fence_alignment, unfenced_alignment; > + u32 search_flag, alloc_flag; > + u64 start, end; > u64 size, fence_size; > - u64 start = > - flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > - u64 end = > - flags & PIN_MAPPABLE ? 
dev_priv->gtt.mappable_end : vm->total; > struct i915_vma *vma; > int ret; > > @@ -3393,6 +3391,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; > } > > + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > + end = vm->total; > + if (flags & PIN_MAPPABLE) > + end = min_t(u64, end, dev_priv->gtt.mappable_end); > + if (flags & PIN_ZONE_4G) > + end = min_t(u64, end, (1ULL << 32)); > + > if (alignment == 0) > alignment = flags & PIN_MAPPABLE ? fence_alignment : > unfenced_alignment; > @@ -3428,13 +3433,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > + if (flags & PIN_HIGH) { > + search_flag = DRM_MM_SEARCH_BELOW; > + alloc_flag = DRM_MM_CREATE_TOP; > + } else { > + search_flag = DRM_MM_SEARCH_DEFAULT; > + alloc_flag = DRM_MM_CREATE_DEFAULT; > + } > + > search_free: > ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > size, alignment, > obj->cache_level, > start, end, > - DRM_MM_SEARCH_DEFAULT, > - DRM_MM_CREATE_DEFAULT); > + search_flag, > + alloc_flag); > if (ret) { > ret = i915_gem_evict_something(dev, vm, size, alignment, > obj->cache_level, > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 923a3c4..78fc881 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > flags |= PIN_GLOBAL; > > + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > + * limit address to the first 4GBs for unflagged objects. 
> + */ > + flags |= PIN_ZONE_4G; > + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) > + flags &= ~PIN_ZONE_4G; > + > if (!drm_mm_node_allocated(&vma->node)) { > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if ((flags & PIN_MAPPABLE) == 0) > + flags |= PIN_HIGH; > } > > ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma) > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) > return !only_mappable_for_reloc(entry->flags); > > + if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && > + (vma->node.start + vma->node.size - 1) >> 32) > + return true; > + > return false; > } > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index dbd16a2..08e047c 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -690,7 +690,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_FENCE (1<<0) > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) > +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) > __u64 flags; > > __u64 rsvd1; >
On Wed, Jul 29, 2015 at 05:24:01PM +0100, Michel Thierry wrote: > There are some allocations that must be only referenced by 32-bit > offsets. To limit the chances of having the first 4GB already full, > objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ > DRM_MM_CREATE_TOP flags > > In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > General State Heap (GSH) or Instruction State Heap (ISH) must be in a > 32-bit range, because the General State Offset and Instruction State > Offset are limited to 32-bits. > > Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if > they can be allocated above the 32-bit address range. To limit the > chances of having the first 4GB already full, objects will use > DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. > > v2: Changed flag logic from neeeds_32b, to supports_48b. > v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) > v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK > to use last PIN_ defined instead of hard-coded value; use correct limit > check in eb_vma_misplaced. (Chris) > v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) > v6: Apply pin-high for ggtt too (Chris) > v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) > Fix check for entries currently using +4GB addresses, use min_t and > other polish in object_bind_to_vm (Chris) > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Akash Goel <akash.goel@intel.com> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) > Signed-off-by: Michel Thierry <michel.thierry@intel.com> For the record, where can I find the mesa patches for this? I think for simple things like this a References: line point to the relevant UMD patches in mailing-list archives would be great. 
-Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_gem.c | 25 +++++++++++++++++++------ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++++++++ > include/uapi/drm/i915_drm.h | 3 ++- > 4 files changed, 36 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index ed2fbcd..c344805 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2775,6 +2775,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_OFFSET_BIAS (1<<3) > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > +#define PIN_ZONE_4G (1<<6) > +#define PIN_HIGH (1<<7) > #define PIN_OFFSET_MASK (~4095) > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 80f5d97..e1ca63f 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3349,11 +3349,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct drm_device *dev = obj->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > u32 fence_alignment, unfenced_alignment; > + u32 search_flag, alloc_flag; > + u64 start, end; > u64 size, fence_size; > - u64 start = > - flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > - u64 end = > - flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; > struct i915_vma *vma; > int ret; > > @@ -3393,6 +3391,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; > } > > + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > + end = vm->total; > + if (flags & PIN_MAPPABLE) > + end = min_t(u64, end, dev_priv->gtt.mappable_end); > + if (flags & PIN_ZONE_4G) > + end = min_t(u64, end, (1ULL << 32)); > + > if (alignment == 0) > alignment = flags & PIN_MAPPABLE ? 
fence_alignment : > unfenced_alignment; > @@ -3428,13 +3433,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > + if (flags & PIN_HIGH) { > + search_flag = DRM_MM_SEARCH_BELOW; > + alloc_flag = DRM_MM_CREATE_TOP; > + } else { > + search_flag = DRM_MM_SEARCH_DEFAULT; > + alloc_flag = DRM_MM_CREATE_DEFAULT; > + } > + > search_free: > ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > size, alignment, > obj->cache_level, > start, end, > - DRM_MM_SEARCH_DEFAULT, > - DRM_MM_CREATE_DEFAULT); > + search_flag, > + alloc_flag); > if (ret) { > ret = i915_gem_evict_something(dev, vm, size, alignment, > obj->cache_level, > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 923a3c4..78fc881 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > flags |= PIN_GLOBAL; > > + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > + * limit address to the first 4GBs for unflagged objects. 
> + */ > + flags |= PIN_ZONE_4G; > + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) > + flags &= ~PIN_ZONE_4G; > + > if (!drm_mm_node_allocated(&vma->node)) { > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if ((flags & PIN_MAPPABLE) == 0) > + flags |= PIN_HIGH; > } > > ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma) > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) > return !only_mappable_for_reloc(entry->flags); > > + if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && > + (vma->node.start + vma->node.size - 1) >> 32) > + return true; > + > return false; > } > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index dbd16a2..08e047c 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -690,7 +690,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_FENCE (1<<0) > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) > +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) > __u64 flags; > > __u64 rsvd1; > -- > 2.4.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 8/5/2015 4:58 PM, Daniel Vetter wrote: > On Wed, Jul 29, 2015 at 05:24:01PM +0100, Michel Thierry wrote: >> There are some allocations that must be only referenced by 32-bit >> offsets. To limit the chances of having the first 4GB already full, >> objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ >> DRM_MM_CREATE_TOP flags >> >> In specific, any resource used with flat/heapless (0x00000000-0xfffff000) >> General State Heap (GSH) or Instruction State Heap (ISH) must be in a >> 32-bit range, because the General State Offset and Instruction State >> Offset are limited to 32-bits. >> >> Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if >> they can be allocated above the 32-bit address range. To limit the >> chances of having the first 4GB already full, objects will use >> DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. >> >> v2: Changed flag logic from neeeds_32b, to supports_48b. >> v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) >> v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK >> to use last PIN_ defined instead of hard-coded value; use correct limit >> check in eb_vma_misplaced. (Chris) >> v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) >> v6: Apply pin-high for ggtt too (Chris) >> v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) >> Fix check for entries currently using +4GB addresses, use min_t and >> other polish in object_bind_to_vm (Chris) >> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Akash Goel <akash.goel@intel.com> >> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) >> Signed-off-by: Michel Thierry <michel.thierry@intel.com> > > For the record, where can I find the mesa patches for this? I think for > simple things like this a References: line point to the relevant UMD > patches in mailing-list archives would be great. 
> -Daniel > Here they are, References: http://lists.freedesktop.org/archives/dri-devel/2015-July/085501.html and http://lists.freedesktop.org/archives/mesa-dev/2015-July/088003.html The name for the macro will be OUT_RELOC64_INSIDE_4G, as suggested by Chris.
On Wed, Aug 05, 2015 at 05:14:33PM +0100, Michel Thierry wrote: > On 8/5/2015 4:58 PM, Daniel Vetter wrote: > >On Wed, Jul 29, 2015 at 05:24:01PM +0100, Michel Thierry wrote: > >>There are some allocations that must be only referenced by 32-bit > >>offsets. To limit the chances of having the first 4GB already full, > >>objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ > >>DRM_MM_CREATE_TOP flags > >> > >>In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > >>General State Heap (GSH) or Instruction State Heap (ISH) must be in a > >>32-bit range, because the General State Offset and Instruction State > >>Offset are limited to 32-bits. > >> > >>Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if > >>they can be allocated above the 32-bit address range. To limit the > >>chances of having the first 4GB already full, objects will use > >>DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. > >> > >>v2: Changed flag logic from neeeds_32b, to supports_48b. > >>v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) > >>v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK > >>to use last PIN_ defined instead of hard-coded value; use correct limit > >>check in eb_vma_misplaced. (Chris) > >>v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) > >>v6: Apply pin-high for ggtt too (Chris) > >>v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) > >> Fix check for entries currently using +4GB addresses, use min_t and > >> other polish in object_bind_to_vm (Chris) > >> > >>Cc: Chris Wilson <chris@chris-wilson.co.uk> > >>Cc: Akash Goel <akash.goel@intel.com> > >>Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) > >>Signed-off-by: Michel Thierry <michel.thierry@intel.com> > > > >For the record, where can I find the mesa patches for this? 
I think for > >simple things like this a References: line point to the relevant UMD > >patches in mailing-list archives would be great. > >-Daniel > > > > Here they are, > > References: > http://lists.freedesktop.org/archives/dri-devel/2015-July/085501.html and > http://lists.freedesktop.org/archives/mesa-dev/2015-July/088003.html Sounds like there's still another revision we need to do on those? -Daniel > > The name for the macro will be OUT_RELOC64_INSIDE_4G, as suggested by Chris. >
On 8/6/2015 1:47 PM, Daniel Vetter wrote: > On Wed, Aug 05, 2015 at 05:14:33PM +0100, Michel Thierry wrote: >> On 8/5/2015 4:58 PM, Daniel Vetter wrote: >>> On Wed, Jul 29, 2015 at 05:24:01PM +0100, Michel Thierry wrote: >>>> There are some allocations that must be only referenced by 32-bit >>>> offsets. To limit the chances of having the first 4GB already full, >>>> objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ >>>> DRM_MM_CREATE_TOP flags >>>> >>>> In specific, any resource used with flat/heapless (0x00000000-0xfffff000) >>>> General State Heap (GSH) or Instruction State Heap (ISH) must be in a >>>> 32-bit range, because the General State Offset and Instruction State >>>> Offset are limited to 32-bits. >>>> >>>> Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if >>>> they can be allocated above the 32-bit address range. To limit the >>>> chances of having the first 4GB already full, objects will use >>>> DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. >>>> >>>> v2: Changed flag logic from neeeds_32b, to supports_48b. >>>> v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) >>>> v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK >>>> to use last PIN_ defined instead of hard-coded value; use correct limit >>>> check in eb_vma_misplaced. (Chris) >>>> v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) >>>> v6: Apply pin-high for ggtt too (Chris) >>>> v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) >>>> Fix check for entries currently using +4GB addresses, use min_t and >>>> other polish in object_bind_to_vm (Chris) >>>> >>>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>>> Cc: Akash Goel <akash.goel@intel.com> >>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) >>>> Signed-off-by: Michel Thierry <michel.thierry@intel.com> >>> >>> For the record, where can I find the mesa patches for this? 
I think for >>> simple things like this a References: line point to the relevant UMD >>> patches in mailing-list archives would be great. >>> -Daniel >>> >> >> Here they are, >> >> References: >> http://lists.freedesktop.org/archives/dri-devel/2015-July/085501.html and >> http://lists.freedesktop.org/archives/mesa-dev/2015-July/088003.html > > Sounds like there's still another revision we need to do on those? Yes, a couple of changes: making the set/clear functions internal to libdrm and updating the symbol-check test. I put it on hold, because I was also asked to not include the libdrm changes until the updated kernel header (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag) was merged. Then I also need to create a libdrm release, and update mesa's dependency to this new version number. -Michel
On Thu, Aug 06, 2015 at 05:27:38PM +0100, Michel Thierry wrote: > On 8/6/2015 1:47 PM, Daniel Vetter wrote: > >On Wed, Aug 05, 2015 at 05:14:33PM +0100, Michel Thierry wrote: > >>On 8/5/2015 4:58 PM, Daniel Vetter wrote: > >>>On Wed, Jul 29, 2015 at 05:24:01PM +0100, Michel Thierry wrote: > >>>>There are some allocations that must be only referenced by 32-bit > >>>>offsets. To limit the chances of having the first 4GB already full, > >>>>objects not requiring this workaround use DRM_MM_SEARCH_BELOW/ > >>>>DRM_MM_CREATE_TOP flags > >>>> > >>>>In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > >>>>General State Heap (GSH) or Instruction State Heap (ISH) must be in a > >>>>32-bit range, because the General State Offset and Instruction State > >>>>Offset are limited to 32-bits. > >>>> > >>>>Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if > >>>>they can be allocated above the 32-bit address range. To limit the > >>>>chances of having the first 4GB already full, objects will use > >>>>DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible. > >>>> > >>>>v2: Changed flag logic from neeeds_32b, to supports_48b. > >>>>v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel) > >>>>v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK > >>>>to use last PIN_ defined instead of hard-coded value; use correct limit > >>>>check in eb_vma_misplaced. 
(Chris) > >>>>v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris) > >>>>v6: Apply pin-high for ggtt too (Chris) > >>>>v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash) > >>>> Fix check for entries currently using +4GB addresses, use min_t and > >>>> other polish in object_bind_to_vm (Chris) > >>>> > >>>>Cc: Chris Wilson <chris@chris-wilson.co.uk> > >>>>Cc: Akash Goel <akash.goel@intel.com> > >>>>Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4) > >>>>Signed-off-by: Michel Thierry <michel.thierry@intel.com> > >>> > >>>For the record, where can I find the mesa patches for this? I think for > >>>simple things like this a References: line point to the relevant UMD > >>>patches in mailing-list archives would be great. > >>>-Daniel > >>> > >> > >>Here they are, > >> > >>References: > >>http://lists.freedesktop.org/archives/dri-devel/2015-July/085501.html and > >>http://lists.freedesktop.org/archives/mesa-dev/2015-July/088003.html > > > >Sounds like there's still another revision we need to do on those? > > Yes, a couple of changes, set/clear functions internal in libdrm and update > the symbol-check test. > > I put it on hold, because I was also asked to not include the libdrm changes > until the updated kernel header (EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag) was > merged. > > Then I also need to create a libdrm release, and update mesa's dependency to > this new version number. Nope, we need everything before I can pull in the kernel patch. Once that happens then you can do the release/dependency dance (of course don't include those bits in your proposed patches yet). -Daniel
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ed2fbcd..c344805 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2775,6 +2775,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_OFFSET_BIAS (1<<3) #define PIN_USER (1<<4) #define PIN_UPDATE (1<<5) +#define PIN_ZONE_4G (1<<6) +#define PIN_HIGH (1<<7) #define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 80f5d97..e1ca63f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3349,11 +3349,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 fence_alignment, unfenced_alignment; + u32 search_flag, alloc_flag; + u64 start, end; u64 size, fence_size; - u64 start = - flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - u64 end = - flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3393,6 +3391,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; } + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + end = vm->total; + if (flags & PIN_MAPPABLE) + end = min_t(u64, end, dev_priv->gtt.mappable_end); + if (flags & PIN_ZONE_4G) + end = min_t(u64, end, (1ULL << 32)); + if (alignment == 0) alignment = flags & PIN_MAPPABLE ? 
fence_alignment : unfenced_alignment; @@ -3428,13 +3433,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) goto err_unpin; + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } + search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, obj->cache_level, start, end, - DRM_MM_SEARCH_DEFAULT, - DRM_MM_CREATE_DEFAULT); + search_flag, + alloc_flag); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, obj->cache_level, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 923a3c4..78fc881 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -589,11 +589,20 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. 
+ */ + flags |= PIN_ZONE_4G; + if (entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) + flags &= ~PIN_ZONE_4G; + if (!drm_mm_node_allocated(&vma->node)) { if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_GLOBAL | PIN_MAPPABLE; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + if ((flags & PIN_MAPPABLE) == 0) + flags |= PIN_HIGH; } ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); @@ -671,6 +680,10 @@ eb_vma_misplaced(struct i915_vma *vma) if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return !only_mappable_for_reloc(entry->flags); + if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && + (vma->node.start + vma->node.size - 1) >> 32) + return true; + return false; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dbd16a2..08e047c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -690,7 +690,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) __u64 flags; __u64 rsvd1;