Message ID | 1449575707-20933-1-git-send-email-thomas.daniel@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 12/8/2015 11:55 AM, Thomas Daniel wrote: > From: Chris Wilson <chris@chris-wilson.co.uk> > > Userspace can pass in an offset that it presumes the object is located > at. The kernel will then do its utmost to fit the object into that > location. The assumption is that userspace is handling its own object > locations (for example along with full-ppgtt) and that the kernel will > rarely have to make space for the user's requests. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > (Not published externally) > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > to allow eviction of soft-pinned objects when another soft-pinned object used > by a subsequent execbuffer overlaps reported by Michal Winiarski. > (Not published externally) > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > pinned first after an address conflict happens to avoid repeated conflicts in > rare cases (Suggested by Chris Wilson). Expanded comment on > drm_i915_gem_exec_object2.offset to cover this new API. > > v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability > (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure > buffers are not considered misplaced without the user specifying > EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any > addressing workarounds. Updated object2.offset field comment again to clarify > NO_RELOC case (Chris). checkpatch cleanup. > > v6: Trivial rebase on latest drm-intel-nightly > > v7: Catch attempts to pin above the max virtual address size and return > EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and > EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin > something at an offset above 4GB (Chris, Daniel Vetter). > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Akash Goel <akash.goel@intel.com> > Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > Cc: Michal Winiarski <michal.winiarski@intel.com> > Cc: Zou Nanhai <nanhai.zou@intel.com> > Cc: Kristian Høgsberg <hoegsberg@gmail.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 3 ++ > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_gem.c | 64 ++++++++++++++++++++---------- > drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- > include/uapi/drm/i915_drm.h | 12 ++++-- > 6 files changed, 111 insertions(+), 25 deletions(-) > Extra support from the other patch aside, v6 already had rb from Akash and this one, Reviewed-by: Michel Thierry <michel.thierry@intel.com> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index a81c766..52b8289 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > case I915_PARAM_HAS_RESOURCE_STREAMER: > value = HAS_RESOURCE_STREAMER(dev); > break; > + case I915_PARAM_HAS_EXEC_SOFTPIN: > + value = 1; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index f1a8a53..315d79d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2860,6 +2860,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_UPDATE (1<<5) > #define PIN_ZONE_4G (1<<6) > #define PIN_HIGH (1<<7) > +#define PIN_OFFSET_FIXED (1<<8) > #define PIN_OFFSET_MASK (~4095) > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > @@ -3205,6 +3206,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, > unsigned long start, > unsigned long end, > unsigned flags); > +int __must_check i915_gem_evict_for_vma(struct i915_vma *target); > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); > > /* belongs in i915_gem_gtt.h */ > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index dfaf25b..801f3d0 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3476,30 +3476,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > - if (flags & PIN_HIGH) { > - search_flag = DRM_MM_SEARCH_BELOW; > - alloc_flag = DRM_MM_CREATE_TOP; > + if (flags & PIN_OFFSET_FIXED) { > + uint64_t offset = flags & PIN_OFFSET_MASK; > + > + if (offset & (alignment - 1) || offset + size > end) { > + ret = -EINVAL; > + goto err_free_vma; > + } > + vma->node.start = offset; > + vma->node.size = size; > + vma->node.color = obj->cache_level; > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma); > + if (ret == 0) > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + } > + if (ret) > + goto err_free_vma; > } else { > - search_flag = DRM_MM_SEARCH_DEFAULT; > - alloc_flag = DRM_MM_CREATE_DEFAULT; > - } > + if (flags & PIN_HIGH) { > + search_flag = DRM_MM_SEARCH_BELOW; > + alloc_flag = DRM_MM_CREATE_TOP; > + } else { > + search_flag = DRM_MM_SEARCH_DEFAULT; > + alloc_flag = DRM_MM_CREATE_DEFAULT; > + } > > search_free: > - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > - size, alignment, > - obj->cache_level, > - start, end, > - search_flag, > - alloc_flag); > - if (ret) { > - ret = i915_gem_evict_something(dev, vm, size, alignment, > - obj->cache_level, > - start, end, > - flags); > - if (ret == 0) > - goto search_free; > + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > + size, alignment, > + obj->cache_level, > + start, end, > + search_flag, > + alloc_flag); > + if (ret) { > + ret = i915_gem_evict_something(dev, vm, size, alignment, > + obj->cache_level, > + start, end, > + flags); > + if (ret == 0) > + goto search_free; > > - goto err_free_vma; > + goto err_free_vma; > + } > } > if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { > ret = -EINVAL; > @@ -4090,6 +4110,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) > vma->node.start < (flags & PIN_OFFSET_MASK)) > return true; > > + if (flags & PIN_OFFSET_FIXED && > + vma->node.start != (flags & PIN_OFFSET_MASK)) > + return true; > + > return false; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index d71a133..07c6e4d 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -199,6 +199,45 @@ found: > return ret; > } > > +int > +i915_gem_evict_for_vma(struct i915_vma *target) > +{ > + struct drm_mm_node *node, *next; > + > + list_for_each_entry_safe(node, next, > + &target->vm->mm.head_node.node_list, > + node_list) { > + struct i915_vma *vma; > + int ret; > + > + if (node->start + node->size <= target->node.start) > + continue; > + if (node->start >= target->node.start + target->node.size) > + break; > + > + vma = container_of(node, typeof(*vma), node); > + > + if (vma->pin_count) { > + if (!vma->exec_entry || (vma->pin_count > 1)) > + /* Object is pinned for some other use */ > + return -EBUSY; > + > + /* We need to evict a buffer in the same batch */ > + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > + /* Overlapping fixed objects in the same batch */ > + return -EINVAL; > + > + return -ENOSPC; > + } > + > + ret = i915_vma_unbind(vma); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > /** > * i915_gem_evict_vm - Evict all idle vmas from a vm > * @vm: Address space to cleanse > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index a4c243c..48ec484 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if (entry->flags & EXEC_OBJECT_PINNED) > + flags |= entry->offset | PIN_OFFSET_FIXED; > if ((flags & PIN_MAPPABLE) == 0) > flags |= PIN_HIGH; > } > @@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma) > vma->node.start & (entry->alignment - 1)) > return true; > > + if (entry->flags & EXEC_OBJECT_PINNED && > + vma->node.start != entry->offset) > + return true; > + > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && > vma->node.start < BATCH_OFFSET_BIAS) > return true; > @@ -695,6 +701,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > struct i915_vma *vma; > struct i915_address_space *vm; > struct list_head ordered_vmas; > + struct list_head pinned_vmas; > bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; > int retry; > > @@ -703,6 +710,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; > > INIT_LIST_HEAD(&ordered_vmas); > + INIT_LIST_HEAD(&pinned_vmas); > while (!list_empty(vmas)) { > struct drm_i915_gem_exec_object2 *entry; > bool need_fence, need_mappable; > @@ -721,7 +729,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > obj->tiling_mode != I915_TILING_NONE; > need_mappable = need_fence || need_reloc_mappable(vma); > > - if (need_mappable) { > + if (entry->flags & EXEC_OBJECT_PINNED) > + list_move_tail(&vma->exec_list, &pinned_vmas); > + else if (need_mappable) { > entry->flags |= __EXEC_OBJECT_NEEDS_MAP; > list_move(&vma->exec_list, &ordered_vmas); > } else > @@ -731,6 +741,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > obj->base.pending_write_domain = 0; > } > list_splice(&ordered_vmas, vmas); > + list_splice(&pinned_vmas, vmas); > > /* Attempt to pin all of the buffers into the GTT. > * This is done in 3 phases: > @@ -1317,7 +1328,8 @@ eb_get_batch(struct eb_vmas *eb) > * Note that actual hangs have only been observed on gen7, but for > * paranoia do it everywhere. > */ > - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) > + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > > return vma->obj; > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 67ef73a..d727b49 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_EU_TOTAL 34 > #define I915_PARAM_HAS_GPU_RESET 35 > #define I915_PARAM_HAS_RESOURCE_STREAMER 36 > +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 > > typedef struct drm_i915_getparam { > __s32 param; > @@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 { > __u64 alignment; > > /** > - * Returned value of the updated offset of the object, for future > - * presumed_offset writes. > + * When the EXEC_OBJECT_PINNED flag is specified this is populated by > + * the user with the GTT offset at which this object will be pinned. > + * When the I915_EXEC_NO_RELOC flag is specified this must contain the > + * presumed_offset of the object. > + * During execbuffer2 the kernel populates it with the value of the > + * current GTT offset of the object, for future presumed_offset writes. > */ > __u64 offset; > > @@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) > +#define EXEC_OBJECT_PINNED (1<<4) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) > __u64 flags; > > __u64 rsvd1; > -- > 1.9.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx >
On 08/12/15 18:49, Michel Thierry wrote: > On 12/8/2015 11:55 AM, Thomas Daniel wrote: >> From: Chris Wilson <chris@chris-wilson.co.uk> >> >> Userspace can pass in an offset that it presumes the object is located >> at. The kernel will then do its utmost to fit the object into that >> location. The assumption is that userspace is handling its own object >> locations (for example along with full-ppgtt) and that the kernel will >> rarely have to make space for the user's requests. >> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> >> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested >> by Chris >> Wilson. Fixed incorrect error paths causing crash found by Michal >> Winiarski. >> (Not published externally) >> >> v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed >> eviction >> to allow eviction of soft-pinned objects when another soft-pinned >> object used >> by a subsequent execbuffer overlaps reported by Michal Winiarski. >> (Not published externally) >> >> v4: Moved soft-pinned objects to the front of ordered_vmas so that >> they are >> pinned first after an address conflict happens to avoid repeated >> conflicts in >> rare cases (Suggested by Chris Wilson). Expanded comment on >> drm_i915_gem_exec_object2.offset to cover this new API. >> >> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this >> capability >> (Kristian). Added check for multiple pinnings on eviction (Akash). >> Made sure >> buffers are not considered misplaced without the user specifying >> EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for >> any >> addressing workarounds. Updated object2.offset field comment again to >> clarify >> NO_RELOC case (Chris). checkpatch cleanup. >> >> v6: Trivial rebase on latest drm-intel-nightly >> >> v7: Catch attempts to pin above the max virtual address size and return >> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and >> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin >> something at an offset above 4GB (Chris, Daniel Vetter). >> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Akash Goel <akash.goel@intel.com> >> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >> Cc: Michal Winiarski <michal.winiarski@intel.com> >> Cc: Zou Nanhai <nanhai.zou@intel.com> >> Cc: Kristian Høgsberg <hoegsberg@gmail.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> >> --- >> drivers/gpu/drm/i915/i915_dma.c | 3 ++ >> drivers/gpu/drm/i915/i915_drv.h | 2 + >> drivers/gpu/drm/i915/i915_gem.c | 64 >> ++++++++++++++++++++---------- >> drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- >> include/uapi/drm/i915_drm.h | 12 ++++-- >> 6 files changed, 111 insertions(+), 25 deletions(-) >> > > Extra support from the other patch aside, v6 already had rb from Akash > and this one, > Reviewed-by: Michel Thierry <michel.thierry@intel.com> This patch was acked by the PDT so I merged it to drm-intel-next-queued. Regards, Tvrtko
On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote: > > On 08/12/15 18:49, Michel Thierry wrote: > >On 12/8/2015 11:55 AM, Thomas Daniel wrote: > >>From: Chris Wilson <chris@chris-wilson.co.uk> > >> > >>Userspace can pass in an offset that it presumes the object is located > >>at. The kernel will then do its utmost to fit the object into that > >>location. The assumption is that userspace is handling its own object > >>locations (for example along with full-ppgtt) and that the kernel will > >>rarely have to make space for the user's requests. > >> > >>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >> > >>v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested > >>by Chris > >>Wilson. Fixed incorrect error paths causing crash found by Michal > >>Winiarski. > >>(Not published externally) > >> > >>v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed > >>eviction > >>to allow eviction of soft-pinned objects when another soft-pinned > >>object used > >>by a subsequent execbuffer overlaps reported by Michal Winiarski. > >>(Not published externally) > >> > >>v4: Moved soft-pinned objects to the front of ordered_vmas so that > >>they are > >>pinned first after an address conflict happens to avoid repeated > >>conflicts in > >>rare cases (Suggested by Chris Wilson). Expanded comment on > >>drm_i915_gem_exec_object2.offset to cover this new API. > >> > >>v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this > >>capability > >>(Kristian). Added check for multiple pinnings on eviction (Akash). > >>Made sure > >>buffers are not considered misplaced without the user specifying > >>EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for > >>any > >>addressing workarounds. Updated object2.offset field comment again to > >>clarify > >>NO_RELOC case (Chris). checkpatch cleanup. > >> > >>v6: Trivial rebase on latest drm-intel-nightly > >> > >>v7: Catch attempts to pin above the max virtual address size and return > >>EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and > >>EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin > >>something at an offset above 4GB (Chris, Daniel Vetter). > >> > >>Cc: Chris Wilson <chris@chris-wilson.co.uk> > >>Cc: Akash Goel <akash.goel@intel.com> > >>Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > >>Cc: Michal Winiarski <michal.winiarski@intel.com> > >>Cc: Zou Nanhai <nanhai.zou@intel.com> > >>Cc: Kristian Høgsberg <hoegsberg@gmail.com> > >>Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > >>Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> > >>--- > >> drivers/gpu/drm/i915/i915_dma.c | 3 ++ > >> drivers/gpu/drm/i915/i915_drv.h | 2 + > >> drivers/gpu/drm/i915/i915_gem.c | 64 > >>++++++++++++++++++++---------- > >> drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ > >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- > >> include/uapi/drm/i915_drm.h | 12 ++++-- > >> 6 files changed, 111 insertions(+), 25 deletions(-) > >> > > > >Extra support from the other patch aside, v6 already had rb from Akash > >and this one, > >Reviewed-by: Michel Thierry <michel.thierry@intel.com> > > This patch was acked by the PDT so I merged it to drm-intel-next-queued. Please revert immediately. We need to fix the ABI for canonical addressing before proceeding. Then please work on the better patch. -Chris
On 09/12/15 10:51, Chris Wilson wrote: > On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote: >> >> On 08/12/15 18:49, Michel Thierry wrote: >>> On 12/8/2015 11:55 AM, Thomas Daniel wrote: >>>> From: Chris Wilson <chris@chris-wilson.co.uk> >>>> >>>> Userspace can pass in an offset that it presumes the object is located >>>> at. The kernel will then do its utmost to fit the object into that >>>> location. The assumption is that userspace is handling its own object >>>> locations (for example along with full-ppgtt) and that the kernel will >>>> rarely have to make space for the user's requests. >>>> >>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>> >>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested >>>> by Chris >>>> Wilson. Fixed incorrect error paths causing crash found by Michal >>>> Winiarski. >>>> (Not published externally) >>>> >>>> v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed >>>> eviction >>>> to allow eviction of soft-pinned objects when another soft-pinned >>>> object used >>>> by a subsequent execbuffer overlaps reported by Michal Winiarski. >>>> (Not published externally) >>>> >>>> v4: Moved soft-pinned objects to the front of ordered_vmas so that >>>> they are >>>> pinned first after an address conflict happens to avoid repeated >>>> conflicts in >>>> rare cases (Suggested by Chris Wilson). Expanded comment on >>>> drm_i915_gem_exec_object2.offset to cover this new API. >>>> >>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this >>>> capability >>>> (Kristian). Added check for multiple pinnings on eviction (Akash). >>>> Made sure >>>> buffers are not considered misplaced without the user specifying >>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for >>>> any >>>> addressing workarounds. Updated object2.offset field comment again to >>>> clarify >>>> NO_RELOC case (Chris). checkpatch cleanup. >>>> >>>> v6: Trivial rebase on latest drm-intel-nightly >>>> >>>> v7: Catch attempts to pin above the max virtual address size and return >>>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and >>>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin >>>> something at an offset above 4GB (Chris, Daniel Vetter). >>>> >>>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>>> Cc: Akash Goel <akash.goel@intel.com> >>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >>>> Cc: Michal Winiarski <michal.winiarski@intel.com> >>>> Cc: Zou Nanhai <nanhai.zou@intel.com> >>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com> >>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> >>>> --- >>>> drivers/gpu/drm/i915/i915_dma.c | 3 ++ >>>> drivers/gpu/drm/i915/i915_drv.h | 2 + >>>> drivers/gpu/drm/i915/i915_gem.c | 64 >>>> ++++++++++++++++++++---------- >>>> drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ >>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- >>>> include/uapi/drm/i915_drm.h | 12 ++++-- >>>> 6 files changed, 111 insertions(+), 25 deletions(-) >>>> >>> >>> Extra support from the other patch aside, v6 already had rb from Akash >>> and this one, >>> Reviewed-by: Michel Thierry <michel.thierry@intel.com> >> >> This patch was acked by the PDT so I merged it to drm-intel-next-queued. > > Please revert immediately. We need to fix the ABI for canonical > addressing before proceeding. Then please work on the better patch. Sounds like this is a valid comment, guys please check the thread with subject "[PATCH v2] drm/i915: Avoid writing relocs with addresses in non-canonical form". amd64 ABI mandates rules on virtual addresess - https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses. Regards, Tvrtko
On 12/9/2015 12:34 PM, Tvrtko Ursulin wrote: > > On 09/12/15 10:51, Chris Wilson wrote: >> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote: >>> >>> On 08/12/15 18:49, Michel Thierry wrote: >>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote: >>>>> From: Chris Wilson <chris@chris-wilson.co.uk> >>>>> >>>>> Userspace can pass in an offset that it presumes the object is located >>>>> at. The kernel will then do its utmost to fit the object into that >>>>> location. The assumption is that userspace is handling its own object >>>>> locations (for example along with full-ppgtt) and that the kernel will >>>>> rarely have to make space for the user's requests. >>>>> >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>>> >>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested >>>>> by Chris >>>>> Wilson. Fixed incorrect error paths causing crash found by Michal >>>>> Winiarski. >>>>> (Not published externally) >>>>> >>>>> v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed >>>>> eviction >>>>> to allow eviction of soft-pinned objects when another soft-pinned >>>>> object used >>>>> by a subsequent execbuffer overlaps reported by Michal Winiarski. >>>>> (Not published externally) >>>>> >>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so that >>>>> they are >>>>> pinned first after an address conflict happens to avoid repeated >>>>> conflicts in >>>>> rare cases (Suggested by Chris Wilson). Expanded comment on >>>>> drm_i915_gem_exec_object2.offset to cover this new API. >>>>> >>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this >>>>> capability >>>>> (Kristian). Added check for multiple pinnings on eviction (Akash). >>>>> Made sure >>>>> buffers are not considered misplaced without the user specifying >>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for >>>>> any >>>>> addressing workarounds. Updated object2.offset field comment again to >>>>> clarify >>>>> NO_RELOC case (Chris). checkpatch cleanup. >>>>> >>>>> v6: Trivial rebase on latest drm-intel-nightly >>>>> >>>>> v7: Catch attempts to pin above the max virtual address size and >>>>> return >>>>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and >>>>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt >>>>> to pin >>>>> something at an offset above 4GB (Chris, Daniel Vetter). >>>>> >>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>>>> Cc: Akash Goel <akash.goel@intel.com> >>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >>>>> Cc: Michal Winiarski <michal.winiarski@intel.com> >>>>> Cc: Zou Nanhai <nanhai.zou@intel.com> >>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com> >>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> >>>>> --- >>>>> drivers/gpu/drm/i915/i915_dma.c | 3 ++ >>>>> drivers/gpu/drm/i915/i915_drv.h | 2 + >>>>> drivers/gpu/drm/i915/i915_gem.c | 64 >>>>> ++++++++++++++++++++---------- >>>>> drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ >>>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- >>>>> include/uapi/drm/i915_drm.h | 12 ++++-- >>>>> 6 files changed, 111 insertions(+), 25 deletions(-) >>>>> >>>> >>>> Extra support from the other patch aside, v6 already had rb from Akash >>>> and this one, >>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com> >>> >>> This patch was acked by the PDT so I merged it to drm-intel-next-queued. >> >> Please revert immediately. We need to fix the ABI for canonical >> addressing before proceeding. Then please work on the better patch. > > Sounds like this is a valid comment, guys please check the thread with > subject "[PATCH v2] drm/i915: Avoid writing relocs with addresses in > non-canonical form". > > amd64 ABI mandates rules on virtual addresess - > https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses. > > Regards, > > Tvrtko And if the someone tries to use softpin with a virtual address in non-canonical form, reject with EINVAL?
On 09/12/15 13:33, Michel Thierry wrote: > On 12/9/2015 12:34 PM, Tvrtko Ursulin wrote: >> >> On 09/12/15 10:51, Chris Wilson wrote: >>> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote: >>>> >>>> On 08/12/15 18:49, Michel Thierry wrote: >>>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote: >>>>>> From: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> >>>>>> Userspace can pass in an offset that it presumes the object is >>>>>> located >>>>>> at. The kernel will then do its utmost to fit the object into that >>>>>> location. The assumption is that userspace is handling its own object >>>>>> locations (for example along with full-ppgtt) and that the kernel >>>>>> will >>>>>> rarely have to make space for the user's requests. >>>>>> >>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> >>>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix >>>>>> suggested >>>>>> by Chris >>>>>> Wilson. Fixed incorrect error paths causing crash found by Michal >>>>>> Winiarski. >>>>>> (Not published externally) >>>>>> >>>>>> v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed >>>>>> eviction >>>>>> to allow eviction of soft-pinned objects when another soft-pinned >>>>>> object used >>>>>> by a subsequent execbuffer overlaps reported by Michal Winiarski. >>>>>> (Not published externally) >>>>>> >>>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so that >>>>>> they are >>>>>> pinned first after an address conflict happens to avoid repeated >>>>>> conflicts in >>>>>> rare cases (Suggested by Chris Wilson). Expanded comment on >>>>>> drm_i915_gem_exec_object2.offset to cover this new API. >>>>>> >>>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this >>>>>> capability >>>>>> (Kristian). Added check for multiple pinnings on eviction (Akash). >>>>>> Made sure >>>>>> buffers are not considered misplaced without the user specifying >>>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility >>>>>> for >>>>>> any >>>>>> addressing workarounds. Updated object2.offset field comment >>>>>> again to >>>>>> clarify >>>>>> NO_RELOC case (Chris). checkpatch cleanup. >>>>>> >>>>>> v6: Trivial rebase on latest drm-intel-nightly >>>>>> >>>>>> v7: Catch attempts to pin above the max virtual address size and >>>>>> return >>>>>> EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and >>>>>> EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt >>>>>> to pin >>>>>> something at an offset above 4GB (Chris, Daniel Vetter). >>>>>> >>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> Cc: Akash Goel <akash.goel@intel.com> >>>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >>>>>> Cc: Michal Winiarski <michal.winiarski@intel.com> >>>>>> Cc: Zou Nanhai <nanhai.zou@intel.com> >>>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com> >>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >>>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> >>>>>> --- >>>>>> drivers/gpu/drm/i915/i915_dma.c | 3 ++ >>>>>> drivers/gpu/drm/i915/i915_drv.h | 2 + >>>>>> drivers/gpu/drm/i915/i915_gem.c | 64 >>>>>> ++++++++++++++++++++---------- >>>>>> drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ >>>>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- >>>>>> include/uapi/drm/i915_drm.h | 12 ++++-- >>>>>> 6 files changed, 111 insertions(+), 25 deletions(-) >>>>>> >>>>> >>>>> Extra support from the other patch aside, v6 already had rb from Akash >>>>> and this one, >>>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com> >>>> >>>> This patch was acked by the PDT so I merged it to >>>> drm-intel-next-queued. >>> >>> Please revert immediately. We need to fix the ABI for canonical >>> addressing before proceeding. Then please work on the better patch. >> >> Sounds like this is a valid comment, guys please check the thread with >> subject "[PATCH v2] drm/i915: Avoid writing relocs with addresses in >> non-canonical form". >> >> amd64 ABI mandates rules on virtual addresess - >> https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses. >> >> Regards, >> >> Tvrtko > > And if the someone tries to use softpin with a virtual address in > non-canonical form, reject with EINVAL? I think so, since they are not valid userspace virtual addresses. Regards, Tvrtko
The stress test will need to be modified to ensure a canonical address(currently uses starting address of 0x8000000000000). The invalid_vma test takes care of the non-canonical scenario in an indirect way. Do we still need a separate test for this change then? Thanks, Vinay. -----Original Message----- From: Tvrtko Ursulin [mailto:tvrtko.ursulin@linux.intel.com] Sent: Wednesday, December 9, 2015 5:36 AM To: Thierry, Michel; Chris Wilson; Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Goel, Akash; Kristian Høgsberg Subject: Re: [Intel-gfx] [PATCH v7] drm/i915: Add soft-pinning API for execbuffer On 09/12/15 13:33, Michel Thierry wrote: > On 12/9/2015 12:34 PM, Tvrtko Ursulin wrote: >> >> On 09/12/15 10:51, Chris Wilson wrote: >>> On Wed, Dec 09, 2015 at 10:30:41AM +0000, Tvrtko Ursulin wrote: >>>> >>>> On 08/12/15 18:49, Michel Thierry wrote: >>>>> On 12/8/2015 11:55 AM, Thomas Daniel wrote: >>>>>> From: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> >>>>>> Userspace can pass in an offset that it presumes the object is >>>>>> located at. The kernel will then do its utmost to fit the object >>>>>> into that location. The assumption is that userspace is handling >>>>>> its own object locations (for example along with full-ppgtt) and >>>>>> that the kernel will rarely have to make space for the user's >>>>>> requests. >>>>>> >>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> >>>>>> v2: Fixed incorrect eviction found by Michal Winiarski - fix >>>>>> suggested by Chris Wilson. Fixed incorrect error paths causing >>>>>> crash found by Michal Winiarski. >>>>>> (Not published externally) >>>>>> >>>>>> v3: Rebased because of trivial conflict in object_bind_to_vm. >>>>>> Fixed eviction to allow eviction of soft-pinned objects when >>>>>> another soft-pinned object used by a subsequent execbuffer >>>>>> overlaps reported by Michal Winiarski. >>>>>> (Not published externally) >>>>>> >>>>>> v4: Moved soft-pinned objects to the front of ordered_vmas so >>>>>> that they are pinned first after an address conflict happens to >>>>>> avoid repeated conflicts in rare cases (Suggested by Chris >>>>>> Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to >>>>>> cover this new API. >>>>>> >>>>>> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting >>>>>> this capability (Kristian). Added check for multiple pinnings on >>>>>> eviction (Akash). >>>>>> Made sure >>>>>> buffers are not considered misplaced without the user specifying >>>>>> EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume >>>>>> responsibility for any addressing workarounds. Updated >>>>>> object2.offset field comment again to clarify NO_RELOC case >>>>>> (Chris). checkpatch cleanup. >>>>>> >>>>>> v6: Trivial rebase on latest drm-intel-nightly >>>>>> >>>>>> v7: Catch attempts to pin above the max virtual address size and >>>>>> return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS >>>>>> and EXEC_OBJECT_PINNED flags, user must pass both flags in any >>>>>> attempt to pin something at an offset above 4GB (Chris, Daniel >>>>>> Vetter). >>>>>> >>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> Cc: Akash Goel <akash.goel@intel.com> >>>>>> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >>>>>> Cc: Michal Winiarski <michal.winiarski@intel.com> >>>>>> Cc: Zou Nanhai <nanhai.zou@intel.com> >>>>>> Cc: Kristian Høgsberg <hoegsberg@gmail.com> >>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >>>>>> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> >>>>>> --- >>>>>> drivers/gpu/drm/i915/i915_dma.c | 3 ++ >>>>>> drivers/gpu/drm/i915/i915_drv.h | 2 + >>>>>> drivers/gpu/drm/i915/i915_gem.c | 64 >>>>>> ++++++++++++++++++++---------- >>>>>> drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++++++++++++++++++ >>>>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++- >>>>>> include/uapi/drm/i915_drm.h | 12 ++++-- >>>>>> 6 files changed, 111 insertions(+), 25 deletions(-) >>>>>> >>>>> >>>>> Extra support from the other patch aside, v6 already had rb from >>>>> Akash and this one, >>>>> Reviewed-by: Michel Thierry <michel.thierry@intel.com> >>>> >>>> This patch was acked by the PDT so I merged it to >>>> drm-intel-next-queued. >>> >>> Please revert immediately. We need to fix the ABI for canonical >>> addressing before proceeding. Then please work on the better patch. >> >> Sounds like this is a valid comment, guys please check the thread >> with subject "[PATCH v2] drm/i915: Avoid writing relocs with >> addresses in non-canonical form". >> >> amd64 ABI mandates rules on virtual addresess - >> https://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses. >> >> Regards, >> >> Tvrtko > > And if the someone tries to use softpin with a virtual address in > non-canonical form, reject with EINVAL? I think so, since they are not valid userspace virtual addresses. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a81c766..52b8289 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_HAS_EXEC_SOFTPIN: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f1a8a53..315d79d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2860,6 +2860,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_UPDATE (1<<5) #define PIN_ZONE_4G (1<<6) #define PIN_HIGH (1<<7) +#define PIN_OFFSET_FIXED (1<<8) #define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, @@ -3205,6 +3206,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, unsigned long start, unsigned long end, unsigned flags); +int __must_check i915_gem_evict_for_vma(struct i915_vma *target); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); /* belongs in i915_gem_gtt.h */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dfaf25b..801f3d0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3476,30 +3476,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) goto err_unpin; - if (flags & PIN_HIGH) { - search_flag = DRM_MM_SEARCH_BELOW; - alloc_flag = DRM_MM_CREATE_TOP; + if (flags & PIN_OFFSET_FIXED) { + uint64_t offset = flags & PIN_OFFSET_MASK; + + if (offset & (alignment - 1) || offset + size > end) { + ret = -EINVAL; + goto err_free_vma; + } + vma->node.start = offset; + vma->node.size = size; + vma->node.color = obj->cache_level; + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + } + if (ret) + goto err_free_vma; } else { - search_flag = DRM_MM_SEARCH_DEFAULT; - alloc_flag = DRM_MM_CREATE_DEFAULT; - } + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, - size, alignment, - obj->cache_level, - start, end, - search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(dev, vm, size, alignment, - obj->cache_level, - start, end, - flags); - if (ret == 0) - goto search_free; + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, + size, alignment, + obj->cache_level, + start, end, + search_flag, + alloc_flag); + if (ret) { + ret = i915_gem_evict_something(dev, vm, size, alignment, + obj->cache_level, + start, end, + flags); + if (ret == 0) + goto search_free; - goto err_free_vma; + goto err_free_vma; + } } if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { ret = -EINVAL; @@ -4090,6 +4110,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) vma->node.start < (flags & PIN_OFFSET_MASK)) return true; + if (flags & PIN_OFFSET_FIXED && + vma->node.start != (flags & PIN_OFFSET_MASK)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index d71a133..07c6e4d 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -199,6 +199,45 @@ found: return ret; } +int +i915_gem_evict_for_vma(struct i915_vma *target) +{ + struct drm_mm_node *node, *next; + + list_for_each_entry_safe(node, next, + &target->vm->mm.head_node.node_list, + node_list) { + struct i915_vma *vma; + int ret; + + if (node->start + node->size <= target->node.start) + continue; + if (node->start >= target->node.start + target->node.size) + break; + + vma = container_of(node, typeof(*vma), node); + + if (vma->pin_count) { + if (!vma->exec_entry || (vma->pin_count > 1)) + /* Object is pinned for some other use */ + return -EBUSY; + + /* We need to evict a buffer in the same batch */ + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) + /* Overlapping fixed objects in the same batch */ + return -EINVAL; + + return -ENOSPC; + } + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return 0; +} + /** * i915_gem_evict_vm - Evict all idle vmas from a vm * @vm: Address space to cleanse diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a4c243c..48ec484 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_GLOBAL | PIN_MAPPABLE; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + if (entry->flags & EXEC_OBJECT_PINNED) + flags |= entry->offset | PIN_OFFSET_FIXED; if ((flags & PIN_MAPPABLE) == 0) flags |= PIN_HIGH; } @@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma) vma->node.start & (entry->alignment - 1)) return true; + if (entry->flags & EXEC_OBJECT_PINNED && + vma->node.start != entry->offset) + return true; + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && vma->node.start < BATCH_OFFSET_BIAS) return true; @@ -695,6 +701,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, struct i915_vma *vma; struct i915_address_space *vm; struct list_head ordered_vmas; + struct list_head pinned_vmas; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; @@ -703,6 +710,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; INIT_LIST_HEAD(&ordered_vmas); + INIT_LIST_HEAD(&pinned_vmas); while (!list_empty(vmas)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; @@ -721,7 +729,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); - if (need_mappable) { + if (entry->flags & EXEC_OBJECT_PINNED) + list_move_tail(&vma->exec_list, &pinned_vmas); + else if (need_mappable) { entry->flags |= __EXEC_OBJECT_NEEDS_MAP; list_move(&vma->exec_list, &ordered_vmas); } else @@ -731,6 +741,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->base.pending_write_domain = 0; } list_splice(&ordered_vmas, vmas); + list_splice(&pinned_vmas, vmas); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: @@ -1317,7 +1328,8 @@ eb_get_batch(struct eb_vmas *eb) * Note that actual hangs have only been observed on gen7, but for * paranoia do it everywhere. */ - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; return vma->obj; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 67ef73a..d727b49 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET 35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 typedef struct drm_i915_getparam { __s32 param; @@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 { __u64 alignment; /** - * Returned value of the updated offset of the object, for future - * presumed_offset writes. + * When the EXEC_OBJECT_PINNED flag is specified this is populated by + * the user with the GTT offset at which this object will be pinned. + * When the I915_EXEC_NO_RELOC flag is specified this must contain the + * presumed_offset of the object. + * During execbuffer2 the kernel populates it with the value of the + * current GTT offset of the object, for future presumed_offset writes. */ __u64 offset; @@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) +#define EXEC_OBJECT_PINNED (1<<4) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) __u64 flags; __u64 rsvd1;