Message ID | 1432650084-24491-16-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, May 26, 2015 at 03:21:22PM +0100, Michel Thierry wrote: > There are some allocations that must be only referenced by 32bit > offsets. To limit the chances of having the first 4GB already full, > objects not requiring this workaround don't use the first 2 PDPs. > > User must pass EXEC_OBJECT_NEEDS_32BADDRESS flag to indicate it needs a > 32b address. > > The flag is ignored in 32b PPGTT. > > Signed-off-by: Michel Thierry <michel.thierry@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_gem.c | 11 +++++++++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ > include/uapi/drm/i915_drm.h | 3 ++- > 4 files changed, 17 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 32493f0..a06f19c 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2714,6 +2714,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_OFFSET_BIAS (1<<3) > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > +#define PIN_FULL_RANGE (1<<6) > #define PIN_OFFSET_MASK (~4095) > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index acd928d..a133b7d 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3713,6 +3713,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > obj->tiling_mode, > false); > size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; > + > + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, > + * limit address to 4GB-1 for objects requiring this wa; for > + * others, start on the 2nd PDP. > + */ > + if (USES_FULL_48BIT_PPGTT(dev)) { > + if (flags & PIN_FULL_RANGE) > + start += (2ULL << GEN8_PDPE_SHIFT); > + else > + end = ((4ULL << GEN8_PDPE_SHIFT) - 1); > + } > } > > if (alignment == 0) > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index bd0e4bd..3de7f0f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -588,6 +588,9 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > if (entry->flags & EXEC_OBJECT_NEEDS_GTT) > flags |= PIN_GLOBAL; > > + if (!(entry->flags & EXEC_OBJECT_NEEDS_32BADDRESS)) > + flags |= PIN_FULL_RANGE; > + > if (!drm_mm_node_allocated(&vma->node)) { > if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) > flags |= PIN_GLOBAL | PIN_MAPPABLE; > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 4851d66..ebdf6dd 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_FENCE (1<<0) > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) > +#define EXEC_OBJECT_NEEDS_32BADDRESS (1<<3) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_NEEDS_32BADDRESS<<1) This is the wrong way round for existing userspace (and remember, bdw is shipping already so we can't opt out of that). Instead the w/a needs to allow 32bit+ addresses only if the new bit is set. Also this needs the usual pile of userspace enabling (libdrm+mesa). -Daniel > __u64 flags; > > __u64 rsvd1; > -- > 2.4.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 5/26/2015 4:25 PM, Daniel Vetter wrote: > On Tue, May 26, 2015 at 03:21:22PM +0100, Michel Thierry wrote: >> There are some allocations that must be only referenced by 32bit >> offsets. To limit the chances of having the first 4GB already full, >> objects not requiring this workaround don't use the first 2 PDPs. >> >> User must pass EXEC_OBJECT_NEEDS_32BADDRESS flag to indicate it needs a >> 32b address. >> >> The flag is ignored in 32b PPGTT. >> >> Signed-off-by: Michel Thierry <michel.thierry@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 1 + >> drivers/gpu/drm/i915/i915_gem.c | 11 +++++++++++ >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ >> include/uapi/drm/i915_drm.h | 3 ++- >> 4 files changed, 17 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 32493f0..a06f19c 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2714,6 +2714,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma); >> #define PIN_OFFSET_BIAS (1<<3) >> #define PIN_USER (1<<4) >> #define PIN_UPDATE (1<<5) >> +#define PIN_FULL_RANGE (1<<6) >> #define PIN_OFFSET_MASK (~4095) >> int __must_check >> i915_gem_object_pin(struct drm_i915_gem_object *obj, >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index acd928d..a133b7d 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -3713,6 +3713,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, >> obj->tiling_mode, >> false); >> size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; >> + >> + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, >> + * limit address to 4GB-1 for objects requiring this wa; for >> + * others, start on the 2nd PDP. >> + */ >> + if (USES_FULL_48BIT_PPGTT(dev)) { >> + if (flags & PIN_FULL_RANGE) >> + start += (2ULL << GEN8_PDPE_SHIFT); >> + else >> + end = ((4ULL << GEN8_PDPE_SHIFT) - 1); >> + } >> } >> >> if (alignment == 0) >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index bd0e4bd..3de7f0f 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -588,6 +588,9 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, >> if (entry->flags & EXEC_OBJECT_NEEDS_GTT) >> flags |= PIN_GLOBAL; >> >> + if (!(entry->flags & EXEC_OBJECT_NEEDS_32BADDRESS)) >> + flags |= PIN_FULL_RANGE; >> + >> if (!drm_mm_node_allocated(&vma->node)) { >> if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) >> flags |= PIN_GLOBAL | PIN_MAPPABLE; >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 4851d66..ebdf6dd 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 { >> #define EXEC_OBJECT_NEEDS_FENCE (1<<0) >> #define EXEC_OBJECT_NEEDS_GTT (1<<1) >> #define EXEC_OBJECT_WRITE (1<<2) >> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) >> +#define EXEC_OBJECT_NEEDS_32BADDRESS (1<<3) >> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_NEEDS_32BADDRESS<<1) > This is the wrong way round for existing userspace (and remember, bdw is > shipping already so we can't opt out of that). Instead the w/a needs to > allow 32bit+ addresses only if the new bit is set. > > Also this needs the usual pile of userspace enabling (libdrm+mesa). > -Daniel Hi, Ok, I'll change it to something like EXEC_OBJECT_SUPPORTS_48BADDRESS. --Michel >> __u64 flags; >> >> __u64 rsvd1; >> -- >> 2.4.0 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, May 26, 2015 at 05:25:48PM +0200, Daniel Vetter wrote: > On Tue, May 26, 2015 at 03:21:22PM +0100, Michel Thierry wrote: > > There are some allocations that must be only referenced by 32bit > > offsets. > > To limit the chances of having the first 4GB already full, > > objects not requiring this workaround don't use the first 2 PDPs. This is complete tosh. Please have a later patch that uses SEARCH_BELOW for 48bit objects, and cite eviction rates and improvements. > This is the wrong way round for existing userspace (and remember, bdw is > shipping already so we can't opt out of that). Instead the w/a needs to > allow 32bit+ addresses only if the new bit is set. > > Also this needs the usual pile of userspace enabling (libdrm+mesa). Indeed, the code is very much broken as is. So I expect to see a Testcase: igt/gem_exec_48bit in the next patch. As a hint, consider reusing the object. -Chris
On Tue, May 26, 2015 at 09:16:11PM +0100, Chris Wilson wrote: > On Tue, May 26, 2015 at 05:25:48PM +0200, Daniel Vetter wrote: > > On Tue, May 26, 2015 at 03:21:22PM +0100, Michel Thierry wrote: > > > There are some allocations that must be only referenced by 32bit > > > offsets. > > > > To limit the chances of having the first 4GB already full, > > > objects not requiring this workaround don't use the first 2 PDPs. > > This is complete tosh. Please have a later patch that uses SEARCH_BELOW > for 48bit objects, and cite eviction rates and improvements. Yeah didn't spot that at first, I agree that trying to segregate objects upfront is premature optimization. There's very few state objects (compard to textures), usually of differen size classes and different lifetimes. And they should all be reused anyway for efficient, so after a few rounds of execbuf things should settle in nicely. Let's avoid a bit of complexity here when not yet proven to be required. -Daniel
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 32493f0..a06f19c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2714,6 +2714,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_OFFSET_BIAS (1<<3) #define PIN_USER (1<<4) #define PIN_UPDATE (1<<5) +#define PIN_FULL_RANGE (1<<6) #define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index acd928d..a133b7d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3713,6 +3713,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, obj->tiling_mode, false); size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; + + /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to 4GB-1 for objects requiring this wa; for + * others, start on the 2nd PDP. + */ + if (USES_FULL_48BIT_PPGTT(dev)) { + if (flags & PIN_FULL_RANGE) + start += (2ULL << GEN8_PDPE_SHIFT); + else + end = ((4ULL << GEN8_PDPE_SHIFT) - 1); + } } if (alignment == 0) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index bd0e4bd..3de7f0f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -588,6 +588,9 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; + if (!(entry->flags & EXEC_OBJECT_NEEDS_32BADDRESS)) + flags |= PIN_FULL_RANGE; + if (!drm_mm_node_allocated(&vma->node)) { if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_GLOBAL | PIN_MAPPABLE; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 4851d66..ebdf6dd 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_NEEDS_32BADDRESS (1<<3) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_NEEDS_32BADDRESS<<1) __u64 flags; __u64 rsvd1;
There are some allocations that must be only referenced by 32bit offsets. To limit the chances of having the first 4GB already full, objects not requiring this workaround don't use the first 2 PDPs. User must pass EXEC_OBJECT_NEEDS_32BADDRESS flag to indicate it needs a 32b address. The flag is ignored in 32b PPGTT. Signed-off-by: Michel Thierry <michel.thierry@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 11 +++++++++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ include/uapi/drm/i915_drm.h | 3 ++- 4 files changed, 17 insertions(+), 1 deletion(-)