diff mbox

[2/4] drm/i915: mark the global default (intel_)context as such

Message ID 1450291011-31486-3-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon Dec. 16, 2015, 6:36 p.m. UTC
Some of the LRC-specific context-destruction code has to special-case
the global default context, because the HWSP is part of that context. At
present it deduces it indirectly by checking for the backpointer from
the engine to the context, but that's an unsafe assumption if the setup
and teardown code is reorganised. (It could also test !ctx->file_priv,
but again that's a detail that might be subject to change).

So here we explicitly flag the default context at the point of creation,
and then reorganise the code in intel_lr_context_free() not to rely on
the ring->default_pointer (still) being set up; to iterate over engines
in reverse (as this is teardown code); and to reduce the nesting level
so it's easier to read.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.c | 12 +++++++-----
 drivers/gpu/drm/i915/intel_lrc.c        | 25 ++++++++++++-------------
 3 files changed, 21 insertions(+), 18 deletions(-)

Comments

Chris Wilson Dec. 16, 2015, 6:57 p.m. UTC | #1
On Wed, Dec 16, 2015 at 06:36:49PM +0000, Dave Gordon wrote:
> Some of the LRC-specific context-destruction code has to special-case
> the global default context, because the HWSP is part of that context. At
> present it deduces it indirectly by checking for the backpointer from
> the engine to the context, but that's an unsafe assumption if the setup
> and teardown code is reorganised. (It could also test !ctx->file_priv,
> but again that's a detail that might be subject to change).
> 
> So here we explicitly flag the default context at the point of creation,
> and then reorganise the code in intel_lr_context_free() not to rely on
> the ring->default_pointer (still) being set up; to iterate over engines
> in reverse (as this is teardown code); and to reduce the nesting level
> so it's easier to read.
> 
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>

#define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)

> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3aa6147..23f90b2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2367,22 +2367,21 @@ void intel_lr_context_free(struct intel_context *ctx)
>  {
>  	int i;
>  
> -	for (i = 0; i < I915_NUM_RINGS; i++) {
> +	for (i = I915_NUM_RINGS; --i >= 0; ) {
> +		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
>  		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>  
> -		if (ctx_obj) {
> -			struct intel_ringbuffer *ringbuf =
> -					ctx->engine[i].ringbuf;
> -			struct intel_engine_cs *ring = ringbuf->ring;
> +		if (!ctx_obj)
> +			continue;
>  
> -			if (ctx == ring->default_context) {
> -				intel_unpin_ringbuffer_obj(ringbuf);
> -				i915_gem_object_ggtt_unpin(ctx_obj);
> -			}
> -			WARN_ON(ctx->engine[ring->id].pin_count);
> -			intel_ringbuffer_free(ringbuf);
> -			drm_gem_object_unreference(&ctx_obj->base);
> +		if (ctx->is_global_default) {
> +			intel_unpin_ringbuffer_obj(ringbuf);
> +			i915_gem_object_ggtt_unpin(ctx_obj);
>  		}
> +
> +		WARN_ON(ctx->engine[i].pin_count);
> +		intel_ringbuffer_free(ringbuf);
> +		drm_gem_object_unreference(&ctx_obj->base);

That looks much neater, indeed.
-Chris
Dave Gordon Dec. 16, 2015, 7:22 p.m. UTC | #2
On 16/12/15 18:57, Chris Wilson wrote:
> On Wed, Dec 16, 2015 at 06:36:49PM +0000, Dave Gordon wrote:
>> Some of the LRC-specific context-destruction code has to special-case
>> the global default context, because the HWSP is part of that context. At
>> present it deduces it indirectly by checking for the backpointer from
>> the engine to the context, but that's an unsafe assumption if the setup
>> and teardown code is reorganised. (It could also test !ctx->file_priv,
>> but again that's a detail that might be subject to change).
>>
>> So here we explicitly flag the default context at the point of creation,
>> and then reorganise the code in intel_lr_context_free() not to rely on
>> the ring->default_pointer (still) being set up; to iterate over engines
>> in reverse (as this is teardown code); and to reduce the nesting level
>> so it's easier to read.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>
> #define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)

The last sentence of the first paragraph of the commit message above 
notes that we *could* use that as a test, but I don't regard it as a 
safe test, in either direction. That is, it could give a false negative 
if we someday associate some (internal) fd with the default context, or 
(more likely) a false positive if the file association were broken and 
the pointer nulled in an earlier stage of the teardown of a non-global 
(user-created) context.

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
                                    struct drm_file *file)
{
         struct drm_i915_gem_context_destroy *args = data;
         struct drm_i915_file_private *file_priv = file->driver_priv;
         struct intel_context *ctx;
         int ret;

         if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
                 return -ENOENT;

         ret = i915_mutex_lock_interruptible(dev);
         if (ret)
                 return ret;

         ctx = i915_gem_context_get(file_priv, args->ctx_id);
         if (IS_ERR(ctx)) {
                 mutex_unlock(&dev->struct_mutex);
                 return PTR_ERR(ctx);
         }

         idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
         i915_gem_context_unreference(ctx);
         mutex_unlock(&dev->struct_mutex);

         DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
         return 0;
}

At present, i915_gem_context_destroy_ioctl() above removes the context 
from the file's list-of-contexts but DOESN'T clear the ctx->file_priv, 
which means there's a somewhat inconsistent (but transient) state during 
which a soon-to-be-destroyed context links to a file, but the file 
doesn't have a link back. It probably doesn't matter, because the code 
holds the mutex across the two operations ...

... unless of course the context's refcount isn't 1 at this point, in 
which case I suppose someone else *might* go from the context to the 
file and then be mystified as to why the context isn't on the list ...

... and if we changed the code above, then file_priv would *always* be 
NULL by the time the destructor was called!

So it's surely safer to have a flag that explicitly says "I'm the global 
default context" than to guess based on some other contingent property.

.Dave.
Chris Wilson Dec. 16, 2015, 7:30 p.m. UTC | #3
On Wed, Dec 16, 2015 at 07:22:52PM +0000, Dave Gordon wrote:
> On 16/12/15 18:57, Chris Wilson wrote:
> >On Wed, Dec 16, 2015 at 06:36:49PM +0000, Dave Gordon wrote:
> >>Some of the LRC-specific context-destruction code has to special-case
> >>the global default context, because the HWSP is part of that context. At
> >>present it deduces it indirectly by checking for the backpointer from
> >>the engine to the context, but that's an unsafe assumption if the setup
> >>and teardown code is reorganised. (It could also test !ctx->file_priv,
> >>but again that's a detail that might be subject to change).
> >>
> >>So here we explicitly flag the default context at the point of creation,
> >>and then reorganise the code in intel_lr_context_free() not to rely on
> >>the ring->default_pointer (still) being set up; to iterate over engines
> >>in reverse (as this is teardown code); and to reduce the nesting level
> >>so it's easier to read.
> >>
> >>Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >
> >#define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)
> 
> The last sentence of the first paragraph of the commit message above
> notes that we *could* use that as a test, but I don't regard it as a
> safe test, in either direction. That is, it could give a false
> negative if we someday associate some (internal) fd with the default
> context, or (more likely) a false positive if the file association
> were broken and the pointer nulled in an earlier stage of the
> teardown of a non-global (user-created) context.
> 
> int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>                                    struct drm_file *file)
> {
>         struct drm_i915_gem_context_destroy *args = data;
>         struct drm_i915_file_private *file_priv = file->driver_priv;
>         struct intel_context *ctx;
>         int ret;
> 
>         if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
>                 return -ENOENT;
> 
>         ret = i915_mutex_lock_interruptible(dev);
>         if (ret)
>                 return ret;
> 
>         ctx = i915_gem_context_get(file_priv, args->ctx_id);
>         if (IS_ERR(ctx)) {
>                 mutex_unlock(&dev->struct_mutex);
>                 return PTR_ERR(ctx);
>         }
> 
>         idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
>         i915_gem_context_unreference(ctx);
>         mutex_unlock(&dev->struct_mutex);
> 
>         DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
>         return 0;
> }
> 
> At present, i915_gem_context_destroy_ioctl() above removes the
> context from the file's list-of-contexts but DOESN'T clear the
> ctx->file_priv, which means there's a somewhat inconsistent (but
> transient) state during which a soon-to-be-destroyed context links
> to a file, but the file doesn't have a link back. It probably
> doesn't matter, because the code holds the mutex across the two
> operations ...

And that the ctx was created to belong to the file still holds true.
 
> ... unless of course the context's refcount isn't 1 at this point,
> in which case I suppose someone else *might* go from the context to
> the file and then be mystified as to why the context isn't on the
> list ...
> 
> ... and if we changed the code above, then file_priv would *always*
> be NULL by the time the destructor was called!
> 
> So it's surely safer to have a flag that explicitly says "I'm the
> global default context" than to guess based on some other contingent
> property.

No, we have a flag that says this context was created belonging to a
file, with the corollary that only one context doesn't belong to any
file.
-Chris
Nick Hoath Dec. 17, 2015, 11:09 a.m. UTC | #4
On 16/12/2015 19:30, Chris Wilson wrote:
> On Wed, Dec 16, 2015 at 07:22:52PM +0000, Dave Gordon wrote:
>> On 16/12/15 18:57, Chris Wilson wrote:
>>> On Wed, Dec 16, 2015 at 06:36:49PM +0000, Dave Gordon wrote:
>>>> Some of the LRC-specific context-destruction code has to special-case
>>>> the global default context, because the HWSP is part of that context. At
>>>> present it deduces it indirectly by checking for the backpointer from
>>>> the engine to the context, but that's an unsafe assumption if the setup
>>>> and teardown code is reorganised. (It could also test !ctx->file_priv,
>>>> but again that's a detail that might be subject to change).
>>>>
>>>> So here we explicitly flag the default context at the point of creation,
>>>> and then reorganise the code in intel_lr_context_free() not to rely on
>>>> the ring->default_pointer (still) being set up; to iterate over engines
>>>> in reverse (as this is teardown code); and to reduce the nesting level
>>>> so it's easier to read.
>>>>
>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>
>>> #define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)
>>
>> The last sentence of the first paragraph of the commit message above
>> notes that we *could* use that as a test, but I don't regard it as a
>> safe test, in either direction. That is, it could give a false
>> negative if we someday associate some (internal) fd with the default
>> context, or (more likely) a false positive if the file association
>> were broken and the pointer nulled in an earlier stage of the
>> teardown of a non-global (user-created) context.
>>
>> int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>                                     struct drm_file *file)
>> {
>>          struct drm_i915_gem_context_destroy *args = data;
>>          struct drm_i915_file_private *file_priv = file->driver_priv;
>>          struct intel_context *ctx;
>>          int ret;
>>
>>          if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
>>                  return -ENOENT;
>>
>>          ret = i915_mutex_lock_interruptible(dev);
>>          if (ret)
>>                  return ret;
>>
>>          ctx = i915_gem_context_get(file_priv, args->ctx_id);
>>          if (IS_ERR(ctx)) {
>>                  mutex_unlock(&dev->struct_mutex);
>>                  return PTR_ERR(ctx);
>>          }
>>
>>          idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
>>          i915_gem_context_unreference(ctx);
>>          mutex_unlock(&dev->struct_mutex);
>>
>>          DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
>>          return 0;
>> }
>>
>> At present, i915_gem_context_destroy_ioctl() above removes the
>> context from the file's list-of-contexts but DOESN'T clear the
>> ctx->file_priv, which means there's a somewhat inconsistent (but
>> transient) state during which a soon-to-be-destroyed context links
>> to a file, but the file doesn't have a link back. It probably
>> doesn't matter, because the code holds the mutex across the two
>> operations ...
>
> And that the ctx was created to belong to the file still holds true.
>
>> ... unless of course the context's refcount isn't 1 at this point,
>> in which case I suppose someone else *might* go from the context to
>> the file and then be mystified as to why the context isn't on the
>> list ...
>>
>> ... and if we changed the code above, then file_priv would *always*
>> be NULL by the time the destructor was called!
>>
>> So it's surely safer to have a flag that explicitly says "I'm the
>> global default context" than to guess based on some other contingent
>> property.
>
> No, we have a flag that says this context was created belonging to a
> file, with the corollary that only one context doesn't belong to any
> file.
Using pointers like this to provide 'magic' secondary state information
just adds to the fragility of the driver.
So:
Reviewed-by: Nick Hoath <nicholas.hoath@intel.com>
to the original patch.
> -Chris
>
Chris Wilson Dec. 17, 2015, 12:27 p.m. UTC | #5
On Thu, Dec 17, 2015 at 11:09:54AM +0000, Nick Hoath wrote:
> On 16/12/2015 19:30, Chris Wilson wrote:
> >On Wed, Dec 16, 2015 at 07:22:52PM +0000, Dave Gordon wrote:
> >>On 16/12/15 18:57, Chris Wilson wrote:
> >>>On Wed, Dec 16, 2015 at 06:36:49PM +0000, Dave Gordon wrote:
> >>>>Some of the LRC-specific context-destruction code has to special-case
> >>>>the global default context, because the HWSP is part of that context. At
> >>>>present it deduces it indirectly by checking for the backpointer from
> >>>>the engine to the context, but that's an unsafe assumption if the setup
> >>>>and teardown code is reorganised. (It could also test !ctx->file_priv,
> >>>>but again that's a detail that might be subject to change).
> >>>>
> >>>>So here we explicitly flag the default context at the point of creation,
> >>>>and then reorganise the code in intel_lr_context_free() not to rely on
> >>>>the ring->default_pointer (still) being set up; to iterate over engines
> >>>>in reverse (as this is teardown code); and to reduce the nesting level
> >>>>so it's easier to read.
> >>>>
> >>>>Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >>>
> >>>#define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)
> >>
> >>The last sentence of the first paragraph of the commit message above
> >>notes that we *could* use that as a test, but I don't regard it as a
> >>safe test, in either direction. That is, it could give a false
> >>negative if we someday associate some (internal) fd with the default
> >>context, or (more likely) a false positive if the file association
> >>were broken and the pointer nulled in an earlier stage of the
> >>teardown of a non-global (user-created) context.
> >>
> >>int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> >>                                    struct drm_file *file)
> >>{
> >>         struct drm_i915_gem_context_destroy *args = data;
> >>         struct drm_i915_file_private *file_priv = file->driver_priv;
> >>         struct intel_context *ctx;
> >>         int ret;
> >>
> >>         if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
> >>                 return -ENOENT;
> >>
> >>         ret = i915_mutex_lock_interruptible(dev);
> >>         if (ret)
> >>                 return ret;
> >>
> >>         ctx = i915_gem_context_get(file_priv, args->ctx_id);
> >>         if (IS_ERR(ctx)) {
> >>                 mutex_unlock(&dev->struct_mutex);
> >>                 return PTR_ERR(ctx);
> >>         }
> >>
> >>         idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
> >>         i915_gem_context_unreference(ctx);
> >>         mutex_unlock(&dev->struct_mutex);
> >>
> >>         DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
> >>         return 0;
> >>}
> >>
> >>At present, i915_gem_context_destroy_ioctl() above removes the
> >>context from the file's list-of-contexts but DOESN'T clear the
> >>ctx->file_priv, which means there's a somewhat inconsistent (but
> >>transient) state during which a soon-to-be-destroyed context links
> >>to a file, but the file doesn't have a link back. It probably
> >>doesn't matter, because the code holds the mutex across the two
> >>operations ...
> >
> >And that the ctx was created to belong to the file still holds true.
> >
> >>... unless of course the context's refcount isn't 1 at this point,
> >>in which case I suppose someone else *might* go from the context to
> >>the file and then be mystified as to why the context isn't on the
> >>list ...
> >>
> >>... and if we changed the code above, then file_priv would *always*
> >>be NULL by the time the destructor was called!
> >>
> >>So it's surely safer to have a flag that explicitly says "I'm the
> >>global default context" than to guess based on some other contingent
> >>property.
> >
> >No, we have a flag that says this context was created belonging to a
> >file, with the corollary that only one context doesn't belong to any
> >file.
> Using pointers like this to provide 'magic' secondary state information
> just adds to the fragility of the driver.

It's primary. Ownership information is fundamental to the driver.
The change is irrelevant to the bugfix.

If you want to make such a big change, eliminate the default_ctx from
execlists.
-Chris
Dave Gordon Dec. 17, 2015, 7 p.m. UTC | #6
On 17/12/15 12:27, Chris Wilson wrote:
> On Thu, Dec 17, 2015 at 11:09:54AM +0000, Nick Hoath wrote:
>> On 16/12/2015 19:30, Chris Wilson wrote:
>>> On Wed, Dec 16, 2015 at 07:22:52PM +0000, Dave Gordon wrote:
>>>> On 16/12/15 18:57, Chris Wilson wrote:
>>>>> On Wed, Dec 16, 2015 at 06:36:49PM +0000, Dave Gordon wrote:
>>>>>> Some of the LRC-specific context-destruction code has to special-case
>>>>>> the global default context, because the HWSP is part of that context. At
>>>>>> present it deduces it indirectly by checking for the backpointer from
>>>>>> the engine to the context, but that's an unsafe assumption if the setup
>>>>>> and teardown code is reorganised. (It could also test !ctx->file_priv,
>>>>>> but again that's a detail that might be subject to change).
>>>>>>
>>>>>> So here we explicitly flag the default context at the point of creation,
>>>>>> and then reorganise the code in intel_lr_context_free() not to rely on
>>>>>> the ring->default_pointer (still) being set up; to iterate over engines
>>>>>> in reverse (as this is teardown code); and to reduce the nesting level
>>>>>> so it's easier to read.
>>>>>>
>>>>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>>>>
>>>>> #define intel_context_is_global(ctx) ((ctx)->file_priv == NULL)

Why not
#define intel_context_is_global(ctx)	((ctx)->is_global) /* ! */

>>>> The last sentence of the first paragraph of the commit message above
>>>> notes that we *could* use that as a test, but I don't regard it as a
>>>> safe test, in either direction. That is, it could give a false
>>>> negative if we someday associate some (internal) fd with the default
>>>> context, or (more likely) a false positive if the file association
>>>> were broken and the pointer nulled in an earlier stage of the
>>>> teardown of a non-global (user-created) context.
>>>>
>>>> int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>>>                                     struct drm_file *file)
>>>> {
>>>>          struct drm_i915_gem_context_destroy *args = data;
>>>>          struct drm_i915_file_private *file_priv = file->driver_priv;
>>>>          struct intel_context *ctx;
>>>>          int ret;
>>>>
>>>>          if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
>>>>                  return -ENOENT;
>>>>
>>>>          ret = i915_mutex_lock_interruptible(dev);
>>>>          if (ret)
>>>>                  return ret;
>>>>
>>>>          ctx = i915_gem_context_get(file_priv, args->ctx_id);
>>>>          if (IS_ERR(ctx)) {
>>>>                  mutex_unlock(&dev->struct_mutex);
>>>>                  return PTR_ERR(ctx);
>>>>          }
>>>>
>>>>          idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
>>>>          i915_gem_context_unreference(ctx);
>>>>          mutex_unlock(&dev->struct_mutex);
>>>>
>>>>          DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
>>>>          return 0;
>>>> }
>>>>
>>>> At present, i915_gem_context_destroy_ioctl() above removes the
>>>> context from the file's list-of-contexts but DOESN'T clear the
>>>> ctx->file_priv, which means there's a somewhat inconsistent (but
>>>> transient) state during which a soon-to-be-destroyed context links
>>>> to a file, but the file doesn't have a link back. It probably
>>>> doesn't matter, because the code holds the mutex across the two
>>>> operations ...
>>>
>>> And that the ctx was created to belong to the file still holds true.
>>>
>>>> ... unless of course the context's refcount isn't 1 at this point,
>>>> in which case I suppose someone else *might* go from the context to
>>>> the file and then be mystified as to why the context isn't on the
>>>> list ...
>>>>
>>>> ... and if we changed the code above, then file_priv would *always*
>>>> be NULL by the time the destructor was called!
>>>>
>>>> So it's surely safer to have a flag that explicitly says "I'm the
>>>> global default context" than to guess based on some other contingent
>>>> property.
>>>
>>> No, we have a flag that says this context was created belonging to a
>>> file, with the corollary that only one context doesn't belong to any
>>> file.
>> Using pointers like this to provide 'magic' secondary state information
>> just adds to the fragility of the driver.
>
> It's primary. Ownership information is fundamental to the driver.
> The change is irrelevant to the bugfix.
>
> If you want to make such a big change, eliminate the default_ctx from
> execlists.
> -Chris

No, we need the default (or global) context for idling the engines, as 
well as for sending initialisation commands during startup. We can't 
make the GPU stop using any given (user) context within a known bounded 
time except by telling it to switch to another context. Ergo, to stop 
using ANY user context, there must exist a non-user context that we can 
switch to.

And this is not a BIG change, it's a trivial change, to identify THE 
GLOBAL CONTEXT by means of *specific* information contained within that 
context, rather than by relying on information stored in a different 
structure.

Your suggested #define also does that much, but in a less obvious way 
that I consider lays a trap for a future maintainer who doesn't realise 
that the code relies on a subtle distinction between "a context created 
internally by the driver, without an owning fd" and "a context that has 
become detached from the fd that originally created it".

Eliminating such obscure and undocumented dependencies enables us to 
more safely restructure and simplify areas that currently have 
excessively high coupling, in terms of the nonlocal assumptions that 
each part makes about what other pieces of code have done/will do.
The delicate and fragile load/unload dance is a prime example.

.Dave.
Dave Gordon Dec. 18, 2015, 4:02 p.m. UTC | #7
On 17/12/15 19:00, Dave Gordon wrote:
> On 17/12/15 12:27, Chris Wilson wrote:
>> On Thu, Dec 17, 2015 at 11:09:54AM +0000, Nick Hoath wrote:
[snip!]
>>
>> If you want to make such a big change, eliminate the default_ctx from
>> execlists.
>> -Chris
>
> No, we need the default (or global) context for idling the engines, as
> well as for sending initialisation commands during startup. We can't
> make the GPU stop using any given (user) context within a known bounded
> time except by telling it to switch to another context. Ergo, to stop
> using ANY user context, there must exist a non-user context that we can
> switch to.

After writing this last night, I realised this morning that maybe you 
didn't mean "eliminate the default context per se" but rather "eliminate 
the use of ring->default_context", which is a very different thing and 
much less problematic.

So if that's the case, then, yes, I'll be quite happy to provide a 
followup patch which eliminates most uses of ring->default_context and 
in particular all those in intel_lrc.c where it's compared against 
another context pointer. But I'm not going to do that extra work until 
and unless this is merged, as it would just be a waste of effort.

The reason we just hit this specific use of default_context first is 
because this is the one that was blocking the merge of Nick's "Fix 
context/engine cleanup order" patch that you'd already R-B'd. Once this 
is in, we can get Nick's patches in, and /then/ clean up all the other 
comparisons made against ring->default_context.

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9124085..ea59843 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -851,6 +851,7 @@  struct i915_ctx_hang_stats {
  * @ref: reference count.
  * @user_handle: userspace tracking identity for this context.
  * @remap_slice: l3 row remapping information.
+ * @is_default: true iff this is the global default context
  * @flags: context specific flags:
  *         CONTEXT_NO_ZEROMAP: do not allow mapping things to page 0.
  * @file_priv: filp associated with this context (NULL for global default
@@ -869,6 +870,7 @@  struct intel_context {
 	struct kref ref;
 	int user_handle;
 	uint8_t remap_slice;
+	uint8_t is_global_default;
 	struct drm_i915_private *i915;
 	int flags;
 	struct drm_i915_file_private *file_priv;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e143ea5..1f16880 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -241,8 +241,10 @@  __create_hw_context(struct drm_device *dev,
 				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
 		if (ret < 0)
 			goto err_out;
-	} else
+	} else {
+		ctx->is_global_default = true;
 		ret = DEFAULT_CONTEXT_HANDLE;
+	}
 
 	ctx->file_priv = file_priv;
 	ctx->user_handle = ret;
@@ -269,7 +271,6 @@  static struct intel_context *
 i915_gem_create_context(struct drm_device *dev,
 			struct drm_i915_file_private *file_priv)
 {
-	const bool is_global_default_ctx = file_priv == NULL;
 	struct intel_context *ctx;
 	int ret = 0;
 
@@ -279,8 +280,9 @@  i915_gem_create_context(struct drm_device *dev,
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
-		/* We may need to do things with the shrinker which
+	if (ctx->is_global_default && ctx->legacy_hw_ctx.rcs_state) {
+		/*
+		 * We may need to do things with the shrinker which
 		 * require us to immediately switch back to the default
 		 * context. This can cause a problem as pinning the
 		 * default context also requires GTT space which may not
@@ -313,7 +315,7 @@  i915_gem_create_context(struct drm_device *dev,
 	return ctx;
 
 err_unpin:
-	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state)
+	if (ctx->is_global_default && ctx->legacy_hw_ctx.rcs_state)
 		i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
 err_destroy:
 	idr_remove(&file_priv->context_idr, ctx->user_handle);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3aa6147..23f90b2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2367,22 +2367,21 @@  void intel_lr_context_free(struct intel_context *ctx)
 {
 	int i;
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
+	for (i = I915_NUM_RINGS; --i >= 0; ) {
+		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
 
-		if (ctx_obj) {
-			struct intel_ringbuffer *ringbuf =
-					ctx->engine[i].ringbuf;
-			struct intel_engine_cs *ring = ringbuf->ring;
+		if (!ctx_obj)
+			continue;
 
-			if (ctx == ring->default_context) {
-				intel_unpin_ringbuffer_obj(ringbuf);
-				i915_gem_object_ggtt_unpin(ctx_obj);
-			}
-			WARN_ON(ctx->engine[ring->id].pin_count);
-			intel_ringbuffer_free(ringbuf);
-			drm_gem_object_unreference(&ctx_obj->base);
+		if (ctx->is_global_default) {
+			intel_unpin_ringbuffer_obj(ringbuf);
+			i915_gem_object_ggtt_unpin(ctx_obj);
 		}
+
+		WARN_ON(ctx->engine[i].pin_count);
+		intel_ringbuffer_free(ringbuf);
+		drm_gem_object_unreference(&ctx_obj->base);
 	}
 }
 
@@ -2443,7 +2442,7 @@  static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
  */
 
 int intel_lr_context_deferred_alloc(struct intel_context *ctx,
-				     struct intel_engine_cs *ring)
+				    struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *ctx_obj;