Message ID | 1434393394-21002-10-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jun 15, 2015 at 07:36:27PM +0100, Dave Gordon wrote: > +static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev, > + u32 size) > +{ > + struct drm_i915_gem_object *obj; > + > + obj = i915_gem_alloc_object(dev, size); > + if (!obj) > + return NULL; Does it need to be a shmemfs object? > + if (i915_gem_object_get_pages(obj)) { > + drm_gem_object_unreference(&obj->base); > + return NULL; > + } This is a random function call. > + if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, > + PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE)) { > + drm_gem_object_unreference(&obj->base); > + return NULL; How about reporting the right error code? > + } > + > + return obj; > +} > + > +/** > + * gem_release_guc_obj() - Release gem object allocated for GuC usage > + * @obj: gem obj to be released > + */ > +static void gem_release_guc_obj(struct drm_i915_gem_object *obj) > +{ > + if (!obj) > + return; > + > + if (i915_gem_obj_is_pinned(obj)) > + i915_gem_object_ggtt_unpin(obj); What? > + drm_gem_object_unreference(&obj->base); > +} > + > +/* > + * Set up the memory resources to be shared with the GuC. At this point, > + * we require just one object that can be mapped through the GGTT. > + */ > +int i915_guc_submission_init(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; Bleh. > + const size_t ctxsize = sizeof(struct guc_context_desc); > + const size_t poolsize = MAX_GUC_GPU_CONTEXTS * ctxsize; > + const size_t gemsize = round_up(poolsize, PAGE_SIZE); > + struct intel_guc *guc = &dev_priv->guc; > + > + if (!i915.enable_guc_submission) > + return 0; /* not enabled */ > + > + if (guc->ctx_pool_obj) > + return 0; /* already allocated */ Eh? Where have you hooked into... So looking at that, it looks like you want to move this into the device initialisation rather than guc firmware load. To me at least they are conceptually separate stages, and judging by the above combining them has resulted in very clumsy code. 
> + guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize); > + if (!guc->ctx_pool_obj) > + return -ENOMEM; > + > + spin_lock_init(&dev_priv->guc.host2guc_lock); > + > + ida_init(&guc->ctx_ids); > + > + memset(guc->doorbell_bitmap, 0, sizeof(guc->doorbell_bitmap)); > + guc->db_cacheline = 0; Before you relied on guc being zeroed, and now you memset it again. > + > + return 0; > +} > + > +void i915_guc_submission_fini(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + > + gem_release_guc_obj(dev_priv->guc.log_obj); > + guc->log_obj = NULL; > + > + if (guc->ctx_pool_obj) > + ida_destroy(&guc->ctx_ids); Interesting guard. Maybe just make the GuC controller a pointer from i915 and then you can do a more natural if (i915->guc == NULL) return; > + gem_release_guc_obj(guc->ctx_pool_obj); > + guc->ctx_pool_obj = NULL; > +} > diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h > index 0b44265..06b68c2 100644 > --- a/drivers/gpu/drm/i915/intel_guc.h > +++ b/drivers/gpu/drm/i915/intel_guc.h > @@ -171,4 +171,8 @@ extern void intel_guc_ucode_init(struct drm_device *dev); > extern int intel_guc_ucode_load(struct drm_device *dev, bool wait); > extern void intel_guc_ucode_fini(struct drm_device *dev); > > +/* i915_guc_submission.c */ > +int i915_guc_submission_init(struct drm_device *dev); > +void i915_guc_submission_fini(struct drm_device *dev); > + > #endif > diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c > index 16eef4c..0f74876 100644 > --- a/drivers/gpu/drm/i915/intel_guc_loader.c > +++ b/drivers/gpu/drm/i915/intel_guc_loader.c > @@ -111,6 +111,21 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) > i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; > } > > + /* If GuC scheduling is enabled, setup params here. 
*/ > + if (i915.enable_guc_submission) { > + u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); > + u32 ctx_in_16 = MAX_GUC_GPU_CONTEXTS / 16; So really you didn't need to pin the ctx_pool_obj until this point? > + > + pgs >>= PAGE_SHIFT; > + params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | > + (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); > + > + params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; > + > + /* Unmask this bit to enable GuC scheduler */ > + params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; /* Enable multiple context submission through the GuC */ params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; Try to keep comments to explain why rather than what. Most of the comments here fall into the "i++; // postincrement i" category. -Chris
On Mon, Jun 15, 2015 at 07:36:27PM +0100, Dave Gordon wrote: > + if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, > + PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE)) { > + drm_gem_object_unreference(&obj->base); > + return NULL; > + } Another question is should this take up mappable aperture space at all? -Chris
On 15/06/15 22:32, Chris Wilson wrote: > On Mon, Jun 15, 2015 at 07:36:27PM +0100, Dave Gordon wrote: >> +static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev, >> + u32 size) >> +{ >> + struct drm_i915_gem_object *obj; >> + >> + obj = i915_gem_alloc_object(dev, size); >> + if (!obj) >> + return NULL; > > Does it need to be a shmemfs object? It needs to be permanently in RAM, so probably not. But I don't see an allocator that gives you a GEM memory object /without/ backing store. Do we have one? >> + if (i915_gem_object_get_pages(obj)) { >> + drm_gem_object_unreference(&obj->base); >> + return NULL; >> + } > > This is a random function call. Which is? Unreferencing the newly-allocated object before returning? Otherwise it will leak :( Presumably if the object didn't have backing store, the get_pages() would be unnecessary as they would already be resident? Or would they not exist until the first get_pages call instantiated them? >> + if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, >> + PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE)) { >> + drm_gem_object_unreference(&obj->base); >> + return NULL; > > How about reporting the right error code? Doesn't add anything. Allocation failure is going to be fatal anyway. And i915_gem_alloc_object() just returns NULL for failure, so we'd have to *make up* an error code for that case :( Oh, and ERR_PTR/PTR_ERR are ugly. >> + } >> + >> + return obj; >> +} >> + >> +/** >> + * gem_release_guc_obj() - Release gem object allocated for GuC usage >> + * @obj: gem obj to be released >> + */ >> +static void gem_release_guc_obj(struct drm_i915_gem_object *obj) >> +{ >> + if (!obj) >> + return; >> + >> + if (i915_gem_obj_is_pinned(obj)) >> + i915_gem_object_ggtt_unpin(obj); > > What? The object /should/ be pinned when we arrive here, thanks to the i915_gem_obj_ggtt_pin() call discussed above. We could just always unpin, and see whether we trigger this: WARN_ON(vma->pin_count == 0); inside i915_gem_object_ggtt_unpin(). 
The test is just in case there's an error path where the object being released wasn't in fact pinned. >> + drm_gem_object_unreference(&obj->base); >> +} >> + >> +/* >> + * Set up the memory resources to be shared with the GuC. At this point, >> + * we require just one object that can be mapped through the GGTT. >> + */ >> +int i915_guc_submission_init(struct drm_device *dev) >> +{ >> + struct drm_i915_private *dev_priv = dev->dev_private; > > Bleh. Cross-file interface, nonstatic, hence passes 'dev'; also it needs 'dev' anyway, so there's no gain in passing dev_priv. And dev_priv (i.e. struct drm_i915_private) isn't even in scope when this function is declared in the header file. >> + const size_t ctxsize = sizeof(struct guc_context_desc); >> + const size_t poolsize = MAX_GUC_GPU_CONTEXTS * ctxsize; >> + const size_t gemsize = round_up(poolsize, PAGE_SIZE); >> + struct intel_guc *guc = &dev_priv->guc; >> + >> + if (!i915.enable_guc_submission) >> + return 0; /* not enabled */ >> + >> + if (guc->ctx_pool_obj) >> + return 0; /* already allocated */ > > Eh? Where have you hooked into... So looking at that, it looks like you > want to move this into the device initialisation rather than guc > firmware load. To me at least they are conceptually separate stages, and > judging by the above combining them has resulted in very clumsy code. So ... we don't want to allocate any GuC-related structures unless we're going to at least try to use the GuC, so this has to come /after/ firmware fetch and validation. OTOH we can't actually fire up the GuC until after these structures are allocated, because the GGTT address of the ctx_pool_obj has to be passed to the GuC firmware as one of its startup parameters. Thus, the only place to do this allocation is in between deciding to transfer the f/w to the GuC and actually doing so. 
Hence, the GuC loading code calls this each time we're about to squirt the f/w into the GuC; but, we don't need to allocate this more than once (OTOH it would be a violation of modularity for the loader to know that; only the submission code needs to know that little detail). So we end up with the above do-this-only-once code. >> + guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize); >> + if (!guc->ctx_pool_obj) >> + return -ENOMEM; >> + >> + spin_lock_init(&dev_priv->guc.host2guc_lock); >> + >> + ida_init(&guc->ctx_ids); >> + >> + memset(guc->doorbell_bitmap, 0, sizeof(guc->doorbell_bitmap)); >> + guc->db_cacheline = 0; > > Before you relied on guc being zeroed, and now you memset it again. Hmm ... perhaps we should rezero some of these things /every/ time instead? /me examines code ... Nope; it looks like everything is once again zero at the point when this function takes the early exit. >> + return 0; >> +} >> + >> +void i915_guc_submission_fini(struct drm_device *dev) >> +{ >> + struct drm_i915_private *dev_priv = dev->dev_private; >> + struct intel_guc *guc = &dev_priv->guc; >> + >> + gem_release_guc_obj(dev_priv->guc.log_obj); >> + guc->log_obj = NULL; >> + >> + if (guc->ctx_pool_obj) >> + ida_destroy(&guc->ctx_ids); > > Interesting guard. Maybe just make the GuC controller a pointer from > i915 and then you can do a more natural if (i915->guc == NULL) return; That test was because there's no way to tell from ctx_ids itself whether it was initialised (in any case, it's a black box as far as I'm concerned). But the init code above guarantees that iff the pool was allocated, then the rest of the initialisation was also done, so we should call ida_destroy(). I wouldn't object to changing the intel_guc to a separate allocation rather than embedding it. We'd need to add a backpointer though as we currently use container_of() inside the guc_to_i915 macro. 
But you'd still end up with something like the above, because the allocation of the ctx_pool is still a separate step that can fail after the intel_guc structure has been allocated; and you need the f/w-loading-related data very early. The only saving is a small amount of memory, for an cost of extra memory dereference at various points. So probably not worth it. >> + gem_release_guc_obj(guc->ctx_pool_obj); >> + guc->ctx_pool_obj = NULL; >> +} >> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h >> index 0b44265..06b68c2 100644 >> --- a/drivers/gpu/drm/i915/intel_guc.h >> +++ b/drivers/gpu/drm/i915/intel_guc.h >> @@ -171,4 +171,8 @@ extern void intel_guc_ucode_init(struct drm_device *dev); >> extern int intel_guc_ucode_load(struct drm_device *dev, bool wait); >> extern void intel_guc_ucode_fini(struct drm_device *dev); >> >> +/* i915_guc_submission.c */ >> +int i915_guc_submission_init(struct drm_device *dev); >> +void i915_guc_submission_fini(struct drm_device *dev); >> + >> #endif >> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c >> index 16eef4c..0f74876 100644 >> --- a/drivers/gpu/drm/i915/intel_guc_loader.c >> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c >> @@ -111,6 +111,21 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) >> i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; >> } >> >> + /* If GuC scheduling is enabled, setup params here. */ >> + if (i915.enable_guc_submission) { >> + u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); >> + u32 ctx_in_16 = MAX_GUC_GPU_CONTEXTS / 16; > > So really you didn't need to pin the ctx_pool_obj until this point? Possibly. But that's not long after the allocation above (it's called from the next function that the caller of i915_guc_submission_init() calls after a successful return from that function). 
intel_guc_ucode_load() i915_guc_submission_init() gem_allocate_guc_obj() -- returns pinned object guc_ucode_xfer() set_guc_init_params() -- needs GGTT address of pinned object And we really don't want any extra failure paths at this depth. Better to pin the object early and bail out if there's a problem. It's going to be pinned for its entire lifetime anyway, so I don't think there's a problem with pinning it a few microseconds early, especially /during first open/ when there's no contention for GGTT space. >> + pgs >>= PAGE_SHIFT; >> + params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | >> + (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); >> + >> + params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; >> + >> + /* Unmask this bit to enable GuC scheduler */ >> + params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; This line deserves the comment firstly because we explicitly set this bit earlier in the function, but have now decided to clear it again (it was tidier than having unbalanced if-else legs); and secondly to help people not get confused by the number of negations (&= ~x means clear something, but what we're clearing has negative semantics "disable"). So it does convey our intent ("why? to enable the GuC scheduler") as well as how ("*un*mask this bit"). [aside] At least the GuC parameter semantics are not as ugly as some workarounds in the BSpec, where I regularly find things such as "this workaround disables feature A when using option B but need not be applied if condition C holds unless condition D is false or feature E is disabled. The workaround must not be applied in mode F." *Bleeuurgh* [/aside] > /* Enable multiple context submission through the GuC */ > params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; > params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; > > Try to keep comments to explain why rather than what. Most of the > comments here fall into the "i++; // postincrement i" category. 
> -Chris Most of the "what" comments in this file are associated with accesses to specific h/w registers, which therefore have semantic effect beyond what is explicit in the code. For example this comment: /* tell all command streamers to forward interrupts and vblank to GuC */ irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK,GFX_FORWARD_VBLANK_ALWAYS); irqs |= _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); for_each_ring(ring, dev_priv, i) I915_WRITE(RING_MODE_GEN7(ring), irqs); helps the reader see what the /effect/ of the writes is intended to be. It's quite different from: /* write bitmask to GEN7 ring mode register */ I915_WRITE(RING_MODE_GEN7(ring),MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING)); and means you may not have to dig through the BSpec to find out what the less helpfully-named bits actually do. And this: I915_WRITE(DE_GUCRMR, ~0); would be incomprehensible without reading the BSpec ... or the comment /* tell DE to send nothing to GuC */ .Dave.
On 19/06/15 18:02, Dave Gordon wrote: > On 15/06/15 22:32, Chris Wilson wrote: [snip] >> Try to keep comments to explain why rather than what. Most of the >> comments here fall into the "i++; // postincrement i" category. >> -Chris > > Most of the "what" comments in this file are associated with accesses to > specific h/w registers, which therefore have semantic effect beyond what > is explicit in the code. For example this comment: > > /* tell all command streamers to forward interrupts and vblank to GuC */ > irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK,GFX_FORWARD_VBLANK_ALWAYS); > irqs |= _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); > for_each_ring(ring, dev_priv, i) > I915_WRITE(RING_MODE_GEN7(ring), irqs); > > helps the reader what the /effect/ of the writes is intended to be. It's > quite different from: > > /* write bitmask to GEN7 ring mode register */ > I915_WRITE(RING_MODE_GEN7(ring),MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING)); > > and means you may not have to dig through the BSpec to find out what the > less helpfully-named bits actually do. And this: > > I915_WRITE(DE_GUCRMR, ~0); > > would be incomprehensible without reading the BSpec ... or the comment > > /* tell DE to send nothing to GuC */ > > .Dave. Oops, those comments aren't actually in this patch, they're in a later one. But they *will* be in this file by the end of the patchset ... .Dave.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 15818df..4dbd6b5 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -42,7 +42,8 @@ i915-y += i915_cmd_parser.o \ i915-y += intel_uc_loader.o # general-purpose microcontroller (GuC) support -i915-y += intel_guc_loader.o +i915-y += intel_guc_loader.o \ + i915_guc_submission.o # autogenerated null render state i915-y += intel_renderstate_gen6.o \ diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c new file mode 100644 index 0000000..273cf38 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -0,0 +1,122 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ +#include <linux/firmware.h> +#include <linux/circ_buf.h> +#include "i915_drv.h" +#include "intel_guc.h" + +/** + * gem_allocate_guc_obj() - Allocate gem object for GuC usage + * @dev: drm device + * @size: size of object + * + * This is a wrapper to create a gem obj. In order to use it inside GuC, the + * object needs to be pinned lifetime. Also we must pin it to gtt space other + * than [0, GUC_WOPCM_SIZE] because this range is reserved inside GuC. + * + * Return: A drm_i915_gem_object if successful, otherwise NULL. + */ +static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev, + u32 size) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_alloc_object(dev, size); + if (!obj) + return NULL; + + if (i915_gem_object_get_pages(obj)) { + drm_gem_object_unreference(&obj->base); + return NULL; + } + + if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, + PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE)) { + drm_gem_object_unreference(&obj->base); + return NULL; + } + + return obj; +} + +/** + * gem_release_guc_obj() - Release gem object allocated for GuC usage + * @obj: gem obj to be released + */ +static void gem_release_guc_obj(struct drm_i915_gem_object *obj) +{ + if (!obj) + return; + + if (i915_gem_obj_is_pinned(obj)) + i915_gem_object_ggtt_unpin(obj); + + drm_gem_object_unreference(&obj->base); +} + +/* + * Set up the memory resources to be shared with the GuC. At this point, + * we require just one object that can be mapped through the GGTT. 
+ */ +int i915_guc_submission_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + const size_t ctxsize = sizeof(struct guc_context_desc); + const size_t poolsize = MAX_GUC_GPU_CONTEXTS * ctxsize; + const size_t gemsize = round_up(poolsize, PAGE_SIZE); + struct intel_guc *guc = &dev_priv->guc; + + if (!i915.enable_guc_submission) + return 0; /* not enabled */ + + if (guc->ctx_pool_obj) + return 0; /* already allocated */ + + guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize); + if (!guc->ctx_pool_obj) + return -ENOMEM; + + spin_lock_init(&dev_priv->guc.host2guc_lock); + + ida_init(&guc->ctx_ids); + + memset(guc->doorbell_bitmap, 0, sizeof(guc->doorbell_bitmap)); + guc->db_cacheline = 0; + + return 0; +} + +void i915_guc_submission_fini(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + gem_release_guc_obj(dev_priv->guc.log_obj); + guc->log_obj = NULL; + + if (guc->ctx_pool_obj) + ida_destroy(&guc->ctx_ids); + gem_release_guc_obj(guc->ctx_pool_obj); + guc->ctx_pool_obj = NULL; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 0b44265..06b68c2 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -171,4 +171,8 @@ extern void intel_guc_ucode_init(struct drm_device *dev); extern int intel_guc_ucode_load(struct drm_device *dev, bool wait); extern void intel_guc_ucode_fini(struct drm_device *dev); +/* i915_guc_submission.c */ +int i915_guc_submission_init(struct drm_device *dev); +void i915_guc_submission_fini(struct drm_device *dev); + #endif diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 16eef4c..0f74876 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -111,6 +111,21 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) i915.guc_log_level << 
GUC_LOG_VERBOSITY_SHIFT; } + /* If GuC scheduling is enabled, setup params here. */ + if (i915.enable_guc_submission) { + u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); + u32 ctx_in_16 = MAX_GUC_GPU_CONTEXTS / 16; + + pgs >>= PAGE_SHIFT; + params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | + (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); + + params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; + + /* Unmask this bit to enable GuC scheduler */ + params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; + } + I915_WRITE(SOFT_SCRATCH(0), 0); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) @@ -387,6 +402,10 @@ int intel_guc_ucode_load(struct drm_device *dev, bool wait) if (err) goto fail; + err = i915_guc_submission_init(dev); + if (err) + goto fail; + err = guc_ucode_xfer(dev); if (err) goto fail; @@ -412,5 +431,7 @@ void intel_guc_ucode_fini(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw; + i915_guc_submission_fini(dev); + intel_uc_fw_fini(guc_fw); }