diff mbox

[05/15] drm/i915: GuC-specific firmware loader

Message ID 1434393394-21002-6-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon June 15, 2015, 6:36 p.m. UTC
From: Alex Dai <yu.dai@intel.com>

This uses the unified firmware loader to fetch the firmware image,
then loads it into the GuC's memory via a dedicated DMA engine.

This patch is derived from GuC loading work originally done by
Vinit Azad and Ben Widawsky. It has been reconstructed to accord
with the unified firmware loading mechanism by Dave Gordon as well
as new firmware layout etc.

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/Makefile           |    3 +
 drivers/gpu/drm/i915/i915_dma.c         |    4 +
 drivers/gpu/drm/i915/i915_drv.h         |   11 +
 drivers/gpu/drm/i915/i915_gem.c         |    2 +
 drivers/gpu/drm/i915/intel_guc.h        |    5 +
 drivers/gpu/drm/i915/intel_guc_loader.c |  416 +++++++++++++++++++++++++++++++
 6 files changed, 441 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c

Comments

Chris Wilson June 15, 2015, 8:30 p.m. UTC | #1
On Mon, Jun 15, 2015 at 07:36:23PM +0100, Dave Gordon wrote:
> +	/* We can't enable contexts until all firmware is loaded */
> +	ret = intel_guc_ucode_load(dev, false);

Pardon. I know context initialisation is broken, but adding to that
breakage is not pleasant.

>  	ret = i915_gem_context_enable(dev_priv);
>  	if (ret && ret != -EIO) {
>  		DRM_ERROR("Context enable failed %d\n", ret);

> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 82367c9..0b44265 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -166,4 +166,9 @@ struct intel_guc {
>  #define GUC_WD_VECS_IER		0xC558
>  #define GUC_PM_P24C_IER		0xC55C
>  
> +/* intel_guc_loader.c */
> +extern void intel_guc_ucode_init(struct drm_device *dev);
> +extern int intel_guc_ucode_load(struct drm_device *dev, bool wait);
> +extern void intel_guc_ucode_fini(struct drm_device *dev);
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> new file mode 100644
> index 0000000..16eef4c
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -0,0 +1,416 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Vinit Azad <vinit.azad@intel.com>
> + *    Ben Widawsky <ben@bwidawsk.net>
> + *    Dave Gordon <david.s.gordon@intel.com>
> + *    Alex Dai <yu.dai@intel.com>
> + */
> +#include <linux/firmware.h>
> +#include "i915_drv.h"
> +#include "intel_guc.h"
> +
> +/**
> + * DOC: GuC
> + *
> + * intel_guc:
> + * Top level structure of guc. It handles firmware loading and manages client
> + * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy
> + * ExecList submission.
> + *
> + * Firmware versioning:
> + * The firmware build process will generate a version header file with major and
> + * minor version defined. The versions are built into CSS header of firmware.
> + * i915 kernel driver set the minimal firmware version required per platform.
> + * The firmware installation package will install (symbolic link) proper version
> + * of firmware.
> + *
> + * GuC address space:
> + * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP),
> + * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is
> + * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects
> + * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM.
> + *
> + * Firmware log:
> + * Firmware log is enabled by setting i915.guc_log_level to non-negative level.
> + * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from
> + * i915_guc_load_status will print out firmware loading status and scratch
> + * registers value.
> + *
> + */
> +
> +#define I915_SKL_GUC_UCODE "i915/skl_guc_ver3.bin"
> +MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
> +
> +static u32 get_gttype(struct drm_device *dev)
> +{
> +	/* XXX: GT type based on PCI device ID? field seems unused by fw */
> +	return 0;
> +}
> +
> +static u32 get_core_family(struct drm_device *dev)

For new code we really should be in the habit of passing around the
right pointer, not dev.

> +{
> +	switch (INTEL_INFO(dev)->gen) {
> +	case 8:
> +		return GFXCORE_FAMILY_GEN8;
> +	case 9:
> +		return GFXCORE_FAMILY_GEN9;
> +	default:
> +		DRM_ERROR("GUC: unknown gen for scheduler init\n");
> +		return GFXCORE_FAMILY_FORCE_ULONG;
> +	}
> +}
> +
> +static void set_guc_init_params(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_guc *guc = &dev_priv->guc;
> +	u32 params[GUC_CTL_MAX_DWORDS];
> +	int i;
> +
> +	memset(&params, 0, sizeof(params));
> +
> +	params[GUC_CTL_DEVICE_INFO] |=
> +		(get_gttype(dev_priv->dev) << GUC_CTL_GTTYPE_SHIFT) |
> +		(get_core_family(dev_priv->dev) << GUC_CTL_COREFAMILY_SHIFT);
> +
> +	/* GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
> +	 * second. This ARAR is calculated by:
> +	 * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10
> +	 */
> +	params[GUC_CTL_ARAT_HIGH] = 0;
> +	params[GUC_CTL_ARAT_LOW] = 100000000;
> +
> +	params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER;
> +
> +	params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER |
> +			GUC_CTL_VCS2_ENABLED;
> +
> +	if (i915.guc_log_level >= 0) {
> +		params[GUC_CTL_LOG_PARAMS] = guc->log_flags;
> +		params[GUC_CTL_DEBUG] =
> +			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
> +	}
> +
> +	I915_WRITE(SOFT_SCRATCH(0), 0);
> +
> +	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
> +		I915_WRITE(SOFT_SCRATCH(1 + i), params[i]);
> +}
> +
> +/* Read GuC status register (GUC_STATUS)
> + * Return true if get a success code from normal boot or RC6 boot
> + */
> +static inline bool i915_guc_get_status(struct drm_i915_private *dev_priv,
> +					u32 *status)
> +{
> +	*status = I915_READ(GUC_STATUS);
> +	return (((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_READY ||
> +		((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_LAPIC_DONE);

Weird function. Does two things, only one of those is get_status. Maybe
you would like to split this up better and use a switch when you mean a
switch. Or rename it to reflect its use only as a condition.

> +}
> +
> +/* Transfers the firmware image to RAM for execution by the microcontroller.
> + *
> + * GuC Firmware layout:
> + * +-------------------------------+  ----
> + * |          CSS header           |  128B
> + * +-------------------------------+  ----
> + * |             uCode             |
> + * +-------------------------------+  ----
> + * |         RSA signature         |  256B
> + * +-------------------------------+  ----
> + * |         RSA public Key        |  256B
> + * +-------------------------------+  ----
> + * |       Public key modulus      |    4B
> + * +-------------------------------+  ----
> + *
> + * Architecturally, the DMA engine is bidirectional, and in can potentially
> + * even transfer between GTT locations. This functionality is left out of the
> + * API for now as there is no need for it.
> + *
> + * Be note that GuC need the CSS header plus uKernel code to be copied as one
> + * chunk of data. RSA sig data is loaded via MMIO.
> + */
> +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
> +	struct drm_i915_gem_object *fw_obj = guc_fw->uc_fw_obj;
> +	unsigned long offset;
> +	struct sg_table *sg = fw_obj->pages;
> +	u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)];
> +	int i, ret = 0;
> +
> +	/* uCode size, also is where RSA signature starts */
> +	offset = ucode_size = guc_fw->uc_fw_size - UOS_CSS_SIGNING_SIZE;
> +
> +	/* Copy RSA signature from the fw image to HW for verification */
> +	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset);
> +	for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++)
> +		I915_WRITE(UOS_RSA_SCRATCH_0 + i * sizeof(u32), rsa[i]);
> +
> +	/* Set the source address for the new blob */
> +	offset = i915_gem_obj_ggtt_offset(fw_obj);

Why would it even have a GGTT vma? There's no precondition here to
assert that it should.

> +	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
> +	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
> +
> +	/* Set the destination. Current uCode expects an 8k stack starting from
> +	 * offset 0. */
> +	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
> +
> +	/* XXX: The image is automatically transfered to SRAM after the RSA
> +	 * verification. This is why the address space is chosen as such. */
> +	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
> +
> +	I915_WRITE(DMA_COPY_SIZE, ucode_size);
> +
> +	/* Finally start the DMA */
> +	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> +

Just assuming that the writes land and in the order you expect?

> +	/*
> +	 * Spin-wait for the DMA to complete & the GuC to start up.
> +	 * NB: Docs recommend not using the interrupt for completion.
> +	 * FIXME: what's a valid timeout?
> +	 */
> +	ret = wait_for_atomic(i915_guc_get_status(dev_priv, &status), 10);

FIXME, error handling is too hard.

> +	DRM_DEBUG_DRIVER("DMA status = 0x%x, GuC status 0x%x\n",
> +			I915_READ(DMA_CTRL), status);
> +
> +	if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
> +		DRM_ERROR("%s firmware signature verification failed\n",
> +			guc_fw->uc_name);
> +		ret = -ENOEXEC;
> +	}
> +
> +	DRM_DEBUG_DRIVER("GuC fw load status %s %d\n",
> +			ret ? "FAIL" : "SUCCESS", ret);
> +
> +	return ret;
> +}

I'm guessing the other functions are basically more of the same...
-Chris
yu.dai@intel.com June 18, 2015, 5:53 p.m. UTC | #2
On 06/15/2015 01:30 PM, Chris Wilson wrote:
> On Mon, Jun 15, 2015 at 07:36:23PM +0100, Dave Gordon wrote:
> ----snip----
> > + * Return true if get a success code from normal boot or RC6 boot
> > + */
> > +static inline bool i915_guc_get_status(struct drm_i915_private *dev_priv,
> > +					u32 *status)
> > +{
> > +	*status = I915_READ(GUC_STATUS);
> > +	return (((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_READY ||
> > +		((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_LAPIC_DONE);
>
> Weird function. Does two things, only one of those is get_status. Maybe
> you would like to split this up better and use a switch when you mean a
> switch. Or rename it to reflect it's use only as a condition.
Yes. It makes sense to change it to something like 
i915_guc_is_ucode_loaded().
> > +}
> > +
> > +/* Transfers the firmware image to RAM for execution by the microcontroller.
> > + *
> > + * GuC Firmware layout:
> > + * +-------------------------------+  ----
> > + * |          CSS header           |  128B
> > + * +-------------------------------+  ----
> > + * |             uCode             |
> > + * +-------------------------------+  ----
> > + * |         RSA signature         |  256B
> > + * +-------------------------------+  ----
> > + * |         RSA public Key        |  256B
> > + * +-------------------------------+  ----
> > + * |       Public key modulus      |    4B
> > + * +-------------------------------+  ----
> > + *
> > + * Architecturally, the DMA engine is bidirectional, and in can potentially
> > + * even transfer between GTT locations. This functionality is left out of the
> > + * API for now as there is no need for it.
> > + *
> > + * Be note that GuC need the CSS header plus uKernel code to be copied as one
> > + * chunk of data. RSA sig data is loaded via MMIO.
> > + */
> > +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
> > +	struct drm_i915_gem_object *fw_obj = guc_fw->uc_fw_obj;
> > +	unsigned long offset;
> > +	struct sg_table *sg = fw_obj->pages;
> > +	u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)];
> > +	int i, ret = 0;
> > +
> > +	/* uCode size, also is where RSA signature starts */
> > +	offset = ucode_size = guc_fw->uc_fw_size - UOS_CSS_SIGNING_SIZE;
> > +
> > +	/* Copy RSA signature from the fw image to HW for verification */
> > +	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset);
> > +	for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++)
> > +		I915_WRITE(UOS_RSA_SCRATCH_0 + i * sizeof(u32), rsa[i]);
> > +
> > +	/* Set the source address for the new blob */
> > +	offset = i915_gem_obj_ggtt_offset(fw_obj);
>
> Why would it even have a GGTT vma? There's no precondition here to
> assert that it should.
It is pinned into GGTT inside gem_allocate_guc_obj.
> > +	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
> > +	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
> > +
> > +	/* Set the destination. Current uCode expects an 8k stack starting from
> > +	 * offset 0. */
> > +	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
> > +
> > +	/* XXX: The image is automatically transfered to SRAM after the RSA
> > +	 * verification. This is why the address space is chosen as such. */
> > +	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
> > +
> > +	I915_WRITE(DMA_COPY_SIZE, ucode_size);
> > +
> > +	/* Finally start the DMA */
> > +	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> > +
>
> Just assuming that the writes land and in the order you expect?
Would a POSTING_READ of DMA_COPY_SIZE before issuing the DMA be enough here?
Or should every one of those writes get its own POSTING_READ?

-Alex
Dave Gordon June 18, 2015, 6:54 p.m. UTC | #3
On 15/06/15 21:30, Chris Wilson wrote:
> On Mon, Jun 15, 2015 at 07:36:23PM +0100, Dave Gordon wrote:
>> +	/* We can't enable contexts until all firmware is loaded */
>> +	ret = intel_guc_ucode_load(dev, false);
> 
> Pardon. I know context initialisation is broken, but adding to that
> breakage is not pleasant.

Sorry, but that's just the way it works. If you want to use the GuC for
batch submission, then you cannot submit any commands to any engine via
the GuC before its firmware is loaded, nor can you submit anything at
all directly to the ELSPs.

However in /this/ patch the 'false' above should have been 'true' to
give synchronous load semantics; and then ignoring the return is
intentional, because either it's worked and we're going to use the GuC,
or it hasn't and we're not (and it's already printed a message). Then
there's a later patch that tries to decouple engine MMIO setup from
engine setup using batches & contexts, at which point we can make use of
the return code.

>>  	ret = i915_gem_context_enable(dev_priv);
>>  	if (ret && ret != -EIO) {
>>  		DRM_ERROR("Context enable failed %d\n", ret);
> 
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
>> index 82367c9..0b44265 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -166,4 +166,9 @@ struct intel_guc {
>>  #define GUC_WD_VECS_IER		0xC558
>>  #define GUC_PM_P24C_IER		0xC55C
>>  
>> +/* intel_guc_loader.c */
>> +extern void intel_guc_ucode_init(struct drm_device *dev);
>> +extern int intel_guc_ucode_load(struct drm_device *dev, bool wait);
>> +extern void intel_guc_ucode_fini(struct drm_device *dev);
>> +
>>  #endif
>> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
>> new file mode 100644
>> index 0000000..16eef4c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
>> @@ -0,0 +1,416 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Vinit Azad <vinit.azad@intel.com>
>> + *    Ben Widawsky <ben@bwidawsk.net>
>> + *    Dave Gordon <david.s.gordon@intel.com>
>> + *    Alex Dai <yu.dai@intel.com>
>> + */
>> +#include <linux/firmware.h>
>> +#include "i915_drv.h"
>> +#include "intel_guc.h"
>> +
>> +/**
>> + * DOC: GuC
>> + *
>> + * intel_guc:
>> + * Top level structure of guc. It handles firmware loading and manages client
>> + * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy
>> + * ExecList submission.
>> + *
>> + * Firmware versioning:
>> + * The firmware build process will generate a version header file with major and
>> + * minor version defined. The versions are built into CSS header of firmware.
>> + * i915 kernel driver set the minimal firmware version required per platform.
>> + * The firmware installation package will install (symbolic link) proper version
>> + * of firmware.
>> + *
>> + * GuC address space:
>> + * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP),
>> + * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is
>> + * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects
>> + * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM.
>> + *
>> + * Firmware log:
>> + * Firmware log is enabled by setting i915.guc_log_level to non-negative level.
>> + * Log data is printed out via reading debugfs i915_guc_log_dump. Reading from
>> + * i915_guc_load_status will print out firmware loading status and scratch
>> + * registers value.
>> + *
>> + */
>> +
>> +#define I915_SKL_GUC_UCODE "i915/skl_guc_ver3.bin"
>> +MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
>> +
>> +static u32 get_gttype(struct drm_device *dev)
>> +{
>> +	/* XXX: GT type based on PCI device ID? field seems unused by fw */
>> +	return 0;
>> +}
>> +
>> +static u32 get_core_family(struct drm_device *dev)
> 
> For new code we really should be in the habit of passing around the
> right pointer, not dev.

Good idea :) Especially as the caller actually passes dev_priv->dev!!

>> +{
>> +	switch (INTEL_INFO(dev)->gen) {
>> +	case 8:
>> +		return GFXCORE_FAMILY_GEN8;
>> +	case 9:
>> +		return GFXCORE_FAMILY_GEN9;
>> +	default:
>> +		DRM_ERROR("GUC: unknown gen for scheduler init\n");
>> +		return GFXCORE_FAMILY_FORCE_ULONG;
>> +	}
>> +}
>> +
>> +static void set_guc_init_params(struct drm_i915_private *dev_priv)
>> +{
>> +	struct intel_guc *guc = &dev_priv->guc;
>> +	u32 params[GUC_CTL_MAX_DWORDS];
>> +	int i;
>> +
>> +	memset(&params, 0, sizeof(params));
>> +
>> +	params[GUC_CTL_DEVICE_INFO] |=
>> +		(get_gttype(dev_priv->dev) << GUC_CTL_GTTYPE_SHIFT) |
>> +		(get_core_family(dev_priv->dev) << GUC_CTL_COREFAMILY_SHIFT);
>> +
>> +	/* GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
>> +	 * second. This ARAR is calculated by:
>> +	 * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10
>> +	 */
>> +	params[GUC_CTL_ARAT_HIGH] = 0;
>> +	params[GUC_CTL_ARAT_LOW] = 100000000;
>> +
>> +	params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER;
>> +
>> +	params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER |
>> +			GUC_CTL_VCS2_ENABLED;
>> +
>> +	if (i915.guc_log_level >= 0) {
>> +		params[GUC_CTL_LOG_PARAMS] = guc->log_flags;
>> +		params[GUC_CTL_DEBUG] =
>> +			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
>> +	}
>> +
>> +	I915_WRITE(SOFT_SCRATCH(0), 0);
>> +
>> +	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
>> +		I915_WRITE(SOFT_SCRATCH(1 + i), params[i]);
>> +}
>> +
>> +/* Read GuC status register (GUC_STATUS)
>> + * Return true if get a success code from normal boot or RC6 boot
>> + */
>> +static inline bool i915_guc_get_status(struct drm_i915_private *dev_priv,
>> +					u32 *status)
>> +{
>> +	*status = I915_READ(GUC_STATUS);
>> +	return (((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_READY ||
>> +		((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_LAPIC_DONE);
> 
> Weird function. Does two things, only one of those is get_status. Maybe
> you would like to split this up better and use a switch when you mean a
> switch. Or rename it to reflect it's use only as a condition.

The weirdness is down to the fact that it's passed as an argument to the
MACRO "wait_for_atomic()". The "caller" of wait_for_atomic() also wants
to see the status value that caused the MACRO to exit so it has to save
that indirectly via the pointer. We can't break the "status = READ()"
and "classify the result" stages into two separate functions because we
have to pass a single expression to the MACRO; both have to be inside
the generated loop.

So it may be weird, but at least it's simple; and the comment above does
tell you that it does two things. We could call it
i915_read_guc_status_and_test_whether_ready() if you like, but I think
that'll make the line where it's used more than 80 characters ;-(
Other (shorter) suggestions happily accepted.

Macros that repeatedly evaluate the text of their arguments are ugly :(

>> +}
>> +
>> +/* Transfers the firmware image to RAM for execution by the microcontroller.
>> + *
>> + * GuC Firmware layout:
>> + * +-------------------------------+  ----
>> + * |          CSS header           |  128B
>> + * +-------------------------------+  ----
>> + * |             uCode             |
>> + * +-------------------------------+  ----
>> + * |         RSA signature         |  256B
>> + * +-------------------------------+  ----
>> + * |         RSA public Key        |  256B
>> + * +-------------------------------+  ----
>> + * |       Public key modulus      |    4B
>> + * +-------------------------------+  ----
>> + *
>> + * Architecturally, the DMA engine is bidirectional, and in can potentially
>> + * even transfer between GTT locations. This functionality is left out of the
>> + * API for now as there is no need for it.
>> + *
>> + * Be note that GuC need the CSS header plus uKernel code to be copied as one
>> + * chunk of data. RSA sig data is loaded via MMIO.
>> + */
>> +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
>> +{
>> +	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
>> +	struct drm_i915_gem_object *fw_obj = guc_fw->uc_fw_obj;
>> +	unsigned long offset;
>> +	struct sg_table *sg = fw_obj->pages;
>> +	u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)];
>> +	int i, ret = 0;
>> +
>> +	/* uCode size, also is where RSA signature starts */
>> +	offset = ucode_size = guc_fw->uc_fw_size - UOS_CSS_SIGNING_SIZE;
>> +
>> +	/* Copy RSA signature from the fw image to HW for verification */
>> +	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset);
>> +	for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++)
>> +		I915_WRITE(UOS_RSA_SCRATCH_0 + i * sizeof(u32), rsa[i]);
>> +
>> +	/* Set the source address for the new blob */
>> +	offset = i915_gem_obj_ggtt_offset(fw_obj);
> 
> Why would it even have a GGTT vma? There's no precondition here to
> assert that it should.

The (only) caller already did:

        ret = i915_gem_obj_ggtt_pin(guc_fw->uc_fw_obj, 0, 0);

and also deals with unpinning it after use.

>> +	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
>> +	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
>> +
>> +	/* Set the destination. Current uCode expects an 8k stack starting from
>> +	 * offset 0. */
>> +	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
>> +
>> +	/* XXX: The image is automatically transfered to SRAM after the RSA
>> +	 * verification. This is why the address space is chosen as such. */
>> +	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
>> +
>> +	I915_WRITE(DMA_COPY_SIZE, ucode_size);
>> +
>> +	/* Finally start the DMA */
>> +	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
>> +
> 
> Just assuming that the writes land and in the order you expect?

Yes. If they don't then the mapping of the MMIO registers is set up
wrong. No one should ever map h/w registers as writeback or
write-combining; or in fact anything other than uncached and strongly
ordered w.r.t. each other for both reads and writes.

Sometimes we need a POSTING_READ() to ensure that a WRITE has reached
the h/w before touching something other than a device register -- s/w
state or shared memory -- but not between consecutive writes to
registers of the same device.

The next operation is going to be a READ (inside i915_guc_get_status()
above), so that will flush (in order) any of the above writes that
haven't actually reached the h/w yet ...

>> +	/*
>> +	 * Spin-wait for the DMA to complete & the GuC to start up.
>> +	 * NB: Docs recommend not using the interrupt for completion.
>> +	 * FIXME: what's a valid timeout?
>> +	 */
>> +	ret = wait_for_atomic(i915_guc_get_status(dev_priv, &status), 10);
> 
> FIXME, error handling is too hard.

I got a new timeout value from the MinuteIA team, so I'll update that.
The error code is passed back for the caller to handle.

>> +	DRM_DEBUG_DRIVER("DMA status = 0x%x, GuC status 0x%x\n",
>> +			I915_READ(DMA_CTRL), status);
>> +
>> +	if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
>> +		DRM_ERROR("%s firmware signature verification failed\n",
>> +			guc_fw->uc_name);
>> +		ret = -ENOEXEC;
>> +	}
>> +
>> +	DRM_DEBUG_DRIVER("GuC fw load status %s %d\n",
>> +			ret ? "FAIL" : "SUCCESS", ret);
>> +
>> +	return ret;
>> +}
> 
> I'm guessing the other functions are basically more of the same...
> -Chris

?
.Dave.
Chris Wilson June 18, 2015, 8:12 p.m. UTC | #4
On Thu, Jun 18, 2015 at 10:53:10AM -0700, Yu Dai wrote:
> 
> 
> On 06/15/2015 01:30 PM, Chris Wilson wrote:
> >On Mon, Jun 15, 2015 at 07:36:23PM +0100, Dave Gordon wrote:
> >> +	/* Set the source address for the new blob */
> >> +	offset = i915_gem_obj_ggtt_offset(fw_obj);
> >
> >Why would it even have a GGTT vma? There's no precondition here to
> >assert that it should.
> It is pinned into GGTT inside gem_allocate_guc_obj.

The basic rules when reviewing pinning are:
- is there a reason for this pin?
- is the lifetime of the pin bound to the hardware access?
- are the pad-to-size/alignment correct?
- is the vma in the wrong location?

Pinning early (and then not even stating in the function preamble that
you expect the object to be pinned) makes it hard to review both the
reason and check the lifetime. An easy solution to avoiding the
assumption of having a pinned object is to pass around the vma instead.
Though because you pin too early it is not clear the reason for the pin
nor that you only pin it for the lifetime of the hardware access, and
you have to scour the code to ensure that the pin isn't randomly dropped
or reused for another access.
-Chris
Dave Gordon June 19, 2015, 2:34 p.m. UTC | #5
On 18/06/15 21:12, Chris Wilson wrote:
> On Thu, Jun 18, 2015 at 10:53:10AM -0700, Yu Dai wrote:
>>
>>
>> On 06/15/2015 01:30 PM, Chris Wilson wrote:
>>> On Mon, Jun 15, 2015 at 07:36:23PM +0100, Dave Gordon wrote:
>>>> +	/* Set the source address for the new blob */
>>>> +	offset = i915_gem_obj_ggtt_offset(fw_obj);
>>>
>>> Why would it even have a GGTT vma? There's no precondition here to
>>> assert that it should.
>> It is pinned into GGTT inside gem_allocate_guc_obj.

This particular object wasn't allocated with that function; that's only
used for objects that need to be permanently accessible by the GuC
(context pool, GuC logbuffer, per-client structure). As I already
mentioned in another reply, /this/ one was pinned (and will be unpinned)
by the *immediate caller* of this function.

.Dave.

> The basic rules when reviewing is pinning is:
> - is there a reason for this pin?
> - is the lifetime of the pin bound to the hardware access?
> - are the pad-to-size/alignment correct?
> - is the vma in the wrong location?
> 
> Pinning early (and then not even stating in the function preamble that
> you expect the object to be pinned) makes it hard to review both the
> reason and check the lifetime. An easy solution to avoiding the
> assumption of having a pinned object is to pass around the vma instead.
> Though because you pin too early it is not clear the reason for the pin
> nor that you only pin it for the lifetime of the hardware access, and
> you have to scour the code to ensure that the pin isn't randomly dropped
> or reused for another access.
> -Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 607fa2a..15818df 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -41,6 +41,9 @@  i915-y += i915_cmd_parser.o \
 # generic ancilliary microcontroller support
 i915-y += intel_uc_loader.o
 
+# general-purpose microcontroller (GuC) support
+i915-y += intel_guc_loader.o
+
 # autogenerated null render state
 i915-y += intel_renderstate_gen6.o \
 	  intel_renderstate_gen7.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3424863..028dbff 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -465,6 +465,7 @@  static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
 	mutex_lock(&dev->struct_mutex);
+	intel_guc_ucode_fini(dev);
 	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -862,6 +863,8 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	intel_uncore_init(dev);
 
+	intel_guc_ucode_init(dev);
+
 	/* Load CSR Firmware for SKL */
 	intel_csr_ucode_init(dev);
 
@@ -1113,6 +1116,7 @@  int i915_driver_unload(struct drm_device *dev)
 	flush_workqueue(dev_priv->wq);
 
 	mutex_lock(&dev->struct_mutex);
+	intel_guc_ucode_fini(dev);
 	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 731a1c8..f47cde7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -50,6 +50,7 @@ 
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include "intel_guc.h"
 
 /* General customization:
  */
@@ -1669,6 +1670,8 @@  struct drm_i915_private {
 
 	struct intel_gmbus gmbus[GMBUS_NUM_PINS];
 
+	struct intel_guc guc;
+
 	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
 	 * controller on different i2c buses. */
 	struct mutex gmbus_mutex;
@@ -1913,6 +1916,11 @@  static inline struct drm_i915_private *dev_to_i915(struct device *dev)
 	return to_i915(dev_get_drvdata(dev));
 }
 
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+	return container_of(guc, struct drm_i915_private, guc);
+}
+
 /* Iterate over initialised rings */
 #define for_each_ring(ring__, dev_priv__, i__) \
 	for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
@@ -2503,6 +2511,9 @@  struct drm_i915_cmd_table {
 
 #define HAS_CSR(dev)	(IS_SKYLAKE(dev))
 
+#define HAS_GUC_UCODE(dev)	(IS_GEN9(dev))
+#define HAS_GUC_SCHED(dev)	(IS_GEN9(dev))
+
 #define INTEL_PCH_DEVICE_ID_MASK		0xff00
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE		0x3b00
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE		0x1c00
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 75d63c2..cd4a865 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5023,6 +5023,8 @@  i915_gem_init_hw(struct drm_device *dev)
 		i915_gem_cleanup_ringbuffer(dev);
 	}
 
+	/* We can't enable contexts until all firmware is loaded */
+	ret = intel_guc_ucode_load(dev, false);
 	ret = i915_gem_context_enable(dev_priv);
 	if (ret && ret != -EIO) {
 		DRM_ERROR("Context enable failed %d\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 82367c9..0b44265 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -166,4 +166,9 @@  struct intel_guc {
 #define GUC_WD_VECS_IER		0xC558
 #define GUC_PM_P24C_IER		0xC55C
 
+/* intel_guc_loader.c */
+extern void intel_guc_ucode_init(struct drm_device *dev);
+extern int intel_guc_ucode_load(struct drm_device *dev, bool wait);
+extern void intel_guc_ucode_fini(struct drm_device *dev);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
new file mode 100644
index 0000000..16eef4c
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -0,0 +1,416 @@ 
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Vinit Azad <vinit.azad@intel.com>
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *    Dave Gordon <david.s.gordon@intel.com>
+ *    Alex Dai <yu.dai@intel.com>
+ */
+#include <linux/firmware.h>
+#include "i915_drv.h"
+#include "intel_guc.h"
+
+/**
+ * DOC: GuC
+ *
+ * intel_guc:
+ * Top level structure of guc. It handles firmware loading and manages client
+ * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy
+ * ExecList submission.
+ *
+ * Firmware versioning:
+ * The firmware build process generates a version header file with the major and
+ * minor versions defined. The versions are built into the firmware CSS header.
+ * The i915 kernel driver sets the minimum firmware version required for each
+ * platform. The firmware installation package installs (as a symbolic link) the
+ * proper version of the firmware.
+ *
+ * GuC address space:
+ * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP),
+ * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is
+ * 512K. In order to exclude the 0-512K address space from GGTT, all gfx objects
+ * used by GuC are pinned with PIN_OFFSET_BIAS along with the size of WOPCM.
+ *
+ * Firmware log:
+ * The firmware log is enabled by setting i915.guc_log_level to a non-negative
+ * level. Log data is printed out by reading debugfs i915_guc_log_dump. Reading
+ * from i915_guc_load_status prints out the firmware loading status and scratch
+ * register values.
+ *
+ */
+
+#define I915_SKL_GUC_UCODE "i915/skl_guc_ver3.bin"
+MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
+
+static u32 get_gttype(struct drm_device *dev)
+{
+	/* XXX: GT type based on PCI device ID? field seems unused by fw */
+	return 0;
+}
+
+static u32 get_core_family(struct drm_device *dev)
+{
+	switch (INTEL_INFO(dev)->gen) {
+	case 8:
+		return GFXCORE_FAMILY_GEN8;
+	case 9:
+		return GFXCORE_FAMILY_GEN9;
+	default:
+		DRM_ERROR("GUC: unknown gen for scheduler init\n");
+		return GFXCORE_FAMILY_FORCE_ULONG;
+	}
+}
+
+static void set_guc_init_params(struct drm_i915_private *dev_priv)
+{
+	struct intel_guc *guc = &dev_priv->guc;
+	u32 params[GUC_CTL_MAX_DWORDS];
+	int i;
+
+	memset(&params, 0, sizeof(params));
+
+	params[GUC_CTL_DEVICE_INFO] |=
+		(get_gttype(dev_priv->dev) << GUC_CTL_GTTYPE_SHIFT) |
+		(get_core_family(dev_priv->dev) << GUC_CTL_COREFAMILY_SHIFT);
+
+	/* GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
+	 * second. This ARAT value is calculated by:
+	 * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10
+	 */
+	params[GUC_CTL_ARAT_HIGH] = 0;
+	params[GUC_CTL_ARAT_LOW] = 100000000;
+
+	params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER;
+
+	params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER |
+			GUC_CTL_VCS2_ENABLED;
+
+	if (i915.guc_log_level >= 0) {
+		params[GUC_CTL_LOG_PARAMS] = guc->log_flags;
+		params[GUC_CTL_DEBUG] =
+			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
+	}
+
+	I915_WRITE(SOFT_SCRATCH(0), 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		I915_WRITE(SOFT_SCRATCH(1 + i), params[i]);
+}
+
+/* Read the GuC status register (GUC_STATUS) into *status.
+ * Return true if it reports a success code from normal boot or RC6 boot.
+ */
+static inline bool i915_guc_get_status(struct drm_i915_private *dev_priv,
+					u32 *status)
+{
+	*status = I915_READ(GUC_STATUS);
+	return (((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_READY ||
+		((*status) & GS_UKERNEL_MASK) == GS_UKERNEL_LAPIC_DONE);
+}
+
+/* Transfers the firmware image to RAM for execution by the microcontroller.
+ *
+ * GuC Firmware layout:
+ * +-------------------------------+  ----
+ * |          CSS header           |  128B
+ * +-------------------------------+  ----
+ * |             uCode             |
+ * +-------------------------------+  ----
+ * |         RSA signature         |  256B
+ * +-------------------------------+  ----
+ * |         RSA public Key        |  256B
+ * +-------------------------------+  ----
+ * |       Public key modulus      |    4B
+ * +-------------------------------+  ----
+ *
+ * Architecturally, the DMA engine is bidirectional, and it can potentially
+ * even transfer between GTT locations. This functionality is left out of the
+ * API for now as there is no need for it.
+ *
+ * Note that the GuC needs the CSS header plus uKernel code to be copied as one
+ * chunk of data. The RSA signature data is loaded via MMIO.
+ */
+static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
+{
+	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
+	struct drm_i915_gem_object *fw_obj = guc_fw->uc_fw_obj;
+	unsigned long offset;
+	struct sg_table *sg = fw_obj->pages;
+	u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)];
+	int i, ret = 0;
+
+	/* uCode size, also is where RSA signature starts */
+	offset = ucode_size = guc_fw->uc_fw_size - UOS_CSS_SIGNING_SIZE;
+
+	/* Copy RSA signature from the fw image to HW for verification */
+	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset);
+	for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++)
+		I915_WRITE(UOS_RSA_SCRATCH_0 + i * sizeof(u32), rsa[i]);
+
+	/* Set the source address for the new blob */
+	offset = i915_gem_obj_ggtt_offset(fw_obj);
+	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
+	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
+
+	/* Set the destination. Current uCode expects an 8k stack starting from
+	 * offset 0. */
+	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
+
+	/* XXX: The image is automatically transferred to SRAM after the RSA
+	 * verification. This is why the address space is chosen as such. */
+	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+
+	I915_WRITE(DMA_COPY_SIZE, ucode_size);
+
+	/* Finally start the DMA */
+	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
+
+	/*
+	 * Spin-wait for the DMA to complete & the GuC to start up.
+	 * NB: Docs recommend not using the interrupt for completion.
+	 * FIXME: what's a valid timeout?
+	 */
+	ret = wait_for_atomic(i915_guc_get_status(dev_priv, &status), 10);
+
+	DRM_DEBUG_DRIVER("DMA status = 0x%x, GuC status 0x%x\n",
+			I915_READ(DMA_CTRL), status);
+
+	if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+		DRM_ERROR("%s firmware signature verification failed\n",
+			guc_fw->uc_name);
+		ret = -ENOEXEC;
+	}
+
+	DRM_DEBUG_DRIVER("GuC fw load status %s %d\n",
+			ret ? "FAIL" : "SUCCESS", ret);
+
+	return ret;
+}
+
+/*
+ * Loads the GuC firmware blob into the MinuteIA.
+ */
+static int guc_ucode_xfer(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
+	bool pinned = false;
+	int ret;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	ret = i915_gem_obj_ggtt_pin(guc_fw->uc_fw_obj, 0, 0);
+	if (ret)
+		goto out;
+	pinned = true;
+
+	/* init WOPCM */
+	I915_WRITE(GUC_WOPCM_SIZE, GUC_WOPCM_SIZE_VALUE);
+	I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET);
+
+	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
+	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
+	/* Set MMIO/WA for GuC init */
+	I915_WRITE(DRBMISC1, DOORBELL_ENABLE);
+
+	/* Enable MIA caching. GuC clock gating is disabled. */
+	I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
+
+	/* WaC6DisallowByGfxPause*/
+	I915_WRITE(GEN6_GFXPAUSE, 0x30FFF);
+
+	if (IS_SKYLAKE(dev))
+		I915_WRITE(GEN9_GT_PM_CONFIG, GEN8_GT_DOORBELL_ENABLE);
+	else
+		I915_WRITE(GEN8_GT_PM_CONFIG, GEN8_GT_DOORBELL_ENABLE);
+
+	if (IS_GEN9(dev)) {
+		/* DOP Clock Gating Enable for GuC clocks */
+		I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
+					    I915_READ(GEN7_MISCCPCTL)));
+
+		/* allows for 5us before GT can go to RC6 */
+		I915_WRITE(GUC_ARAT_C6DIS, 0x1FF);
+	}
+
+	set_guc_init_params(dev_priv);
+
+	ret = guc_ucode_xfer_dma(dev_priv);
+
+	/* We can free the object pages now, and we would, except we might as
+	 * well keep it around for suspend/resume. Instead, we just wait for the
+	 * DMA to complete, and unpin the object
+	 */
+
+out:
+	if (pinned)
+		i915_gem_object_ggtt_unpin(guc_fw->uc_fw_obj);
+	else
+		DRM_DEBUG_DRIVER("pin failed %d\n", ret);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+/*
+ * Check the firmware that was found; if it's the wrong size or the wrong
+ * version, return FALSE. If it's OK, save the data in a GEM object and
+ * return TRUE.
+ *
+ * The GuC firmware image has the version number embedded at a well-known
+ * offset within the firmware blob; note that major / minor version are
+ * TWO bytes each (i.e. u16), although all pointers and offsets are defined
+ * in terms of bytes (u8).
+ */
+static bool
+guc_ucode_check(struct intel_uc_fw *guc_fw)
+{
+	struct intel_guc *guc = container_of(guc_fw, struct intel_guc, guc_fw);
+	const u8 *css_header = guc_fw->uc_fw_blob->data + UOS_CSS_HEADER_OFFSET;
+	uint32_t major, minor;
+
+	DRM_DEBUG_DRIVER("firmware file size %zu (minimum %u)\n",
+		guc_fw->uc_fw_blob->size, UOS_CSS_SIGNING_SIZE);
+
+	/* Check the size of the blob first */
+	if (guc_fw->uc_fw_blob->size <= UOS_CSS_SIGNING_SIZE)
+		return false;
+
+	major = *(u16 *)(css_header + UOS_VER_MAJOR_OFFSET);
+	minor = *(u16 *)(css_header + UOS_VER_MINOR_OFFSET);
+
+	if (major != guc->fw_ver_major || minor < guc->fw_ver_minor) {
+		DRM_ERROR("GuC firmware version %d.%d, required %d.%d\n",
+			 major, minor, guc->fw_ver_major, guc->fw_ver_minor);
+		return false;
+	}
+
+	DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n",
+		 major, minor, guc->fw_ver_major, guc->fw_ver_minor);
+
+	/* Override default GEM object allocation-and-save here, if needed */
+	return true;
+}
+
+/**
+ * intel_guc_ucode_init() - initiate a firmware loading request
+ *
+ * Called early during driver load, before GEM is initialised.
+ * Driver is single threaded, so no mutex is required.
+ */
+void intel_guc_ucode_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_uc_fw *guc_fw = &guc->guc_fw;
+	const char *path;
+
+	if (!HAS_GUC_SCHED(dev))
+		i915.enable_guc_submission = false;
+
+	if (!HAS_GUC_UCODE(dev)) {
+		path = NULL;
+	} else if (IS_SKYLAKE(dev)) {
+		path = I915_SKL_GUC_UCODE;
+		guc->fw_ver_major = 3;
+		guc->fw_ver_minor = 0;
+	} else {
+		i915.enable_guc_submission = false;
+		path = "";	/* unknown device */
+	}
+
+	intel_uc_fw_init(dev, guc_fw, "GuC", path);
+}
+
+/**
+ * intel_guc_ucode_load() - load GuC uCode into the device
+ *
+ * Called from gem_init_hw() during driver loading and also after a GPU reset.
+ * Checks that the firmware fetching process has succeeded, and if so transfers
+ * the loaded image to the hardware.
+ *
+ * However, there are a few checks to do first. The very first call should have
+ * (wait == FALSE), but the fetch_state will still be PENDING as the firmware may
+ * not be available that early. On this first call, we just return -EAGAIN.
+ *
+ * The second call should come from the first open of the device (wait == TRUE).
+ * This is a good time to load the firmware into the device, as by this point it
+ * must be available.
+ *
+ * Any subsequent calls are expected to have wait == FALSE, and indicate that the
+ * hardware has been reset and so the firmware should be reloaded.
+ */
+int intel_guc_ucode_load(struct drm_device *dev, bool wait)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
+	int err;
+
+	DRM_DEBUG_DRIVER("GuC: wait %d, fetch status %d, load status %d\n",
+		wait, guc_fw->uc_fw_fetch_status, guc_fw->uc_fw_load_status);
+
+	if (guc_fw->uc_fw_fetch_status == INTEL_UC_FIRMWARE_PENDING && !wait)
+		return -EAGAIN;
+
+	if (guc_fw->uc_fw_fetch_status == INTEL_UC_FIRMWARE_NONE)
+		return 0;
+
+	if (guc_fw->uc_fw_fetch_status == INTEL_UC_FIRMWARE_SUCCESS &&
+	    guc_fw->uc_fw_load_status == INTEL_UC_FIRMWARE_FAIL)
+		return -ENOEXEC;
+
+	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_PENDING;
+	err = intel_uc_fw_check(guc_fw, guc_ucode_check);
+	if (err)
+		goto fail;
+
+	err = guc_ucode_xfer(dev);
+	if (err)
+		goto fail;
+
+	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_SUCCESS;
+
+	return 0;
+
+fail:
+	if (guc_fw->uc_fw_load_status == INTEL_UC_FIRMWARE_PENDING)
+		guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_FAIL;
+
+	DRM_ERROR("Failed to initialize GuC, error %d\n", err);
+
+	return err;
+}
+
+/**
+ * intel_guc_ucode_fini() - clean up all allocated resources
+ */
+void intel_guc_ucode_fini(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
+
+	intel_uc_fw_fini(guc_fw);
+}