diff mbox

[CI,v2,2/2] drm/i915/guc: Introduce buffer based cmd transport

Message ID 20170522113048.83348-3-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michal Wajdeczko May 22, 2017, 11:30 a.m. UTC
Buffer based command transport can replace MMIO based mechanism.
It may be used to perform host-2-guc and guc-to-host communication.

Portions of this patch are based on work by:
 Michel Thierry <michel.thierry@intel.com>
 Robert Beckett <robert.beckett@intel.com>
 Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

v2: use gem_object_pin_map (Chris)
    don't use DEBUG_RATELIMITED (Chris)
    don't track action stats (Chris)
    simplify next fence (Chris)
    use READ_ONCE (Chris)
    move blob allocation to new function (Chris)

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile         |   1 +
 drivers/gpu/drm/i915/i915_drv.c       |   2 +
 drivers/gpu/drm/i915/i915_drv.h       |   2 +
 drivers/gpu/drm/i915/intel_guc_ct.c   | 468 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_ct.h   |  97 +++++++
 drivers/gpu/drm/i915/intel_guc_fwif.h |  44 ++++
 drivers/gpu/drm/i915/intel_uc.c       |  25 +-
 drivers/gpu/drm/i915/intel_uc.h       |   4 +-
 8 files changed, 641 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.c
 create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.h

Comments

Daniele Ceraolo Spurio May 23, 2017, 6:59 p.m. UTC | #1
On 22/05/17 04:30, Michal Wajdeczko wrote:
> Buffer based command transport can replace MMIO based mechanism.
> It may be used to perform host-2-guc and guc-to-host communication.
>
> Portions of this patch are based on work by:
>  Michel Thierry <michel.thierry@intel.com>
>  Robert Beckett <robert.beckett@intel.com>
>  Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> v2: use gem_object_pin_map (Chris)
>     don't use DEBUG_RATELIMITED (Chris)
>     don't track action stats (Chris)
>     simplify next fence (Chris)
>     use READ_ONCE (Chris)
>     move blob allocation to new function (Chris)
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/Makefile         |   1 +
>  drivers/gpu/drm/i915/i915_drv.c       |   2 +
>  drivers/gpu/drm/i915/i915_drv.h       |   2 +
>  drivers/gpu/drm/i915/intel_guc_ct.c   | 468 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_guc_ct.h   |  97 +++++++
>  drivers/gpu/drm/i915/intel_guc_fwif.h |  44 ++++
>  drivers/gpu/drm/i915/intel_uc.c       |  25 +-
>  drivers/gpu/drm/i915/intel_uc.h       |   4 +-
>  8 files changed, 641 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.c
>  create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 7b05fb8..16dccf5 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -58,6 +58,7 @@ i915-y += i915_cmd_parser.o \
>
>  # general-purpose microcontroller (GuC) support
>  i915-y += intel_uc.o \
> +	  intel_guc_ct.o \
>  	  intel_guc_log.o \
>  	  intel_guc_loader.o \
>  	  intel_huc.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index d703897..6c78469 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -869,6 +869,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
>  	i915_workqueues_cleanup(dev_priv);
>  err_engines:
>  	i915_engines_cleanup(dev_priv);
> +	intel_uc_cleanup(dev_priv);
>  	return ret;
>  }
>
> @@ -883,6 +884,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
>  	intel_irq_fini(dev_priv);
>  	i915_workqueues_cleanup(dev_priv);
>  	i915_engines_cleanup(dev_priv);
> +	intel_uc_cleanup(dev_priv);
>  }
>
>  static int i915_mmio_setup(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 17883a8..453eea5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -760,6 +760,7 @@ struct intel_csr {
>  	func(has_gmbus_irq); \
>  	func(has_gmch_display); \
>  	func(has_guc); \
> +	func(has_guc_ct); \
>  	func(has_hotplug); \
>  	func(has_l3_dpf); \
>  	func(has_llc); \
> @@ -2947,6 +2948,7 @@ intel_info(const struct drm_i915_private *dev_priv)
>   * properties, so we have separate macros to test them.
>   */
>  #define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
> +#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
>  #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
>  #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
>  #define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
> diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
> new file mode 100644
> index 0000000..869a7ad
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_guc_ct.c
> @@ -0,0 +1,468 @@
> +/*
> + * Copyright © 2016-2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_guc_ct.h"
> +
> +enum { CTB_SEND = 0, CTB_RECV = 1 };
> +
> +static inline const char *guc_ct_buffer_type_to_str(u32 type)
> +{
> +	switch (type) {
> +	case INTEL_GUC_CT_BUFFER_TYPE_SEND:
> +		return "SEND";
> +	case INTEL_GUC_CT_BUFFER_TYPE_RECV:
> +		return "RECV";
> +	default:
> +		return "<invalid>";
> +	}
> +}
> +
> +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
> +				    u32 cmds_addr, u32 size, u32 owner)
> +{
> +	DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
> +			 desc, cmds_addr, size, owner);
> +	memset(desc, 0, sizeof(*desc));
> +	desc->addr = cmds_addr;
> +	desc->size = size;
> +	desc->owner = owner;
> +}
> +
> +static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
> +{
> +	DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
> +			 desc, desc->head, desc->tail);
> +	desc->head = 0;
> +	desc->tail = 0;
> +	desc->is_in_error = 0;
> +}
> +
> +static int guc_action_register_ct_buffer(struct intel_guc *guc,
> +					 u32 desc_addr,
> +					 u32 type)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
> +		desc_addr,
> +		sizeof(struct guc_ct_buffer_desc),
> +		type
> +	};
> +	int err;
> +
> +	/* Can't use generic send(), CT registration must go over MMIO */
> +	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
> +	if (err)
> +		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
> +			  guc_ct_buffer_type_to_str(type), err);
> +	return err;
> +}
> +
> +static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
> +					   u32 owner,
> +					   u32 type)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
> +		owner,
> +		type
> +	};
> +	int err;
> +
> +	/* Can't use generic send(), CT deregistration must go over MMIO */
> +	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
> +	if (err)
> +		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
> +			  guc_ct_buffer_type_to_str(type), err);
> +	return err;
> +}
> +
> +static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
> +{
> +	return ctch->vma != NULL;
> +}
> +
> +static int ctch_init(struct intel_guc *guc,
> +		     struct intel_guc_ct_channel *ctch)
> +{
> +	struct i915_vma *vma;
> +	void *blob;
> +	int err;
> +	int i;
> +
> +	GEM_BUG_ON(ctch->vma);
> +
> +#if INTEL_GUC_CT_MAX_CHANNELS > 1

Bikeshed: after reviewing the GuC design intent for CT buffers I think 
we can remove the ida logic completely, even if 
INTEL_GUC_CT_MAX_CHANNELS > 1. Currently we don't expect more than 1 
pair, but, if my understanding is correct, in case we ever need more 
than 1 channel the number should be statically determined and not a 
dynamic thing. I would therefore prefer a static define for it. e.g.:

#define GUC_KMD_CTCH 0

and if we ever have more:

#define GUC_xxx_CTCH 1
#define GUC_yyy_CTCH 2

We can then pass in the owner when we open the channel:

ctch_open(guc, ctch, GUC_KMD_CTCH);

And we could get rid of a few new functions, like intel_uc_cleanup().

> +	/* get unique owner id */
> +	err = ida_simple_get(&guc->ct.owner_ida,
> +			     0, INTEL_GUC_CT_MAX_CHANNELS, GFP_KERNEL);
> +	if (err < 0)
> +		return err;
> +	ctch->owner = err;
> +	DRM_DEBUG_DRIVER("CT: owner=%d\n", ctch->owner);
> +#else
> +	if (ctch_is_open(&guc->ct.channel))
> +		return -ENOSPC;

-EEXIST?

> +#endif
> +
> +	/* We allocate 1 page to hold both descriptors and both buffers.
> +	 *       ___________.....................
> +	 *      |desc (SEND)|                   :
> +	 *      |___________|                   PAGE/4
> +	 *      :___________....................:
> +	 *      |desc (RECV)|                   :
> +	 *      |___________|                   PAGE/4
> +	 *      :_______________________________:
> +	 *      |cmds (SEND)                    |
> +	 *      |                               PAGE/4
> +	 *      |_______________________________|
> +	 *      |cmds (RECV)                    |
> +	 *      |                               PAGE/4
> +	 *      |_______________________________|
> +	 *
> +	 * Each message can use a maximum of 32 dwords and we don't expect to
> +	 * have more than 1 in flight at any time, so we have enough space.
> +	 * Some logic further ahead will rely on the fact that there is only 1
> +	 * page and that it is always mapped, so if the size is changed the
> +	 * other code will need updating as well.
> +	 */
> +
> +	/* allocate vma */
> +	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
> +	if (IS_ERR(vma)) {
> +		err = PTR_ERR(vma);
> +		goto err_ida;
> +	}
> +	ctch->vma = vma;
> +
> +	/* map first page */
> +	blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
> +	if (IS_ERR(blob)) {
> +		err = PTR_ERR(blob);
> +		goto err_vma;
> +	}
> +	DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma));
> +
> +	/* store pointers to desc and cmds */
> +	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
> +		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
> +		ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
> +		ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
> +	}
> +
> +	return 0;
> +
> +err_vma:
> +	i915_vma_unpin_and_release(&ctch->vma);
> +err_ida:
> +#if INTEL_GUC_CT_MAX_CHANNELS > 1
> +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> +#endif
> +	return err;
> +}
> +
> +static void ctch_fini(struct intel_guc *guc,
> +		      struct intel_guc_ct_channel *ctch)
> +{
> +	GEM_BUG_ON(!ctch->vma);
> +
> +	i915_gem_object_unpin_map(ctch->vma->obj);
> +	i915_vma_unpin_and_release(&ctch->vma);
> +
> +#if INTEL_GUC_CT_MAX_CHANNELS > 1
> +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> +#endif
> +}
> +
> +static int ctch_open(struct intel_guc *guc,
> +		     struct intel_guc_ct_channel *ctch)
> +{
> +	u32 base;
> +	int err;
> +	int i;
> +
> +	DRM_DEBUG_DRIVER("CT: reopen=%s\n", yesno(ctch_is_open(ctch)));
> +
> +	if (!ctch->vma) {
> +		err = ctch_init(guc, ctch);
> +		if (unlikely(err))
> +			return err;
> +	}
> +
> +	/* vma should be already allocated and map'ed */
> +	base = guc_ggtt_offset(ctch->vma);
> +
> +	/* (re)initialize descriptors
> +	 * cmds buffers are in the second half of the blob page
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
> +		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
> +		guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
> +					base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
> +					PAGE_SIZE/4,
> +					ctch->owner);
> +	}
> +
> +	/* register buffers, starting with RECV buffer
> +	 * descriptors are in first half of the blob
> +	 */
> +	err = guc_action_register_ct_buffer(guc,
> +					    base + PAGE_SIZE/4 * CTB_RECV,
> +					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
> +	if (unlikely(err))
> +		goto err_fini;
> +
> +	err = guc_action_register_ct_buffer(guc,
> +					    base + PAGE_SIZE/4 * CTB_SEND,
> +					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
> +	if (unlikely(err))
> +		goto err_deregister;
> +
> +	return 0;
> +
> +err_deregister:
> +	guc_action_deregister_ct_buffer(guc,
> +					ctch->owner,
> +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> +err_fini:
> +	ctch_fini(guc, ctch);
> +	return err;
> +}
> +
> +static void ctch_close(struct intel_guc *guc,
> +		       struct intel_guc_ct_channel *ctch)
> +{
> +	GEM_BUG_ON(!ctch_is_open(ctch));
> +
> +	guc_action_deregister_ct_buffer(guc,
> +					ctch->owner,
> +					INTEL_GUC_CT_BUFFER_TYPE_SEND);
> +	guc_action_deregister_ct_buffer(guc,
> +					ctch->owner,
> +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> +	ctch_fini(guc, ctch);
> +}
> +
> +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
> +{
> +	/* For now it's trivial */
> +	return ++ctch->next_fence;
> +}
> +
> +static int ctb_write(struct intel_guc_ct_buffer *ctb,
> +		     const u32 *action,
> +		     u32 len /* in dwords */,
> +		     u32 fence)
> +{
> +	struct guc_ct_buffer_desc *desc = ctb->desc;
> +	u32 head = desc->head / 4;	/* in dwords */
> +	u32 tail = desc->tail / 4;	/* in dwords */
> +	u32 size = desc->size / 4;	/* in dwords */
> +	u32 used;			/* in dwords */
> +	u32 header;
> +	u32 *cmds = ctb->cmds;
> +	unsigned int i;
> +
> +	GEM_BUG_ON(desc->size % 4);
> +	GEM_BUG_ON(desc->head % 4);
> +	GEM_BUG_ON(desc->tail % 4);
> +	GEM_BUG_ON(tail >= size);
> +
> +	/*
> +	 * tail == head condition indicates empty. GuC FW does not support
> +	 * using up the entire buffer to get tail == head meaning full.
> +	 */
> +	if (tail < head)
> +		used = (size - head) + tail;
> +	else
> +		used = tail - head;
> +
> +	/* make sure there is a space including extra dw for the fence */
> +	if (unlikely(used + len + 1 >= size))
> +		return -ENOSPC;
> +
> +	/* Write the message. The format is the following:
> +	 * DW0: header (including action code)
> +	 * DW1: fence
> +	 * DW2+: action data
> +	 */
> +	header = (len << GUC_CT_MSG_LEN_SHIFT) |
> +		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
> +		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
> +
> +	cmds[tail] = header;
> +	tail = (tail + 1) % size;
> +
> +	cmds[tail] = fence;
> +	tail = (tail + 1) % size;
> +
> +	for (i = 1; i < len; i++) {
> +		cmds[tail] = action[i];
> +		tail = (tail + 1) % size;
> +	}
> +
> +	/* now update desc tail (back in bytes) */
> +	desc->tail = tail * 4;
> +	GEM_BUG_ON(desc->tail > desc->size);
> +
> +	return 0;
> +}
> +
> +/* Wait for the response from the GuC.
> + * @fence:	response fence
> + * @status:	placeholder for status
> + * return:	0 response received (status is valid)
> + *		-ETIMEDOUT no response within hardcoded timeout
> + *		-EPROTO no response, ct buffer was in error
> + */
> +static int wait_for_response(struct guc_ct_buffer_desc *desc,
> +			     u32 fence,
> +			     u32 *status)
> +{
> +	int err;
> +
> +	/*
> +	 * Fast commands should complete in less than 10us, so sample quickly
> +	 * up to that length of time, then switch to a slower sleep-wait loop.
> +	 * No GuC command should ever take longer than 10ms.
> +	 */
> +#define done (READ_ONCE(desc->fence) == fence)
> +	err = wait_for_us(done, 10);
> +	if (err)
> +		err = wait_for(done, 10);
> +#undef done
> +
> +	if (unlikely(err)) {
> +		DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
> +			  fence, desc->fence);
> +
> +		if (WARN_ON(desc->is_in_error)) {
> +			/* Something went wrong with the messaging, try to reset
> +			 * the buffer and hope for the best
> +			 */
> +			guc_ct_buffer_desc_reset(desc);
> +			err = -EPROTO;
> +		}
> +	}
> +
> +	*status = desc->status;
> +	return err;
> +}
> +
> +static int ctch_send(struct intel_guc *guc,
> +		     struct intel_guc_ct_channel *ctch,
> +		     const u32 *action,
> +		     u32 len,
> +		     u32 *status)
> +{
> +	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
> +	struct guc_ct_buffer_desc *desc = ctb->desc;
> +	u32 fence;
> +	int err;
> +
> +	GEM_BUG_ON(!ctch_is_open(ctch));
> +	GEM_BUG_ON(!len);
> +	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
> +
> +	fence = ctch_get_next_fence(ctch);
> +	err = ctb_write(ctb, action, len, fence);
> +	if (unlikely(err))
> +		return err;
> +
> +	intel_guc_notify(guc);
> +
> +	err = wait_for_response(desc, fence, status);
> +	if (unlikely(err))
> +		return err;
> +	if (*status != INTEL_GUC_STATUS_SUCCESS)
> +		return -EIO;
> +	return 0;
> +}
> +
> +/*
> + * Command Transport (CT) buffer based GuC send function.
> + */
> +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
> +{
> +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> +	u32 status = ~0; /* undefined */
> +	int err;
> +
> +	mutex_lock(&guc->send_mutex);
> +
> +	err = ctch_send(guc, ctch, action, len, &status);
> +	if (unlikely(err)) {
> +		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
> +			  action[0], err, status);
> +	}
> +
> +	mutex_unlock(&guc->send_mutex);
> +	return err;
> +}
> +
> +/**
> + * Enable buffer based command transport
> + * Shall only be called for platforms with HAS_GUC_CT.
> + * @guc:	the guc
> + * return:	0 on success
> + *		non-zero on failure
> + */
> +int intel_guc_enable_ct(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> +	int err;
> +
> +	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
> +
> +	err = ctch_open(guc, ctch);
> +	if (unlikely(err))
> +		return err;
> +
> +	/* Switch into cmd transport buffer based send() */
> +	guc->send = intel_guc_send_ct;
> +	DRM_INFO("CT: %s\n", enableddisabled(true));
> +	return 0;
> +}
> +
> +/**
> + * Disable buffer based command transport.
> + * Shall only be called for platforms with HAS_GUC_CT.
> + * @guc: the guc
> + */
> +void intel_guc_disable_ct(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> +
> +	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
> +
> +	if (!ctch_is_open(ctch))
> +		return;
> +
> +	ctch_close(guc, ctch);
> +
> +	/* Disable send */
> +	guc->send = intel_guc_send_nop;
> +	DRM_INFO("CT: %s\n", enableddisabled(false));
> +}
> diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
> new file mode 100644
> index 0000000..b6a2742
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_guc_ct.h
> @@ -0,0 +1,97 @@
> +/*
> + * Copyright © 2016-2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef _INTEL_GUC_CT_H_
> +#define _INTEL_GUC_CT_H_
> +
> +struct intel_guc;
> +struct i915_vma;
> +
> +#include "intel_guc_fwif.h"
> +
> +/**
> + * DOC: Command Transport (CT).
> + *
> + * Buffer based command transport is a replacement for MMIO based mechanism.
> + * It can be used to perform both host-2-guc and guc-to-host communication.
> + */
> +
> +/** Represents single command transport buffer.
> + *
> + * A single command transport buffer consists of two parts, the header
> + * record (command transport buffer descriptor) and the actual buffer which
> + * holds the commands.
> + *
> + * @desc: pointer to the buffer descriptor
> + * @cmds: pointer to the commands buffer
> + */
> +struct intel_guc_ct_buffer {
> +	struct guc_ct_buffer_desc *desc;
> +	u32 *cmds;
> +};
> +
> +/** Represents pair of command transport buffers.
> + *
> + * Buffers go in pairs to allow bi-directional communication.
> + * To simplify the code we place both of them in the same vma.
> + * Buffers from the same pair must share unique owner id.
> + *
> + * @vma: pointer to the vma with pair of CT buffers
> + * @ctbs: buffers for sending(0) and receiving(1) commands
> + * @owner: unique identifier
> + * @next_fence: fence to be used with next send command
> + */
> +struct intel_guc_ct_channel {
> +	struct i915_vma *vma;
> +	struct intel_guc_ct_buffer ctbs[2];
> +	u32 owner;
> +	u32 next_fence;
> +};
> +
> +/* */
> +struct intel_guc_ct {
> +#if INTEL_GUC_CT_MAX_CHANNELS > 1
> +	struct ida owner_ida;
> +#endif
> +	struct intel_guc_ct_channel channel;
> +};
> +
> +static inline void intel_guc_ct_init_early(struct intel_guc_ct *ct)
> +{
> +#if INTEL_GUC_CT_MAX_CHANNELS > 1
> +	ida_init(&ct->owner_ida);
> +#endif
> +}
> +
> +static inline void intel_guc_ct_cleanup(struct intel_guc_ct *ct)
> +{
> +#if INTEL_GUC_CT_MAX_CHANNELS > 1
> +	ida_destroy(&ct->owner_ida);
> +#endif
> +}
> +
> +/* XXX: move to intel_uc.h ? but it doesn't fit there either */
> +int intel_guc_enable_ct(struct intel_guc *guc);
> +void intel_guc_disable_ct(struct intel_guc *guc);
> +
> +#endif /* _INTEL_GUC_CT_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
> index 6156845..47f8eec 100644
> --- a/drivers/gpu/drm/i915/intel_guc_fwif.h
> +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
> @@ -331,6 +331,48 @@ struct guc_stage_desc {
>  	u64 desc_private;
>  } __packed;
>
> +/*
> + * Describes single command transport buffer.
> + * Used by both guc-master and clients.
> + */
> +struct guc_ct_buffer_desc {
> +	u32 addr;		/* gfx address */
> +	u64 vaddr;		/* virtual address */

Do we want to mention that vaddr is reserved for host usage? We
could potentially use it to store the pointer to the mapped cmds buffer,
although I'm not sure we want to store any host-only values inside a
GuC-shared struct.

> +	u32 size;		/* size in bytes */
> +	u32 head;		/* offset updated by GuC*/
> +	u32 tail;		/* offset updated by owner */
> +	u32 is_in_error;	/* error indicator */
> +	u32 fence;		/* fence updated by GuC */
> +	u32 status;		/* status updated by GuC */
> +	u32 owner;		/* id assigned by owner */

One of the reserved fields should be owner_sub_id, which is a
host/owner-defined field for further tracking. We could potentially use
it to store the CT buffer type (send/recv).

Apart from these small comments the logic matches the GuC protocol and LGTM.

Thanks,
Daniele

> +	u32 reserved[6];
> +} __packed;
> +
> +/* Type of command transport buffer */
> +#define INTEL_GUC_CT_BUFFER_TYPE_SEND	0x0u
> +#define INTEL_GUC_CT_BUFFER_TYPE_RECV	0x1u
> +
> +#define INTEL_GUC_CT_MAX_CHANNELS	1
> +
> +/*
> + * Definition of the command transport message header (DW0)
> + *
> + * bit[4..0]	message len (in dwords)
> + * bit[7..5]	reserved
> + * bit[8]	write fence to desc
> + * bit[9]	write status to H2G buff
> + * bit[10]	send status (via G2H)
> + * bit[15..11]	reserved
> + * bit[31..16]	action code
> + */
> +#define GUC_CT_MSG_LEN_SHIFT			0
> +#define GUC_CT_MSG_LEN_MASK			0x1F
> +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
> +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
> +#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
> +#define GUC_CT_MSG_ACTION_SHIFT			16
> +#define GUC_CT_MSG_ACTION_MASK			0xFFFF
> +
>  #define GUC_FORCEWAKE_RENDER	(1 << 0)
>  #define GUC_FORCEWAKE_MEDIA	(1 << 1)
>
> @@ -515,6 +557,8 @@ enum intel_guc_action {
>  	INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
>  	INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
>  	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
> +	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
> +	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
>  	INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
>  	INTEL_GUC_ACTION_LIMIT
>  };
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 31dc8c3..5183245 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -108,6 +108,14 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv)
>  	mutex_init(&guc->send_mutex);
>  	guc->send = intel_guc_send_nop;
>  	guc->notify = guc_write_irq_trigger;
> +	intel_guc_ct_init_early(&guc->ct);
> +}
> +
> +void intel_uc_cleanup(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_guc *guc = &dev_priv->guc;
> +
> +	intel_guc_ct_cleanup(&guc->ct);
>  }
>
>  static void fetch_uc_fw(struct drm_i915_private *dev_priv,
> @@ -288,14 +296,24 @@ static void guc_init_send_regs(struct intel_guc *guc)
>
>  static int guc_enable_communication(struct intel_guc *guc)
>  {
> -	/* XXX: placeholder for alternate setup */
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +
>  	guc_init_send_regs(guc);
> +
> +	if (HAS_GUC_CT(dev_priv))
> +		return intel_guc_enable_ct(guc);
> +
>  	guc->send = intel_guc_send_mmio;
>  	return 0;
>  }
>
>  static void guc_disable_communication(struct intel_guc *guc)
>  {
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +
> +	if (HAS_GUC_CT(dev_priv))
> +		intel_guc_disable_ct(guc);
> +
>  	guc->send = intel_guc_send_nop;
>  }
>
> @@ -442,6 +460,11 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
>  	GEM_BUG_ON(!len);
>  	GEM_BUG_ON(len > guc->send_regs.count);
>
> +	/* If CT is available, we expect to use MMIO only during init/fini */
> +	GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
> +		*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
> +		*action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
> +
>  	mutex_lock(&guc->send_mutex);
>  	intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
>
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index 930f2e1..fc4d1d7 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -27,7 +27,7 @@
>  #include "intel_guc_fwif.h"
>  #include "i915_guc_reg.h"
>  #include "intel_ringbuffer.h"
> -
> +#include "intel_guc_ct.h"
>  #include "i915_vma.h"
>
>  struct drm_i915_gem_request;
> @@ -173,6 +173,7 @@ struct intel_guc_log {
>  struct intel_guc {
>  	struct intel_uc_fw fw;
>  	struct intel_guc_log log;
> +	struct intel_guc_ct ct;
>
>  	/* intel_guc_recv interrupt related state */
>  	bool interrupts_enabled;
> @@ -214,6 +215,7 @@ struct intel_huc {
>  /* intel_uc.c */
>  void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
>  void intel_uc_init_early(struct drm_i915_private *dev_priv);
> +void intel_uc_cleanup(struct drm_i915_private *dev_priv);
>  void intel_uc_init_fw(struct drm_i915_private *dev_priv);
>  void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
>  int intel_uc_init_hw(struct drm_i915_private *dev_priv);
>
Chris Wilson May 23, 2017, 8:48 p.m. UTC | #2
On Tue, May 23, 2017 at 11:59:46AM -0700, Daniele Ceraolo Spurio wrote:
> On 22/05/17 04:30, Michal Wajdeczko wrote:
> >+static int ctch_init(struct intel_guc *guc,
> >+		     struct intel_guc_ct_channel *ctch)
> >+{
> >+	struct i915_vma *vma;
> >+	void *blob;
> >+	int err;
> >+	int i;
> >+
> >+	GEM_BUG_ON(ctch->vma);
> >+
> >+#if INTEL_GUC_CT_MAX_CHANNELS > 1
> 
> Bikeshed: after reviewing the GuC design intent for CT buffers I
> think we can remove the ida logic completely, even if
> INTEL_GUC_CT_MAX_CHANNELS > 1. Currently we don't expect more than 1
> pair, but, if my understanding is correct, in case we ever need more
> than 1 channel the number should be statically determined and not a
> dynamic thing. I would therefore prefer a static define for it.
> e.g.:
> 
> #define GUC_KMD_CTCH 0
> 
> and if we ever have more:
> 
> #define GUC_xxx_CTCH 1
> #define GUC_yyy_CTCH 2
> 
> We can then pass in the owner when we open the channel:
> 
> ctch_open(guc, ctch, GUC_KMD_CTCH);

If we do foresee that we don't need an ida for at least the near
future, can we kill it? I'm still dubious about having
INTEL_GUC_CT_MAX_CHANNELS around not tied to any hw/fw concepts or
limits. At the very least, if you do split the ida into a separate
patch, you can apply it when you need it later.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7b05fb8..16dccf5 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -58,6 +58,7 @@  i915-y += i915_cmd_parser.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
+	  intel_guc_ct.o \
 	  intel_guc_log.o \
 	  intel_guc_loader.o \
 	  intel_huc.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d703897..6c78469 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -869,6 +869,7 @@  static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	i915_workqueues_cleanup(dev_priv);
 err_engines:
 	i915_engines_cleanup(dev_priv);
+	intel_uc_cleanup(dev_priv);
 	return ret;
 }
 
@@ -883,6 +884,7 @@  static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
+	intel_uc_cleanup(dev_priv);
 }
 
 static int i915_mmio_setup(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 17883a8..453eea5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -760,6 +760,7 @@  struct intel_csr {
 	func(has_gmbus_irq); \
 	func(has_gmch_display); \
 	func(has_guc); \
+	func(has_guc_ct); \
 	func(has_hotplug); \
 	func(has_l3_dpf); \
 	func(has_llc); \
@@ -2947,6 +2948,7 @@  intel_info(const struct drm_i915_private *dev_priv)
  * properties, so we have separate macros to test them.
  */
 #define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
+#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
 #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
new file mode 100644
index 0000000..869a7ad
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_ct.c
@@ -0,0 +1,468 @@ 
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "intel_guc_ct.h"
+
+enum { CTB_SEND = 0, CTB_RECV = 1 };	/* indices into intel_guc_ct_channel.ctbs[] */
+
+static inline const char *guc_ct_buffer_type_to_str(u32 type)
+{	/* Name of a CT buffer type, for use in log messages. */
+	switch (type) {
+	case INTEL_GUC_CT_BUFFER_TYPE_SEND:
+		return "SEND";
+	case INTEL_GUC_CT_BUFFER_TYPE_RECV:
+		return "RECV";
+	default:	/* any value other than SEND/RECV */
+		return "<invalid>";
+	}
+}
+
+static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
+				    u32 cmds_addr, u32 size, u32 owner)
+{	/* Zero the whole descriptor, then fill in addr, size and owner. */
+	DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
+			 desc, cmds_addr, size, owner);
+	memset(desc, 0, sizeof(*desc));	/* also clears head, tail, fence, status */
+	desc->addr = cmds_addr;
+	desc->size = size;
+	desc->owner = owner;
+}
+
+static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
+{	/* Rewind head/tail and clear the error flag; other fields are kept. */
+	DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
+			 desc, desc->head, desc->tail);
+	desc->head = 0;
+	desc->tail = 0;
+	desc->is_in_error = 0;
+}
+
+static int guc_action_register_ct_buffer(struct intel_guc *guc,
+					 u32 desc_addr,
+					 u32 type)
+{	/* Ask the GuC to register one CT buffer descriptor; returns the send result. */
+	u32 action[] = {
+		INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
+		desc_addr,
+		sizeof(struct guc_ct_buffer_desc),
+		type	/* INTEL_GUC_CT_BUFFER_TYPE_SEND or _RECV */
+	};
+	int err;
+
+	/* Can't use the generic send(); CT registration must go over MMIO */
+	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	if (err)
+		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
+			  guc_ct_buffer_type_to_str(type), err);
+	return err;
+}
+
+static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
+					   u32 owner,
+					   u32 type)
+{	/* Ask the GuC to drop one CT buffer, identified by owner id and type. */
+	u32 action[] = {
+		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
+		owner,
+		type	/* INTEL_GUC_CT_BUFFER_TYPE_SEND or _RECV */
+	};
+	int err;
+
+	/* Can't use the generic send(); CT deregistration must go over MMIO */
+	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	if (err)
+		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
+			  guc_ct_buffer_type_to_str(type), err);
+	return err;
+}
+
+static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
+{	/* A channel counts as open for as long as it holds a vma. */
+	return ctch->vma != NULL;
+}
+
+static int ctch_init(struct intel_guc *guc,
+		     struct intel_guc_ct_channel *ctch)
+{	/* Allocate and map the one-page blob and point ctbs[] into it. */
+	struct i915_vma *vma;
+	void *blob;
+	int err;
+	int i;
+
+	GEM_BUG_ON(ctch->vma);
+
+#if INTEL_GUC_CT_MAX_CHANNELS > 1
+	/* get a unique owner id; released again in ctch_fini() or on error */
+	err = ida_simple_get(&guc->ct.owner_ida,
+			     0, INTEL_GUC_CT_MAX_CHANNELS, GFP_KERNEL);
+	if (err < 0)
+		return err;
+	ctch->owner = err;
+	DRM_DEBUG_DRIVER("CT: owner=%d\n", ctch->owner);
+#else
+	if (ctch_is_open(&guc->ct.channel))
+		return -ENOSPC;
+#endif
+
+	/* We allocate 1 page to hold both descriptors and both buffers.
+	 *       ___________.....................
+	 *      |desc (SEND)|                   :
+	 *      |___________|                   PAGE/4
+	 *      :___________....................:
+	 *      |desc (RECV)|                   :
+	 *      |___________|                   PAGE/4
+	 *      :_______________________________:
+	 *      |cmds (SEND)                    |
+	 *      |                               PAGE/4
+	 *      |_______________________________|
+	 *      |cmds (RECV)                    |
+	 *      |                               PAGE/4
+	 *      |_______________________________|
+	 *
+	 * Each message can use a maximum of 32 dwords and we don't expect to
+	 * have more than 1 in flight at any time, so we have enough space.
+	 * Some logic further ahead relies on the fact that there is only 1
+	 * page and that it is always mapped, so if the size is changed the
+	 * other code will need updating as well.
+	 */
+
+	/* allocate a one-page vma for the blob */
+	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_ida;
+	}
+	ctch->vma = vma;
+
+	/* map the page; the mapping is dropped again in ctch_fini() */
+	blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(blob)) {
+		err = PTR_ERR(blob);
+		goto err_vma;
+	}
+	DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma));
+
+	/* descs sit in quarters 0 and 1 of the page, cmds in quarters 2 and 3 */
+	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+		ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
+		ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
+	}
+
+	return 0;
+
+err_vma:	/* mapping failed: undo intel_guc_allocate_vma() */
+	i915_vma_unpin_and_release(&ctch->vma);
+err_ida:
+#if INTEL_GUC_CT_MAX_CHANNELS > 1
+	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
+#endif
+	return err;
+}
+
+static void ctch_fini(struct intel_guc *guc,
+		      struct intel_guc_ct_channel *ctch)
+{	/* Reverse of ctch_init(): unmap, release the vma, give back the owner id. */
+	GEM_BUG_ON(!ctch->vma);
+
+	i915_gem_object_unpin_map(ctch->vma->obj);
+	i915_vma_unpin_and_release(&ctch->vma);
+
+#if INTEL_GUC_CT_MAX_CHANNELS > 1
+	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
+#endif
+}
+
+static int ctch_open(struct intel_guc *guc,
+		     struct intel_guc_ct_channel *ctch)
+{	/* (Re)initialize both descriptors and register them with the GuC. */
+	u32 base;
+	int err;
+	int i;
+
+	DRM_DEBUG_DRIVER("CT: reopen=%s\n", yesno(ctch_is_open(ctch)));
+
+	if (!ctch->vma) {
+		err = ctch_init(guc, ctch);
+		if (unlikely(err))
+			return err;
+	}
+
+	/* vma is allocated and mapped by now, either above or by an earlier open */
+	base = guc_ggtt_offset(ctch->vma);
+
+	/* (re)initialize descriptors;
+	 * cmds buffers are in the second half of the blob page
+	 */
+	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+		guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
+					base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
+					PAGE_SIZE/4,
+					ctch->owner);
+	}
+
+	/* register buffers, starting with the RECV buffer;
+	 * descriptors are in the first half of the blob
+	 */
+	err = guc_action_register_ct_buffer(guc,
+					    base + PAGE_SIZE/4 * CTB_RECV,
+					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	if (unlikely(err))
+		goto err_fini;
+
+	err = guc_action_register_ct_buffer(guc,
+					    base + PAGE_SIZE/4 * CTB_SEND,
+					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
+	if (unlikely(err))
+		goto err_deregister;
+
+	return 0;
+
+err_deregister:	/* SEND failed after RECV succeeded: take RECV back out */
+	guc_action_deregister_ct_buffer(guc,
+					ctch->owner,
+					INTEL_GUC_CT_BUFFER_TYPE_RECV);
+err_fini:	/* note: releases the blob even when it pre-existed (reopen) */
+	ctch_fini(guc, ctch);
+	return err;
+}
+
+static void ctch_close(struct intel_guc *guc,
+		       struct intel_guc_ct_channel *ctch)
+{	/* Deregister SEND then RECV (errors are only logged), then free the blob. */
+	GEM_BUG_ON(!ctch_is_open(ctch));
+
+	guc_action_deregister_ct_buffer(guc,
+					ctch->owner,
+					INTEL_GUC_CT_BUFFER_TYPE_SEND);
+	guc_action_deregister_ct_buffer(guc,
+					ctch->owner,
+					INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	ctch_fini(guc, ctch);
+}
+
+static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
+{
+	/* For now it's trivial: pre-increment the per-channel counter */
+	return ++ctch->next_fence;
+}
+
+static int ctb_write(struct intel_guc_ct_buffer *ctb,
+		     const u32 *action,
+		     u32 len /* in dwords, counting action[0] */,
+		     u32 fence)
+{
+	struct guc_ct_buffer_desc *desc = ctb->desc;
+	u32 head = desc->head / 4;	/* in dwords */
+	u32 tail = desc->tail / 4;	/* in dwords */
+	u32 size = desc->size / 4;	/* in dwords */
+	u32 used;			/* in dwords */
+	u32 header;
+	u32 *cmds = ctb->cmds;
+	unsigned int i;
+
+	GEM_BUG_ON(desc->size % 4);
+	GEM_BUG_ON(desc->head % 4);
+	GEM_BUG_ON(desc->tail % 4);
+	GEM_BUG_ON(tail >= size);
+
+	/*
+	 * tail == head condition indicates empty. GuC FW does not support
+	 * using up the entire buffer to get tail == head meaning full.
+	 */
+	if (tail < head)
+		used = (size - head) + tail;
+	else
+		used = tail - head;
+
+	/* need len + 1 dwords (fence is extra); '>=' keeps tail off head */
+	if (unlikely(used + len + 1 >= size))
+		return -ENOSPC;
+
+	/* Write the message. The format is the following:
+	 * DW0: header (len, flags and the action code from action[0])
+	 * DW1: fence
+	 * DW2+: action data (action[1] .. action[len - 1])
+	 */
+	header = (len << GUC_CT_MSG_LEN_SHIFT) |
+		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
+		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
+
+	cmds[tail] = header;
+	tail = (tail + 1) % size;
+
+	cmds[tail] = fence;
+	tail = (tail + 1) % size;
+
+	for (i = 1; i < len; i++) {
+		cmds[tail] = action[i];
+		tail = (tail + 1) % size;
+	}
+
+	/* now update desc tail (back in bytes); this publishes the message */
+	desc->tail = tail * 4;	/* NOTE(review): no barrier after cmds[] writes - confirm ordering */
+	GEM_BUG_ON(desc->tail > desc->size);
+
+	return 0;
+}
+
+/* Wait for the GuC to write @fence into @desc->fence.
+ * @fence:	response fence to wait for
+ * @status:	filled from @desc->status on every path
+ * return:	0 response received (status is valid)
+ *		-ETIMEDOUT no response within hardcoded timeout
+ *		-EPROTO no response, ct buffer was in error (and was reset)
+ */
+static int wait_for_response(struct guc_ct_buffer_desc *desc,
+			     u32 fence,
+			     u32 *status)
+{
+	int err;
+
+	/*
+	 * Fast commands should complete in less than 10us, so sample quickly
+	 * up to that length of time, then switch to a slower sleep-wait loop.
+	 * No GuC command should ever take longer than 10ms.
+	 */
+#define done (READ_ONCE(desc->fence) == fence)
+	err = wait_for_us(done, 10);
+	if (err)
+		err = wait_for(done, 10);
+#undef done
+
+	if (unlikely(err)) {
+		DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
+			  fence, desc->fence);
+
+		if (WARN_ON(desc->is_in_error)) {
+			/* Something went wrong with the messaging, try to reset
+			 * the buffer and hope for the best
+			 */
+			guc_ct_buffer_desc_reset(desc);
+			err = -EPROTO;
+		}
+	}
+
+	*status = desc->status;	/* only meaningful to the caller when err == 0 */
+	return err;
+}
+
+static int ctch_send(struct intel_guc *guc,
+		     struct intel_guc_ct_channel *ctch,
+		     const u32 *action,
+		     u32 len,
+		     u32 *status)
+{	/* Queue one action on the SEND buffer, notify the GuC, wait for status. */
+	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
+	struct guc_ct_buffer_desc *desc = ctb->desc;
+	u32 fence;
+	int err;
+
+	GEM_BUG_ON(!ctch_is_open(ctch));
+	GEM_BUG_ON(!len);
+	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+
+	fence = ctch_get_next_fence(ctch);	/* consumed even if ctb_write() fails */
+	err = ctb_write(ctb, action, len, fence);
+	if (unlikely(err))
+		return err;
+
+	intel_guc_notify(guc);
+
+	err = wait_for_response(desc, fence, status);
+	if (unlikely(err))
+		return err;
+	if (*status != INTEL_GUC_STATUS_SUCCESS)
+		return -EIO;	/* GuC answered, but with a non-success status */
+	return 0;
+}
+
+/*
+ * CT buffer based GuC send function; holds guc->send_mutex across the send.
+ */
+static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
+	u32 status = ~0; /* sentinel; logged as-is if ctch_send() fails early */
+	int err;
+
+	mutex_lock(&guc->send_mutex);
+
+	err = ctch_send(guc, ctch, action, len, &status);
+	if (unlikely(err)) {
+		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
+			  action[0], err, status);
+	}
+
+	mutex_unlock(&guc->send_mutex);
+	return err;
+}
+
+/**
+ * intel_guc_enable_ct() - Enable buffer based command transport.
+ * @guc:	the guc
+ *
+ * Shall only be called for platforms with HAS_GUC_CT.
+ * Return: 0 on success, non-zero on failure (guc->send is left unchanged).
+ */
+int intel_guc_enable_ct(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
+	int err;
+
+	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+
+	err = ctch_open(guc, ctch);
+	if (unlikely(err))
+		return err;
+
+	/* Switch into cmd transport buffer based send() */
+	guc->send = intel_guc_send_ct;
+	DRM_INFO("CT: %s\n", enableddisabled(true));
+	return 0;
+}
+
+/**
+ * intel_guc_disable_ct() - Disable buffer based command transport.
+ * @guc: the guc; shall only be called for platforms with HAS_GUC_CT.
+ *       Does nothing if the channel is not open.
+ */
+void intel_guc_disable_ct(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
+
+	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+
+	if (!ctch_is_open(ctch))
+		return;
+
+	ctch_close(guc, ctch);
+
+	/* Disable send: the CT buffers are gone, so fall back to the nop */
+	guc->send = intel_guc_send_nop;
+	DRM_INFO("CT: %s\n", enableddisabled(false));
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
new file mode 100644
index 0000000..b6a2742
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_ct.h
@@ -0,0 +1,97 @@ 
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_GUC_CT_H_
+#define _INTEL_GUC_CT_H_
+
+struct intel_guc;
+struct i915_vma;
+
+#include "intel_guc_fwif.h"
+
+/**
+ * DOC: Command Transport (CT).
+ *
+ * Buffer based command transport is a replacement for the MMIO based mechanism.
+ * It can be used to perform both host-to-GuC and GuC-to-host communication.
+ */
+
+/**
+ * struct intel_guc_ct_buffer - single command transport buffer
+ * @desc: pointer to the buffer descriptor
+ * @cmds: pointer to the commands buffer
+ *
+ * A single command transport buffer consists of two parts, the header
+ * record (command transport buffer descriptor) and the actual buffer which
+ * holds the commands.
+ */
+struct intel_guc_ct_buffer {
+	struct guc_ct_buffer_desc *desc;
+	u32 *cmds;
+};
+
+/**
+ * struct intel_guc_ct_channel - pair of command transport buffers
+ * @vma: pointer to the vma with pair of CT buffers
+ * @ctbs: buffers for sending(0) and receiving(1) commands
+ * @owner: unique identifier
+ * @next_fence: fence to be used with next send command
+ *
+ * Buffers go in pairs to allow bi-directional communication.
+ * To simplify the code we place both of them in the same vma.
+ * Buffers from the same pair must share a unique owner id.
+ */
+struct intel_guc_ct_channel {
+	struct i915_vma *vma;
+	struct intel_guc_ct_buffer ctbs[2];
+	u32 owner;
+	u32 next_fence;
+};
+
+/* Top-level CT state: the channel, plus an owner-id allocator if multi-channel */
+struct intel_guc_ct {
+#if INTEL_GUC_CT_MAX_CHANNELS > 1
+	struct ida owner_ida;
+#endif
+	struct intel_guc_ct_channel channel;
+};
+
+static inline void intel_guc_ct_init_early(struct intel_guc_ct *ct)
+{	/* Only the owner-id allocator needs setup; a no-op with one channel. */
+#if INTEL_GUC_CT_MAX_CHANNELS > 1
+	ida_init(&ct->owner_ida);
+#endif
+}
+
+static inline void intel_guc_ct_cleanup(struct intel_guc_ct *ct)
+{	/* Counterpart of intel_guc_ct_init_early(); a no-op with one channel. */
+#if INTEL_GUC_CT_MAX_CHANNELS > 1
+	ida_destroy(&ct->owner_ida);
+#endif
+}
+
+/* XXX: move to intel_uc.h ? but it doesn't fit there either */
+int intel_guc_enable_ct(struct intel_guc *guc);
+void intel_guc_disable_ct(struct intel_guc *guc);
+
+#endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 6156845..47f8eec 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -331,6 +331,48 @@  struct guc_stage_desc {
 	u64 desc_private;
 } __packed;
 
+/*
+ * Describes a single command transport buffer.
+ * Used by both guc-master and clients.
+ */
+struct guc_ct_buffer_desc {
+	u32 addr;		/* gfx address */
+	u64 vaddr;		/* virtual address */
+	u32 size;		/* size in bytes */
+	u32 head;		/* offset in bytes, updated by GuC */
+	u32 tail;		/* offset in bytes, updated by owner */
+	u32 is_in_error;	/* error indicator */
+	u32 fence;		/* fence updated by GuC */
+	u32 status;		/* status updated by GuC */
+	u32 owner;		/* id assigned by owner */
+	u32 reserved[6];
+} __packed;
+
+/* Type of command transport buffer, as passed to (de)register actions */
+#define INTEL_GUC_CT_BUFFER_TYPE_SEND	0x0u
+#define INTEL_GUC_CT_BUFFER_TYPE_RECV	0x1u
+
+#define INTEL_GUC_CT_MAX_CHANNELS	1	/* > 1 enables the owner-id ida code */
+
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0]	message len (in dwords)
+ * bit[7..5]	reserved
+ * bit[8]	write fence to desc
+ * bit[9]	write status to H2G buff
+ * bit[10]	send status (via G2H)
+ * bit[15..11]	reserved
+ * bit[31..16]	action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT			0
+#define GUC_CT_MSG_LEN_MASK			0x1F	/* unshifted field mask */
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
+#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
+#define GUC_CT_MSG_ACTION_SHIFT			16
+#define GUC_CT_MSG_ACTION_MASK			0xFFFF	/* unshifted field mask */
+
 #define GUC_FORCEWAKE_RENDER	(1 << 0)
 #define GUC_FORCEWAKE_MEDIA	(1 << 1)
 
@@ -515,6 +557,8 @@  enum intel_guc_action {
 	INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
 	INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
 	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
 	INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
 	INTEL_GUC_ACTION_LIMIT
 };
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 31dc8c3..5183245 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -108,6 +108,14 @@  void intel_uc_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&guc->send_mutex);
 	guc->send = intel_guc_send_nop;
 	guc->notify = guc_write_irq_trigger;
+	intel_guc_ct_init_early(&guc->ct);
+}
+
+void intel_uc_cleanup(struct drm_i915_private *dev_priv)
+{	/* Counterpart of intel_uc_init_early(); currently only cleans up CT state. */
+	struct intel_guc *guc = &dev_priv->guc;
+
+	intel_guc_ct_cleanup(&guc->ct);
 }
 
 static void fetch_uc_fw(struct drm_i915_private *dev_priv,
@@ -288,14 +296,24 @@  static void guc_init_send_regs(struct intel_guc *guc)
 
 static int guc_enable_communication(struct intel_guc *guc)
 {
-	/* XXX: placeholder for alternate setup */
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
 	guc_init_send_regs(guc);
+
+	if (HAS_GUC_CT(dev_priv))	/* enable_ct installs its own send() on success */
+		return intel_guc_enable_ct(guc);
+
 	guc->send = intel_guc_send_mmio;
 	return 0;
 }
 
 static void guc_disable_communication(struct intel_guc *guc)
 {
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	if (HAS_GUC_CT(dev_priv))	/* close the CT channel (if open) first */
+		intel_guc_disable_ct(guc);
+
 	guc->send = intel_guc_send_nop;
 }
 
@@ -442,6 +460,11 @@  int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
 	GEM_BUG_ON(!len);
 	GEM_BUG_ON(len > guc->send_regs.count);
 
+	/* If CT is available, we expect to use MMIO only during init/fini */
+	GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
+		*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+		*action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
+
 	mutex_lock(&guc->send_mutex);
 	intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
 
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 930f2e1..fc4d1d7 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -27,7 +27,7 @@ 
 #include "intel_guc_fwif.h"
 #include "i915_guc_reg.h"
 #include "intel_ringbuffer.h"
-
+#include "intel_guc_ct.h"
 #include "i915_vma.h"
 
 struct drm_i915_gem_request;
@@ -173,6 +173,7 @@  struct intel_guc_log {
 struct intel_guc {
 	struct intel_uc_fw fw;
 	struct intel_guc_log log;
+	struct intel_guc_ct ct;
 
 	/* intel_guc_recv interrupt related state */
 	bool interrupts_enabled;
@@ -214,6 +215,7 @@  struct intel_huc {
 /* intel_uc.c */
 void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
 void intel_uc_init_early(struct drm_i915_private *dev_priv);
+void intel_uc_cleanup(struct drm_i915_private *dev_priv);
 void intel_uc_init_fw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);