diff mbox

[3/4] drm/tegra: Add VIC support

Message ID 1432214425-27137-4-git-send-email-amerilainen@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arto Merilainen May 21, 2015, 1:20 p.m. UTC
This patch adds support for Video Image Compositor engine which
can be used for 2d operations.

The engine has a microcontroller (Falcon) that acts as a frontend
for the rest of the unit. In order to properly utilize the engine,
the frontend must be booted before pushing any commands.

Signed-off-by: Andrew Chew <achew@nvidia.com>
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
---
 drivers/gpu/drm/tegra/Makefile |   3 +-
 drivers/gpu/drm/tegra/drm.c    |   9 +-
 drivers/gpu/drm/tegra/drm.h    |   1 +
 drivers/gpu/drm/tegra/vic.c    | 593 +++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/vic.h    | 116 ++++++++
 include/linux/host1x.h         |   1 +
 6 files changed, 721 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/tegra/vic.c
 create mode 100644 drivers/gpu/drm/tegra/vic.h

Comments

Mikko Perttunen May 21, 2015, 2:40 p.m. UTC | #1
Hi, very good patch!

Here are a few small comments. Aside those, you should also add a
section to
Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt in a
separate patch.

Thanks,
Mikko.

On 05/21/2015 04:20 PM, Arto Merilainen wrote:
> This patch adds support for Video Image Compositor engine which
> can be used for 2d operations.
> 
> The engine has a microcontroller (Falcon) that acts as a frontend
> for the rest of the unit. In order to properly utilize the engine,
> the frontend must be booted before pushing any commands.
> 
> Signed-off-by: Andrew Chew <achew@nvidia.com>
> Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
> ---
>  drivers/gpu/drm/tegra/Makefile |   3 +-
>  drivers/gpu/drm/tegra/drm.c    |   9 +-
>  drivers/gpu/drm/tegra/drm.h    |   1 +
>  drivers/gpu/drm/tegra/vic.c    | 593 +++++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/tegra/vic.h    | 116 ++++++++
>  include/linux/host1x.h         |   1 +
>  6 files changed, 721 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/tegra/vic.c
>  create mode 100644 drivers/gpu/drm/tegra/vic.h
> 
> diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
> index 2c66a8db9da4..3bc3566e00b6 100644
> --- a/drivers/gpu/drm/tegra/Makefile
> +++ b/drivers/gpu/drm/tegra/Makefile
> @@ -13,6 +13,7 @@ tegra-drm-y := \
>  	sor.o \
>  	dpaux.o \
>  	gr2d.o \
> -	gr3d.o
> +	gr3d.o \
> +	vic.o
>  
>  obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index bfad15a913a0..d947f5f4d801 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -1,6 +1,6 @@
>  /*
>   * Copyright (C) 2012 Avionic Design GmbH
> - * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
> + * Copyright (C) 2012-2015 NVIDIA CORPORATION.  All rights reserved.
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License version 2 as
> @@ -1048,6 +1048,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
>  	{ .compatible = "nvidia,tegra124-dc", },
>  	{ .compatible = "nvidia,tegra124-sor", },
>  	{ .compatible = "nvidia,tegra124-hdmi", },
> +	{ .compatible = "nvidia,tegra124-vic", },
>  	{ /* sentinel */ }
>  };
>  
> @@ -1097,8 +1098,14 @@ static int __init host1x_drm_init(void)
>  	if (err < 0)
>  		goto unregister_gr2d;
>  
> +	err = platform_driver_register(&tegra_vic_driver);
> +	if (err < 0)
> +		goto unregister_gr3d;
> +
>  	return 0;
>  
> +unregister_gr3d:
> +	platform_driver_unregister(&tegra_gr3d_driver);
>  unregister_gr2d:
>  	platform_driver_unregister(&tegra_gr2d_driver);
>  unregister_dpaux:
> diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
> index 0e7756e720c5..a9c02a80d6bf 100644
> --- a/drivers/gpu/drm/tegra/drm.h
> +++ b/drivers/gpu/drm/tegra/drm.h
> @@ -275,5 +275,6 @@ extern struct platform_driver tegra_hdmi_driver;
>  extern struct platform_driver tegra_dpaux_driver;
>  extern struct platform_driver tegra_gr2d_driver;
>  extern struct platform_driver tegra_gr3d_driver;
> +extern struct platform_driver tegra_vic_driver;
>  
>  #endif /* HOST1X_DRM_H */
> diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
> new file mode 100644
> index 000000000000..b7c5a96697ed
> --- /dev/null
> +++ b/drivers/gpu/drm/tegra/vic.c
> @@ -0,0 +1,593 @@
> +/*
> + * Copyright (c) 2015, NVIDIA Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/of_platform.h>
> +#include <linux/clk.h>
> +#include <linux/host1x.h>
> +#include <linux/module.h>
> +#include <linux/firmware.h>
> +#include <linux/platform_device.h>
> +#include <linux/reset.h>
> +#include <soc/tegra/pmc.h>
> +#include <linux/iommu.h>
> +
> +#include "drm.h"
> +#include "gem.h"
> +#include "vic.h"
> +
> +#define VIC_IDLE_TIMEOUT_DEFAULT	10000	/* 10 milliseconds */
> +#define VIC_IDLE_CHECK_PERIOD		10	/* 10 usec */
> +
> +struct vic;

This doesn't seem to be needed here.

> +
> +struct vic_config {
> +	/* firmware name */
> +	char *ucode_name;
> +
> +	/* class id */
> +	u32 class_id;
> +
> +	/* powergate id */
> +	int powergate_id;
> +};
> +
> +struct vic {
> +	struct {
> +		u32 bin_data_offset;
> +		u32 data_offset;
> +		u32 data_size;
> +		u32 code_offset;
> +		u32 size;
> +	} os, fce;
> +
> +	struct tegra_bo *ucode_bo;
> +	bool ucode_valid;
> +	void *ucode_vaddr;
> +
> +	bool booted;
> +
> +	void __iomem *regs;
> +	struct tegra_drm_client client;
> +	struct host1x_channel *channel;
> +	struct iommu_domain *domain;
> +	struct device *dev;
> +	struct clk *clk;
> +	struct reset_control *rst;
> +
> +	/* Platform configuration */
> +	struct vic_config *config;
> +
> +	/* for firewall - this determines if method 1 should be regarded
> +	 * as an address register */
> +	bool method_data_is_addr_reg;
> +};
> +
> +static inline struct vic *to_vic(struct tegra_drm_client *client)
> +{
> +	return container_of(client, struct vic, client);
> +}
> +
> +void vic_writel(struct vic *vic, u32 v, u32 r)
> +{
> +	writel(v, vic->regs + r);
> +}
> +
> +u32 vic_readl(struct vic *vic, u32 r)
> +{
> +	return readl(vic->regs + r);
> +}
> +
> +static int vic_wait_idle(struct vic *vic)
> +{
> +	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
> +
> +	do {
> +		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
> +		u32 w = vic_readl(vic, NV_PVIC_FALCON_IDLESTATE);
> +
> +		if (!w)
> +			return 0;
> +
> +		udelay(VIC_IDLE_CHECK_PERIOD);
> +		timeout -= check;
> +	} while (timeout);
> +
> +	dev_err(vic->dev, "vic idle timeout");
> +
> +	return -ETIMEDOUT;
> +}
> +
> +static int vic_dma_wait_idle(struct vic *vic)
> +{
> +	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
> +
> +	do {
> +		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
> +		u32 dmatrfcmd = vic_readl(vic, NV_PVIC_FALCON_DMATRFCMD);
> +
> +		if (dmatrfcmd & DMATRFCMD_IDLE)
> +			return 0;
> +
> +		udelay(VIC_IDLE_CHECK_PERIOD);
> +		timeout -= check;
> +	} while (timeout);
> +
> +	dev_err(vic->dev, "dma idle timeout");
> +
> +	return -ETIMEDOUT;
> +}
> +
> +static int vic_dma_pa_to_internal_256b(struct vic *vic, phys_addr_t pa,
> +				       u32 internal_offset, bool imem)
> +{
> +	u32 cmd = DMATRFCMD_SIZE_256B;
> +
> +	if (imem)
> +		cmd |= DMATRFCMD_IMEM;
> +
> +	vic_writel(vic, DMATRFMOFFS_OFFS(internal_offset),
> +		   NV_PVIC_FALCON_DMATRFMOFFS);
> +	vic_writel(vic, DMATRFFBOFFS_OFFS(pa),
> +		   NV_PVIC_FALCON_DMATRFFBOFFS);
> +	vic_writel(vic, cmd, NV_PVIC_FALCON_DMATRFCMD);
> +
> +	return vic_dma_wait_idle(vic);
> +}
> +
> +static int vic_setup_ucode_image(struct vic *vic,
> +				 const struct firmware *ucode_fw)
> +{
> +	/* image data is little endian. */
> +	u32 *ucode_vaddr = vic->ucode_vaddr;
> +	struct ucode_v1_vic ucode;
> +	int w;
> +
> +	/* copy the whole thing taking into account endianness */
> +	for (w = 0; w < ucode_fw->size / sizeof(u32); w++)
> +		ucode_vaddr[w] = le32_to_cpu(((u32 *)ucode_fw->data)[w]);
> +
> +	ucode.bin_header = (struct ucode_bin_header_v1_vic *)ucode_vaddr;
> +
> +	/* endian problems would show up right here */
> +	if (ucode.bin_header->bin_magic != 0x10de) {
> +		dev_err(vic->dev, "failed to get firmware magic");
> +		return -EINVAL;
> +	}
> +
> +	if (ucode.bin_header->bin_ver != 1) {
> +		dev_err(vic->dev, "unsupported firmware version");
> +		return -ENOENT;
> +	}
> +
> +	/* shouldn't be bigger than what firmware thinks */
> +	if (ucode.bin_header->bin_size > ucode_fw->size) {
> +		dev_err(vic->dev, "ucode image size inconsistency");
> +		return -EINVAL;
> +	}
> +
> +	ucode.os_header = (struct ucode_os_header_v1_vic *)
> +		(((void *)ucode_vaddr) +
> +		ucode.bin_header->os_bin_header_offset);
> +	vic->os.size = ucode.bin_header->os_bin_size;
> +	vic->os.bin_data_offset = ucode.bin_header->os_bin_data_offset;
> +	vic->os.code_offset = ucode.os_header->os_code_offset;
> +	vic->os.data_offset = ucode.os_header->os_data_offset;
> +	vic->os.data_size = ucode.os_header->os_data_size;
> +
> +	ucode.fce_header = (struct ucode_fce_header_v1_vic *)
> +		(((void *)ucode_vaddr) +
> +		ucode.bin_header->fce_bin_header_offset);
> +	vic->fce.size = ucode.fce_header->fce_ucode_size;
> +	vic->fce.data_offset = ucode.bin_header->fce_bin_data_offset;
> +
> +	return 0;
> +}
> +
> +static int vic_read_ucode(struct vic *vic)
> +{
> +	struct host1x_client *client = &vic->client.base;
> +	struct drm_device *dev = dev_get_drvdata(client->parent);
> +	const struct firmware *ucode_fw;
> +	int err;
> +
> +	err = request_firmware(&ucode_fw, vic->config->ucode_name, vic->dev);
> +	if (err) {
> +		dev_err(vic->dev, "failed to get firmware\n");
> +		goto err_request_firmware;
> +	}
> +
> +	vic->ucode_bo = tegra_bo_create(dev, ucode_fw->size, 0);
> +	if (IS_ERR(vic->ucode_bo)) {
> +		dev_err(vic->dev, "dma memory allocation failed");
> +		err = PTR_ERR(vic->ucode_bo);
> +		goto err_alloc_iova;
> +	}
> +
> +	/* get vaddr for the ucode */
> +	if (!vic->ucode_bo->vaddr)
> +		vic->ucode_vaddr = vmap(vic->ucode_bo->pages,
> +					vic->ucode_bo->num_pages, VM_MAP,
> +					pgprot_writecombine(PAGE_KERNEL));
> +	else
> +		vic->ucode_vaddr = vic->ucode_bo->vaddr;
> +
> +	err = vic_setup_ucode_image(vic, ucode_fw);
> +	if (err) {
> +		dev_err(vic->dev, "failed to parse firmware image\n");
> +		goto err_setup_ucode_image;
> +	}
> +
> +	vic->ucode_valid = true;
> +
> +	release_firmware(ucode_fw);
> +
> +	return 0;
> +
> +err_setup_ucode_image:
> +	drm_gem_object_release(&vic->ucode_bo->gem);

Should this not be freed with tegra_bo_free or similar? Right now this
looks like it would leak at least memory.

> +err_alloc_iova:
> +	release_firmware(ucode_fw);
> +err_request_firmware:
> +	return err;
> +}
> +
> +static int vic_boot(struct device *dev)
> +{
> +	struct vic *vic = dev_get_drvdata(dev);
> +	u32 offset;
> +	int err = 0;
> +
> +	if (vic->booted)
> +		return 0;
> +
> +	if (!vic->ucode_valid) {
> +		err = vic_read_ucode(vic);
> +		if (err)
> +			return err;
> +	}
> +
> +	/* ensure that the engine is in sane state */
> +	reset_control_assert(vic->rst);
> +	udelay(10);
> +	reset_control_deassert(vic->rst);
> +
> +	/* setup clockgating registers */
> +	vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
> +			CG_IDLE_CG_EN |
> +			CG_WAKEUP_DLY_CNT(4),
> +		   NV_PVIC_MISC_PRI_VIC_CG);
> +
> +	/* service all dma requests */
> +	vic_writel(vic, 0, NV_PVIC_FALCON_DMACTL);
> +
> +	/* setup dma base address */
> +	vic_writel(vic, (vic->ucode_bo->paddr + vic->os.bin_data_offset) >> 8,
> +		   NV_PVIC_FALCON_DMATRFBASE);
> +
> +	/* dma ucode data */
> +	for (offset = 0; offset < vic->os.data_size; offset += 256)
> +		vic_dma_pa_to_internal_256b(vic,
> +					     vic->os.data_offset + offset,
> +					     offset, false);
> +
> +	/* dma ucode */
> +	vic_dma_pa_to_internal_256b(vic, vic->os.code_offset, 0, true);
> +
> +	/* setup falcon interrupts and enable interface */
> +	vic_writel(vic, IRQMSET_EXT(0xff) |
> +			IRQMSET_SWGEN1_SET |
> +			IRQMSET_SWGEN0_SET |
> +			IRQMSET_EXTERR_SET |
> +			IRQMSET_HALT_SET |
> +			IRQMSET_WDTMR_SET,
> +		   NV_PVIC_FALCON_IRQMSET);
> +	vic_writel(vic, IRQDEST_HOST_EXT(0Xff) |
> +			IRQDEST_HOST_SWGEN1_HOST |
> +			IRQDEST_HOST_SWGEN0_HOST |
> +			IRQDEST_HOST_EXTERR_HOST |
> +			IRQDEST_HOST_HALT_HOST,
> +		    NV_PVIC_FALCON_IRQDEST);
> +
> +	/* enable method and context switch interfaces */
> +	vic_writel(vic, ITFEN_MTHDEN_ENABLE |
> +			ITFEN_CTXEN_ENABLE,
> +		   NV_PVIC_FALCON_ITFEN);
> +
> +	/* boot falcon */
> +	vic_writel(vic, BOOTVEC_VEC(0), NV_PVIC_FALCON_BOOTVEC);
> +	vic_writel(vic, CPUCTL_STARTCPU, NV_PVIC_FALCON_CPUCTL);
> +
> +	err = vic_wait_idle(vic);
> +	if (err != 0) {
> +		dev_err(dev, "boot failed due to timeout");
> +		return err;
> +	}
> +
> +	/* Set application id and set-up FCE ucode address */
> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID >> 2,
> +		    NV_PVIC_FALCON_METHOD_0);
> +	vic_writel(vic, 1, NV_PVIC_FALCON_METHOD_1);
> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE >> 2,
> +		    NV_PVIC_FALCON_METHOD_0);
> +	vic_writel(vic, vic->fce.size, NV_PVIC_FALCON_METHOD_1);
> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET >> 2,
> +		    NV_PVIC_FALCON_METHOD_0);
> +	vic_writel(vic, (vic->ucode_bo->paddr + vic->fce.data_offset) >> 8,
> +		    NV_PVIC_FALCON_METHOD_1);
> +
> +	err = vic_wait_idle(vic);
> +	if (err != 0) {
> +		dev_err(dev, "failed to set application id and fce base");
> +		return err;
> +	}
> +
> +	vic->booted = true;
> +
> +	dev_info(dev, "booted");
> +
> +	return 0;
> +}
> +
> +static int vic_init(struct host1x_client *client)
> +{
> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
> +	struct drm_device *dev = dev_get_drvdata(client->parent);
> +	struct tegra_drm *tegra = dev->dev_private;
> +	struct vic *vic = to_vic(drm);
> +	int err;
> +
> +	if (tegra->domain) {
> +		err = iommu_attach_device(tegra->domain, vic->dev);
> +		if (err < 0) {
> +			dev_err(vic->dev, "failed to attach to domain: %d\n",
> +				err);
> +			return err;
> +		}
> +
> +		vic->domain = tegra->domain;
> +	}
> +
> +	vic->channel = host1x_channel_request(client->dev);
> +	if (!vic->channel)
> +		return -ENOMEM;
> +
> +	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
> +	if (!client->syncpts[0]) {
> +		host1x_channel_free(vic->channel);
> +		return -ENOMEM;
> +	}
> +
> +	return tegra_drm_register_client(tegra, drm);
> +}
> +
> +static int vic_exit(struct host1x_client *client)
> +{
> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
> +	struct drm_device *dev = dev_get_drvdata(client->parent);
> +	struct tegra_drm *tegra = dev->dev_private;
> +	struct vic *vic = to_vic(drm);
> +	int err;
> +
> +	err = tegra_drm_unregister_client(tegra, drm);
> +	if (err < 0)
> +		return err;
> +
> +	host1x_syncpt_free(client->syncpts[0]);
> +	host1x_channel_free(vic->channel);
> +
> +	/* ucode is no longer available. release it */
> +	if (vic->ucode_valid) {
> +		/* first, ensure that vic is not using it */
> +		reset_control_assert(vic->rst);
> +		udelay(10);
> +		reset_control_deassert(vic->rst);
> +
> +		/* ..then release the ucode */
> +		if (!vic->ucode_bo->vaddr)
> +			vunmap(vic->ucode_vaddr);
> +		drm_gem_object_release(&vic->ucode_bo->gem);

Same here, is this the correct way to free this?

> +		vic->ucode_valid = false;
> +	}
> +
> +	if (vic->domain) {
> +		iommu_detach_device(vic->domain, vic->dev);
> +		vic->domain = NULL;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct host1x_client_ops vic_client_ops = {
> +	.init = vic_init,
> +	.exit = vic_exit,
> +};
> +
> +static int vic_open_channel(struct tegra_drm_client *client,
> +			     struct tegra_drm_context *context)
> +{
> +	struct vic *vic = to_vic(client);
> +	int err;
> +
> +	err = vic_boot(vic->dev);
> +	if (err)
> +		return err;
> +
> +	context->channel = host1x_channel_get(vic->channel);
> +	if (!context->channel)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void vic_close_channel(struct tegra_drm_context *context)
> +{
> +	host1x_channel_put(context->channel);
> +}
> +
> +static int vic_is_addr_reg(struct device *dev, u32 class, u32 offset, u32 val)
> +{
> +	struct vic *vic = dev_get_drvdata(dev);
> +
> +	/* handle host class */
> +	if (class == HOST1X_CLASS_HOST1X) {
> +		if (offset == 0x2b)
> +			return true;
> +		return false;

"return (offset == 0x2b);" perhaps?

> +	}
> +
> +	/* write targets towards method 1. check stashed value */
> +	if (offset == NV_PVIC_FALCON_METHOD_1 >> 2)
> +		return vic->method_data_is_addr_reg;
> +
> +	/* write targets to method 0. determine if the method takes an
> +	 * address as a parameter */
> +	if (offset == NV_PVIC_FALCON_METHOD_0 >> 2) {
> +		u32 method = val << 2;
> +
> +		if ((method >= 0x400 && method <= 0x5dc) ||
> +		    (method >= 0x720 && method <= 0x738))
> +			vic->method_data_is_addr_reg = true;
> +		else
> +			vic->method_data_is_addr_reg = false;
> +	}
> +
> +	/* default to false */
> +	return false;
> +}
> +
> +static const struct tegra_drm_client_ops vic_ops = {
> +	.open_channel = vic_open_channel,
> +	.close_channel = vic_close_channel,
> +	.is_addr_reg = vic_is_addr_reg,
> +	.submit = tegra_drm_submit,
> +};
> +
> +static const struct vic_config vic_t124_config = {
> +	.ucode_name = "vic03_ucode.bin",
> +	.class_id = HOST1X_CLASS_VIC,
> +	.powergate_id = TEGRA_POWERGATE_VIC,
> +};
> +
> +static const struct of_device_id vic_match[] = {
> +	{ .compatible = "nvidia,tegra124-vic",
> +		.data = &vic_t124_config },
> +	{ },
> +};
> +
> +static int vic_probe(struct platform_device *pdev)
> +{
> +	struct vic_config *vic_config = NULL;
> +	struct device *dev = &pdev->dev;
> +	struct host1x_syncpt **syncpts;
> +	struct resource *regs;
> +	struct vic *vic;
> +	int err;
> +
> +	if (dev->of_node) {
> +		const struct of_device_id *match;
> +
> +		match = of_match_device(vic_match, dev);
> +		if (match)
> +			vic_config = (struct vic_config *)match->data;
> +		else
> +			return -ENXIO;
> +	}

This doesn't seem necessary, we can only be probed from device tree and
each match entry has a data pointer anyway.

> +
> +	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
> +	if (!vic)
> +		return -ENOMEM;
> +
> +	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
> +	if (!syncpts)
> +		return -ENOMEM;
> +
> +	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!regs) {
> +		dev_err(&pdev->dev, "failed to get registers\n");
> +		return -ENXIO;
> +	}
> +
> +	vic->regs = devm_ioremap_resource(dev, regs);
> +	if (IS_ERR(vic->regs))
> +		return PTR_ERR(vic->regs);
> +
> +	vic->clk = devm_clk_get(dev, NULL);
> +	if (IS_ERR(vic->clk)) {
> +		dev_err(&pdev->dev, "failed to get clock\n");
> +		return PTR_ERR(vic->clk);
> +	}
> +
> +	vic->rst = devm_reset_control_get(dev, "vic03");

I might prefer just "vic" as the clock/reset name. The name is often
used as a sort of "role" for the clock/reset for the device, not
necessarily the raw name of the "correct" clock/reset.

> +	if (IS_ERR(vic->rst)) {
> +		dev_err(&pdev->dev, "cannot get reset\n");
> +		return PTR_ERR(vic->rst);
> +	}
> +
> +	platform_set_drvdata(pdev, vic);
> +
> +	INIT_LIST_HEAD(&vic->client.base.list);
> +	vic->client.base.ops = &vic_client_ops;
> +	vic->client.base.dev = dev;
> +	vic->client.base.class = vic_config->class_id;
> +	vic->client.base.syncpts = syncpts;
> +	vic->client.base.num_syncpts = 1;
> +	vic->dev = dev;
> +	vic->config = vic_config;
> +
> +	INIT_LIST_HEAD(&vic->client.list);
> +	vic->client.ops = &vic_ops;
> +
> +	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
> +						vic->clk, vic->rst);
> +	if (err) {
> +		dev_err(dev, "cannot turn on the device\n");
> +		return err;
> +	}
> +
> +	err = host1x_client_register(&vic->client.base);
> +	if (err < 0) {

You used 'if (err) {' previously, so maybe also here.

> +		dev_err(dev, "failed to register host1x client: %d\n", err);
> +		clk_disable_unprepare(vic->clk);
> +		tegra_powergate_power_off(vic->config->powergate_id);
> +		platform_set_drvdata(pdev, NULL);
> +		return err;
> +	}
> +
> +	dev_info(&pdev->dev, "initialized");
> +
> +	return 0;
> +}
> +
> +static int vic_remove(struct platform_device *pdev)
> +{
> +	struct vic *vic = platform_get_drvdata(pdev);
> +	int err;
> +
> +	err = host1x_client_unregister(&vic->client.base);
> +	if (err < 0) {

and here.

> +		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
> +			err);
> +		return err;
> +	}
> +
> +	clk_disable_unprepare(vic->clk);
> +	tegra_powergate_power_off(vic->config->powergate_id);
> +
> +	return 0;
> +}
> +
> +struct platform_driver tegra_vic_driver = {
> +	.driver = {
> +		.name = "tegra-vic",
> +		.of_match_table = vic_match,
> +	},
> +	.probe = vic_probe,
> +	.remove = vic_remove,
> +};
> diff --git a/drivers/gpu/drm/tegra/vic.h b/drivers/gpu/drm/tegra/vic.h
> new file mode 100644
> index 000000000000..65ca38a8da88
> --- /dev/null
> +++ b/drivers/gpu/drm/tegra/vic.h
> @@ -0,0 +1,116 @@
> +/*
> + * Copyright (c) 2015, NVIDIA Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef TEGRA_VIC_H
> +#define TEGRA_VIC_H
> +
> +#include <linux/types.h>
> +#include <linux/dma-attrs.h>
> +#include <linux/firmware.h>
> +#include <linux/platform_device.h>
> +
> +struct ucode_bin_header_v1_vic {
> +	u32 bin_magic;        /* 0x10de */
> +	u32 bin_ver;          /* cya, versioning of bin format (1) */
> +	u32 bin_size;         /* entire image size including this header */
> +	u32 os_bin_header_offset;
> +	u32 os_bin_data_offset;
> +	u32 os_bin_size;
> +	u32 fce_bin_header_offset;
> +	u32 fce_bin_data_offset;
> +	u32 fce_bin_size;
> +};
> +
> +struct ucode_os_code_header_v1_vic {
> +	u32 offset;
> +	u32 size;
> +};
> +
> +struct ucode_os_header_v1_vic {
> +	u32 os_code_offset;
> +	u32 os_code_size;
> +	u32 os_data_offset;
> +	u32 os_data_size;
> +	u32 num_apps;
> +	struct ucode_os_code_header_v1_vic *app_code;
> +	struct ucode_os_code_header_v1_vic *app_data;
> +	u32 *os_ovl_offset;
> +	u32 *of_ovl_size;
> +};
> +
> +struct ucode_fce_header_v1_vic {
> +	u32 fce_ucode_offset;
> +	u32 fce_ucode_buffer_size;
> +	u32 fce_ucode_size;
> +};
> +
> +struct ucode_v1_vic {
> +	struct ucode_bin_header_v1_vic *bin_header;
> +	struct ucode_os_header_v1_vic  *os_header;
> +	struct ucode_fce_header_v1_vic *fce_header;
> +};
> +
> +/* VIC methods */
> +#define NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID	0x00000200
> +#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE	0x0000071C
> +#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET	0x0000072C
> +
> +/* VIC registers */
> +
> +#define NV_PVIC_FALCON_METHOD_0			0x00000040
> +#define NV_PVIC_FALCON_METHOD_1			0x00000044
> +
> +#define NV_PVIC_FALCON_IRQMSET			0x00001010
> +#define IRQMSET_WDTMR_SET			(1 << 1)
> +#define IRQMSET_HALT_SET			(1 << 4)
> +#define IRQMSET_EXTERR_SET			(1 << 5)
> +#define IRQMSET_SWGEN0_SET			(1 << 6)
> +#define IRQMSET_SWGEN1_SET			(1 << 7)
> +#define IRQMSET_EXT(val)			((val & 0xff) << 8)
> +
> +#define NV_PVIC_FALCON_IRQDEST			0x0000101c
> +#define IRQDEST_HOST_HALT_HOST			(1 << 4)
> +#define IRQDEST_HOST_EXTERR_HOST		(1 << 5)
> +#define IRQDEST_HOST_SWGEN0_HOST		(1 << 6)
> +#define IRQDEST_HOST_SWGEN1_HOST		(1 << 7)
> +#define IRQDEST_HOST_EXT(val)			((val & 0xff) << 8)
> +
> +#define NV_PVIC_FALCON_ITFEN			0x00001048
> +#define ITFEN_CTXEN_ENABLE			(1 << 0)
> +#define ITFEN_MTHDEN_ENABLE			(1 << 1)
> +
> +#define NV_PVIC_FALCON_IDLESTATE		0x0000104c
> +
> +#define NV_PVIC_FALCON_CPUCTL			0x00001100
> +#define CPUCTL_STARTCPU				(1 << 1)
> +
> +#define NV_PVIC_FALCON_BOOTVEC			0x00001104
> +#define BOOTVEC_VEC(val)			((val & 0xffffffff) << 0)
> +
> +#define NV_PVIC_FALCON_DMACTL			0x0000110c
> +
> +#define NV_PVIC_FALCON_DMATRFBASE		0x00001110
> +
> +#define NV_PVIC_FALCON_DMATRFMOFFS		0x00001114
> +#define DMATRFMOFFS_OFFS(val)			((val & 0xffff) << 0)
> +
> +#define NV_PVIC_FALCON_DMATRFCMD		0x00001118
> +#define DMATRFCMD_IDLE				(1 << 1)
> +#define DMATRFCMD_IMEM				(1 << 4)
> +#define DMATRFCMD_SIZE_256B			(6 << 8)
> +
> +#define NV_PVIC_FALCON_DMATRFFBOFFS		0x0000111c
> +#define DMATRFFBOFFS_OFFS(val)			((val & 0xffffffff) << 0)
> +
> +#define NV_PVIC_MISC_PRI_VIC_CG			0x000016d0
> +#define CG_IDLE_CG_DLY_CNT(val)			((val & 0x3f) << 0)
> +#define CG_IDLE_CG_EN				(1 << 6)
> +#define CG_WAKEUP_DLY_CNT(val)			((val & 0xf) << 16)
> +
> +
> +#endif /* TEGRA_VIC_H */
> diff --git a/include/linux/host1x.h b/include/linux/host1x.h
> index fc86ced77e76..a006dad00009 100644
> --- a/include/linux/host1x.h
> +++ b/include/linux/host1x.h
> @@ -26,6 +26,7 @@ enum host1x_class {
>  	HOST1X_CLASS_HOST1X = 0x1,
>  	HOST1X_CLASS_GR2D = 0x51,
>  	HOST1X_CLASS_GR2D_SB = 0x52,
> +	HOST1X_CLASS_VIC = 0x5D,
>  	HOST1X_CLASS_GR3D = 0x60,
>  };
>  
>
Arto Merilainen May 21, 2015, 3:10 p.m. UTC | #2
Thank you Mikko for your comments!

Please see my answers inline.

- Arto

On 05/21/2015 05:40 PM, Mikko Perttunen wrote:
> Hi, very good patch!
>
> Here are a few small comments. Aside those, you should also add a
> section to
> Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt in a
> separate patch.

Will do.

> On 05/21/2015 04:20 PM, Arto Merilainen wrote:
>> This patch adds support for Video Image Compositor engine which
>> can be used for 2d operations.
>>
>> The engine has a microcontroller (Falcon) that acts as a frontend
>> for the rest of the unit. In order to properly utilize the engine,
>> the frontend must be booted before pushing any commands.
>>
>> Signed-off-by: Andrew Chew <achew@nvidia.com>
>> Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
>> ---
>>   drivers/gpu/drm/tegra/Makefile |   3 +-
>>   drivers/gpu/drm/tegra/drm.c    |   9 +-
>>   drivers/gpu/drm/tegra/drm.h    |   1 +
>>   drivers/gpu/drm/tegra/vic.c    | 593 +++++++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/tegra/vic.h    | 116 ++++++++
>>   include/linux/host1x.h         |   1 +
>>   6 files changed, 721 insertions(+), 2 deletions(-)
>>   create mode 100644 drivers/gpu/drm/tegra/vic.c
>>   create mode 100644 drivers/gpu/drm/tegra/vic.h
>>
>> diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
>> index 2c66a8db9da4..3bc3566e00b6 100644
>> --- a/drivers/gpu/drm/tegra/Makefile
>> +++ b/drivers/gpu/drm/tegra/Makefile
>> @@ -13,6 +13,7 @@ tegra-drm-y := \
>>   	sor.o \
>>   	dpaux.o \
>>   	gr2d.o \
>> -	gr3d.o
>> +	gr3d.o \
>> +	vic.o
>>
>>   obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
>> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
>> index bfad15a913a0..d947f5f4d801 100644
>> --- a/drivers/gpu/drm/tegra/drm.c
>> +++ b/drivers/gpu/drm/tegra/drm.c
>> @@ -1,6 +1,6 @@
>>   /*
>>    * Copyright (C) 2012 Avionic Design GmbH
>> - * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
>> + * Copyright (C) 2012-2015 NVIDIA CORPORATION.  All rights reserved.
>>    *
>>    * This program is free software; you can redistribute it and/or modify
>>    * it under the terms of the GNU General Public License version 2 as
>> @@ -1048,6 +1048,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
>>   	{ .compatible = "nvidia,tegra124-dc", },
>>   	{ .compatible = "nvidia,tegra124-sor", },
>>   	{ .compatible = "nvidia,tegra124-hdmi", },
>> +	{ .compatible = "nvidia,tegra124-vic", },
>>   	{ /* sentinel */ }
>>   };
>>
>> @@ -1097,8 +1098,14 @@ static int __init host1x_drm_init(void)
>>   	if (err < 0)
>>   		goto unregister_gr2d;
>>
>> +	err = platform_driver_register(&tegra_vic_driver);
>> +	if (err < 0)
>> +		goto unregister_gr3d;
>> +
>>   	return 0;
>>
>> +unregister_gr3d:
>> +	platform_driver_unregister(&tegra_gr3d_driver);
>>   unregister_gr2d:
>>   	platform_driver_unregister(&tegra_gr2d_driver);
>>   unregister_dpaux:
>> diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
>> index 0e7756e720c5..a9c02a80d6bf 100644
>> --- a/drivers/gpu/drm/tegra/drm.h
>> +++ b/drivers/gpu/drm/tegra/drm.h
>> @@ -275,5 +275,6 @@ extern struct platform_driver tegra_hdmi_driver;
>>   extern struct platform_driver tegra_dpaux_driver;
>>   extern struct platform_driver tegra_gr2d_driver;
>>   extern struct platform_driver tegra_gr3d_driver;
>> +extern struct platform_driver tegra_vic_driver;
>>
>>   #endif /* HOST1X_DRM_H */
>> diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
>> new file mode 100644
>> index 000000000000..b7c5a96697ed
>> --- /dev/null
>> +++ b/drivers/gpu/drm/tegra/vic.c
>> @@ -0,0 +1,593 @@
>> +/*
>> + * Copyright (c) 2015, NVIDIA Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/of.h>
>> +#include <linux/of_device.h>
>> +#include <linux/of_platform.h>
>> +#include <linux/clk.h>
>> +#include <linux/host1x.h>
>> +#include <linux/module.h>
>> +#include <linux/firmware.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/reset.h>
>> +#include <soc/tegra/pmc.h>
>> +#include <linux/iommu.h>
>> +
>> +#include "drm.h"
>> +#include "gem.h"
>> +#include "vic.h"
>> +
>> +#define VIC_IDLE_TIMEOUT_DEFAULT	10000	/* 10 milliseconds */
>> +#define VIC_IDLE_CHECK_PERIOD		10	/* 10 usec */
>> +
>> +struct vic;
>
> This doesn't seem to be needed here.
>
>> +
>> +struct vic_config {
>> +	/* firmware name */
>> +	char *ucode_name;
>> +
>> +	/* class id */
>> +	u32 class_id;
>> +
>> +	/* powergate id */
>> +	int powergate_id;
>> +};
>> +
>> +struct vic {
>> +	struct {
>> +		u32 bin_data_offset;
>> +		u32 data_offset;
>> +		u32 data_size;
>> +		u32 code_offset;
>> +		u32 size;
>> +	} os, fce;
>> +
>> +	struct tegra_bo *ucode_bo;
>> +	bool ucode_valid;
>> +	void *ucode_vaddr;
>> +
>> +	bool booted;
>> +
>> +	void __iomem *regs;
>> +	struct tegra_drm_client client;
>> +	struct host1x_channel *channel;
>> +	struct iommu_domain *domain;
>> +	struct device *dev;
>> +	struct clk *clk;
>> +	struct reset_control *rst;
>> +
>> +	/* Platform configuration */
>> +	struct vic_config *config;
>> +
>> +	/* for firewall - this determines if method 1 should be regarded
>> +	 * as an address register */
>> +	bool method_data_is_addr_reg;
>> +};
>> +
>> +static inline struct vic *to_vic(struct tegra_drm_client *client)
>> +{
>> +	return container_of(client, struct vic, client);
>> +}
>> +
>> +void vic_writel(struct vic *vic, u32 v, u32 r)
>> +{
>> +	writel(v, vic->regs + r);
>> +}
>> +
>> +u32 vic_readl(struct vic *vic, u32 r)
>> +{
>> +	return readl(vic->regs + r);
>> +}
>> +
>> +static int vic_wait_idle(struct vic *vic)
>> +{
>> +	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
>> +
>> +	do {
>> +		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
>> +		u32 w = vic_readl(vic, NV_PVIC_FALCON_IDLESTATE);
>> +
>> +		if (!w)
>> +			return 0;
>> +
>> +		udelay(VIC_IDLE_CHECK_PERIOD);
>> +		timeout -= check;
>> +	} while (timeout);
>> +
>> +	dev_err(vic->dev, "vic idle timeout");
>> +
>> +	return -ETIMEDOUT;
>> +}
>> +
>> +static int vic_dma_wait_idle(struct vic *vic)
>> +{
>> +	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
>> +
>> +	do {
>> +		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
>> +		u32 dmatrfcmd = vic_readl(vic, NV_PVIC_FALCON_DMATRFCMD);
>> +
>> +		if (dmatrfcmd & DMATRFCMD_IDLE)
>> +			return 0;
>> +
>> +		udelay(VIC_IDLE_CHECK_PERIOD);
>> +		timeout -= check;
>> +	} while (timeout);
>> +
>> +	dev_err(vic->dev, "dma idle timeout");
>> +
>> +	return -ETIMEDOUT;
>> +}
>> +
>> +static int vic_dma_pa_to_internal_256b(struct vic *vic, phys_addr_t pa,
>> +				       u32 internal_offset, bool imem)
>> +{
>> +	u32 cmd = DMATRFCMD_SIZE_256B;
>> +
>> +	if (imem)
>> +		cmd |= DMATRFCMD_IMEM;
>> +
>> +	vic_writel(vic, DMATRFMOFFS_OFFS(internal_offset),
>> +		   NV_PVIC_FALCON_DMATRFMOFFS);
>> +	vic_writel(vic, DMATRFFBOFFS_OFFS(pa),
>> +		   NV_PVIC_FALCON_DMATRFFBOFFS);
>> +	vic_writel(vic, cmd, NV_PVIC_FALCON_DMATRFCMD);
>> +
>> +	return vic_dma_wait_idle(vic);
>> +}
>> +
>> +static int vic_setup_ucode_image(struct vic *vic,
>> +				 const struct firmware *ucode_fw)
>> +{
>> +	/* image data is little endian. */
>> +	u32 *ucode_vaddr = vic->ucode_vaddr;
>> +	struct ucode_v1_vic ucode;
>> +	int w;
>> +
>> +	/* copy the whole thing taking into account endianness */
>> +	for (w = 0; w < ucode_fw->size / sizeof(u32); w++)
>> +		ucode_vaddr[w] = le32_to_cpu(((u32 *)ucode_fw->data)[w]);
>> +
>> +	ucode.bin_header = (struct ucode_bin_header_v1_vic *)ucode_vaddr;
>> +
>> +	/* endian problems would show up right here */
>> +	if (ucode.bin_header->bin_magic != 0x10de) {
>> +		dev_err(vic->dev, "failed to get firmware magic");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (ucode.bin_header->bin_ver != 1) {
>> +		dev_err(vic->dev, "unsupported firmware version");
>> +		return -ENOENT;
>> +	}
>> +
>> +	/* shouldn't be bigger than what firmware thinks */
>> +	if (ucode.bin_header->bin_size > ucode_fw->size) {
>> +		dev_err(vic->dev, "ucode image size inconsistency");
>> +		return -EINVAL;
>> +	}
>> +
>> +	ucode.os_header = (struct ucode_os_header_v1_vic *)
>> +		(((void *)ucode_vaddr) +
>> +		ucode.bin_header->os_bin_header_offset);
>> +	vic->os.size = ucode.bin_header->os_bin_size;
>> +	vic->os.bin_data_offset = ucode.bin_header->os_bin_data_offset;
>> +	vic->os.code_offset = ucode.os_header->os_code_offset;
>> +	vic->os.data_offset = ucode.os_header->os_data_offset;
>> +	vic->os.data_size = ucode.os_header->os_data_size;
>> +
>> +	ucode.fce_header = (struct ucode_fce_header_v1_vic *)
>> +		(((void *)ucode_vaddr) +
>> +		ucode.bin_header->fce_bin_header_offset);
>> +	vic->fce.size = ucode.fce_header->fce_ucode_size;
>> +	vic->fce.data_offset = ucode.bin_header->fce_bin_data_offset;
>> +
>> +	return 0;
>> +}
>> +
>> +static int vic_read_ucode(struct vic *vic)
>> +{
>> +	struct host1x_client *client = &vic->client.base;
>> +	struct drm_device *dev = dev_get_drvdata(client->parent);
>> +	const struct firmware *ucode_fw;
>> +	int err;
>> +
>> +	err = request_firmware(&ucode_fw, vic->config->ucode_name, vic->dev);
>> +	if (err) {
>> +		dev_err(vic->dev, "failed to get firmware\n");
>> +		goto err_request_firmware;
>> +	}
>> +
>> +	vic->ucode_bo = tegra_bo_create(dev, ucode_fw->size, 0);
>> +	if (IS_ERR(vic->ucode_bo)) {
>> +		dev_err(vic->dev, "dma memory allocation failed");
>> +		err = PTR_ERR(vic->ucode_bo);
>> +		goto err_alloc_iova;
>> +	}
>> +
>> +	/* get vaddr for the ucode */
>> +	if (!vic->ucode_bo->vaddr)
>> +		vic->ucode_vaddr = vmap(vic->ucode_bo->pages,
>> +					vic->ucode_bo->num_pages, VM_MAP,
>> +					pgprot_writecombine(PAGE_KERNEL));
>> +	else
>> +		vic->ucode_vaddr = vic->ucode_bo->vaddr;
>> +
>> +	err = vic_setup_ucode_image(vic, ucode_fw);
>> +	if (err) {
>> +		dev_err(vic->dev, "failed to parse firmware image\n");
>> +		goto err_setup_ucode_image;
>> +	}
>> +
>> +	vic->ucode_valid = true;
>> +
>> +	release_firmware(ucode_fw);
>> +
>> +	return 0;
>> +
>> +err_setup_ucode_image:
>> +	drm_gem_object_release(&vic->ucode_bo->gem);
>
> Should this not be freed with tegra_bo_free or similar? Right now this
> looks like it would leak at least memory.
>

Uh, this definitely looks broken. Will fix.

>> +err_alloc_iova:
>> +	release_firmware(ucode_fw);
>> +err_request_firmware:
>> +	return err;
>> +}
>> +
>> +static int vic_boot(struct device *dev)
>> +{
>> +	struct vic *vic = dev_get_drvdata(dev);
>> +	u32 offset;
>> +	int err = 0;
>> +
>> +	if (vic->booted)
>> +		return 0;
>> +
>> +	if (!vic->ucode_valid) {
>> +		err = vic_read_ucode(vic);
>> +		if (err)
>> +			return err;
>> +	}
>> +
>> +	/* ensure that the engine is in sane state */
>> +	reset_control_assert(vic->rst);
>> +	udelay(10);
>> +	reset_control_deassert(vic->rst);
>> +
>> +	/* setup clockgating registers */
>> +	vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
>> +			CG_IDLE_CG_EN |
>> +			CG_WAKEUP_DLY_CNT(4),
>> +		   NV_PVIC_MISC_PRI_VIC_CG);
>> +
>> +	/* service all dma requests */
>> +	vic_writel(vic, 0, NV_PVIC_FALCON_DMACTL);
>> +
>> +	/* setup dma base address */
>> +	vic_writel(vic, (vic->ucode_bo->paddr + vic->os.bin_data_offset) >> 8,
>> +		   NV_PVIC_FALCON_DMATRFBASE);
>> +
>> +	/* dma ucode data */
>> +	for (offset = 0; offset < vic->os.data_size; offset += 256)
>> +		vic_dma_pa_to_internal_256b(vic,
>> +					     vic->os.data_offset + offset,
>> +					     offset, false);
>> +
>> +	/* dma ucode */
>> +	vic_dma_pa_to_internal_256b(vic, vic->os.code_offset, 0, true);
>> +
>> +	/* setup falcon interrupts and enable interface */
>> +	vic_writel(vic, IRQMSET_EXT(0xff) |
>> +			IRQMSET_SWGEN1_SET |
>> +			IRQMSET_SWGEN0_SET |
>> +			IRQMSET_EXTERR_SET |
>> +			IRQMSET_HALT_SET |
>> +			IRQMSET_WDTMR_SET,
>> +		   NV_PVIC_FALCON_IRQMSET);
>> +	vic_writel(vic, IRQDEST_HOST_EXT(0Xff) |
>> +			IRQDEST_HOST_SWGEN1_HOST |
>> +			IRQDEST_HOST_SWGEN0_HOST |
>> +			IRQDEST_HOST_EXTERR_HOST |
>> +			IRQDEST_HOST_HALT_HOST,
>> +		    NV_PVIC_FALCON_IRQDEST);
>> +
>> +	/* enable method and context switch interfaces */
>> +	vic_writel(vic, ITFEN_MTHDEN_ENABLE |
>> +			ITFEN_CTXEN_ENABLE,
>> +		   NV_PVIC_FALCON_ITFEN);
>> +
>> +	/* boot falcon */
>> +	vic_writel(vic, BOOTVEC_VEC(0), NV_PVIC_FALCON_BOOTVEC);
>> +	vic_writel(vic, CPUCTL_STARTCPU, NV_PVIC_FALCON_CPUCTL);
>> +
>> +	err = vic_wait_idle(vic);
>> +	if (err != 0) {
>> +		dev_err(dev, "boot failed due to timeout");
>> +		return err;
>> +	}
>> +
>> +	/* Set application id and set-up FCE ucode address */
>> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID >> 2,
>> +		    NV_PVIC_FALCON_METHOD_0);
>> +	vic_writel(vic, 1, NV_PVIC_FALCON_METHOD_1);
>> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE >> 2,
>> +		    NV_PVIC_FALCON_METHOD_0);
>> +	vic_writel(vic, vic->fce.size, NV_PVIC_FALCON_METHOD_1);
>> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET >> 2,
>> +		    NV_PVIC_FALCON_METHOD_0);
>> +	vic_writel(vic, (vic->ucode_bo->paddr + vic->fce.data_offset) >> 8,
>> +		    NV_PVIC_FALCON_METHOD_1);
>> +
>> +	err = vic_wait_idle(vic);
>> +	if (err != 0) {
>> +		dev_err(dev, "failed to set application id and fce base");
>> +		return err;
>> +	}
>> +
>> +	vic->booted = true;
>> +
>> +	dev_info(dev, "booted");
>> +
>> +	return 0;
>> +}
>> +
>> +static int vic_init(struct host1x_client *client)
>> +{
>> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
>> +	struct drm_device *dev = dev_get_drvdata(client->parent);
>> +	struct tegra_drm *tegra = dev->dev_private;
>> +	struct vic *vic = to_vic(drm);
>> +	int err;
>> +
>> +	if (tegra->domain) {
>> +		err = iommu_attach_device(tegra->domain, vic->dev);
>> +		if (err < 0) {
>> +			dev_err(vic->dev, "failed to attach to domain: %d\n",
>> +				err);
>> +			return err;
>> +		}
>> +
>> +		vic->domain = tegra->domain;
>> +	}
>> +
>> +	vic->channel = host1x_channel_request(client->dev);
>> +	if (!vic->channel)
>> +		return -ENOMEM;
>> +
>> +	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
>> +	if (!client->syncpts[0]) {
>> +		host1x_channel_free(vic->channel);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	return tegra_drm_register_client(tegra, drm);
>> +}
>> +
>> +static int vic_exit(struct host1x_client *client)
>> +{
>> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
>> +	struct drm_device *dev = dev_get_drvdata(client->parent);
>> +	struct tegra_drm *tegra = dev->dev_private;
>> +	struct vic *vic = to_vic(drm);
>> +	int err;
>> +
>> +	err = tegra_drm_unregister_client(tegra, drm);
>> +	if (err < 0)
>> +		return err;
>> +
>> +	host1x_syncpt_free(client->syncpts[0]);
>> +	host1x_channel_free(vic->channel);
>> +
>> +	/* ucode is no longer available. release it */
>> +	if (vic->ucode_valid) {
>> +		/* first, ensure that vic is not using it */
>> +		reset_control_assert(vic->rst);
>> +		udelay(10);
>> +		reset_control_deassert(vic->rst);
>> +
>> +		/* ..then release the ucode */
>> +		if (!vic->ucode_bo->vaddr)
>> +			vunmap(vic->ucode_vaddr);
>> +		drm_gem_object_release(&vic->ucode_bo->gem);
>
> Same here, is this the correct way to free this?
>

Same as above.

>> +		vic->ucode_valid = false;
>> +	}
>> +
>> +	if (vic->domain) {
>> +		iommu_detach_device(vic->domain, vic->dev);
>> +		vic->domain = NULL;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct host1x_client_ops vic_client_ops = {
>> +	.init = vic_init,
>> +	.exit = vic_exit,
>> +};
>> +
>> +static int vic_open_channel(struct tegra_drm_client *client,
>> +			     struct tegra_drm_context *context)
>> +{
>> +	struct vic *vic = to_vic(client);
>> +	int err;
>> +
>> +	err = vic_boot(vic->dev);
>> +	if (err)
>> +		return err;
>> +
>> +	context->channel = host1x_channel_get(vic->channel);
>> +	if (!context->channel)
>> +		return -ENOMEM;
>> +
>> +	return 0;
>> +}
>> +
>> +static void vic_close_channel(struct tegra_drm_context *context)
>> +{
>> +	host1x_channel_put(context->channel);
>> +}
>> +
>> +static int vic_is_addr_reg(struct device *dev, u32 class, u32 offset, u32 val)
>> +{
>> +	struct vic *vic = dev_get_drvdata(dev);
>> +
>> +	/* handle host class */
>> +	if (class == HOST1X_CLASS_HOST1X) {
>> +		if (offset == 0x2b)
>> +			return true;
>> +		return false;
>
> "return (offset == 0x2b);" perhaps?
>

Works for me.

>> +	}
>> +
>> +	/* write targets towards method 1. check stashed value */
>> +	if (offset == NV_PVIC_FALCON_METHOD_1 >> 2)
>> +		return vic->method_data_is_addr_reg;
>> +
>> +	/* write targets to method 0. determine if the method takes an
>> +	 * address as a parameter */
>> +	if (offset == NV_PVIC_FALCON_METHOD_0 >> 2) {
>> +		u32 method = val << 2;
>> +
>> +		if ((method >= 0x400 && method <= 0x5dc) ||
>> +		    (method >= 0x720 && method <= 0x738))
>> +			vic->method_data_is_addr_reg = true;
>> +		else
>> +			vic->method_data_is_addr_reg = false;
>> +	}
>> +
>> +	/* default to false */
>> +	return false;
>> +}
>> +
>> +static const struct tegra_drm_client_ops vic_ops = {
>> +	.open_channel = vic_open_channel,
>> +	.close_channel = vic_close_channel,
>> +	.is_addr_reg = vic_is_addr_reg,
>> +	.submit = tegra_drm_submit,
>> +};
>> +
>> +static const struct vic_config vic_t124_config = {
>> +	.ucode_name = "vic03_ucode.bin",
>> +	.class_id = HOST1X_CLASS_VIC,
>> +	.powergate_id = TEGRA_POWERGATE_VIC,
>> +};
>> +
>> +static const struct of_device_id vic_match[] = {
>> +	{ .compatible = "nvidia,tegra124-vic",
>> +		.data = &vic_t124_config },
>> +	{ },
>> +};
>> +
>> +static int vic_probe(struct platform_device *pdev)
>> +{
>> +	struct vic_config *vic_config = NULL;
>> +	struct device *dev = &pdev->dev;
>> +	struct host1x_syncpt **syncpts;
>> +	struct resource *regs;
>> +	struct vic *vic;
>> +	int err;
>> +
>> +	if (dev->of_node) {
>> +		const struct of_device_id *match;
>> +
>> +		match = of_match_device(vic_match, dev);
>> +		if (match)
>> +			vic_config = (struct vic_config *)match->data;
>> +		else
>> +			return -ENXIO;
>> +	}
>
> This doesn't seem necessary, we can only be probed from device tree and
> each match entry has a data pointer anyway.
>

True, will remove.

>> +
>> +	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
>> +	if (!vic)
>> +		return -ENOMEM;
>> +
>> +	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
>> +	if (!syncpts)
>> +		return -ENOMEM;
>> +
>> +	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>> +	if (!regs) {
>> +		dev_err(&pdev->dev, "failed to get registers\n");
>> +		return -ENXIO;
>> +	}
>> +
>> +	vic->regs = devm_ioremap_resource(dev, regs);
>> +	if (IS_ERR(vic->regs))
>> +		return PTR_ERR(vic->regs);
>> +
>> +	vic->clk = devm_clk_get(dev, NULL);
>> +	if (IS_ERR(vic->clk)) {
>> +		dev_err(&pdev->dev, "failed to get clock\n");
>> +		return PTR_ERR(vic->clk);
>> +	}
>> +
>> +	vic->rst = devm_reset_control_get(dev, "vic03");
>
> I might prefer just "vic" as the clock/reset name. The name is often
> used as a sort of "role" for the clock/reset for the device, not
> necessarily the raw name of the "correct" clock/reset.
>

I considered that - but I then noticed that 
drivers/clk/tegra/clk-tegra124.c was already using vic03 variant. I can 
write a patch for changing that too.

>> +	if (IS_ERR(vic->rst)) {
>> +		dev_err(&pdev->dev, "cannot get reset\n");
>> +		return PTR_ERR(vic->rst);
>> +	}
>> +
>> +	platform_set_drvdata(pdev, vic);
>> +
>> +	INIT_LIST_HEAD(&vic->client.base.list);
>> +	vic->client.base.ops = &vic_client_ops;
>> +	vic->client.base.dev = dev;
>> +	vic->client.base.class = vic_config->class_id;
>> +	vic->client.base.syncpts = syncpts;
>> +	vic->client.base.num_syncpts = 1;
>> +	vic->dev = dev;
>> +	vic->config = vic_config;
>> +
>> +	INIT_LIST_HEAD(&vic->client.list);
>> +	vic->client.ops = &vic_ops;
>> +
>> +	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
>> +						vic->clk, vic->rst);
>> +	if (err) {
>> +		dev_err(dev, "cannot turn on the device\n");
>> +		return err;
>> +	}
>> +
>> +	err = host1x_client_register(&vic->client.base);
>> +	if (err < 0) {
>
> You used 'if (err) {' previously, so maybe also here.
>

True, will fix.

>> +		dev_err(dev, "failed to register host1x client: %d\n", err);
>> +		clk_disable_unprepare(vic->clk);
>> +		tegra_powergate_power_off(vic->config->powergate_id);
>> +		platform_set_drvdata(pdev, NULL);
>> +		return err;
>> +	}
>> +
>> +	dev_info(&pdev->dev, "initialized");
>> +
>> +	return 0;
>> +}
>> +
>> +static int vic_remove(struct platform_device *pdev)
>> +{
>> +	struct vic *vic = platform_get_drvdata(pdev);
>> +	int err;
>> +
>> +	err = host1x_client_unregister(&vic->client.base);
>> +	if (err < 0) {
>
> and here.
>

Will fix.

>> +		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
>> +			err);
>> +		return err;
>> +	}
>> +
>> +	clk_disable_unprepare(vic->clk);
>> +	tegra_powergate_power_off(vic->config->powergate_id);
>> +
>> +	return 0;
>> +}
>> +
>> +struct platform_driver tegra_vic_driver = {
>> +	.driver = {
>> +		.name = "tegra-vic",
>> +		.of_match_table = vic_match,
>> +	},
>> +	.probe = vic_probe,
>> +	.remove = vic_remove,
>> +};
>> diff --git a/drivers/gpu/drm/tegra/vic.h b/drivers/gpu/drm/tegra/vic.h
>> new file mode 100644
>> index 000000000000..65ca38a8da88
>> --- /dev/null
>> +++ b/drivers/gpu/drm/tegra/vic.h
>> @@ -0,0 +1,116 @@
>> +/*
>> + * Copyright (c) 2015, NVIDIA Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#ifndef TEGRA_VIC_H
>> +#define TEGRA_VIC_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/dma-attrs.h>
>> +#include <linux/firmware.h>
>> +#include <linux/platform_device.h>
>> +
>> +struct ucode_bin_header_v1_vic {
>> +	u32 bin_magic;        /* 0x10de */
>> +	u32 bin_ver;          /* cya, versioning of bin format (1) */
>> +	u32 bin_size;         /* entire image size including this header */
>> +	u32 os_bin_header_offset;
>> +	u32 os_bin_data_offset;
>> +	u32 os_bin_size;
>> +	u32 fce_bin_header_offset;
>> +	u32 fce_bin_data_offset;
>> +	u32 fce_bin_size;
>> +};
>> +
>> +struct ucode_os_code_header_v1_vic {
>> +	u32 offset;
>> +	u32 size;
>> +};
>> +
>> +struct ucode_os_header_v1_vic {
>> +	u32 os_code_offset;
>> +	u32 os_code_size;
>> +	u32 os_data_offset;
>> +	u32 os_data_size;
>> +	u32 num_apps;
>> +	struct ucode_os_code_header_v1_vic *app_code;
>> +	struct ucode_os_code_header_v1_vic *app_data;
>> +	u32 *os_ovl_offset;
>> +	u32 *of_ovl_size;
>> +};
>> +
>> +struct ucode_fce_header_v1_vic {
>> +	u32 fce_ucode_offset;
>> +	u32 fce_ucode_buffer_size;
>> +	u32 fce_ucode_size;
>> +};
>> +
>> +struct ucode_v1_vic {
>> +	struct ucode_bin_header_v1_vic *bin_header;
>> +	struct ucode_os_header_v1_vic  *os_header;
>> +	struct ucode_fce_header_v1_vic *fce_header;
>> +};
>> +
>> +/* VIC methods */
>> +#define NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID	0x00000200
>> +#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE	0x0000071C
>> +#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET	0x0000072C
>> +
>> +/* VIC registers */
>> +
>> +#define NV_PVIC_FALCON_METHOD_0			0x00000040
>> +#define NV_PVIC_FALCON_METHOD_1			0x00000044
>> +
>> +#define NV_PVIC_FALCON_IRQMSET			0x00001010
>> +#define IRQMSET_WDTMR_SET			(1 << 1)
>> +#define IRQMSET_HALT_SET			(1 << 4)
>> +#define IRQMSET_EXTERR_SET			(1 << 5)
>> +#define IRQMSET_SWGEN0_SET			(1 << 6)
>> +#define IRQMSET_SWGEN1_SET			(1 << 7)
>> +#define IRQMSET_EXT(val)			((val & 0xff) << 8)
>> +
>> +#define NV_PVIC_FALCON_IRQDEST			0x0000101c
>> +#define IRQDEST_HOST_HALT_HOST			(1 << 4)
>> +#define IRQDEST_HOST_EXTERR_HOST		(1 << 5)
>> +#define IRQDEST_HOST_SWGEN0_HOST		(1 << 6)
>> +#define IRQDEST_HOST_SWGEN1_HOST		(1 << 7)
>> +#define IRQDEST_HOST_EXT(val)			((val & 0xff) << 8)
>> +
>> +#define NV_PVIC_FALCON_ITFEN			0x00001048
>> +#define ITFEN_CTXEN_ENABLE			(1 << 0)
>> +#define ITFEN_MTHDEN_ENABLE			(1 << 1)
>> +
>> +#define NV_PVIC_FALCON_IDLESTATE		0x0000104c
>> +
>> +#define NV_PVIC_FALCON_CPUCTL			0x00001100
>> +#define CPUCTL_STARTCPU				(1 << 1)
>> +
>> +#define NV_PVIC_FALCON_BOOTVEC			0x00001104
>> +#define BOOTVEC_VEC(val)			((val & 0xffffffff) << 0)
>> +
>> +#define NV_PVIC_FALCON_DMACTL			0x0000110c
>> +
>> +#define NV_PVIC_FALCON_DMATRFBASE		0x00001110
>> +
>> +#define NV_PVIC_FALCON_DMATRFMOFFS		0x00001114
>> +#define DMATRFMOFFS_OFFS(val)			((val & 0xffff) << 0)
>> +
>> +#define NV_PVIC_FALCON_DMATRFCMD		0x00001118
>> +#define DMATRFCMD_IDLE				(1 << 1)
>> +#define DMATRFCMD_IMEM				(1 << 4)
>> +#define DMATRFCMD_SIZE_256B			(6 << 8)
>> +
>> +#define NV_PVIC_FALCON_DMATRFFBOFFS		0x0000111c
>> +#define DMATRFFBOFFS_OFFS(val)			((val & 0xffffffff) << 0)
>> +
>> +#define NV_PVIC_MISC_PRI_VIC_CG			0x000016d0
>> +#define CG_IDLE_CG_DLY_CNT(val)			((val & 0x3f) << 0)
>> +#define CG_IDLE_CG_EN				(1 << 6)
>> +#define CG_WAKEUP_DLY_CNT(val)			((val & 0xf) << 16)
>> +
>> +
>> +#endif /* TEGRA_VIC_H */
>> diff --git a/include/linux/host1x.h b/include/linux/host1x.h
>> index fc86ced77e76..a006dad00009 100644
>> --- a/include/linux/host1x.h
>> +++ b/include/linux/host1x.h
>> @@ -26,6 +26,7 @@ enum host1x_class {
>>   	HOST1X_CLASS_HOST1X = 0x1,
>>   	HOST1X_CLASS_GR2D = 0x51,
>>   	HOST1X_CLASS_GR2D_SB = 0x52,
>> +	HOST1X_CLASS_VIC = 0x5D,
>>   	HOST1X_CLASS_GR3D = 0x60,
>>   };
>>
>>
Mikko Perttunen May 21, 2015, 3:44 p.m. UTC | #3
On 05/21/2015 06:10 PM, Arto Merilainen wrote:
> ...
>>> +
>>> +    vic->rst = devm_reset_control_get(dev, "vic03");
>>
>> I might prefer just "vic" as the clock/reset name. The name is often
>> used as a sort of "role" for the clock/reset for the device, not
>> necessarily the raw name of the "correct" clock/reset.
>>
>
> I considered that - but I then noticed that
> drivers/clk/tegra/clk-tegra124.c was already using vic03 variant. I can
> write a patch for changing that too.

Well, the two can be different; the clock-name in device tree kind of 
means "string that i use to refer to a clock that powers the VIC unit". 
It's not really a big deal though, both ways are used in DT bindings.

Mikko
Thierry Reding May 22, 2015, 10:02 a.m. UTC | #4
On Thu, May 21, 2015 at 06:44:08PM +0300, Mikko Perttunen wrote:
> On 05/21/2015 06:10 PM, Arto Merilainen wrote:
> >...
> >>>+
> >>>+    vic->rst = devm_reset_control_get(dev, "vic03");
> >>
> >>I might prefer just "vic" as the clock/reset name. The name is often
> >>used as a sort of "role" for the clock/reset for the device, not
> >>necessarily the raw name of the "correct" clock/reset.
> >>
> >
> >I considered that - but I then noticed that
> >drivers/clk/tegra/clk-tegra124.c was already using vic03 variant. I can
> >write a patch for changing that too.
> 
> Well, the two can be different; the clock-name in device tree kind of means
> "string that i use to refer to a clock that powers the VIC unit". It's not
> really a big deal though, both ways are used in DT bindings.

I'll insist on calling this vic in the clock-names property. The 03 is
as far as I can tell an encoding of the version number, so if you want
to call this vic04 in some future version we'll have to needlessly
patch the driver.

Thierry
Arto Merilainen May 22, 2015, 10:12 a.m. UTC | #5
Hi Thierry,

On 05/22/2015 01:02 PM, Thierry Reding wrote:
> * PGP Signed by an unknown key
>
> On Thu, May 21, 2015 at 06:44:08PM +0300, Mikko Perttunen wrote:
>> On 05/21/2015 06:10 PM, Arto Merilainen wrote:
>>> ...
>>>>> +
>>>>> +    vic->rst = devm_reset_control_get(dev, "vic03");
>>>>
>>>> I might prefer just "vic" as the clock/reset name. The name is often
>>>> used as a sort of "role" for the clock/reset for the device, not
>>>> necessarily the raw name of the "correct" clock/reset.
>>>>
>>>
>>> I considered that - but I then noticed that
>>> drivers/clk/tegra/clk-tegra124.c was already using vic03 variant. I can
>>> write a patch for changing that too.
>>
>> Well, the two can be different; the clock-name in device tree kind of means
>> "string that i use to refer to a clock that powers the VIC unit". It's not
>> really a big deal though, both ways are used in DT bindings.
>
> I'll insist on calling this vic in the clock-names property. The 03 is
> as far as I can tell an encoding of the version number, so if you want
> to call this vic04 in some future version we'll have to needlessly
> patch the driver.

I agree, this is better without 03 postfix will fix this in the next 
version.

- Arto
Arto Merilainen May 22, 2015, 10:13 a.m. UTC | #6
Hi Thierry,

On 05/22/2015 01:02 PM, Thierry Reding wrote:
> * PGP Signed by an unknown key
>
> On Thu, May 21, 2015 at 06:44:08PM +0300, Mikko Perttunen wrote:
>> On 05/21/2015 06:10 PM, Arto Merilainen wrote:
>>> ...
>>>>> +
>>>>> +    vic->rst = devm_reset_control_get(dev, "vic03");
>>>>
>>>> I might prefer just "vic" as the clock/reset name. The name is often
>>>> used as a sort of "role" for the clock/reset for the device, not
>>>> necessarily the raw name of the "correct" clock/reset.
>>>>
>>>
>>> I considered that - but I then noticed that
>>> drivers/clk/tegra/clk-tegra124.c was already using vic03 variant. I can
>>> write a patch for changing that too.
>>
>> Well, the two can be different; the clock-name in device tree kind of means
>> "string that i use to refer to a clock that powers the VIC unit". It's not
>> really a big deal though, both ways are used in DT bindings.
>
> I'll insist on calling this vic in the clock-names property. The 03 is
> as far as I can tell an encoding of the version number, so if you want
> to call this vic04 in some future version we'll have to needlessly
> patch the driver.

I agree, this is better without 04 postfix and I will remove it in the 
next version.

- Arto
Thierry Reding May 22, 2015, 10:25 a.m. UTC | #7
On Thu, May 21, 2015 at 05:40:31PM +0300, Mikko Perttunen wrote:
> On 05/21/2015 04:20 PM, Arto Merilainen wrote:
[...]
> > +static int vic_is_addr_reg(struct device *dev, u32 class, u32 offset, u32 val)
> > +{
> > +	struct vic *vic = dev_get_drvdata(dev);
> > +
> > +	/* handle host class */
> > +	if (class == HOST1X_CLASS_HOST1X) {
> > +		if (offset == 0x2b)
> > +			return true;
> > +		return false;
> 
> "return (offset == 0x2b);" perhaps?

I think this should really be extracted into a separate helper. If we
ever need to take into account additional offsets we would otherwise
have to extend every driver rather than just the helper.

Also I think the 0x2b should be replaced by some symbolic name.
According to the TRM 0x2b is the host1x class method named
NV_CLASS_HOST_INDCTRL_0. Oddly enough that doesn't seem to be an address
register. Instead the address seems to be in the INDOFF2 and INDOFF
methods (0x2c and 0x2d). I also can't tell from the TRM what exactly
these are supposed to do.

Arto, can you clarify?

> > +	if (IS_ERR(vic->rst)) {
> > +		dev_err(&pdev->dev, "cannot get reset\n");
> > +		return PTR_ERR(vic->rst);
> > +	}
> > +
> > +	platform_set_drvdata(pdev, vic);
> > +
> > +	INIT_LIST_HEAD(&vic->client.base.list);
> > +	vic->client.base.ops = &vic_client_ops;
> > +	vic->client.base.dev = dev;
> > +	vic->client.base.class = vic_config->class_id;
> > +	vic->client.base.syncpts = syncpts;
> > +	vic->client.base.num_syncpts = 1;
> > +	vic->dev = dev;
> > +	vic->config = vic_config;
> > +
> > +	INIT_LIST_HEAD(&vic->client.list);
> > +	vic->client.ops = &vic_ops;
> > +
> > +	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
> > +						vic->clk, vic->rst);
> > +	if (err) {
> > +		dev_err(dev, "cannot turn on the device\n");
> > +		return err;
> > +	}
> > +
> > +	err = host1x_client_register(&vic->client.base);
> > +	if (err < 0) {
> 
> You used 'if (err) {' previously, so maybe also here.

For consistency with other Tegra DRM code these checks should use (at
least where possible) the (err < 0) notation.

Thierry
Thierry Reding May 22, 2015, 11:47 a.m. UTC | #8
On Thu, May 21, 2015 at 04:20:24PM +0300, Arto Merilainen wrote:
> This patch adds support for Video Image Compositor engine which
> can be used for 2d operations.
> 
> The engine has a microcontroller (Falcon) that acts as a frontend
> for the rest of the unit. In order to properly utilize the engine,
> the frontend must be booted before pushing any commands.
> 
> Signed-off-by: Andrew Chew <achew@nvidia.com>
> Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
> ---
>  drivers/gpu/drm/tegra/Makefile |   3 +-
>  drivers/gpu/drm/tegra/drm.c    |   9 +-
>  drivers/gpu/drm/tegra/drm.h    |   1 +
>  drivers/gpu/drm/tegra/vic.c    | 593 +++++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/tegra/vic.h    | 116 ++++++++
>  include/linux/host1x.h         |   1 +
>  6 files changed, 721 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/tegra/vic.c
>  create mode 100644 drivers/gpu/drm/tegra/vic.h

I just reviewed, thoroughly, similar patches for ChromeOS. Unfortunately
these comments aren't publicly available, so I guess I'll have to
reproduce them here...

> diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
[...]
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/of_platform.h>
> +#include <linux/clk.h>
> +#include <linux/host1x.h>
> +#include <linux/module.h>
> +#include <linux/firmware.h>
> +#include <linux/platform_device.h>
> +#include <linux/reset.h>
> +#include <soc/tegra/pmc.h>

This include is misplaced. It should go into a separate section, that is
separated from the <linux/*.h> includes above and the "*.h" includes
below by a blank line.

> +#include <linux/iommu.h>
> +
> +#include "drm.h"
> +#include "gem.h"
> +#include "vic.h"
> +
> +#define VIC_IDLE_TIMEOUT_DEFAULT	10000	/* 10 milliseconds */

The _DEFAULT suffix here isn't very useful since you never override the
default. So this is really just the timeout value.

As was mentioned in the ChromeOS review this should also have a _US
suffix to indicate the unit.

> +#define VIC_IDLE_CHECK_PERIOD		10	/* 10 usec */

_US

> +struct vic;
> +
> +struct vic_config {
> +	/* firmware name */
> +	char *ucode_name;

const?

> +	/* class id */
> +	u32 class_id;
> +
> +	/* powergate id */
> +	int powergate_id;

I don't think it's very likely that these will ever change, so please
drop them.

> +struct vic {
> +	struct {
> +		u32 bin_data_offset;
> +		u32 data_offset;
> +		u32 data_size;
> +		u32 code_offset;
> +		u32 size;
> +	} os, fce;

This is unintuitive. At least these should be commented, but it looks
like these binary firmware images have code and data sections (and the
code to upload to either instruction or data memory corroborates that)
so perhaps it'd be better to structure this more explicitly and hence
do away with the prefixes. Also the types should be non-sized since
these don't represent direct register values nor fields in a binary
file. Perhaps something like:

	struct falcon_firmware_section {
		unsigned long offset;
		size_t size;
	};

	struct falcon_firmware {
		struct falcon_firmware_section code;
		struct falcon_firmware_section data;

		dma_addr_t phys;
		void *virt;
		size_t size;
	};

> +
> +	struct tegra_bo *ucode_bo;
> +	bool ucode_valid;
> +	void *ucode_vaddr;
> +
> +	bool booted;

There are a bunch of other drivers that use a Falcon and they will all
need to use similar data to this to deal with the firmware and all. I
would like to see that code to be made into a Falcon library so that it
can be reused in a meaningful way.

Roughly this would look like this:

	struct falcon {
		struct device *dev;
		...
	};

	int falcon_init(struct falcon *falcon, struct device *dev,
			void __iomem *regs);
	int falcon_load_firmware(struct falcon *falcon, const char *filename);
	int falcon_exit(struct falcon *falcon);
	int falcon_boot(struct falcon *falcon);

etc. Drivers that need to boot a Falcon would then use it somewhat like
this:

	struct vic_soc {
		const char *firmware;
	};

	struct vic {
		struct falcon *falcon;
		void __iomem *regs;
		...
	};

	static int vic_init(struct host1x_client *client)
	{
		struct vic *vic;
		...
		err = falcon_boot(&vic->falcon);
		...
	}

	static int vic_probe(struct platform_device *pdev)
	{
		struct falcon_firmware *firmware;
		const struct vic_soc *soc;
		struct vic *vic;
		...
		soc = (const struct vic_soc *)match->data;
		...
		err = falcon_init(&vic->falcon, &pdev->dev,
				  vic->regs + VIC_FALCON_OFFSET);
		..
		err = falcon_load_firmware(&pdev->dev, soc->firmware);
		...
	}

> +	void __iomem *regs;
> +	struct tegra_drm_client client;
> +	struct host1x_channel *channel;
> +	struct iommu_domain *domain;
> +	struct device *dev;
> +	struct clk *clk;
> +	struct reset_control *rst;
> +
> +	/* Platform configuration */
> +	struct vic_config *config;

This should be const.

> +
> +	/* for firewall - this determines if method 1 should be regarded
> +	 * as an address register */
> +	bool method_data_is_addr_reg;
> +};

I think it'd be best to incorporate that functionality into the firewall
so that we can deal with it more centrally rather that duplicate this in
all drivers.

> +static inline struct vic *to_vic(struct tegra_drm_client *client)
> +{
> +	return container_of(client, struct vic, client);
> +}
> +
> +void vic_writel(struct vic *vic, u32 v, u32 r)
> +{
> +	writel(v, vic->regs + r);
> +}
> +
> +u32 vic_readl(struct vic *vic, u32 r)
> +{
> +	return readl(vic->regs + r);
> +}

s/r/offset/, s/v/value/. The offset should also be unsigned long or
unsigned int to make it more difficult to confuse it with a register
value. Both of these functions should also be static. Compiling with
sparse checking enabled (install sparse, compile with C=1) will flag
such cases.

> +static int vic_wait_idle(struct vic *vic)
> +{
> +	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
> +
> +	do {
> +		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
> +		u32 w = vic_readl(vic, NV_PVIC_FALCON_IDLESTATE);
> +
> +		if (!w)
> +			return 0;
> +
> +		udelay(VIC_IDLE_CHECK_PERIOD);

Don't even use udelay() unless you are in atomic context. usleep_range()
is the right function to use here.

> +		timeout -= check;
> +	} while (timeout);

Also this loop is not a very precise way of waiting for a timeout
because it will drift. Use an exit condition based on an absolute
timestamp. The helpers in linux/iopoll.h should work well in this
driver.

> +	dev_err(vic->dev, "vic idle timeout");

You should leave error reporting to the caller. Actually, you already do
that, so in case of a timeout we'd actually get two error messages.

> +
> +	return -ETIMEDOUT;
> +}
> +
> +static int vic_dma_wait_idle(struct vic *vic)
> +{
> +	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
> +
> +	do {
> +		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
> +		u32 dmatrfcmd = vic_readl(vic, NV_PVIC_FALCON_DMATRFCMD);
> +
> +		if (dmatrfcmd & DMATRFCMD_IDLE)
> +			return 0;
> +
> +		udelay(VIC_IDLE_CHECK_PERIOD);
> +		timeout -= check;
> +	} while (timeout);
> +
> +	dev_err(vic->dev, "dma idle timeout");
> +
> +	return -ETIMEDOUT;
> +}
> +
> +static int vic_dma_pa_to_internal_256b(struct vic *vic, phys_addr_t pa,
> +				       u32 internal_offset, bool imem)

The name is confusing. Without looking at the code I'd expect this to
perform some kind of conversion from a physical address to some internal
address, but if I understand correctly this actually copies code into
the Falcon instruction or data memories. A better name I think would be:

	static int falcon_load_chunk(struct falcon *falcon, phys_addr_t phys,
				     unsigned long offset, enum falcon_memory target);

Note that this is now part of the Falcon library because I've seen
identical code used in several other drivers. Also the bool argument is
now an enumeration, which makes it much easier to read. Compare:

	err = vic_dma_pa_to_internal_256b(vic, phys, offset, true);

and

	err = falcon_load_chunk(&vic->falcon, phys, offset, FALCON_MEMORY_IMEM);

Is there a specific term in Falcon-speak for these 256 byte blocks?
"chunk" is a little awkward.

> +{
> +	u32 cmd = DMATRFCMD_SIZE_256B;
> +
> +	if (imem)
> +		cmd |= DMATRFCMD_IMEM;
> +
> +	vic_writel(vic, DMATRFMOFFS_OFFS(internal_offset),
> +		   NV_PVIC_FALCON_DMATRFMOFFS);
> +	vic_writel(vic, DMATRFFBOFFS_OFFS(pa),
> +		   NV_PVIC_FALCON_DMATRFFBOFFS);
> +	vic_writel(vic, cmd, NV_PVIC_FALCON_DMATRFCMD);
> +
> +	return vic_dma_wait_idle(vic);
> +}
> +
> +static int vic_setup_ucode_image(struct vic *vic,
> +				 const struct firmware *ucode_fw)
> +{
> +	/* image data is little endian. */
> +	u32 *ucode_vaddr = vic->ucode_vaddr;
> +	struct ucode_v1_vic ucode;
> +	int w;

"size_t i" might be better here. "size_t" because that matches
ucode_fw->size (I guess it isn't size_t in this patch, but it really
should be) and "i" because that's a more idiomatic loop variable.

> +
> +	/* copy the whole thing taking into account endianness */
> +	for (w = 0; w < ucode_fw->size / sizeof(u32); w++)
> +		ucode_vaddr[w] = le32_to_cpu(((u32 *)ucode_fw->data)[w]);

Why not make this part of the firmware loading function? Seems like we
always have to do it anyway, so might as well do it right from the
start.

> +	ucode.bin_header = (struct ucode_bin_header_v1_vic *)ucode_vaddr;
> +
> +	/* endian problems would show up right here */
> +	if (ucode.bin_header->bin_magic != 0x10de) {

There's a symbolic name for this magic value: PCI_VENDOR_ID_NVIDIA (see
include/linux/pci_ids.h). I suggest we use that here, even if it's a
little clumsy to use the header in a non-PCI driver.

> +		dev_err(vic->dev, "failed to get firmware magic");
> +		return -EINVAL;
> +	}
> +
> +	if (ucode.bin_header->bin_ver != 1) {
> +		dev_err(vic->dev, "unsupported firmware version");
> +		return -ENOENT;

Why not -EINVAL here, too?

> +	}
> +
> +	/* shouldn't be bigger than what firmware thinks */
> +	if (ucode.bin_header->bin_size > ucode_fw->size) {
> +		dev_err(vic->dev, "ucode image size inconsistency");
> +		return -EINVAL;
> +	}
> +
> +	ucode.os_header = (struct ucode_os_header_v1_vic *)
> +		(((void *)ucode_vaddr) +
> +		ucode.bin_header->os_bin_header_offset);
> +	vic->os.size = ucode.bin_header->os_bin_size;
> +	vic->os.bin_data_offset = ucode.bin_header->os_bin_data_offset;
> +	vic->os.code_offset = ucode.os_header->os_code_offset;
> +	vic->os.data_offset = ucode.os_header->os_data_offset;
> +	vic->os.data_size = ucode.os_header->os_data_size;
> +
> +	ucode.fce_header = (struct ucode_fce_header_v1_vic *)
> +		(((void *)ucode_vaddr) +
> +		ucode.bin_header->fce_bin_header_offset);
> +	vic->fce.size = ucode.fce_header->fce_ucode_size;
> +	vic->fce.data_offset = ucode.bin_header->fce_bin_data_offset;
> +
> +	return 0;
> +}
> +
> +static int vic_read_ucode(struct vic *vic)
> +{
> +	struct host1x_client *client = &vic->client.base;
> +	struct drm_device *dev = dev_get_drvdata(client->parent);
> +	const struct firmware *ucode_fw;
> +	int err;
> +
> +	err = request_firmware(&ucode_fw, vic->config->ucode_name, vic->dev);
> +	if (err) {
> +		dev_err(vic->dev, "failed to get firmware\n");
> +		goto err_request_firmware;
> +	}
> +
> +	vic->ucode_bo = tegra_bo_create(dev, ucode_fw->size, 0);
> +	if (IS_ERR(vic->ucode_bo)) {
> +		dev_err(vic->dev, "dma memory allocation failed");
> +		err = PTR_ERR(vic->ucode_bo);
> +		goto err_alloc_iova;
> +	}

Erm... no. Please don't use tegra_bo_create() to allocate these buffers.
tegra_bo_create() creates GEM objects and firmware doesn't qualify as a
GEM object.

Can't you use the DMA API here?

> +	/* get vaddr for the ucode */
> +	if (!vic->ucode_bo->vaddr)
> +		vic->ucode_vaddr = vmap(vic->ucode_bo->pages,
> +					vic->ucode_bo->num_pages, VM_MAP,
> +					pgprot_writecombine(PAGE_KERNEL));
> +	else
> +		vic->ucode_vaddr = vic->ucode_bo->vaddr;
> +
> +	err = vic_setup_ucode_image(vic, ucode_fw);
> +	if (err) {
> +		dev_err(vic->dev, "failed to parse firmware image\n");
> +		goto err_setup_ucode_image;
> +	}
> +
> +	vic->ucode_valid = true;
> +
> +	release_firmware(ucode_fw);
> +
> +	return 0;
> +
> +err_setup_ucode_image:
> +	drm_gem_object_release(&vic->ucode_bo->gem);
> +err_alloc_iova:
> +	release_firmware(ucode_fw);
> +err_request_firmware:
> +	return err;
> +}
> +
> +static int vic_boot(struct device *dev)
> +{
> +	struct vic *vic = dev_get_drvdata(dev);
> +	u32 offset;
> +	int err = 0;
> +
> +	if (vic->booted)
> +		return 0;
> +
> +	if (!vic->ucode_valid) {
> +		err = vic_read_ucode(vic);
> +		if (err)
> +			return err;
> +	}

I think this is the wrong place to do this. It's good in that it's as
late as possible, but vic_boot() is kind of the wrong place. I think you
should load the firmware and upload it into the Falcon within the
->open_channel() implementation, prior to the vic_boot() call. There is
also no need to reload the firmware every time a VIC channel is opened.
I think loading the firmware could be done in ->init() instead.

> +
> +	/* ensure that the engine is in sane state */
> +	reset_control_assert(vic->rst);
> +	udelay(10);
> +	reset_control_deassert(vic->rst);

This in not called from an atomic context, so should use usleep_range().

> +
> +	/* setup clockgating registers */
> +	vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
> +			CG_IDLE_CG_EN |
> +			CG_WAKEUP_DLY_CNT(4),
> +		   NV_PVIC_MISC_PRI_VIC_CG);
> +
> +	/* service all dma requests */
> +	vic_writel(vic, 0, NV_PVIC_FALCON_DMACTL);
> +
> +	/* setup dma base address */
> +	vic_writel(vic, (vic->ucode_bo->paddr + vic->os.bin_data_offset) >> 8,
> +		   NV_PVIC_FALCON_DMATRFBASE);
> +
> +	/* dma ucode data */
> +	for (offset = 0; offset < vic->os.data_size; offset += 256)
> +		vic_dma_pa_to_internal_256b(vic,
> +					     vic->os.data_offset + offset,
> +					     offset, false);
> +
> +	/* dma ucode */
> +	vic_dma_pa_to_internal_256b(vic, vic->os.code_offset, 0, true);

Looks like this could be a separate function that uploads the microcode
to the Falcon. Perhaps as part of moving this to library code this could
be split up more fine-grainedly. Perhaps include a falcon_boot() wrapper
that does all this in the standard sequence. Having the possibility to
call low-level helpers makes this more flexible in case we ever need
extra tweaks in some driver.

> +
> +	/* setup falcon interrupts and enable interface */
> +	vic_writel(vic, IRQMSET_EXT(0xff) |
> +			IRQMSET_SWGEN1_SET |
> +			IRQMSET_SWGEN0_SET |
> +			IRQMSET_EXTERR_SET |
> +			IRQMSET_HALT_SET |
> +			IRQMSET_WDTMR_SET,
> +		   NV_PVIC_FALCON_IRQMSET);
> +	vic_writel(vic, IRQDEST_HOST_EXT(0Xff) |
> +			IRQDEST_HOST_SWGEN1_HOST |
> +			IRQDEST_HOST_SWGEN0_HOST |
> +			IRQDEST_HOST_EXTERR_HOST |
> +			IRQDEST_HOST_HALT_HOST,
> +		    NV_PVIC_FALCON_IRQDEST);
> +
> +	/* enable method and context switch interfaces */
> +	vic_writel(vic, ITFEN_MTHDEN_ENABLE |
> +			ITFEN_CTXEN_ENABLE,
> +		   NV_PVIC_FALCON_ITFEN);
> +
> +	/* boot falcon */
> +	vic_writel(vic, BOOTVEC_VEC(0), NV_PVIC_FALCON_BOOTVEC);
> +	vic_writel(vic, CPUCTL_STARTCPU, NV_PVIC_FALCON_CPUCTL);
> +
> +	err = vic_wait_idle(vic);
> +	if (err != 0) {

err < 0

> +		dev_err(dev, "boot failed due to timeout");
> +		return err;
> +	}
> +
> +	/* Set application id and set-up FCE ucode address */
> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID >> 2,
> +		    NV_PVIC_FALCON_METHOD_0);
> +	vic_writel(vic, 1, NV_PVIC_FALCON_METHOD_1);
> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE >> 2,
> +		    NV_PVIC_FALCON_METHOD_0);
> +	vic_writel(vic, vic->fce.size, NV_PVIC_FALCON_METHOD_1);

Looks like this is already a case of device-specific case that's part of
the boot sequence, so it seems like we're going to need these low-level
helpers for VIC already.

> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET >> 2,
> +		    NV_PVIC_FALCON_METHOD_0);
> +	vic_writel(vic, (vic->ucode_bo->paddr + vic->fce.data_offset) >> 8,
> +		    NV_PVIC_FALCON_METHOD_1);
> +
> +	err = vic_wait_idle(vic);
> +	if (err != 0) {

Here too.

> +		dev_err(dev, "failed to set application id and fce base");
> +		return err;
> +	}
> +
> +	vic->booted = true;
> +
> +	dev_info(dev, "booted");
> +
> +	return 0;
> +}
> +
> +static int vic_init(struct host1x_client *client)
> +{
> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
> +	struct drm_device *dev = dev_get_drvdata(client->parent);
> +	struct tegra_drm *tegra = dev->dev_private;
> +	struct vic *vic = to_vic(drm);
> +	int err;
> +
> +	if (tegra->domain) {
> +		err = iommu_attach_device(tegra->domain, vic->dev);
> +		if (err < 0) {
> +			dev_err(vic->dev, "failed to attach to domain: %d\n",
> +				err);
> +			return err;
> +		}
> +
> +		vic->domain = tegra->domain;
> +	}
> +
> +	vic->channel = host1x_channel_request(client->dev);
> +	if (!vic->channel)
> +		return -ENOMEM;
> +
> +	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
> +	if (!client->syncpts[0]) {
> +		host1x_channel_free(vic->channel);
> +		return -ENOMEM;
> +	}
> +
> +	return tegra_drm_register_client(tegra, drm);
> +}

You never detach from the IOMMU domain on failure.

> +
> +static int vic_exit(struct host1x_client *client)
> +{
> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
> +	struct drm_device *dev = dev_get_drvdata(client->parent);
> +	struct tegra_drm *tegra = dev->dev_private;
> +	struct vic *vic = to_vic(drm);
> +	int err;
> +
> +	err = tegra_drm_unregister_client(tegra, drm);
> +	if (err < 0)
> +		return err;
> +
> +	host1x_syncpt_free(client->syncpts[0]);
> +	host1x_channel_free(vic->channel);
> +
> +	/* ucode is no longer available. release it */

This comment isn't accurate. The microcode won't be available after
you've released it. Perhaps you meant to say "no longer needed"?

> +	if (vic->ucode_valid) {
> +		/* first, ensure that vic is not using it */
> +		reset_control_assert(vic->rst);
> +		udelay(10);
> +		reset_control_deassert(vic->rst);
> +
> +		/* ..then release the ucode */
> +		if (!vic->ucode_bo->vaddr)
> +			vunmap(vic->ucode_vaddr);
> +		drm_gem_object_release(&vic->ucode_bo->gem);
> +		vic->ucode_valid = false;
> +	}

This now also makes the code unbalanced. You allocate the microcode
during ->open_channel(), but free it in ->exit(). The allocation should
happen in ->init() instead if it's freed in ->exit().

> +
> +	if (vic->domain) {
> +		iommu_detach_device(vic->domain, vic->dev);
> +		vic->domain = NULL;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct host1x_client_ops vic_client_ops = {
> +	.init = vic_init,
> +	.exit = vic_exit,
> +};
> +
> +static int vic_open_channel(struct tegra_drm_client *client,
> +			     struct tegra_drm_context *context)
> +{
> +	struct vic *vic = to_vic(client);
> +	int err;
> +
> +	err = vic_boot(vic->dev);
> +	if (err)
> +		return err;
> +
> +	context->channel = host1x_channel_get(vic->channel);
> +	if (!context->channel)
> +		return -ENOMEM;

It seems like that's correct, but that seems more like a coincidence
rather than anything else. It's weird how we propagate errors from
host1x_pushbuffer_init() to host1x_cdma_init() to host1x_channel_get()
and then return NULL on failure. Perhaps a better way would be to make
host1x_channel_get() return an ERR_PTR()-encoded error code to be more
explicit about the error code. The reason why this makes me
uncomfortable is that if we ever add code that can fail for reasons
other than memory allocation this will be wrong, and chances high that
nobody will remember to update this (given that host1x_channel_get()
does not propagate the error code we wouldn't be able to return anything
accurate anyway).

I think this is fine for now, just something to keep in mind for some
other time.

> +
> +	return 0;
> +}
> +
> +static void vic_close_channel(struct tegra_drm_context *context)
> +{
> +	host1x_channel_put(context->channel);
> +}
> +
> +static int vic_is_addr_reg(struct device *dev, u32 class, u32 offset, u32 val)
> +{
> +	struct vic *vic = dev_get_drvdata(dev);
> +
> +	/* handle host class */
> +	if (class == HOST1X_CLASS_HOST1X) {
> +		if (offset == 0x2b)
> +			return true;
> +		return false;
> +	}
> +
> +	/* write targets towards method 1. check stashed value */
> +	if (offset == NV_PVIC_FALCON_METHOD_1 >> 2)
> +		return vic->method_data_is_addr_reg;
> +
> +	/* write targets to method 0. determine if the method takes an
> +	 * address as a parameter */

This isn't a proper block-style comment.

> +	if (offset == NV_PVIC_FALCON_METHOD_0 >> 2) {
> +		u32 method = val << 2;
> +
> +		if ((method >= 0x400 && method <= 0x5dc) ||
> +		    (method >= 0x720 && method <= 0x738))

We're going to need symbolic names here.

> +			vic->method_data_is_addr_reg = true;
> +		else
> +			vic->method_data_is_addr_reg = false;
> +	}
> +
> +	/* default to false */

I think the code is obvious enough not to warrant this comment.

> +	return false;
> +}
> +
> +static const struct tegra_drm_client_ops vic_ops = {
> +	.open_channel = vic_open_channel,
> +	.close_channel = vic_close_channel,
> +	.is_addr_reg = vic_is_addr_reg,
> +	.submit = tegra_drm_submit,
> +};
> +
> +static const struct vic_config vic_t124_config = {
> +	.ucode_name = "vic03_ucode.bin",
> +	.class_id = HOST1X_CLASS_VIC,
> +	.powergate_id = TEGRA_POWERGATE_VIC,
> +};
> +
> +static const struct of_device_id vic_match[] = {
> +	{ .compatible = "nvidia,tegra124-vic",
> +		.data = &vic_t124_config },

You'd typically indent both of these or have them on one line. That is
either:

	{
		.compatible = ...,
		.data = ...
	},

or

	{ .compatible = ..., .data = ... },

> +	{ },
> +};
> +
> +static int vic_probe(struct platform_device *pdev)
> +{
> +	struct vic_config *vic_config = NULL;

const

> +	struct device *dev = &pdev->dev;
> +	struct host1x_syncpt **syncpts;
> +	struct resource *regs;
> +	struct vic *vic;
> +	int err;
> +
> +	if (dev->of_node) {

This will always be true, so can be dropped.

> +		const struct of_device_id *match;
> +
> +		match = of_match_device(vic_match, dev);
> +		if (match)
> +			vic_config = (struct vic_config *)match->data;
> +		else
> +			return -ENXIO;

This error condition can never happen either. At least not if you update
the driver properly. If you don't you deserve the ensuing crash.

> +	}
> +
> +	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
> +	if (!vic)
> +		return -ENOMEM;
> +
> +	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
> +	if (!syncpts)
> +		return -ENOMEM;
> +
> +	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!regs) {
> +		dev_err(&pdev->dev, "failed to get registers\n");
> +		return -ENXIO;
> +	}

No need for the error checking here. devm_ioremap_resource() below will
do it for you.

> +
> +	vic->regs = devm_ioremap_resource(dev, regs);
> +	if (IS_ERR(vic->regs))
> +		return PTR_ERR(vic->regs);
> +
> +	vic->clk = devm_clk_get(dev, NULL);
> +	if (IS_ERR(vic->clk)) {
> +		dev_err(&pdev->dev, "failed to get clock\n");
> +		return PTR_ERR(vic->clk);
> +	}
> +
> +	vic->rst = devm_reset_control_get(dev, "vic03");
> +	if (IS_ERR(vic->rst)) {
> +		dev_err(&pdev->dev, "cannot get reset\n");
> +		return PTR_ERR(vic->rst);
> +	}
> +
> +	platform_set_drvdata(pdev, vic);
> +
> +	INIT_LIST_HEAD(&vic->client.base.list);
> +	vic->client.base.ops = &vic_client_ops;
> +	vic->client.base.dev = dev;
> +	vic->client.base.class = vic_config->class_id;
> +	vic->client.base.syncpts = syncpts;
> +	vic->client.base.num_syncpts = 1;
> +	vic->dev = dev;
> +	vic->config = vic_config;
> +
> +	INIT_LIST_HEAD(&vic->client.list);
> +	vic->client.ops = &vic_ops;
> +
> +	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
> +						vic->clk, vic->rst);
> +	if (err) {
> +		dev_err(dev, "cannot turn on the device\n");
> +		return err;
> +	}
> +
> +	err = host1x_client_register(&vic->client.base);
> +	if (err < 0) {
> +		dev_err(dev, "failed to register host1x client: %d\n", err);
> +		clk_disable_unprepare(vic->clk);
> +		tegra_powergate_power_off(vic->config->powergate_id);

tegra_powergate_sequence_power_up() also deasserts the reset, so you
probably want to assert that here again. Maybe to make it easier you
could abstract this away into a vic_enable()/vic_disable() pair of
functions? Or perhaps you could even use runtime PM for this? Don't
worry about runtime PM if that complicates things too much, though.

> +		platform_set_drvdata(pdev, NULL);
> +		return err;
> +	}
> +
> +	dev_info(&pdev->dev, "initialized");
> +
> +	return 0;
> +}
> +
> +static int vic_remove(struct platform_device *pdev)
> +{
> +	struct vic *vic = platform_get_drvdata(pdev);
> +	int err;
> +
> +	err = host1x_client_unregister(&vic->client.base);
> +	if (err < 0) {
> +		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
> +			err);
> +		return err;
> +	}
> +
> +	clk_disable_unprepare(vic->clk);
> +	tegra_powergate_power_off(vic->config->powergate_id);

This supports the suggestion to introduce a separate function for this.

> +
> +	return 0;
> +}
> +
> +struct platform_driver tegra_vic_driver = {
> +	.driver = {
> +		.name = "tegra-vic",
> +		.of_match_table = vic_match,
> +	},
> +	.probe = vic_probe,
> +	.remove = vic_remove,
> +};
> diff --git a/drivers/gpu/drm/tegra/vic.h b/drivers/gpu/drm/tegra/vic.h
> new file mode 100644
> index 000000000000..65ca38a8da88
> --- /dev/null
> +++ b/drivers/gpu/drm/tegra/vic.h
> @@ -0,0 +1,116 @@
> +/*
> + * Copyright (c) 2015, NVIDIA Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef TEGRA_VIC_H
> +#define TEGRA_VIC_H
> +
> +#include <linux/types.h>
> +#include <linux/dma-attrs.h>
> +#include <linux/firmware.h>
> +#include <linux/platform_device.h>
> +
> +struct ucode_bin_header_v1_vic {
> +	u32 bin_magic;        /* 0x10de */
> +	u32 bin_ver;          /* cya, versioning of bin format (1) */
> +	u32 bin_size;         /* entire image size including this header */
> +	u32 os_bin_header_offset;
> +	u32 os_bin_data_offset;
> +	u32 os_bin_size;
> +	u32 fce_bin_header_offset;
> +	u32 fce_bin_data_offset;
> +	u32 fce_bin_size;
> +};
> +
> +struct ucode_os_code_header_v1_vic {
> +	u32 offset;
> +	u32 size;
> +};
> +
> +struct ucode_os_header_v1_vic {
> +	u32 os_code_offset;
> +	u32 os_code_size;
> +	u32 os_data_offset;
> +	u32 os_data_size;
> +	u32 num_apps;
> +	struct ucode_os_code_header_v1_vic *app_code;
> +	struct ucode_os_code_header_v1_vic *app_data;
> +	u32 *os_ovl_offset;
> +	u32 *of_ovl_size;
> +};
> +
> +struct ucode_fce_header_v1_vic {
> +	u32 fce_ucode_offset;
> +	u32 fce_ucode_buffer_size;
> +	u32 fce_ucode_size;
> +};
> +
> +struct ucode_v1_vic {
> +	struct ucode_bin_header_v1_vic *bin_header;
> +	struct ucode_os_header_v1_vic  *os_header;
> +	struct ucode_fce_header_v1_vic *fce_header;
> +};

I'll assume that these are data structures shared by all other drivers
for Falcon driven engines, so they should probably go into the Falcon
library header as well.

> +
> +/* VIC methods */
> +#define NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID	0x00000200
> +#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE	0x0000071C
> +#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET	0x0000072C
> +
> +/* VIC registers */
> +
> +#define NV_PVIC_FALCON_METHOD_0			0x00000040
> +#define NV_PVIC_FALCON_METHOD_1			0x00000044
> +
> +#define NV_PVIC_FALCON_IRQMSET			0x00001010

I don't see this documented in the TRM.

> +#define IRQMSET_WDTMR_SET			(1 << 1)
> +#define IRQMSET_HALT_SET			(1 << 4)
> +#define IRQMSET_EXTERR_SET			(1 << 5)
> +#define IRQMSET_SWGEN0_SET			(1 << 6)
> +#define IRQMSET_SWGEN1_SET			(1 << 7)
> +#define IRQMSET_EXT(val)			((val & 0xff) << 8)

You'll need to add extra parentheses around "val" to protect against
operator precedence screwing this up.

> +
> +#define NV_PVIC_FALCON_IRQDEST			0x0000101c
> +#define IRQDEST_HOST_HALT_HOST			(1 << 4)
> +#define IRQDEST_HOST_EXTERR_HOST		(1 << 5)
> +#define IRQDEST_HOST_SWGEN0_HOST		(1 << 6)
> +#define IRQDEST_HOST_SWGEN1_HOST		(1 << 7)
> +#define IRQDEST_HOST_EXT(val)			((val & 0xff) << 8)

This isn't documented either.

> +
> +#define NV_PVIC_FALCON_ITFEN			0x00001048
> +#define ITFEN_CTXEN_ENABLE			(1 << 0)
> +#define ITFEN_MTHDEN_ENABLE			(1 << 1)

This is...

> +#define NV_PVIC_FALCON_IDLESTATE		0x0000104c

... but this again isn't.

> +
> +#define NV_PVIC_FALCON_CPUCTL			0x00001100
> +#define CPUCTL_STARTCPU				(1 << 1)
> +
> +#define NV_PVIC_FALCON_BOOTVEC			0x00001104
> +#define BOOTVEC_VEC(val)			((val & 0xffffffff) << 0)
> +
> +#define NV_PVIC_FALCON_DMACTL			0x0000110c
> +
> +#define NV_PVIC_FALCON_DMATRFBASE		0x00001110
> +
> +#define NV_PVIC_FALCON_DMATRFMOFFS		0x00001114
> +#define DMATRFMOFFS_OFFS(val)			((val & 0xffff) << 0)
> +
> +#define NV_PVIC_FALCON_DMATRFCMD		0x00001118
> +#define DMATRFCMD_IDLE				(1 << 1)
> +#define DMATRFCMD_IMEM				(1 << 4)
> +#define DMATRFCMD_SIZE_256B			(6 << 8)
> +
> +#define NV_PVIC_FALCON_DMATRFFBOFFS		0x0000111c
> +#define DMATRFFBOFFS_OFFS(val)			((val & 0xffffffff) << 0)
> +
> +#define NV_PVIC_MISC_PRI_VIC_CG			0x000016d0
> +#define CG_IDLE_CG_DLY_CNT(val)			((val & 0x3f) << 0)
> +#define CG_IDLE_CG_EN				(1 << 6)
> +#define CG_WAKEUP_DLY_CNT(val)			((val & 0xf) << 16)

These aren't in the TRM either, but I vaguely remember this being
tracked in an internal bug. Have bugs been filed to track documentation
of the other registers as well?

> +
> +

Gratuituous blank line.

> +#endif /* TEGRA_VIC_H */
> diff --git a/include/linux/host1x.h b/include/linux/host1x.h
> index fc86ced77e76..a006dad00009 100644
> --- a/include/linux/host1x.h
> +++ b/include/linux/host1x.h
> @@ -26,6 +26,7 @@ enum host1x_class {
>  	HOST1X_CLASS_HOST1X = 0x1,
>  	HOST1X_CLASS_GR2D = 0x51,
>  	HOST1X_CLASS_GR2D_SB = 0x52,
> +	HOST1X_CLASS_VIC = 0x5D,
>  	HOST1X_CLASS_GR3D = 0x60,
>  };

Thierry
Arto Merilainen May 25, 2015, 7:11 a.m. UTC | #9
On 05/22/2015 01:25 PM, Thierry Reding wrote:
> * PGP Signed by an unknown key
>
> On Thu, May 21, 2015 at 05:40:31PM +0300, Mikko Perttunen wrote:
>> On 05/21/2015 04:20 PM, Arto Merilainen wrote:
> [...]
>>> +static int vic_is_addr_reg(struct device *dev, u32 class, u32 offset, u32 val)
>>> +{
>>> +	struct vic *vic = dev_get_drvdata(dev);
>>> +
>>> +	/* handle host class */
>>> +	if (class == HOST1X_CLASS_HOST1X) {
>>> +		if (offset == 0x2b)
>>> +			return true;
>>> +		return false;
>>
>> "return (offset == 0x2b);" perhaps?
>
> I think this should really be extracted into a separate helper. If we
> ever need to take into account additional offsets we would otherwise
> have to extend every driver rather than just the helper.

I agree, that would be better.

>
> Also I think the 0x2b should be replaced by some symbolic name.
> According to the TRM 0x2b is the host1x class method named
> NV_CLASS_HOST_INDCTRL_0. Oddly enough that doesn't seem to be an address
> register. Instead the address seems to be in the INDOFF2 and INDOFF
> methods (0x2c and 0x2d). I also can't tell from the TRM what exactly
> these are supposed to do.
>
> Arto, can you clarify?

This looks like an unfortunate mistake that got reproduced from gr2d and 
gr3d.

The INDCTRL method is used for indirect register accessing and it allows 
Host1x to read registers of an engine - or write data directly to 
memory. It allow implementing context switch for the clients whose state 
should be not change between jobs from the same application.

>
>>> +	if (IS_ERR(vic->rst)) {
>>> +		dev_err(&pdev->dev, "cannot get reset\n");
>>> +		return PTR_ERR(vic->rst);
>>> +	}
>>> +
>>> +	platform_set_drvdata(pdev, vic);
>>> +
>>> +	INIT_LIST_HEAD(&vic->client.base.list);
>>> +	vic->client.base.ops = &vic_client_ops;
>>> +	vic->client.base.dev = dev;
>>> +	vic->client.base.class = vic_config->class_id;
>>> +	vic->client.base.syncpts = syncpts;
>>> +	vic->client.base.num_syncpts = 1;
>>> +	vic->dev = dev;
>>> +	vic->config = vic_config;
>>> +
>>> +	INIT_LIST_HEAD(&vic->client.list);
>>> +	vic->client.ops = &vic_ops;
>>> +
>>> +	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
>>> +						vic->clk, vic->rst);
>>> +	if (err) {
>>> +		dev_err(dev, "cannot turn on the device\n");
>>> +		return err;
>>> +	}
>>> +
>>> +	err = host1x_client_register(&vic->client.base);
>>> +	if (err < 0) {
>>
>> You used 'if (err) {' previously, so maybe also here.
>
> For consistency with other Tegra DRM code these checks should use (at
> least where possible) the (err < 0) notation.
>

Will fix.

- Arto
Arto Merilainen May 25, 2015, 8:25 a.m. UTC | #10
Hi Thierry,

Thank you for your thorough analysis - and sorry for a bunch of very 
silly mistakes.

I am skipping most trivial parts and focus on the trickier comments and 
questions.

On 05/22/2015 02:47 PM, Thierry Reding wrote:
>> +
>> +	struct tegra_bo *ucode_bo;
>> +	bool ucode_valid;
>> +	void *ucode_vaddr;
>> +
>> +	bool booted;
>
> There are a bunch of other drivers that use a Falcon and they will all
> need to use similar data to this to deal with the firmware and all. I
> would like to see that code to be made into a Falcon library so that it
> can be reused in a meaningful way.
>
> Roughly this would look like this:
>
> 	struct falcon {
> 		struct device *dev;
> 		...
> 	};
>
> 	int falcon_init(struct falcon *falcon, struct device *dev,
> 			void __iomem *regs);
> 	int falcon_load_firmware(struct falcon *falcon, const char *filename);
> 	int falcon_exit(struct falcon *falcon);
> 	int falcon_boot(struct falcon *falcon);
>

There are two issues in above scheme..:
- Memory allocation. Despite I have explicitly mentioned that the series 
has been tested only with iommu disabled, I would prefer trying to keep 
an option to use it later. Depending how well we want to isolate the 
falcon library from other parts of tegradrm, the library may not have 
ability to map anything to the tegradrm iommu domain.
- The firmware images may not hold only Falcon firmware. Already in VIC 
case we have two firmwares: One for Falcon, another for FCE.

To overcome the above issues, I would prefer dropping 
falcon_load_firmware() and keeping firmware image specifics inside the 
client driver and giving data related to Falcon as a parameter for 
falcon_boot(). Would this be ok?

>> +
>> +	/* for firewall - this determines if method 1 should be regarded
>> +	 * as an address register */
>> +	bool method_data_is_addr_reg;
>> +};
>
> I think it'd be best to incorporate that functionality into the firewall
> so that we can deal with it more centrally rather that duplicate this in
> all drivers.
>

Do you have a concrete suggestion how this should be done? Firewall has 
no access to the driver specifics and in VIC case the VIC methods 
themselves define whether the METHOD1 includes address or not.

>> +static int vic_dma_pa_to_internal_256b(struct vic *vic, phys_addr_t pa,
>> +				       u32 internal_offset, bool imem)
>
> The name is confusing. Without looking at the code I'd expect this to
> perform some kind of conversion from a physical address to some internal
> address, but if I understand correctly this actually copies code into
> the Falcon instruction or data memories. A better name I think would be:
>
> 	static int falcon_load_chunk(struct falcon *falcon, phys_addr_t phys,
> 				     unsigned long offset, enum falcon_memory target);
>
> Note that this is now part of the Falcon library because I've seen
> identical code used in several other drivers. Also the bool argument is
> now an enumeration, which makes it much easier to read. Compare:
>
> 	err = vic_dma_pa_to_internal_256b(vic, phys, offset, true);
>
> and
>
> 	err = falcon_load_chunk(&vic->falcon, phys, offset, FALCON_MEMORY_IMEM);
>

Sounds ok.

> Is there a specific term in Falcon-speak for these 256 byte blocks?
> "chunk" is a little awkward.
>

Unfortunately I am not aware that there would be - maybe "block"?

>> +	if (ucode.bin_header->bin_ver != 1) {
>> +		dev_err(vic->dev, "unsupported firmware version");
>> +		return -ENOENT;
>
> Why not -EINVAL here, too?
>

We can interpret the issue two ways..: The given firmware is invalid 
(-EINVAL) or that there was no supported firmware entry available (-ENOENT).

I do not have strong opinions and will change this to -EINVAL.

>> +	vic->ucode_bo = tegra_bo_create(dev, ucode_fw->size, 0);
>> +	if (IS_ERR(vic->ucode_bo)) {
>> +		dev_err(vic->dev, "dma memory allocation failed");
>> +		err = PTR_ERR(vic->ucode_bo);
>> +		goto err_alloc_iova;
>> +	}
>
> Erm... no. Please don't use tegra_bo_create() to allocate these buffers.
> tegra_bo_create() creates GEM objects and firmware doesn't qualify as a
> GEM object.
>
> Can't you use the DMA API here?
>

The firmware must be mapped to the IOMMU domain into which VIC is 
attached - and I would prefer keeping the door open for enabling iommu 
on VIC. This was the simplest way to get a buffer that is allocated to 
the tegradrm iommu domain.

Should I add a function for allocating memory without making a gem 
object or should I keep memory allocation here and simply add functions 
for mapping it into tegradrm domain?

>> +static int vic_boot(struct device *dev)
>> +{
>> +	struct vic *vic = dev_get_drvdata(dev);
>> +	u32 offset;
>> +	int err = 0;
>> +
>> +	if (vic->booted)
>> +		return 0;
>> +
>> +	if (!vic->ucode_valid) {
>> +		err = vic_read_ucode(vic);
>> +		if (err)
>> +			return err;
>> +	}
>
> I think this is the wrong place to do this. It's good in that it's as
> late as possible, but vic_boot() is kind of the wrong place. I think you
> should load the firmware and upload it into the Falcon within the
> ->open_channel() implementation, prior to the vic_boot() call. There is
> also no need to reload the firmware every time a VIC channel is opened.
> I think loading the firmware could be done in ->init() instead.
>

I'd prefer pushing this simply inside ->open_channel().

Please correct me if I am wrong here, but ->init() is usually called 
before rootfs has been mounted and the firmware may not be available at 
that point => initialization fails

>> +		dev_err(dev, "boot failed due to timeout");
>> +		return err;
>> +	}
>> +
>> +	/* Set application id and set-up FCE ucode address */
>> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID >> 2,
>> +		    NV_PVIC_FALCON_METHOD_0);
>> +	vic_writel(vic, 1, NV_PVIC_FALCON_METHOD_1);
>> +	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE >> 2,
>> +		    NV_PVIC_FALCON_METHOD_0);
>> +	vic_writel(vic, vic->fce.size, NV_PVIC_FALCON_METHOD_1);
>
> Looks like this is already a case of device-specific case that's part of
> the boot sequence, so it seems like we're going to need these low-level
> helpers for VIC already.
>

Correct; Everything related to FCE is VIC specific.

>> +static int vic_exit(struct host1x_client *client)
>> +{
>> +	struct tegra_drm_client *drm = host1x_to_drm_client(client);
>> +	struct drm_device *dev = dev_get_drvdata(client->parent);
>> +	struct tegra_drm *tegra = dev->dev_private;
>> +	struct vic *vic = to_vic(drm);
>> +	int err;
>> +
>> +	err = tegra_drm_unregister_client(tegra, drm);
>> +	if (err < 0)
>> +		return err;
>> +
>> +	host1x_syncpt_free(client->syncpts[0]);
>> +	host1x_channel_free(vic->channel);
>> +
>> +	/* ucode is no longer available. release it */
>
> This comment isn't accurate. The microcode won't be available after
> you've released it. Perhaps you meant to say "no longer needed"?
>

Sounds better.

>> +	if (vic->ucode_valid) {
>> +		/* first, ensure that vic is not using it */
>> +		reset_control_assert(vic->rst);
>> +		udelay(10);
>> +		reset_control_deassert(vic->rst);
>> +
>> +		/* ..then release the ucode */
>> +		if (!vic->ucode_bo->vaddr)
>> +			vunmap(vic->ucode_vaddr);
>> +		drm_gem_object_release(&vic->ucode_bo->gem);
>> +		vic->ucode_valid = false;
>> +	}
>
> This now also makes the code unbalanced. You allocate the microcode
> during ->open_channel(), but free it in ->exit(). The allocation should
> happen in ->init() instead if it's freed in ->exit().
>

Please refer to my previous comment for ->init() allocation.

We can rework ->close_channel() to reset the engine and release the 
firmware, however, firmware reading and booting is not free so I would 
prefer keeping this in ->exit().

>> +	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
>> +						vic->clk, vic->rst);
>> +	if (err) {
>> +		dev_err(dev, "cannot turn on the device\n");
>> +		return err;
>> +	}
>> +
>> +	err = host1x_client_register(&vic->client.base);
>> +	if (err < 0) {
>> +		dev_err(dev, "failed to register host1x client: %d\n", err);
>> +		clk_disable_unprepare(vic->clk);
>> +		tegra_powergate_power_off(vic->config->powergate_id);
>
> tegra_powergate_sequence_power_up() also deasserts the reset, so you
> probably want to assert that here again. Maybe to make it easier you
> could abstract this away into a vic_enable()/vic_disable() pair of
> functions? Or perhaps you could even use runtime PM for this? Don't
> worry about runtime PM if that complicates things too much, though.
>

Sounds good. I planned to introduce PM runtime support a bit later but I 
can check if I can fit to this patch already.

>> +struct ucode_bin_header_v1_vic {
>> +	u32 bin_magic;        /* 0x10de */
>> +	u32 bin_ver;          /* cya, versioning of bin format (1) */
>> +	u32 bin_size;         /* entire image size including this header */
>> +	u32 os_bin_header_offset;
>> +	u32 os_bin_data_offset;
>> +	u32 os_bin_size;
>> +	u32 fce_bin_header_offset;
>> +	u32 fce_bin_data_offset;
>> +	u32 fce_bin_size;
>> +};
>> +
>> +struct ucode_os_code_header_v1_vic {
>> +	u32 offset;
>> +	u32 size;
>> +};
>> +
>> +struct ucode_os_header_v1_vic {
>> +	u32 os_code_offset;
>> +	u32 os_code_size;
>> +	u32 os_data_offset;
>> +	u32 os_data_size;
>> +	u32 num_apps;
>> +	struct ucode_os_code_header_v1_vic *app_code;
>> +	struct ucode_os_code_header_v1_vic *app_data;
>> +	u32 *os_ovl_offset;
>> +	u32 *of_ovl_size;
>> +};
>> +
>> +struct ucode_fce_header_v1_vic {
>> +	u32 fce_ucode_offset;
>> +	u32 fce_ucode_buffer_size;
>> +	u32 fce_ucode_size;
>> +};
>> +
>> +struct ucode_v1_vic {
>> +	struct ucode_bin_header_v1_vic *bin_header;
>> +	struct ucode_os_header_v1_vic  *os_header;
>> +	struct ucode_fce_header_v1_vic *fce_header;
>> +};
>
> I'll assume that these are data structures shared by all other drivers
> for Falcon driven engines, so they should probably go into the Falcon
> library header as well.
>

ucode_os_header_v1_vic is the only common structure above. Please see my 
earlier comment for the falcon library suggestion.

>> +#define NV_PVIC_MISC_PRI_VIC_CG			0x000016d0
>> +#define CG_IDLE_CG_DLY_CNT(val)			((val & 0x3f) << 0)
>> +#define CG_IDLE_CG_EN				(1 << 6)
>> +#define CG_WAKEUP_DLY_CNT(val)			((val & 0xf) << 16)
>
> These aren't in the TRM either, but I vaguely remember this being
> tracked in an internal bug. Have bugs been filed to track documentation
> of the other registers as well?
>

Yes, there is a bug for tracking that this register - and three others 
you mentioned - get documented in TRM.

- Arto
diff mbox

Patch

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index 2c66a8db9da4..3bc3566e00b6 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -13,6 +13,7 @@  tegra-drm-y := \
 	sor.o \
 	dpaux.o \
 	gr2d.o \
-	gr3d.o
+	gr3d.o \
+	vic.o
 
 obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bfad15a913a0..d947f5f4d801 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1,6 +1,6 @@ 
 /*
  * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (C) 2012-2015 NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1048,6 +1048,7 @@  static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra124-dc", },
 	{ .compatible = "nvidia,tegra124-sor", },
 	{ .compatible = "nvidia,tegra124-hdmi", },
+	{ .compatible = "nvidia,tegra124-vic", },
 	{ /* sentinel */ }
 };
 
@@ -1097,8 +1098,14 @@  static int __init host1x_drm_init(void)
 	if (err < 0)
 		goto unregister_gr2d;
 
+	err = platform_driver_register(&tegra_vic_driver);
+	if (err < 0)
+		goto unregister_gr3d;
+
 	return 0;
 
+unregister_gr3d:
+	platform_driver_unregister(&tegra_gr3d_driver);
 unregister_gr2d:
 	platform_driver_unregister(&tegra_gr2d_driver);
 unregister_dpaux:
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 0e7756e720c5..a9c02a80d6bf 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -275,5 +275,6 @@  extern struct platform_driver tegra_hdmi_driver;
 extern struct platform_driver tegra_dpaux_driver;
 extern struct platform_driver tegra_gr2d_driver;
 extern struct platform_driver tegra_gr3d_driver;
+extern struct platform_driver tegra_vic_driver;
 
 #endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
new file mode 100644
index 000000000000..b7c5a96697ed
--- /dev/null
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -0,0 +1,593 @@ 
+/*
+ * Copyright (c) 2015, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/clk.h>
+#include <linux/host1x.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <soc/tegra/pmc.h>
+#include <linux/iommu.h>
+
+#include "drm.h"
+#include "gem.h"
+#include "vic.h"
+
+#define VIC_IDLE_TIMEOUT_DEFAULT	10000	/* 10 milliseconds */
+#define VIC_IDLE_CHECK_PERIOD		10	/* 10 usec */
+
+struct vic;
+
+struct vic_config {
+	/* firmware name */
+	char *ucode_name;
+
+	/* class id */
+	u32 class_id;
+
+	/* powergate id */
+	int powergate_id;
+};
+
+struct vic {
+	struct {
+		u32 bin_data_offset;
+		u32 data_offset;
+		u32 data_size;
+		u32 code_offset;
+		u32 size;
+	} os, fce;
+
+	struct tegra_bo *ucode_bo;
+	bool ucode_valid;
+	void *ucode_vaddr;
+
+	bool booted;
+
+	void __iomem *regs;
+	struct tegra_drm_client client;
+	struct host1x_channel *channel;
+	struct iommu_domain *domain;
+	struct device *dev;
+	struct clk *clk;
+	struct reset_control *rst;
+
+	/* Platform configuration */
+	struct vic_config *config;
+
+	/* for firewall - this determines if method 1 should be regarded
+	 * as an address register */
+	bool method_data_is_addr_reg;
+};
+
+static inline struct vic *to_vic(struct tegra_drm_client *client)
+{
+	return container_of(client, struct vic, client);
+}
+
+void vic_writel(struct vic *vic, u32 v, u32 r)
+{
+	writel(v, vic->regs + r);
+}
+
+u32 vic_readl(struct vic *vic, u32 r)
+{
+	return readl(vic->regs + r);
+}
+
+static int vic_wait_idle(struct vic *vic)
+{
+	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
+
+	do {
+		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
+		u32 w = vic_readl(vic, NV_PVIC_FALCON_IDLESTATE);
+
+		if (!w)
+			return 0;
+
+		udelay(VIC_IDLE_CHECK_PERIOD);
+		timeout -= check;
+	} while (timeout);
+
+	dev_err(vic->dev, "vic idle timeout");
+
+	return -ETIMEDOUT;
+}
+
+static int vic_dma_wait_idle(struct vic *vic)
+{
+	u32 timeout = VIC_IDLE_TIMEOUT_DEFAULT;
+
+	do {
+		u32 check = min_t(u32, VIC_IDLE_CHECK_PERIOD, timeout);
+		u32 dmatrfcmd = vic_readl(vic, NV_PVIC_FALCON_DMATRFCMD);
+
+		if (dmatrfcmd & DMATRFCMD_IDLE)
+			return 0;
+
+		udelay(VIC_IDLE_CHECK_PERIOD);
+		timeout -= check;
+	} while (timeout);
+
+	dev_err(vic->dev, "dma idle timeout");
+
+	return -ETIMEDOUT;
+}
+
+static int vic_dma_pa_to_internal_256b(struct vic *vic, phys_addr_t pa,
+				       u32 internal_offset, bool imem)
+{
+	u32 cmd = DMATRFCMD_SIZE_256B;
+
+	if (imem)
+		cmd |= DMATRFCMD_IMEM;
+
+	vic_writel(vic, DMATRFMOFFS_OFFS(internal_offset),
+		   NV_PVIC_FALCON_DMATRFMOFFS);
+	vic_writel(vic, DMATRFFBOFFS_OFFS(pa),
+		   NV_PVIC_FALCON_DMATRFFBOFFS);
+	vic_writel(vic, cmd, NV_PVIC_FALCON_DMATRFCMD);
+
+	return vic_dma_wait_idle(vic);
+}
+
+static int vic_setup_ucode_image(struct vic *vic,
+				 const struct firmware *ucode_fw)
+{
+	/* image data is little endian. */
+	u32 *ucode_vaddr = vic->ucode_vaddr;
+	struct ucode_v1_vic ucode;
+	int w;
+
+	/* copy the whole thing taking into account endianness */
+	for (w = 0; w < ucode_fw->size / sizeof(u32); w++)
+		ucode_vaddr[w] = le32_to_cpu(((u32 *)ucode_fw->data)[w]);
+
+	ucode.bin_header = (struct ucode_bin_header_v1_vic *)ucode_vaddr;
+
+	/* endian problems would show up right here */
+	if (ucode.bin_header->bin_magic != 0x10de) {
+		dev_err(vic->dev, "failed to get firmware magic");
+		return -EINVAL;
+	}
+
+	if (ucode.bin_header->bin_ver != 1) {
+		dev_err(vic->dev, "unsupported firmware version");
+		return -ENOENT;
+	}
+
+	/* shouldn't be bigger than what firmware thinks */
+	if (ucode.bin_header->bin_size > ucode_fw->size) {
+		dev_err(vic->dev, "ucode image size inconsistency");
+		return -EINVAL;
+	}
+
+	ucode.os_header = (struct ucode_os_header_v1_vic *)
+		(((void *)ucode_vaddr) +
+		ucode.bin_header->os_bin_header_offset);
+	vic->os.size = ucode.bin_header->os_bin_size;
+	vic->os.bin_data_offset = ucode.bin_header->os_bin_data_offset;
+	vic->os.code_offset = ucode.os_header->os_code_offset;
+	vic->os.data_offset = ucode.os_header->os_data_offset;
+	vic->os.data_size = ucode.os_header->os_data_size;
+
+	ucode.fce_header = (struct ucode_fce_header_v1_vic *)
+		(((void *)ucode_vaddr) +
+		ucode.bin_header->fce_bin_header_offset);
+	vic->fce.size = ucode.fce_header->fce_ucode_size;
+	vic->fce.data_offset = ucode.bin_header->fce_bin_data_offset;
+
+	return 0;
+}
+
+static int vic_read_ucode(struct vic *vic)
+{
+	struct host1x_client *client = &vic->client.base;
+	struct drm_device *dev = dev_get_drvdata(client->parent);
+	const struct firmware *ucode_fw;
+	int err;
+
+	err = request_firmware(&ucode_fw, vic->config->ucode_name, vic->dev);
+	if (err) {
+		dev_err(vic->dev, "failed to get firmware\n");
+		goto err_request_firmware;
+	}
+
+	vic->ucode_bo = tegra_bo_create(dev, ucode_fw->size, 0);
+	if (IS_ERR(vic->ucode_bo)) {
+		dev_err(vic->dev, "dma memory allocation failed");
+		err = PTR_ERR(vic->ucode_bo);
+		goto err_alloc_iova;
+	}
+
+	/* get vaddr for the ucode */
+	if (!vic->ucode_bo->vaddr)
+		vic->ucode_vaddr = vmap(vic->ucode_bo->pages,
+					vic->ucode_bo->num_pages, VM_MAP,
+					pgprot_writecombine(PAGE_KERNEL));
+	else
+		vic->ucode_vaddr = vic->ucode_bo->vaddr;
+
+	err = vic_setup_ucode_image(vic, ucode_fw);
+	if (err) {
+		dev_err(vic->dev, "failed to parse firmware image\n");
+		goto err_setup_ucode_image;
+	}
+
+	vic->ucode_valid = true;
+
+	release_firmware(ucode_fw);
+
+	return 0;
+
+err_setup_ucode_image:
+	drm_gem_object_release(&vic->ucode_bo->gem);
+err_alloc_iova:
+	release_firmware(ucode_fw);
+err_request_firmware:
+	return err;
+}
+
+static int vic_boot(struct device *dev)
+{
+	struct vic *vic = dev_get_drvdata(dev);
+	u32 offset;
+	int err = 0;
+
+	if (vic->booted)
+		return 0;
+
+	if (!vic->ucode_valid) {
+		err = vic_read_ucode(vic);
+		if (err)
+			return err;
+	}
+
+	/* ensure that the engine is in sane state */
+	reset_control_assert(vic->rst);
+	udelay(10);
+	reset_control_deassert(vic->rst);
+
+	/* setup clockgating registers */
+	vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
+			CG_IDLE_CG_EN |
+			CG_WAKEUP_DLY_CNT(4),
+		   NV_PVIC_MISC_PRI_VIC_CG);
+
+	/* service all dma requests */
+	vic_writel(vic, 0, NV_PVIC_FALCON_DMACTL);
+
+	/* setup dma base address */
+	vic_writel(vic, (vic->ucode_bo->paddr + vic->os.bin_data_offset) >> 8,
+		   NV_PVIC_FALCON_DMATRFBASE);
+
+	/* dma ucode data */
+	for (offset = 0; offset < vic->os.data_size; offset += 256)
+		vic_dma_pa_to_internal_256b(vic,
+					     vic->os.data_offset + offset,
+					     offset, false);
+
+	/* dma ucode */
+	vic_dma_pa_to_internal_256b(vic, vic->os.code_offset, 0, true);
+
+	/* setup falcon interrupts and enable interface */
+	vic_writel(vic, IRQMSET_EXT(0xff) |
+			IRQMSET_SWGEN1_SET |
+			IRQMSET_SWGEN0_SET |
+			IRQMSET_EXTERR_SET |
+			IRQMSET_HALT_SET |
+			IRQMSET_WDTMR_SET,
+		   NV_PVIC_FALCON_IRQMSET);
+	vic_writel(vic, IRQDEST_HOST_EXT(0Xff) |
+			IRQDEST_HOST_SWGEN1_HOST |
+			IRQDEST_HOST_SWGEN0_HOST |
+			IRQDEST_HOST_EXTERR_HOST |
+			IRQDEST_HOST_HALT_HOST,
+		    NV_PVIC_FALCON_IRQDEST);
+
+	/* enable method and context switch interfaces */
+	vic_writel(vic, ITFEN_MTHDEN_ENABLE |
+			ITFEN_CTXEN_ENABLE,
+		   NV_PVIC_FALCON_ITFEN);
+
+	/* boot falcon */
+	vic_writel(vic, BOOTVEC_VEC(0), NV_PVIC_FALCON_BOOTVEC);
+	vic_writel(vic, CPUCTL_STARTCPU, NV_PVIC_FALCON_CPUCTL);
+
+	err = vic_wait_idle(vic);
+	if (err != 0) {
+		dev_err(dev, "boot failed due to timeout");
+		return err;
+	}
+
+	/* Set application id and set-up FCE ucode address */
+	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID >> 2,
+		    NV_PVIC_FALCON_METHOD_0);
+	vic_writel(vic, 1, NV_PVIC_FALCON_METHOD_1);
+	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE >> 2,
+		    NV_PVIC_FALCON_METHOD_0);
+	vic_writel(vic, vic->fce.size, NV_PVIC_FALCON_METHOD_1);
+	vic_writel(vic, NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET >> 2,
+		    NV_PVIC_FALCON_METHOD_0);
+	vic_writel(vic, (vic->ucode_bo->paddr + vic->fce.data_offset) >> 8,
+		    NV_PVIC_FALCON_METHOD_1);
+
+	err = vic_wait_idle(vic);
+	if (err != 0) {
+		dev_err(dev, "failed to set application id and fce base");
+		return err;
+	}
+
+	vic->booted = true;
+
+	dev_info(dev, "booted");
+
+	return 0;
+}
+
+static int vic_init(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = dev->dev_private;
+	struct vic *vic = to_vic(drm);
+	int err;
+
+	if (tegra->domain) {
+		err = iommu_attach_device(tegra->domain, vic->dev);
+		if (err < 0) {
+			dev_err(vic->dev, "failed to attach to domain: %d\n",
+				err);
+			return err;
+		}
+
+		vic->domain = tegra->domain;
+	}
+
+	vic->channel = host1x_channel_request(client->dev);
+	if (!vic->channel)
+		return -ENOMEM;
+
+	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
+	if (!client->syncpts[0]) {
+		host1x_channel_free(vic->channel);
+		return -ENOMEM;
+	}
+
+	return tegra_drm_register_client(tegra, drm);
+}
+
+static int vic_exit(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = dev->dev_private;
+	struct vic *vic = to_vic(drm);
+	int err;
+
+	err = tegra_drm_unregister_client(tegra, drm);
+	if (err < 0)
+		return err;
+
+	host1x_syncpt_free(client->syncpts[0]);
+	host1x_channel_free(vic->channel);
+
+	/* ucode is no longer available. release it */
+	if (vic->ucode_valid) {
+		/* first, ensure that vic is not using it */
+		reset_control_assert(vic->rst);
+		udelay(10);
+		reset_control_deassert(vic->rst);
+
+		/* ..then release the ucode */
+		if (!vic->ucode_bo->vaddr)
+			vunmap(vic->ucode_vaddr);
+		drm_gem_object_release(&vic->ucode_bo->gem);
+		vic->ucode_valid = false;
+	}
+
+	if (vic->domain) {
+		iommu_detach_device(vic->domain, vic->dev);
+		vic->domain = NULL;
+	}
+
+	return 0;
+}
+
+static const struct host1x_client_ops vic_client_ops = {
+	.init = vic_init,
+	.exit = vic_exit,
+};
+
+static int vic_open_channel(struct tegra_drm_client *client,
+			     struct tegra_drm_context *context)
+{
+	struct vic *vic = to_vic(client);
+	int err;
+
+	err = vic_boot(vic->dev);
+	if (err)
+		return err;
+
+	context->channel = host1x_channel_get(vic->channel);
+	if (!context->channel)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void vic_close_channel(struct tegra_drm_context *context)
+{
+	host1x_channel_put(context->channel);
+}
+
+static int vic_is_addr_reg(struct device *dev, u32 class, u32 offset, u32 val)
+{
+	struct vic *vic = dev_get_drvdata(dev);
+
+	/* handle host class */
+	if (class == HOST1X_CLASS_HOST1X) {
+		if (offset == 0x2b)
+			return true;
+		return false;
+	}
+
+	/* write targets towards method 1. check stashed value */
+	if (offset == NV_PVIC_FALCON_METHOD_1 >> 2)
+		return vic->method_data_is_addr_reg;
+
+	/* write targets to method 0. determine if the method takes an
+	 * address as a parameter */
+	if (offset == NV_PVIC_FALCON_METHOD_0 >> 2) {
+		u32 method = val << 2;
+
+		if ((method >= 0x400 && method <= 0x5dc) ||
+		    (method >= 0x720 && method <= 0x738))
+			vic->method_data_is_addr_reg = true;
+		else
+			vic->method_data_is_addr_reg = false;
+	}
+
+	/* default to false */
+	return false;
+}
+
+static const struct tegra_drm_client_ops vic_ops = {
+	.open_channel = vic_open_channel,
+	.close_channel = vic_close_channel,
+	.is_addr_reg = vic_is_addr_reg,
+	.submit = tegra_drm_submit,
+};
+
+static const struct vic_config vic_t124_config = {
+	.ucode_name = "vic03_ucode.bin",
+	.class_id = HOST1X_CLASS_VIC,
+	.powergate_id = TEGRA_POWERGATE_VIC,
+};
+
+static const struct of_device_id vic_match[] = {
+	{ .compatible = "nvidia,tegra124-vic",
+		.data = &vic_t124_config },
+	{ },
+};
+
+static int vic_probe(struct platform_device *pdev)
+{
+	struct vic_config *vic_config = NULL;
+	struct device *dev = &pdev->dev;
+	struct host1x_syncpt **syncpts;
+	struct resource *regs;
+	struct vic *vic;
+	int err;
+
+	if (dev->of_node) {
+		const struct of_device_id *match;
+
+		match = of_match_device(vic_match, dev);
+		if (match)
+			vic_config = (struct vic_config *)match->data;
+		else
+			return -ENXIO;
+	}
+
+	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
+	if (!vic)
+		return -ENOMEM;
+
+	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
+	if (!syncpts)
+		return -ENOMEM;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_err(&pdev->dev, "failed to get registers\n");
+		return -ENXIO;
+	}
+
+	vic->regs = devm_ioremap_resource(dev, regs);
+	if (IS_ERR(vic->regs))
+		return PTR_ERR(vic->regs);
+
+	vic->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(vic->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(vic->clk);
+	}
+
+	vic->rst = devm_reset_control_get(dev, "vic03");
+	if (IS_ERR(vic->rst)) {
+		dev_err(&pdev->dev, "cannot get reset\n");
+		return PTR_ERR(vic->rst);
+	}
+
+	platform_set_drvdata(pdev, vic);
+
+	INIT_LIST_HEAD(&vic->client.base.list);
+	vic->client.base.ops = &vic_client_ops;
+	vic->client.base.dev = dev;
+	vic->client.base.class = vic_config->class_id;
+	vic->client.base.syncpts = syncpts;
+	vic->client.base.num_syncpts = 1;
+	vic->dev = dev;
+	vic->config = vic_config;
+
+	INIT_LIST_HEAD(&vic->client.list);
+	vic->client.ops = &vic_ops;
+
+	err = tegra_powergate_sequence_power_up(vic->config->powergate_id,
+						vic->clk, vic->rst);
+	if (err) {
+		dev_err(dev, "cannot turn on the device\n");
+		return err;
+	}
+
+	err = host1x_client_register(&vic->client.base);
+	if (err < 0) {
+		dev_err(dev, "failed to register host1x client: %d\n", err);
+		clk_disable_unprepare(vic->clk);
+		tegra_powergate_power_off(vic->config->powergate_id);
+		platform_set_drvdata(pdev, NULL);
+		return err;
+	}
+
+	dev_info(&pdev->dev, "initialized");
+
+	return 0;
+}
+
+static int vic_remove(struct platform_device *pdev)
+{
+	struct vic *vic = platform_get_drvdata(pdev);
+	int err;
+
+	err = host1x_client_unregister(&vic->client.base);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
+			err);
+		return err;
+	}
+
+	clk_disable_unprepare(vic->clk);
+	tegra_powergate_power_off(vic->config->powergate_id);
+
+	return 0;
+}
+
+struct platform_driver tegra_vic_driver = {
+	.driver = {
+		.name = "tegra-vic",
+		.of_match_table = vic_match,
+	},
+	.probe = vic_probe,
+	.remove = vic_remove,
+};
diff --git a/drivers/gpu/drm/tegra/vic.h b/drivers/gpu/drm/tegra/vic.h
new file mode 100644
index 000000000000..65ca38a8da88
--- /dev/null
+++ b/drivers/gpu/drm/tegra/vic.h
@@ -0,0 +1,116 @@ 
+/*
+ * Copyright (c) 2015, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef TEGRA_VIC_H
+#define TEGRA_VIC_H
+
+#include <linux/types.h>
+#include <linux/dma-attrs.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+struct ucode_bin_header_v1_vic {
+	u32 bin_magic;        /* 0x10de */
+	u32 bin_ver;          /* cya, versioning of bin format (1) */
+	u32 bin_size;         /* entire image size including this header */
+	u32 os_bin_header_offset;
+	u32 os_bin_data_offset;
+	u32 os_bin_size;
+	u32 fce_bin_header_offset;
+	u32 fce_bin_data_offset;
+	u32 fce_bin_size;
+};
+
+struct ucode_os_code_header_v1_vic {
+	u32 offset;
+	u32 size;
+};
+
+struct ucode_os_header_v1_vic {
+	u32 os_code_offset;
+	u32 os_code_size;
+	u32 os_data_offset;
+	u32 os_data_size;
+	u32 num_apps;
+	struct ucode_os_code_header_v1_vic *app_code;
+	struct ucode_os_code_header_v1_vic *app_data;
+	u32 *os_ovl_offset;
+	u32 *of_ovl_size;
+};
+
+struct ucode_fce_header_v1_vic {
+	u32 fce_ucode_offset;
+	u32 fce_ucode_buffer_size;
+	u32 fce_ucode_size;
+};
+
+struct ucode_v1_vic {
+	struct ucode_bin_header_v1_vic *bin_header;
+	struct ucode_os_header_v1_vic  *os_header;
+	struct ucode_fce_header_v1_vic *fce_header;
+};
+
+/* VIC methods */
+#define NVA0B6_VIDEO_COMPOSITOR_SET_APPLICATION_ID	0x00000200
+#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_SIZE	0x0000071C
+#define NVA0B6_VIDEO_COMPOSITOR_SET_FCE_UCODE_OFFSET	0x0000072C
+
+/* VIC registers */
+
+#define NV_PVIC_FALCON_METHOD_0			0x00000040
+#define NV_PVIC_FALCON_METHOD_1			0x00000044
+
+#define NV_PVIC_FALCON_IRQMSET			0x00001010
+#define IRQMSET_WDTMR_SET			(1 << 1)
+#define IRQMSET_HALT_SET			(1 << 4)
+#define IRQMSET_EXTERR_SET			(1 << 5)
+#define IRQMSET_SWGEN0_SET			(1 << 6)
+#define IRQMSET_SWGEN1_SET			(1 << 7)
+#define IRQMSET_EXT(val)			((val & 0xff) << 8)
+
+#define NV_PVIC_FALCON_IRQDEST			0x0000101c
+#define IRQDEST_HOST_HALT_HOST			(1 << 4)
+#define IRQDEST_HOST_EXTERR_HOST		(1 << 5)
+#define IRQDEST_HOST_SWGEN0_HOST		(1 << 6)
+#define IRQDEST_HOST_SWGEN1_HOST		(1 << 7)
+#define IRQDEST_HOST_EXT(val)			((val & 0xff) << 8)
+
+#define NV_PVIC_FALCON_ITFEN			0x00001048
+#define ITFEN_CTXEN_ENABLE			(1 << 0)
+#define ITFEN_MTHDEN_ENABLE			(1 << 1)
+
+#define NV_PVIC_FALCON_IDLESTATE		0x0000104c
+
+#define NV_PVIC_FALCON_CPUCTL			0x00001100
+#define CPUCTL_STARTCPU				(1 << 1)
+
+#define NV_PVIC_FALCON_BOOTVEC			0x00001104
+#define BOOTVEC_VEC(val)			((val & 0xffffffff) << 0)
+
+#define NV_PVIC_FALCON_DMACTL			0x0000110c
+
+#define NV_PVIC_FALCON_DMATRFBASE		0x00001110
+
+#define NV_PVIC_FALCON_DMATRFMOFFS		0x00001114
+#define DMATRFMOFFS_OFFS(val)			((val & 0xffff) << 0)
+
+#define NV_PVIC_FALCON_DMATRFCMD		0x00001118
+#define DMATRFCMD_IDLE				(1 << 1)
+#define DMATRFCMD_IMEM				(1 << 4)
+#define DMATRFCMD_SIZE_256B			(6 << 8)
+
+#define NV_PVIC_FALCON_DMATRFFBOFFS		0x0000111c
+#define DMATRFFBOFFS_OFFS(val)			((val & 0xffffffff) << 0)
+
+#define NV_PVIC_MISC_PRI_VIC_CG			0x000016d0
+#define CG_IDLE_CG_DLY_CNT(val)			((val & 0x3f) << 0)
+#define CG_IDLE_CG_EN				(1 << 6)
+#define CG_WAKEUP_DLY_CNT(val)			((val & 0xf) << 16)
+
+
+#endif /* TEGRA_VIC_H */
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index fc86ced77e76..a006dad00009 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -26,6 +26,7 @@  enum host1x_class {
 	HOST1X_CLASS_HOST1X = 0x1,
 	HOST1X_CLASS_GR2D = 0x51,
 	HOST1X_CLASS_GR2D_SB = 0x52,
+	HOST1X_CLASS_VIC = 0x5D,
 	HOST1X_CLASS_GR3D = 0x60,
 };