diff mbox series

[v3,2/2] drm/lima: driver for ARM Mali4xx GPUs

Message ID 20190227134124.28602-1-yuq825@gmail.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Qiang Yu Feb. 27, 2019, 1:41 p.m. UTC
- Mali 4xx GPUs have two kinds of processors: GP and PP. GP is for
  OpenGL vertex shader processing and PP is for fragment shader
  processing. Each processor has its own MMU, so the processors work
  in a virtual address space.
- There's only one GP but multiple PPs (max 4 for mali 400 and 8
  for mali 450) in the same mali 4xx GPU. All PPs are grouped
  together to handle a single fragment shader task divided by
  FB output tiled pixels. The Mali 400 user space driver is
  responsible for assigning target tiled pixels to each PP, but mali
  450 has a HW module called DLBU to dynamically balance each
  PP's load.
- The user space driver allocates buffer objects and maps them into
  the GPU virtual address space, uploads the command stream and draw
  data through a CPU mmap of the buffer object, then submits a task
  to GP/PP with a register frame indicating where the command stream
  is, plus misc settings.
- There's no command stream validation/relocation because each user
  process has its own GPU virtual address space. The GP/PP MMU
  switches virtual address space between two tasks from different
  user processes. Erroneous or malicious user space code just gets
  an MMU fault or GP/PP error IRQ, after which the HW/SW is
  recovered.
- Use GEM+shmem for MM. Currently memory is just allocated and pinned
  at gem object creation. GPU vm mapping of the buffer is also done
  in the alloc stage in kernel space. We may delay the memory
  allocation and real GPU vm map to the command submission stage in
  the future as an improvement.
- Use drm_sched for GPU task scheduling. Each OpenGL context should
  have a lima context object in the kernel to distinguish tasks
  from different users. drm_sched gets tasks from each lima context
  in a fair way.

v3:
- fix comments from kbuild robot
- restrict supported arch to tested ones

v2:
- fix syscall argument check
- fix job finish fence leak since kernel 5.0
- use drm syncobj to replace native fence
- move buffer object GPU va map into kernel
- reserve syscall argument space for future info
- remove kernel gem modifier
- switch TTM back to GEM+shmem MM
- use time based io poll
- use whole register name
- adopt gem reservation obj integration
- use drm_timeout_abs_to_jiffies

Cc: Eric Anholt <eric@anholt.net>
Cc: Rob Herring <robh@kernel.org>
Cc: Christian König <ckoenig.leichtzumerken@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de>
Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Simon Shields <simon@lineageos.org>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
---
 drivers/gpu/drm/Kconfig               |   2 +
 drivers/gpu/drm/Makefile              |   1 +
 drivers/gpu/drm/lima/Kconfig          |  10 +
 drivers/gpu/drm/lima/Makefile         |  21 ++
 drivers/gpu/drm/lima/lima_bcast.c     |  46 +++
 drivers/gpu/drm/lima/lima_bcast.h     |  14 +
 drivers/gpu/drm/lima/lima_ctx.c       | 105 +++++++
 drivers/gpu/drm/lima/lima_ctx.h       |  30 ++
 drivers/gpu/drm/lima/lima_device.c    | 376 +++++++++++++++++++++++
 drivers/gpu/drm/lima/lima_device.h    | 129 ++++++++
 drivers/gpu/drm/lima/lima_dlbu.c      |  56 ++++
 drivers/gpu/drm/lima/lima_dlbu.h      |  18 ++
 drivers/gpu/drm/lima/lima_drv.c       | 353 +++++++++++++++++++++
 drivers/gpu/drm/lima/lima_drv.h       |  46 +++
 drivers/gpu/drm/lima/lima_gem.c       | 379 +++++++++++++++++++++++
 drivers/gpu/drm/lima/lima_gem.h       |  25 ++
 drivers/gpu/drm/lima/lima_gem_prime.c |  47 +++
 drivers/gpu/drm/lima/lima_gem_prime.h |  13 +
 drivers/gpu/drm/lima/lima_gp.c        | 282 +++++++++++++++++
 drivers/gpu/drm/lima/lima_gp.h        |  16 +
 drivers/gpu/drm/lima/lima_l2_cache.c  |  80 +++++
 drivers/gpu/drm/lima/lima_l2_cache.h  |  14 +
 drivers/gpu/drm/lima/lima_mmu.c       | 142 +++++++++
 drivers/gpu/drm/lima/lima_mmu.h       |  16 +
 drivers/gpu/drm/lima/lima_object.c    | 124 ++++++++
 drivers/gpu/drm/lima/lima_object.h    |  36 +++
 drivers/gpu/drm/lima/lima_pmu.c       |  59 ++++
 drivers/gpu/drm/lima/lima_pmu.h       |  12 +
 drivers/gpu/drm/lima/lima_pp.c        | 423 ++++++++++++++++++++++++++
 drivers/gpu/drm/lima/lima_pp.h        |  19 ++
 drivers/gpu/drm/lima/lima_regs.h      | 298 ++++++++++++++++++
 drivers/gpu/drm/lima/lima_sched.c     | 398 ++++++++++++++++++++++++
 drivers/gpu/drm/lima/lima_sched.h     | 104 +++++++
 drivers/gpu/drm/lima/lima_vm.c        | 280 +++++++++++++++++
 drivers/gpu/drm/lima/lima_vm.h        |  62 ++++
 include/uapi/drm/lima_drm.h           | 126 ++++++++
 36 files changed, 4162 insertions(+)
 create mode 100644 drivers/gpu/drm/lima/Kconfig
 create mode 100644 drivers/gpu/drm/lima/Makefile
 create mode 100644 drivers/gpu/drm/lima/lima_bcast.c
 create mode 100644 drivers/gpu/drm/lima/lima_bcast.h
 create mode 100644 drivers/gpu/drm/lima/lima_ctx.c
 create mode 100644 drivers/gpu/drm/lima/lima_ctx.h
 create mode 100644 drivers/gpu/drm/lima/lima_device.c
 create mode 100644 drivers/gpu/drm/lima/lima_device.h
 create mode 100644 drivers/gpu/drm/lima/lima_dlbu.c
 create mode 100644 drivers/gpu/drm/lima/lima_dlbu.h
 create mode 100644 drivers/gpu/drm/lima/lima_drv.c
 create mode 100644 drivers/gpu/drm/lima/lima_drv.h
 create mode 100644 drivers/gpu/drm/lima/lima_gem.c
 create mode 100644 drivers/gpu/drm/lima/lima_gem.h
 create mode 100644 drivers/gpu/drm/lima/lima_gem_prime.c
 create mode 100644 drivers/gpu/drm/lima/lima_gem_prime.h
 create mode 100644 drivers/gpu/drm/lima/lima_gp.c
 create mode 100644 drivers/gpu/drm/lima/lima_gp.h
 create mode 100644 drivers/gpu/drm/lima/lima_l2_cache.c
 create mode 100644 drivers/gpu/drm/lima/lima_l2_cache.h
 create mode 100644 drivers/gpu/drm/lima/lima_mmu.c
 create mode 100644 drivers/gpu/drm/lima/lima_mmu.h
 create mode 100644 drivers/gpu/drm/lima/lima_object.c
 create mode 100644 drivers/gpu/drm/lima/lima_object.h
 create mode 100644 drivers/gpu/drm/lima/lima_pmu.c
 create mode 100644 drivers/gpu/drm/lima/lima_pmu.h
 create mode 100644 drivers/gpu/drm/lima/lima_pp.c
 create mode 100644 drivers/gpu/drm/lima/lima_pp.h
 create mode 100644 drivers/gpu/drm/lima/lima_regs.h
 create mode 100644 drivers/gpu/drm/lima/lima_sched.c
 create mode 100644 drivers/gpu/drm/lima/lima_sched.h
 create mode 100644 drivers/gpu/drm/lima/lima_vm.c
 create mode 100644 drivers/gpu/drm/lima/lima_vm.h
 create mode 100644 include/uapi/drm/lima_drm.h

Comments

Sam Ravnborg Feb. 27, 2019, 8:29 p.m. UTC | #1
Hi Qiang.

Two general comments:
- We are trying to avoid drmP.h in new drivers, please drop it.
- The use of idr is being replaced with XArray, see patch-set
  posted by Matthew Wilcox.
  Try to use XArray so we do not introduce a new user.

This was from a very quick look at the driver.
In general things looked good, but I stumbled upon
these two issues.

	Sam
Rob Herring (Arm) Feb. 27, 2019, 9:41 p.m. UTC | #2
On Wed, Feb 27, 2019 at 7:42 AM Qiang Yu <yuq825@gmail.com> wrote:
>

Looks pretty good. A few small things and some questions hopefully
some others can answer.

> - Mali 4xx GPUs have two kinds of processors: GP and PP. GP is for
>   OpenGL vertex shader processing and PP is for fragment shader
>   processing. Each processor has its own MMU, so the processors work
>   in a virtual address space.
> - There's only one GP but multiple PPs (max 4 for mali 400 and 8
>   for mali 450) in the same mali 4xx GPU. All PPs are grouped
>   together to handle a single fragment shader task divided by
>   FB output tiled pixels. The Mali 400 user space driver is
>   responsible for assigning target tiled pixels to each PP, but mali
>   450 has a HW module called DLBU to dynamically balance each
>   PP's load.
> - The user space driver allocates buffer objects and maps them into
>   the GPU virtual address space, uploads the command stream and draw
>   data through a CPU mmap of the buffer object, then submits a task
>   to GP/PP with a register frame indicating where the command stream
>   is, plus misc settings.
> - There's no command stream validation/relocation because each user
>   process has its own GPU virtual address space. The GP/PP MMU
>   switches virtual address space between two tasks from different
>   user processes. Erroneous or malicious user space code just gets
>   an MMU fault or GP/PP error IRQ, after which the HW/SW is
>   recovered.
> - Use GEM+shmem for MM. Currently memory is just allocated and pinned
>   at gem object creation. GPU vm mapping of the buffer is also done
>   in the alloc stage in kernel space. We may delay the memory
>   allocation and real GPU vm map to the command submission stage in
>   the future as an improvement.
> - Use drm_sched for GPU task scheduling. Each OpenGL context should
>   have a lima context object in the kernel to distinguish tasks
>   from different users. drm_sched gets tasks from each lima context
>   in a fair way.
>
> v3:
> - fix comments from kbuild robot
> - restrict supported arch to tested ones
>
> v2:
> - fix syscall argument check
> - fix job finish fence leak since kernel 5.0
> - use drm syncobj to replace native fence
> - move buffer object GPU va map into kernel
> - reserve syscall argument space for future info
> - remove kernel gem modifier
> - switch TTM back to GEM+shmem MM
> - use time based io poll
> - use whole register name
> - adopt gem reservation obj integration
> - use drm_timeout_abs_to_jiffies
>
> Cc: Eric Anholt <eric@anholt.net>
> Cc: Rob Herring <robh@kernel.org>
> Cc: Christian König <ckoenig.leichtzumerken@gmail.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Alex Deucher <alexdeucher@gmail.com>
> Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de>
> Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
> Signed-off-by: Heiko Stuebner <heiko@sntech.de>
> Signed-off-by: Marek Vasut <marex@denx.de>
> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> Signed-off-by: Simon Shields <simon@lineageos.org>
> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
> Signed-off-by: Qiang Yu <yuq825@gmail.com>
> ---

> diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
> new file mode 100644
> index 000000000000..398e6d604426
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_bcast.c
> @@ -0,0 +1,46 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */

It's 2019 now.

> +
> +#include <linux/io.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_bcast.h"
> +#include "lima_regs.h"
> +
> +#define bcast_write(reg, data) writel(data, ip->iomem + reg)
> +#define bcast_read(reg) readl(ip->iomem + reg)
> +
> +void lima_bcast_enable(struct lima_device *dev, int num_pp)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       struct lima_ip *ip = dev->ip + lima_ip_bcast;
> +       int i, mask = bcast_read(LIMA_BCAST_BROADCAST_MASK) & 0xffff0000;
> +
> +       for (i = 0; i < num_pp; i++) {
> +               struct lima_ip *pp = pipe->processor[i];
> +               mask |= 1 << (pp->id - lima_ip_pp0);
> +       }
> +
> +       bcast_write(LIMA_BCAST_BROADCAST_MASK, mask);
> +}
> +
> +int lima_bcast_init(struct lima_ip *ip)
> +{
> +       int i, mask = 0;
> +
> +       for (i = lima_ip_pp0; i <= lima_ip_pp7; i++) {
> +               if (ip->dev->ip[i].present)
> +                       mask |= 1 << (i - lima_ip_pp0);
> +       }
> +
> +       bcast_write(LIMA_BCAST_BROADCAST_MASK, mask << 16);
> +       bcast_write(LIMA_BCAST_INTERRUPT_MASK, mask);
> +       return 0;
> +}
> +
> +void lima_bcast_fini(struct lima_ip *ip)
> +{
> +
> +}
> +
> diff --git a/drivers/gpu/drm/lima/lima_bcast.h b/drivers/gpu/drm/lima/lima_bcast.h
> new file mode 100644
> index 000000000000..345e3e809860
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_bcast.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_BCAST_H__
> +#define __LIMA_BCAST_H__
> +
> +struct lima_ip;
> +
> +int lima_bcast_init(struct lima_ip *ip);
> +void lima_bcast_fini(struct lima_ip *ip);
> +
> +void lima_bcast_enable(struct lima_device *dev, int num_pp);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
> new file mode 100644
> index 000000000000..439cb44d7a0d
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_ctx.c
> @@ -0,0 +1,105 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/slab.h>
> +
> +#include "lima_device.h"
> +#include "lima_ctx.h"
> +
> +int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id)
> +{
> +       struct lima_ctx *ctx;
> +       int i, err;
> +
> +       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +       if (!ctx)
> +               return -ENOMEM;
> +       ctx->dev = dev;
> +       kref_init(&ctx->refcnt);
> +
> +       for (i = 0; i < lima_pipe_num; i++) {
> +               err = lima_sched_context_init(dev->pipe + i, ctx->context + i, &ctx->guilty);
> +               if (err)
> +                       goto err_out0;
> +       }
> +
> +       idr_preload(GFP_KERNEL);
> +       spin_lock(&mgr->lock);
> +       err = idr_alloc(&mgr->handles, ctx, 1, 0, GFP_ATOMIC);
> +       spin_unlock(&mgr->lock);
> +       idr_preload_end();
> +       if (err < 0)
> +               goto err_out0;
> +
> +       *id = err;
> +       return 0;
> +
> +err_out0:
> +       for (i--; i >= 0; i--)
> +               lima_sched_context_fini(dev->pipe + i, ctx->context + i);
> +       kfree(ctx);
> +       return err;
> +}
> +
> +static void lima_ctx_do_release(struct kref *ref)
> +{
> +       struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt);
> +       int i;
> +
> +       for (i = 0; i < lima_pipe_num; i++)
> +               lima_sched_context_fini(ctx->dev->pipe + i, ctx->context + i);
> +       kfree(ctx);
> +}
> +
> +int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id)
> +{
> +       struct lima_ctx *ctx;
> +
> +       spin_lock(&mgr->lock);
> +       ctx = idr_remove(&mgr->handles, id);
> +       spin_unlock(&mgr->lock);
> +
> +       if (ctx) {
> +               kref_put(&ctx->refcnt, lima_ctx_do_release);
> +               return 0;
> +       }
> +       return -EINVAL;
> +}
> +
> +struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id)
> +{
> +       struct lima_ctx *ctx;
> +
> +       spin_lock(&mgr->lock);
> +       ctx = idr_find(&mgr->handles, id);
> +       if (ctx)
> +               kref_get(&ctx->refcnt);
> +       spin_unlock(&mgr->lock);
> +       return ctx;
> +}
> +
> +void lima_ctx_put(struct lima_ctx *ctx)
> +{
> +       kref_put(&ctx->refcnt, lima_ctx_do_release);
> +}
> +
> +void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr)
> +{
> +       spin_lock_init(&mgr->lock);
> +       idr_init(&mgr->handles);
> +}
> +
> +void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr)
> +{
> +       struct lima_ctx *ctx;
> +       struct idr *idp;
> +       uint32_t id;
> +
> +       idp = &mgr->handles;
> +
> +       idr_for_each_entry(idp, ctx, id) {
> +               kref_put(&ctx->refcnt, lima_ctx_do_release);
> +       }
> +
> +       idr_destroy(&mgr->handles);
> +}
> diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h
> new file mode 100644
> index 000000000000..2d32ff9b30ad
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_ctx.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_CTX_H__
> +#define __LIMA_CTX_H__
> +
> +#include <linux/idr.h>
> +
> +#include "lima_device.h"
> +
> +struct lima_ctx {
> +       struct kref refcnt;
> +       struct lima_device *dev;
> +       struct lima_sched_context context[lima_pipe_num];
> +       atomic_t guilty;
> +};
> +
> +struct lima_ctx_mgr {
> +       spinlock_t lock;
> +       struct idr handles;
> +};
> +
> +int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id);
> +int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id);
> +struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id);
> +void lima_ctx_put(struct lima_ctx *ctx);
> +void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr);
> +void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
> new file mode 100644
> index 000000000000..2e137a0baddb
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_device.c
> @@ -0,0 +1,376 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/regulator/consumer.h>
> +#include <linux/reset.h>
> +#include <linux/clk.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/platform_device.h>
> +
> +#include "lima_device.h"
> +#include "lima_gp.h"
> +#include "lima_pp.h"
> +#include "lima_mmu.h"
> +#include "lima_pmu.h"
> +#include "lima_l2_cache.h"
> +#include "lima_dlbu.h"
> +#include "lima_bcast.h"
> +#include "lima_vm.h"
> +
> +struct lima_ip_desc {
> +       char *name;
> +       char *irq_name;
> +       bool must_have[lima_gpu_num];
> +       int offset[lima_gpu_num];
> +
> +       int (*init)(struct lima_ip *);
> +       void (*fini)(struct lima_ip *);
> +};
> +
> +#define LIMA_IP_DESC(ipname, mst0, mst1, off0, off1, func, irq) \
> +       [lima_ip_##ipname] = { \
> +               .name = #ipname, \
> +               .irq_name = irq, \
> +               .must_have = { \
> +                       [lima_gpu_mali400] = mst0, \
> +                       [lima_gpu_mali450] = mst1, \
> +               }, \
> +               .offset = { \
> +                       [lima_gpu_mali400] = off0, \
> +                       [lima_gpu_mali450] = off1, \
> +               }, \
> +               .init = lima_##func##_init, \
> +               .fini = lima_##func##_fini, \
> +       }
> +
> +static struct lima_ip_desc lima_ip_desc[lima_ip_num] = {
> +       LIMA_IP_DESC(pmu,         false, false, 0x02000, 0x02000, pmu,      "pmu"),
> +       LIMA_IP_DESC(l2_cache0,   true,  true,  0x01000, 0x10000, l2_cache, NULL),
> +       LIMA_IP_DESC(l2_cache1,   false, true,  -1,      0x01000, l2_cache, NULL),
> +       LIMA_IP_DESC(l2_cache2,   false, false, -1,      0x11000, l2_cache, NULL),
> +       LIMA_IP_DESC(gp,          true,  true,  0x00000, 0x00000, gp,       "gp"),
> +       LIMA_IP_DESC(pp0,         true,  true,  0x08000, 0x08000, pp,       "pp0"),
> +       LIMA_IP_DESC(pp1,         false, false, 0x0A000, 0x0A000, pp,       "pp1"),
> +       LIMA_IP_DESC(pp2,         false, false, 0x0C000, 0x0C000, pp,       "pp2"),
> +       LIMA_IP_DESC(pp3,         false, false, 0x0E000, 0x0E000, pp,       "pp3"),
> +       LIMA_IP_DESC(pp4,         false, false, -1,      0x28000, pp,       "pp4"),
> +       LIMA_IP_DESC(pp5,         false, false, -1,      0x2A000, pp,       "pp5"),
> +       LIMA_IP_DESC(pp6,         false, false, -1,      0x2C000, pp,       "pp6"),
> +       LIMA_IP_DESC(pp7,         false, false, -1,      0x2E000, pp,       "pp7"),
> +       LIMA_IP_DESC(gpmmu,       true,  true,  0x03000, 0x03000, mmu,      "gpmmu"),
> +       LIMA_IP_DESC(ppmmu0,      true,  true,  0x04000, 0x04000, mmu,      "ppmmu0"),
> +       LIMA_IP_DESC(ppmmu1,      false, false, 0x05000, 0x05000, mmu,      "ppmmu1"),
> +       LIMA_IP_DESC(ppmmu2,      false, false, 0x06000, 0x06000, mmu,      "ppmmu2"),
> +       LIMA_IP_DESC(ppmmu3,      false, false, 0x07000, 0x07000, mmu,      "ppmmu3"),
> +       LIMA_IP_DESC(ppmmu4,      false, false, -1,      0x1C000, mmu,      "ppmmu4"),
> +       LIMA_IP_DESC(ppmmu5,      false, false, -1,      0x1D000, mmu,      "ppmmu5"),
> +       LIMA_IP_DESC(ppmmu6,      false, false, -1,      0x1E000, mmu,      "ppmmu6"),
> +       LIMA_IP_DESC(ppmmu7,      false, false, -1,      0x1F000, mmu,      "ppmmu7"),
> +       LIMA_IP_DESC(dlbu,        false, true,  -1,      0x14000, dlbu,     NULL),
> +       LIMA_IP_DESC(bcast,       false, true,  -1,      0x13000, bcast,    NULL),
> +       LIMA_IP_DESC(pp_bcast,    false, true,  -1,      0x16000, pp_bcast, "pp"),
> +       LIMA_IP_DESC(ppmmu_bcast, false, true,  -1,      0x15000, mmu,      NULL),
> +};
> +
> +const char *lima_ip_name(struct lima_ip *ip)
> +{
> +       return lima_ip_desc[ip->id].name;
> +}
> +
> +static int lima_clk_init(struct lima_device *dev)
> +{
> +       int err;
> +       unsigned long bus_rate, gpu_rate;
> +
> +       dev->clk_bus = devm_clk_get(dev->dev, "bus");
> +       if (IS_ERR(dev->clk_bus)) {
> +               dev_err(dev->dev, "get bus clk failed %ld\n", PTR_ERR(dev->clk_bus));
> +               return PTR_ERR(dev->clk_bus);
> +       }
> +
> +       dev->clk_gpu = devm_clk_get(dev->dev, "core");
> +       if (IS_ERR(dev->clk_gpu)) {
> +               dev_err(dev->dev, "get core clk failed %ld\n", PTR_ERR(dev->clk_gpu));
> +               return PTR_ERR(dev->clk_gpu);
> +       }
> +
> +       bus_rate = clk_get_rate(dev->clk_bus);
> +       dev_info(dev->dev, "bus rate = %lu\n", bus_rate);
> +
> +       gpu_rate = clk_get_rate(dev->clk_gpu);
> +       dev_info(dev->dev, "mod rate = %lu", gpu_rate);
> +
> +       if ((err = clk_prepare_enable(dev->clk_bus)))
> +               return err;
> +       if ((err = clk_prepare_enable(dev->clk_gpu)))
> +               goto error_out0;
> +
> +       dev->reset = devm_reset_control_get_optional(dev->dev, NULL);
> +       if (IS_ERR(dev->reset)) {
> +               err = PTR_ERR(dev->reset);
> +               goto error_out1;
> +       } else if (dev->reset != NULL) {
> +               if ((err = reset_control_deassert(dev->reset)))
> +                       goto error_out1;
> +       }
> +
> +       return 0;
> +
> +error_out1:
> +       clk_disable_unprepare(dev->clk_gpu);
> +error_out0:
> +       clk_disable_unprepare(dev->clk_bus);
> +       return err;
> +}
> +
> +static void lima_clk_fini(struct lima_device *dev)
> +{
> +       if (dev->reset != NULL)
> +               reset_control_assert(dev->reset);
> +       clk_disable_unprepare(dev->clk_gpu);
> +       clk_disable_unprepare(dev->clk_bus);
> +}
> +
> +static int lima_regulator_init(struct lima_device *dev)
> +{
> +       int ret;
> +       dev->regulator = devm_regulator_get_optional(dev->dev, "mali");
> +       if (IS_ERR(dev->regulator)) {
> +               ret = PTR_ERR(dev->regulator);
> +               dev->regulator = NULL;
> +               if (ret == -ENODEV)
> +                       return 0;
> +               dev_err(dev->dev, "failed to get regulator: %d\n", ret);
> +               return ret;
> +       }
> +
> +       ret = regulator_enable(dev->regulator);
> +       if (ret < 0) {
> +               dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
> +               return ret;
> +       }
> +
> +       return 0;
> +}
> +
> +static void lima_regulator_fini(struct lima_device *dev)
> +{
> +       if (dev->regulator)
> +               regulator_disable(dev->regulator);
> +}
> +
> +static int lima_init_ip(struct lima_device *dev, int index)
> +{
> +       struct lima_ip_desc *desc = lima_ip_desc + index;
> +       struct lima_ip *ip = dev->ip + index;
> +       int offset = desc->offset[dev->id];
> +       bool must = desc->must_have[dev->id];
> +       int err;
> +
> +       if (offset < 0)
> +               return 0;
> +
> +       ip->dev = dev;
> +       ip->id = index;
> +       ip->iomem = dev->iomem + offset;
> +       if (desc->irq_name) {
> +               err = platform_get_irq_byname(dev->pdev, desc->irq_name);
> +               if (err < 0)
> +                       goto out;
> +               ip->irq = err;
> +       }
> +
> +       err = desc->init(ip);
> +       if (!err) {
> +               ip->present = true;
> +               return 0;
> +       }
> +
> +out:
> +       return must ? err : 0;
> +}
> +
> +static void lima_fini_ip(struct lima_device *ldev, int index)
> +{
> +       struct lima_ip_desc *desc = lima_ip_desc + index;
> +       struct lima_ip *ip = ldev->ip + index;
> +
> +       if (ip->present)
> +               desc->fini(ip);
> +}
> +
> +static int lima_init_gp_pipe(struct lima_device *dev)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> +       int err;
> +
> +       if ((err = lima_sched_pipe_init(pipe, "gp")))
> +               return err;
> +
> +       pipe->l2_cache[pipe->num_l2_cache++] = dev->ip + lima_ip_l2_cache0;
> +       pipe->mmu[pipe->num_mmu++] = dev->ip + lima_ip_gpmmu;
> +       pipe->processor[pipe->num_processor++] = dev->ip + lima_ip_gp;
> +
> +       if ((err = lima_gp_pipe_init(dev))) {
> +               lima_sched_pipe_fini(pipe);
> +               return err;
> +       }
> +
> +       return 0;
> +}
> +
> +static void lima_fini_gp_pipe(struct lima_device *dev)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> +
> +       lima_gp_pipe_fini(dev);
> +       lima_sched_pipe_fini(pipe);
> +}
> +
> +static int lima_init_pp_pipe(struct lima_device *dev)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       int err, i;
> +
> +       if ((err = lima_sched_pipe_init(pipe, "pp")))
> +               return err;
> +
> +       for (i = 0; i < LIMA_SCHED_PIPE_MAX_PROCESSOR; i++) {
> +               struct lima_ip *pp = dev->ip + lima_ip_pp0 + i;
> +               struct lima_ip *ppmmu = dev->ip + lima_ip_ppmmu0 + i;
> +               struct lima_ip *l2_cache;
> +
> +               if (dev->id == lima_gpu_mali400)
> +                       l2_cache = dev->ip + lima_ip_l2_cache0;
> +               else
> +                       l2_cache = dev->ip + lima_ip_l2_cache1 + (i >> 2);
> +
> +               if (pp->present && ppmmu->present && l2_cache->present) {
> +                       pipe->mmu[pipe->num_mmu++] = ppmmu;
> +                       pipe->processor[pipe->num_processor++] = pp;
> +                       if (!pipe->l2_cache[i >> 2])
> +                               pipe->l2_cache[pipe->num_l2_cache++] = l2_cache;
> +               }
> +       }
> +
> +       if (dev->ip[lima_ip_bcast].present) {
> +               pipe->bcast_processor = dev->ip + lima_ip_pp_bcast;
> +               pipe->bcast_mmu = dev->ip + lima_ip_ppmmu_bcast;
> +       }
> +
> +       if ((err = lima_pp_pipe_init(dev))) {
> +               lima_sched_pipe_fini(pipe);
> +               return err;
> +       }
> +
> +       return 0;
> +}
> +
> +static void lima_fini_pp_pipe(struct lima_device *dev)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +
> +       lima_pp_pipe_fini(dev);
> +       lima_sched_pipe_fini(pipe);
> +}
> +
> +int lima_device_init(struct lima_device *ldev)
> +{
> +       int err, i;
> +       struct resource *res;
> +
> +       dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
> +
> +       err = lima_clk_init(ldev);
> +       if (err) {
> +               dev_err(ldev->dev, "clk init fail %d\n", err);
> +               return err;
> +       }
> +
> +       if ((err = lima_regulator_init(ldev))) {
> +               dev_err(ldev->dev, "regulator init fail %d\n", err);
> +               goto err_out0;
> +       }
> +
> +       ldev->empty_vm = lima_vm_create(ldev);
> +       if (!ldev->empty_vm) {
> +               err = -ENOMEM;
> +               goto err_out1;
> +       }
> +
> +       ldev->va_start = 0;
> +       if (ldev->id == lima_gpu_mali450) {
> +               ldev->va_end = LIMA_VA_RESERVE_START;
> +               ldev->dlbu_cpu = dma_alloc_wc(
> +                       ldev->dev, LIMA_PAGE_SIZE,
> +                       &ldev->dlbu_dma, GFP_KERNEL);
> +               if (!ldev->dlbu_cpu) {
> +                       err = -ENOMEM;
> +                       goto err_out2;
> +               }
> +       }
> +       else
> +               ldev->va_end = LIMA_VA_RESERVE_END;
> +
> +       res = platform_get_resource(ldev->pdev, IORESOURCE_MEM, 0);
> +       ldev->iomem = devm_ioremap_resource(ldev->dev, res);
> +       if (IS_ERR(ldev->iomem)) {
> +               dev_err(ldev->dev, "fail to ioremap iomem\n");
> +               err = PTR_ERR(ldev->iomem);
> +               goto err_out3;
> +       }
> +
> +       for (i = 0; i < lima_ip_num; i++) {
> +               err = lima_init_ip(ldev, i);
> +               if (err)
> +                       goto err_out4;
> +       }
> +
> +       err = lima_init_gp_pipe(ldev);
> +       if (err)
> +               goto err_out4;
> +
> +       err = lima_init_pp_pipe(ldev);
> +       if (err)
> +               goto err_out5;
> +
> +       return 0;
> +
> +err_out5:
> +       lima_fini_gp_pipe(ldev);
> +err_out4:
> +       while (--i >= 0)
> +               lima_fini_ip(ldev, i);
> +err_out3:
> +       if (ldev->dlbu_cpu)
> +               dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
> +                           ldev->dlbu_cpu, ldev->dlbu_dma);
> +err_out2:
> +       lima_vm_put(ldev->empty_vm);
> +err_out1:
> +       lima_regulator_fini(ldev);
> +err_out0:
> +       lima_clk_fini(ldev);
> +       return err;
> +}
> +
> +void lima_device_fini(struct lima_device *ldev)
> +{
> +       int i;
> +
> +       lima_fini_pp_pipe(ldev);
> +       lima_fini_gp_pipe(ldev);
> +
> +       for (i = lima_ip_num - 1; i >= 0; i--)
> +               lima_fini_ip(ldev, i);
> +
> +       if (ldev->dlbu_cpu)
> +               dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
> +                           ldev->dlbu_cpu, ldev->dlbu_dma);
> +
> +       lima_vm_put(ldev->empty_vm);
> +
> +       lima_regulator_fini(ldev);
> +
> +       lima_clk_fini(ldev);
> +}
> diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
> new file mode 100644
> index 000000000000..41499f28ae13
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_device.h
> @@ -0,0 +1,129 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_DEVICE_H__
> +#define __LIMA_DEVICE_H__
> +
> +#include <drm/drm_device.h>
> +#include <linux/delay.h>
> +
> +#include "lima_sched.h"
> +
> +enum lima_gpu_id {
> +       lima_gpu_mali400 = 0,
> +       lima_gpu_mali450,
> +       lima_gpu_num,
> +};
> +
> +enum lima_ip_id {
> +       lima_ip_pmu,
> +       lima_ip_gpmmu,
> +       lima_ip_ppmmu0,
> +       lima_ip_ppmmu1,
> +       lima_ip_ppmmu2,
> +       lima_ip_ppmmu3,
> +       lima_ip_ppmmu4,
> +       lima_ip_ppmmu5,
> +       lima_ip_ppmmu6,
> +       lima_ip_ppmmu7,
> +       lima_ip_gp,
> +       lima_ip_pp0,
> +       lima_ip_pp1,
> +       lima_ip_pp2,
> +       lima_ip_pp3,
> +       lima_ip_pp4,
> +       lima_ip_pp5,
> +       lima_ip_pp6,
> +       lima_ip_pp7,
> +       lima_ip_l2_cache0,
> +       lima_ip_l2_cache1,
> +       lima_ip_l2_cache2,
> +       lima_ip_dlbu,
> +       lima_ip_bcast,
> +       lima_ip_pp_bcast,
> +       lima_ip_ppmmu_bcast,
> +       lima_ip_num,
> +};
> +
> +struct lima_device;
> +
> +struct lima_ip {
> +       struct lima_device *dev;
> +       enum lima_ip_id id;
> +       bool present;
> +
> +       void __iomem *iomem;
> +       int irq;
> +
> +       union {
> +               /* gp/pp */
> +               bool async_reset;
> +               /* l2 cache */
> +               spinlock_t lock;
> +       } data;
> +};
> +
> +enum lima_pipe_id {
> +       lima_pipe_gp,
> +       lima_pipe_pp,
> +       lima_pipe_num,
> +};
> +
> +struct lima_device {
> +       struct device *dev;
> +       struct drm_device *ddev;
> +       struct platform_device *pdev;
> +
> +       enum lima_gpu_id id;
> +       int num_pp;
> +
> +       void __iomem *iomem;
> +       struct clk *clk_bus;
> +       struct clk *clk_gpu;
> +       struct reset_control *reset;
> +       struct regulator *regulator;
> +
> +       struct lima_ip ip[lima_ip_num];
> +       struct lima_sched_pipe pipe[lima_pipe_num];
> +
> +       struct lima_vm *empty_vm;
> +       uint64_t va_start;
> +       uint64_t va_end;
> +
> +       u32 *dlbu_cpu;
> +       dma_addr_t dlbu_dma;
> +};
> +
> +static inline struct lima_device *
> +to_lima_dev(struct drm_device *dev)
> +{
> +       return dev->dev_private;
> +}
> +
> +int lima_device_init(struct lima_device *ldev);
> +void lima_device_fini(struct lima_device *ldev);
> +
> +const char *lima_ip_name(struct lima_ip *ip);
> +
> +typedef int (*lima_poll_func_t)(struct lima_ip *);
> +
> +static inline int lima_poll_timeout(struct lima_ip *ip, lima_poll_func_t func,
> +                                   int sleep_us, int timeout_us)
> +{
> +       ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
> +
> +       might_sleep_if(sleep_us);
> +       while (1) {
> +               if (func(ip))
> +                       return 0;
> +
> +               if (timeout_us && ktime_compare(ktime_get(), timeout) > 0)
> +                       return -ETIMEDOUT;
> +
> +               if (sleep_us)
> +                       usleep_range((sleep_us >> 2) + 1, sleep_us);
> +       }
> +       return 0;
> +}
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
> new file mode 100644
> index 000000000000..b7739712f235
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_dlbu.c
> @@ -0,0 +1,56 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/io.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_dlbu.h"
> +#include "lima_vm.h"
> +#include "lima_regs.h"
> +
> +#define dlbu_write(reg, data) writel(data, ip->iomem + reg)
> +#define dlbu_read(reg) readl(ip->iomem + reg)
> +
> +void lima_dlbu_enable(struct lima_device *dev, int num_pp)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> +       int i, mask = 0;
> +
> +       for (i = 0; i < num_pp; i++) {
> +               struct lima_ip *pp = pipe->processor[i];
> +               mask |= 1 << (pp->id - lima_ip_pp0);
> +       }
> +
> +       dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, mask);
> +}
> +
> +void lima_dlbu_disable(struct lima_device *dev)
> +{
> +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> +       dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, 0);
> +}
> +
> +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
> +{
> +       dlbu_write(LIMA_DLBU_TLLIST_VBASEADDR, reg[0]);
> +       dlbu_write(LIMA_DLBU_FB_DIM, reg[1]);
> +       dlbu_write(LIMA_DLBU_TLLIST_CONF, reg[2]);
> +       dlbu_write(LIMA_DLBU_START_TILE_POS, reg[3]);
> +}
> +
> +int lima_dlbu_init(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +
> +       dlbu_write(LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR, dev->dlbu_dma | 1);
> +       dlbu_write(LIMA_DLBU_MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
> +
> +       return 0;
> +}
> +
> +void lima_dlbu_fini(struct lima_ip *ip)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
> new file mode 100644
> index 000000000000..60cba387cf30
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_dlbu.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_DLBU_H__
> +#define __LIMA_DLBU_H__
> +
> +struct lima_ip;
> +struct lima_device;
> +
> +void lima_dlbu_enable(struct lima_device *dev, int num_pp);
> +void lima_dlbu_disable(struct lima_device *dev);
> +
> +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
> +
> +int lima_dlbu_init(struct lima_ip *ip);
> +void lima_dlbu_fini(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
> new file mode 100644
> index 000000000000..e93bce16ee10
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_drv.c
> @@ -0,0 +1,353 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/module.h>
> +#include <linux/of_platform.h>
> +#include <linux/log2.h>
> +#include <drm/drm_prime.h>
> +#include <drm/lima_drm.h>
> +
> +#include "lima_drv.h"
> +#include "lima_gem.h"
> +#include "lima_gem_prime.h"
> +#include "lima_vm.h"
> +
> +int lima_sched_timeout_ms = 0;
> +int lima_sched_max_tasks = 32;
> +
> +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> +
> +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> +
> +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> +{

For panfrost, we generalized this to "get param" like other drivers.
Looks like you can only add 7 more items.

What about GPU revisions?

> +       struct drm_lima_info *info = data;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       switch (ldev->id) {
> +       case lima_gpu_mali400:
> +               info->gpu_id = LIMA_INFO_GPU_MALI400;

Personally, I'd return either the raw model or something like
'400' rather than making up some enumeration.

> +               break;
> +       case lima_gpu_mali450:
> +               info->gpu_id = LIMA_INFO_GPU_MALI450;
> +               break;
> +       default:
> +               return -ENODEV;
> +       }
> +       info->num_pp = ldev->pipe[lima_pipe_pp].num_processor;
> +       info->valid = 0;
> +       return 0;
> +}
> +
> +static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_create *args = data;
> +
> +       if (args->flags)
> +               return -EINVAL;
> +
> +       if (args->size == 0)
> +               return -EINVAL;
> +
> +       return lima_gem_create_handle(dev, file, args->size, args->flags, &args->handle);
> +}
> +
> +static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_info *args = data;
> +
> +       return lima_gem_get_info(file, args->handle, &args->va, &args->offset);
> +}
> +
> +static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_submit *args = data;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +       struct lima_drm_priv *priv = file->driver_priv;
> +       struct drm_lima_gem_submit_bo *bos;
> +       struct lima_sched_pipe *pipe;
> +       struct lima_sched_task *task;
> +       struct lima_ctx *ctx;
> +       struct lima_submit submit = {0};
> +       size_t size;
> +       int err = 0;
> +
> +       if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
> +               return -EINVAL;
> +
> +       if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE))
> +               return -EINVAL;
> +
> +       pipe = ldev->pipe + args->pipe;
> +       if (args->frame_size != pipe->frame_size)
> +               return -EINVAL;
> +
> +       bos = kvcalloc(args->nr_bos, sizeof(*submit.bos) + sizeof(*submit.lbos), GFP_KERNEL);
> +       if (!bos)
> +               return -ENOMEM;
> +
> +       size = args->nr_bos * sizeof(*submit.bos);
> +       if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
> +               err = -EFAULT;
> +               goto out0;
> +       }
> +
> +       task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
> +       if (!task) {
> +               err = -ENOMEM;
> +               goto out0;
> +       }
> +
> +       task->frame = task + 1;
> +       if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
> +               err = -EFAULT;
> +               goto out1;
> +       }
> +
> +       err = pipe->task_validate(pipe, task);
> +       if (err)
> +               goto out1;
> +
> +       ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
> +       if (!ctx) {
> +               err = -ENOENT;
> +               goto out1;
> +       }
> +
> +       submit.pipe = args->pipe;
> +       submit.bos = bos;
> +       submit.lbos = (void *)bos + size;
> +       submit.nr_bos = args->nr_bos;
> +       submit.task = task;
> +       submit.ctx = ctx;
> +       submit.flags = args->flags;
> +       submit.in_sync[0] = args->in_sync[0];
> +       submit.in_sync[1] = args->in_sync[1];
> +       submit.out_sync = args->out_sync;
> +
> +       err = lima_gem_submit(file, &submit);
> +
> +       lima_ctx_put(ctx);
> +out1:
> +       if (err)
> +               kmem_cache_free(pipe->task_slab, task);
> +out0:
> +       kvfree(bos);
> +       return err;
> +}
> +
> +static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_wait *args = data;
> +
> +       if (args->op & ~(LIMA_GEM_WAIT_READ|LIMA_GEM_WAIT_WRITE))
> +               return -EINVAL;
> +
> +       return lima_gem_wait(file, args->handle, args->op, args->timeout_ns);
> +}
> +
> +static int lima_ioctl_ctx(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_ctx *args = data;
> +       struct lima_drm_priv *priv = file->driver_priv;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       if (args->op == LIMA_CTX_OP_CREATE)
> +               return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
> +       else if (args->op == LIMA_CTX_OP_FREE)
> +               return lima_ctx_free(&priv->ctx_mgr, args->id);

Wasn't it suggested in the prior version to not mux these? Make them 2 ioctls.

> +
> +       return -EINVAL;
> +}
> +
> +static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
> +{
> +       int err;
> +       struct lima_drm_priv *priv;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +       if (!priv)
> +               return -ENOMEM;
> +
> +       priv->vm = lima_vm_create(ldev);
> +       if (!priv->vm) {
> +               err = -ENOMEM;
> +               goto err_out0;
> +       }
> +
> +       lima_ctx_mgr_init(&priv->ctx_mgr);
> +
> +       file->driver_priv = priv;
> +       return 0;
> +
> +err_out0:
> +       kfree(priv);
> +       return err;
> +}
> +
> +static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
> +{
> +       struct lima_drm_priv *priv = file->driver_priv;
> +
> +       lima_ctx_mgr_fini(&priv->ctx_mgr);
> +       lima_vm_put(priv->vm);
> +       kfree(priv);
> +}
> +
> +static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
> +       DRM_IOCTL_DEF_DRV(LIMA_INFO, lima_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_CTX, lima_ioctl_ctx, DRM_AUTH|DRM_RENDER_ALLOW),
> +};
> +
> +static const struct file_operations lima_drm_driver_fops = {
> +       .owner              = THIS_MODULE,
> +       .open               = drm_open,
> +       .release            = drm_release,
> +       .unlocked_ioctl     = drm_ioctl,
> +#ifdef CONFIG_COMPAT
> +       .compat_ioctl       = drm_compat_ioctl,
> +#endif
> +       .mmap               = lima_gem_mmap,
> +};
> +
> +static struct drm_driver lima_drm_driver = {
> +       .driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | DRIVER_SYNCOBJ,
> +       .open               = lima_drm_driver_open,
> +       .postclose          = lima_drm_driver_postclose,
> +       .ioctls             = lima_drm_driver_ioctls,
> +       .num_ioctls         = ARRAY_SIZE(lima_drm_driver_ioctls),
> +       .fops               = &lima_drm_driver_fops,
> +       .gem_free_object_unlocked = lima_gem_free_object,
> +       .gem_open_object    = lima_gem_object_open,
> +       .gem_close_object   = lima_gem_object_close,
> +       .gem_vm_ops         = &lima_gem_vm_ops,
> +       .name               = "lima",
> +       .desc               = "lima DRM",
> +       .date               = "20190217",
> +       .major              = 1,
> +       .minor              = 0,
> +       .patchlevel         = 0,
> +
> +       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> +       .gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
> +       .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> +       .gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
> +       .gem_prime_mmap = lima_gem_prime_mmap,
> +};
> +
> +static int lima_pdev_probe(struct platform_device *pdev)
> +{
> +       struct lima_device *ldev;
> +       struct drm_device *ddev;
> +       int err;
> +
> +       ldev = devm_kzalloc(&pdev->dev, sizeof(*ldev), GFP_KERNEL);
> +       if (!ldev)
> +               return -ENOMEM;
> +
> +       ldev->pdev = pdev;
> +       ldev->dev = &pdev->dev;
> +       ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev);
> +
> +       platform_set_drvdata(pdev, ldev);
> +
> +       /* Allocate and initialize the DRM device. */
> +       ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev);
> +       if (IS_ERR(ddev))
> +               return PTR_ERR(ddev);
> +
> +       ddev->dev_private = ldev;
> +       ldev->ddev = ddev;
> +
> +       err = lima_device_init(ldev);
> +       if (err) {
> +               dev_err(&pdev->dev, "Fatal error during GPU init\n");
> +               goto err_out0;
> +       }
> +
> +       /*
> +        * Register the DRM device with the core and the connectors with
> +        * sysfs.
> +        */
> +       err = drm_dev_register(ddev, 0);
> +       if (err < 0)
> +               goto err_out1;
> +
> +       return 0;
> +
> +err_out1:
> +       lima_device_fini(ldev);
> +err_out0:
> +       drm_dev_put(ddev);
> +       return err;
> +}
> +
> +static int lima_pdev_remove(struct platform_device *pdev)
> +{
> +       struct lima_device *ldev = platform_get_drvdata(pdev);
> +       struct drm_device *ddev = ldev->ddev;
> +
> +       drm_dev_unregister(ddev);
> +       lima_device_fini(ldev);
> +       drm_dev_put(ddev);
> +       return 0;
> +}
> +
> +static const struct of_device_id dt_match[] = {
> +       { .compatible = "arm,mali-400", .data = (void *)lima_gpu_mali400 },
> +       { .compatible = "arm,mali-450", .data = (void *)lima_gpu_mali450 },
> +       {}
> +};
> +MODULE_DEVICE_TABLE(of, dt_match);
> +
> +static struct platform_driver lima_platform_driver = {
> +       .probe      = lima_pdev_probe,
> +       .remove     = lima_pdev_remove,
> +       .driver     = {
> +               .name   = "lima",
> +               .of_match_table = dt_match,
> +       },
> +};
> +
> +static void lima_check_module_param(void)
> +{
> +       if (lima_sched_max_tasks < 4)
> +               lima_sched_max_tasks = 4;
> +       else
> +               lima_sched_max_tasks = roundup_pow_of_two(lima_sched_max_tasks);
> +}
> +
> +static int __init lima_init(void)
> +{
> +       int ret;
> +
> +       lima_check_module_param();
> +       ret = lima_sched_slab_init();
> +       if (ret)
> +               return ret;
> +
> +       ret = platform_driver_register(&lima_platform_driver);
> +       if (ret)
> +               lima_sched_slab_fini();
> +
> +       return ret;
> +}
> +module_init(lima_init);
> +
> +static void __exit lima_exit(void)
> +{
> +       platform_driver_unregister(&lima_platform_driver);
> +       lima_sched_slab_fini();
> +}
> +module_exit(lima_exit);
> +
> +MODULE_AUTHOR("Lima Project Developers");
> +MODULE_DESCRIPTION("Lima DRM Driver");
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h
> new file mode 100644
> index 000000000000..640a548cd617
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_drv.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_DRV_H__
> +#define __LIMA_DRV_H__
> +
> +#include <drm/drmP.h>
> +
> +#include "lima_ctx.h"
> +
> +extern int lima_sched_timeout_ms;
> +extern int lima_sched_max_tasks;
> +
> +struct lima_vm;
> +struct lima_bo;
> +struct lima_sched_task;
> +
> +struct drm_lima_gem_submit_bo;
> +
> +struct lima_drm_priv {
> +       struct lima_vm *vm;
> +       struct lima_ctx_mgr ctx_mgr;
> +};
> +
> +struct lima_submit {
> +       struct lima_ctx *ctx;
> +       int pipe;
> +       u32 flags;
> +
> +       struct drm_lima_gem_submit_bo *bos;
> +       struct lima_bo **lbos;
> +       u32 nr_bos;
> +
> +       u32 in_sync[2];
> +       u32 out_sync;
> +
> +       struct lima_sched_task *task;
> +};
> +
> +static inline struct lima_drm_priv *
> +to_lima_drm_priv(struct drm_file *file)
> +{
> +       return file->driver_priv;
> +}
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> new file mode 100644
> index 000000000000..666960345566
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_gem.c
> @@ -0,0 +1,379 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_syncobj.h>
> +#include <drm/drm_utils.h>
> +#include <linux/sync_file.h>
> +#include <linux/pfn_t.h>
> +
> +#include <drm/lima_drm.h>
> +
> +#include "lima_drv.h"
> +#include "lima_gem.h"
> +#include "lima_gem_prime.h"
> +#include "lima_vm.h"
> +#include "lima_object.h"
> +
> +int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
> +                          u32 size, u32 flags, u32 *handle)
> +{
> +       int err;
> +       struct lima_bo *bo;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       bo = lima_bo_create(ldev, size, flags, NULL, NULL);
> +       if (IS_ERR(bo))
> +               return PTR_ERR(bo);
> +
> +       err = drm_gem_handle_create(file, &bo->gem, handle);
> +
> +       /* drop reference from allocate - handle holds it now */
> +       drm_gem_object_put_unlocked(&bo->gem);
> +
> +       return err;
> +}
> +
> +void lima_gem_free_object(struct drm_gem_object *obj)
> +{
> +       struct lima_bo *bo = to_lima_bo(obj);
> +
> +       if (!list_empty(&bo->va))
> +               dev_err(obj->dev->dev, "lima gem free bo still has va\n");
> +
> +       lima_bo_destroy(bo);
> +}
> +
> +int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
> +{
> +       struct lima_bo *bo = to_lima_bo(obj);
> +       struct lima_drm_priv *priv = to_lima_drm_priv(file);
> +       struct lima_vm *vm = priv->vm;
> +
> +       return lima_vm_bo_add(vm, bo, true);
> +}
> +
> +void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file)
> +{
> +       struct lima_bo *bo = to_lima_bo(obj);
> +       struct lima_drm_priv *priv = to_lima_drm_priv(file);
> +       struct lima_vm *vm = priv->vm;
> +
> +       lima_vm_bo_del(vm, bo);
> +}
> +
> +int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
> +{
> +       struct drm_gem_object *obj;
> +       struct lima_bo *bo;
> +       struct lima_drm_priv *priv = to_lima_drm_priv(file);
> +       struct lima_vm *vm = priv->vm;
> +       int err;
> +
> +       obj = drm_gem_object_lookup(file, handle);
> +       if (!obj)
> +               return -ENOENT;
> +
> +       bo = to_lima_bo(obj);
> +
> +       *va = lima_vm_get_va(vm, bo);
> +
> +       err = drm_gem_create_mmap_offset(obj);
> +       if (!err)
> +               *offset = drm_vma_node_offset_addr(&obj->vma_node);
> +
> +       drm_gem_object_put_unlocked(obj);
> +       return err;
> +}
> +
> +static vm_fault_t lima_gem_fault(struct vm_fault *vmf)
> +{
> +       struct vm_area_struct *vma = vmf->vma;
> +       struct drm_gem_object *obj = vma->vm_private_data;
> +       struct lima_bo *bo = to_lima_bo(obj);
> +       pfn_t pfn;
> +       pgoff_t pgoff;
> +
> +       /* We don't use vmf->pgoff since that has the fake offset: */
> +       pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
> +       pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
> +
> +       return vmf_insert_mixed(vma, vmf->address, pfn);
> +}
> +
> +const struct vm_operations_struct lima_gem_vm_ops = {
> +       .fault = lima_gem_fault,
> +       .open = drm_gem_vm_open,
> +       .close = drm_gem_vm_close,
> +};
> +
> +void lima_set_vma_flags(struct vm_area_struct *vma)
> +{
> +       pgprot_t prot = vm_get_page_prot(vma->vm_flags);
> +
> +       vma->vm_flags |= VM_MIXEDMAP;
> +       vma->vm_flags &= ~VM_PFNMAP;
> +       vma->vm_page_prot = pgprot_writecombine(prot);
> +}
> +
> +int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +       int ret;
> +
> +       ret = drm_gem_mmap(filp, vma);
> +       if (ret)
> +               return ret;
> +
> +       lima_set_vma_flags(vma);
> +       return 0;
> +}
> +
> +static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
> +                           bool write, bool explicit)
> +{
> +       int err = 0;
> +
> +       if (!write) {
> +               err = reservation_object_reserve_shared(bo->gem.resv, 1);
> +               if (err)
> +                       return err;
> +       }
> +
> +       /* explicit sync use user passed dep fence */
> +       if (explicit)
> +               return 0;
> +
> +       /* implicit sync use bo fence in resv obj */
> +       if (write) {
> +               unsigned nr_fences;
> +               struct dma_fence **fences;
> +               int i;
> +
> +               err = reservation_object_get_fences_rcu(
> +                       bo->gem.resv, NULL, &nr_fences, &fences);
> +               if (err || !nr_fences)
> +                       return err;
> +
> +               for (i = 0; i < nr_fences; i++) {
> +                       err = lima_sched_task_add_dep(task, fences[i]);
> +                       if (err)
> +                               break;
> +               }
> +
> +               /* for error case free remaining fences */
> +               for ( ; i < nr_fences; i++)
> +                       dma_fence_put(fences[i]);
> +
> +               kfree(fences);
> +       }
> +       else {
> +               struct dma_fence *fence;
> +               fence = reservation_object_get_excl_rcu(bo->gem.resv);
> +               if (fence) {
> +                       err = lima_sched_task_add_dep(task, fence);
> +                       if (err)
> +                               dma_fence_put(fence);
> +               }
> +       }
> +
> +       return err;
> +}
> +
> +static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
> +                            struct ww_acquire_ctx *ctx)
> +{
> +       int i, ret = 0, contended, slow_locked = -1;
> +
> +       ww_acquire_init(ctx, &reservation_ww_class);
> +
> +retry:
> +       for (i = 0; i < nr_bos; i++) {
> +               if (i == slow_locked) {
> +                       slow_locked = -1;
> +                       continue;
> +               }
> +
> +               ret = ww_mutex_lock_interruptible(&bos[i]->gem.resv->lock, ctx);
> +               if (ret < 0) {
> +                       contended = i;
> +                       goto err;
> +               }
> +       }
> +
> +       ww_acquire_done(ctx);
> +       return 0;
> +
> +err:
> +       for (i--; i >= 0; i--)
> +               ww_mutex_unlock(&bos[i]->gem.resv->lock);
> +
> +       if (slow_locked >= 0)
> +               ww_mutex_unlock(&bos[slow_locked]->gem.resv->lock);
> +
> +       if (ret == -EDEADLK) {
> +               /* we lost out in a seqno race, lock and retry.. */
> +               ret = ww_mutex_lock_slow_interruptible(
> +                       &bos[contended]->gem.resv->lock, ctx);
> +               if (!ret) {
> +                       slow_locked = contended;
> +                       goto retry;
> +               }
> +       }
> +       ww_acquire_fini(ctx);
> +
> +       return ret;
> +}
> +
> +static void lima_gem_unlock_bos(struct lima_bo **bos, u32 nr_bos,
> +                               struct ww_acquire_ctx *ctx)
> +{
> +       int i;
> +
> +       for (i = 0; i < nr_bos; i++)
> +               ww_mutex_unlock(&bos[i]->gem.resv->lock);
> +       ww_acquire_fini(ctx);
> +}
> +
> +static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
> +{
> +       int i, err;
> +
> +       for (i = 0; i < ARRAY_SIZE(submit->in_sync); i++) {
> +               struct dma_fence *fence = NULL;
> +
> +               if (!submit->in_sync[i])
> +                       continue;
> +
> +               err = drm_syncobj_find_fence(file, submit->in_sync[i],
> +                                            0, 0, &fence);
> +               if (err)
> +                       return err;
> +
> +               err = lima_sched_task_add_dep(submit->task, fence);
> +               if (err) {
> +                       dma_fence_put(fence);
> +                       return err;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
> +{
> +       int i, err = 0;
> +       struct ww_acquire_ctx ctx;
> +       struct lima_drm_priv *priv = to_lima_drm_priv(file);
> +       struct lima_vm *vm = priv->vm;
> +       struct drm_syncobj *out_sync = NULL;
> +       struct dma_fence *fence;
> +       struct lima_bo **bos = submit->lbos;
> +
> +       if (submit->out_sync) {
> +               out_sync = drm_syncobj_find(file, submit->out_sync);
> +               if (!out_sync)
> +                       return -ENOENT;
> +       }
> +
> +       for (i = 0; i < submit->nr_bos; i++) {
> +               struct drm_gem_object *obj;
> +               struct lima_bo *bo;
> +
> +               obj = drm_gem_object_lookup(file, submit->bos[i].handle);
> +               if (!obj) {
> +                       err = -ENOENT;
> +                       goto err_out0;
> +               }
> +
> +               bo = to_lima_bo(obj);
> +
> +               /* increase refcnt of gpu va map to prevent unmapped when executing,
> +                * will be decreased when task done */
> +               err = lima_vm_bo_add(vm, bo, false);
> +               if (err) {
> +                       drm_gem_object_put_unlocked(obj);
> +                       goto err_out0;
> +               }
> +
> +               bos[i] = bo;
> +       }
> +
> +       err = lima_gem_lock_bos(bos, submit->nr_bos, &ctx);
> +       if (err)
> +               goto err_out0;
> +
> +       err = lima_sched_task_init(
> +               submit->task, submit->ctx->context + submit->pipe,
> +               bos, submit->nr_bos, vm);
> +       if (err)
> +               goto err_out1;
> +
> +       err = lima_gem_add_deps(file, submit);
> +       if (err)
> +               goto err_out2;
> +
> +       for (i = 0; i < submit->nr_bos; i++) {
> +               err = lima_gem_sync_bo(
> +                       submit->task, bos[i],
> +                       submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE,
> +                       submit->flags & LIMA_SUBMIT_FLAG_EXPLICIT_FENCE);
> +               if (err)
> +                       goto err_out2;
> +       }
> +
> +       fence = lima_sched_context_queue_task(
> +               submit->ctx->context + submit->pipe, submit->task);
> +
> +       for (i = 0; i < submit->nr_bos; i++) {
> +               if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
> +                       reservation_object_add_excl_fence(bos[i]->gem.resv, fence);
> +               else
> +                       reservation_object_add_shared_fence(bos[i]->gem.resv, fence);
> +       }
> +
> +       lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
> +
> +       for (i = 0; i < submit->nr_bos; i++)
> +               drm_gem_object_put_unlocked(&bos[i]->gem);
> +
> +       if (out_sync) {
> +               drm_syncobj_replace_fence(out_sync, fence);
> +               drm_syncobj_put(out_sync);
> +       }
> +
> +       dma_fence_put(fence);
> +
> +       return 0;
> +
> +err_out2:
> +       lima_sched_task_fini(submit->task);
> +err_out1:
> +       lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
> +err_out0:
> +       for (i = 0; i < submit->nr_bos; i++) {
> +               if (!bos[i])
> +                       break;
> +               lima_vm_bo_del(vm, bos[i]);
> +               drm_gem_object_put_unlocked(&bos[i]->gem);
> +       }
> +       if (out_sync)
> +               drm_syncobj_put(out_sync);
> +       return err;
> +}
> +
> +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns)
> +{
> +       bool write = op & LIMA_GEM_WAIT_WRITE;
> +       long ret, timeout;
> +
> +       if (!op)
> +               return 0;
> +
> +       timeout = drm_timeout_abs_to_jiffies(timeout_ns);
> +
> +       ret = drm_gem_reservation_object_wait(file, handle, write, timeout);
> +       if (ret == 0)
> +               ret = timeout ? -ETIMEDOUT : -EBUSY;
> +
> +       return ret;
> +}
> diff --git a/drivers/gpu/drm/lima/lima_gem.h b/drivers/gpu/drm/lima/lima_gem.h
> new file mode 100644
> index 000000000000..f1c4658100a8
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_gem.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_GEM_H__
> +#define __LIMA_GEM_H__
> +
> +struct lima_bo;
> +struct lima_submit;
> +
> +extern const struct vm_operations_struct lima_gem_vm_ops;
> +
> +struct lima_bo *lima_gem_create_bo(struct drm_device *dev, u32 size, u32 flags);
> +int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
> +                          u32 size, u32 flags, u32 *handle);
> +void lima_gem_free_object(struct drm_gem_object *obj);
> +int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file);
> +void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file);
> +int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset);
> +int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma);
> +int lima_gem_submit(struct drm_file *file, struct lima_submit *submit);
> +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns);
> +
> +void lima_set_vma_flags(struct vm_area_struct *vma);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_gem_prime.c b/drivers/gpu/drm/lima/lima_gem_prime.c
> new file mode 100644
> index 000000000000..fe8348a055f6
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_gem_prime.c
> @@ -0,0 +1,47 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/dma-buf.h>
> +#include <drm/drm_prime.h>
> +#include <drm/drm_drv.h>
> +#include <drm/drm_file.h>
> +
> +#include "lima_device.h"
> +#include "lima_object.h"
> +#include "lima_gem.h"
> +#include "lima_gem_prime.h"
> +
> +struct drm_gem_object *lima_gem_prime_import_sg_table(
> +       struct drm_device *dev, struct dma_buf_attachment *attach,
> +       struct sg_table *sgt)
> +{
> +       struct lima_device *ldev = to_lima_dev(dev);
> +       struct lima_bo *bo;
> +
> +       bo = lima_bo_create(ldev, attach->dmabuf->size, 0, sgt,
> +                           attach->dmabuf->resv);
> +       if (IS_ERR(bo))
> +               return ERR_CAST(bo);
> +
> +       return &bo->gem;
> +}
> +
> +struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj)
> +{
> +       struct lima_bo *bo = to_lima_bo(obj);
> +       int npages = obj->size >> PAGE_SHIFT;
> +
> +       return drm_prime_pages_to_sg(bo->pages, npages);
> +}
> +
> +/*
> + * Map the whole GEM object into @vma with drm_gem_mmap_obj() and, only if
> + * that succeeded, apply the driver's vma flags.
> + *
> + * Returns 0 on success or the error from drm_gem_mmap_obj().
> + */
> +int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> +{
> +       int err = drm_gem_mmap_obj(obj, obj->size, vma);
> +
> +       if (!err)
> +               lima_set_vma_flags(vma);
> +
> +       return err;
> +}
> diff --git a/drivers/gpu/drm/lima/lima_gem_prime.h b/drivers/gpu/drm/lima/lima_gem_prime.h
> new file mode 100644
> index 000000000000..ceb1be9840a5
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_gem_prime.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_GEM_PRIME_H__
> +#define __LIMA_GEM_PRIME_H__
> +
> +struct drm_gem_object *lima_gem_prime_import_sg_table(
> +       struct drm_device *dev, struct dma_buf_attachment *attach,
> +       struct sg_table *sgt);
> +struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj);
> +int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_gp.c b/drivers/gpu/drm/lima/lima_gp.c
> new file mode 100644
> index 000000000000..4f4e9f5f7e19
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_gp.c
> @@ -0,0 +1,282 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/interrupt.h>
> +#include <linux/iopoll.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +
> +#include <drm/lima_drm.h>
> +
> +#include "lima_device.h"
> +#include "lima_gp.h"
> +#include "lima_regs.h"
> +
> +/*
> + * MMIO accessors for the GP register block. Both expand to an access at
> + * ip->iomem + reg, so a `struct lima_ip *ip` must be in scope at the call.
> + */
> +#define gp_write(reg, data) writel(data, ip->iomem + reg)
> +#define gp_read(reg) readl(ip->iomem + reg)
> +
> +/*
> + * Interrupt handler for the GP; @data is the struct lima_ip passed to
> + * devm_request_irq().
> + *
> + * Returns IRQ_NONE when LIMA_GP_INT_STAT reads zero, IRQ_HANDLED otherwise.
> + */
> +static irqreturn_t lima_gp_irq_handler(int irq, void *data)
> +{
> +       struct lima_ip *ip = data;
> +       struct lima_device *dev = ip->dev;
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> +       u32 state = gp_read(LIMA_GP_INT_STAT);
> +       u32 status = gp_read(LIMA_GP_STATUS);
> +       bool done = false;
> +
> +       /* irq line is requested IRQF_SHARED: nothing pending means not ours */
> +       if (!state)
> +               return IRQ_NONE;
> +
> +       if (state & LIMA_GP_IRQ_MASK_ERROR) {
> +               dev_err(dev->dev, "gp error irq state=%x status=%x\n",
> +                       state, status);
> +
> +               /* mask all interrupts before hard reset */
> +               gp_write(LIMA_GP_INT_MASK, 0);
> +
> +               /* an error ends the task; flag it on the pipe */
> +               pipe->error = true;
> +               done = true;
> +       }
> +       else {
> +               /*
> +                * The task is finished once an end-of-command-list irq has
> +                * fired and neither the VS nor the PLBU is still active.
> +                */
> +               bool valid = state & (LIMA_GP_IRQ_VS_END_CMD_LST |
> +                                     LIMA_GP_IRQ_PLBU_END_CMD_LST);
> +               bool active = status & (LIMA_GP_STATUS_VS_ACTIVE |
> +                                       LIMA_GP_STATUS_PLBU_ACTIVE);
> +               done = valid && !active;
> +       }
> +
> +       /* acknowledge exactly the bits that were observed above */
> +       gp_write(LIMA_GP_INT_CLEAR, state);
> +
> +       if (done)
> +               lima_sched_pipe_task_done(pipe);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/*
> + * Issue a GP soft reset without waiting for it to finish. Completion is
> + * collected later by lima_gp_soft_reset_async_wait(). Does nothing when a
> + * reset is already marked pending in ip->data.async_reset.
> + */
> +static void lima_gp_soft_reset_async(struct lima_ip *ip)
> +{
> +       if (ip->data.async_reset)
> +               return;
> +
> +       /* mask irqs and drop a stale RESET_COMPLETED before issuing the reset */
> +       gp_write(LIMA_GP_INT_MASK, 0);
> +       gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_RESET_COMPLETED);
> +       gp_write(LIMA_GP_CMD, LIMA_GP_CMD_SOFT_RESET);
> +       ip->data.async_reset = true;
> +}
> +
> +/*
> + * Wait for a soft reset started by lima_gp_soft_reset_async().
> + *
> + * Returns 0 when no reset is pending or the reset completed, otherwise the
> + * error from readl_poll_timeout(). On timeout ip->data.async_reset is left
> + * set and the interrupt mask is not restored.
> + */
> +static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int err;
> +       u32 v;
> +
> +       if (!ip->data.async_reset)
> +               return 0;
> +
> +       /* poll the raw status for RESET_COMPLETED: no sleep, 100us timeout */
> +       err = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, v,
> +                                v & LIMA_GP_IRQ_RESET_COMPLETED,
> +                                0, 100);
> +       if (err) {
> +               dev_err(dev->dev, "gp soft reset time out\n");
> +               return err;
> +       }
> +
> +       /* clear everything pending, then unmask the interrupts in use */
> +       gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> +       gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
> +
> +       ip->data.async_reset = false;
> +       return 0;
> +}
> +
> +/*
> + * Sanity-check the register frame of a GP task before it is queued.
> + *
> + * Returns -EINVAL when any of the VS command list, PLBU command list or
> + * PLBU allocation ranges has its start above its end, or when both command
> + * lists are empty (start == end); returns 0 otherwise. @pipe is unused.
> + */
> +static int lima_gp_task_validate(struct lima_sched_pipe *pipe,
> +                                struct lima_sched_task *task)
> +{
> +       struct drm_lima_gp_frame *gp_frame = task->frame;
> +       u32 *regs = gp_frame->frame;
> +       u32 vscl_start = regs[LIMA_GP_VSCL_START_ADDR >> 2];
> +       u32 vscl_end = regs[LIMA_GP_VSCL_END_ADDR >> 2];
> +       u32 plbucl_start = regs[LIMA_GP_PLBUCL_START_ADDR >> 2];
> +       u32 plbucl_end = regs[LIMA_GP_PLBUCL_END_ADDR >> 2];
> +       u32 alloc_start = regs[LIMA_GP_PLBU_ALLOC_START_ADDR >> 2];
> +       u32 alloc_end = regs[LIMA_GP_PLBU_ALLOC_END_ADDR >> 2];
> +
> +       (void)pipe;
> +
> +       /* every range must be ordered */
> +       if (vscl_start > vscl_end)
> +               return -EINVAL;
> +       if (plbucl_start > plbucl_end)
> +               return -EINVAL;
> +       if (alloc_start > alloc_end)
> +               return -EINVAL;
> +
> +       /* at least one of the two command lists must be non-empty */
> +       if (vscl_start == vscl_end && plbucl_start == plbucl_end)
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +
> +/*
> + * Program the GP with the task's register frame and start it.
> + *
> + * The VS and/or PLBU units are started only when their command list is
> + * non-empty (start address != end address) in the frame.
> + */
> +static void lima_gp_task_run(struct lima_sched_pipe *pipe,
> +                            struct lima_sched_task *task)
> +{
> +       struct lima_ip *ip = pipe->processor[0];
> +       struct drm_lima_gp_frame *frame = task->frame;
> +       u32 *f = frame->frame;
> +       u32 cmd = 0;
> +       int i;
> +
> +       if (f[LIMA_GP_VSCL_START_ADDR >> 2] !=
> +           f[LIMA_GP_VSCL_END_ADDR >> 2])
> +               cmd |= LIMA_GP_CMD_START_VS;
> +       if (f[LIMA_GP_PLBUCL_START_ADDR >> 2] !=
> +           f[LIMA_GP_PLBUCL_END_ADDR >> 2])
> +               cmd |= LIMA_GP_CMD_START_PLBU;
> +
> +       /*
> +        * Before any hw ops, wait for the async soft reset issued after the
> +        * last successful task. The return value is ignored here.
> +        */
> +       lima_gp_soft_reset_async_wait(ip);
> +
> +       /* frame words go to consecutive registers from VSCL_START_ADDR on */
> +       for (i = 0; i < LIMA_GP_FRAME_REG_NUM; i++)
> +               writel(f[i], ip->iomem + LIMA_GP_VSCL_START_ADDR + i * 4);
> +
> +       gp_write(LIMA_GP_CMD, LIMA_GP_CMD_UPDATE_PLBU_ALLOC);
> +       gp_write(LIMA_GP_CMD, cmd);
> +}
> +
> +/*
> + * Poll callback for lima_gp_hard_reset(): write a marker value to
> + * LIMA_GP_PERF_CNT_0_LIMIT and return non-zero when it reads back intact.
> + * NOTE(review): presumably the register only accepts writes again once the
> + * reset has finished - confirm against hardware documentation.
> + */
> +static int lima_gp_hard_reset_poll(struct lima_ip *ip)
> +{
> +       gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC01A0000);
> +       return gp_read(LIMA_GP_PERF_CNT_0_LIMIT) == 0xC01A0000;
> +}
> +
> +/*
> + * Hard-reset the GP and restore its interrupt configuration.
> + *
> + * Returns 0 on success or the error from lima_poll_timeout(); on failure
> + * the interrupt mask stays at 0.
> + */
> +static int lima_gp_hard_reset(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int ret;
> +
> +       /* seed a value that differs from the marker the poll callback writes */
> +       gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC0FFE000);
> +       gp_write(LIMA_GP_INT_MASK, 0);
> +       gp_write(LIMA_GP_CMD, LIMA_GP_CMD_RESET);
> +       /* NOTE(review): 10/100 are presumably sleep and timeout in us - confirm */
> +       ret = lima_poll_timeout(ip, lima_gp_hard_reset_poll, 10, 100);
> +       if (ret) {
> +               dev_err(dev->dev, "gp hard reset timeout\n");
> +               return ret;
> +       }
> +
> +       /* undo the marker, drop pending irqs and unmask the ones in use */
> +       gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0);
> +       gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> +       gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
> +       return 0;
> +}
> +
> +/*
> + * pipe->task_fini hook: start an asynchronous soft reset of the GP
> + * (pipe->processor[0]); lima_gp_task_run() waits for it before touching hw.
> + */
> +static void lima_gp_task_fini(struct lima_sched_pipe *pipe)
> +{
> +       lima_gp_soft_reset_async(pipe->processor[0]);
> +}
> +
> +/*
> + * pipe->task_error hook: log the GP interrupt state and status registers,
> + * then hard-reset the GP. The result of the hard reset is not checked.
> + */
> +static void lima_gp_task_error(struct lima_sched_pipe *pipe)
> +{
> +       struct lima_ip *ip = pipe->processor[0];
> +
> +       dev_err(ip->dev->dev, "gp task error int_state=%x status=%x\n",
> +               gp_read(LIMA_GP_INT_STAT), gp_read(LIMA_GP_STATUS));
> +
> +       lima_gp_hard_reset(ip);
> +}
> +
> +/* pipe->task_mmu_error hook: report the current task as done to the pipe. */
> +static void lima_gp_task_mmu_error(struct lima_sched_pipe *pipe)
> +{
> +       lima_sched_pipe_task_done(pipe);
> +}
> +
> +/*
> + * Log the GP's product name and revision as decoded from LIMA_GP_VERSION:
> + * bits 31:16 select the product, bits 15:8 hold the major and bits 7:0 the
> + * minor revision. Unrecognised product ids are reported as "unknown".
> + */
> +static void lima_gp_print_version(struct lima_ip *ip)
> +{
> +       u32 version, major, minor;
> +       const char *name;
> +
> +       version = gp_read(LIMA_GP_VERSION);
> +       major = (version >> 8) & 0xFF;
> +       minor = version & 0xFF;
> +       switch (version >> 16) {
> +       case 0xA07:
> +               name = "mali200";
> +               break;
> +       case 0xC07:
> +               name = "mali300";
> +               break;
> +       case 0xB07:
> +               name = "mali400";
> +               break;
> +       case 0xD07:
> +               name = "mali450";
> +               break;
> +       default:
> +               name = "unknown";
> +               break;
> +       }
> +       /* major/minor are u32, so print them unsigned */
> +       dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
> +                lima_ip_name(ip), name, major, minor);
> +}
> +
> +/*
> + * Slab cache for GP tasks. It is created by the first lima_gp_pipe_init()
> + * call and destroyed when lima_gp_pipe_fini() drops the count back to zero.
> + */
> +static struct kmem_cache *lima_gp_task_slab = NULL;
> +static int lima_gp_task_slab_refcnt = 0;
> +
> +/*
> + * Bring up the GP: print its version, soft-reset it synchronously and
> + * install the (shared, device-managed) interrupt handler.
> + *
> + * Returns 0 on success, or the error from the soft reset wait or from
> + * devm_request_irq().
> + */
> +int lima_gp_init(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int err;
> +
> +       lima_gp_print_version(ip);
> +
> +       /* force the flag off so the reset below is actually issued */
> +       ip->data.async_reset = false;
> +       lima_gp_soft_reset_async(ip);
> +       err = lima_gp_soft_reset_async_wait(ip);
> +       if (err)
> +               return err;
> +
> +       err = devm_request_irq(dev->dev, ip->irq, lima_gp_irq_handler,
> +                              IRQF_SHARED, lima_ip_name(ip), ip);
> +       if (err) {
> +               dev_err(dev->dev, "gp %s fail to request irq\n",
> +                       lima_ip_name(ip));
> +               return err;
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * Counterpart of lima_gp_init(). Currently a no-op: the irq was requested
> + * with devm_request_irq() and no other teardown is performed here.
> + */
> +void lima_gp_fini(struct lima_ip *ip)
> +{
> +
> +}
> +
> +/*
> + * Set up the GP scheduler pipe of @dev: make sure the shared task slab
> + * exists, take a reference on it and install the GP task callbacks.
> + *
> + * Returns 0 on success or -ENOMEM when the slab cache cannot be created.
> + */
> +int lima_gp_pipe_init(struct lima_device *dev)
> +{
> +       int frame_size = sizeof(struct drm_lima_gp_frame);
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> +
> +       if (!lima_gp_task_slab) {
> +               /*
> +                * Each object is a lima_sched_task followed by the frame;
> +                * only the frame part is declared as the usercopy region.
> +                */
> +               lima_gp_task_slab = kmem_cache_create_usercopy(
> +                       "lima_gp_task", sizeof(struct lima_sched_task) + frame_size,
> +                       0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
> +                       frame_size, NULL);
> +               if (!lima_gp_task_slab)
> +                       return -ENOMEM;
> +       }
> +       lima_gp_task_slab_refcnt++;
> +
> +       pipe->frame_size = frame_size;
> +       pipe->task_slab = lima_gp_task_slab;
> +
> +       pipe->task_validate = lima_gp_task_validate;
> +       pipe->task_run = lima_gp_task_run;
> +       pipe->task_fini = lima_gp_task_fini;
> +       pipe->task_error = lima_gp_task_error;
> +       pipe->task_mmu_error = lima_gp_task_mmu_error;
> +
> +       return 0;
> +}
> +
> +/*
> + * Drop one reference on the shared GP task slab and destroy the cache when
> + * the count reaches zero. @dev itself is not touched.
> + */
> +void lima_gp_pipe_fini(struct lima_device *dev)
> +{
> +       if (!--lima_gp_task_slab_refcnt) {
> +               kmem_cache_destroy(lima_gp_task_slab);
> +               lima_gp_task_slab = NULL;
> +       }
> +}
> diff --git a/drivers/gpu/drm/lima/lima_gp.h b/drivers/gpu/drm/lima/lima_gp.h
> new file mode 100644
> index 000000000000..55bc48ec7603
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_gp.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_GP_H__
> +#define __LIMA_GP_H__
> +
> +struct lima_ip;
> +struct lima_device;
> +
> +int lima_gp_init(struct lima_ip *ip);
> +void lima_gp_fini(struct lima_ip *ip);
> +
> +int lima_gp_pipe_init(struct lima_device *dev);
> +void lima_gp_pipe_fini(struct lima_device *dev);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
> new file mode 100644
> index 000000000000..2ba4786f9ec7
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_l2_cache.c
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/iopoll.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_l2_cache.h"
> +#include "lima_regs.h"
> +
> +/*
> + * MMIO accessors for the L2 cache register block. Both expand to an access
> + * at ip->iomem + reg, so a `struct lima_ip *ip` must be in scope.
> + */
> +#define l2_cache_write(reg, data) writel(data, ip->iomem + reg)
> +#define l2_cache_read(reg) readl(ip->iomem + reg)
> +
> +/*
> + * Spin until the L2 cache clears LIMA_L2_CACHE_STATUS_COMMAND_BUSY, polling
> + * without sleeping for at most 1000us.
> + *
> + * Returns 0 once idle, or the readl_poll_timeout() error after logging it.
> + */
> +static int lima_l2_cache_wait_idle(struct lima_ip *ip)
> +{
> +       u32 status;
> +       int ret;
> +
> +       ret = readl_poll_timeout(ip->iomem + LIMA_L2_CACHE_STATUS, status,
> +                                !(status & LIMA_L2_CACHE_STATUS_COMMAND_BUSY),
> +                                0, 1000);
> +       if (ret)
> +               dev_err(ip->dev->dev, "l2 cache wait command timeout\n");
> +
> +       return ret;
> +}
> +
> +/*
> + * Issue a CLEAR_ALL command to the L2 cache and wait for it to go idle,
> + * with ip->data.lock held across the command and the wait.
> + *
> + * Returns 0 on success or the error from lima_l2_cache_wait_idle().
> + */
> +int lima_l2_cache_flush(struct lima_ip *ip)
> +{
> +       int ret;
> +
> +       spin_lock(&ip->data.lock);
> +       l2_cache_write(LIMA_L2_CACHE_COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
> +       ret = lima_l2_cache_wait_idle(ip);
> +       spin_unlock(&ip->data.lock);
> +       return ret;
> +}
> +
> +/*
> + * Initialise one L2 cache instance: log its geometry, flush it and enable
> + * it with read-allocate.
> + *
> + * Returns 0 on success, -ENODEV for l2_cache2 when none of PP4-7 is
> + * present, or the error from lima_l2_cache_flush().
> + */
> +int lima_l2_cache_init(struct lima_ip *ip)
> +{
> +       int i, err;
> +       u32 size;
> +       struct lima_device *dev = ip->dev;
> +
> +       /* l2_cache2 only exists when one of PP4-7 present */
> +       if (ip->id == lima_ip_l2_cache2) {
> +               for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
> +                       if (dev->ip[i].present)
> +                               break;
> +               }
> +               /* loop ran to completion: no PP4-7 found */
> +               if (i > lima_ip_pp7)
> +                       return -ENODEV;
> +       }
> +
> +       spin_lock_init(&ip->data.lock);
> +
> +       /*
> +        * Each byte of the size register is used as a power-of-two exponent.
> +        * NOTE(review): the cache-size exponent is reduced by 10 to print
> +        * KiB; a value below 10 would give a negative shift count - confirm
> +        * the hardware never reports that.
> +        */
> +       size = l2_cache_read(LIMA_L2_CACHE_SIZE);
> +       dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
> +                1 << (((size >> 16) & 0xff) - 10),
> +                1 << ((size >> 8) & 0xff),
> +                1 << (size & 0xff),
> +                1 << ((size >> 24) & 0xff));
> +
> +       err = lima_l2_cache_flush(ip);
> +       if (err)
> +               return err;
> +
> +       l2_cache_write(LIMA_L2_CACHE_ENABLE,
> +                      LIMA_L2_CACHE_ENABLE_ACCESS|LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
> +       l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
> +
> +       return 0;
> +}
> +
> +/* Counterpart of lima_l2_cache_init(); currently performs no teardown. */
> +void lima_l2_cache_fini(struct lima_ip *ip)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
> new file mode 100644
> index 000000000000..2ff91eafefbe
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_l2_cache.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_L2_CACHE_H__
> +#define __LIMA_L2_CACHE_H__
> +
> +struct lima_ip;
> +
> +int lima_l2_cache_init(struct lima_ip *ip);
> +void lima_l2_cache_fini(struct lima_ip *ip);
> +
> +int lima_l2_cache_flush(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
> new file mode 100644
> index 000000000000..c6c151d33cf8
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_mmu.c
> @@ -0,0 +1,142 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/interrupt.h>
> +#include <linux/iopoll.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_mmu.h"
> +#include "lima_vm.h"
> +#include "lima_object.h"
> +#include "lima_regs.h"
> +
> +/*
> + * MMIO accessors for an MMU register block. Both expand to an access at
> + * ip->iomem + reg, so a `struct lima_ip *ip` must be in scope.
> + */
> +#define mmu_write(reg, data) writel(data, ip->iomem + reg)
> +#define mmu_read(reg) readl(ip->iomem + reg)
> +
> +/*
> + * Write @cmd to LIMA_MMU_COMMAND, then poll the register at offset @addr
> + * into @val (no sleep, 100us timeout) until @cond becomes true.
> + *
> + * Evaluates to 0 on success or the readl_poll_timeout() error, which is
> + * also logged. Relies on `ip` and `dev` being in scope at the call site,
> + * and expands @cmd more than once.
> + */
> +#define lima_mmu_send_command(cmd, addr, val, cond)         \
> +({                                                          \
> +       int __ret;                                           \
> +                                                            \
> +       mmu_write(LIMA_MMU_COMMAND, cmd);                    \
> +       __ret = readl_poll_timeout(ip->iomem + (addr), val,  \
> +                                 cond, 0, 100);             \
> +       if (__ret)                                           \
> +               dev_err(dev->dev,                            \
> +                       "mmu command %x timeout\n", cmd);    \
> +       __ret;                                               \
> +})
> +
> +/*
> + * Interrupt handler for an MMU; @data is the struct lima_ip passed to
> + * devm_request_irq(). Logs page faults and bus errors, masks and clears
> + * the MMU interrupts and reports an MMU error on the GP or PP pipe.
> + *
> + * Returns IRQ_NONE when LIMA_MMU_INT_STATUS reads zero, IRQ_HANDLED
> + * otherwise.
> + */
> +static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
> +{
> +       struct lima_ip *ip = data;
> +       struct lima_device *dev = ip->dev;
> +       u32 status = mmu_read(LIMA_MMU_INT_STATUS);
> +       struct lima_sched_pipe *pipe;
> +
> +       /* irq line is requested IRQF_SHARED: nothing pending means not ours */
> +       if (!status)
> +               return IRQ_NONE;
> +
> +       if (status & LIMA_MMU_INT_PAGE_FAULT) {
> +               u32 fault = mmu_read(LIMA_MMU_PAGE_FAULT_ADDR);
> +               dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
> +                       fault, LIMA_MMU_STATUS_BUS_ID(status),
> +                       status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
> +                       lima_ip_name(ip));
> +       }
> +
> +       if (status & LIMA_MMU_INT_READ_BUS_ERROR) {
> +               dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
> +       }
> +
> +       /* mask all interrupts before resume */
> +       mmu_write(LIMA_MMU_INT_MASK, 0);
> +       mmu_write(LIMA_MMU_INT_CLEAR, status);
> +
> +       /* the GP MMU belongs to the GP pipe, every other MMU to the PP pipe */
> +       pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
> +       lima_sched_pipe_mmu_error(pipe);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/*
> + * Initialise one MMU: verify register access, hard-reset it, install the
> + * interrupt handler and enable paging on the device's empty VM.
> + *
> + * Returns 0 on success (and immediately for the PP broadcast MMU), -EIO
> + * when the DTE register test fails, or the error from a timed-out command
> + * or from devm_request_irq().
> + */
> +int lima_mmu_init(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int err;
> +       u32 v;
> +
> +       if (ip->id == lima_ip_ppmmu_bcast)
> +               return 0;
> +
> +       /* the value is expected to read back with its low 12 bits cleared */
> +       mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
> +       if (mmu_read(LIMA_MMU_DTE_ADDR) != 0xCAFEB000) {
> +               dev_err(dev->dev, "mmu %s dte write test fail\n", lima_ip_name(ip));
> +               return -EIO;
> +       }
> +
> +       /*
> +        * NOTE(review): lima_mmu_send_command() below writes the same
> +        * HARD_RESET command again, so this explicit write looks redundant -
> +        * confirm whether the double write is intentional.
> +        */
> +       mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
> +       /* reset is considered complete once DTE_ADDR reads back as zero */
> +       err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
> +                                   LIMA_MMU_DTE_ADDR, v, v == 0);
> +       if (err)
> +               return err;
> +
> +       err = devm_request_irq(dev->dev, ip->irq, lima_mmu_irq_handler,
> +                              IRQF_SHARED, lima_ip_name(ip), ip);
> +       if (err) {
> +               dev_err(dev->dev, "mmu %s fail to request irq\n", lima_ip_name(ip));
> +               return err;
> +       }
> +
> +       mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
> +       mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
> +       return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
> +                                    LIMA_MMU_STATUS, v,
> +                                    v & LIMA_MMU_STATUS_PAGING_ENABLED);
> +}
> +
> +/* Counterpart of lima_mmu_init(); currently performs no teardown. */
> +void lima_mmu_fini(struct lima_ip *ip)
> +{
> +
> +}
> +
> +/*
> + * Point the MMU at the page directory of @vm and flush its TLB, with the
> + * MMU stalled for the duration. With a NULL @vm only the TLB flush is
> + * done. Command timeouts are logged by lima_mmu_send_command() but are not
> + * reported to the caller.
> + */
> +void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm)
> +{
> +       struct lima_device *dev = ip->dev;
> +       u32 v;
> +
> +       lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_STALL,
> +                             LIMA_MMU_STATUS, v,
> +                             v & LIMA_MMU_STATUS_STALL_ACTIVE);
> +
> +       if (vm)
> +               mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
> +
> +       /* flush the TLB */
> +       mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_ZAP_CACHE);
> +
> +       lima_mmu_send_command(LIMA_MMU_COMMAND_DISABLE_STALL,
> +                             LIMA_MMU_STATUS, v,
> +                             !(v & LIMA_MMU_STATUS_STALL_ACTIVE));
> +}
> +
> +/*
> + * If the MMU reports an active page fault, hard-reset it and re-enable
> + * paging on the device's empty VM, repeating the register sequence used
> + * in lima_mmu_init(). Does nothing otherwise. Command timeouts are logged
> + * by lima_mmu_send_command() but not reported to the caller.
> + */
> +void lima_mmu_page_fault_resume(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       u32 status = mmu_read(LIMA_MMU_STATUS);
> +       u32 v;
> +
> +       if (status & LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE) {
> +               dev_info(dev->dev, "mmu resume\n");
> +
> +               mmu_write(LIMA_MMU_INT_MASK, 0);
> +               /* non-zero DTE so that the reset's return to zero is observable */
> +               mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
> +               lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
> +                                     LIMA_MMU_DTE_ADDR, v, v == 0);
> +               mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
> +               mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
> +               lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
> +                                     LIMA_MMU_STATUS, v,
> +                                     v & LIMA_MMU_STATUS_PAGING_ENABLED);
> +       }
> +}
> diff --git a/drivers/gpu/drm/lima/lima_mmu.h b/drivers/gpu/drm/lima/lima_mmu.h
> new file mode 100644
> index 000000000000..ca173b60fc73
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_mmu.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_MMU_H__
> +#define __LIMA_MMU_H__
> +
> +struct lima_ip;
> +struct lima_vm;
> +
> +int lima_mmu_init(struct lima_ip *ip);
> +void lima_mmu_fini(struct lima_ip *ip);
> +
> +void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm);
> +void lima_mmu_page_fault_resume(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_object.c b/drivers/gpu/drm/lima/lima_object.c
> new file mode 100644
> index 000000000000..28ff1b8e1dca
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_object.c
> @@ -0,0 +1,124 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <drm/drm_prime.h>
> +#include <linux/pagemap.h>
> +#include <linux/dma-mapping.h>
> +
> +#include "lima_object.h"
> +
> +/*
> + * Release everything owned by @bo and free it.
> + *
> + * A bo with an sg_table frees its page array and goes through
> + * drm_prime_gem_destroy(). Any other bo unmaps every page that has a
> + * non-zero DMA address and hands its pages back with drm_gem_put_pages().
> + * Safe to call on a partially constructed bo from lima_bo_create().
> + */
> +void lima_bo_destroy(struct lima_bo *bo)
> +{
> +       struct drm_gem_object *gem = &bo->gem;
> +
> +       if (bo->sgt) {
> +               kfree(bo->pages);
> +               drm_prime_gem_destroy(gem, bo->sgt);
> +       } else {
> +               dma_addr_t *dma = bo->pages_dma_addr;
> +               int idx, count = gem->size >> PAGE_SHIFT;
> +
> +               /* a zero entry was never mapped, so skip it */
> +               for (idx = 0; dma && idx < count; idx++) {
> +                       if (!dma[idx])
> +                               continue;
> +
> +                       dma_unmap_page(gem->dev->dev, dma[idx],
> +                                      PAGE_SIZE, DMA_BIDIRECTIONAL);
> +               }
> +
> +               if (bo->pages)
> +                       drm_gem_put_pages(gem, bo->pages, true, true);
> +       }
> +
> +       kfree(bo->pages_dma_addr);
> +       drm_gem_object_release(gem);
> +       kfree(bo);
> +}
> +
> +/*
> + * Allocate a zeroed lima_bo and initialise its GEM object with @size
> + * rounded up to a whole number of pages. @resv is stored in the GEM object
> + * before drm_gem_object_init() runs. @flags is currently unused.
> + *
> + * Returns the new bo, ERR_PTR(-ENOMEM) on allocation failure, or the
> + * ERR_PTR-encoded error from drm_gem_object_init().
> + */
> +static struct lima_bo *lima_bo_create_struct(struct lima_device *dev, u32 size, u32 flags,
> +                                            struct reservation_object *resv)
> +{
> +       struct lima_bo *bo;
> +       int err;
> +
> +       size = PAGE_ALIGN(size);
> +
> +       bo = kzalloc(sizeof(*bo), GFP_KERNEL);
> +       if (!bo)
> +               return ERR_PTR(-ENOMEM);
> +
> +       mutex_init(&bo->lock);
> +       INIT_LIST_HEAD(&bo->va);
> +       bo->gem.resv = resv;
> +
> +       err = drm_gem_object_init(dev->ddev, &bo->gem, size);
> +       if (err) {
> +               kfree(bo);
> +               return ERR_PTR(err);
> +       }
> +
> +       return bo;
> +}
> +
> +/*
> + * Create a buffer object of @size bytes (rounded up to whole pages) and
> + * fill in its page and per-page DMA address arrays.
> + *
> + * With @sgt set, the bo wraps that scatter-gather table: the page and DMA
> + * address arrays are filled from it. Without @sgt, pages are obtained with
> + * drm_gem_get_pages() (GFP_DMA32 mapping mask) and each page is DMA-mapped
> + * bidirectionally for dev->dev.
> + *
> + * @flags is only passed on to lima_bo_create_struct(); @resv becomes the
> + * GEM object's reservation object.
> + *
> + * Returns the new bo or an ERR_PTR. On failure, whatever was set up so far
> + * is torn down through lima_bo_destroy().
> + */
> +struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
> +                              u32 flags, struct sg_table *sgt,
> +                              struct reservation_object *resv)
> +{
> +       int err;
> +       size_t i, npages;
> +       struct lima_bo *bo, *ret;
> +
> +       bo = lima_bo_create_struct(dev, size, flags, resv);
> +       if (IS_ERR(bo))
> +               return bo;
> +
> +       npages = bo->gem.size >> PAGE_SHIFT;
> +
> +       /* kcalloc() checks the count * size product for overflow */
> +       bo->pages_dma_addr = kcalloc(npages, sizeof(*bo->pages_dma_addr),
> +                                    GFP_KERNEL);
> +       if (!bo->pages_dma_addr) {
> +               ret = ERR_PTR(-ENOMEM);
> +               goto err_out;
> +       }
> +
> +       if (sgt) {
> +               /*
> +                * NOTE(review): once bo->sgt is set, a failure below makes
> +                * lima_bo_destroy() call drm_prime_gem_destroy() - confirm
> +                * that is safe before the import attachment is recorded on
> +                * the GEM object.
> +                */
> +               bo->sgt = sgt;
> +
> +               bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
> +               if (!bo->pages) {
> +                       ret = ERR_PTR(-ENOMEM);
> +                       goto err_out;
> +               }
> +
> +               err = drm_prime_sg_to_page_addr_arrays(
> +                       sgt, bo->pages, bo->pages_dma_addr, npages);
> +               if (err) {
> +                       ret = ERR_PTR(err);
> +                       goto err_out;
> +               }
> +       } else {
> +               mapping_set_gfp_mask(bo->gem.filp->f_mapping, GFP_DMA32);
> +               bo->pages = drm_gem_get_pages(&bo->gem);
> +               if (IS_ERR(bo->pages)) {
> +                       ret = ERR_CAST(bo->pages);
> +                       /* keep lima_bo_destroy() from seeing an ERR_PTR */
> +                       bo->pages = NULL;
> +                       goto err_out;
> +               }
> +
> +               for (i = 0; i < npages; i++) {
> +                       dma_addr_t addr = dma_map_page(dev->dev, bo->pages[i], 0,
> +                                                      PAGE_SIZE, DMA_BIDIRECTIONAL);
> +                       if (dma_mapping_error(dev->dev, addr)) {
> +                               /* entries not yet mapped stay zero */
> +                               ret = ERR_PTR(-EFAULT);
> +                               goto err_out;
> +                       }
> +                       bo->pages_dma_addr[i] = addr;
> +               }
> +       }
> +
> +       return bo;
> +
> +err_out:
> +       lima_bo_destroy(bo);
> +       return ret;
> +}
> diff --git a/drivers/gpu/drm/lima/lima_object.h b/drivers/gpu/drm/lima/lima_object.h
> new file mode 100644
> index 000000000000..70099f1045ac
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_object.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_OBJECT_H__
> +#define __LIMA_OBJECT_H__
> +
> +#include <drm/drm_gem.h>
> +
> +#include "lima_device.h"
> +
> +/* A lima buffer object: a GEM object plus its backing pages and mappings. */
> +struct lima_bo {
> +       struct drm_gem_object gem;      /* embedded base object, see to_lima_bo() */
> +
> +       struct page **pages;            /* one entry per page of the object */
> +       dma_addr_t *pages_dma_addr;     /* DMA address of each page */
> +       struct sg_table *sgt;           /* set when built from an sg_table */
> +       void *vaddr;                    /* presumably the lima_bo_vmap() mapping - TODO confirm */
> +
> +       struct mutex lock;              /* NOTE(review): presumably guards the va list - confirm */
> +       struct list_head va;            /* list head, initialised empty at creation */
> +};
> +
> +/* Convert a GEM object embedded in a lima_bo back to its containing bo. */
> +static inline struct lima_bo *
> +to_lima_bo(struct drm_gem_object *obj)
> +{
> +       return container_of(obj, struct lima_bo, gem);
> +}
> +
> +struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
> +                              u32 flags, struct sg_table *sgt,
> +                              struct reservation_object *resv);
> +void lima_bo_destroy(struct lima_bo *bo);
> +void *lima_bo_vmap(struct lima_bo *bo);
> +void lima_bo_vunmap(struct lima_bo *bo);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_pmu.c b/drivers/gpu/drm/lima/lima_pmu.c
> new file mode 100644
> index 000000000000..3c50524b70a7
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_pmu.c
> @@ -0,0 +1,59 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/iopoll.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_pmu.h"
> +#include "lima_regs.h"
> +
> +/*
> + * MMIO accessors for the PMU register block. Both expand to an access at
> + * ip->iomem + reg, so a `struct lima_ip *ip` must be in scope.
> + */
> +#define pmu_write(reg, data) writel(data, ip->iomem + reg)
> +#define pmu_read(reg) readl(ip->iomem + reg)
> +
> +/*
> + * Wait for the PMU to signal command completion in its raw interrupt
> + * status, polling every 100us for up to 100ms, then acknowledge it.
> + *
> + * Returns 0 on success or the readl_poll_timeout() error on timeout, in
> + * which case the completion bit is not cleared.
> + */
> +static int lima_pmu_wait_cmd(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int err;
> +       u32 v;
> +
> +       err = readl_poll_timeout(ip->iomem + LIMA_PMU_INT_RAWSTAT,
> +                                v, v & LIMA_PMU_INT_CMD_MASK,
> +                                100, 100000);
> +       if (err) {
> +               dev_err(dev->dev, "timeout wait pmu cmd\n");
> +               return err;
> +       }
> +
> +       pmu_write(LIMA_PMU_INT_CLEAR, LIMA_PMU_INT_CMD_MASK);
> +       return 0;
> +}
> +
> +/*
> + * Initialise the PMU: mask its interrupts, program the switch delay and
> + * power up every domain that LIMA_PMU_STATUS reports as off.
> + *
> + * Returns 0 on success or the error from lima_pmu_wait_cmd().
> + */
> +int lima_pmu_init(struct lima_ip *ip)
> +{
> +       int err;
> +       u32 stat;
> +
> +       pmu_write(LIMA_PMU_INT_MASK, 0);
> +
> +       /* If this value is too low, when in high GPU clk freq,
> +        * GPU will be in unstable state. */
> +       pmu_write(LIMA_PMU_SW_DELAY, 0xffff);
> +
> +       /* status reg 1=off 0=on */
> +       stat = pmu_read(LIMA_PMU_STATUS);
> +
> +       /* power up all ip: the set status bits are exactly the ones to power up */
> +       if (stat) {
> +               pmu_write(LIMA_PMU_POWER_UP, stat);
> +               err = lima_pmu_wait_cmd(ip);
> +               if (err)
> +                       return err;
> +       }
> +       return 0;
> +}
> +
> +/* Counterpart of lima_pmu_init(); currently performs no teardown. */
> +void lima_pmu_fini(struct lima_ip *ip)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/lima/lima_pmu.h b/drivers/gpu/drm/lima/lima_pmu.h
> new file mode 100644
> index 000000000000..1cf94a35bdf9
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_pmu.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_PMU_H__
> +#define __LIMA_PMU_H__
> +
> +struct lima_ip;
> +
> +int lima_pmu_init(struct lima_ip *ip);
> +void lima_pmu_fini(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
> new file mode 100644
> index 000000000000..7b36c29eee89
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_pp.c
> @@ -0,0 +1,423 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +
> +#include <drm/lima_drm.h>
> +
> +#include "lima_device.h"
> +#include "lima_pp.h"
> +#include "lima_dlbu.h"
> +#include "lima_bcast.h"
> +#include "lima_vm.h"
> +#include "lima_regs.h"
> +
> +/*
> + * MMIO accessors for a PP register block. Both expand to an access at
> + * ip->iomem + reg, so a `struct lima_ip *ip` must be in scope.
> + */
> +#define pp_write(reg, data) writel(data, ip->iomem + reg)
> +#define pp_read(reg) readl(ip->iomem + reg)
> +
> +/*
> + * Common irq processing for one PP: on an error bit in @state, log it,
> + * flag the PP pipe as failed and mask the PP's interrupts. In all cases
> + * acknowledge the bits in @state.
> + */
> +static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
> +{
> +       struct lima_device *dev = ip->dev;
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +
> +       if (state & LIMA_PP_IRQ_MASK_ERROR) {
> +               u32 status = pp_read(LIMA_PP_STATUS);
> +
> +               dev_err(dev->dev, "pp error irq state=%x status=%x\n",
> +                       state, status);
> +
> +               pipe->error = true;
> +
> +               /* mask all interrupts before hard reset */
> +               pp_write(LIMA_PP_INT_MASK, 0);
> +       }
> +
> +       pp_write(LIMA_PP_INT_CLEAR, state);
> +}
> +
> +/*
> + * Interrupt handler for a single PP; @data is its struct lima_ip. Each
> + * handled interrupt decrements pipe->task, and the decrement that reaches
> + * zero reports the task as done.
> + *
> + * Returns IRQ_NONE when LIMA_PP_INT_STATUS reads zero, IRQ_HANDLED
> + * otherwise.
> + */
> +static irqreturn_t lima_pp_irq_handler(int irq, void *data)
> +{
> +       struct lima_ip *ip = data;
> +       struct lima_device *dev = ip->dev;
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       u32 state = pp_read(LIMA_PP_INT_STATUS);
> +
> +       /* for shared irq case */
> +       if (!state)
> +               return IRQ_NONE;
> +
> +       lima_pp_handle_irq(ip, state);
> +
> +       if (atomic_dec_and_test(&pipe->task))
> +               lima_sched_pipe_task_done(pipe);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/*
> + * Interrupt handler for the PP broadcast unit; @data is its struct
> + * lima_ip. Walks the first frame->num_pp processors of the PP pipe and,
> + * for each one not yet marked in pipe->done, handles its pending interrupt
> + * or notes that it has stopped rendering.
> + *
> + * Returns IRQ_HANDLED if at least one PP had a pending interrupt, else
> + * IRQ_NONE.
> + *
> + * NOTE(review): pipe->current_task is dereferenced unconditionally -
> + * confirm it cannot be NULL when this handler runs.
> + */
> +static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
> +{
> +       int i;
> +       irqreturn_t ret = IRQ_NONE;
> +       struct lima_ip *pp_bcast = data;
> +       struct lima_device *dev = pp_bcast->dev;
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
> +
> +       for (i = 0; i < frame->num_pp; i++) {
> +               struct lima_ip *ip = pipe->processor[i];
> +               u32 status, state;
> +
> +               /* this PP was already accounted for */
> +               if (pipe->done & (1 << i))
> +                       continue;
> +
> +               /* Read status before the interrupt state: if the interrupt
> +                * state changes in between, reading in the other order
> +                * could miss handling the interrupt. */
> +               status = pp_read(LIMA_PP_STATUS);
> +               state = pp_read(LIMA_PP_INT_STATUS);
> +
> +               if (state) {
> +                       lima_pp_handle_irq(ip, state);
> +                       ret = IRQ_HANDLED;
> +               }
> +               else {
> +                       /* no irq and still rendering: not finished yet */
> +                       if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
> +                               continue;
> +               }
> +
> +               /* the last PP to finish completes the task */
> +               pipe->done |= (1 << i);
> +               if (atomic_dec_and_test(&pipe->task))
> +                       lima_sched_pipe_task_done(pipe);
> +       }
> +
> +       return ret;
> +}
> +
> +/*
> + * Issue a PP soft reset without waiting for it to finish. Completion is
> + * collected later by lima_pp_soft_reset_async_wait(). Does nothing when a
> + * reset is already marked pending in ip->data.async_reset.
> + */
> +static void lima_pp_soft_reset_async(struct lima_ip *ip)
> +{
> +       if (ip->data.async_reset)
> +               return;
> +
> +       pp_write(LIMA_PP_INT_MASK, 0);
> +       pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
> +       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
> +       ip->data.async_reset = true;
> +}
> +
> +/*
> + * Poll callback: non-zero once the PP is no longer rendering and its raw
> + * interrupt status equals exactly LIMA_PP_IRQ_RESET_COMPLETED.
> + */
> +static int lima_pp_soft_reset_poll(struct lima_ip *ip)
> +{
> +       return !(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE) &&
> +               pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
> +}
> +
> +/*
> + * Wait for a previously started soft reset of a single PP to complete.
> + * On success all interrupts are cleared and the used ones unmasked again.
> + * Returns 0 on success or the error from lima_poll_timeout().
> + */
> +static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
> +{
> +       int err = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
> +
> +       if (!err) {
> +               pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
> +               pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
> +               return 0;
> +       }
> +
> +       dev_err(ip->dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
> +       return err;
> +}
> +
> +/*
> + * Complete a pending asynchronous soft reset, if there is one.
> + *
> + * For the broadcast unit every PP used by the current task is waited for
> + * and the individual results are OR-ed together; otherwise only @ip is
> + * waited for.  The pending flag is cleared whatever the outcome.
> + */
> +static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
> +{
> +       int err = 0;
> +
> +       if (!ip->data.async_reset)
> +               return 0;
> +
> +       if (ip->id != lima_ip_pp_bcast) {
> +               err = lima_pp_soft_reset_async_wait_one(ip);
> +       } else {
> +               struct lima_sched_pipe *pipe = ip->dev->pipe + lima_pipe_pp;
> +               struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
> +               int n;
> +
> +               for (n = 0; n < frame->num_pp; n++)
> +                       err |= lima_pp_soft_reset_async_wait_one(pipe->processor[n]);
> +       }
> +
> +       ip->data.async_reset = false;
> +       return err;
> +}
> +
> +/*
> + * Copy a task's register frame into the PP.
> + * @frame: LIMA_PP_FRAME_REG_NUM words written from LIMA_PP_FRAME onwards
> + * @wb:    3 * LIMA_PP_WB_REG_NUM words, one group per write-back unit
> + */
> +static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
> +{
> +       int reg, unit;
> +
> +       for (reg = 0; reg < LIMA_PP_FRAME_REG_NUM; reg++)
> +               writel(frame[reg], ip->iomem + LIMA_PP_FRAME + reg * 4);
> +
> +       for (unit = 0; unit < 3; unit++) {
> +               /* registers of this write-back unit within @wb */
> +               u32 *src = wb + unit * LIMA_PP_WB_REG_NUM;
> +
> +               for (reg = 0; reg < LIMA_PP_WB_REG_NUM; reg++)
> +                       writel(src[reg], ip->iomem + LIMA_PP_WB(unit) + reg * 4);
> +       }
> +}
> +
> +/*
> + * Poll callback for the hard reset: write a probe value to the perf
> + * counter 0 limit register and report whether it reads back unchanged.
> + * NOTE(review): presumably the register only accepts writes once the
> + * forced reset has finished - confirm against the hardware documentation.
> + */
> +static int lima_pp_hard_reset_poll(struct lima_ip *ip)
> +{
> +       const u32 probe = 0xC01A0000;
> +
> +       pp_write(LIMA_PP_PERF_CNT_0_LIMIT, probe);
> +       return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == probe;
> +}
> +
> +/*
> + * Force-reset one PP and wait for the reset to take effect.
> + * Returns 0 on success or the error from lima_poll_timeout().
> + */
> +static int lima_pp_hard_reset(struct lima_ip *ip)
> +{
> +       int err;
> +
> +       /* seed the register lima_pp_hard_reset_poll() probes, mask all
> +        * interrupts, then issue the reset */
> +       pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
> +       pp_write(LIMA_PP_INT_MASK, 0);
> +       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
> +
> +       err = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
> +       if (!err) {
> +               pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
> +               pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
> +               pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
> +               return 0;
> +       }
> +
> +       dev_err(ip->dev->dev, "pp hard reset timeout\n");
> +       return err;
> +}
> +
> +/*
> + * Log the PP core type and revision read from LIMA_PP_VERSION.
> + * The upper 16 bits identify the product, bits 15:8 the major and
> + * bits 7:0 the minor revision.
> + */
> +static void lima_pp_print_version(struct lima_ip *ip)
> +{
> +       u32 version, major, minor;
> +       const char *name;
> +
> +       version = pp_read(LIMA_PP_VERSION);
> +       major = (version >> 8) & 0xFF;
> +       minor = version & 0xFF;
> +       switch (version >> 16) {
> +       case 0xC807:
> +               name = "mali200";
> +               break;
> +       case 0xCE07:
> +               name = "mali300";
> +               break;
> +       case 0xCD07:
> +               name = "mali400";
> +               break;
> +       case 0xCF07:
> +               name = "mali450";
> +               break;
> +       default:
> +               name = "unknown";
> +               break;
> +       }
> +       /* major/minor are u32, so print them as unsigned */
> +       dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
> +                lima_ip_name(ip), name, major, minor);
> +}
> +
> +/*
> + * Bring up one PP: log its version, soft-reset it and install its
> + * (shared, device-managed) interrupt handler.
> + * Returns 0 on success or a negative error code.
> + */
> +int lima_pp_init(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int ret;
> +
> +       lima_pp_print_version(ip);
> +
> +       /* clear the pending flag first so the reset below is really issued */
> +       ip->data.async_reset = false;
> +       lima_pp_soft_reset_async(ip);
> +       ret = lima_pp_soft_reset_async_wait(ip);
> +       if (ret)
> +               return ret;
> +
> +       ret = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
> +                              IRQF_SHARED, lima_ip_name(ip), ip);
> +       if (ret)
> +               dev_err(dev->dev, "pp %s fail to request irq\n",
> +                       lima_ip_name(ip));
> +
> +       return ret;
> +}
> +
> +/* Counterpart of lima_pp_init(); currently there is nothing to undo here
> + * (the IRQ is requested with devm_request_irq() in lima_pp_init()). */
> +void lima_pp_fini(struct lima_ip *ip)
> +{
> +
> +}
> +
> +/*
> + * Set up the PP broadcast unit: only its (shared, device-managed)
> + * interrupt handler needs to be installed.
> + * Returns 0 on success or the error from devm_request_irq().
> + */
> +int lima_pp_bcast_init(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int ret;
> +
> +       ret = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
> +                              IRQF_SHARED, lima_ip_name(ip), ip);
> +       if (ret)
> +               dev_err(dev->dev, "pp %s fail to request irq\n",
> +                       lima_ip_name(ip));
> +
> +       return ret;
> +}
> +
> +/* Counterpart of lima_pp_bcast_init(); currently there is nothing to undo
> + * here (the IRQ is requested with devm_request_irq()). */
> +void lima_pp_bcast_fini(struct lima_ip *ip)
> +{
> +
> +}
> +
> +/*
> + * Check a PP task before it is queued: the number of PPs it asks for
> + * must be non-zero and must not exceed the processors in the pipe.
> + * The frame layout depends on whether the pipe has a broadcast unit.
> + * Returns 0 if the task is acceptable, -EINVAL otherwise.
> + */
> +static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
> +                                struct lima_sched_task *task)
> +{
> +       u32 count;
> +
> +       if (pipe->bcast_processor)
> +               count = ((struct drm_lima_m450_pp_frame *)task->frame)->num_pp;
> +       else
> +               count = ((struct drm_lima_m400_pp_frame *)task->frame)->num_pp;
> +
> +       if (!count || count > pipe->num_processor)
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +
> +/*
> + * Program the PP hardware with @task's register frame and start rendering.
> + *
> + * With a broadcast unit the common frame is written once through the
> + * broadcast processor, per-PP registers are then patched individually and
> + * rendering is started with a single write.  Without one, each PP gets its
> + * own copy of the frame and is started separately.
> + *
> + * NOTE(review): the result of lima_pp_soft_reset_async_wait() is ignored
> + * in both paths, so a reset timeout does not stop the task from starting.
> + */
> +static void lima_pp_task_run(struct lima_sched_pipe *pipe,
> +                            struct lima_sched_task *task)
> +{
> +       if (pipe->bcast_processor) {
> +               struct drm_lima_m450_pp_frame *frame = task->frame;
> +               struct lima_device *dev = pipe->bcast_processor->dev;
> +               struct lima_ip *ip = pipe->bcast_processor;
> +               int i;
> +
> +               /* completion bookkeeping used by lima_pp_bcast_irq_handler():
> +                * no PP done yet, num_pp PPs outstanding */
> +               pipe->done = 0;
> +               atomic_set(&pipe->task, frame->num_pp);
> +
> +               if (frame->use_dlbu) {
> +                       lima_dlbu_enable(dev, frame->num_pp);
> +
> +                       /* with DLBU the frame register holds the reserved
> +                        * DLBU address instead of a per-PP PLBU array */
> +                       frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
> +                       lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
> +               }
> +               else
> +                       lima_dlbu_disable(dev);
> +
> +               lima_bcast_enable(dev, frame->num_pp);
> +
> +               /* finish the reset started by lima_pp_task_fini(), if any */
> +               lima_pp_soft_reset_async_wait(ip);
> +
> +               lima_pp_write_frame(ip, frame->frame, frame->wb);
> +
> +               for (i = 0; i < frame->num_pp; i++) {
> +                       /* deliberately shadows the outer ip: pp_write()
> +                        * presumably acts on the local variable named ip,
> +                        * so these writes go to the individual PP */
> +                       struct lima_ip *ip = pipe->processor[i];
> +
> +                       pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
> +                       if (!frame->use_dlbu)
> +                               pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
> +               }
> +
> +               /* back to the outer ip (the broadcast processor) */
> +               pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
> +       }
> +       else {
> +               struct drm_lima_m400_pp_frame *frame = task->frame;
> +               int i;
> +
> +               atomic_set(&pipe->task, frame->num_pp);
> +
> +               for (i = 0; i < frame->num_pp; i++) {
> +                       struct lima_ip *ip = pipe->processor[i];
> +
> +                       /* patch the per-PP slots of the shared frame before
> +                        * it is written to this PP */
> +                       frame->frame[LIMA_PP_FRAME >> 2] =
> +                               frame->plbu_array_address[i];
> +                       frame->frame[LIMA_PP_STACK >> 2] =
> +                               frame->fragment_stack_address[i];
> +
> +                       lima_pp_soft_reset_async_wait(ip);
> +
> +                       lima_pp_write_frame(ip, frame->frame, frame->wb);
> +
> +                       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
> +               }
> +       }
> +}
> +
> +/*
> + * Called once a PP task has finished: kick off an asynchronous soft
> + * reset, via the broadcast unit if the pipe has one, else on every PP.
> + * The reset is waited for in lima_pp_task_run() before the next task.
> + */
> +static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
> +{
> +       int n;
> +
> +       if (pipe->bcast_processor) {
> +               lima_pp_soft_reset_async(pipe->bcast_processor);
> +               return;
> +       }
> +
> +       for (n = 0; n < pipe->num_processor; n++)
> +               lima_pp_soft_reset_async(pipe->processor[n]);
> +}
> +
> +/*
> + * Error recovery for the PP pipe: log the interrupt state and status of
> + * every PP and hard-reset each of them.
> + */
> +static void lima_pp_task_error(struct lima_sched_pipe *pipe)
> +{
> +       int n;
> +
> +       for (n = 0; n < pipe->num_processor; n++) {
> +               /* must be named "ip": pp_read() picks it up implicitly */
> +               struct lima_ip *ip = pipe->processor[n];
> +               u32 int_state = pp_read(LIMA_PP_INT_STATUS);
> +               u32 status = pp_read(LIMA_PP_STATUS);
> +
> +               dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
> +                       n, int_state, status);
> +
> +               lima_pp_hard_reset(ip);
> +       }
> +}
> +
> +/* MMU error callback for the PP pipe: account for one outstanding PP and
> + * report the task as done once the counter reaches zero. */
> +static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
> +{
> +       if (atomic_dec_and_test(&pipe->task))
> +               lima_sched_pipe_task_done(pipe);
> +}
> +
> +/* slab for PP tasks, shared by all devices and reference counted;
> + * statics start out zeroed, so no explicit initializer is needed */
> +static struct kmem_cache *lima_pp_task_slab;
> +static int lima_pp_task_slab_refcnt;
> +
> +/*
> + * Set up the PP scheduler pipe of @dev: pick the frame size matching the
> + * GPU model, create (or reuse) the task slab and install the PP callbacks.
> + * Returns 0 on success or -ENOMEM if the slab cannot be created.
> + */
> +int lima_pp_pipe_init(struct lima_device *dev)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       int frame_size = dev->id == lima_gpu_mali400 ?
> +               sizeof(struct drm_lima_m400_pp_frame) :
> +               sizeof(struct drm_lima_m450_pp_frame);
> +
> +       if (!lima_pp_task_slab) {
> +               /* only the frame that follows struct lima_sched_task is
> +                * whitelisted for user copy */
> +               lima_pp_task_slab = kmem_cache_create_usercopy(
> +                       "lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
> +                       0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
> +                       frame_size, NULL);
> +               if (!lima_pp_task_slab)
> +                       return -ENOMEM;
> +       }
> +       lima_pp_task_slab_refcnt++;
> +
> +       pipe->frame_size = frame_size;
> +       pipe->task_slab = lima_pp_task_slab;
> +
> +       pipe->task_validate = lima_pp_task_validate;
> +       pipe->task_run = lima_pp_task_run;
> +       pipe->task_fini = lima_pp_task_fini;
> +       pipe->task_error = lima_pp_task_error;
> +       pipe->task_mmu_error = lima_pp_task_mmu_error;
> +
> +       return 0;
> +}
> +
> +/* Drop one reference on the PP task slab and destroy it with the last. */
> +void lima_pp_pipe_fini(struct lima_device *dev)
> +{
> +       lima_pp_task_slab_refcnt--;
> +       if (lima_pp_task_slab_refcnt)
> +               return;
> +
> +       kmem_cache_destroy(lima_pp_task_slab);
> +       lima_pp_task_slab = NULL;
> +}
> diff --git a/drivers/gpu/drm/lima/lima_pp.h b/drivers/gpu/drm/lima/lima_pp.h
> new file mode 100644
> index 000000000000..f83f8cb4d30a
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_pp.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_PP_H__
> +#define __LIMA_PP_H__
> +
> +struct lima_ip;
> +struct lima_device;
> +
> +/* per-PP setup/teardown */
> +int lima_pp_init(struct lima_ip *ip);
> +void lima_pp_fini(struct lima_ip *ip);
> +
> +/* PP broadcast unit setup/teardown */
> +int lima_pp_bcast_init(struct lima_ip *ip);
> +void lima_pp_bcast_fini(struct lima_ip *ip);
> +
> +/* PP scheduler pipe setup/teardown */
> +int lima_pp_pipe_init(struct lima_device *dev);
> +void lima_pp_pipe_fini(struct lima_device *dev);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_regs.h b/drivers/gpu/drm/lima/lima_regs.h
> new file mode 100644
> index 000000000000..d5ade8fc8901
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_regs.h
> @@ -0,0 +1,298 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright 2010-2017 ARM Limited. All rights reserved.
> + * Copyright 2017-2018 Qiang Yu <yuq825@gmail.com>
> + */
> +
> +#ifndef __LIMA_REGS_H__
> +#define __LIMA_REGS_H__
> +
> +/* This file's register definition is collected from the
> + * official ARM Mali Utgard GPU kernel driver source code
> + */
> +
> +/* PMU regs */
> +#define LIMA_PMU_POWER_UP                  0x00
> +#define LIMA_PMU_POWER_DOWN                0x04
> +#define   LIMA_PMU_POWER_GP0_MASK          BIT(0)
> +#define   LIMA_PMU_POWER_L2_MASK           BIT(1)
> +/* power bit of PP number i; the argument is parenthesized so that
> + * expressions such as "a ? b : c" expand correctly */
> +#define   LIMA_PMU_POWER_PP_MASK(i)        BIT(2 + (i))
> +
> +/*
> + * On Mali450 each block automatically starts up its corresponding L2
> + * and the PPs are not fully independent controllable.
> + * Instead PP0, PP1-3 and PP4-7 can be turned on or off.
> + */
> +#define   LIMA450_PMU_POWER_PP0_MASK       BIT(1)
> +#define   LIMA450_PMU_POWER_PP13_MASK      BIT(2)
> +#define   LIMA450_PMU_POWER_PP47_MASK      BIT(3)
> +
> +#define LIMA_PMU_STATUS                    0x08
> +#define LIMA_PMU_INT_MASK                  0x0C
> +#define LIMA_PMU_INT_RAWSTAT               0x10
> +#define LIMA_PMU_INT_CLEAR                 0x18
> +#define   LIMA_PMU_INT_CMD_MASK            BIT(0)
> +#define LIMA_PMU_SW_DELAY                  0x1C
> +
> +/* L2 cache regs */
> +#define LIMA_L2_CACHE_SIZE                   0x0004
> +#define LIMA_L2_CACHE_STATUS                 0x0008
> +#define   LIMA_L2_CACHE_STATUS_COMMAND_BUSY  BIT(0)
> +#define   LIMA_L2_CACHE_STATUS_DATA_BUSY     BIT(1)
> +#define LIMA_L2_CACHE_COMMAND                0x0010
> +#define   LIMA_L2_CACHE_COMMAND_CLEAR_ALL    BIT(0)
> +#define LIMA_L2_CACHE_CLEAR_PAGE             0x0014
> +#define LIMA_L2_CACHE_MAX_READS              0x0018
> +#define LIMA_L2_CACHE_ENABLE                 0x001C
> +#define   LIMA_L2_CACHE_ENABLE_ACCESS        BIT(0)
> +#define   LIMA_L2_CACHE_ENABLE_READ_ALLOCATE BIT(1)
> +#define LIMA_L2_CACHE_PERFCNT_SRC0           0x0020
> +#define LIMA_L2_CACHE_PERFCNT_VAL0           0x0024
> +#define LIMA_L2_CACHE_PERFCNT_SRC1           0x0028
> +#define LIMA_L2_CACHE_PERFCNT_VAL1           0x002C
> +/* misspelled name kept as an alias so existing users keep building */
> +#define LIMA_L2_CACHE_ERFCNT_VAL1            LIMA_L2_CACHE_PERFCNT_VAL1
> +
> +/* GP regs */
> +#define LIMA_GP_VSCL_START_ADDR                0x00
> +#define LIMA_GP_VSCL_END_ADDR                  0x04
> +#define LIMA_GP_PLBUCL_START_ADDR              0x08
> +#define LIMA_GP_PLBUCL_END_ADDR                0x0c
> +#define LIMA_GP_PLBU_ALLOC_START_ADDR          0x10
> +#define LIMA_GP_PLBU_ALLOC_END_ADDR            0x14
> +#define LIMA_GP_CMD                            0x20
> +#define   LIMA_GP_CMD_START_VS                 BIT(0)
> +#define   LIMA_GP_CMD_START_PLBU               BIT(1)
> +#define   LIMA_GP_CMD_UPDATE_PLBU_ALLOC        BIT(4)
> +#define   LIMA_GP_CMD_RESET                    BIT(5)
> +#define   LIMA_GP_CMD_FORCE_HANG               BIT(6)
> +#define   LIMA_GP_CMD_STOP_BUS                 BIT(9)
> +#define   LIMA_GP_CMD_SOFT_RESET               BIT(10)
> +#define LIMA_GP_INT_RAWSTAT                    0x24
> +#define LIMA_GP_INT_CLEAR                      0x28
> +#define LIMA_GP_INT_MASK                       0x2C
> +#define LIMA_GP_INT_STAT                       0x30
> +#define   LIMA_GP_IRQ_VS_END_CMD_LST           BIT(0)
> +#define   LIMA_GP_IRQ_PLBU_END_CMD_LST         BIT(1)
> +#define   LIMA_GP_IRQ_PLBU_OUT_OF_MEM          BIT(2)
> +#define   LIMA_GP_IRQ_VS_SEM_IRQ               BIT(3)
> +#define   LIMA_GP_IRQ_PLBU_SEM_IRQ             BIT(4)
> +#define   LIMA_GP_IRQ_HANG                     BIT(5)
> +#define   LIMA_GP_IRQ_FORCE_HANG               BIT(6)
> +#define   LIMA_GP_IRQ_PERF_CNT_0_LIMIT         BIT(7)
> +#define   LIMA_GP_IRQ_PERF_CNT_1_LIMIT         BIT(8)
> +#define   LIMA_GP_IRQ_WRITE_BOUND_ERR          BIT(9)
> +#define   LIMA_GP_IRQ_SYNC_ERROR               BIT(10)
> +#define   LIMA_GP_IRQ_AXI_BUS_ERROR            BIT(11)
> +#define   LIMA_GP_IRQ_AXI_BUS_STOPPED          BIT(12)
> +#define   LIMA_GP_IRQ_VS_INVALID_CMD           BIT(13)
> +#define   LIMA_GP_IRQ_PLB_INVALID_CMD          BIT(14)
> +#define   LIMA_GP_IRQ_RESET_COMPLETED          BIT(19)
> +#define   LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW      BIT(20)
> +#define   LIMA_GP_IRQ_SEMAPHORE_OVERFLOW       BIT(21)
> +#define   LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  BIT(22)
> +#define LIMA_GP_WRITE_BOUND_LOW                0x34
> +#define LIMA_GP_PERF_CNT_0_ENABLE              0x3C
> +#define LIMA_GP_PERF_CNT_1_ENABLE              0x40
> +#define LIMA_GP_PERF_CNT_0_SRC                 0x44
> +#define LIMA_GP_PERF_CNT_1_SRC                 0x48
> +#define LIMA_GP_PERF_CNT_0_VALUE               0x4C
> +#define LIMA_GP_PERF_CNT_1_VALUE               0x50
> +#define LIMA_GP_PERF_CNT_0_LIMIT               0x54
> +#define LIMA_GP_STATUS                         0x68
> +#define   LIMA_GP_STATUS_VS_ACTIVE             BIT(1)
> +#define   LIMA_GP_STATUS_BUS_STOPPED           BIT(2)
> +#define   LIMA_GP_STATUS_PLBU_ACTIVE           BIT(3)
> +#define   LIMA_GP_STATUS_BUS_ERROR             BIT(6)
> +#define   LIMA_GP_STATUS_WRITE_BOUND_ERR       BIT(8)
> +#define LIMA_GP_VERSION                        0x6C
> +#define LIMA_GP_VSCL_START_ADDR_READ           0x80
> +#define LIMA_GP_PLBCL_START_ADDR_READ          0x84
> +#define LIMA_GP_CONTR_AXI_BUS_ERROR_STAT       0x94
> +
> +#define LIMA_GP_IRQ_MASK_ALL              \
> +       (                                  \
> +        LIMA_GP_IRQ_VS_END_CMD_LST      | \
> +        LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
> +        LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
> +        LIMA_GP_IRQ_VS_SEM_IRQ          | \
> +        LIMA_GP_IRQ_PLBU_SEM_IRQ        | \
> +        LIMA_GP_IRQ_HANG                | \
> +        LIMA_GP_IRQ_FORCE_HANG          | \
> +        LIMA_GP_IRQ_PERF_CNT_0_LIMIT    | \
> +        LIMA_GP_IRQ_PERF_CNT_1_LIMIT    | \
> +        LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
> +        LIMA_GP_IRQ_SYNC_ERROR          | \
> +        LIMA_GP_IRQ_AXI_BUS_ERROR       | \
> +        LIMA_GP_IRQ_AXI_BUS_STOPPED     | \
> +        LIMA_GP_IRQ_VS_INVALID_CMD      | \
> +        LIMA_GP_IRQ_PLB_INVALID_CMD     | \
> +        LIMA_GP_IRQ_RESET_COMPLETED     | \
> +        LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
> +        LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
> +        LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
> +
> +#define LIMA_GP_IRQ_MASK_ERROR             \
> +       (                                  \
> +        LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
> +        LIMA_GP_IRQ_FORCE_HANG          | \
> +        LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
> +        LIMA_GP_IRQ_SYNC_ERROR          | \
> +        LIMA_GP_IRQ_AXI_BUS_ERROR       | \
> +        LIMA_GP_IRQ_VS_INVALID_CMD      | \
> +        LIMA_GP_IRQ_PLB_INVALID_CMD     | \
> +        LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
> +        LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
> +        LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
> +
> +#define LIMA_GP_IRQ_MASK_USED             \
> +       (                                  \
> +        LIMA_GP_IRQ_VS_END_CMD_LST      | \
> +        LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
> +        LIMA_GP_IRQ_MASK_ERROR)
> +
> +/* PP regs */
> +#define LIMA_PP_FRAME                        0x0000
> +#define LIMA_PP_RSW                         0x0004
> +#define LIMA_PP_STACK                       0x0030
> +#define LIMA_PP_STACK_SIZE                  0x0034
> +#define LIMA_PP_ORIGIN_OFFSET_X                     0x0040
> +/* register offset of write-back unit i (0-based); the argument is
> + * parenthesized so that any expression can be passed safely */
> +#define LIMA_PP_WB(i)                       (0x0100 * ((i) + 1))
> +#define   LIMA_PP_WB_SOURCE_SELECT           0x0000
> +#define          LIMA_PP_WB_SOURCE_ADDR             0x0004
> +
> +#define LIMA_PP_VERSION                      0x1000
> +#define LIMA_PP_CURRENT_REND_LIST_ADDR       0x1004
> +#define LIMA_PP_STATUS                       0x1008
> +#define   LIMA_PP_STATUS_RENDERING_ACTIVE    BIT(0)
> +#define   LIMA_PP_STATUS_BUS_STOPPED         BIT(4)
> +#define LIMA_PP_CTRL                         0x100c
> +#define   LIMA_PP_CTRL_STOP_BUS              BIT(0)
> +#define   LIMA_PP_CTRL_FLUSH_CACHES          BIT(3)
> +#define   LIMA_PP_CTRL_FORCE_RESET           BIT(5)
> +#define   LIMA_PP_CTRL_START_RENDERING       BIT(6)
> +#define   LIMA_PP_CTRL_SOFT_RESET            BIT(7)
> +#define LIMA_PP_INT_RAWSTAT                  0x1020
> +#define LIMA_PP_INT_CLEAR                    0x1024
> +#define LIMA_PP_INT_MASK                     0x1028
> +#define LIMA_PP_INT_STATUS                   0x102c
> +#define   LIMA_PP_IRQ_END_OF_FRAME           BIT(0)
> +#define   LIMA_PP_IRQ_END_OF_TILE            BIT(1)
> +#define   LIMA_PP_IRQ_HANG                   BIT(2)
> +#define   LIMA_PP_IRQ_FORCE_HANG             BIT(3)
> +#define   LIMA_PP_IRQ_BUS_ERROR              BIT(4)
> +#define   LIMA_PP_IRQ_BUS_STOP               BIT(5)
> +#define   LIMA_PP_IRQ_CNT_0_LIMIT            BIT(6)
> +#define   LIMA_PP_IRQ_CNT_1_LIMIT            BIT(7)
> +#define   LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR   BIT(8)
> +#define   LIMA_PP_IRQ_INVALID_PLIST_COMMAND  BIT(9)
> +#define   LIMA_PP_IRQ_CALL_STACK_UNDERFLOW   BIT(10)
> +#define   LIMA_PP_IRQ_CALL_STACK_OVERFLOW    BIT(11)
> +#define   LIMA_PP_IRQ_RESET_COMPLETED        BIT(12)
> +#define LIMA_PP_WRITE_BOUNDARY_LOW           0x1044
> +#define LIMA_PP_BUS_ERROR_STATUS             0x1050
> +#define LIMA_PP_PERF_CNT_0_ENABLE            0x1080
> +#define LIMA_PP_PERF_CNT_0_SRC               0x1084
> +#define LIMA_PP_PERF_CNT_0_LIMIT             0x1088
> +#define LIMA_PP_PERF_CNT_0_VALUE             0x108c
> +#define LIMA_PP_PERF_CNT_1_ENABLE            0x10a0
> +#define LIMA_PP_PERF_CNT_1_SRC               0x10a4
> +#define LIMA_PP_PERF_CNT_1_LIMIT             0x10a8
> +#define LIMA_PP_PERF_CNT_1_VALUE             0x10ac
> +#define LIMA_PP_PERFMON_CONTR                0x10b0
> +#define LIMA_PP_PERFMON_BASE                 0x10b4
> +
> +#define LIMA_PP_IRQ_MASK_ALL                 \
> +       (                                    \
> +        LIMA_PP_IRQ_END_OF_FRAME          | \
> +        LIMA_PP_IRQ_END_OF_TILE           | \
> +        LIMA_PP_IRQ_HANG                  | \
> +        LIMA_PP_IRQ_FORCE_HANG            | \
> +        LIMA_PP_IRQ_BUS_ERROR             | \
> +        LIMA_PP_IRQ_BUS_STOP              | \
> +        LIMA_PP_IRQ_CNT_0_LIMIT           | \
> +        LIMA_PP_IRQ_CNT_1_LIMIT           | \
> +        LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
> +        LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
> +        LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
> +        LIMA_PP_IRQ_CALL_STACK_OVERFLOW   | \
> +        LIMA_PP_IRQ_RESET_COMPLETED)
> +
> +#define LIMA_PP_IRQ_MASK_ERROR               \
> +       (                                    \
> +        LIMA_PP_IRQ_FORCE_HANG            | \
> +        LIMA_PP_IRQ_BUS_ERROR             | \
> +        LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
> +        LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
> +        LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
> +        LIMA_PP_IRQ_CALL_STACK_OVERFLOW)
> +
> +#define LIMA_PP_IRQ_MASK_USED                \
> +       (                                    \
> +        LIMA_PP_IRQ_END_OF_FRAME          | \
> +        LIMA_PP_IRQ_MASK_ERROR)
> +
> +/* MMU regs */
> +#define LIMA_MMU_DTE_ADDR                     0x0000
> +#define LIMA_MMU_STATUS                       0x0004
> +#define   LIMA_MMU_STATUS_PAGING_ENABLED      BIT(0)
> +#define   LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE   BIT(1)
> +#define   LIMA_MMU_STATUS_STALL_ACTIVE        BIT(2)
> +#define   LIMA_MMU_STATUS_IDLE                BIT(3)
> +#define   LIMA_MMU_STATUS_REPLAY_BUFFER_EMPTY BIT(4)
> +#define   LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE BIT(5)
> +/* bus id field (bits 10:6) of an MMU status value; the argument is
> + * parenthesized so that e.g. "a | b" is shifted as a whole */
> +#define   LIMA_MMU_STATUS_BUS_ID(x)           (((x) >> 6) & 0x1F)
> +#define LIMA_MMU_COMMAND                      0x0008
> +#define   LIMA_MMU_COMMAND_ENABLE_PAGING      0x00
> +#define   LIMA_MMU_COMMAND_DISABLE_PAGING     0x01
> +#define   LIMA_MMU_COMMAND_ENABLE_STALL       0x02
> +#define   LIMA_MMU_COMMAND_DISABLE_STALL      0x03
> +#define   LIMA_MMU_COMMAND_ZAP_CACHE          0x04
> +#define   LIMA_MMU_COMMAND_PAGE_FAULT_DONE    0x05
> +#define   LIMA_MMU_COMMAND_HARD_RESET         0x06
> +#define LIMA_MMU_PAGE_FAULT_ADDR              0x000C
> +#define LIMA_MMU_ZAP_ONE_LINE                 0x0010
> +#define LIMA_MMU_INT_RAWSTAT                  0x0014
> +#define LIMA_MMU_INT_CLEAR                    0x0018
> +#define LIMA_MMU_INT_MASK                     0x001C
> +#define   LIMA_MMU_INT_PAGE_FAULT             BIT(0)
> +#define   LIMA_MMU_INT_READ_BUS_ERROR         BIT(1)
> +#define LIMA_MMU_INT_STATUS                   0x0020
> +
> +#define LIMA_VM_FLAG_PRESENT          BIT(0)
> +#define LIMA_VM_FLAG_READ_PERMISSION  BIT(1)
> +#define LIMA_VM_FLAG_WRITE_PERMISSION BIT(2)
> +#define LIMA_VM_FLAG_OVERRIDE_CACHE   BIT(3)
> +#define LIMA_VM_FLAG_WRITE_CACHEABLE  BIT(4)
> +#define LIMA_VM_FLAG_WRITE_ALLOCATE   BIT(5)
> +#define LIMA_VM_FLAG_WRITE_BUFFERABLE BIT(6)
> +#define LIMA_VM_FLAG_READ_CACHEABLE   BIT(7)
> +#define LIMA_VM_FLAG_READ_ALLOCATE    BIT(8)
> +#define LIMA_VM_FLAG_MASK             0x1FF
> +
> +#define LIMA_VM_FLAGS_CACHE (                   \
> +               LIMA_VM_FLAG_PRESENT |           \
> +               LIMA_VM_FLAG_READ_PERMISSION |   \
> +               LIMA_VM_FLAG_WRITE_PERMISSION |  \
> +               LIMA_VM_FLAG_OVERRIDE_CACHE |    \
> +               LIMA_VM_FLAG_WRITE_CACHEABLE |   \
> +               LIMA_VM_FLAG_WRITE_BUFFERABLE |  \
> +               LIMA_VM_FLAG_READ_CACHEABLE |    \
> +               LIMA_VM_FLAG_READ_ALLOCATE )
> +
> +#define LIMA_VM_FLAGS_UNCACHE (                        \
> +               LIMA_VM_FLAG_PRESENT |          \
> +               LIMA_VM_FLAG_READ_PERMISSION |  \
> +               LIMA_VM_FLAG_WRITE_PERMISSION )
> +
> +/* DLBU regs */
> +#define LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR  0x0000
> +#define        LIMA_DLBU_MASTER_TLLIST_VADDR      0x0004
> +#define        LIMA_DLBU_TLLIST_VBASEADDR         0x0008
> +#define        LIMA_DLBU_FB_DIM                   0x000C
> +#define        LIMA_DLBU_TLLIST_CONF              0x0010
> +#define        LIMA_DLBU_START_TILE_POS           0x0014
> +#define        LIMA_DLBU_PP_ENABLE_MASK           0x0018
> +
> +/* BCAST regs */
> +#define LIMA_BCAST_BROADCAST_MASK    0x0
> +#define LIMA_BCAST_INTERRUPT_MASK    0x4
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> new file mode 100644
> index 000000000000..539b29ce5e9a
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_sched.c
> @@ -0,0 +1,398 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/kthread.h>
> +#include <linux/slab.h>
> +
> +#include "lima_drv.h"
> +#include "lima_sched.h"
> +#include "lima_vm.h"
> +#include "lima_mmu.h"
> +#include "lima_l2_cache.h"
> +#include "lima_object.h"
> +
> +/* Hardware fence of one scheduler pipe. */
> +struct lima_fence {
> +       /* embedded dma_fence; to_lima_fence() converts back from it */
> +       struct dma_fence base;
> +       /* pipe the fence was created for, supplies the timeline name */
> +       struct lima_sched_pipe *pipe;
> +};
> +
> +/* slab all lima fences are allocated from; statics start out zeroed */
> +static struct kmem_cache *lima_fence_slab;
> +
> +/* Create the fence slab.  Returns 0 on success, -ENOMEM on failure. */
> +int lima_sched_slab_init(void)
> +{
> +       lima_fence_slab = kmem_cache_create(
> +               "lima_fence", sizeof(struct lima_fence), 0,
> +               SLAB_HWCACHE_ALIGN, NULL);
> +
> +       return lima_fence_slab ? 0 : -ENOMEM;
> +}
> +
> +/* Destroy the fence slab created by lima_sched_slab_init(). */
> +void lima_sched_slab_fini(void)
> +{
> +       kmem_cache_destroy(lima_fence_slab);
> +}
> +
> +/* Get the lima_fence that embeds @fence as its base member. */
> +static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
> +{
> +       return container_of(fence, struct lima_fence, base);
> +}
> +
> +/* dma_fence_ops.get_driver_name: constant driver name. */
> +static const char *lima_fence_get_driver_name(struct dma_fence *fence)
> +{
> +       return "lima";
> +}
> +
> +/* dma_fence_ops.get_timeline_name: name of the owning pipe's scheduler. */
> +static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
> +{
> +       return to_lima_fence(fence)->pipe->base.name;
> +}
> +
> +/* dma_fence_ops.enable_signaling: nothing to arm, always reports true. */
> +static bool lima_fence_enable_signaling(struct dma_fence *fence)
> +{
> +       return true;
> +}
> +
> +/* RCU callback: hand the fence containing @rcu back to the fence slab. */
> +static void lima_fence_release_rcu(struct rcu_head *rcu)
> +{
> +       struct dma_fence *base = container_of(rcu, struct dma_fence, rcu);
> +
> +       kmem_cache_free(lima_fence_slab, to_lima_fence(base));
> +}
> +
> +/* dma_fence_ops.release: the memory is not freed right away but from an
> + * RCU callback, see lima_fence_release_rcu(). */
> +static void lima_fence_release(struct dma_fence *fence)
> +{
> +       struct lima_fence *f = to_lima_fence(fence);
> +
> +       call_rcu(&f->base.rcu, lima_fence_release_rcu);
> +}
> +
> +/* dma_fence callbacks for lima hardware fences; waiting uses the generic
> + * dma_fence_default_wait() */
> +static const struct dma_fence_ops lima_fence_ops = {
> +       .get_driver_name = lima_fence_get_driver_name,
> +       .get_timeline_name = lima_fence_get_timeline_name,
> +       .enable_signaling = lima_fence_enable_signaling,
> +       .wait = dma_fence_default_wait,
> +       .release = lima_fence_release,
> +};
> +
> +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
> +{
> +       struct lima_fence *fence;
> +
> +       fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);

Out of curiosity, what is the benefit of using a separate slab here?
If this is beneficial, then other drivers should do this too and it
should be common. Otherwise, it adds some complexity.

And maybe the slab should be initialized in probe rather than module_init.

> +       if (!fence)
> +              return NULL;
> +
> +       fence->pipe = pipe;
> +       dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
> +                      pipe->fence_context, ++pipe->fence_seqno);
> +
> +       return fence;
> +}
> +
> +/* Get the lima_sched_task that embeds @job as its base member. */
> +static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
> +{
> +       return container_of(job, struct lima_sched_task, base);
> +}
> +
> +/* Get the lima_sched_pipe that embeds @sched as its base member. */
> +static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
> +{
> +       return container_of(sched, struct lima_sched_pipe, base);
> +}
> +
> +/*
> + * Initialise @task for submission through @context.
> + *
> + * The task keeps its own copy of the @bos array with one GEM reference
> + * per buffer object, plus a reference on @vm; all of them are released
> + * again by lima_sched_task_fini().
> + *
> + * Returns 0 on success, -ENOMEM if the array cannot be duplicated or
> + * the error from drm_sched_job_init().  Nothing is left referenced on
> + * failure.
> + */
> +int lima_sched_task_init(struct lima_sched_task *task,
> +                        struct lima_sched_context *context,
> +                        struct lima_bo **bos, int num_bos,
> +                        struct lima_vm *vm)
> +{
> +       int err, i;
> +
> +       task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
> +       if (!task->bos)
> +               return -ENOMEM;
> +
> +       err = drm_sched_job_init(&task->base, &context->base, vm);
> +       if (err) {
> +               kfree(task->bos);
> +               /* don't leave a dangling pointer behind */
> +               task->bos = NULL;
> +               return err;
> +       }
> +
> +       /* take the buffer references only once nothing can fail any more,
> +        * so that no error path has to drop them again */
> +       for (i = 0; i < num_bos; i++)
> +               drm_gem_object_get(&bos[i]->gem);
> +
> +       task->num_bos = num_bos;
> +       task->vm = lima_vm_get(vm);
> +       return 0;
> +}
> +
> +/*
> + * Release everything a task holds: the scheduler job, the remaining
> + * dependency fences, the buffer object references and the VM reference.
> + */
> +void lima_sched_task_fini(struct lima_sched_task *task)
> +{
> +       int n;
> +
> +       drm_sched_job_cleanup(&task->base);
> +
> +       for (n = 0; n < task->num_dep; n++)
> +               dma_fence_put(task->dep[n]);
> +       kfree(task->dep);
> +
> +       /* the loop is skipped without a bo array; kfree(NULL) is a no-op */
> +       for (n = 0; task->bos && n < task->num_bos; n++)
> +               drm_gem_object_put_unlocked(&task->bos[n]->gem);
> +       kfree(task->bos);
> +
> +       lima_vm_put(task->vm);
> +}
> +
> +/*
> + * Add @fence to the fences @task has to wait for before it may run.
> + *
> + * On success the caller's reference to @fence is consumed: it is either
> + * stored in task->dep[] or dropped.  On -ENOMEM the reference is not
> + * touched.
> + * NOTE(review): presumably the caller drops the fence itself on error -
> + * verify against the callers.
> + *
> + * Returns 0 on success or -ENOMEM if the array cannot be grown.
> + */
> +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
> +{
> +       int i, new_dep = 4;
> +
> +       /* a fence of the same context is definitely earlier than this task */
> +       if (fence->context == task->base.s_fence->finished.context) {
> +               dma_fence_put(fence);
> +               return 0;
> +       }
> +
> +       /* array is full: double it (the initial size is 4 entries) */
> +       if (task->dep && task->num_dep == task->max_dep)
> +               new_dep = task->max_dep * 2;
> +
> +       if (task->max_dep < new_dep) {
> +               void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
> +               if (!dep)
> +                       return -ENOMEM;
> +               task->max_dep = new_dep;
> +               task->dep = dep;
> +       }
> +
> +       /* a later fence of an already tracked context replaces the
> +        * earlier one.
> +        * NOTE(review): a fence that is not later than the tracked one
> +        * falls through and is appended as an additional entry. */
> +       for (i = 0; i < task->num_dep; i++) {
> +               if (task->dep[i]->context == fence->context &&
> +                   dma_fence_is_later(fence, task->dep[i])) {
> +                       dma_fence_put(task->dep[i]);
> +                       task->dep[i] = fence;
> +                       return 0;
> +               }
> +       }
> +
> +       task->dep[task->num_dep++] = fence;
> +       return 0;
> +}
> +
> +/* Initialise @context as a scheduler entity on @pipe's normal-priority
> + * run queue.  Returns the result of drm_sched_entity_init(). */
> +int lima_sched_context_init(struct lima_sched_pipe *pipe,
> +                           struct lima_sched_context *context,
> +                           atomic_t *guilty)
> +{
> +       struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
> +
> +       return drm_sched_entity_init(&context->base, &rq, 1, guilty);
> +}
> +
> +/* Tear down the scheduler entity of @context; @pipe is unused here. */
> +void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> +                            struct lima_sched_context *context)
> +{
> +       drm_sched_entity_fini(&context->base);
> +}
> +
> +/*
> + * Queue @task on @context and return a new reference to the task's
> + * "finished" fence, which the caller has to put.
> + */
> +struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
> +                                               struct lima_sched_task *task)
> +{
> +       /* the reference is taken before the job is pushed
> +        * NOTE(review): presumably because the job may complete and be
> +        * freed at any time after the push - confirm */
> +       struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);
> +
> +       drm_sched_entity_push_job(&task->base, &context->base);
> +       return fence;
> +}
> +
> +/*
> + * .dependency callback: hand out the next unsignaled fence of the task.
> + *
> + * Each slot of task->dep[] is cleared as it is visited.  A fence that is
> + * already signaled is released here; an unsignaled one is returned together
> + * with the reference that the slot was holding.  Returns NULL once no
> + * unsignaled dependency is left.
> + */
> +static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
> +                                              struct drm_sched_entity *entity)
> +{
> +       struct lima_sched_task *task = to_lima_task(job);
> +       int slot;
> +
> +       for (slot = 0; slot < task->num_dep; slot++) {
> +               struct dma_fence *pending = task->dep[slot];
> +
> +               if (pending) {
> +                       task->dep[slot] = NULL;
> +
> +                       if (dma_fence_is_signaled(pending))
> +                               dma_fence_put(pending);
> +                       else
> +                               return pending;
> +               }
> +       }
> +
> +       return NULL;
> +}
> +
> +/*
> + * .run_job callback: start @job on the hardware of its pipe.
> + *
> + * Creates the pipe fence for the task, flushes the L2 caches, switches the
> + * MMU(s) to the task's VM when it differs from the pipe's current one and
> + * kicks the task via pipe->task_run().
> + *
> + * Returns the pipe fence with an extra reference for the caller, or NULL
> + * when the job's finished fence already carries an error or the fence
> + * could not be allocated.
> + */
> +static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
> +{
> +       struct lima_sched_task *task = to_lima_task(job);
> +       struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> +       struct lima_fence *fence;
> +       struct dma_fence *ret;
> +       struct lima_vm *vm = NULL, *last_vm = NULL;
> +       int i;
> +
> +       /* after GPU reset */
> +       if (job->s_fence->finished.error < 0)
> +               return NULL;
> +
> +       fence = lima_fence_create(pipe);
> +       if (!fence)
> +               return NULL;
> +       task->fence = &fence->base;
> +
> +       /* for caller usage of the fence, otherwise irq handler
> +        * may consume the fence before caller use it */
> +       ret = dma_fence_get(task->fence);
> +
> +       pipe->current_task = task;
> +
> +       /* this is needed for MMU to work correctly, otherwise GP/PP
> +        * will hang or page fault for unknown reason after running for
> +        * a while.
> +        *
> +        * Need to investigate:
> +        * 1. is it related to TLB
> +        * 2. how much performance will be affected by L2 cache flush
> +        * 3. can we reduce the calling of this function because all
> +        *    GP/PP use the same L2 cache on mali400
> +        *
> +        * TODO:
> +        * 1. move this to task fini to save some wait time?
> +        * 2. when GP/PP use different l2 cache, need PP wait GP l2
> +        *    cache flush?
> +        */
> +       for (i = 0; i < pipe->num_l2_cache; i++)
> +               lima_l2_cache_flush(pipe->l2_cache[i]);
> +
> +       /* vm stays NULL when the pipe already runs in the task's VM; the
> +        * pipe keeps its own reference on the VM it is switched to */
> +       if (task->vm != pipe->current_vm) {
> +               vm = lima_vm_get(task->vm);
> +               last_vm = pipe->current_vm;
> +               pipe->current_vm = task->vm;
> +       }
> +
> +       if (pipe->bcast_mmu)
> +               lima_mmu_switch_vm(pipe->bcast_mmu, vm);
> +       else {
> +               for (i = 0; i < pipe->num_mmu; i++)
> +                       lima_mmu_switch_vm(pipe->mmu[i], vm);
> +       }
> +
> +       /* the previous VM is only released after the MMU(s) left it */
> +       if (last_vm)
> +               lima_vm_put(last_vm);
> +
> +       pipe->error = false;
> +       pipe->task_run(pipe, task);
> +
> +       /* hand out the reference taken above; task is not read again once
> +        * the hardware has been started */
> +       return ret;
> +}
> +
> +/*
> + * Common recovery path for a timed out or faulted task on @pipe.
> + *
> + * The scheduler thread is parked for the whole sequence and unparked at
> + * the end, after drm_sched_job_recovery() has been called.
> + */
> +static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
> +                                        struct lima_sched_task *task)
> +{
> +       kthread_park(pipe->base.thread);
> +       drm_sched_hw_job_reset(&pipe->base, &task->base);
> +
> +       /* pipe specific error handling callback */
> +       pipe->task_error(pipe);
> +
> +       if (pipe->bcast_mmu)
> +               lima_mmu_page_fault_resume(pipe->bcast_mmu);
> +       else {
> +               int i;
> +               for (i = 0; i < pipe->num_mmu; i++)
> +                       lima_mmu_page_fault_resume(pipe->mmu[i]);
> +       }
> +
> +       /* drop the reference lima_sched_run_job() took for current_vm */
> +       if (pipe->current_vm)
> +               lima_vm_put(pipe->current_vm);
> +
> +       /* with current_vm cleared the next run_job switches the VM again */
> +       pipe->current_vm = NULL;
> +       pipe->current_task = NULL;
> +
> +       drm_sched_job_recovery(&pipe->base);
> +       kthread_unpark(pipe->base.thread);
> +}
> +
> +/* .timedout_job callback: log the timeout and run the common error recovery. */
> +static void lima_sched_timedout_job(struct drm_sched_job *job)
> +{
> +       struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> +       struct lima_sched_task *task = to_lima_task(job);
> +
> +       DRM_ERROR("lima job timeout\n");
> +
> +       lima_sched_handle_error_task(pipe, task);
> +}
> +
> +/*
> + * .free_job callback: release everything the task holds.
> + *
> + * Drops the pipe fence, removes one VM mapping reference for every buffer
> + * object of the task, finalizes the task and returns it to the pipe's slab.
> + */
> +static void lima_sched_free_job(struct drm_sched_job *job)
> +{
> +       struct lima_sched_task *task = to_lima_task(job);
> +       struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> +       int idx;
> +
> +       dma_fence_put(task->fence);
> +
> +       for (idx = 0; idx < task->num_bos; idx++)
> +               lima_vm_bo_del(task->vm, task->bos[idx]);
> +
> +       lima_sched_task_fini(task);
> +       kmem_cache_free(pipe->task_slab, task);
> +}
> +
> +/* scheduler backend callbacks, passed to drm_sched_init() for every pipe */
> +const struct drm_sched_backend_ops lima_sched_ops = {
> +       .dependency = lima_sched_dependency,
> +       .run_job = lima_sched_run_job,
> +       .timedout_job = lima_sched_timedout_job,
> +       .free_job = lima_sched_free_job,
> +};
> +
> +/*
> + * Work item queued by lima_sched_pipe_task_done() when the pipe reported
> + * an error: run the error recovery for the pipe's current task.
> + */
> +static void lima_sched_error_work(struct work_struct *work)
> +{
> +       struct lima_sched_pipe *pipe;
> +
> +       pipe = container_of(work, struct lima_sched_pipe, error_work);
> +       lima_sched_handle_error_task(pipe, pipe->current_task);
> +}
> +
> +/*
> + * Set up the fence context, fence lock, error work and DRM scheduler of
> + * @pipe.  A lima_sched_timeout_ms value <= 0 disables the job timeout.
> + * Returns the result of drm_sched_init().
> + */
> +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> +{
> +       long timeout = MAX_SCHEDULE_TIMEOUT;
> +
> +       if (lima_sched_timeout_ms > 0)
> +               timeout = msecs_to_jiffies(lima_sched_timeout_ms);
> +
> +       pipe->fence_context = dma_fence_context_alloc(1);
> +       spin_lock_init(&pipe->fence_lock);
> +
> +       INIT_WORK(&pipe->error_work, lima_sched_error_work);
> +
> +       return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
> +}
> +
> +/* Counterpart of lima_sched_pipe_init(): shut down the pipe's DRM scheduler. */
> +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
> +{
> +       drm_sched_fini(&pipe->base);
> +}
> +
> +/*
> + * Called when the current task of @pipe has stopped running.
> + *
> + * With pipe->error set, recovery is deferred to the error work item;
> + * otherwise the pipe's task_fini callback runs and the task's pipe fence
> + * is signaled.
> + */
> +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
> +{
> +       struct lima_sched_task *task;
> +
> +       if (pipe->error) {
> +               schedule_work(&pipe->error_work);
> +               return;
> +       }
> +
> +       /* fetch the task before task_fini() runs, as the original order */
> +       task = pipe->current_task;
> +
> +       pipe->task_fini(pipe);
> +       dma_fence_signal(task->fence);
> +}
> diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
> new file mode 100644
> index 000000000000..44985e4da3fb
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_sched.h
> @@ -0,0 +1,104 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_SCHED_H__
> +#define __LIMA_SCHED_H__
> +
> +#include <drm/gpu_scheduler.h>
> +
> +struct lima_vm;
> +
> +/* one unit of work submitted to a pipe, wrapping a DRM scheduler job */
> +struct lima_sched_task {
> +       struct drm_sched_job base;
> +
> +       /* GPU address space the task runs in */
> +       struct lima_vm *vm;
> +       void *frame;
> +
> +       /* fences to wait for: num_dep entries in use, max_dep allocated */
> +       struct dma_fence **dep;
> +       int num_dep;
> +       int max_dep;
> +
> +       /* buffer objects whose VM mapping is released when the job is freed */
> +       struct lima_bo **bos;
> +       int num_bos;
> +
> +       /* pipe fence */
> +       struct dma_fence *fence;
> +};
> +
> +struct lima_sched_context {
> +       struct drm_sched_entity base;
> +};
> +
> +#define LIMA_SCHED_PIPE_MAX_MMU       8
> +#define LIMA_SCHED_PIPE_MAX_L2_CACHE  2
> +#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
> +
> +struct lima_ip;
> +
> +struct lima_sched_pipe {
> +       struct drm_gpu_scheduler base;
> +
> +       u64 fence_context;
> +       u32 fence_seqno;
> +       spinlock_t fence_lock;
> +
> +       struct lima_sched_task *current_task;
> +       struct lima_vm *current_vm;
> +
> +       struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
> +       int num_mmu;
> +
> +       struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
> +       int num_l2_cache;
> +
> +       struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
> +       int num_processor;
> +
> +       struct lima_ip *bcast_processor;
> +       struct lima_ip *bcast_mmu;
> +
> +       u32 done;
> +       bool error;
> +       atomic_t task;
> +
> +       int frame_size;
> +       struct kmem_cache *task_slab;
> +
> +       int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
> +       void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
> +       void (*task_fini)(struct lima_sched_pipe *pipe);
> +       void (*task_error)(struct lima_sched_pipe *pipe);
> +       void (*task_mmu_error)(struct lima_sched_pipe *pipe);
> +
> +       struct work_struct error_work;
> +};
> +
> +int lima_sched_task_init(struct lima_sched_task *task,
> +                        struct lima_sched_context *context,
> +                        struct lima_bo **bos, int num_bos,
> +                        struct lima_vm *vm);
> +void lima_sched_task_fini(struct lima_sched_task *task);
> +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
> +
> +int lima_sched_context_init(struct lima_sched_pipe *pipe,
> +                           struct lima_sched_context *context,
> +                           atomic_t *guilty);
> +void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> +                            struct lima_sched_context *context);
> +struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
> +                                               struct lima_sched_task *task);
> +
> +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
> +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
> +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
> +
> +/* Flag @pipe as failed and invoke its task_mmu_error callback. */
> +static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
> +{
> +       pipe->error = true;
> +       pipe->task_mmu_error(pipe);
> +}
> +
> +int lima_sched_slab_init(void);
> +void lima_sched_slab_fini(void);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
> new file mode 100644
> index 000000000000..39eba3fae019
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_vm.c
> @@ -0,0 +1,280 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#include <linux/slab.h>
> +#include <linux/dma-mapping.h>
> +
> +#include "lima_device.h"
> +#include "lima_vm.h"
> +#include "lima_object.h"
> +#include "lima_regs.h"
> +
> +struct lima_bo_va {
> +       struct list_head list;
> +       unsigned ref_count;
> +
> +       struct drm_mm_node node;
> +
> +       struct lima_vm *vm;
> +};
> +
> +/*
> + * GPU virtual address layout helpers.  PDE/PTE split an address into a page
> + * directory index and a page table index; PBE/BTE split it into an index
> + * into vm->bts[] and an entry index inside that block of
> + * LIMA_VM_NUM_PT_PER_BT consecutive page tables.
> + * Macro arguments are parenthesized so that expressions can be passed.
> + */
> +#define LIMA_VM_PD_SHIFT 22
> +#define LIMA_VM_PT_SHIFT 12
> +#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
> +#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
> +
> +#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
> +#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
> +
> +#define LIMA_PDE(va) ((va) >> LIMA_VM_PD_SHIFT)
> +#define LIMA_PTE(va) (((va) & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
> +#define LIMA_PBE(va) ((va) >> LIMA_VM_PB_SHIFT)
> +#define LIMA_BTE(va) (((va) & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
> +
> +
> +/*
> + * Clear the page table entries covering the inclusive range
> + * [@start, @end].  The page table blocks themselves stay allocated.
> + */
> +static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
> +{
> +       /* 64 bit like in lima_vm_map_page_table(), so that the loop also
> +        * terminates when @end lies in the last page of the 32 bit space */
> +       u64 addr;
> +
> +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> +               u32 pbe = LIMA_PBE(addr);
> +               u32 bte = LIMA_BTE(addr);
> +
> +               vm->bts[pbe].cpu[bte] = 0;
> +       }
> +}
> +
> +/*
> + * Map the inclusive range [@start, @end] page by page, using one entry of
> + * @dma per page.  Page table blocks are allocated on demand and hooked
> + * into the page directory.
> + *
> + * Returns 0 on success or -ENOMEM; on failure the entries written by this
> + * call are cleared again.
> + */
> +static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
> +                                 u32 start, u32 end)
> +{
> +       u64 addr;
> +       int i = 0;
> +
> +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> +               u32 pbe = LIMA_PBE(addr);
> +               u32 bte = LIMA_BTE(addr);
> +
> +               /* first use of this block: allocate its page tables */
> +               if (!vm->bts[pbe].cpu) {
> +                       dma_addr_t pts;
> +                       u32 *pd;
> +                       int j;
> +
> +                       vm->bts[pbe].cpu = dma_alloc_wc(
> +                               vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> +                               &vm->bts[pbe].dma, GFP_KERNEL | __GFP_ZERO);
> +                       if (!vm->bts[pbe].cpu) {
> +                               /* undo the pages mapped so far, if any */
> +                               if (addr != start)
> +                                       lima_vm_unmap_page_table(vm, start, addr - 1);
> +                               return -ENOMEM;
> +                       }
> +
> +                       /* point the block's page directory entries at the
> +                        * consecutive page tables of the new block */
> +                       pts = vm->bts[pbe].dma;
> +                       pd = vm->pd.cpu + (pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT);
> +                       for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
> +                               pd[j] = pts | LIMA_VM_FLAG_PRESENT;
> +                               pts += LIMA_PAGE_SIZE;
> +                       }
> +               }
> +
> +               vm->bts[pbe].cpu[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * Look up the mapping of @bo that belongs to @vm.
> + * Returns the matching lima_bo_va, or NULL when @bo has none for @vm.
> + */
> +static struct lima_bo_va *
> +lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
> +{
> +       struct lima_bo_va *cur;
> +
> +       list_for_each_entry(cur, &bo->va, list) {
> +               if (cur->vm == vm)
> +                       return cur;
> +       }
> +
> +       return NULL;
> +}
> +
> +/*
> + * Take a mapping reference of @bo in @vm.
> + *
> + * An existing mapping only has its ref_count incremented.  Otherwise, when
> + * @create is true, a VA range is allocated from vm->mm and the pages of
> + * @bo are mapped there.
> + *
> + * Returns 0 on success, -ENOENT when there is no mapping and @create is
> + * false, or the error of the allocation/mapping step that failed.
> + */
> +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
> +{
> +       struct lima_bo_va *bo_va;
> +       int err;
> +
> +       mutex_lock(&bo->lock);
> +
> +       bo_va = lima_vm_bo_find(vm, bo);
> +       if (bo_va) {
> +               bo_va->ref_count++;
> +               mutex_unlock(&bo->lock);
> +               return 0;
> +       }
> +
> +       /* should not create new bo_va if not asked by caller */
> +       if (!create) {
> +               mutex_unlock(&bo->lock);
> +               return -ENOENT;
> +       }
> +
> +       bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
> +       if (!bo_va) {
> +               err = -ENOMEM;
> +               goto err_out0;
> +       }
> +
> +       bo_va->vm = vm;
> +       bo_va->ref_count = 1;
> +
> +       /* vm->lock is taken inside bo->lock */
> +       mutex_lock(&vm->lock);
> +
> +       err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
> +       if (err)
> +               goto err_out1;
> +
> +       /* the mapped range is inclusive, hence the -1 */
> +       err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
> +                                    bo_va->node.start + bo_va->node.size - 1);
> +       if (err)
> +               goto err_out2;
> +
> +       mutex_unlock(&vm->lock);
> +
> +       list_add_tail(&bo_va->list, &bo->va);

So you can have 1 BO at multiple VAs? Is that really needed?

> +
> +       mutex_unlock(&bo->lock);
> +       return 0;
> +
> +err_out2:
> +       drm_mm_remove_node(&bo_va->node);
> +err_out1:
> +       mutex_unlock(&vm->lock);
> +       kfree(bo_va);
> +err_out0:
> +       mutex_unlock(&bo->lock);
> +       return err;
> +}
> +
> +/*
> + * Drop one mapping reference of @bo in @vm.  When the last reference goes
> + * away the page table entries are cleared, the VA range is returned to
> + * vm->mm and the lima_bo_va is freed.
> + *
> + * NOTE(review): the result of lima_vm_bo_find() is used unchecked, so this
> + * relies on every call being paired with an earlier successful
> + * lima_vm_bo_add() - confirm against the callers.
> + */
> +void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo)
> +{
> +       struct lima_bo_va *bo_va;
> +
> +       mutex_lock(&bo->lock);
> +
> +       bo_va = lima_vm_bo_find(vm, bo);
> +       if (--bo_va->ref_count > 0) {
> +               mutex_unlock(&bo->lock);
> +               return;
> +       }
> +
> +       mutex_lock(&vm->lock);
> +
> +       lima_vm_unmap_page_table(vm, bo_va->node.start,
> +                                bo_va->node.start + bo_va->node.size - 1);
> +
> +       drm_mm_remove_node(&bo_va->node);
> +
> +       mutex_unlock(&vm->lock);
> +
> +       list_del(&bo_va->list);
> +
> +       mutex_unlock(&bo->lock);
> +
> +       kfree(bo_va);
> +}
> +
> +/*
> + * Return the start address of the mapping of @bo in @vm.
> + *
> + * NOTE(review): dereferences the lookup result unchecked, so @bo must be
> + * mapped in @vm - confirm against the callers.
> + */
> +u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo)
> +{
> +       struct lima_bo_va *bo_va;
> +       u32 ret;
> +
> +       mutex_lock(&bo->lock);
> +
> +       bo_va = lima_vm_bo_find(vm, bo);
> +       ret = bo_va->node.start;
> +
> +       mutex_unlock(&bo->lock);
> +
> +       return ret;
> +}
> +
> +/*
> + * Allocate a new GPU address space for @dev with an initial reference.
> + *
> + * The page directory is allocated zeroed; when the device has a DLBU
> + * buffer it is mapped at LIMA_VA_RESERVE_DLBU.  The VA allocator covers
> + * [dev->va_start, dev->va_end).
> + *
> + * Returns the new VM, or NULL on any failure.
> + */
> +struct lima_vm *lima_vm_create(struct lima_device *dev)
> +{
> +       struct lima_vm *vm;
> +
> +       vm = kzalloc(sizeof(*vm), GFP_KERNEL);
> +       if (!vm)
> +               return NULL;
> +
> +       vm->dev = dev;
> +       mutex_init(&vm->lock);
> +       kref_init(&vm->refcount);
> +
> +       vm->pd.cpu = dma_alloc_wc(dev->dev, LIMA_PAGE_SIZE, &vm->pd.dma,
> +                                 GFP_KERNEL | __GFP_ZERO);
> +       if (!vm->pd.cpu)
> +               goto err_out0;
> +
> +       if (dev->dlbu_cpu) {
> +               /* a single page, so a failure leaves nothing else to free */
> +               int err = lima_vm_map_page_table(
> +                       vm, &dev->dlbu_dma, LIMA_VA_RESERVE_DLBU,
> +                       LIMA_VA_RESERVE_DLBU + LIMA_PAGE_SIZE - 1);
> +               if (err)
> +                       goto err_out1;
> +       }
> +
> +       drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);
> +
> +       return vm;
> +
> +err_out1:
> +       dma_free_wc(dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
> +err_out0:
> +       kfree(vm);
> +       return NULL;
> +}
> +
> +/*
> + * kref release callback of a lima_vm: take down the VA allocator, free
> + * every allocated page table block and the page directory, then free the
> + * VM itself.
> + */
> +void lima_vm_release(struct kref *kref)
> +{
> +       struct lima_vm *vm = container_of(kref, struct lima_vm, refcount);
> +       struct lima_vm_page *bt;
> +
> +       drm_mm_takedown(&vm->mm);
> +
> +       for (bt = vm->bts; bt < vm->bts + LIMA_VM_NUM_BT; bt++) {
> +               if (!bt->cpu)
> +                       continue;
> +
> +               dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> +                           bt->cpu, bt->dma);
> +       }
> +
> +       if (vm->pd.cpu)
> +               dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
> +
> +       kfree(vm);
> +}
> +
> +/*
> + * Debug helper: dump the page directory entries of every allocated page
> + * table block of @vm, followed by the non-zero page table entries, to the
> + * kernel log.
> + */
> +void lima_vm_print(struct lima_vm *vm)
> +{
> +       int i, j, k;
> +       u32 *pd, *pt;
> +
> +       if (!vm->pd.cpu)
> +               return;
> +
> +       pd = vm->pd.cpu;
> +       for (i = 0; i < LIMA_VM_NUM_BT; i++) {
> +               if (!vm->bts[i].cpu)
> +                       continue;
> +
> +               /* pt walks through all page tables of the block in order */
> +               pt = vm->bts[i].cpu;
> +               for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
> +                       int idx = (i << LIMA_VM_NUM_PT_PER_BT_SHIFT) + j;
> +                       printk(KERN_INFO "lima vm pd %03x:%08x\n", idx, pd[idx]);
> +
> +                       for (k = 0; k < LIMA_PAGE_ENT_NUM; k++) {
> +                               u32 pte = *pt++;
> +                               if (pte)
> +                                       printk(KERN_INFO "  pt %03x:%08x\n", k, pte);
> +                       }
> +               }
> +       }
> +}
> diff --git a/drivers/gpu/drm/lima/lima_vm.h b/drivers/gpu/drm/lima/lima_vm.h
> new file mode 100644
> index 000000000000..a135e2f05315
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_vm.h
> @@ -0,0 +1,62 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_VM_H__
> +#define __LIMA_VM_H__
> +
> +#include <drm/drm_mm.h>
> +#include <linux/kref.h>
> +
> +#define LIMA_PAGE_SIZE    4096
> +#define LIMA_PAGE_MASK    (LIMA_PAGE_SIZE - 1)
> +#define LIMA_PAGE_ENT_NUM (LIMA_PAGE_SIZE / sizeof(u32))
> +
> +#define LIMA_VM_NUM_PT_PER_BT_SHIFT 3
> +#define LIMA_VM_NUM_PT_PER_BT (1 << LIMA_VM_NUM_PT_PER_BT_SHIFT)
> +#define LIMA_VM_NUM_BT (LIMA_PAGE_ENT_NUM >> LIMA_VM_NUM_PT_PER_BT_SHIFT)
> +
> +#define LIMA_VA_RESERVE_START  0xFFF00000
> +#define LIMA_VA_RESERVE_DLBU   LIMA_VA_RESERVE_START
> +#define LIMA_VA_RESERVE_END    0x100000000
> +
> +struct lima_device;
> +
> +struct lima_vm_page {
> +       u32 *cpu;
> +       dma_addr_t dma;
> +};
> +
> +struct lima_vm {
> +       struct mutex lock;
> +       struct kref refcount;
> +
> +       struct drm_mm mm;
> +
> +       struct lima_device *dev;
> +
> +       struct lima_vm_page pd;
> +       struct lima_vm_page bts[LIMA_VM_NUM_BT];
> +};
> +
> +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create);
> +void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo);
> +
> +u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo);
> +
> +struct lima_vm *lima_vm_create(struct lima_device *dev);
> +void lima_vm_release(struct kref *kref);
> +
> +static inline struct lima_vm *lima_vm_get(struct lima_vm *vm)
> +{
> +       kref_get(&vm->refcount);
> +       return vm;
> +}
> +
> +static inline void lima_vm_put(struct lima_vm *vm)
> +{
> +       kref_put(&vm->refcount, lima_vm_release);
> +}
> +
> +void lima_vm_print(struct lima_vm *vm);
> +
> +#endif
> diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
> new file mode 100644
> index 000000000000..64fb4807958d
> --- /dev/null
> +++ b/include/uapi/drm/lima_drm.h
> @@ -0,0 +1,126 @@
> +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> +
> +#ifndef __LIMA_DRM_H__
> +#define __LIMA_DRM_H__
> +
> +#include "drm.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif
> +
> +#define LIMA_INFO_GPU_MALI400 0x00
> +#define LIMA_INFO_GPU_MALI450 0x01
> +
> +struct drm_lima_info {
> +       __u32 gpu_id;   /* out */
> +       __u32 num_pp;   /* out */
> +       __u32 valid;    /* out */
> +       __u32 _resv[7];
> +};
> +
> +struct drm_lima_gem_create {
> +       __u32 size;    /* in */
> +       __u32 flags;   /* in */
> +       __u32 handle;  /* out */
> +       __u32 pad;
> +};
> +
> +struct drm_lima_gem_info {
> +       __u32 handle;  /* in */
> +       __u32 va;      /* out */
> +       __u64 offset;  /* out */
> +};
> +
> +#define LIMA_SUBMIT_BO_READ   0x01
> +#define LIMA_SUBMIT_BO_WRITE  0x02
> +
> +struct drm_lima_gem_submit_bo {
> +       __u32 handle;  /* in */
> +       __u32 flags;   /* in */
> +};
> +
> +#define LIMA_GP_FRAME_REG_NUM 6
> +
> +struct drm_lima_gp_frame {
> +       __u32 frame[LIMA_GP_FRAME_REG_NUM];
> +};
> +
> +#define LIMA_PP_FRAME_REG_NUM 23
> +#define LIMA_PP_WB_REG_NUM 12
> +
> +struct drm_lima_m400_pp_frame {
> +       __u32 frame[LIMA_PP_FRAME_REG_NUM];
> +       __u32 num_pp;
> +       __u32 wb[3 * LIMA_PP_WB_REG_NUM];
> +       __u32 plbu_array_address[4];
> +       __u32 fragment_stack_address[4];
> +};
> +
> +struct drm_lima_m450_pp_frame {
> +       __u32 frame[LIMA_PP_FRAME_REG_NUM];
> +       __u32 num_pp;
> +       __u32 wb[3 * LIMA_PP_WB_REG_NUM];
> +       __u32 use_dlbu;
> +       __u32 _pad;
> +       union {
> +               __u32 plbu_array_address[8];
> +               __u32 dlbu_regs[4];
> +       };
> +       __u32 fragment_stack_address[8];
> +};
> +
> +#define LIMA_PIPE_GP  0x00
> +#define LIMA_PIPE_PP  0x01
> +
> +#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
> +
> +struct drm_lima_gem_submit {
> +       __u32 ctx;         /* in */
> +       __u32 pipe;        /* in */
> +       __u32 nr_bos;      /* in */
> +       __u32 frame_size;  /* in */
> +       __u64 bos;         /* in */
> +       __u64 frame;       /* in */
> +       __u32 flags;       /* in */
> +       __u32 out_sync;    /* in */
> +       __u32 in_sync[2];  /* in */
> +};
> +
> +#define LIMA_GEM_WAIT_READ   0x01
> +#define LIMA_GEM_WAIT_WRITE  0x02
> +
> +struct drm_lima_gem_wait {
> +       __u32 handle;      /* in */
> +       __u32 op;          /* in */
> +       __s64 timeout_ns;  /* in */
> +};
> +
> +#define LIMA_CTX_OP_CREATE 1
> +#define LIMA_CTX_OP_FREE   2
> +
> +struct drm_lima_ctx {
> +       __u32 op;          /* in */
> +       __u32 id;          /* in/out */
> +};
> +
> +#define DRM_LIMA_INFO        0x00
> +#define DRM_LIMA_GEM_CREATE  0x01
> +#define DRM_LIMA_GEM_INFO    0x02
> +#define DRM_LIMA_GEM_SUBMIT  0x03
> +#define DRM_LIMA_GEM_WAIT    0x04
> +#define DRM_LIMA_CTX         0x05
> +
> +#define DRM_IOCTL_LIMA_INFO DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_INFO, struct drm_lima_info)
> +#define DRM_IOCTL_LIMA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, struct drm_lima_gem_create)
> +#define DRM_IOCTL_LIMA_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info)
> +#define DRM_IOCTL_LIMA_GEM_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, struct drm_lima_gem_submit)
> +#define DRM_IOCTL_LIMA_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait)
> +#define DRM_IOCTL_LIMA_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_CTX, struct drm_lima_ctx)
> +
> +#if defined(__cplusplus)
> +}
> +#endif
> +
> +#endif /* __LIMA_DRM_H__ */
> --
> 2.17.1
>
Qiang Yu March 2, 2019, 2:32 a.m. UTC | #3
On Thu, Feb 28, 2019 at 5:41 AM Rob Herring <robh@kernel.org> wrote:
>
> On Wed, Feb 27, 2019 at 7:42 AM Qiang Yu <yuq825@gmail.com> wrote:
> >
>
> Looks pretty good. A few small things and some questions hopefully
> some others can answer.
>
> > - Mali 4xx GPUs have two kinds of processors GP and PP. GP is for
> >   OpenGL vertex shader processing and PP is for fragment shader
> >   processing. Each processor has its own MMU so processors work in
> >   virtual address space.
> > - There's only one GP but multiple PP (max 4 for mali 400 and 8
> >   for mali 450) in the same mali 4xx GPU. All PPs are grouped
> >   together to handle a single fragment shader task divided by
> >   FB output tiled pixels. Mali 400 user space driver is
> >   responsible for assigning target tiled pixels to each PP, but mali
> >   450 has a HW module called DLBU to dynamically balance each
> >   PP's load.
> > - User space driver allocate buffer object and map into GPU
> >   virtual address space, upload command stream and draw data with
> >   CPU mmap of the buffer object, then submit task to GP/PP with
> >   a register frame indicating where is the command stream and misc
> >   settings.
> > - There's no command stream validation/relocation due to each user
> >   process has its own GPU virtual address space. GP/PP's MMU switch
> >   virtual address space before running two tasks from different
> >   user process. Error or evil user space code just get MMU fault
> >   or GP/PP error IRQ, then the HW/SW will be recovered.
> > - Use GEM+shmem for MM. Currently just alloc and pin memory when
> >   gem object creation. GPU vm map of the buffer is also done in
> >   the alloc stage in kernel space. We may delay the memory
> >   allocation and real GPU vm map to command submission stage in the
> >   future as improvement.
> > - Use drm_sched for GPU task schedule. Each OpenGL context should
> >   have a lima context object in the kernel to distinguish tasks
> >   from different user. drm_sched gets task from each lima context
> >   in a fair way.
> >
> > v3:
> > - fix comments from kbuild robot
> > - restrict supported arch to tested ones
> >
> > v2:
> > - fix syscall argument check
> > - fix job finish fence leak since kernel 5.0
> > - use drm syncobj to replace native fence
> > - move buffer object GPU va map into kernel
> > - reserve syscall argument space for future info
> > - remove kernel gem modifier
> > - switch TTM back to GEM+shmem MM
> > - use time based io poll
> > - use whole register name
> > - adopt gem reservation obj integration
> > - use drm_timeout_abs_to_jiffies
> >
> > Cc: Eric Anholt <eric@anholt.net>
> > Cc: Rob Herring <robh@kernel.org>
> > Cc: Christian König <ckoenig.leichtzumerken@gmail.com>
> > Cc: Daniel Vetter <daniel@ffwll.ch>
> > Cc: Alex Deucher <alexdeucher@gmail.com>
> > Signed-off-by: Andreas Baierl <ichgeh@imkreisrum.de>
> > Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
> > Signed-off-by: Heiko Stuebner <heiko@sntech.de>
> > Signed-off-by: Marek Vasut <marex@denx.de>
> > Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> > Signed-off-by: Simon Shields <simon@lineageos.org>
> > Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
> > Signed-off-by: Qiang Yu <yuq825@gmail.com>
> > ---
>
> > diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
> > new file mode 100644
> > index 000000000000..398e6d604426
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_bcast.c
> > @@ -0,0 +1,46 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
>
> It's 2019 now.
>
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_bcast.h"
> > +#include "lima_regs.h"
> > +
> > +#define bcast_write(reg, data) writel(data, ip->iomem + reg)
> > +#define bcast_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * Rewrite the low 16 bits of the broadcast mask register so that they
> > + * hold one bit for each of the first @num_pp processors of the PP pipe;
> > + * the upper 16 bits are kept as read from the register.
> > + */
> > +void lima_bcast_enable(struct lima_device *dev, int num_pp)
> > +{
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +       struct lima_ip *ip = dev->ip + lima_ip_bcast;
> > +       int i, mask = bcast_read(LIMA_BCAST_BROADCAST_MASK) & 0xffff0000;
> > +
> > +       for (i = 0; i < num_pp; i++) {
> > +               struct lima_ip *pp = pipe->processor[i];
> > +               /* bit position is the PP's index relative to pp0 */
> > +               mask |= 1 << (pp->id - lima_ip_pp0);
> > +       }
> > +
> > +       bcast_write(LIMA_BCAST_BROADCAST_MASK, mask);
> > +}
> > +
> > +/*
> > + * Build a bit mask of the PPs (pp0..pp7) that are present on the device
> > + * and write it to the upper 16 bits of the broadcast mask register and to
> > + * the interrupt mask register.  Always returns 0.
> > + */
> > +int lima_bcast_init(struct lima_ip *ip)
> > +{
> > +       int i, mask = 0;
> > +
> > +       for (i = lima_ip_pp0; i <= lima_ip_pp7; i++) {
> > +               if (ip->dev->ip[i].present)
> > +                       mask |= 1 << (i - lima_ip_pp0);
> > +       }
> > +
> > +       bcast_write(LIMA_BCAST_BROADCAST_MASK, mask << 16);
> > +       bcast_write(LIMA_BCAST_INTERRUPT_MASK, mask);
> > +       return 0;
> > +}
> > +
> > +void lima_bcast_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > diff --git a/drivers/gpu/drm/lima/lima_bcast.h b/drivers/gpu/drm/lima/lima_bcast.h
> > new file mode 100644
> > index 000000000000..345e3e809860
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_bcast.h
> > @@ -0,0 +1,14 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_BCAST_H__
> > +#define __LIMA_BCAST_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_bcast_init(struct lima_ip *ip);
> > +void lima_bcast_fini(struct lima_ip *ip);
> > +
> > +void lima_bcast_enable(struct lima_device *dev, int num_pp);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
> > new file mode 100644
> > index 000000000000..439cb44d7a0d
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_ctx.c
> > @@ -0,0 +1,105 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/slab.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_ctx.h"
> > +
> > +/*
> > + * Create a context with one scheduler context per pipe and register it
> > + * in @mgr.
> > + *
> > + * On success 0 is returned and the new handle (>= 1) is stored in @id.
> > + * On failure a negative error code is returned and everything set up so
> > + * far is torn down again.
> > + */
> > +int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id)
> > +{
> > +       struct lima_ctx *ctx;
> > +       int i, err;
> > +
> > +       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> > +       if (!ctx)
> > +               return -ENOMEM;
> > +       ctx->dev = dev;
> > +       kref_init(&ctx->refcnt);
> > +
> > +       for (i = 0; i < lima_pipe_num; i++) {
> > +               err = lima_sched_context_init(dev->pipe + i, ctx->context + i, &ctx->guilty);
> > +               if (err)
> > +                       goto err_out0;
> > +       }
> > +
> > +       /* preload outside of the spinlock so that the allocation under the
> > +        * lock can use GFP_ATOMIC */
> > +       idr_preload(GFP_KERNEL);
> > +       spin_lock(&mgr->lock);
> > +       err = idr_alloc(&mgr->handles, ctx, 1, 0, GFP_ATOMIC);
> > +       spin_unlock(&mgr->lock);
> > +       idr_preload_end();
> > +       if (err < 0)
> > +               goto err_out0;
> > +
> > +       *id = err;
> > +       return 0;
> > +
> > +err_out0:
> > +       /* i is the number of scheduler contexts that were initialized */
> > +       for (i--; i >= 0; i--)
> > +               lima_sched_context_fini(dev->pipe + i, ctx->context + i);
> > +       kfree(ctx);
> > +       return err;
> > +}
> > +
> > +/*
> > + * kref release callback of a lima_ctx: finalize the scheduler context of
> > + * every pipe and free the context.
> > + */
> > +static void lima_ctx_do_release(struct kref *ref)
> > +{
> > +       struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt);
> > +       int i;
> > +
> > +       for (i = 0; i < lima_pipe_num; i++)
> > +               lima_sched_context_fini(ctx->dev->pipe + i, ctx->context + i);
> > +       kfree(ctx);
> > +}
> > +
> > +/*
> > + * Remove the handle @id from @mgr and drop the reference the handle was
> > + * holding on its context.
> > + * Returns 0 on success or -EINVAL when @id is not a known handle.
> > + */
> > +int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id)
> > +{
> > +       struct lima_ctx *ctx;
> > +
> > +       spin_lock(&mgr->lock);
> > +       ctx = idr_remove(&mgr->handles, id);
> > +       spin_unlock(&mgr->lock);
> > +
> > +       if (!ctx)
> > +               return -EINVAL;
> > +
> > +       kref_put(&ctx->refcnt, lima_ctx_do_release);
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Look up handle @id in @mgr. Returns the context with an extra
> > + * reference taken (release it with lima_ctx_put()), or NULL if the
> > + * handle is unknown. The reference is taken under mgr->lock.
> > + */
> > +struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id)
> > +{
> > +       struct lima_ctx *found;
> > +
> > +       spin_lock(&mgr->lock);
> > +
> > +       found = idr_find(&mgr->handles, id);
> > +       if (found != NULL)
> > +               kref_get(&found->refcnt);
> > +
> > +       spin_unlock(&mgr->lock);
> > +
> > +       return found;
> > +}
> > +
> > +/* Drop one reference on @ctx; the last put runs lima_ctx_do_release(). */
> > +void lima_ctx_put(struct lima_ctx *ctx)
> > +{
> > +       kref_put(&ctx->refcnt, lima_ctx_do_release);
> > +}
> > +
> > +/* Initialise the lock and the (empty) handle IDR of a context manager. */
> > +void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr)
> > +{
> > +       spin_lock_init(&mgr->lock);
> > +       idr_init(&mgr->handles);
> > +}
> > +
> > +/*
> > + * Drop the IDR's reference on every context still registered in @mgr,
> > + * then destroy the IDR. mgr->lock is not taken here.
> > + */
> > +void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr)
> > +{
> > +       struct lima_ctx *entry;
> > +       uint32_t handle;
> > +
> > +       idr_for_each_entry(&mgr->handles, entry, handle)
> > +               kref_put(&entry->refcnt, lima_ctx_do_release);
> > +
> > +       idr_destroy(&mgr->handles);
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h
> > new file mode 100644
> > index 000000000000..2d32ff9b30ad
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_ctx.h
> > @@ -0,0 +1,30 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_CTX_H__
> > +#define __LIMA_CTX_H__
> > +
> > +#include <linux/idr.h>
> > +
> > +#include "lima_device.h"
> > +
> > +/* One submission context, looked up by handle through a lima_ctx_mgr. */
> > +struct lima_ctx {
> > +       /* Lifetime refcount; lima_ctx_do_release() runs on the last put. */
> > +       struct kref refcnt;
> > +       /* Device whose pipes the sched contexts below are bound to. */
> > +       struct lima_device *dev;
> > +       /* One scheduler context per pipe, indexed by enum lima_pipe_id. */
> > +       struct lima_sched_context context[lima_pipe_num];
> > +       /* Handed to lima_sched_context_init() for every pipe. */
> > +       atomic_t guilty;
> > +};
> > +
> > +/* Handle table mapping context ids to struct lima_ctx. */
> > +struct lima_ctx_mgr {
> > +       /* Protects lookups, insertions and removals in @handles. */
> > +       spinlock_t lock;
> > +       /* id -> struct lima_ctx *; ids start at 1. */
> > +       struct idr handles;
> > +};
> > +
> > +int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id);
> > +int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id);
> > +struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id);
> > +void lima_ctx_put(struct lima_ctx *ctx);
> > +void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr);
> > +void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
> > new file mode 100644
> > index 000000000000..2e137a0baddb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_device.c
> > @@ -0,0 +1,376 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/regulator/consumer.h>
> > +#include <linux/reset.h>
> > +#include <linux/clk.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/platform_device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_gp.h"
> > +#include "lima_pp.h"
> > +#include "lima_mmu.h"
> > +#include "lima_pmu.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_bcast.h"
> > +#include "lima_vm.h"
> > +
> > +/* Static description of one hardware IP block, per supported GPU model. */
> > +struct lima_ip_desc {
> > +       /* Name returned by lima_ip_name(). */
> > +       char *name;
> > +       /* Name passed to platform_get_irq_byname(), or NULL for no IRQ. */
> > +       char *irq_name;
> > +       /* Per model: true if failing to bring this IP up is fatal. */
> > +       bool must_have[lima_gpu_num];
> > +       /* Per model: register offset from the device base; < 0 if absent. */
> > +       int offset[lima_gpu_num];
> > +
> > +       /* Bring-up / tear-down hooks for this IP type. */
> > +       int (*init)(struct lima_ip *);
> > +       void (*fini)(struct lima_ip *);
> > +};
> > +
> > +/*
> > + * Build the lima_ip_desc[] entry for lima_ip_<ipname>.
> > + * mst0/off0 apply to Mali-400, mst1/off1 to Mali-450; func selects the
> > + * lima_<func>_init()/lima_<func>_fini() pair; irq is the IRQ name or NULL.
> > + */
> > +#define LIMA_IP_DESC(ipname, mst0, mst1, off0, off1, func, irq) \
> > +       [lima_ip_##ipname] = { \
> > +               .name = #ipname, \
> > +               .irq_name = irq, \
> > +               .must_have = { \
> > +                       [lima_gpu_mali400] = mst0, \
> > +                       [lima_gpu_mali450] = mst1, \
> > +               }, \
> > +               .offset = { \
> > +                       [lima_gpu_mali400] = off0, \
> > +                       [lima_gpu_mali450] = off1, \
> > +               }, \
> > +               .init = lima_##func##_init, \
> > +               .fini = lima_##func##_fini, \
> > +       }
> > +
> > +/*
> > + * IP description table, indexed by enum lima_ip_id (designated
> > + * initialisers, so row order here need not match the enum order).
> > + * Columns: name, must_have(400), must_have(450), offset(400),
> > + * offset(450), init/fini prefix, IRQ name. An offset of -1 means the
> > + * IP does not exist on that model.
> > + */
> > +static struct lima_ip_desc lima_ip_desc[lima_ip_num] = {
> > +       LIMA_IP_DESC(pmu,         false, false, 0x02000, 0x02000, pmu,      "pmu"),
> > +       LIMA_IP_DESC(l2_cache0,   true,  true,  0x01000, 0x10000, l2_cache, NULL),
> > +       LIMA_IP_DESC(l2_cache1,   false, true,  -1,      0x01000, l2_cache, NULL),
> > +       LIMA_IP_DESC(l2_cache2,   false, false, -1,      0x11000, l2_cache, NULL),
> > +       LIMA_IP_DESC(gp,          true,  true,  0x00000, 0x00000, gp,       "gp"),
> > +       LIMA_IP_DESC(pp0,         true,  true,  0x08000, 0x08000, pp,       "pp0"),
> > +       LIMA_IP_DESC(pp1,         false, false, 0x0A000, 0x0A000, pp,       "pp1"),
> > +       LIMA_IP_DESC(pp2,         false, false, 0x0C000, 0x0C000, pp,       "pp2"),
> > +       LIMA_IP_DESC(pp3,         false, false, 0x0E000, 0x0E000, pp,       "pp3"),
> > +       LIMA_IP_DESC(pp4,         false, false, -1,      0x28000, pp,       "pp4"),
> > +       LIMA_IP_DESC(pp5,         false, false, -1,      0x2A000, pp,       "pp5"),
> > +       LIMA_IP_DESC(pp6,         false, false, -1,      0x2C000, pp,       "pp6"),
> > +       LIMA_IP_DESC(pp7,         false, false, -1,      0x2E000, pp,       "pp7"),
> > +       LIMA_IP_DESC(gpmmu,       true,  true,  0x03000, 0x03000, mmu,      "gpmmu"),
> > +       LIMA_IP_DESC(ppmmu0,      true,  true,  0x04000, 0x04000, mmu,      "ppmmu0"),
> > +       LIMA_IP_DESC(ppmmu1,      false, false, 0x05000, 0x05000, mmu,      "ppmmu1"),
> > +       LIMA_IP_DESC(ppmmu2,      false, false, 0x06000, 0x06000, mmu,      "ppmmu2"),
> > +       LIMA_IP_DESC(ppmmu3,      false, false, 0x07000, 0x07000, mmu,      "ppmmu3"),
> > +       LIMA_IP_DESC(ppmmu4,      false, false, -1,      0x1C000, mmu,      "ppmmu4"),
> > +       LIMA_IP_DESC(ppmmu5,      false, false, -1,      0x1D000, mmu,      "ppmmu5"),
> > +       LIMA_IP_DESC(ppmmu6,      false, false, -1,      0x1E000, mmu,      "ppmmu6"),
> > +       LIMA_IP_DESC(ppmmu7,      false, false, -1,      0x1F000, mmu,      "ppmmu7"),
> > +       LIMA_IP_DESC(dlbu,        false, true,  -1,      0x14000, dlbu,     NULL),
> > +       LIMA_IP_DESC(bcast,       false, true,  -1,      0x13000, bcast,    NULL),
> > +       LIMA_IP_DESC(pp_bcast,    false, true,  -1,      0x16000, pp_bcast, "pp"),
> > +       LIMA_IP_DESC(ppmmu_bcast, false, true,  -1,      0x15000, mmu,      NULL),
> > +};
> > +
> > +/* Return the table name of @ip (e.g. "gp", "pp0"), looked up by ip->id. */
> > +const char *lima_ip_name(struct lima_ip *ip)
> > +{
> > +       return lima_ip_desc[ip->id].name;
> > +}
> > +
> > +/*
> > + * Acquire and enable the "bus" and "core" clocks and, if one is
> > + * described for the device, deassert its reset line.
> > + *
> > + * Returns 0 on success. On failure returns the error of the failing
> > + * step with any clock enabled here disabled again.
> > + */
> > +static int lima_clk_init(struct lima_device *dev)
> > +{
> > +       int err;
> > +       unsigned long bus_rate, gpu_rate;
> > +
> > +       dev->clk_bus = devm_clk_get(dev->dev, "bus");
> > +       if (IS_ERR(dev->clk_bus)) {
> > +               dev_err(dev->dev, "get bus clk failed %ld\n", PTR_ERR(dev->clk_bus));
> > +               return PTR_ERR(dev->clk_bus);
> > +       }
> > +
> > +       dev->clk_gpu = devm_clk_get(dev->dev, "core");
> > +       if (IS_ERR(dev->clk_gpu)) {
> > +               dev_err(dev->dev, "get core clk failed %ld\n", PTR_ERR(dev->clk_gpu));
> > +               return PTR_ERR(dev->clk_gpu);
> > +       }
> > +
> > +       bus_rate = clk_get_rate(dev->clk_bus);
> > +       dev_info(dev->dev, "bus rate = %lu\n", bus_rate);
> > +
> > +       /* Terminate the message with '\n' like every other log line here. */
> > +       gpu_rate = clk_get_rate(dev->clk_gpu);
> > +       dev_info(dev->dev, "mod rate = %lu\n", gpu_rate);
> > +
> > +       err = clk_prepare_enable(dev->clk_bus);
> > +       if (err)
> > +               return err;
> > +
> > +       err = clk_prepare_enable(dev->clk_gpu);
> > +       if (err)
> > +               goto error_out0;
> > +
> > +       /* The reset line is optional: NULL means the device has none. */
> > +       dev->reset = devm_reset_control_get_optional(dev->dev, NULL);
> > +       if (IS_ERR(dev->reset)) {
> > +               err = PTR_ERR(dev->reset);
> > +               goto error_out1;
> > +       }
> > +
> > +       if (dev->reset != NULL) {
> > +               err = reset_control_deassert(dev->reset);
> > +               if (err)
> > +                       goto error_out1;
> > +       }
> > +
> > +       return 0;
> > +
> > +error_out1:
> > +       clk_disable_unprepare(dev->clk_gpu);
> > +error_out0:
> > +       clk_disable_unprepare(dev->clk_bus);
> > +       return err;
> > +}
> > +
> > +/* Undo lima_clk_init(): assert the reset line (if any), stop both clocks. */
> > +static void lima_clk_fini(struct lima_device *dev)
> > +{
> > +       if (dev->reset)
> > +               reset_control_assert(dev->reset);
> > +
> > +       clk_disable_unprepare(dev->clk_gpu);
> > +       clk_disable_unprepare(dev->clk_bus);
> > +}
> > +
> > +/*
> > + * Look up and enable the optional "mali" regulator.
> > + *
> > + * A missing regulator (-ENODEV) is not an error: dev->regulator is left
> > + * NULL and 0 is returned. Any other lookup or enable failure is logged
> > + * and returned.
> > + */
> > +static int lima_regulator_init(struct lima_device *dev)
> > +{
> > +       struct regulator *supply;
> > +       int ret;
> > +
> > +       supply = devm_regulator_get_optional(dev->dev, "mali");
> > +       if (IS_ERR(supply)) {
> > +               dev->regulator = NULL;
> > +               ret = PTR_ERR(supply);
> > +               if (ret != -ENODEV)
> > +                       dev_err(dev->dev, "failed to get regulator: %d\n", ret);
> > +               return ret == -ENODEV ? 0 : ret;
> > +       }
> > +       dev->regulator = supply;
> > +
> > +       ret = regulator_enable(dev->regulator);
> > +       if (ret >= 0)
> > +               return 0;
> > +
> > +       dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
> > +       return ret;
> > +}
> > +
> > +/* Disable the regulator, if lima_regulator_init() found one. */
> > +static void lima_regulator_fini(struct lima_device *dev)
> > +{
> > +       if (dev->regulator)
> > +               regulator_disable(dev->regulator);
> > +}
> > +
> > +/*
> > + * Bring up the IP block at table position @index for this GPU model.
> > + *
> > + * Returns 0 if the IP was initialised, does not exist on this model, or
> > + * failed but is optional; returns the error only for must_have IPs.
> > + * ip->present is set to true only when init succeeded.
> > + */
> > +static int lima_init_ip(struct lima_device *dev, int index)
> > +{
> > +       struct lima_ip_desc *desc = lima_ip_desc + index;
> > +       struct lima_ip *ip = dev->ip + index;
> > +       int offset = desc->offset[dev->id];
> > +       bool must = desc->must_have[dev->id];
> > +       int err;
> > +
> > +       /* A negative offset marks an IP that this model does not have. */
> > +       if (offset < 0)
> > +               return 0;
> > +
> > +       ip->dev = dev;
> > +       ip->id = index;
> > +       ip->iomem = dev->iomem + offset;
> > +       if (desc->irq_name) {
> > +               err = platform_get_irq_byname(dev->pdev, desc->irq_name);
> > +               if (err < 0)
> > +                       goto out;
> > +               ip->irq = err;
> > +       }
> > +
> > +       err = desc->init(ip);
> > +       if (!err) {
> > +               ip->present = true;
> > +               return 0;
> > +       }
> > +
> > +out:
> > +       /* Failures of optional IPs are swallowed; the IP stays !present. */
> > +       return must ? err : 0;
> > +}
> > +
> > +/* Run the fini hook of IP @index, but only if it was brought up. */
> > +static void lima_fini_ip(struct lima_device *ldev, int index)
> > +{
> > +       struct lima_ip_desc *desc = lima_ip_desc + index;
> > +       struct lima_ip *ip = ldev->ip + index;
> > +
> > +       if (ip->present)
> > +               desc->fini(ip);
> > +}
> > +
> > +/*
> > + * Set up the GP scheduler pipe: initialise the pipe, attach L2 cache 0,
> > + * the GP MMU and the GP processor to it, then run the GP-specific pipe
> > + * init. Returns 0 or the error of the failing step; the pipe is torn
> > + * down again if the GP-specific init fails.
> > + */
> > +static int lima_init_gp_pipe(struct lima_device *dev)
> > +{
> > +       struct lima_sched_pipe *gp_pipe = &dev->pipe[lima_pipe_gp];
> > +       int ret;
> > +
> > +       ret = lima_sched_pipe_init(gp_pipe, "gp");
> > +       if (ret)
> > +               return ret;
> > +
> > +       gp_pipe->l2_cache[gp_pipe->num_l2_cache++] = &dev->ip[lima_ip_l2_cache0];
> > +       gp_pipe->mmu[gp_pipe->num_mmu++] = &dev->ip[lima_ip_gpmmu];
> > +       gp_pipe->processor[gp_pipe->num_processor++] = &dev->ip[lima_ip_gp];
> > +
> > +       ret = lima_gp_pipe_init(dev);
> > +       if (ret)
> > +               lima_sched_pipe_fini(gp_pipe);
> > +
> > +       return ret;
> > +}
> > +
> > +/* Undo lima_init_gp_pipe(), in reverse order. */
> > +static void lima_fini_gp_pipe(struct lima_device *dev)
> > +{
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > +
> > +       lima_gp_pipe_fini(dev);
> > +       lima_sched_pipe_fini(pipe);
> > +}
> > +
> > +/*
> > + * Set up the PP scheduler pipe from whichever PP cores were brought up.
> > + *
> > + * A PP is attached only if the core, its MMU and its L2 cache are all
> > + * present. Returns 0 or the error of the failing step.
> > + */
> > +static int lima_init_pp_pipe(struct lima_device *dev)
> > +{
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +       int err, i;
> > +
> > +       if ((err = lima_sched_pipe_init(pipe, "pp")))
> > +               return err;
> > +
> > +       for (i = 0; i < LIMA_SCHED_PIPE_MAX_PROCESSOR; i++) {
> > +               /* Relies on pp0.. and ppmmu0.. being contiguous in lima_ip_id. */
> > +               struct lima_ip *pp = dev->ip + lima_ip_pp0 + i;
> > +               struct lima_ip *ppmmu = dev->ip + lima_ip_ppmmu0 + i;
> > +               struct lima_ip *l2_cache;
> > +
> > +               /*
> > +                * Mali-400: every PP uses L2 cache 0. Otherwise PPs are
> > +                * grouped by four: PP i uses l2_cache1 + i / 4.
> > +                */
> > +               if (dev->id == lima_gpu_mali400)
> > +                       l2_cache = dev->ip + lima_ip_l2_cache0;
> > +               else
> > +                       l2_cache = dev->ip + lima_ip_l2_cache1 + (i >> 2);
> > +
> > +               if (pp->present && ppmmu->present && l2_cache->present) {
> > +                       pipe->mmu[pipe->num_mmu++] = ppmmu;
> > +                       pipe->processor[pipe->num_processor++] = pp;
> > +                       /* Record each L2 cache once: only if slot i / 4 is still empty. */
> > +                       if (!pipe->l2_cache[i >> 2])
> > +                               pipe->l2_cache[pipe->num_l2_cache++] = l2_cache;
> > +               }
> > +       }
> > +
> > +       /* With a broadcast unit, also record the broadcast PP and MMU. */
> > +       if (dev->ip[lima_ip_bcast].present) {
> > +               pipe->bcast_processor = dev->ip + lima_ip_pp_bcast;
> > +               pipe->bcast_mmu = dev->ip + lima_ip_ppmmu_bcast;
> > +       }
> > +
> > +       if ((err = lima_pp_pipe_init(dev))) {
> > +               lima_sched_pipe_fini(pipe);
> > +               return err;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +/* Undo lima_init_pp_pipe(), in reverse order. */
> > +static void lima_fini_pp_pipe(struct lima_device *dev)
> > +{
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +
> > +       lima_pp_pipe_fini(dev);
> > +       lima_sched_pipe_fini(pipe);
> > +}
> > +
> > +/*
> > + * Bring the whole GPU up: clocks/reset, regulator, the empty VM, the
> > + * DLBU page (Mali-450 only), the register mapping, every IP block and
> > + * finally the GP and PP scheduler pipes.
> > + *
> > + * Returns 0 on success. On failure everything set up so far is undone
> > + * in reverse order through the err_out labels and the error is returned.
> > + */
> > +int lima_device_init(struct lima_device *ldev)
> > +{
> > +       int err, i;
> > +       struct resource *res;
> > +
> > +       /* NOTE(review): the return value of this call is not checked. */
> > +       dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
> > +
> > +       err = lima_clk_init(ldev);
> > +       if (err) {
> > +               dev_err(ldev->dev, "clk init fail %d\n", err);
> > +               return err;
> > +       }
> > +
> > +       if ((err = lima_regulator_init(ldev))) {
> > +               dev_err(ldev->dev, "regulator init fail %d\n", err);
> > +               goto err_out0;
> > +       }
> > +
> > +       ldev->empty_vm = lima_vm_create(ldev);
> > +       if (!ldev->empty_vm) {
> > +               err = -ENOMEM;
> > +               goto err_out1;
> > +       }
> > +
> > +       /* Mali-450 gets a smaller VA range plus one page for the DLBU. */
> > +       ldev->va_start = 0;
> > +       if (ldev->id == lima_gpu_mali450) {
> > +               ldev->va_end = LIMA_VA_RESERVE_START;
> > +               ldev->dlbu_cpu = dma_alloc_wc(
> > +                       ldev->dev, LIMA_PAGE_SIZE,
> > +                       &ldev->dlbu_dma, GFP_KERNEL);
> > +               if (!ldev->dlbu_cpu) {
> > +                       err = -ENOMEM;
> > +                       goto err_out2;
> > +               }
> > +       }
> > +       else
> > +               ldev->va_end = LIMA_VA_RESERVE_END;
> > +
> > +       res = platform_get_resource(ldev->pdev, IORESOURCE_MEM, 0);
> > +       ldev->iomem = devm_ioremap_resource(ldev->dev, res);
> > +       if (IS_ERR(ldev->iomem)) {
> > +               dev_err(ldev->dev, "fail to ioremap iomem\n");
> > +               err = PTR_ERR(ldev->iomem);
> > +               goto err_out3;
> > +       }
> > +
> > +       for (i = 0; i < lima_ip_num; i++) {
> > +               err = lima_init_ip(ldev, i);
> > +               if (err)
> > +                       goto err_out4;
> > +       }
> > +
> > +       err = lima_init_gp_pipe(ldev);
> > +       if (err)
> > +               goto err_out4;
> > +
> > +       err = lima_init_pp_pipe(ldev);
> > +       if (err)
> > +               goto err_out5;
> > +
> > +       return 0;
> > +
> > +err_out5:
> > +       lima_fini_gp_pipe(ldev);
> > +err_out4:
> > +       /*
> > +        * i is the index of the IP that failed, or lima_ip_num when a
> > +        * pipe init failed; finish every IP below it, last one first.
> > +        */
> > +       while (--i >= 0)
> > +               lima_fini_ip(ldev, i);
> > +err_out3:
> > +       if (ldev->dlbu_cpu)
> > +               dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
> > +                           ldev->dlbu_cpu, ldev->dlbu_dma);
> > +err_out2:
> > +       lima_vm_put(ldev->empty_vm);
> > +err_out1:
> > +       lima_regulator_fini(ldev);
> > +err_out0:
> > +       lima_clk_fini(ldev);
> > +       return err;
> > +}
> > +
> > +/* Tear the GPU down, in the reverse order of lima_device_init(). */
> > +void lima_device_fini(struct lima_device *ldev)
> > +{
> > +       int i;
> > +
> > +       lima_fini_pp_pipe(ldev);
> > +       lima_fini_gp_pipe(ldev);
> > +
> > +       for (i = lima_ip_num - 1; i >= 0; i--)
> > +               lima_fini_ip(ldev, i);
> > +
> > +       /* Only allocated on Mali-450. */
> > +       if (ldev->dlbu_cpu)
> > +               dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
> > +                           ldev->dlbu_cpu, ldev->dlbu_dma);
> > +
> > +       lima_vm_put(ldev->empty_vm);
> > +
> > +       lima_regulator_fini(ldev);
> > +
> > +       lima_clk_fini(ldev);
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
> > new file mode 100644
> > index 000000000000..41499f28ae13
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_device.h
> > @@ -0,0 +1,129 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_DEVICE_H__
> > +#define __LIMA_DEVICE_H__
> > +
> > +#include <drm/drm_device.h>
> > +#include <linux/delay.h>
> > +
> > +#include "lima_sched.h"
> > +
> > +/* Supported GPU models; also indexes the per-model arrays in lima_ip_desc. */
> > +enum lima_gpu_id {
> > +       lima_gpu_mali400 = 0,
> > +       lima_gpu_mali450,
> > +       lima_gpu_num,
> > +};
> > +
> > +/*
> > + * Index of every IP block in lima_device.ip[] and lima_ip_desc[].
> > + * IPs are initialised in this order and finished in reverse. The
> > + * ppmmu0..7, pp0..7 and l2_cache0..2 runs must stay contiguous: the PP
> > + * pipe setup addresses them as base + i.
> > + */
> > +enum lima_ip_id {
> > +       lima_ip_pmu,
> > +       lima_ip_gpmmu,
> > +       lima_ip_ppmmu0,
> > +       lima_ip_ppmmu1,
> > +       lima_ip_ppmmu2,
> > +       lima_ip_ppmmu3,
> > +       lima_ip_ppmmu4,
> > +       lima_ip_ppmmu5,
> > +       lima_ip_ppmmu6,
> > +       lima_ip_ppmmu7,
> > +       lima_ip_gp,
> > +       lima_ip_pp0,
> > +       lima_ip_pp1,
> > +       lima_ip_pp2,
> > +       lima_ip_pp3,
> > +       lima_ip_pp4,
> > +       lima_ip_pp5,
> > +       lima_ip_pp6,
> > +       lima_ip_pp7,
> > +       lima_ip_l2_cache0,
> > +       lima_ip_l2_cache1,
> > +       lima_ip_l2_cache2,
> > +       lima_ip_dlbu,
> > +       lima_ip_bcast,
> > +       lima_ip_pp_bcast,
> > +       lima_ip_ppmmu_bcast,
> > +       lima_ip_num,
> > +};
> > +
> > +struct lima_device;
> > +
> > +/* Runtime state of one hardware IP block. */
> > +struct lima_ip {
> > +       /* Owning device. */
> > +       struct lima_device *dev;
> > +       /* Which IP this is; also its index in dev->ip[]. */
> > +       enum lima_ip_id id;
> > +       /* True once the IP's init hook has succeeded. */
> > +       bool present;
> > +
> > +       /* Register base of this IP: device base plus the table offset. */
> > +       void __iomem *iomem;
> > +       /* IRQ number; only set for IPs that have a named IRQ. */
> > +       int irq;
> > +
> > +       /* Per-IP-type private state. */
> > +       union {
> > +               /* gp/pp */
> > +               bool async_reset;
> > +               /* l2 cache */
> > +               spinlock_t lock;
> > +       } data;
> > +};
> > +
> > +/* Scheduler pipes: one for the GP, one shared by all PPs. */
> > +enum lima_pipe_id {
> > +       lima_pipe_gp,
> > +       lima_pipe_pp,
> > +       lima_pipe_num,
> > +};
> > +
> > +/* Per-GPU driver state. */
> > +struct lima_device {
> > +       /* Generic device, DRM device and platform device of this GPU. */
> > +       struct device *dev;
> > +       struct drm_device *ddev;
> > +       struct platform_device *pdev;
> > +
> > +       /* GPU model, taken from the OF match data at probe time. */
> > +       enum lima_gpu_id id;
> > +       int num_pp;
> > +
> > +       /* Mapping of the device's first MEM resource. */
> > +       void __iomem *iomem;
> > +       /* "bus" and "core" clocks. */
> > +       struct clk *clk_bus;
> > +       struct clk *clk_gpu;
> > +       /* Optional reset line; NULL if the device has none. */
> > +       struct reset_control *reset;
> > +       /* Optional "mali" supply; NULL if the device has none. */
> > +       struct regulator *regulator;
> > +
> > +       /* IP blocks, indexed by enum lima_ip_id. */
> > +       struct lima_ip ip[lima_ip_num];
> > +       /* Scheduler pipes, indexed by enum lima_pipe_id. */
> > +       struct lima_sched_pipe pipe[lima_pipe_num];
> > +
> > +       /* VM created at device init and released at device fini. */
> > +       struct lima_vm *empty_vm;
> > +       /* GPU virtual address range bounds, set at device init. */
> > +       uint64_t va_start;
> > +       uint64_t va_end;
> > +
> > +       /* One write-combined page for the DLBU; Mali-450 only, else NULL. */
> > +       u32 *dlbu_cpu;
> > +       dma_addr_t dlbu_dma;
> > +};
> > +
> > +/* Return the lima_device stored in the DRM device's dev_private. */
> > +static inline struct lima_device *
> > +to_lima_dev(struct drm_device *dev)
> > +{
> > +       return dev->dev_private;
> > +}
> > +
> > +int lima_device_init(struct lima_device *ldev);
> > +void lima_device_fini(struct lima_device *ldev);
> > +
> > +const char *lima_ip_name(struct lima_ip *ip);
> > +
> > +typedef int (*lima_poll_func_t)(struct lima_ip *);
> > +
> > +/*
> > + * Poll @func(@ip) until it returns non-zero.
> > + *
> > + * @sleep_us:   if non-zero, sleep between polls (between sleep_us / 4 + 1
> > + *              and sleep_us microseconds); if zero, busy-poll.
> > + * @timeout_us: give up after this many microseconds; zero means never.
> > + *
> > + * Returns 0 once @func reports success, -ETIMEDOUT on timeout. @func is
> > + * always evaluated before the deadline is checked.
> > + */
> > +static inline int lima_poll_timeout(struct lima_ip *ip, lima_poll_func_t func,
> > +                                   int sleep_us, int timeout_us)
> > +{
> > +       ktime_t deadline = ktime_add_us(ktime_get(), timeout_us);
> > +
> > +       might_sleep_if(sleep_us);
> > +
> > +       for (;;) {
> > +               if (func(ip))
> > +                       break;
> > +
> > +               if (timeout_us && ktime_compare(ktime_get(), deadline) > 0)
> > +                       return -ETIMEDOUT;
> > +
> > +               if (sleep_us)
> > +                       usleep_range((sleep_us >> 2) + 1, sleep_us);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
> > new file mode 100644
> > index 000000000000..b7739712f235
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_dlbu.c
> > @@ -0,0 +1,56 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_vm.h"
> > +#include "lima_regs.h"
> > +
> > +/* Register accessors; both expect a local `struct lima_ip *ip` in scope. */
> > +#define dlbu_write(reg, data) writel(data, ip->iomem + reg)
> > +#define dlbu_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * Enable the DLBU for the first @num_pp processors of the PP pipe.
> > + * The enable mask has one bit per PP core, at the core's index
> > + * relative to pp0.
> > + */
> > +void lima_dlbu_enable(struct lima_device *dev, int num_pp)
> > +{
> > +       struct lima_ip *ip = &dev->ip[lima_ip_dlbu];
> > +       struct lima_sched_pipe *pp_pipe = &dev->pipe[lima_pipe_pp];
> > +       int enable_mask = 0;
> > +       int n;
> > +
> > +       for (n = 0; n < num_pp; n++)
> > +               enable_mask |= 1 << (pp_pipe->processor[n]->id - lima_ip_pp0);
> > +
> > +       dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, enable_mask);
> > +}
> > +
> > +/* Clear the DLBU's PP enable mask. */
> > +void lima_dlbu_disable(struct lima_device *dev)
> > +{
> > +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> > +       dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, 0);
> > +}
> > +
> > +/*
> > + * Program the four per-task DLBU registers from @reg[0..3]: tile list
> > + * base address, FB dimensions, tile list config and start tile position.
> > + */
> > +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
> > +{
> > +       dlbu_write(LIMA_DLBU_TLLIST_VBASEADDR, reg[0]);
> > +       dlbu_write(LIMA_DLBU_FB_DIM, reg[1]);
> > +       dlbu_write(LIMA_DLBU_TLLIST_CONF, reg[2]);
> > +       dlbu_write(LIMA_DLBU_START_TILE_POS, reg[3]);
> > +}
> > +
> > +/*
> > + * Point the DLBU at the device's DLBU page (physical and GPU virtual
> > + * address). Always returns 0.
> > + */
> > +int lima_dlbu_init(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +
> > +       /* NOTE(review): bit 0 presumably flags the address as valid - confirm. */
> > +       dlbu_write(LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR, dev->dlbu_dma | 1);
> > +       dlbu_write(LIMA_DLBU_MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
> > +
> > +       return 0;
> > +}
> > +
> > +/* Nothing to undo; exists so the DLBU has a fini hook for the IP table. */
> > +void lima_dlbu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
> > new file mode 100644
> > index 000000000000..60cba387cf30
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_dlbu.h
> > @@ -0,0 +1,18 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_DLBU_H__
> > +#define __LIMA_DLBU_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +void lima_dlbu_enable(struct lima_device *dev, int num_pp);
> > +void lima_dlbu_disable(struct lima_device *dev);
> > +
> > +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
> > +
> > +int lima_dlbu_init(struct lima_ip *ip);
> > +void lima_dlbu_fini(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
> > new file mode 100644
> > index 000000000000..e93bce16ee10
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_drv.c
> > @@ -0,0 +1,353 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/module.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/log2.h>
> > +#include <drm/drm_prime.h>
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +#include "lima_vm.h"
> > +
> > +/* Module parameters; both are read-only after load (mode 0444). */
> > +int lima_sched_timeout_ms = 0;
> > +int lima_sched_max_tasks = 32;
> > +
> > +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> > +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> > +
> > +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> > +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> > +
> > +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
>
> For panfrost, we generalized this to "get param" like other drivers.
> Looks like you can only add 7 more items.
>
> What about GPU revisions?

Currently I'm not aware of any programming differences between GPUs
of different revisions. I would appreciate it if anyone could tell me
about them, to spare some hard reverse-engineering effort.

Regards,
Qiang

>
> > +       struct drm_lima_info *info = data;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +
> > +       switch (ldev->id) {
> > +       case lima_gpu_mali400:
> > +               info->gpu_id = LIMA_INFO_GPU_MALI400;
>
> Personally, I'd return either the raw model number or something like
> '400' rather than making up some enumeration.
>
> > +               break;
> > +       case lima_gpu_mali450:
> > +               info->gpu_id = LIMA_INFO_GPU_MALI450;
> > +               break;
> > +       default:
> > +               return -ENODEV;
> > +       }
> > +       info->num_pp = ldev->pipe[lima_pipe_pp].num_processor;
> > +       info->valid = 0;
> > +       return 0;
> > +}
> > +
> > +/*
> > + * LIMA_GEM_CREATE: create a buffer object of args->size bytes and return
> > + * its handle in args->handle. No flags are accepted yet and a zero size
> > + * is rejected; both cases return -EINVAL.
> > + */
> > +static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_create *req = data;
> > +
> > +       if (req->flags || req->size == 0)
> > +               return -EINVAL;
> > +
> > +       return lima_gem_create_handle(dev, file, req->size, req->flags, &req->handle);
> > +}
> > +
> > +/* LIMA_GEM_INFO: fill args->va and args->offset for buffer args->handle. */
> > +static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_info *args = data;
> > +
> > +       return lima_gem_get_info(file, args->handle, &args->va, &args->offset);
> > +}
> > +
> > +/*
> > + * LIMA_GEM_SUBMIT: validate a task submission from user space, copy in
> > + * its buffer list and register frame, and hand it to lima_gem_submit().
> > + *
> > + * Returns 0 on success; -EINVAL for a bad pipe, empty buffer list,
> > + * unknown flags or wrong frame size; -ENOMEM, -EFAULT or -ENOENT
> > + * (unknown context) for the respective failures; otherwise the error
> > + * from task validation or lima_gem_submit().
> > + */
> > +static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_submit *args = data;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +       struct drm_lima_gem_submit_bo *bos;
> > +       struct lima_sched_pipe *pipe;
> > +       struct lima_sched_task *task;
> > +       struct lima_ctx *ctx;
> > +       struct lima_submit submit = {0};
> > +       size_t size;
> > +       int err = 0;
> > +
> > +       if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
> > +               return -EINVAL;
> > +
> > +       if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE))
> > +               return -EINVAL;
> > +
> > +       pipe = ldev->pipe + args->pipe;
> > +       if (args->frame_size != pipe->frame_size)
> > +               return -EINVAL;
> > +
> > +       /*
> > +        * One allocation holds two arrays of nr_bos entries each: the
> > +        * user-supplied bo descriptors, followed by the lbos array.
> > +        */
> > +       bos = kvcalloc(args->nr_bos, sizeof(*submit.bos) + sizeof(*submit.lbos), GFP_KERNEL);
> > +       if (!bos)
> > +               return -ENOMEM;
> > +
> > +       size = args->nr_bos * sizeof(*submit.bos);
> > +       if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
> > +               err = -EFAULT;
> > +               goto out0;
> > +       }
> > +
> > +       task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
> > +       if (!task) {
> > +               err = -ENOMEM;
> > +               goto out0;
> > +       }
> > +
> > +       /*
> > +        * The frame lives directly behind the task struct in the same
> > +        * slab object (presumably sized for it where the slab is
> > +        * created - not visible here).
> > +        */
> > +       task->frame = task + 1;
> > +       if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
> > +               err = -EFAULT;
> > +               goto out1;
> > +       }
> > +
> > +       err = pipe->task_validate(pipe, task);
> > +       if (err)
> > +               goto out1;
> > +
> > +       ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
> > +       if (!ctx) {
> > +               err = -ENOENT;
> > +               goto out1;
> > +       }
> > +
> > +       submit.pipe = args->pipe;
> > +       submit.bos = bos;
> > +       /* lbos starts right after the nr_bos user descriptors. */
> > +       submit.lbos = (void *)bos + size;
> > +       submit.nr_bos = args->nr_bos;
> > +       submit.task = task;
> > +       submit.ctx = ctx;
> > +       submit.flags = args->flags;
> > +       submit.in_sync[0] = args->in_sync[0];
> > +       submit.in_sync[1] = args->in_sync[1];
> > +       submit.out_sync = args->out_sync;
> > +
> > +       err = lima_gem_submit(file, &submit);
> > +
> > +       lima_ctx_put(ctx);
> > +out1:
> > +       /* The task is freed here only on failure; on success it is kept. */
> > +       if (err)
> > +               kmem_cache_free(pipe->task_slab, task);
> > +out0:
> > +       kvfree(bos);
> > +       return err;
> > +}
> > +
> > +/*
> > + * LIMA_GEM_WAIT: wait on buffer args->handle for the accesses selected in
> > + * args->op (read and/or write), up to args->timeout_ns. Unknown op bits
> > + * yield -EINVAL.
> > + */
> > +static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_wait *args = data;
> > +
> > +       if (args->op & ~(LIMA_GEM_WAIT_READ|LIMA_GEM_WAIT_WRITE))
> > +               return -EINVAL;
> > +
> > +       return lima_gem_wait(file, args->handle, args->op, args->timeout_ns);
> > +}
> > +
> > +static int lima_ioctl_ctx(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_ctx *args = data;
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +
> > +       if (args->op == LIMA_CTX_OP_CREATE)
> > +               return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
> > +       else if (args->op == LIMA_CTX_OP_FREE)
> > +               return lima_ctx_free(&priv->ctx_mgr, args->id);
>
> Wasn't it suggested in the prior version to not mux these? Make them 2 ioctls.
>
> > +
> > +       return -EINVAL;
> > +}
> > +
> > +/*
> > + * DRM open hook: give every opened file its own GPU VM and context
> > + * manager, stored in file->driver_priv. Returns 0 or -ENOMEM.
> > + */
> > +static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
> > +{
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +       struct lima_drm_priv *fpriv;
> > +
> > +       fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
> > +       if (!fpriv)
> > +               return -ENOMEM;
> > +
> > +       fpriv->vm = lima_vm_create(ldev);
> > +       if (!fpriv->vm) {
> > +               kfree(fpriv);
> > +               return -ENOMEM;
> > +       }
> > +
> > +       lima_ctx_mgr_init(&fpriv->ctx_mgr);
> > +       file->driver_priv = fpriv;
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * DRM postclose hook: release what lima_drm_driver_open() set up - the
> > + * file's remaining contexts, its VM reference and the private struct.
> > + */
> > +static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
> > +{
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +
> > +       lima_ctx_mgr_fini(&priv->ctx_mgr);
> > +       lima_vm_put(priv->vm);
> > +       kfree(priv);
> > +}
> > +
> > +/* Driver ioctl table; every entry is also allowed on render nodes. */
> > +static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
> > +       DRM_IOCTL_DEF_DRV(LIMA_INFO, lima_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_CTX, lima_ioctl_ctx, DRM_AUTH|DRM_RENDER_ALLOW),
> > +};
> > +
> > +/* File operations: stock DRM helpers, plus the driver's own GEM mmap. */
> > +static const struct file_operations lima_drm_driver_fops = {
> > +       .owner              = THIS_MODULE,
> > +       .open               = drm_open,
> > +       .release            = drm_release,
> > +       .unlocked_ioctl     = drm_ioctl,
> > +#ifdef CONFIG_COMPAT
> > +       .compat_ioctl       = drm_compat_ioctl,
> > +#endif
> > +       .mmap               = lima_gem_mmap,
> > +};
> > +
> > +/*
> > + * DRM driver description: a render-only GEM driver with PRIME and
> > + * syncobj support, wired to the hooks and tables defined in this file
> > + * and to the lima GEM/PRIME helpers.
> > + */
> > +static struct drm_driver lima_drm_driver = {
> > +       .driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | DRIVER_SYNCOBJ,
> > +       .open               = lima_drm_driver_open,
> > +       .postclose          = lima_drm_driver_postclose,
> > +       .ioctls             = lima_drm_driver_ioctls,
> > +       .num_ioctls         = ARRAY_SIZE(lima_drm_driver_ioctls),
> > +       .fops               = &lima_drm_driver_fops,
> > +       .gem_free_object_unlocked = lima_gem_free_object,
> > +       .gem_open_object    = lima_gem_object_open,
> > +       .gem_close_object   = lima_gem_object_close,
> > +       .gem_vm_ops         = &lima_gem_vm_ops,
> > +       .name               = "lima",
> > +       .desc               = "lima DRM",
> > +       .date               = "20190217",
> > +       .major              = 1,
> > +       .minor              = 0,
> > +       .patchlevel         = 0,
> > +
> > +       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> > +       .gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
> > +       .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> > +       .gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
> > +       .gem_prime_mmap = lima_gem_prime_mmap,
> > +};
> > +
> > +/*
> > + * Platform probe: allocate the lima device state, create the DRM device,
> > + * initialise the GPU and register the DRM device with the core.
> > + *
> > + * Returns 0 on success or a negative errno.
> > + */
> > +static int lima_pdev_probe(struct platform_device *pdev)
> > +{
> > +       struct device *parent = &pdev->dev;
> > +       struct lima_device *lima;
> > +       struct drm_device *drm;
> > +       int ret;
> > +
> > +       /* devm allocation: released together with the platform device */
> > +       lima = devm_kzalloc(parent, sizeof(*lima), GFP_KERNEL);
> > +       if (!lima)
> > +               return -ENOMEM;
> > +
> > +       lima->pdev = pdev;
> > +       lima->dev = parent;
> > +       lima->id = (enum lima_gpu_id)of_device_get_match_data(parent);
> > +
> > +       platform_set_drvdata(pdev, lima);
> > +
> > +       drm = drm_dev_alloc(&lima_drm_driver, parent);
> > +       if (IS_ERR(drm))
> > +               return PTR_ERR(drm);
> > +
> > +       /* cross-link the DRM device and the lima device */
> > +       drm->dev_private = lima;
> > +       lima->ddev = drm;
> > +
> > +       ret = lima_device_init(lima);
> > +       if (ret) {
> > +               dev_err(parent, "Fatal error during GPU init\n");
> > +               goto err_put_drm;
> > +       }
> > +
> > +       ret = drm_dev_register(drm, 0);
> > +       if (ret < 0)
> > +               goto err_fini_device;
> > +
> > +       return 0;
> > +
> > +err_fini_device:
> > +       lima_device_fini(lima);
> > +err_put_drm:
> > +       drm_dev_put(drm);
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Platform remove: unregister the DRM device, shut the GPU down and drop
> > + * the DRM device reference. Always returns 0.
> > + */
> > +static int lima_pdev_remove(struct platform_device *pdev)
> > +{
> > +       struct lima_device *lima = platform_get_drvdata(pdev);
> > +       struct drm_device *drm = lima->ddev;
> > +
> > +       /* reverse order of the successful probe path */
> > +       drm_dev_unregister(drm);
> > +       lima_device_fini(lima);
> > +       drm_dev_put(drm);
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Device-tree match table. The .data field carries the lima_gpu_id enum
> > + * value cast to a pointer; lima_pdev_probe() casts it back.
> > + */
> > +static const struct of_device_id dt_match[] = {
> > +       { .compatible = "arm,mali-400", .data = (void *)lima_gpu_mali400 },
> > +       { .compatible = "arm,mali-450", .data = (void *)lima_gpu_mali450 },
> > +       {}
> > +};
> > +MODULE_DEVICE_TABLE(of, dt_match);
> > +
> > +/* Platform driver glue: probe/remove hooks and the OF match table. */
> > +static struct platform_driver lima_platform_driver = {
> > +       .probe      = lima_pdev_probe,
> > +       .remove     = lima_pdev_remove,
> > +       .driver     = {
> > +               .name   = "lima",
> > +               .of_match_table = dt_match,
> > +       },
> > +};
> > +
> > +/*
> > + * Sanitise lima_sched_max_tasks: values below 4 become 4, anything else
> > + * is rounded up to the next power of two.
> > + */
> > +static void lima_check_module_param(void)
> > +{
> > +       if (lima_sched_max_tasks < 4) {
> > +               lima_sched_max_tasks = 4;
> > +               return;
> > +       }
> > +
> > +       lima_sched_max_tasks = roundup_pow_of_two(lima_sched_max_tasks);
> > +}
> > +
> > +/*
> > + * Module entry point: validate module parameters, create the scheduler
> > + * slab and register the platform driver. The slab is torn down again if
> > + * driver registration fails.
> > + */
> > +static int __init lima_init(void)
> > +{
> > +       int err;
> > +
> > +       lima_check_module_param();
> > +
> > +       err = lima_sched_slab_init();
> > +       if (err)
> > +               return err;
> > +
> > +       err = platform_driver_register(&lima_platform_driver);
> > +       if (!err)
> > +               return 0;
> > +
> > +       lima_sched_slab_fini();
> > +       return err;
> > +}
> > +module_init(lima_init);
> > +
> > +/*
> > + * Module exit: unregister the platform driver, then release the
> > + * scheduler slab (the reverse of lima_init()).
> > + */
> > +static void __exit lima_exit(void)
> > +{
> > +       platform_driver_unregister(&lima_platform_driver);
> > +       lima_sched_slab_fini();
> > +}
> > +module_exit(lima_exit);
> > +
> > +MODULE_AUTHOR("Lima Project Developers");
> > +MODULE_DESCRIPTION("Lima DRM Driver");
> > +MODULE_LICENSE("GPL v2");
> > diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h
> > new file mode 100644
> > index 000000000000..640a548cd617
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_drv.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_DRV_H__
> > +#define __LIMA_DRV_H__
> > +
> > +#include <drm/drmP.h>
> > +
> > +#include "lima_ctx.h"
> > +
> > +extern int lima_sched_timeout_ms;
> > +extern int lima_sched_max_tasks;
> > +
> > +struct lima_vm;
> > +struct lima_bo;
> > +struct lima_sched_task;
> > +
> > +struct drm_lima_gem_submit_bo;
> > +
> > +/* Per-open-file driver state, reached through drm_file::driver_priv. */
> > +struct lima_drm_priv {
> > +       struct lima_vm *vm;             /* GPU vm used for this file's bo mappings */
> > +       struct lima_ctx_mgr ctx_mgr;
> > +};
> > +
> > +/* Arguments of one task submission, consumed by lima_gem_submit(). */
> > +struct lima_submit {
> > +       struct lima_ctx *ctx;
> > +       int pipe;               /* index into ctx->context selecting the sched context */
> > +       u32 flags;              /* LIMA_SUBMIT_FLAG_* */
> > +
> > +       struct drm_lima_gem_submit_bo *bos;     /* per-bo handle and flags */
> > +       struct lima_bo **lbos;  /* filled by lima_gem_submit() with the looked-up bos */
> > +       u32 nr_bos;             /* number of entries in bos and lbos */
> > +
> > +       u32 in_sync[2];         /* syncobj handles to wait on; 0 entries are skipped */
> > +       u32 out_sync;           /* syncobj handle that receives the task fence; 0 = none */
> > +
> > +       struct lima_sched_task *task;
> > +};
> > +
> > +/* Return the lima per-file state stored in @file->driver_priv. */
> > +static inline struct lima_drm_priv *
> > +to_lima_drm_priv(struct drm_file *file)
> > +{
> > +       return file->driver_priv;
> > +}
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> > new file mode 100644
> > index 000000000000..666960345566
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem.c
> > @@ -0,0 +1,379 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <drm/drmP.h>
> > +#include <drm/drm_syncobj.h>
> > +#include <drm/drm_utils.h>
> > +#include <linux/sync_file.h>
> > +#include <linux/pfn_t.h>
> > +
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +
> > +/*
> > + * Create a buffer object of @size bytes and publish it to @file as a GEM
> > + * handle written to @handle.
> > + *
> > + * Returns 0 on success or a negative errno.
> > + */
> > +int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
> > +                          u32 size, u32 flags, u32 *handle)
> > +{
> > +       struct lima_device *lima = to_lima_dev(dev);
> > +       struct lima_bo *new_bo;
> > +       int ret;
> > +
> > +       new_bo = lima_bo_create(lima, size, flags, NULL, NULL);
> > +       if (IS_ERR(new_bo))
> > +               return PTR_ERR(new_bo);
> > +
> > +       ret = drm_gem_handle_create(file, &new_bo->gem, handle);
> > +
> > +       /*
> > +        * The allocation reference is dropped in every case; on success
> > +        * the handle keeps the object alive.
> > +        */
> > +       drm_gem_object_put_unlocked(&new_bo->gem);
> > +
> > +       return ret;
> > +}
> > +
> > +/*
> > + * GEM free callback: destroy the bo behind @obj. Logs an error first if
> > + * the bo's va list is not empty.
> > + */
> > +void lima_gem_free_object(struct drm_gem_object *obj)
> > +{
> > +       struct lima_bo *bo = to_lima_bo(obj);
> > +       bool still_mapped = !list_empty(&bo->va);
> > +
> > +       if (still_mapped)
> > +               dev_err(obj->dev->dev, "lima gem free bo still has va\n");
> > +
> > +       lima_bo_destroy(bo);
> > +}
> > +
> > +/*
> > + * GEM open callback: add the bo to the vm of the file that obtained a
> > + * handle to it. Returns the result of lima_vm_bo_add().
> > + */
> > +int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
> > +{
> > +       struct lima_drm_priv *fpriv = to_lima_drm_priv(file);
> > +       struct lima_bo *bo = to_lima_bo(obj);
> > +
> > +       return lima_vm_bo_add(fpriv->vm, bo, true);
> > +}
> > +
> > +/*
> > + * GEM close callback: remove the bo from the vm of the file that is
> > + * dropping its handle.
> > + */
> > +void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file)
> > +{
> > +       struct lima_drm_priv *fpriv = to_lima_drm_priv(file);
> > +       struct lima_bo *bo = to_lima_bo(obj);
> > +
> > +       lima_vm_bo_del(fpriv->vm, bo);
> > +}
> > +
> > +/*
> > + * Report where the bo named by @handle lives: its GPU virtual address in
> > + * the caller's vm (@va) and its fake mmap offset (@offset).
> > + *
> > + * @va is written as soon as the handle resolves; @offset only when the
> > + * mmap offset could be created. Returns 0, -ENOENT for a bad handle, or
> > + * the error from drm_gem_create_mmap_offset().
> > + */
> > +int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
> > +{
> > +       struct lima_drm_priv *fpriv = to_lima_drm_priv(file);
> > +       struct drm_gem_object *gem;
> > +       int ret;
> > +
> > +       gem = drm_gem_object_lookup(file, handle);
> > +       if (!gem)
> > +               return -ENOENT;
> > +
> > +       *va = lima_vm_get_va(fpriv->vm, to_lima_bo(gem));
> > +
> > +       ret = drm_gem_create_mmap_offset(gem);
> > +       if (ret)
> > +               goto out_put;
> > +
> > +       *offset = drm_vma_node_offset_addr(&gem->vma_node);
> > +
> > +out_put:
> > +       /* drop the lookup reference */
> > +       drm_gem_object_put_unlocked(gem);
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Page-fault handler for CPU mappings of a bo: insert the backing page
> > + * that corresponds to the faulting address.
> > + */
> > +static vm_fault_t lima_gem_fault(struct vm_fault *vmf)
> > +{
> > +       struct vm_area_struct *vma = vmf->vma;
> > +       struct drm_gem_object *gem = vma->vm_private_data;
> > +       struct lima_bo *bo = to_lima_bo(gem);
> > +       pgoff_t page_idx;
> > +       pfn_t pfn;
> > +
> > +       /*
> > +        * Derive the page index from the address; vmf->pgoff carries the
> > +        * fake mmap offset and cannot be used directly.
> > +        */
> > +       page_idx = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
> > +
> > +       pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[page_idx]), PFN_DEV);
> > +
> > +       return vmf_insert_mixed(vma, vmf->address, pfn);
> > +}
> > +
> > +/*
> > + * VM operations for bo mappings: lima's own fault handler plus the
> > + * drm_gem_vm_open()/drm_gem_vm_close() helpers.
> > + */
> > +const struct vm_operations_struct lima_gem_vm_ops = {
> > +       .fault = lima_gem_fault,
> > +       .open = drm_gem_vm_open,
> > +       .close = drm_gem_vm_close,
> > +};
> > +
> > +/*
> > + * Adjust a freshly set up vma for bo mappings: set VM_MIXEDMAP, clear
> > + * VM_PFNMAP and make the mapping write-combined.
> > + */
> > +void lima_set_vma_flags(struct vm_area_struct *vma)
> > +{
> > +       /* the protection is derived from the flags as they are on entry */
> > +       pgprot_t base_prot = vm_get_page_prot(vma->vm_flags);
> > +
> > +       vma->vm_flags = (vma->vm_flags | VM_MIXEDMAP) & ~VM_PFNMAP;
> > +       vma->vm_page_prot = pgprot_writecombine(base_prot);
> > +}
> > +
> > +/*
> > + * mmap file operation: let drm_gem_mmap() set the mapping up, then apply
> > + * the lima vma flags. Returns 0 or the drm_gem_mmap() error.
> > + */
> > +int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> > +{
> > +       int err = drm_gem_mmap(filp, vma);
> > +
> > +       if (!err)
> > +               lima_set_vma_flags(vma);
> > +
> > +       return err;
> > +}
> > +
> > +/*
> > + * Prepare @bo's reservation object for @task and, for implicit sync,
> > + * add the bo's existing fences as task dependencies.
> > + *
> > + * Readers reserve one shared fence slot first. With @explicit set no bo
> > + * fences are added. Otherwise a writer depends on every fence returned
> > + * by reservation_object_get_fences_rcu() and a reader only on the
> > + * exclusive fence.
> > + *
> > + * Returns 0 on success or a negative errno.
> > + */
> > +static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
> > +                           bool write, bool explicit)
> > +{
> > +       struct dma_fence **fences;
> > +       struct dma_fence *excl;
> > +       unsigned nr_fences;
> > +       int ret, i;
> > +
> > +       if (!write) {
> > +               ret = reservation_object_reserve_shared(bo->gem.resv, 1);
> > +               if (ret)
> > +                       return ret;
> > +       }
> > +
> > +       /* explicit sync relies on the fences passed in by user space */
> > +       if (explicit)
> > +               return 0;
> > +
> > +       if (!write) {
> > +               /* implicit sync, reader: depend on the exclusive fence */
> > +               excl = reservation_object_get_excl_rcu(bo->gem.resv);
> > +               if (!excl)
> > +                       return 0;
> > +
> > +               ret = lima_sched_task_add_dep(task, excl);
> > +               if (ret)
> > +                       dma_fence_put(excl);
> > +               return ret;
> > +       }
> > +
> > +       /* implicit sync, writer: depend on every fence of the bo */
> > +       ret = reservation_object_get_fences_rcu(bo->gem.resv, NULL,
> > +                                               &nr_fences, &fences);
> > +       if (ret || !nr_fences)
> > +               return ret;
> > +
> > +       for (i = 0; i < nr_fences; i++) {
> > +               ret = lima_sched_task_add_dep(task, fences[i]);
> > +               if (ret)
> > +                       break;
> > +       }
> > +
> > +       /* on error, release the fences that were not handed to the task */
> > +       for ( ; i < nr_fences; i++)
> > +               dma_fence_put(fences[i]);
> > +
> > +       kfree(fences);
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Lock the reservation object of every bo in @bos under one ww acquire
> > + * context.
> > + *
> > + * When a lock attempt fails, every lock taken so far is released. For
> > + * -EDEADLK the contended lock is then taken through the slow path and
> > + * the whole pass is retried with that lock already held.
> > + *
> > + * Returns 0 with all locks held, or a negative errno with no lock held
> > + * and @ctx finished.
> > + */
> > +static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
> > +                            struct ww_acquire_ctx *ctx)
> > +{
> > +       int i, ret = 0, contended, slow_locked = -1;
> > +
> > +       ww_acquire_init(ctx, &reservation_ww_class);
> > +
> > +retry:
> > +       for (i = 0; i < nr_bos; i++) {
> > +               /* already held from the slow path: skip it once */
> > +               if (i == slow_locked) {
> > +                       slow_locked = -1;
> > +                       continue;
> > +               }
> > +
> > +               ret = ww_mutex_lock_interruptible(&bos[i]->gem.resv->lock, ctx);
> > +               if (ret < 0) {
> > +                       contended = i;
> > +                       goto err;
> > +               }
> > +       }
> > +
> > +       ww_acquire_done(ctx);
> > +       return 0;
> > +
> > +err:
> > +       /* release everything before the failing index */
> > +       for (i--; i >= 0; i--)
> > +               ww_mutex_unlock(&bos[i]->gem.resv->lock);
> > +
> > +       /* still set only if this pass failed before reaching that bo */
> > +       if (slow_locked >= 0)
> > +               ww_mutex_unlock(&bos[slow_locked]->gem.resv->lock);
> > +
> > +       if (ret == -EDEADLK) {
> > +               /* we lost out in a seqno race, lock and retry.. */
> > +               ret = ww_mutex_lock_slow_interruptible(
> > +                       &bos[contended]->gem.resv->lock, ctx);
> > +               if (!ret) {
> > +                       slow_locked = contended;
> > +                       goto retry;
> > +               }
> > +       }
> > +       ww_acquire_fini(ctx);
> > +
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Release the reservation locks of all @nr_bos entries in @bos and
> > + * finish the acquire context.
> > + */
> > +static void lima_gem_unlock_bos(struct lima_bo **bos, u32 nr_bos,
> > +                               struct ww_acquire_ctx *ctx)
> > +{
> > +       u32 idx;
> > +
> > +       for (idx = 0; idx < nr_bos; idx++)
> > +               ww_mutex_unlock(&bos[idx]->gem.resv->lock);
> > +
> > +       ww_acquire_fini(ctx);
> > +}
> > +
> > +/*
> > + * Add the fence behind every non-zero in_sync syncobj handle of @submit
> > + * as a dependency of the task.
> > + *
> > + * Returns 0 on success or the first error from the syncobj lookup or
> > + * from lima_sched_task_add_dep().
> > + */
> > +static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
> > +{
> > +       int idx;
> > +
> > +       for (idx = 0; idx < ARRAY_SIZE(submit->in_sync); idx++) {
> > +               u32 sync_handle = submit->in_sync[idx];
> > +               struct dma_fence *dep = NULL;
> > +               int ret;
> > +
> > +               /* a zero handle means the slot is unused */
> > +               if (!sync_handle)
> > +                       continue;
> > +
> > +               ret = drm_syncobj_find_fence(file, sync_handle, 0, 0, &dep);
> > +               if (ret)
> > +                       return ret;
> > +
> > +               ret = lima_sched_task_add_dep(submit->task, dep);
> > +               if (ret) {
> > +                       /* not taken over by the task: drop our reference */
> > +                       dma_fence_put(dep);
> > +                       return ret;
> > +               }
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Queue the task described by @submit.
> > + *
> > + * Resolves the optional out syncobj and every bo handle, takes an extra
> > + * vm mapping reference on each bo, locks all reservation objects,
> > + * initialises the task, collects its dependencies (in_sync syncobjs and
> > + * the per-bo fences from lima_gem_sync_bo()), queues the task and
> > + * attaches the resulting fence to every bo and to the out syncobj.
> > + *
> > + * NOTE(review): the error path stops at the first NULL entry of
> > + * submit->lbos, so it relies on the caller passing that array zeroed -
> > + * confirm at the call site.
> > + *
> > + * Returns 0 on success or a negative errno.
> > + */
> > +int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
> > +{
> > +       int i, err = 0;
> > +       struct ww_acquire_ctx ctx;
> > +       struct lima_drm_priv *priv = to_lima_drm_priv(file);
> > +       struct lima_vm *vm = priv->vm;
> > +       struct drm_syncobj *out_sync = NULL;
> > +       struct dma_fence *fence;
> > +       struct lima_bo **bos = submit->lbos;
> > +
> > +       if (submit->out_sync) {
> > +               out_sync = drm_syncobj_find(file, submit->out_sync);
> > +               if (!out_sync)
> > +                       return -ENOENT;
> > +       }
> > +
> > +       for (i = 0; i < submit->nr_bos; i++) {
> > +               struct drm_gem_object *obj;
> > +               struct lima_bo *bo;
> > +
> > +               obj = drm_gem_object_lookup(file, submit->bos[i].handle);
> > +               if (!obj) {
> > +                       err = -ENOENT;
> > +                       goto err_out0;
> > +               }
> > +
> > +               bo = to_lima_bo(obj);
> > +
> > +               /* increase refcnt of gpu va map to prevent unmapped when executing,
> > +                * will be decreased when task done */
> > +               err = lima_vm_bo_add(vm, bo, false);
> > +               if (err) {
> > +                       drm_gem_object_put_unlocked(obj);
> > +                       goto err_out0;
> > +               }
> > +
> > +               /* recorded only once both references are held */
> > +               bos[i] = bo;
> > +       }
> > +
> > +       /* on failure no lock is held, so skip the unlock step */
> > +       err = lima_gem_lock_bos(bos, submit->nr_bos, &ctx);
> > +       if (err)
> > +               goto err_out0;
> > +
> > +       err = lima_sched_task_init(
> > +               submit->task, submit->ctx->context + submit->pipe,
> > +               bos, submit->nr_bos, vm);
> > +       if (err)
> > +               goto err_out1;
> > +
> > +       err = lima_gem_add_deps(file, submit);
> > +       if (err)
> > +               goto err_out2;
> > +
> > +       for (i = 0; i < submit->nr_bos; i++) {
> > +               err = lima_gem_sync_bo(
> > +                       submit->task, bos[i],
> > +                       submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE,
> > +                       submit->flags & LIMA_SUBMIT_FLAG_EXPLICIT_FENCE);
> > +               if (err)
> > +                       goto err_out2;
> > +       }
> > +
> > +       fence = lima_sched_context_queue_task(
> > +               submit->ctx->context + submit->pipe, submit->task);
> > +
> > +       /* writers publish the fence as exclusive, readers as shared */
> > +       for (i = 0; i < submit->nr_bos; i++) {
> > +               if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
> > +                       reservation_object_add_excl_fence(bos[i]->gem.resv, fence);
> > +               else
> > +                       reservation_object_add_shared_fence(bos[i]->gem.resv, fence);
> > +       }
> > +
> > +       lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
> > +
> > +       /* drop the lookup references taken above */
> > +       for (i = 0; i < submit->nr_bos; i++)
> > +               drm_gem_object_put_unlocked(&bos[i]->gem);
> > +
> > +       if (out_sync) {
> > +               drm_syncobj_replace_fence(out_sync, fence);
> > +               drm_syncobj_put(out_sync);
> > +       }
> > +
> > +       dma_fence_put(fence);
> > +
> > +       return 0;
> > +
> > +err_out2:
> > +       lima_sched_task_fini(submit->task);
> > +err_out1:
> > +       lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
> > +err_out0:
> > +       /* undo the vm reference and lookup reference of each recorded bo */
> > +       for (i = 0; i < submit->nr_bos; i++) {
> > +               if (!bos[i])
> > +                       break;
> > +               lima_vm_bo_del(vm, bos[i]);
> > +               drm_gem_object_put_unlocked(&bos[i]->gem);
> > +       }
> > +       if (out_sync)
> > +               drm_syncobj_put(out_sync);
> > +       return err;
> > +}
> > +
> > +/*
> > + * Wait for the fences of the bo named by @handle.
> > + *
> > + * @op:         LIMA_GEM_WAIT_* bits; 0 means nothing to wait for. With
> > + *              LIMA_GEM_WAIT_WRITE set the wait covers all fences of
> > + *              the bo, otherwise only the exclusive one.
> > + * @timeout_ns: absolute timeout, converted with
> > + *              drm_timeout_abs_to_jiffies().
> > + *
> > + * Returns 0 once the bo is idle, -ETIMEDOUT if a non-zero timeout
> > + * expired, -EBUSY if the bo is busy and the timeout was zero (poll), or
> > + * another negative errno from the wait helper.
> > + */
> > +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns)
> > +{
> > +       bool write = op & LIMA_GEM_WAIT_WRITE;
> > +       long ret, timeout;
> > +
> > +       if (!op)
> > +               return 0;
> > +
> > +       timeout = drm_timeout_abs_to_jiffies(timeout_ns);
> > +
> > +       /*
> > +        * drm_gem_reservation_object_wait() returns 0 on success and
> > +        * -ETIME when the wait timed out, so only -ETIME is translated;
> > +        * a return of 0 must be passed through as success.
> > +        */
> > +       ret = drm_gem_reservation_object_wait(file, handle, write, timeout);
> > +       if (ret == -ETIME)
> > +               ret = timeout ? -ETIMEDOUT : -EBUSY;
> > +
> > +       return ret;
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_gem.h b/drivers/gpu/drm/lima/lima_gem.h
> > new file mode 100644
> > index 000000000000..f1c4658100a8
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem.h
> > @@ -0,0 +1,25 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_GEM_H__
> > +#define __LIMA_GEM_H__
> > +
> > +struct lima_bo;
> > +struct lima_submit;
> > +
> > +extern const struct vm_operations_struct lima_gem_vm_ops;
> > +
> > +struct lima_bo *lima_gem_create_bo(struct drm_device *dev, u32 size, u32 flags);
> > +int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
> > +                          u32 size, u32 flags, u32 *handle);
> > +void lima_gem_free_object(struct drm_gem_object *obj);
> > +int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file);
> > +void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file);
> > +int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset);
> > +int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma);
> > +int lima_gem_submit(struct drm_file *file, struct lima_submit *submit);
> > +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns);
> > +
> > +void lima_set_vma_flags(struct vm_area_struct *vma);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_gem_prime.c b/drivers/gpu/drm/lima/lima_gem_prime.c
> > new file mode 100644
> > index 000000000000..fe8348a055f6
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem_prime.c
> > @@ -0,0 +1,47 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/dma-buf.h>
> > +#include <drm/drm_prime.h>
> > +#include <drm/drm_drv.h>
> > +#include <drm/drm_file.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_object.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +
> > +/*
> > + * PRIME import: wrap an imported dma-buf's scatter/gather table in a
> > + * lima bo that uses the dma-buf's size and reservation object.
> > + *
> > + * Returns the embedded GEM object or an ERR_PTR.
> > + */
> > +struct drm_gem_object *lima_gem_prime_import_sg_table(
> > +       struct drm_device *dev, struct dma_buf_attachment *attach,
> > +       struct sg_table *sgt)
> > +{
> > +       struct lima_device *lima = to_lima_dev(dev);
> > +       struct dma_buf *buf = attach->dmabuf;
> > +       struct lima_bo *imported;
> > +
> > +       imported = lima_bo_create(lima, buf->size, 0, sgt, buf->resv);
> > +       if (IS_ERR(imported))
> > +               return ERR_CAST(imported);
> > +
> > +       return &imported->gem;
> > +}
> > +
> > +/*
> > + * PRIME export: build a scatter/gather table from the bo's page array,
> > + * covering obj->size >> PAGE_SHIFT pages.
> > + */
> > +struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj)
> > +{
> > +       int page_count = obj->size >> PAGE_SHIFT;
> > +       struct lima_bo *bo = to_lima_bo(obj);
> > +
> > +       return drm_prime_pages_to_sg(bo->pages, page_count);
> > +}
> > +
> > +/*
> > + * PRIME mmap: map the whole object with drm_gem_mmap_obj(), then apply
> > + * the lima vma flags. Returns 0 or the drm_gem_mmap_obj() error.
> > + */
> > +int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> > +{
> > +       int err = drm_gem_mmap_obj(obj, obj->size, vma);
> > +
> > +       if (!err)
> > +               lima_set_vma_flags(vma);
> > +
> > +       return err;
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_gem_prime.h b/drivers/gpu/drm/lima/lima_gem_prime.h
> > new file mode 100644
> > index 000000000000..ceb1be9840a5
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem_prime.h
> > @@ -0,0 +1,13 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_GEM_PRIME_H__
> > +#define __LIMA_GEM_PRIME_H__
> > +
> > +struct drm_gem_object *lima_gem_prime_import_sg_table(
> > +       struct drm_device *dev, struct dma_buf_attachment *attach,
> > +       struct sg_table *sgt);
> > +struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj);
> > +int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_gp.c b/drivers/gpu/drm/lima/lima_gp.c
> > new file mode 100644
> > index 000000000000..4f4e9f5f7e19
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gp.c
> > @@ -0,0 +1,282 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +#include <linux/slab.h>
> > +
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_gp.h"
> > +#include "lima_regs.h"
> > +
> > +/*
> > + * GP register accessors. Both expand to an access relative to
> > + * ip->iomem, so a variable named `ip` must be in scope at the call site.
> > + */
> > +#define gp_write(reg, data) writel(data, ip->iomem + reg)
> > +#define gp_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * GP interrupt handler.
> > + *
> > + * Reads the interrupt state and GP status, marks the pipe as failed on
> > + * an error interrupt (after masking all GP interrupts), acknowledges the
> > + * pending bits and reports task completion to the scheduler pipe.
> > + *
> > + * Returns IRQ_NONE when no GP interrupt bit is pending, IRQ_HANDLED
> > + * otherwise.
> > + */
> > +static irqreturn_t lima_gp_irq_handler(int irq, void *data)
> > +{
> > +       struct lima_ip *ip = data;
> > +       struct lima_device *dev = ip->dev;
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > +       u32 state = gp_read(LIMA_GP_INT_STAT);
> > +       u32 status = gp_read(LIMA_GP_STATUS);
> > +       bool done = false;
> > +
> > +       /* for shared irq case */
> > +       if (!state)
> > +               return IRQ_NONE;
> > +
> > +       if (state & LIMA_GP_IRQ_MASK_ERROR) {
> > +               dev_err(dev->dev, "gp error irq state=%x status=%x\n",
> > +                       state, status);
> > +
> > +               /* mask all interrupts before hard reset */
> > +               gp_write(LIMA_GP_INT_MASK, 0);
> > +
> > +               pipe->error = true;
> > +               done = true;
> > +       }
> > +       else {
> > +               /*
> > +                * Done only when an END_CMD_LST interrupt fired and
> > +                * neither the VS nor the PLBU ACTIVE status bit is set.
> > +                */
> > +               bool valid = state & (LIMA_GP_IRQ_VS_END_CMD_LST |
> > +                                     LIMA_GP_IRQ_PLBU_END_CMD_LST);
> > +               bool active = status & (LIMA_GP_STATUS_VS_ACTIVE |
> > +                                       LIMA_GP_STATUS_PLBU_ACTIVE);
> > +               done = valid && !active;
> > +       }
> > +
> > +       /* acknowledge exactly the bits that were read above */
> > +       gp_write(LIMA_GP_INT_CLEAR, state);
> > +
> > +       if (done)
> > +               lima_sched_pipe_task_done(pipe);
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +/*
> > + * Start a GP soft reset without waiting for it to finish. Does nothing
> > + * if a reset is already outstanding (ip->data.async_reset set);
> > + * completion is collected by lima_gp_soft_reset_async_wait().
> > + */
> > +static void lima_gp_soft_reset_async(struct lima_ip *ip)
> > +{
> > +       if (ip->data.async_reset)
> > +               return;
> > +
> > +       gp_write(LIMA_GP_INT_MASK, 0);
> > +       /* clear a stale RESET_COMPLETED bit before issuing the reset */
> > +       gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_RESET_COMPLETED);
> > +       gp_write(LIMA_GP_CMD, LIMA_GP_CMD_SOFT_RESET);
> > +       ip->data.async_reset = true;
> > +}
> > +
> > +/*
> > + * Wait for a soft reset started by lima_gp_soft_reset_async().
> > + *
> > + * Returns 0 immediately if no reset is outstanding. Otherwise polls the
> > + * raw interrupt status for RESET_COMPLETED; on success all interrupts
> > + * are cleared and the used ones unmasked. On timeout the poll error is
> > + * returned and async_reset stays set.
> > + */
> > +static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int err;
> > +       u32 v;
> > +
> > +       if (!ip->data.async_reset)
> > +               return 0;
> > +
> > +       /* sleep_us = 0, timeout_us = 100 */
> > +       err = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, v,
> > +                                v & LIMA_GP_IRQ_RESET_COMPLETED,
> > +                                0, 100);
> > +       if (err) {
> > +               dev_err(dev->dev, "gp soft reset time out\n");
> > +               return err;
> > +       }
> > +
> > +       gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> > +       gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
> > +
> > +       ip->data.async_reset = false;
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Sanity-check the register frame of a GP task.
> > + *
> > + * Rejects a frame in which any start address lies above its end address
> > + * (VSCL, PLBUCL, PLBU_ALLOC), and a frame in which both the VSCL and the
> > + * PLBUCL range are empty (start == end).
> > + *
> > + * Returns 0 if the frame is acceptable, -EINVAL otherwise.
> > + */
> > +static int lima_gp_task_validate(struct lima_sched_pipe *pipe,
> > +                                struct lima_sched_task *task)
> > +{
> > +       struct drm_lima_gp_frame *frame = task->frame;
> > +       u32 *regs = frame->frame;
> > +       u32 vs_start = regs[LIMA_GP_VSCL_START_ADDR >> 2];
> > +       u32 vs_end = regs[LIMA_GP_VSCL_END_ADDR >> 2];
> > +       u32 plbu_start = regs[LIMA_GP_PLBUCL_START_ADDR >> 2];
> > +       u32 plbu_end = regs[LIMA_GP_PLBUCL_END_ADDR >> 2];
> > +       u32 alloc_start = regs[LIMA_GP_PLBU_ALLOC_START_ADDR >> 2];
> > +       u32 alloc_end = regs[LIMA_GP_PLBU_ALLOC_END_ADDR >> 2];
> > +       (void)pipe;
> > +
> > +       if (vs_start > vs_end)
> > +               return -EINVAL;
> > +
> > +       if (plbu_start > plbu_end)
> > +               return -EINVAL;
> > +
> > +       if (alloc_start > alloc_end)
> > +               return -EINVAL;
> > +
> > +       /* neither range has anything in it */
> > +       if (vs_start == vs_end && plbu_start == plbu_end)
> > +               return -EINVAL;
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Start @task on the GP: write the task's frame registers, then issue
> > + * the start command. START_VS / START_PLBU are requested only for a
> > + * range whose start and end addresses differ.
> > + */
> > +static void lima_gp_task_run(struct lima_sched_pipe *pipe,
> > +                            struct lima_sched_task *task)
> > +{
> > +       struct lima_ip *ip = pipe->processor[0];
> > +       struct drm_lima_gp_frame *frame = task->frame;
> > +       u32 *f = frame->frame;
> > +       u32 cmd = 0;
> > +       int i;
> > +
> > +       if (f[LIMA_GP_VSCL_START_ADDR >> 2] !=
> > +           f[LIMA_GP_VSCL_END_ADDR >> 2])
> > +               cmd |= LIMA_GP_CMD_START_VS;
> > +       if (f[LIMA_GP_PLBUCL_START_ADDR >> 2] !=
> > +           f[LIMA_GP_PLBUCL_END_ADDR >> 2])
> > +               cmd |= LIMA_GP_CMD_START_PLBU;
> > +
> > +       /* before any hw ops, wait last success task async soft reset */
> > +       /* NOTE(review): the wait's return value is ignored here */
> > +       lima_gp_soft_reset_async_wait(ip);
> > +
> > +       /* frame word i goes to the register at VSCL_START_ADDR + 4 * i */
> > +       for (i = 0; i < LIMA_GP_FRAME_REG_NUM; i++)
> > +               writel(f[i], ip->iomem + LIMA_GP_VSCL_START_ADDR + i * 4);
> > +
> > +       gp_write(LIMA_GP_CMD, LIMA_GP_CMD_UPDATE_PLBU_ALLOC);
> > +       gp_write(LIMA_GP_CMD, cmd);
> > +}
> > +
> > +/*
> > + * Poll callback for lima_gp_hard_reset(): write a marker value to
> > + * PERF_CNT_0_LIMIT and return non-zero if the same value reads back.
> > + */
> > +static int lima_gp_hard_reset_poll(struct lima_ip *ip)
> > +{
> > +       gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC01A0000);
> > +       return gp_read(LIMA_GP_PERF_CNT_0_LIMIT) == 0xC01A0000;
> > +}
> > +
> > +/*
> > + * Hard-reset the GP and wait until lima_gp_hard_reset_poll() succeeds.
> > + * On success PERF_CNT_0_LIMIT is zeroed, all interrupts are cleared and
> > + * the used ones unmasked.
> > + *
> > + * Returns 0 on success or the lima_poll_timeout() error.
> > + */
> > +static int lima_gp_hard_reset(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int ret;
> > +
> > +       /* a value different from the marker written by the poll helper */
> > +       gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC0FFE000);
> > +       gp_write(LIMA_GP_INT_MASK, 0);
> > +       gp_write(LIMA_GP_CMD, LIMA_GP_CMD_RESET);
> > +       ret = lima_poll_timeout(ip, lima_gp_hard_reset_poll, 10, 100);
> > +       if (ret) {
> > +               dev_err(dev->dev, "gp hard reset timeout\n");
> > +               return ret;
> > +       }
> > +
> > +       gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0);
> > +       gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> > +       gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
> > +       return 0;
> > +}
> > +
> > +/* Pipe task_fini callback: kick off an asynchronous GP soft reset. */
> > +static void lima_gp_task_fini(struct lima_sched_pipe *pipe)
> > +{
> > +       lima_gp_soft_reset_async(pipe->processor[0]);
> > +}
> > +
> > +/*
> > + * Pipe task_error callback: log the GP interrupt state and status, then
> > + * hard-reset the GP. The reset's return value is not propagated.
> > + */
> > +static void lima_gp_task_error(struct lima_sched_pipe *pipe)
> > +{
> > +       struct lima_ip *ip = pipe->processor[0];
> > +
> > +       dev_err(ip->dev->dev, "gp task error int_state=%x status=%x\n",
> > +               gp_read(LIMA_GP_INT_STAT), gp_read(LIMA_GP_STATUS));
> > +
> > +       lima_gp_hard_reset(ip);
> > +}
> > +
> > +/* Pipe task_mmu_error callback: report the current task as done. */
> > +static void lima_gp_task_mmu_error(struct lima_sched_pipe *pipe)
> > +{
> > +       lima_sched_pipe_task_done(pipe);
> > +}
> > +
> > +/*
> > + * Log the GP core type and revision read from LIMA_GP_VERSION.
> > + *
> > + * Bits 31:16 select the product name, bits 15:8 are the major and bits
> > + * 7:0 the minor revision. Product ids not listed here are reported as
> > + * "unknown".
> > + */
> > +static void lima_gp_print_version(struct lima_ip *ip)
> > +{
> > +       u32 version, major, minor;
> > +       const char *name;       /* only ever points at string literals */
> > +
> > +       version = gp_read(LIMA_GP_VERSION);
> > +       major = (version >> 8) & 0xFF;
> > +       minor = version & 0xFF;
> > +       switch (version >> 16) {
> > +       case 0xA07:
> > +               name = "mali200";
> > +               break;
> > +       case 0xC07:
> > +               name = "mali300";
> > +               break;
> > +       case 0xB07:
> > +               name = "mali400";
> > +               break;
> > +       case 0xD07:
> > +               name = "mali450";
> > +               break;
> > +       default:
> > +               name = "unknown";
> > +               break;
> > +       }
> > +       /* major/minor are u32, hence %u */
> > +       dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
> > +                lima_ip_name(ip), name, major, minor);
> > +}
> > +
> > +/*
> > + * Task slab shared by all users of the GP pipe and the number of
> > + * lima_gp_pipe_init() calls currently holding it.
> > + * NOTE(review): no locking is visible around these - confirm that pipe
> > + * init/fini cannot run concurrently.
> > + */
> > +static struct kmem_cache *lima_gp_task_slab = NULL;
> > +static int lima_gp_task_slab_refcnt = 0;
> > +
> > +/*
> > + * Bring up the GP block: print its version, soft-reset it and wait for
> > + * the reset to finish, then install the (shared) interrupt handler.
> > + *
> > + * Returns 0 on success or a negative errno.
> > + */
> > +int lima_gp_init(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int ret;
> > +
> > +       lima_gp_print_version(ip);
> > +
> > +       /* make sure the reset below is not skipped as already pending */
> > +       ip->data.async_reset = false;
> > +       lima_gp_soft_reset_async(ip);
> > +       ret = lima_gp_soft_reset_async_wait(ip);
> > +       if (ret)
> > +               return ret;
> > +
> > +       ret = devm_request_irq(dev->dev, ip->irq, lima_gp_irq_handler,
> > +                              IRQF_SHARED, lima_ip_name(ip), ip);
> > +       if (ret)
> > +               dev_err(dev->dev, "gp %s fail to request irq\n",
> > +                       lima_ip_name(ip));
> > +
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Counterpart of lima_gp_init(). Intentionally empty: the interrupt was
> > + * requested with devm_request_irq(), so there is nothing to undo here.
> > + */
> > +void lima_gp_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +/*
> > + * Set up the GP scheduler pipe of @dev: create the shared task slab on
> > + * first use, take a reference on it and install the GP callbacks.
> > + *
> > + * Returns 0 on success or -ENOMEM if the slab cannot be created.
> > + */
> > +int lima_gp_pipe_init(struct lima_device *dev)
> > +{
> > +       int frame_size = sizeof(struct drm_lima_gp_frame);
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > +
> > +       if (!lima_gp_task_slab) {
> > +               /*
> > +                * Each object is a lima_sched_task followed by the frame;
> > +                * only the frame part (offset sizeof(struct
> > +                * lima_sched_task), length frame_size) is declared as the
> > +                * usercopy region.
> > +                */
> > +               lima_gp_task_slab = kmem_cache_create_usercopy(
> > +                       "lima_gp_task", sizeof(struct lima_sched_task) + frame_size,
> > +                       0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
> > +                       frame_size, NULL);
> > +               if (!lima_gp_task_slab)
> > +                       return -ENOMEM;
> > +       }
> > +       lima_gp_task_slab_refcnt++;
> > +
> > +       pipe->frame_size = frame_size;
> > +       pipe->task_slab = lima_gp_task_slab;
> > +
> > +       pipe->task_validate = lima_gp_task_validate;
> > +       pipe->task_run = lima_gp_task_run;
> > +       pipe->task_fini = lima_gp_task_fini;
> > +       pipe->task_error = lima_gp_task_error;
> > +       pipe->task_mmu_error = lima_gp_task_mmu_error;
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Drop one reference on the shared GP task slab and destroy the slab
> > + * when the last reference goes away.
> > + */
> > +void lima_gp_pipe_fini(struct lima_device *dev)
> > +{
> > +       if (!--lima_gp_task_slab_refcnt) {
> > +               kmem_cache_destroy(lima_gp_task_slab);
> > +               lima_gp_task_slab = NULL;
> > +       }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_gp.h b/drivers/gpu/drm/lima/lima_gp.h
> > new file mode 100644
> > index 000000000000..55bc48ec7603
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gp.h
> > @@ -0,0 +1,16 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_GP_H__
> > +#define __LIMA_GP_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +int lima_gp_init(struct lima_ip *ip);
> > +void lima_gp_fini(struct lima_ip *ip);
> > +
> > +int lima_gp_pipe_init(struct lima_device *dev);
> > +void lima_gp_pipe_fini(struct lima_device *dev);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
> > new file mode 100644
> > index 000000000000..2ba4786f9ec7
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_l2_cache.c
> > @@ -0,0 +1,80 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_regs.h"
> > +
> > +/* MMIO accessors for the L2 cache block; both expand to use a local `ip`. */
> > +#define l2_cache_write(reg, data) writel(data, ip->iomem + reg)
> > +#define l2_cache_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * Poll LIMA_L2_CACHE_STATUS until the COMMAND_BUSY bit clears, giving up
> > + * after 1000us. Returns 0 on success or the readl_poll_timeout() error,
> > + * which is also logged.
> > + */
> > +static int lima_l2_cache_wait_idle(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       u32 status;
> > +       int ret;
> > +
> > +       ret = readl_poll_timeout(ip->iomem + LIMA_L2_CACHE_STATUS, status,
> > +                                !(status & LIMA_L2_CACHE_STATUS_COMMAND_BUSY),
> > +                                0, 1000);
> > +       if (ret)
> > +               dev_err(dev->dev, "l2 cache wait command timeout\n");
> > +
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Issue a CLEAR_ALL command to the L2 cache and wait for it to complete.
> > + * The command write and the wait both happen under ip->data.lock.
> > + * Returns 0 on success or the error from lima_l2_cache_wait_idle().
> > + */
> > +int lima_l2_cache_flush(struct lima_ip *ip)
> > +{
> > +       int ret;
> > +
> > +       spin_lock(&ip->data.lock);
> > +       l2_cache_write(LIMA_L2_CACHE_COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
> > +       ret = lima_l2_cache_wait_idle(ip);
> > +       spin_unlock(&ip->data.lock);
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Initialise one L2 cache IP block: log its geometry, flush it, then
> > + * enable access and read-allocate and program LIMA_L2_CACHE_MAX_READS.
> > + *
> > + * Returns 0 on success, -ENODEV for lima_ip_l2_cache2 when none of
> > + * PP4-PP7 is present, or the error from lima_l2_cache_flush().
> > + */
> > +int lima_l2_cache_init(struct lima_ip *ip)
> > +{
> > +       int i, err;
> > +       u32 size;
> > +       struct lima_device *dev = ip->dev;
> > +
> > +       /* l2_cache2 only exists when one of PP4-7 present */
> > +       if (ip->id == lima_ip_l2_cache2) {
> > +               for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
> > +                       if (dev->ip[i].present)
> > +                               break;
> > +               }
> > +               /* loop ran to completion: no PP4-7 was found */
> > +               if (i > lima_ip_pp7)
> > +                       return -ENODEV;
> > +       }
> > +
> > +       spin_lock_init(&ip->data.lock);
> > +
> > +       /*
> > +        * Each byte of the SIZE register is used as a log2 value:
> > +        * bits 16-23 cache size, 8-15 associativity, 0-7 line size,
> > +        * 24-31 external bus width.
> > +        * NOTE(review): the shift count for the size in K is negative if
> > +        * bits 16-23 hold a value below 10 - confirm hardware never
> > +        * reports that.
> > +        */
> > +       size = l2_cache_read(LIMA_L2_CACHE_SIZE);
> > +       dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
> > +                1 << (((size >> 16) & 0xff) - 10),
> > +                1 << ((size >> 8) & 0xff),
> > +                1 << (size & 0xff),
> > +                1 << ((size >> 24) & 0xff));
> > +
> > +       err = lima_l2_cache_flush(ip);
> > +       if (err)
> > +               return err;
> > +
> > +       l2_cache_write(LIMA_L2_CACHE_ENABLE,
> > +                      LIMA_L2_CACHE_ENABLE_ACCESS|LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
> > +       l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
> > +
> > +       return 0;
> > +}
> > +
> > +/* Counterpart of lima_l2_cache_init(); the body is empty in this version. */
> > +void lima_l2_cache_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
> > new file mode 100644
> > index 000000000000..2ff91eafefbe
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_l2_cache.h
> > @@ -0,0 +1,14 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_L2_CACHE_H__
> > +#define __LIMA_L2_CACHE_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_l2_cache_init(struct lima_ip *ip);
> > +void lima_l2_cache_fini(struct lima_ip *ip);
> > +
> > +int lima_l2_cache_flush(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
> > new file mode 100644
> > index 000000000000..c6c151d33cf8
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_mmu.c
> > @@ -0,0 +1,142 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_mmu.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +#include "lima_regs.h"
> > +
> > +/* MMIO accessors for the MMU block; both expand to use a local `ip`. */
> > +#define mmu_write(reg, data) writel(data, ip->iomem + reg)
> > +#define mmu_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * Write @cmd to LIMA_MMU_COMMAND, then poll the register at offset @addr
> > + * into @val until @cond is true, with a 100us timeout.
> > + * Evaluates to 0 on success or the readl_poll_timeout() error, which is
> > + * also logged. Requires locals named `ip` and `dev` at the call site.
> > + */
> > +#define lima_mmu_send_command(cmd, addr, val, cond)         \
> > +({                                                          \
> > +       int __ret;                                           \
> > +                                                            \
> > +       mmu_write(LIMA_MMU_COMMAND, cmd);                    \
> > +       __ret = readl_poll_timeout(ip->iomem + (addr), val,  \
> > +                                 cond, 0, 100);             \
> > +       if (__ret)                                           \
> > +               dev_err(dev->dev,                            \
> > +                       "mmu command %x timeout\n", cmd);    \
> > +       __ret;                                               \
> > +})
> > +
> > +/*
> > + * Interrupt handler for one MMU block (@data is its struct lima_ip).
> > + *
> > + * Returns IRQ_NONE when LIMA_MMU_INT_STATUS reads zero. Otherwise it logs
> > + * page faults and read bus errors, masks all MMU interrupts, acknowledges
> > + * the pending ones and calls lima_sched_pipe_mmu_error() on the GP pipe
> > + * (for lima_ip_gpmmu) or the PP pipe (for any other MMU).
> > + */
> > +static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
> > +{
> > +       struct lima_ip *ip = data;
> > +       struct lima_device *dev = ip->dev;
> > +       u32 status = mmu_read(LIMA_MMU_INT_STATUS);
> > +       struct lima_sched_pipe *pipe;
> > +
> > +       /* for shared irq case */
> > +       if (!status)
> > +               return IRQ_NONE;
> > +
> > +       if (status & LIMA_MMU_INT_PAGE_FAULT) {
> > +               u32 fault = mmu_read(LIMA_MMU_PAGE_FAULT_ADDR);
> > +               dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
> > +                       fault, LIMA_MMU_STATUS_BUS_ID(status),
> > +                       status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
> > +                       lima_ip_name(ip));
> > +       }
> > +
> > +       if (status & LIMA_MMU_INT_READ_BUS_ERROR) {
> > +               dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
> > +       }
> > +
> > +       /* mask all interrupts before resume */
> > +       mmu_write(LIMA_MMU_INT_MASK, 0);
> > +       mmu_write(LIMA_MMU_INT_CLEAR, status);
> > +
> > +       /* the GP MMU reports to the GP pipe, every other MMU to the PP pipe */
> > +       pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
> > +       lima_sched_pipe_mmu_error(pipe);
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +/*
> > + * Bring up one MMU block: register write test, hard reset, IRQ
> > + * registration, then enable paging on the device's empty VM.
> > + *
> > + * Nothing is done for lima_ip_ppmmu_bcast. Returns 0 on success, -EIO if
> > + * the DTE write test fails, or the error from a command timeout or from
> > + * devm_request_irq().
> > + */
> > +int lima_mmu_init(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int err;
> > +       u32 v;
> > +
> > +       if (ip->id == lima_ip_ppmmu_bcast)
> > +               return 0;
> > +
> > +       /*
> > +        * Write test: the value must read back with the low 12 bits
> > +        * cleared (presumably those bits are not writable - confirm).
> > +        */
> > +       mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
> > +       if (mmu_read(LIMA_MMU_DTE_ADDR) != 0xCAFEB000) {
> > +               dev_err(dev->dev, "mmu %s dte write test fail\n", lima_ip_name(ip));
> > +               return -EIO;
> > +       }
> > +
> > +       /*
> > +        * HARD_RESET is written here and again inside
> > +        * lima_mmu_send_command(); reset is considered done when the DTE
> > +        * address register reads back 0.
> > +        */
> > +       mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
> > +       err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
> > +                                   LIMA_MMU_DTE_ADDR, v, v == 0);
> > +       if (err)
> > +               return err;
> > +
> > +       err = devm_request_irq(dev->dev, ip->irq, lima_mmu_irq_handler,
> > +                              IRQF_SHARED, lima_ip_name(ip), ip);
> > +       if (err) {
> > +               dev_err(dev->dev, "mmu %s fail to request irq\n", lima_ip_name(ip));
> > +               return err;
> > +       }
> > +
> > +       /* unmask fault/bus-error interrupts and point at the empty VM */
> > +       mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
> > +       mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
> > +       return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
> > +                                    LIMA_MMU_STATUS, v,
> > +                                    v & LIMA_MMU_STATUS_PAGING_ENABLED);
> > +}
> > +
> > +/* Counterpart of lima_mmu_init(); the body is empty in this version. */
> > +void lima_mmu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +/*
> > + * Switch the MMU to the page directory of @vm and flush its TLB.
> > + *
> > + * The MMU is stalled around the update. When @vm is NULL the DTE address
> > + * is left unchanged and only the TLB flush is performed. Command
> > + * timeouts are logged by lima_mmu_send_command() but not propagated.
> > + */
> > +void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       u32 v;
> > +
> > +       lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_STALL,
> > +                             LIMA_MMU_STATUS, v,
> > +                             v & LIMA_MMU_STATUS_STALL_ACTIVE);
> > +
> > +       if (vm)
> > +               mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
> > +
> > +       /* flush the TLB */
> > +       mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_ZAP_CACHE);
> > +
> > +       lima_mmu_send_command(LIMA_MMU_COMMAND_DISABLE_STALL,
> > +                             LIMA_MMU_STATUS, v,
> > +                             !(v & LIMA_MMU_STATUS_STALL_ACTIVE));
> > +}
> > +
> > +/*
> > + * Recover the MMU if it reports an active page fault.
> > + *
> > + * Does nothing unless LIMA_MMU_STATUS has PAGE_FAULT_ACTIVE set. In that
> > + * case the MMU is hard reset with interrupts masked, then interrupts are
> > + * re-enabled and paging is turned back on using the device's empty VM.
> > + * Command timeouts are logged but not propagated.
> > + */
> > +void lima_mmu_page_fault_resume(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       u32 status = mmu_read(LIMA_MMU_STATUS);
> > +       u32 v;
> > +
> > +       if (status & LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE) {
> > +               dev_info(dev->dev, "mmu resume\n");
> > +
> > +               mmu_write(LIMA_MMU_INT_MASK, 0);
> > +               /* non-zero marker so reset completion (DTE == 0) is observable */
> > +               mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
> > +               lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
> > +                                     LIMA_MMU_DTE_ADDR, v, v == 0);
> > +               mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
> > +               mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
> > +               lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
> > +                                     LIMA_MMU_STATUS, v,
> > +                                     v & LIMA_MMU_STATUS_PAGING_ENABLED);
> > +       }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_mmu.h b/drivers/gpu/drm/lima/lima_mmu.h
> > new file mode 100644
> > index 000000000000..ca173b60fc73
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_mmu.h
> > @@ -0,0 +1,16 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_MMU_H__
> > +#define __LIMA_MMU_H__
> > +
> > +struct lima_ip;
> > +struct lima_vm;
> > +
> > +int lima_mmu_init(struct lima_ip *ip);
> > +void lima_mmu_fini(struct lima_ip *ip);
> > +
> > +void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm);
> > +void lima_mmu_page_fault_resume(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_object.c b/drivers/gpu/drm/lima/lima_object.c
> > new file mode 100644
> > index 000000000000..28ff1b8e1dca
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_object.c
> > @@ -0,0 +1,124 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <drm/drm_prime.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/dma-mapping.h>
> > +
> > +#include "lima_object.h"
> > +
> > +/*
> > + * Release everything owned by @bo and free it.
> > + *
> > + * A bo with an sg table frees its page array and goes through
> > + * drm_prime_gem_destroy(). Any other bo has every non-zero DMA address
> > + * unmapped and its pages returned with drm_gem_put_pages(). Partially
> > + * constructed objects are handled because each resource is checked
> > + * before it is released.
> > + */
> > +void lima_bo_destroy(struct lima_bo *bo)
> > +{
> > +       struct drm_gem_object *gem = &bo->gem;
> > +
> > +       if (!bo->sgt) {
> > +               if (bo->pages_dma_addr) {
> > +                       int idx, count = gem->size >> PAGE_SHIFT;
> > +
> > +                       for (idx = 0; idx < count; idx++) {
> > +                               dma_addr_t addr = bo->pages_dma_addr[idx];
> > +
> > +                               /* zero entries were never mapped */
> > +                               if (!addr)
> > +                                       continue;
> > +
> > +                               dma_unmap_page(gem->dev->dev, addr,
> > +                                              PAGE_SIZE, DMA_BIDIRECTIONAL);
> > +                       }
> > +               }
> > +
> > +               if (bo->pages)
> > +                       drm_gem_put_pages(gem, bo->pages, true, true);
> > +       } else {
> > +               kfree(bo->pages);
> > +               drm_prime_gem_destroy(gem, bo->sgt);
> > +       }
> > +
> > +       kfree(bo->pages_dma_addr);
> > +       drm_gem_object_release(gem);
> > +       kfree(bo);
> > +}
> > +
> > +/*
> > + * Allocate a zeroed struct lima_bo and initialise its embedded GEM object
> > + * with @size rounded up to a whole number of pages.
> > + *
> > + * @flags is not used here. Returns the new object or an ERR_PTR()
> > + * (-ENOMEM, or the drm_gem_object_init() error).
> > + */
> > +static struct lima_bo *lima_bo_create_struct(struct lima_device *dev, u32 size, u32 flags,
> > +                                            struct reservation_object *resv)
> > +{
> > +       struct lima_bo *obj;
> > +       int ret;
> > +
> > +       size = PAGE_ALIGN(size);
> > +
> > +       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +       if (!obj)
> > +               return ERR_PTR(-ENOMEM);
> > +
> > +       mutex_init(&obj->lock);
> > +       INIT_LIST_HEAD(&obj->va);
> > +       obj->gem.resv = resv;
> > +
> > +       ret = drm_gem_object_init(dev->ddev, &obj->gem, size);
> > +       if (!ret)
> > +               return obj;
> > +
> > +       kfree(obj);
> > +       return ERR_PTR(ret);
> > +}
> > +
> > +/*
> > + * Create a buffer object and populate its page and DMA address arrays.
> > + *
> > + * @dev:   device the object belongs to
> > + * @size:  requested size in bytes (page-aligned by lima_bo_create_struct())
> > + * @flags: passed through to lima_bo_create_struct()
> > + * @sgt:   when non-NULL, the pages and DMA addresses are taken from this
> > + *         sg table; when NULL, pages are obtained with drm_gem_get_pages()
> > + *         (GFP_DMA32) and each page is DMA-mapped individually
> > + * @resv:  reservation object stored in the GEM object
> > + *
> > + * Returns the new object or an ERR_PTR(). On failure the partially built
> > + * object is torn down with lima_bo_destroy().
> > + */
> > +struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
> > +                              u32 flags, struct sg_table *sgt,
> > +                              struct reservation_object *resv)
> > +{
> > +       size_t i, npages;
> > +       int err;
> > +       struct lima_bo *bo, *ret;
> > +
> > +       bo = lima_bo_create_struct(dev, size, flags, resv);
> > +       if (IS_ERR(bo))
> > +               return bo;
> > +
> > +       npages = bo->gem.size >> PAGE_SHIFT;
> > +
> > +       /* kcalloc() checks the count * size multiplication for overflow */
> > +       bo->pages_dma_addr = kcalloc(npages, sizeof(*bo->pages_dma_addr),
> > +                                    GFP_KERNEL);
> > +       if (!bo->pages_dma_addr) {
> > +               ret = ERR_PTR(-ENOMEM);
> > +               goto err_out;
> > +       }
> > +
> > +       if (sgt) {
> > +               bo->sgt = sgt;
> > +
> > +               bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
> > +               if (!bo->pages) {
> > +                       ret = ERR_PTR(-ENOMEM);
> > +                       goto err_out;
> > +               }
> > +
> > +               err = drm_prime_sg_to_page_addr_arrays(
> > +                       sgt, bo->pages, bo->pages_dma_addr, npages);
> > +               if (err) {
> > +                       ret = ERR_PTR(err);
> > +                       goto err_out;
> > +               }
> > +       } else {
> > +               mapping_set_gfp_mask(bo->gem.filp->f_mapping, GFP_DMA32);
> > +               bo->pages = drm_gem_get_pages(&bo->gem);
> > +               if (IS_ERR(bo->pages)) {
> > +                       ret = ERR_CAST(bo->pages);
> > +                       /* lima_bo_destroy() must not see the error pointer */
> > +                       bo->pages = NULL;
> > +                       goto err_out;
> > +               }
> > +
> > +               for (i = 0; i < npages; i++) {
> > +                       dma_addr_t addr = dma_map_page(dev->dev, bo->pages[i], 0,
> > +                                                      PAGE_SIZE, DMA_BIDIRECTIONAL);
> > +                       if (dma_mapping_error(dev->dev, addr)) {
> > +                               /* entries mapped so far are unmapped in err_out */
> > +                               ret = ERR_PTR(-EFAULT);
> > +                               goto err_out;
> > +                       }
> > +                       bo->pages_dma_addr[i] = addr;
> > +               }
> > +       }
> > +
> > +       return bo;
> > +
> > +err_out:
> > +       lima_bo_destroy(bo);
> > +       return ret;
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_object.h b/drivers/gpu/drm/lima/lima_object.h
> > new file mode 100644
> > index 000000000000..70099f1045ac
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_object.h
> > @@ -0,0 +1,36 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_OBJECT_H__
> > +#define __LIMA_OBJECT_H__
> > +
> > +#include <drm/drm_gem.h>
> > +
> > +#include "lima_device.h"
> > +
> > +/* Driver buffer object wrapping a GEM object. */
> > +struct lima_bo {
> > +       /* embedded GEM object; to_lima_bo() converts back from it */
> > +       struct drm_gem_object gem;
> > +
> > +       /* one entry per page of the object */
> > +       struct page **pages;
> > +       /* DMA address of each page, same indexing as pages */
> > +       dma_addr_t *pages_dma_addr;
> > +       /* sg table the object was created from, or NULL */
> > +       struct sg_table *sgt;
> > +       /* presumably the kernel mapping from lima_bo_vmap() - TODO confirm */
> > +       void *vaddr;
> > +
> > +       struct mutex lock;
> > +       /* list head; presumably the object's GPU VA mappings - TODO confirm */
> > +       struct list_head va;
> > +};
> > +
> > +/* Return the struct lima_bo that embeds @obj as its `gem` member. */
> > +static inline struct lima_bo *
> > +to_lima_bo(struct drm_gem_object *obj)
> > +{
> > +       return container_of(obj, struct lima_bo, gem);
> > +}
> > +
> > +struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
> > +                              u32 flags, struct sg_table *sgt,
> > +                              struct reservation_object *resv);
> > +void lima_bo_destroy(struct lima_bo *bo);
> > +void *lima_bo_vmap(struct lima_bo *bo);
> > +void lima_bo_vunmap(struct lima_bo *bo);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_pmu.c b/drivers/gpu/drm/lima/lima_pmu.c
> > new file mode 100644
> > index 000000000000..3c50524b70a7
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pmu.c
> > @@ -0,0 +1,59 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_pmu.h"
> > +#include "lima_regs.h"
> > +
> > +/* MMIO accessors for the PMU block; both expand to use a local `ip`. */
> > +#define pmu_write(reg, data) writel(data, ip->iomem + reg)
> > +#define pmu_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * Wait for the PMU to signal command completion.
> > + *
> > + * Polls LIMA_PMU_INT_RAWSTAT every 100us, for up to 100ms, until the
> > + * LIMA_PMU_INT_CMD_MASK bit is set, then clears that interrupt.
> > + * Returns 0 on success or the readl_poll_timeout() error, which is also
> > + * logged.
> > + */
> > +static int lima_pmu_wait_cmd(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int err;
> > +       u32 v;
> > +
> > +       err = readl_poll_timeout(ip->iomem + LIMA_PMU_INT_RAWSTAT,
> > +                                v, v & LIMA_PMU_INT_CMD_MASK,
> > +                                100, 100000);
> > +       if (err) {
> > +               dev_err(dev->dev, "timeout wait pmu cmd\n");
> > +               return err;
> > +       }
> > +
> > +       /* acknowledge the completion so the next command starts clean */
> > +       pmu_write(LIMA_PMU_INT_CLEAR, LIMA_PMU_INT_CMD_MASK);
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Initialise the PMU: mask its interrupts, program the software delay and
> > + * power up every IP that LIMA_PMU_STATUS reports as off.
> > + * Returns 0 on success or the error from lima_pmu_wait_cmd().
> > + */
> > +int lima_pmu_init(struct lima_ip *ip)
> > +{
> > +       int err;
> > +       u32 stat;
> > +
> > +       pmu_write(LIMA_PMU_INT_MASK, 0);
> > +
> > +       /* If this value is too low, when in high GPU clk freq,
> > +        * GPU will be in unstable state. */
> > +       pmu_write(LIMA_PMU_SW_DELAY, 0xffff);
> > +
> > +       /* status reg 1=off 0=on */
> > +       stat = pmu_read(LIMA_PMU_STATUS);
> > +
> > +       /* power up all ip */
> > +       if (stat) {
> > +               pmu_write(LIMA_PMU_POWER_UP, stat);
> > +               err = lima_pmu_wait_cmd(ip);
> > +               if (err)
> > +                       return err;
> > +       }
> > +       return 0;
> > +}
> > +
> > +/* Counterpart of lima_pmu_init(); the body is empty in this version. */
> > +void lima_pmu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_pmu.h b/drivers/gpu/drm/lima/lima_pmu.h
> > new file mode 100644
> > index 000000000000..1cf94a35bdf9
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pmu.h
> > @@ -0,0 +1,12 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_PMU_H__
> > +#define __LIMA_PMU_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_pmu_init(struct lima_ip *ip);
> > +void lima_pmu_fini(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
> > new file mode 100644
> > index 000000000000..7b36c29eee89
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pp.c
> > @@ -0,0 +1,423 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +#include <linux/slab.h>
> > +
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_pp.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_bcast.h"
> > +#include "lima_vm.h"
> > +#include "lima_regs.h"
> > +
> > +/* MMIO accessors for a PP block; both expand to use a local `ip`. */
> > +#define pp_write(reg, data) writel(data, ip->iomem + reg)
> > +#define pp_read(reg) readl(ip->iomem + reg)
> > +
> > +/*
> > + * Process the interrupt bits in @state for one PP.
> > + *
> > + * If any LIMA_PP_IRQ_MASK_ERROR bit is set, the error is logged, the PP
> > + * pipe is flagged with pipe->error and all PP interrupts are masked.
> > + * The bits in @state are always acknowledged afterwards.
> > + */
> > +static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +
> > +       if (state & LIMA_PP_IRQ_MASK_ERROR) {
> > +               u32 status = pp_read(LIMA_PP_STATUS);
> > +
> > +               dev_err(dev->dev, "pp error irq state=%x status=%x\n",
> > +                       state, status);
> > +
> > +               pipe->error = true;
> > +
> > +               /* mask all interrupts before hard reset */
> > +               pp_write(LIMA_PP_INT_MASK, 0);
> > +       }
> > +
> > +       pp_write(LIMA_PP_INT_CLEAR, state);
> > +}
> > +
> > +/*
> > + * Interrupt handler for a single PP (@data is its struct lima_ip).
> > + *
> > + * Returns IRQ_NONE when LIMA_PP_INT_STATUS reads zero. Otherwise the
> > + * interrupt is handled, pipe->task is decremented and, when it reaches
> > + * zero, lima_sched_pipe_task_done() is called.
> > + */
> > +static irqreturn_t lima_pp_irq_handler(int irq, void *data)
> > +{
> > +       struct lima_ip *ip = data;
> > +       struct lima_device *dev = ip->dev;
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +       u32 state = pp_read(LIMA_PP_INT_STATUS);
> > +
> > +       /* for shared irq case */
> > +       if (!state)
> > +               return IRQ_NONE;
> > +
> > +       lima_pp_handle_irq(ip, state);
> > +
> > +       /* pipe->task counts the PPs still working on the current task */
> > +       if (atomic_dec_and_test(&pipe->task))
> > +               lima_sched_pipe_task_done(pipe);
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +/*
> > + * Interrupt handler for the PP broadcast unit (@data is its struct
> > + * lima_ip).
> > + *
> > + * Walks the PPs used by the current task (frame->num_pp of them) and, for
> > + * each one not yet recorded in pipe->done, either handles its pending
> > + * interrupt or, if none is pending and it is no longer rendering, treats
> > + * it as finished. Each finished PP decrements pipe->task; the last one
> > + * triggers lima_sched_pipe_task_done().
> > + *
> > + * Returns IRQ_HANDLED if at least one PP had a pending interrupt,
> > + * IRQ_NONE otherwise, including when no task is currently running.
> > + */
> > +static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
> > +{
> > +       int i;
> > +       irqreturn_t ret = IRQ_NONE;
> > +       struct lima_ip *pp_bcast = data;
> > +       struct lima_device *dev = pp_bcast->dev;
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +       struct drm_lima_m450_pp_frame *frame;
> > +
> > +       /*
> > +        * for shared irq case: the line may fire for another device while
> > +        * no task is running, so there is no frame to inspect
> > +        */
> > +       if (!pipe->current_task)
> > +               return IRQ_NONE;
> > +
> > +       frame = pipe->current_task->frame;
> > +
> > +       for (i = 0; i < frame->num_pp; i++) {
> > +               struct lima_ip *ip = pipe->processor[i];
> > +               u32 status, state;
> > +
> > +               if (pipe->done & (1 << i))
> > +                       continue;
> > +
> > +               /* status read first in case int state change in the middle
> > +                * which may miss the interrupt handling */
> > +               status = pp_read(LIMA_PP_STATUS);
> > +               state = pp_read(LIMA_PP_INT_STATUS);
> > +
> > +               if (state) {
> > +                       lima_pp_handle_irq(ip, state);
> > +                       ret = IRQ_HANDLED;
> > +               } else {
> > +                       /* nothing pending and still rendering: not done yet */
> > +                       if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
> > +                               continue;
> > +               }
> > +
> > +               pipe->done |= (1 << i);
> > +               if (atomic_dec_and_test(&pipe->task))
> > +                       lima_sched_pipe_task_done(pipe);
> > +       }
> > +
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Start a soft reset of @ip without waiting for it to finish.
> > + *
> > + * Does nothing if a reset is already pending. Otherwise interrupts are
> > + * masked, LIMA_PP_IRQ_MASK_ALL is written to INT_RAWSTAT, the reset is
> > + * requested and ip->data.async_reset is set so that
> > + * lima_pp_soft_reset_async_wait() knows it has to wait.
> > + */
> > +static void lima_pp_soft_reset_async(struct lima_ip *ip)
> > +{
> > +       if (ip->data.async_reset)
> > +               return;
> > +
> > +       pp_write(LIMA_PP_INT_MASK, 0);
> > +       pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
> > +       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
> > +       ip->data.async_reset = true;
> > +}
> > +
> > +/*
> > + * Poll callback for soft reset: non-zero once the PP is no longer
> > + * rendering and INT_RAWSTAT equals exactly LIMA_PP_IRQ_RESET_COMPLETED.
> > + */
> > +static int lima_pp_soft_reset_poll(struct lima_ip *ip)
> > +{
> > +       return !(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE) &&
> > +               pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
> > +}
> > +
> > +/*
> > + * Wait for a previously started soft reset of a single PP to complete,
> > + * then clear all interrupts and unmask the ones the driver uses.
> > + * Returns 0 on success or the lima_poll_timeout() error, which is also
> > + * logged.
> > + */
> > +static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int ret;
> > +
> > +       ret = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
> > +       if (ret) {
> > +               dev_err(dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
> > +               return ret;
> > +       }
> > +
> > +       pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
> > +       pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Wait for a pending asynchronous soft reset on @ip, if there is one.
> > + *
> > + * For the broadcast unit, every PP used by the current task
> > + * (frame->num_pp of them) is waited for, even after one of them fails.
> > + * For any other IP only @ip itself is waited for. The pending flag is
> > + * cleared in both cases.
> > + *
> > + * Returns 0 if no reset was pending or all waits succeeded, otherwise
> > + * the error code of the first wait that failed.
> > + */
> > +static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
> > +{
> > +       int i, err = 0;
> > +
> > +       if (!ip->data.async_reset)
> > +               return 0;
> > +
> > +       if (ip->id == lima_ip_pp_bcast) {
> > +               struct lima_device *dev = ip->dev;
> > +               struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +               struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
> > +
> > +               for (i = 0; i < frame->num_pp; i++) {
> > +                       int ret = lima_pp_soft_reset_async_wait_one(pipe->processor[i]);
> > +
> > +                       /*
> > +                        * Remember the first failure only: OR-ing negative
> > +                        * errno values together could produce a code that
> > +                        * matches no real error.
> > +                        */
> > +                       if (ret && !err)
> > +                               err = ret;
> > +               }
> > +       } else {
> > +               err = lima_pp_soft_reset_async_wait_one(ip);
> > +       }
> > +
> > +       ip->data.async_reset = false;
> > +       return err;
> > +}
> > +
> > +/*
> > + * Program a PP with a frame description.
> > + *
> > + * @frame supplies LIMA_PP_FRAME_REG_NUM consecutive 32-bit registers
> > + * starting at LIMA_PP_FRAME. @wb supplies 3 * LIMA_PP_WB_REG_NUM values,
> > + * consumed in order for the register banks LIMA_PP_WB(0..2).
> > + */
> > +static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
> > +{
> > +       const u32 *src = wb;
> > +       int reg, unit;
> > +
> > +       for (reg = 0; reg < LIMA_PP_FRAME_REG_NUM; reg++)
> > +               writel(frame[reg], ip->iomem + LIMA_PP_FRAME + reg * 4);
> > +
> > +       for (unit = 0; unit < 3; unit++) {
> > +               for (reg = 0; reg < LIMA_PP_WB_REG_NUM; reg++)
> > +                       writel(*src++, ip->iomem + LIMA_PP_WB(unit) + reg * 4);
> > +       }
> > +}
> > +
> > +/*
> > + * Poll callback for hard reset: writes a marker to PERF_CNT_0_LIMIT and
> > + * returns non-zero when it reads back (presumably the register only
> > + * accepts writes again once the reset has finished - confirm).
> > + */
> > +static int lima_pp_hard_reset_poll(struct lima_ip *ip)
> > +{
> > +       pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
> > +       return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == 0xC01A0000;
> > +}
> > +
> > +/*
> > + * Force-reset one PP and wait for it to come back.
> > + *
> > + * Interrupts are masked during the reset; afterwards PERF_CNT_0_LIMIT is
> > + * zeroed, all interrupts are cleared and the used ones are unmasked.
> > + * Returns 0 on success or the lima_poll_timeout() error, which is also
> > + * logged.
> > + */
> > +static int lima_pp_hard_reset(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int ret;
> > +
> > +       /* different marker from the one the poll callback writes */
> > +       pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
> > +       pp_write(LIMA_PP_INT_MASK, 0);
> > +       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
> > +       ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
> > +       if (ret) {
> > +               dev_err(dev->dev, "pp hard reset timeout\n");
> > +               return ret;
> > +       }
> > +
> > +       pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
> > +       pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
> > +       pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Log the product name and version of a PP.
> > + *
> > + * LIMA_PP_VERSION is decoded as: bits 16-31 product id, bits 8-15 major
> > + * version, bits 0-7 minor version. Unrecognised product ids are reported
> > + * as "unknown".
> > + */
> > +static void lima_pp_print_version(struct lima_ip *ip)
> > +{
> > +       u32 version, major, minor;
> > +       const char *name;
> > +
> > +       version = pp_read(LIMA_PP_VERSION);
> > +       major = (version >> 8) & 0xFF;
> > +       minor = version & 0xFF;
> > +       switch (version >> 16) {
> > +       case 0xC807:
> > +               name = "mali200";
> > +               break;
> > +       case 0xCE07:
> > +               name = "mali300";
> > +               break;
> > +       case 0xCD07:
> > +               name = "mali400";
> > +               break;
> > +       case 0xCF07:
> > +               name = "mali450";
> > +               break;
> > +       default:
> > +               name = "unknown";
> > +               break;
> > +       }
> > +       /* major/minor are u32, so print them as unsigned */
> > +       dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
> > +                lima_ip_name(ip), name, major, minor);
> > +}
> > +
> > +/*
> > + * Initialise one PP: log its version, soft reset it and install its
> > + * interrupt handler.
> > + * Returns 0 on success, or the error from the reset wait or from
> > + * devm_request_irq().
> > + */
> > +int lima_pp_init(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int err;
> > +
> > +       lima_pp_print_version(ip);
> > +
> > +       /* clear the flag first so the reset below is really issued */
> > +       ip->data.async_reset = false;
> > +       lima_pp_soft_reset_async(ip);
> > +       err = lima_pp_soft_reset_async_wait(ip);
> > +       if (err)
> > +               return err;
> > +
> > +       err = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
> > +                              IRQF_SHARED, lima_ip_name(ip), ip);
> > +       if (err) {
> > +               dev_err(dev->dev, "pp %s fail to request irq\n",
> > +                       lima_ip_name(ip));
> > +               return err;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +/* Counterpart of lima_pp_init(); the body is empty in this version. */
> > +void lima_pp_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +/*
> > + * Install the shared interrupt handler for the PP broadcast unit.
> > + * Returns 0 on success or the devm_request_irq() error, which is also
> > + * logged.
> > + */
> > +int lima_pp_bcast_init(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int ret;
> > +
> > +       ret = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
> > +                              IRQF_SHARED, lima_ip_name(ip), ip);
> > +       if (ret)
> > +               dev_err(dev->dev, "pp %s fail to request irq\n",
> > +                       lima_ip_name(ip));
> > +
> > +       return ret;
> > +}
> > +
> > +/* Counterpart of lima_pp_bcast_init(); the body is empty in this version. */
> > +void lima_pp_bcast_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +/*
> > + * Check the PP count requested by a task.
> > + *
> > + * The frame is read as the m450 layout when the pipe has a broadcast
> > + * processor and as the m400 layout otherwise. Returns 0 when num_pp is
> > + * between 1 and pipe->num_processor inclusive, -EINVAL otherwise.
> > + */
> > +static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
> > +                                struct lima_sched_task *task)
> > +{
> > +       u32 count;
> > +
> > +       if (pipe->bcast_processor)
> > +               count = ((struct drm_lima_m450_pp_frame *)task->frame)->num_pp;
> > +       else
> > +               count = ((struct drm_lima_m400_pp_frame *)task->frame)->num_pp;
> > +
> > +       if (count != 0 && count <= pipe->num_processor)
> > +               return 0;
> > +
> > +       return -EINVAL;
> > +}
> > +
> > +/*
> > + * Start @task on the PP pipe.
> > + *
> > + * With a broadcast processor (m450 frame layout) the common frame is
> > + * written once through the broadcast unit, per-PP stack (and, without
> > + * DLBU, frame) addresses are written to each PP, and rendering is started
> > + * with a single write to the broadcast unit. Without one (m400 frame
> > + * layout) every PP is programmed and started individually.
> > + *
> > + * In both cases pipe->task is set to the number of PPs used, and the
> > + * frame buffer inside @task is modified in place.
> > + */
> > +static void lima_pp_task_run(struct lima_sched_pipe *pipe,
> > +                            struct lima_sched_task *task)
> > +{
> > +       if (pipe->bcast_processor) {
> > +               struct drm_lima_m450_pp_frame *frame = task->frame;
> > +               struct lima_device *dev = pipe->bcast_processor->dev;
> > +               struct lima_ip *ip = pipe->bcast_processor;
> > +               int i;
> > +
> > +               pipe->done = 0;
> > +               atomic_set(&pipe->task, frame->num_pp);
> > +
> > +               if (frame->use_dlbu) {
> > +                       lima_dlbu_enable(dev, frame->num_pp);
> > +
> > +                       /* with DLBU all PPs share one reserved frame address */
> > +                       frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
> > +                       lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
> > +               }
> > +               else
> > +                       lima_dlbu_disable(dev);
> > +
> > +               lima_bcast_enable(dev, frame->num_pp);
> > +
> > +               lima_pp_soft_reset_async_wait(ip);
> > +
> > +               lima_pp_write_frame(ip, frame->frame, frame->wb);
> > +
> > +               for (i = 0; i < frame->num_pp; i++) {
> > +                       /* shadows the outer ip so pp_write() targets this PP */
> > +                       struct lima_ip *ip = pipe->processor[i];
> > +
> > +                       pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
> > +                       if (!frame->use_dlbu)
> > +                               pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
> > +               }
> > +
> > +               /* ip is the broadcast unit again here */
> > +               pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
> > +       }
> > +       else {
> > +               struct drm_lima_m400_pp_frame *frame = task->frame;
> > +               int i;
> > +
> > +               atomic_set(&pipe->task, frame->num_pp);
> > +
> > +               for (i = 0; i < frame->num_pp; i++) {
> > +                       struct lima_ip *ip = pipe->processor[i];
> > +
> > +                       /* patch the per-PP addresses into the shared frame */
> > +                       frame->frame[LIMA_PP_FRAME >> 2] =
> > +                               frame->plbu_array_address[i];
> > +                       frame->frame[LIMA_PP_STACK >> 2] =
> > +                               frame->fragment_stack_address[i];
> > +
> > +                       lima_pp_soft_reset_async_wait(ip);
> > +
> > +                       lima_pp_write_frame(ip, frame->frame, frame->wb);
> > +
> > +                       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
> > +               }
> > +       }
> > +}
> > +
> > +/*
> > + * Kick off an asynchronous soft reset after a task: on the broadcast
> > + * processor when the pipe has one, otherwise on every PP of the pipe.
> > + */
> > +static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
> > +{
> > +       int idx;
> > +
> > +       if (pipe->bcast_processor) {
> > +               lima_pp_soft_reset_async(pipe->bcast_processor);
> > +               return;
> > +       }
> > +
> > +       for (idx = 0; idx < pipe->num_processor; idx++)
> > +               lima_pp_soft_reset_async(pipe->processor[idx]);
> > +}
> > +
> > +/*
> > + * Error recovery for the PP pipe: log the interrupt state and status of
> > + * every PP and hard reset each one. The return value of
> > + * lima_pp_hard_reset() is not checked.
> > + */
> > +static void lima_pp_task_error(struct lima_sched_pipe *pipe)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < pipe->num_processor; i++) {
> > +               struct lima_ip *ip = pipe->processor[i];
> > +
> > +               dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
> > +                       i, pp_read(LIMA_PP_INT_STATUS), pp_read(LIMA_PP_STATUS));
> > +
> > +               lima_pp_hard_reset(ip);
> > +       }
> > +}
> > +
> > +/*
> > + * MMU error callback for the PP pipe: decrements pipe->task and calls
> > + * lima_sched_pipe_task_done() when the count reaches zero.
> > + */
> > +static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
> > +{
> > +       if (atomic_dec_and_test(&pipe->task))
> > +               lima_sched_pipe_task_done(pipe);
> > +}
> > +
> > +static struct kmem_cache *lima_pp_task_slab = NULL;
> > +static int lima_pp_task_slab_refcnt = 0;
> > +
> > +int lima_pp_pipe_init(struct lima_device *dev)
> > +{
> > +       int frame_size;
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +
> > +       if (dev->id == lima_gpu_mali400)
> > +               frame_size = sizeof(struct drm_lima_m400_pp_frame);
> > +       else
> > +               frame_size = sizeof(struct drm_lima_m450_pp_frame);
> > +
> > +       if (!lima_pp_task_slab) {
> > +               lima_pp_task_slab = kmem_cache_create_usercopy(
> > +                       "lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
> > +                       0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
> > +                       frame_size, NULL);
> > +               if (!lima_pp_task_slab)
> > +                       return -ENOMEM;
> > +       }
> > +       lima_pp_task_slab_refcnt++;
> > +
> > +       pipe->frame_size = frame_size;
> > +       pipe->task_slab = lima_pp_task_slab;
> > +
> > +       pipe->task_validate = lima_pp_task_validate;
> > +       pipe->task_run = lima_pp_task_run;
> > +       pipe->task_fini = lima_pp_task_fini;
> > +       pipe->task_error = lima_pp_task_error;
> > +       pipe->task_mmu_error = lima_pp_task_mmu_error;
> > +
> > +       return 0;
> > +}
> > +
> > +void lima_pp_pipe_fini(struct lima_device *dev)
> > +{
> > +       if (!--lima_pp_task_slab_refcnt) {
> > +               kmem_cache_destroy(lima_pp_task_slab);
> > +               lima_pp_task_slab = NULL;
> > +       }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_pp.h b/drivers/gpu/drm/lima/lima_pp.h
> > new file mode 100644
> > index 000000000000..f83f8cb4d30a
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pp.h
> > @@ -0,0 +1,19 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_PP_H__
> > +#define __LIMA_PP_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +int lima_pp_init(struct lima_ip *ip);
> > +void lima_pp_fini(struct lima_ip *ip);
> > +
> > +int lima_pp_bcast_init(struct lima_ip *ip);
> > +void lima_pp_bcast_fini(struct lima_ip *ip);
> > +
> > +int lima_pp_pipe_init(struct lima_device *dev);
> > +void lima_pp_pipe_fini(struct lima_device *dev);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_regs.h b/drivers/gpu/drm/lima/lima_regs.h
> > new file mode 100644
> > index 000000000000..d5ade8fc8901
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_regs.h
> > @@ -0,0 +1,298 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright 2010-2017 ARM Limited. All rights reserved.
> > + * Copyright 2017-2018 Qiang Yu <yuq825@gmail.com>
> > + */
> > +
> > +#ifndef __LIMA_REGS_H__
> > +#define __LIMA_REGS_H__
> > +
> > +/* This file's register definitions are collected from the
> > + * official ARM Mali Utgard GPU kernel driver source code
> > + */
> > +
> > +/* PMU regs */
> > +#define LIMA_PMU_POWER_UP                  0x00
> > +#define LIMA_PMU_POWER_DOWN                0x04
> > +#define   LIMA_PMU_POWER_GP0_MASK          BIT(0)
> > +#define   LIMA_PMU_POWER_L2_MASK           BIT(1)
> > +#define   LIMA_PMU_POWER_PP_MASK(i)        BIT(2 + i)
> > +
> > +/*
> > + * On Mali450 each block automatically starts up its corresponding L2
> > + * and the PPs are not fully independently controllable.
> > + * Instead, PP0, PP1-3 and PP4-7 can be turned on or off.
> > + */
> > +#define   LIMA450_PMU_POWER_PP0_MASK       BIT(1)
> > +#define   LIMA450_PMU_POWER_PP13_MASK      BIT(2)
> > +#define   LIMA450_PMU_POWER_PP47_MASK      BIT(3)
> > +
> > +#define LIMA_PMU_STATUS                    0x08
> > +#define LIMA_PMU_INT_MASK                  0x0C
> > +#define LIMA_PMU_INT_RAWSTAT               0x10
> > +#define LIMA_PMU_INT_CLEAR                 0x18
> > +#define   LIMA_PMU_INT_CMD_MASK            BIT(0)
> > +#define LIMA_PMU_SW_DELAY                  0x1C
> > +
> > +/* L2 cache regs */
> > +#define LIMA_L2_CACHE_SIZE                   0x0004
> > +#define LIMA_L2_CACHE_STATUS                 0x0008
> > +#define   LIMA_L2_CACHE_STATUS_COMMAND_BUSY  BIT(0)
> > +#define   LIMA_L2_CACHE_STATUS_DATA_BUSY     BIT(1)
> > +#define LIMA_L2_CACHE_COMMAND                0x0010
> > +#define   LIMA_L2_CACHE_COMMAND_CLEAR_ALL    BIT(0)
> > +#define LIMA_L2_CACHE_CLEAR_PAGE             0x0014
> > +#define LIMA_L2_CACHE_MAX_READS              0x0018
> > +#define LIMA_L2_CACHE_ENABLE                 0x001C
> > +#define   LIMA_L2_CACHE_ENABLE_ACCESS        BIT(0)
> > +#define   LIMA_L2_CACHE_ENABLE_READ_ALLOCATE BIT(1)
> > +#define LIMA_L2_CACHE_PERFCNT_SRC0           0x0020
> > +#define LIMA_L2_CACHE_PERFCNT_VAL0           0x0024
> > +#define LIMA_L2_CACHE_PERFCNT_SRC1           0x0028
> > +#define LIMA_L2_CACHE_ERFCNT_VAL1            0x002C
> > +
> > +/* GP regs */
> > +#define LIMA_GP_VSCL_START_ADDR                0x00
> > +#define LIMA_GP_VSCL_END_ADDR                  0x04
> > +#define LIMA_GP_PLBUCL_START_ADDR              0x08
> > +#define LIMA_GP_PLBUCL_END_ADDR                0x0c
> > +#define LIMA_GP_PLBU_ALLOC_START_ADDR          0x10
> > +#define LIMA_GP_PLBU_ALLOC_END_ADDR            0x14
> > +#define LIMA_GP_CMD                            0x20
> > +#define   LIMA_GP_CMD_START_VS                 BIT(0)
> > +#define   LIMA_GP_CMD_START_PLBU               BIT(1)
> > +#define   LIMA_GP_CMD_UPDATE_PLBU_ALLOC        BIT(4)
> > +#define   LIMA_GP_CMD_RESET                    BIT(5)
> > +#define   LIMA_GP_CMD_FORCE_HANG               BIT(6)
> > +#define   LIMA_GP_CMD_STOP_BUS                 BIT(9)
> > +#define   LIMA_GP_CMD_SOFT_RESET               BIT(10)
> > +#define LIMA_GP_INT_RAWSTAT                    0x24
> > +#define LIMA_GP_INT_CLEAR                      0x28
> > +#define LIMA_GP_INT_MASK                       0x2C
> > +#define LIMA_GP_INT_STAT                       0x30
> > +#define   LIMA_GP_IRQ_VS_END_CMD_LST           BIT(0)
> > +#define   LIMA_GP_IRQ_PLBU_END_CMD_LST         BIT(1)
> > +#define   LIMA_GP_IRQ_PLBU_OUT_OF_MEM          BIT(2)
> > +#define   LIMA_GP_IRQ_VS_SEM_IRQ               BIT(3)
> > +#define   LIMA_GP_IRQ_PLBU_SEM_IRQ             BIT(4)
> > +#define   LIMA_GP_IRQ_HANG                     BIT(5)
> > +#define   LIMA_GP_IRQ_FORCE_HANG               BIT(6)
> > +#define   LIMA_GP_IRQ_PERF_CNT_0_LIMIT         BIT(7)
> > +#define   LIMA_GP_IRQ_PERF_CNT_1_LIMIT         BIT(8)
> > +#define   LIMA_GP_IRQ_WRITE_BOUND_ERR          BIT(9)
> > +#define   LIMA_GP_IRQ_SYNC_ERROR               BIT(10)
> > +#define   LIMA_GP_IRQ_AXI_BUS_ERROR            BIT(11)
> > +#define   LIMA_GP_IRQ_AXI_BUS_STOPPED          BIT(12)
> > +#define   LIMA_GP_IRQ_VS_INVALID_CMD           BIT(13)
> > +#define   LIMA_GP_IRQ_PLB_INVALID_CMD          BIT(14)
> > +#define   LIMA_GP_IRQ_RESET_COMPLETED          BIT(19)
> > +#define   LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW      BIT(20)
> > +#define   LIMA_GP_IRQ_SEMAPHORE_OVERFLOW       BIT(21)
> > +#define   LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  BIT(22)
> > +#define LIMA_GP_WRITE_BOUND_LOW                0x34
> > +#define LIMA_GP_PERF_CNT_0_ENABLE              0x3C
> > +#define LIMA_GP_PERF_CNT_1_ENABLE              0x40
> > +#define LIMA_GP_PERF_CNT_0_SRC                 0x44
> > +#define LIMA_GP_PERF_CNT_1_SRC                 0x48
> > +#define LIMA_GP_PERF_CNT_0_VALUE               0x4C
> > +#define LIMA_GP_PERF_CNT_1_VALUE               0x50
> > +#define LIMA_GP_PERF_CNT_0_LIMIT               0x54
> > +#define LIMA_GP_STATUS                         0x68
> > +#define   LIMA_GP_STATUS_VS_ACTIVE             BIT(1)
> > +#define   LIMA_GP_STATUS_BUS_STOPPED           BIT(2)
> > +#define   LIMA_GP_STATUS_PLBU_ACTIVE           BIT(3)
> > +#define   LIMA_GP_STATUS_BUS_ERROR             BIT(6)
> > +#define   LIMA_GP_STATUS_WRITE_BOUND_ERR       BIT(8)
> > +#define LIMA_GP_VERSION                        0x6C
> > +#define LIMA_GP_VSCL_START_ADDR_READ           0x80
> > +#define LIMA_GP_PLBCL_START_ADDR_READ          0x84
> > +#define LIMA_GP_CONTR_AXI_BUS_ERROR_STAT       0x94
> > +
> > +#define LIMA_GP_IRQ_MASK_ALL              \
> > +       (                                  \
> > +        LIMA_GP_IRQ_VS_END_CMD_LST      | \
> > +        LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
> > +        LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
> > +        LIMA_GP_IRQ_VS_SEM_IRQ          | \
> > +        LIMA_GP_IRQ_PLBU_SEM_IRQ        | \
> > +        LIMA_GP_IRQ_HANG                | \
> > +        LIMA_GP_IRQ_FORCE_HANG          | \
> > +        LIMA_GP_IRQ_PERF_CNT_0_LIMIT    | \
> > +        LIMA_GP_IRQ_PERF_CNT_1_LIMIT    | \
> > +        LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
> > +        LIMA_GP_IRQ_SYNC_ERROR          | \
> > +        LIMA_GP_IRQ_AXI_BUS_ERROR       | \
> > +        LIMA_GP_IRQ_AXI_BUS_STOPPED     | \
> > +        LIMA_GP_IRQ_VS_INVALID_CMD      | \
> > +        LIMA_GP_IRQ_PLB_INVALID_CMD     | \
> > +        LIMA_GP_IRQ_RESET_COMPLETED     | \
> > +        LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
> > +        LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
> > +        LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
> > +
> > +#define LIMA_GP_IRQ_MASK_ERROR             \
> > +       (                                  \
> > +        LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
> > +        LIMA_GP_IRQ_FORCE_HANG          | \
> > +        LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
> > +        LIMA_GP_IRQ_SYNC_ERROR          | \
> > +        LIMA_GP_IRQ_AXI_BUS_ERROR       | \
> > +        LIMA_GP_IRQ_VS_INVALID_CMD      | \
> > +        LIMA_GP_IRQ_PLB_INVALID_CMD     | \
> > +        LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
> > +        LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
> > +        LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
> > +
> > +#define LIMA_GP_IRQ_MASK_USED             \
> > +       (                                  \
> > +        LIMA_GP_IRQ_VS_END_CMD_LST      | \
> > +        LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
> > +        LIMA_GP_IRQ_MASK_ERROR)
> > +
> > +/* PP regs */
> > +#define LIMA_PP_FRAME                        0x0000
> > +#define LIMA_PP_RSW                         0x0004
> > +#define LIMA_PP_STACK                       0x0030
> > +#define LIMA_PP_STACK_SIZE                  0x0034
> > +#define LIMA_PP_ORIGIN_OFFSET_X                     0x0040
> > +#define LIMA_PP_WB(i)                       (0x0100 * (i + 1))
> > +#define   LIMA_PP_WB_SOURCE_SELECT           0x0000
> > +#define          LIMA_PP_WB_SOURCE_ADDR             0x0004
> > +
> > +#define LIMA_PP_VERSION                      0x1000
> > +#define LIMA_PP_CURRENT_REND_LIST_ADDR       0x1004
> > +#define LIMA_PP_STATUS                       0x1008
> > +#define   LIMA_PP_STATUS_RENDERING_ACTIVE    BIT(0)
> > +#define   LIMA_PP_STATUS_BUS_STOPPED         BIT(4)
> > +#define LIMA_PP_CTRL                         0x100c
> > +#define   LIMA_PP_CTRL_STOP_BUS              BIT(0)
> > +#define   LIMA_PP_CTRL_FLUSH_CACHES          BIT(3)
> > +#define   LIMA_PP_CTRL_FORCE_RESET           BIT(5)
> > +#define   LIMA_PP_CTRL_START_RENDERING       BIT(6)
> > +#define   LIMA_PP_CTRL_SOFT_RESET            BIT(7)
> > +#define LIMA_PP_INT_RAWSTAT                  0x1020
> > +#define LIMA_PP_INT_CLEAR                    0x1024
> > +#define LIMA_PP_INT_MASK                     0x1028
> > +#define LIMA_PP_INT_STATUS                   0x102c
> > +#define   LIMA_PP_IRQ_END_OF_FRAME           BIT(0)
> > +#define   LIMA_PP_IRQ_END_OF_TILE            BIT(1)
> > +#define   LIMA_PP_IRQ_HANG                   BIT(2)
> > +#define   LIMA_PP_IRQ_FORCE_HANG             BIT(3)
> > +#define   LIMA_PP_IRQ_BUS_ERROR              BIT(4)
> > +#define   LIMA_PP_IRQ_BUS_STOP               BIT(5)
> > +#define   LIMA_PP_IRQ_CNT_0_LIMIT            BIT(6)
> > +#define   LIMA_PP_IRQ_CNT_1_LIMIT            BIT(7)
> > +#define   LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR   BIT(8)
> > +#define   LIMA_PP_IRQ_INVALID_PLIST_COMMAND  BIT(9)
> > +#define   LIMA_PP_IRQ_CALL_STACK_UNDERFLOW   BIT(10)
> > +#define   LIMA_PP_IRQ_CALL_STACK_OVERFLOW    BIT(11)
> > +#define   LIMA_PP_IRQ_RESET_COMPLETED        BIT(12)
> > +#define LIMA_PP_WRITE_BOUNDARY_LOW           0x1044
> > +#define LIMA_PP_BUS_ERROR_STATUS             0x1050
> > +#define LIMA_PP_PERF_CNT_0_ENABLE            0x1080
> > +#define LIMA_PP_PERF_CNT_0_SRC               0x1084
> > +#define LIMA_PP_PERF_CNT_0_LIMIT             0x1088
> > +#define LIMA_PP_PERF_CNT_0_VALUE             0x108c
> > +#define LIMA_PP_PERF_CNT_1_ENABLE            0x10a0
> > +#define LIMA_PP_PERF_CNT_1_SRC               0x10a4
> > +#define LIMA_PP_PERF_CNT_1_LIMIT             0x10a8
> > +#define LIMA_PP_PERF_CNT_1_VALUE             0x10ac
> > +#define LIMA_PP_PERFMON_CONTR                0x10b0
> > +#define LIMA_PP_PERFMON_BASE                 0x10b4
> > +
> > +#define LIMA_PP_IRQ_MASK_ALL                 \
> > +       (                                    \
> > +        LIMA_PP_IRQ_END_OF_FRAME          | \
> > +        LIMA_PP_IRQ_END_OF_TILE           | \
> > +        LIMA_PP_IRQ_HANG                  | \
> > +        LIMA_PP_IRQ_FORCE_HANG            | \
> > +        LIMA_PP_IRQ_BUS_ERROR             | \
> > +        LIMA_PP_IRQ_BUS_STOP              | \
> > +        LIMA_PP_IRQ_CNT_0_LIMIT           | \
> > +        LIMA_PP_IRQ_CNT_1_LIMIT           | \
> > +        LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
> > +        LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
> > +        LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
> > +        LIMA_PP_IRQ_CALL_STACK_OVERFLOW   | \
> > +        LIMA_PP_IRQ_RESET_COMPLETED)
> > +
> > +#define LIMA_PP_IRQ_MASK_ERROR               \
> > +       (                                    \
> > +        LIMA_PP_IRQ_FORCE_HANG            | \
> > +        LIMA_PP_IRQ_BUS_ERROR             | \
> > +        LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
> > +        LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
> > +        LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
> > +        LIMA_PP_IRQ_CALL_STACK_OVERFLOW)
> > +
> > +#define LIMA_PP_IRQ_MASK_USED                \
> > +       (                                    \
> > +        LIMA_PP_IRQ_END_OF_FRAME          | \
> > +        LIMA_PP_IRQ_MASK_ERROR)
> > +
> > +/* MMU regs */
> > +#define LIMA_MMU_DTE_ADDR                     0x0000
> > +#define LIMA_MMU_STATUS                       0x0004
> > +#define   LIMA_MMU_STATUS_PAGING_ENABLED      BIT(0)
> > +#define   LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE   BIT(1)
> > +#define   LIMA_MMU_STATUS_STALL_ACTIVE        BIT(2)
> > +#define   LIMA_MMU_STATUS_IDLE                BIT(3)
> > +#define   LIMA_MMU_STATUS_REPLAY_BUFFER_EMPTY BIT(4)
> > +#define   LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE BIT(5)
> > +#define   LIMA_MMU_STATUS_BUS_ID(x)           ((x >> 6) & 0x1F)
> > +#define LIMA_MMU_COMMAND                      0x0008
> > +#define   LIMA_MMU_COMMAND_ENABLE_PAGING      0x00
> > +#define   LIMA_MMU_COMMAND_DISABLE_PAGING     0x01
> > +#define   LIMA_MMU_COMMAND_ENABLE_STALL       0x02
> > +#define   LIMA_MMU_COMMAND_DISABLE_STALL      0x03
> > +#define   LIMA_MMU_COMMAND_ZAP_CACHE          0x04
> > +#define   LIMA_MMU_COMMAND_PAGE_FAULT_DONE    0x05
> > +#define   LIMA_MMU_COMMAND_HARD_RESET         0x06
> > +#define LIMA_MMU_PAGE_FAULT_ADDR              0x000C
> > +#define LIMA_MMU_ZAP_ONE_LINE                 0x0010
> > +#define LIMA_MMU_INT_RAWSTAT                  0x0014
> > +#define LIMA_MMU_INT_CLEAR                    0x0018
> > +#define LIMA_MMU_INT_MASK                     0x001C
> > +#define   LIMA_MMU_INT_PAGE_FAULT             BIT(0)
> > +#define   LIMA_MMU_INT_READ_BUS_ERROR         BIT(1)
> > +#define LIMA_MMU_INT_STATUS                   0x0020
> > +
> > +#define LIMA_VM_FLAG_PRESENT          BIT(0)
> > +#define LIMA_VM_FLAG_READ_PERMISSION  BIT(1)
> > +#define LIMA_VM_FLAG_WRITE_PERMISSION BIT(2)
> > +#define LIMA_VM_FLAG_OVERRIDE_CACHE   BIT(3)
> > +#define LIMA_VM_FLAG_WRITE_CACHEABLE  BIT(4)
> > +#define LIMA_VM_FLAG_WRITE_ALLOCATE   BIT(5)
> > +#define LIMA_VM_FLAG_WRITE_BUFFERABLE BIT(6)
> > +#define LIMA_VM_FLAG_READ_CACHEABLE   BIT(7)
> > +#define LIMA_VM_FLAG_READ_ALLOCATE    BIT(8)
> > +#define LIMA_VM_FLAG_MASK             0x1FF
> > +
> > +#define LIMA_VM_FLAGS_CACHE (                   \
> > +               LIMA_VM_FLAG_PRESENT |           \
> > +               LIMA_VM_FLAG_READ_PERMISSION |   \
> > +               LIMA_VM_FLAG_WRITE_PERMISSION |  \
> > +               LIMA_VM_FLAG_OVERRIDE_CACHE |    \
> > +               LIMA_VM_FLAG_WRITE_CACHEABLE |   \
> > +               LIMA_VM_FLAG_WRITE_BUFFERABLE |  \
> > +               LIMA_VM_FLAG_READ_CACHEABLE |    \
> > +               LIMA_VM_FLAG_READ_ALLOCATE )
> > +
> > +#define LIMA_VM_FLAGS_UNCACHE (                        \
> > +               LIMA_VM_FLAG_PRESENT |          \
> > +               LIMA_VM_FLAG_READ_PERMISSION |  \
> > +               LIMA_VM_FLAG_WRITE_PERMISSION )
> > +
> > +/* DLBU regs */
> > +#define LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR  0x0000
> > +#define        LIMA_DLBU_MASTER_TLLIST_VADDR      0x0004
> > +#define        LIMA_DLBU_TLLIST_VBASEADDR         0x0008
> > +#define        LIMA_DLBU_FB_DIM                   0x000C
> > +#define        LIMA_DLBU_TLLIST_CONF              0x0010
> > +#define        LIMA_DLBU_START_TILE_POS           0x0014
> > +#define        LIMA_DLBU_PP_ENABLE_MASK           0x0018
> > +
> > +/* BCAST regs */
> > +#define LIMA_BCAST_BROADCAST_MASK    0x0
> > +#define LIMA_BCAST_INTERRUPT_MASK    0x4
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> > new file mode 100644
> > index 000000000000..539b29ce5e9a
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_sched.c
> > @@ -0,0 +1,398 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/kthread.h>
> > +#include <linux/slab.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_sched.h"
> > +#include "lima_vm.h"
> > +#include "lima_mmu.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_object.h"
> > +
> > +struct lima_fence {
> > +       struct dma_fence base;
> > +       struct lima_sched_pipe *pipe;
> > +};
> > +
> > +static struct kmem_cache *lima_fence_slab = NULL;
> > +
> > +int lima_sched_slab_init(void)
> > +{
> > +       lima_fence_slab = kmem_cache_create(
> > +               "lima_fence", sizeof(struct lima_fence), 0,
> > +               SLAB_HWCACHE_ALIGN, NULL);
> > +       if (!lima_fence_slab)
> > +               return -ENOMEM;
> > +
> > +       return 0;
> > +}
> > +
> > +void lima_sched_slab_fini(void)
> > +{
> > +       kmem_cache_destroy(lima_fence_slab);
> > +}
> > +
> > +static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
> > +{
> > +       return container_of(fence, struct lima_fence, base);
> > +}
> > +
> > +static const char *lima_fence_get_driver_name(struct dma_fence *fence)
> > +{
> > +       return "lima";
> > +}
> > +
> > +static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
> > +{
> > +       struct lima_fence *f = to_lima_fence(fence);
> > +
> > +       return f->pipe->base.name;
> > +}
> > +
> > +static bool lima_fence_enable_signaling(struct dma_fence *fence)
> > +{
> > +       return true;
> > +}
> > +
> > +static void lima_fence_release_rcu(struct rcu_head *rcu)
> > +{
> > +       struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
> > +       struct lima_fence *fence = to_lima_fence(f);
> > +
> > +       kmem_cache_free(lima_fence_slab, fence);
> > +}
> > +
> > +static void lima_fence_release(struct dma_fence *fence)
> > +{
> > +       struct lima_fence *f = to_lima_fence(fence);
> > +
> > +       call_rcu(&f->base.rcu, lima_fence_release_rcu);
> > +}
> > +
> > +static const struct dma_fence_ops lima_fence_ops = {
> > +       .get_driver_name = lima_fence_get_driver_name,
> > +       .get_timeline_name = lima_fence_get_timeline_name,
> > +       .enable_signaling = lima_fence_enable_signaling,
> > +       .wait = dma_fence_default_wait,
> > +       .release = lima_fence_release,
> > +};
> > +
> > +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
> > +{
> > +       struct lima_fence *fence;
> > +
> > +       fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
>
> Out of curiosity, what is the benefit of using a separate slab here?
> If this is beneficial, then other drivers should do this too and it
> should be common. Otherwise, it adds some complexity.
>
> And maybe the slab should be initialized in probe rather than module_init.
>
> > +       if (!fence)
> > +              return NULL;
> > +
> > +       fence->pipe = pipe;
> > +       dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
> > +                      pipe->fence_context, ++pipe->fence_seqno);
> > +
> > +       return fence;
> > +}
> > +
> > +static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
> > +{
> > +       return container_of(job, struct lima_sched_task, base);
> > +}
> > +
> > +static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
> > +{
> > +       return container_of(sched, struct lima_sched_pipe, base);
> > +}
> > +
> > +int lima_sched_task_init(struct lima_sched_task *task,
> > +                        struct lima_sched_context *context,
> > +                        struct lima_bo **bos, int num_bos,
> > +                        struct lima_vm *vm)
> > +{
> > +       int err, i;
> > +
> > +       task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
> > +       if (!task->bos)
> > +               return -ENOMEM;
> > +
> > +       for (i = 0; i < num_bos; i++)
> > +               drm_gem_object_get(&bos[i]->gem);
> > +
> > +       err = drm_sched_job_init(&task->base, &context->base, vm);
> > +       if (err) {
> > +               kfree(task->bos);
> > +               return err;
> > +       }
> > +
> > +       task->num_bos = num_bos;
> > +       task->vm = lima_vm_get(vm);
> > +       return 0;
> > +}
> > +
> > +void lima_sched_task_fini(struct lima_sched_task *task)
> > +{
> > +       int i;
> > +
> > +       drm_sched_job_cleanup(&task->base);
> > +
> > +       for (i = 0; i < task->num_dep; i++)
> > +               dma_fence_put(task->dep[i]);
> > +
> > +       kfree(task->dep);
> > +
> > +       if (task->bos) {
> > +               for (i = 0; i < task->num_bos; i++)
> > +                       drm_gem_object_put_unlocked(&task->bos[i]->gem);
> > +               kfree(task->bos);
> > +       }
> > +
> > +       lima_vm_put(task->vm);
> > +}
> > +
> > +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
> > +{
> > +       int i, new_dep = 4;
> > +
> > +       /* same context's fence is definitely earlier than this task */
> > +       if (fence->context == task->base.s_fence->finished.context) {
> > +               dma_fence_put(fence);
> > +               return 0;
> > +       }
> > +
> > +       if (task->dep && task->num_dep == task->max_dep)
> > +               new_dep = task->max_dep * 2;
> > +
> > +       if (task->max_dep < new_dep) {
> > +               void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
> > +               if (!dep)
> > +                       return -ENOMEM;
> > +               task->max_dep = new_dep;
> > +               task->dep = dep;
> > +       }
> > +
> > +       for (i = 0; i < task->num_dep; i++) {
> > +               if (task->dep[i]->context == fence->context &&
> > +                   dma_fence_is_later(fence, task->dep[i])) {
> > +                       dma_fence_put(task->dep[i]);
> > +                       task->dep[i] = fence;
> > +                       return 0;
> > +               }
> > +       }
> > +
> > +       task->dep[task->num_dep++] = fence;
> > +       return 0;
> > +}
> > +
> > +int lima_sched_context_init(struct lima_sched_pipe *pipe,
> > +                           struct lima_sched_context *context,
> > +                           atomic_t *guilty)
> > +{
> > +       struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
> > +
> > +       return drm_sched_entity_init(&context->base, &rq, 1, guilty);
> > +}
> > +
> > +void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> > +                            struct lima_sched_context *context)
> > +{
> > +       drm_sched_entity_fini(&context->base);
> > +}
> > +
> > +struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
> > +                                               struct lima_sched_task *task)
> > +{
> > +       struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);
> > +
> > +       drm_sched_entity_push_job(&task->base, &context->base);
> > +       return fence;
> > +}
> > +
> > +static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
> > +                                              struct drm_sched_entity *entity)
> > +{
> > +       struct lima_sched_task *task = to_lima_task(job);
> > +       int i;
> > +
> > +       for (i = 0; i < task->num_dep; i++) {
> > +               struct dma_fence *fence = task->dep[i];
> > +
> > +               if (!task->dep[i])
> > +                       continue;
> > +
> > +               task->dep[i] = NULL;
> > +
> > +               if (!dma_fence_is_signaled(fence))
> > +                       return fence;
> > +
> > +               dma_fence_put(fence);
> > +       }
> > +
> > +       return NULL;
> > +}
> > +
> > +static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
> > +{
> > +       struct lima_sched_task *task = to_lima_task(job);
> > +       struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> > +       struct lima_fence *fence;
> > +       struct dma_fence *ret;
> > +       struct lima_vm *vm = NULL, *last_vm = NULL;
> > +       int i;
> > +
> > +       /* after GPU reset */
> > +       if (job->s_fence->finished.error < 0)
> > +               return NULL;
> > +
> > +       fence = lima_fence_create(pipe);
> > +       if (!fence)
> > +               return NULL;
> > +       task->fence = &fence->base;
> > +
> > +       /* for the caller's usage of the fence, otherwise the irq handler
> > +        * may consume the fence before the caller uses it */
> > +       ret = dma_fence_get(task->fence);
> > +
> > +       pipe->current_task = task;
> > +
> > +       /* this is needed for MMU to work correctly, otherwise GP/PP
> > +        * will hang or page fault for unknown reason after running for
> > +        * a while.
> > +        *
> > +        * Need to investigate:
> > +        * 1. is it related to TLB
> > +        * 2. how much performance will be affected by L2 cache flush
> > +        * 3. can we reduce the calling of this function because all
> > +        *    GP/PP use the same L2 cache on mali400
> > +        *
> > +        * TODO:
> > +        * 1. move this to task fini to save some wait time?
> > +        * 2. when GP/PP use different l2 cache, need PP wait GP l2
> > +        *    cache flush?
> > +        */
> > +       for (i = 0; i < pipe->num_l2_cache; i++)
> > +               lima_l2_cache_flush(pipe->l2_cache[i]);
> > +
> > +       if (task->vm != pipe->current_vm) {
> > +               vm = lima_vm_get(task->vm);
> > +               last_vm = pipe->current_vm;
> > +               pipe->current_vm = task->vm;
> > +       }
> > +
> > +       if (pipe->bcast_mmu)
> > +               lima_mmu_switch_vm(pipe->bcast_mmu, vm);
> > +       else {
> > +               for (i = 0; i < pipe->num_mmu; i++)
> > +                       lima_mmu_switch_vm(pipe->mmu[i], vm);
> > +       }
> > +
> > +       if (last_vm)
> > +               lima_vm_put(last_vm);
> > +
> > +       pipe->error = false;
> > +       pipe->task_run(pipe, task);
> > +
> > +       return task->fence;
> > +}
> > +
> > +static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
> > +                                        struct lima_sched_task *task)
> > +{
> > +       kthread_park(pipe->base.thread);
> > +       drm_sched_hw_job_reset(&pipe->base, &task->base);
> > +
> > +       pipe->task_error(pipe);
> > +
> > +       if (pipe->bcast_mmu)
> > +               lima_mmu_page_fault_resume(pipe->bcast_mmu);
> > +       else {
> > +               int i;
> > +               for (i = 0; i < pipe->num_mmu; i++)
> > +                       lima_mmu_page_fault_resume(pipe->mmu[i]);
> > +       }
> > +
> > +       if (pipe->current_vm)
> > +               lima_vm_put(pipe->current_vm);
> > +
> > +       pipe->current_vm = NULL;
> > +       pipe->current_task = NULL;
> > +
> > +       drm_sched_job_recovery(&pipe->base);
> > +       kthread_unpark(pipe->base.thread);
> > +}
> > +
> > +static void lima_sched_timedout_job(struct drm_sched_job *job)
> > +{
> > +       struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> > +       struct lima_sched_task *task = to_lima_task(job);
> > +
> > +       DRM_ERROR("lima job timeout\n");
> > +
> > +       lima_sched_handle_error_task(pipe, task);
> > +}
> > +
> > +static void lima_sched_free_job(struct drm_sched_job *job)
> > +{
> > +       struct lima_sched_task *task = to_lima_task(job);
> > +       struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> > +       struct lima_vm *vm = task->vm;
> > +       struct lima_bo **bos = task->bos;
> > +       int i;
> > +
> > +       dma_fence_put(task->fence);
> > +
> > +       for (i = 0; i < task->num_bos; i++)
> > +               lima_vm_bo_del(vm, bos[i]);
> > +
> > +       lima_sched_task_fini(task);
> > +       kmem_cache_free(pipe->task_slab, task);
> > +}
> > +
> > +const struct drm_sched_backend_ops lima_sched_ops = {
> > +       .dependency = lima_sched_dependency,
> > +       .run_job = lima_sched_run_job,
> > +       .timedout_job = lima_sched_timedout_job,
> > +       .free_job = lima_sched_free_job,
> > +};
> > +
> > +static void lima_sched_error_work(struct work_struct *work)
> > +{
> > +       struct lima_sched_pipe *pipe =
> > +               container_of(work, struct lima_sched_pipe, error_work);
> > +       struct lima_sched_task *task = pipe->current_task;
> > +
> > +       lima_sched_handle_error_task(pipe, task);
> > +}
> > +
> > +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> > +{
> > +       long timeout;
> > +
> > +       if (lima_sched_timeout_ms <= 0)
> > +               timeout = MAX_SCHEDULE_TIMEOUT;
> > +       else
> > +               timeout = msecs_to_jiffies(lima_sched_timeout_ms);
> > +
> > +       pipe->fence_context = dma_fence_context_alloc(1);
> > +       spin_lock_init(&pipe->fence_lock);
> > +
> > +       INIT_WORK(&pipe->error_work, lima_sched_error_work);
> > +
> > +       return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
> > +}
> > +
> > +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
> > +{
> > +       drm_sched_fini(&pipe->base);
> > +}
> > +
> > +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
> > +{
> > +       if (pipe->error)
> > +               schedule_work(&pipe->error_work);
> > +       else {
> > +               struct lima_sched_task *task = pipe->current_task;
> > +
> > +               pipe->task_fini(pipe);
> > +               dma_fence_signal(task->fence);
> > +       }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
> > new file mode 100644
> > index 000000000000..44985e4da3fb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_sched.h
> > @@ -0,0 +1,104 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_SCHED_H__
> > +#define __LIMA_SCHED_H__
> > +
> > +#include <drm/gpu_scheduler.h>
> > +
> > +struct lima_vm;
> > +
> > +struct lima_sched_task {
> > +       struct drm_sched_job base;
> > +
> > +       struct lima_vm *vm;
> > +       void *frame;
> > +
> > +       struct dma_fence **dep;
> > +       int num_dep;
> > +       int max_dep;
> > +
> > +       struct lima_bo **bos;
> > +       int num_bos;
> > +
> > +       /* pipe fence */
> > +       struct dma_fence *fence;
> > +};
> > +
> > +struct lima_sched_context {
> > +       struct drm_sched_entity base;
> > +};
> > +
> > +#define LIMA_SCHED_PIPE_MAX_MMU       8
> > +#define LIMA_SCHED_PIPE_MAX_L2_CACHE  2
> > +#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
> > +
> > +struct lima_ip;
> > +
> > +struct lima_sched_pipe {
> > +       struct drm_gpu_scheduler base;
> > +
> > +       u64 fence_context;
> > +       u32 fence_seqno;
> > +       spinlock_t fence_lock;
> > +
> > +       struct lima_sched_task *current_task;
> > +       struct lima_vm *current_vm;
> > +
> > +       struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
> > +       int num_mmu;
> > +
> > +       struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
> > +       int num_l2_cache;
> > +
> > +       struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
> > +       int num_processor;
> > +
> > +       struct lima_ip *bcast_processor;
> > +       struct lima_ip *bcast_mmu;
> > +
> > +       u32 done;
> > +       bool error;
> > +       atomic_t task;
> > +
> > +       int frame_size;
> > +       struct kmem_cache *task_slab;
> > +
> > +       int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
> > +       void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
> > +       void (*task_fini)(struct lima_sched_pipe *pipe);
> > +       void (*task_error)(struct lima_sched_pipe *pipe);
> > +       void (*task_mmu_error)(struct lima_sched_pipe *pipe);
> > +
> > +       struct work_struct error_work;
> > +};
> > +
> > +int lima_sched_task_init(struct lima_sched_task *task,
> > +                        struct lima_sched_context *context,
> > +                        struct lima_bo **bos, int num_bos,
> > +                        struct lima_vm *vm);
> > +void lima_sched_task_fini(struct lima_sched_task *task);
> > +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
> > +
> > +int lima_sched_context_init(struct lima_sched_pipe *pipe,
> > +                           struct lima_sched_context *context,
> > +                           atomic_t *guilty);
> > +void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> > +                            struct lima_sched_context *context);
> > +struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
> > +                                               struct lima_sched_task *task);
> > +
> > +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
> > +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
> > +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
> > +
> > +static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
> > +{
> > +       pipe->error = true;
> > +       pipe->task_mmu_error(pipe);
> > +}
> > +
> > +int lima_sched_slab_init(void);
> > +void lima_sched_slab_fini(void);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
> > new file mode 100644
> > index 000000000000..39eba3fae019
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_vm.c
> > @@ -0,0 +1,280 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#include <linux/slab.h>
> > +#include <linux/dma-mapping.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +#include "lima_regs.h"
> > +
> > +struct lima_bo_va {
> > +       struct list_head list;
> > +       unsigned ref_count;
> > +
> > +       struct drm_mm_node node;
> > +
> > +       struct lima_vm *vm;
> > +};
> > +
> > +#define LIMA_VM_PD_SHIFT 22
> > +#define LIMA_VM_PT_SHIFT 12
> > +#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
> > +
> > +#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
> > +#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
> > +
> > +#define LIMA_PDE(va) (va >> LIMA_VM_PD_SHIFT)
> > +#define LIMA_PTE(va) ((va & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
> > +#define LIMA_PBE(va) (va >> LIMA_VM_PB_SHIFT)
> > +#define LIMA_BTE(va) ((va & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
> > +
> > +
> > +static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
> > +{
> > +       u32 addr;
> > +
> > +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> > +               u32 pbe = LIMA_PBE(addr);
> > +               u32 bte = LIMA_BTE(addr);
> > +
> > +               vm->bts[pbe].cpu[bte] = 0;
> > +       }
> > +}
> > +
> > +static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
> > +                                 u32 start, u32 end)
> > +{
> > +       u64 addr;
> > +       int i = 0;
> > +
> > +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> > +               u32 pbe = LIMA_PBE(addr);
> > +               u32 bte = LIMA_BTE(addr);
> > +
> > +               if (!vm->bts[pbe].cpu) {
> > +                       dma_addr_t pts;
> > +                       u32 *pd;
> > +                       int j;
> > +
> > +                       vm->bts[pbe].cpu = dma_alloc_wc(
> > +                               vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> > +                               &vm->bts[pbe].dma, GFP_KERNEL | __GFP_ZERO);
> > +                       if (!vm->bts[pbe].cpu) {
> > +                               if (addr != start)
> > +                                       lima_vm_unmap_page_table(vm, start, addr - 1);
> > +                               return -ENOMEM;
> > +                       }
> > +
> > +                       pts = vm->bts[pbe].dma;
> > +                       pd = vm->pd.cpu + (pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT);
> > +                       for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
> > +                               pd[j] = pts | LIMA_VM_FLAG_PRESENT;
> > +                               pts += LIMA_PAGE_SIZE;
> > +                       }
> > +               }
> > +
> > +               vm->bts[pbe].cpu[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static struct lima_bo_va *
> > +lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > +       struct lima_bo_va *bo_va, *ret = NULL;
> > +
> > +       list_for_each_entry(bo_va, &bo->va, list) {
> > +               if (bo_va->vm == vm) {
> > +                       ret = bo_va;
> > +                       break;
> > +               }
> > +       }
> > +
> > +       return ret;
> > +}
> > +
> > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
> > +{
> > +       struct lima_bo_va *bo_va;
> > +       int err;
> > +
> > +       mutex_lock(&bo->lock);
> > +
> > +       bo_va = lima_vm_bo_find(vm, bo);
> > +       if (bo_va) {
> > +               bo_va->ref_count++;
> > +               mutex_unlock(&bo->lock);
> > +               return 0;
> > +       }
> > +
> > +       /* should not create new bo_va if not asked by caller */
> > +       if (!create) {
> > +               mutex_unlock(&bo->lock);
> > +               return -ENOENT;
> > +       }
> > +
> > +       bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
> > +       if (!bo_va) {
> > +               err = -ENOMEM;
> > +               goto err_out0;
> > +       }
> > +
> > +       bo_va->vm = vm;
> > +       bo_va->ref_count = 1;
> > +
> > +       mutex_lock(&vm->lock);
> > +
> > +       err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
> > +       if (err)
> > +               goto err_out1;
> > +
> > +       err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
> > +                                    bo_va->node.start + bo_va->node.size - 1);
> > +       if (err)
> > +               goto err_out2;
> > +
> > +       mutex_unlock(&vm->lock);
> > +
> > +       list_add_tail(&bo_va->list, &bo->va);
>
> So you can have 1 BO at multiple VAs? Is that really needed?
>
> > +
> > +       mutex_unlock(&bo->lock);
> > +       return 0;
> > +
> > +err_out2:
> > +       drm_mm_remove_node(&bo_va->node);
> > +err_out1:
> > +       mutex_unlock(&vm->lock);
> > +       kfree(bo_va);
> > +err_out0:
> > +       mutex_unlock(&bo->lock);
> > +       return err;
> > +}
> > +
> > +void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > +       struct lima_bo_va *bo_va;
> > +
> > +       mutex_lock(&bo->lock);
> > +
> > +       bo_va = lima_vm_bo_find(vm, bo);
> > +       if (--bo_va->ref_count > 0) {
> > +               mutex_unlock(&bo->lock);
> > +               return;
> > +       }
> > +
> > +       mutex_lock(&vm->lock);
> > +
> > +       lima_vm_unmap_page_table(vm, bo_va->node.start,
> > +                                bo_va->node.start + bo_va->node.size - 1);
> > +
> > +       drm_mm_remove_node(&bo_va->node);
> > +
> > +       mutex_unlock(&vm->lock);
> > +
> > +       list_del(&bo_va->list);
> > +
> > +       mutex_unlock(&bo->lock);
> > +
> > +       kfree(bo_va);
> > +}
> > +
> > +u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > +       struct lima_bo_va *bo_va;
> > +       u32 ret;
> > +
> > +       mutex_lock(&bo->lock);
> > +
> > +       bo_va = lima_vm_bo_find(vm, bo);
> > +       ret = bo_va->node.start;
> > +
> > +       mutex_unlock(&bo->lock);
> > +
> > +       return ret;
> > +}
> > +
> > +struct lima_vm *lima_vm_create(struct lima_device *dev)
> > +{
> > +       struct lima_vm *vm;
> > +
> > +       vm = kzalloc(sizeof(*vm), GFP_KERNEL);
> > +       if (!vm)
> > +               return NULL;
> > +
> > +       vm->dev = dev;
> > +       mutex_init(&vm->lock);
> > +       kref_init(&vm->refcount);
> > +
> > +       vm->pd.cpu = dma_alloc_wc(dev->dev, LIMA_PAGE_SIZE, &vm->pd.dma,
> > +                                 GFP_KERNEL | __GFP_ZERO);
> > +       if (!vm->pd.cpu)
> > +               goto err_out0;
> > +
> > +       if (dev->dlbu_cpu) {
> > +               int err = lima_vm_map_page_table(
> > +                       vm, &dev->dlbu_dma, LIMA_VA_RESERVE_DLBU,
> > +                       LIMA_VA_RESERVE_DLBU + LIMA_PAGE_SIZE - 1);
> > +               if (err)
> > +                       goto err_out1;
> > +       }
> > +
> > +       drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);
> > +
> > +       return vm;
> > +
> > +err_out1:
> > +       dma_free_wc(dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
> > +err_out0:
> > +       kfree(vm);
> > +       return NULL;
> > +}
> > +
> > +void lima_vm_release(struct kref *kref)
> > +{
> > +       struct lima_vm *vm = container_of(kref, struct lima_vm, refcount);
> > +       int i;
> > +
> > +       drm_mm_takedown(&vm->mm);
> > +
> > +       for (i = 0; i < LIMA_VM_NUM_BT; i++) {
> > +               if (vm->bts[i].cpu)
> > +                       dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> > +                                   vm->bts[i].cpu, vm->bts[i].dma);
> > +       }
> > +
> > +        if (vm->pd.cpu)
> > +               dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
> > +
> > +       kfree(vm);
> > +}
> > +
> > +void lima_vm_print(struct lima_vm *vm)
> > +{
> > +       int i, j, k;
> > +       u32 *pd, *pt;
> > +
> > +       if (!vm->pd.cpu)
> > +               return;
> > +
> > +       pd = vm->pd.cpu;
> > +       for (i = 0; i < LIMA_VM_NUM_BT; i++) {
> > +               if (!vm->bts[i].cpu)
> > +                       continue;
> > +
> > +               pt = vm->bts[i].cpu;
> > +               for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
> > +                       int idx = (i << LIMA_VM_NUM_PT_PER_BT_SHIFT) + j;
> > +                       printk(KERN_INFO "lima vm pd %03x:%08x\n", idx, pd[idx]);
> > +
> > +                       for (k = 0; k < LIMA_PAGE_ENT_NUM; k++) {
> > +                               u32 pte = *pt++;
> > +                               if (pte)
> > +                                       printk(KERN_INFO "  pt %03x:%08x\n", k, pte);
> > +                       }
> > +               }
> > +       }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_vm.h b/drivers/gpu/drm/lima/lima_vm.h
> > new file mode 100644
> > index 000000000000..a135e2f05315
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_vm.h
> > @@ -0,0 +1,62 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_VM_H__
> > +#define __LIMA_VM_H__
> > +
> > +#include <drm/drm_mm.h>
> > +#include <linux/kref.h>
> > +
> > +#define LIMA_PAGE_SIZE    4096
> > +#define LIMA_PAGE_MASK    (LIMA_PAGE_SIZE - 1)
> > +#define LIMA_PAGE_ENT_NUM (LIMA_PAGE_SIZE / sizeof(u32))
> > +
> > +#define LIMA_VM_NUM_PT_PER_BT_SHIFT 3
> > +#define LIMA_VM_NUM_PT_PER_BT (1 << LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +#define LIMA_VM_NUM_BT (LIMA_PAGE_ENT_NUM >> LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +
> > +#define LIMA_VA_RESERVE_START  0xFFF00000
> > +#define LIMA_VA_RESERVE_DLBU   LIMA_VA_RESERVE_START
> > +#define LIMA_VA_RESERVE_END    0x100000000
> > +
> > +struct lima_device;
> > +
> > +struct lima_vm_page {
> > +       u32 *cpu;
> > +       dma_addr_t dma;
> > +};
> > +
> > +struct lima_vm {
> > +       struct mutex lock;
> > +       struct kref refcount;
> > +
> > +       struct drm_mm mm;
> > +
> > +       struct lima_device *dev;
> > +
> > +       struct lima_vm_page pd;
> > +       struct lima_vm_page bts[LIMA_VM_NUM_BT];
> > +};
> > +
> > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create);
> > +void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo);
> > +
> > +u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo);
> > +
> > +struct lima_vm *lima_vm_create(struct lima_device *dev);
> > +void lima_vm_release(struct kref *kref);
> > +
> > +static inline struct lima_vm *lima_vm_get(struct lima_vm *vm)
> > +{
> > +       kref_get(&vm->refcount);
> > +       return vm;
> > +}
> > +
> > +static inline void lima_vm_put(struct lima_vm *vm)
> > +{
> > +       kref_put(&vm->refcount, lima_vm_release);
> > +}
> > +
> > +void lima_vm_print(struct lima_vm *vm);
> > +
> > +#endif
> > diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
> > new file mode 100644
> > index 000000000000..64fb4807958d
> > --- /dev/null
> > +++ b/include/uapi/drm/lima_drm.h
> > @@ -0,0 +1,126 @@
> > +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > +
> > +#ifndef __LIMA_DRM_H__
> > +#define __LIMA_DRM_H__
> > +
> > +#include "drm.h"
> > +
> > +#if defined(__cplusplus)
> > +extern "C" {
> > +#endif
> > +
> > +#define LIMA_INFO_GPU_MALI400 0x00
> > +#define LIMA_INFO_GPU_MALI450 0x01
> > +
> > +struct drm_lima_info {
> > +       __u32 gpu_id;   /* out */
> > +       __u32 num_pp;   /* out */
> > +       __u32 valid;    /* out */
> > +       __u32 _resv[7];
> > +};
> > +
> > +struct drm_lima_gem_create {
> > +       __u32 size;    /* in */
> > +       __u32 flags;   /* in */
> > +       __u32 handle;  /* out */
> > +       __u32 pad;
> > +};
> > +
> > +struct drm_lima_gem_info {
> > +       __u32 handle;  /* in */
> > +       __u32 va;      /* out */
> > +       __u64 offset;  /* out */
> > +};
> > +
> > +#define LIMA_SUBMIT_BO_READ   0x01
> > +#define LIMA_SUBMIT_BO_WRITE  0x02
> > +
> > +struct drm_lima_gem_submit_bo {
> > +       __u32 handle;  /* in */
> > +       __u32 flags;   /* in */
> > +};
> > +
> > +#define LIMA_GP_FRAME_REG_NUM 6
> > +
> > +struct drm_lima_gp_frame {
> > +       __u32 frame[LIMA_GP_FRAME_REG_NUM];
> > +};
> > +
> > +#define LIMA_PP_FRAME_REG_NUM 23
> > +#define LIMA_PP_WB_REG_NUM 12
> > +
> > +struct drm_lima_m400_pp_frame {
> > +       __u32 frame[LIMA_PP_FRAME_REG_NUM];
> > +       __u32 num_pp;
> > +       __u32 wb[3 * LIMA_PP_WB_REG_NUM];
> > +       __u32 plbu_array_address[4];
> > +       __u32 fragment_stack_address[4];
> > +};
> > +
> > +struct drm_lima_m450_pp_frame {
> > +       __u32 frame[LIMA_PP_FRAME_REG_NUM];
> > +       __u32 num_pp;
> > +       __u32 wb[3 * LIMA_PP_WB_REG_NUM];
> > +       __u32 use_dlbu;
> > +       __u32 _pad;
> > +       union {
> > +               __u32 plbu_array_address[8];
> > +               __u32 dlbu_regs[4];
> > +       };
> > +       __u32 fragment_stack_address[8];
> > +};
> > +
> > +#define LIMA_PIPE_GP  0x00
> > +#define LIMA_PIPE_PP  0x01
> > +
> > +#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
> > +
> > +struct drm_lima_gem_submit {
> > +       __u32 ctx;         /* in */
> > +       __u32 pipe;        /* in */
> > +       __u32 nr_bos;      /* in */
> > +       __u32 frame_size;  /* in */
> > +       __u64 bos;         /* in */
> > +       __u64 frame;       /* in */
> > +       __u32 flags;       /* in */
> > +       __u32 out_sync;    /* in */
> > +       __u32 in_sync[2];  /* in */
> > +};
> > +
> > +#define LIMA_GEM_WAIT_READ   0x01
> > +#define LIMA_GEM_WAIT_WRITE  0x02
> > +
> > +struct drm_lima_gem_wait {
> > +       __u32 handle;      /* in */
> > +       __u32 op;          /* in */
> > +       __s64 timeout_ns;  /* in */
> > +};
> > +
> > +#define LIMA_CTX_OP_CREATE 1
> > +#define LIMA_CTX_OP_FREE   2
> > +
> > +struct drm_lima_ctx {
> > +       __u32 op;          /* in */
> > +       __u32 id;          /* in/out */
> > +};
> > +
> > +#define DRM_LIMA_INFO        0x00
> > +#define DRM_LIMA_GEM_CREATE  0x01
> > +#define DRM_LIMA_GEM_INFO    0x02
> > +#define DRM_LIMA_GEM_SUBMIT  0x03
> > +#define DRM_LIMA_GEM_WAIT    0x04
> > +#define DRM_LIMA_CTX         0x05
> > +
> > +#define DRM_IOCTL_LIMA_INFO DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_INFO, struct drm_lima_info)
> > +#define DRM_IOCTL_LIMA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, struct drm_lima_gem_create)
> > +#define DRM_IOCTL_LIMA_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info)
> > +#define DRM_IOCTL_LIMA_GEM_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, struct drm_lima_gem_submit)
> > +#define DRM_IOCTL_LIMA_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait)
> > +#define DRM_IOCTL_LIMA_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_CTX, struct drm_lima_ctx)
> > +
> > +#if defined(__cplusplus)
> > +}
> > +#endif
> > +
> > +#endif /* __LIMA_DRM_H__ */
> > --
> > 2.17.1
> >
Qiang Yu March 2, 2019, 5:22 a.m. UTC | #4
> > +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
> > +{
> > +       struct lima_fence *fence;
> > +
> > +       fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
>
> Out of curiosity, what is the benefit of using a separate slab here?
> If this is beneficial, then other drivers should do this too and it
> should be common. Otherwise, it adds some complexity.

A fence is a very frequently allocated and freed struct, so it gets its own slab.
It is also used in a get/put pattern, so it may outlive the struct it would
otherwise be embedded in. This approach is borrowed from the amdgpu driver.

>
> And maybe the slab should be initialzed in probe rather than module_init.
>
Either way is OK. But doing it in module init makes it easier to avoid
initializing the slab twice when there are two devices.

> > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
> > +{
> > +       struct lima_bo_va *bo_va;
> > +       int err;
> > +
> > +       mutex_lock(&bo->lock);
> > +
> > +       bo_va = lima_vm_bo_find(vm, bo);
> > +       if (bo_va) {
> > +               bo_va->ref_count++;
> > +               mutex_unlock(&bo->lock);
> > +               return 0;
> > +       }
> > +
> > +       /* should not create new bo_va if not asked by caller */
> > +       if (!create) {
> > +               mutex_unlock(&bo->lock);
> > +               return -ENOENT;
> > +       }
> > +
> > +       bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
> > +       if (!bo_va) {
> > +               err = -ENOMEM;
> > +               goto err_out0;
> > +       }
> > +
> > +       bo_va->vm = vm;
> > +       bo_va->ref_count = 1;
> > +
> > +       mutex_lock(&vm->lock);
> > +
> > +       err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
> > +       if (err)
> > +               goto err_out1;
> > +
> > +       err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
> > +                                    bo_va->node.start + bo_va->node.size - 1);
> > +       if (err)
> > +               goto err_out2;
> > +
> > +       mutex_unlock(&vm->lock);
> > +
> > +       list_add_tail(&bo_va->list, &bo->va);
>
> So you can have 1 BO at multiple VAs? Is that really needed?
>
Actually, one BO can't have multiple VAs in a single VM, but it has one VA in each VM.
When a BO is exported/imported between two processes, e.g. the X server and a client,
the two processes have different VMs, so we can't guarantee that it is mapped at the
same place in both.

Regards,
Qiang
Rob Clark March 2, 2019, 6:23 p.m. UTC | #5
On Fri, Mar 1, 2019 at 9:32 PM Qiang Yu <yuq825@gmail.com> wrote:
>
> On Thu, Feb 28, 2019 at 5:41 AM Rob Herring <robh@kernel.org> wrote:
> >
> > On Wed, Feb 27, 2019 at 7:42 AM Qiang Yu <yuq825@gmail.com> wrote:
> > > diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c> > > new file mode 100644
> > > index 000000000000..e93bce16ee10
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/lima/lima_drv.c
> > > @@ -0,0 +1,353 @@
> > > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > > +
> > > +#include <linux/module.h>
> > > +#include <linux/of_platform.h>
> > > +#include <linux/log2.h>
> > > +#include <drm/drm_prime.h>
> > > +#include <drm/lima_drm.h>
> > > +
> > > +#include "lima_drv.h"
> > > +#include "lima_gem.h"
> > > +#include "lima_gem_prime.h"
> > > +#include "lima_vm.h"
> > > +
> > > +int lima_sched_timeout_ms = 0;
> > > +int lima_sched_max_tasks = 32;
> > > +
> > > +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> > > +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> > > +
> > > +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> > > +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> > > +
> > > +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> > > +{
> >
> > For panfrost, we generalized this to "get param" like other drivers.
> > Looks like you can only add 7 more items.
> >
> > What about GPU revisions?
>
> Currently I don't know there's any programming difference between GPUs
> with different revision. Would be appreciate if anyone can tell me before
> some hard reverse engineering effort.
>

Probably a safe bet there are some revisions that need userspace
workarounds.. and given that kernel to userspace uabi is something we
end up having to live with for a long time, better to expose more
information to userspace just in case.

BR,
-R
Rob Herring (Arm) March 4, 2019, 5:20 p.m. UTC | #6
On Sat, Mar 2, 2019 at 12:23 PM Rob Clark <robdclark@gmail.com> wrote:
>
> On Fri, Mar 1, 2019 at 9:32 PM Qiang Yu <yuq825@gmail.com> wrote:
> >
> > On Thu, Feb 28, 2019 at 5:41 AM Rob Herring <robh@kernel.org> wrote:
> > >
> > > On Wed, Feb 27, 2019 at 7:42 AM Qiang Yu <yuq825@gmail.com> wrote:
> > > > diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c> > > new file mode 100644
> > > > index 000000000000..e93bce16ee10
> > > > --- /dev/null
> > > > +++ b/drivers/gpu/drm/lima/lima_drv.c
> > > > @@ -0,0 +1,353 @@
> > > > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > > > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > > > +
> > > > +#include <linux/module.h>
> > > > +#include <linux/of_platform.h>
> > > > +#include <linux/log2.h>
> > > > +#include <drm/drm_prime.h>
> > > > +#include <drm/lima_drm.h>
> > > > +
> > > > +#include "lima_drv.h"
> > > > +#include "lima_gem.h"
> > > > +#include "lima_gem_prime.h"
> > > > +#include "lima_vm.h"
> > > > +
> > > > +int lima_sched_timeout_ms = 0;
> > > > +int lima_sched_max_tasks = 32;
> > > > +
> > > > +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> > > > +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> > > > +
> > > > +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> > > > +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> > > > +
> > > > +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> > > > +{
> > >
> > > For panfrost, we generalized this to "get param" like other drivers.
> > > Looks like you can only add 7 more items.
> > >
> > > What about GPU revisions?
> >
> > Currently I don't know there's any programming difference between GPUs
> > with different revision. Would be appreciate if anyone can tell me before
> > some hard reverse engineering effort.

What does the vendor kernel driver have? I haven't checked utgard, but
there's no shortage of quirks in the midgard/bifrost driver. I'd
imagine utgard to be similar.

> Probably a safe bet there are some revisions that need userspace
> workarounds.. and given that kernel to userspace uabi is something we
> end up having to live with for a long time, better to expose more
> information to userspace just in case.

Right.

More important than the one example I gave: design the ABI to be
extensible beyond 7 more u32 values. It is quite easy to support 2^32
params.

Rob
Qiang Yu March 5, 2019, 1:36 a.m. UTC | #7
On Tue, Mar 5, 2019 at 1:20 AM Rob Herring <robh@kernel.org> wrote:
>
> On Sat, Mar 2, 2019 at 12:23 PM Rob Clark <robdclark@gmail.com> wrote:
> >
> > On Fri, Mar 1, 2019 at 9:32 PM Qiang Yu <yuq825@gmail.com> wrote:
> > >
> > > On Thu, Feb 28, 2019 at 5:41 AM Rob Herring <robh@kernel.org> wrote:
> > > >
> > > > On Wed, Feb 27, 2019 at 7:42 AM Qiang Yu <yuq825@gmail.com> wrote:
> > > > > diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c> > > new file mode 100644
> > > > > index 000000000000..e93bce16ee10
> > > > > --- /dev/null
> > > > > +++ b/drivers/gpu/drm/lima/lima_drv.c
> > > > > @@ -0,0 +1,353 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > > > > +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
> > > > > +
> > > > > +#include <linux/module.h>
> > > > > +#include <linux/of_platform.h>
> > > > > +#include <linux/log2.h>
> > > > > +#include <drm/drm_prime.h>
> > > > > +#include <drm/lima_drm.h>
> > > > > +
> > > > > +#include "lima_drv.h"
> > > > > +#include "lima_gem.h"
> > > > > +#include "lima_gem_prime.h"
> > > > > +#include "lima_vm.h"
> > > > > +
> > > > > +int lima_sched_timeout_ms = 0;
> > > > > +int lima_sched_max_tasks = 32;
> > > > > +
> > > > > +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> > > > > +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> > > > > +
> > > > > +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> > > > > +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> > > > > +
> > > > > +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> > > > > +{
> > > >
> > > > For panfrost, we generalized this to "get param" like other drivers.
> > > > Looks like you can only add 7 more items.
> > > >
> > > > What about GPU revisions?
> > >
> > > Currently I don't know there's any programming difference between GPUs
> > > with different revision. Would be appreciate if anyone can tell me before
> > > some hard reverse engineering effort.
>
> What does the vendor kernel driver have? I haven't checked utgard, but
> there's no shortage of quirks in the midgard/bifrost driver. I'd
> imagine utgard to be similar.

The vendor kernel driver exports the version. I've added it in the next
version of the patch.

>
> > Probably a safe bet there are some revisions that need userspace
> > workarounds.. and given that kernel to userspace uabi is something we
> > end up having to live with for a long time, better to expose more
> > information to userspace just in case.
>
> Right.
>
> More importantly than the 1 example I gave, design the ABI to be
> extendable beyond 7 more u32 values. It is quite easy to support 2^32
> params.
>
OK, I've changed it to work this way in a later version of this patch.

Regards,
Qiang
Rob Herring (Arm) March 5, 2019, 3:31 p.m. UTC | #8
On Fri, Mar 1, 2019 at 11:23 PM Qiang Yu <yuq825@gmail.com> wrote:
>
> > > +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
> > > +{
> > > +       struct lima_fence *fence;
> > > +
> > > +       fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
> >
> > Out of curiosity, what is the benefit of using a separate slab here?
> > If this is beneficial, then other drivers should do this too and it
> > should be common. Otherwise, it adds some complexity.
>
> fence is pretty frequently alloc free struct, so make it a slab. And it's used
> in get/put pattern, so may live longer than embedded struct. This is referenced
> from amdgpu driver.
>
> >
> > And maybe the slab should be initialzed in probe rather than module_init.
> >
> Either way is OK. But live in module init is easier not to init twice
> for two devices.

True, but I was thinking more about initializing it for 0 devices
which can be common if built-in on a multi-platform kernel.

> > > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
> > > +{
> > > +       struct lima_bo_va *bo_va;
> > > +       int err;
> > > +
> > > +       mutex_lock(&bo->lock);
> > > +
> > > +       bo_va = lima_vm_bo_find(vm, bo);
> > > +       if (bo_va) {
> > > +               bo_va->ref_count++;
> > > +               mutex_unlock(&bo->lock);
> > > +               return 0;
> > > +       }
> > > +
> > > +       /* should not create new bo_va if not asked by caller */
> > > +       if (!create) {
> > > +               mutex_unlock(&bo->lock);
> > > +               return -ENOENT;
> > > +       }
> > > +
> > > +       bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
> > > +       if (!bo_va) {
> > > +               err = -ENOMEM;
> > > +               goto err_out0;
> > > +       }
> > > +
> > > +       bo_va->vm = vm;
> > > +       bo_va->ref_count = 1;
> > > +
> > > +       mutex_lock(&vm->lock);
> > > +
> > > +       err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
> > > +       if (err)
> > > +               goto err_out1;
> > > +
> > > +       err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
> > > +                                    bo_va->node.start + bo_va->node.size - 1);
> > > +       if (err)
> > > +               goto err_out2;
> > > +
> > > +       mutex_unlock(&vm->lock);
> > > +
> > > +       list_add_tail(&bo_va->list, &bo->va);
> >
> > So you can have 1 BO at multiple VAs? Is that really needed?
> >
> Actually 1 BO can't have multi VA in single VM, but one VA in each VM.
> When a BO is exported/imported between two process, i.e. xserver and client,
> two processes have different VM, so can't make sure it can be mapped at the same
> place.

Right, but when you import a BO, a new BO struct is created and
therefore a new list. If there's only 1 VA, then you don't need a
list. Just move 'node' into lima_bo. (It is possible I missed some
detail though.)

Rob
Eric Anholt March 5, 2019, 8:18 p.m. UTC | #9
Rob Herring <robh@kernel.org> writes:

> On Fri, Mar 1, 2019 at 11:23 PM Qiang Yu <yuq825@gmail.com> wrote:
>>
>> > > +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
>> > > +{
>> > > +       struct lima_fence *fence;
>> > > +
>> > > +       fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
>> >
>> > Out of curiosity, what is the benefit of using a separate slab here?
>> > If this is beneficial, then other drivers should do this too and it
>> > should be common. Otherwise, it adds some complexity.
>>
>> fence is pretty frequently alloc free struct, so make it a slab. And it's used
>> in get/put pattern, so may live longer than embedded struct. This is referenced
>> from amdgpu driver.
>>
>> >
>> > And maybe the slab should be initialzed in probe rather than module_init.
>> >
>> Either way is OK. But live in module init is easier not to init twice
>> for two devices.
>
> True, but I was thinking more about initializing it for 0 devices
> which can be common if built-in on a multi-platform kernel.
>
>> > > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
>> > > +{
>> > > +       struct lima_bo_va *bo_va;
>> > > +       int err;
>> > > +
>> > > +       mutex_lock(&bo->lock);
>> > > +
>> > > +       bo_va = lima_vm_bo_find(vm, bo);
>> > > +       if (bo_va) {
>> > > +               bo_va->ref_count++;
>> > > +               mutex_unlock(&bo->lock);
>> > > +               return 0;
>> > > +       }
>> > > +
>> > > +       /* should not create new bo_va if not asked by caller */
>> > > +       if (!create) {
>> > > +               mutex_unlock(&bo->lock);
>> > > +               return -ENOENT;
>> > > +       }
>> > > +
>> > > +       bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
>> > > +       if (!bo_va) {
>> > > +               err = -ENOMEM;
>> > > +               goto err_out0;
>> > > +       }
>> > > +
>> > > +       bo_va->vm = vm;
>> > > +       bo_va->ref_count = 1;
>> > > +
>> > > +       mutex_lock(&vm->lock);
>> > > +
>> > > +       err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
>> > > +       if (err)
>> > > +               goto err_out1;
>> > > +
>> > > +       err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
>> > > +                                    bo_va->node.start + bo_va->node.size - 1);
>> > > +       if (err)
>> > > +               goto err_out2;
>> > > +
>> > > +       mutex_unlock(&vm->lock);
>> > > +
>> > > +       list_add_tail(&bo_va->list, &bo->va);
>> >
>> > So you can have 1 BO at multiple VAs? Is that really needed?
>> >
>> Actually 1 BO can't have multi VA in single VM, but one VA in each VM.
>> When a BO is exported/imported between two process, i.e. xserver and client,
>> two processes have different VM, so can't make sure it can be mapped at the same
>> place.
>
> Right, but when you import a BO, a new BO struct is created and
> therefore a new list. If there's only 1 VA, then you don't need a
> list. Just move 'node' into lima_bo. (It is possible I missed some
> detail though.)

You only make a new GEM BO struct on importing a new dmabuf into the
driver -- export/imports between process share the same GEM BO struct
(unless I've misread what you're saying).
Qiang Yu March 6, 2019, 2:01 a.m. UTC | #10
On Wed, Mar 6, 2019 at 4:18 AM Eric Anholt <eric@anholt.net> wrote:
>
> Rob Herring <robh@kernel.org> writes:
>
> > On Fri, Mar 1, 2019 at 11:23 PM Qiang Yu <yuq825@gmail.com> wrote:
> >>
> >> > > +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
> >> > > +{
> >> > > +       struct lima_fence *fence;
> >> > > +
> >> > > +       fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
> >> >
> >> > Out of curiosity, what is the benefit of using a separate slab here?
> >> > If this is beneficial, then other drivers should do this too and it
> >> > should be common. Otherwise, it adds some complexity.
> >>
> >> fence is pretty frequently alloc free struct, so make it a slab. And it's used
> >> in get/put pattern, so may live longer than embedded struct. This is referenced
> >> from amdgpu driver.
> >>
> >> >
> >> > And maybe the slab should be initialzed in probe rather than module_init.
> >> >
> >> Either way is OK. But live in module init is easier not to init twice
> >> for two devices.
> >
> > True, but I was thinking more about initializing it for 0 devices
> > which can be common if built-in on a multi-platform kernel.

I think it's more common to build this driver as a loadable module.
But your concern makes sense for the built-in case, so I'll move this to
probe.

> >
> >> > > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
> >> > > +{
> >> > > +       struct lima_bo_va *bo_va;
> >> > > +       int err;
> >> > > +
> >> > > +       mutex_lock(&bo->lock);
> >> > > +
> >> > > +       bo_va = lima_vm_bo_find(vm, bo);
> >> > > +       if (bo_va) {
> >> > > +               bo_va->ref_count++;
> >> > > +               mutex_unlock(&bo->lock);
> >> > > +               return 0;
> >> > > +       }
> >> > > +
> >> > > +       /* should not create new bo_va if not asked by caller */
> >> > > +       if (!create) {
> >> > > +               mutex_unlock(&bo->lock);
> >> > > +               return -ENOENT;
> >> > > +       }
> >> > > +
> >> > > +       bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
> >> > > +       if (!bo_va) {
> >> > > +               err = -ENOMEM;
> >> > > +               goto err_out0;
> >> > > +       }
> >> > > +
> >> > > +       bo_va->vm = vm;
> >> > > +       bo_va->ref_count = 1;
> >> > > +
> >> > > +       mutex_lock(&vm->lock);
> >> > > +
> >> > > +       err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
> >> > > +       if (err)
> >> > > +               goto err_out1;
> >> > > +
> >> > > +       err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
> >> > > +                                    bo_va->node.start + bo_va->node.size - 1);
> >> > > +       if (err)
> >> > > +               goto err_out2;
> >> > > +
> >> > > +       mutex_unlock(&vm->lock);
> >> > > +
> >> > > +       list_add_tail(&bo_va->list, &bo->va);
> >> >
> >> > So you can have 1 BO at multiple VAs? Is that really needed?
> >> >
> >> Actually 1 BO can't have multi VA in single VM, but one VA in each VM.
> >> When a BO is exported/imported between two process, i.e. xserver and client,
> >> two processes have different VM, so can't make sure it can be mapped at the same
> >> place.
> >
> > Right, but when you import a BO, a new BO struct is created and
> > therefore a new list. If there's only 1 VA, then you don't need a
> > list. Just move 'node' into lima_bo. (It is possible I missed some
> > detail though.)
>
> You only make a new GEM BO struct on importing a new dmabuf into the
> driver -- export/imports between process share the same GEM BO struct
> (unless I've misread what you're saying).

To be more clear, a new GEM BO struct gets created only when importing a
dmabuf from a different device.

Regards,
Qiang
diff mbox series

Patch

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 4385f00e1d05..dfefcb393858 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -333,6 +333,8 @@  source "drivers/gpu/drm/tve200/Kconfig"
 
 source "drivers/gpu/drm/xen/Kconfig"
 
+source "drivers/gpu/drm/lima/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ce8d1d384319..8d024b729902 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -109,3 +109,4 @@  obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
 obj-$(CONFIG_DRM_PL111) += pl111/
 obj-$(CONFIG_DRM_TVE200) += tve200/
 obj-$(CONFIG_DRM_XEN) += xen/
+obj-$(CONFIG_DRM_LIMA)  += lima/
diff --git a/drivers/gpu/drm/lima/Kconfig b/drivers/gpu/drm/lima/Kconfig
new file mode 100644
index 000000000000..260a3b41f364
--- /dev/null
+++ b/drivers/gpu/drm/lima/Kconfig
@@ -0,0 +1,10 @@ 
+# SPDX-License-Identifier: GPL-2.0 OR MIT
+# Copyright 2017-2018 Qiang Yu <yuq825@gmail.com>
+
+config DRM_LIMA
+       tristate "LIMA (DRM support for ARM Mali 400/450 GPU)"
+       depends on DRM
+       depends on ARM || ARM64 || COMPILE_TEST
+       select DRM_SCHED
+       help
+         DRM driver for ARM Mali 400/450 GPUs.
diff --git a/drivers/gpu/drm/lima/Makefile b/drivers/gpu/drm/lima/Makefile
new file mode 100644
index 000000000000..430e16814535
--- /dev/null
+++ b/drivers/gpu/drm/lima/Makefile
@@ -0,0 +1,21 @@ 
+# SPDX-License-Identifier: GPL-2.0 OR MIT
+# Copyright 2017-2018 Qiang Yu <yuq825@gmail.com>
+
+lima-y := \
+	lima_drv.o \
+	lima_device.o \
+	lima_pmu.o \
+	lima_l2_cache.o \
+	lima_mmu.o \
+	lima_gp.o \
+	lima_pp.o \
+	lima_gem.o \
+	lima_vm.o \
+	lima_sched.o \
+	lima_ctx.o \
+	lima_gem_prime.o \
+	lima_dlbu.o \
+	lima_bcast.o \
+	lima_object.o
+
+obj-$(CONFIG_DRM_LIMA) += lima.o
diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
new file mode 100644
index 000000000000..398e6d604426
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_bcast.c
@@ -0,0 +1,46 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/io.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_bcast.h"
+#include "lima_regs.h"
+
+/*
+ * Register accessors for the broadcast unit.  Both expand to an expression
+ * using a variable named "ip", which must be in scope at the call site.
+ */
+#define bcast_write(reg, data) writel(data, ip->iomem + reg)
+#define bcast_read(reg) readl(ip->iomem + reg)
+
+/*
+ * Rewrite the broadcast mask register: the current upper 16 bits are kept
+ * and one low bit is set for each of the first num_pp processors of the PP
+ * pipe (bit n for ppN).
+ */
+void lima_bcast_enable(struct lima_device *dev, int num_pp)
+{
+	struct lima_ip *ip = dev->ip + lima_ip_bcast;
+	struct lima_sched_pipe *pp_pipe = dev->pipe + lima_pipe_pp;
+	int mask, n;
+
+	/* start from the upper half that is already in the register */
+	mask = bcast_read(LIMA_BCAST_BROADCAST_MASK) & 0xffff0000;
+
+	for (n = 0; n < num_pp; n++)
+		mask |= 1 << (pp_pipe->processor[n]->id - lima_ip_pp0);
+
+	bcast_write(LIMA_BCAST_BROADCAST_MASK, mask);
+}
+
+/*
+ * Collect a bitmask of the PP cores marked present (bit n for ppN), write it
+ * shifted into the upper 16 bits of the broadcast mask register and unshifted
+ * into the interrupt mask register.  Always returns 0.
+ */
+int lima_bcast_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int present_mask = 0;
+	int id;
+
+	for (id = lima_ip_pp0; id <= lima_ip_pp7; id++) {
+		if (!dev->ip[id].present)
+			continue;
+		present_mask |= 1 << (id - lima_ip_pp0);
+	}
+
+	bcast_write(LIMA_BCAST_BROADCAST_MASK, present_mask << 16);
+	bcast_write(LIMA_BCAST_INTERRUPT_MASK, present_mask);
+	return 0;
+}
+
+/* Teardown hook for the broadcast unit; intentionally does nothing. */
+void lima_bcast_fini(struct lima_ip *ip)
+{
+}
+
diff --git a/drivers/gpu/drm/lima/lima_bcast.h b/drivers/gpu/drm/lima/lima_bcast.h
new file mode 100644
index 000000000000..345e3e809860
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_bcast.h
@@ -0,0 +1,14 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_BCAST_H__
+#define __LIMA_BCAST_H__
+
+/*
+ * Forward declarations for every struct named in the prototypes below, so
+ * this header can be included without pulling in lima_device.h first.
+ */
+struct lima_ip;
+struct lima_device;
+
+/* init/fini hooks for the broadcast IP */
+int lima_bcast_init(struct lima_ip *ip);
+void lima_bcast_fini(struct lima_ip *ip);
+
+/* add the first num_pp processors of the PP pipe to the broadcast mask */
+void lima_bcast_enable(struct lima_device *dev, int num_pp);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
new file mode 100644
index 000000000000..439cb44d7a0d
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_ctx.c
@@ -0,0 +1,105 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/slab.h>
+
+#include "lima_device.h"
+#include "lima_ctx.h"
+
+/*
+ * Allocate a context, initialise one scheduler context per pipe and register
+ * it in the manager's IDR.  On success the new handle (1 or greater) is
+ * stored in *id and 0 is returned.  On failure an error code is returned and
+ * everything set up so far is released again.
+ */
+int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id)
+{
+	struct lima_ctx *ctx;
+	int handle, i;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx == NULL)
+		return -ENOMEM;
+
+	kref_init(&ctx->refcnt);
+	ctx->dev = dev;
+
+	for (i = 0; i < lima_pipe_num; i++) {
+		err = lima_sched_context_init(dev->pipe + i, ctx->context + i, &ctx->guilty);
+		if (err)
+			goto err_out0;
+	}
+
+	/* preload outside the spinlock; the allocation under it is GFP_ATOMIC */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&mgr->lock);
+	handle = idr_alloc(&mgr->handles, ctx, 1, 0, GFP_ATOMIC);
+	spin_unlock(&mgr->lock);
+	idr_preload_end();
+	if (handle < 0) {
+		err = handle;
+		goto err_out0;
+	}
+
+	*id = handle;
+	return 0;
+
+err_out0:
+	/* i is the number of pipes whose scheduler context was initialised */
+	while (--i >= 0)
+		lima_sched_context_fini(dev->pipe + i, ctx->context + i);
+	kfree(ctx);
+	return err;
+}
+
+/*
+ * kref release callback: finalise the scheduler context of every pipe and
+ * free the lima_ctx that embeds the kref.
+ */
+static void lima_ctx_do_release(struct kref *ref)
+{
+	struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt);
+	struct lima_device *dev = ctx->dev;
+	int pipe_idx;
+
+	for (pipe_idx = 0; pipe_idx < lima_pipe_num; pipe_idx++) {
+		lima_sched_context_fini(dev->pipe + pipe_idx,
+					ctx->context + pipe_idx);
+	}
+
+	kfree(ctx);
+}
+
+/*
+ * Remove handle "id" from the manager and drop the reference the handle
+ * table held.  Returns 0 on success, -EINVAL if the handle was not found.
+ */
+int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id)
+{
+	struct lima_ctx *removed;
+
+	spin_lock(&mgr->lock);
+	removed = idr_remove(&mgr->handles, id);
+	spin_unlock(&mgr->lock);
+
+	if (!removed)
+		return -EINVAL;
+
+	kref_put(&removed->refcnt, lima_ctx_do_release);
+	return 0;
+}
+
+/*
+ * Look up handle "id" and take a reference on the context while still
+ * holding the manager lock.  Returns NULL when the handle is unknown.
+ */
+struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id)
+{
+	struct lima_ctx *found;
+
+	spin_lock(&mgr->lock);
+	found = idr_find(&mgr->handles, id);
+	if (found != NULL)
+		kref_get(&found->refcnt);
+	spin_unlock(&mgr->lock);
+
+	return found;
+}
+
+/* Drop one reference; the last put runs lima_ctx_do_release(). */
+void lima_ctx_put(struct lima_ctx *ctx)
+{
+	struct kref *ref = &ctx->refcnt;
+
+	kref_put(ref, lima_ctx_do_release);
+}
+
+/* Prepare an empty handle table and the spinlock used around it. */
+void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr)
+{
+	idr_init(&mgr->handles);
+	spin_lock_init(&mgr->lock);
+}
+
+/*
+ * Drop the handle table's reference on every context still registered, then
+ * destroy the IDR itself.
+ */
+void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr)
+{
+	struct idr *handles = &mgr->handles;
+	struct lima_ctx *ctx;
+	uint32_t id;
+
+	idr_for_each_entry(handles, ctx, id)
+		kref_put(&ctx->refcnt, lima_ctx_do_release);
+
+	idr_destroy(handles);
+}
diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h
new file mode 100644
index 000000000000..2d32ff9b30ad
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_ctx.h
@@ -0,0 +1,30 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_CTX_H__
+#define __LIMA_CTX_H__
+
+#include <linux/idr.h>
+
+#include "lima_device.h"
+
+/* Reference-counted context; lima_ctx_do_release() frees it on the last put. */
+struct lima_ctx {
+	struct kref refcnt;
+	struct lima_device *dev;
+	struct lima_sched_context context[lima_pipe_num];	/* one per pipe */
+	atomic_t guilty;	/* passed to every lima_sched_context_init() call */
+};
+
+/* Table mapping context handles to struct lima_ctx. */
+struct lima_ctx_mgr {
+	spinlock_t lock;	/* held around idr_alloc/idr_remove/idr_find on handles */
+	struct idr handles;	/* handles are allocated starting at 1 */
+};
+
+int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id);
+int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id);
+struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id);
+void lima_ctx_put(struct lima_ctx *ctx);
+void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr);
+void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
new file mode 100644
index 000000000000..2e137a0baddb
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -0,0 +1,376 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+
+#include "lima_device.h"
+#include "lima_gp.h"
+#include "lima_pp.h"
+#include "lima_mmu.h"
+#include "lima_pmu.h"
+#include "lima_l2_cache.h"
+#include "lima_dlbu.h"
+#include "lima_bcast.h"
+#include "lima_vm.h"
+
+/*
+ * Static description of one IP block.  must_have[] and offset[] are indexed
+ * by enum lima_gpu_id.
+ */
+struct lima_ip_desc {
+	char *name;	/* returned by lima_ip_name() */
+	char *irq_name;	/* looked up with platform_get_irq_byname(); NULL skips the lookup */
+	bool must_have[lima_gpu_num];	/* when false, lima_init_ip() reports success even if setup fails */
+	int offset[lima_gpu_num];	/* added to the device iomem base; negative makes lima_init_ip() skip the IP */
+
+	int (*init)(struct lima_ip *);
+	void (*fini)(struct lima_ip *);	/* lima_fini_ip() calls this only when ip->present is set */
+};
+
+/*
+ * Build one designated-initializer entry for lima_ip_desc[]:
+ *   ipname      - suffix of the lima_ip_* enum value; also stringified as .name
+ *   mst0, mst1  - must_have for mali400 / mali450
+ *   off0, off1  - register offset for mali400 / mali450
+ *   func        - selects lima_<func>_init / lima_<func>_fini as the hooks
+ *   irq         - IRQ name string, or NULL
+ */
+#define LIMA_IP_DESC(ipname, mst0, mst1, off0, off1, func, irq) \
+	[lima_ip_##ipname] = { \
+		.name = #ipname, \
+		.irq_name = irq, \
+		.must_have = { \
+			[lima_gpu_mali400] = mst0, \
+			[lima_gpu_mali450] = mst1, \
+		}, \
+		.offset = { \
+			[lima_gpu_mali400] = off0, \
+			[lima_gpu_mali450] = off1, \
+		}, \
+		.init = lima_##func##_init, \
+		.fini = lima_##func##_fini, \
+	}
+
+/*
+ * Per-IP description table, indexed by enum lima_ip_id through designated
+ * initializers (so the row order here need not match the enum order).
+ * Columns: name, must_have mali400, must_have mali450, offset mali400,
+ * offset mali450, init/fini family, IRQ name.  An offset of -1 means
+ * lima_init_ip() skips that IP on that GPU.
+ */
+static struct lima_ip_desc lima_ip_desc[lima_ip_num] = {
+	LIMA_IP_DESC(pmu,         false, false, 0x02000, 0x02000, pmu,      "pmu"),
+	LIMA_IP_DESC(l2_cache0,   true,  true,  0x01000, 0x10000, l2_cache, NULL),
+	LIMA_IP_DESC(l2_cache1,   false, true,  -1,      0x01000, l2_cache, NULL),
+	LIMA_IP_DESC(l2_cache2,   false, false, -1,      0x11000, l2_cache, NULL),
+	LIMA_IP_DESC(gp,          true,  true,  0x00000, 0x00000, gp,       "gp"),
+	LIMA_IP_DESC(pp0,         true,  true,  0x08000, 0x08000, pp,       "pp0"),
+	LIMA_IP_DESC(pp1,         false, false, 0x0A000, 0x0A000, pp,       "pp1"),
+	LIMA_IP_DESC(pp2,         false, false, 0x0C000, 0x0C000, pp,       "pp2"),
+	LIMA_IP_DESC(pp3,         false, false, 0x0E000, 0x0E000, pp,       "pp3"),
+	LIMA_IP_DESC(pp4,         false, false, -1,      0x28000, pp,       "pp4"),
+	LIMA_IP_DESC(pp5,         false, false, -1,      0x2A000, pp,       "pp5"),
+	LIMA_IP_DESC(pp6,         false, false, -1,      0x2C000, pp,       "pp6"),
+	LIMA_IP_DESC(pp7,         false, false, -1,      0x2E000, pp,       "pp7"),
+	LIMA_IP_DESC(gpmmu,       true,  true,  0x03000, 0x03000, mmu,      "gpmmu"),
+	LIMA_IP_DESC(ppmmu0,      true,  true,  0x04000, 0x04000, mmu,      "ppmmu0"),
+	LIMA_IP_DESC(ppmmu1,      false, false, 0x05000, 0x05000, mmu,      "ppmmu1"),
+	LIMA_IP_DESC(ppmmu2,      false, false, 0x06000, 0x06000, mmu,      "ppmmu2"),
+	LIMA_IP_DESC(ppmmu3,      false, false, 0x07000, 0x07000, mmu,      "ppmmu3"),
+	LIMA_IP_DESC(ppmmu4,      false, false, -1,      0x1C000, mmu,      "ppmmu4"),
+	LIMA_IP_DESC(ppmmu5,      false, false, -1,      0x1D000, mmu,      "ppmmu5"),
+	LIMA_IP_DESC(ppmmu6,      false, false, -1,      0x1E000, mmu,      "ppmmu6"),
+	LIMA_IP_DESC(ppmmu7,      false, false, -1,      0x1F000, mmu,      "ppmmu7"),
+	LIMA_IP_DESC(dlbu,        false, true,  -1,      0x14000, dlbu,     NULL),
+	LIMA_IP_DESC(bcast,       false, true,  -1,      0x13000, bcast,    NULL),
+	LIMA_IP_DESC(pp_bcast,    false, true,  -1,      0x16000, pp_bcast, "pp"),
+	LIMA_IP_DESC(ppmmu_bcast, false, true,  -1,      0x15000, mmu,      NULL),
+};
+
+/* Return the name string stored in the description table for this IP. */
+const char *lima_ip_name(struct lima_ip *ip)
+{
+	struct lima_ip_desc *desc = lima_ip_desc + ip->id;
+
+	return desc->name;
+}
+
+/*
+ * Look up the "bus" and "core" clocks, log their rates, enable both and
+ * deassert the optional reset control.
+ *
+ * Returns 0 on success or an error code.  A clock that was enabled before a
+ * later step failed is disabled again before returning.
+ */
+static int lima_clk_init(struct lima_device *dev)
+{
+	int err;
+	unsigned long bus_rate, gpu_rate;
+
+	dev->clk_bus = devm_clk_get(dev->dev, "bus");
+	if (IS_ERR(dev->clk_bus)) {
+		dev_err(dev->dev, "get bus clk failed %ld\n", PTR_ERR(dev->clk_bus));
+		return PTR_ERR(dev->clk_bus);
+	}
+
+	dev->clk_gpu = devm_clk_get(dev->dev, "core");
+	if (IS_ERR(dev->clk_gpu)) {
+		dev_err(dev->dev, "get core clk failed %ld\n", PTR_ERR(dev->clk_gpu));
+		return PTR_ERR(dev->clk_gpu);
+	}
+
+	bus_rate = clk_get_rate(dev->clk_bus);
+	dev_info(dev->dev, "bus rate = %lu\n", bus_rate);
+
+	gpu_rate = clk_get_rate(dev->clk_gpu);
+	/* terminate the message with a newline like every other log line here */
+	dev_info(dev->dev, "mod rate = %lu\n", gpu_rate);
+
+	err = clk_prepare_enable(dev->clk_bus);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(dev->clk_gpu);
+	if (err)
+		goto error_out0;
+
+	/* the reset control is optional: NULL means there is none to deassert */
+	dev->reset = devm_reset_control_get_optional(dev->dev, NULL);
+	if (IS_ERR(dev->reset)) {
+		err = PTR_ERR(dev->reset);
+		goto error_out1;
+	} else if (dev->reset != NULL) {
+		err = reset_control_deassert(dev->reset);
+		if (err)
+			goto error_out1;
+	}
+
+	return 0;
+
+error_out1:
+	clk_disable_unprepare(dev->clk_gpu);
+error_out0:
+	clk_disable_unprepare(dev->clk_bus);
+	return err;
+}
+
+/*
+ * Assert the reset control when one was obtained, then disable the core and
+ * bus clocks in that order.
+ */
+static void lima_clk_fini(struct lima_device *dev)
+{
+	struct reset_control *rst = dev->reset;
+
+	if (rst)
+		reset_control_assert(rst);
+
+	clk_disable_unprepare(dev->clk_gpu);
+	clk_disable_unprepare(dev->clk_bus);
+}
+
+/*
+ * Get and enable the optional "mali" regulator.  A lookup failing with
+ * -ENODEV is treated as "no regulator" and returns 0 with dev->regulator
+ * left NULL.  Any other lookup or enable failure is logged and returned.
+ */
+static int lima_regulator_init(struct lima_device *dev)
+{
+	struct regulator *reg;
+	int ret;
+
+	reg = devm_regulator_get_optional(dev->dev, "mali");
+	if (IS_ERR(reg)) {
+		dev->regulator = NULL;
+		ret = PTR_ERR(reg);
+		if (ret != -ENODEV) {
+			dev_err(dev->dev, "failed to get regulator: %d\n", ret);
+			return ret;
+		}
+		return 0;
+	}
+	dev->regulator = reg;
+
+	ret = regulator_enable(dev->regulator);
+	if (ret >= 0)
+		return 0;
+
+	dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
+	return ret;
+}
+
+/* Disable the regulator if lima_regulator_init() stored one. */
+static void lima_regulator_fini(struct lima_device *dev)
+{
+	if (dev->regulator == NULL)
+		return;
+
+	regulator_disable(dev->regulator);
+}
+
+/*
+ * Set up the IP at table position "index" for the current GPU model.
+ *
+ * IPs with a negative offset are skipped.  Otherwise the lima_ip slot is
+ * filled in, the named IRQ (if any) is looked up and the IP's init hook is
+ * run; ip->present is set only when all of that succeeded.  A failure is
+ * returned to the caller only for IPs flagged must_have on this GPU.
+ */
+static int lima_init_ip(struct lima_device *dev, int index)
+{
+	struct lima_ip_desc *desc = lima_ip_desc + index;
+	struct lima_ip *ip = dev->ip + index;
+	bool required = desc->must_have[dev->id];
+	int offset = desc->offset[dev->id];
+	int ret;
+
+	if (offset < 0)
+		return 0;
+
+	ip->dev = dev;
+	ip->id = index;
+	ip->iomem = dev->iomem + offset;
+
+	if (desc->irq_name) {
+		int irq = platform_get_irq_byname(dev->pdev, desc->irq_name);
+
+		if (irq < 0)
+			return required ? irq : 0;
+		ip->irq = irq;
+	}
+
+	ret = desc->init(ip);
+	if (ret)
+		return required ? ret : 0;
+
+	ip->present = true;
+	return 0;
+}
+
+/* Run the fini hook of the IP at "index", but only if it was marked present. */
+static void lima_fini_ip(struct lima_device *ldev, int index)
+{
+	struct lima_ip *ip = ldev->ip + index;
+
+	if (!ip->present)
+		return;
+
+	lima_ip_desc[index].fini(ip);
+}
+
+/*
+ * Set up the GP scheduler pipe: initialise the pipe, attach l2_cache0, the
+ * GP MMU and the GP core to it, then run the GP-specific pipe init.  The
+ * scheduler pipe is finalised again if that last step fails.
+ */
+static int lima_init_gp_pipe(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+	int ret;
+
+	ret = lima_sched_pipe_init(pipe, "gp");
+	if (ret)
+		return ret;
+
+	pipe->l2_cache[pipe->num_l2_cache++] = dev->ip + lima_ip_l2_cache0;
+	pipe->mmu[pipe->num_mmu++] = dev->ip + lima_ip_gpmmu;
+	pipe->processor[pipe->num_processor++] = dev->ip + lima_ip_gp;
+
+	ret = lima_gp_pipe_init(dev);
+	if (ret)
+		lima_sched_pipe_fini(pipe);
+
+	return ret;
+}
+
+/* Undo lima_init_gp_pipe(): GP-specific teardown first, then the scheduler pipe. */
+static void lima_fini_gp_pipe(struct lima_device *dev)
+{
+	lima_gp_pipe_fini(dev);
+	lima_sched_pipe_fini(dev->pipe + lima_pipe_gp);
+}
+
+/*
+ * Set up the PP scheduler pipe.  Every PP core whose core, MMU and L2 cache
+ * are all present is attached to the pipe.  If the broadcast IP is present,
+ * the broadcast processor and MMU are recorded as well.  The scheduler pipe
+ * is finalised again if the PP-specific pipe init fails.
+ */
+static int lima_init_pp_pipe(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	int idx, ret;
+
+	ret = lima_sched_pipe_init(pipe, "pp");
+	if (ret)
+		return ret;
+
+	for (idx = 0; idx < LIMA_SCHED_PIPE_MAX_PROCESSOR; idx++) {
+		struct lima_ip *core = dev->ip + lima_ip_pp0 + idx;
+		struct lima_ip *core_mmu = dev->ip + lima_ip_ppmmu0 + idx;
+		struct lima_ip *cache;
+
+		/*
+		 * mali400 uses l2_cache0 for every core; otherwise cores are
+		 * mapped in groups of four onto l2_cache1, l2_cache2.
+		 */
+		if (dev->id != lima_gpu_mali400)
+			cache = dev->ip + lima_ip_l2_cache1 + (idx >> 2);
+		else
+			cache = dev->ip + lima_ip_l2_cache0;
+
+		if (!pp_usable(core, core_mmu, cache))
+			continue;
+
+		pipe->mmu[pipe->num_mmu++] = core_mmu;
+		pipe->processor[pipe->num_processor++] = core;
+		if (!pipe->l2_cache[idx >> 2])
+			pipe->l2_cache[pipe->num_l2_cache++] = cache;
+	}
+
+	if (dev->ip[lima_ip_bcast].present) {
+		pipe->bcast_processor = dev->ip + lima_ip_pp_bcast;
+		pipe->bcast_mmu = dev->ip + lima_ip_ppmmu_bcast;
+	}
+
+	ret = lima_pp_pipe_init(dev);
+	if (ret)
+		lima_sched_pipe_fini(pipe);
+
+	return ret;
+}
+
+/* Undo lima_init_pp_pipe(): PP-specific teardown first, then the scheduler pipe. */
+static void lima_fini_pp_pipe(struct lima_device *dev)
+{
+	lima_pp_pipe_fini(dev);
+	lima_sched_pipe_fini(dev->pipe + lima_pipe_pp);
+}
+
+/*
+ * Bring up the whole device: clocks/reset, regulator, the empty VM, the
+ * DLBU page (mali450 only), the register mapping, every IP block and
+ * finally the GP and PP scheduler pipes.
+ *
+ * Returns 0 on success.  On failure the steps already completed are undone
+ * in reverse order through the err_out* labels and the error is returned.
+ */
+int lima_device_init(struct lima_device *ldev)
+{
+	int err, i;
+	struct resource *res;
+
+	/* NOTE(review): the return value is not checked here - confirm that is intended */
+	dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
+
+	err = lima_clk_init(ldev);
+	if (err) {
+		dev_err(ldev->dev, "clk init fail %d\n", err);
+		return err;
+	}
+
+	if ((err = lima_regulator_init(ldev))) {
+		dev_err(ldev->dev, "regulator init fail %d\n", err);
+		goto err_out0;
+	}
+
+	ldev->empty_vm = lima_vm_create(ldev);
+	if (!ldev->empty_vm) {
+		err = -ENOMEM;
+		goto err_out1;
+	}
+
+	/*
+	 * mali450 ends the VA range at LIMA_VA_RESERVE_START and allocates one
+	 * write-combined page for the DLBU; mali400 ends at LIMA_VA_RESERVE_END.
+	 */
+	ldev->va_start = 0;
+	if (ldev->id == lima_gpu_mali450) {
+		ldev->va_end = LIMA_VA_RESERVE_START;
+		ldev->dlbu_cpu = dma_alloc_wc(
+			ldev->dev, LIMA_PAGE_SIZE,
+			&ldev->dlbu_dma, GFP_KERNEL);
+		if (!ldev->dlbu_cpu) {
+			err = -ENOMEM;
+			goto err_out2;
+		}
+	}
+	else
+		ldev->va_end = LIMA_VA_RESERVE_END;
+
+	res = platform_get_resource(ldev->pdev, IORESOURCE_MEM, 0);
+	ldev->iomem = devm_ioremap_resource(ldev->dev, res);
+	if (IS_ERR(ldev->iomem)) {
+		dev_err(ldev->dev, "fail to ioremap iomem\n");
+	        err = PTR_ERR(ldev->iomem);
+		goto err_out3;
+	}
+
+	/* on failure, i is the index of the IP that failed; err_out4 unwinds i-1..0 */
+	for (i = 0; i < lima_ip_num; i++) {
+		err = lima_init_ip(ldev, i);
+		if (err)
+			goto err_out4;
+	}
+
+	/* past this point i == lima_ip_num, so err_out4 finalises every IP */
+	err = lima_init_gp_pipe(ldev);
+	if (err)
+		goto err_out4;
+
+	err = lima_init_pp_pipe(ldev);
+	if (err)
+		goto err_out5;
+
+	return 0;
+
+err_out5:
+	lima_fini_gp_pipe(ldev);
+err_out4:
+	while (--i >= 0)
+		lima_fini_ip(ldev, i);
+err_out3:
+	/* dlbu_cpu is only assigned on the mali450 path above */
+	if (ldev->dlbu_cpu)
+		dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
+			    ldev->dlbu_cpu, ldev->dlbu_dma);
+err_out2:
+	lima_vm_put(ldev->empty_vm);
+err_out1:
+	lima_regulator_fini(ldev);
+err_out0:
+	lima_clk_fini(ldev);
+	return err;
+}
+
+/*
+ * Tear the device down in the reverse order of lima_device_init(): pipes,
+ * IP blocks (highest index first), the DLBU page if one was allocated, the
+ * empty VM, the regulator and finally the clocks.
+ */
+void lima_device_fini(struct lima_device *ldev)
+{
+	int ip_idx = lima_ip_num;
+
+	lima_fini_pp_pipe(ldev);
+	lima_fini_gp_pipe(ldev);
+
+	while (ip_idx-- > 0)
+		lima_fini_ip(ldev, ip_idx);
+
+	if (ldev->dlbu_cpu) {
+		dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
+			    ldev->dlbu_cpu, ldev->dlbu_dma);
+	}
+
+	lima_vm_put(ldev->empty_vm);
+	lima_regulator_fini(ldev);
+	lima_clk_fini(ldev);
+}
diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
new file mode 100644
index 000000000000..41499f28ae13
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_device.h
@@ -0,0 +1,129 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DEVICE_H__
+#define __LIMA_DEVICE_H__
+
+#include <drm/drm_device.h>
+#include <linux/delay.h>
+
+#include "lima_sched.h"
+
+/* Supported GPU models; values index the per-GPU arrays in struct lima_ip_desc. */
+enum lima_gpu_id {
+	lima_gpu_mali400 = 0,
+	lima_gpu_mali450,
+	lima_gpu_num,	/* number of models, used as array size */
+};
+
+/*
+ * Index of each IP block in lima_device.ip[].
+ *
+ * The ppmmu0..7, pp0..7 and l2_cache0..2 runs must stay contiguous and in
+ * ascending order: the driver reaches individual members arithmetically
+ * (e.g. lima_ip_pp0 + i, lima_ip_ppmmu0 + i, lima_ip_l2_cache1 + (i >> 2)).
+ */
+enum lima_ip_id {
+	lima_ip_pmu,
+	lima_ip_gpmmu,
+	lima_ip_ppmmu0,
+	lima_ip_ppmmu1,
+	lima_ip_ppmmu2,
+	lima_ip_ppmmu3,
+	lima_ip_ppmmu4,
+	lima_ip_ppmmu5,
+	lima_ip_ppmmu6,
+	lima_ip_ppmmu7,
+	lima_ip_gp,
+	lima_ip_pp0,
+	lima_ip_pp1,
+	lima_ip_pp2,
+	lima_ip_pp3,
+	lima_ip_pp4,
+	lima_ip_pp5,
+	lima_ip_pp6,
+	lima_ip_pp7,
+	lima_ip_l2_cache0,
+	lima_ip_l2_cache1,
+	lima_ip_l2_cache2,
+	lima_ip_dlbu,
+	lima_ip_bcast,
+	lima_ip_pp_bcast,
+	lima_ip_ppmmu_bcast,
+	lima_ip_num,	/* number of IP slots, used as array size */
+};
+
+struct lima_device;
+
+/* Runtime state of one IP block; one slot per enum lima_ip_id in lima_device. */
+struct lima_ip {
+	struct lima_device *dev;
+	enum lima_ip_id id;
+	bool present;	/* set once the IP's init hook has succeeded */
+
+	void __iomem *iomem;	/* device register base plus this IP's offset */
+	int irq;	/* only assigned for IPs that have an IRQ name */
+
+	/* per-IP-type private data; which member is valid depends on the IP type */
+	union {
+		/* gp/pp */
+		bool async_reset;
+		/* l2 cache */
+		spinlock_t lock;
+	} data;
+};
+
+/* Scheduler pipes; values index lima_device.pipe[] and lima_ctx.context[]. */
+enum lima_pipe_id {
+	lima_pipe_gp,
+	lima_pipe_pp,
+	lima_pipe_num,	/* number of pipes, used as array size */
+};
+
+/* Top-level driver state for one GPU instance. */
+struct lima_device {
+	struct device *dev;
+	struct drm_device *ddev;
+	struct platform_device *pdev;
+
+	enum lima_gpu_id id;
+	int num_pp;
+
+	void __iomem *iomem;	/* mapping of platform MEM resource 0 */
+	struct clk *clk_bus;	/* "bus" clock */
+	struct clk *clk_gpu;	/* "core" clock */
+	struct reset_control *reset;	/* optional; NULL when there is none */
+	struct regulator *regulator;	/* optional "mali" supply; NULL when absent */
+
+	struct lima_ip ip[lima_ip_num];
+	struct lima_sched_pipe pipe[lima_pipe_num];
+
+	struct lima_vm *empty_vm;
+	uint64_t va_start;
+	uint64_t va_end;
+
+	/* one LIMA_PAGE_SIZE write-combined buffer, allocated only on mali450 */
+	u32 *dlbu_cpu;
+	dma_addr_t dlbu_dma;
+};
+
+/* Fetch the lima_device stored in the DRM device's dev_private pointer. */
+static inline struct lima_device *
+to_lima_dev(struct drm_device *dev)
+{
+	struct lima_device *ldev = dev->dev_private;
+
+	return ldev;
+}
+
+int lima_device_init(struct lima_device *ldev);
+void lima_device_fini(struct lima_device *ldev);
+
+const char *lima_ip_name(struct lima_ip *ip);
+
+typedef int (*lima_poll_func_t)(struct lima_ip *);
+
+/*
+ * Poll func(ip) until it returns non-zero or the timeout expires.
+ *
+ * @ip:         passed through to @func
+ * @func:       condition callback; non-zero means "done"
+ * @sleep_us:   upper bound of the sleep between polls; 0 busy-polls
+ * @timeout_us: overall timeout; 0 polls forever
+ *
+ * Returns 0 once @func reports done, -ETIMEDOUT otherwise.  May sleep when
+ * @sleep_us is non-zero.
+ */
+static inline int lima_poll_timeout(struct lima_ip *ip, lima_poll_func_t func,
+				    int sleep_us, int timeout_us)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
+
+	might_sleep_if(sleep_us);
+	for (;;) {
+		if (func(ip))
+			return 0;
+
+		if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) {
+			/*
+			 * The deadline may have passed while we were sleeping
+			 * or preempted after the check above, so look one
+			 * last time before reporting a timeout.
+			 */
+			return func(ip) ? 0 : -ETIMEDOUT;
+		}
+
+		if (sleep_us)
+			usleep_range((sleep_us >> 2) + 1, sleep_us);
+	}
+}
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
new file mode 100644
index 000000000000..b7739712f235
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_dlbu.c
@@ -0,0 +1,56 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/io.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_dlbu.h"
+#include "lima_vm.h"
+#include "lima_regs.h"
+
+/*
+ * Register accessors for the DLBU.  Both expand to an expression using a
+ * variable named "ip", which must be in scope at the call site.
+ */
+#define dlbu_write(reg, data) writel(data, ip->iomem + reg)
+#define dlbu_read(reg) readl(ip->iomem + reg)
+
+/*
+ * Write the DLBU PP enable mask with one bit set for each of the first
+ * num_pp processors of the PP pipe (bit n for ppN).
+ */
+void lima_dlbu_enable(struct lima_device *dev, int num_pp)
+{
+	struct lima_ip *ip = dev->ip + lima_ip_dlbu;
+	struct lima_sched_pipe *pp_pipe = dev->pipe + lima_pipe_pp;
+	int enable_mask = 0;
+	int n;
+
+	for (n = 0; n < num_pp; n++)
+		enable_mask |= 1 << (pp_pipe->processor[n]->id - lima_ip_pp0);
+
+	dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, enable_mask);
+}
+
+/* Clear the DLBU PP enable mask. */
+void lima_dlbu_disable(struct lima_device *dev)
+{
+	struct lima_ip *ip = &dev->ip[lima_ip_dlbu];
+
+	dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, 0);
+}
+
+/*
+ * Program the four per-task DLBU registers from reg[0..3], in this order:
+ * tile list virtual base address, FB dimensions, tile list configuration,
+ * start tile position.
+ */
+void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
+{
+	u32 tllist_vbase = reg[0];
+	u32 fb_dim = reg[1];
+	u32 tllist_conf = reg[2];
+	u32 start_tile_pos = reg[3];
+
+	dlbu_write(LIMA_DLBU_TLLIST_VBASEADDR, tllist_vbase);
+	dlbu_write(LIMA_DLBU_FB_DIM, fb_dim);
+	dlbu_write(LIMA_DLBU_TLLIST_CONF, tllist_conf);
+	dlbu_write(LIMA_DLBU_START_TILE_POS, start_tile_pos);
+}
+
+/*
+ * Point the DLBU at the device's DLBU page: the DMA address with bit 0 set
+ * goes into the master tile list physical address register and
+ * LIMA_VA_RESERVE_DLBU into the virtual address register.  Always returns 0.
+ */
+int lima_dlbu_init(struct lima_ip *ip)
+{
+	dlbu_write(LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR, ip->dev->dlbu_dma | 1);
+	dlbu_write(LIMA_DLBU_MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
+
+	return 0;
+}
+
+/* Teardown hook for the DLBU; intentionally does nothing. */
+void lima_dlbu_fini(struct lima_ip *ip)
+{
+}
diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
new file mode 100644
index 000000000000..60cba387cf30
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_dlbu.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DLBU_H__
+#define __LIMA_DLBU_H__
+
+struct lima_ip;
+struct lima_device;
+
+void lima_dlbu_enable(struct lima_device *dev, int num_pp);
+void lima_dlbu_disable(struct lima_device *dev);
+
+void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
+
+int lima_dlbu_init(struct lima_ip *ip);
+void lima_dlbu_fini(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
new file mode 100644
index 000000000000..e93bce16ee10
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -0,0 +1,353 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/log2.h>
+#include <drm/drm_prime.h>
+#include <drm/lima_drm.h>
+
+#include "lima_drv.h"
+#include "lima_gem.h"
+#include "lima_gem_prime.h"
+#include "lima_vm.h"
+
+/* Scheduler tunables, exposed below as module parameters with mode 0444. */
+int lima_sched_timeout_ms = 0;
+int lima_sched_max_tasks = 32;
+
+MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
+module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
+
+MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
+module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
+
+/*
+ * LIMA_INFO ioctl: report the GPU model and the number of PP processors.
+ * Returns -ENODEV when the device id is not one of the known models.
+ */
+static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct drm_lima_info *out = data;
+
+	if (ldev->id == lima_gpu_mali400)
+		out->gpu_id = LIMA_INFO_GPU_MALI400;
+	else if (ldev->id == lima_gpu_mali450)
+		out->gpu_id = LIMA_INFO_GPU_MALI450;
+	else
+		return -ENODEV;
+
+	out->num_pp = ldev->pipe[lima_pipe_pp].num_processor;
+	out->valid = 0;
+
+	return 0;
+}
+
+/*
+ * LIMA_GEM_CREATE ioctl: create a buffer object and return its handle.
+ * Any flag bit or a zero size is rejected with -EINVAL.
+ */
+static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_create *req = data;
+
+	if (req->flags || req->size == 0)
+		return -EINVAL;
+
+	return lima_gem_create_handle(dev, file, req->size, req->flags, &req->handle);
+}
+
+/* LIMA_GEM_INFO ioctl: fill in the GPU va and mmap offset of a handle. */
+static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_info *req = data;
+	int err;
+
+	err = lima_gem_get_info(file, req->handle, &req->va, &req->offset);
+	return err;
+}
+
+/*
+ * LIMA_GEM_SUBMIT ioctl: validate the request, copy the bo list and the
+ * register frame from user space, then pass the task to lima_gem_submit().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_submit *args = data;
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct lima_drm_priv *priv = file->driver_priv;
+	struct drm_lima_gem_submit_bo *bos;
+	struct lima_sched_pipe *pipe;
+	struct lima_sched_task *task;
+	struct lima_ctx *ctx;
+	struct lima_submit submit = {0};
+	size_t size;
+	int err = 0;
+
+	if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
+		return -EINVAL;
+
+	if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE))
+		return -EINVAL;
+
+	/* the frame size must match the selected pipe's frame size exactly */
+	pipe = ldev->pipe + args->pipe;
+	if (args->frame_size != pipe->frame_size)
+		return -EINVAL;
+
+	/*
+	 * One zeroed allocation holds two arrays back to back: nr_bos user
+	 * bo descriptors, then nr_bos lima_bo pointers (submit.lbos below).
+	 */
+	bos = kvcalloc(args->nr_bos, sizeof(*submit.bos) + sizeof(*submit.lbos), GFP_KERNEL);
+	if (!bos)
+		return -ENOMEM;
+
+	size = args->nr_bos * sizeof(*submit.bos);
+	if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
+		err = -EFAULT;
+		goto out0;
+	}
+
+	task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
+	if (!task) {
+		err = -ENOMEM;
+		goto out0;
+	}
+
+	/* the register frame is stored directly behind the task struct */
+	task->frame = task + 1;
+	if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
+		err = -EFAULT;
+		goto out1;
+	}
+
+	err = pipe->task_validate(pipe, task);
+	if (err)
+		goto out1;
+
+	ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
+	if (!ctx) {
+		err = -ENOENT;
+		goto out1;
+	}
+
+	submit.pipe = args->pipe;
+	submit.bos = bos;
+	submit.lbos = (void *)bos + size;
+	submit.nr_bos = args->nr_bos;
+	submit.task = task;
+	submit.ctx = ctx;
+	submit.flags = args->flags;
+	submit.in_sync[0] = args->in_sync[0];
+	submit.in_sync[1] = args->in_sync[1];
+	submit.out_sync = args->out_sync;
+
+	err = lima_gem_submit(file, &submit);
+
+	lima_ctx_put(ctx);
+out1:
+	/* the task is freed here only when the submit failed */
+	if (err)
+		kmem_cache_free(pipe->task_slab, task);
+out0:
+	kvfree(bos);
+	return err;
+}
+
+/*
+ * LIMA_GEM_WAIT ioctl: wait on a buffer object.  Only the READ and WRITE
+ * op bits are accepted; anything else yields -EINVAL.
+ */
+static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_wait *req = data;
+
+	if ((req->op & ~(LIMA_GEM_WAIT_READ | LIMA_GEM_WAIT_WRITE)) != 0)
+		return -EINVAL;
+
+	return lima_gem_wait(file, req->handle, req->op, req->timeout_ns);
+}
+
+/*
+ * LIMA_CTX ioctl: create or free a context in this file's context
+ * manager, depending on args->op.  Unknown ops yield -EINVAL.
+ */
+static int lima_ioctl_ctx(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct lima_drm_priv *priv = file->driver_priv;
+	struct drm_lima_ctx *args = data;
+
+	switch (args->op) {
+	case LIMA_CTX_OP_CREATE:
+		return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
+	case LIMA_CTX_OP_FREE:
+		return lima_ctx_free(&priv->ctx_mgr, args->id);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Per-open setup: allocate the file private data, create its GPU vm and
+ * initialise its context manager.  Returns 0 or -ENOMEM.
+ */
+static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct lima_drm_priv *fpriv;
+
+	fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+	if (!fpriv)
+		return -ENOMEM;
+
+	fpriv->vm = lima_vm_create(ldev);
+	if (!fpriv->vm) {
+		kfree(fpriv);
+		return -ENOMEM;
+	}
+
+	lima_ctx_mgr_init(&fpriv->ctx_mgr);
+	file->driver_priv = fpriv;
+
+	return 0;
+}
+
+/* Per-open teardown: the reverse of lima_drm_driver_open(). */
+static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct lima_drm_priv *fpriv = file->driver_priv;
+
+	lima_ctx_mgr_fini(&fpriv->ctx_mgr);
+	lima_vm_put(fpriv->vm);
+	kfree(fpriv);
+}
+
+/* Driver ioctl table; every entry is flagged DRM_AUTH | DRM_RENDER_ALLOW. */
+static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(LIMA_INFO, lima_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_CTX, lima_ioctl_ctx, DRM_AUTH|DRM_RENDER_ALLOW),
+};
+
+/* File operations: generic DRM entry points, with mmap routed to lima_gem_mmap(). */
+static const struct file_operations lima_drm_driver_fops = {
+	.owner              = THIS_MODULE,
+	.open               = drm_open,
+	.release            = drm_release,
+	.unlocked_ioctl     = drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl       = drm_compat_ioctl,
+#endif
+	.mmap               = lima_gem_mmap,
+};
+
+/*
+ * DRM driver description: a render-only GEM driver with PRIME and syncobj
+ * support (see driver_features), wired to the lima GEM/PRIME callbacks.
+ */
+static struct drm_driver lima_drm_driver = {
+	.driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | DRIVER_SYNCOBJ,
+	.open               = lima_drm_driver_open,
+	.postclose          = lima_drm_driver_postclose,
+	.ioctls             = lima_drm_driver_ioctls,
+	.num_ioctls         = ARRAY_SIZE(lima_drm_driver_ioctls),
+	.fops               = &lima_drm_driver_fops,
+	.gem_free_object_unlocked = lima_gem_free_object,
+	.gem_open_object    = lima_gem_object_open,
+	.gem_close_object   = lima_gem_object_close,
+	.gem_vm_ops         = &lima_gem_vm_ops,
+	.name               = "lima",
+	.desc               = "lima DRM",
+	.date               = "20190217",
+	.major              = 1,
+	.minor              = 0,
+	.patchlevel         = 0,
+
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
+	.gem_prime_mmap = lima_gem_prime_mmap,
+};
+
+/*
+ * Platform probe: allocate the lima device (devm-managed), create the DRM
+ * device, initialise the GPU and register with the DRM core.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far is undone in reverse order.
+ */
+static int lima_pdev_probe(struct platform_device *pdev)
+{
+	struct lima_device *ldev;
+	struct drm_device *ddev;
+	int err;
+
+	ldev = devm_kzalloc(&pdev->dev, sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return -ENOMEM;
+
+	ldev->pdev = pdev;
+	ldev->dev = &pdev->dev;
+	/* the match data of dt_match[] carries the GPU model id */
+	ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev);
+
+	platform_set_drvdata(pdev, ldev);
+
+	/* Allocate and initialize the DRM device. */
+	ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev);
+	if (IS_ERR(ddev))
+		return PTR_ERR(ddev);
+
+	ddev->dev_private = ldev;
+	ldev->ddev = ddev;
+
+	err = lima_device_init(ldev);
+	if (err) {
+		dev_err(&pdev->dev, "Fatal error during GPU init\n");
+		goto err_out0;
+	}
+
+	/*
+	 * Register the DRM device with the core and the connectors with
+	 * sysfs.
+	 */
+	err = drm_dev_register(ddev, 0);
+	if (err < 0)
+		goto err_out1;
+
+	return 0;
+
+err_out1:
+	lima_device_fini(ldev);
+err_out0:
+	drm_dev_put(ddev);
+	return err;
+}
+
+/* Platform remove: undo lima_pdev_probe() in reverse order. */
+static int lima_pdev_remove(struct platform_device *pdev)
+{
+	struct lima_device *ldev = platform_get_drvdata(pdev);
+	struct drm_device *drm = ldev->ddev;
+
+	drm_dev_unregister(drm);
+	lima_device_fini(ldev);
+	drm_dev_put(drm);
+
+	return 0;
+}
+
+/* Device-tree match table; .data holds the lima_gpu_id read back in probe. */
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "arm,mali-400", .data = (void *)lima_gpu_mali400 },
+	{ .compatible = "arm,mali-450", .data = (void *)lima_gpu_mali450 },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+/* Platform driver glue, bound through the dt_match table. */
+static struct platform_driver lima_platform_driver = {
+	.probe      = lima_pdev_probe,
+	.remove     = lima_pdev_remove,
+	.driver     = {
+		.name   = "lima",
+		.of_match_table = dt_match,
+	},
+};
+
+/*
+ * Sanitise sched_max_tasks: values below 4 become 4, anything else is
+ * rounded up to the next power of two.
+ */
+static void lima_check_module_param(void)
+{
+	if (lima_sched_max_tasks >= 4)
+		lima_sched_max_tasks = roundup_pow_of_two(lima_sched_max_tasks);
+	else
+		lima_sched_max_tasks = 4;
+}
+
+/* Module init: fix up parameters, set up the sched slab, register the driver. */
+static int __init lima_init(void)
+{
+	int err;
+
+	lima_check_module_param();
+
+	err = lima_sched_slab_init();
+	if (err)
+		return err;
+
+	err = platform_driver_register(&lima_platform_driver);
+	if (!err)
+		return 0;
+
+	/* registration failed: undo the slab setup */
+	lima_sched_slab_fini();
+	return err;
+}
+module_init(lima_init);
+
+/* Module exit: the reverse of lima_init(). */
+static void __exit lima_exit(void)
+{
+	platform_driver_unregister(&lima_platform_driver);
+
+	lima_sched_slab_fini();
+}
+module_exit(lima_exit);
+
+MODULE_AUTHOR("Lima Project Developers");
+MODULE_DESCRIPTION("Lima DRM Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h
new file mode 100644
index 000000000000..640a548cd617
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_drv.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DRV_H__
+#define __LIMA_DRV_H__
+
+#include <drm/drmP.h>
+
+#include "lima_ctx.h"
+
+extern int lima_sched_timeout_ms;
+extern int lima_sched_max_tasks;
+
+struct lima_vm;
+struct lima_bo;
+struct lima_sched_task;
+
+struct drm_lima_gem_submit_bo;
+
+/* Per-open-file driver state, stored in drm_file->driver_priv. */
+struct lima_drm_priv {
+	struct lima_vm *vm;
+	struct lima_ctx_mgr ctx_mgr;
+};
+
+/* Kernel-side description of one task submission. */
+struct lima_submit {
+	struct lima_ctx *ctx;
+	int pipe;
+	u32 flags;
+
+	/* bo descriptors as passed by user space, and the matching bo pointers */
+	struct drm_lima_gem_submit_bo *bos;
+	struct lima_bo **lbos;
+	u32 nr_bos;
+
+	/* syncobj handles */
+	u32 in_sync[2];
+	u32 out_sync;
+
+	struct lima_sched_task *task;
+};
+
+/* Return the lima private data attached to an open DRM file. */
+static inline struct lima_drm_priv *
+to_lima_drm_priv(struct drm_file *file)
+{
+	struct lima_drm_priv *fpriv = file->driver_priv;
+
+	return fpriv;
+}
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
new file mode 100644
index 000000000000..666960345566
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -0,0 +1,379 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <drm/drmP.h>
+#include <drm/drm_syncobj.h>
+#include <drm/drm_utils.h>
+#include <linux/sync_file.h>
+#include <linux/pfn_t.h>
+
+#include <drm/lima_drm.h>
+
+#include "lima_drv.h"
+#include "lima_gem.h"
+#include "lima_gem_prime.h"
+#include "lima_vm.h"
+#include "lima_object.h"
+
+/*
+ * Create a buffer object of @size bytes and publish it to @file as
+ * *@handle.  Returns 0 on success or a negative errno.
+ */
+int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
+			   u32 size, u32 flags, u32 *handle)
+{
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct lima_bo *bo;
+	int ret;
+
+	bo = lima_bo_create(ldev, size, flags, NULL, NULL);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ret = drm_gem_handle_create(file, &bo->gem, handle);
+
+	/*
+	 * Drop the reference taken at creation; on success the handle
+	 * holds its own.
+	 */
+	drm_gem_object_put_unlocked(&bo->gem);
+
+	return ret;
+}
+
+/* GEM free callback: complain if va mappings remain, then destroy the bo. */
+void lima_gem_free_object(struct drm_gem_object *obj)
+{
+	struct lima_bo *bo = to_lima_bo(obj);
+	bool still_mapped = !list_empty(&bo->va);
+
+	if (still_mapped)
+		dev_err(obj->dev->dev, "lima gem free bo still has va\n");
+
+	lima_bo_destroy(bo);
+}
+
+/* GEM open callback: add the bo to the opening file's vm. */
+int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
+{
+	struct lima_drm_priv *fpriv = to_lima_drm_priv(file);
+
+	return lima_vm_bo_add(fpriv->vm, to_lima_bo(obj), true);
+}
+
+/* GEM close callback: remove the bo from the closing file's vm. */
+void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file)
+{
+	struct lima_drm_priv *fpriv = to_lima_drm_priv(file);
+
+	lima_vm_bo_del(fpriv->vm, to_lima_bo(obj));
+}
+
+/*
+ * Look up @handle and report the bo's GPU virtual address in *@va and
+ * its mmap offset in *@offset.
+ *
+ * Returns 0 on success, -ENOENT for an unknown handle, or the error from
+ * creating the mmap offset (in which case *@offset is left untouched).
+ */
+int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
+{
+	struct lima_drm_priv *fpriv = to_lima_drm_priv(file);
+	struct drm_gem_object *gem;
+	int ret;
+
+	gem = drm_gem_object_lookup(file, handle);
+	if (!gem)
+		return -ENOENT;
+
+	*va = lima_vm_get_va(fpriv->vm, to_lima_bo(gem));
+
+	ret = drm_gem_create_mmap_offset(gem);
+	if (ret == 0)
+		*offset = drm_vma_node_offset_addr(&gem->vma_node);
+
+	drm_gem_object_put_unlocked(gem);
+	return ret;
+}
+
+/* Page-fault handler: insert the bo page that backs the faulting address. */
+static vm_fault_t lima_gem_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *gem = vma->vm_private_data;
+	struct lima_bo *bo = to_lima_bo(gem);
+	pgoff_t page_idx;
+	pfn_t pfn;
+
+	/* vmf->pgoff carries the fake mmap offset, so derive the index here */
+	page_idx = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[page_idx]), PFN_DEV);
+
+	return vmf_insert_mixed(vma, vmf->address, pfn);
+}
+
+/* vm operations for mmap'ed bos: lima fault handler plus the DRM GEM open/close helpers. */
+const struct vm_operations_struct lima_gem_vm_ops = {
+	.fault = lima_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+/*
+ * Turn @vma into a write-combined VM_MIXEDMAP mapping.  The page
+ * protection is derived from the flags as they were on entry.
+ */
+void lima_set_vma_flags(struct vm_area_struct *vma)
+{
+	pgprot_t base_prot = vm_get_page_prot(vma->vm_flags);
+
+	vma->vm_flags = (vma->vm_flags | VM_MIXEDMAP) & ~VM_PFNMAP;
+	vma->vm_page_prot = pgprot_writecombine(base_prot);
+}
+
+/* mmap file operation: generic GEM mmap followed by the lima vma flag fixup. */
+int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int err = drm_gem_mmap(filp, vma);
+
+	if (!err)
+		lima_set_vma_flags(vma);
+
+	return err;
+}
+
+/*
+ * Prepare @bo's reservation object for @task and, for implicit sync, add
+ * the fences already attached to the bo as dependencies of @task.
+ *
+ * @write:    the task writes the bo
+ * @explicit: dependencies come from user space, so no bo fence is added
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
+			    bool write, bool explicit)
+{
+	int err = 0;
+
+	/* a reader will add a shared fence later, so reserve a slot for it */
+	if (!write) {
+		err = reservation_object_reserve_shared(bo->gem.resv, 1);
+		if (err)
+			return err;
+	}
+
+	/* explicit sync use user passed dep fence */
+	if (explicit)
+		return 0;
+
+	/* implicit sync use bo fence in resv obj */
+	if (write) {
+		unsigned nr_fences;
+		struct dma_fence **fences;
+		int i;
+
+		/* a writer depends on every fence currently on the bo */
+		err = reservation_object_get_fences_rcu(
+			bo->gem.resv, NULL, &nr_fences, &fences);
+		if (err || !nr_fences)
+			return err;
+
+		for (i = 0; i < nr_fences; i++) {
+			err = lima_sched_task_add_dep(task, fences[i]);
+			if (err)
+				break;
+		}
+
+		/* for error case free remaining fences */
+		for ( ; i < nr_fences; i++)
+			dma_fence_put(fences[i]);
+
+		kfree(fences);
+	}
+	else {
+		/* a reader depends on the exclusive fence only */
+		struct dma_fence *fence;
+		fence = reservation_object_get_excl_rcu(bo->gem.resv);
+		if (fence) {
+			err = lima_sched_task_add_dep(task, fence);
+			if (err)
+				dma_fence_put(fence);
+		}
+	}
+
+	return err;
+}
+
+/*
+ * Lock the reservation objects of all @nr_bos entries of @bos under one
+ * ww acquire context.
+ *
+ * On -EDEADLK every lock taken so far is dropped, the contended lock is
+ * taken with the slow path, and the whole sequence is retried.
+ *
+ * Returns 0 with all locks held, or a negative errno with none held and
+ * @ctx finished.
+ */
+static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
+			     struct ww_acquire_ctx *ctx)
+{
+	int i, ret = 0, contended, slow_locked = -1;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (i = 0; i < nr_bos; i++) {
+		/* already held from the slow path of the previous round */
+		if (i == slow_locked) {
+			slow_locked = -1;
+			continue;
+		}
+
+		ret = ww_mutex_lock_interruptible(&bos[i]->gem.resv->lock, ctx);
+		if (ret < 0) {
+			contended = i;
+			goto err;
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+
+err:
+	/* drop everything locked in this round, in reverse order */
+	for (i--; i >= 0; i--)
+		ww_mutex_unlock(&bos[i]->gem.resv->lock);
+
+	/* still >= 0 only if the loop failed before reaching that index */
+	if (slow_locked >= 0)
+		ww_mutex_unlock(&bos[slow_locked]->gem.resv->lock);
+
+	if (ret == -EDEADLK) {
+		/* we lost out in a seqno race, lock and retry.. */
+		ret = ww_mutex_lock_slow_interruptible(
+			&bos[contended]->gem.resv->lock, ctx);
+		if (!ret) {
+			slow_locked = contended;
+			goto retry;
+		}
+	}
+	ww_acquire_fini(ctx);
+
+	return ret;
+}
+
+/* Release every reservation lock taken by lima_gem_lock_bos() and finish @ctx. */
+static void lima_gem_unlock_bos(struct lima_bo **bos, u32 nr_bos,
+				struct ww_acquire_ctx *ctx)
+{
+	u32 n = 0;
+
+	while (n < nr_bos) {
+		ww_mutex_unlock(&bos[n]->gem.resv->lock);
+		n++;
+	}
+
+	ww_acquire_fini(ctx);
+}
+
+/*
+ * Add the fence behind each non-zero in_sync syncobj handle as a
+ * dependency of the submitted task.  Returns 0 or a negative errno.
+ */
+static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(submit->in_sync); idx++) {
+		struct dma_fence *dep = NULL;
+		u32 syncobj = submit->in_sync[idx];
+		int ret;
+
+		/* a zero handle means the slot is unused */
+		if (syncobj == 0)
+			continue;
+
+		ret = drm_syncobj_find_fence(file, syncobj, 0, 0, &dep);
+		if (ret)
+			return ret;
+
+		ret = lima_sched_task_add_dep(submit->task, dep);
+		if (ret) {
+			/* the dependency was not taken over, drop our reference */
+			dma_fence_put(dep);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Submit the task described by @submit on behalf of @file.
+ *
+ * Steps: look up the optional out syncobj, resolve and vm-pin every bo,
+ * lock all reservation objects, initialise the task, collect explicit and
+ * implicit dependencies, queue the task and attach its fence to the bos
+ * and to the out syncobj.
+ *
+ * Returns 0 on success or a negative errno; on error every step already
+ * taken is undone.
+ */
+int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
+{
+	int i, err = 0;
+	struct ww_acquire_ctx ctx;
+	struct lima_drm_priv *priv = to_lima_drm_priv(file);
+	struct lima_vm *vm = priv->vm;
+	struct drm_syncobj *out_sync = NULL;
+	struct dma_fence *fence;
+	struct lima_bo **bos = submit->lbos;
+
+	if (submit->out_sync) {
+		out_sync = drm_syncobj_find(file, submit->out_sync);
+		if (!out_sync)
+			return -ENOENT;
+	}
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct drm_gem_object *obj;
+		struct lima_bo *bo;
+
+		obj = drm_gem_object_lookup(file, submit->bos[i].handle);
+		if (!obj) {
+			err = -ENOENT;
+			goto err_out0;
+		}
+
+		bo = to_lima_bo(obj);
+
+		/*
+		 * Take an extra reference on the GPU va mapping so the bo
+		 * cannot be unmapped while the task executes; it is dropped
+		 * again when the task is done.
+		 */
+		err = lima_vm_bo_add(vm, bo, false);
+		if (err) {
+			drm_gem_object_put_unlocked(obj);
+			goto err_out0;
+		}
+
+		/* only fully set up entries are recorded, see err_out0 */
+		bos[i] = bo;
+	}
+
+	err = lima_gem_lock_bos(bos, submit->nr_bos, &ctx);
+	if (err)
+		goto err_out0;
+
+	err = lima_sched_task_init(
+		submit->task, submit->ctx->context + submit->pipe,
+		bos, submit->nr_bos, vm);
+	if (err)
+		goto err_out1;
+
+	err = lima_gem_add_deps(file, submit);
+	if (err)
+		goto err_out2;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		err = lima_gem_sync_bo(
+			submit->task, bos[i],
+			submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE,
+			submit->flags & LIMA_SUBMIT_FLAG_EXPLICIT_FENCE);
+		if (err)
+			goto err_out2;
+	}
+
+	fence = lima_sched_context_queue_task(
+		submit->ctx->context + submit->pipe, submit->task);
+
+	/* writers install the exclusive fence, readers a shared one */
+	for (i = 0; i < submit->nr_bos; i++) {
+		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
+			reservation_object_add_excl_fence(bos[i]->gem.resv, fence);
+		else
+			reservation_object_add_shared_fence(bos[i]->gem.resv, fence);
+	}
+
+	lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
+
+	/* drop the lookup references; the vm references stay with the task */
+	for (i = 0; i < submit->nr_bos; i++)
+		drm_gem_object_put_unlocked(&bos[i]->gem);
+
+	if (out_sync) {
+		drm_syncobj_replace_fence(out_sync, fence);
+		drm_syncobj_put(out_sync);
+	}
+
+	dma_fence_put(fence);
+
+	return 0;
+
+err_out2:
+	lima_sched_task_fini(submit->task);
+err_out1:
+	lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
+err_out0:
+	/* the first NULL entry marks where the lookup loop stopped */
+	for (i = 0; i < submit->nr_bos; i++) {
+		if (!bos[i])
+			break;
+		lima_vm_bo_del(vm, bos[i]);
+		drm_gem_object_put_unlocked(&bos[i]->gem);
+	}
+	if (out_sync)
+		drm_syncobj_put(out_sync);
+	return err;
+}
+
+/*
+ * Wait for the fences attached to the bo behind @handle.
+ *
+ * @op:         LIMA_GEM_WAIT_READ and/or LIMA_GEM_WAIT_WRITE; 0 returns
+ *              immediately with success
+ * @timeout_ns: absolute timeout, converted to jiffies
+ *
+ * Returns 0 once the bo is idle, -ETIMEDOUT when a non-zero timeout
+ * expired, -EBUSY when the bo is busy and the timeout was zero, or
+ * another negative errno from the wait helper.
+ */
+int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns)
+{
+	bool write = op & LIMA_GEM_WAIT_WRITE;
+	long ret, timeout;
+
+	if (!op)
+		return 0;
+
+	timeout = drm_timeout_abs_to_jiffies(timeout_ns);
+
+	/*
+	 * drm_gem_reservation_object_wait() returns 0 on success and -ETIME
+	 * when the wait timed out, so only -ETIME is translated here.
+	 */
+	ret = drm_gem_reservation_object_wait(file, handle, write, timeout);
+	if (ret == -ETIME)
+		ret = timeout ? -ETIMEDOUT : -EBUSY;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/lima/lima_gem.h b/drivers/gpu/drm/lima/lima_gem.h
new file mode 100644
index 000000000000..f1c4658100a8
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem.h
@@ -0,0 +1,25 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_GEM_H__
+#define __LIMA_GEM_H__
+
+struct lima_bo;
+struct lima_submit;
+
+extern const struct vm_operations_struct lima_gem_vm_ops;
+
+struct lima_bo *lima_gem_create_bo(struct drm_device *dev, u32 size, u32 flags);
+int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
+			   u32 size, u32 flags, u32 *handle);
+void lima_gem_free_object(struct drm_gem_object *obj);
+int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file);
+void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file);
+int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset);
+int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+int lima_gem_submit(struct drm_file *file, struct lima_submit *submit);
+int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns);
+
+void lima_set_vma_flags(struct vm_area_struct *vma);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_gem_prime.c b/drivers/gpu/drm/lima/lima_gem_prime.c
new file mode 100644
index 000000000000..fe8348a055f6
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem_prime.c
@@ -0,0 +1,47 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/dma-buf.h>
+#include <drm/drm_prime.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+
+#include "lima_device.h"
+#include "lima_object.h"
+#include "lima_gem.h"
+#include "lima_gem_prime.h"
+
+/*
+ * PRIME import: wrap an imported sg table in a lima bo that shares the
+ * dma-buf's reservation object.  Returns the GEM object or an ERR_PTR.
+ */
+struct drm_gem_object *lima_gem_prime_import_sg_table(
+	struct drm_device *dev, struct dma_buf_attachment *attach,
+	struct sg_table *sgt)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+	struct lima_bo *bo;
+
+	bo = lima_bo_create(to_lima_dev(dev), dmabuf->size, 0, sgt,
+			    dmabuf->resv);
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	return &bo->gem;
+}
+
+/* PRIME export: build an sg table covering all pages of the bo. */
+struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	int page_count = obj->size >> PAGE_SHIFT;
+	struct lima_bo *bo = to_lima_bo(obj);
+
+	return drm_prime_pages_to_sg(bo->pages, page_count);
+}
+
+/* PRIME mmap: map the whole object, then apply the lima vma flag fixup. */
+int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	int err = drm_gem_mmap_obj(obj, obj->size, vma);
+
+	if (!err)
+		lima_set_vma_flags(vma);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/lima/lima_gem_prime.h b/drivers/gpu/drm/lima/lima_gem_prime.h
new file mode 100644
index 000000000000..ceb1be9840a5
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem_prime.h
@@ -0,0 +1,13 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_GEM_PRIME_H__
+#define __LIMA_GEM_PRIME_H__
+
+struct drm_gem_object *lima_gem_prime_import_sg_table(
+	struct drm_device *dev, struct dma_buf_attachment *attach,
+	struct sg_table *sgt);
+struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj);
+int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_gp.c b/drivers/gpu/drm/lima/lima_gp.c
new file mode 100644
index 000000000000..4f4e9f5f7e19
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gp.c
@@ -0,0 +1,282 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <drm/lima_drm.h>
+
+#include "lima_device.h"
+#include "lima_gp.h"
+#include "lima_regs.h"
+
+/* Register accessors; both expect a `struct lima_ip *ip` in the calling scope. */
+#define gp_write(reg, data) writel(data, ip->iomem + reg)
+#define gp_read(reg) readl(ip->iomem + reg)
+
+/*
+ * GP interrupt handler.  An error interrupt masks all GP interrupts and
+ * flags the pipe as failed; otherwise the task counts as done once an
+ * end-of-command-list interrupt arrived and neither VS nor PLBU is still
+ * active.
+ */
+static irqreturn_t lima_gp_irq_handler(int irq, void *data)
+{
+	struct lima_ip *ip = data;
+	struct lima_device *dev = ip->dev;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+	u32 state = gp_read(LIMA_GP_INT_STAT);
+	u32 status = gp_read(LIMA_GP_STATUS);
+	bool done;
+
+	/* shared irq line: nothing pending means the interrupt is not ours */
+	if (!state)
+		return IRQ_NONE;
+
+	if (state & LIMA_GP_IRQ_MASK_ERROR) {
+		dev_err(dev->dev, "gp error irq state=%x status=%x\n",
+			state, status);
+
+		/* mask all interrupts before hard reset */
+		gp_write(LIMA_GP_INT_MASK, 0);
+
+		pipe->error = true;
+		done = true;
+	} else {
+		bool cmd_lst_end = state & (LIMA_GP_IRQ_VS_END_CMD_LST |
+					    LIMA_GP_IRQ_PLBU_END_CMD_LST);
+		bool busy = status & (LIMA_GP_STATUS_VS_ACTIVE |
+				      LIMA_GP_STATUS_PLBU_ACTIVE);
+
+		done = cmd_lst_end && !busy;
+	}
+
+	/* acknowledge exactly the interrupts seen above */
+	gp_write(LIMA_GP_INT_CLEAR, state);
+
+	if (done)
+		lima_sched_pipe_task_done(pipe);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Kick off a GP soft reset without waiting for it; a no-op while a
+ * previous asynchronous reset is still pending.
+ */
+static void lima_gp_soft_reset_async(struct lima_ip *ip)
+{
+	if (!ip->data.async_reset) {
+		gp_write(LIMA_GP_INT_MASK, 0);
+		gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_RESET_COMPLETED);
+		gp_write(LIMA_GP_CMD, LIMA_GP_CMD_SOFT_RESET);
+		ip->data.async_reset = true;
+	}
+}
+
+/*
+ * Wait for a pending asynchronous soft reset to complete, then clear all
+ * interrupts and restore the interrupt mask.
+ *
+ * Returns 0 when no reset was pending or it completed, otherwise the
+ * poll error (the pending flag then stays set).
+ */
+static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
+{
+	u32 rawstat;
+	int ret;
+
+	if (!ip->data.async_reset)
+		return 0;
+
+	ret = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, rawstat,
+				 rawstat & LIMA_GP_IRQ_RESET_COMPLETED,
+				 0, 100);
+	if (ret) {
+		dev_err(ip->dev->dev, "gp soft reset time out\n");
+		return ret;
+	}
+
+	gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
+	gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
+	ip->data.async_reset = false;
+
+	return 0;
+}
+
+/*
+ * Sanity-check a GP register frame: no address range may end before it
+ * starts, and the VS and PLBU command lists must not both be empty.
+ *
+ * Returns 0 if the frame is acceptable, -EINVAL otherwise.
+ */
+static int lima_gp_task_validate(struct lima_sched_pipe *pipe,
+				 struct lima_sched_task *task)
+{
+	struct drm_lima_gp_frame *frame = task->frame;
+	u32 *regs = frame->frame;
+	u32 vs_start = regs[LIMA_GP_VSCL_START_ADDR >> 2];
+	u32 vs_end = regs[LIMA_GP_VSCL_END_ADDR >> 2];
+	u32 plbu_start = regs[LIMA_GP_PLBUCL_START_ADDR >> 2];
+	u32 plbu_end = regs[LIMA_GP_PLBUCL_END_ADDR >> 2];
+	u32 alloc_start = regs[LIMA_GP_PLBU_ALLOC_START_ADDR >> 2];
+	u32 alloc_end = regs[LIMA_GP_PLBU_ALLOC_END_ADDR >> 2];
+	(void)pipe;
+
+	if (vs_start > vs_end || plbu_start > plbu_end ||
+	    alloc_start > alloc_end)
+		return -EINVAL;
+
+	if (vs_start == vs_end && plbu_start == plbu_end)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Start a GP task: wait for any pending soft reset, load the register
+ * frame, then issue the start command for each non-empty command list.
+ */
+static void lima_gp_task_run(struct lima_sched_pipe *pipe,
+			     struct lima_sched_task *task)
+{
+	struct lima_ip *ip = pipe->processor[0];
+	struct drm_lima_gp_frame *frame = task->frame;
+	u32 *regs = frame->frame;
+	bool run_vs = regs[LIMA_GP_VSCL_START_ADDR >> 2] !=
+		      regs[LIMA_GP_VSCL_END_ADDR >> 2];
+	bool run_plbu = regs[LIMA_GP_PLBUCL_START_ADDR >> 2] !=
+			regs[LIMA_GP_PLBUCL_END_ADDR >> 2];
+	u32 start_cmd = 0;
+	int n;
+
+	if (run_vs)
+		start_cmd |= LIMA_GP_CMD_START_VS;
+	if (run_plbu)
+		start_cmd |= LIMA_GP_CMD_START_PLBU;
+
+	/* before any hw ops, wait last success task async soft reset */
+	lima_gp_soft_reset_async_wait(ip);
+
+	/* the frame registers are consecutive, starting at VSCL_START_ADDR */
+	for (n = 0; n < LIMA_GP_FRAME_REG_NUM; n++)
+		gp_write(LIMA_GP_VSCL_START_ADDR + n * 4, regs[n]);
+
+	gp_write(LIMA_GP_CMD, LIMA_GP_CMD_UPDATE_PLBU_ALLOC);
+	gp_write(LIMA_GP_CMD, start_cmd);
+}
+
+/*
+ * Poll helper for the hard reset: write a marker to a performance
+ * counter limit register and report whether it reads back.
+ */
+static int lima_gp_hard_reset_poll(struct lima_ip *ip)
+{
+	const u32 marker = 0xC01A0000;
+
+	gp_write(LIMA_GP_PERF_CNT_0_LIMIT, marker);
+
+	return gp_read(LIMA_GP_PERF_CNT_0_LIMIT) == marker;
+}
+
+/*
+ * Hard-reset the GP and wait until lima_gp_hard_reset_poll() succeeds,
+ * then clear all interrupts and restore the interrupt mask.
+ *
+ * Returns 0 on success or the poll error on timeout.
+ */
+static int lima_gp_hard_reset(struct lima_ip *ip)
+{
+	int err;
+
+	gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC0FFE000);
+	gp_write(LIMA_GP_INT_MASK, 0);
+	gp_write(LIMA_GP_CMD, LIMA_GP_CMD_RESET);
+
+	err = lima_poll_timeout(ip, lima_gp_hard_reset_poll, 10, 100);
+	if (err) {
+		dev_err(ip->dev->dev, "gp hard reset timeout\n");
+		return err;
+	}
+
+	gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0);
+	gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
+	gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
+
+	return 0;
+}
+
+/* Pipe task_fini hook: start an asynchronous soft reset of the GP. */
+static void lima_gp_task_fini(struct lima_sched_pipe *pipe)
+{
+	struct lima_ip *gp = pipe->processor[0];
+
+	lima_gp_soft_reset_async(gp);
+}
+
+/* Pipe task_error hook: log the GP state and hard-reset the GP. */
+static void lima_gp_task_error(struct lima_sched_pipe *pipe)
+{
+	struct lima_ip *ip = pipe->processor[0];
+	u32 int_state = gp_read(LIMA_GP_INT_STAT);
+	u32 status = gp_read(LIMA_GP_STATUS);
+
+	dev_err(ip->dev->dev, "gp task error int_state=%x status=%x\n",
+		int_state, status);
+
+	lima_gp_hard_reset(ip);
+}
+
+/* Pipe task_mmu_error hook: complete the current task on the pipe. */
+static void lima_gp_task_mmu_error(struct lima_sched_pipe *pipe)
+{
+	lima_sched_pipe_task_done(pipe);
+}
+
+/*
+ * Log the GP model and revision read from LIMA_GP_VERSION: bits 31:16
+ * select the product name, bits 15:8 the major and bits 7:0 the minor
+ * revision.
+ */
+static void lima_gp_print_version(struct lima_ip *ip)
+{
+	u32 version, major, minor;
+	const char *name;
+
+	version = gp_read(LIMA_GP_VERSION);
+	major = (version >> 8) & 0xFF;
+	minor = version & 0xFF;
+	switch (version >> 16) {
+	case 0xA07:
+		name = "mali200";
+		break;
+	case 0xC07:
+		name = "mali300";
+		break;
+	case 0xB07:
+		name = "mali400";
+		break;
+	case 0xD07:
+		name = "mali450";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+	/* major/minor are u32, so print them unsigned */
+	dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
+		 lima_ip_name(ip), name, major, minor);
+}
+
+/* GP task slab cache and its use count, managed by lima_gp_pipe_init()/lima_gp_pipe_fini(). */
+static struct kmem_cache *lima_gp_task_slab = NULL;
+static int lima_gp_task_slab_refcnt = 0;
+
+/*
+ * Bring up the GP: print its version, soft-reset it and install the
+ * (shared) interrupt handler.  Returns 0 or a negative errno.
+ */
+int lima_gp_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int ret;
+
+	lima_gp_print_version(ip);
+
+	/* force a fresh reset and wait for it synchronously */
+	ip->data.async_reset = false;
+	lima_gp_soft_reset_async(ip);
+	ret = lima_gp_soft_reset_async_wait(ip);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(dev->dev, ip->irq, lima_gp_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (!ret)
+		return 0;
+
+	dev_err(dev->dev, "gp %s fail to request irq\n",
+		lima_ip_name(ip));
+	return ret;
+}
+
+void lima_gp_fini(struct lima_ip *ip)
+{
+	/* nothing to tear down */
+}
+
+/*
+ * Set up the GP scheduler pipe: create the task slab on first use (and
+ * count this user), then fill in the frame size and the GP callbacks.
+ *
+ * Each slab object holds a lima_sched_task followed by the register
+ * frame; only the frame part is whitelisted for user copy.
+ *
+ * Returns 0 or -ENOMEM.
+ */
+int lima_gp_pipe_init(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+	int frame_size = sizeof(struct drm_lima_gp_frame);
+
+	if (lima_gp_task_slab == NULL) {
+		lima_gp_task_slab = kmem_cache_create_usercopy(
+			"lima_gp_task", sizeof(struct lima_sched_task) + frame_size,
+			0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
+			frame_size, NULL);
+		if (lima_gp_task_slab == NULL)
+			return -ENOMEM;
+	}
+	lima_gp_task_slab_refcnt++;
+
+	pipe->task_slab = lima_gp_task_slab;
+	pipe->frame_size = frame_size;
+
+	pipe->task_validate = lima_gp_task_validate;
+	pipe->task_run = lima_gp_task_run;
+	pipe->task_fini = lima_gp_task_fini;
+	pipe->task_error = lima_gp_task_error;
+	pipe->task_mmu_error = lima_gp_task_mmu_error;
+
+	return 0;
+}
+
+/* Drop one user of the GP task slab and destroy it with the last one. */
+void lima_gp_pipe_fini(struct lima_device *dev)
+{
+	lima_gp_task_slab_refcnt--;
+	if (lima_gp_task_slab_refcnt)
+		return;
+
+	kmem_cache_destroy(lima_gp_task_slab);
+	lima_gp_task_slab = NULL;
+}
diff --git a/drivers/gpu/drm/lima/lima_gp.h b/drivers/gpu/drm/lima/lima_gp.h
new file mode 100644
index 000000000000..55bc48ec7603
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gp.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_GP_H__
+#define __LIMA_GP_H__
+
+struct lima_ip;
+struct lima_device;
+
+int lima_gp_init(struct lima_ip *ip);
+void lima_gp_fini(struct lima_ip *ip);
+
+int lima_gp_pipe_init(struct lima_device *dev);
+void lima_gp_pipe_fini(struct lima_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
new file mode 100644
index 000000000000..2ba4786f9ec7
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_l2_cache.c
@@ -0,0 +1,80 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/iopoll.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_l2_cache.h"
+#include "lima_regs.h"
+
+/* Register accessors; both expect a `struct lima_ip *ip` in the calling scope. */
+#define l2_cache_write(reg, data) writel(data, ip->iomem + reg)
+#define l2_cache_read(reg) readl(ip->iomem + reg)
+
+/*
+ * Poll the L2 cache status until the command-busy bit clears.
+ * Returns 0 on success or the poll error on timeout.
+ */
+static int lima_l2_cache_wait_idle(struct lima_ip *ip)
+{
+	u32 status;
+	int ret;
+
+	ret = readl_poll_timeout(ip->iomem + LIMA_L2_CACHE_STATUS, status,
+				 !(status & LIMA_L2_CACHE_STATUS_COMMAND_BUSY),
+				 0, 1000);
+	if (ret)
+		dev_err(ip->dev->dev, "l2 cache wait command timeout\n");
+
+	return ret;
+}
+
+/*
+ * Issue a clear-all command to the L2 cache and wait for it to finish,
+ * all under the ip's spinlock.  Returns 0 or the wait error.
+ */
+int lima_l2_cache_flush(struct lima_ip *ip)
+{
+	int err;
+
+	spin_lock(&ip->data.lock);
+
+	l2_cache_write(LIMA_L2_CACHE_COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
+	err = lima_l2_cache_wait_idle(ip);
+
+	spin_unlock(&ip->data.lock);
+
+	return err;
+}
+
+/*
+ * Bring up one L2 cache: log its geometry, flush it and enable it.
+ *
+ * Returns 0 on success, -ENODEV for l2_cache2 when none of PP4-7 is
+ * present, or the flush error.
+ */
+int lima_l2_cache_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	u32 size;
+	int i, err;
+
+	/* l2_cache2 only exists when one of PP4-7 present */
+	if (ip->id == lima_ip_l2_cache2) {
+		bool has_upper_pp = false;
+
+		for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
+			if (dev->ip[i].present) {
+				has_upper_pp = true;
+				break;
+			}
+		}
+		if (!has_upper_pp)
+			return -ENODEV;
+	}
+
+	spin_lock_init(&ip->data.lock);
+
+	/* each byte of the size register is a log2 value */
+	size = l2_cache_read(LIMA_L2_CACHE_SIZE);
+	dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
+		 1 << (((size >> 16) & 0xff) - 10),
+		 1 << ((size >> 8) & 0xff),
+		 1 << (size & 0xff),
+		 1 << ((size >> 24) & 0xff));
+
+	err = lima_l2_cache_flush(ip);
+	if (err)
+		return err;
+
+	l2_cache_write(LIMA_L2_CACHE_ENABLE,
+		       LIMA_L2_CACHE_ENABLE_ACCESS|LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
+	l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
+
+	return 0;
+}
+
+void lima_l2_cache_fini(struct lima_ip *ip)
+{
+	/* nothing to tear down */
+}
diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
new file mode 100644
index 000000000000..2ff91eafefbe
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_l2_cache.h
@@ -0,0 +1,14 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_L2_CACHE_H__
+#define __LIMA_L2_CACHE_H__
+
+struct lima_ip;
+
+/* Set up / tear down one L2 cache IP block. */
+int lima_l2_cache_init(struct lima_ip *ip);
+void lima_l2_cache_fini(struct lima_ip *ip);
+
+/* Issue a clear-all command and wait for the cache to become idle. */
+int lima_l2_cache_flush(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
new file mode 100644
index 000000000000..c6c151d33cf8
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_mmu.c
@@ -0,0 +1,142 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_mmu.h"
+#include "lima_vm.h"
+#include "lima_object.h"
+#include "lima_regs.h"
+
+/*
+ * MMU register accessors. Both expand against a `struct lima_ip *ip` that
+ * must be in scope at the call site. The register offset is parenthesized
+ * so that an expression argument is added as a whole.
+ */
+#define mmu_write(reg, data) writel(data, ip->iomem + (reg))
+#define mmu_read(reg) readl(ip->iomem + (reg))
+
+/*
+ * Write @cmd to LIMA_MMU_COMMAND, then poll the register at offset @addr
+ * (read into @val) until @cond becomes true.
+ *
+ * Expands against locals named `ip` and `dev` at the call site. Logs an
+ * error if the poll times out and evaluates to readl_poll_timeout()'s
+ * result. @cmd is expanded twice, so it should have no side effects.
+ */
+#define lima_mmu_send_command(cmd, addr, val, cond)	     \
+({							     \
+	int __ret;					     \
+							     \
+	mmu_write(LIMA_MMU_COMMAND, cmd);		     \
+	__ret = readl_poll_timeout(ip->iomem + (addr), val,  \
+				  cond, 0, 100);	     \
+	if (__ret)					     \
+		dev_err(dev->dev,			     \
+			"mmu command %x timeout\n", cmd);    \
+	__ret;						     \
+})
+
+/*
+ * MMU interrupt handler.
+ *
+ * Reads LIMA_MMU_INT_STATUS; a zero value means the (shared) interrupt was
+ * not raised by this MMU. Otherwise the fault is logged, all MMU interrupts
+ * are masked, the pending bits are cleared, and the owning scheduler pipe
+ * (GP for the GP MMU, PP otherwise) is told about the MMU error.
+ *
+ * The bus id and read/write type of a page fault are bit fields of the
+ * LIMA_MMU_STATUS register, not of the interrupt status register (which
+ * only carries the PAGE_FAULT and READ_BUS_ERROR bits), so LIMA_MMU_STATUS
+ * is read separately for decoding.
+ */
+static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
+{
+	struct lima_ip *ip = data;
+	struct lima_device *dev = ip->dev;
+	u32 status = mmu_read(LIMA_MMU_INT_STATUS);
+	struct lima_sched_pipe *pipe;
+
+	/* for shared irq case */
+	if (!status)
+		return IRQ_NONE;
+
+	if (status & LIMA_MMU_INT_PAGE_FAULT) {
+		u32 fault = mmu_read(LIMA_MMU_PAGE_FAULT_ADDR);
+		u32 mmu_status = mmu_read(LIMA_MMU_STATUS);
+
+		dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
+			fault, LIMA_MMU_STATUS_BUS_ID(mmu_status),
+			mmu_status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
+			lima_ip_name(ip));
+	}
+
+	if (status & LIMA_MMU_INT_READ_BUS_ERROR)
+		dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
+
+	/* mask all interrupts before resume */
+	mmu_write(LIMA_MMU_INT_MASK, 0);
+	mmu_write(LIMA_MMU_INT_CLEAR, status);
+
+	pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
+	lima_sched_pipe_mmu_error(pipe);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bring up one MMU: probe it, hard-reset it, hook up its interrupt, point
+ * it at the device's empty VM and enable paging.
+ *
+ * The PP broadcast MMU (lima_ip_ppmmu_bcast) is skipped and reports success.
+ *
+ * Returns 0 on success, -EIO if the DTE register write test fails, or the
+ * error from a timed-out MMU command or from devm_request_irq().
+ */
+int lima_mmu_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+	u32 v;
+
+	if (ip->id == lima_ip_ppmmu_bcast)
+		return 0;
+
+	/*
+	 * Write test: the value is expected to read back with its low 12
+	 * bits cleared.
+	 */
+	mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
+	if (mmu_read(LIMA_MMU_DTE_ADDR) != 0xCAFEB000) {
+		dev_err(dev->dev, "mmu %s dte write test fail\n", lima_ip_name(ip));
+		return -EIO;
+	}
+
+	/*
+	 * NOTE(review): lima_mmu_send_command() writes the command itself, so
+	 * the explicit write below issues HARD_RESET twice - confirm whether
+	 * that is intentional. Reset is considered done once DTE_ADDR reads 0.
+	 */
+	mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
+	err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
+				    LIMA_MMU_DTE_ADDR, v, v == 0);
+	if (err)
+		return err;
+
+	err = devm_request_irq(dev->dev, ip->irq, lima_mmu_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (err) {
+		dev_err(dev->dev, "mmu %s fail to request irq\n", lima_ip_name(ip));
+		return err;
+	}
+
+	/* unmask the two handled interrupts, then enable paging on the empty VM */
+	mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
+	mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
+	return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
+				     LIMA_MMU_STATUS, v,
+				     v & LIMA_MMU_STATUS_PAGING_ENABLED);
+}
+
+/* Teardown counterpart of lima_mmu_init(); currently does nothing. */
+void lima_mmu_fini(struct lima_ip *ip)
+{
+
+}
+
+/*
+ * Switch the MMU to the page directory of @vm and flush its TLB.
+ *
+ * The MMU is stalled around the update. When @vm is NULL the page directory
+ * address is left unchanged and only the TLB flush is performed. Command
+ * timeouts are logged by lima_mmu_send_command() but not propagated.
+ */
+void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm)
+{
+	struct lima_device *dev = ip->dev;
+	u32 v;
+
+	/* stall the MMU and wait until the stall is reported active */
+	lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_STALL,
+			      LIMA_MMU_STATUS, v,
+			      v & LIMA_MMU_STATUS_STALL_ACTIVE);
+
+	if (vm)
+		mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
+
+	/* flush the TLB */
+	mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_ZAP_CACHE);
+
+	/* release the stall and wait until it is no longer reported active */
+	lima_mmu_send_command(LIMA_MMU_COMMAND_DISABLE_STALL,
+			      LIMA_MMU_STATUS, v,
+			      !(v & LIMA_MMU_STATUS_STALL_ACTIVE));
+}
+
+/*
+ * Recover the MMU after a page fault.
+ *
+ * Does nothing unless LIMA_MMU_STATUS reports PAGE_FAULT_ACTIVE. Otherwise
+ * interrupts are masked, the MMU is hard-reset, interrupts are unmasked
+ * again and paging is re-enabled on the device's empty VM. Command timeouts
+ * are logged by lima_mmu_send_command() but not propagated.
+ */
+void lima_mmu_page_fault_resume(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	u32 v;
+
+	if (!(mmu_read(LIMA_MMU_STATUS) & LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE))
+		return;
+
+	dev_info(dev->dev, "mmu resume\n");
+
+	mmu_write(LIMA_MMU_INT_MASK, 0);
+	mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
+	lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
+			      LIMA_MMU_DTE_ADDR, v, v == 0);
+
+	mmu_write(LIMA_MMU_INT_MASK,
+		  LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
+	mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
+	lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
+			      LIMA_MMU_STATUS, v,
+			      v & LIMA_MMU_STATUS_PAGING_ENABLED);
+}
diff --git a/drivers/gpu/drm/lima/lima_mmu.h b/drivers/gpu/drm/lima/lima_mmu.h
new file mode 100644
index 000000000000..ca173b60fc73
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_mmu.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_MMU_H__
+#define __LIMA_MMU_H__
+
+struct lima_ip;
+struct lima_vm;
+
+/* Set up / tear down one MMU IP block. */
+int lima_mmu_init(struct lima_ip *ip);
+void lima_mmu_fini(struct lima_ip *ip);
+
+/* Point the MMU at @vm's page directory (if non-NULL) and flush its TLB. */
+void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm);
+/* Reset and re-enable the MMU if it reports an active page fault. */
+void lima_mmu_page_fault_resume(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_object.c b/drivers/gpu/drm/lima/lima_object.c
new file mode 100644
index 000000000000..28ff1b8e1dca
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_object.c
@@ -0,0 +1,124 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <drm/drm_prime.h>
+#include <linux/pagemap.h>
+#include <linux/dma-mapping.h>
+
+#include "lima_object.h"
+
+/*
+ * Release a buffer object and everything it owns.
+ *
+ * Objects backed by an sg_table free their page array and go through
+ * drm_prime_gem_destroy(). Other objects unmap every page that has a
+ * non-zero DMA address recorded and return their pages with
+ * drm_gem_put_pages(). The DMA address array, the GEM object and the bo
+ * itself are released in all cases.
+ */
+void lima_bo_destroy(struct lima_bo *bo)
+{
+	struct drm_gem_object *obj = &bo->gem;
+
+	if (bo->sgt) {
+		kfree(bo->pages);
+		drm_prime_gem_destroy(obj, bo->sgt);
+	} else {
+		dma_addr_t *dma = bo->pages_dma_addr;
+
+		if (dma) {
+			int idx, count = obj->size >> PAGE_SHIFT;
+
+			for (idx = 0; idx < count; idx++) {
+				/* zero means the page was never mapped */
+				if (!dma[idx])
+					continue;
+
+				dma_unmap_page(obj->dev->dev, dma[idx],
+					       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			}
+		}
+
+		if (bo->pages)
+			drm_gem_put_pages(obj, bo->pages, true, true);
+	}
+
+	kfree(bo->pages_dma_addr);
+	drm_gem_object_release(obj);
+	kfree(bo);
+}
+
+/*
+ * Allocate a zeroed lima_bo and initialize its embedded GEM object.
+ *
+ * @size is rounded up to a page multiple. @flags is currently unused.
+ * @resv is stored as the GEM object's reservation object before the GEM
+ * object is initialized.
+ *
+ * Returns the new object, or an ERR_PTR (-ENOMEM or the
+ * drm_gem_object_init() error).
+ */
+static struct lima_bo *lima_bo_create_struct(struct lima_device *dev, u32 size, u32 flags,
+					     struct reservation_object *resv)
+{
+	struct lima_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	int ret;
+
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&bo->lock);
+	INIT_LIST_HEAD(&bo->va);
+	bo->gem.resv = resv;
+
+	ret = drm_gem_object_init(dev->ddev, &bo->gem, PAGE_ALIGN(size));
+	if (!ret)
+		return bo;
+
+	kfree(bo);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Create a buffer object of @size bytes (rounded up to whole pages).
+ *
+ * With @sgt set, the object wraps that scatter/gather table: a page array
+ * is allocated and filled, together with the per-page DMA addresses, by
+ * drm_prime_sg_to_page_addr_arrays(). Without @sgt, pages are obtained from
+ * the GEM object's backing file (restricted to GFP_DMA32) and each page is
+ * DMA-mapped bidirectionally.
+ *
+ * The per-page arrays are allocated with kcalloc() so the element count
+ * multiplication is overflow-checked.
+ *
+ * Returns the new object or an ERR_PTR; on failure everything set up so far
+ * is released through lima_bo_destroy().
+ *
+ * NOTE(review): for @sgt objects the error path reaches
+ * drm_prime_gem_destroy() via lima_bo_destroy(); confirm that is valid
+ * before the import attachment has been set up by the caller.
+ */
+struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
+			       u32 flags, struct sg_table *sgt,
+			       struct reservation_object *resv)
+{
+	int err;
+	size_t i, npages;
+	struct lima_bo *bo, *ret;
+
+	bo = lima_bo_create_struct(dev, size, flags, resv);
+	if (IS_ERR(bo))
+		return bo;
+
+	npages = bo->gem.size >> PAGE_SHIFT;
+
+	bo->pages_dma_addr = kcalloc(npages, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!bo->pages_dma_addr) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_out;
+	}
+
+	if (sgt) {
+		bo->sgt = sgt;
+
+		bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
+		if (!bo->pages) {
+			ret = ERR_PTR(-ENOMEM);
+			goto err_out;
+		}
+
+		err = drm_prime_sg_to_page_addr_arrays(
+			sgt, bo->pages, bo->pages_dma_addr, npages);
+		if (err) {
+			ret = ERR_PTR(err);
+			goto err_out;
+		}
+	} else {
+		mapping_set_gfp_mask(bo->gem.filp->f_mapping, GFP_DMA32);
+		bo->pages = drm_gem_get_pages(&bo->gem);
+		if (IS_ERR(bo->pages)) {
+			ret = ERR_CAST(bo->pages);
+			/* keep lima_bo_destroy() from touching the ERR_PTR */
+			bo->pages = NULL;
+			goto err_out;
+		}
+
+		for (i = 0; i < npages; i++) {
+			dma_addr_t addr = dma_map_page(dev->dev, bo->pages[i], 0,
+						       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev->dev, addr)) {
+				/* entries from i onwards stay zero and are skipped on unmap */
+				ret = ERR_PTR(-EFAULT);
+				goto err_out;
+			}
+			bo->pages_dma_addr[i] = addr;
+		}
+	}
+
+	return bo;
+
+err_out:
+	lima_bo_destroy(bo);
+	return ret;
+}
diff --git a/drivers/gpu/drm/lima/lima_object.h b/drivers/gpu/drm/lima/lima_object.h
new file mode 100644
index 000000000000..70099f1045ac
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_object.h
@@ -0,0 +1,36 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_OBJECT_H__
+#define __LIMA_OBJECT_H__
+
+#include <drm/drm_gem.h>
+
+#include "lima_device.h"
+
+/* A lima buffer object: a GEM object plus its backing pages and mappings. */
+struct lima_bo {
+	struct drm_gem_object gem;
+
+	/* backing pages, one entry per PAGE_SIZE of gem.size */
+	struct page **pages;
+	/* DMA address of each entry in @pages */
+	dma_addr_t *pages_dma_addr;
+	/* set when the object was created from an sg_table, NULL otherwise */
+	struct sg_table *sgt;
+	/* presumably the kernel mapping used by lima_bo_vmap() - TODO confirm */
+	void *vaddr;
+
+	/* NOTE(review): presumably protects @va - confirm against lima_vm code */
+	struct mutex lock;
+	struct list_head va;
+};
+
+/* Convert a pointer to the embedded GEM object back to its lima_bo. */
+static inline struct lima_bo *
+to_lima_bo(struct drm_gem_object *obj)
+{
+	return container_of(obj, struct lima_bo, gem);
+}
+
+/*
+ * Create a buffer object; pass @sgt to wrap an existing sg_table, or NULL
+ * to allocate and DMA-map fresh pages. Returns an ERR_PTR on failure.
+ */
+struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
+			       u32 flags, struct sg_table *sgt,
+			       struct reservation_object *resv);
+/* Release a buffer object together with its pages and DMA mappings. */
+void lima_bo_destroy(struct lima_bo *bo);
+void *lima_bo_vmap(struct lima_bo *bo);
+void lima_bo_vunmap(struct lima_bo *bo);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_pmu.c b/drivers/gpu/drm/lima/lima_pmu.c
new file mode 100644
index 000000000000..3c50524b70a7
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pmu.c
@@ -0,0 +1,59 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/iopoll.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_pmu.h"
+#include "lima_regs.h"
+
+/*
+ * PMU register accessors. Both expand against a `struct lima_ip *ip` that
+ * must be in scope at the call site. The register offset is parenthesized
+ * so that an expression argument is added as a whole.
+ */
+#define pmu_write(reg, data) writel(data, ip->iomem + (reg))
+#define pmu_read(reg) readl(ip->iomem + (reg))
+
+/*
+ * Wait for the PMU to signal command completion.
+ *
+ * Polls LIMA_PMU_INT_RAWSTAT until the CMD bit is set, then clears that
+ * bit through LIMA_PMU_INT_CLEAR.
+ *
+ * Returns 0 on success or the readl_poll_timeout() error after logging a
+ * timeout message.
+ */
+static int lima_pmu_wait_cmd(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+	u32 v;
+
+	err = readl_poll_timeout(ip->iomem + LIMA_PMU_INT_RAWSTAT,
+				 v, v & LIMA_PMU_INT_CMD_MASK,
+				 100, 100000);
+	if (err) {
+		dev_err(dev->dev, "timeout wait pmu cmd\n");
+		return err;
+	}
+
+	pmu_write(LIMA_PMU_INT_CLEAR, LIMA_PMU_INT_CMD_MASK);
+	return 0;
+}
+
+/*
+ * Initialize the PMU: mask its interrupts, program the switch delay and
+ * power up every block that LIMA_PMU_STATUS reports as off.
+ *
+ * Returns 0 on success or the error from lima_pmu_wait_cmd().
+ */
+int lima_pmu_init(struct lima_ip *ip)
+{
+	u32 powered_off;
+
+	pmu_write(LIMA_PMU_INT_MASK, 0);
+
+	/*
+	 * If this value is too low, when in high GPU clk freq,
+	 * GPU will be in unstable state.
+	 */
+	pmu_write(LIMA_PMU_SW_DELAY, 0xffff);
+
+	/* status reg 1=off 0=on */
+	powered_off = pmu_read(LIMA_PMU_STATUS);
+	if (!powered_off)
+		return 0;
+
+	/* power up all ip */
+	pmu_write(LIMA_PMU_POWER_UP, powered_off);
+	return lima_pmu_wait_cmd(ip);
+}
+
+/* Teardown counterpart of lima_pmu_init(); currently does nothing. */
+void lima_pmu_fini(struct lima_ip *ip)
+{
+
+}
diff --git a/drivers/gpu/drm/lima/lima_pmu.h b/drivers/gpu/drm/lima/lima_pmu.h
new file mode 100644
index 000000000000..1cf94a35bdf9
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pmu.h
@@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_PMU_H__
+#define __LIMA_PMU_H__
+
+struct lima_ip;
+
+/* Set up (power up all blocks) / tear down the PMU IP block. */
+int lima_pmu_init(struct lima_ip *ip);
+void lima_pmu_fini(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
new file mode 100644
index 000000000000..7b36c29eee89
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -0,0 +1,423 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <drm/lima_drm.h>
+
+#include "lima_device.h"
+#include "lima_pp.h"
+#include "lima_dlbu.h"
+#include "lima_bcast.h"
+#include "lima_vm.h"
+#include "lima_regs.h"
+
+/*
+ * PP register accessors. Both expand against a `struct lima_ip *ip` that
+ * must be in scope at the call site. The register offset is parenthesized
+ * so that an expression argument is added as a whole.
+ */
+#define pp_write(reg, data) writel(data, ip->iomem + (reg))
+#define pp_read(reg) readl(ip->iomem + (reg))
+
+/*
+ * Process the interrupt bits in @state for one PP.
+ *
+ * If any error bit is set, the error is logged together with the PP status
+ * register, the PP pipe is flagged as failed and all PP interrupts are
+ * masked. The bits in @state are acknowledged in every case.
+ */
+static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
+{
+	struct lima_device *dev = ip->dev;
+
+	if (state & LIMA_PP_IRQ_MASK_ERROR) {
+		dev_err(dev->dev, "pp error irq state=%x status=%x\n",
+			state, pp_read(LIMA_PP_STATUS));
+
+		dev->pipe[lima_pipe_pp].error = true;
+
+		/* mask all interrupts before hard reset */
+		pp_write(LIMA_PP_INT_MASK, 0);
+	}
+
+	pp_write(LIMA_PP_INT_CLEAR, state);
+}
+
+/*
+ * Interrupt handler for a single PP.
+ *
+ * A zero interrupt status means the shared line was raised by another
+ * device. Otherwise the interrupt is processed and the pipe's outstanding
+ * counter is decremented; the PP that brings it to zero completes the task.
+ */
+static irqreturn_t lima_pp_irq_handler(int irq, void *data)
+{
+	struct lima_ip *ip = data;
+	struct lima_sched_pipe *pipe = ip->dev->pipe + lima_pipe_pp;
+	u32 state;
+
+	state = pp_read(LIMA_PP_INT_STATUS);
+
+	/* for shared irq case */
+	if (!state)
+		return IRQ_NONE;
+
+	lima_pp_handle_irq(ip, state);
+
+	if (atomic_dec_and_test(&pipe->task))
+		lima_sched_pipe_task_done(pipe);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler for the PP broadcast unit.
+ *
+ * Walks the PPs used by the current task's frame and, for each PP not yet
+ * marked done, either processes its pending interrupt or, when none is
+ * pending, checks whether it has already stopped rendering. Each PP that
+ * finishes is recorded in pipe->done and decrements the outstanding
+ * counter; the last one completes the task.
+ *
+ * The line is requested with IRQF_SHARED, so the handler can be invoked
+ * while no task is running; in that case there is no frame to inspect and
+ * IRQ_NONE is returned.
+ *
+ * Returns IRQ_HANDLED if at least one PP had a pending interrupt,
+ * IRQ_NONE otherwise.
+ */
+static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
+{
+	int i;
+	irqreturn_t ret = IRQ_NONE;
+	struct lima_ip *pp_bcast = data;
+	struct lima_device *dev = pp_bcast->dev;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	struct drm_lima_m450_pp_frame *frame;
+
+	/* for shared irq case */
+	if (!pipe->current_task)
+		return IRQ_NONE;
+
+	frame = pipe->current_task->frame;
+
+	for (i = 0; i < frame->num_pp; i++) {
+		struct lima_ip *ip = pipe->processor[i];
+		u32 status, state;
+
+		if (pipe->done & (1 << i))
+			continue;
+
+		/*
+		 * status read first in case int state change in the middle
+		 * which may miss the interrupt handling
+		 */
+		status = pp_read(LIMA_PP_STATUS);
+		state = pp_read(LIMA_PP_INT_STATUS);
+
+		if (state) {
+			lima_pp_handle_irq(ip, state);
+			ret = IRQ_HANDLED;
+		} else if (status & LIMA_PP_STATUS_RENDERING_ACTIVE) {
+			/* no interrupt yet and still rendering: not done */
+			continue;
+		}
+
+		pipe->done |= (1 << i);
+		if (atomic_dec_and_test(&pipe->task))
+			lima_sched_pipe_task_done(pipe);
+	}
+
+	return ret;
+}
+
+/*
+ * Start a soft reset without waiting for it to complete.
+ *
+ * Does nothing if a reset is already pending on @ip. Otherwise interrupts
+ * are masked, LIMA_PP_IRQ_MASK_ALL is written to the raw status register,
+ * the reset is triggered and the pending flag is set.
+ */
+static void lima_pp_soft_reset_async(struct lima_ip *ip)
+{
+	if (!ip->data.async_reset) {
+		pp_write(LIMA_PP_INT_MASK, 0);
+		pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
+		pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
+		ip->data.async_reset = true;
+	}
+}
+
+/*
+ * Poll callback: non-zero once the soft reset has completed, i.e. the PP
+ * is no longer rendering and the raw interrupt status equals exactly
+ * RESET_COMPLETED.
+ */
+static int lima_pp_soft_reset_poll(struct lima_ip *ip)
+{
+	if (pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE)
+		return 0;
+
+	return pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
+}
+
+/*
+ * Wait for a previously started soft reset of one PP to complete, then
+ * clear all interrupts and unmask the ones the driver uses.
+ *
+ * Returns 0 on success or the lima_poll_timeout() error after logging.
+ */
+static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
+{
+	int err = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
+
+	if (!err) {
+		pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
+		pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
+		return 0;
+	}
+
+	dev_err(ip->dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
+	return err;
+}
+
+/*
+ * Complete a pending asynchronous soft reset on @ip.
+ *
+ * Returns 0 immediately when no reset is pending. For the broadcast unit
+ * every PP used by the current task's frame is waited for and the results
+ * are OR-ed together; for a single PP only that PP is waited for. The
+ * pending flag is cleared regardless of the outcome.
+ */
+static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
+{
+	int idx, ret = 0;
+
+	if (!ip->data.async_reset)
+		return 0;
+
+	if (ip->id != lima_ip_pp_bcast) {
+		ret = lima_pp_soft_reset_async_wait_one(ip);
+	} else {
+		struct lima_sched_pipe *pipe = ip->dev->pipe + lima_pipe_pp;
+		struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
+
+		for (idx = 0; idx < frame->num_pp; idx++)
+			ret |= lima_pp_soft_reset_async_wait_one(pipe->processor[idx]);
+	}
+
+	ip->data.async_reset = false;
+	return ret;
+}
+
+/*
+ * Program a render job into the PP registers.
+ *
+ * @frame supplies LIMA_PP_FRAME_REG_NUM consecutive 32-bit values written
+ * starting at LIMA_PP_FRAME. @wb supplies three consecutive groups of
+ * LIMA_PP_WB_REG_NUM values, one group per write-back unit.
+ */
+static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
+{
+	int unit, reg;
+
+	for (reg = 0; reg < LIMA_PP_FRAME_REG_NUM; reg++)
+		writel(frame[reg], ip->iomem + LIMA_PP_FRAME + reg * 4);
+
+	for (unit = 0; unit < 3; unit++) {
+		const u32 *regs = wb + unit * LIMA_PP_WB_REG_NUM;
+
+		for (reg = 0; reg < LIMA_PP_WB_REG_NUM; reg++)
+			writel(regs[reg], ip->iomem + LIMA_PP_WB(unit) + reg * 4);
+	}
+}
+
+/*
+ * Poll callback for hard reset: writes a test pattern to
+ * LIMA_PP_PERF_CNT_0_LIMIT and returns non-zero once it reads back.
+ */
+static int lima_pp_hard_reset_poll(struct lima_ip *ip)
+{
+	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
+	return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == 0xC01A0000;
+}
+
+/*
+ * Force-reset one PP and wait until its registers accept writes again.
+ *
+ * On success the scratch register is zeroed, all interrupts are cleared and
+ * the ones used by the driver are unmasked.
+ *
+ * Returns 0 on success or the lima_poll_timeout() error after logging.
+ */
+static int lima_pp_hard_reset(struct lima_ip *ip)
+{
+	int err;
+
+	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
+	pp_write(LIMA_PP_INT_MASK, 0);
+	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
+
+	err = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
+	if (!err) {
+		pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
+		pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
+		pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
+		return 0;
+	}
+
+	dev_err(ip->dev->dev, "pp hard reset timeout\n");
+	return err;
+}
+
+/*
+ * Log the PP's product name and version.
+ *
+ * LIMA_PP_VERSION is decoded as: bits 31..16 product id, bits 15..8 major
+ * version, bits 7..0 minor version. Unrecognized product ids are reported
+ * as "unknown".
+ */
+static void lima_pp_print_version(struct lima_ip *ip)
+{
+	u32 version, major, minor;
+	const char *name;
+
+	version = pp_read(LIMA_PP_VERSION);
+	major = (version >> 8) & 0xFF;
+	minor = version & 0xFF;
+	switch (version >> 16) {
+	case 0xC807:
+		name = "mali200";
+		break;
+	case 0xCE07:
+		name = "mali300";
+		break;
+	case 0xCD07:
+		name = "mali400";
+		break;
+	case 0xCF07:
+		name = "mali450";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+	/* major/minor are u32, so print them as unsigned */
+	dev_info(ip->dev->dev, "%s - %s version major %u minor %u\n",
+		 lima_ip_name(ip), name, major, minor);
+}
+
+/*
+ * Initialize one PP: log its version, soft-reset it and install its
+ * (shared) interrupt handler.
+ *
+ * Returns 0 on success, or the reset-wait / devm_request_irq() error.
+ */
+int lima_pp_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int ret;
+
+	lima_pp_print_version(ip);
+
+	ip->data.async_reset = false;
+	lima_pp_soft_reset_async(ip);
+
+	ret = lima_pp_soft_reset_async_wait(ip);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (ret)
+		dev_err(dev->dev, "pp %s fail to request irq\n",
+			lima_ip_name(ip));
+
+	return ret;
+}
+
+/* Teardown counterpart of lima_pp_init(); currently does nothing. */
+void lima_pp_fini(struct lima_ip *ip)
+{
+	
+}
+
+/*
+ * Initialize the PP broadcast unit by installing its (shared) interrupt
+ * handler.
+ *
+ * Returns 0 on success or the devm_request_irq() error after logging.
+ */
+int lima_pp_bcast_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int ret;
+
+	ret = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (ret)
+		dev_err(dev->dev, "pp %s fail to request irq\n",
+			lima_ip_name(ip));
+
+	return ret;
+}
+
+/* Teardown counterpart of lima_pp_bcast_init(); currently does nothing. */
+void lima_pp_bcast_fini(struct lima_ip *ip)
+{
+	
+}
+
+/*
+ * Validate a PP task before it is queued.
+ *
+ * The frame layout depends on whether the pipe has a broadcast processor
+ * (m450 frame) or not (m400 frame). The requested PP count must be at
+ * least 1 and no larger than the number of processors in the pipe.
+ *
+ * Returns 0 if the task is acceptable, -EINVAL otherwise.
+ */
+static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
+				 struct lima_sched_task *task)
+{
+	u32 num_pp;
+
+	if (pipe->bcast_processor) {
+		struct drm_lima_m450_pp_frame *m450 = task->frame;
+
+		num_pp = m450->num_pp;
+	} else {
+		struct drm_lima_m400_pp_frame *m400 = task->frame;
+
+		num_pp = m400->num_pp;
+	}
+
+	if (num_pp >= 1 && num_pp <= pipe->num_processor)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Start a PP task on the hardware.
+ *
+ * With a broadcast processor (m450 frame) the common frame registers are
+ * written once through the broadcast unit, per-PP stack (and, without
+ * DLBU, tile-list) addresses are then written to each PP individually, and
+ * rendering is started through the broadcast unit. Without one (m400
+ * frame) each PP is programmed and started on its own.
+ *
+ * In both cases pipe->task is set to the number of PPs used, which the
+ * interrupt handlers count down. The result of waiting for a pending soft
+ * reset is not checked here.
+ */
+static void lima_pp_task_run(struct lima_sched_pipe *pipe,
+			     struct lima_sched_task *task)
+{
+	if (pipe->bcast_processor) {
+		struct drm_lima_m450_pp_frame *frame = task->frame;
+		struct lima_device *dev = pipe->bcast_processor->dev;
+		struct lima_ip *ip = pipe->bcast_processor;
+		int i;
+
+		pipe->done = 0;
+		atomic_set(&pipe->task, frame->num_pp);
+
+		if (frame->use_dlbu) {
+			lima_dlbu_enable(dev, frame->num_pp);
+
+			/* with DLBU every PP gets the reserved DLBU address as its frame address */
+		        frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
+			lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
+		}
+		else
+			lima_dlbu_disable(dev);
+
+		lima_bcast_enable(dev, frame->num_pp);
+
+		lima_pp_soft_reset_async_wait(ip);
+
+		/* ip is the broadcast unit here */
+	        lima_pp_write_frame(ip, frame->frame, frame->wb);
+
+		for (i = 0; i < frame->num_pp; i++) {
+			/* shadows the outer ip so pp_write() targets this PP */
+			struct lima_ip *ip = pipe->processor[i];
+
+			pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
+			if (!frame->use_dlbu)
+				pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
+		}
+
+		/* back to the outer ip: start rendering through the broadcast unit */
+		pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
+	}
+	else {
+		struct drm_lima_m400_pp_frame *frame = task->frame;
+		int i;
+
+		atomic_set(&pipe->task, frame->num_pp);
+
+		for (i = 0; i < frame->num_pp; i++) {
+			struct lima_ip *ip = pipe->processor[i];
+
+			/* patch the per-PP addresses into the shared frame before writing it */
+			frame->frame[LIMA_PP_FRAME >> 2] =
+				frame->plbu_array_address[i];
+			frame->frame[LIMA_PP_STACK >> 2] =
+				frame->fragment_stack_address[i];
+
+			lima_pp_soft_reset_async_wait(ip);
+
+		        lima_pp_write_frame(ip, frame->frame, frame->wb);
+
+			pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
+		}
+	}
+}
+
+/*
+ * Called when a PP task has finished: kick off an asynchronous soft reset,
+ * through the broadcast unit if there is one, otherwise on every PP in the
+ * pipe.
+ */
+static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
+{
+	int idx;
+
+	if (pipe->bcast_processor) {
+		lima_pp_soft_reset_async(pipe->bcast_processor);
+		return;
+	}
+
+	for (idx = 0; idx < pipe->num_processor; idx++)
+		lima_pp_soft_reset_async(pipe->processor[idx]);
+}
+
+/*
+ * Error recovery for a failed PP task: log the interrupt state and status
+ * of every PP in the pipe and hard-reset each of them.
+ */
+static void lima_pp_task_error(struct lima_sched_pipe *pipe)
+{
+	int idx;
+
+	for (idx = 0; idx < pipe->num_processor; idx++) {
+		struct lima_ip *ip = pipe->processor[idx];
+
+		dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
+			idx, pp_read(LIMA_PP_INT_STATUS),
+			pp_read(LIMA_PP_STATUS));
+
+		lima_pp_hard_reset(ip);
+	}
+}
+
+/*
+ * MMU error callback for the PP pipe: decrement the outstanding counter as
+ * a PP interrupt would and complete the task when it reaches zero.
+ */
+static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
+{
+	if (atomic_dec_and_test(&pipe->task))
+		lima_sched_pipe_task_done(pipe);
+}
+
+/* Slab cache for PP tasks, shared by all devices and reference counted. */
+static struct kmem_cache *lima_pp_task_slab;
+static int lima_pp_task_slab_refcnt;
+
+/*
+ * Set up the PP scheduler pipe of @dev.
+ *
+ * The frame size depends on the GPU model (m400 vs. m450 frame layout).
+ * The task slab is created on first use, sized for a lima_sched_task
+ * followed by one frame, with the frame part whitelisted for usercopy.
+ * The pipe's callbacks are then pointed at the PP implementations.
+ *
+ * Returns 0 on success or -ENOMEM if the slab cannot be created.
+ */
+int lima_pp_pipe_init(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	int frame_size = dev->id == lima_gpu_mali400 ?
+		sizeof(struct drm_lima_m400_pp_frame) :
+		sizeof(struct drm_lima_m450_pp_frame);
+
+	if (!lima_pp_task_slab) {
+		lima_pp_task_slab = kmem_cache_create_usercopy(
+			"lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
+			0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
+			frame_size, NULL);
+		if (!lima_pp_task_slab)
+			return -ENOMEM;
+	}
+	lima_pp_task_slab_refcnt++;
+
+	pipe->frame_size = frame_size;
+	pipe->task_slab = lima_pp_task_slab;
+
+	pipe->task_validate = lima_pp_task_validate;
+	pipe->task_run = lima_pp_task_run;
+	pipe->task_fini = lima_pp_task_fini;
+	pipe->task_error = lima_pp_task_error;
+	pipe->task_mmu_error = lima_pp_task_mmu_error;
+
+	return 0;
+}
+
+/*
+ * Drop one reference on the shared PP task slab and destroy it when the
+ * last user goes away.
+ */
+void lima_pp_pipe_fini(struct lima_device *dev)
+{
+	lima_pp_task_slab_refcnt--;
+	if (lima_pp_task_slab_refcnt)
+		return;
+
+	kmem_cache_destroy(lima_pp_task_slab);
+	lima_pp_task_slab = NULL;
+}
diff --git a/drivers/gpu/drm/lima/lima_pp.h b/drivers/gpu/drm/lima/lima_pp.h
new file mode 100644
index 000000000000..f83f8cb4d30a
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pp.h
@@ -0,0 +1,19 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_PP_H__
+#define __LIMA_PP_H__
+
+struct lima_ip;
+struct lima_device;
+
+/* Set up / tear down a single PP IP block. */
+int lima_pp_init(struct lima_ip *ip);
+void lima_pp_fini(struct lima_ip *ip);
+
+/* Set up / tear down the PP broadcast IP block. */
+int lima_pp_bcast_init(struct lima_ip *ip);
+void lima_pp_bcast_fini(struct lima_ip *ip);
+
+/* Set up / tear down the PP scheduler pipe of a device. */
+int lima_pp_pipe_init(struct lima_device *dev);
+void lima_pp_pipe_fini(struct lima_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_regs.h b/drivers/gpu/drm/lima/lima_regs.h
new file mode 100644
index 000000000000..d5ade8fc8901
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_regs.h
@@ -0,0 +1,298 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2010-2017 ARM Limited. All rights reserved.
+ * Copyright 2017-2018 Qiang Yu <yuq825@gmail.com>
+ */
+
+#ifndef __LIMA_REGS_H__
+#define __LIMA_REGS_H__
+
+/* The register definitions in this file are collected from the
+ * official ARM Mali Utgard GPU kernel driver source code.
+ */
+
+/* PMU regs */
+#define LIMA_PMU_POWER_UP                  0x00
+#define LIMA_PMU_POWER_DOWN                0x04
+#define   LIMA_PMU_POWER_GP0_MASK          BIT(0)
+#define   LIMA_PMU_POWER_L2_MASK           BIT(1)
+/* power bit of PP number i; the argument is parenthesized so expressions work */
+#define   LIMA_PMU_POWER_PP_MASK(i)        BIT(2 + (i))
+
+/*
+ * On Mali450 each block automatically starts up its corresponding L2
+ * and the PPs are not fully independently controllable.
+ * Instead PP0, PP1-3 and PP4-7 can be turned on or off.
+ */
+#define   LIMA450_PMU_POWER_PP0_MASK       BIT(1)
+#define   LIMA450_PMU_POWER_PP13_MASK      BIT(2)
+#define   LIMA450_PMU_POWER_PP47_MASK      BIT(3)
+
+#define LIMA_PMU_STATUS                    0x08
+#define LIMA_PMU_INT_MASK                  0x0C
+#define LIMA_PMU_INT_RAWSTAT               0x10
+#define LIMA_PMU_INT_CLEAR                 0x18
+#define   LIMA_PMU_INT_CMD_MASK            BIT(0)
+#define LIMA_PMU_SW_DELAY                  0x1C
+
+/* L2 cache regs */
+#define LIMA_L2_CACHE_SIZE                   0x0004
+#define LIMA_L2_CACHE_STATUS                 0x0008
+#define   LIMA_L2_CACHE_STATUS_COMMAND_BUSY  BIT(0)
+#define   LIMA_L2_CACHE_STATUS_DATA_BUSY     BIT(1)
+#define LIMA_L2_CACHE_COMMAND                0x0010
+#define   LIMA_L2_CACHE_COMMAND_CLEAR_ALL    BIT(0)
+#define LIMA_L2_CACHE_CLEAR_PAGE             0x0014
+#define LIMA_L2_CACHE_MAX_READS              0x0018
+#define LIMA_L2_CACHE_ENABLE                 0x001C
+#define   LIMA_L2_CACHE_ENABLE_ACCESS        BIT(0)
+#define   LIMA_L2_CACHE_ENABLE_READ_ALLOCATE BIT(1)
+#define LIMA_L2_CACHE_PERFCNT_SRC0           0x0020
+#define LIMA_L2_CACHE_PERFCNT_VAL0           0x0024
+#define LIMA_L2_CACHE_PERFCNT_SRC1           0x0028
+#define LIMA_L2_CACHE_PERFCNT_VAL1           0x002C
+/* misspelled name kept as an alias for existing users */
+#define LIMA_L2_CACHE_ERFCNT_VAL1            LIMA_L2_CACHE_PERFCNT_VAL1
+
+/* GP regs */
+#define LIMA_GP_VSCL_START_ADDR                0x00
+#define LIMA_GP_VSCL_END_ADDR                  0x04
+#define LIMA_GP_PLBUCL_START_ADDR              0x08
+#define LIMA_GP_PLBUCL_END_ADDR                0x0c
+#define LIMA_GP_PLBU_ALLOC_START_ADDR          0x10
+#define LIMA_GP_PLBU_ALLOC_END_ADDR            0x14
+#define LIMA_GP_CMD                            0x20
+#define   LIMA_GP_CMD_START_VS                 BIT(0)
+#define   LIMA_GP_CMD_START_PLBU               BIT(1)
+#define   LIMA_GP_CMD_UPDATE_PLBU_ALLOC        BIT(4)
+#define   LIMA_GP_CMD_RESET                    BIT(5)
+#define   LIMA_GP_CMD_FORCE_HANG               BIT(6)
+#define   LIMA_GP_CMD_STOP_BUS                 BIT(9)
+#define   LIMA_GP_CMD_SOFT_RESET               BIT(10)
+#define LIMA_GP_INT_RAWSTAT                    0x24
+#define LIMA_GP_INT_CLEAR                      0x28
+#define LIMA_GP_INT_MASK                       0x2C
+#define LIMA_GP_INT_STAT                       0x30
+#define   LIMA_GP_IRQ_VS_END_CMD_LST           BIT(0)
+#define   LIMA_GP_IRQ_PLBU_END_CMD_LST         BIT(1)
+#define   LIMA_GP_IRQ_PLBU_OUT_OF_MEM          BIT(2)
+#define   LIMA_GP_IRQ_VS_SEM_IRQ               BIT(3)
+#define   LIMA_GP_IRQ_PLBU_SEM_IRQ             BIT(4)
+#define   LIMA_GP_IRQ_HANG                     BIT(5)
+#define   LIMA_GP_IRQ_FORCE_HANG               BIT(6)
+#define   LIMA_GP_IRQ_PERF_CNT_0_LIMIT         BIT(7)
+#define   LIMA_GP_IRQ_PERF_CNT_1_LIMIT         BIT(8)
+#define   LIMA_GP_IRQ_WRITE_BOUND_ERR          BIT(9)
+#define   LIMA_GP_IRQ_SYNC_ERROR               BIT(10)
+#define   LIMA_GP_IRQ_AXI_BUS_ERROR            BIT(11)
+#define   LIMA_GP_IRQ_AXI_BUS_STOPPED          BIT(12)
+#define   LIMA_GP_IRQ_VS_INVALID_CMD           BIT(13)
+#define   LIMA_GP_IRQ_PLB_INVALID_CMD          BIT(14)
+#define   LIMA_GP_IRQ_RESET_COMPLETED          BIT(19)
+#define   LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW      BIT(20)
+#define   LIMA_GP_IRQ_SEMAPHORE_OVERFLOW       BIT(21)
+#define   LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  BIT(22)
+#define LIMA_GP_WRITE_BOUND_LOW                0x34
+#define LIMA_GP_PERF_CNT_0_ENABLE              0x3C
+#define LIMA_GP_PERF_CNT_1_ENABLE              0x40
+#define LIMA_GP_PERF_CNT_0_SRC                 0x44
+#define LIMA_GP_PERF_CNT_1_SRC                 0x48
+#define LIMA_GP_PERF_CNT_0_VALUE               0x4C
+#define LIMA_GP_PERF_CNT_1_VALUE               0x50
+#define LIMA_GP_PERF_CNT_0_LIMIT               0x54
+#define LIMA_GP_STATUS                         0x68
+#define   LIMA_GP_STATUS_VS_ACTIVE             BIT(1)
+#define   LIMA_GP_STATUS_BUS_STOPPED           BIT(2)
+#define   LIMA_GP_STATUS_PLBU_ACTIVE           BIT(3)
+#define   LIMA_GP_STATUS_BUS_ERROR             BIT(6)
+#define   LIMA_GP_STATUS_WRITE_BOUND_ERR       BIT(8)
+#define LIMA_GP_VERSION                        0x6C
+#define LIMA_GP_VSCL_START_ADDR_READ           0x80
+#define LIMA_GP_PLBCL_START_ADDR_READ          0x84
+#define LIMA_GP_CONTR_AXI_BUS_ERROR_STAT       0x94
+
+/* Every GP interrupt bit defined above. */
+#define LIMA_GP_IRQ_MASK_ALL		   \
+	(				   \
+	 LIMA_GP_IRQ_VS_END_CMD_LST      | \
+	 LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
+	 LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
+	 LIMA_GP_IRQ_VS_SEM_IRQ          | \
+	 LIMA_GP_IRQ_PLBU_SEM_IRQ        | \
+	 LIMA_GP_IRQ_HANG                | \
+	 LIMA_GP_IRQ_FORCE_HANG          | \
+	 LIMA_GP_IRQ_PERF_CNT_0_LIMIT    | \
+	 LIMA_GP_IRQ_PERF_CNT_1_LIMIT    | \
+	 LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
+	 LIMA_GP_IRQ_SYNC_ERROR          | \
+	 LIMA_GP_IRQ_AXI_BUS_ERROR       | \
+	 LIMA_GP_IRQ_AXI_BUS_STOPPED     | \
+	 LIMA_GP_IRQ_VS_INVALID_CMD      | \
+	 LIMA_GP_IRQ_PLB_INVALID_CMD     | \
+	 LIMA_GP_IRQ_RESET_COMPLETED     | \
+	 LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
+	 LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
+	 LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* GP interrupt bits grouped as errors (LIMA_GP_IRQ_HANG is not included). */
+#define LIMA_GP_IRQ_MASK_ERROR             \
+	(                                  \
+	 LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
+	 LIMA_GP_IRQ_FORCE_HANG          | \
+	 LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
+	 LIMA_GP_IRQ_SYNC_ERROR          | \
+	 LIMA_GP_IRQ_AXI_BUS_ERROR       | \
+	 LIMA_GP_IRQ_VS_INVALID_CMD      | \
+	 LIMA_GP_IRQ_PLB_INVALID_CMD     | \
+	 LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
+	 LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
+	 LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+/* The two end-of-command-list bits plus all error bits. */
+#define LIMA_GP_IRQ_MASK_USED		   \
+	(				   \
+	 LIMA_GP_IRQ_VS_END_CMD_LST      | \
+	 LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
+	 LIMA_GP_IRQ_MASK_ERROR)
+
+/* PP regs */
+#define LIMA_PP_FRAME                        0x0000
+#define LIMA_PP_RSW                          0x0004
+#define LIMA_PP_STACK                        0x0030
+#define LIMA_PP_STACK_SIZE                   0x0034
+#define LIMA_PP_ORIGIN_OFFSET_X              0x0040
+/* base offset of write-back unit i; the argument is parenthesized so expressions work */
+#define LIMA_PP_WB(i)                        (0x0100 * ((i) + 1))
+#define   LIMA_PP_WB_SOURCE_SELECT           0x0000
+#define   LIMA_PP_WB_SOURCE_ADDR             0x0004
+#define LIMA_PP_VERSION                      0x1000
+#define LIMA_PP_CURRENT_REND_LIST_ADDR       0x1004
+#define LIMA_PP_STATUS                       0x1008
+#define   LIMA_PP_STATUS_RENDERING_ACTIVE    BIT(0)
+#define   LIMA_PP_STATUS_BUS_STOPPED         BIT(4)
+#define LIMA_PP_CTRL                         0x100c
+#define   LIMA_PP_CTRL_STOP_BUS              BIT(0)
+#define   LIMA_PP_CTRL_FLUSH_CACHES          BIT(3)
+#define   LIMA_PP_CTRL_FORCE_RESET           BIT(5)
+#define   LIMA_PP_CTRL_START_RENDERING       BIT(6)
+#define   LIMA_PP_CTRL_SOFT_RESET            BIT(7)
+#define LIMA_PP_INT_RAWSTAT                  0x1020
+#define LIMA_PP_INT_CLEAR                    0x1024
+#define LIMA_PP_INT_MASK                     0x1028
+#define LIMA_PP_INT_STATUS                   0x102c
+#define   LIMA_PP_IRQ_END_OF_FRAME           BIT(0)
+#define   LIMA_PP_IRQ_END_OF_TILE            BIT(1)
+#define   LIMA_PP_IRQ_HANG                   BIT(2)
+#define   LIMA_PP_IRQ_FORCE_HANG             BIT(3)
+#define   LIMA_PP_IRQ_BUS_ERROR              BIT(4)
+#define   LIMA_PP_IRQ_BUS_STOP               BIT(5)
+#define   LIMA_PP_IRQ_CNT_0_LIMIT            BIT(6)
+#define   LIMA_PP_IRQ_CNT_1_LIMIT            BIT(7)
+#define   LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR   BIT(8)
+#define   LIMA_PP_IRQ_INVALID_PLIST_COMMAND  BIT(9)
+#define   LIMA_PP_IRQ_CALL_STACK_UNDERFLOW   BIT(10)
+#define   LIMA_PP_IRQ_CALL_STACK_OVERFLOW    BIT(11)
+#define   LIMA_PP_IRQ_RESET_COMPLETED        BIT(12)
+#define LIMA_PP_WRITE_BOUNDARY_LOW           0x1044
+#define LIMA_PP_BUS_ERROR_STATUS             0x1050
+#define LIMA_PP_PERF_CNT_0_ENABLE            0x1080
+#define LIMA_PP_PERF_CNT_0_SRC               0x1084
+#define LIMA_PP_PERF_CNT_0_LIMIT             0x1088
+#define LIMA_PP_PERF_CNT_0_VALUE             0x108c
+#define LIMA_PP_PERF_CNT_1_ENABLE            0x10a0
+#define LIMA_PP_PERF_CNT_1_SRC               0x10a4
+#define LIMA_PP_PERF_CNT_1_LIMIT             0x10a8
+#define LIMA_PP_PERF_CNT_1_VALUE             0x10ac
+#define LIMA_PP_PERFMON_CONTR                0x10b0
+#define LIMA_PP_PERFMON_BASE                 0x10b4
+
+/* Every PP interrupt bit defined above. */
+#define LIMA_PP_IRQ_MASK_ALL                 \
+	(                                    \
+	 LIMA_PP_IRQ_END_OF_FRAME          | \
+	 LIMA_PP_IRQ_END_OF_TILE           | \
+	 LIMA_PP_IRQ_HANG                  | \
+	 LIMA_PP_IRQ_FORCE_HANG            | \
+	 LIMA_PP_IRQ_BUS_ERROR             | \
+	 LIMA_PP_IRQ_BUS_STOP              | \
+	 LIMA_PP_IRQ_CNT_0_LIMIT           | \
+	 LIMA_PP_IRQ_CNT_1_LIMIT           | \
+	 LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
+	 LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
+	 LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
+	 LIMA_PP_IRQ_CALL_STACK_OVERFLOW   | \
+	 LIMA_PP_IRQ_RESET_COMPLETED)
+
+/* PP interrupt bits grouped as errors (LIMA_PP_IRQ_HANG is not included). */
+#define LIMA_PP_IRQ_MASK_ERROR               \
+	(                                    \
+	 LIMA_PP_IRQ_FORCE_HANG            | \
+	 LIMA_PP_IRQ_BUS_ERROR             | \
+	 LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
+	 LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
+	 LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
+	 LIMA_PP_IRQ_CALL_STACK_OVERFLOW)
+
+/* End-of-frame plus all error bits. */
+#define LIMA_PP_IRQ_MASK_USED                \
+	(                                    \
+	 LIMA_PP_IRQ_END_OF_FRAME          | \
+	 LIMA_PP_IRQ_MASK_ERROR)
+
+/* MMU regs */
+#define LIMA_MMU_DTE_ADDR                     0x0000
+#define LIMA_MMU_STATUS                       0x0004
+#define   LIMA_MMU_STATUS_PAGING_ENABLED      BIT(0)
+#define   LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE   BIT(1)
+#define   LIMA_MMU_STATUS_STALL_ACTIVE        BIT(2)
+#define   LIMA_MMU_STATUS_IDLE                BIT(3)
+#define   LIMA_MMU_STATUS_REPLAY_BUFFER_EMPTY BIT(4)
+#define   LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE BIT(5)
+/* 5-bit bus id field at bits 10..6; the argument is parenthesized so expressions work */
+#define   LIMA_MMU_STATUS_BUS_ID(x)           (((x) >> 6) & 0x1F)
+#define LIMA_MMU_COMMAND                      0x0008
+#define   LIMA_MMU_COMMAND_ENABLE_PAGING      0x00
+#define   LIMA_MMU_COMMAND_DISABLE_PAGING     0x01
+#define   LIMA_MMU_COMMAND_ENABLE_STALL       0x02
+#define   LIMA_MMU_COMMAND_DISABLE_STALL      0x03
+#define   LIMA_MMU_COMMAND_ZAP_CACHE          0x04
+#define   LIMA_MMU_COMMAND_PAGE_FAULT_DONE    0x05
+#define   LIMA_MMU_COMMAND_HARD_RESET         0x06
+#define LIMA_MMU_PAGE_FAULT_ADDR              0x000C
+#define LIMA_MMU_ZAP_ONE_LINE                 0x0010
+#define LIMA_MMU_INT_RAWSTAT                  0x0014
+#define LIMA_MMU_INT_CLEAR                    0x0018
+#define LIMA_MMU_INT_MASK                     0x001C
+#define   LIMA_MMU_INT_PAGE_FAULT             BIT(0)
+#define   LIMA_MMU_INT_READ_BUS_ERROR         BIT(1)
+#define LIMA_MMU_INT_STATUS                   0x0020
+
+#define LIMA_VM_FLAG_PRESENT          BIT(0)
+#define LIMA_VM_FLAG_READ_PERMISSION  BIT(1)
+#define LIMA_VM_FLAG_WRITE_PERMISSION BIT(2)
+#define LIMA_VM_FLAG_OVERRIDE_CACHE   BIT(3)
+#define LIMA_VM_FLAG_WRITE_CACHEABLE  BIT(4)
+#define LIMA_VM_FLAG_WRITE_ALLOCATE   BIT(5)
+#define LIMA_VM_FLAG_WRITE_BUFFERABLE BIT(6)
+#define LIMA_VM_FLAG_READ_CACHEABLE   BIT(7)
+#define LIMA_VM_FLAG_READ_ALLOCATE    BIT(8)
+#define LIMA_VM_FLAG_MASK             0x1FF
+
+#define LIMA_VM_FLAGS_CACHE (			 \
+		LIMA_VM_FLAG_PRESENT |		 \
+		LIMA_VM_FLAG_READ_PERMISSION |	 \
+		LIMA_VM_FLAG_WRITE_PERMISSION |	 \
+		LIMA_VM_FLAG_OVERRIDE_CACHE |	 \
+		LIMA_VM_FLAG_WRITE_CACHEABLE |	 \
+		LIMA_VM_FLAG_WRITE_BUFFERABLE |	 \
+		LIMA_VM_FLAG_READ_CACHEABLE |	 \
+		LIMA_VM_FLAG_READ_ALLOCATE )
+
+#define LIMA_VM_FLAGS_UNCACHE (			\
+		LIMA_VM_FLAG_PRESENT |		\
+		LIMA_VM_FLAG_READ_PERMISSION |	\
+		LIMA_VM_FLAG_WRITE_PERMISSION )
+
+/* DLBU regs */
+#define LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR  0x0000
+#define	LIMA_DLBU_MASTER_TLLIST_VADDR      0x0004
+#define	LIMA_DLBU_TLLIST_VBASEADDR         0x0008
+#define	LIMA_DLBU_FB_DIM                   0x000C
+#define	LIMA_DLBU_TLLIST_CONF              0x0010
+#define	LIMA_DLBU_START_TILE_POS           0x0014
+#define	LIMA_DLBU_PP_ENABLE_MASK           0x0018
+
+/* BCAST regs */
+#define LIMA_BCAST_BROADCAST_MASK    0x0
+#define LIMA_BCAST_INTERRUPT_MASK    0x4
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..539b29ce5e9a
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,398 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "lima_drv.h"
+#include "lima_sched.h"
+#include "lima_vm.h"
+#include "lima_mmu.h"
+#include "lima_l2_cache.h"
+#include "lima_object.h"
+
+/*
+ * Per-pipe fence: a dma_fence plus the pipe it was created on.
+ * lima_fence_get_timeline_name() reads the pipe's scheduler name
+ * through the pipe pointer.
+ */
+struct lima_fence {
+	struct dma_fence base;
+	struct lima_sched_pipe *pipe;
+};
+
+/* slab cache from which every struct lima_fence is allocated */
+static struct kmem_cache *lima_fence_slab;
+
+/*
+ * Create the "lima_fence" slab cache.
+ *
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int lima_sched_slab_init(void)
+{
+	lima_fence_slab = kmem_cache_create("lima_fence",
+					    sizeof(struct lima_fence), 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+
+	return lima_fence_slab ? 0 : -ENOMEM;
+}
+
+/* Destroy the slab cache created by lima_sched_slab_init(). */
+void lima_sched_slab_fini(void)
+{
+	kmem_cache_destroy(lima_fence_slab);
+}
+
+/* Convert a dma_fence embedded in a struct lima_fence back to its container. */
+static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct lima_fence, base);
+}
+
+/* dma_fence_ops.get_driver_name: always reports "lima". */
+static const char *lima_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "lima";
+}
+
+/* dma_fence_ops.get_timeline_name: the name of the owning pipe's scheduler. */
+static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return to_lima_fence(fence)->pipe->base.name;
+}
+
+/* dma_fence_ops.enable_signaling: nothing to set up, always returns true. */
+static bool lima_fence_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+/* RCU callback queued by lima_fence_release(): give the fence back to the slab. */
+static void lima_fence_release_rcu(struct rcu_head *rcu)
+{
+	struct dma_fence *base = container_of(rcu, struct dma_fence, rcu);
+
+	kmem_cache_free(lima_fence_slab, to_lima_fence(base));
+}
+
+/*
+ * dma_fence_ops.release: defer the actual free to an RCU callback
+ * (lima_fence_release_rcu) instead of freeing the fence immediately.
+ */
+static void lima_fence_release(struct dma_fence *fence)
+{
+	call_rcu(&fence->rcu, lima_fence_release_rcu);
+}
+
+/* dma_fence callbacks for fences created by lima_fence_create() */
+static const struct dma_fence_ops lima_fence_ops = {
+	.get_driver_name = lima_fence_get_driver_name,
+	.get_timeline_name = lima_fence_get_timeline_name,
+	.enable_signaling = lima_fence_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = lima_fence_release,
+};
+
+/*
+ * Allocate and initialise a fence on @pipe's fence context, using the
+ * next sequence number of the pipe (fence_seqno is incremented here).
+ *
+ * Returns the new fence, or NULL if the slab allocation failed.
+ */
+static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
+{
+	struct lima_fence *f = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
+
+	if (f) {
+		f->pipe = pipe;
+		dma_fence_init(&f->base, &lima_fence_ops, &pipe->fence_lock,
+			       pipe->fence_context, ++pipe->fence_seqno);
+	}
+
+	return f;
+}
+
+/* Convert a drm_sched_job embedded in a lima_sched_task back to its container. */
+static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
+{
+	return container_of(job, struct lima_sched_task, base);
+}
+
+/* Convert a drm_gpu_scheduler embedded in a lima_sched_pipe back to its container. */
+static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
+{
+	return container_of(sched, struct lima_sched_pipe, base);
+}
+
+/*
+ * Initialise @task as a scheduler job of @context.
+ *
+ * The task keeps its own copy of the @bos array, one GEM reference per
+ * bo and one reference on @vm; lima_sched_task_fini() drops them again.
+ *
+ * Returns 0 on success, -ENOMEM if the bo array cannot be copied, or the
+ * error from drm_sched_job_init(). On failure the task holds no
+ * references and task->bos is NULL.
+ */
+int lima_sched_task_init(struct lima_sched_task *task,
+			 struct lima_sched_context *context,
+			 struct lima_bo **bos, int num_bos,
+			 struct lima_vm *vm)
+{
+	int err, i;
+
+	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
+	if (!task->bos)
+		return -ENOMEM;
+
+	/*
+	 * Initialise the job before taking any bo reference so that the
+	 * error path has nothing but the array copy to undo; taking the
+	 * references first leaked them when the job init failed.
+	 */
+	err = drm_sched_job_init(&task->base, &context->base, vm);
+	if (err) {
+		kfree(task->bos);
+		task->bos = NULL;
+		return err;
+	}
+
+	for (i = 0; i < num_bos; i++)
+		drm_gem_object_get(&bos[i]->gem);
+
+	task->num_bos = num_bos;
+	task->vm = lima_vm_get(vm);
+	return 0;
+}
+
+/*
+ * Undo lima_sched_task_init() and lima_sched_task_add_dep(): clean up
+ * the scheduler job, drop all remaining dependency fences, the bo
+ * references with the bo array, and finally the VM reference.
+ */
+void lima_sched_task_fini(struct lima_sched_task *task)
+{
+	struct lima_bo **bos = task->bos;
+	int idx;
+
+	drm_sched_job_cleanup(&task->base);
+
+	for (idx = 0; idx < task->num_dep; idx++)
+		dma_fence_put(task->dep[idx]);
+	kfree(task->dep);
+
+	/* no bo array means no bo references were recorded */
+	for (idx = 0; bos && idx < task->num_bos; idx++)
+		drm_gem_object_put_unlocked(&bos[idx]->gem);
+	kfree(bos);
+
+	lima_vm_put(task->vm);
+}
+
+/*
+ * Record @fence as a dependency of @task.
+ *
+ * On success the task owns the caller's reference to @fence (it is
+ * either stored or dropped). Returns 0 on success, or -ENOMEM when the
+ * dependency array cannot be grown; in that case @fence is left untouched.
+ */
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
+{
+	int idx, wanted = 4;
+
+	/* a fence of the task's own context is definitely earlier than this task */
+	if (fence->context == task->base.s_fence->finished.context) {
+		dma_fence_put(fence);
+		return 0;
+	}
+
+	/* array is full: double it; an empty array starts with 4 slots */
+	if (task->dep && task->num_dep == task->max_dep)
+		wanted = task->max_dep * 2;
+
+	if (wanted > task->max_dep) {
+		void *grown = krealloc(task->dep, sizeof(*task->dep) * wanted,
+				       GFP_KERNEL);
+
+		if (!grown)
+			return -ENOMEM;
+		task->max_dep = wanted;
+		task->dep = grown;
+	}
+
+	/* a later fence of an already recorded context replaces the old one */
+	for (idx = 0; idx < task->num_dep; idx++) {
+		struct dma_fence *old = task->dep[idx];
+
+		if (old->context != fence->context ||
+		    !dma_fence_is_later(fence, old))
+			continue;
+
+		dma_fence_put(old);
+		task->dep[idx] = fence;
+		return 0;
+	}
+
+	task->dep[task->num_dep++] = fence;
+	return 0;
+}
+
+/*
+ * Initialise @context as a scheduler entity on @pipe's
+ * DRM_SCHED_PRIORITY_NORMAL run queue; @guilty is passed through to
+ * drm_sched_entity_init(). Returns that function's result.
+ */
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+			    struct lima_sched_context *context,
+			    atomic_t *guilty)
+{
+	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
+
+	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
+}
+
+/* Tear down the scheduler entity of @context; @pipe is not used here. */
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+			     struct lima_sched_context *context)
+{
+	drm_sched_entity_fini(&context->base);
+}
+
+/*
+ * Push @task to the scheduler through @context.
+ *
+ * Returns the task's scheduler "finished" fence with an extra reference
+ * that the caller has to drop.
+ */
+struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
+						struct lima_sched_task *task)
+{
+	/*
+	 * The reference is taken before the job is handed over —
+	 * presumably because the task may run and be freed at any time
+	 * once pushed; TODO confirm.
+	 */
+	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);
+
+	drm_sched_entity_push_job(&task->base, &context->base);
+	return fence;
+}
+
+/*
+ * drm_sched_backend_ops.dependency: hand out the next unsignaled
+ * dependency fence of the job, or NULL when none is left.
+ *
+ * Every slot visited is cleared. The reference of a returned fence goes
+ * to the caller; references of already signaled fences are dropped here.
+ */
+static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
+					       struct drm_sched_entity *entity)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	int idx;
+
+	for (idx = 0; idx < task->num_dep; idx++) {
+		struct dma_fence *dep = task->dep[idx];
+
+		if (dep) {
+			task->dep[idx] = NULL;
+
+			if (!dma_fence_is_signaled(dep))
+				return dep;
+
+			dma_fence_put(dep);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * drm_sched_backend_ops.run_job: start @job on its pipe.
+ *
+ * Creates the pipe fence of the task, flushes the pipe's L2 caches,
+ * switches the MMU(s) to the task's VM when it differs from the VM
+ * currently programmed, and kicks the hardware through pipe->task_run.
+ *
+ * Returns the pipe fence with one reference for the caller, or NULL when
+ * the job already carries an error or the fence cannot be allocated.
+ */
+static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_fence *fence;
+	struct dma_fence *ret;
+	struct lima_vm *vm = NULL, *last_vm = NULL;
+	int i;
+
+	/* after GPU reset */
+	if (job->s_fence->finished.error < 0)
+		return NULL;
+
+	fence = lima_fence_create(pipe);
+	if (!fence)
+		return NULL;
+	task->fence = &fence->base;
+
+	/* for caller usage of the fence, otherwise irq handler
+	 * may consume the fence before caller use it */
+	ret = dma_fence_get(task->fence);
+
+	pipe->current_task = task;
+
+	/* this is needed for MMU to work correctly, otherwise GP/PP
+	 * will hang or page fault for unknown reason after running for
+	 * a while.
+	 *
+	 * Need to investigate:
+	 * 1. is it related to TLB
+	 * 2. how much performance will be affected by L2 cache flush
+	 * 3. can we reduce the calling of this function because all
+	 *    GP/PP use the same L2 cache on mali400
+	 *
+	 * TODO:
+	 * 1. move this to task fini to save some wait time?
+	 * 2. when GP/PP use different l2 cache, need PP wait GP l2
+	 *    cache flush?
+	 */
+	for (i = 0; i < pipe->num_l2_cache; i++)
+		lima_l2_cache_flush(pipe->l2_cache[i]);
+
+	/*
+	 * vm stays NULL when the task's VM is already the current one;
+	 * otherwise the pipe takes a reference on the new VM and the
+	 * reference on the previous one is dropped after the switch.
+	 */
+	if (task->vm != pipe->current_vm) {
+		vm = lima_vm_get(task->vm);
+		last_vm = pipe->current_vm;
+		pipe->current_vm = task->vm;
+	}
+
+	if (pipe->bcast_mmu)
+		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
+	else {
+		for (i = 0; i < pipe->num_mmu; i++)
+			lima_mmu_switch_vm(pipe->mmu[i], vm);
+	}
+
+	if (last_vm)
+		lima_vm_put(last_vm);
+
+	pipe->error = false;
+	pipe->task_run(pipe, task);
+
+	/* hand out the pointer whose reference was taken above */
+	return ret;
+}
+
+/*
+ * Recover @pipe after @task failed or timed out.
+ *
+ * The scheduler thread is parked for the whole sequence: the hardware
+ * job state is reset through drm_sched, the pipe's task_error hook
+ * runs, the MMU(s) are resumed from page fault, the pipe forgets its
+ * current VM (dropping the reference) and current task, and finally
+ * drm_sched job recovery is started before the thread is unparked.
+ */
+static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
+					 struct lima_sched_task *task)
+{
+	kthread_park(pipe->base.thread);
+	drm_sched_hw_job_reset(&pipe->base, &task->base);
+
+	pipe->task_error(pipe);
+
+	/* same broadcast-or-each-MMU selection as in lima_sched_run_job() */
+	if (pipe->bcast_mmu)
+		lima_mmu_page_fault_resume(pipe->bcast_mmu);
+	else {
+		int i;
+		for (i = 0; i < pipe->num_mmu; i++)
+			lima_mmu_page_fault_resume(pipe->mmu[i]);
+	}
+
+	/* with current_vm cleared the next run_job switches the VM again */
+	if (pipe->current_vm)
+		lima_vm_put(pipe->current_vm);
+
+	pipe->current_vm = NULL;
+	pipe->current_task = NULL;
+
+	drm_sched_job_recovery(&pipe->base);
+	kthread_unpark(pipe->base.thread);
+}
+
+/*
+ * drm_sched_backend_ops.timedout_job: log the timeout and run the common
+ * error recovery for the job's pipe.
+ */
+static void lima_sched_timedout_job(struct drm_sched_job *job)
+{
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_sched_task *task = to_lima_task(job);
+
+	DRM_ERROR("lima job timeout\n");
+
+	lima_sched_handle_error_task(pipe, task);
+}
+
+/*
+ * drm_sched_backend_ops.free_job: release everything owned by a job:
+ * the pipe fence, each bo's mapping reference in the task's VM, the
+ * resources dropped by lima_sched_task_fini(), and the task itself,
+ * which goes back to the pipe's task slab.
+ */
+static void lima_sched_free_job(struct drm_sched_job *job)
+{
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_sched_task *task = to_lima_task(job);
+	int idx;
+
+	dma_fence_put(task->fence);
+
+	for (idx = 0; idx < task->num_bos; idx++)
+		lima_vm_bo_del(task->vm, task->bos[idx]);
+
+	lima_sched_task_fini(task);
+	kmem_cache_free(pipe->task_slab, task);
+}
+
+/* scheduler backend callbacks, registered by lima_sched_pipe_init() */
+const struct drm_sched_backend_ops lima_sched_ops = {
+	.dependency = lima_sched_dependency,
+	.run_job = lima_sched_run_job,
+	.timedout_job = lima_sched_timedout_job,
+	.free_job = lima_sched_free_job,
+};
+
+/*
+ * Work item scheduled by lima_sched_pipe_task_done() when the pipe is
+ * flagged with an error: recover the pipe's current task.
+ */
+static void lima_sched_error_work(struct work_struct *work)
+{
+	struct lima_sched_pipe *pipe;
+
+	pipe = container_of(work, struct lima_sched_pipe, error_work);
+	lima_sched_handle_error_task(pipe, pipe->current_task);
+}
+
+/*
+ * Set up @pipe's fence context, fence lock and error work item, then
+ * register it as a drm scheduler called @name.
+ *
+ * The job timeout is lima_sched_timeout_ms; a value <= 0 disables it
+ * (MAX_SCHEDULE_TIMEOUT). Returns the result of drm_sched_init().
+ */
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+
+	if (lima_sched_timeout_ms > 0)
+		timeout = msecs_to_jiffies(lima_sched_timeout_ms);
+
+	pipe->fence_context = dma_fence_context_alloc(1);
+	spin_lock_init(&pipe->fence_lock);
+
+	INIT_WORK(&pipe->error_work, lima_sched_error_work);
+
+	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
+}
+
+/* Tear down the drm scheduler registered by lima_sched_pipe_init(). */
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
+{
+	drm_sched_fini(&pipe->base);
+}
+
+/*
+ * Completion entry point for the pipe's current task.
+ *
+ * When the pipe is flagged with an error, recovery is deferred to the
+ * error work item. Otherwise the pipe's task_fini hook runs and the
+ * task's pipe fence is signaled.
+ */
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
+{
+	struct lima_sched_task *task;
+
+	if (pipe->error) {
+		schedule_work(&pipe->error_work);
+		return;
+	}
+
+	task = pipe->current_task;
+
+	pipe->task_fini(pipe);
+	dma_fence_signal(task->fence);
+}
diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
new file mode 100644
index 000000000000..44985e4da3fb
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.h
@@ -0,0 +1,104 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_SCHED_H__
+#define __LIMA_SCHED_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct lima_vm;
+
+/* One GPU task: a drm scheduler job plus the resources it keeps alive. */
+struct lima_sched_task {
+	struct drm_sched_job base;
+
+	/* VM the task runs in; referenced by lima_sched_task_init() */
+	struct lima_vm *vm;
+	void *frame;
+
+	/*
+	 * Dependency fences added by lima_sched_task_add_dep(): num_dep
+	 * used slots out of max_dep allocated ones. Slots are cleared as
+	 * lima_sched_dependency() consumes them.
+	 */
+	struct dma_fence **dep;
+	int num_dep;
+	int max_dep;
+
+	/* private copy of the bo array; each bo is referenced by the task */
+	struct lima_bo **bos;
+	int num_bos;
+
+	/* pipe fence */
+	struct dma_fence *fence;
+};
+
+/* Scheduler entity through which tasks are queued to one pipe. */
+struct lima_sched_context {
+	struct drm_sched_entity base;
+};
+
+#define LIMA_SCHED_PIPE_MAX_MMU       8
+#define LIMA_SCHED_PIPE_MAX_L2_CACHE  2
+#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
+
+struct lima_ip;
+
+/*
+ * One scheduling pipe: a drm scheduler together with the hardware
+ * blocks (MMUs, L2 caches, processors) it drives and the hooks that
+ * implement the pipe-specific parts.
+ */
+struct lima_sched_pipe {
+	struct drm_gpu_scheduler base;
+
+	/* context, last used seqno and lock of the fences of this pipe */
+	u64 fence_context;
+	u32 fence_seqno;
+	spinlock_t fence_lock;
+
+	/* task handed to the hardware by the last run_job */
+	struct lima_sched_task *current_task;
+	/* VM currently programmed into the MMU(s); the pipe holds a reference */
+	struct lima_vm *current_vm;
+
+	struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
+	int num_mmu;
+
+	struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
+	int num_l2_cache;
+
+	struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
+	int num_processor;
+
+	struct lima_ip *bcast_processor;
+	/* when set, used instead of looping over mmu[] */
+	struct lima_ip *bcast_mmu;
+
+	u32 done;
+	/* set by lima_sched_pipe_mmu_error(), cleared before each task_run */
+	bool error;
+	atomic_t task;
+
+	int frame_size;
+	/* slab to which lima_sched_free_job() returns tasks */
+	struct kmem_cache *task_slab;
+
+	/* pipe-specific hooks */
+	int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+	void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+	void (*task_fini)(struct lima_sched_pipe *pipe);
+	void (*task_error)(struct lima_sched_pipe *pipe);
+	void (*task_mmu_error)(struct lima_sched_pipe *pipe);
+
+	/* runs lima_sched_error_work() to recover a failed task */
+	struct work_struct error_work;
+};
+
+int lima_sched_task_init(struct lima_sched_task *task,
+			 struct lima_sched_context *context,
+			 struct lima_bo **bos, int num_bos,
+			 struct lima_vm *vm);
+void lima_sched_task_fini(struct lima_sched_task *task);
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+			    struct lima_sched_context *context,
+			    atomic_t *guilty);
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+			     struct lima_sched_context *context);
+struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
+						struct lima_sched_task *task);
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
+
+/* Flag @pipe as failed and invoke its task_mmu_error hook. */
+static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
+{
+	pipe->error = true;
+	pipe->task_mmu_error(pipe);
+}
+
+int lima_sched_slab_init(void);
+void lima_sched_slab_fini(void);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
new file mode 100644
index 000000000000..39eba3fae019
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_vm.c
@@ -0,0 +1,280 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include "lima_device.h"
+#include "lima_vm.h"
+#include "lima_object.h"
+#include "lima_regs.h"
+
+/* Mapping of one bo into one VM; linked into the bo's va list. */
+struct lima_bo_va {
+	struct list_head list;
+	/* users of the mapping, see lima_vm_bo_add()/lima_vm_bo_del() */
+	unsigned ref_count;
+
+	/* VA range allocated from vm->mm */
+	struct drm_mm_node node;
+
+	struct lima_vm *vm;
+};
+
+/*
+ * Virtual address decomposition.
+ *
+ * PDE: page directory index (va >> 22, one entry per 4 MiB).
+ * PTE: entry index within one page table (va bits 12..21).
+ * PBE: index of the page-table block in vm->bts; a block groups
+ *      LIMA_VM_NUM_PT_PER_BT consecutive page tables.
+ * BTE: entry index within such a block.
+ *
+ * The macro argument is parenthesized so that expression arguments
+ * expand correctly.
+ */
+#define LIMA_VM_PD_SHIFT 22
+#define LIMA_VM_PT_SHIFT 12
+#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
+#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
+
+#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
+#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
+
+#define LIMA_PDE(va) ((va) >> LIMA_VM_PD_SHIFT)
+#define LIMA_PTE(va) (((va) & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
+#define LIMA_PBE(va) ((va) >> LIMA_VM_PB_SHIFT)
+#define LIMA_BTE(va) (((va) & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
+
+
+/*
+ * Clear the page table entries covering the VA range [@start, @end]
+ * (inclusive) of @vm, one entry per LIMA_PAGE_SIZE step.
+ *
+ * The page-table block of every address in the range must already be
+ * allocated: vm->bts[].cpu is dereferenced unconditionally.
+ */
+static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
+{
+	/*
+	 * 64-bit iterator, as in lima_vm_map_page_table(): a u32 would
+	 * wrap to a small value when @end lies in the last page of the
+	 * 32-bit range and the loop would never terminate.
+	 */
+	u64 addr;
+
+	for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
+		u32 pbe = LIMA_PBE(addr);
+		u32 bte = LIMA_BTE(addr);
+
+		vm->bts[pbe].cpu[bte] = 0;
+	}
+}
+
+/*
+ * Map the pages listed in @dma to the VA range [@start, @end]
+ * (inclusive) of @vm: one @dma entry per LIMA_PAGE_SIZE step, written
+ * with LIMA_VM_FLAGS_CACHE.
+ *
+ * Page-table blocks are allocated on first use and hooked into the page
+ * directory. Returns 0 on success. If a block allocation fails, the
+ * entries written so far are cleared again and -ENOMEM is returned;
+ * blocks that were already allocated stay in vm->bts.
+ */
+static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
+				  u32 start, u32 end)
+{
+	/* wider than the u32 range bounds, so addr += LIMA_PAGE_SIZE cannot wrap */
+	u64 addr;
+	int i = 0;
+
+	for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
+		u32 pbe = LIMA_PBE(addr);
+		u32 bte = LIMA_BTE(addr);
+
+		if (!vm->bts[pbe].cpu) {
+			dma_addr_t pts;
+			u32 *pd;
+			int j;
+
+			vm->bts[pbe].cpu = dma_alloc_wc(
+				vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
+				&vm->bts[pbe].dma, GFP_KERNEL | __GFP_ZERO);
+			if (!vm->bts[pbe].cpu) {
+				/* nothing was written yet if the very first page failed */
+				if (addr != start)
+					lima_vm_unmap_page_table(vm, start, addr - 1);
+				return -ENOMEM;
+			}
+
+			/*
+			 * Point the LIMA_VM_NUM_PT_PER_BT page directory
+			 * entries of this block at its consecutive
+			 * LIMA_PAGE_SIZE page tables.
+			 */
+			pts = vm->bts[pbe].dma;
+			pd = vm->pd.cpu + (pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT);
+			for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
+				pd[j] = pts | LIMA_VM_FLAG_PRESENT;
+				pts += LIMA_PAGE_SIZE;
+			}
+		}
+
+		vm->bts[pbe].cpu[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the mapping of @bo in @vm on the bo's va list.
+ * Returns the mapping, or NULL if @bo is not mapped in @vm.
+ */
+static struct lima_bo_va *
+lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
+{
+	struct lima_bo_va *bo_va;
+
+	list_for_each_entry(bo_va, &bo->va, list) {
+		if (bo_va->vm == vm)
+			return bo_va;
+	}
+
+	return NULL;
+}
+
+/*
+ * Take a reference on the mapping of @bo in @vm, creating the mapping
+ * if necessary.
+ *
+ * If @bo is already mapped in @vm only the mapping's ref_count is
+ * incremented. Otherwise, when @create is true, a VA range of
+ * bo->gem.size is allocated from vm->mm and the bo's pages are mapped
+ * into it.
+ *
+ * Returns 0 on success, -ENOENT if there is no mapping and @create is
+ * false, -ENOMEM if the bookkeeping allocation fails, or the error of
+ * drm_mm_insert_node() / lima_vm_map_page_table().
+ */
+int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
+{
+	struct lima_bo_va *bo_va;
+	int err;
+
+	/* bo->lock is taken first; vm->lock nests inside it */
+	mutex_lock(&bo->lock);
+
+	bo_va = lima_vm_bo_find(vm, bo);
+	if (bo_va) {
+		bo_va->ref_count++;
+		mutex_unlock(&bo->lock);
+		return 0;
+	}
+
+	/* should not create new bo_va if not asked by caller */
+	if (!create) {
+		mutex_unlock(&bo->lock);
+		return -ENOENT;
+	}
+
+	bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
+	if (!bo_va) {
+		err = -ENOMEM;
+		goto err_out0;
+	}
+
+	bo_va->vm = vm;
+	bo_va->ref_count = 1;
+
+	mutex_lock(&vm->lock);
+
+	err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
+	if (err)
+		goto err_out1;
+
+	err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
+				     bo_va->node.start + bo_va->node.size - 1);
+	if (err)
+	        goto err_out2;
+
+	mutex_unlock(&vm->lock);
+
+	list_add_tail(&bo_va->list, &bo->va);
+
+	mutex_unlock(&bo->lock);
+	return 0;
+
+	/* unwind in reverse order of acquisition */
+err_out2:
+	drm_mm_remove_node(&bo_va->node);
+err_out1:
+	mutex_unlock(&vm->lock);
+	kfree(bo_va);
+err_out0:
+	mutex_unlock(&bo->lock);
+	return err;
+}
+
+/*
+ * Drop one reference on the mapping of @bo in @vm. When the last
+ * reference goes away the page table entries are cleared, the VA range
+ * is returned to vm->mm and the mapping is freed.
+ *
+ * NOTE(review): the lookup result is dereferenced without a NULL check,
+ * so @bo must currently be mapped in @vm.
+ */
+void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo)
+{
+	struct lima_bo_va *bo_va;
+
+	mutex_lock(&bo->lock);
+
+	bo_va = lima_vm_bo_find(vm, bo);
+	/* other users remain: keep the mapping */
+	if (--bo_va->ref_count > 0) {
+		mutex_unlock(&bo->lock);
+		return;
+	}
+
+	mutex_lock(&vm->lock);
+
+	lima_vm_unmap_page_table(vm, bo_va->node.start,
+				 bo_va->node.start + bo_va->node.size - 1);
+
+	drm_mm_remove_node(&bo_va->node);
+
+	mutex_unlock(&vm->lock);
+
+	list_del(&bo_va->list);
+
+	mutex_unlock(&bo->lock);
+
+	kfree(bo_va);
+}
+
+/*
+ * Return the start of the VA range at which @bo is mapped in @vm.
+ *
+ * NOTE(review): the mapping is dereferenced without a NULL check, so
+ * @bo must currently be mapped in @vm.
+ */
+u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo)
+{
+	u32 va;
+
+	mutex_lock(&bo->lock);
+	va = lima_vm_bo_find(vm, bo)->node.start;
+	mutex_unlock(&bo->lock);
+
+	return va;
+}
+
+/*
+ * Allocate a VM for @dev with an initial reference count of one.
+ *
+ * Allocates the zeroed page directory page, maps the device's DLBU page
+ * at LIMA_VA_RESERVE_DLBU when dev->dlbu_cpu is set, and initialises
+ * the VA allocator for [dev->va_start, dev->va_end).
+ *
+ * Returns the new VM, or NULL on any allocation or mapping failure.
+ */
+struct lima_vm *lima_vm_create(struct lima_device *dev)
+{
+	struct lima_vm *vm;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return NULL;
+
+	vm->dev = dev;
+	mutex_init(&vm->lock);
+	kref_init(&vm->refcount);
+
+	vm->pd.cpu = dma_alloc_wc(dev->dev, LIMA_PAGE_SIZE, &vm->pd.dma,
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!vm->pd.cpu)
+		goto err_out0;
+
+	if (dev->dlbu_cpu) {
+		/*
+		 * Single-page mapping: it can only fail on its page-table
+		 * block allocation, so err_out1 has no block to free.
+		 */
+		int err = lima_vm_map_page_table(
+			vm, &dev->dlbu_dma, LIMA_VA_RESERVE_DLBU,
+			LIMA_VA_RESERVE_DLBU + LIMA_PAGE_SIZE - 1);
+		if (err)
+			goto err_out1;
+	}
+
+	drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);
+
+	return vm;
+
+err_out1:
+	dma_free_wc(dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
+err_out0:
+	kfree(vm);
+	return NULL;
+}
+
+/*
+ * kref release callback of a VM (see lima_vm_put()): take down the VA
+ * allocator, free every allocated page-table block and the page
+ * directory, then free the VM itself.
+ */
+void lima_vm_release(struct kref *kref)
+{
+	struct lima_vm *vm = container_of(kref, struct lima_vm, refcount);
+	struct lima_vm_page *bt;
+
+	drm_mm_takedown(&vm->mm);
+
+	for (bt = vm->bts; bt < vm->bts + LIMA_VM_NUM_BT; bt++) {
+		if (!bt->cpu)
+			continue;
+
+		dma_free_wc(vm->dev->dev,
+			    LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
+			    bt->cpu, bt->dma);
+	}
+
+	if (vm->pd.cpu)
+		dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
+
+	kfree(vm);
+}
+
+/*
+ * Debug helper: dump the page tables of @vm to the kernel log.
+ *
+ * For every allocated page-table block, each of its page directory
+ * entries is printed followed by the non-zero entries of the
+ * corresponding page table. Does nothing without a page directory.
+ */
+void lima_vm_print(struct lima_vm *vm)
+{
+	int i, j, k;
+	u32 *pd, *pt;
+
+	if (!vm->pd.cpu)
+		return;
+
+	pd = vm->pd.cpu;
+	for (i = 0; i < LIMA_VM_NUM_BT; i++) {
+		if (!vm->bts[i].cpu)
+			continue;
+
+		/* pt walks linearly through all page tables of the block */
+		pt = vm->bts[i].cpu;
+		for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
+			int idx = (i << LIMA_VM_NUM_PT_PER_BT_SHIFT) + j;
+			printk(KERN_INFO "lima vm pd %03x:%08x\n", idx, pd[idx]);
+
+			for (k = 0; k < LIMA_PAGE_ENT_NUM; k++) {
+				u32 pte = *pt++;
+				if (pte)
+					printk(KERN_INFO "  pt %03x:%08x\n", k, pte);
+			}
+		}
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_vm.h b/drivers/gpu/drm/lima/lima_vm.h
new file mode 100644
index 000000000000..a135e2f05315
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_vm.h
@@ -0,0 +1,62 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_VM_H__
+#define __LIMA_VM_H__
+
+#include <drm/drm_mm.h>
+#include <linux/kref.h>
+
+/* size of one page directory / page table page and of one mapped page */
+#define LIMA_PAGE_SIZE    4096
+#define LIMA_PAGE_MASK    (LIMA_PAGE_SIZE - 1)
+/* u32 entries per page: 1024 */
+#define LIMA_PAGE_ENT_NUM (LIMA_PAGE_SIZE / sizeof(u32))
+
+/*
+ * Page tables are allocated in blocks ("BT") of 8 consecutive page
+ * tables, which gives 1024 / 8 = 128 blocks per VM.
+ */
+#define LIMA_VM_NUM_PT_PER_BT_SHIFT 3
+#define LIMA_VM_NUM_PT_PER_BT (1 << LIMA_VM_NUM_PT_PER_BT_SHIFT)
+#define LIMA_VM_NUM_BT (LIMA_PAGE_ENT_NUM >> LIMA_VM_NUM_PT_PER_BT_SHIFT)
+
+/*
+ * Reserved top of the VA space; lima_vm_create() maps the DLBU page at
+ * LIMA_VA_RESERVE_DLBU. LIMA_VA_RESERVE_END does not fit in 32 bits.
+ */
+#define LIMA_VA_RESERVE_START  0xFFF00000
+#define LIMA_VA_RESERVE_DLBU   LIMA_VA_RESERVE_START
+#define LIMA_VA_RESERVE_END    0x100000000
+
+struct lima_device;
+
+/* A DMA allocation holding page table data: CPU pointer and DMA address. */
+struct lima_vm_page {
+	u32 *cpu;
+	dma_addr_t dma;
+};
+
+/* One GPU virtual address space, reference counted through refcount. */
+struct lima_vm {
+	/* held around VA allocator and page table updates in lima_vm.c */
+	struct mutex lock;
+	struct kref refcount;
+
+	/* VA range allocator */
+	struct drm_mm mm;
+
+	struct lima_device *dev;
+
+	/* page directory page */
+	struct lima_vm_page pd;
+	/* page-table blocks, allocated on demand; cpu is NULL when unused */
+	struct lima_vm_page bts[LIMA_VM_NUM_BT];
+};
+
+int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create);
+void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo);
+
+u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo);
+
+struct lima_vm *lima_vm_create(struct lima_device *dev);
+void lima_vm_release(struct kref *kref);
+
+/* Take a reference on @vm and return it; @vm must not be NULL. */
+static inline struct lima_vm *lima_vm_get(struct lima_vm *vm)
+{
+	kref_get(&vm->refcount);
+	return vm;
+}
+
+/* Drop a reference on @vm; the last put frees it via lima_vm_release(). */
+static inline void lima_vm_put(struct lima_vm *vm)
+{
+	kref_put(&vm->refcount, lima_vm_release);
+}
+
+void lima_vm_print(struct lima_vm *vm);
+
+#endif
diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
new file mode 100644
index 000000000000..64fb4807958d
--- /dev/null
+++ b/include/uapi/drm/lima_drm.h
@@ -0,0 +1,126 @@ 
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DRM_H__
+#define __LIMA_DRM_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define LIMA_INFO_GPU_MALI400 0x00
+#define LIMA_INFO_GPU_MALI450 0x01
+
+struct drm_lima_info {
+	__u32 gpu_id;   /* out */
+	__u32 num_pp;   /* out */
+	__u32 valid;    /* out */
+	__u32 _resv[7];
+};
+
+struct drm_lima_gem_create {
+	__u32 size;    /* in */
+	__u32 flags;   /* in */
+	__u32 handle;  /* out */
+	__u32 pad;
+};
+
+struct drm_lima_gem_info {
+	__u32 handle;  /* in */
+	__u32 va;      /* out */
+	__u64 offset;  /* out */
+};
+
+#define LIMA_SUBMIT_BO_READ   0x01
+#define LIMA_SUBMIT_BO_WRITE  0x02
+
+struct drm_lima_gem_submit_bo {
+	__u32 handle;  /* in */
+	__u32 flags;   /* in */
+};
+
+#define LIMA_GP_FRAME_REG_NUM 6
+
+struct drm_lima_gp_frame {
+	__u32 frame[LIMA_GP_FRAME_REG_NUM];
+};
+
+#define LIMA_PP_FRAME_REG_NUM 23
+#define LIMA_PP_WB_REG_NUM 12
+
+/*
+ * PP frame layout for Mali 400. The 4-entry arrays presumably hold one
+ * value per PP (Mali 400 has at most 4 PPs) — TODO confirm against the
+ * PP submit code.
+ */
+struct drm_lima_m400_pp_frame {
+	__u32 frame[LIMA_PP_FRAME_REG_NUM];
+	__u32 num_pp;
+	__u32 wb[3 * LIMA_PP_WB_REG_NUM];
+	__u32 plbu_array_address[4];
+	__u32 fragment_stack_address[4];
+};
+
+/*
+ * PP frame layout for Mali 450. The 8-entry arrays presumably hold one
+ * value per PP (Mali 450 has at most 8 PPs) — TODO confirm against the
+ * PP submit code.
+ */
+struct drm_lima_m450_pp_frame {
+	__u32 frame[LIMA_PP_FRAME_REG_NUM];
+	__u32 num_pp;
+	__u32 wb[3 * LIMA_PP_WB_REG_NUM];
+	__u32 use_dlbu;
+	__u32 _pad;
+	/* presumably selected by use_dlbu — TODO confirm */
+	union {
+		__u32 plbu_array_address[8];
+		__u32 dlbu_regs[4];
+	};
+	__u32 fragment_stack_address[8];
+};
+
+#define LIMA_PIPE_GP  0x00
+#define LIMA_PIPE_PP  0x01
+
+#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
+
+/*
+ * Argument of DRM_IOCTL_LIMA_GEM_SUBMIT; every field is an input.
+ *
+ * bos and frame are presumably user pointers to nr_bos
+ * struct drm_lima_gem_submit_bo entries and to a frame of frame_size
+ * bytes — TODO confirm against the ioctl handler.
+ */
+struct drm_lima_gem_submit {
+	__u32 ctx;         /* in */
+	__u32 pipe;        /* in */
+	__u32 nr_bos;      /* in */
+	__u32 frame_size;  /* in */
+	__u64 bos;         /* in */
+	__u64 frame;       /* in */
+	__u32 flags;       /* in */
+	__u32 out_sync;    /* in */
+	__u32 in_sync[2];  /* in */
+};
+
+#define LIMA_GEM_WAIT_READ   0x01
+#define LIMA_GEM_WAIT_WRITE  0x02
+
+struct drm_lima_gem_wait {
+	__u32 handle;      /* in */
+	__u32 op;          /* in */
+	__s64 timeout_ns;  /* in */
+};
+
+#define LIMA_CTX_OP_CREATE 1
+#define LIMA_CTX_OP_FREE   2
+
+struct drm_lima_ctx {
+	__u32 op;          /* in */
+	__u32 id;          /* in/out */
+};
+
+#define DRM_LIMA_INFO        0x00
+#define DRM_LIMA_GEM_CREATE  0x01
+#define DRM_LIMA_GEM_INFO    0x02
+#define DRM_LIMA_GEM_SUBMIT  0x03
+#define DRM_LIMA_GEM_WAIT    0x04
+#define DRM_LIMA_CTX         0x05
+
+#define DRM_IOCTL_LIMA_INFO DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_INFO, struct drm_lima_info)
+#define DRM_IOCTL_LIMA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, struct drm_lima_gem_create)
+#define DRM_IOCTL_LIMA_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info)
+#define DRM_IOCTL_LIMA_GEM_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, struct drm_lima_gem_submit)
+#define DRM_IOCTL_LIMA_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait)
+#define DRM_IOCTL_LIMA_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_CTX, struct drm_lima_ctx)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __LIMA_DRM_H__ */