Message ID | 20200114145136.65373-3-akeem.g.abodunrin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Security mitigation for Intel Gen7 and Gen7.5 | expand |
Quoting Akeem G Abodunrin (2020-01-14 14:51:36) > From: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com> > > On gen7 and gen7.5 devices, there could be leftover data residuals in > EU/L3 from the retiring context. This patch introduces workaround to clear > that residual contexts, by submitting a batch buffer with dedicated HW > context to the GPU with ring allocation for each context switching. > > Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com> > Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com> > Cc: Chris Wilson <chris.p.wilson@intel.com> > Cc: Balestrieri Francesco <francesco.balestrieri@intel.com> > Cc: Bloomfield Jon <jon.bloomfield@intel.com> > Cc: Dutt Sudeep <sudeep.dutt@intel.com> > --- > drivers/gpu/drm/i915/Makefile | 1 + > drivers/gpu/drm/i915/gt/gen7_renderclear.c | 514 ++++++++++++++++++ > drivers/gpu/drm/i915/gt/gen7_renderclear.h | 15 + > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 17 +- > .../gpu/drm/i915/gt/intel_ring_submission.c | 3 +- > drivers/gpu/drm/i915/i915_utils.h | 5 + > 6 files changed, 551 insertions(+), 4 deletions(-) > create mode 100644 drivers/gpu/drm/i915/gt/gen7_renderclear.c > create mode 100644 drivers/gpu/drm/i915/gt/gen7_renderclear.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index b8c5f8934dbd..e5386871f015 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -75,6 +75,7 @@ gt-y += \ > gt/debugfs_gt.o \ > gt/debugfs_gt_pm.o \ > gt/gen6_ppgtt.o \ > + gt/gen7_renderclear.o \ > gt/gen8_ppgtt.o \ > gt/intel_breadcrumbs.o \ > gt/intel_context.o \ > diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c > new file mode 100644 > index 000000000000..3e9fc2c05fbb > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c > @@ -0,0 +1,514 @@ > +// SPDX-License-Identifier: MIT > +/* > + * Copyright © 2019 Intel Corporation > + */ > + > +#include "gen7_renderclear.h" > +#include "i915_drv.h" > +#include "i915_utils.h" > +#include "intel_gpu_commands.h" > + > +#define MAX_URB_ENTRIES 64 > +#define STATE_SIZE (4 * 1024) > + > +/* Media CB Kernel for gen7 devices */ > +static const u32 cb7_kernel[][4] = { > + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, > + { 0x00000040, 0x20280c21, 0x00000028, 0x00000001 }, > + { 0x01000010, 0x20000c20, 0x0000002c, 0x00000000 }, > + { 0x00010220, 0x34001c00, 0x00001400, 0x0000002c }, > + { 0x00600001, 0x20600061, 0x00000000, 0x00000000 }, > + { 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c }, > + { 0x00000005, 0x20601ca5, 0x00000060, 0x00000001 }, > + { 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d }, > + { 0x00000005, 0x20641ca5, 0x00000064, 0x00000003 }, > + { 0x00000041, 0x207424a5, 0x00000064, 0x00000034 }, > + { 0x00000040, 0x206014a5, 0x00000060, 0x00000074 }, > + { 0x00000008, 0x20681c85, 0x00000e00, 0x00000008 }, > + { 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f }, > + { 0x00000041, 0x20701ca5, 0x00000060, 0x00000010 }, > + { 0x00000040, 0x206814a5, 0x00000068, 0x00000070 }, > + { 0x00600001, 0x20a00061, 0x00000000, 0x00000000 }, > + { 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007 }, > + { 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004 }, > + { 0x00600001, 0x20800021, 0x008d0000, 0x00000000 }, > + { 0x00000001, 0x20800021, 0x0000006c, 0x00000000 }, > + { 0x00000001, 0x20840021, 0x00000068, 0x00000000 }, > + { 0x00000001, 0x20880061, 0x00000000, 0x00000003 }, > + { 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff }, > + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x02190001 }, > + { 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001 }, > + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x040a8001 }, > + { 0x02000040, 0x20281c21, 0x00000028, 0xffffffff }, > + { 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc }, > + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, > + { 0x00000001, 0x220000e4, 0x00000000, 0x00000000 }, > + { 0x00000001, 0x220801ec, 0x00000000, 0x007f007f }, > + { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 }, > + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, > + { 0x00200001, 0x20400121, 0x00450020, 0x00000000 }, > + { 0x00000001, 0x20480061, 0x00000000, 0x000f000f }, > + { 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef }, > + { 0x00800001, 0x20600061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20800061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20a00061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20c00061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20e00061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x21200061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x21400061, 0x00000000, 0x00000000 }, > + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, > + { 0x00000040, 0x20402d21, 0x00000020, 0x00100010 }, > + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, > + { 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff }, > + { 0x00800001, 0xa0000109, 0x00000602, 0x00000000 }, > + { 0x00000040, 0x22001c84, 0x00000200, 0x00000020 }, > + { 0x00010220, 0x34001c00, 0x00001400, 0xfffffff8 }, > + { 0x07600032, 0x20001fa0, 0x008d0fe0, 0x82000010 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > +}; > + > +/* Media CB Kernel for gen7.5 devices */ > +static const u32 cb75_kernel[][4] = { > + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, > + { 0x00000040, 0x20280c21, 0x00000028, 0x00000001 }, > + { 0x01000010, 0x20000c20, 0x0000002c, 0x00000000 }, > + { 0x00010220, 0x34001c00, 0x00001400, 0x00000160 }, > + { 0x00600001, 0x20600061, 0x00000000, 0x00000000 }, > + { 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c }, > + { 0x00000005, 0x20601ca5, 0x00000060, 0x00000001 }, > + { 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d }, > + { 0x00000005, 0x20641ca5, 0x00000064, 0x00000003 }, > + { 0x00000041, 0x207424a5, 0x00000064, 0x00000034 }, > + { 0x00000040, 0x206014a5, 0x00000060, 0x00000074 }, > + { 0x00000008, 0x20681c85, 0x00000e00, 0x00000008 }, > + { 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f }, > + { 0x00000041, 0x20701ca5, 0x00000060, 0x00000010 }, > + { 0x00000040, 0x206814a5, 0x00000068, 0x00000070 }, > + { 0x00600001, 0x20a00061, 0x00000000, 0x00000000 }, > + { 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007 }, > + { 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004 }, > + { 0x00600001, 0x20800021, 0x008d0000, 0x00000000 }, > + { 0x00000001, 0x20800021, 0x0000006c, 0x00000000 }, > + { 0x00000001, 0x20840021, 0x00000068, 0x00000000 }, > + { 0x00000001, 0x20880061, 0x00000000, 0x00000003 }, > + { 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff }, > + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x02190001 }, > + { 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001 }, > + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x040a8001 }, > + { 0x02000040, 0x20281c21, 0x00000028, 0xffffffff }, > + { 0x00010220, 0x34001c00, 0x00001400, 0xffffffe0 }, > + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, > + { 0x00000001, 0x220000e4, 0x00000000, 0x00000000 }, > + { 0x00000001, 0x220801ec, 0x00000000, 0x007f007f }, > + { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 }, > + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, > + { 0x00200001, 0x20400121, 0x00450020, 0x00000000 }, > + { 0x00000001, 0x20480061, 0x00000000, 0x000f000f }, > + { 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef }, > + { 0x00800001, 0x20600061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20800061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20a00061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20c00061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x20e00061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x21200061, 0x00000000, 0x00000000 }, > + { 0x00800001, 0x21400061, 0x00000000, 0x00000000 }, > + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, > + { 0x00000040, 0x20402d21, 0x00000020, 0x00100010 }, > + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, > + { 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff }, > + { 0x00800001, 0xa0000109, 0x00000602, 0x00000000 }, > + { 0x00000040, 0x22001c84, 0x00000200, 0x00000020 }, > + { 0x00010220, 0x34001c00, 0x00001400, 0xffffffc0 }, > + { 0x07600032, 0x20001fa0, 0x008d0fe0, 0x82000010 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, > +}; > + > +struct cb_kernel { > + const void *data; > + u32 size; > +}; > + > +#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) } > + > +static const struct cb_kernel cb_kernel_gen7 = CB_KERNEL(cb7_kernel); > +static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(cb75_kernel); > + > +struct batch_chunk { > + struct i915_vma *vma; > + u32 offset; > + u32 *start; > + u32 *end; > + u32 max_items; > +}; > + > +struct batch_vals { > + struct drm_i915_private *i915; Never set or used. > + u32 max_primitives; > + u32 max_urb_entries; > + u32 cmd_size; > + u32 state_size; > + u32 state_start; > + u32 batch_size; > + u32 surface_height; > + u32 surface_width; > + u32 scratch_size; > + u32 max_size; > +}; > + > +static void > +batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) > +{ > + if (IS_HASWELL(i915)) { > + bv->max_primitives = 280; > + bv->max_urb_entries = MAX_URB_ENTRIES; > + bv->surface_height = 16 * 16; > + bv->surface_width = 32 * 2 * 16; > + } else { > + bv->max_primitives = 128; > + bv->max_urb_entries = MAX_URB_ENTRIES / 2; > + bv->surface_height = 16 * 8; > + bv->surface_width = 32 * 16; > + } > + bv->cmd_size = bv->max_primitives * 4096; > + bv->state_size = STATE_SIZE; > + bv->state_start = bv->cmd_size; > + bv->batch_size = bv->cmd_size + bv->state_size; > + bv->scratch_size = bv->surface_height * bv->surface_width; > + bv->max_size = bv->batch_size + bv->scratch_size; > +} > + > +static void batch_init(struct batch_chunk *bc, > + struct i915_vma *vma, > + u32 *start, u32 offset, u32 max_bytes) > +{ > + bc->vma = vma; > + bc->offset = offset; > + bc->start = start + bc->offset / sizeof(*bc->start); > + bc->end = bc->start; > + bc->max_items = max_bytes / sizeof(*bc->start); > +} > + > +static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) > +{ > + return (cs - bc->start) * sizeof(*bc->start) + bc->offset; > +} > + > +static u32 batch_addr(const struct batch_chunk *bc) > +{ > + return bc->vma->node.start; > +} > + > +static void batch_add(struct batch_chunk *bc, const u32 d) > +{ > + GEM_DEBUG_WARN_ON((bc->end - bc->start) >= bc->max_items); > + *bc->end++ = d; > +} > + > +static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items) > +{ > + u32 *map; > + > + if (align) { > + u32 *end = ptr_align(bc->end, align); > + > + memset32(bc->end, 0, (end - bc->end) / sizeof(u32)); end and bc->end are both u32, so we are already taking sizeof(u32) into account. Just memset32(bc->end, 0, end - bc->end); (Good job we cleared the whole buffer just in case.) > + bc->end = end; > + } > + > + map = bc->end; > + bc->end += items; > + > + return map; > +} > + > +static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes) > +{ > + GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start))); > + return batch_alloc_items(bc, align, bytes / sizeof(*bc->start)); > +} > + > +static u32 > +gen7_fill_surface_state(struct batch_chunk *state, > + const u32 dst_offset, > + const struct batch_vals *bv) > +{ > + u32 surface_h = bv->surface_height; > + u32 surface_w = bv->surface_width; > + u32 *cs = batch_alloc_items(state, 32, 8); > + u32 offset = batch_offset(state, cs); > + > +#define SURFACE_2D 1 > +#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 > +#define RENDER_CACHE_READ_WRITE 1 > + > + *cs++ = SURFACE_2D << 29 | > + (SURFACEFORMAT_B8G8R8A8_UNORM << 18) | > + (RENDER_CACHE_READ_WRITE << 8); > + > + *cs++ = batch_addr(state) + dst_offset; > + > + *cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1); > + *cs++ = surface_w; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > +#define SHADER_CHANNELS(r, g, b, a) \ > + (((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16)) > + *cs++ = SHADER_CHANNELS(4, 5, 6, 7); A useful debug trick would be to finish each packet with batch_advance(state, cs); #define batch_advance(X, CS) GEM_BUG_ON((X)->end != (CS)) > + > + return offset; > +} > + > +static u32 > +gen7_fill_binding_table(struct batch_chunk *state, > + const struct batch_vals *bv) > +{ > + u32 *cs = batch_alloc_items(state, 32, 8); > + u32 offset = batch_offset(state, cs); > + u32 surface_start; > + > + surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); > + *cs++ = surface_start - state->offset; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + > + return offset; > +} > + > +static u32 > +gen7_fill_kernel_data(struct batch_chunk *state, > + const u32 *data, > + const u32 size) > +{ > + return batch_offset(state, > + memcpy(batch_alloc_bytes(state, 64, size), > + data, size)); > +} > + > +static u32 > +gen7_fill_interface_descriptor(struct batch_chunk *state, > + const struct batch_vals *bv, > + const struct cb_kernel *kernel, > + unsigned int count) > +{ > + u32 *cs = batch_alloc_items(state, 32, 8 * count); > + u32 offset = batch_offset(state, cs); > + > + *cs++ = gen7_fill_kernel_data(state, kernel->data, kernel->size); > + *cs++ = (1 << 7) | (1 << 13); > + *cs++ = 0; > + *cs++ = (gen7_fill_binding_table(state, bv) - state->offset) | 1; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > + /* 1 - 63dummy idds */ > + memset32(cs, 0x00, (count - 1) * 8); > + > + return offset; > +} > + > +static void > +gen7_emit_state_base_address(struct batch_chunk *batch, > + u32 surface_state_base) > +{ > + u32 *cs = batch_alloc_items(batch, 0, 12); > + > + *cs++ = STATE_BASE_ADDRESS | (12 - 2); > + /* general */ > + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; > + /* surface */ > + *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY; > + /* dynamic */ > + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; > + /* indirect */ > + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; > + /* instruction */ > + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; > + > + /* general/dynamic/indirect/instruction access Bound */ > + *cs++ = 0; > + *cs++ = BASE_ADDRESS_MODIFY; > + *cs++ = 0; > + *cs++ = BASE_ADDRESS_MODIFY; > + *cs++ = 0; > + *cs++ = 0; > +} > + > +static void > +gen7_emit_vfe_state(struct batch_chunk *batch, > + const struct batch_vals *bv, > + u32 urb_size, u32 curbe_size, > + u32 mode) > +{ > + u32 urb_entries = bv->max_urb_entries; > + u32 threads = bv->max_primitives - 1; > + u32 *cs = batch_alloc_items(batch, 32, 8); > + > + *cs++ = MEDIA_VFE_STATE | (8 - 2); > + > + /* scratch buffer */ > + *cs++ = 0; > + > + /* number of threads & urb entries */ > + *cs++ = threads << 16 | > + urb_entries << 8 | > + mode << 2; /* GPGPU vs media mode */ *cs++ = threads << 16 | urb_entries << 8 | mode << 2; Only the comment overflows, rewrite the comment. > + > + *cs++ = 0; > + > + /* urb entry size & curbe size */ > + *cs++ = urb_size << 16 | /* in 256 bits unit */ > + curbe_size; /* in 256 bits unit */ You could just say in 256b units once, and pull this onto one line. > + > + /* scoreboard */ > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > +} > + > +static void > +gen7_emit_interface_descriptor_load(struct batch_chunk *batch, > + const u32 interface_descriptor, > + unsigned int count) > +{ > + u32 *cs = batch_alloc_items(batch, 8, 4); > + > + *cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2); > + *cs++ = 0; > + *cs++ = count * 8 * sizeof(*cs); > + > + /* interface descriptor address, is relative to the dynamics base > + * address > + */ /* * This is the style we use for block * comments. */ > + *cs++ = interface_descriptor; > +} > + > +static void > +gen7_emit_media_object(struct batch_chunk *batch, > + unsigned int media_object_index) > +{ > + unsigned int x_offset = (media_object_index % 16) * 64; > + unsigned int y_offset = (media_object_index / 16) * 16; > + unsigned int inline_data_size; > + unsigned int media_batch_size; > + unsigned int i; > + u32 *cs; > + > + inline_data_size = 112 * 8; > + media_batch_size = inline_data_size + 6; > + > + cs = batch_alloc_items(batch, 8, media_batch_size); > + > + *cs++ = MEDIA_OBJECT | (media_batch_size - 2); > + > + /* interface descriptor offset */ > + *cs++ = 0; > + > + /* without indirect data */ > + *cs++ = 0; > + *cs++ = 0; > + > + /* scoreboard */ > + *cs++ = 0; > + *cs++ = 0; > + > + /* inline */ > + *cs++ = (y_offset << 16) | (x_offset); > + *cs++ = 0; > + *cs++ = 0x1E00; > + for (i = 3; i < inline_data_size; i++) > + *cs++ = 0; > +} > + > +static void gen7_emit_pipeline_flush(struct batch_chunk *batch) > +{ > + u32 *cs = batch_alloc_items(batch, 0, 5); > + > + *cs++ = GFX_OP_PIPE_CONTROL(5); > + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | > + PIPE_CONTROL_GLOBAL_GTT_IVB; > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = 0; > +} > + > +static void emit_batch(struct i915_vma * const vma, > + u32 *start, > + const struct batch_vals *bv) > +{ > + struct drm_i915_private *i915 = vma->vm->i915; > + unsigned int desc_count = 64; > + const u32 urb_size = 112; > + struct batch_chunk cmds, state; > + u32 interface_descriptor; > + unsigned int i; > + > + batch_init(&cmds, vma, start, 0, bv->cmd_size); > + batch_init(&state, vma, start, bv->state_start, bv->state_size); > + > + interface_descriptor = > + gen7_fill_interface_descriptor(&state, bv, > + IS_HASWELL(i915) ? > + &cb_kernel_hsw : &cb_kernel_gen7, > + desc_count); > + gen7_emit_pipeline_flush(&cmds); > + batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); > + batch_add(&cmds, MI_NOOP); > + gen7_emit_state_base_address(&cmds, interface_descriptor); > + gen7_emit_pipeline_flush(&cmds); > + > + gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); > + > + gen7_emit_interface_descriptor_load(&cmds, > + interface_descriptor, > + desc_count); > + > + for (i = 0; i < bv->max_primitives; i++) > + gen7_emit_media_object(&cmds, i); > + > + batch_add(&cmds, MI_BATCH_BUFFER_END); > +} > + > +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, > + struct i915_vma * const vma) > +{ > + struct batch_vals bv; > + u32 *batch; > + > + batch_get_defaults(engine->i915, &bv); > + if (!vma) > + return bv.max_size; GEM_BUG_ON(vma->obj->base.size < bv.max_size); Yeah, we might revisit why this doesn't return the populated vma directly, since we have the vm available in engine->kernel_context->vm > + > + batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); > + if (IS_ERR(batch)) > + return PTR_ERR(batch); > + > + emit_batch(vma, memset(batch, 0, bv.max_size), &bv); > + > + i915_gem_object_flush_map(vma->obj); > + i915_gem_object_unpin_map(vma->obj); > + > + return 0; > +}
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b8c5f8934dbd..e5386871f015 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -75,6 +75,7 @@ gt-y += \ gt/debugfs_gt.o \ gt/debugfs_gt_pm.o \ gt/gen6_ppgtt.o \ + gt/gen7_renderclear.o \ gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c new file mode 100644 index 000000000000..3e9fc2c05fbb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -0,0 +1,514 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gen7_renderclear.h" +#include "i915_drv.h" +#include "i915_utils.h" +#include "intel_gpu_commands.h" + +#define MAX_URB_ENTRIES 64 +#define STATE_SIZE (4 * 1024) + +/* Media CB Kernel for gen7 devices */ +static const u32 cb7_kernel[][4] = { + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, + { 0x00000040, 0x20280c21, 0x00000028, 0x00000001 }, + { 0x01000010, 0x20000c20, 0x0000002c, 0x00000000 }, + { 0x00010220, 0x34001c00, 0x00001400, 0x0000002c }, + { 0x00600001, 0x20600061, 0x00000000, 0x00000000 }, + { 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c }, + { 0x00000005, 0x20601ca5, 0x00000060, 0x00000001 }, + { 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d }, + { 0x00000005, 0x20641ca5, 0x00000064, 0x00000003 }, + { 0x00000041, 0x207424a5, 0x00000064, 0x00000034 }, + { 0x00000040, 0x206014a5, 0x00000060, 0x00000074 }, + { 0x00000008, 0x20681c85, 0x00000e00, 0x00000008 }, + { 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f }, + { 0x00000041, 0x20701ca5, 0x00000060, 0x00000010 }, + { 0x00000040, 0x206814a5, 0x00000068, 0x00000070 }, + { 0x00600001, 0x20a00061, 0x00000000, 0x00000000 }, + { 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007 }, + { 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004 }, + { 0x00600001, 0x20800021, 0x008d0000, 0x00000000 }, + { 0x00000001, 0x20800021, 0x0000006c, 0x00000000 }, + { 0x00000001, 0x20840021, 0x00000068, 0x00000000 }, + { 0x00000001, 0x20880061, 0x00000000, 0x00000003 }, + { 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff }, + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x02190001 }, + { 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001 }, + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x040a8001 }, + { 0x02000040, 0x20281c21, 0x00000028, 0xffffffff }, + { 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc }, + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, + { 0x00000001, 0x220000e4, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220801ec, 0x00000000, 0x007f007f }, + { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, + { 0x00200001, 0x20400121, 0x00450020, 0x00000000 }, + { 0x00000001, 0x20480061, 0x00000000, 0x000f000f }, + { 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef }, + { 0x00800001, 0x20600061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20800061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20a00061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20c00061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20e00061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21200061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21400061, 0x00000000, 0x00000000 }, + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, + { 0x00000040, 0x20402d21, 0x00000020, 0x00100010 }, + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, + { 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff }, + { 0x00800001, 0xa0000109, 0x00000602, 0x00000000 }, + { 0x00000040, 0x22001c84, 0x00000200, 0x00000020 }, + { 0x00010220, 0x34001c00, 0x00001400, 0xfffffff8 }, + { 0x07600032, 0x20001fa0, 0x008d0fe0, 0x82000010 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, +}; + +/* Media CB Kernel for gen7.5 devices */ +static const u32 cb75_kernel[][4] = { + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, + { 0x00000040, 0x20280c21, 0x00000028, 0x00000001 }, + { 0x01000010, 0x20000c20, 0x0000002c, 0x00000000 }, + { 0x00010220, 0x34001c00, 0x00001400, 0x00000160 }, + { 0x00600001, 0x20600061, 0x00000000, 0x00000000 }, + { 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c }, + { 0x00000005, 0x20601ca5, 0x00000060, 0x00000001 }, + { 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d }, + { 0x00000005, 0x20641ca5, 0x00000064, 0x00000003 }, + { 0x00000041, 0x207424a5, 0x00000064, 0x00000034 }, + { 0x00000040, 0x206014a5, 0x00000060, 0x00000074 }, + { 0x00000008, 0x20681c85, 0x00000e00, 0x00000008 }, + { 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f }, + { 0x00000041, 0x20701ca5, 0x00000060, 0x00000010 }, + { 0x00000040, 0x206814a5, 0x00000068, 0x00000070 }, + { 0x00600001, 0x20a00061, 0x00000000, 0x00000000 }, + { 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007 }, + { 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004 }, + { 0x00600001, 0x20800021, 0x008d0000, 0x00000000 }, + { 0x00000001, 0x20800021, 0x0000006c, 0x00000000 }, + { 0x00000001, 0x20840021, 0x00000068, 0x00000000 }, + { 0x00000001, 0x20880061, 0x00000000, 0x00000003 }, + { 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff }, + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x02190001 }, + { 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001 }, + { 0x05600032, 0x20a01fa1, 0x008d0080, 0x040a8001 }, + { 0x02000040, 0x20281c21, 0x00000028, 0xffffffff }, + { 0x00010220, 0x34001c00, 0x00001400, 0xffffffe0 }, + { 0x00000001, 0x26020128, 0x00000024, 0x00000000 }, + { 0x00000001, 0x220000e4, 0x00000000, 0x00000000 }, + { 0x00000001, 0x220801ec, 0x00000000, 0x007f007f }, + { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 }, + { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 }, + { 0x00200001, 0x20400121, 0x00450020, 0x00000000 }, + { 0x00000001, 0x20480061, 0x00000000, 0x000f000f }, + { 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef }, + { 0x00800001, 0x20600061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20800061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20a00061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20c00061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x20e00061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21200061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21400061, 0x00000000, 0x00000000 }, + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, + { 0x00000040, 0x20402d21, 0x00000020, 0x00100010 }, + { 0x05600032, 0x20001fa0, 0x008d0040, 0x120a8000 }, + { 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff }, + { 0x00800001, 0xa0000109, 0x00000602, 0x00000000 }, + { 0x00000040, 0x22001c84, 0x00000200, 0x00000020 }, + { 0x00010220, 0x34001c00, 0x00001400, 0xffffffc0 }, + { 0x07600032, 0x20001fa0, 0x008d0fe0, 0x82000010 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, +}; + +struct cb_kernel { + const void *data; + u32 size; +}; + +#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) } + +static const struct cb_kernel cb_kernel_gen7 = CB_KERNEL(cb7_kernel); +static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(cb75_kernel); + +struct batch_chunk { + struct i915_vma *vma; + u32 offset; + u32 *start; + u32 *end; + u32 max_items; +}; + +struct batch_vals { + struct drm_i915_private *i915; + u32 max_primitives; + u32 max_urb_entries; + u32 cmd_size; + u32 state_size; + u32 state_start; + u32 batch_size; + u32 surface_height; + u32 surface_width; + u32 scratch_size; + u32 max_size; +}; + +static void +batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) +{ + if (IS_HASWELL(i915)) { + bv->max_primitives = 280; + bv->max_urb_entries = MAX_URB_ENTRIES; + bv->surface_height = 16 * 16; + bv->surface_width = 32 * 2 * 16; + } else { + bv->max_primitives = 128; + bv->max_urb_entries = MAX_URB_ENTRIES / 2; + bv->surface_height = 16 * 8; + bv->surface_width = 32 * 16; + } + bv->cmd_size = bv->max_primitives * 4096; + bv->state_size = STATE_SIZE; + bv->state_start = bv->cmd_size; + bv->batch_size = bv->cmd_size + bv->state_size; + bv->scratch_size = bv->surface_height * bv->surface_width; + bv->max_size = bv->batch_size + bv->scratch_size; +} + +static void batch_init(struct batch_chunk *bc, + struct i915_vma *vma, + u32 *start, u32 offset, u32 max_bytes) +{ + bc->vma = vma; + bc->offset = offset; + bc->start = start + bc->offset / sizeof(*bc->start); + bc->end = bc->start; + bc->max_items = max_bytes / sizeof(*bc->start); +} + +static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) +{ + return (cs - bc->start) * sizeof(*bc->start) + bc->offset; +} + +static u32 batch_addr(const struct batch_chunk *bc) +{ + return bc->vma->node.start; +} + +static void batch_add(struct batch_chunk *bc, const u32 d) +{ + GEM_DEBUG_WARN_ON((bc->end - bc->start) >= bc->max_items); + *bc->end++ = d; +} + +static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items) +{ + u32 *map; + + if (align) { + u32 *end = ptr_align(bc->end, align); + + memset32(bc->end, 0, (end - bc->end) / sizeof(u32)); + bc->end = end; + } + + map = bc->end; + bc->end += items; + + return map; +} + +static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes) +{ + GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start))); + return batch_alloc_items(bc, align, bytes / sizeof(*bc->start)); +} + +static u32 +gen7_fill_surface_state(struct batch_chunk *state, + const u32 dst_offset, + const struct batch_vals *bv) +{ + u32 surface_h = bv->surface_height; + u32 surface_w = bv->surface_width; + u32 *cs = batch_alloc_items(state, 32, 8); + u32 offset = batch_offset(state, cs); + +#define SURFACE_2D 1 +#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define RENDER_CACHE_READ_WRITE 1 + + *cs++ = SURFACE_2D << 29 | + (SURFACEFORMAT_B8G8R8A8_UNORM << 18) | + (RENDER_CACHE_READ_WRITE << 8); + + *cs++ = batch_addr(state) + dst_offset; + + *cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1); + *cs++ = surface_w; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; +#define SHADER_CHANNELS(r, g, b, a) \ + (((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16)) + *cs++ = SHADER_CHANNELS(4, 5, 6, 7); + + return offset; +} + +static u32 +gen7_fill_binding_table(struct batch_chunk *state, + const struct batch_vals *bv) +{ + u32 *cs = batch_alloc_items(state, 32, 8); + u32 offset = batch_offset(state, cs); + u32 surface_start; + + surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + *cs++ = surface_start - state->offset; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + return offset; +} + +static u32 +gen7_fill_kernel_data(struct batch_chunk *state, + const u32 *data, + const u32 size) +{ + return batch_offset(state, + memcpy(batch_alloc_bytes(state, 64, size), + data, size)); +} + +static u32 +gen7_fill_interface_descriptor(struct batch_chunk *state, + const struct batch_vals *bv, + const struct cb_kernel *kernel, + unsigned int count) +{ + u32 *cs = batch_alloc_items(state, 32, 8 * count); + u32 offset = batch_offset(state, cs); + + *cs++ = gen7_fill_kernel_data(state, kernel->data, kernel->size); + *cs++ = (1 << 7) | (1 << 13); + *cs++ = 0; + *cs++ = (gen7_fill_binding_table(state, bv) - state->offset) | 1; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + /* 1 - 63dummy idds */ + memset32(cs, 0x00, (count - 1) * 8); + + return offset; +} + +static void +gen7_emit_state_base_address(struct batch_chunk *batch, + u32 surface_state_base) +{ + u32 *cs = batch_alloc_items(batch, 0, 12); + + *cs++ = STATE_BASE_ADDRESS | (12 - 2); + /* general */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* surface */ + *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY; + /* dynamic */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* indirect */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* instruction */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + + /* general/dynamic/indirect/instruction access Bound */ + *cs++ = 0; + *cs++ = BASE_ADDRESS_MODIFY; + *cs++ = 0; + *cs++ = BASE_ADDRESS_MODIFY; + *cs++ = 0; + *cs++ = 0; +} + +static void +gen7_emit_vfe_state(struct batch_chunk *batch, + const struct batch_vals *bv, + u32 urb_size, u32 curbe_size, + u32 mode) +{ + u32 urb_entries = bv->max_urb_entries; + u32 threads = bv->max_primitives - 1; + u32 *cs = batch_alloc_items(batch, 32, 8); + + *cs++ = MEDIA_VFE_STATE | (8 - 2); + + /* scratch buffer */ + *cs++ = 0; + + /* number of threads & urb entries */ + *cs++ = threads << 16 | + urb_entries << 8 | + mode << 2; /* GPGPU vs media mode */ + + *cs++ = 0; + + /* urb entry size & curbe size */ + *cs++ = urb_size << 16 | /* in 256 bits unit */ + curbe_size; /* in 256 bits unit */ + + /* scoreboard */ + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; +} + +static void +gen7_emit_interface_descriptor_load(struct batch_chunk *batch, + const u32 interface_descriptor, + unsigned int count) +{ + u32 *cs = batch_alloc_items(batch, 8, 4); + + *cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2); + *cs++ = 0; + *cs++ = count * 8 * sizeof(*cs); + + /* interface descriptor address, is relative to the dynamics base + * address + */ + *cs++ = interface_descriptor; +} + +static void +gen7_emit_media_object(struct batch_chunk *batch, + unsigned int media_object_index) +{ + unsigned int x_offset = (media_object_index % 16) * 64; + unsigned int y_offset = (media_object_index / 16) * 16; + unsigned int inline_data_size; + unsigned int media_batch_size; + unsigned int i; + u32 *cs; + + inline_data_size = 112 * 8; + media_batch_size = inline_data_size + 6; + + cs = batch_alloc_items(batch, 8, media_batch_size); + + *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + + /* interface descriptor offset */ + *cs++ = 0; + + /* without indirect data */ + *cs++ = 0; + *cs++ = 0; + + /* scoreboard */ + *cs++ = 0; + *cs++ = 0; + + /* inline */ + *cs++ = (y_offset << 16) | (x_offset); + *cs++ = 0; + *cs++ = 0x1E00; + for (i = 3; i < inline_data_size; i++) + *cs++ = 0; +} + +static void gen7_emit_pipeline_flush(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 5); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; +} + +static void emit_batch(struct i915_vma * const vma, + u32 *start, + const struct batch_vals *bv) +{ + struct drm_i915_private *i915 = vma->vm->i915; + unsigned int desc_count = 64; + const u32 urb_size = 112; + struct batch_chunk cmds, state; + u32 interface_descriptor; + unsigned int i; + + batch_init(&cmds, vma, start, 0, bv->cmd_size); + batch_init(&state, vma, start, bv->state_start, bv->state_size); + + interface_descriptor = + gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? + &cb_kernel_hsw : &cb_kernel_gen7, + desc_count); + gen7_emit_pipeline_flush(&cmds); + batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + batch_add(&cmds, MI_NOOP); + gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_flush(&cmds); + + gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + + gen7_emit_interface_descriptor_load(&cmds, + interface_descriptor, + desc_count); + + for (i = 0; i < bv->max_primitives; i++) + gen7_emit_media_object(&cmds, i); + + batch_add(&cmds, MI_BATCH_BUFFER_END); +} + +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, + struct i915_vma * const vma) +{ + struct batch_vals bv; + u32 *batch; + + batch_get_defaults(engine->i915, &bv); + if (!vma) + return bv.max_size; + + batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.h b/drivers/gpu/drm/i915/gt/gen7_renderclear.h new file mode 100644 index 000000000000..bb100748e2c6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __GEN7_RENDERCLEAR_H__ +#define __GEN7_RENDERCLEAR_H__ + +struct intel_engine_cs; +struct i915_vma; + +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, + struct i915_vma * const vma); + +#endif /* __GEN7_RENDERCLEAR_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 51b8718513bc..f04214a54f75 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -292,10 +292,21 @@ #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) #define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) -#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) -#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) -#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) +#define STATE_BASE_ADDRESS \ + ((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16)) +#define BASE_ADDRESS_MODIFY REG_BIT(0) +#define PIPELINE_SELECT \ + ((0x3 << 29) | (0x1 << 27) | (0x1 << 24) | (0x4 << 16)) +#define PIPELINE_SELECT_MEDIA REG_BIT(0) +#define GFX_OP_3DSTATE_VF_STATISTICS \ + ((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16)) +#define MEDIA_VFE_STATE \ + ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16)) #define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) +#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \ + ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16)) +#define MEDIA_OBJECT \ + ((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16)) #define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) #define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 204c450b7c42..854979b79a1e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -34,6 +34,7 @@ #include "gem/i915_gem_context.h" #include "gen6_ppgtt.h" +#include "gen7_renderclear.h" #include "i915_drv.h" #include "i915_trace.h" #include "intel_context.h" @@ -1984,7 +1985,7 @@ static void setup_vecs(struct intel_engine_cs *engine) static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, struct i915_vma * const vma) { - return 0; + return gen7_setup_clear_gpr_bb(engine, vma); } static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index b0ade76bec90..7ac5b3565845 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -172,6 +172,11 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size) (typeof(ptr))(__v + 1); \ }) +#define ptr_align(ptr, align) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))round_up(__v, (align)); \ +}) + #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)