@@ -856,6 +856,7 @@ struct i915_ctx_hang_stats {
#define DEFAULT_CONTEXT_HANDLE 0
#define CONTEXT_NO_ZEROMAP (1<<0)
+#define CONTEXT_USE_TRTT (1 << 1)
/**
* struct intel_context - as the name implies, represents a context.
* @ref: reference count.
@@ -870,6 +871,8 @@ struct i915_ctx_hang_stats {
* @ppgtt: virtual memory space used by this context.
* @legacy_hw_ctx: render context backing object and whether it is correctly
* initialized (legacy ring submission mechanism only).
+ * @trtt_info: Programming parameters for TR-TT (Tiled Resource Translation
+ * Tables), which userspace uses for sparse resource management.
* @link: link in the global list of contexts.
*
* Contexts are memory images used by the hardware to store copies of their
@@ -880,7 +883,7 @@ struct intel_context {
int user_handle;
uint8_t remap_slice;
struct drm_i915_private *i915;
- int flags;
+ unsigned int flags;
struct drm_i915_file_private *file_priv;
struct i915_ctx_hang_stats hang_stats;
struct i915_hw_ppgtt *ppgtt;
@@ -901,6 +904,15 @@ struct intel_context {
uint32_t *lrc_reg_state;
} engine[I915_NUM_RINGS];
+ /* TR-TT (Tiled Resource Translation Table) state */
+ struct intel_context_trtt {
+ u32 invd_tile_val;
+ u32 null_tile_val;
+ u64 l3_table_address;
+ u64 segment_base_addr;
+ struct i915_vma *vma;
+ } trtt_info;
+
struct list_head link;
};
@@ -2703,6 +2715,8 @@ struct drm_i915_cmd_table {
!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && \
!IS_BROXTON(dev))
+#define HAS_TRTT(dev) (IS_GEN9(dev))
+
#define INTEL_PCH_DEVICE_ID_MASK 0xff00
#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00
#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00
@@ -133,6 +133,14 @@ static int get_context_size(struct drm_device *dev)
return ret;
}
+static void intel_context_free_trtt(struct intel_context *ctx)
+{
+ if (!ctx->trtt_info.vma)
+ return;
+
+ intel_trtt_context_destroy_vma(ctx->trtt_info.vma);
+}
+
static void i915_gem_context_clean(struct intel_context *ctx)
{
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
@@ -164,6 +172,8 @@ void i915_gem_context_free(struct kref *ctx_ref)
*/
i915_gem_context_clean(ctx);
+ intel_context_free_trtt(ctx);
+
i915_ppgtt_put(ctx->ppgtt);
if (ctx->legacy_hw_ctx.rcs_state)
@@ -507,6 +517,127 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
return ctx;
}
+static int
+intel_context_get_trtt(struct intel_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_trtt_param trtt_params;
+ struct drm_device *dev = ctx->i915->dev;
+
+ if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev))
+ return -ENODEV;
+
+ if (args->size < sizeof(trtt_params)) {
+ /* Size-only probe: report the required buffer size */
+ args->size = sizeof(trtt_params);
+ return 0;
+ }
+
+ trtt_params.segment_base_addr = ctx->trtt_info.segment_base_addr;
+ trtt_params.l3_table_address = ctx->trtt_info.l3_table_address;
+ trtt_params.null_tile_val = ctx->trtt_info.null_tile_val;
+ trtt_params.invd_tile_val = ctx->trtt_info.invd_tile_val;
+
+ i915_gem_context_reference(ctx);
+ mutex_unlock(&dev->struct_mutex);
+
+ if (copy_to_user(to_user_ptr(args->value), &trtt_params,
+ sizeof(trtt_params))) {
+ mutex_lock(&dev->struct_mutex);
+ i915_gem_context_unreference(ctx);
+ return -EFAULT;
+ }
+
+ args->size = sizeof(trtt_params);
+ mutex_lock(&dev->struct_mutex);
+ i915_gem_context_unreference(ctx);
+
+ return 0;
+}
+
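The two-call size-probe protocol above is worth illustrating from the userspace side. A minimal sketch, assuming a libdrm file descriptor, an existing context id, and a uapi header that carries the new struct; get_trtt_params is a hypothetical helper name, not part of this patch:

    #include <stdint.h>
    #include <xf86drm.h>
    #include <i915_drm.h>

    /* Hypothetical helper: fetch a context's TR-TT parameters. */
    static int get_trtt_params(int fd, uint32_t ctx_id,
                               struct drm_i915_gem_context_trtt_param *out)
    {
        struct drm_i915_gem_context_param p = {
            .ctx_id = ctx_id,
            .param = I915_CONTEXT_PARAM_TRTT,
            .size = 0, /* first call: ask for the required size */
        };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p))
            return -1;

        /* p.size now holds sizeof(*out); second call copies the data */
        p.value = (uintptr_t)out;
        return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
    }
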
+static int
+intel_context_set_trtt(struct intel_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_trtt_param trtt_params;
+ struct drm_device *dev = ctx->i915->dev;
+ int ret;
+
+ if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev))
+ return -ENODEV;
+
+ if (ctx->flags & CONTEXT_USE_TRTT)
+ return -EEXIST;
+
+ if (args->size < sizeof(trtt_params))
+ return -EINVAL;
+
+ i915_gem_context_reference(ctx);
+ mutex_unlock(&dev->struct_mutex);
+
+ if (copy_from_user(&trtt_params,
+ to_user_ptr(args->value),
+ sizeof(trtt_params))) {
+ mutex_lock(&dev->struct_mutex);
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+ /* Check if TR-TT setup already happened while the mutex was dropped */
+ if (ctx->flags & CONTEXT_USE_TRTT) {
+ ret = -EEXIST;
+ goto exit;
+ }
+
+ /* Basic sanity checks for the segment location & L3 table pointer */
+ if (trtt_params.segment_base_addr & (GEN9_TRTT_SEGMENT_SIZE - 1)) {
+ i915_dbg(dev, "segment base address not correctly aligned\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (((trtt_params.l3_table_address + PAGE_SIZE) >=
+ trtt_params.segment_base_addr) &&
+ (trtt_params.l3_table_address <
+ (trtt_params.segment_base_addr + GEN9_TRTT_SEGMENT_SIZE))) {
+ i915_dbg(dev, "l3 table address conflicts with trtt segment\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (trtt_params.l3_table_address & ~GEN9_TRTT_L3_GFXADDR_MASK) {
+ i915_dbg(dev, "invalid l3 table address\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ctx->trtt_info.vma = intel_trtt_context_allocate_vma(&ctx->ppgtt->base,
+ trtt_params.segment_base_addr);
+ if (IS_ERR(ctx->trtt_info.vma)) {
+ ret = PTR_ERR(ctx->trtt_info.vma);
+ goto exit;
+ }
+
+ ctx->trtt_info.null_tile_val = trtt_params.null_tile_val;
+ ctx->trtt_info.invd_tile_val = trtt_params.invd_tile_val;
+ ctx->trtt_info.l3_table_address = trtt_params.l3_table_address;
+ ctx->trtt_info.segment_base_addr = trtt_params.segment_base_addr;
+
+ ret = intel_lr_rcs_context_setup_trtt(ctx);
+ if (ret) {
+ intel_trtt_context_destroy_vma(ctx->trtt_info.vma);
+ goto exit;
+ }
+
+ ctx->flags |= CONTEXT_USE_TRTT;
+
+exit:
+ i915_gem_context_unreference(ctx);
+ return ret;
+}
+
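Since these checks surface to userspace only as -EINVAL, a caller may want to pre-validate the parameters before issuing the ioctl. A hypothetical mirror of the kernel's checks; the TRTT_* constants restate kernel-internal values that are not part of the uapi header:

    #include <stdbool.h>
    #include <stdint.h>
    #include <i915_drm.h>

    #define TRTT_SEGMENT_SIZE (1ULL << 44)       /* GEN9_TRTT_SEGMENT_SIZE */
    #define TRTT_L3_ADDR_MASK 0xFFFFFFFF0000ULL  /* GEN9_TRTT_L3_GFXADDR_MASK */
    #define TRTT_PAGE_SIZE 4096ULL

    /* Hypothetical userspace mirror of intel_context_set_trtt()'s checks. */
    static bool trtt_params_valid(const struct drm_i915_gem_context_trtt_param *p)
    {
        /* Segment base must be aligned to the fixed segment size */
        if (p->segment_base_addr & (TRTT_SEGMENT_SIZE - 1))
            return false;

        /* The L3 table page must not overlap the TR-TT segment */
        if (p->l3_table_address + TRTT_PAGE_SIZE >= p->segment_base_addr &&
            p->l3_table_address < p->segment_base_addr + TRTT_SEGMENT_SIZE)
            return false;

        /* The L3 table must sit within the addressable mask */
        return (p->l3_table_address & ~TRTT_L3_ADDR_MASK) == 0;
    }
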
static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
@@ -923,7 +1054,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
return PTR_ERR(ctx);
}
- args->size = 0;
+ if (args->param != I915_CONTEXT_PARAM_TRTT)
+ args->size = 0;
switch (args->param) {
case I915_CONTEXT_PARAM_BAN_PERIOD:
args->value = ctx->hang_stats.ban_period_seconds;
@@ -939,6 +1070,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
else
args->value = to_i915(dev)->gtt.base.total;
break;
+ case I915_CONTEXT_PARAM_TRTT:
+ ret = intel_context_get_trtt(ctx, args);
+ break;
default:
ret = -EINVAL;
break;
@@ -984,6 +1118,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
}
break;
+ case I915_CONTEXT_PARAM_TRTT:
+ ret = intel_context_set_trtt(ctx, args);
+ break;
default:
ret = -EINVAL;
break;
@@ -2169,6 +2169,17 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
{
gtt_write_workarounds(dev);
+ if (HAS_TRTT(dev) && USES_FULL_48BIT_PPGTT(dev)) {
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /*
+ * Globally enable TR-TT support in the hardware.
+ * TR-TT still has to be enabled on a per-context basis, and
+ * contexts that do not use TR-TT are unaffected by this setting.
+ */
+ I915_WRITE(GEN9_TR_CHICKEN_BIT_VECTOR,
+ GEN9_TRTT_BYPASS_DISABLE);
+ }
+
/* In the case of execlists, PPGTT is enabled by the context descriptor
* and the PDPs are contained within the context itself. We don't
* need to do anything here. */
@@ -3368,6 +3379,59 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
}
+void intel_trtt_context_destroy_vma(struct i915_vma *vma)
+{
+ struct i915_address_space *vm = vma->vm;
+
+ WARN_ON(!list_empty(&vma->obj_link));
+ WARN_ON(!list_empty(&vma->vm_link));
+ WARN_ON(!list_empty(&vma->exec_list));
+
+ WARN_ON(!vma->pin_count);
+
+ drm_mm_remove_node(&vma->node);
+ i915_ppgtt_put(i915_vm_to_ppgtt(vm));
+ kmem_cache_free(to_i915(vm->dev)->vmas, vma);
+}
+
+struct i915_vma *
+intel_trtt_context_allocate_vma(struct i915_address_space *vm,
+ uint64_t segment_base_addr)
+{
+ struct i915_vma *vma;
+ int ret;
+
+ vma = kmem_cache_zalloc(to_i915(vm->dev)->vmas, GFP_KERNEL);
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&vma->obj_link);
+ INIT_LIST_HEAD(&vma->vm_link);
+ INIT_LIST_HEAD(&vma->exec_list);
+ vma->vm = vm;
+ i915_ppgtt_get(i915_vm_to_ppgtt(vm));
+
+ /* Mark the vma as permanently pinned */
+ vma->pin_count = 1;
+
+ /* Reserve from the 48 bit PPGTT space */
+ vma->node.start = segment_base_addr;
+ vma->node.size = GEN9_TRTT_SEGMENT_SIZE;
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ if (ret) {
+ ret = i915_gem_evict_for_vma(vma);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ }
+ if (ret) {
+ DRM_ERROR("Reservation for TRTT segment failed: %i\n", ret);
+ intel_trtt_context_destroy_vma(vma);
+ return ERR_PTR(ret);
+ }
+
+ return vma;
+}
+
static struct scatterlist *
rotate_pages(const dma_addr_t *in, unsigned int offset,
unsigned int width, unsigned int height,
@@ -128,6 +128,10 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
+/* Fixed 16 TiB (1ULL << 44) segment size */
+#define GEN9_TRTT_SEG_SIZE_SHIFT 44
+#define GEN9_TRTT_SEGMENT_SIZE (1ULL << GEN9_TRTT_SEG_SIZE_SHIFT)
+
enum i915_ggtt_view_type {
I915_GGTT_VIEW_NORMAL = 0,
I915_GGTT_VIEW_ROTATED,
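For scale: 1ULL << 44 is 16 TiB, one sixteenth of the 256 TiB covered by a 48-bit PPGTT, so a segment's location is fully captured by the top nibble of its graphics address (which is what GEN9_TRVA_DATA_MASK extracts). A compile-time sketch of that relationship; in kernel code this would more likely be a BUILD_BUG_ON:

    /* The 48b PPGTT holds exactly 16 fixed-size TR-TT segment slots */
    _Static_assert(((1ULL << 48) >> GEN9_TRTT_SEG_SIZE_SHIFT) == 16,
                   "TR-TT slot index must fit in a single nibble");
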
@@ -562,4 +566,8 @@ size_t
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
+struct i915_vma *
+intel_trtt_context_allocate_vma(struct i915_address_space *vm,
+ uint64_t segment_base_addr);
+void intel_trtt_context_destroy_vma(struct i915_vma *vma);
#endif
@@ -186,6 +186,25 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN8_RPCS_EU_MIN_SHIFT 0
#define GEN8_RPCS_EU_MIN_MASK (0xf << GEN8_RPCS_EU_MIN_SHIFT)
+#define GEN9_TR_CHICKEN_BIT_VECTOR _MMIO(0x4DFC)
+#define GEN9_TRTT_BYPASS_DISABLE (1 << 0)
+
+/* TRTT registers in the H/W Context */
+#define GEN9_TRTT_L3_POINTER_DW0 _MMIO(0x4DE0)
+#define GEN9_TRTT_L3_POINTER_DW1 _MMIO(0x4DE4)
+#define GEN9_TRTT_L3_GFXADDR_MASK 0xFFFFFFFF0000
+
+#define GEN9_TRTT_NULL_TILE_REG _MMIO(0x4DE8)
+#define GEN9_TRTT_INVD_TILE_REG _MMIO(0x4DEC)
+
+#define GEN9_TRTT_VA_MASKDATA _MMIO(0x4DF0)
+#define GEN9_TRVA_MASK_VALUE 0xF0
+#define GEN9_TRVA_DATA_MASK 0xF
+
+#define GEN9_TRTT_TABLE_CONTROL _MMIO(0x4DF4)
+#define GEN9_TRTT_IN_GFX_VA_SPACE (1 << 1)
+#define GEN9_TRTT_ENABLE (1 << 0)
+
#define GAM_ECOCHK _MMIO(0x4090)
#define BDW_DISABLE_HDC_INVALIDATION (1<<25)
#define ECOCHK_SNB_BIT (1<<10)
@@ -1640,6 +1640,76 @@ static int gen9_init_render_ring(struct intel_engine_cs *ring)
return init_workarounds_ring(ring);
}
+static int gen9_init_rcs_context_trtt(struct drm_i915_gem_request *req)
+{
+ struct intel_ringbuffer *ringbuf = req->ringbuf;
+ int ret;
+
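+ /* 4 dwords: LRI header, one reg/value pair, one NOOP of padding */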
+ ret = intel_logical_ring_begin(req, 2 + 2);
+ if (ret)
+ return ret;
+
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_TABLE_CONTROL);
+ intel_logical_ring_emit(ringbuf, 0);
+
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_advance(ringbuf);
+
+ return 0;
+}
+
+static int gen9_emit_trtt_regs(struct drm_i915_gem_request *req)
+{
+ struct intel_context *ctx = req->ctx;
+ struct intel_ringbuffer *ringbuf = req->ringbuf;
+ u64 masked_l3_gfx_address =
+ ctx->trtt_info.l3_table_address & GEN9_TRTT_L3_GFXADDR_MASK;
+ u32 trva_data_value =
+ (ctx->trtt_info.segment_base_addr >> GEN9_TRTT_SEG_SIZE_SHIFT) &
+ GEN9_TRVA_DATA_MASK;
+ const int num_lri_cmds = 6;
+ int ret;
+
+ /*
+ * Updating the TRTT registers by emitting LRIs is more reliable than
+ * directly updating the context image, as it ensures that the update
+ * happens in a serialized manner for the context and that the
+ * lite-restore scenario is also handled correctly.
+ */
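+ /* 1 dword LRI header + num_lri_cmds reg/value pairs + 1 NOOP of padding */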
+ ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
+ if (ret)
+ return ret;
+
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_L3_POINTER_DW0);
+ intel_logical_ring_emit(ringbuf, lower_32_bits(masked_l3_gfx_address));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_L3_POINTER_DW1);
+ intel_logical_ring_emit(ringbuf, upper_32_bits(masked_l3_gfx_address));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_NULL_TILE_REG);
+ intel_logical_ring_emit(ringbuf, ctx->trtt_info.null_tile_val);
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_INVD_TILE_REG);
+ intel_logical_ring_emit(ringbuf, ctx->trtt_info.invd_tile_val);
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_VA_MASKDATA);
+ intel_logical_ring_emit(ringbuf,
+ GEN9_TRVA_MASK_VALUE | trva_data_value);
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_TABLE_CONTROL);
+ intel_logical_ring_emit(ringbuf,
+ GEN9_TRTT_IN_GFX_VA_SPACE | GEN9_TRTT_ENABLE);
+
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_advance(ringbuf);
+
+ return 0;
+}
+
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
{
struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
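To make the GEN9_TRTT_VA_MASKDATA write in gen9_emit_trtt_regs() concrete, here is the arithmetic for a hypothetical segment base address; 0xF00000000000 is purely illustrative, and any 16 TiB aligned address in the 48-bit space works:

    #include <stdint.h>

    uint64_t segment_base_addr = 0xF00000000000ULL; /* topmost 16 TiB slot */
    uint32_t trva_data_value = (segment_base_addr >> 44) & 0xF; /* == 0xF */
    uint32_t maskdata = 0xF0 | trva_data_value; /* == 0xFF, written to the reg */

Per the mask/data split defined above, the fixed 0xF0 mask nibble selects address bits 47:44 for comparison against the data nibble, so any access falling inside the segment is recognized as a TR-TT address.
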
@@ -1994,6 +2064,25 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
return intel_lr_context_render_state_init(req);
}
+static int gen9_init_rcs_context(struct drm_i915_gem_request *req)
+{
+ int ret;
+
+ /*
+ * Explicitly disable TR-TT at the start of a new context.
+ * Otherwise, on switching from a TR-TT context to a new non-TR-TT
+ * context, the TR-TT settings of the outgoing context could spill
+ * over into the incoming context, since only the Ring Context part
+ * is loaded on the first submission of a new context (owing to the
+ * ENGINE_CTX_RESTORE_INHIBIT bit being set).
+ */
+ ret = gen9_init_rcs_context_trtt(req);
+ if (ret)
+ return ret;
+
+ return gen8_init_rcs_context(req);
+}
+
/**
* intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
*
@@ -2125,11 +2214,14 @@ static int logical_render_ring_init(struct drm_device *dev)
logical_ring_default_vfuncs(dev, ring);
/* Override some for render ring. */
- if (INTEL_INFO(dev)->gen >= 9)
+ if (INTEL_INFO(dev)->gen >= 9) {
ring->init_hw = gen9_init_render_ring;
- else
+ ring->init_context = gen9_init_rcs_context;
+ } else {
ring->init_hw = gen8_init_render_ring;
- ring->init_context = gen8_init_rcs_context;
+ ring->init_context = gen8_init_rcs_context;
+ }
+
ring->cleanup = intel_fini_pipe_control;
ring->emit_flush = gen8_emit_flush_render;
ring->emit_request = gen8_emit_request_render;
@@ -2669,3 +2761,29 @@ void intel_lr_context_reset(struct drm_device *dev,
ringbuf->tail = 0;
}
}
+
+int intel_lr_rcs_context_setup_trtt(struct intel_context *ctx)
+{
+ struct intel_engine_cs *ring = &(ctx->i915->ring[RCS]);
+ struct drm_i915_gem_request *req;
+ int ret;
+
+ if (!ctx->engine[RCS].state) {
+ ret = intel_lr_context_deferred_alloc(ctx, ring);
+ if (ret)
+ return ret;
+ }
+
+ req = i915_gem_request_alloc(ring, ctx);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ ret = gen9_emit_trtt_regs(req);
+ if (ret) {
+ i915_gem_request_cancel(req);
+ return ret;
+ }
+
+ i915_add_request(req);
+ return 0;
+}
@@ -107,6 +107,7 @@ void intel_lr_context_reset(struct drm_device *dev,
struct intel_context *ctx);
uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
struct intel_engine_cs *ring);
+int intel_lr_rcs_context_setup_trtt(struct intel_context *ctx);
u32 intel_execlists_ctx_id(struct intel_context *ctx,
struct intel_engine_cs *ring);
@@ -1167,7 +1167,15 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
+#define I915_CONTEXT_PARAM_TRTT 0x4
__u64 value;
};
+struct drm_i915_gem_context_trtt_param {
+ __u64 segment_base_addr;
+ __u64 l3_table_address;
+ __u32 invd_tile_val;
+ __u32 null_tile_val;
+};
+
#endif /* _UAPI_I915_DRM_H_ */
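
Putting the uapi together, enabling TR-TT on a context from userspace might look like the sketch below. The addresses and tile values are purely illustrative, enable_trtt is a hypothetical helper, and the header is assumed to carry the definitions added above:

    #include <stdint.h>
    #include <xf86drm.h>
    #include <i915_drm.h>

    /* Hypothetical helper: enable TR-TT on an existing context. */
    static int enable_trtt(int fd, uint32_t ctx_id, uint64_t l3_table_address)
    {
        struct drm_i915_gem_context_trtt_param trtt = {
            .segment_base_addr = 0xF00000000000ULL, /* 16 TiB aligned slot */
            .l3_table_address = l3_table_address,   /* must lie outside the segment */
            .invd_tile_val = 0xFFFFFFFF,            /* illustrative values */
            .null_tile_val = 0xFFFFFFFE,
        };
        struct drm_i915_gem_context_param p = {
            .ctx_id = ctx_id,
            .param = I915_CONTEXT_PARAM_TRTT,
            .size = sizeof(trtt),
            .value = (uintptr_t)&trtt,
        };

        /* Fails with EEXIST if TR-TT was already configured for this context */
        return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
    }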