@@ -843,6 +843,7 @@ struct i915_ctx_hang_stats {
#define DEFAULT_CONTEXT_HANDLE 0
#define CONTEXT_NO_ZEROMAP (1<<0)
+#define CONTEXT_USE_TRTT (1<<1)
/**
* struct intel_context - as the name implies, represents a context.
* @ref: reference count.
@@ -867,7 +868,7 @@ struct intel_context {
int user_handle;
uint8_t remap_slice;
struct drm_i915_private *i915;
- int flags;
+ unsigned int flags;
struct drm_i915_file_private *file_priv;
struct i915_ctx_hang_stats hang_stats;
struct i915_hw_ppgtt *ppgtt;
@@ -885,6 +886,18 @@ struct intel_context {
int pin_count;
} engine[I915_NUM_RINGS];
+ /*
+ * TRTT info: the redirection tables that userspace sets up for
+ * sparse resource management.
+ */
+ struct intel_context_trtt {
+ uint32_t invd_tile_val;
+ uint32_t null_tile_val;
+ uint64_t l3_table_address;
+ uint64_t segment_base_addr;
+ struct i915_vma *vma;
+ bool update_trtt_params;
+ } trtt_info;
+
struct list_head link;
};
@@ -2638,6 +2651,8 @@ struct drm_i915_cmd_table {
!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && \
!IS_BROXTON(dev))
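+/* TR-TT (Tile Resource Translation Tables) is Gen9-only; the ioctl paths
+ * additionally require a full 48-bit PPGTT.
+ */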
+#define HAS_TRTT(dev) (IS_GEN9(dev))
+
#define INTEL_PCH_DEVICE_ID_MASK 0xff00
#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00
#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00
@@ -133,6 +133,14 @@ static int get_context_size(struct drm_device *dev)
return ret;
}
+static void intel_context_free_trtt(struct intel_context *ctx)
+{
+ if (!ctx->trtt_info.vma)
+ return;
+
+ intel_trtt_context_destroy_vma(ctx->trtt_info.vma);
+}
+
static void i915_gem_context_clean(struct intel_context *ctx)
{
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
@@ -164,6 +172,8 @@ void i915_gem_context_free(struct kref *ctx_ref)
*/
i915_gem_context_clean(ctx);
+ intel_context_free_trtt(ctx);
+
i915_ppgtt_put(ctx->ppgtt);
if (ctx->legacy_hw_ctx.rcs_state)
@@ -516,6 +526,88 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
return ctx;
}
+static int
+intel_context_get_trtt(struct intel_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_trtt_param trtt_params;
+ struct drm_device *dev = ctx->i915->dev;
+
+ if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev))
+ return -ENODEV;
+ else if (args->size < sizeof(trtt_params))
+ args->size = sizeof(trtt_params);
+ else {
+ trtt_params.segment_base_addr =
+ ctx->trtt_info.segment_base_addr;
+ trtt_params.l3_table_address =
+ ctx->trtt_info.l3_table_address;
+ trtt_params.null_tile_val =
+ ctx->trtt_info.null_tile_val;
+ trtt_params.invd_tile_val =
+ ctx->trtt_info.invd_tile_val;
+
+ if (copy_to_user(to_user_ptr(args->value),
+ &trtt_params,
+ sizeof(trtt_params)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int
+intel_context_set_trtt(struct intel_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_trtt_param trtt_params;
+ struct drm_device *dev = ctx->i915->dev;
+
+ if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev))
+ return -ENODEV;
+ else if (ctx->flags & CONTEXT_USE_TRTT)
+ return -EEXIST;
+ else if (args->size < sizeof(trtt_params))
+ return -EINVAL;
+ else if (copy_from_user(&trtt_params,
+ to_user_ptr(args->value),
+ sizeof(trtt_params)))
+ return -EFAULT;
+
+ /* Basic sanity checks for the segment location & L3 table pointer */
+ if (trtt_params.segment_base_addr & (GEN9_TRTT_SEGMENT_SIZE - 1)) {
+ DRM_DEBUG_DRIVER("segment base address not correctly aligned\n");
+ return -EINVAL;
+ }
+
+ if (((trtt_params.l3_table_address + PAGE_SIZE) >=
+ trtt_params.segment_base_addr) &&
+ (trtt_params.l3_table_address <
+ (trtt_params.segment_base_addr + GEN9_TRTT_SEGMENT_SIZE))) {
+ DRM_DEBUG_DRIVER("l3 table address conflicts with trtt segment\n");
+ return -EINVAL;
+ }
+
+ if (trtt_params.l3_table_address & ~GEN9_TRTT_L3_GFXADDR_MASK) {
+ DRM_DEBUG_DRIVER("invalid l3 table address\n");
+ return -EINVAL;
+ }
+
+ ctx->trtt_info.vma = intel_trtt_context_allocate_vma(&ctx->ppgtt->base,
+ trtt_params.segment_base_addr);
+ if (IS_ERR(ctx->trtt_info.vma))
+ return PTR_ERR(ctx->trtt_info.vma);
+
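+ /* Commit the parameters only after the VMA reservation succeeded */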
+ ctx->trtt_info.null_tile_val = trtt_params.null_tile_val;
+ ctx->trtt_info.invd_tile_val = trtt_params.invd_tile_val;
+ ctx->trtt_info.l3_table_address = trtt_params.l3_table_address;
+ ctx->trtt_info.segment_base_addr = trtt_params.segment_base_addr;
+ ctx->trtt_info.update_trtt_params = true;
+
+ ctx->flags |= CONTEXT_USE_TRTT;
+ return 0;
+}
+
static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
@@ -942,6 +1034,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
else
args->value = to_i915(dev)->gtt.base.total;
break;
+ case I915_CONTEXT_PARAM_TRTT:
+ ret = intel_context_get_trtt(ctx, args);
+ break;
default:
ret = -EINVAL;
break;
@@ -987,6 +1082,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
}
break;
+ case I915_CONTEXT_PARAM_TRTT:
+ ret = intel_context_set_trtt(ctx, args);
+ break;
default:
ret = -EINVAL;
break;
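For illustration only (not part of the patch): a minimal userspace sketch of
the new SETPARAM path, assuming the uapi additions at the end of this patch.
The helper name and both addresses are hypothetical, and error handling is
elided.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <xf86drm.h>
	#include <i915_drm.h>

	static int enable_trtt(int fd, uint32_t ctx_id)
	{
		struct drm_i915_gem_context_trtt_param trtt = {
			/* 16 TiB aligned start of the sparse segment */
			.segment_base_addr = 1ULL << 44,
			/* L3 table page, 64 KiB aligned, outside the segment */
			.l3_table_address = 1ULL << 16,
			.null_tile_val = 0xFFFFFFFF,
			.invd_tile_val = 0xFFFFFFFE,
		};
		struct drm_i915_gem_context_param arg = {
			.ctx_id = ctx_id,
			.size = sizeof(trtt),
			.param = I915_CONTEXT_PARAM_TRTT,
			.value = (uintptr_t)&trtt,
		};

		/* Fails with -EEXIST if TR-TT was already configured */
		return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}

On the GETPARAM side, calling with args->size smaller than the struct (e.g. 0)
only reports the required size back; a second call with the correct size
copies the parameters out.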
@@ -2146,6 +2146,17 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
int i915_ppgtt_init_hw(struct drm_device *dev)
{
+ if (HAS_TRTT(dev) && USES_FULL_48BIT_PPGTT(dev)) {
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ /*
+ * Globally enable TR-TT support in HW.
+ * TR-TT still has to be enabled on a per-context basis;
+ * non-TRTT contexts are not affected by this setting.
+ */
+ I915_WRITE(GEN9_TR_CHICKEN_BIT_VECTOR,
+ GEN9_TRTT_BYPASS_DISABLE);
+ }
+
/* In the case of execlists, PPGTT is enabled by the context descriptor
* and the PDPs are contained within the context itself. We don't
* need to do anything here. */
@@ -3328,6 +3339,57 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
}
+void intel_trtt_context_destroy_vma(struct i915_vma *vma)
+{
+ struct i915_address_space *vm = vma->vm;
+
+ WARN_ON(!list_empty(&vma->vma_link));
+ WARN_ON(!list_empty(&vma->mm_list));
+ WARN_ON(!list_empty(&vma->exec_list));
+
+ /* The node was never allocated if the segment reservation failed */
+ if (drm_mm_node_allocated(&vma->node))
+ drm_mm_remove_node(&vma->node);
+ i915_ppgtt_put(i915_vm_to_ppgtt(vm));
+ kmem_cache_free(to_i915(vm->dev)->vmas, vma);
+}
+
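+/*
+ * Reserve a permanently pinned VMA covering the fixed-size TR-TT segment
+ * in the context's 48-bit PPGTT, so that nothing else can get bound into
+ * that range while TR-TT is in use.
+ */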
+struct i915_vma *
+intel_trtt_context_allocate_vma(struct i915_address_space *vm,
+ uint64_t segment_base_addr)
+{
+ struct i915_vma *vma;
+ int ret;
+
+ vma = kmem_cache_zalloc(to_i915(vm->dev)->vmas, GFP_KERNEL);
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&vma->vma_link);
+ INIT_LIST_HEAD(&vma->mm_list);
+ INIT_LIST_HEAD(&vma->exec_list);
+ vma->vm = vm;
+ i915_ppgtt_get(i915_vm_to_ppgtt(vm));
+
+ /* Mark the vma as permanently pinned */
+ vma->pin_count = 1;
+
+ /* Reserve from the 48 bit PPGTT space */
+ vma->node.start = segment_base_addr;
+ vma->node.size = GEN9_TRTT_SEGMENT_SIZE;
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
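+ /* The range may already contain soft-pinned VMAs; try to evict
+ * them and then reserve again.
+ */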
+ if (ret) {
+ ret = i915_gem_evict_for_vma(vma);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ }
+ if (ret) {
+ DRM_ERROR("Reservation for TRTT segment failed: %i\n", ret);
+ intel_trtt_context_destroy_vma(vma);
+ return ERR_PTR(ret);
+ }
+
+ return vma;
+}
+
static struct scatterlist *
rotate_pages(dma_addr_t *in, unsigned int offset,
unsigned int width, unsigned int height,
@@ -129,6 +129,10 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
+/* TR-TT segments have a fixed 16 TiB size (1ULL << 44) in the 48-bit PPGTT */
+#define GEN9_TRTT_SEG_SIZE_SHIFT 44
+#define GEN9_TRTT_SEGMENT_SIZE (1ULL << GEN9_TRTT_SEG_SIZE_SHIFT)
+
enum i915_ggtt_view_type {
I915_GGTT_VIEW_NORMAL = 0,
I915_GGTT_VIEW_ROTATED,
@@ -559,4 +563,8 @@ size_t
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
+struct i915_vma *
+intel_trtt_context_allocate_vma(struct i915_address_space *vm,
+ uint64_t segment_base_addr);
+void intel_trtt_context_destroy_vma(struct i915_vma *vma);
#endif
@@ -186,6 +186,25 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN8_RPCS_EU_MIN_SHIFT 0
#define GEN8_RPCS_EU_MIN_MASK (0xf << GEN8_RPCS_EU_MIN_SHIFT)
+#define GEN9_TR_CHICKEN_BIT_VECTOR _MMIO(0x4DFC)
+#define GEN9_TRTT_BYPASS_DISABLE (1<<0)
+
+/* TRTT registers in the H/W Context */
+#define GEN9_TRTT_L3_POINTER_DW0 _MMIO(0x4DE0)
+#define GEN9_TRTT_L3_POINTER_DW1 _MMIO(0x4DE4)
+#define GEN9_TRTT_L3_GFXADDR_MASK 0xFFFFFFFF0000
+
+#define GEN9_TRTT_NULL_TILE_REG _MMIO(0x4DE8)
+#define GEN9_TRTT_INVD_TILE_REG _MMIO(0x4DEC)
+
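+/* The mask nibble (bits 7:4) selects which of the upper GFX address bits
+ * 47:44 are compared, and the data nibble (bits 3:0) holds bits 47:44 of
+ * the segment base address, so that accesses falling inside the segment
+ * are routed through TR-TT translation.
+ */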
+#define GEN9_TRTT_VA_MASKDATA _MMIO(0x4DF0)
+#define GEN9_TRVA_MASK_VALUE 0xF0
+#define GEN9_TRVA_DATA_MASK 0xF
+
+#define GEN9_TRTT_TABLE_CONTROL _MMIO(0x4DF4)
+#define GEN9_TRTT_IN_GFX_VA_SPACE (1<<1)
+#define GEN9_TRTT_ENABLE (1<<0)
+
#define GAM_ECOCHK _MMIO(0x4090)
#define BDW_DISABLE_HDC_INVALIDATION (1<<25)
#define ECOCHK_SNB_BIT (1<<10)
@@ -1578,6 +1578,70 @@ static int gen9_init_render_ring(struct intel_engine_cs *ring)
return init_workarounds_ring(ring);
}
+static int gen9_init_context_trtt(struct drm_i915_gem_request *req)
+{
+ struct intel_ringbuffer *ringbuf = req->ringbuf;
+ int ret;
+
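+ /* One LRI takes 3 dwords; the MI_NOOP pads to an even dword count */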
+ ret = intel_logical_ring_begin(req, 2 + 2);
+ if (ret)
+ return ret;
+
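+ /* Writing 0 clears GEN9_TRTT_ENABLE, turning TR-TT off for this context */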
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_TABLE_CONTROL);
+ intel_logical_ring_emit(ringbuf, 0);
+
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_advance(ringbuf);
+
+ return 0;
+}
+
+static int gen9_emit_trtt_regs(struct drm_i915_gem_request *req)
+{
+ struct intel_context *ctx = req->ctx;
+ struct intel_ringbuffer *ringbuf = req->ringbuf;
+ /* uint64_t, as the masked address spans bits 47:16 of the GFX address */
+ uint64_t masked_l3_gfx_address =
+ ctx->trtt_info.l3_table_address & GEN9_TRTT_L3_GFXADDR_MASK;
+ uint32_t trva_data_value =
+ (ctx->trtt_info.segment_base_addr >> GEN9_TRTT_SEG_SIZE_SHIFT) &
+ GEN9_TRVA_DATA_MASK;
+ const int num_lri_cmds = 6;
+ int ret;
+
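+ /* 1 dword LRI header + 2 dwords per register write + 1 for MI_NOOP */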
+ ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
+ if (ret)
+ return ret;
+
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_L3_POINTER_DW0);
+ intel_logical_ring_emit(ringbuf, lower_32_bits(masked_l3_gfx_address));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_L3_POINTER_DW1);
+ intel_logical_ring_emit(ringbuf, upper_32_bits(masked_l3_gfx_address));
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_NULL_TILE_REG);
+ intel_logical_ring_emit(ringbuf, ctx->trtt_info.null_tile_val);
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_INVD_TILE_REG);
+ intel_logical_ring_emit(ringbuf, ctx->trtt_info.invd_tile_val);
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_VA_MASKDATA);
+ intel_logical_ring_emit(ringbuf,
+ GEN9_TRVA_MASK_VALUE | trva_data_value);
+
+ intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_TABLE_CONTROL);
+ intel_logical_ring_emit(ringbuf,
+ GEN9_TRTT_IN_GFX_VA_SPACE | GEN9_TRTT_ENABLE);
+
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_advance(ringbuf);
+
+ return 0;
+}
+
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
{
struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
@@ -1631,6 +1695,17 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
}
+ /*
+ * Emitting LRIs to update the TRTT registers is more reliable than
+ * directly updating the context image, as it guarantees that the
+ * update is serialized with this context's submissions and that the
+ * lite-restore scenario is handled as well.
+ */
+ if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) {
+ ret = gen9_emit_trtt_regs(req);
+ if (ret)
+ return ret;
+
+ req->ctx->trtt_info.update_trtt_params = false;
+ }
+
ret = intel_logical_ring_begin(req, 4);
if (ret)
return ret;
@@ -1910,6 +1985,25 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
return intel_lr_context_render_state_init(req);
}
+static int gen9_init_rcs_context(struct drm_i915_gem_request *req)
+{
+ int ret;
+
+ /*
+ * Explicitly disable TR-TT at the start of a new context.
+ * Otherwise, on switching from a TR-TT context to a non-TR-TT
+ * context, the TR-TT settings of the outgoing context could spill
+ * over into the incoming context, as only the ring-context part is
+ * loaded on the first submission of a new context, due to the
+ * setting of the ENGINE_CTX_RESTORE_INHIBIT bit.
+ */
+ ret = gen9_init_context_trtt(req);
+ if (ret)
+ return ret;
+
+ return gen8_init_rcs_context(req);
+}
+
/**
* intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
*
@@ -2006,11 +2100,14 @@ static int logical_render_ring_init(struct drm_device *dev)
if (HAS_L3_DPF(dev))
ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
- if (INTEL_INFO(dev)->gen >= 9)
+ if (INTEL_INFO(dev)->gen >= 9) {
ring->init_hw = gen9_init_render_ring;
- else
+ ring->init_context = gen9_init_rcs_context;
+ } else {
ring->init_hw = gen8_init_render_ring;
- ring->init_context = gen8_init_rcs_context;
+ ring->init_context = gen8_init_rcs_context;
+ }
+
ring->cleanup = intel_fini_pipe_control;
if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
ring->get_seqno = bxt_a_get_seqno;
@@ -1140,7 +1140,15 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
+#define I915_CONTEXT_PARAM_TRTT 0x4
__u64 value;
};
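+/*
+ * Parameter block for I915_CONTEXT_PARAM_TRTT. segment_base_addr must be
+ * aligned to the 16 TiB segment size and l3_table_address must point to a
+ * page outside the segment (the setparam path validates both).
+ * null_tile_val and invd_tile_val are the userspace-chosen table entries
+ * that mark Null and Invalid tiles respectively.
+ */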
+struct drm_i915_gem_context_trtt_param {
+ __u64 segment_base_addr;
+ __u64 l3_table_address;
+ __u32 invd_tile_val;
+ __u32 null_tile_val;
+};
+
#endif /* _UAPI_I915_DRM_H_ */