@@ -90,6 +90,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_lrc_reg.h"
#include "intel_workarounds.h"
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,14 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
return desc;
}
+static void intel_context_retire(struct i915_gem_active *active,
+ struct i915_request *rq)
+{
+ struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+ intel_context_unpin(ce);
+}
+
static struct i915_gem_context *
__create_hw_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
@@ -345,6 +354,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ce->gem_context = ctx;
/* Use the whole device by default */
ce->sseu = intel_device_default_sseu(dev_priv);
+
+ init_request_active(&ce->active, intel_context_retire);
}
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -846,6 +857,56 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
return 0;
}
+static int get_sseu(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+ int ret;
+
+ if (args->size == 0)
+ goto out;
+ else if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd1 || user_sseu.rsvd2)
+ return -EINVAL;
+
+ engine = intel_engine_lookup_user(ctx->i915,
+ user_sseu.class,
+ user_sseu.instance);
+ if (!engine)
+ return -EINVAL;
+
+ /* Only use for mutex here is to serialize get_param and set_param. */
+ ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+ if (ret)
+ return ret;
+
+ ce = to_intel_context(ctx, engine);
+
+ user_sseu.slice_mask = ce->sseu.slice_mask;
+ user_sseu.subslice_mask = ce->sseu.subslice_mask;
+ user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+ user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+ mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+ if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+out:
+ args->size = sizeof(user_sseu);
+
+ return 0;
+}
+
int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -858,15 +919,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
if (!ctx)
return -ENOENT;
- args->size = 0;
switch (args->param) {
case I915_CONTEXT_PARAM_BAN_PERIOD:
ret = -EINVAL;
break;
case I915_CONTEXT_PARAM_NO_ZEROMAP:
+ args->size = 0;
args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
break;
case I915_CONTEXT_PARAM_GTT_SIZE:
+ args->size = 0;
+
if (ctx->ppgtt)
args->value = ctx->ppgtt->vm.total;
else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -875,14 +938,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
args->value = to_i915(dev)->ggtt.vm.total;
break;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ args->size = 0;
args->value = i915_gem_context_no_error_capture(ctx);
break;
case I915_CONTEXT_PARAM_BANNABLE:
+ args->size = 0;
args->value = i915_gem_context_is_bannable(ctx);
break;
case I915_CONTEXT_PARAM_PRIORITY:
+ args->size = 0;
args->value = ctx->sched.priority;
break;
+ case I915_CONTEXT_PARAM_SSEU:
+ ret = get_sseu(ctx, args);
+ break;
default:
ret = -EINVAL;
break;
@@ -892,6 +961,271 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
return ret;
}
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+ struct intel_context *ce,
+ struct intel_sseu sseu)
+{
+ u64 offset;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ offset = ce->state->node.start +
+ LRC_STATE_PN * PAGE_SIZE +
+ (CTX_R_PWR_CLK_STATE + 1) * 4;
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ struct intel_sseu sseu)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_request *rq, *prev;
+ int ret;
+
+ GEM_BUG_ON(!ce->pin_count);
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+
+ /* Submitting requests etc needs the hw awake. */
+ intel_runtime_pm_get(i915);
+
+ rq = i915_request_alloc(engine, i915->kernel_context);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ goto out_put;
+ }
+
+ /* Queue this switch after all other activity by this context. */
+ prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+ &i915->drm.struct_mutex);
+ if (prev && !i915_request_completed(prev)) {
+ ret = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ &prev->submit,
+ I915_FENCE_GFP);
+ if (ret < 0)
+ goto out_add;
+ }
+
+ ret = gen8_emit_rpcs_config(rq, ce, sseu);
+ if (ret)
+ goto out_add;
+
+ /* Order all following requests to be after. */
+ i915_timeline_set_barrier(ce->ring->timeline, rq);
+
+ /*
+ * Guarantee context image and the timeline remains pinned until the
+ * modifying request is retired by setting the ce activity tracker.
+ *
+ * But we only need to take one pin on the account of it. Or in other
+ * words transfer the pinned ce object to tracked active request.
+ */
+ if (!i915_gem_active_isset(&ce->active))
+ __intel_context_pin(ce);
+ i915_gem_active_set(&ce->active, rq);
+
+out_add:
+ i915_request_add(rq);
+out_put:
+ intel_runtime_pm_put(i915);
+
+ return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct intel_sseu sseu)
+{
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ int ret;
+
+ GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+ GEM_BUG_ON(engine->id != RCS);
+
+ ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+ if (ret)
+ return ret;
+
+ /* Nothing to do if unmodified. */
+ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+ goto out;
+
+ /*
+ * If context is not idle we have to submit an ordered request to modify
+ * its context image via the kernel context. Pristine and idle contexts
+ * will be configured on pinning.
+ */
+ if (ce->pin_count)
+ ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+ if (!ret)
+ ce->sseu = sseu;
+
+out:
+ mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+ return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context)
+{
+ const struct sseu_dev_info *device = &INTEL_INFO(i915)->sseu;
+
+ /* No zeros in any field. */
+ if (!user->slice_mask || !user->subslice_mask ||
+ !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+ return -EINVAL;
+
+ /* Max > min. */
+ if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+ return -EINVAL;
+
+ /* Check validity against hardware. */
+ if (user->slice_mask & ~device->slice_mask)
+ return -EINVAL;
+
+ if (user->subslice_mask & ~device->subslice_mask[0])
+ return -EINVAL;
+
+ if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+ return -EINVAL;
+
+ /*
+ * Some future proofing on the types since the uAPI is wider than the
+ * current internal implementation.
+ */
+ if (WARN_ON((fls(user->slice_mask) >
+ sizeof(context->slice_mask) * BITS_PER_BYTE) ||
+ (fls(user->subslice_mask) >
+ sizeof(context->subslice_mask) * BITS_PER_BYTE) ||
+ overflows_type(user->min_eus_per_subslice,
+ context->min_eus_per_subslice) ||
+ overflows_type(user->max_eus_per_subslice,
+ context->max_eus_per_subslice)))
+ return -EINVAL;
+
+ context->slice_mask = user->slice_mask;
+ context->subslice_mask = user->subslice_mask;
+ context->min_eus_per_subslice = user->min_eus_per_subslice;
+ context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+ /* Part specific restrictions. */
+ if (IS_GEN11(i915)) {
+ unsigned int hw_s = hweight8(device->slice_mask);
+ unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+ unsigned int req_s = hweight8(context->slice_mask);
+ unsigned int req_ss = hweight8(context->subslice_mask);
+
+ /*
+ * Only full subslice enablement is possible if more than one
+ * slice is turned on.
+ */
+ if (req_s > 1 && req_ss != hw_ss_per_s)
+ return -EINVAL;
+
+ /*
+ * If more than four (SScount bitfield limit) subslices are
+ * requested then the number has to be even.
+ */
+ if (req_ss > 4 && (req_ss & 1))
+ return -EINVAL;
+
+ /*
+ * If only one slice is enabled and subslice count is below the
+ * device full enablement, it must be at most half of the all
+ * available subslices.
+ */
+ if (req_s == 1 && req_ss < hw_ss_per_s &&
+ req_ss > (hw_ss_per_s / 2))
+ return -EINVAL;
+
+ /* ABI restriction - VME use case only. */
+
+ /* All slices or one slice only. */
+ if (req_s != 1 && req_s != hw_s)
+ return -EINVAL;
+
+ /*
+ * Half subslices or full enablement only when one slice is
+ * enabled.
+ */
+ if (req_s == 1 &&
+ (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+ return -EINVAL;
+
+ /* No EU configuration changes. */
+ if ((user->min_eus_per_subslice !=
+ device->max_eus_per_subslice) ||
+ (user->max_eus_per_subslice !=
+ device->max_eus_per_subslice))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_engine_cs *engine;
+ struct intel_sseu sseu;
+ int ret;
+
+ if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (!IS_GEN11(i915))
+ return -ENODEV;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd1 || user_sseu.rsvd2)
+ return -EINVAL;
+
+ engine = intel_engine_lookup_user(i915,
+ user_sseu.class,
+ user_sseu.instance);
+ if (!engine)
+ return -EINVAL;
+
+ /* Only render engine supports RPCS configuration. */
+ if (engine->class != RENDER_CLASS)
+ return -ENODEV;
+
+ ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+ if (ret)
+ return ret;
+
+ args->size = sizeof(user_sseu);
+
+ return 0;
+}
+
int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -953,7 +1287,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
ctx->sched.priority = priority;
}
break;
-
+ case I915_CONTEXT_PARAM_SSEU:
+ ret = set_sseu(ctx, args);
+ break;
default:
ret = -EINVAL;
break;
@@ -170,6 +170,12 @@ struct i915_gem_context {
u64 lrc_desc;
int pin_count;
+ /**
+ * active: Active tracker for the external rq activity on this
+ * intel_context object.
+ */
+ struct i915_gem_active active;
+
const struct intel_context_ops *ops;
/** sseu: Control eu/slice partitioning */
@@ -2557,7 +2557,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
* subslices are enabled, or a count between one and four on the first
* slice.
*/
- if (IS_GEN11(i915) && slices == 1 && subslices >= 4) {
+ if (IS_GEN11(i915) &&
+ slices == 1 &&
+ subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
GEM_BUG_ON(subslices & 1);
subslice_pg = false;
@@ -1478,9 +1478,52 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+ /*
+ * When using the following param, value should be a pointer to
+ * drm_i915_gem_context_param_sseu.
+ */
+#define I915_CONTEXT_PARAM_SSEU 0x7
__u64 value;
};
+struct drm_i915_gem_context_param_sseu {
+ /*
+ * Engine class & instance to be configured or queried.
+ */
+ __u16 class;
+ __u16 instance;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 rsvd1;
+
+ /*
+ * Mask of slices to enable for the context. Valid values are a subset
+ * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+ */
+ __u64 slice_mask;
+
+ /*
+ * Mask of subslices to enable for the context. Valid values are a
+ * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+ */
+ __u64 subslice_mask;
+
+ /*
+ * Minimum/Maximum number of EUs to enable per subslice for the
+ * context. min_eus_per_subslice must be inferior or equal to
+ * max_eus_per_subslice.
+ */
+ __u16 min_eus_per_subslice;
+ __u16 max_eus_per_subslice;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 rsvd2;
+};
+
enum drm_i915_oa_format {
I915_OA_FORMAT_A13 = 1, /* HSW only */
I915_OA_FORMAT_A29, /* HSW only */