@@ -59,6 +59,20 @@
#define LOCAL_I915_EXEC_FENCE_IN (1<<16)
#define LOCAL_I915_EXEC_FENCE_OUT (1<<17)
+struct local_drm_i915_gem_context_create_v2 {
+ /* output: id of new context */
+ __u32 ctx_id;
+ __u32 flags; /* input: bitmask of the LOCAL_I915_GEM_CONTEXT_* values below */
+#define LOCAL_I915_GEM_CONTEXT_SHARE_GTT 0x1 /* share ppgtt with share_ctx */
+#define LOCAL_I915_GEM_CONTEXT_SINGLE_TIMELINE 0x2
+ __u32 share_ctx; /* input: id of an existing context; set together with SHARE_GTT */
+ __u32 pad;
+};
+
+#define LOCAL_DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct local_drm_i915_gem_context_create_v2)
+
+#define LOCAL_I915_CONTEXT_PARAM_ENGINES 0x7
+
enum intel_engine_id {
RCS,
BCS,
@@ -143,6 +157,14 @@ struct w_step
DECLARE_EWMA(uint64_t, rt, 4, 2)
+struct ctx { /* one ctx_list slot; each workload context owns an even (primary) and an odd (extra) slot */
+ uint32_t id; /* context id returned by the context create ioctl; 0 while not created */
+ int priority;
+ bool targets_instance; /* I915 flag only: some BATCH step on this context names an engine other than VCS */
+ bool wants_balance; /* I915 flag only: some BATCH step on this context uses the generic VCS engine */
+ unsigned int static_vcs; /* alternated between 0 and 1 per created context; read by context_balance() */
+};
+
struct workload
{
unsigned int id;
@@ -164,11 +186,7 @@ struct workload
struct timespec repeat_start;
unsigned int nr_ctxs;
- struct {
- uint32_t id;
- int priority;
- unsigned int static_vcs;
- } *ctx_list;
+ struct ctx *ctx_list;
int sync_timeline;
uint32_t sync_seqno;
@@ -225,6 +243,7 @@ static int fd;
#define HEARTBEAT (1<<7)
#define GLOBAL_BALANCE (1<<8)
#define DEPSYNC (1<<9)
+#define I915 (1<<10)
#define SEQNO_IDX(engine) ((engine) * 16)
#define SEQNO_OFFSET(engine) (SEQNO_IDX(engine) * sizeof(uint32_t))
@@ -836,7 +855,11 @@ eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
if (engine == VCS2 && (flags & VCS2REMAP))
engine = BCS;
- eb->flags = eb_engine_map[engine];
+ if ((flags & I915) && engine == VCS) {
+ eb->flags = 0;
+ } else {
+ eb->flags = eb_engine_map[engine];
+ }
}
static void
@@ -862,6 +885,23 @@ get_status_objects(struct workload *wrk)
return wrk->status_object;
}
+static struct ctx *
+__get_ctx(struct workload *wrk, struct w_step *w)
+{
+	return wrk->ctx_list + 2 * w->context; /* even slot: the step's primary context */
+}
+
+static uint32_t
+get_ctxid(struct workload *wrk, struct w_step *w)
+{
+	struct ctx *slot = __get_ctx(wrk, w);
+
+	/* Generic VCS work on a mixed-use context goes to the odd (balanced) slot. */
+	if (w->engine == VCS && slot->targets_instance && slot->wants_balance)
+		slot++;
+	return slot->id;
+}
+
static void
alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
{
@@ -914,7 +954,7 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
w->eb.buffers_ptr = to_user_pointer(w->obj);
w->eb.buffer_count = j + 1;
- w->eb.rsvd1 = wrk->ctx_list[w->context].id;
+ w->eb.rsvd1 = get_ctxid(wrk, w);
if (flags & SWAPVCS && engine == VCS1)
engine = VCS2;
@@ -927,17 +967,29 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
printf("%x|", w->obj[i].handle);
printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
- wrk->ctx_list[w->context].id);
+ get_ctxid(wrk, w));
#endif
}
+static void __ctx_set_prio(uint32_t ctx_id, unsigned int prio) /* sets context priority; no-op when prio is 0 */
+{
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = ctx_id,
+		.param = I915_CONTEXT_PARAM_PRIORITY,
+		.value = (int)prio, /* sign-extend so a negative priority stays negative in the wide value field */
+	};
+
+	if (prio)
+		gem_context_set_param(fd, &param);
+}
+
static void
prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
{
unsigned int ctx_vcs = 0;
int max_ctx = -1;
struct w_step *w;
- int i;
+ int i, j;
wrk->id = id;
wrk->prng = rand();
@@ -968,44 +1020,174 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
}
}
+ /*
+ * Pre-scan workload steps to allocate context list storage.
+ */
for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
- if ((int)w->context > max_ctx) {
- int delta = w->context + 1 - wrk->nr_ctxs;
+ int ctx = w->context * 2 + 1; /* Odd slots are special. */
+ int delta;
+
+ if (ctx <= max_ctx)
+ continue;
+
+ delta = ctx + 1 - wrk->nr_ctxs;
+
+ wrk->nr_ctxs += delta;
+ wrk->ctx_list = realloc(wrk->ctx_list,
+ wrk->nr_ctxs * sizeof(*wrk->ctx_list));
+ memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
+ delta * sizeof(*wrk->ctx_list));
+
+ max_ctx = ctx;
+ }
+
+ /*
+ * Identify if contexts target specific engine instances and if they
+ * want to be balanced.
+ */
+ for (j = 0; j < wrk->nr_ctxs; j += 2) {
+ bool targets = false;
+ bool balance = false;
+
+ for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+ if (w->type != BATCH)
+ continue;
+
+ if (w->context != (j / 2))
+ continue;
- wrk->nr_ctxs += delta;
- wrk->ctx_list = realloc(wrk->ctx_list,
- wrk->nr_ctxs *
- sizeof(*wrk->ctx_list));
- memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
- delta * sizeof(*wrk->ctx_list));
+ if (w->engine == VCS)
+ balance = true;
+ else
+ targets = true;
+ }
- max_ctx = w->context;
+ if (flags & I915) {
+ wrk->ctx_list[j].targets_instance = targets;
+ wrk->ctx_list[j].wants_balance = balance;
}
+ }
- if (!wrk->ctx_list[w->context].id) {
- struct drm_i915_gem_context_create arg = {};
+ /*
+ * Create and configure contexts.
+ */
+ for (i = 0; i < wrk->nr_ctxs; i += 2) {
+ struct ctx *ctx = &wrk->ctx_list[i];
+ uint32_t ctx_id, share_ctx = 0;
- drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
- igt_assert(arg.ctx_id);
+ if (ctx->id)
+ continue;
- wrk->ctx_list[w->context].id = arg.ctx_id;
+ if (flags & I915) {
+ struct local_drm_i915_gem_context_create_v2 args = { };
- if (flags & GLOBAL_BALANCE) {
- wrk->ctx_list[w->context].static_vcs = context_vcs_rr;
- context_vcs_rr ^= 1;
- } else {
- wrk->ctx_list[w->context].static_vcs = ctx_vcs;
- ctx_vcs ^= 1;
- }
+ /* Find existing context to share ppgtt with. */
+ for (j = 0; j < wrk->nr_ctxs; j++) {
+ if (!wrk->ctx_list[j].id)
+ continue;
- if (wrk->prio) {
- struct drm_i915_gem_context_param param = {
- .ctx_id = arg.ctx_id,
- .param = I915_CONTEXT_PARAM_PRIORITY,
- .value = wrk->prio,
- };
- gem_context_set_param(fd, &param);
+ args.flags |= LOCAL_I915_GEM_CONTEXT_SHARE_GTT;
+ args.share_ctx = share_ctx =
+ wrk->ctx_list[j].id;
+ break;
}
+
+ if (!ctx->targets_instance)
+ args.flags |= LOCAL_I915_GEM_CONTEXT_SINGLE_TIMELINE;
+
+ drmIoctl(fd, LOCAL_DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
+ &args);
+
+ ctx_id = args.ctx_id;
+ } else {
+ struct drm_i915_gem_context_create args = {};
+
+ drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args);
+ ctx_id = args.ctx_id;
+ }
+
+ igt_assert(ctx_id);
+ ctx->id = ctx_id;
+
+ if (flags & GLOBAL_BALANCE) {
+ ctx->static_vcs = context_vcs_rr;
+ context_vcs_rr ^= 1;
+ } else {
+ ctx->static_vcs = ctx_vcs;
+ ctx_vcs ^= 1;
+ }
+
+ __ctx_set_prio(ctx_id, wrk->prio);
+
+ /*
+ * Do we need a separate context to satisfy workloads which
+ * both want to target specific engines and be balanced by i915?
+ */
+ if ((flags & I915) && ctx->wants_balance &&
+ ctx->targets_instance) {
+ struct local_drm_i915_gem_context_create_v2 args = {};
+
+ igt_assert(share_ctx);
+
+ args.flags = LOCAL_I915_GEM_CONTEXT_SINGLE_TIMELINE |
+ LOCAL_I915_GEM_CONTEXT_SHARE_GTT;
+ args.share_ctx = share_ctx;
+
+ drmIoctl(fd, LOCAL_DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
+ &args);
+
+ igt_assert(args.ctx_id);
+ ctx_id = args.ctx_id;
+ wrk->ctx_list[i + 1].id = args.ctx_id;
+
+ __ctx_set_prio(ctx_id, wrk->prio);
+ }
+
+ if (ctx->wants_balance) {
+ #define LOCAL_I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+
+ struct local_i915_user_extension {
+ __u64 next_extension;
+ __u64 name;
+ };
+
+ struct local_i915_context_engines_load_balance {
+ struct local_i915_user_extension base;
+
+ __u64 flags; /* all undefined flags must be zero */
+ __u64 engines_mask;
+
+ __u64 mbz[4]; /* reserved for future use; must be zero */
+ } load_balance = {
+ .base.name = LOCAL_I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+ .engines_mask = -1,
+ };
+
+ struct local_i915_context_param_engines {
+ __u64 extensions;
+
+ struct {
+ __u16 class; /* see enum drm_i915_gem_engine_class */
+ __u16 instance;
+ } engines[2];
+ } __attribute__((packed)) set_engines = {
+ .extensions = to_user_pointer(&load_balance),
+ .engines = {
+ { .class = I915_ENGINE_CLASS_VIDEO,
+ .instance = 0 },
+ { .class = I915_ENGINE_CLASS_VIDEO,
+ .instance = 1 },
+ },
+ };
+
+ struct drm_i915_gem_context_param param = {
+ .ctx_id = ctx_id,
+ .param = LOCAL_I915_CONTEXT_PARAM_ENGINES,
+ .size = sizeof(set_engines),
+ .value = to_user_pointer(&set_engines),
+ };
+
+ gem_context_set_param(fd, &param);
}
}
@@ -1380,7 +1562,7 @@ static enum intel_engine_id
context_balance(const struct workload_balancer *balancer,
struct workload *wrk, struct w_step *w)
{
- return get_vcs_engine(wrk->ctx_list[w->context].static_vcs);
+ return get_vcs_engine(__get_ctx(wrk, w)->static_vcs);
}
static unsigned int
@@ -1574,6 +1756,12 @@ static const struct workload_balancer all_balancers[] = {
.get_qd = get_engine_busy,
.balance = busy_avg_balance,
},
+ {
+ .id = 11,
+ .name = "i915",
+ .desc = "i915 balancing.",
+ .flags = I915,
+ },
};
static unsigned int
@@ -1952,7 +2140,8 @@ static void *run_workload(void *data)
last_sync = false;
wrk->nr_bb[engine]++;
- if (engine == VCS && wrk->balancer) {
+ if (engine == VCS && wrk->balancer &&
+ wrk->balancer->balance) {
engine = wrk->balancer->balance(wrk->balancer,
wrk, w);
wrk->nr_bb[engine]++;
@@ -2379,6 +2568,12 @@ int main(int argc, char **argv)
return 1;
}
+ if ((flags & VCS2REMAP) && (flags & I915)) {
+ if (verbose)
+ fprintf(stderr, "VCS remapping not supported with i915 balancing!\n");
+ return 1;
+ }
+
if (!nop_calibration) {
if (verbose > 1)
printf("Calibrating nop delay with %u%% tolerance...\n",
@@ -2464,11 +2659,17 @@ int main(int argc, char **argv)
printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
if (flags & SWAPVCS)
printf("Swapping VCS rings between clients.\n");
- if (flags & GLOBAL_BALANCE)
- printf("Using %s balancer in global mode.\n",
- balancer->name);
- else if (balancer)
+ if (flags & GLOBAL_BALANCE) {
+ if (flags & I915) {
+ printf("Ignoring global balancing with i915!\n");
+ flags &= ~GLOBAL_BALANCE;
+ } else {
+ printf("Using %s balancer in global mode.\n",
+ balancer->name);
+ }
+ } else if (balancer) {
printf("Using %s balancer.\n", balancer->name);
+ }
}
if (master_workload >= 0 && clients == 1)
@@ -2485,7 +2686,7 @@ int main(int argc, char **argv)
if (flags & SWAPVCS && i & 1)
flags_ &= ~SWAPVCS;
- if (flags & GLOBAL_BALANCE) {
+ if ((flags & GLOBAL_BALANCE) && !(flags & I915)) {
w[i]->balancer = &global_balancer;
w[i]->global_wrk = w[0];
w[i]->global_balancer = balancer;
@@ -49,10 +49,11 @@ my $nop;
my %opts;
my @balancers = ( 'rr', 'rand', 'qd', 'qdr', 'qdavg', 'rt', 'rtr', 'rtavg',
- 'context', 'busy', 'busy-avg' );
+ 'context', 'busy', 'busy-avg', 'i915' );
my %bal_skip_H = ( 'rr' => 1, 'rand' => 1, 'context' => 1, , 'busy' => 1,
- 'busy-avg' => 1 );
-my %bal_skip_R = ( 'context' => 1 );
+ 'busy-avg' => 1, 'i915' => 1 );
+my %bal_skip_R = ( 'context' => 1, 'i915' => 1 );
+my %bal_skip_G = ( 'i915' => 1 );
my @workloads = (
'media_load_balance_17i7.wsim',
@@ -498,6 +499,8 @@ foreach my $wrk (@saturation_workloads) {
my $bid;
if ($bal ne '') {
+ next GBAL if $G =~ '-G' and exists $bal_skip_G{$bal};
+
push @xargs, "-b $bal";
push @xargs, '-R' unless exists $bal_skip_R{$bal};
push @xargs, $G if $G ne '';