@@ -84,6 +84,7 @@ enum w_type
PREEMPTION,
ENGINE_MAP,
LOAD_BALANCE,
+ BOND,
};
struct deps
@@ -99,6 +100,11 @@ struct w_arg {
int prio;
};
+struct bond { /* one engine bond rule attached to a load balanced context */
+ uint64_t mask; /* sibling engine bitmask; passed to the kernel as sibling_mask */
+ enum intel_engine_id master; /* master engine; submitted as master_instance (master - VCS1) */
+};
+
struct w_step
{
/* Workload step metadata */
@@ -122,6 +128,10 @@ struct w_step
enum intel_engine_id *engine_map;
};
bool load_balance;
+ struct {
+ uint64_t bond_mask;
+ enum intel_engine_id bond_master;
+ };
};
/* Implementation details */
@@ -153,6 +163,8 @@ struct ctx {
int priority;
unsigned int engine_map_count;
enum intel_engine_id *engine_map;
+ unsigned int bond_count;
+ struct bond *bonds;
bool targets_instance;
bool wants_balance;
unsigned int static_vcs;
@@ -523,6 +535,40 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
step.type = LOAD_BALANCE;
goto add_step;
+ } else if (!strcmp(field, "b")) {
+ unsigned int nr = 0;
+ while ((field = strtok_r(fstart, ".", &fctx))) {
+ tmp = atoi(field);
+ check_arg(nr == 0 && tmp <= 0,
+ "Invalid context at step %u!\n",
+ nr_steps);
+ check_arg(nr == 1 &&
+ (tmp < -1 || tmp == 0),
+ "Invalid siblings mask at step %u!\n",
+ nr_steps);
+ check_arg(nr > 2,
+ "Invalid bond format at step %u!\n",
+ nr_steps);
+
+ if (nr == 0) {
+ step.context = tmp;
+ } else if (nr == 1) {
+ step.bond_mask = tmp;
+ } else if (nr == 2) {
+ tmp = str_to_engine(field);
+ check_arg(tmp <= 0 ||
+ tmp == VCS ||
+ tmp == DEFAULT,
+ "Invalid master engine at step %u!\n",
+ nr_steps);
+ step.bond_master = tmp;
+ }
+
+ nr++;
+ }
+
+ step.type = BOND;
+ goto add_step;
}
tmp = atoi(field);
@@ -1044,6 +1090,8 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
* Transfer over engine map configuration from the workload step.
*/
for (j = 0; j < wrk->nr_ctxs; j += 2) {
+ struct ctx *ctx = &wrk->ctx_list[j];
+
bool targets = false;
bool balance = false;
@@ -1057,16 +1105,28 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
else
targets = true;
} else if (w->type == ENGINE_MAP) {
- wrk->ctx_list[j].engine_map = w->engine_map;
- wrk->ctx_list[j].engine_map_count =
- w->engine_map_count;
+ ctx->engine_map = w->engine_map;
+ ctx->engine_map_count = w->engine_map_count;
} else if (w->type == LOAD_BALANCE) {
- if (!wrk->ctx_list[j].engine_map) {
+ if (!ctx->engine_map) {
wsim_err("Load balancing needs an engine map!\n");
return 1;
}
- wrk->ctx_list[j].wants_balance =
- w->load_balance;
+ ctx->wants_balance = w->load_balance;
+ } else if (w->type == BOND) {
+ if (!ctx->wants_balance) {
+ wsim_err("Engine bonds need load balancing engine map!\n");
+ return 1;
+ }
+ ctx->bond_count++;
+ ctx->bonds = realloc(ctx->bonds,
+ ctx->bond_count *
+ sizeof(struct bond));
+ igt_assert(ctx->bonds);
+ ctx->bonds[ctx->bond_count - 1].mask =
+ w->bond_mask;
+ ctx->bonds[ctx->bond_count - 1].master =
+ w->bond_master;
}
}
@@ -1196,6 +1256,7 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
{ .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
.engines_mask = -1,
};
+ struct i915_context_engines_bond *bonds = NULL;
if (ctx->wants_balance) {
set_engines.extensions =
@@ -1211,7 +1272,31 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
ctx->engine_map[j] - VCS1; /* FIXME */
}
+ if (ctx->bond_count) {
+ bonds = calloc(ctx->bond_count, sizeof(*bonds));
+ load_balance.base.next_extension =
+ to_user_pointer(&bonds[0]);
+ }
+
+ for (j = 0; j < ctx->bond_count; j++) {
+ struct i915_context_engines_bond *bond =
+ &bonds[j];
+
+ if (j < (ctx->bond_count - 1))
+ bond->base.next_extension =
+ to_user_pointer(bond + 1);
+
+ bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
+ bond->master_class = I915_ENGINE_CLASS_VIDEO;
+ bond->master_instance =
+ ctx->bonds[j].master - VCS1;
+ bond->sibling_mask = ctx->bonds[j].mask;
+ }
+
gem_context_set_param(fd, &param);
+
+ if (bonds)
+ free(bonds);
} else if (ctx->wants_balance) {
struct i915_context_engines_load_balance load_balance =
{ .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
@@ -2182,7 +2267,8 @@ static void *run_workload(void *data)
continue;
} else if (w->type == PREEMPTION ||
w->type == ENGINE_MAP ||
- w->type == LOAD_BALANCE) {
+ w->type == LOAD_BALANCE ||
+ w->type == BOND) {
continue;
}
@@ -7,6 +7,7 @@ B.<uint>
M.<uint>.<str>[|<str>]...
P|X.<uint>.<int>
d|p|s|t|q|a.<int>,...
+b.<uint>.<uint>.<str>
f
For duration a range can be given from which a random value will be picked
@@ -26,6 +27,7 @@ Additional workload steps are also supported:
'f' - Create a sync fence.
'a' - Advance the previously created sync fence.
'B' - Turn on context load balancing.
+ 'b' - Set up engine bonds.
'M' - Set up engine map.
'P' - Context priority.
'X' - Context preemption control.
@@ -194,3 +196,51 @@ This enables load balancing for context number one.
Submissions to load balanced contexts are only allowed to use the DEFAULT engine
specifier.
+
+Engine bonds
+------------
+
+Engine bonds are extensions on load balanced contexts. They allow expressing
+rules of engine selection between two co-operating contexts tied with submit
+fences. In other words, the rule expression is telling the driver: "If you pick
+this engine for context one, then you have to pick that engine for context two".
+
+Syntax is:
+ b.<context>.<engine_mask>.<master_engine>
+
+Engine mask is a bitmask representing engines in the engine map configured for
+the same context.
+
+There can be multiple bonds tied to the same context.
+
+Example:
+
+ M.1.RCS|VECS
+ B.1
+ M.2.VCS1|VCS2
+ B.2
+ b.2.1.RCS
+ b.2.2.VECS
+
+This tells the driver that if it picked RCS for context one, it has to pick VCS1
+for context two. And if it picked VECS for context one, it has to pick VCS2 for
+context two.
+
+If we extend the above example with more workload directives:
+
+ 1.DEFAULT.1000.0.0
+ 2.DEFAULT.1000.s-1.0
+
+We get to a fully functional example where two batch buffers are submitted in a
+load balanced fashion, telling the driver they should run simultaneously and
+that valid engine pairs are either RCS + VCS1 (for two contexts respectively),
+or VECS + VCS2.
+
+This can also be extended using sync fences to improve chances of the first
+submission not getting on the hardware after the second one. Second block would
+then look like:
+
+ f
+ 1.DEFAULT.1000.f-1.0
+ 2.DEFAULT.1000.s-1.0
+ a.-3