diff mbox

Another flavour of for_each_engine_masked()

Message ID 1472739464-19399-1-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon Sept. 1, 2016, 2:17 p.m. UTC
This macro was recently updated to skip testing for non-existent or
uninteresting engines by using ffs() to directly find the next engine of
interest. However, it required the introduction of a caller-provided
temporary variable, which some people regard as inelegant. So, this
patch provides another variant, which still uses the fast-skip mechanism
but without requiring the temporary, for the cost of a slight increase
in code size (~20 bytes per callsite).

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 19 ++++++++++---------
 drivers/gpu/drm/i915/i915_gem_request.c    |  3 +--
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 +--
 drivers/gpu/drm/i915/i915_irq.c            |  3 +--
 drivers/gpu/drm/i915/intel_uncore.c        |  9 +++------
 5 files changed, 16 insertions(+), 21 deletions(-)

Comments

Joonas Lahtinen Sept. 1, 2016, 2:36 p.m. UTC | #1
On to, 2016-09-01 at 15:17 +0100, Dave Gordon wrote:
> This macro was recently updated to skip testing for non-existent or
> uninteresting engines by using ffs() to directly find the next engine of
> interest. However, it required the introduction of a caller-provided
> temporary variable, which some people regard as inelegant. So, this
> patch provides another variant, which still uses the fast-skip mechanism
> but without requiring the temporary, for the cost of a slight increase
> in code size (~20 bytes per callsite).

I did the same kind of modification, it got Nacked.

I'd myself vote for loosing the temporary for 20 bytes too.

Regards, Joonas
Chris Wilson Sept. 1, 2016, 2:48 p.m. UTC | #2
On Thu, Sep 01, 2016 at 03:17:44PM +0100, Dave Gordon wrote:
> This macro was recently updated to skip testing for non-existent or
> uninteresting engines by using ffs() to directly find the next engine of
> interest. However, it required the introduction of a caller-provided
> temporary variable, which some people regard as inelegant. So, this
> patch provides another variant, which still uses the fast-skip mechanism
> but without requiring the temporary, for the cost of a slight increase
> in code size (~20 bytes per callsite).

Slight? Next.
-Chris
Dave Gordon Sept. 1, 2016, 4:59 p.m. UTC | #3
On 01/09/16 15:48, Chris Wilson wrote:
> On Thu, Sep 01, 2016 at 03:17:44PM +0100, Dave Gordon wrote:
>> This macro was recently updated to skip testing for non-existent or
>> uninteresting engines by using ffs() to directly find the next engine of
>> interest. However, it required the introduction of a caller-provided
>> temporary variable, which some people regard as inelegant. So, this
>> patch provides another variant, which still uses the fast-skip mechanism
>> but without requiring the temporary, for the cost of a slight increase
>> in code size (~20 bytes per callsite).
>
> Slight? Next.
> -Chris

106 bytes out of 1166488; that's 0.009% or 91ppm.
The adjective "slight" definitely applies :)

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c413587..6c59876 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2093,16 +2093,17 @@  static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
 		for_each_if (((id__) = (engine__)->id, \
 			      intel_engine_initialized(engine__)))
 
-#define __mask_next_bit(mask) ({					\
-	int __idx = ffs(mask) - 1;					\
-	mask &= ~BIT(__idx);						\
-	__idx;								\
-})
-
 /* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
-	for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask;	\
-	     tmp__ ? (engine__ = &(dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
+#define for_each_engine_masked(engine__, dev_priv__, mask__)		\
+	for ((engine__) = NULL;						\
+	     ({								\
+		u32 next__ = INTEL_INFO(dev_priv__)->ring_mask;		\
+		if (likely(engine__))					\
+			next__ &= ~1u << (engine__)->id;		\
+		next__ = ffs(mask__ & next__);				\
+		(engine__) = (dev_priv__)->engine + next__ - 1;		\
+		next__;							\
+	     }); )
 
 enum hdmi_force_audio {
 	HDMI_AUDIO_OFF_DVI = -2,	/* no aux data for HDMI-DVI converter */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9cc08a1..5e55270 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -766,7 +766,6 @@  static bool engine_retire_requests(struct intel_engine_cs *engine)
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	unsigned int tmp;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
@@ -775,7 +774,7 @@  void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
 
-	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
+	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines)
 		if (engine_retire_requests(engine))
 			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 32699a7..e436941 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -330,7 +330,6 @@  static void guc_init_ctx_desc(struct intel_guc *guc,
 	struct i915_gem_context *ctx = client->owner;
 	struct guc_context_desc desc;
 	struct sg_table *sg;
-	unsigned int tmp;
 	u32 gfx_addr;
 
 	memset(&desc, 0, sizeof(desc));
@@ -340,7 +339,7 @@  static void guc_init_ctx_desc(struct intel_guc *guc,
 	desc.priority = client->priority;
 	desc.db_id = client->doorbell_id;
 
-	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
+	for_each_engine_masked(engine, dev_priv, client->engines) {
 		struct intel_context *ce = &ctx->engine[engine->id];
 		uint32_t guc_engine_id = engine->guc_id;
 		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 82358d4..7610eca 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3169,7 +3169,6 @@  static void i915_hangcheck_elapsed(struct work_struct *work)
 
 	if (hung) {
 		char msg[80];
-		unsigned int tmp;
 		int len;
 
 		/* If some rings hung but others were still busy, only
@@ -3179,7 +3178,7 @@  static void i915_hangcheck_elapsed(struct work_struct *work)
 			hung &= ~stuck;
 		len = scnprintf(msg, sizeof(msg),
 				"%s on ", stuck == hung ? "No progress" : "Hang");
-		for_each_engine_masked(engine, dev_priv, hung, tmp)
+		for_each_engine_masked(engine, dev_priv, hung)
 			len += scnprintf(msg + len, sizeof(msg) - len,
 					 "%s, ", engine->name);
 		msg[len-2] = '\0';
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e9f68cd..43f8339 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1597,10 +1597,8 @@  static int gen6_reset_engines(struct drm_i915_private *dev_priv,
 	if (engine_mask == ALL_ENGINES) {
 		hw_mask = GEN6_GRDOM_FULL;
 	} else {
-		unsigned int tmp;
-
 		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+		for_each_engine_masked(engine, dev_priv, engine_mask)
 			hw_mask |= hw_engine_mask[engine->id];
 	}
 
@@ -1716,16 +1714,15 @@  static int gen8_reset_engines(struct drm_i915_private *dev_priv,
 			      unsigned engine_mask)
 {
 	struct intel_engine_cs *engine;
-	unsigned int tmp;
 
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+	for_each_engine_masked(engine, dev_priv, engine_mask)
 		if (gen8_request_engine_reset(engine))
 			goto not_ready;
 
 	return gen6_reset_engines(dev_priv, engine_mask);
 
 not_ready:
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+	for_each_engine_masked(engine, dev_priv, engine_mask)
 		gen8_unrequest_engine_reset(engine);
 
 	return -EIO;