
[2/9] sbitmap: add helper to clear a batch of tags

Message ID 20211012181742.672391-3-axboe@kernel.dk (mailing list archive)
State New, archived
Series Batched completions

Commit Message

Jens Axboe Oct. 12, 2021, 6:17 p.m. UTC
sbitmap currently only supports clearing tags one-by-one. Add a helper
that allows the caller to pass in an array of tags to clear.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 11 +++++++++++
 lib/sbitmap.c           | 44 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 52 insertions(+), 3 deletions(-)
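
For orientation, here is a minimal caller sketch (hypothetical: the function
name, the per-tag work, and tag_offset are made up for illustration and are
not part of this series):

	/*
	 * Hypothetical completion path: the caller has collected the tags
	 * of a batch of finished requests and frees them in one call.
	 */
	void my_complete_batch(struct sbitmap_queue *sbq, int *tags,
			       int nr_tags, int tag_offset)
	{
		/* ... per-tag completion work would go here ... */

		/* free all tags in one pass, merging same-word clears */
		sbitmap_queue_clear_batch(sbq, tag_offset, tags, nr_tags);
	}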

Comments

Bart Van Assche Oct. 12, 2021, 6:29 p.m. UTC | #1
On 10/12/21 11:17 AM, Jens Axboe wrote:
> +void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
> +				int *tags, int nr_tags)
> +{
> +	struct sbitmap *sb = &sbq->sb;
> +	unsigned long *addr = NULL;
> +	unsigned long mask = 0;
> +	int i;
> +
> +	smp_mb__before_atomic();
> +	for (i = 0; i < nr_tags; i++) {
> +		const int tag = tags[i] - offset;
> +		unsigned long *this_addr;
> +
> +		/* since we're clearing a batch, skip the deferred map */
> +		this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word;
> +		if (!addr) {
> +			addr = this_addr;
> +		} else if (addr != this_addr) {
> +			atomic_long_andnot(mask, (atomic_long_t *) addr);
> +			mask = 0;
> +			addr = this_addr;
> +		}
> +		mask |= (1UL << SB_NR_TO_BIT(sb, tag));
> +	}
> +
> +	if (mask)
> +		atomic_long_andnot(mask, (atomic_long_t *) addr);
> +
> +	smp_mb__after_atomic();
> +	sbitmap_queue_wake_up(sbq);
> +	sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(),
> +					tags[nr_tags - 1] - offset);
> +}
> +
>   void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
>   			 unsigned int cpu)
>   {

How does replacing the sbitmap_queue_clear() implementation with a call
to sbitmap_queue_clear_batch() affect performance? I'm wondering whether
it is possible to prevent code duplication without negatively affecting
performance.

Thanks,

Bart.
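
Concretely, the deduplication suggested above would look something like the
sketch below (untested; as the reply that follows notes, it would not be
behavior-preserving):

	void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
				 unsigned int cpu)
	{
		int tag = nr;

		/*
		 * Not a drop-in replacement: the batch helper clears ->word
		 * directly instead of going through the deferred map, and it
		 * updates the alloc hint for the current CPU rather than @cpu.
		 */
		sbitmap_queue_clear_batch(sbq, 0, &tag, 1);
	}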
Jens Axboe Oct. 12, 2021, 6:34 p.m. UTC | #2
On 10/12/21 12:29 PM, Bart Van Assche wrote:
> On 10/12/21 11:17 AM, Jens Axboe wrote:
>> +void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
>> +				int *tags, int nr_tags)
>> +{
>> +	struct sbitmap *sb = &sbq->sb;
>> +	unsigned long *addr = NULL;
>> +	unsigned long mask = 0;
>> +	int i;
>> +
>> +	smp_mb__before_atomic();
>> +	for (i = 0; i < nr_tags; i++) {
>> +		const int tag = tags[i] - offset;
>> +		unsigned long *this_addr;
>> +
>> +		/* since we're clearing a batch, skip the deferred map */
>> +		this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word;
>> +		if (!addr) {
>> +			addr = this_addr;
>> +		} else if (addr != this_addr) {
>> +			atomic_long_andnot(mask, (atomic_long_t *) addr);
>> +			mask = 0;
>> +			addr = this_addr;
>> +		}
>> +		mask |= (1UL << SB_NR_TO_BIT(sb, tag));
>> +	}
>> +
>> +	if (mask)
>> +		atomic_long_andnot(mask, (atomic_long_t *) addr);
>> +
>> +	smp_mb__after_atomic();
>> +	sbitmap_queue_wake_up(sbq);
>> +	sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(),
>> +					tags[nr_tags - 1] - offset);
>> +}
>> +
>>   void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
>>   			 unsigned int cpu)
>>   {
> 
> How does replacing the sbitmap_queue_clear() implementation with a call
> to sbitmap_queue_clear_batch() affect performance? I'm wondering whether
> it is possible to prevent code duplication without negatively affecting
> performance.

Good question. I'd rather defer that to a follow-up, though, if it ends
up making sense. It's not that simple: we play some tricks on the usual
clear path, using a deferred mask to avoid hitting the cacheline
repeatedly. That obviously doesn't make sense for batched clears, so the
two paths work in slightly different ways, with the single-bit clear
taking an extra step to increase efficiency.
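
For reference, the deferred-mask trick described here is roughly the
following (simplified from the existing single-bit path in lib/sbitmap.c;
treat it as a sketch):

	static void sbitmap_deferred_clear_bit(struct sbitmap *sb,
					       unsigned int bitnr)
	{
		unsigned long *cleared =
			&sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;

		/*
		 * Record the free in a side ->cleared mask instead of doing
		 * a read-modify-write on the hot ->word cacheline. The
		 * allocator folds ->cleared back into ->word only when a
		 * word looks full, so repeated single-bit clears stay off
		 * the contended line.
		 */
		set_bit(SB_NR_TO_BIT(sb, bitnr), cleared);
	}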

Patch

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index e30b56023ead..4a6ff274335a 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -528,6 +528,17 @@  void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu);
 
+/**
+ * sbitmap_queue_clear_batch() - Free a batch of allocated bits from a
+ * &struct sbitmap_queue.
+ * @sbq: Bitmap to free from.
+ * @offset: Offset subtracted from each tag before clearing.
+ * @tags: Array of tags to free.
+ * @nr_tags: Number of tags in the array.
+ */
+void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
+				int *tags, int nr_tags);
+
 static inline int sbq_index_inc(int index)
 {
 	return (index + 1) & (SBQ_WAIT_QUEUES - 1);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index f398e0ae548e..c6e2f1f2c4d2 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -628,6 +628,46 @@  void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
+static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag)
+{
+	if (likely(!sb->round_robin && tag < sb->depth))
+		*per_cpu_ptr(sb->alloc_hint, cpu) = tag;
+}
+
+void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
+				int *tags, int nr_tags)
+{
+	struct sbitmap *sb = &sbq->sb;
+	unsigned long *addr = NULL;
+	unsigned long mask = 0;
+	int i;
+
+	smp_mb__before_atomic();
+	for (i = 0; i < nr_tags; i++) {
+		const int tag = tags[i] - offset;
+		unsigned long *this_addr;
+
+		/* since we're clearing a batch, skip the deferred map */
+		this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word;
+		if (!addr) {
+			addr = this_addr;
+		} else if (addr != this_addr) {
+			atomic_long_andnot(mask, (atomic_long_t *) addr);
+			mask = 0;
+			addr = this_addr;
+		}
+		mask |= (1UL << SB_NR_TO_BIT(sb, tag));
+	}
+
+	if (mask)
+		atomic_long_andnot(mask, (atomic_long_t *) addr);
+
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+	sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(),
+					tags[nr_tags - 1] - offset);
+}
+
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
@@ -652,9 +692,7 @@  void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 	 */
 	smp_mb__after_atomic();
 	sbitmap_queue_wake_up(sbq);
-
-	if (likely(!sbq->sb.round_robin && nr < sbq->sb.depth))
-		*per_cpu_ptr(sbq->sb.alloc_hint, cpu) = nr;
+	sbitmap_update_cpu_hint(&sbq->sb, cpu, nr);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
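
To see what the batching buys, here is a worked trace of the loop in
sbitmap_queue_clear_batch() (64-bit words and made-up tag values assumed):

	tags = { 3, 7, 40, 64, 70 }, offset = 0

	i = 0..2: tags 3, 7, 40 all map to word 0; mask accumulates
	          (1UL << 3) | (1UL << 7) | (1UL << 40), no atomic op yet.
	i = 3:    tag 64 maps to word 1, so the accumulated mask is flushed
	          with a single atomic_long_andnot() on word 0.
	i = 4:    tag 70 also maps to word 1; mask grows to bits 0 and 6.
	exit:     the final "if (mask)" check flushes word 1.

	Two atomic read-modify-writes instead of five. Note that the merging
	only kicks in for same-word tags that are adjacent in the array:
	{ 3, 64, 7 } would issue three atomic ops.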