Message ID | 20240715091125.36381-1-yang.yang@vivo.com (mailing list archive) |
---|---
State | New, archived |
Series | [v7] sbitmap: fix io hung due to race on sbitmap_word::cleared
On 7/15/24 2:11 AM, Yang Yang wrote:
> +
> +	/**
> +	 * @swap_lock: serializes simultaneous updates of ->word and ->cleared
> +	 */
> +	spinlock_t swap_lock;
>  } ____cacheline_aligned_in_smp;

Thank you for having updated this comment.

> -static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
> +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map,
> +		unsigned int depth, unsigned int alloc_hint, bool wrap)
>  {
> -	unsigned long mask;
> +	unsigned long mask, word_mask;
> +	bool ret = false;
>
> -	if (!READ_ONCE(map->cleared))
> -		return false;
> +	guard(spinlock_irqsave)(&map->swap_lock);
> +
> +	if (!map->cleared) {
> +		if (depth > 0) {
> +			word_mask = (~0UL) >> (BITS_PER_LONG - depth);
> +			/*
> +			 * The current behavior is to always retry after moving
> +			 * ->cleared to word, and we change it to retry in case
> +			 * of any free bits. To avoid an infinite loop, we need
> +			 * to take wrap & alloc_hint into account, otherwise a
> +			 * soft lockup may occur.
> +			 */
> +			if (!wrap && alloc_hint)
> +				word_mask &= ~((1UL << alloc_hint) - 1);
> +
> +			if ((READ_ONCE(map->word) & word_mask) == word_mask)
> +				ret = false;
> +			else
> +				ret = true;
> +		}
> +
> +		return ret;
> +	}

Now that guard()() is being used, the local variable 'ret' can be
eliminated. The if (READ_ONCE() ...) statement can be changed into the
following: return (READ_ONCE(map->word) & word_mask) != word_mask;
and "return ret;" can be changed into "return false;". Additionally,
the indentation depth can be reduced by changing "if (depth > 0) {" ...
into "if (depth == 0) return false;".

Otherwise this patch looks good to me.

Thanks,

Bart.
On 2024/7/16 1:26, Bart Van Assche wrote:
> On 7/15/24 2:11 AM, Yang Yang wrote:
>> +
>> +	/**
>> +	 * @swap_lock: serializes simultaneous updates of ->word and ->cleared
>> +	 */
>> +	spinlock_t swap_lock;
>>  } ____cacheline_aligned_in_smp;
>
> Thank you for having updated this comment.
>
>> -static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
>> +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map,
>> +		unsigned int depth, unsigned int alloc_hint, bool wrap)
>>  {
>> -	unsigned long mask;
>> +	unsigned long mask, word_mask;
>> +	bool ret = false;
>>
>> -	if (!READ_ONCE(map->cleared))
>> -		return false;
>> +	guard(spinlock_irqsave)(&map->swap_lock);
>> +
>> +	if (!map->cleared) {
>> +		if (depth > 0) {
>> +			word_mask = (~0UL) >> (BITS_PER_LONG - depth);
>> +			/*
>> +			 * The current behavior is to always retry after moving
>> +			 * ->cleared to word, and we change it to retry in case
>> +			 * of any free bits. To avoid an infinite loop, we need
>> +			 * to take wrap & alloc_hint into account, otherwise a
>> +			 * soft lockup may occur.
>> +			 */
>> +			if (!wrap && alloc_hint)
>> +				word_mask &= ~((1UL << alloc_hint) - 1);
>> +
>> +			if ((READ_ONCE(map->word) & word_mask) == word_mask)
>> +				ret = false;
>> +			else
>> +				ret = true;
>> +		}
>> +
>> +		return ret;
>> +	}
>
> Now that guard()() is being used, the local variable 'ret' can be
> eliminated. The if (READ_ONCE() ...) statement can be changed into the
> following: return (READ_ONCE(map->word) & word_mask) != word_mask;
> and "return ret;" can be changed into "return false;". Additionally,
> the indentation depth can be reduced by changing "if (depth > 0) {" ...
> into "if (depth == 0) return false;".

This looks much better, I will make these changes in the next version.
Thanks.

> Otherwise this patch looks good to me.
>
> Thanks,
>
> Bart.
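Taken together, Bart's three suggestions would give the early part of
sbitmap_deferred_clear() roughly the following shape. This is only a
sketch of the suggested restructuring, not the actual next version of
the patch:

static inline bool sbitmap_deferred_clear(struct sbitmap_word *map,
		unsigned int depth, unsigned int alloc_hint, bool wrap)
{
	unsigned long mask, word_mask;

	guard(spinlock_irqsave)(&map->swap_lock);

	if (!map->cleared) {
		/* Early return instead of nesting under "if (depth > 0)". */
		if (depth == 0)
			return false;

		word_mask = (~0UL) >> (BITS_PER_LONG - depth);
		if (!wrap && alloc_hint)
			word_mask &= ~((1UL << alloc_hint) - 1);

		/*
		 * 'ret' is gone: retry only if at least one bit covered by
		 * word_mask is still free. guard()() releases swap_lock on
		 * every return path, so the early returns are safe.
		 */
		return (READ_ONCE(map->word) & word_mask) != word_mask;
	}

	/* ... move ->cleared into ->word, as in the hunk below ... */
}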
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index d662cf136021..c09cdcc99471 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -36,6 +36,11 @@ struct sbitmap_word {
 	 * @cleared: word holding cleared bits
 	 */
 	unsigned long cleared ____cacheline_aligned_in_smp;
+
+	/**
+	 * @swap_lock: serializes simultaneous updates of ->word and ->cleared
+	 */
+	spinlock_t swap_lock;
 } ____cacheline_aligned_in_smp;
 
 /**
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 1e453f825c05..3c00fe4b74fc 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -60,12 +60,35 @@ static inline void update_alloc_hint_after_get(struct sbitmap *sb,
 /*
  * See if we have deferred clears that we can batch move
  */
-static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
+static inline bool sbitmap_deferred_clear(struct sbitmap_word *map,
+		unsigned int depth, unsigned int alloc_hint, bool wrap)
 {
-	unsigned long mask;
+	unsigned long mask, word_mask;
+	bool ret = false;
 
-	if (!READ_ONCE(map->cleared))
-		return false;
+	guard(spinlock_irqsave)(&map->swap_lock);
+
+	if (!map->cleared) {
+		if (depth > 0) {
+			word_mask = (~0UL) >> (BITS_PER_LONG - depth);
+			/*
+			 * The current behavior is to always retry after moving
+			 * ->cleared to word, and we change it to retry in case
+			 * of any free bits. To avoid an infinite loop, we need
+			 * to take wrap & alloc_hint into account, otherwise a
+			 * soft lockup may occur.
+			 */
+			if (!wrap && alloc_hint)
+				word_mask &= ~((1UL << alloc_hint) - 1);
+
+			if ((READ_ONCE(map->word) & word_mask) == word_mask)
+				ret = false;
+			else
+				ret = true;
+		}
+
+		return ret;
+	}
 
 	/*
 	 * First get a stable cleared mask, setting the old mask to 0.
@@ -85,6 +108,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		      bool alloc_hint)
 {
 	unsigned int bits_per_word;
+	int i;
 
 	if (shift < 0)
 		shift = sbitmap_calculate_shift(depth);
@@ -116,6 +140,9 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		return -ENOMEM;
 	}
 
+	for (i = 0; i < sb->map_nr; i++)
+		spin_lock_init(&sb->map[i].swap_lock);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sbitmap_init_node);
@@ -126,7 +153,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
 	unsigned int i;
 
 	for (i = 0; i < sb->map_nr; i++)
-		sbitmap_deferred_clear(&sb->map[i]);
+		sbitmap_deferred_clear(&sb->map[i], 0, 0, 0);
 
 	sb->depth = depth;
 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
@@ -179,7 +206,7 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map,
 					alloc_hint, wrap);
 		if (nr != -1)
 			break;
-		if (!sbitmap_deferred_clear(map))
+		if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
 			break;
 	} while (1);
 
@@ -496,7 +523,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 		unsigned int map_depth = __map_depth(sb, index);
 		unsigned long val;
 
-		sbitmap_deferred_clear(map);
+		sbitmap_deferred_clear(map, 0, 0, 0);
 		val = READ_ONCE(map->word);
 		if (val == (1UL << (map_depth - 1)) - 1)
 			goto next;
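As an aside, the mask arithmetic that the comment in the hunk above
refers to can be checked in isolation. Below is a small userspace C
sketch, not kernel code; the values depth = 8, alloc_hint = 3, and the
words 0xf8/0xd8 are made up for illustration. It shows which bits the
retry test actually considers:

#include <stdio.h>

#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

/* Same test as sbitmap_deferred_clear(): is any bit under word_mask free? */
static int can_retry(unsigned long word, unsigned int depth,
		     unsigned int alloc_hint, int wrap)
{
	unsigned long word_mask = (~0UL) >> (BITS_PER_LONG - depth);

	/* Without wrap, bits below alloc_hint will never be scanned again. */
	if (!wrap && alloc_hint)
		word_mask &= ~((1UL << alloc_hint) - 1);

	return (word & word_mask) != word_mask;
}

int main(void)
{
	/* depth = 8, alloc_hint = 3, wrap = 0: only bits 3..7 matter. */
	printf("bits 3..7 all busy: retry = %d\n", can_retry(0xf8, 8, 3, 0));
	printf("bit 5 free:         retry = %d\n", can_retry(0xd8, 8, 3, 0));
	return 0;
}

The first call prints retry = 0 (every bit the allocator could still
scan is busy, so retrying would spin forever), the second prints
retry = 1 (bit 5 is free, so a retry can make progress).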