Message ID | 20240716075206.23121-1-boy.wu@mediatek.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v3] blk-cgroup: Replace u64 sync with spinlock for iostat update | expand |
Hello, Boy. So, looking at the patch, I'm not sure per-blkg lock makes sense. On Tue, Jul 16, 2024 at 03:52:06PM +0800, boy.wu wrote: > @@ -995,15 +995,13 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, > struct blkg_iostat *last) > { > struct blkg_iostat delta; > - unsigned long flags; > > /* propagate percpu delta to global */ > - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > + guard(spinlock_irqsave)(&blkg->iostat.spinlock); > blkg_iostat_set(&delta, cur); > blkg_iostat_sub(&delta, last); > blkg_iostat_add(&blkg->iostat.cur, &delta); > blkg_iostat_add(last, &delta); > - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > } This is already called with blkg_stat_lock held. > @@ -1051,10 +1048,8 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) > goto propagate_up; /* propagate up to parent only */ > > /* fetch the current per-cpu values */ > - do { > - seq = u64_stats_fetch_begin(&bisc->sync); > + scoped_guard(spinlock_irqsave, &bisc->spinlock) > blkg_iostat_set(&cur, &bisc->cur); > - } while (u64_stats_fetch_retry(&bisc->sync, seq)); This is per-cpu stat and we should keep using u64_sync for them. > @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void) > cpu_dkstats->sectors[STAT_DISCARD] << 9; > } > > - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > + guard(spinlock_irqsave)(&blkg->iostat.spinlock); > blkg_iostat_set(&blkg->iostat.cur, &tmp); > - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > } > } ... > @@ -1157,16 +1149,14 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) > > seq_printf(s, "%s ", dname); > > - do { > - seq = u64_stats_fetch_begin(&bis->sync); > - > + scoped_guard(spinlock_irqsave, &bis->spinlock) { > rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > rios = bis->cur.ios[BLKG_IOSTAT_READ]; > wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > - } while (u64_stats_fetch_retry(&bis->sync, seq)); > + } The above two are the only places which can potentially benefit from per-blkg lock but these aren't hot paths. I'd just use blkg_stat_lock for the above. > @@ -2152,30 +2141,29 @@ void blk_cgroup_bio_start(struct bio *bio) > > cpu = get_cpu(); > bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); > - flags = u64_stats_update_begin_irqsave(&bis->sync); > - > - /* > - * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split > - * bio and we would have already accounted for the size of the bio. > - */ > - if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { > - bio_set_flag(bio, BIO_CGROUP_ACCT); > - bis->cur.bytes[rwd] += bio->bi_iter.bi_size; > - } > - bis->cur.ios[rwd]++; > + scoped_guard(spinlock_irqsave, &bis->spinlock) { > + /* > + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split > + * bio and we would have already accounted for the size of the bio. > + */ > + if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { > + bio_set_flag(bio, BIO_CGROUP_ACCT); > + bis->cur.bytes[rwd] += bio->bi_iter.bi_size; > + } > + bis->cur.ios[rwd]++; > > - /* > - * If the iostat_cpu isn't in a lockless list, put it into the > - * list to indicate that a stat update is pending. > - */ > - if (!READ_ONCE(bis->lqueued)) { > - struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); > + /* > + * If the iostat_cpu isn't in a lockless list, put it into the > + * list to indicate that a stat update is pending. > + */ > + if (!READ_ONCE(bis->lqueued)) { > + struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); > > - llist_add(&bis->lnode, lhead); > - WRITE_ONCE(bis->lqueued, true); > + llist_add(&bis->lnode, lhead); > + WRITE_ONCE(bis->lqueued, true); > + } These are per-cpu stat updates which should keep using u64_sync. We don't want to incur locking overhead for stat updates in the hot issue path. Thanks.
On Tue, 2024-07-16 at 11:13 -1000, Tejun Heo wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > Hello, Boy. > > So, looking at the patch, I'm not sure per-blkg lock makes sense. > > On Tue, Jul 16, 2024 at 03:52:06PM +0800, boy.wu wrote: > > @@ -995,15 +995,13 @@ static void blkcg_iostat_update(struct > blkcg_gq *blkg, struct blkg_iostat *cur, > > struct blkg_iostat *last) > > { > > struct blkg_iostat delta; > > -unsigned long flags; > > > > /* propagate percpu delta to global */ > > -flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > > +guard(spinlock_irqsave)(&blkg->iostat.spinlock); > > blkg_iostat_set(&delta, cur); > > blkg_iostat_sub(&delta, last); > > blkg_iostat_add(&blkg->iostat.cur, &delta); > > blkg_iostat_add(last, &delta); > > -u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > > } > > This is already called with blkg_stat_lock held. In blkcg_iostat_update, it is protected by both blkg_stat_lock and u64 sync. I agree that no changes are needed here. > > > @@ -1051,10 +1048,8 @@ static void __blkcg_rstat_flush(struct blkcg > *blkcg, int cpu) > > goto propagate_up; /* propagate up to parent only */ > > > > /* fetch the current per-cpu values */ > > -do { > > -seq = u64_stats_fetch_begin(&bisc->sync); > > +scoped_guard(spinlock_irqsave, &bisc->spinlock) > > blkg_iostat_set(&cur, &bisc->cur); > > -} while (u64_stats_fetch_retry(&bisc->sync, seq)); > > This is per-cpu stat and we should keep using u64_sync for them. > > > @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void) > > cpu_dkstats->sectors[STAT_DISCARD] << 9; > > } > > > > -flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > > +guard(spinlock_irqsave)(&blkg->iostat.spinlock); > > blkg_iostat_set(&blkg->iostat.cur, &tmp); > > -u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > > } > > } > ... > > @@ -1157,16 +1149,14 @@ static void blkcg_print_one_stat(struct > blkcg_gq *blkg, struct seq_file *s) > > > > seq_printf(s, "%s ", dname); > > > > -do { > > -seq = u64_stats_fetch_begin(&bis->sync); > > - > > +scoped_guard(spinlock_irqsave, &bis->spinlock) { > > rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > > wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > > dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > > rios = bis->cur.ios[BLKG_IOSTAT_READ]; > > wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > > dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > > -} while (u64_stats_fetch_retry(&bis->sync, seq)); > > +} > > The above two are the only places which can potentially benefit from > per-blkg lock but these aren't hot paths. I'd just use blkg_stat_lock > for > the above. In __blkcg_rstat_flush, u64 sync is used for fetching data, changing to spinlock will increase overhead both 64 bit and 32 bit systems. 64 bit systems do noting in u64 sync, and 32 bit systems can read data in parallel in u64 sync if no one updating data. However, it is already protected by blkg_stat_lock, so there is no parallelism now, and there is no issue here. I think that no changes are needed here. In blkcg_fill_root_iostats, this is where the issue happens in 32 bit SMP systems, so spinlock needs to be added to protect it. In blkcg_print_one_stat, u64 sync is used for fetching data. Changing to spinlock will increase overhead like in __blkcg_rstat_flush. However, it is already protected by spin_lock_irq(&blkg->q->queue_lock) in blkcg_print_stat, so there is no parallelism now, and there is no issue here. I think that no changes are needed here. > > > @@ -2152,30 +2141,29 @@ void blk_cgroup_bio_start(struct bio *bio) > > > > cpu = get_cpu(); > > bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); > > -flags = u64_stats_update_begin_irqsave(&bis->sync); > > - > > -/* > > - * If the bio is flagged with BIO_CGROUP_ACCT it means this is a > split > > - * bio and we would have already accounted for the size of the > bio. > > - */ > > -if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { > > -bio_set_flag(bio, BIO_CGROUP_ACCT); > > -bis->cur.bytes[rwd] += bio->bi_iter.bi_size; > > -} > > -bis->cur.ios[rwd]++; > > +scoped_guard(spinlock_irqsave, &bis->spinlock) { > > +/* > > + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a > split > > + * bio and we would have already accounted for the size of the > bio. > > + */ > > +if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { > > +bio_set_flag(bio, BIO_CGROUP_ACCT); > > +bis->cur.bytes[rwd] += bio->bi_iter.bi_size; > > +} > > +bis->cur.ios[rwd]++; > > > > -/* > > - * If the iostat_cpu isn't in a lockless list, put it into the > > - * list to indicate that a stat update is pending. > > - */ > > -if (!READ_ONCE(bis->lqueued)) { > > -struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); > > +/* > > + * If the iostat_cpu isn't in a lockless list, put it into the > > + * list to indicate that a stat update is pending. > > + */ > > +if (!READ_ONCE(bis->lqueued)) { > > +struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); > > > > -llist_add(&bis->lnode, lhead); > > -WRITE_ONCE(bis->lqueued, true); > > +llist_add(&bis->lnode, lhead); > > +WRITE_ONCE(bis->lqueued, true); > > +} > > These are per-cpu stat updates which should keep using u64_sync. We > don't > want to incur locking overhead for stat updates in the hot issue > path. > I agree that no changes are needed in blk_cgroup_bio_start. > Thanks. > > -- > tejun I think we can look back to the original issue, which is that on 32 bit SMP systems will have concurrent problems on u64_stats_update_begin_irqsave and u64_stats_update_end_irqrestore in blkcg_fill_root_iostats. So, adding a lock only on 32 bits systems in blkcg_fill_root_iostats is to resolve the concurrent issue and not affect 64 bit systems, which do not have the issue in the first place, because u64 sync does noting in 64 bit systems and they don't need it. I think we can just fix it by following change. @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_DISCARD] << 9; } +#if BITS_PER_LONG == 32 + guard(spinlock_irqsave)(&blkg_stat_lock); +#endif flags = u64_stats_update_begin_irqsave(&blkg- >iostat.sync); blkg_iostat_set(&blkg->iostat.cur, &tmp); u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } }
On Wed, 2024-07-17 at 10:25 +0800, Boy.Wu wrote: > On Tue, 2024-07-16 at 11:13 -1000, Tejun Heo wrote: > > > > External email : Please do not click links or open attachments > > until > > you have verified the sender or the content. > > Hello, Boy. > > > > So, looking at the patch, I'm not sure per-blkg lock makes sense. > > > > On Tue, Jul 16, 2024 at 03:52:06PM +0800, boy.wu wrote: > > > @@ -995,15 +995,13 @@ static void blkcg_iostat_update(struct > > > > blkcg_gq *blkg, struct blkg_iostat *cur, > > > struct blkg_iostat *last) > > > { > > > struct blkg_iostat delta; > > > -unsigned long flags; > > > > > > /* propagate percpu delta to global */ > > > -flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > > > +guard(spinlock_irqsave)(&blkg->iostat.spinlock); > > > blkg_iostat_set(&delta, cur); > > > blkg_iostat_sub(&delta, last); > > > blkg_iostat_add(&blkg->iostat.cur, &delta); > > > blkg_iostat_add(last, &delta); > > > -u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > > > } > > > > This is already called with blkg_stat_lock held. > > In blkcg_iostat_update, it is protected by both blkg_stat_lock and > u64 > sync. I agree that no changes are needed here. > > > > > > @@ -1051,10 +1048,8 @@ static void __blkcg_rstat_flush(struct > > > blkcg > > > > *blkcg, int cpu) > > > goto propagate_up; /* propagate up to parent only */ > > > > > > /* fetch the current per-cpu values */ > > > -do { > > > -seq = u64_stats_fetch_begin(&bisc->sync); > > > +scoped_guard(spinlock_irqsave, &bisc->spinlock) > > > blkg_iostat_set(&cur, &bisc->cur); > > > -} while (u64_stats_fetch_retry(&bisc->sync, seq)); > > > > This is per-cpu stat and we should keep using u64_sync for them. > > > > > @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void) > > > cpu_dkstats->sectors[STAT_DISCARD] << 9; > > > } > > > > > > -flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > > > +guard(spinlock_irqsave)(&blkg->iostat.spinlock); > > > blkg_iostat_set(&blkg->iostat.cur, &tmp); > > > -u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > > > } > > > } > > > > ... > > > @@ -1157,16 +1149,14 @@ static void blkcg_print_one_stat(struct > > > > blkcg_gq *blkg, struct seq_file *s) > > > > > > seq_printf(s, "%s ", dname); > > > > > > -do { > > > -seq = u64_stats_fetch_begin(&bis->sync); > > > - > > > +scoped_guard(spinlock_irqsave, &bis->spinlock) { > > > rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > > > wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > > > dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > > > rios = bis->cur.ios[BLKG_IOSTAT_READ]; > > > wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > > > dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > > > -} while (u64_stats_fetch_retry(&bis->sync, seq)); > > > +} > > > > The above two are the only places which can potentially benefit > > from > > per-blkg lock but these aren't hot paths. I'd just use > > blkg_stat_lock > > for > > the above. > > In __blkcg_rstat_flush, u64 sync is used for fetching data, changing > to > spinlock will increase overhead both 64 bit and 32 bit systems. 64 > bit > systems do noting in u64 sync, and 32 bit systems can read data in > parallel in u64 sync if no one updating data. However, it is already > protected by blkg_stat_lock, so there is no parallelism now, and > there > is no issue here. I think that no changes are needed here. > > In blkcg_fill_root_iostats, this is where the issue happens in 32 bit > SMP systems, so spinlock needs to be added to protect it. > > In blkcg_print_one_stat, u64 sync is used for fetching data. Changing > to spinlock will increase overhead like in __blkcg_rstat_flush. > However, it is already protected by spin_lock_irq(&blkg->q- > >queue_lock) > in blkcg_print_stat, so there is no parallelism now, and there is no > issue here. I think that no changes are needed here. > > > > > > @@ -2152,30 +2141,29 @@ void blk_cgroup_bio_start(struct bio > > > *bio) > > > > > > cpu = get_cpu(); > > > bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); > > > -flags = u64_stats_update_begin_irqsave(&bis->sync); > > > - > > > -/* > > > - * If the bio is flagged with BIO_CGROUP_ACCT it means this is a > > > > split > > > - * bio and we would have already accounted for the size of the > > > > bio. > > > - */ > > > -if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { > > > -bio_set_flag(bio, BIO_CGROUP_ACCT); > > > -bis->cur.bytes[rwd] += bio->bi_iter.bi_size; > > > -} > > > -bis->cur.ios[rwd]++; > > > +scoped_guard(spinlock_irqsave, &bis->spinlock) { > > > +/* > > > + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a > > > > split > > > + * bio and we would have already accounted for the size of the > > > > bio. > > > + */ > > > +if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { > > > +bio_set_flag(bio, BIO_CGROUP_ACCT); > > > +bis->cur.bytes[rwd] += bio->bi_iter.bi_size; > > > +} > > > +bis->cur.ios[rwd]++; > > > > > > -/* > > > - * If the iostat_cpu isn't in a lockless list, put it into the > > > - * list to indicate that a stat update is pending. > > > - */ > > > -if (!READ_ONCE(bis->lqueued)) { > > > -struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); > > > +/* > > > + * If the iostat_cpu isn't in a lockless list, put it into the > > > + * list to indicate that a stat update is pending. > > > + */ > > > +if (!READ_ONCE(bis->lqueued)) { > > > +struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); > > > > > > -llist_add(&bis->lnode, lhead); > > > -WRITE_ONCE(bis->lqueued, true); > > > +llist_add(&bis->lnode, lhead); > > > +WRITE_ONCE(bis->lqueued, true); > > > +} > > > > These are per-cpu stat updates which should keep using u64_sync. We > > don't > > want to incur locking overhead for stat updates in the hot issue > > path. > > > > I agree that no changes are needed in blk_cgroup_bio_start. > > > Thanks. > > > > -- > > tejun > > I think we can look back to the original issue, which is that on 32 > bit > SMP systems will have concurrent problems on > u64_stats_update_begin_irqsave and u64_stats_update_end_irqrestore in > blkcg_fill_root_iostats. So, adding a lock only on 32 bits systems > in blkcg_fill_root_iostats is to resolve the concurrent issue and not > affect 64 bit systems, which do not have the issue in the first > place, > because u64 sync does noting in 64 bit systems and they don't need > it. > > I think we can just fix it by following change. > > @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void) > cpu_dkstats->sectors[STAT_DISCARD] << > 9; > } > > +#if BITS_PER_LONG == 32 > + guard(spinlock_irqsave)(&blkg_stat_lock); > +#endif > flags = u64_stats_update_begin_irqsave(&blkg- > > iostat.sync); > > blkg_iostat_set(&blkg->iostat.cur, &tmp); > u64_stats_update_end_irqrestore(&blkg->iostat.sync, > flags); > } > } I think there is another way to fix it. Maybe we should address the root cause, which is that u64_stats_update_begin_irqsave and u64_stats_update_end_irqrestore do not protect the seqcount in 32 bit SMP systems. This can be fix by the following change. diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 46040d66334a..94dd74b4fb2c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -64,6 +64,7 @@ struct u64_stats_sync { #if BITS_PER_LONG == 32 seqcount_t seq; + spinlock_t spinlock; #endif }; @@ -138,6 +139,7 @@ static inline void u64_stats_inc(u64_stats_t *p) static inline void u64_stats_init(struct u64_stats_sync *syncp) { seqcount_init(&syncp->seq); + spin_lock_init(&syncp->spinlock) } static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) @@ -191,6 +193,7 @@ static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync { unsigned long flags = __u64_stats_irqsave(); + spin_lock_irq(&syncp->spinlock); __u64_stats_update_begin(syncp); return flags; } @@ -199,6 +202,7 @@ static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, unsigned long flags) { __u64_stats_update_end(syncp); + spin_unlock_irq(&syncp->spinlock); __u64_stats_irqrestore(flags); } -- boy.wu
On Wed, 2024-07-17 at 11:44 +0800, Boy.Wu wrote: > > I think there is another way to fix it. Maybe we should address the > root cause, which is that u64_stats_update_begin_irqsave > and u64_stats_update_end_irqrestore do not protect the seqcount in 32 > bit SMP systems. This can be fix by the following change. > > diff --git a/include/linux/u64_stats_sync.h > b/include/linux/u64_stats_sync.h > index 46040d66334a..94dd74b4fb2c 100644 > --- a/include/linux/u64_stats_sync.h > +++ b/include/linux/u64_stats_sync.h > @@ -64,6 +64,7 @@ > struct u64_stats_sync { > #if BITS_PER_LONG == 32 > seqcount_t seq; > + spinlock_t spinlock; > #endif > }; > > @@ -138,6 +139,7 @@ static inline void u64_stats_inc(u64_stats_t *p) > static inline void u64_stats_init(struct u64_stats_sync *syncp) > { > seqcount_init(&syncp->seq); > + spin_lock_init(&syncp->spinlock) > } > > static inline void __u64_stats_update_begin(struct u64_stats_sync > *syncp) > @@ -191,6 +193,7 @@ static inline unsigned long > u64_stats_update_begin_irqsave(struct u64_stats_sync > { > unsigned long flags = __u64_stats_irqsave(); > > + spin_lock_irq(&syncp->spinlock); > __u64_stats_update_begin(syncp); > return flags; > } > @@ -199,6 +202,7 @@ static inline void > u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, > unsigned long > flags) > { > __u64_stats_update_end(syncp); > + spin_unlock_irq(&syncp->spinlock); > __u64_stats_irqrestore(flags); > } > > -- > boy.wu > Never mind, there is a usage in u64_stats_sync.h * Usage : * * Stats producer (writer) should use following template granted it already got * an exclusive access to counters (a lock is already taken, or per cpu * data is used [in a non preemptable context]) * * spin_lock_bh(...) or other synchronization to get exclusive access * ... * u64_stats_update_begin(&stats->syncp); * u64_stats_add(&stats->bytes64, len); // non atomic operation * u64_stats_inc(&stats->packets64); // non atomic operation * u64_stats_update_end(&stats->syncp); -- boy.wu
Hello, Does something like the following work for you? Thanks. diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 37e6cc91d576..ec1d191f5c83 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -329,7 +329,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); #endif - u64_stats_init(&blkg->iostat.sync); for_each_possible_cpu(cpu) { u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg; @@ -632,24 +631,26 @@ static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) static void __blkg_clear_stat(struct blkg_iostat_set *bis) { struct blkg_iostat cur = {0}; - unsigned long flags; - flags = u64_stats_update_begin_irqsave(&bis->sync); blkg_iostat_set(&bis->cur, &cur); blkg_iostat_set(&bis->last, &cur); - u64_stats_update_end_irqrestore(&bis->sync, flags); } static void blkg_clear_stat(struct blkcg_gq *blkg) { + unsigned long flags; int cpu; + raw_spin_lock_irqsave(&blkg_stat_lock, flags); + for_each_possible_cpu(cpu) { struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu); __blkg_clear_stat(s); } __blkg_clear_stat(&blkg->iostat); + + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); } static int blkcg_reset_stats(struct cgroup_subsys_state *css, @@ -998,12 +999,10 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, unsigned long flags; /* propagate percpu delta to global */ - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&delta, cur); blkg_iostat_sub(&delta, last); blkg_iostat_add(&blkg->iostat.cur, &delta); blkg_iostat_add(last, &delta); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) @@ -1134,9 +1133,9 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_DISCARD] << 9; } - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + raw_spin_lock_irqsave(&blkg_stat_lock, flags); blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); } } @@ -1145,7 +1144,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) struct blkg_iostat_set *bis = &blkg->iostat; u64 rbytes, wbytes, rios, wios, dbytes, dios; const char *dname; - unsigned seq; int i; if (!blkg->online) @@ -1157,16 +1155,14 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) seq_printf(s, "%s ", dname); - do { - seq = u64_stats_fetch_begin(&bis->sync); - - rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; - wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; - dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; - rios = bis->cur.ios[BLKG_IOSTAT_READ]; - wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; - dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; - } while (u64_stats_fetch_retry(&bis->sync, seq)); + raw_spin_lock_irq(&blkg_stat_lock); + rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; + wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; + dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; + rios = bis->cur.ios[BLKG_IOSTAT_READ]; + wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; + dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; + raw_spin_unlock_irq(&blkg_stat_lock, flags); if (rbytes || wbytes || rios || wios) { seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
On 7/17/24 12:55, tj@kernel.org wrote: > Hello, > > Does something like the following work for you? > > Thanks. > > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c > index 37e6cc91d576..ec1d191f5c83 100644 > --- a/block/blk-cgroup.c > +++ b/block/blk-cgroup.c > @@ -329,7 +329,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, > INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); > #endif > > - u64_stats_init(&blkg->iostat.sync); > for_each_possible_cpu(cpu) { > u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); > per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg; > @@ -632,24 +631,26 @@ static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) > static void __blkg_clear_stat(struct blkg_iostat_set *bis) > { > struct blkg_iostat cur = {0}; > - unsigned long flags; > > - flags = u64_stats_update_begin_irqsave(&bis->sync); > blkg_iostat_set(&bis->cur, &cur); > blkg_iostat_set(&bis->last, &cur); > - u64_stats_update_end_irqrestore(&bis->sync, flags); > } > > static void blkg_clear_stat(struct blkcg_gq *blkg) > { > + unsigned long flags; > int cpu; > > + raw_spin_lock_irqsave(&blkg_stat_lock, flags); > + > for_each_possible_cpu(cpu) { > struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu); > > __blkg_clear_stat(s); > } > __blkg_clear_stat(&blkg->iostat); > + > + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); > } > > static int blkcg_reset_stats(struct cgroup_subsys_state *css, > @@ -998,12 +999,10 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, > unsigned long flags; > > /* propagate percpu delta to global */ > - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > blkg_iostat_set(&delta, cur); > blkg_iostat_sub(&delta, last); > blkg_iostat_add(&blkg->iostat.cur, &delta); > blkg_iostat_add(last, &delta); > - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > } > > static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) > @@ -1134,9 +1133,9 @@ static void blkcg_fill_root_iostats(void) > cpu_dkstats->sectors[STAT_DISCARD] << 9; > } > > - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > + raw_spin_lock_irqsave(&blkg_stat_lock, flags); > blkg_iostat_set(&blkg->iostat.cur, &tmp); > - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); > } > } > > @@ -1145,7 +1144,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) > struct blkg_iostat_set *bis = &blkg->iostat; > u64 rbytes, wbytes, rios, wios, dbytes, dios; > const char *dname; > - unsigned seq; > int i; > > if (!blkg->online) > @@ -1157,16 +1155,14 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) > > seq_printf(s, "%s ", dname); > > - do { > - seq = u64_stats_fetch_begin(&bis->sync); > - > - rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > - wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > - dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > - rios = bis->cur.ios[BLKG_IOSTAT_READ]; > - wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > - dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > - } while (u64_stats_fetch_retry(&bis->sync, seq)); > + raw_spin_lock_irq(&blkg_stat_lock); > + rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > + wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > + dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > + rios = bis->cur.ios[BLKG_IOSTAT_READ]; > + wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > + dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > + raw_spin_unlock_irq(&blkg_stat_lock, flags); > > if (rbytes || wbytes || rios || wios) { > seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu", > bis->sync is still being used in blk_cgroup_bio_start(). Replacing it with a global lock may kill performance. We may have to use a per-cpu lock if we want to go this route of eliminating bis->sync. Cheers, Longman
Hello, Waiman. On Wed, Jul 17, 2024 at 01:37:56PM -0400, Waiman Long wrote: > bis->sync is still being used in blk_cgroup_bio_start(). Replacing it with a > global lock may kill performance. We may have to use a per-cpu lock if we > want to go this route of eliminating bis->sync. So, the idea is to keep using u64_sync for blkg->iostat_cpu and use blkg_stat_lock for blkg->iostat. The former is the only one which is updated in hot path, right? Thanks.
On 7/17/24 13:40, tj@kernel.org wrote: > Hello, Waiman. > > On Wed, Jul 17, 2024 at 01:37:56PM -0400, Waiman Long wrote: >> bis->sync is still being used in blk_cgroup_bio_start(). Replacing it with a >> global lock may kill performance. We may have to use a per-cpu lock if we >> want to go this route of eliminating bis->sync. > So, the idea is to keep using u64_sync for blkg->iostat_cpu and use > blkg_stat_lock for blkg->iostat. The former is the only one which is updated > in hot path, right? Well, it can be confusing whether we are dealing with blkg->iostat or blkg->iostat_cpu. In many cases, we are dealing with iostat_cpu instead of iostat like __blkcg_rstat_flush() and blkg_clear_stat(). So we can't eliminate the use of u64_stats_update_begin_irqsave() in those cases. Cheers, Longman
Hello, On Wed, Jul 17, 2024 at 02:18:39PM -0400, Waiman Long wrote: > Well, it can be confusing whether we are dealing with blkg->iostat or > blkg->iostat_cpu. In many cases, we are dealing with iostat_cpu instead of > iostat like __blkcg_rstat_flush() and blkg_clear_stat(). So we can't > eliminate the use of u64_stats_update_begin_irqsave() in those cases. I mean, we need to distinguish them. For 32bits, blkg->iostat has multiple updaters, so we can't use u64_sync; however, blkg->iostat_cpu has only one updater (except blkg_clear_stat() which I don't think we need to worry too much about), so u64_sync is fine. Thanks.
On 7/17/24 14:24, tj@kernel.org wrote: > Hello, > > On Wed, Jul 17, 2024 at 02:18:39PM -0400, Waiman Long wrote: >> Well, it can be confusing whether we are dealing with blkg->iostat or >> blkg->iostat_cpu. In many cases, we are dealing with iostat_cpu instead of >> iostat like __blkcg_rstat_flush() and blkg_clear_stat(). So we can't >> eliminate the use of u64_stats_update_begin_irqsave() in those cases. > I mean, we need to distinguish them. For 32bits, blkg->iostat has multiple > updaters, so we can't use u64_sync; however, blkg->iostat_cpu has only one > updater (except blkg_clear_stat() which I don't think we need to worry too > much about), so u64_sync is fine. I was wrong about __blkcg_rstat_flush(). Right, the main updater of iostat_cpu is blk_cgroup_bio_start(). We do need to drop down some comment on what is protected by u64_sync and what is by blkg_stat_lock though. It can be confusing. Cheers, Longman
On Wed, Jul 17, 2024 at 02:55:37PM -0400, Waiman Long wrote: > I was wrong about __blkcg_rstat_flush(). Right, the main updater of > iostat_cpu is blk_cgroup_bio_start(). We do need to drop down some comment > on what is protected by u64_sync and what is by blkg_stat_lock though. It > can be confusing. Oh yeah, definitely. It is confusing.
On Wed, 2024-07-17 at 06:55 -1000, tj@kernel.org wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > Hello, > > Does something like the following work for you? > > Thanks. > > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c > index 37e6cc91d576..ec1d191f5c83 100644 > --- a/block/blk-cgroup.c > +++ b/block/blk-cgroup.c > @@ -329,7 +329,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg > *blkcg, struct gendisk *disk, > INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); > #endif > > -u64_stats_init(&blkg->iostat.sync); > for_each_possible_cpu(cpu) { > u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); > per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg; > @@ -632,24 +631,26 @@ static void blkg_iostat_set(struct blkg_iostat > *dst, struct blkg_iostat *src) > static void __blkg_clear_stat(struct blkg_iostat_set *bis) > { > struct blkg_iostat cur = {0}; > -unsigned long flags; > > -flags = u64_stats_update_begin_irqsave(&bis->sync); > blkg_iostat_set(&bis->cur, &cur); > blkg_iostat_set(&bis->last, &cur); > -u64_stats_update_end_irqrestore(&bis->sync, flags); > } > > static void blkg_clear_stat(struct blkcg_gq *blkg) > { > +unsigned long flags; > int cpu; > > +raw_spin_lock_irqsave(&blkg_stat_lock, flags); > + > for_each_possible_cpu(cpu) { > struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu); > > __blkg_clear_stat(s); > } > __blkg_clear_stat(&blkg->iostat); > + > +raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); > } > > static int blkcg_reset_stats(struct cgroup_subsys_state *css, > @@ -998,12 +999,10 @@ static void blkcg_iostat_update(struct blkcg_gq > *blkg, struct blkg_iostat *cur, > unsigned long flags; > > /* propagate percpu delta to global */ > -flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > blkg_iostat_set(&delta, cur); > blkg_iostat_sub(&delta, last); > blkg_iostat_add(&blkg->iostat.cur, &delta); > blkg_iostat_add(last, &delta); > -u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > } > > static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) > @@ -1134,9 +1133,9 @@ static void blkcg_fill_root_iostats(void) > cpu_dkstats->sectors[STAT_DISCARD] << 9; > } > > -flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); > +raw_spin_lock_irqsave(&blkg_stat_lock, flags); > blkg_iostat_set(&blkg->iostat.cur, &tmp); > -u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); > +raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); > } > } > > @@ -1145,7 +1144,6 @@ static void blkcg_print_one_stat(struct > blkcg_gq *blkg, struct seq_file *s) > struct blkg_iostat_set *bis = &blkg->iostat; > u64 rbytes, wbytes, rios, wios, dbytes, dios; > const char *dname; > -unsigned seq; > int i; > > if (!blkg->online) > @@ -1157,16 +1155,14 @@ static void blkcg_print_one_stat(struct > blkcg_gq *blkg, struct seq_file *s) > > seq_printf(s, "%s ", dname); > > -do { > -seq = u64_stats_fetch_begin(&bis->sync); > - > -rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > -wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > -dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > -rios = bis->cur.ios[BLKG_IOSTAT_READ]; > -wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > -dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > -} while (u64_stats_fetch_retry(&bis->sync, seq)); > +raw_spin_lock_irq(&blkg_stat_lock); > +rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; > +wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; > +dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; > +rios = bis->cur.ios[BLKG_IOSTAT_READ]; > +wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; > +dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; > +raw_spin_unlock_irq(&blkg_stat_lock, flags); > > if (rbytes || wbytes || rios || wios) { > seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu > dbytes=%llu dios=%llu", I think this will work, but as I mentioned before, this issue is only on 32 bit SMP systems. Replacing u64 sync with spinlock will increase overhead on 64 bit systems, because u64 sync does nothing on 64 bit systems. However, if it is acceptable, we can proceed with this solution. -- boy.wu
Hello, On Thu, Jul 18, 2024 at 01:21:32AM +0000, Boy Wu (吳勃誼) wrote: ... > I think this will work, but as I mentioned before, this issue is only > on 32 bit SMP systems. Replacing u64 sync with spinlock will increase > overhead on 64 bit systems, because u64 sync does nothing on 64 bit > systems. However, if it is acceptable, we can proceed with this > solution. We can encapsulate the spinlock in some helpers and make them conditional on 32bit. However, the u64_sync -> spinlock conversions in the suggested patch are all on cold paths, so I doubt it'd be all that noticeable especially given that the hottest path of them all is already grabbing blkg_stat_lock, but either way is fine by me. Thanks.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 37e6cc91d576..4b66f37c45a0 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -329,7 +329,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); #endif - u64_stats_init(&blkg->iostat.sync); + spin_lock_init(&blkg->iostat.spinlock); for_each_possible_cpu(cpu) { u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg; @@ -995,15 +995,13 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, struct blkg_iostat *last) { struct blkg_iostat delta; - unsigned long flags; /* propagate percpu delta to global */ - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + guard(spinlock_irqsave)(&blkg->iostat.spinlock); blkg_iostat_set(&delta, cur); blkg_iostat_sub(&delta, last); blkg_iostat_add(&blkg->iostat.cur, &delta); blkg_iostat_add(last, &delta); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) @@ -1034,7 +1032,6 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) struct blkcg_gq *blkg = bisc->blkg; struct blkcg_gq *parent = blkg->parent; struct blkg_iostat cur; - unsigned int seq; /* * Order assignment of `next_bisc` from `bisc->lnode.next` in @@ -1051,10 +1048,8 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) goto propagate_up; /* propagate up to parent only */ /* fetch the current per-cpu values */ - do { - seq = u64_stats_fetch_begin(&bisc->sync); + scoped_guard(spinlock_irqsave, &bisc->spinlock) blkg_iostat_set(&cur, &bisc->cur); - } while (u64_stats_fetch_retry(&bisc->sync, seq)); blkcg_iostat_update(blkg, &cur, &bisc->last); @@ -1112,7 +1107,6 @@ static void blkcg_fill_root_iostats(void) struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg; struct blkg_iostat tmp; int cpu; - unsigned long flags; memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_DISCARD] << 9; } - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + guard(spinlock_irqsave)(&blkg->iostat.spinlock); blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } @@ -1145,7 +1138,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) struct blkg_iostat_set *bis = &blkg->iostat; u64 rbytes, wbytes, rios, wios, dbytes, dios; const char *dname; - unsigned seq; int i; if (!blkg->online) @@ -1157,16 +1149,14 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) seq_printf(s, "%s ", dname); - do { - seq = u64_stats_fetch_begin(&bis->sync); - + scoped_guard(spinlock_irqsave, &bis->spinlock) { rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; rios = bis->cur.ios[BLKG_IOSTAT_READ]; wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; - } while (u64_stats_fetch_retry(&bis->sync, seq)); + } if (rbytes || wbytes || rios || wios) { seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu", @@ -2141,7 +2131,6 @@ void blk_cgroup_bio_start(struct bio *bio) struct blkcg *blkcg = bio->bi_blkg->blkcg; int rwd = blk_cgroup_io_type(bio), cpu; struct blkg_iostat_set *bis; - unsigned long flags; if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) return; @@ -2152,30 +2141,29 @@ void blk_cgroup_bio_start(struct bio *bio) cpu = get_cpu(); bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); - flags = u64_stats_update_begin_irqsave(&bis->sync); - - /* - * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split - * bio and we would have already accounted for the size of the bio. - */ - if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { - bio_set_flag(bio, BIO_CGROUP_ACCT); - bis->cur.bytes[rwd] += bio->bi_iter.bi_size; - } - bis->cur.ios[rwd]++; + scoped_guard(spinlock_irqsave, &bis->spinlock) { + /* + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split + * bio and we would have already accounted for the size of the bio. + */ + if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { + bio_set_flag(bio, BIO_CGROUP_ACCT); + bis->cur.bytes[rwd] += bio->bi_iter.bi_size; + } + bis->cur.ios[rwd]++; - /* - * If the iostat_cpu isn't in a lockless list, put it into the - * list to indicate that a stat update is pending. - */ - if (!READ_ONCE(bis->lqueued)) { - struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); + /* + * If the iostat_cpu isn't in a lockless list, put it into the + * list to indicate that a stat update is pending. + */ + if (!READ_ONCE(bis->lqueued)) { + struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); - llist_add(&bis->lnode, lhead); - WRITE_ONCE(bis->lqueued, true); + llist_add(&bis->lnode, lhead); + WRITE_ONCE(bis->lqueued, true); + } } - u64_stats_update_end_irqrestore(&bis->sync, flags); cgroup_rstat_updated(blkcg->css.cgroup, cpu); put_cpu(); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index bd472a30bc61..b9544969a131 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -44,6 +44,7 @@ struct blkg_iostat { }; struct blkg_iostat_set { + spinlock_t spinlock; struct u64_stats_sync sync; struct blkcg_gq *blkg; struct llist_node lnode;