diff mbox

blk-throttle: fix null pointer dereference while throttling writeback IOs

Message ID 5e5b83db-6f78-9869-fe9c-b1f6a09281ca@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jiufei Xue Oct. 10, 2017, 3:13 a.m. UTC
From: Jiufei Xue <jiufei.xjf@alibaba-inc.com>

A null pointer dereference can occur when blkcg is removed manually
with writeback IOs inflight. This is caused by the following case:

Writeback kworker submit the bio and set bio->bi_cg_private to tg
in blk_throtl_assoc_bio.
Then we remove the block cgroup manually, the blkg and tg would be
freed if there is no request inflight.
When the submitted bio come back, blk_throtl_bio_endio() fetch the tg
which was already freed.

Fix this by increasing the refcount of blkg in funcion
blk_throtl_assoc_bio() so that the blkg will not be freed until the
bio_endio called.

Signed-off-by: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
---
 block/blk-throttle.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

Comments

Shaohua Li Oct. 10, 2017, 6:13 p.m. UTC | #1
On Tue, Oct 10, 2017 at 11:13:32AM +0800, xuejiufei wrote:
> From: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
> 
> A null pointer dereference can occur when blkcg is removed manually
> with writeback IOs inflight. This is caused by the following case:
> 
> Writeback kworker submit the bio and set bio->bi_cg_private to tg
> in blk_throtl_assoc_bio.
> Then we remove the block cgroup manually, the blkg and tg would be
> freed if there is no request inflight.
> When the submitted bio come back, blk_throtl_bio_endio() fetch the tg
> which was already freed.
> 
> Fix this by increasing the refcount of blkg in funcion
> blk_throtl_assoc_bio() so that the blkg will not be freed until the
> bio_endio called.
> 
> Signed-off-by: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
> ---
>  block/blk-throttle.c | 12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> index 17816a0..d80c3f0 100644
> --- a/block/blk-throttle.c
> +++ b/block/blk-throttle.c
> @@ -2112,8 +2112,12 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
>  static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
>  {
>  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
> -	if (bio->bi_css)
> +	if (bio->bi_css) {
> +		if (bio->bi_cg_private)
> +			blkg_put(tg_to_blkg(bio->bi_cg_private));
>  		bio->bi_cg_private = tg;
> +		blkg_get(tg_to_blkg(tg));
> +	}
>  	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
>  #endif
>  }
> @@ -2283,8 +2287,10 @@ void blk_throtl_bio_endio(struct bio *bio)
>  
>  	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
>  	finish_time = __blk_stat_time(finish_time_ns) >> 10;
> -	if (!start_time || finish_time <= start_time)
> +	if (!start_time || finish_time <= start_time) {
> +		blkg_put(tg_to_blkg(tg));
>  		return;
> +	}
>  
>  	lat = finish_time - start_time;
>  	/* this is only for bio based driver */
> @@ -2314,6 +2320,8 @@ void blk_throtl_bio_endio(struct bio *bio)
>  		tg->bio_cnt /= 2;
>  		tg->bad_bio_cnt /= 2;
>  	}
> +
> +	blkg_put(tg_to_blkg(tg));
>  }
>  #endif

Reviewed-by: Shaohua Li <shli@fb.com>
Jens Axboe Oct. 10, 2017, 6:48 p.m. UTC | #2
On 10/10/2017 12:13 PM, Shaohua Li wrote:
> On Tue, Oct 10, 2017 at 11:13:32AM +0800, xuejiufei wrote:
>> From: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
>>
>> A null pointer dereference can occur when blkcg is removed manually
>> with writeback IOs inflight. This is caused by the following case:
>>
>> Writeback kworker submit the bio and set bio->bi_cg_private to tg
>> in blk_throtl_assoc_bio.
>> Then we remove the block cgroup manually, the blkg and tg would be
>> freed if there is no request inflight.
>> When the submitted bio come back, blk_throtl_bio_endio() fetch the tg
>> which was already freed.
>>
>> Fix this by increasing the refcount of blkg in funcion
>> blk_throtl_assoc_bio() so that the blkg will not be freed until the
>> bio_endio called.
>>
>> Signed-off-by: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
>> ---
>>  block/blk-throttle.c | 12 ++++++++++--
>>  1 file changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
>> index 17816a0..d80c3f0 100644
>> --- a/block/blk-throttle.c
>> +++ b/block/blk-throttle.c
>> @@ -2112,8 +2112,12 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
>>  static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
>>  {
>>  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>> -	if (bio->bi_css)
>> +	if (bio->bi_css) {
>> +		if (bio->bi_cg_private)
>> +			blkg_put(tg_to_blkg(bio->bi_cg_private));
>>  		bio->bi_cg_private = tg;
>> +		blkg_get(tg_to_blkg(tg));
>> +	}
>>  	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
>>  #endif
>>  }
>> @@ -2283,8 +2287,10 @@ void blk_throtl_bio_endio(struct bio *bio)
>>  
>>  	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
>>  	finish_time = __blk_stat_time(finish_time_ns) >> 10;
>> -	if (!start_time || finish_time <= start_time)
>> +	if (!start_time || finish_time <= start_time) {
>> +		blkg_put(tg_to_blkg(tg));
>>  		return;
>> +	}
>>  
>>  	lat = finish_time - start_time;
>>  	/* this is only for bio based driver */
>> @@ -2314,6 +2320,8 @@ void blk_throtl_bio_endio(struct bio *bio)
>>  		tg->bio_cnt /= 2;
>>  		tg->bad_bio_cnt /= 2;
>>  	}
>> +
>> +	blkg_put(tg_to_blkg(tg));
>>  }
>>  #endif
> 
> Reviewed-by: Shaohua Li <shli@fb.com>

I was going to queue this up for 4.15, but was wondering if there's a
strong reason to include it for 4.14 instead?
Shaohua Li Oct. 10, 2017, 7:05 p.m. UTC | #3
On Tue, Oct 10, 2017 at 12:48:38PM -0600, Jens Axboe wrote:
> On 10/10/2017 12:13 PM, Shaohua Li wrote:
> > On Tue, Oct 10, 2017 at 11:13:32AM +0800, xuejiufei wrote:
> >> From: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
> >>
> >> A null pointer dereference can occur when blkcg is removed manually
> >> with writeback IOs inflight. This is caused by the following case:
> >>
> >> Writeback kworker submit the bio and set bio->bi_cg_private to tg
> >> in blk_throtl_assoc_bio.
> >> Then we remove the block cgroup manually, the blkg and tg would be
> >> freed if there is no request inflight.
> >> When the submitted bio come back, blk_throtl_bio_endio() fetch the tg
> >> which was already freed.
> >>
> >> Fix this by increasing the refcount of blkg in funcion
> >> blk_throtl_assoc_bio() so that the blkg will not be freed until the
> >> bio_endio called.
> >>
> >> Signed-off-by: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
> >> ---
> >>  block/blk-throttle.c | 12 ++++++++++--
> >>  1 file changed, 10 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> >> index 17816a0..d80c3f0 100644
> >> --- a/block/blk-throttle.c
> >> +++ b/block/blk-throttle.c
> >> @@ -2112,8 +2112,12 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
> >>  static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
> >>  {
> >>  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
> >> -	if (bio->bi_css)
> >> +	if (bio->bi_css) {
> >> +		if (bio->bi_cg_private)
> >> +			blkg_put(tg_to_blkg(bio->bi_cg_private));
> >>  		bio->bi_cg_private = tg;
> >> +		blkg_get(tg_to_blkg(tg));
> >> +	}
> >>  	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
> >>  #endif
> >>  }
> >> @@ -2283,8 +2287,10 @@ void blk_throtl_bio_endio(struct bio *bio)
> >>  
> >>  	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
> >>  	finish_time = __blk_stat_time(finish_time_ns) >> 10;
> >> -	if (!start_time || finish_time <= start_time)
> >> +	if (!start_time || finish_time <= start_time) {
> >> +		blkg_put(tg_to_blkg(tg));
> >>  		return;
> >> +	}
> >>  
> >>  	lat = finish_time - start_time;
> >>  	/* this is only for bio based driver */
> >> @@ -2314,6 +2320,8 @@ void blk_throtl_bio_endio(struct bio *bio)
> >>  		tg->bio_cnt /= 2;
> >>  		tg->bad_bio_cnt /= 2;
> >>  	}
> >> +
> >> +	blkg_put(tg_to_blkg(tg));
> >>  }
> >>  #endif
> > 
> > Reviewed-by: Shaohua Li <shli@fb.com>
> 
> I was going to queue this up for 4.15, but was wondering if there's a
> strong reason to include it for 4.14 instead?

Either is ok to me, this isn't easy to trigger, but on the other hand it's
quite safe.
Jens Axboe Oct. 10, 2017, 7:09 p.m. UTC | #4
On 10/10/2017 01:05 PM, Shaohua Li wrote:
> On Tue, Oct 10, 2017 at 12:48:38PM -0600, Jens Axboe wrote:
>> On 10/10/2017 12:13 PM, Shaohua Li wrote:
>>> On Tue, Oct 10, 2017 at 11:13:32AM +0800, xuejiufei wrote:
>>>> From: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
>>>>
>>>> A null pointer dereference can occur when blkcg is removed manually
>>>> with writeback IOs inflight. This is caused by the following case:
>>>>
>>>> Writeback kworker submit the bio and set bio->bi_cg_private to tg
>>>> in blk_throtl_assoc_bio.
>>>> Then we remove the block cgroup manually, the blkg and tg would be
>>>> freed if there is no request inflight.
>>>> When the submitted bio come back, blk_throtl_bio_endio() fetch the tg
>>>> which was already freed.
>>>>
>>>> Fix this by increasing the refcount of blkg in funcion
>>>> blk_throtl_assoc_bio() so that the blkg will not be freed until the
>>>> bio_endio called.
>>>>
>>>> Signed-off-by: Jiufei Xue <jiufei.xjf@alibaba-inc.com>
>>>> ---
>>>>  block/blk-throttle.c | 12 ++++++++++--
>>>>  1 file changed, 10 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
>>>> index 17816a0..d80c3f0 100644
>>>> --- a/block/blk-throttle.c
>>>> +++ b/block/blk-throttle.c
>>>> @@ -2112,8 +2112,12 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
>>>>  static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
>>>>  {
>>>>  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>>>> -	if (bio->bi_css)
>>>> +	if (bio->bi_css) {
>>>> +		if (bio->bi_cg_private)
>>>> +			blkg_put(tg_to_blkg(bio->bi_cg_private));
>>>>  		bio->bi_cg_private = tg;
>>>> +		blkg_get(tg_to_blkg(tg));
>>>> +	}
>>>>  	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
>>>>  #endif
>>>>  }
>>>> @@ -2283,8 +2287,10 @@ void blk_throtl_bio_endio(struct bio *bio)
>>>>  
>>>>  	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
>>>>  	finish_time = __blk_stat_time(finish_time_ns) >> 10;
>>>> -	if (!start_time || finish_time <= start_time)
>>>> +	if (!start_time || finish_time <= start_time) {
>>>> +		blkg_put(tg_to_blkg(tg));
>>>>  		return;
>>>> +	}
>>>>  
>>>>  	lat = finish_time - start_time;
>>>>  	/* this is only for bio based driver */
>>>> @@ -2314,6 +2320,8 @@ void blk_throtl_bio_endio(struct bio *bio)
>>>>  		tg->bio_cnt /= 2;
>>>>  		tg->bad_bio_cnt /= 2;
>>>>  	}
>>>> +
>>>> +	blkg_put(tg_to_blkg(tg));
>>>>  }
>>>>  #endif
>>>
>>> Reviewed-by: Shaohua Li <shli@fb.com>
>>
>> I was going to queue this up for 4.15, but was wondering if there's a
>> strong reason to include it for 4.14 instead?
> 
> Either is ok to me, this isn't easy to trigger, but on the other hand it's
> quite safe.

OK, let's just stick with 4.15 then.
diff mbox

Patch

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 17816a0..d80c3f0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2112,8 +2112,12 @@  static inline void throtl_update_latency_buckets(struct throtl_data *td)
 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	if (bio->bi_css)
+	if (bio->bi_css) {
+		if (bio->bi_cg_private)
+			blkg_put(tg_to_blkg(bio->bi_cg_private));
 		bio->bi_cg_private = tg;
+		blkg_get(tg_to_blkg(tg));
+	}
 	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
 #endif
 }
@@ -2283,8 +2287,10 @@  void blk_throtl_bio_endio(struct bio *bio)
 
 	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
 	finish_time = __blk_stat_time(finish_time_ns) >> 10;
-	if (!start_time || finish_time <= start_time)
+	if (!start_time || finish_time <= start_time) {
+		blkg_put(tg_to_blkg(tg));
 		return;
+	}
 
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
@@ -2314,6 +2320,8 @@  void blk_throtl_bio_endio(struct bio *bio)
 		tg->bio_cnt /= 2;
 		tg->bad_bio_cnt /= 2;
 	}
+
+	blkg_put(tg_to_blkg(tg));
 }
 #endif