diff mbox series

[for-next,v4,3/5] null_blk: fix zone resource management for badblocks

Message ID 20250121081517.1212575-4-shinichiro.kawasaki@wdc.com (mailing list archive)
State New
Headers show
Series null_blk: improve write failure simulation | expand

Commit Message

Shin'ichiro Kawasaki Jan. 21, 2025, 8:15 a.m. UTC
When the badblocks parameter is set for zoned null_blk, zone resource
management does not work correctly. This issue arises because
null_zone_write() modifies the zone resource status and then call
null_process_cmd(), which handles the badblocks parameter. When
badblocks cause IO failures and no IO happens, the zone resource status
should not change. However, it has already changed.

To fix the unexpected change in zone resource status, when writes are
requested for sequential write required zones, handle badblocks not in
null_process_cmd() but in null_zone_write(). Modify null_zone_write() to
call null_handle_badblocks() before changing the zone resource status.
Also, call null_handle_memory_backed() instead of null_process_cmd().

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
 drivers/block/null_blk/main.c     | 11 ++++-------
 drivers/block/null_blk/null_blk.h |  5 +++++
 drivers/block/null_blk/zoned.c    | 15 ++++++++++++---
 3 files changed, 21 insertions(+), 10 deletions(-)

Comments

Damien Le Moal Jan. 23, 2025, 3:37 a.m. UTC | #1
On 1/21/25 5:15 PM, Shin'ichiro Kawasaki wrote:
> When the badblocks parameter is set for zoned null_blk, zone resource
> management does not work correctly. This issue arises because
> null_zone_write() modifies the zone resource status and then call
> null_process_cmd(), which handles the badblocks parameter. When
> badblocks cause IO failures and no IO happens, the zone resource status
> should not change. However, it has already changed.

s/zone resource status/zone condition

s/it has already changed/it may have already changed
(because not all write operations change the zone condition)

> To fix the unexpected change in zone resource status, when writes are
> requested for sequential write required zones, handle badblocks not in
> null_process_cmd() but in null_zone_write(). Modify null_zone_write() to
> call null_handle_badblocks() before changing the zone resource status.
> Also, call null_handle_memory_backed() instead of null_process_cmd().
> 
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> ---
>  drivers/block/null_blk/main.c     | 11 ++++-------
>  drivers/block/null_blk/null_blk.h |  5 +++++
>  drivers/block/null_blk/zoned.c    | 15 ++++++++++++---
>  3 files changed, 21 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> index 2a060a6ea8c0..87037cb375c9 100644
> --- a/drivers/block/null_blk/main.c
> +++ b/drivers/block/null_blk/main.c
> @@ -1309,9 +1309,8 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
>  	return sts;
>  }
>  
> -static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
> -						 sector_t sector,
> -						 sector_t nr_sectors)
> +blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
> +				   sector_t nr_sectors)
>  {
>  	struct badblocks *bb = &cmd->nq->dev->badblocks;
>  	sector_t first_bad;
> @@ -1326,10 +1325,8 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
>  	return BLK_STS_IOERR;
>  }
>  
> -static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
> -						     enum req_op op,
> -						     sector_t sector,
> -						     sector_t nr_sectors)
> +blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
> +				       sector_t sector, sector_t nr_sectors)
>  {
>  	struct nullb_device *dev = cmd->nq->dev;
>  
> diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> index 3c4c07f0418b..ee60f3a88796 100644
> --- a/drivers/block/null_blk/null_blk.h
> +++ b/drivers/block/null_blk/null_blk.h
> @@ -132,6 +132,11 @@ blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
>  				 sector_t nr_sectors);
>  blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
>  			      sector_t sector, unsigned int nr_sectors);
> +blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
> +				   sector_t nr_sectors);
> +blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
> +				       sector_t sector, sector_t nr_sectors);
> +
>  
>  #ifdef CONFIG_BLK_DEV_ZONED
>  int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
> diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
> index 0d5f9bf95229..09dae8d018aa 100644
> --- a/drivers/block/null_blk/zoned.c
> +++ b/drivers/block/null_blk/zoned.c
> @@ -389,6 +389,12 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>  		goto unlock_zone;
>  	}
>  
> +	if (dev->badblocks.shift != -1) {
> +		ret = null_handle_badblocks(cmd, sector, nr_sectors);
> +		if (ret != BLK_STS_OK)
> +			goto unlock_zone;
> +	}
> +

This should come after the below zone condition check and change...

>  	if (zone->cond == BLK_ZONE_COND_CLOSED ||
>  	    zone->cond == BLK_ZONE_COND_EMPTY) {
>  		if (dev->need_zone_res_mgmt) {
> @@ -412,9 +418,12 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>  		zone->cond = BLK_ZONE_COND_IMP_OPEN;
>  	}
>  
> -	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> -	if (ret != BLK_STS_OK)
> -		goto unlock_zone;

...so about here. If null_handle_badblocks() return BLK_STS_IOERR because the
first sector of the write operation is the bad one, then we will return an error
and not change the write pointer, but we do have correctly changed the zone
condition nevertheless, exactly like a real drive would.

> +	if (dev->memory_backed) {
> +		ret = null_handle_memory_backed(cmd, REQ_OP_WRITE, sector,
> +						nr_sectors);
> +		if (ret != BLK_STS_OK)
> +			goto unlock_zone;
> +	}
>  
>  	zone->wp += nr_sectors;
>  	if (zone->wp == zone->start + zone->capacity) {
Shin'ichiro Kawasaki Jan. 24, 2025, 4:25 a.m. UTC | #2
On Jan 23, 2025 / 12:37, Damien Le Moal wrote:
> On 1/21/25 5:15 PM, Shin'ichiro Kawasaki wrote:
> > When the badblocks parameter is set for zoned null_blk, zone resource
> > management does not work correctly. This issue arises because
> > null_zone_write() modifies the zone resource status and then call
> > null_process_cmd(), which handles the badblocks parameter. When
> > badblocks cause IO failures and no IO happens, the zone resource status
> > should not change. However, it has already changed.
> 
> s/zone resource status/zone condition
> 
> s/it has already changed/it may have already changed
> (because not all write operations change the zone condition)
> 
> > To fix the unexpected change in zone resource status, when writes are
> > requested for sequential write required zones, handle badblocks not in
> > null_process_cmd() but in null_zone_write(). Modify null_zone_write() to
> > call null_handle_badblocks() before changing the zone resource status.
> > Also, call null_handle_memory_backed() instead of null_process_cmd().

I will rewrite the whole commit message. Please see my comment below.

> > 
> > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> > ---
> >  drivers/block/null_blk/main.c     | 11 ++++-------
> >  drivers/block/null_blk/null_blk.h |  5 +++++
> >  drivers/block/null_blk/zoned.c    | 15 ++++++++++++---
> >  3 files changed, 21 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> > index 2a060a6ea8c0..87037cb375c9 100644
> > --- a/drivers/block/null_blk/main.c
> > +++ b/drivers/block/null_blk/main.c
> > @@ -1309,9 +1309,8 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
> >  	return sts;
> >  }
> >  
> > -static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
> > -						 sector_t sector,
> > -						 sector_t nr_sectors)
> > +blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
> > +				   sector_t nr_sectors)
> >  {
> >  	struct badblocks *bb = &cmd->nq->dev->badblocks;
> >  	sector_t first_bad;
> > @@ -1326,10 +1325,8 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
> >  	return BLK_STS_IOERR;
> >  }
> >  
> > -static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
> > -						     enum req_op op,
> > -						     sector_t sector,
> > -						     sector_t nr_sectors)
> > +blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
> > +				       sector_t sector, sector_t nr_sectors)
> >  {
> >  	struct nullb_device *dev = cmd->nq->dev;
> >  
> > diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> > index 3c4c07f0418b..ee60f3a88796 100644
> > --- a/drivers/block/null_blk/null_blk.h
> > +++ b/drivers/block/null_blk/null_blk.h
> > @@ -132,6 +132,11 @@ blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
> >  				 sector_t nr_sectors);
> >  blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
> >  			      sector_t sector, unsigned int nr_sectors);
> > +blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
> > +				   sector_t nr_sectors);
> > +blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
> > +				       sector_t sector, sector_t nr_sectors);
> > +
> >  
> >  #ifdef CONFIG_BLK_DEV_ZONED
> >  int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
> > diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
> > index 0d5f9bf95229..09dae8d018aa 100644
> > --- a/drivers/block/null_blk/zoned.c
> > +++ b/drivers/block/null_blk/zoned.c
> > @@ -389,6 +389,12 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
> >  		goto unlock_zone;
> >  	}
> >  
> > +	if (dev->badblocks.shift != -1) {
> > +		ret = null_handle_badblocks(cmd, sector, nr_sectors);
> > +		if (ret != BLK_STS_OK)
> > +			goto unlock_zone;
> > +	}
> > +
> 
> This should come after the below zone condition check and change...
> 
> >  	if (zone->cond == BLK_ZONE_COND_CLOSED ||
> >  	    zone->cond == BLK_ZONE_COND_EMPTY) {
> >  		if (dev->need_zone_res_mgmt) {
> > @@ -412,9 +418,12 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
> >  		zone->cond = BLK_ZONE_COND_IMP_OPEN;
> >  	}
> >  
> > -	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> > -	if (ret != BLK_STS_OK)
> > -		goto unlock_zone;
> 
> ...so about here. If null_handle_badblocks() return BLK_STS_IOERR because the
> first sector of the write operation is the bad one, then we will return an error
> and not change the write pointer, but we do have correctly changed the zone
> condition nevertheless, exactly like a real drive would.

Thanks. Now I see your point. I thought that the zone resource management part
should be skipped if the first sector of the write operation is the bad one. But
as you pointed out, that does not simulate real drives well. Even when the write
pointer does not move, it's the better to change the zone condition.

When I modified this patch as you suggested, I noticed that this patch no longer
changes the behavior. Even before this change, null_process_cmd() returns
BLK_STS_IOERR, and null_zone_write() skips the write pointer move. I still think
this patch is needed for easier review as a preparation for the 5th patch. I
keep this patch in v5, and will rewrite the whole commit message.
diff mbox series

Patch

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 2a060a6ea8c0..87037cb375c9 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1309,9 +1309,8 @@  static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
 	return sts;
 }
 
-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
-						 sector_t sector,
-						 sector_t nr_sectors)
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+				   sector_t nr_sectors)
 {
 	struct badblocks *bb = &cmd->nq->dev->badblocks;
 	sector_t first_bad;
@@ -1326,10 +1325,8 @@  static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
 	return BLK_STS_IOERR;
 }
 
-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
-						     enum req_op op,
-						     sector_t sector,
-						     sector_t nr_sectors)
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+				       sector_t sector, sector_t nr_sectors)
 {
 	struct nullb_device *dev = cmd->nq->dev;
 
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 3c4c07f0418b..ee60f3a88796 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -132,6 +132,11 @@  blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
 				 sector_t nr_sectors);
 blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
 			      sector_t sector, unsigned int nr_sectors);
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+				   sector_t nr_sectors);
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+				       sector_t sector, sector_t nr_sectors);
+
 
 #ifdef CONFIG_BLK_DEV_ZONED
 int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 0d5f9bf95229..09dae8d018aa 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -389,6 +389,12 @@  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 		goto unlock_zone;
 	}
 
+	if (dev->badblocks.shift != -1) {
+		ret = null_handle_badblocks(cmd, sector, nr_sectors);
+		if (ret != BLK_STS_OK)
+			goto unlock_zone;
+	}
+
 	if (zone->cond == BLK_ZONE_COND_CLOSED ||
 	    zone->cond == BLK_ZONE_COND_EMPTY) {
 		if (dev->need_zone_res_mgmt) {
@@ -412,9 +418,12 @@  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 		zone->cond = BLK_ZONE_COND_IMP_OPEN;
 	}
 
-	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
-	if (ret != BLK_STS_OK)
-		goto unlock_zone;
+	if (dev->memory_backed) {
+		ret = null_handle_memory_backed(cmd, REQ_OP_WRITE, sector,
+						nr_sectors);
+		if (ret != BLK_STS_OK)
+			goto unlock_zone;
+	}
 
 	zone->wp += nr_sectors;
 	if (zone->wp == zone->start + zone->capacity) {