diff mbox series

scsi: scsi_debug: fix zone transition to full condition

Message ID 20220607014942.38384-1-damien.lemoal@opensource.wdc.com (mailing list archive)
State Superseded
Headers show
Series scsi: scsi_debug: fix zone transition to full condition | expand

Commit Message

Damien Le Moal June 7, 2022, 1:49 a.m. UTC
When a write command to a sequential write required or sequential write
preferred zone result in the zone write pointer reaching the end of the
zone, the zone condition must be set to full AND the number of
implicitly or explicitly open zones updated to have a correct accounting
for zone resources. However, the function zbc_inc_wp() only sets the
zone condition to full without updating the open zone counters,
resulting in a zone state machine breakage.

Factor out the correct code from zbc_finish_zone() to transition a zone
to the full condition and introduce the helper zbc_set_zone_full(). Use
this helper in zbc_finish_zone() and zbc_inc_wp() to correctly
transition zones to the full condition.

Fixes: 0d1cf9378bd4 ("scsi: scsi_debug: Add ZBC zone commands")
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 drivers/scsi/scsi_debug.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

Comments

Niklas Cassel June 7, 2022, 9:16 a.m. UTC | #1
On Tue, Jun 07, 2022 at 10:49:42AM +0900, Damien Le Moal wrote:
> When a write command to a sequential write required or sequential write
> preferred zone result in the zone write pointer reaching the end of the
> zone, the zone condition must be set to full AND the number of
> implicitly or explicitly open zones updated to have a correct accounting
> for zone resources. However, the function zbc_inc_wp() only sets the
> zone condition to full without updating the open zone counters,
> resulting in a zone state machine breakage.
>
> Factor out the correct code from zbc_finish_zone() to transition a zone
> to the full condition and introduce the helper zbc_set_zone_full(). Use
> this helper in zbc_finish_zone() and zbc_inc_wp() to correctly
> transition zones to the full condition.
>
> Fixes: 0d1cf9378bd4 ("scsi: scsi_debug: Add ZBC zone commands")
> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> ---
>  drivers/scsi/scsi_debug.c | 27 +++++++++++++++++----------
>  1 file changed, 17 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index 1f423f723d06..6c2bb02a42d8 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -2826,6 +2826,19 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
>	}
>  }
>
> +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
> +				     struct sdeb_zone_state *zsp)
> +{
> +	enum sdebug_z_cond zc = zsp->z_cond;
> +
> +	if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> +		zbc_close_zone(devip, zsp);
> +	if (zsp->z_cond == ZC4_CLOSED)
> +		devip->nr_closed--;
> +	zsp->z_wp = zsp->z_start + zsp->z_size;
> +	zsp->z_cond = ZC5_FULL;
> +}
> +
>  static void zbc_inc_wp(struct sdebug_dev_info *devip,
>		       unsigned long long lba, unsigned int num)
>  {
> @@ -2838,7 +2851,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>	if (zsp->z_type == ZBC_ZTYPE_SWR) {
>		zsp->z_wp += num;
>		if (zsp->z_wp >= zend)
> -			zsp->z_cond = ZC5_FULL;
> +			zbc_set_zone_full(devip, zsp);
>		return;
>	}
>
> @@ -2857,7 +2870,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>			n = num;
>		}
>		if (zsp->z_wp >= zend)
> -			zsp->z_cond = ZC5_FULL;
> +			zbc_set_zone_full(devip, zsp);

Hello Damien,

In the equivalent function (null_zone_write()) in null_blk,
we instead do this:

	if (zone->wp == zone->start + zone->capacity) {
		null_lock_zone_res(dev);
		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
			dev->nr_zones_exp_open--;
		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
			dev->nr_zones_imp_open--;
		zone->cond = BLK_ZONE_COND_FULL;
		null_unlock_zone_res(dev);
	}

Isn't it more clear to do the same here?
i.e. set the state to FULL, like before, and simply decrease the
imp/exp open counters.

zbc_set_zone_full() does some things that are not applicable in
the write path, specifically this:
> +     if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> +             zbc_close_zone(devip, zsp);
> +     if (zsp->z_cond == ZC4_CLOSED)
> +             devip->nr_closed--;

e.g. with this new helper, if we are in e.g. IMP OPEN, we will now
set the zone state first to CLOSED, increase the nr_closed counter,
decrease the nr_closed counter, and then set the zone state to FULL.

Isn't it more clear to just set it to FULL directly, like before,
and simply add:
		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
			dev->nr_zones_exp_open--;
		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
			dev->nr_zones_imp_open--;

Just like the equivalent code in null_blk.

>
>		num -= n;
>		lba += n;
> @@ -4731,14 +4744,8 @@ static void zbc_finish_zone(struct sdebug_dev_info *devip,
>	enum sdebug_z_cond zc = zsp->z_cond;
>
>	if (zc == ZC4_CLOSED || zc == ZC2_IMPLICIT_OPEN ||
> -	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY)) {
> -		if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> -			zbc_close_zone(devip, zsp);
> -		if (zsp->z_cond == ZC4_CLOSED)
> -			devip->nr_closed--;
> -		zsp->z_wp = zsp->z_start + zsp->z_size;
> -		zsp->z_cond = ZC5_FULL;
> -	}
> +	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY))
> +		zbc_set_zone_full(devip, zsp);

In the equivalent function (null_finish_zone()) in null_blk,
we instead do this:

	switch (zone->cond) {
	case BLK_ZONE_COND_FULL:
		/* finish operation on full is not an error */
		goto unlock;
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		dev->nr_zones_closed--;
		break;
	default:
		ret = BLK_STS_IOERR;
		goto unlock;
	}

This might be a bit more verbose, but isn't it more clear to implement it in
this way, since the spec (ZBC) defines the transition for each zone state.
That way, it is easier to follow that each zone transition follows the spec.

I realize that the equivalent for null_check_zone_resources() in scsi debug,
is check_zbc_access_params().

So for scsi debug, the equivalent call to null_check_zone_resources() has
already been done elsewhere, and can be skipped, but other than that, the
code should be able to look the same as null_blk.

Doing so also avoids the unnecessary temporary transition to CLOSED, and
increasing + decreasing the nr_closed counter, before transitioning to FULL.

The reason why I don't really like this, is that ZBC does not mention that
finishing a zone temporarily transitions to close, so why should the scsi
debug code do so, especially when the the null_blk code shows that it is
not needed.


Kind regards,
Niklas
Damien Le Moal June 7, 2022, 9:25 a.m. UTC | #2
On 6/7/22 18:16, Niklas Cassel wrote:
> On Tue, Jun 07, 2022 at 10:49:42AM +0900, Damien Le Moal wrote:
>> When a write command to a sequential write required or sequential write
>> preferred zone result in the zone write pointer reaching the end of the
>> zone, the zone condition must be set to full AND the number of
>> implicitly or explicitly open zones updated to have a correct accounting
>> for zone resources. However, the function zbc_inc_wp() only sets the
>> zone condition to full without updating the open zone counters,
>> resulting in a zone state machine breakage.
>>
>> Factor out the correct code from zbc_finish_zone() to transition a zone
>> to the full condition and introduce the helper zbc_set_zone_full(). Use
>> this helper in zbc_finish_zone() and zbc_inc_wp() to correctly
>> transition zones to the full condition.
>>
>> Fixes: 0d1cf9378bd4 ("scsi: scsi_debug: Add ZBC zone commands")
>> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
>> ---
>>   drivers/scsi/scsi_debug.c | 27 +++++++++++++++++----------
>>   1 file changed, 17 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
>> index 1f423f723d06..6c2bb02a42d8 100644
>> --- a/drivers/scsi/scsi_debug.c
>> +++ b/drivers/scsi/scsi_debug.c
>> @@ -2826,6 +2826,19 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
>> 	}
>>   }
>>
>> +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
>> +				     struct sdeb_zone_state *zsp)
>> +{
>> +	enum sdebug_z_cond zc = zsp->z_cond;
>> +
>> +	if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
>> +		zbc_close_zone(devip, zsp);
>> +	if (zsp->z_cond == ZC4_CLOSED)
>> +		devip->nr_closed--;
>> +	zsp->z_wp = zsp->z_start + zsp->z_size;
>> +	zsp->z_cond = ZC5_FULL;
>> +}
>> +
>>   static void zbc_inc_wp(struct sdebug_dev_info *devip,
>> 		       unsigned long long lba, unsigned int num)
>>   {
>> @@ -2838,7 +2851,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>> 	if (zsp->z_type == ZBC_ZTYPE_SWR) {
>> 		zsp->z_wp += num;
>> 		if (zsp->z_wp >= zend)
>> -			zsp->z_cond = ZC5_FULL;
>> +			zbc_set_zone_full(devip, zsp);
>> 		return;
>> 	}
>>
>> @@ -2857,7 +2870,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>> 			n = num;
>> 		}
>> 		if (zsp->z_wp >= zend)
>> -			zsp->z_cond = ZC5_FULL;
>> +			zbc_set_zone_full(devip, zsp);
> 
> Hello Damien,
> 
> In the equivalent function (null_zone_write()) in null_blk,
> we instead do this:
> 
> 	if (zone->wp == zone->start + zone->capacity) {
> 		null_lock_zone_res(dev);
> 		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
> 			dev->nr_zones_exp_open--;
> 		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
> 			dev->nr_zones_imp_open--;
> 		zone->cond = BLK_ZONE_COND_FULL;
> 		null_unlock_zone_res(dev);
> 	}
> 
> Isn't it more clear to do the same here?
> i.e. set the state to FULL, like before, and simply decrease the
> imp/exp open counters.
> 
> zbc_set_zone_full() does some things that are not applicable in
> the write path, specifically this:
>> +     if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
>> +             zbc_close_zone(devip, zsp);
>> +     if (zsp->z_cond == ZC4_CLOSED)
>> +             devip->nr_closed--;
> 
> e.g. with this new helper, if we are in e.g. IMP OPEN, we will now
> set the zone state first to CLOSED, increase the nr_closed counter,
> decrease the nr_closed counter, and then set the zone state to FULL.

Yes. I am aware of this. It is indeed a bit inefficient, but this makes 
for a simple bug fix by covering all call sites (finish and write). If 
you look at zbc_rwp_zone() for zone reset, something similar end up 
being done, the closed condition is used as an intermediate one. So that 
one should be cleaned up too.

We should improve this, but I think this should be done in a followup 
patch(es) and I prefer to keep this bug fix patch small.
Unless you insist :)

> 
> Isn't it more clear to just set it to FULL directly, like before,
> and simply add:
> 		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
> 			dev->nr_zones_exp_open--;
> 		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
> 			dev->nr_zones_imp_open--;
> 
> Just like the equivalent code in null_blk.
> 
>>
>> 		num -= n;
>> 		lba += n;
>> @@ -4731,14 +4744,8 @@ static void zbc_finish_zone(struct sdebug_dev_info *devip,
>> 	enum sdebug_z_cond zc = zsp->z_cond;
>>
>> 	if (zc == ZC4_CLOSED || zc == ZC2_IMPLICIT_OPEN ||
>> -	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY)) {
>> -		if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
>> -			zbc_close_zone(devip, zsp);
>> -		if (zsp->z_cond == ZC4_CLOSED)
>> -			devip->nr_closed--;
>> -		zsp->z_wp = zsp->z_start + zsp->z_size;
>> -		zsp->z_cond = ZC5_FULL;
>> -	}
>> +	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY))
>> +		zbc_set_zone_full(devip, zsp);
> 
> In the equivalent function (null_finish_zone()) in null_blk,
> we instead do this:
> 
> 	switch (zone->cond) {
> 	case BLK_ZONE_COND_FULL:
> 		/* finish operation on full is not an error */
> 		goto unlock;
> 	case BLK_ZONE_COND_EMPTY:
> 		ret = null_check_zone_resources(dev, zone);
> 		if (ret != BLK_STS_OK)
> 			goto unlock;
> 		break;
> 	case BLK_ZONE_COND_IMP_OPEN:
> 		dev->nr_zones_imp_open--;
> 		break;
> 	case BLK_ZONE_COND_EXP_OPEN:
> 		dev->nr_zones_exp_open--;
> 		break;
> 	case BLK_ZONE_COND_CLOSED:
> 		ret = null_check_zone_resources(dev, zone);
> 		if (ret != BLK_STS_OK)
> 			goto unlock;
> 		dev->nr_zones_closed--;
> 		break;
> 	default:
> 		ret = BLK_STS_IOERR;
> 		goto unlock;
> 	}
> 
> This might be a bit more verbose, but isn't it more clear to implement it in
> this way, since the spec (ZBC) defines the transition for each zone state.
> That way, it is easier to follow that each zone transition follows the spec.
> 
> I realize that the equivalent for null_check_zone_resources() in scsi debug,
> is check_zbc_access_params().
> 
> So for scsi debug, the equivalent call to null_check_zone_resources() has
> already been done elsewhere, and can be skipped, but other than that, the
> code should be able to look the same as null_blk.
> 
> Doing so also avoids the unnecessary temporary transition to CLOSED, and
> increasing + decreasing the nr_closed counter, before transitioning to FULL.
> 
> The reason why I don't really like this, is that ZBC does not mention that
> finishing a zone temporarily transitions to close, so why should the scsi
> debug code do so, especially when the the null_blk code shows that it is
> not needed.

Agree. That use of closed condition as an intermediate state is useless. 
It does make the code much shorter, but not more efficient. We should 
clean this in followup patches.

> 
> 
> Kind regards,
> Niklas
Niklas Cassel June 7, 2022, 9:37 a.m. UTC | #3
On Tue, Jun 07, 2022 at 06:25:45PM +0900, Damien Le Moal wrote:
> On 6/7/22 18:16, Niklas Cassel wrote:
> > On Tue, Jun 07, 2022 at 10:49:42AM +0900, Damien Le Moal wrote:
> > > When a write command to a sequential write required or sequential write
> > > preferred zone result in the zone write pointer reaching the end of the
> > > zone, the zone condition must be set to full AND the number of
> > > implicitly or explicitly open zones updated to have a correct accounting
> > > for zone resources. However, the function zbc_inc_wp() only sets the
> > > zone condition to full without updating the open zone counters,
> > > resulting in a zone state machine breakage.
> > >
> > > Factor out the correct code from zbc_finish_zone() to transition a zone
> > > to the full condition and introduce the helper zbc_set_zone_full(). Use
> > > this helper in zbc_finish_zone() and zbc_inc_wp() to correctly
> > > transition zones to the full condition.
> > >
> > > Fixes: 0d1cf9378bd4 ("scsi: scsi_debug: Add ZBC zone commands")
> > > Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> > > ---
> > >   drivers/scsi/scsi_debug.c | 27 +++++++++++++++++----------
> > >   1 file changed, 17 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> > > index 1f423f723d06..6c2bb02a42d8 100644
> > > --- a/drivers/scsi/scsi_debug.c
> > > +++ b/drivers/scsi/scsi_debug.c
> > > @@ -2826,6 +2826,19 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
> > >	}
> > >   }
> > >
> > > +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
> > > +				     struct sdeb_zone_state *zsp)
> > > +{
> > > +	enum sdebug_z_cond zc = zsp->z_cond;
> > > +
> > > +	if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> > > +		zbc_close_zone(devip, zsp);
> > > +	if (zsp->z_cond == ZC4_CLOSED)
> > > +		devip->nr_closed--;
> > > +	zsp->z_wp = zsp->z_start + zsp->z_size;
> > > +	zsp->z_cond = ZC5_FULL;
> > > +}
> > > +
> > >   static void zbc_inc_wp(struct sdebug_dev_info *devip,
> > >		       unsigned long long lba, unsigned int num)
> > >   {
> > > @@ -2838,7 +2851,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
> > >	if (zsp->z_type == ZBC_ZTYPE_SWR) {
> > >		zsp->z_wp += num;
> > >		if (zsp->z_wp >= zend)
> > > -			zsp->z_cond = ZC5_FULL;
> > > +			zbc_set_zone_full(devip, zsp);
> > >		return;
> > >	}
> > >
> > > @@ -2857,7 +2870,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
> > >			n = num;
> > >		}
> > >		if (zsp->z_wp >= zend)
> > > -			zsp->z_cond = ZC5_FULL;
> > > +			zbc_set_zone_full(devip, zsp);
> >
> > Hello Damien,
> >
> > In the equivalent function (null_zone_write()) in null_blk,
> > we instead do this:
> >
> >	if (zone->wp == zone->start + zone->capacity) {
> >		null_lock_zone_res(dev);
> >		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
> >			dev->nr_zones_exp_open--;
> >		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
> >			dev->nr_zones_imp_open--;
> >		zone->cond = BLK_ZONE_COND_FULL;
> >		null_unlock_zone_res(dev);
> >	}
> >
> > Isn't it more clear to do the same here?
> > i.e. set the state to FULL, like before, and simply decrease the
> > imp/exp open counters.
> >
> > zbc_set_zone_full() does some things that are not applicable in
> > the write path, specifically this:
> > > +     if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> > > +             zbc_close_zone(devip, zsp);
> > > +     if (zsp->z_cond == ZC4_CLOSED)
> > > +             devip->nr_closed--;
> >
> > e.g. with this new helper, if we are in e.g. IMP OPEN, we will now
> > set the zone state first to CLOSED, increase the nr_closed counter,
> > decrease the nr_closed counter, and then set the zone state to FULL.
>
> Yes. I am aware of this. It is indeed a bit inefficient, but this makes for
> a simple bug fix by covering all call sites (finish and write). If you look
> at zbc_rwp_zone() for zone reset, something similar end up being done, the
> closed condition is used as an intermediate one. So that one should be
> cleaned up too.
>
> We should improve this, but I think this should be done in a followup
> patch(es) and I prefer to keep this bug fix patch small.
> Unless you insist :)

I just saw that zbc_rwp_zone() does the same after sending my email.

I also saw that zbc_close_zone() does a:

	if (!zbc_zone_is_seq(zsp))
		return;

(Although I don't see a similar check in zbc_finish_zone())

So one has to ensure that both SWR and SWP are still handled correctly
when doing this cleanup, so considering that this fix solves the problem,
it is probably better to leave the cleanup to remove the extra (and at
least in my opinion, confusing) state transition in a follow up series.

Therefore:
Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
Damien Le Moal June 7, 2022, 9:40 a.m. UTC | #4
On 6/7/22 18:37, Niklas Cassel wrote:
> On Tue, Jun 07, 2022 at 06:25:45PM +0900, Damien Le Moal wrote:
>> On 6/7/22 18:16, Niklas Cassel wrote:
>>> On Tue, Jun 07, 2022 at 10:49:42AM +0900, Damien Le Moal wrote:
>>>> When a write command to a sequential write required or sequential write
>>>> preferred zone result in the zone write pointer reaching the end of the
>>>> zone, the zone condition must be set to full AND the number of
>>>> implicitly or explicitly open zones updated to have a correct accounting
>>>> for zone resources. However, the function zbc_inc_wp() only sets the
>>>> zone condition to full without updating the open zone counters,
>>>> resulting in a zone state machine breakage.
>>>>
>>>> Factor out the correct code from zbc_finish_zone() to transition a zone
>>>> to the full condition and introduce the helper zbc_set_zone_full(). Use
>>>> this helper in zbc_finish_zone() and zbc_inc_wp() to correctly
>>>> transition zones to the full condition.
>>>>
>>>> Fixes: 0d1cf9378bd4 ("scsi: scsi_debug: Add ZBC zone commands")
>>>> Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
>>>> ---
>>>>    drivers/scsi/scsi_debug.c | 27 +++++++++++++++++----------
>>>>    1 file changed, 17 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
>>>> index 1f423f723d06..6c2bb02a42d8 100644
>>>> --- a/drivers/scsi/scsi_debug.c
>>>> +++ b/drivers/scsi/scsi_debug.c
>>>> @@ -2826,6 +2826,19 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
>>>> 	}
>>>>    }
>>>>
>>>> +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
>>>> +				     struct sdeb_zone_state *zsp)
>>>> +{
>>>> +	enum sdebug_z_cond zc = zsp->z_cond;
>>>> +
>>>> +	if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
>>>> +		zbc_close_zone(devip, zsp);
>>>> +	if (zsp->z_cond == ZC4_CLOSED)
>>>> +		devip->nr_closed--;
>>>> +	zsp->z_wp = zsp->z_start + zsp->z_size;
>>>> +	zsp->z_cond = ZC5_FULL;
>>>> +}
>>>> +
>>>>    static void zbc_inc_wp(struct sdebug_dev_info *devip,
>>>> 		       unsigned long long lba, unsigned int num)
>>>>    {
>>>> @@ -2838,7 +2851,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>>>> 	if (zsp->z_type == ZBC_ZTYPE_SWR) {
>>>> 		zsp->z_wp += num;
>>>> 		if (zsp->z_wp >= zend)
>>>> -			zsp->z_cond = ZC5_FULL;
>>>> +			zbc_set_zone_full(devip, zsp);
>>>> 		return;
>>>> 	}
>>>>
>>>> @@ -2857,7 +2870,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>>>> 			n = num;
>>>> 		}
>>>> 		if (zsp->z_wp >= zend)
>>>> -			zsp->z_cond = ZC5_FULL;
>>>> +			zbc_set_zone_full(devip, zsp);
>>>
>>> Hello Damien,
>>>
>>> In the equivalent function (null_zone_write()) in null_blk,
>>> we instead do this:
>>>
>>> 	if (zone->wp == zone->start + zone->capacity) {
>>> 		null_lock_zone_res(dev);
>>> 		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
>>> 			dev->nr_zones_exp_open--;
>>> 		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
>>> 			dev->nr_zones_imp_open--;
>>> 		zone->cond = BLK_ZONE_COND_FULL;
>>> 		null_unlock_zone_res(dev);
>>> 	}
>>>
>>> Isn't it more clear to do the same here?
>>> i.e. set the state to FULL, like before, and simply decrease the
>>> imp/exp open counters.
>>>
>>> zbc_set_zone_full() does some things that are not applicable in
>>> the write path, specifically this:
>>>> +     if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
>>>> +             zbc_close_zone(devip, zsp);
>>>> +     if (zsp->z_cond == ZC4_CLOSED)
>>>> +             devip->nr_closed--;
>>>
>>> e.g. with this new helper, if we are in e.g. IMP OPEN, we will now
>>> set the zone state first to CLOSED, increase the nr_closed counter,
>>> decrease the nr_closed counter, and then set the zone state to FULL.
>>
>> Yes. I am aware of this. It is indeed a bit inefficient, but this makes for
>> a simple bug fix by covering all call sites (finish and write). If you look
>> at zbc_rwp_zone() for zone reset, something similar end up being done, the
>> closed condition is used as an intermediate one. So that one should be
>> cleaned up too.
>>
>> We should improve this, but I think this should be done in a followup
>> patch(es) and I prefer to keep this bug fix patch small.
>> Unless you insist :)
> 
> I just saw that zbc_rwp_zone() does the same after sending my email.
> 
> I also saw that zbc_close_zone() does a:
> 
> 	if (!zbc_zone_is_seq(zsp))
> 		return;
> 
> (Although I don't see a similar check in zbc_finish_zone())
> 
> So one has to ensure that both SWR and SWP are still handled correctly
> when doing this cleanup, so considering that this fix solves the problem,
> it is probably better to leave the cleanup to remove the extra (and at
> least in my opinion, confusing) state transition in a follow up series.
> 
> Therefore:
> Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>

Thanks. Could do a fix like this which avoids the closed intermediate state:

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 1f423f723d06..6ff4e03ad521 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2826,6 +2826,27 @@ static void zbc_open_zone(struct sdebug_dev_info 
*devip,
  	}
  }

+static void zbc_set_zone_full(struct sdebug_dev_info *devip,
+			      struct sdeb_zone_state *zsp)
+{
+	switch (zsp->z_cond) {
+	case ZC2_IMPLICIT_OPEN:
+		devip->nr_imp_open--;
+		break;
+	case ZC3_EXPLICIT_OPEN:
+		devip->nr_exp_open--;
+		break;
+	case ZC4_CLOSED:
+		devip->nr_closed--;
+		break;
+	default:
+		break;
+	}
+
+	zsp->z_wp = zsp->z_start + zsp->z_size;
+	zsp->z_cond = ZC5_FULL;
+}
+
  static void zbc_inc_wp(struct sdebug_dev_info *devip,
  		       unsigned long long lba, unsigned int num)
  {
@@ -2838,7 +2859,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
  	if (zsp->z_type == ZBC_ZTYPE_SWR) {
  		zsp->z_wp += num;
  		if (zsp->z_wp >= zend)
-			zsp->z_cond = ZC5_FULL;
+			zbc_set_zone_full(devip, zsp);
  		return;
  	}

@@ -2857,7 +2878,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
  			n = num;
  		}
  		if (zsp->z_wp >= zend)
-			zsp->z_cond = ZC5_FULL;
+			zbc_set_zone_full(devip, zsp);

  		num -= n;
  		lba += n;
@@ -4731,14 +4752,8 @@ static void zbc_finish_zone(struct 
sdebug_dev_info *devip,
  	enum sdebug_z_cond zc = zsp->z_cond;

  	if (zc == ZC4_CLOSED || zc == ZC2_IMPLICIT_OPEN ||
-	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY)) {
-		if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
-			zbc_close_zone(devip, zsp);
-		if (zsp->z_cond == ZC4_CLOSED)
-			devip->nr_closed--;
-		zsp->z_wp = zsp->z_start + zsp->z_size;
-		zsp->z_cond = ZC5_FULL;
-	}
+	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY))
+		zbc_set_zone_full(devip, zsp);
  }

  static void zbc_finish_all(struct sdebug_dev_info *devip)

Can send a v2 with that. Tested. It works fine too.
Niklas Cassel June 7, 2022, 10:05 a.m. UTC | #5
On Tue, Jun 07, 2022 at 06:40:14PM +0900, Damien Le Moal wrote:
> On 6/7/22 18:37, Niklas Cassel wrote:
> > On Tue, Jun 07, 2022 at 06:25:45PM +0900, Damien Le Moal wrote:
> > > On 6/7/22 18:16, Niklas Cassel wrote:
> > > > On Tue, Jun 07, 2022 at 10:49:42AM +0900, Damien Le Moal wrote:
> > > > > When a write command to a sequential write required or sequential write
> > > > > preferred zone result in the zone write pointer reaching the end of the
> > > > > zone, the zone condition must be set to full AND the number of
> > > > > implicitly or explicitly open zones updated to have a correct accounting
> > > > > for zone resources. However, the function zbc_inc_wp() only sets the
> > > > > zone condition to full without updating the open zone counters,
> > > > > resulting in a zone state machine breakage.
> > > > > 
> > > > > Factor out the correct code from zbc_finish_zone() to transition a zone
> > > > > to the full condition and introduce the helper zbc_set_zone_full(). Use
> > > > > this helper in zbc_finish_zone() and zbc_inc_wp() to correctly
> > > > > transition zones to the full condition.
> > > > > 
> > > > > Fixes: 0d1cf9378bd4 ("scsi: scsi_debug: Add ZBC zone commands")
> > > > > Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> > > > > ---
> > > > >    drivers/scsi/scsi_debug.c | 27 +++++++++++++++++----------
> > > > >    1 file changed, 17 insertions(+), 10 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> > > > > index 1f423f723d06..6c2bb02a42d8 100644
> > > > > --- a/drivers/scsi/scsi_debug.c
> > > > > +++ b/drivers/scsi/scsi_debug.c
> > > > > @@ -2826,6 +2826,19 @@ static void zbc_open_zone(struct sdebug_dev_info *devip,
> > > > > 	}
> > > > >    }
> > > > > 
> > > > > +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
> > > > > +				     struct sdeb_zone_state *zsp)
> > > > > +{
> > > > > +	enum sdebug_z_cond zc = zsp->z_cond;
> > > > > +
> > > > > +	if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> > > > > +		zbc_close_zone(devip, zsp);
> > > > > +	if (zsp->z_cond == ZC4_CLOSED)
> > > > > +		devip->nr_closed--;
> > > > > +	zsp->z_wp = zsp->z_start + zsp->z_size;
> > > > > +	zsp->z_cond = ZC5_FULL;
> > > > > +}
> > > > > +
> > > > >    static void zbc_inc_wp(struct sdebug_dev_info *devip,
> > > > > 		       unsigned long long lba, unsigned int num)
> > > > >    {
> > > > > @@ -2838,7 +2851,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
> > > > > 	if (zsp->z_type == ZBC_ZTYPE_SWR) {
> > > > > 		zsp->z_wp += num;
> > > > > 		if (zsp->z_wp >= zend)
> > > > > -			zsp->z_cond = ZC5_FULL;
> > > > > +			zbc_set_zone_full(devip, zsp);
> > > > > 		return;
> > > > > 	}
> > > > > 
> > > > > @@ -2857,7 +2870,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
> > > > > 			n = num;
> > > > > 		}
> > > > > 		if (zsp->z_wp >= zend)
> > > > > -			zsp->z_cond = ZC5_FULL;
> > > > > +			zbc_set_zone_full(devip, zsp);
> > > > 
> > > > Hello Damien,
> > > > 
> > > > In the equivalent function (null_zone_write()) in null_blk,
> > > > we instead do this:
> > > > 
> > > > 	if (zone->wp == zone->start + zone->capacity) {
> > > > 		null_lock_zone_res(dev);
> > > > 		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
> > > > 			dev->nr_zones_exp_open--;
> > > > 		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
> > > > 			dev->nr_zones_imp_open--;
> > > > 		zone->cond = BLK_ZONE_COND_FULL;
> > > > 		null_unlock_zone_res(dev);
> > > > 	}
> > > > 
> > > > Isn't it more clear to do the same here?
> > > > i.e. set the state to FULL, like before, and simply decrease the
> > > > imp/exp open counters.
> > > > 
> > > > zbc_set_zone_full() does some things that are not applicable in
> > > > the write path, specifically this:
> > > > > +     if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> > > > > +             zbc_close_zone(devip, zsp);
> > > > > +     if (zsp->z_cond == ZC4_CLOSED)
> > > > > +             devip->nr_closed--;
> > > > 
> > > > e.g. with this new helper, if we are in e.g. IMP OPEN, we will now
> > > > set the zone state first to CLOSED, increase the nr_closed counter,
> > > > decrease the nr_closed counter, and then set the zone state to FULL.
> > > 
> > > Yes. I am aware of this. It is indeed a bit inefficient, but this makes for
> > > a simple bug fix by covering all call sites (finish and write). If you look
> > > at zbc_rwp_zone() for zone reset, something similar end up being done, the
> > > closed condition is used as an intermediate one. So that one should be
> > > cleaned up too.
> > > 
> > > We should improve this, but I think this should be done in a followup
> > > patch(es) and I prefer to keep this bug fix patch small.
> > > Unless you insist :)
> > 
> > I just saw that zbc_rwp_zone() does the same after sending my email.
> > 
> > I also saw that zbc_close_zone() does a:
> > 
> > 	if (!zbc_zone_is_seq(zsp))
> > 		return;
> > 
> > (Although I don't see a similar check in zbc_finish_zone())
> > 
> > So one has to ensure that both SWR and SWP are still handled correctly
> > when doing this cleanup, so considering that this fix solves the problem,
> > it is probably better to leave the cleanup to remove the extra (and at
> > least in my opinion, confusing) state transition in a follow up series.
> > 
> > Therefore:
> > Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
> 
> Thanks. Could do a fix like this which avoids the closed intermediate state:
> 
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index 1f423f723d06..6ff4e03ad521 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -2826,6 +2826,27 @@ static void zbc_open_zone(struct sdebug_dev_info
> *devip,
>  	}
>  }
> 
> +static void zbc_set_zone_full(struct sdebug_dev_info *devip,
> +			      struct sdeb_zone_state *zsp)
> +{
> +	switch (zsp->z_cond) {
> +	case ZC2_IMPLICIT_OPEN:
> +		devip->nr_imp_open--;
> +		break;
> +	case ZC3_EXPLICIT_OPEN:
> +		devip->nr_exp_open--;
> +		break;
> +	case ZC4_CLOSED:
> +		devip->nr_closed--;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	zsp->z_wp = zsp->z_start + zsp->z_size;
> +	zsp->z_cond = ZC5_FULL;
> +}
> +
>  static void zbc_inc_wp(struct sdebug_dev_info *devip,
>  		       unsigned long long lba, unsigned int num)
>  {
> @@ -2838,7 +2859,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>  	if (zsp->z_type == ZBC_ZTYPE_SWR) {
>  		zsp->z_wp += num;
>  		if (zsp->z_wp >= zend)
> -			zsp->z_cond = ZC5_FULL;
> +			zbc_set_zone_full(devip, zsp);
>  		return;
>  	}
> 
> @@ -2857,7 +2878,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip,
>  			n = num;
>  		}
>  		if (zsp->z_wp >= zend)
> -			zsp->z_cond = ZC5_FULL;
> +			zbc_set_zone_full(devip, zsp);
> 
>  		num -= n;
>  		lba += n;
> @@ -4731,14 +4752,8 @@ static void zbc_finish_zone(struct sdebug_dev_info
> *devip,
>  	enum sdebug_z_cond zc = zsp->z_cond;
> 
>  	if (zc == ZC4_CLOSED || zc == ZC2_IMPLICIT_OPEN ||
> -	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY)) {
> -		if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
> -			zbc_close_zone(devip, zsp);
> -		if (zsp->z_cond == ZC4_CLOSED)
> -			devip->nr_closed--;
> -		zsp->z_wp = zsp->z_start + zsp->z_size;
> -		zsp->z_cond = ZC5_FULL;
> -	}
> +	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY))
> +		zbc_set_zone_full(devip, zsp);
>  }
> 
>  static void zbc_finish_all(struct sdebug_dev_info *devip)
> 
> Can send a v2 with that. Tested. It works fine too.

Looks good to me.

But if you send it, I think that you should send a 2/2 that cleans up
zbc_rwp_zone() as well, so that we remove the intermediate closed state
everywhere.

The reason why I really disliked the intermediate closed state when
transitioning to FULL/EMPTY, is because it is not in the spec.

For closed and empty zones, we do have an intermediate implicit open state,
e.g. for a finish zone operation, both in the spec and in the code
(check_zbc_access_params()), so this intermediate state was represented in
the code for a reason.

Someone reading the code could thus think that the intermediate closed state
also exists for a reason (is described in the spec), but that isn't the case.


Kind regards,
Niklas
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 1f423f723d06..6c2bb02a42d8 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2826,6 +2826,19 @@  static void zbc_open_zone(struct sdebug_dev_info *devip,
 	}
 }
 
+static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
+				     struct sdeb_zone_state *zsp)
+{
+	enum sdebug_z_cond zc = zsp->z_cond;
+
+	if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
+		zbc_close_zone(devip, zsp);
+	if (zsp->z_cond == ZC4_CLOSED)
+		devip->nr_closed--;
+	zsp->z_wp = zsp->z_start + zsp->z_size;
+	zsp->z_cond = ZC5_FULL;
+}
+
 static void zbc_inc_wp(struct sdebug_dev_info *devip,
 		       unsigned long long lba, unsigned int num)
 {
@@ -2838,7 +2851,7 @@  static void zbc_inc_wp(struct sdebug_dev_info *devip,
 	if (zsp->z_type == ZBC_ZTYPE_SWR) {
 		zsp->z_wp += num;
 		if (zsp->z_wp >= zend)
-			zsp->z_cond = ZC5_FULL;
+			zbc_set_zone_full(devip, zsp);
 		return;
 	}
 
@@ -2857,7 +2870,7 @@  static void zbc_inc_wp(struct sdebug_dev_info *devip,
 			n = num;
 		}
 		if (zsp->z_wp >= zend)
-			zsp->z_cond = ZC5_FULL;
+			zbc_set_zone_full(devip, zsp);
 
 		num -= n;
 		lba += n;
@@ -4731,14 +4744,8 @@  static void zbc_finish_zone(struct sdebug_dev_info *devip,
 	enum sdebug_z_cond zc = zsp->z_cond;
 
 	if (zc == ZC4_CLOSED || zc == ZC2_IMPLICIT_OPEN ||
-	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY)) {
-		if (zc == ZC2_IMPLICIT_OPEN || zc == ZC3_EXPLICIT_OPEN)
-			zbc_close_zone(devip, zsp);
-		if (zsp->z_cond == ZC4_CLOSED)
-			devip->nr_closed--;
-		zsp->z_wp = zsp->z_start + zsp->z_size;
-		zsp->z_cond = ZC5_FULL;
-	}
+	    zc == ZC3_EXPLICIT_OPEN || (empty && zc == ZC1_EMPTY))
+		zbc_set_zone_full(devip, zsp);
 }
 
 static void zbc_finish_all(struct sdebug_dev_info *devip)