diff mbox series

lightnvm: pblk: extend line wp balance check

Message ID 20190129084737.718-1-hans@owltronix.com (mailing list archive)
State New, archived
Headers show
Series lightnvm: pblk: extend line wp balance check | expand

Commit Message

Hans Holmberg Jan. 29, 2019, 8:47 a.m. UTC
From: Hans Holmberg <hans.holmberg@cnexlabs.com>

pblk stripes writes of minimal write size across all non-offline chunks
in a line, which means that the maximum write pointer delta should not
exceed the minimal write size. Extend the line write pointer balance check
to cover this case.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
---

This patch applies on top of Zhoujie's V3 of 
"lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced

 drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 23 deletions(-)

Comments

Javier González Jan. 29, 2019, 11:19 a.m. UTC | #1
> On 29 Jan 2019, at 09.47, hans@owltronix.com wrote:
> 
> From: Hans Holmberg <hans.holmberg@cnexlabs.com>
> 
> pblk stripes writes of minimal write size across all non-offline chunks
> in a line, which means that the maximum write pointer delta should not
> exceed the minimal write size. Extend the line write pointer balance check
> to cover this case.
> 
> Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
> ---
> 
> This patch applies on top of Zhoujie's V3 of
> "lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced
> 
> drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
> 1 file changed, 37 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index 02d466e6925e..d86f580036d3 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
> 	return (distance > line->left_msecs) ? line->left_msecs : distance;
> }
> 
> -static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
> -				      struct pblk_line *line)
> +/* Return a chunk belonging to a line by stripe(write order) index */
> +static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
> +						  struct pblk_line *line,
> +						  int index)
> {
> 	struct nvm_tgt_dev *dev = pblk->dev;
> 	struct nvm_geo *geo = &dev->geo;
> -	struct pblk_line_meta *lm = &pblk->lm;
> 	struct pblk_lun *rlun;
> -	struct nvm_chk_meta *chunk;
> 	struct ppa_addr ppa;
> -	u64 line_wp;
> -	int pos, i, bit;
> +	int pos;
> 
> -	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
> -	if (bit >= lm->blk_per_line)
> -		return 0;
> -	rlun = &pblk->luns[bit];
> +	rlun = &pblk->luns[index];
> 	ppa = rlun->bppa;
> 	pos = pblk_ppa_to_pos(geo, ppa);
> -	chunk = &line->chks[pos];
> 
> -	line_wp = chunk->wp;
> +	return &line->chks[pos];
> +}
> 
> -	for (i = bit + 1; i < lm->blk_per_line; i++) {
> -		rlun = &pblk->luns[i];
> -		ppa = rlun->bppa;
> -		pos = pblk_ppa_to_pos(geo, ppa);
> -		chunk = &line->chks[pos];
> +static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
> +				      struct pblk_line *line)
> +{
> +	struct pblk_line_meta *lm = &pblk->lm;
> +	int blk_in_line = lm->blk_per_line;
> +	struct nvm_chk_meta *chunk;
> +	u64 max_wp, min_wp;
> +	int i;
> 
> -		if (chunk->state & NVM_CHK_ST_OFFLINE)
> -			continue;
> +	i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
> +
> +	/* If there is one or zero good chunks in the line,
> +	 * the write pointers can't be unbalanced.
> +	 */
> +	if (i >= (blk_in_line - 1))
> +		return 0;
> 
> -		if (chunk->wp > line_wp)
> +	chunk = pblk_get_stripe_chunk(pblk, line, i);
> +	max_wp = chunk->wp;
> +	if (max_wp > pblk->max_write_pgs)
> +		min_wp = max_wp - pblk->max_write_pgs;
> +	else
> +		min_wp = 0;
> +
> +	i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> +	while (i < blk_in_line) {
> +		chunk = pblk_get_stripe_chunk(pblk, line, i);
> +		if (chunk->wp > max_wp || chunk->wp < min_wp)
> 			return 1;
> -		else if (chunk->wp < line_wp)
> -			line_wp = chunk->wp;
> +
> +		i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> 	}
> 
> 	return 0;
> @@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 	int ret;
> 	u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
> 
> -	if (pblk_line_wp_is_unbalanced(pblk, line))
> +	if (pblk_line_wps_are_unbalanced(pblk, line))
> 		pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
> 
> 	ppa_list = p.ppa_list;
> --
> 2.17.1

If I am understanding correctly, you want to protect against the case
where a pfail has broken the ws_min partition of a chunk, right? I say
this because there is a guarantee that the minimal write size and pblk's
write size align with ws_min and ws_opt. Even if we have grown bad
blocks on pfail for the current line (which is a bigger problem because
we have potentially lost data), this guarantee would remain.

If this is the case, my first reaction would be to say that the
controller is responsible for guaranteeing atomicity for both scalar and
vector I/Os. If this is not guaranteed, we have bigger problems than
this (e.g., for the write error recovery path).

Are you thinking of a different case?

Javier
Hans Holmberg Jan. 29, 2019, 12:49 p.m. UTC | #2
On Tue, Jan 29, 2019 at 12:19 PM Javier González <javier@javigon.com> wrote:
>
>
> > On 29 Jan 2019, at 09.47, hans@owltronix.com wrote:
> >
> > From: Hans Holmberg <hans.holmberg@cnexlabs.com>
> >
> > pblk stripes writes of minimal write size across all non-offline chunks
> > in a line, which means that the maximum write pointer delta should not
> > exceed the minimal write size. Extend the line write pointer balance check
> > to cover this case.
> >
> > Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
> > ---
> >
> > This patch applies on top of Zhoujie's V3 of
> > "lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced
> >
> > drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
> > 1 file changed, 37 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> > index 02d466e6925e..d86f580036d3 100644
> > --- a/drivers/lightnvm/pblk-recovery.c
> > +++ b/drivers/lightnvm/pblk-recovery.c
> > @@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
> >       return (distance > line->left_msecs) ? line->left_msecs : distance;
> > }
> >
> > -static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
> > -                                   struct pblk_line *line)
> > +/* Return a chunk belonging to a line by stripe(write order) index */
> > +static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
> > +                                               struct pblk_line *line,
> > +                                               int index)
> > {
> >       struct nvm_tgt_dev *dev = pblk->dev;
> >       struct nvm_geo *geo = &dev->geo;
> > -     struct pblk_line_meta *lm = &pblk->lm;
> >       struct pblk_lun *rlun;
> > -     struct nvm_chk_meta *chunk;
> >       struct ppa_addr ppa;
> > -     u64 line_wp;
> > -     int pos, i, bit;
> > +     int pos;
> >
> > -     bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
> > -     if (bit >= lm->blk_per_line)
> > -             return 0;
> > -     rlun = &pblk->luns[bit];
> > +     rlun = &pblk->luns[index];
> >       ppa = rlun->bppa;
> >       pos = pblk_ppa_to_pos(geo, ppa);
> > -     chunk = &line->chks[pos];
> >
> > -     line_wp = chunk->wp;
> > +     return &line->chks[pos];
> > +}
> >
> > -     for (i = bit + 1; i < lm->blk_per_line; i++) {
> > -             rlun = &pblk->luns[i];
> > -             ppa = rlun->bppa;
> > -             pos = pblk_ppa_to_pos(geo, ppa);
> > -             chunk = &line->chks[pos];
> > +static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
> > +                                   struct pblk_line *line)
> > +{
> > +     struct pblk_line_meta *lm = &pblk->lm;
> > +     int blk_in_line = lm->blk_per_line;
> > +     struct nvm_chk_meta *chunk;
> > +     u64 max_wp, min_wp;
> > +     int i;
> >
> > -             if (chunk->state & NVM_CHK_ST_OFFLINE)
> > -                     continue;
> > +     i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
> > +
> > +     /* If there is one or zero good chunks in the line,
> > +      * the write pointers can't be unbalanced.
> > +      */
> > +     if (i >= (blk_in_line - 1))
> > +             return 0;
> >
> > -             if (chunk->wp > line_wp)
> > +     chunk = pblk_get_stripe_chunk(pblk, line, i);
> > +     max_wp = chunk->wp;
> > +     if (max_wp > pblk->max_write_pgs)
> > +             min_wp = max_wp - pblk->max_write_pgs;
> > +     else
> > +             min_wp = 0;
> > +
> > +     i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> > +     while (i < blk_in_line) {
> > +             chunk = pblk_get_stripe_chunk(pblk, line, i);
> > +             if (chunk->wp > max_wp || chunk->wp < min_wp)
> >                       return 1;
> > -             else if (chunk->wp < line_wp)
> > -                     line_wp = chunk->wp;
> > +
> > +             i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> >       }
> >
> >       return 0;
> > @@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> >       int ret;
> >       u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
> >
> > -     if (pblk_line_wp_is_unbalanced(pblk, line))
> > +     if (pblk_line_wps_are_unbalanced(pblk, line))
> >               pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
> >
> >       ppa_list = p.ppa_list;
> > --
> > 2.17.1
>
> If I am understanding correctly, you want to protect against the case
> where a pfail has broken the ws_min partition of a chunk, right? I say
> this because there is a guarantee that the minimal write size and pblk's
> write size align with ws_min and ws_opt. Even if we have grown bad
> blocks on pfail for the current line (which is a bigger problem because
> we have potentially lost data), this guarantee would remain.
>
> If this is the case, my first reaction would be to say that the
> controller is responsible for guaranteeing atomicity for both scalar and
> vector I/Os. If this is not guaranteed, we have bigger problems than
> this (e.g., for the write error recovery path).
>
> Are you thinking of a different case?

The write pointer check triggers a warning if something unexpected has
happened to the chunks.
i.e. if something else than pblk messed with the disk, or if the user
tries to recover a pblk instance with an invalid lun configuration.

This patch adds a warning if a chunk wp is too small(i.e. if a chunk
was unexpectedly reset)

>
> Javier
>
Javier González Jan. 29, 2019, 3:03 p.m. UTC | #3
> On 29 Jan 2019, at 13.49, Hans Holmberg <hans@owltronix.com> wrote:
> 
> On Tue, Jan 29, 2019 at 12:19 PM Javier González <javier@javigon.com> wrote:
>>> On 29 Jan 2019, at 09.47, hans@owltronix.com wrote:
>>> 
>>> From: Hans Holmberg <hans.holmberg@cnexlabs.com>
>>> 
>>> pblk stripes writes of minimal write size across all non-offline chunks
>>> in a line, which means that the maximum write pointer delta should not
>>> exceed the minimal write size. Extend the line write pointer balance check
>>> to cover this case.
>>> 
>>> Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
>>> ---
>>> 
>>> This patch applies on top of Zhoujie's V3 of
>>> "lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced
>>> 
>>> drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
>>> 1 file changed, 37 insertions(+), 23 deletions(-)
>>> 
>>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>>> index 02d466e6925e..d86f580036d3 100644
>>> --- a/drivers/lightnvm/pblk-recovery.c
>>> +++ b/drivers/lightnvm/pblk-recovery.c
>>> @@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
>>>      return (distance > line->left_msecs) ? line->left_msecs : distance;
>>> }
>>> 
>>> -static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
>>> -                                   struct pblk_line *line)
>>> +/* Return a chunk belonging to a line by stripe(write order) index */
>>> +static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
>>> +                                               struct pblk_line *line,
>>> +                                               int index)
>>> {
>>>      struct nvm_tgt_dev *dev = pblk->dev;
>>>      struct nvm_geo *geo = &dev->geo;
>>> -     struct pblk_line_meta *lm = &pblk->lm;
>>>      struct pblk_lun *rlun;
>>> -     struct nvm_chk_meta *chunk;
>>>      struct ppa_addr ppa;
>>> -     u64 line_wp;
>>> -     int pos, i, bit;
>>> +     int pos;
>>> 
>>> -     bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
>>> -     if (bit >= lm->blk_per_line)
>>> -             return 0;
>>> -     rlun = &pblk->luns[bit];
>>> +     rlun = &pblk->luns[index];
>>>      ppa = rlun->bppa;
>>>      pos = pblk_ppa_to_pos(geo, ppa);
>>> -     chunk = &line->chks[pos];
>>> 
>>> -     line_wp = chunk->wp;
>>> +     return &line->chks[pos];
>>> +}
>>> 
>>> -     for (i = bit + 1; i < lm->blk_per_line; i++) {
>>> -             rlun = &pblk->luns[i];
>>> -             ppa = rlun->bppa;
>>> -             pos = pblk_ppa_to_pos(geo, ppa);
>>> -             chunk = &line->chks[pos];
>>> +static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
>>> +                                   struct pblk_line *line)
>>> +{
>>> +     struct pblk_line_meta *lm = &pblk->lm;
>>> +     int blk_in_line = lm->blk_per_line;
>>> +     struct nvm_chk_meta *chunk;
>>> +     u64 max_wp, min_wp;
>>> +     int i;
>>> 
>>> -             if (chunk->state & NVM_CHK_ST_OFFLINE)
>>> -                     continue;
>>> +     i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
>>> +
>>> +     /* If there is one or zero good chunks in the line,
>>> +      * the write pointers can't be unbalanced.
>>> +      */
>>> +     if (i >= (blk_in_line - 1))
>>> +             return 0;
>>> 
>>> -             if (chunk->wp > line_wp)
>>> +     chunk = pblk_get_stripe_chunk(pblk, line, i);
>>> +     max_wp = chunk->wp;
>>> +     if (max_wp > pblk->max_write_pgs)
>>> +             min_wp = max_wp - pblk->max_write_pgs;
>>> +     else
>>> +             min_wp = 0;
>>> +
>>> +     i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
>>> +     while (i < blk_in_line) {
>>> +             chunk = pblk_get_stripe_chunk(pblk, line, i);
>>> +             if (chunk->wp > max_wp || chunk->wp < min_wp)
>>>                      return 1;
>>> -             else if (chunk->wp < line_wp)
>>> -                     line_wp = chunk->wp;
>>> +
>>> +             i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
>>>      }
>>> 
>>>      return 0;
>>> @@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>>      int ret;
>>>      u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
>>> 
>>> -     if (pblk_line_wp_is_unbalanced(pblk, line))
>>> +     if (pblk_line_wps_are_unbalanced(pblk, line))
>>>              pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
>>> 
>>>      ppa_list = p.ppa_list;
>>> --
>>> 2.17.1
>> 
>> If I am understanding correctly, you want to protect against the case
>> where a pfail has broken the ws_min partition of a chunk, right? I say
>> this because there is a guarantee that the minimal write size and pblk's
>> write size align with ws_min and ws_opt. Even if we have grown bad
>> blocks on pfail for the current line (which is a bigger problem because
>> we have potentially lost data), this guarantee would remain.
>> 
>> If this is the case, my first reaction would be to say that the
>> controller is responsible for guaranteeing atomicity for both scalar and
>> vector I/Os. If this is not guaranteed, we have bigger problems than
>> this (e.g., for the write error recovery path).
>> 
>> Are you thinking of a different case?
> 
> The write pointer check triggers a warning if something unexpected has
> happened to the chunks.
> i.e. if something else than pblk messed with the disk, or if the user
> tries to recover a pblk instance with an invalid lun configuration.

But this will only solve a very specific corner case of this, right?
This is, when you are writing at WP on a middle LUN exactly < ws_min.

For example, If the user attempts to start pblk with a different LUN
configuration, the alignment is the same an pblk will actually fail
because it cannot find any emeta.

For completion, the original wp_unbalanced patch attempted to protect
against the case where several outstanding I/Os to different PUs are
inflight and then you have a pfail. In this case, a write to a PU that
is not "next" in the line bitmap completes and we have a whole, meaning
that if we recover this case, we risk to overwrite valid data, as we
break the "sequentiality" the line, which allows for recovering by
replaying the P2L stored on the OOB.

> 
> This patch adds a warning if a chunk wp is too small(i.e. if a chunk
> was unexpectedly reset)

I am not arguing against the implementation, I am just trying to
understand what you are trying to fix. If it is the case I described
originally, then I do not think it is possible on the Open-Channel
architecture. If you want to add protection against corruptions, then
this needs more work as many corruption cases are missing - you would
need something like a OOB watermark to protect open lines and something
like a OOB lba CRC check in emeta to validate that no data has been
altered.

If you ask me, I do not think the latter belongs to pblk, and if it did,
I would suggest a whole new (optional) feature that adds this short of
integrity protection, ideally, reusing NVMe PI. In the original pblk, we
have followed the same assumption as block devices do; you can go an
write on the side to a block device that has a FS on top; the block
device will not complain at all - if the FS detects this then they will
try to fix it, otherwise you lost data.

In this context, we have discussed about a pblk tool a la fsck, which can
cover all this cases instead of adding more complexity to the recovery
path in the kernel. Here, you patch makes sense we fail if something
suspicious has occurred and move the burden to the pblk tool for it to
do the repair. But again, if the goal s adding integrity protection, it
needs to cover the rest of the cases.

Hope this makes sense.

Javier
Hans Holmberg Jan. 29, 2019, 3:49 p.m. UTC | #4
On Tue, Jan 29, 2019 at 4:03 PM Javier González <javier@javigon.com> wrote:
>
> > On 29 Jan 2019, at 13.49, Hans Holmberg <hans@owltronix.com> wrote:
> >
> > On Tue, Jan 29, 2019 at 12:19 PM Javier González <javier@javigon.com> wrote:
> >>> On 29 Jan 2019, at 09.47, hans@owltronix.com wrote:
> >>>
> >>> From: Hans Holmberg <hans.holmberg@cnexlabs.com>
> >>>
> >>> pblk stripes writes of minimal write size across all non-offline chunks
> >>> in a line, which means that the maximum write pointer delta should not
> >>> exceed the minimal write size. Extend the line write pointer balance check
> >>> to cover this case.
> >>>
> >>> Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
> >>> ---
> >>>
> >>> This patch applies on top of Zhoujie's V3 of
> >>> "lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced
> >>>
> >>> drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
> >>> 1 file changed, 37 insertions(+), 23 deletions(-)
> >>>
> >>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> >>> index 02d466e6925e..d86f580036d3 100644
> >>> --- a/drivers/lightnvm/pblk-recovery.c
> >>> +++ b/drivers/lightnvm/pblk-recovery.c
> >>> @@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
> >>>      return (distance > line->left_msecs) ? line->left_msecs : distance;
> >>> }
> >>>
> >>> -static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
> >>> -                                   struct pblk_line *line)
> >>> +/* Return a chunk belonging to a line by stripe(write order) index */
> >>> +static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
> >>> +                                               struct pblk_line *line,
> >>> +                                               int index)
> >>> {
> >>>      struct nvm_tgt_dev *dev = pblk->dev;
> >>>      struct nvm_geo *geo = &dev->geo;
> >>> -     struct pblk_line_meta *lm = &pblk->lm;
> >>>      struct pblk_lun *rlun;
> >>> -     struct nvm_chk_meta *chunk;
> >>>      struct ppa_addr ppa;
> >>> -     u64 line_wp;
> >>> -     int pos, i, bit;
> >>> +     int pos;
> >>>
> >>> -     bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
> >>> -     if (bit >= lm->blk_per_line)
> >>> -             return 0;
> >>> -     rlun = &pblk->luns[bit];
> >>> +     rlun = &pblk->luns[index];
> >>>      ppa = rlun->bppa;
> >>>      pos = pblk_ppa_to_pos(geo, ppa);
> >>> -     chunk = &line->chks[pos];
> >>>
> >>> -     line_wp = chunk->wp;
> >>> +     return &line->chks[pos];
> >>> +}
> >>>
> >>> -     for (i = bit + 1; i < lm->blk_per_line; i++) {
> >>> -             rlun = &pblk->luns[i];
> >>> -             ppa = rlun->bppa;
> >>> -             pos = pblk_ppa_to_pos(geo, ppa);
> >>> -             chunk = &line->chks[pos];
> >>> +static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
> >>> +                                   struct pblk_line *line)
> >>> +{
> >>> +     struct pblk_line_meta *lm = &pblk->lm;
> >>> +     int blk_in_line = lm->blk_per_line;
> >>> +     struct nvm_chk_meta *chunk;
> >>> +     u64 max_wp, min_wp;
> >>> +     int i;
> >>>
> >>> -             if (chunk->state & NVM_CHK_ST_OFFLINE)
> >>> -                     continue;
> >>> +     i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
> >>> +
> >>> +     /* If there is one or zero good chunks in the line,
> >>> +      * the write pointers can't be unbalanced.
> >>> +      */
> >>> +     if (i >= (blk_in_line - 1))
> >>> +             return 0;
> >>>
> >>> -             if (chunk->wp > line_wp)
> >>> +     chunk = pblk_get_stripe_chunk(pblk, line, i);
> >>> +     max_wp = chunk->wp;
> >>> +     if (max_wp > pblk->max_write_pgs)
> >>> +             min_wp = max_wp - pblk->max_write_pgs;
> >>> +     else
> >>> +             min_wp = 0;
> >>> +
> >>> +     i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> >>> +     while (i < blk_in_line) {
> >>> +             chunk = pblk_get_stripe_chunk(pblk, line, i);
> >>> +             if (chunk->wp > max_wp || chunk->wp < min_wp)
> >>>                      return 1;
> >>> -             else if (chunk->wp < line_wp)
> >>> -                     line_wp = chunk->wp;
> >>> +
> >>> +             i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> >>>      }
> >>>
> >>>      return 0;
> >>> @@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> >>>      int ret;
> >>>      u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
> >>>
> >>> -     if (pblk_line_wp_is_unbalanced(pblk, line))
> >>> +     if (pblk_line_wps_are_unbalanced(pblk, line))
> >>>              pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
> >>>
> >>>      ppa_list = p.ppa_list;
> >>> --
> >>> 2.17.1
> >>
> >> If I am understanding correctly, you want to protect against the case
> >> where a pfail has broken the ws_min partition of a chunk, right? I say
> >> this because there is a guarantee that the minimal write size and pblk's
> >> write size align with ws_min and ws_opt. Even if we have grown bad
> >> blocks on pfail for the current line (which is a bigger problem because
> >> we have potentially lost data), this guarantee would remain.
> >>
> >> If this is the case, my first reaction would be to say that the
> >> controller is responsible for guaranteeing atomicity for both scalar and
> >> vector I/Os. If this is not guaranteed, we have bigger problems than
> >> this (e.g., for the write error recovery path).
> >>
> >> Are you thinking of a different case?
> >
> > The write pointer check triggers a warning if something unexpected has
> > happened to the chunks.
> > i.e. if something else than pblk messed with the disk, or if the user
> > tries to recover a pblk instance with an invalid lun configuration.
>
> But this will only solve a very specific corner case of this, right?
> This is, when you are writing at WP on a middle LUN exactly < ws_min.
>> For example, If the user attempts to start pblk with a different LUN
> configuration, the alignment is the same an pblk will actually fail
> because it cannot find any emeta.

Well, we will try to recover a line if the emeta data cannot be read
or the data fails the crc check, and the recovery path assumes that
the write pointers pointers have been incremented in the pblk stripe
order. We'll get a warning now for all cases if this assumption is not
valid.

> For completion, the original wp_unbalanced patch attempted to protect
> against the case where several outstanding I/Os to different PUs are
> inflight and then you have a pfail. In this case, a write to a PU that
> is not "next" in the line bitmap completes and we have a whole, meaning
> that if we recover this case, we risk to overwrite valid data, as we
> break the "sequentiality" the line, which allows for recovering by
> replaying the P2L stored on the OOB.

For outstanding writes, we can leave no guarantees anyway. If a
presync was attached to a bio, that bio will complete when the write
completes.

>
> >
> > This patch adds a warning if a chunk wp is too small(i.e. if a chunk
> > was unexpectedly reset)
>
> I am not arguing against the implementation, I am just trying to
> understand what you are trying to fix. If it is the case I described
> originally, then I do not think it is possible on the Open-Channel
> architecture. If you want to add protection against corruptions, then
> this needs more work as many corruption cases are missing - you would
> need something like a OOB watermark to protect open lines and something
> like a OOB lba CRC check in emeta to validate that no data has been
> altered.
>
> If you ask me, I do not think the latter belongs to pblk, and if it did,
> I would suggest a whole new (optional) feature that adds this short of
> integrity protection, ideally, reusing NVMe PI. In the original pblk, we
> have followed the same assumption as block devices do; you can go an
> write on the side to a block device that has a FS on top; the block
> device will not complain at all - if the FS detects this then they will
> try to fix it, otherwise you lost data.
>
> In this context, we have discussed about a pblk tool a la fsck, which can
> cover all this cases instead of adding more complexity to the recovery
> path in the kernel. Here, you patch makes sense we fail if something
> suspicious has occurred and move the burden to the pblk tool for it to
> do the repair. But again, if the goal s adding integrity protection, it
> needs to cover the rest of the cases.
>
> Hope this makes sense.

It does! I'm not trying to do anything more than warn if the write
pointers are not what we expect them to be.

You're right, we need some sort of superblock and some recovery tool
to make pblk more robust against corruptions.
Now, we get a warning at least :)
Javier González Jan. 29, 2019, 6:23 p.m. UTC | #5
> On 29 Jan 2019, at 16.49, Hans Holmberg <hans@owltronix.com> wrote:
> 
> On Tue, Jan 29, 2019 at 4:03 PM Javier González <javier@javigon.com> wrote:
>>> On 29 Jan 2019, at 13.49, Hans Holmberg <hans@owltronix.com> wrote:
>>> 
>>> On Tue, Jan 29, 2019 at 12:19 PM Javier González <javier@javigon.com> wrote:
>>>>> On 29 Jan 2019, at 09.47, hans@owltronix.com wrote:
>>>>> 
>>>>> From: Hans Holmberg <hans.holmberg@cnexlabs.com>
>>>>> 
>>>>> pblk stripes writes of minimal write size across all non-offline chunks
>>>>> in a line, which means that the maximum write pointer delta should not
>>>>> exceed the minimal write size. Extend the line write pointer balance check
>>>>> to cover this case.
>>>>> 
>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
>>>>> ---
>>>>> 
>>>>> This patch applies on top of Zhoujie's V3 of
>>>>> "lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced
>>>>> 
>>>>> drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
>>>>> 1 file changed, 37 insertions(+), 23 deletions(-)
>>>>> 
>>>>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>>>>> index 02d466e6925e..d86f580036d3 100644
>>>>> --- a/drivers/lightnvm/pblk-recovery.c
>>>>> +++ b/drivers/lightnvm/pblk-recovery.c
>>>>> @@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
>>>>>     return (distance > line->left_msecs) ? line->left_msecs : distance;
>>>>> }
>>>>> 
>>>>> -static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
>>>>> -                                   struct pblk_line *line)
>>>>> +/* Return a chunk belonging to a line by stripe(write order) index */
>>>>> +static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
>>>>> +                                               struct pblk_line *line,
>>>>> +                                               int index)
>>>>> {
>>>>>     struct nvm_tgt_dev *dev = pblk->dev;
>>>>>     struct nvm_geo *geo = &dev->geo;
>>>>> -     struct pblk_line_meta *lm = &pblk->lm;
>>>>>     struct pblk_lun *rlun;
>>>>> -     struct nvm_chk_meta *chunk;
>>>>>     struct ppa_addr ppa;
>>>>> -     u64 line_wp;
>>>>> -     int pos, i, bit;
>>>>> +     int pos;
>>>>> 
>>>>> -     bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
>>>>> -     if (bit >= lm->blk_per_line)
>>>>> -             return 0;
>>>>> -     rlun = &pblk->luns[bit];
>>>>> +     rlun = &pblk->luns[index];
>>>>>     ppa = rlun->bppa;
>>>>>     pos = pblk_ppa_to_pos(geo, ppa);
>>>>> -     chunk = &line->chks[pos];
>>>>> 
>>>>> -     line_wp = chunk->wp;
>>>>> +     return &line->chks[pos];
>>>>> +}
>>>>> 
>>>>> -     for (i = bit + 1; i < lm->blk_per_line; i++) {
>>>>> -             rlun = &pblk->luns[i];
>>>>> -             ppa = rlun->bppa;
>>>>> -             pos = pblk_ppa_to_pos(geo, ppa);
>>>>> -             chunk = &line->chks[pos];
>>>>> +static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
>>>>> +                                   struct pblk_line *line)
>>>>> +{
>>>>> +     struct pblk_line_meta *lm = &pblk->lm;
>>>>> +     int blk_in_line = lm->blk_per_line;
>>>>> +     struct nvm_chk_meta *chunk;
>>>>> +     u64 max_wp, min_wp;
>>>>> +     int i;
>>>>> 
>>>>> -             if (chunk->state & NVM_CHK_ST_OFFLINE)
>>>>> -                     continue;
>>>>> +     i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
>>>>> +
>>>>> +     /* If there is one or zero good chunks in the line,
>>>>> +      * the write pointers can't be unbalanced.
>>>>> +      */
>>>>> +     if (i >= (blk_in_line - 1))
>>>>> +             return 0;
>>>>> 
>>>>> -             if (chunk->wp > line_wp)
>>>>> +     chunk = pblk_get_stripe_chunk(pblk, line, i);
>>>>> +     max_wp = chunk->wp;
>>>>> +     if (max_wp > pblk->max_write_pgs)
>>>>> +             min_wp = max_wp - pblk->max_write_pgs;
>>>>> +     else
>>>>> +             min_wp = 0;
>>>>> +
>>>>> +     i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
>>>>> +     while (i < blk_in_line) {
>>>>> +             chunk = pblk_get_stripe_chunk(pblk, line, i);
>>>>> +             if (chunk->wp > max_wp || chunk->wp < min_wp)
>>>>>                     return 1;
>>>>> -             else if (chunk->wp < line_wp)
>>>>> -                     line_wp = chunk->wp;
>>>>> +
>>>>> +             i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
>>>>>     }
>>>>> 
>>>>>     return 0;
>>>>> @@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>>>>     int ret;
>>>>>     u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
>>>>> 
>>>>> -     if (pblk_line_wp_is_unbalanced(pblk, line))
>>>>> +     if (pblk_line_wps_are_unbalanced(pblk, line))
>>>>>             pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
>>>>> 
>>>>>     ppa_list = p.ppa_list;
>>>>> --
>>>>> 2.17.1
>>>> 
>>>> If I am understanding correctly, you want to protect against the case
>>>> where a pfail has broken the ws_min partition of a chunk, right? I say
>>>> this because there is a guarantee that the minimal write size and pblk's
>>>> write size align with ws_min and ws_opt. Even if we have grown bad
>>>> blocks on pfail for the current line (which is a bigger problem because
>>>> we have potentially lost data), this guarantee would remain.
>>>> 
>>>> If this is the case, my first reaction would be to say that the
>>>> controller is responsible for guaranteeing atomicity for both scalar and
>>>> vector I/Os. If this is not guaranteed, we have bigger problems than
>>>> this (e.g., for the write error recovery path).
>>>> 
>>>> Are you thinking of a different case?
>>> 
>>> The write pointer check triggers a warning if something unexpected has
>>> happened to the chunks.
>>> i.e. if something else than pblk messed with the disk, or if the user
>>> tries to recover a pblk instance with an invalid lun configuration.
>> 
>> But this will only solve a very specific corner case of this, right?
>> This is, when you are writing at WP on a middle LUN exactly < ws_min.
>>> For example, If the user attempts to start pblk with a different LUN
>> configuration, the alignment is the same an pblk will actually fail
>> because it cannot find any emeta.
> 
> Well, we will try to recover a line if the emeta data cannot be read
> or the data fails the crc check, and the recovery path assumes that
> the write pointers pointers have been incremented in the pblk stripe
> order. We'll get a warning now for all cases if this assumption is not
> valid.
> 
>> For completion, the original wp_unbalanced patch attempted to protect
>> against the case where several outstanding I/Os to different PUs are
>> inflight and then you have a pfail. In this case, a write to a PU that
>> is not "next" in the line bitmap completes and we have a whole, meaning
>> that if we recover this case, we risk to overwrite valid data, as we
>> break the "sequentiality" the line, which allows for recovering by
>> replaying the P2L stored on the OOB.
> 
> For outstanding writes, we can leave no guarantees anyway. If a
> presync was attached to a bio, that bio will complete when the write
> completes.
> 
>>> This patch adds a warning if a chunk wp is too small(i.e. if a chunk
>>> was unexpectedly reset)
>> 
>> I am not arguing against the implementation, I am just trying to
>> understand what you are trying to fix. If it is the case I described
>> originally, then I do not think it is possible on the Open-Channel
>> architecture. If you want to add protection against corruptions, then
>> this needs more work as many corruption cases are missing - you would
>> need something like a OOB watermark to protect open lines and something
>> like a OOB lba CRC check in emeta to validate that no data has been
>> altered.
>> 
>> If you ask me, I do not think the latter belongs to pblk, and if it did,
>> I would suggest a whole new (optional) feature that adds this short of
>> integrity protection, ideally, reusing NVMe PI. In the original pblk, we
>> have followed the same assumption as block devices do; you can go an
>> write on the side to a block device that has a FS on top; the block
>> device will not complain at all - if the FS detects this then they will
>> try to fix it, otherwise you lost data.
>> 
>> In this context, we have discussed about a pblk tool a la fsck, which can
>> cover all this cases instead of adding more complexity to the recovery
>> path in the kernel. Here, you patch makes sense we fail if something
>> suspicious has occurred and move the burden to the pblk tool for it to
>> do the repair. But again, if the goal s adding integrity protection, it
>> needs to cover the rest of the cases.
>> 
>> Hope this makes sense.
> 
> It does! I'm not trying to do anything more than warn if the write
> pointers are not what we expect them to be.
> 
> You're right, we need some sort of superblock and some recovery tool
> to make pblk more robust against corruptions.
> Now, we get a warning at least :)

Sounds good! Now I understand.

It is good for me to apply the patch then.

Reviewed-by: Javier González <javier@javigon.com>
Zhoujie Wu Jan. 29, 2019, 10:10 p.m. UTC | #6
Sorry that my Linux email client has configuration issue and can't reply email. Used my outlook to reply as plain text and hope that I won't corrupt the format.
Tested on my board and it works well. Since this is a good fix, I think you don't need to do it based on my previous v3 patch and can directly apply yours:)

Tested-by: Zhoujie Wu <zjwu@marvell.com>


-----Original Message-----
From: hans@owltronix.com <hans@owltronix.com> 
Sent: Tuesday, January 29, 2019 12:48 AM
To: Matias Bjorling <mb@lightnvm.io>
Cc: javier@javigon.com; Zhoujie Wu <zjwu@marvell.com>; linux-block@vger.kernel.org; linux-kernel@vger.kernel.org; Hans Holmberg <hans.holmberg@cnexlabs.com>
Subject: [EXT] [PATCH] lightnvm: pblk: extend line wp balance check

External Email

----------------------------------------------------------------------
From: Hans Holmberg <hans.holmberg@cnexlabs.com>

pblk stripes writes of minimal write size across all non-offline chunks in a line, which means that the maximum write pointer delta should not exceed the minimal write size. Extend the line write pointer balance check to cover this case.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
---

This patch applies on top of Zhoujie's V3 of
"lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced

 drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 23 deletions(-)

diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 02d466e6925e..d86f580036d3 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
 	return (distance > line->left_msecs) ? line->left_msecs : distance;  }
 
-static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
-				      struct pblk_line *line)
+/* Return a chunk belonging to a line by stripe(write order) index */ 
+static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
+						  struct pblk_line *line,
+						  int index)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_lun *rlun;
-	struct nvm_chk_meta *chunk;
 	struct ppa_addr ppa;
-	u64 line_wp;
-	int pos, i, bit;
+	int pos;
 
-	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
-	if (bit >= lm->blk_per_line)
-		return 0;
-	rlun = &pblk->luns[bit];
+	rlun = &pblk->luns[index];
 	ppa = rlun->bppa;
 	pos = pblk_ppa_to_pos(geo, ppa);
-	chunk = &line->chks[pos];
 
-	line_wp = chunk->wp;
+	return &line->chks[pos];
+}
 
-	for (i = bit + 1; i < lm->blk_per_line; i++) {
-		rlun = &pblk->luns[i];
-		ppa = rlun->bppa;
-		pos = pblk_ppa_to_pos(geo, ppa);
-		chunk = &line->chks[pos];
+static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
+				      struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = lm->blk_per_line;
+	struct nvm_chk_meta *chunk;
+	u64 max_wp, min_wp;
+	int i;
 
-		if (chunk->state & NVM_CHK_ST_OFFLINE)
-			continue;
+	i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
+
+	/* If there is one or zero good chunks in the line,
+	 * the write pointers can't be unbalanced.
+	 */
+	if (i >= (blk_in_line - 1))
+		return 0;
 
-		if (chunk->wp > line_wp)
+	chunk = pblk_get_stripe_chunk(pblk, line, i);
+	max_wp = chunk->wp;
+	if (max_wp > pblk->max_write_pgs)
+		min_wp = max_wp - pblk->max_write_pgs;
+	else
+		min_wp = 0;
+
+	i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
+	while (i < blk_in_line) {
+		chunk = pblk_get_stripe_chunk(pblk, line, i);
+		if (chunk->wp > max_wp || chunk->wp < min_wp)
 			return 1;
-		else if (chunk->wp < line_wp)
-			line_wp = chunk->wp;
+
+		i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
 	}
 
 	return 0;
@@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	int ret;
 	u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
 
-	if (pblk_line_wp_is_unbalanced(pblk, line))
+	if (pblk_line_wps_are_unbalanced(pblk, line))
 		pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
 
 	ppa_list = p.ppa_list;
--
2.17.1
Hans Holmberg Jan. 30, 2019, 7:27 a.m. UTC | #7
On Tue, Jan 29, 2019 at 11:10 PM Zhoujie Wu <zjwu@marvell.com> wrote:
>
> Sorry that my Linux email client has configuration issue and can't reply email. Used my outlook to reply as plain text and hope that I won't corrupt the format.
> Tested on my board and it works well. Since this is a good fix, I think you don't need to do it based on my previous v3 patch and can directly apply yours:)
>
> Tested-by: Zhoujie Wu <zjwu@marvell.com>

Thanks Zhoujie!

I'll squash the two patches into a V2, carrying over the tested-by and
credit you as Reported-by if you don't mind.


>
>
> -----Original Message-----
> From: hans@owltronix.com <hans@owltronix.com>
> Sent: Tuesday, January 29, 2019 12:48 AM
> To: Matias Bjorling <mb@lightnvm.io>
> Cc: javier@javigon.com; Zhoujie Wu <zjwu@marvell.com>; linux-block@vger.kernel.org; linux-kernel@vger.kernel.org; Hans Holmberg <hans.holmberg@cnexlabs.com>
> Subject: [EXT] [PATCH] lightnvm: pblk: extend line wp balance check
>
> External Email
>
> ----------------------------------------------------------------------
> From: Hans Holmberg <hans.holmberg@cnexlabs.com>
>
> pblk stripes writes of minimal write size across all non-offline chunks in a line, which means that the maximum write pointer delta should not exceed the minimal write size. Extend the line write pointer balance check to cover this case.
>
> Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
> ---
>
> This patch applies on top of Zhoujie's V3 of
> "lightnvm: pblk: ignore bad block wp for pblk_line_wp_is_unbalanced
>
>  drivers/lightnvm/pblk-recovery.c | 60 ++++++++++++++++++++------------
>  1 file changed, 37 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index 02d466e6925e..d86f580036d3 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -302,41 +302,55 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
>         return (distance > line->left_msecs) ? line->left_msecs : distance;  }
>
> -static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
> -                                     struct pblk_line *line)
> +/* Return a chunk belonging to a line by stripe(write order) index */
> +static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
> +                                                 struct pblk_line *line,
> +                                                 int index)
>  {
>         struct nvm_tgt_dev *dev = pblk->dev;
>         struct nvm_geo *geo = &dev->geo;
> -       struct pblk_line_meta *lm = &pblk->lm;
>         struct pblk_lun *rlun;
> -       struct nvm_chk_meta *chunk;
>         struct ppa_addr ppa;
> -       u64 line_wp;
> -       int pos, i, bit;
> +       int pos;
>
> -       bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
> -       if (bit >= lm->blk_per_line)
> -               return 0;
> -       rlun = &pblk->luns[bit];
> +       rlun = &pblk->luns[index];
>         ppa = rlun->bppa;
>         pos = pblk_ppa_to_pos(geo, ppa);
> -       chunk = &line->chks[pos];
>
> -       line_wp = chunk->wp;
> +       return &line->chks[pos];
> +}
>
> -       for (i = bit + 1; i < lm->blk_per_line; i++) {
> -               rlun = &pblk->luns[i];
> -               ppa = rlun->bppa;
> -               pos = pblk_ppa_to_pos(geo, ppa);
> -               chunk = &line->chks[pos];
> +static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
> +                                     struct pblk_line *line)
> +{
> +       struct pblk_line_meta *lm = &pblk->lm;
> +       int blk_in_line = lm->blk_per_line;
> +       struct nvm_chk_meta *chunk;
> +       u64 max_wp, min_wp;
> +       int i;
>
> -               if (chunk->state & NVM_CHK_ST_OFFLINE)
> -                       continue;
> +       i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
> +
> +       /* If there is one or zero good chunks in the line,
> +        * the write pointers can't be unbalanced.
> +        */
> +       if (i >= (blk_in_line - 1))
> +               return 0;
>
> -               if (chunk->wp > line_wp)
> +       chunk = pblk_get_stripe_chunk(pblk, line, i);
> +       max_wp = chunk->wp;
> +       if (max_wp > pblk->max_write_pgs)
> +               min_wp = max_wp - pblk->max_write_pgs;
> +       else
> +               min_wp = 0;
> +
> +       i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
> +       while (i < blk_in_line) {
> +               chunk = pblk_get_stripe_chunk(pblk, line, i);
> +               if (chunk->wp > max_wp || chunk->wp < min_wp)
>                         return 1;
> -               else if (chunk->wp < line_wp)
> -                       line_wp = chunk->wp;
> +
> +               i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
>         }
>
>         return 0;
> @@ -362,7 +376,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>         int ret;
>         u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
>
> -       if (pblk_line_wp_is_unbalanced(pblk, line))
> +       if (pblk_line_wps_are_unbalanced(pblk, line))
>                 pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
>
>         ppa_list = p.ppa_list;
> --
> 2.17.1
>
diff mbox series

Patch

diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 02d466e6925e..d86f580036d3 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -302,41 +302,55 @@  static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
 	return (distance > line->left_msecs) ? line->left_msecs : distance;
 }
 
-static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
-				      struct pblk_line *line)
+/* Return a chunk belonging to a line by stripe(write order) index */
+static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
+						  struct pblk_line *line,
+						  int index)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_lun *rlun;
-	struct nvm_chk_meta *chunk;
 	struct ppa_addr ppa;
-	u64 line_wp;
-	int pos, i, bit;
+	int pos;
 
-	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
-	if (bit >= lm->blk_per_line)
-		return 0;
-	rlun = &pblk->luns[bit];
+	rlun = &pblk->luns[index];
 	ppa = rlun->bppa;
 	pos = pblk_ppa_to_pos(geo, ppa);
-	chunk = &line->chks[pos];
 
-	line_wp = chunk->wp;
+	return &line->chks[pos];
+}
 
-	for (i = bit + 1; i < lm->blk_per_line; i++) {
-		rlun = &pblk->luns[i];
-		ppa = rlun->bppa;
-		pos = pblk_ppa_to_pos(geo, ppa);
-		chunk = &line->chks[pos];
+static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
+				      struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = lm->blk_per_line;
+	struct nvm_chk_meta *chunk;
+	u64 max_wp, min_wp;
+	int i;
 
-		if (chunk->state & NVM_CHK_ST_OFFLINE)
-			continue;
+	i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
+
+	/* If there is one or zero good chunks in the line,
+	 * the write pointers can't be unbalanced.
+	 */
+	if (i >= (blk_in_line - 1))
+		return 0;
 
-		if (chunk->wp > line_wp)
+	chunk = pblk_get_stripe_chunk(pblk, line, i);
+	max_wp = chunk->wp;
+	if (max_wp > pblk->max_write_pgs)
+		min_wp = max_wp - pblk->max_write_pgs;
+	else
+		min_wp = 0;
+
+	i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
+	while (i < blk_in_line) {
+		chunk = pblk_get_stripe_chunk(pblk, line, i);
+		if (chunk->wp > max_wp || chunk->wp < min_wp)
 			return 1;
-		else if (chunk->wp < line_wp)
-			line_wp = chunk->wp;
+
+		i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
 	}
 
 	return 0;
@@ -362,7 +376,7 @@  static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	int ret;
 	u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
 
-	if (pblk_line_wp_is_unbalanced(pblk, line))
+	if (pblk_line_wps_are_unbalanced(pblk, line))
 		pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
 
 	ppa_list = p.ppa_list;