Message ID | 20190227171442.11853-13-igor.j.konopko@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | lightnvm: bugfixes and improvements | expand |
> On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@intel.com> wrote: > > When we creating pblk instance with factory > flag, there is a possibility that some chunks > are in open state, which does not allow to > issue erase request to them directly. Such a > chunk should be filled with some empty data in > order to achieve close state. Without that we > are risking that some erase operation will be > rejected by the drive due to inproper chunk > state. > > This patch implements closing chunk logic in pblk > for that case, when creating instance with factory > flag in order to fix that issue > > Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> > --- > drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ > drivers/lightnvm/pblk-init.c | 14 ++++- > drivers/lightnvm/pblk.h | 2 + > 3 files changed, 128 insertions(+), 2 deletions(-) > > diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c > index fa4dc05608ff..d3c45393f093 100644 > --- a/drivers/lightnvm/pblk-core.c > +++ b/drivers/lightnvm/pblk-core.c > @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, > return meta + ch_off + lun_off + chk_off; > } > > +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) > +{ > + struct nvm_tgt_dev *dev = pblk->dev; > + struct nvm_geo *geo = &dev->geo; > + struct bio *bio; > + struct ppa_addr *ppa_list; > + struct nvm_rq rqd; > + void *meta_list, *data; > + dma_addr_t dma_meta_list, dma_ppa_list; > + int i, rq_ppas, rq_len, ret; > + > + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); > + if (!meta_list) > + return; > + > + ppa_list = meta_list + pblk_dma_meta_size(pblk); > + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); > + > + rq_ppas = pblk_calc_secs(pblk, count, 0, false); > + if (!rq_ppas) > + rq_ppas = pblk->min_write_pgs; > + rq_len = rq_ppas * geo->csecs; > + > + data = kzalloc(rq_len, GFP_KERNEL); > + if (!data) > + goto free_meta_list; 
> + > + memset(&rqd, 0, sizeof(struct nvm_rq)); > + rqd.opcode = NVM_OP_PWRITE; > + rqd.nr_ppas = rq_ppas; > + rqd.meta_list = meta_list; > + rqd.ppa_list = ppa_list; > + rqd.dma_ppa_list = dma_ppa_list; > + rqd.dma_meta_list = dma_meta_list; > + > +next_rq: > + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); > + if (IS_ERR(bio)) > + goto out_next; > + > + bio->bi_iter.bi_sector = 0; /* artificial bio */ > + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); > + > + rqd.bio = bio; > + for (i = 0; i < rqd.nr_ppas; i++) { > + rqd.ppa_list[i] = ppa; > + rqd.ppa_list[i].m.sec += i; > + pblk_get_meta(pblk, meta_list, i)->lba = > + cpu_to_le64(ADDR_EMPTY); > + } > + > + ret = nvm_submit_io_sync(dev, &rqd); > + if (ret) { > + bio_put(bio); > + goto out_next; > + } > + > + if (rqd.error) > + goto free_data; > + > +out_next: > + count -= rqd.nr_ppas; > + ppa.m.sec += rqd.nr_ppas; > + if (count > 0) > + goto next_rq; > + > +free_data: > + kfree(data); > +free_meta_list: > + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); > +} > + > +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) > +{ > + struct nvm_tgt_dev *dev = pblk->dev; > + struct nvm_geo *geo = &dev->geo; > + struct nvm_chk_meta *chunk_meta; > + struct ppa_addr ppa; > + int i, j, k, count; > + > + for (i = 0; i < geo->num_chk; i++) { > + for (j = 0; j < geo->num_lun; j++) { > + for (k = 0; k < geo->num_ch; k++) { > + ppa.ppa = 0; > + ppa.m.grp = k; > + ppa.m.pu = j; > + ppa.m.chk = i; > + > + chunk_meta = pblk_chunk_get_off(pblk, > + meta, ppa); > + if (chunk_meta->state == NVM_CHK_ST_OPEN) { > + ppa.m.sec = chunk_meta->wp; > + count = geo->clba - chunk_meta->wp; > + pblk_close_chunk(pblk, ppa, count); > + } > + } > + } > + } > +} > + > +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) > +{ > + struct nvm_tgt_dev *dev = pblk->dev; > + struct nvm_geo *geo = &dev->geo; > + int i; > + > + for (i = 0; i < geo->all_luns; i++) { > + if (meta[i].state == 
NVM_CHK_ST_OPEN) > + return true; > + } > + > + return false; > +} > + > void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, > u64 paddr) > { > diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c > index 9913a4514eb6..83abe6960b46 100644 > --- a/drivers/lightnvm/pblk-init.c > +++ b/drivers/lightnvm/pblk-init.c > @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) > return 0; > } > > -static int pblk_lines_init(struct pblk *pblk) > +static int pblk_lines_init(struct pblk *pblk, bool factory_init) > { > struct pblk_line_mgmt *l_mg = &pblk->l_mg; > struct pblk_line *line; > void *chunk_meta; > int nr_free_chks = 0; > int i, ret; > + bool retry = false; > > ret = pblk_line_meta_init(pblk); > if (ret) > @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) > if (ret) > goto fail_free_meta; > > +get_chunk_meta: > chunk_meta = pblk_get_chunk_meta(pblk); > if (IS_ERR(chunk_meta)) { > ret = PTR_ERR(chunk_meta); > goto fail_free_luns; > } > > + if (factory_init && !retry && > + pblk_are_opened_chunks(pblk, chunk_meta)) { > + pblk_close_opened_chunks(pblk, chunk_meta); > + retry = true; > + vfree(chunk_meta); > + goto get_chunk_meta; > + } > + > pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), > GFP_KERNEL); > if (!pblk->lines) { > @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, > goto fail; > } > > - ret = pblk_lines_init(pblk); > + ret = pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY); > if (ret) { > pblk_err(pblk, "could not initialize lines\n"); > goto fail_free_core; > diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h > index b266563508e6..b248642c4dfb 100644 > --- a/drivers/lightnvm/pblk.h > +++ b/drivers/lightnvm/pblk.h > @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); > struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, > struct nvm_chk_meta *lp, > struct ppa_addr ppa); > +void 
pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); > +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); > void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); > void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); > int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); > -- > 2.17.1 I know that the OCSSD 2.0 spec does not allow transitioning from open to free, but to me this is a spec bug as there is no underlying issue in reading an open block. Note that all controllers I know support this, and the upcoming Denali spec fixes this too. Besides, the factory flag is intended to start a pblk instance immediately, without having to pay the price of padding any past device state. If you still want to do this, I think this belongs in a user space tool. Javier
On Mon, Mar 4, 2019 at 9:46 AM Javier González <javier@javigon.com> wrote: > > > On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@intel.com> wrote: > > > > When we creating pblk instance with factory > > flag, there is a possibility that some chunks > > are in open state, which does not allow to > > issue erase request to them directly. Such a > > chunk should be filled with some empty data in > > order to achieve close state. Without that we > > are risking that some erase operation will be > > rejected by the drive due to inproper chunk > > state. > > > > This patch implements closing chunk logic in pblk > > for that case, when creating instance with factory > > flag in order to fix that issue > > > > Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> > > --- > > drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ > > drivers/lightnvm/pblk-init.c | 14 ++++- > > drivers/lightnvm/pblk.h | 2 + > > 3 files changed, 128 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c > > index fa4dc05608ff..d3c45393f093 100644 > > --- a/drivers/lightnvm/pblk-core.c > > +++ b/drivers/lightnvm/pblk-core.c > > @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, > > return meta + ch_off + lun_off + chk_off; > > } > > > > +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) > > +{ > > + struct nvm_tgt_dev *dev = pblk->dev; > > + struct nvm_geo *geo = &dev->geo; > > + struct bio *bio; > > + struct ppa_addr *ppa_list; > > + struct nvm_rq rqd; > > + void *meta_list, *data; > > + dma_addr_t dma_meta_list, dma_ppa_list; > > + int i, rq_ppas, rq_len, ret; > > + > > + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); > > + if (!meta_list) > > + return; > > + > > + ppa_list = meta_list + pblk_dma_meta_size(pblk); > > + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); > > + > > + rq_ppas = pblk_calc_secs(pblk, count, 0, 
false); > > + if (!rq_ppas) > > + rq_ppas = pblk->min_write_pgs; > > + rq_len = rq_ppas * geo->csecs; > > + > > + data = kzalloc(rq_len, GFP_KERNEL); > > + if (!data) > > + goto free_meta_list; > > + > > + memset(&rqd, 0, sizeof(struct nvm_rq)); > > + rqd.opcode = NVM_OP_PWRITE; > > + rqd.nr_ppas = rq_ppas; > > + rqd.meta_list = meta_list; > > + rqd.ppa_list = ppa_list; > > + rqd.dma_ppa_list = dma_ppa_list; > > + rqd.dma_meta_list = dma_meta_list; > > + > > +next_rq: > > + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); > > + if (IS_ERR(bio)) > > + goto out_next; > > + > > + bio->bi_iter.bi_sector = 0; /* artificial bio */ > > + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); > > + > > + rqd.bio = bio; > > + for (i = 0; i < rqd.nr_ppas; i++) { > > + rqd.ppa_list[i] = ppa; > > + rqd.ppa_list[i].m.sec += i; > > + pblk_get_meta(pblk, meta_list, i)->lba = > > + cpu_to_le64(ADDR_EMPTY); > > + } > > + > > + ret = nvm_submit_io_sync(dev, &rqd); > > + if (ret) { > > + bio_put(bio); > > + goto out_next; > > + } > > + > > + if (rqd.error) > > + goto free_data; > > + > > +out_next: > > + count -= rqd.nr_ppas; > > + ppa.m.sec += rqd.nr_ppas; > > + if (count > 0) > > + goto next_rq; > > + > > +free_data: > > + kfree(data); > > +free_meta_list: > > + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); > > +} > > + > > +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) > > +{ > > + struct nvm_tgt_dev *dev = pblk->dev; > > + struct nvm_geo *geo = &dev->geo; > > + struct nvm_chk_meta *chunk_meta; > > + struct ppa_addr ppa; > > + int i, j, k, count; > > + > > + for (i = 0; i < geo->num_chk; i++) { > > + for (j = 0; j < geo->num_lun; j++) { > > + for (k = 0; k < geo->num_ch; k++) { > > + ppa.ppa = 0; > > + ppa.m.grp = k; > > + ppa.m.pu = j; > > + ppa.m.chk = i; > > + > > + chunk_meta = pblk_chunk_get_off(pblk, > > + meta, ppa); > > + if (chunk_meta->state == NVM_CHK_ST_OPEN) { > > + ppa.m.sec = chunk_meta->wp; > > + count = geo->clba - chunk_meta->wp; > 
> + pblk_close_chunk(pblk, ppa, count); > > + } > > + } > > + } > > + } > > +} > > + > > +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) > > +{ > > + struct nvm_tgt_dev *dev = pblk->dev; > > + struct nvm_geo *geo = &dev->geo; > > + int i; > > + > > + for (i = 0; i < geo->all_luns; i++) { > > + if (meta[i].state == NVM_CHK_ST_OPEN) > > + return true; > > + } > > + > > + return false; > > +} > > + > > void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, > > u64 paddr) > > { > > diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c > > index 9913a4514eb6..83abe6960b46 100644 > > --- a/drivers/lightnvm/pblk-init.c > > +++ b/drivers/lightnvm/pblk-init.c > > @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) > > return 0; > > } > > > > -static int pblk_lines_init(struct pblk *pblk) > > +static int pblk_lines_init(struct pblk *pblk, bool factory_init) > > { > > struct pblk_line_mgmt *l_mg = &pblk->l_mg; > > struct pblk_line *line; > > void *chunk_meta; > > int nr_free_chks = 0; > > int i, ret; > > + bool retry = false; > > > > ret = pblk_line_meta_init(pblk); > > if (ret) > > @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) > > if (ret) > > goto fail_free_meta; > > > > +get_chunk_meta: > > chunk_meta = pblk_get_chunk_meta(pblk); > > if (IS_ERR(chunk_meta)) { > > ret = PTR_ERR(chunk_meta); > > goto fail_free_luns; > > } > > > > + if (factory_init && !retry && > > + pblk_are_opened_chunks(pblk, chunk_meta)) { > > + pblk_close_opened_chunks(pblk, chunk_meta); > > + retry = true; > > + vfree(chunk_meta); > > + goto get_chunk_meta; > > + } > > + > > pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), > > GFP_KERNEL); > > if (!pblk->lines) { > > @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, > > goto fail; > > } > > > > - ret = pblk_lines_init(pblk); > > + ret = pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY); > > 
if (ret) { > > pblk_err(pblk, "could not initialize lines\n"); > > goto fail_free_core; > > diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h > > index b266563508e6..b248642c4dfb 100644 > > --- a/drivers/lightnvm/pblk.h > > +++ b/drivers/lightnvm/pblk.h > > @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); > > struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, > > struct nvm_chk_meta *lp, > > struct ppa_addr ppa); > > +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); > > +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); > > void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); > > void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); > > int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); > > -- > > 2.17.1 > > I know that the OCSSD 2.0 spec does not allow to transition from open to > free, but to me this is a spec bug as there is no underlying issue in > reading an open block. Note that all controllers I know support this, > and the upcoming Denali spec. fixes this too. + Klaus whom I discussed this with. Yeah, i think that "early reset" is a nice feature. It would be nice to extend the OCSSD spec with a new capabilities bit indicating if this is indeed supported or not. Matias: what do you think? > > Besides, the factory flag is intended to start a pblk instance > immediately, without having to pay the price of padding any past device > state. If you still want to do this, I think this belongs in a user space tool. > Hear, hear! Serially padding any open chunks during -f create call would be terrible user experience. Lets say that padding a chunk takes one second, we would, in a worst-case scenario in an example disk, be stuck in a syscall for 1*64*1000 seconds ~ 17 hours. 
A tool, like dm-zoned's dmzadm, would be the right approach; see Documentation/device-mapper/dm-zoned.txt. All new pblk instances would then have to be pre-formatted with "pblkadm". A new physical storage format containing a superblock would also be a good idea. / Hans
On 04.03.2019 11:05, Hans Holmberg wrote: > On Mon, Mar 4, 2019 at 9:46 AM Javier González <javier@javigon.com> wrote: >> >>> On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@intel.com> wrote: >>> >>> When we creating pblk instance with factory >>> flag, there is a possibility that some chunks >>> are in open state, which does not allow to >>> issue erase request to them directly. Such a >>> chunk should be filled with some empty data in >>> order to achieve close state. Without that we >>> are risking that some erase operation will be >>> rejected by the drive due to inproper chunk >>> state. >>> >>> This patch implements closing chunk logic in pblk >>> for that case, when creating instance with factory >>> flag in order to fix that issue >>> >>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> >>> --- >>> drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ >>> drivers/lightnvm/pblk-init.c | 14 ++++- >>> drivers/lightnvm/pblk.h | 2 + >>> 3 files changed, 128 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c >>> index fa4dc05608ff..d3c45393f093 100644 >>> --- a/drivers/lightnvm/pblk-core.c >>> +++ b/drivers/lightnvm/pblk-core.c >>> @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, >>> return meta + ch_off + lun_off + chk_off; >>> } >>> >>> +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) >>> +{ >>> + struct nvm_tgt_dev *dev = pblk->dev; >>> + struct nvm_geo *geo = &dev->geo; >>> + struct bio *bio; >>> + struct ppa_addr *ppa_list; >>> + struct nvm_rq rqd; >>> + void *meta_list, *data; >>> + dma_addr_t dma_meta_list, dma_ppa_list; >>> + int i, rq_ppas, rq_len, ret; >>> + >>> + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); >>> + if (!meta_list) >>> + return; >>> + >>> + ppa_list = meta_list + pblk_dma_meta_size(pblk); >>> + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); >>> + >>> 
+ rq_ppas = pblk_calc_secs(pblk, count, 0, false); >>> + if (!rq_ppas) >>> + rq_ppas = pblk->min_write_pgs; >>> + rq_len = rq_ppas * geo->csecs; >>> + >>> + data = kzalloc(rq_len, GFP_KERNEL); >>> + if (!data) >>> + goto free_meta_list; >>> + >>> + memset(&rqd, 0, sizeof(struct nvm_rq)); >>> + rqd.opcode = NVM_OP_PWRITE; >>> + rqd.nr_ppas = rq_ppas; >>> + rqd.meta_list = meta_list; >>> + rqd.ppa_list = ppa_list; >>> + rqd.dma_ppa_list = dma_ppa_list; >>> + rqd.dma_meta_list = dma_meta_list; >>> + >>> +next_rq: >>> + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); >>> + if (IS_ERR(bio)) >>> + goto out_next; >>> + >>> + bio->bi_iter.bi_sector = 0; /* artificial bio */ >>> + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); >>> + >>> + rqd.bio = bio; >>> + for (i = 0; i < rqd.nr_ppas; i++) { >>> + rqd.ppa_list[i] = ppa; >>> + rqd.ppa_list[i].m.sec += i; >>> + pblk_get_meta(pblk, meta_list, i)->lba = >>> + cpu_to_le64(ADDR_EMPTY); >>> + } >>> + >>> + ret = nvm_submit_io_sync(dev, &rqd); >>> + if (ret) { >>> + bio_put(bio); >>> + goto out_next; >>> + } >>> + >>> + if (rqd.error) >>> + goto free_data; >>> + >>> +out_next: >>> + count -= rqd.nr_ppas; >>> + ppa.m.sec += rqd.nr_ppas; >>> + if (count > 0) >>> + goto next_rq; >>> + >>> +free_data: >>> + kfree(data); >>> +free_meta_list: >>> + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); >>> +} >>> + >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) >>> +{ >>> + struct nvm_tgt_dev *dev = pblk->dev; >>> + struct nvm_geo *geo = &dev->geo; >>> + struct nvm_chk_meta *chunk_meta; >>> + struct ppa_addr ppa; >>> + int i, j, k, count; >>> + >>> + for (i = 0; i < geo->num_chk; i++) { >>> + for (j = 0; j < geo->num_lun; j++) { >>> + for (k = 0; k < geo->num_ch; k++) { >>> + ppa.ppa = 0; >>> + ppa.m.grp = k; >>> + ppa.m.pu = j; >>> + ppa.m.chk = i; >>> + >>> + chunk_meta = pblk_chunk_get_off(pblk, >>> + meta, ppa); >>> + if (chunk_meta->state == NVM_CHK_ST_OPEN) { >>> + ppa.m.sec = chunk_meta->wp; 
>>> + count = geo->clba - chunk_meta->wp; >>> + pblk_close_chunk(pblk, ppa, count); >>> + } >>> + } >>> + } >>> + } >>> +} >>> + >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) >>> +{ >>> + struct nvm_tgt_dev *dev = pblk->dev; >>> + struct nvm_geo *geo = &dev->geo; >>> + int i; >>> + >>> + for (i = 0; i < geo->all_luns; i++) { >>> + if (meta[i].state == NVM_CHK_ST_OPEN) >>> + return true; >>> + } >>> + >>> + return false; >>> +} >>> + >>> void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, >>> u64 paddr) >>> { >>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c >>> index 9913a4514eb6..83abe6960b46 100644 >>> --- a/drivers/lightnvm/pblk-init.c >>> +++ b/drivers/lightnvm/pblk-init.c >>> @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) >>> return 0; >>> } >>> >>> -static int pblk_lines_init(struct pblk *pblk) >>> +static int pblk_lines_init(struct pblk *pblk, bool factory_init) >>> { >>> struct pblk_line_mgmt *l_mg = &pblk->l_mg; >>> struct pblk_line *line; >>> void *chunk_meta; >>> int nr_free_chks = 0; >>> int i, ret; >>> + bool retry = false; >>> >>> ret = pblk_line_meta_init(pblk); >>> if (ret) >>> @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) >>> if (ret) >>> goto fail_free_meta; >>> >>> +get_chunk_meta: >>> chunk_meta = pblk_get_chunk_meta(pblk); >>> if (IS_ERR(chunk_meta)) { >>> ret = PTR_ERR(chunk_meta); >>> goto fail_free_luns; >>> } >>> >>> + if (factory_init && !retry && >>> + pblk_are_opened_chunks(pblk, chunk_meta)) { >>> + pblk_close_opened_chunks(pblk, chunk_meta); >>> + retry = true; >>> + vfree(chunk_meta); >>> + goto get_chunk_meta; >>> + } >>> + >>> pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), >>> GFP_KERNEL); >>> if (!pblk->lines) { >>> @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, >>> goto fail; >>> } >>> >>> - ret = pblk_lines_init(pblk); >>> + ret = 
pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY); >>> if (ret) { >>> pblk_err(pblk, "could not initialize lines\n"); >>> goto fail_free_core; >>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h >>> index b266563508e6..b248642c4dfb 100644 >>> --- a/drivers/lightnvm/pblk.h >>> +++ b/drivers/lightnvm/pblk.h >>> @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); >>> struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, >>> struct nvm_chk_meta *lp, >>> struct ppa_addr ppa); >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); >>> void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); >>> void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); >>> int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); >>> -- >>> 2.17.1 >> >> I know that the OCSSD 2.0 spec does not allow to transition from open to >> free, but to me this is a spec bug as there is no underlying issue in >> reading an open block. Note that all controllers I know support this, >> and the upcoming Denali spec. fixes this too. > > + Klaus whom I discussed this with. > Yeah, i think that "early reset" is a nice feature. It would be nice > to extend the OCSSD spec with a new capabilities bit indicating if > this is indeed supported or not. > Matias: what do you think? > >> >> Besides, the factory flag is intended to start a pblk instance >> immediately, without having to pay the price of padding any past device >> state. If you still want to do this, I think this belongs in a user space tool. >> > > Hear, hear! > > Serially padding any open chunks during -f create call would be > terrible user experience. > Lets say that padding a chunk takes one second, we would, in a > worst-case scenario in an example disk, be stuck in a syscall for > 1*64*1000 seconds ~ 17 hours. > You are both right - this can be time consuming. 
So we can drop the "padding" part and let the user do that. What about changing this patch to at least print a warning in dmesg when we have some open chunks in such a case? > A tool, like dm-zoned's dmzadm would be the right approach, see > Documentation/device-mapper/dm-zoned.txt > All new pblk instances would then have to be pre-formatted with "pblkadm" > > A new physical storage format containing a superblock would also be a good idea. > > / Hans >
On Mon, Mar 4, 2019 at 1:56 PM Igor Konopko <igor.j.konopko@intel.com> wrote: > > > > On 04.03.2019 11:05, Hans Holmberg wrote: > > On Mon, Mar 4, 2019 at 9:46 AM Javier González <javier@javigon.com> wrote: > >> > >>> On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@intel.com> wrote: > >>> > >>> When we creating pblk instance with factory > >>> flag, there is a possibility that some chunks > >>> are in open state, which does not allow to > >>> issue erase request to them directly. Such a > >>> chunk should be filled with some empty data in > >>> order to achieve close state. Without that we > >>> are risking that some erase operation will be > >>> rejected by the drive due to inproper chunk > >>> state. > >>> > >>> This patch implements closing chunk logic in pblk > >>> for that case, when creating instance with factory > >>> flag in order to fix that issue > >>> > >>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> > >>> --- > >>> drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ > >>> drivers/lightnvm/pblk-init.c | 14 ++++- > >>> drivers/lightnvm/pblk.h | 2 + > >>> 3 files changed, 128 insertions(+), 2 deletions(-) > >>> > >>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c > >>> index fa4dc05608ff..d3c45393f093 100644 > >>> --- a/drivers/lightnvm/pblk-core.c > >>> +++ b/drivers/lightnvm/pblk-core.c > >>> @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, > >>> return meta + ch_off + lun_off + chk_off; > >>> } > >>> > >>> +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) > >>> +{ > >>> + struct nvm_tgt_dev *dev = pblk->dev; > >>> + struct nvm_geo *geo = &dev->geo; > >>> + struct bio *bio; > >>> + struct ppa_addr *ppa_list; > >>> + struct nvm_rq rqd; > >>> + void *meta_list, *data; > >>> + dma_addr_t dma_meta_list, dma_ppa_list; > >>> + int i, rq_ppas, rq_len, ret; > >>> + > >>> + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 
&dma_meta_list); > >>> + if (!meta_list) > >>> + return; > >>> + > >>> + ppa_list = meta_list + pblk_dma_meta_size(pblk); > >>> + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); > >>> + > >>> + rq_ppas = pblk_calc_secs(pblk, count, 0, false); > >>> + if (!rq_ppas) > >>> + rq_ppas = pblk->min_write_pgs; > >>> + rq_len = rq_ppas * geo->csecs; > >>> + > >>> + data = kzalloc(rq_len, GFP_KERNEL); > >>> + if (!data) > >>> + goto free_meta_list; > >>> + > >>> + memset(&rqd, 0, sizeof(struct nvm_rq)); > >>> + rqd.opcode = NVM_OP_PWRITE; > >>> + rqd.nr_ppas = rq_ppas; > >>> + rqd.meta_list = meta_list; > >>> + rqd.ppa_list = ppa_list; > >>> + rqd.dma_ppa_list = dma_ppa_list; > >>> + rqd.dma_meta_list = dma_meta_list; > >>> + > >>> +next_rq: > >>> + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); > >>> + if (IS_ERR(bio)) > >>> + goto out_next; > >>> + > >>> + bio->bi_iter.bi_sector = 0; /* artificial bio */ > >>> + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); > >>> + > >>> + rqd.bio = bio; > >>> + for (i = 0; i < rqd.nr_ppas; i++) { > >>> + rqd.ppa_list[i] = ppa; > >>> + rqd.ppa_list[i].m.sec += i; > >>> + pblk_get_meta(pblk, meta_list, i)->lba = > >>> + cpu_to_le64(ADDR_EMPTY); > >>> + } > >>> + > >>> + ret = nvm_submit_io_sync(dev, &rqd); > >>> + if (ret) { > >>> + bio_put(bio); > >>> + goto out_next; > >>> + } > >>> + > >>> + if (rqd.error) > >>> + goto free_data; > >>> + > >>> +out_next: > >>> + count -= rqd.nr_ppas; > >>> + ppa.m.sec += rqd.nr_ppas; > >>> + if (count > 0) > >>> + goto next_rq; > >>> + > >>> +free_data: > >>> + kfree(data); > >>> +free_meta_list: > >>> + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); > >>> +} > >>> + > >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) > >>> +{ > >>> + struct nvm_tgt_dev *dev = pblk->dev; > >>> + struct nvm_geo *geo = &dev->geo; > >>> + struct nvm_chk_meta *chunk_meta; > >>> + struct ppa_addr ppa; > >>> + int i, j, k, count; > >>> + > >>> + for (i = 0; i < geo->num_chk; 
i++) { > >>> + for (j = 0; j < geo->num_lun; j++) { > >>> + for (k = 0; k < geo->num_ch; k++) { > >>> + ppa.ppa = 0; > >>> + ppa.m.grp = k; > >>> + ppa.m.pu = j; > >>> + ppa.m.chk = i; > >>> + > >>> + chunk_meta = pblk_chunk_get_off(pblk, > >>> + meta, ppa); > >>> + if (chunk_meta->state == NVM_CHK_ST_OPEN) { > >>> + ppa.m.sec = chunk_meta->wp; > >>> + count = geo->clba - chunk_meta->wp; > >>> + pblk_close_chunk(pblk, ppa, count); > >>> + } > >>> + } > >>> + } > >>> + } > >>> +} > >>> + > >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) > >>> +{ > >>> + struct nvm_tgt_dev *dev = pblk->dev; > >>> + struct nvm_geo *geo = &dev->geo; > >>> + int i; > >>> + > >>> + for (i = 0; i < geo->all_luns; i++) { > >>> + if (meta[i].state == NVM_CHK_ST_OPEN) > >>> + return true; > >>> + } > >>> + > >>> + return false; > >>> +} > >>> + > >>> void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, > >>> u64 paddr) > >>> { > >>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c > >>> index 9913a4514eb6..83abe6960b46 100644 > >>> --- a/drivers/lightnvm/pblk-init.c > >>> +++ b/drivers/lightnvm/pblk-init.c > >>> @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) > >>> return 0; > >>> } > >>> > >>> -static int pblk_lines_init(struct pblk *pblk) > >>> +static int pblk_lines_init(struct pblk *pblk, bool factory_init) > >>> { > >>> struct pblk_line_mgmt *l_mg = &pblk->l_mg; > >>> struct pblk_line *line; > >>> void *chunk_meta; > >>> int nr_free_chks = 0; > >>> int i, ret; > >>> + bool retry = false; > >>> > >>> ret = pblk_line_meta_init(pblk); > >>> if (ret) > >>> @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) > >>> if (ret) > >>> goto fail_free_meta; > >>> > >>> +get_chunk_meta: > >>> chunk_meta = pblk_get_chunk_meta(pblk); > >>> if (IS_ERR(chunk_meta)) { > >>> ret = PTR_ERR(chunk_meta); > >>> goto fail_free_luns; > >>> } > >>> > >>> + if (factory_init && !retry && > >>> + 
pblk_are_opened_chunks(pblk, chunk_meta)) { > >>> + pblk_close_opened_chunks(pblk, chunk_meta); > >>> + retry = true; > >>> + vfree(chunk_meta); > >>> + goto get_chunk_meta; > >>> + } > >>> + > >>> pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), > >>> GFP_KERNEL); > >>> if (!pblk->lines) { > >>> @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, > >>> goto fail; > >>> } > >>> > >>> - ret = pblk_lines_init(pblk); > >>> + ret = pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY); > >>> if (ret) { > >>> pblk_err(pblk, "could not initialize lines\n"); > >>> goto fail_free_core; > >>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h > >>> index b266563508e6..b248642c4dfb 100644 > >>> --- a/drivers/lightnvm/pblk.h > >>> +++ b/drivers/lightnvm/pblk.h > >>> @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); > >>> struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, > >>> struct nvm_chk_meta *lp, > >>> struct ppa_addr ppa); > >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); > >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); > >>> void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); > >>> void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); > >>> int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); > >>> -- > >>> 2.17.1 > >> > >> I know that the OCSSD 2.0 spec does not allow to transition from open to > >> free, but to me this is a spec bug as there is no underlying issue in > >> reading an open block. Note that all controllers I know support this, > >> and the upcoming Denali spec. fixes this too. > > > > + Klaus whom I discussed this with. > > Yeah, i think that "early reset" is a nice feature. It would be nice > > to extend the OCSSD spec with a new capabilities bit indicating if > > this is indeed supported or not. > > Matias: what do you think? 
> > > >> > >> Besides, the factory flag is intended to start a pblk instance > >> immediately, without having to pay the price of padding any past device > >> state. If you still want to do this, I think this belongs in a user space tool. > >> > > > > Hear, hear! > > > > Serially padding any open chunks during -f create call would be > > terrible user experience. > > Lets say that padding a chunk takes one second, we would, in a > > worst-case scenario in an example disk, be stuck in a syscall for > > 1*64*1000 seconds ~ 17 hours. > > > > You are both right - this can be time consuming. So we can drop the > "padding" part and let user do that. > > What about changing this patch to at least print some warning on dmesg > when we have some open chunks in such a case? Sounds good to me. > > > A tool, like dm-zoned's dmzadm would be the right approach, see > > Documentation/device-mapper/dm-zoned.txt > > All new pblk instances would then have to be pre-formatted with "pblkadm" > > > > A new physical storage format containing a superblock would also be a good idea. > > > > / Hans > >
On 3/4/19 9:27 AM, Javier González wrote: >> On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@intel.com> wrote: >> >> When we creating pblk instance with factory >> flag, there is a possibility that some chunks >> are in open state, which does not allow to >> issue erase request to them directly. Such a >> chunk should be filled with some empty data in >> order to achieve close state. Without that we >> are risking that some erase operation will be >> rejected by the drive due to inproper chunk >> state. >> >> This patch implements closing chunk logic in pblk >> for that case, when creating instance with factory >> flag in order to fix that issue >> >> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> >> --- >> drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ >> drivers/lightnvm/pblk-init.c | 14 ++++- >> drivers/lightnvm/pblk.h | 2 + >> 3 files changed, 128 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c >> index fa4dc05608ff..d3c45393f093 100644 >> --- a/drivers/lightnvm/pblk-core.c >> +++ b/drivers/lightnvm/pblk-core.c >> @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, >> return meta + ch_off + lun_off + chk_off; >> } >> >> +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) >> +{ >> + struct nvm_tgt_dev *dev = pblk->dev; >> + struct nvm_geo *geo = &dev->geo; >> + struct bio *bio; >> + struct ppa_addr *ppa_list; >> + struct nvm_rq rqd; >> + void *meta_list, *data; >> + dma_addr_t dma_meta_list, dma_ppa_list; >> + int i, rq_ppas, rq_len, ret; >> + >> + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); >> + if (!meta_list) >> + return; >> + >> + ppa_list = meta_list + pblk_dma_meta_size(pblk); >> + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); >> + >> + rq_ppas = pblk_calc_secs(pblk, count, 0, false); >> + if (!rq_ppas) >> + rq_ppas = pblk->min_write_pgs; >> + rq_len = rq_ppas * 
geo->csecs; >> + >> + data = kzalloc(rq_len, GFP_KERNEL); >> + if (!data) >> + goto free_meta_list; >> + >> + memset(&rqd, 0, sizeof(struct nvm_rq)); >> + rqd.opcode = NVM_OP_PWRITE; >> + rqd.nr_ppas = rq_ppas; >> + rqd.meta_list = meta_list; >> + rqd.ppa_list = ppa_list; >> + rqd.dma_ppa_list = dma_ppa_list; >> + rqd.dma_meta_list = dma_meta_list; >> + >> +next_rq: >> + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); >> + if (IS_ERR(bio)) >> + goto out_next; >> + >> + bio->bi_iter.bi_sector = 0; /* artificial bio */ >> + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); >> + >> + rqd.bio = bio; >> + for (i = 0; i < rqd.nr_ppas; i++) { >> + rqd.ppa_list[i] = ppa; >> + rqd.ppa_list[i].m.sec += i; >> + pblk_get_meta(pblk, meta_list, i)->lba = >> + cpu_to_le64(ADDR_EMPTY); >> + } >> + >> + ret = nvm_submit_io_sync(dev, &rqd); >> + if (ret) { >> + bio_put(bio); >> + goto out_next; >> + } >> + >> + if (rqd.error) >> + goto free_data; >> + >> +out_next: >> + count -= rqd.nr_ppas; >> + ppa.m.sec += rqd.nr_ppas; >> + if (count > 0) >> + goto next_rq; >> + >> +free_data: >> + kfree(data); >> +free_meta_list: >> + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); >> +} >> + >> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) >> +{ >> + struct nvm_tgt_dev *dev = pblk->dev; >> + struct nvm_geo *geo = &dev->geo; >> + struct nvm_chk_meta *chunk_meta; >> + struct ppa_addr ppa; >> + int i, j, k, count; >> + >> + for (i = 0; i < geo->num_chk; i++) { >> + for (j = 0; j < geo->num_lun; j++) { >> + for (k = 0; k < geo->num_ch; k++) { >> + ppa.ppa = 0; >> + ppa.m.grp = k; >> + ppa.m.pu = j; >> + ppa.m.chk = i; >> + >> + chunk_meta = pblk_chunk_get_off(pblk, >> + meta, ppa); >> + if (chunk_meta->state == NVM_CHK_ST_OPEN) { >> + ppa.m.sec = chunk_meta->wp; >> + count = geo->clba - chunk_meta->wp; >> + pblk_close_chunk(pblk, ppa, count); >> + } >> + } >> + } >> + } >> +} >> + >> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) >> +{ 
>> + struct nvm_tgt_dev *dev = pblk->dev; >> + struct nvm_geo *geo = &dev->geo; >> + int i; >> + >> + for (i = 0; i < geo->all_luns; i++) { >> + if (meta[i].state == NVM_CHK_ST_OPEN) >> + return true; >> + } >> + >> + return false; >> +} >> + >> void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, >> u64 paddr) >> { >> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c >> index 9913a4514eb6..83abe6960b46 100644 >> --- a/drivers/lightnvm/pblk-init.c >> +++ b/drivers/lightnvm/pblk-init.c >> @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) >> return 0; >> } >> >> -static int pblk_lines_init(struct pblk *pblk) >> +static int pblk_lines_init(struct pblk *pblk, bool factory_init) >> { >> struct pblk_line_mgmt *l_mg = &pblk->l_mg; >> struct pblk_line *line; >> void *chunk_meta; >> int nr_free_chks = 0; >> int i, ret; >> + bool retry = false; >> >> ret = pblk_line_meta_init(pblk); >> if (ret) >> @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) >> if (ret) >> goto fail_free_meta; >> >> +get_chunk_meta: >> chunk_meta = pblk_get_chunk_meta(pblk); >> if (IS_ERR(chunk_meta)) { >> ret = PTR_ERR(chunk_meta); >> goto fail_free_luns; >> } >> >> + if (factory_init && !retry && >> + pblk_are_opened_chunks(pblk, chunk_meta)) { >> + pblk_close_opened_chunks(pblk, chunk_meta); >> + retry = true; >> + vfree(chunk_meta); >> + goto get_chunk_meta; >> + } >> + >> pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), >> GFP_KERNEL); >> if (!pblk->lines) { >> @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, >> goto fail; >> } >> >> - ret = pblk_lines_init(pblk); >> + ret = pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY); >> if (ret) { >> pblk_err(pblk, "could not initialize lines\n"); >> goto fail_free_core; >> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h >> index b266563508e6..b248642c4dfb 100644 >> --- a/drivers/lightnvm/pblk.h >> +++ 
b/drivers/lightnvm/pblk.h >> @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); >> struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, >> struct nvm_chk_meta *lp, >> struct ppa_addr ppa); >> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); >> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); >> void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); >> void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); >> int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); >> -- >> 2.17.1 > > I know that the OCSSD 2.0 spec does not allow to transition from open to > free, but to me this is a spec bug as there is no underlying issue in > reading an open block. Note that all controllers I know support this, > and the upcoming Denali spec. fixes this too. The issue is not whether the chunk can be read. It is that going from Open -> Empty -> Open causes an erase on some implementations, and causes the media to wear out prematurely.
> > + Klaus whom I discussed this with. > Yeah, i think that "early reset" is a nice feature. It would be nice > to extend the OCSSD spec with a new capabilities bit indicating if > this is indeed supported or not. > Matias: what do you think? I don't mind as long as it is gated by a feature bit. An ECN can be made to fix that up and then software can be updated to read that bit for what to do.
> On 4 Mar 2019, at 14.18, Matias Bjørling <mb@lightnvm.io> wrote: > > On 3/4/19 9:27 AM, Javier González wrote: >>> On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@intel.com> wrote: >>> >>> When we creating pblk instance with factory >>> flag, there is a possibility that some chunks >>> are in open state, which does not allow to >>> issue erase request to them directly. Such a >>> chunk should be filled with some empty data in >>> order to achieve close state. Without that we >>> are risking that some erase operation will be >>> rejected by the drive due to inproper chunk >>> state. >>> >>> This patch implements closing chunk logic in pblk >>> for that case, when creating instance with factory >>> flag in order to fix that issue >>> >>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> >>> --- >>> drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ >>> drivers/lightnvm/pblk-init.c | 14 ++++- >>> drivers/lightnvm/pblk.h | 2 + >>> 3 files changed, 128 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c >>> index fa4dc05608ff..d3c45393f093 100644 >>> --- a/drivers/lightnvm/pblk-core.c >>> +++ b/drivers/lightnvm/pblk-core.c >>> @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, >>> return meta + ch_off + lun_off + chk_off; >>> } >>> >>> +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) >>> +{ >>> + struct nvm_tgt_dev *dev = pblk->dev; >>> + struct nvm_geo *geo = &dev->geo; >>> + struct bio *bio; >>> + struct ppa_addr *ppa_list; >>> + struct nvm_rq rqd; >>> + void *meta_list, *data; >>> + dma_addr_t dma_meta_list, dma_ppa_list; >>> + int i, rq_ppas, rq_len, ret; >>> + >>> + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); >>> + if (!meta_list) >>> + return; >>> + >>> + ppa_list = meta_list + pblk_dma_meta_size(pblk); >>> + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); >>> + >>> + rq_ppas 
= pblk_calc_secs(pblk, count, 0, false); >>> + if (!rq_ppas) >>> + rq_ppas = pblk->min_write_pgs; >>> + rq_len = rq_ppas * geo->csecs; >>> + >>> + data = kzalloc(rq_len, GFP_KERNEL); >>> + if (!data) >>> + goto free_meta_list; >>> + >>> + memset(&rqd, 0, sizeof(struct nvm_rq)); >>> + rqd.opcode = NVM_OP_PWRITE; >>> + rqd.nr_ppas = rq_ppas; >>> + rqd.meta_list = meta_list; >>> + rqd.ppa_list = ppa_list; >>> + rqd.dma_ppa_list = dma_ppa_list; >>> + rqd.dma_meta_list = dma_meta_list; >>> + >>> +next_rq: >>> + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); >>> + if (IS_ERR(bio)) >>> + goto out_next; >>> + >>> + bio->bi_iter.bi_sector = 0; /* artificial bio */ >>> + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); >>> + >>> + rqd.bio = bio; >>> + for (i = 0; i < rqd.nr_ppas; i++) { >>> + rqd.ppa_list[i] = ppa; >>> + rqd.ppa_list[i].m.sec += i; >>> + pblk_get_meta(pblk, meta_list, i)->lba = >>> + cpu_to_le64(ADDR_EMPTY); >>> + } >>> + >>> + ret = nvm_submit_io_sync(dev, &rqd); >>> + if (ret) { >>> + bio_put(bio); >>> + goto out_next; >>> + } >>> + >>> + if (rqd.error) >>> + goto free_data; >>> + >>> +out_next: >>> + count -= rqd.nr_ppas; >>> + ppa.m.sec += rqd.nr_ppas; >>> + if (count > 0) >>> + goto next_rq; >>> + >>> +free_data: >>> + kfree(data); >>> +free_meta_list: >>> + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); >>> +} >>> + >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) >>> +{ >>> + struct nvm_tgt_dev *dev = pblk->dev; >>> + struct nvm_geo *geo = &dev->geo; >>> + struct nvm_chk_meta *chunk_meta; >>> + struct ppa_addr ppa; >>> + int i, j, k, count; >>> + >>> + for (i = 0; i < geo->num_chk; i++) { >>> + for (j = 0; j < geo->num_lun; j++) { >>> + for (k = 0; k < geo->num_ch; k++) { >>> + ppa.ppa = 0; >>> + ppa.m.grp = k; >>> + ppa.m.pu = j; >>> + ppa.m.chk = i; >>> + >>> + chunk_meta = pblk_chunk_get_off(pblk, >>> + meta, ppa); >>> + if (chunk_meta->state == NVM_CHK_ST_OPEN) { >>> + ppa.m.sec = chunk_meta->wp; >>> + count 
= geo->clba - chunk_meta->wp; >>> + pblk_close_chunk(pblk, ppa, count); >>> + } >>> + } >>> + } >>> + } >>> +} >>> + >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) >>> +{ >>> + struct nvm_tgt_dev *dev = pblk->dev; >>> + struct nvm_geo *geo = &dev->geo; >>> + int i; >>> + >>> + for (i = 0; i < geo->all_luns; i++) { >>> + if (meta[i].state == NVM_CHK_ST_OPEN) >>> + return true; >>> + } >>> + >>> + return false; >>> +} >>> + >>> void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, >>> u64 paddr) >>> { >>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c >>> index 9913a4514eb6..83abe6960b46 100644 >>> --- a/drivers/lightnvm/pblk-init.c >>> +++ b/drivers/lightnvm/pblk-init.c >>> @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) >>> return 0; >>> } >>> >>> -static int pblk_lines_init(struct pblk *pblk) >>> +static int pblk_lines_init(struct pblk *pblk, bool factory_init) >>> { >>> struct pblk_line_mgmt *l_mg = &pblk->l_mg; >>> struct pblk_line *line; >>> void *chunk_meta; >>> int nr_free_chks = 0; >>> int i, ret; >>> + bool retry = false; >>> >>> ret = pblk_line_meta_init(pblk); >>> if (ret) >>> @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) >>> if (ret) >>> goto fail_free_meta; >>> >>> +get_chunk_meta: >>> chunk_meta = pblk_get_chunk_meta(pblk); >>> if (IS_ERR(chunk_meta)) { >>> ret = PTR_ERR(chunk_meta); >>> goto fail_free_luns; >>> } >>> >>> + if (factory_init && !retry && >>> + pblk_are_opened_chunks(pblk, chunk_meta)) { >>> + pblk_close_opened_chunks(pblk, chunk_meta); >>> + retry = true; >>> + vfree(chunk_meta); >>> + goto get_chunk_meta; >>> + } >>> + >>> pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), >>> GFP_KERNEL); >>> if (!pblk->lines) { >>> @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, >>> goto fail; >>> } >>> >>> - ret = pblk_lines_init(pblk); >>> + ret = pblk_lines_init(pblk, 
flags & NVM_TARGET_FACTORY); >>> if (ret) { >>> pblk_err(pblk, "could not initialize lines\n"); >>> goto fail_free_core; >>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h >>> index b266563508e6..b248642c4dfb 100644 >>> --- a/drivers/lightnvm/pblk.h >>> +++ b/drivers/lightnvm/pblk.h >>> @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); >>> struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, >>> struct nvm_chk_meta *lp, >>> struct ppa_addr ppa); >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); >>> void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); >>> void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); >>> int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); >>> -- >>> 2.17.1 >> I know that the OCSSD 2.0 spec does not allow to transition from open to >> free, but to me this is a spec bug as there is no underlying issue in >> reading an open block. Note that all controllers I know support this, >> and the upcoming Denali spec. fixes this too. > > The issue is not whether the chunk can be read. It is that going from > Open -> Empty -> Open causes an erase on some implementations, and > causes the media to wear out prematurely. If the host is padding data to be able to close, the effect on wear is the same.
> On 4 Mar 2019, at 14.19, Matias Bjørling <mb@lightnvm.io> wrote: > >> + Klaus whom I discussed this with. >> Yeah, i think that "early reset" is a nice feature. It would be nice >> to extend the OCSSD spec with a new capabilities bit indicating if >> this is indeed supported or not. >> Matias: what do you think? > > I don't mind as long as it is gated by a feature bit. An ECN can be > made to fix that up and then software can be updated to read that bit > for what to do. Works for me. Javier
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index fa4dc05608ff..d3c45393f093 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, return meta + ch_off + lun_off + chk_off; } +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct bio *bio; + struct ppa_addr *ppa_list; + struct nvm_rq rqd; + void *meta_list, *data; + dma_addr_t dma_meta_list, dma_ppa_list; + int i, rq_ppas, rq_len, ret; + + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); + if (!meta_list) + return; + + ppa_list = meta_list + pblk_dma_meta_size(pblk); + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); + + rq_ppas = pblk_calc_secs(pblk, count, 0, false); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->csecs; + + data = kzalloc(rq_len, GFP_KERNEL); + if (!data) + goto free_meta_list; + + memset(&rqd, 0, sizeof(struct nvm_rq)); + rqd.opcode = NVM_OP_PWRITE; + rqd.nr_ppas = rq_ppas; + rqd.meta_list = meta_list; + rqd.ppa_list = ppa_list; + rqd.dma_ppa_list = dma_ppa_list; + rqd.dma_meta_list = dma_meta_list; + +next_rq: + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + goto out_next; + + bio->bi_iter.bi_sector = 0; /* artificial bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + rqd.bio = bio; + for (i = 0; i < rqd.nr_ppas; i++) { + rqd.ppa_list[i] = ppa; + rqd.ppa_list[i].m.sec += i; + pblk_get_meta(pblk, meta_list, i)->lba = + cpu_to_le64(ADDR_EMPTY); + } + + ret = nvm_submit_io_sync(dev, &rqd); + if (ret) { + bio_put(bio); + goto out_next; + } + + if (rqd.error) + goto free_data; + +out_next: + count -= rqd.nr_ppas; + ppa.m.sec += rqd.nr_ppas; + if (count > 0) + goto next_rq; + +free_data: + kfree(data); +free_meta_list: + nvm_dev_dma_free(dev->parent, meta_list, 
dma_meta_list); +} + +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_chk_meta *chunk_meta; + struct ppa_addr ppa; + int i, j, k, count; + + for (i = 0; i < geo->num_chk; i++) { + for (j = 0; j < geo->num_lun; j++) { + for (k = 0; k < geo->num_ch; k++) { + ppa.ppa = 0; + ppa.m.grp = k; + ppa.m.pu = j; + ppa.m.chk = i; + + chunk_meta = pblk_chunk_get_off(pblk, + meta, ppa); + if (chunk_meta->state == NVM_CHK_ST_OPEN) { + ppa.m.sec = chunk_meta->wp; + count = geo->clba - chunk_meta->wp; + pblk_close_chunk(pblk, ppa, count); + } + } + } + } +} + +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int i; + + for (i = 0; i < geo->all_luns; i++) { + if (meta[i].state == NVM_CHK_ST_OPEN) + return true; + } + + return false; +} + void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, u64 paddr) { diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 9913a4514eb6..83abe6960b46 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk) return 0; } -static int pblk_lines_init(struct pblk *pblk) +static int pblk_lines_init(struct pblk *pblk, bool factory_init) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line; void *chunk_meta; int nr_free_chks = 0; int i, ret; + bool retry = false; ret = pblk_line_meta_init(pblk); if (ret) @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk) if (ret) goto fail_free_meta; +get_chunk_meta: chunk_meta = pblk_get_chunk_meta(pblk); if (IS_ERR(chunk_meta)) { ret = PTR_ERR(chunk_meta); goto fail_free_luns; } + if (factory_init && !retry && + pblk_are_opened_chunks(pblk, chunk_meta)) { + pblk_close_opened_chunks(pblk, chunk_meta); + retry = true; + vfree(chunk_meta); + 
goto get_chunk_meta; + } + pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), GFP_KERNEL); if (!pblk->lines) { @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, goto fail; } - ret = pblk_lines_init(pblk); + ret = pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY); if (ret) { pblk_err(pblk, "could not initialize lines\n"); goto fail_free_core; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index b266563508e6..b248642c4dfb 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, struct nvm_chk_meta *lp, struct ppa_addr ppa); +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta); void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
When we are creating a pblk instance with the factory flag, there is a possibility that some chunks are in the open state, which does not allow issuing an erase request to them directly. Such a chunk should be filled with some empty data in order to achieve the closed state. Without that we are risking that some erase operations will be rejected by the drive due to an improper chunk state. This patch implements chunk-closing logic in pblk for that case, when creating an instance with the factory flag, in order to fix that issue. Signed-off-by: Igor Konopko <igor.j.konopko@intel.com> --- drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++ drivers/lightnvm/pblk-init.c | 14 ++++- drivers/lightnvm/pblk.h | 2 + 3 files changed, 128 insertions(+), 2 deletions(-)