Message ID | 1454591299-30305-4-git-send-email-javier@javigon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 02/04/2016 02:08 PM, Javier González wrote: > Since writes are buffered in memory, incoming reads must retrieve > buffered pages instead of submitting the I/O to the media. > > This patch implements this logic. When a read bio arrives to rrpc, valid > pages from the flash blocks residing in memory are copied. If there are > any "holes" in the bio, a new bio is submitted to the media to retrieve > the necessary pages. The original bio is updated accordingly. > > Signed-off-by: Javier González <javier@cnexlabs.com> > --- > drivers/lightnvm/rrpc.c | 451 ++++++++++++++++++++++++++++++++++++----------- > include/linux/lightnvm.h | 1 + > 2 files changed, 346 insertions(+), 106 deletions(-) > > diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c > index e9fb19d..6348d52 100644 > --- a/drivers/lightnvm/rrpc.c > +++ b/drivers/lightnvm/rrpc.c > @@ -827,10 +827,13 @@ static void rrpc_end_io(struct nvm_rq *rqd) > struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); > uint8_t nr_pages = rqd->nr_pages; > > - if (bio_data_dir(rqd->bio) == WRITE) > + if (bio_data_dir(rqd->bio) == WRITE) { > rrpc_end_io_write(rrpc, rqd, nr_pages); > - else > + } else { > + if (rqd->flags & NVM_IOTYPE_SYNC) > + return; > rrpc_end_io_read(rrpc, rqd, nr_pages); > + } > > bio_put(rqd->bio); > > @@ -842,83 +845,6 @@ static void rrpc_end_io(struct nvm_rq *rqd) > mempool_free(rqd, rrpc->rq_pool); > } > > -static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, > - struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd, > - unsigned long flags, int nr_pages) > -{ > - struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); > - struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd); > - struct rrpc_addr *gp; > - sector_t laddr = rrpc_get_laddr(bio); > - int is_gc = flags & NVM_IOTYPE_GC; > - int i; > - > - if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) { > - nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); > - mempool_free(rrqd, rrpc->rrq_pool); > - mempool_free(rqd, 
rrpc->rq_pool); > - return NVM_IO_REQUEUE; > - } > - > - for (i = 0; i < nr_pages; i++) { > - /* We assume that mapping occurs at 4KB granularity */ > - BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); > - gp = &rrpc->trans_map[laddr + i]; > - > - if (gp->rblk) { > - rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, > - gp->addr); > - } else { > - BUG_ON(is_gc); > - rrpc_unlock_laddr(rrpc, r); > - nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, > - rqd->dma_ppa_list); > - mempool_free(rrqd, rrpc->rrq_pool); > - mempool_free(rqd, rrpc->rq_pool); > - return NVM_IO_DONE; > - } > - > - brrqd[i].addr = gp; > - } > - > - rqd->opcode = NVM_OP_HBREAD; > - > - return NVM_IO_OK; > -} > - > -static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, > - unsigned long flags) > -{ > - struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); > - int is_gc = flags & NVM_IOTYPE_GC; > - sector_t laddr = rrpc_get_laddr(bio); > - struct rrpc_addr *gp; > - > - if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) { > - mempool_free(rrqd, rrpc->rrq_pool); > - mempool_free(rqd, rrpc->rq_pool); > - return NVM_IO_REQUEUE; > - } > - > - BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); > - gp = &rrpc->trans_map[laddr]; > - > - if (gp->rblk) { > - rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr); > - } else { > - BUG_ON(is_gc); > - rrpc_unlock_rq(rrpc, rrqd); > - mempool_free(rrqd, rrpc->rrq_pool); > - mempool_free(rqd, rrpc->rq_pool); > - return NVM_IO_DONE; > - } > - > - rqd->opcode = NVM_OP_HBREAD; > - rrqd->addr = gp; > - > - return NVM_IO_OK; > -} > - > /* > * Copy data from current bio to block write buffer. 
This if necessary > * to guarantee durability if a flash block becomes bad before all pages > @@ -1051,14 +977,335 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio, > return NVM_IO_DONE; > } > > +static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio, > + struct rrpc_rq *rrqd, unsigned long flags) > +{ > + uint8_t nr_pages = rrpc_get_pages(bio); > + > + rrqd->nr_pages = nr_pages; > + > + if (nr_pages > 1) > + return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages); > + else > + return rrpc_write_rq(rrpc, bio, rrqd, flags); > +} > + > +static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, > + struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd, > + unsigned long flags, int nr_pages) > +{ > + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); > + struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd); > + struct rrpc_addr *gp; > + sector_t laddr = rrpc_get_laddr(bio); > + int is_gc = flags & NVM_IOTYPE_GC; > + int i; > + > + if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) { > + nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); > + return NVM_IO_REQUEUE; > + } > + > + for (i = 0; i < nr_pages; i++) { > + /* We assume that mapping occurs at 4KB granularity */ > + BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); > + gp = &rrpc->trans_map[laddr + i]; > + > + if (gp->rblk) { > + rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, > + gp->addr); > + } else { > + BUG_ON(is_gc); > + rrpc_unlock_laddr(rrpc, r); > + nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, > + rqd->dma_ppa_list); > + return NVM_IO_DONE; > + } > + > + brrqd[i].addr = gp; > + } > + > + rqd->opcode = NVM_OP_HBREAD; > + > + return NVM_IO_OK; > +} > + > +static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, > + unsigned long flags) > +{ > + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); > + int is_gc = flags & NVM_IOTYPE_GC; > + sector_t laddr = rrpc_get_laddr(bio); > + struct rrpc_addr *gp; > + > + if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) > + 
return NVM_IO_REQUEUE; > + > + BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); > + gp = &rrpc->trans_map[laddr]; > + > + if (gp->rblk) { > + rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr); > + } else { > + BUG_ON(is_gc); > + rrpc_unlock_rq(rrpc, rrqd); > + return NVM_IO_DONE; > + } > + > + rqd->opcode = NVM_OP_HBREAD; > + rrqd->addr = gp; > + > + return NVM_IO_OK; > +} > + > +static int rrpc_read_w_buf_entry(struct bio *bio, struct rrpc_block *rblk, > + struct bvec_iter iter, int entry) > +{ > + struct buf_entry *read_entry; > + struct bio_vec bv; > + struct page *page; > + void *kaddr; > + void *data; > + int read = 0; > + > + lockdep_assert_held(&rblk->w_buf.s_lock); > + > + spin_lock(&rblk->w_buf.w_lock); > + if (entry >= rblk->w_buf.cur_mem) { > + spin_unlock(&rblk->w_buf.w_lock); > + goto out; > + } > + spin_unlock(&rblk->w_buf.w_lock); > + > + read_entry = &rblk->w_buf.entries[entry]; > + data = read_entry->data; > + > + bv = bio_iter_iovec(bio, iter); > + page = bv.bv_page; > + kaddr = kmap_atomic(page); > + memcpy(kaddr + bv.bv_offset, data, RRPC_EXPOSED_PAGE_SIZE); > + kunmap_atomic(kaddr); > + read++; > + > +out: > + return read; > +} > + > +static int rrpc_read_from_w_buf(struct rrpc *rrpc, struct nvm_rq *rqd, > + struct rrpc_buf_rq *brrqd, unsigned long *read_bitmap) > +{ > + struct nvm_dev *dev = rrpc->dev; > + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); > + struct rrpc_addr *addr; > + struct bio *bio = rqd->bio; > + struct bvec_iter iter = bio->bi_iter; > + struct rrpc_block *rblk; > + unsigned long blk_id; > + int nr_pages = rqd->nr_pages; > + int left = nr_pages; > + int read = 0; > + int entry; > + int i; > + > + if (nr_pages != bio->bi_vcnt) > + goto out; > + > + if (nr_pages == 1) { > + rblk = rrqd->addr->rblk; > + > + /* If the write buffer exists, the block is open in memory */ > + spin_lock(&rblk->w_buf.s_lock); > + atomic_inc(&rblk->w_buf.refs); > + if (rblk->w_buf.entries) { > + blk_id = rblk->parent->id; > + entry = rrqd->addr->addr 
- > + (blk_id * dev->sec_per_pg * dev->pgs_per_blk); > + > + read = rrpc_read_w_buf_entry(bio, rblk, iter, entry); > + > + left -= read; > + WARN_ON(test_and_set_bit(0, read_bitmap)); > + } > + bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE); > + > + atomic_dec(&rblk->w_buf.refs); > + spin_unlock(&rblk->w_buf.s_lock); > + > + goto out; > + } > + > + /* Iterate through all pages and copy those that are found in the write > + * buffer. We will complete the holes (if any) with a intermediate bio > + * later on > + */ > + for (i = 0; i < nr_pages; i++) { > + addr = brrqd[i].addr; > + rblk = addr->rblk; > + > + /* If the write buffer exists, the block is open in memory */ > + spin_lock(&rblk->w_buf.s_lock); > + atomic_inc(&rblk->w_buf.refs); > + if (rblk->w_buf.entries) { > + blk_id = rblk->parent->id; > + entry = addr->addr - (blk_id * dev->sec_per_pg * > + dev->pgs_per_blk); > + > + read = rrpc_read_w_buf_entry(bio, rblk, iter, entry); > + > + left -= read; > + WARN_ON(test_and_set_bit(i, read_bitmap)); > + } > + bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE); > + > + atomic_dec(&rblk->w_buf.refs); > + spin_unlock(&rblk->w_buf.s_lock); > + } > + > +out: > + return left; > +} > + > +static int rrpc_submit_read_io(struct rrpc *rrpc, struct bio *bio, > + struct nvm_rq *rqd, unsigned long flags) > +{ > + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); > + int err; > + > + err = nvm_submit_io(rrpc->dev, rqd); > + if (err) { > + pr_err("rrpc: I/O submission failed: %d\n", err); > + bio_put(bio); > + if (!(flags & NVM_IOTYPE_GC)) { > + rrpc_unlock_rq(rrpc, rrqd); > + if (rqd->nr_pages > 1) > + nvm_dev_dma_free(rrpc->dev, > + rqd->ppa_list, rqd->dma_ppa_list); > + } > + return NVM_IO_ERR; > + } > + > + return NVM_IO_OK; > +} > + > +static int rrpc_fill_partial_read_bio(struct rrpc *rrpc, struct bio *bio, > + unsigned long *read_bitmap, struct nvm_rq *rqd, > + struct rrpc_buf_rq *brrqd, uint8_t nr_pages) > +{ > + struct bio *new_bio; > + struct page *page; > + struct 
bio_vec src_bv, dst_bv; > + void *src_p, *dst_p; > + int nr_holes = nr_pages - bitmap_weight(read_bitmap, nr_pages); > + int hole; > + int i = 0; > + int ret; > + DECLARE_COMPLETION_ONSTACK(wait); > + > + new_bio = bio_alloc(GFP_KERNEL, nr_holes); > + if (!new_bio) { > + pr_err("nvm: rrpc: could not alloc read bio\n"); > + return NVM_IO_ERR; > + } > + > + hole = find_first_zero_bit(read_bitmap, nr_pages); > + do { > + page = mempool_alloc(rrpc->page_pool, GFP_KERNEL); > + if (!page) { > + bio_put(new_bio); > + pr_err("nvm: rrpc: could not alloc read page\n"); > + goto err; > + } > + > + ret = bio_add_page(new_bio, page, RRPC_EXPOSED_PAGE_SIZE, 0); > + if (ret != RRPC_EXPOSED_PAGE_SIZE) { > + pr_err("nvm: rrpc: could not add page to bio\n"); > + mempool_free(page, rrpc->page_pool); > + goto err; > + } > + > + rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, > + brrqd[hole].addr->addr); > + > + i++; > + hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1); > + } while (hole != nr_pages); > + > + if (nr_holes != new_bio->bi_vcnt) { > + pr_err("rrpc: malformed bio\n"); > + goto err; > + } > + > + new_bio->bi_iter.bi_sector = bio->bi_iter.bi_sector; > + new_bio->bi_rw = READ; > + new_bio->bi_private = &wait; > + new_bio->bi_end_io = rrpc_end_sync_bio; > + > + rqd->flags |= NVM_IOTYPE_SYNC; > + rqd->bio = new_bio; > + rqd->nr_pages = nr_holes; > + > + rrpc_submit_read_io(rrpc, new_bio, rqd, rqd->flags); > + wait_for_completion_io(&wait); > + > + if (new_bio->bi_error) > + goto err; > + > + /* Fill the holes in the original bio */ > + i = 0; > + hole = find_first_zero_bit(read_bitmap, nr_pages); > + do { > + src_bv = new_bio->bi_io_vec[i]; > + dst_bv = bio->bi_io_vec[hole]; > + > + src_p = kmap_atomic(src_bv.bv_page); > + dst_p = kmap_atomic(dst_bv.bv_page); > + > + memcpy(dst_p + dst_bv.bv_offset, > + src_p + src_bv.bv_offset, > + RRPC_EXPOSED_PAGE_SIZE); > + > + kunmap_atomic(src_p); > + kunmap_atomic(dst_p); > + > + mempool_free(&src_bv.bv_page, rrpc->page_pool); 
> + > + i++; > + hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1); > + } while (hole != nr_pages); > + > + bio_put(new_bio); > + > + /* Complete the original bio and associated request */ > + rqd->flags &= ~NVM_IOTYPE_SYNC; > + rqd->bio = bio; > + rqd->nr_pages = nr_pages; > + > + bio_endio(bio); > + rrpc_end_io(rqd); > + return NVM_IO_OK; > + > +err: > + /* Free allocated pages in new bio */ > + for (i = 0; i < new_bio->bi_vcnt; i++) { > + src_bv = new_bio->bi_io_vec[i]; > + mempool_free(&src_bv.bv_page, rrpc->page_pool); > + } > + bio_endio(new_bio); > + return NVM_IO_ERR; > +} > + > static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio, > struct rrpc_rq *rrqd, unsigned long flags) > { > struct nvm_rq *rqd; > struct rrpc_buf_rq brrqd[rrpc->max_write_pgs]; > + unsigned long read_bitmap; /* Max 64 ppas per request */ > + uint8_t left; > uint8_t nr_pages = rrpc_get_pages(bio); > int err; > > + bitmap_zero(&read_bitmap, nr_pages); > + > rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); > if (!rqd) { > pr_err_ratelimited("rrpc: not able to queue bio."); > @@ -1073,22 +1320,25 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio, > &rqd->dma_ppa_list); > if (!rqd->ppa_list) { > pr_err("rrpc: not able to allocate ppa list\n"); > - mempool_free(rrqd, rrpc->rrq_pool); > mempool_free(rqd, rrpc->rq_pool); > + mempool_free(rrqd, rrpc->rrq_pool); > return NVM_IO_ERR; > } > > err = rrpc_read_ppalist_rq(rrpc, bio, rqd, brrqd, flags, > nr_pages); > if (err) { > - mempool_free(rrqd, rrpc->rrq_pool); > mempool_free(rqd, rrpc->rq_pool); > + mempool_free(rrqd, rrpc->rrq_pool); > return err; > } > } else { > err = rrpc_read_rq(rrpc, bio, rqd, flags); > - if (err) > + if (err) { > + mempool_free(rrqd, rrpc->rrq_pool); > + mempool_free(rqd, rrpc->rq_pool); > return err; > + } > } > > bio_get(bio); > @@ -1097,33 +1347,22 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio, > rqd->nr_pages = rrqd->nr_pages = nr_pages; > rqd->flags = flags; > > 
- err = nvm_submit_io(rrpc->dev, rqd); > - if (err) { > - pr_err("rrpc: I/O submission failed: %d\n", err); > - bio_put(bio); > - if (!(flags & NVM_IOTYPE_GC)) { > - rrpc_unlock_rq(rrpc, rrqd); > - if (rqd->nr_pages > 1) > - nvm_dev_dma_free(rrpc->dev, > - rqd->ppa_list, rqd->dma_ppa_list); > - } > + left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap); > + if (left == 0) { > + bio_endio(bio); > + rrpc_end_io(rqd); > + return NVM_IO_OK; > + } else if (left < 0) > return NVM_IO_ERR; > - } > > - return NVM_IO_OK; > -} > + if (bitmap_empty(&read_bitmap, nr_pages)) > + return rrpc_submit_read_io(rrpc, bio, rqd, flags); > > -static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio, > - struct rrpc_rq *rrqd, unsigned long flags) > -{ > - uint8_t nr_pages = rrpc_get_pages(bio); > - > - rrqd->nr_pages = nr_pages; > - > - if (nr_pages > 1) > - return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages); > - else > - return rrpc_write_rq(rrpc, bio, rrqd, flags); > + /* The read bio could not be completely read from the write buffer. This > + * case only occurs when several pages are sent in a single bio > + */ > + return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap, rqd, brrqd, > + nr_pages); > } > > static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, > diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h > index eda9743..ae26ced 100644 > --- a/include/linux/lightnvm.h > +++ b/include/linux/lightnvm.h > @@ -11,6 +11,7 @@ enum { > > NVM_IOTYPE_NONE = 0, > NVM_IOTYPE_GC = 1, > + NVM_IOTYPE_SYNC = 2, > }; > > #define NVM_BLK_BITS (16) > Seems like this can be merged into the write buffer patch as well? -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index e9fb19d..6348d52 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -827,10 +827,13 @@ static void rrpc_end_io(struct nvm_rq *rqd) struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); uint8_t nr_pages = rqd->nr_pages; - if (bio_data_dir(rqd->bio) == WRITE) + if (bio_data_dir(rqd->bio) == WRITE) { rrpc_end_io_write(rrpc, rqd, nr_pages); - else + } else { + if (rqd->flags & NVM_IOTYPE_SYNC) + return; rrpc_end_io_read(rrpc, rqd, nr_pages); + } bio_put(rqd->bio); @@ -842,83 +845,6 @@ static void rrpc_end_io(struct nvm_rq *rqd) mempool_free(rqd, rrpc->rq_pool); } -static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, - struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd, - unsigned long flags, int nr_pages) -{ - struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); - struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd); - struct rrpc_addr *gp; - sector_t laddr = rrpc_get_laddr(bio); - int is_gc = flags & NVM_IOTYPE_GC; - int i; - - if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) { - nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); - mempool_free(rrqd, rrpc->rrq_pool); - mempool_free(rqd, rrpc->rq_pool); - return NVM_IO_REQUEUE; - } - - for (i = 0; i < nr_pages; i++) { - /* We assume that mapping occurs at 4KB granularity */ - BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); - gp = &rrpc->trans_map[laddr + i]; - - if (gp->rblk) { - rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, - gp->addr); - } else { - BUG_ON(is_gc); - rrpc_unlock_laddr(rrpc, r); - nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, - rqd->dma_ppa_list); - mempool_free(rrqd, rrpc->rrq_pool); - mempool_free(rqd, rrpc->rq_pool); - return NVM_IO_DONE; - } - - brrqd[i].addr = gp; - } - - rqd->opcode = NVM_OP_HBREAD; - - return NVM_IO_OK; -} - -static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, - unsigned long flags) -{ - struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); - 
int is_gc = flags & NVM_IOTYPE_GC; - sector_t laddr = rrpc_get_laddr(bio); - struct rrpc_addr *gp; - - if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) { - mempool_free(rrqd, rrpc->rrq_pool); - mempool_free(rqd, rrpc->rq_pool); - return NVM_IO_REQUEUE; - } - - BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); - gp = &rrpc->trans_map[laddr]; - - if (gp->rblk) { - rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr); - } else { - BUG_ON(is_gc); - rrpc_unlock_rq(rrpc, rrqd); - mempool_free(rrqd, rrpc->rrq_pool); - mempool_free(rqd, rrpc->rq_pool); - return NVM_IO_DONE; - } - - rqd->opcode = NVM_OP_HBREAD; - rrqd->addr = gp; - - return NVM_IO_OK; -} - /* * Copy data from current bio to block write buffer. This if necessary * to guarantee durability if a flash block becomes bad before all pages @@ -1051,14 +977,335 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio, return NVM_IO_DONE; } +static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio, + struct rrpc_rq *rrqd, unsigned long flags) +{ + uint8_t nr_pages = rrpc_get_pages(bio); + + rrqd->nr_pages = nr_pages; + + if (nr_pages > 1) + return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages); + else + return rrpc_write_rq(rrpc, bio, rrqd, flags); +} + +static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, + struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd, + unsigned long flags, int nr_pages) +{ + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); + struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd); + struct rrpc_addr *gp; + sector_t laddr = rrpc_get_laddr(bio); + int is_gc = flags & NVM_IOTYPE_GC; + int i; + + if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) { + nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); + return NVM_IO_REQUEUE; + } + + for (i = 0; i < nr_pages; i++) { + /* We assume that mapping occurs at 4KB granularity */ + BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); + gp = &rrpc->trans_map[laddr + i]; + + if (gp->rblk) { + rqd->ppa_list[i] = 
rrpc_ppa_to_gaddr(rrpc->dev, + gp->addr); + } else { + BUG_ON(is_gc); + rrpc_unlock_laddr(rrpc, r); + nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, + rqd->dma_ppa_list); + return NVM_IO_DONE; + } + + brrqd[i].addr = gp; + } + + rqd->opcode = NVM_OP_HBREAD; + + return NVM_IO_OK; +} + +static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, + unsigned long flags) +{ + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); + int is_gc = flags & NVM_IOTYPE_GC; + sector_t laddr = rrpc_get_laddr(bio); + struct rrpc_addr *gp; + + if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) + return NVM_IO_REQUEUE; + + BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); + gp = &rrpc->trans_map[laddr]; + + if (gp->rblk) { + rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr); + } else { + BUG_ON(is_gc); + rrpc_unlock_rq(rrpc, rrqd); + return NVM_IO_DONE; + } + + rqd->opcode = NVM_OP_HBREAD; + rrqd->addr = gp; + + return NVM_IO_OK; +} + +static int rrpc_read_w_buf_entry(struct bio *bio, struct rrpc_block *rblk, + struct bvec_iter iter, int entry) +{ + struct buf_entry *read_entry; + struct bio_vec bv; + struct page *page; + void *kaddr; + void *data; + int read = 0; + + lockdep_assert_held(&rblk->w_buf.s_lock); + + spin_lock(&rblk->w_buf.w_lock); + if (entry >= rblk->w_buf.cur_mem) { + spin_unlock(&rblk->w_buf.w_lock); + goto out; + } + spin_unlock(&rblk->w_buf.w_lock); + + read_entry = &rblk->w_buf.entries[entry]; + data = read_entry->data; + + bv = bio_iter_iovec(bio, iter); + page = bv.bv_page; + kaddr = kmap_atomic(page); + memcpy(kaddr + bv.bv_offset, data, RRPC_EXPOSED_PAGE_SIZE); + kunmap_atomic(kaddr); + read++; + +out: + return read; +} + +static int rrpc_read_from_w_buf(struct rrpc *rrpc, struct nvm_rq *rqd, + struct rrpc_buf_rq *brrqd, unsigned long *read_bitmap) +{ + struct nvm_dev *dev = rrpc->dev; + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); + struct rrpc_addr *addr; + struct bio *bio = rqd->bio; + struct bvec_iter iter = bio->bi_iter; + struct rrpc_block *rblk; + 
unsigned long blk_id; + int nr_pages = rqd->nr_pages; + int left = nr_pages; + int read = 0; + int entry; + int i; + + if (nr_pages != bio->bi_vcnt) + goto out; + + if (nr_pages == 1) { + rblk = rrqd->addr->rblk; + + /* If the write buffer exists, the block is open in memory */ + spin_lock(&rblk->w_buf.s_lock); + atomic_inc(&rblk->w_buf.refs); + if (rblk->w_buf.entries) { + blk_id = rblk->parent->id; + entry = rrqd->addr->addr - + (blk_id * dev->sec_per_pg * dev->pgs_per_blk); + + read = rrpc_read_w_buf_entry(bio, rblk, iter, entry); + + left -= read; + WARN_ON(test_and_set_bit(0, read_bitmap)); + } + bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE); + + atomic_dec(&rblk->w_buf.refs); + spin_unlock(&rblk->w_buf.s_lock); + + goto out; + } + + /* Iterate through all pages and copy those that are found in the write + * buffer. We will complete the holes (if any) with a intermediate bio + * later on + */ + for (i = 0; i < nr_pages; i++) { + addr = brrqd[i].addr; + rblk = addr->rblk; + + /* If the write buffer exists, the block is open in memory */ + spin_lock(&rblk->w_buf.s_lock); + atomic_inc(&rblk->w_buf.refs); + if (rblk->w_buf.entries) { + blk_id = rblk->parent->id; + entry = addr->addr - (blk_id * dev->sec_per_pg * + dev->pgs_per_blk); + + read = rrpc_read_w_buf_entry(bio, rblk, iter, entry); + + left -= read; + WARN_ON(test_and_set_bit(i, read_bitmap)); + } + bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE); + + atomic_dec(&rblk->w_buf.refs); + spin_unlock(&rblk->w_buf.s_lock); + } + +out: + return left; +} + +static int rrpc_submit_read_io(struct rrpc *rrpc, struct bio *bio, + struct nvm_rq *rqd, unsigned long flags) +{ + struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); + int err; + + err = nvm_submit_io(rrpc->dev, rqd); + if (err) { + pr_err("rrpc: I/O submission failed: %d\n", err); + bio_put(bio); + if (!(flags & NVM_IOTYPE_GC)) { + rrpc_unlock_rq(rrpc, rrqd); + if (rqd->nr_pages > 1) + nvm_dev_dma_free(rrpc->dev, + rqd->ppa_list, rqd->dma_ppa_list); + } + 
return NVM_IO_ERR; + } + + return NVM_IO_OK; +} + +static int rrpc_fill_partial_read_bio(struct rrpc *rrpc, struct bio *bio, + unsigned long *read_bitmap, struct nvm_rq *rqd, + struct rrpc_buf_rq *brrqd, uint8_t nr_pages) +{ + struct bio *new_bio; + struct page *page; + struct bio_vec src_bv, dst_bv; + void *src_p, *dst_p; + int nr_holes = nr_pages - bitmap_weight(read_bitmap, nr_pages); + int hole; + int i = 0; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); + + new_bio = bio_alloc(GFP_KERNEL, nr_holes); + if (!new_bio) { + pr_err("nvm: rrpc: could not alloc read bio\n"); + return NVM_IO_ERR; + } + + hole = find_first_zero_bit(read_bitmap, nr_pages); + do { + page = mempool_alloc(rrpc->page_pool, GFP_KERNEL); + if (!page) { + bio_put(new_bio); + pr_err("nvm: rrpc: could not alloc read page\n"); + goto err; + } + + ret = bio_add_page(new_bio, page, RRPC_EXPOSED_PAGE_SIZE, 0); + if (ret != RRPC_EXPOSED_PAGE_SIZE) { + pr_err("nvm: rrpc: could not add page to bio\n"); + mempool_free(page, rrpc->page_pool); + goto err; + } + + rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, + brrqd[hole].addr->addr); + + i++; + hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1); + } while (hole != nr_pages); + + if (nr_holes != new_bio->bi_vcnt) { + pr_err("rrpc: malformed bio\n"); + goto err; + } + + new_bio->bi_iter.bi_sector = bio->bi_iter.bi_sector; + new_bio->bi_rw = READ; + new_bio->bi_private = &wait; + new_bio->bi_end_io = rrpc_end_sync_bio; + + rqd->flags |= NVM_IOTYPE_SYNC; + rqd->bio = new_bio; + rqd->nr_pages = nr_holes; + + rrpc_submit_read_io(rrpc, new_bio, rqd, rqd->flags); + wait_for_completion_io(&wait); + + if (new_bio->bi_error) + goto err; + + /* Fill the holes in the original bio */ + i = 0; + hole = find_first_zero_bit(read_bitmap, nr_pages); + do { + src_bv = new_bio->bi_io_vec[i]; + dst_bv = bio->bi_io_vec[hole]; + + src_p = kmap_atomic(src_bv.bv_page); + dst_p = kmap_atomic(dst_bv.bv_page); + + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, 
+ RRPC_EXPOSED_PAGE_SIZE); + + kunmap_atomic(src_p); + kunmap_atomic(dst_p); + + mempool_free(&src_bv.bv_page, rrpc->page_pool); + + i++; + hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1); + } while (hole != nr_pages); + + bio_put(new_bio); + + /* Complete the original bio and associated request */ + rqd->flags &= ~NVM_IOTYPE_SYNC; + rqd->bio = bio; + rqd->nr_pages = nr_pages; + + bio_endio(bio); + rrpc_end_io(rqd); + return NVM_IO_OK; + +err: + /* Free allocated pages in new bio */ + for (i = 0; i < new_bio->bi_vcnt; i++) { + src_bv = new_bio->bi_io_vec[i]; + mempool_free(&src_bv.bv_page, rrpc->page_pool); + } + bio_endio(new_bio); + return NVM_IO_ERR; +} + static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio, struct rrpc_rq *rrqd, unsigned long flags) { struct nvm_rq *rqd; struct rrpc_buf_rq brrqd[rrpc->max_write_pgs]; + unsigned long read_bitmap; /* Max 64 ppas per request */ + uint8_t left; uint8_t nr_pages = rrpc_get_pages(bio); int err; + bitmap_zero(&read_bitmap, nr_pages); + rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); if (!rqd) { pr_err_ratelimited("rrpc: not able to queue bio."); @@ -1073,22 +1320,25 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio, &rqd->dma_ppa_list); if (!rqd->ppa_list) { pr_err("rrpc: not able to allocate ppa list\n"); - mempool_free(rrqd, rrpc->rrq_pool); mempool_free(rqd, rrpc->rq_pool); + mempool_free(rrqd, rrpc->rrq_pool); return NVM_IO_ERR; } err = rrpc_read_ppalist_rq(rrpc, bio, rqd, brrqd, flags, nr_pages); if (err) { - mempool_free(rrqd, rrpc->rrq_pool); mempool_free(rqd, rrpc->rq_pool); + mempool_free(rrqd, rrpc->rrq_pool); return err; } } else { err = rrpc_read_rq(rrpc, bio, rqd, flags); - if (err) + if (err) { + mempool_free(rrqd, rrpc->rrq_pool); + mempool_free(rqd, rrpc->rq_pool); return err; + } } bio_get(bio); @@ -1097,33 +1347,22 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio, rqd->nr_pages = rrqd->nr_pages = nr_pages; rqd->flags = flags; - err = 
nvm_submit_io(rrpc->dev, rqd); - if (err) { - pr_err("rrpc: I/O submission failed: %d\n", err); - bio_put(bio); - if (!(flags & NVM_IOTYPE_GC)) { - rrpc_unlock_rq(rrpc, rrqd); - if (rqd->nr_pages > 1) - nvm_dev_dma_free(rrpc->dev, - rqd->ppa_list, rqd->dma_ppa_list); - } + left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap); + if (left == 0) { + bio_endio(bio); + rrpc_end_io(rqd); + return NVM_IO_OK; + } else if (left < 0) return NVM_IO_ERR; - } - return NVM_IO_OK; -} + if (bitmap_empty(&read_bitmap, nr_pages)) + return rrpc_submit_read_io(rrpc, bio, rqd, flags); -static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio, - struct rrpc_rq *rrqd, unsigned long flags) -{ - uint8_t nr_pages = rrpc_get_pages(bio); - - rrqd->nr_pages = nr_pages; - - if (nr_pages > 1) - return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages); - else - return rrpc_write_rq(rrpc, bio, rrqd, flags); + /* The read bio could not be completely read from the write buffer. This + * case only occurs when several pages are sent in a single bio + */ + return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap, rqd, brrqd, + nr_pages); } static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index eda9743..ae26ced 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -11,6 +11,7 @@ enum { NVM_IOTYPE_NONE = 0, NVM_IOTYPE_GC = 1, + NVM_IOTYPE_SYNC = 2, }; #define NVM_BLK_BITS (16)
Since writes are buffered in memory, incoming reads must retrieve buffered pages from memory instead of submitting the I/O to the media. This patch implements this logic. When a read bio arrives at rrpc, the pages that are still held in the write buffers of flash blocks open in memory are copied directly into the bio. If this leaves any "holes" in the bio, a new bio is submitted to the media to retrieve the missing pages, and their data is then copied into the corresponding positions of the original bio before it is completed. Signed-off-by: Javier González <javier@cnexlabs.com> --- drivers/lightnvm/rrpc.c | 451 ++++++++++++++++++++++++++++++++++++----------- include/linux/lightnvm.h | 1 + 2 files changed, 346 insertions(+), 106 deletions(-)