Message ID | 1451901289-27149-1-git-send-email-ww.tao0320@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 01/04/2016 10:54 AM, Wenwei Tao wrote: > We allocate gcb to queue full block to the gc list, > but gcb allocation may fail, if that happens, the > block will not get reclaimed. So add the full block > direct to the gc list, omit the queuing step. > > Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com> > --- > drivers/lightnvm/rrpc.c | 47 ++++++++++------------------------------------- > 1 file changed, 10 insertions(+), 37 deletions(-) > > diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c > index 40b0309..27fb98d 100644 > --- a/drivers/lightnvm/rrpc.c > +++ b/drivers/lightnvm/rrpc.c > @@ -475,24 +475,6 @@ static void rrpc_lun_gc(struct work_struct *work) > /* TODO: Hint that request queue can be started again */ > } > > -static void rrpc_gc_queue(struct work_struct *work) > -{ > - struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc, > - ws_gc); > - struct rrpc *rrpc = gcb->rrpc; > - struct rrpc_block *rblk = gcb->rblk; > - struct nvm_lun *lun = rblk->parent->lun; > - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; > - > - spin_lock(&rlun->lock); > - list_add_tail(&rblk->prio, &rlun->prio_list); > - spin_unlock(&rlun->lock); > - > - mempool_free(gcb, rrpc->gcb_pool); > - pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", > - rblk->parent->id); > -} > - > static const struct block_device_operations rrpc_fops = { > .owner = THIS_MODULE, > }; > @@ -620,39 +602,30 @@ err: > return NULL; > } > > -static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk) > -{ > - struct rrpc_block_gc *gcb; > - > - gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); > - if (!gcb) { > - pr_err("rrpc: unable to queue block for gc."); > - return; > - } > - > - gcb->rrpc = rrpc; > - gcb->rblk = rblk; > - > - INIT_WORK(&gcb->ws_gc, rrpc_gc_queue); > - queue_work(rrpc->kgc_wq, &gcb->ws_gc); > -} > - > static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, > sector_t laddr, uint8_t npages) > { > struct rrpc_addr *p; > 
struct rrpc_block *rblk; > struct nvm_lun *lun; > + struct rrpc_lun *rlun; > int cmnt_size, i; > > for (i = 0; i < npages; i++) { > p = &rrpc->trans_map[laddr + i]; > rblk = p->rblk; > lun = rblk->parent->lun; > + rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; > > cmnt_size = atomic_inc_return(&rblk->data_cmnt_size); > - if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) > - rrpc_run_gc(rrpc, rblk); > + if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) { > + pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", > + rblk->parent->id); > + spin_lock(&rlun->lock); A deadlock might occur, as the lock can be called from interrupt context. The other ->rlun usages will have to be converted to spinlock_irqsave/spinlock_irqrestore to be valid. The reason for the queueing is that the ->rlun lock is held for a while in rrpc_lun_gc. Therefore, it rather takes the queueing overhead, than disable interrupts on the CPU for the duration of the ->prio sorting and selection of victim block. My assumptions about this optimization might be premature. So I like to be proved wrong. > + list_add_tail(&rblk->prio, &rlun->prio_list); > + spin_unlock(&rlun->lock); > + > + } > } > } > > -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
You are right, a deadlock might occur if interrupt is not disabled. We might add the block to prio_list when we find the block is full in rrpc_alloc_addr and check whether all the writes are complete in rrpc_lun_gc, in this way we may avoid gcb allocation failure and irq disable issues. But this still has a problem. We allocate a page from the block before the write, but the bio submission may fail; the bio never gets executed and rrpc_end_io never gets called on this bio. This may lead to a situation: a block's pages are all allocated, but not all of them are used. So this block is not fully used now, and will not get reclaimed for further use. I think we may need to put the page back when the page is not actually used/programmed. 2016-01-04 19:24 GMT+08:00 Matias Bjørling <mb@lightnvm.io>: > On 01/04/2016 10:54 AM, Wenwei Tao wrote: >> >> We allocate gcb to queue full block to the gc list, >> but gcb allocation may fail, if that happens, the >> block will not get reclaimed. So add the full block >> direct to the gc list, omit the queuing step. 
>> >> Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com> >> --- >> drivers/lightnvm/rrpc.c | 47 >> ++++++++++------------------------------------- >> 1 file changed, 10 insertions(+), 37 deletions(-) >> >> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c >> index 40b0309..27fb98d 100644 >> --- a/drivers/lightnvm/rrpc.c >> +++ b/drivers/lightnvm/rrpc.c >> @@ -475,24 +475,6 @@ static void rrpc_lun_gc(struct work_struct *work) >> /* TODO: Hint that request queue can be started again */ >> } >> >> -static void rrpc_gc_queue(struct work_struct *work) >> -{ >> - struct rrpc_block_gc *gcb = container_of(work, struct >> rrpc_block_gc, >> - >> ws_gc); >> - struct rrpc *rrpc = gcb->rrpc; >> - struct rrpc_block *rblk = gcb->rblk; >> - struct nvm_lun *lun = rblk->parent->lun; >> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; >> - >> - spin_lock(&rlun->lock); >> - list_add_tail(&rblk->prio, &rlun->prio_list); >> - spin_unlock(&rlun->lock); >> - >> - mempool_free(gcb, rrpc->gcb_pool); >> - pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", >> - rblk->parent->id); >> -} >> - >> static const struct block_device_operations rrpc_fops = { >> .owner = THIS_MODULE, >> }; >> @@ -620,39 +602,30 @@ err: >> return NULL; >> } >> >> -static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk) >> -{ >> - struct rrpc_block_gc *gcb; >> - >> - gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); >> - if (!gcb) { >> - pr_err("rrpc: unable to queue block for gc."); >> - return; >> - } >> - >> - gcb->rrpc = rrpc; >> - gcb->rblk = rblk; >> - >> - INIT_WORK(&gcb->ws_gc, rrpc_gc_queue); >> - queue_work(rrpc->kgc_wq, &gcb->ws_gc); >> -} >> - >> static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, >> sector_t laddr, uint8_t >> npages) >> { >> struct rrpc_addr *p; >> struct rrpc_block *rblk; >> struct nvm_lun *lun; >> + struct rrpc_lun *rlun; >> int cmnt_size, i; >> >> for (i = 0; i < npages; i++) { >> p = &rrpc->trans_map[laddr + i]; >> rblk = 
p->rblk; >> lun = rblk->parent->lun; >> + rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; >> >> cmnt_size = atomic_inc_return(&rblk->data_cmnt_size); >> - if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) >> - rrpc_run_gc(rrpc, rblk); >> + if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) { >> + pr_debug("nvm: block '%lu' is full, allow GC >> (sched)\n", >> + rblk->parent->id); >> + spin_lock(&rlun->lock); > > > A deadlock might occur, as the lock can be called from interrupt context. > The other ->rlun usages will have to be converted to > spinlock_irqsave/spinlock_irqrestore to be valid. > > The reason for the queueing is that the ->rlun lock is held for a while in > rrpc_lun_gc. Therefore, it rather takes the queueing overhead, than disable > interrupts on the CPU for the duration of the ->prio sorting and selection > of victim block. My assumptions about this optimization might be premature. > So I like to be proved wrong. > > >> + list_add_tail(&rblk->prio, &rlun->prio_list); >> + spin_unlock(&rlun->lock); >> + >> + } >> } >> } >> >> > > -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 01/05/2016 10:58 AM, Wenwei Tao wrote: > You are right, a deadlock might occur if interrupt is not disabled. > > We might add the block to prio_list when we find the block is full in > rrpc_alloc_addr and check whether all the writes are complete in > rrpc_lun_gc, in this way we may avoid gcb allocation fail and irq > disable issues. > > But this still has a problem. We allocate page from block before > write, but the bio submission may fail, the bio never get execute and > rrpc_end_io never get called on this bio, this may lead to a > situation: a block's pages are all allocated, but not all of them are > used. So this block is not fully used now, and will not get reclaimed > for further use. > > I think we may need to put the page back when the page is not actually > used/programmed. You're right. It has to take multiple cases into account on failure, where if a write fails, it might mean the whole block must be written to a new block, the I/O itself must be remapped to another address, or something else depending on the error code. This logic is yet to be routed into rrpc. Javier is working on write buffering, which will fix some of these issues. -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 40b0309..27fb98d 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -475,24 +475,6 @@ static void rrpc_lun_gc(struct work_struct *work) /* TODO: Hint that request queue can be started again */ } -static void rrpc_gc_queue(struct work_struct *work) -{ - struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc, - ws_gc); - struct rrpc *rrpc = gcb->rrpc; - struct rrpc_block *rblk = gcb->rblk; - struct nvm_lun *lun = rblk->parent->lun; - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; - - spin_lock(&rlun->lock); - list_add_tail(&rblk->prio, &rlun->prio_list); - spin_unlock(&rlun->lock); - - mempool_free(gcb, rrpc->gcb_pool); - pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", - rblk->parent->id); -} - static const struct block_device_operations rrpc_fops = { .owner = THIS_MODULE, }; @@ -620,39 +602,30 @@ err: return NULL; } -static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct rrpc_block_gc *gcb; - - gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); - if (!gcb) { - pr_err("rrpc: unable to queue block for gc."); - return; - } - - gcb->rrpc = rrpc; - gcb->rblk = rblk; - - INIT_WORK(&gcb->ws_gc, rrpc_gc_queue); - queue_work(rrpc->kgc_wq, &gcb->ws_gc); -} - static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, sector_t laddr, uint8_t npages) { struct rrpc_addr *p; struct rrpc_block *rblk; struct nvm_lun *lun; + struct rrpc_lun *rlun; int cmnt_size, i; for (i = 0; i < npages; i++) { p = &rrpc->trans_map[laddr + i]; rblk = p->rblk; lun = rblk->parent->lun; + rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; cmnt_size = atomic_inc_return(&rblk->data_cmnt_size); - if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) - rrpc_run_gc(rrpc, rblk); + if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) { + pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", + rblk->parent->id); + spin_lock(&rlun->lock); + 
list_add_tail(&rblk->prio, &rlun->prio_list); + spin_unlock(&rlun->lock); + + } } }
We allocate gcb to queue a full block to the gc list, but gcb allocation may fail; if that happens, the block will not get reclaimed. So add the full block directly to the gc list, omitting the queuing step. Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com> --- drivers/lightnvm/rrpc.c | 47 ++++++++++------------------------------------- 1 file changed, 10 insertions(+), 37 deletions(-)