Message ID | 1419405208-25975-36-git-send-email-loghyr@primarydata.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hey Dros and Tom, I see you're adding some new FIXME and TODOs in the comments. Is there a plan for addressing these eventually? Thanks, Anna On 12/24/2014 02:13 AM, Tom Haynes wrote: > From: Weston Andros Adamson <dros@primarydata.com> > > This patch adds mirrored write support to the pgio layer. The default > is to use one mirror, but pgio callers may define callbacks to change > this to any value up to the (arbitrarily selected) limit of 16. > > The basic idea is to break out members of nfs_pageio_descriptor that cannot > be shared between mirrored DSes and put them in a new structure. > > Signed-off-by: Weston Andros Adamson <dros@primarydata.com> > --- > fs/nfs/direct.c | 17 ++- > fs/nfs/internal.h | 1 + > fs/nfs/objlayout/objio_osd.c | 3 +- > fs/nfs/pagelist.c | 270 +++++++++++++++++++++++++++++++++++-------- > fs/nfs/pnfs.c | 26 +++-- > fs/nfs/read.c | 30 ++++- > fs/nfs/write.c | 10 +- > include/linux/nfs_page.h | 20 +++- > include/linux/nfs_xdr.h | 1 + > 9 files changed, 311 insertions(+), 67 deletions(-) > > diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c > index 1ee41d7..0178d4f 100644 > --- a/fs/nfs/direct.c > +++ b/fs/nfs/direct.c > @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) > spin_lock(&dreq->lock); > if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) > dreq->error = hdr->error; > - else > - dreq->count += hdr->good_bytes; > + else { > + /* > + * FIXME: right now this only accounts for bytes written > + * to the first mirror > + */ > + if (hdr->pgio_mirror_idx == 0) > + dreq->count += hdr->good_bytes; > + } > spin_unlock(&dreq->lock); > > while (!list_empty(&hdr->pages)) { > @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) > dreq->error = hdr->error; > } > if (dreq->error == 0) { > - dreq->count += hdr->good_bytes; > + /* > + * FIXME: right now this only accounts for bytes written > + * to the first mirror > + */ > + if (hdr->pgio_mirror_idx == 0) > 
+ dreq->count += hdr->good_bytes; > if (nfs_write_need_commit(hdr)) { > if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) > request_commit = true; > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > index 05f9a87..ef1c703 100644 > --- a/fs/nfs/internal.h > +++ b/fs/nfs/internal.h > @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, > struct nfs_direct_req *dreq); > int nfs_key_timeout_notify(struct file *filp, struct inode *inode); > bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); > +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); > > #ifdef CONFIG_MIGRATION > extern int nfs_migrate_page(struct address_space *, > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c > index d007780..9a5f2ee 100644 > --- a/fs/nfs/objlayout/objio_osd.c > +++ b/fs/nfs/objlayout/objio_osd.c > @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) > static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, > struct nfs_page *prev, struct nfs_page *req) > { > + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; > unsigned int size; > > size = pnfs_generic_pg_test(pgio, prev, req); > > - if (!size || pgio->pg_count + req->wb_bytes > > + if (!size || mirror->pg_count + req->wb_bytes > > (unsigned long)pgio->pg_layout_private) > return 0; > > diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c > index 1c03187..eec12b7 100644 > --- a/fs/nfs/pagelist.c > +++ b/fs/nfs/pagelist.c > @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, > struct nfs_pgio_header *hdr, > void (*release)(struct nfs_pgio_header *hdr)) > { > - hdr->req = nfs_list_entry(desc->pg_list.next); > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > + > + hdr->req = nfs_list_entry(mirror->pg_list.next); > hdr->inode = desc->pg_inode; > hdr->cred = hdr->req->wb_context->cred; > hdr->io_start = req_offset(hdr->req); > - hdr->good_bytes = 
desc->pg_count; > + hdr->good_bytes = mirror->pg_count; > hdr->dreq = desc->pg_dreq; > hdr->layout_private = desc->pg_layout_private; > hdr->release = release; > hdr->completion_ops = desc->pg_completion_ops; > if (hdr->completion_ops->init_hdr) > hdr->completion_ops->init_hdr(hdr); > + > + hdr->pgio_mirror_idx = desc->pg_mirror_idx; > } > EXPORT_SYMBOL_GPL(nfs_pgheader_init); > > @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) > size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, > struct nfs_page *prev, struct nfs_page *req) > { > - if (desc->pg_count > desc->pg_bsize) { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > + > + if (mirror->pg_count > mirror->pg_bsize) { > /* should never happen */ > WARN_ON_ONCE(1); > return 0; > @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, > * Limit the request size so that we can still allocate a page array > * for it without upsetting the slab allocator. 
> */ > - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * > + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * > sizeof(struct page) > PAGE_SIZE) > return 0; > > - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); > + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); > } > EXPORT_SYMBOL_GPL(nfs_generic_pg_test); > > @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); > static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, > struct nfs_pgio_header *hdr) > { > + struct nfs_pgio_mirror *mirror; > + u32 midx; > + > set_bit(NFS_IOHDR_REDO, &hdr->flags); > nfs_pgio_data_destroy(hdr); > hdr->completion_ops->completion(hdr); > - desc->pg_completion_ops->error_cleanup(&desc->pg_list); > + /* TODO: Make sure it's right to clean up all mirrors here > + * and not just hdr->pgio_mirror_idx */ > + for (midx = 0; midx < desc->pg_mirror_count; midx++) { > + mirror = &desc->pg_mirrors[midx]; > + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); > + } > return -ENOMEM; > } > > @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) > hdr->completion_ops->completion(hdr); > } > > +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, > + unsigned int bsize) > +{ > + INIT_LIST_HEAD(&mirror->pg_list); > + mirror->pg_bytes_written = 0; > + mirror->pg_count = 0; > + mirror->pg_bsize = bsize; > + mirror->pg_base = 0; > + mirror->pg_recoalesce = 0; > +} > + > /** > * nfs_pageio_init - initialise a page io descriptor > * @desc: pointer to descriptor > @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, > size_t bsize, > int io_flags) > { > - INIT_LIST_HEAD(&desc->pg_list); > - desc->pg_bytes_written = 0; > - desc->pg_count = 0; > - desc->pg_bsize = bsize; > - desc->pg_base = 0; > + struct nfs_pgio_mirror *new; > + int i; > + > desc->pg_moreio = 0; > - desc->pg_recoalesce = 0; > desc->pg_inode = inode; > desc->pg_ops = pg_ops; > desc->pg_completion_ops = compl_ops; 
> @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, > desc->pg_lseg = NULL; > desc->pg_dreq = NULL; > desc->pg_layout_private = NULL; > + desc->pg_bsize = bsize; > + > + desc->pg_mirror_count = 1; > + desc->pg_mirror_idx = 0; > + > + if (pg_ops->pg_get_mirror_count) { > + /* until we have a request, we don't have an lseg and no > + * idea how many mirrors there will be */ > + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, > + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); > + desc->pg_mirrors_dynamic = new; > + desc->pg_mirrors = new; > + > + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) > + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); > + } else { > + desc->pg_mirrors_dynamic = NULL; > + desc->pg_mirrors = desc->pg_mirrors_static; > + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); > + } > } > EXPORT_SYMBOL_GPL(nfs_pageio_init); > > @@ -738,14 +782,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) > int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, > struct nfs_pgio_header *hdr) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > struct nfs_page *req; > struct page **pages, > *last_page; > - struct list_head *head = &desc->pg_list; > + struct list_head *head = &mirror->pg_list; > struct nfs_commit_info cinfo; > unsigned int pagecount, pageused; > > - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); > + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); > if (!nfs_pgarray_set(&hdr->page_array, pagecount)) > return nfs_pgio_error(desc, hdr); > > @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, > desc->pg_ioflags &= ~FLUSH_COND_STABLE; > > /* Set up the argument struct */ > - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); > + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); > desc->pg_rpc_callops = &nfs_pgio_common_ops; > return 0; > } > @@ -781,12 
+827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); > > static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) > { > + struct nfs_pgio_mirror *mirror; > struct nfs_pgio_header *hdr; > int ret; > > + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); > if (!hdr) { > - desc->pg_completion_ops->error_cleanup(&desc->pg_list); > + /* TODO: make sure this is right with mirroring - or > + * should it back out all mirrors? */ > + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); > return -ENOMEM; > } > nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); > @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) > return ret; > } > > +/* > + * nfs_pageio_setup_mirroring - determine if mirroring is to be used > + * by calling the pg_get_mirror_count op > + */ > +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, > + struct nfs_page *req) > +{ > + int mirror_count = 1; > + > + if (!pgio->pg_ops->pg_get_mirror_count) > + return 0; > + > + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); > + > + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) > + return -EINVAL; > + > + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) > + return -EINVAL; > + > + pgio->pg_mirror_count = mirror_count; > + > + return 0; > +} > + > +/* > + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) > + */ > +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) > +{ > + pgio->pg_mirror_count = 1; > + pgio->pg_mirror_idx = 0; > +} > + > +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) > +{ > + pgio->pg_mirror_count = 1; > + pgio->pg_mirror_idx = 0; > + pgio->pg_mirrors = pgio->pg_mirrors_static; > + kfree(pgio->pg_mirrors_dynamic); > + pgio->pg_mirrors_dynamic = NULL; > +} > + > static bool nfs_match_open_context(const struct nfs_open_context *ctx1, > const struct nfs_open_context *ctx2) > { > 
@@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, > static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, > struct nfs_page *req) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > struct nfs_page *prev = NULL; > - if (desc->pg_count != 0) { > - prev = nfs_list_entry(desc->pg_list.prev); > + > + if (mirror->pg_count != 0) { > + prev = nfs_list_entry(mirror->pg_list.prev); > } else { > if (desc->pg_ops->pg_init) > desc->pg_ops->pg_init(desc, req); > - desc->pg_base = req->wb_pgbase; > + mirror->pg_base = req->wb_pgbase; > } > if (!nfs_can_coalesce_requests(prev, req, desc)) > return 0; > nfs_list_remove_request(req); > - nfs_list_add_request(req, &desc->pg_list); > - desc->pg_count += req->wb_bytes; > + nfs_list_add_request(req, &mirror->pg_list); > + mirror->pg_count += req->wb_bytes; > return 1; > } > > @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, > */ > static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) > { > - if (!list_empty(&desc->pg_list)) { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > + > + if (!list_empty(&mirror->pg_list)) { > int error = desc->pg_ops->pg_doio(desc); > if (error < 0) > desc->pg_error = error; > else > - desc->pg_bytes_written += desc->pg_count; > + mirror->pg_bytes_written += mirror->pg_count; > } > - if (list_empty(&desc->pg_list)) { > - desc->pg_count = 0; > - desc->pg_base = 0; > + if (list_empty(&mirror->pg_list)) { > + mirror->pg_count = 0; > + mirror->pg_base = 0; > } > } > > @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) > static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, > struct nfs_page *req) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > struct nfs_page *subreq; > unsigned int bytes_left = 0; > unsigned int offset, pgbase; > > + 
WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); > + > nfs_page_group_lock(req, false); > > subreq = req; > @@ -938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, > nfs_pageio_doio(desc); > if (desc->pg_error < 0) > return 0; > - if (desc->pg_recoalesce) > + if (mirror->pg_recoalesce) > return 0; > /* retry add_request for this subreq */ > nfs_page_group_lock(req, false); > @@ -976,14 +1080,16 @@ err_ptr: > > static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > LIST_HEAD(head); > > do { > - list_splice_init(&desc->pg_list, &head); > - desc->pg_bytes_written -= desc->pg_count; > - desc->pg_count = 0; > - desc->pg_base = 0; > - desc->pg_recoalesce = 0; > + list_splice_init(&mirror->pg_list, &head); > + mirror->pg_bytes_written -= mirror->pg_count; > + mirror->pg_count = 0; > + mirror->pg_base = 0; > + mirror->pg_recoalesce = 0; > + > desc->pg_moreio = 0; > > while (!list_empty(&head)) { > @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) > return 0; > break; > } > - } while (desc->pg_recoalesce); > + } while (mirror->pg_recoalesce); > return 1; > } > > -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, > +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, > struct nfs_page *req) > { > int ret; > @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, > break; > ret = nfs_do_recoalesce(desc); > } while (ret); > + > return ret; > } > > +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, > + struct nfs_page *req) > +{ > + u32 midx; > + unsigned int pgbase, offset, bytes; > + struct nfs_page *dupreq, *lastreq; > + > + pgbase = req->wb_pgbase; > + offset = req->wb_offset; > + bytes = req->wb_bytes; > + > + nfs_pageio_setup_mirroring(desc, req); > + > + for (midx = 0; midx < desc->pg_mirror_count; midx++) { > + if 
(midx) { > + nfs_page_group_lock(req, false); > + > + /* find the last request */ > + for (lastreq = req->wb_head; > + lastreq->wb_this_page != req->wb_head; > + lastreq = lastreq->wb_this_page) > + ; > + > + dupreq = nfs_create_request(req->wb_context, > + req->wb_page, lastreq, pgbase, bytes); > + > + if (IS_ERR(dupreq)) { > + nfs_page_group_unlock(req); > + return 0; > + } > + > + nfs_lock_request(dupreq); > + nfs_page_group_unlock(req); > + dupreq->wb_offset = offset; > + dupreq->wb_index = req->wb_index; > + } else > + dupreq = req; > + > + desc->pg_mirror_idx = midx; > + if (!nfs_pageio_add_request_mirror(desc, dupreq)) > + return 0; > + } > + > + return 1; > +} > + > +/* > + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an > + * nfs_pageio_descriptor > + * @desc: pointer to io descriptor > + */ > +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, > + u32 mirror_idx) > +{ > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; > + u32 restore_idx = desc->pg_mirror_idx; > + > + desc->pg_mirror_idx = mirror_idx; > + for (;;) { > + nfs_pageio_doio(desc); > + if (!mirror->pg_recoalesce) > + break; > + if (!nfs_do_recoalesce(desc)) > + break; > + } > + desc->pg_mirror_idx = restore_idx; > +} > + > /* > * nfs_pageio_resend - Transfer requests to new descriptor and resend > * @hdr - the pgio header to move request from > @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); > */ > void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) > { > - for (;;) { > - nfs_pageio_doio(desc); > - if (!desc->pg_recoalesce) > - break; > - if (!nfs_do_recoalesce(desc)) > - break; > - } > + u32 midx; > + > + for (midx = 0; midx < desc->pg_mirror_count; midx++) > + nfs_pageio_complete_mirror(desc, midx); > > if (desc->pg_ops->pg_cleanup) > desc->pg_ops->pg_cleanup(desc); > + nfs_pageio_cleanup_mirroring(desc); > } > > /** > @@ -1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) > 
*/ > void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) > { > - if (!list_empty(&desc->pg_list)) { > - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); > - if (index != prev->wb_index + 1) > - nfs_pageio_complete(desc); > + struct nfs_pgio_mirror *mirror; > + struct nfs_page *prev; > + u32 midx; > + > + for (midx = 0; midx < desc->pg_mirror_count; midx++) { > + mirror = &desc->pg_mirrors[midx]; > + if (!list_empty(&mirror->pg_list)) { > + prev = nfs_list_entry(mirror->pg_list.prev); > + if (index != prev->wb_index + 1) > + nfs_pageio_complete_mirror(desc, midx); > + } > } > } > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index 2da2e77..5f7c422 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); > * of bytes (maximum @req->wb_bytes) that can be coalesced. > */ > size_t > -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, > - struct nfs_page *req) > +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, > + struct nfs_page *prev, struct nfs_page *req) > { > unsigned int size; > u64 seg_end, req_start, seg_left; > @@ -1729,10 +1729,12 @@ static void > pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, > struct nfs_pgio_header *hdr) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { > - list_splice_tail_init(&hdr->pages, &desc->pg_list); > + list_splice_tail_init(&hdr->pages, &mirror->pg_list); > nfs_pageio_reset_write_mds(desc); > - desc->pg_recoalesce = 1; > + mirror->pg_recoalesce = 1; > } > nfs_pgio_data_destroy(hdr); > } > @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); > int > pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > struct nfs_pgio_header *hdr; > int ret; > > hdr = 
nfs_pgio_header_alloc(desc->pg_rw_ops); > if (!hdr) { > - desc->pg_completion_ops->error_cleanup(&desc->pg_list); > + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); > return -ENOMEM; > } > nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); > @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) > ret = nfs_generic_pgio(desc, hdr); > if (!ret) > pnfs_do_write(desc, hdr, desc->pg_ioflags); > + > return ret; > } > EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); > @@ -1839,10 +1844,13 @@ static void > pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, > struct nfs_pgio_header *hdr) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > + > if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { > - list_splice_tail_init(&hdr->pages, &desc->pg_list); > + list_splice_tail_init(&hdr->pages, &mirror->pg_list); > nfs_pageio_reset_read_mds(desc); > - desc->pg_recoalesce = 1; > + mirror->pg_recoalesce = 1; > } > nfs_pgio_data_destroy(hdr); > } > @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); > int > pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) > { > + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; > + > struct nfs_pgio_header *hdr; > int ret; > > hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); > if (!hdr) { > - desc->pg_completion_ops->error_cleanup(&desc->pg_list); > + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); > return -ENOMEM; > } > nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); > diff --git a/fs/nfs/read.c b/fs/nfs/read.c > index 092ab49..568ecf0 100644 > --- a/fs/nfs/read.c > +++ b/fs/nfs/read.c > @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); > > void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) > { > + struct nfs_pgio_mirror *mirror; > + > pgio->pg_ops = &nfs_pgio_rw_ops; > - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; > + > + /* read path should never have more than one mirror */ > + 
WARN_ON_ONCE(pgio->pg_mirror_count != 1); > + > + mirror = &pgio->pg_mirrors[0]; > + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; > } > EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); > > @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, > struct nfs_page *new; > unsigned int len; > struct nfs_pageio_descriptor pgio; > + struct nfs_pgio_mirror *pgm; > > len = nfs_page_length(page); > if (len == 0) > @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, > &nfs_async_read_completion_ops); > nfs_pageio_add_request(&pgio, new); > nfs_pageio_complete(&pgio); > - NFS_I(inode)->read_io += pgio.pg_bytes_written; > + > + /* It doesn't make sense to do mirrored reads! */ > + WARN_ON_ONCE(pgio.pg_mirror_count != 1); > + > + pgm = &pgio.pg_mirrors[0]; > + NFS_I(inode)->read_io += pgm->pg_bytes_written; > + > return 0; > } > > @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, > struct list_head *pages, unsigned nr_pages) > { > struct nfs_pageio_descriptor pgio; > + struct nfs_pgio_mirror *pgm; > struct nfs_readdesc desc = { > .pgio = &pgio, > }; > @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, > &nfs_async_read_completion_ops); > > ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); > - > nfs_pageio_complete(&pgio); > - NFS_I(inode)->read_io += pgio.pg_bytes_written; > - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; > + > + /* It doesn't make sense to do mirrored reads! 
*/ > + WARN_ON_ONCE(pgio.pg_mirror_count != 1); > + > + pgm = &pgio.pg_mirrors[0]; > + NFS_I(inode)->read_io += pgm->pg_bytes_written; > + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> > + PAGE_CACHE_SHIFT; > nfs_add_stats(inode, NFSIOS_READPAGES, npages); > read_complete: > put_nfs_open_context(desc.ctx); > diff --git a/fs/nfs/write.c b/fs/nfs/write.c > index db802d9..2f6ee8e 100644 > --- a/fs/nfs/write.c > +++ b/fs/nfs/write.c > @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) > if (nfs_write_need_commit(hdr)) { > memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); > nfs_mark_request_commit(req, hdr->lseg, &cinfo, > - 0); > + hdr->pgio_mirror_idx); > goto next; > } > remove_req: > @@ -1305,8 +1305,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); > > void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) > { > + struct nfs_pgio_mirror *mirror; > + > pgio->pg_ops = &nfs_pgio_rw_ops; > - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; > + > + nfs_pageio_stop_mirroring(pgio); > + > + mirror = &pgio->pg_mirrors[0]; > + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; > } > EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); > > diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h > index 479c566..3eb072d 100644 > --- a/include/linux/nfs_page.h > +++ b/include/linux/nfs_page.h > @@ -58,6 +58,8 @@ struct nfs_pageio_ops { > size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, > struct nfs_page *); > int (*pg_doio)(struct nfs_pageio_descriptor *); > + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, > + struct nfs_page *); > void (*pg_cleanup)(struct nfs_pageio_descriptor *); > }; > > @@ -74,15 +76,17 @@ struct nfs_rw_ops { > struct rpc_task_setup *, int); > }; > > -struct nfs_pageio_descriptor { > +struct nfs_pgio_mirror { > struct list_head pg_list; > unsigned long pg_bytes_written; > size_t pg_count; > size_t pg_bsize; > unsigned int pg_base; > - 
unsigned char pg_moreio : 1, > - pg_recoalesce : 1; > + unsigned char pg_recoalesce : 1; > +}; > > +struct nfs_pageio_descriptor { > + unsigned char pg_moreio : 1; > struct inode *pg_inode; > const struct nfs_pageio_ops *pg_ops; > const struct nfs_rw_ops *pg_rw_ops; > @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { > struct pnfs_layout_segment *pg_lseg; > struct nfs_direct_req *pg_dreq; > void *pg_layout_private; > + unsigned int pg_bsize; /* default bsize for mirrors */ > + > + u32 pg_mirror_count; > + struct nfs_pgio_mirror *pg_mirrors; > + struct nfs_pgio_mirror pg_mirrors_static[1]; > + struct nfs_pgio_mirror *pg_mirrors_dynamic; > + u32 pg_mirror_idx; /* current mirror */ > }; > > +/* arbitrarily selected limit to number of mirrors */ > +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 > + > #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) > > extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h > index 5bc99f0..6400a1e 100644 > --- a/include/linux/nfs_xdr.h > +++ b/include/linux/nfs_xdr.h > @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { > struct nfs_page_array page_array; > struct nfs_client *ds_clp; /* pNFS data server */ > int ds_commit_idx; /* ds index if ds_clp is set */ > + int pgio_mirror_idx;/* mirror index in pgio layer */ > }; > > struct nfs_mds_commit_info { > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
These issues are addressed and the comments are removed in subsequent patches from the same series. Instead of having one huge patch that implements all of mirroring, I chose to split it out into smaller patches. These notes were useful in making sure that the issues were addressed and should be useful as a guide to someone bisecting, etc. -dros > On Jan 6, 2015, at 1:11 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: > > Hey Dros and Tom, > > I see you're adding some new FIXME and TODOs in the comments. Is there a plan for addressing these eventually? > > Thanks, > Anna > > On 12/24/2014 02:13 AM, Tom Haynes wrote: >> From: Weston Andros Adamson <dros@primarydata.com> >> >> This patch adds mirrored write support to the pgio layer. The default >> is to use one mirror, but pgio callers may define callbacks to change >> this to any value up to the (arbitrarily selected) limit of 16. >> >> The basic idea is to break out members of nfs_pageio_descriptor that cannot >> be shared between mirrored DSes and put them in a new structure. 
>> >> Signed-off-by: Weston Andros Adamson <dros@primarydata.com> >> --- >> fs/nfs/direct.c | 17 ++- >> fs/nfs/internal.h | 1 + >> fs/nfs/objlayout/objio_osd.c | 3 +- >> fs/nfs/pagelist.c | 270 +++++++++++++++++++++++++++++++++++-------- >> fs/nfs/pnfs.c | 26 +++-- >> fs/nfs/read.c | 30 ++++- >> fs/nfs/write.c | 10 +- >> include/linux/nfs_page.h | 20 +++- >> include/linux/nfs_xdr.h | 1 + >> 9 files changed, 311 insertions(+), 67 deletions(-) >> >> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c >> index 1ee41d7..0178d4f 100644 >> --- a/fs/nfs/direct.c >> +++ b/fs/nfs/direct.c >> @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) >> spin_lock(&dreq->lock); >> if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) >> dreq->error = hdr->error; >> - else >> - dreq->count += hdr->good_bytes; >> + else { >> + /* >> + * FIXME: right now this only accounts for bytes written >> + * to the first mirror >> + */ >> + if (hdr->pgio_mirror_idx == 0) >> + dreq->count += hdr->good_bytes; >> + } >> spin_unlock(&dreq->lock); >> >> while (!list_empty(&hdr->pages)) { >> @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) >> dreq->error = hdr->error; >> } >> if (dreq->error == 0) { >> - dreq->count += hdr->good_bytes; >> + /* >> + * FIXME: right now this only accounts for bytes written >> + * to the first mirror >> + */ >> + if (hdr->pgio_mirror_idx == 0) >> + dreq->count += hdr->good_bytes; >> if (nfs_write_need_commit(hdr)) { >> if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) >> request_commit = true; >> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h >> index 05f9a87..ef1c703 100644 >> --- a/fs/nfs/internal.h >> +++ b/fs/nfs/internal.h >> @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, >> struct nfs_direct_req *dreq); >> int nfs_key_timeout_notify(struct file *filp, struct inode *inode); >> bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); >> +void 
nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); >> >> #ifdef CONFIG_MIGRATION >> extern int nfs_migrate_page(struct address_space *, >> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c >> index d007780..9a5f2ee 100644 >> --- a/fs/nfs/objlayout/objio_osd.c >> +++ b/fs/nfs/objlayout/objio_osd.c >> @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) >> static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, >> struct nfs_page *prev, struct nfs_page *req) >> { >> + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; >> unsigned int size; >> >> size = pnfs_generic_pg_test(pgio, prev, req); >> >> - if (!size || pgio->pg_count + req->wb_bytes > >> + if (!size || mirror->pg_count + req->wb_bytes > >> (unsigned long)pgio->pg_layout_private) >> return 0; >> >> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c >> index 1c03187..eec12b7 100644 >> --- a/fs/nfs/pagelist.c >> +++ b/fs/nfs/pagelist.c >> @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, >> struct nfs_pgio_header *hdr, >> void (*release)(struct nfs_pgio_header *hdr)) >> { >> - hdr->req = nfs_list_entry(desc->pg_list.next); >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> + >> + hdr->req = nfs_list_entry(mirror->pg_list.next); >> hdr->inode = desc->pg_inode; >> hdr->cred = hdr->req->wb_context->cred; >> hdr->io_start = req_offset(hdr->req); >> - hdr->good_bytes = desc->pg_count; >> + hdr->good_bytes = mirror->pg_count; >> hdr->dreq = desc->pg_dreq; >> hdr->layout_private = desc->pg_layout_private; >> hdr->release = release; >> hdr->completion_ops = desc->pg_completion_ops; >> if (hdr->completion_ops->init_hdr) >> hdr->completion_ops->init_hdr(hdr); >> + >> + hdr->pgio_mirror_idx = desc->pg_mirror_idx; >> } >> EXPORT_SYMBOL_GPL(nfs_pgheader_init); >> >> @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) >> size_t nfs_generic_pg_test(struct 
nfs_pageio_descriptor *desc, >> struct nfs_page *prev, struct nfs_page *req) >> { >> - if (desc->pg_count > desc->pg_bsize) { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> + >> + if (mirror->pg_count > mirror->pg_bsize) { >> /* should never happen */ >> WARN_ON_ONCE(1); >> return 0; >> @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, >> * Limit the request size so that we can still allocate a page array >> * for it without upsetting the slab allocator. >> */ >> - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * >> + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * >> sizeof(struct page) > PAGE_SIZE) >> return 0; >> >> - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); >> + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); >> } >> EXPORT_SYMBOL_GPL(nfs_generic_pg_test); >> >> @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); >> static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, >> struct nfs_pgio_header *hdr) >> { >> + struct nfs_pgio_mirror *mirror; >> + u32 midx; >> + >> set_bit(NFS_IOHDR_REDO, &hdr->flags); >> nfs_pgio_data_destroy(hdr); >> hdr->completion_ops->completion(hdr); >> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >> + /* TODO: Make sure it's right to clean up all mirrors here >> + * and not just hdr->pgio_mirror_idx */ >> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >> + mirror = &desc->pg_mirrors[midx]; >> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >> + } >> return -ENOMEM; >> } >> >> @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) >> hdr->completion_ops->completion(hdr); >> } >> >> +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, >> + unsigned int bsize) >> +{ >> + INIT_LIST_HEAD(&mirror->pg_list); >> + mirror->pg_bytes_written = 0; >> + mirror->pg_count = 0; >> + mirror->pg_bsize = bsize; >> + mirror->pg_base = 0; >> + 
mirror->pg_recoalesce = 0; >> +} >> + >> /** >> * nfs_pageio_init - initialise a page io descriptor >> * @desc: pointer to descriptor >> @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >> size_t bsize, >> int io_flags) >> { >> - INIT_LIST_HEAD(&desc->pg_list); >> - desc->pg_bytes_written = 0; >> - desc->pg_count = 0; >> - desc->pg_bsize = bsize; >> - desc->pg_base = 0; >> + struct nfs_pgio_mirror *new; >> + int i; >> + >> desc->pg_moreio = 0; >> - desc->pg_recoalesce = 0; >> desc->pg_inode = inode; >> desc->pg_ops = pg_ops; >> desc->pg_completion_ops = compl_ops; >> @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >> desc->pg_lseg = NULL; >> desc->pg_dreq = NULL; >> desc->pg_layout_private = NULL; >> + desc->pg_bsize = bsize; >> + >> + desc->pg_mirror_count = 1; >> + desc->pg_mirror_idx = 0; >> + >> + if (pg_ops->pg_get_mirror_count) { >> + /* until we have a request, we don't have an lseg and no >> + * idea how many mirrors there will be */ >> + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, >> + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); >> + desc->pg_mirrors_dynamic = new; >> + desc->pg_mirrors = new; >> + >> + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) >> + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); >> + } else { >> + desc->pg_mirrors_dynamic = NULL; >> + desc->pg_mirrors = desc->pg_mirrors_static; >> + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); >> + } >> } >> EXPORT_SYMBOL_GPL(nfs_pageio_init); >> >> @@ -738,14 +782,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) >> int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, >> struct nfs_pgio_header *hdr) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> struct nfs_page *req; >> struct page **pages, >> *last_page; >> - struct list_head *head = &desc->pg_list; >> + struct list_head *head = &mirror->pg_list; >> struct nfs_commit_info cinfo; >> unsigned int 
pagecount, pageused; >> >> - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); >> + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); >> if (!nfs_pgarray_set(&hdr->page_array, pagecount)) >> return nfs_pgio_error(desc, hdr); >> >> @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, >> desc->pg_ioflags &= ~FLUSH_COND_STABLE; >> >> /* Set up the argument struct */ >> - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); >> + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); >> desc->pg_rpc_callops = &nfs_pgio_common_ops; >> return 0; >> } >> @@ -781,12 +827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); >> >> static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) >> { >> + struct nfs_pgio_mirror *mirror; >> struct nfs_pgio_header *hdr; >> int ret; >> >> + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >> if (!hdr) { >> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >> + /* TODO: make sure this is right with mirroring - or >> + * should it back out all mirrors? 
*/ >> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >> return -ENOMEM; >> } >> nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); >> @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) >> return ret; >> } >> >> +/* >> + * nfs_pageio_setup_mirroring - determine if mirroring is to be used >> + * by calling the pg_get_mirror_count op >> + */ >> +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, >> + struct nfs_page *req) >> +{ >> + int mirror_count = 1; >> + >> + if (!pgio->pg_ops->pg_get_mirror_count) >> + return 0; >> + >> + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); >> + >> + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) >> + return -EINVAL; >> + >> + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) >> + return -EINVAL; >> + >> + pgio->pg_mirror_count = mirror_count; >> + >> + return 0; >> +} >> + >> +/* >> + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) >> + */ >> +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) >> +{ >> + pgio->pg_mirror_count = 1; >> + pgio->pg_mirror_idx = 0; >> +} >> + >> +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) >> +{ >> + pgio->pg_mirror_count = 1; >> + pgio->pg_mirror_idx = 0; >> + pgio->pg_mirrors = pgio->pg_mirrors_static; >> + kfree(pgio->pg_mirrors_dynamic); >> + pgio->pg_mirrors_dynamic = NULL; >> +} >> + >> static bool nfs_match_open_context(const struct nfs_open_context *ctx1, >> const struct nfs_open_context *ctx2) >> { >> @@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, >> static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, >> struct nfs_page *req) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> struct nfs_page *prev = NULL; >> - if (desc->pg_count != 0) { >> - prev = nfs_list_entry(desc->pg_list.prev); >> + >> + if (mirror->pg_count != 0) { 
>> + prev = nfs_list_entry(mirror->pg_list.prev); >> } else { >> if (desc->pg_ops->pg_init) >> desc->pg_ops->pg_init(desc, req); >> - desc->pg_base = req->wb_pgbase; >> + mirror->pg_base = req->wb_pgbase; >> } >> if (!nfs_can_coalesce_requests(prev, req, desc)) >> return 0; >> nfs_list_remove_request(req); >> - nfs_list_add_request(req, &desc->pg_list); >> - desc->pg_count += req->wb_bytes; >> + nfs_list_add_request(req, &mirror->pg_list); >> + mirror->pg_count += req->wb_bytes; >> return 1; >> } >> >> @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, >> */ >> static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >> { >> - if (!list_empty(&desc->pg_list)) { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> + >> + if (!list_empty(&mirror->pg_list)) { >> int error = desc->pg_ops->pg_doio(desc); >> if (error < 0) >> desc->pg_error = error; >> else >> - desc->pg_bytes_written += desc->pg_count; >> + mirror->pg_bytes_written += mirror->pg_count; >> } >> - if (list_empty(&desc->pg_list)) { >> - desc->pg_count = 0; >> - desc->pg_base = 0; >> + if (list_empty(&mirror->pg_list)) { >> + mirror->pg_count = 0; >> + mirror->pg_base = 0; >> } >> } >> >> @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >> static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >> struct nfs_page *req) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> struct nfs_page *subreq; >> unsigned int bytes_left = 0; >> unsigned int offset, pgbase; >> >> + WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); >> + >> nfs_page_group_lock(req, false); >> >> subreq = req; >> @@ -938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >> nfs_pageio_doio(desc); >> if (desc->pg_error < 0) >> return 0; >> - if (desc->pg_recoalesce) >> + if (mirror->pg_recoalesce) >> return 0; >> /* retry 
add_request for this subreq */ >> nfs_page_group_lock(req, false); >> @@ -976,14 +1080,16 @@ err_ptr: >> >> static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> LIST_HEAD(head); >> >> do { >> - list_splice_init(&desc->pg_list, &head); >> - desc->pg_bytes_written -= desc->pg_count; >> - desc->pg_count = 0; >> - desc->pg_base = 0; >> - desc->pg_recoalesce = 0; >> + list_splice_init(&mirror->pg_list, &head); >> + mirror->pg_bytes_written -= mirror->pg_count; >> + mirror->pg_count = 0; >> + mirror->pg_base = 0; >> + mirror->pg_recoalesce = 0; >> + >> desc->pg_moreio = 0; >> >> while (!list_empty(&head)) { >> @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) >> return 0; >> break; >> } >> - } while (desc->pg_recoalesce); >> + } while (mirror->pg_recoalesce); >> return 1; >> } >> >> -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >> +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, >> struct nfs_page *req) >> { >> int ret; >> @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >> break; >> ret = nfs_do_recoalesce(desc); >> } while (ret); >> + >> return ret; >> } >> >> +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >> + struct nfs_page *req) >> +{ >> + u32 midx; >> + unsigned int pgbase, offset, bytes; >> + struct nfs_page *dupreq, *lastreq; >> + >> + pgbase = req->wb_pgbase; >> + offset = req->wb_offset; >> + bytes = req->wb_bytes; >> + >> + nfs_pageio_setup_mirroring(desc, req); >> + >> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >> + if (midx) { >> + nfs_page_group_lock(req, false); >> + >> + /* find the last request */ >> + for (lastreq = req->wb_head; >> + lastreq->wb_this_page != req->wb_head; >> + lastreq = lastreq->wb_this_page) >> + ; >> + >> + dupreq = nfs_create_request(req->wb_context, >> + req->wb_page, lastreq, pgbase, 
bytes); >> + >> + if (IS_ERR(dupreq)) { >> + nfs_page_group_unlock(req); >> + return 0; >> + } >> + >> + nfs_lock_request(dupreq); >> + nfs_page_group_unlock(req); >> + dupreq->wb_offset = offset; >> + dupreq->wb_index = req->wb_index; >> + } else >> + dupreq = req; >> + >> + desc->pg_mirror_idx = midx; >> + if (!nfs_pageio_add_request_mirror(desc, dupreq)) >> + return 0; >> + } >> + >> + return 1; >> +} >> + >> +/* >> + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an >> + * nfs_pageio_descriptor >> + * @desc: pointer to io descriptor >> + */ >> +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, >> + u32 mirror_idx) >> +{ >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; >> + u32 restore_idx = desc->pg_mirror_idx; >> + >> + desc->pg_mirror_idx = mirror_idx; >> + for (;;) { >> + nfs_pageio_doio(desc); >> + if (!mirror->pg_recoalesce) >> + break; >> + if (!nfs_do_recoalesce(desc)) >> + break; >> + } >> + desc->pg_mirror_idx = restore_idx; >> +} >> + >> /* >> * nfs_pageio_resend - Transfer requests to new descriptor and resend >> * @hdr - the pgio header to move request from >> @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); >> */ >> void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) >> { >> - for (;;) { >> - nfs_pageio_doio(desc); >> - if (!desc->pg_recoalesce) >> - break; >> - if (!nfs_do_recoalesce(desc)) >> - break; >> - } >> + u32 midx; >> + >> + for (midx = 0; midx < desc->pg_mirror_count; midx++) >> + nfs_pageio_complete_mirror(desc, midx); >> >> if (desc->pg_ops->pg_cleanup) >> desc->pg_ops->pg_cleanup(desc); >> + nfs_pageio_cleanup_mirroring(desc); >> } >> >> /** >> @@ -1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) >> */ >> void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) >> { >> - if (!list_empty(&desc->pg_list)) { >> - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); >> - if (index != 
prev->wb_index + 1) >> - nfs_pageio_complete(desc); >> + struct nfs_pgio_mirror *mirror; >> + struct nfs_page *prev; >> + u32 midx; >> + >> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >> + mirror = &desc->pg_mirrors[midx]; >> + if (!list_empty(&mirror->pg_list)) { >> + prev = nfs_list_entry(mirror->pg_list.prev); >> + if (index != prev->wb_index + 1) >> + nfs_pageio_complete_mirror(desc, midx); >> + } >> } >> } >> >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c >> index 2da2e77..5f7c422 100644 >> --- a/fs/nfs/pnfs.c >> +++ b/fs/nfs/pnfs.c >> @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); >> * of bytes (maximum @req->wb_bytes) that can be coalesced. >> */ >> size_t >> -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, >> - struct nfs_page *req) >> +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, >> + struct nfs_page *prev, struct nfs_page *req) >> { >> unsigned int size; >> u64 seg_end, req_start, seg_left; >> @@ -1729,10 +1729,12 @@ static void >> pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, >> struct nfs_pgio_header *hdr) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { >> - list_splice_tail_init(&hdr->pages, &desc->pg_list); >> + list_splice_tail_init(&hdr->pages, &mirror->pg_list); >> nfs_pageio_reset_write_mds(desc); >> - desc->pg_recoalesce = 1; >> + mirror->pg_recoalesce = 1; >> } >> nfs_pgio_data_destroy(hdr); >> } >> @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); >> int >> pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> struct nfs_pgio_header *hdr; >> int ret; >> >> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >> if (!hdr) { >> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >> return 
-ENOMEM; >> } >> nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); >> @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) >> ret = nfs_generic_pgio(desc, hdr); >> if (!ret) >> pnfs_do_write(desc, hdr, desc->pg_ioflags); >> + >> return ret; >> } >> EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); >> @@ -1839,10 +1844,13 @@ static void >> pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, >> struct nfs_pgio_header *hdr) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> + >> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { >> - list_splice_tail_init(&hdr->pages, &desc->pg_list); >> + list_splice_tail_init(&hdr->pages, &mirror->pg_list); >> nfs_pageio_reset_read_mds(desc); >> - desc->pg_recoalesce = 1; >> + mirror->pg_recoalesce = 1; >> } >> nfs_pgio_data_destroy(hdr); >> } >> @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); >> int >> pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) >> { >> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >> + >> struct nfs_pgio_header *hdr; >> int ret; >> >> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >> if (!hdr) { >> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >> return -ENOMEM; >> } >> nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); >> diff --git a/fs/nfs/read.c b/fs/nfs/read.c >> index 092ab49..568ecf0 100644 >> --- a/fs/nfs/read.c >> +++ b/fs/nfs/read.c >> @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); >> >> void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) >> { >> + struct nfs_pgio_mirror *mirror; >> + >> pgio->pg_ops = &nfs_pgio_rw_ops; >> - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; >> + >> + /* read path should never have more than one mirror */ >> + WARN_ON_ONCE(pgio->pg_mirror_count != 1); >> + >> + mirror = &pgio->pg_mirrors[0]; >> + mirror->pg_bsize = 
NFS_SERVER(pgio->pg_inode)->rsize; >> } >> EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); >> >> @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >> struct nfs_page *new; >> unsigned int len; >> struct nfs_pageio_descriptor pgio; >> + struct nfs_pgio_mirror *pgm; >> >> len = nfs_page_length(page); >> if (len == 0) >> @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >> &nfs_async_read_completion_ops); >> nfs_pageio_add_request(&pgio, new); >> nfs_pageio_complete(&pgio); >> - NFS_I(inode)->read_io += pgio.pg_bytes_written; >> + >> + /* It doesn't make sense to do mirrored reads! */ >> + WARN_ON_ONCE(pgio.pg_mirror_count != 1); >> + >> + pgm = &pgio.pg_mirrors[0]; >> + NFS_I(inode)->read_io += pgm->pg_bytes_written; >> + >> return 0; >> } >> >> @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >> struct list_head *pages, unsigned nr_pages) >> { >> struct nfs_pageio_descriptor pgio; >> + struct nfs_pgio_mirror *pgm; >> struct nfs_readdesc desc = { >> .pgio = &pgio, >> }; >> @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >> &nfs_async_read_completion_ops); >> >> ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); >> - >> nfs_pageio_complete(&pgio); >> - NFS_I(inode)->read_io += pgio.pg_bytes_written; >> - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; >> + >> + /* It doesn't make sense to do mirrored reads! 
*/ >> + WARN_ON_ONCE(pgio.pg_mirror_count != 1); >> + >> + pgm = &pgio.pg_mirrors[0]; >> + NFS_I(inode)->read_io += pgm->pg_bytes_written; >> + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> >> + PAGE_CACHE_SHIFT; >> nfs_add_stats(inode, NFSIOS_READPAGES, npages); >> read_complete: >> put_nfs_open_context(desc.ctx); >> diff --git a/fs/nfs/write.c b/fs/nfs/write.c >> index db802d9..2f6ee8e 100644 >> --- a/fs/nfs/write.c >> +++ b/fs/nfs/write.c >> @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) >> if (nfs_write_need_commit(hdr)) { >> memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); >> nfs_mark_request_commit(req, hdr->lseg, &cinfo, >> - 0); >> + hdr->pgio_mirror_idx); >> goto next; >> } >> remove_req: >> @@ -1305,8 +1305,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); >> >> void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) >> { >> + struct nfs_pgio_mirror *mirror; >> + >> pgio->pg_ops = &nfs_pgio_rw_ops; >> - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; >> + >> + nfs_pageio_stop_mirroring(pgio); >> + >> + mirror = &pgio->pg_mirrors[0]; >> + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; >> } >> EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); >> >> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h >> index 479c566..3eb072d 100644 >> --- a/include/linux/nfs_page.h >> +++ b/include/linux/nfs_page.h >> @@ -58,6 +58,8 @@ struct nfs_pageio_ops { >> size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, >> struct nfs_page *); >> int (*pg_doio)(struct nfs_pageio_descriptor *); >> + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, >> + struct nfs_page *); >> void (*pg_cleanup)(struct nfs_pageio_descriptor *); >> }; >> >> @@ -74,15 +76,17 @@ struct nfs_rw_ops { >> struct rpc_task_setup *, int); >> }; >> >> -struct nfs_pageio_descriptor { >> +struct nfs_pgio_mirror { >> struct list_head pg_list; >> unsigned long pg_bytes_written; >> size_t 
pg_count; >> size_t pg_bsize; >> unsigned int pg_base; >> - unsigned char pg_moreio : 1, >> - pg_recoalesce : 1; >> + unsigned char pg_recoalesce : 1; >> +}; >> >> +struct nfs_pageio_descriptor { >> + unsigned char pg_moreio : 1; >> struct inode *pg_inode; >> const struct nfs_pageio_ops *pg_ops; >> const struct nfs_rw_ops *pg_rw_ops; >> @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { >> struct pnfs_layout_segment *pg_lseg; >> struct nfs_direct_req *pg_dreq; >> void *pg_layout_private; >> + unsigned int pg_bsize; /* default bsize for mirrors */ >> + >> + u32 pg_mirror_count; >> + struct nfs_pgio_mirror *pg_mirrors; >> + struct nfs_pgio_mirror pg_mirrors_static[1]; >> + struct nfs_pgio_mirror *pg_mirrors_dynamic; >> + u32 pg_mirror_idx; /* current mirror */ >> }; >> >> +/* arbitrarily selected limit to number of mirrors */ >> +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 >> + >> #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) >> >> extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, >> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h >> index 5bc99f0..6400a1e 100644 >> --- a/include/linux/nfs_xdr.h >> +++ b/include/linux/nfs_xdr.h >> @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { >> struct nfs_page_array page_array; >> struct nfs_client *ds_clp; /* pNFS data server */ >> int ds_commit_idx; /* ds index if ds_clp is set */ >> + int pgio_mirror_idx;/* mirror index in pgio layer */ >> }; >> >> struct nfs_mds_commit_info { >> > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 01/06/2015 01:27 PM, Weston Andros Adamson wrote: > These issues are addressed and the comments are removed in subsequent patches > from the same series. > > Instead of having one huge patch that implements all of mirroring, I chose to split > it out into smaller patches. These notes were useful in making sure that the issues > were addressed and should be useful as a guide to someone bisecting, etc. Got it. I'm still working my way through these patches, so I haven't seen the ones that remove the comments yet. Thanks! Anna > > -dros > > >> On Jan 6, 2015, at 1:11 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: >> >> Hey Dros and Tom, >> >> I see you're adding some new FIXME and TODOs in the comments. Is there a plan for addressing these eventually? >> >> Thanks, >> Anna >> >> On 12/24/2014 02:13 AM, Tom Haynes wrote: >>> From: Weston Andros Adamson <dros@primarydata.com> >>> >>> This patch adds mirrored write support to the pgio layer. The default >>> is to use one mirror, but pgio callers may define callbacks to change >>> this to any value up to the (arbitrarily selected) limit of 16. >>> >>> The basic idea is to break out members of nfs_pageio_descriptor that cannot >>> be shared between mirrored DSes and put them in a new structure.
>>> >>> Signed-off-by: Weston Andros Adamson <dros@primarydata.com> >>> --- >>> fs/nfs/direct.c | 17 ++- >>> fs/nfs/internal.h | 1 + >>> fs/nfs/objlayout/objio_osd.c | 3 +- >>> fs/nfs/pagelist.c | 270 +++++++++++++++++++++++++++++++++++-------- >>> fs/nfs/pnfs.c | 26 +++-- >>> fs/nfs/read.c | 30 ++++- >>> fs/nfs/write.c | 10 +- >>> include/linux/nfs_page.h | 20 +++- >>> include/linux/nfs_xdr.h | 1 + >>> 9 files changed, 311 insertions(+), 67 deletions(-) >>> >>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c >>> index 1ee41d7..0178d4f 100644 >>> --- a/fs/nfs/direct.c >>> +++ b/fs/nfs/direct.c >>> @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) >>> spin_lock(&dreq->lock); >>> if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) >>> dreq->error = hdr->error; >>> - else >>> - dreq->count += hdr->good_bytes; >>> + else { >>> + /* >>> + * FIXME: right now this only accounts for bytes written >>> + * to the first mirror >>> + */ >>> + if (hdr->pgio_mirror_idx == 0) >>> + dreq->count += hdr->good_bytes; >>> + } >>> spin_unlock(&dreq->lock); >>> >>> while (!list_empty(&hdr->pages)) { >>> @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) >>> dreq->error = hdr->error; >>> } >>> if (dreq->error == 0) { >>> - dreq->count += hdr->good_bytes; >>> + /* >>> + * FIXME: right now this only accounts for bytes written >>> + * to the first mirror >>> + */ >>> + if (hdr->pgio_mirror_idx == 0) >>> + dreq->count += hdr->good_bytes; >>> if (nfs_write_need_commit(hdr)) { >>> if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) >>> request_commit = true; >>> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h >>> index 05f9a87..ef1c703 100644 >>> --- a/fs/nfs/internal.h >>> +++ b/fs/nfs/internal.h >>> @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, >>> struct nfs_direct_req *dreq); >>> int nfs_key_timeout_notify(struct file *filp, struct inode *inode); >>> bool 
nfs_ctx_key_to_expire(struct nfs_open_context *ctx); >>> +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); >>> >>> #ifdef CONFIG_MIGRATION >>> extern int nfs_migrate_page(struct address_space *, >>> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c >>> index d007780..9a5f2ee 100644 >>> --- a/fs/nfs/objlayout/objio_osd.c >>> +++ b/fs/nfs/objlayout/objio_osd.c >>> @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) >>> static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, >>> struct nfs_page *prev, struct nfs_page *req) >>> { >>> + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; >>> unsigned int size; >>> >>> size = pnfs_generic_pg_test(pgio, prev, req); >>> >>> - if (!size || pgio->pg_count + req->wb_bytes > >>> + if (!size || mirror->pg_count + req->wb_bytes > >>> (unsigned long)pgio->pg_layout_private) >>> return 0; >>> >>> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c >>> index 1c03187..eec12b7 100644 >>> --- a/fs/nfs/pagelist.c >>> +++ b/fs/nfs/pagelist.c >>> @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, >>> struct nfs_pgio_header *hdr, >>> void (*release)(struct nfs_pgio_header *hdr)) >>> { >>> - hdr->req = nfs_list_entry(desc->pg_list.next); >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> + >>> + hdr->req = nfs_list_entry(mirror->pg_list.next); >>> hdr->inode = desc->pg_inode; >>> hdr->cred = hdr->req->wb_context->cred; >>> hdr->io_start = req_offset(hdr->req); >>> - hdr->good_bytes = desc->pg_count; >>> + hdr->good_bytes = mirror->pg_count; >>> hdr->dreq = desc->pg_dreq; >>> hdr->layout_private = desc->pg_layout_private; >>> hdr->release = release; >>> hdr->completion_ops = desc->pg_completion_ops; >>> if (hdr->completion_ops->init_hdr) >>> hdr->completion_ops->init_hdr(hdr); >>> + >>> + hdr->pgio_mirror_idx = desc->pg_mirror_idx; >>> } >>> 
EXPORT_SYMBOL_GPL(nfs_pgheader_init); >>> >>> @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) >>> size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, >>> struct nfs_page *prev, struct nfs_page *req) >>> { >>> - if (desc->pg_count > desc->pg_bsize) { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> + >>> + if (mirror->pg_count > mirror->pg_bsize) { >>> /* should never happen */ >>> WARN_ON_ONCE(1); >>> return 0; >>> @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, >>> * Limit the request size so that we can still allocate a page array >>> * for it without upsetting the slab allocator. >>> */ >>> - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * >>> + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * >>> sizeof(struct page) > PAGE_SIZE) >>> return 0; >>> >>> - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); >>> + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); >>> } >>> EXPORT_SYMBOL_GPL(nfs_generic_pg_test); >>> >>> @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); >>> static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, >>> struct nfs_pgio_header *hdr) >>> { >>> + struct nfs_pgio_mirror *mirror; >>> + u32 midx; >>> + >>> set_bit(NFS_IOHDR_REDO, &hdr->flags); >>> nfs_pgio_data_destroy(hdr); >>> hdr->completion_ops->completion(hdr); >>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>> + /* TODO: Make sure it's right to clean up all mirrors here >>> + * and not just hdr->pgio_mirror_idx */ >>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >>> + mirror = &desc->pg_mirrors[midx]; >>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>> + } >>> return -ENOMEM; >>> } >>> >>> @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) >>> hdr->completion_ops->completion(hdr); >>> } >>> >>> +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, 
>>> + unsigned int bsize) >>> +{ >>> + INIT_LIST_HEAD(&mirror->pg_list); >>> + mirror->pg_bytes_written = 0; >>> + mirror->pg_count = 0; >>> + mirror->pg_bsize = bsize; >>> + mirror->pg_base = 0; >>> + mirror->pg_recoalesce = 0; >>> +} >>> + >>> /** >>> * nfs_pageio_init - initialise a page io descriptor >>> * @desc: pointer to descriptor >>> @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >>> size_t bsize, >>> int io_flags) >>> { >>> - INIT_LIST_HEAD(&desc->pg_list); >>> - desc->pg_bytes_written = 0; >>> - desc->pg_count = 0; >>> - desc->pg_bsize = bsize; >>> - desc->pg_base = 0; >>> + struct nfs_pgio_mirror *new; >>> + int i; >>> + >>> desc->pg_moreio = 0; >>> - desc->pg_recoalesce = 0; >>> desc->pg_inode = inode; >>> desc->pg_ops = pg_ops; >>> desc->pg_completion_ops = compl_ops; >>> @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >>> desc->pg_lseg = NULL; >>> desc->pg_dreq = NULL; >>> desc->pg_layout_private = NULL; >>> + desc->pg_bsize = bsize; >>> + >>> + desc->pg_mirror_count = 1; >>> + desc->pg_mirror_idx = 0; >>> + >>> + if (pg_ops->pg_get_mirror_count) { >>> + /* until we have a request, we don't have an lseg and no >>> + * idea how many mirrors there will be */ >>> + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, >>> + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); >>> + desc->pg_mirrors_dynamic = new; >>> + desc->pg_mirrors = new; >>> + >>> + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) >>> + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); >>> + } else { >>> + desc->pg_mirrors_dynamic = NULL; >>> + desc->pg_mirrors = desc->pg_mirrors_static; >>> + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); >>> + } >>> } >>> EXPORT_SYMBOL_GPL(nfs_pageio_init); >>> >>> @@ -738,14 +782,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) >>> int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, >>> struct nfs_pgio_header *hdr) >>> { >>> + struct nfs_pgio_mirror 
*mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> struct nfs_page *req; >>> struct page **pages, >>> *last_page; >>> - struct list_head *head = &desc->pg_list; >>> + struct list_head *head = &mirror->pg_list; >>> struct nfs_commit_info cinfo; >>> unsigned int pagecount, pageused; >>> >>> - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); >>> + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); >>> if (!nfs_pgarray_set(&hdr->page_array, pagecount)) >>> return nfs_pgio_error(desc, hdr); >>> >>> @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, >>> desc->pg_ioflags &= ~FLUSH_COND_STABLE; >>> >>> /* Set up the argument struct */ >>> - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); >>> + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); >>> desc->pg_rpc_callops = &nfs_pgio_common_ops; >>> return 0; >>> } >>> @@ -781,12 +827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); >>> >>> static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) >>> { >>> + struct nfs_pgio_mirror *mirror; >>> struct nfs_pgio_header *hdr; >>> int ret; >>> >>> + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >>> if (!hdr) { >>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>> + /* TODO: make sure this is right with mirroring - or >>> + * should it back out all mirrors? 
*/ >>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>> return -ENOMEM; >>> } >>> nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); >>> @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) >>> return ret; >>> } >>> >>> +/* >>> + * nfs_pageio_setup_mirroring - determine if mirroring is to be used >>> + * by calling the pg_get_mirror_count op >>> + */ >>> +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, >>> + struct nfs_page *req) >>> +{ >>> + int mirror_count = 1; >>> + >>> + if (!pgio->pg_ops->pg_get_mirror_count) >>> + return 0; >>> + >>> + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); >>> + >>> + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) >>> + return -EINVAL; >>> + >>> + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) >>> + return -EINVAL; >>> + >>> + pgio->pg_mirror_count = mirror_count; >>> + >>> + return 0; >>> +} >>> + >>> +/* >>> + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) >>> + */ >>> +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) >>> +{ >>> + pgio->pg_mirror_count = 1; >>> + pgio->pg_mirror_idx = 0; >>> +} >>> + >>> +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) >>> +{ >>> + pgio->pg_mirror_count = 1; >>> + pgio->pg_mirror_idx = 0; >>> + pgio->pg_mirrors = pgio->pg_mirrors_static; >>> + kfree(pgio->pg_mirrors_dynamic); >>> + pgio->pg_mirrors_dynamic = NULL; >>> +} >>> + >>> static bool nfs_match_open_context(const struct nfs_open_context *ctx1, >>> const struct nfs_open_context *ctx2) >>> { >>> @@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, >>> static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, >>> struct nfs_page *req) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> struct nfs_page *prev = NULL; >>> - if (desc->pg_count != 0) { >>> - prev = 
nfs_list_entry(desc->pg_list.prev); >>> + >>> + if (mirror->pg_count != 0) { >>> + prev = nfs_list_entry(mirror->pg_list.prev); >>> } else { >>> if (desc->pg_ops->pg_init) >>> desc->pg_ops->pg_init(desc, req); >>> - desc->pg_base = req->wb_pgbase; >>> + mirror->pg_base = req->wb_pgbase; >>> } >>> if (!nfs_can_coalesce_requests(prev, req, desc)) >>> return 0; >>> nfs_list_remove_request(req); >>> - nfs_list_add_request(req, &desc->pg_list); >>> - desc->pg_count += req->wb_bytes; >>> + nfs_list_add_request(req, &mirror->pg_list); >>> + mirror->pg_count += req->wb_bytes; >>> return 1; >>> } >>> >>> @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, >>> */ >>> static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >>> { >>> - if (!list_empty(&desc->pg_list)) { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> + >>> + if (!list_empty(&mirror->pg_list)) { >>> int error = desc->pg_ops->pg_doio(desc); >>> if (error < 0) >>> desc->pg_error = error; >>> else >>> - desc->pg_bytes_written += desc->pg_count; >>> + mirror->pg_bytes_written += mirror->pg_count; >>> } >>> - if (list_empty(&desc->pg_list)) { >>> - desc->pg_count = 0; >>> - desc->pg_base = 0; >>> + if (list_empty(&mirror->pg_list)) { >>> + mirror->pg_count = 0; >>> + mirror->pg_base = 0; >>> } >>> } >>> >>> @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >>> static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>> struct nfs_page *req) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> struct nfs_page *subreq; >>> unsigned int bytes_left = 0; >>> unsigned int offset, pgbase; >>> >>> + WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); >>> + >>> nfs_page_group_lock(req, false); >>> >>> subreq = req; >>> @@ -938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>> 
nfs_pageio_doio(desc); >>> if (desc->pg_error < 0) >>> return 0; >>> - if (desc->pg_recoalesce) >>> + if (mirror->pg_recoalesce) >>> return 0; >>> /* retry add_request for this subreq */ >>> nfs_page_group_lock(req, false); >>> @@ -976,14 +1080,16 @@ err_ptr: >>> >>> static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> LIST_HEAD(head); >>> >>> do { >>> - list_splice_init(&desc->pg_list, &head); >>> - desc->pg_bytes_written -= desc->pg_count; >>> - desc->pg_count = 0; >>> - desc->pg_base = 0; >>> - desc->pg_recoalesce = 0; >>> + list_splice_init(&mirror->pg_list, &head); >>> + mirror->pg_bytes_written -= mirror->pg_count; >>> + mirror->pg_count = 0; >>> + mirror->pg_base = 0; >>> + mirror->pg_recoalesce = 0; >>> + >>> desc->pg_moreio = 0; >>> >>> while (!list_empty(&head)) { >>> @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) >>> return 0; >>> break; >>> } >>> - } while (desc->pg_recoalesce); >>> + } while (mirror->pg_recoalesce); >>> return 1; >>> } >>> >>> -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>> +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, >>> struct nfs_page *req) >>> { >>> int ret; >>> @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>> break; >>> ret = nfs_do_recoalesce(desc); >>> } while (ret); >>> + >>> return ret; >>> } >>> >>> +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>> + struct nfs_page *req) >>> +{ >>> + u32 midx; >>> + unsigned int pgbase, offset, bytes; >>> + struct nfs_page *dupreq, *lastreq; >>> + >>> + pgbase = req->wb_pgbase; >>> + offset = req->wb_offset; >>> + bytes = req->wb_bytes; >>> + >>> + nfs_pageio_setup_mirroring(desc, req); >>> + >>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >>> + if (midx) { >>> + nfs_page_group_lock(req, false); >>> + >>> + /* find the last 
request */ >>> + for (lastreq = req->wb_head; >>> + lastreq->wb_this_page != req->wb_head; >>> + lastreq = lastreq->wb_this_page) >>> + ; >>> + >>> + dupreq = nfs_create_request(req->wb_context, >>> + req->wb_page, lastreq, pgbase, bytes); >>> + >>> + if (IS_ERR(dupreq)) { >>> + nfs_page_group_unlock(req); >>> + return 0; >>> + } >>> + >>> + nfs_lock_request(dupreq); >>> + nfs_page_group_unlock(req); >>> + dupreq->wb_offset = offset; >>> + dupreq->wb_index = req->wb_index; >>> + } else >>> + dupreq = req; >>> + >>> + desc->pg_mirror_idx = midx; >>> + if (!nfs_pageio_add_request_mirror(desc, dupreq)) >>> + return 0; >>> + } >>> + >>> + return 1; >>> +} >>> + >>> +/* >>> + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an >>> + * nfs_pageio_descriptor >>> + * @desc: pointer to io descriptor >>> + */ >>> +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, >>> + u32 mirror_idx) >>> +{ >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; >>> + u32 restore_idx = desc->pg_mirror_idx; >>> + >>> + desc->pg_mirror_idx = mirror_idx; >>> + for (;;) { >>> + nfs_pageio_doio(desc); >>> + if (!mirror->pg_recoalesce) >>> + break; >>> + if (!nfs_do_recoalesce(desc)) >>> + break; >>> + } >>> + desc->pg_mirror_idx = restore_idx; >>> +} >>> + >>> /* >>> * nfs_pageio_resend - Transfer requests to new descriptor and resend >>> * @hdr - the pgio header to move request from >>> @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); >>> */ >>> void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) >>> { >>> - for (;;) { >>> - nfs_pageio_doio(desc); >>> - if (!desc->pg_recoalesce) >>> - break; >>> - if (!nfs_do_recoalesce(desc)) >>> - break; >>> - } >>> + u32 midx; >>> + >>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) >>> + nfs_pageio_complete_mirror(desc, midx); >>> >>> if (desc->pg_ops->pg_cleanup) >>> desc->pg_ops->pg_cleanup(desc); >>> + nfs_pageio_cleanup_mirroring(desc); >>> } >>> >>> /** >>> @@ 
-1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) >>> */ >>> void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) >>> { >>> - if (!list_empty(&desc->pg_list)) { >>> - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); >>> - if (index != prev->wb_index + 1) >>> - nfs_pageio_complete(desc); >>> + struct nfs_pgio_mirror *mirror; >>> + struct nfs_page *prev; >>> + u32 midx; >>> + >>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >>> + mirror = &desc->pg_mirrors[midx]; >>> + if (!list_empty(&mirror->pg_list)) { >>> + prev = nfs_list_entry(mirror->pg_list.prev); >>> + if (index != prev->wb_index + 1) >>> + nfs_pageio_complete_mirror(desc, midx); >>> + } >>> } >>> } >>> >>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c >>> index 2da2e77..5f7c422 100644 >>> --- a/fs/nfs/pnfs.c >>> +++ b/fs/nfs/pnfs.c >>> @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); >>> * of bytes (maximum @req->wb_bytes) that can be coalesced. 
>>> */ >>> size_t >>> -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, >>> - struct nfs_page *req) >>> +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, >>> + struct nfs_page *prev, struct nfs_page *req) >>> { >>> unsigned int size; >>> u64 seg_end, req_start, seg_left; >>> @@ -1729,10 +1729,12 @@ static void >>> pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, >>> struct nfs_pgio_header *hdr) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { >>> - list_splice_tail_init(&hdr->pages, &desc->pg_list); >>> + list_splice_tail_init(&hdr->pages, &mirror->pg_list); >>> nfs_pageio_reset_write_mds(desc); >>> - desc->pg_recoalesce = 1; >>> + mirror->pg_recoalesce = 1; >>> } >>> nfs_pgio_data_destroy(hdr); >>> } >>> @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); >>> int >>> pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> struct nfs_pgio_header *hdr; >>> int ret; >>> >>> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >>> if (!hdr) { >>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>> return -ENOMEM; >>> } >>> nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); >>> @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) >>> ret = nfs_generic_pgio(desc, hdr); >>> if (!ret) >>> pnfs_do_write(desc, hdr, desc->pg_ioflags); >>> + >>> return ret; >>> } >>> EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); >>> @@ -1839,10 +1844,13 @@ static void >>> pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, >>> struct nfs_pgio_header *hdr) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> + >>> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { >>> - 
list_splice_tail_init(&hdr->pages, &desc->pg_list); >>> + list_splice_tail_init(&hdr->pages, &mirror->pg_list); >>> nfs_pageio_reset_read_mds(desc); >>> - desc->pg_recoalesce = 1; >>> + mirror->pg_recoalesce = 1; >>> } >>> nfs_pgio_data_destroy(hdr); >>> } >>> @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); >>> int >>> pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) >>> { >>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>> + >>> struct nfs_pgio_header *hdr; >>> int ret; >>> >>> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >>> if (!hdr) { >>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>> return -ENOMEM; >>> } >>> nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); >>> diff --git a/fs/nfs/read.c b/fs/nfs/read.c >>> index 092ab49..568ecf0 100644 >>> --- a/fs/nfs/read.c >>> +++ b/fs/nfs/read.c >>> @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); >>> >>> void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) >>> { >>> + struct nfs_pgio_mirror *mirror; >>> + >>> pgio->pg_ops = &nfs_pgio_rw_ops; >>> - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; >>> + >>> + /* read path should never have more than one mirror */ >>> + WARN_ON_ONCE(pgio->pg_mirror_count != 1); >>> + >>> + mirror = &pgio->pg_mirrors[0]; >>> + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; >>> } >>> EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); >>> >>> @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >>> struct nfs_page *new; >>> unsigned int len; >>> struct nfs_pageio_descriptor pgio; >>> + struct nfs_pgio_mirror *pgm; >>> >>> len = nfs_page_length(page); >>> if (len == 0) >>> @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >>> &nfs_async_read_completion_ops); >>> nfs_pageio_add_request(&pgio, new); >>> nfs_pageio_complete(&pgio); >>> - 
NFS_I(inode)->read_io += pgio.pg_bytes_written; >>> + >>> + /* It doesn't make sense to do mirrored reads! */ >>> + WARN_ON_ONCE(pgio.pg_mirror_count != 1); >>> + >>> + pgm = &pgio.pg_mirrors[0]; >>> + NFS_I(inode)->read_io += pgm->pg_bytes_written; >>> + >>> return 0; >>> } >>> >>> @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >>> struct list_head *pages, unsigned nr_pages) >>> { >>> struct nfs_pageio_descriptor pgio; >>> + struct nfs_pgio_mirror *pgm; >>> struct nfs_readdesc desc = { >>> .pgio = &pgio, >>> }; >>> @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >>> &nfs_async_read_completion_ops); >>> >>> ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); >>> - >>> nfs_pageio_complete(&pgio); >>> - NFS_I(inode)->read_io += pgio.pg_bytes_written; >>> - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; >>> + >>> + /* It doesn't make sense to do mirrored reads! */ >>> + WARN_ON_ONCE(pgio.pg_mirror_count != 1); >>> + >>> + pgm = &pgio.pg_mirrors[0]; >>> + NFS_I(inode)->read_io += pgm->pg_bytes_written; >>> + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> >>> + PAGE_CACHE_SHIFT; >>> nfs_add_stats(inode, NFSIOS_READPAGES, npages); >>> read_complete: >>> put_nfs_open_context(desc.ctx); >>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c >>> index db802d9..2f6ee8e 100644 >>> --- a/fs/nfs/write.c >>> +++ b/fs/nfs/write.c >>> @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) >>> if (nfs_write_need_commit(hdr)) { >>> memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); >>> nfs_mark_request_commit(req, hdr->lseg, &cinfo, >>> - 0); >>> + hdr->pgio_mirror_idx); >>> goto next; >>> } >>> remove_req: >>> @@ -1305,8 +1305,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); >>> >>> void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) >>> { >>> + struct nfs_pgio_mirror *mirror; >>> + >>> 
pgio->pg_ops = &nfs_pgio_rw_ops; >>> - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; >>> + >>> + nfs_pageio_stop_mirroring(pgio); >>> + >>> + mirror = &pgio->pg_mirrors[0]; >>> + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; >>> } >>> EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); >>> >>> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h >>> index 479c566..3eb072d 100644 >>> --- a/include/linux/nfs_page.h >>> +++ b/include/linux/nfs_page.h >>> @@ -58,6 +58,8 @@ struct nfs_pageio_ops { >>> size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, >>> struct nfs_page *); >>> int (*pg_doio)(struct nfs_pageio_descriptor *); >>> + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, >>> + struct nfs_page *); >>> void (*pg_cleanup)(struct nfs_pageio_descriptor *); >>> }; >>> >>> @@ -74,15 +76,17 @@ struct nfs_rw_ops { >>> struct rpc_task_setup *, int); >>> }; >>> >>> -struct nfs_pageio_descriptor { >>> +struct nfs_pgio_mirror { >>> struct list_head pg_list; >>> unsigned long pg_bytes_written; >>> size_t pg_count; >>> size_t pg_bsize; >>> unsigned int pg_base; >>> - unsigned char pg_moreio : 1, >>> - pg_recoalesce : 1; >>> + unsigned char pg_recoalesce : 1; >>> +}; >>> >>> +struct nfs_pageio_descriptor { >>> + unsigned char pg_moreio : 1; >>> struct inode *pg_inode; >>> const struct nfs_pageio_ops *pg_ops; >>> const struct nfs_rw_ops *pg_rw_ops; >>> @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { >>> struct pnfs_layout_segment *pg_lseg; >>> struct nfs_direct_req *pg_dreq; >>> void *pg_layout_private; >>> + unsigned int pg_bsize; /* default bsize for mirrors */ >>> + >>> + u32 pg_mirror_count; >>> + struct nfs_pgio_mirror *pg_mirrors; >>> + struct nfs_pgio_mirror pg_mirrors_static[1]; >>> + struct nfs_pgio_mirror *pg_mirrors_dynamic; >>> + u32 pg_mirror_idx; /* current mirror */ >>> }; >>> >>> +/* arbitrarily selected limit to number of mirrors */ >>> +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 >>> + >>> #define 
NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) >>> >>> extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, >>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h >>> index 5bc99f0..6400a1e 100644 >>> --- a/include/linux/nfs_xdr.h >>> +++ b/include/linux/nfs_xdr.h >>> @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { >>> struct nfs_page_array page_array; >>> struct nfs_client *ds_clp; /* pNFS data server */ >>> int ds_commit_idx; /* ds index if ds_clp is set */ >>> + int pgio_mirror_idx;/* mirror index in pgio layer */ >>> }; >>> >>> struct nfs_mds_commit_info { >>> >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On Jan 6, 2015, at 1:32 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: > > On 01/06/2015 01:27 PM, Weston Andros Adamson wrote: >> These issues are addressed and the comments are removed in subsequent patches >> from the same series. >> >> Instead of having one huge patch that implements all of mirroring, I chose to split >> it out into smaller patches. These notes were useful in making sure that the issues >> were addressed and should be useful as a guide to someone bisecting, etc. > > Got it. I'm still working my way through these patches, so I haven't seen the ones that remove the comments yet. > > Thanks! >> Thanks for reviewing! -dros >> >> >>> On Jan 6, 2015, at 1:11 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: >>> >>> Hey Dros and Tom, >>> >>> I see you're adding some new FIXME and TODOs in the comments. Is there a plan for addressing these eventually? >>> >>> Thanks, >>> Anna >>> >>> On 12/24/2014 02:13 AM, Tom Haynes wrote: >>>> From: Weston Andros Adamson <dros@primarydata.com> >>>> >>>> This patch adds mirrored write support to the pgio layer. The default >>>> is to use one mirror, but pgio callers may define callbacks to change >>>> this to any value up to the (arbitrarily selected) limit of 16. >>>> >>>> The basic idea is to break out members of nfs_pageio_descriptor that cannot >>>> be shared between mirrored DSes and put them in a new structure. 
>>>> >>>> Signed-off-by: Weston Andros Adamson <dros@primarydata.com> >>>> --- >>>> fs/nfs/direct.c | 17 ++- >>>> fs/nfs/internal.h | 1 + >>>> fs/nfs/objlayout/objio_osd.c | 3 +- >>>> fs/nfs/pagelist.c | 270 +++++++++++++++++++++++++++++++++++-------- >>>> fs/nfs/pnfs.c | 26 +++-- >>>> fs/nfs/read.c | 30 ++++- >>>> fs/nfs/write.c | 10 +- >>>> include/linux/nfs_page.h | 20 +++- >>>> include/linux/nfs_xdr.h | 1 + >>>> 9 files changed, 311 insertions(+), 67 deletions(-) >>>> >>>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c >>>> index 1ee41d7..0178d4f 100644 >>>> --- a/fs/nfs/direct.c >>>> +++ b/fs/nfs/direct.c >>>> @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) >>>> spin_lock(&dreq->lock); >>>> if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) >>>> dreq->error = hdr->error; >>>> - else >>>> - dreq->count += hdr->good_bytes; >>>> + else { >>>> + /* >>>> + * FIXME: right now this only accounts for bytes written >>>> + * to the first mirror >>>> + */ >>>> + if (hdr->pgio_mirror_idx == 0) >>>> + dreq->count += hdr->good_bytes; >>>> + } >>>> spin_unlock(&dreq->lock); >>>> >>>> while (!list_empty(&hdr->pages)) { >>>> @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) >>>> dreq->error = hdr->error; >>>> } >>>> if (dreq->error == 0) { >>>> - dreq->count += hdr->good_bytes; >>>> + /* >>>> + * FIXME: right now this only accounts for bytes written >>>> + * to the first mirror >>>> + */ >>>> + if (hdr->pgio_mirror_idx == 0) >>>> + dreq->count += hdr->good_bytes; >>>> if (nfs_write_need_commit(hdr)) { >>>> if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) >>>> request_commit = true; >>>> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h >>>> index 05f9a87..ef1c703 100644 >>>> --- a/fs/nfs/internal.h >>>> +++ b/fs/nfs/internal.h >>>> @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, >>>> struct nfs_direct_req *dreq); >>>> int nfs_key_timeout_notify(struct file 
*filp, struct inode *inode); >>>> bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); >>>> +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); >>>> >>>> #ifdef CONFIG_MIGRATION >>>> extern int nfs_migrate_page(struct address_space *, >>>> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c >>>> index d007780..9a5f2ee 100644 >>>> --- a/fs/nfs/objlayout/objio_osd.c >>>> +++ b/fs/nfs/objlayout/objio_osd.c >>>> @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) >>>> static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, >>>> struct nfs_page *prev, struct nfs_page *req) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; >>>> unsigned int size; >>>> >>>> size = pnfs_generic_pg_test(pgio, prev, req); >>>> >>>> - if (!size || pgio->pg_count + req->wb_bytes > >>>> + if (!size || mirror->pg_count + req->wb_bytes > >>>> (unsigned long)pgio->pg_layout_private) >>>> return 0; >>>> >>>> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c >>>> index 1c03187..eec12b7 100644 >>>> --- a/fs/nfs/pagelist.c >>>> +++ b/fs/nfs/pagelist.c >>>> @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, >>>> struct nfs_pgio_header *hdr, >>>> void (*release)(struct nfs_pgio_header *hdr)) >>>> { >>>> - hdr->req = nfs_list_entry(desc->pg_list.next); >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> + >>>> + hdr->req = nfs_list_entry(mirror->pg_list.next); >>>> hdr->inode = desc->pg_inode; >>>> hdr->cred = hdr->req->wb_context->cred; >>>> hdr->io_start = req_offset(hdr->req); >>>> - hdr->good_bytes = desc->pg_count; >>>> + hdr->good_bytes = mirror->pg_count; >>>> hdr->dreq = desc->pg_dreq; >>>> hdr->layout_private = desc->pg_layout_private; >>>> hdr->release = release; >>>> hdr->completion_ops = desc->pg_completion_ops; >>>> if (hdr->completion_ops->init_hdr) >>>> hdr->completion_ops->init_hdr(hdr); >>>> + >>>> + 
hdr->pgio_mirror_idx = desc->pg_mirror_idx; >>>> } >>>> EXPORT_SYMBOL_GPL(nfs_pgheader_init); >>>> >>>> @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) >>>> size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, >>>> struct nfs_page *prev, struct nfs_page *req) >>>> { >>>> - if (desc->pg_count > desc->pg_bsize) { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> + >>>> + if (mirror->pg_count > mirror->pg_bsize) { >>>> /* should never happen */ >>>> WARN_ON_ONCE(1); >>>> return 0; >>>> @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, >>>> * Limit the request size so that we can still allocate a page array >>>> * for it without upsetting the slab allocator. >>>> */ >>>> - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * >>>> + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * >>>> sizeof(struct page) > PAGE_SIZE) >>>> return 0; >>>> >>>> - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); >>>> + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); >>>> } >>>> EXPORT_SYMBOL_GPL(nfs_generic_pg_test); >>>> >>>> @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); >>>> static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, >>>> struct nfs_pgio_header *hdr) >>>> { >>>> + struct nfs_pgio_mirror *mirror; >>>> + u32 midx; >>>> + >>>> set_bit(NFS_IOHDR_REDO, &hdr->flags); >>>> nfs_pgio_data_destroy(hdr); >>>> hdr->completion_ops->completion(hdr); >>>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>>> + /* TODO: Make sure it's right to clean up all mirrors here >>>> + * and not just hdr->pgio_mirror_idx */ >>>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >>>> + mirror = &desc->pg_mirrors[midx]; >>>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>>> + } >>>> return -ENOMEM; >>>> } >>>> >>>> @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) >>>> 
hdr->completion_ops->completion(hdr); >>>> } >>>> >>>> +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, >>>> + unsigned int bsize) >>>> +{ >>>> + INIT_LIST_HEAD(&mirror->pg_list); >>>> + mirror->pg_bytes_written = 0; >>>> + mirror->pg_count = 0; >>>> + mirror->pg_bsize = bsize; >>>> + mirror->pg_base = 0; >>>> + mirror->pg_recoalesce = 0; >>>> +} >>>> + >>>> /** >>>> * nfs_pageio_init - initialise a page io descriptor >>>> * @desc: pointer to descriptor >>>> @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >>>> size_t bsize, >>>> int io_flags) >>>> { >>>> - INIT_LIST_HEAD(&desc->pg_list); >>>> - desc->pg_bytes_written = 0; >>>> - desc->pg_count = 0; >>>> - desc->pg_bsize = bsize; >>>> - desc->pg_base = 0; >>>> + struct nfs_pgio_mirror *new; >>>> + int i; >>>> + >>>> desc->pg_moreio = 0; >>>> - desc->pg_recoalesce = 0; >>>> desc->pg_inode = inode; >>>> desc->pg_ops = pg_ops; >>>> desc->pg_completion_ops = compl_ops; >>>> @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >>>> desc->pg_lseg = NULL; >>>> desc->pg_dreq = NULL; >>>> desc->pg_layout_private = NULL; >>>> + desc->pg_bsize = bsize; >>>> + >>>> + desc->pg_mirror_count = 1; >>>> + desc->pg_mirror_idx = 0; >>>> + >>>> + if (pg_ops->pg_get_mirror_count) { >>>> + /* until we have a request, we don't have an lseg and no >>>> + * idea how many mirrors there will be */ >>>> + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, >>>> + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); >>>> + desc->pg_mirrors_dynamic = new; >>>> + desc->pg_mirrors = new; >>>> + >>>> + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) >>>> + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); >>>> + } else { >>>> + desc->pg_mirrors_dynamic = NULL; >>>> + desc->pg_mirrors = desc->pg_mirrors_static; >>>> + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); >>>> + } >>>> } >>>> EXPORT_SYMBOL_GPL(nfs_pageio_init); >>>> >>>> @@ -738,14 +782,16 @@ static void 
nfs_pgio_result(struct rpc_task *task, void *calldata) >>>> int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, >>>> struct nfs_pgio_header *hdr) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> struct nfs_page *req; >>>> struct page **pages, >>>> *last_page; >>>> - struct list_head *head = &desc->pg_list; >>>> + struct list_head *head = &mirror->pg_list; >>>> struct nfs_commit_info cinfo; >>>> unsigned int pagecount, pageused; >>>> >>>> - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); >>>> + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); >>>> if (!nfs_pgarray_set(&hdr->page_array, pagecount)) >>>> return nfs_pgio_error(desc, hdr); >>>> >>>> @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, >>>> desc->pg_ioflags &= ~FLUSH_COND_STABLE; >>>> >>>> /* Set up the argument struct */ >>>> - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); >>>> + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); >>>> desc->pg_rpc_callops = &nfs_pgio_common_ops; >>>> return 0; >>>> } >>>> @@ -781,12 +827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); >>>> >>>> static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) >>>> { >>>> + struct nfs_pgio_mirror *mirror; >>>> struct nfs_pgio_header *hdr; >>>> int ret; >>>> >>>> + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >>>> if (!hdr) { >>>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>>> + /* TODO: make sure this is right with mirroring - or >>>> + * should it back out all mirrors? 
*/ >>>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>>> return -ENOMEM; >>>> } >>>> nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); >>>> @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) >>>> return ret; >>>> } >>>> >>>> +/* >>>> + * nfs_pageio_setup_mirroring - determine if mirroring is to be used >>>> + * by calling the pg_get_mirror_count op >>>> + */ >>>> +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, >>>> + struct nfs_page *req) >>>> +{ >>>> + int mirror_count = 1; >>>> + >>>> + if (!pgio->pg_ops->pg_get_mirror_count) >>>> + return 0; >>>> + >>>> + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); >>>> + >>>> + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) >>>> + return -EINVAL; >>>> + >>>> + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) >>>> + return -EINVAL; >>>> + >>>> + pgio->pg_mirror_count = mirror_count; >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +/* >>>> + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) >>>> + */ >>>> +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) >>>> +{ >>>> + pgio->pg_mirror_count = 1; >>>> + pgio->pg_mirror_idx = 0; >>>> +} >>>> + >>>> +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) >>>> +{ >>>> + pgio->pg_mirror_count = 1; >>>> + pgio->pg_mirror_idx = 0; >>>> + pgio->pg_mirrors = pgio->pg_mirrors_static; >>>> + kfree(pgio->pg_mirrors_dynamic); >>>> + pgio->pg_mirrors_dynamic = NULL; >>>> +} >>>> + >>>> static bool nfs_match_open_context(const struct nfs_open_context *ctx1, >>>> const struct nfs_open_context *ctx2) >>>> { >>>> @@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, >>>> static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, >>>> struct nfs_page *req) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> struct nfs_page *prev = 
NULL; >>>> - if (desc->pg_count != 0) { >>>> - prev = nfs_list_entry(desc->pg_list.prev); >>>> + >>>> + if (mirror->pg_count != 0) { >>>> + prev = nfs_list_entry(mirror->pg_list.prev); >>>> } else { >>>> if (desc->pg_ops->pg_init) >>>> desc->pg_ops->pg_init(desc, req); >>>> - desc->pg_base = req->wb_pgbase; >>>> + mirror->pg_base = req->wb_pgbase; >>>> } >>>> if (!nfs_can_coalesce_requests(prev, req, desc)) >>>> return 0; >>>> nfs_list_remove_request(req); >>>> - nfs_list_add_request(req, &desc->pg_list); >>>> - desc->pg_count += req->wb_bytes; >>>> + nfs_list_add_request(req, &mirror->pg_list); >>>> + mirror->pg_count += req->wb_bytes; >>>> return 1; >>>> } >>>> >>>> @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, >>>> */ >>>> static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >>>> { >>>> - if (!list_empty(&desc->pg_list)) { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> + >>>> + if (!list_empty(&mirror->pg_list)) { >>>> int error = desc->pg_ops->pg_doio(desc); >>>> if (error < 0) >>>> desc->pg_error = error; >>>> else >>>> - desc->pg_bytes_written += desc->pg_count; >>>> + mirror->pg_bytes_written += mirror->pg_count; >>>> } >>>> - if (list_empty(&desc->pg_list)) { >>>> - desc->pg_count = 0; >>>> - desc->pg_base = 0; >>>> + if (list_empty(&mirror->pg_list)) { >>>> + mirror->pg_count = 0; >>>> + mirror->pg_base = 0; >>>> } >>>> } >>>> >>>> @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >>>> static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>>> struct nfs_page *req) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> struct nfs_page *subreq; >>>> unsigned int bytes_left = 0; >>>> unsigned int offset, pgbase; >>>> >>>> + WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); >>>> + >>>> nfs_page_group_lock(req, false); >>>> >>>> subreq = req; >>>> @@ 
-938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>>> nfs_pageio_doio(desc); >>>> if (desc->pg_error < 0) >>>> return 0; >>>> - if (desc->pg_recoalesce) >>>> + if (mirror->pg_recoalesce) >>>> return 0; >>>> /* retry add_request for this subreq */ >>>> nfs_page_group_lock(req, false); >>>> @@ -976,14 +1080,16 @@ err_ptr: >>>> >>>> static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> LIST_HEAD(head); >>>> >>>> do { >>>> - list_splice_init(&desc->pg_list, &head); >>>> - desc->pg_bytes_written -= desc->pg_count; >>>> - desc->pg_count = 0; >>>> - desc->pg_base = 0; >>>> - desc->pg_recoalesce = 0; >>>> + list_splice_init(&mirror->pg_list, &head); >>>> + mirror->pg_bytes_written -= mirror->pg_count; >>>> + mirror->pg_count = 0; >>>> + mirror->pg_base = 0; >>>> + mirror->pg_recoalesce = 0; >>>> + >>>> desc->pg_moreio = 0; >>>> >>>> while (!list_empty(&head)) { >>>> @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) >>>> return 0; >>>> break; >>>> } >>>> - } while (desc->pg_recoalesce); >>>> + } while (mirror->pg_recoalesce); >>>> return 1; >>>> } >>>> >>>> -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>>> +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, >>>> struct nfs_page *req) >>>> { >>>> int ret; >>>> @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>>> break; >>>> ret = nfs_do_recoalesce(desc); >>>> } while (ret); >>>> + >>>> return ret; >>>> } >>>> >>>> +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, >>>> + struct nfs_page *req) >>>> +{ >>>> + u32 midx; >>>> + unsigned int pgbase, offset, bytes; >>>> + struct nfs_page *dupreq, *lastreq; >>>> + >>>> + pgbase = req->wb_pgbase; >>>> + offset = req->wb_offset; >>>> + bytes = req->wb_bytes; >>>> + >>>> + nfs_pageio_setup_mirroring(desc, req); 
>>>> + >>>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >>>> + if (midx) { >>>> + nfs_page_group_lock(req, false); >>>> + >>>> + /* find the last request */ >>>> + for (lastreq = req->wb_head; >>>> + lastreq->wb_this_page != req->wb_head; >>>> + lastreq = lastreq->wb_this_page) >>>> + ; >>>> + >>>> + dupreq = nfs_create_request(req->wb_context, >>>> + req->wb_page, lastreq, pgbase, bytes); >>>> + >>>> + if (IS_ERR(dupreq)) { >>>> + nfs_page_group_unlock(req); >>>> + return 0; >>>> + } >>>> + >>>> + nfs_lock_request(dupreq); >>>> + nfs_page_group_unlock(req); >>>> + dupreq->wb_offset = offset; >>>> + dupreq->wb_index = req->wb_index; >>>> + } else >>>> + dupreq = req; >>>> + >>>> + desc->pg_mirror_idx = midx; >>>> + if (!nfs_pageio_add_request_mirror(desc, dupreq)) >>>> + return 0; >>>> + } >>>> + >>>> + return 1; >>>> +} >>>> + >>>> +/* >>>> + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an >>>> + * nfs_pageio_descriptor >>>> + * @desc: pointer to io descriptor >>>> + */ >>>> +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, >>>> + u32 mirror_idx) >>>> +{ >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; >>>> + u32 restore_idx = desc->pg_mirror_idx; >>>> + >>>> + desc->pg_mirror_idx = mirror_idx; >>>> + for (;;) { >>>> + nfs_pageio_doio(desc); >>>> + if (!mirror->pg_recoalesce) >>>> + break; >>>> + if (!nfs_do_recoalesce(desc)) >>>> + break; >>>> + } >>>> + desc->pg_mirror_idx = restore_idx; >>>> +} >>>> + >>>> /* >>>> * nfs_pageio_resend - Transfer requests to new descriptor and resend >>>> * @hdr - the pgio header to move request from >>>> @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); >>>> */ >>>> void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) >>>> { >>>> - for (;;) { >>>> - nfs_pageio_doio(desc); >>>> - if (!desc->pg_recoalesce) >>>> - break; >>>> - if (!nfs_do_recoalesce(desc)) >>>> - break; >>>> - } >>>> + u32 midx; >>>> + >>>> + for (midx = 0; 
midx < desc->pg_mirror_count; midx++) >>>> + nfs_pageio_complete_mirror(desc, midx); >>>> >>>> if (desc->pg_ops->pg_cleanup) >>>> desc->pg_ops->pg_cleanup(desc); >>>> + nfs_pageio_cleanup_mirroring(desc); >>>> } >>>> >>>> /** >>>> @@ -1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) >>>> */ >>>> void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) >>>> { >>>> - if (!list_empty(&desc->pg_list)) { >>>> - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); >>>> - if (index != prev->wb_index + 1) >>>> - nfs_pageio_complete(desc); >>>> + struct nfs_pgio_mirror *mirror; >>>> + struct nfs_page *prev; >>>> + u32 midx; >>>> + >>>> + for (midx = 0; midx < desc->pg_mirror_count; midx++) { >>>> + mirror = &desc->pg_mirrors[midx]; >>>> + if (!list_empty(&mirror->pg_list)) { >>>> + prev = nfs_list_entry(mirror->pg_list.prev); >>>> + if (index != prev->wb_index + 1) >>>> + nfs_pageio_complete_mirror(desc, midx); >>>> + } >>>> } >>>> } >>>> >>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c >>>> index 2da2e77..5f7c422 100644 >>>> --- a/fs/nfs/pnfs.c >>>> +++ b/fs/nfs/pnfs.c >>>> @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); >>>> * of bytes (maximum @req->wb_bytes) that can be coalesced. 
>>>> */ >>>> size_t >>>> -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, >>>> - struct nfs_page *req) >>>> +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, >>>> + struct nfs_page *prev, struct nfs_page *req) >>>> { >>>> unsigned int size; >>>> u64 seg_end, req_start, seg_left; >>>> @@ -1729,10 +1729,12 @@ static void >>>> pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, >>>> struct nfs_pgio_header *hdr) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { >>>> - list_splice_tail_init(&hdr->pages, &desc->pg_list); >>>> + list_splice_tail_init(&hdr->pages, &mirror->pg_list); >>>> nfs_pageio_reset_write_mds(desc); >>>> - desc->pg_recoalesce = 1; >>>> + mirror->pg_recoalesce = 1; >>>> } >>>> nfs_pgio_data_destroy(hdr); >>>> } >>>> @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); >>>> int >>>> pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> struct nfs_pgio_header *hdr; >>>> int ret; >>>> >>>> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >>>> if (!hdr) { >>>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>>> return -ENOMEM; >>>> } >>>> nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); >>>> @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) >>>> ret = nfs_generic_pgio(desc, hdr); >>>> if (!ret) >>>> pnfs_do_write(desc, hdr, desc->pg_ioflags); >>>> + >>>> return ret; >>>> } >>>> EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); >>>> @@ -1839,10 +1844,13 @@ static void >>>> pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, >>>> struct nfs_pgio_header *hdr) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> + >>>> if 
(!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { >>>> - list_splice_tail_init(&hdr->pages, &desc->pg_list); >>>> + list_splice_tail_init(&hdr->pages, &mirror->pg_list); >>>> nfs_pageio_reset_read_mds(desc); >>>> - desc->pg_recoalesce = 1; >>>> + mirror->pg_recoalesce = 1; >>>> } >>>> nfs_pgio_data_destroy(hdr); >>>> } >>>> @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); >>>> int >>>> pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) >>>> { >>>> + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; >>>> + >>>> struct nfs_pgio_header *hdr; >>>> int ret; >>>> >>>> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); >>>> if (!hdr) { >>>> - desc->pg_completion_ops->error_cleanup(&desc->pg_list); >>>> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); >>>> return -ENOMEM; >>>> } >>>> nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); >>>> diff --git a/fs/nfs/read.c b/fs/nfs/read.c >>>> index 092ab49..568ecf0 100644 >>>> --- a/fs/nfs/read.c >>>> +++ b/fs/nfs/read.c >>>> @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); >>>> >>>> void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) >>>> { >>>> + struct nfs_pgio_mirror *mirror; >>>> + >>>> pgio->pg_ops = &nfs_pgio_rw_ops; >>>> - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; >>>> + >>>> + /* read path should never have more than one mirror */ >>>> + WARN_ON_ONCE(pgio->pg_mirror_count != 1); >>>> + >>>> + mirror = &pgio->pg_mirrors[0]; >>>> + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; >>>> } >>>> EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); >>>> >>>> @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >>>> struct nfs_page *new; >>>> unsigned int len; >>>> struct nfs_pageio_descriptor pgio; >>>> + struct nfs_pgio_mirror *pgm; >>>> >>>> len = nfs_page_length(page); >>>> if (len == 0) >>>> @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >>>> 
&nfs_async_read_completion_ops); >>>> nfs_pageio_add_request(&pgio, new); >>>> nfs_pageio_complete(&pgio); >>>> - NFS_I(inode)->read_io += pgio.pg_bytes_written; >>>> + >>>> + /* It doesn't make sense to do mirrored reads! */ >>>> + WARN_ON_ONCE(pgio.pg_mirror_count != 1); >>>> + >>>> + pgm = &pgio.pg_mirrors[0]; >>>> + NFS_I(inode)->read_io += pgm->pg_bytes_written; >>>> + >>>> return 0; >>>> } >>>> >>>> @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >>>> struct list_head *pages, unsigned nr_pages) >>>> { >>>> struct nfs_pageio_descriptor pgio; >>>> + struct nfs_pgio_mirror *pgm; >>>> struct nfs_readdesc desc = { >>>> .pgio = &pgio, >>>> }; >>>> @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >>>> &nfs_async_read_completion_ops); >>>> >>>> ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); >>>> - >>>> nfs_pageio_complete(&pgio); >>>> - NFS_I(inode)->read_io += pgio.pg_bytes_written; >>>> - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; >>>> + >>>> + /* It doesn't make sense to do mirrored reads! 
*/ >>>> + WARN_ON_ONCE(pgio.pg_mirror_count != 1); >>>> + >>>> + pgm = &pgio.pg_mirrors[0]; >>>> + NFS_I(inode)->read_io += pgm->pg_bytes_written; >>>> + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> >>>> + PAGE_CACHE_SHIFT; >>>> nfs_add_stats(inode, NFSIOS_READPAGES, npages); >>>> read_complete: >>>> put_nfs_open_context(desc.ctx); >>>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c >>>> index db802d9..2f6ee8e 100644 >>>> --- a/fs/nfs/write.c >>>> +++ b/fs/nfs/write.c >>>> @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) >>>> if (nfs_write_need_commit(hdr)) { >>>> memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); >>>> nfs_mark_request_commit(req, hdr->lseg, &cinfo, >>>> - 0); >>>> + hdr->pgio_mirror_idx); >>>> goto next; >>>> } >>>> remove_req: >>>> @@ -1305,8 +1305,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); >>>> >>>> void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) >>>> { >>>> + struct nfs_pgio_mirror *mirror; >>>> + >>>> pgio->pg_ops = &nfs_pgio_rw_ops; >>>> - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; >>>> + >>>> + nfs_pageio_stop_mirroring(pgio); >>>> + >>>> + mirror = &pgio->pg_mirrors[0]; >>>> + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; >>>> } >>>> EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); >>>> >>>> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h >>>> index 479c566..3eb072d 100644 >>>> --- a/include/linux/nfs_page.h >>>> +++ b/include/linux/nfs_page.h >>>> @@ -58,6 +58,8 @@ struct nfs_pageio_ops { >>>> size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, >>>> struct nfs_page *); >>>> int (*pg_doio)(struct nfs_pageio_descriptor *); >>>> + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, >>>> + struct nfs_page *); >>>> void (*pg_cleanup)(struct nfs_pageio_descriptor *); >>>> }; >>>> >>>> @@ -74,15 +76,17 @@ struct nfs_rw_ops { >>>> struct rpc_task_setup *, int); >>>> }; >>>> >>>> -struct 
nfs_pageio_descriptor { >>>> +struct nfs_pgio_mirror { >>>> struct list_head pg_list; >>>> unsigned long pg_bytes_written; >>>> size_t pg_count; >>>> size_t pg_bsize; >>>> unsigned int pg_base; >>>> - unsigned char pg_moreio : 1, >>>> - pg_recoalesce : 1; >>>> + unsigned char pg_recoalesce : 1; >>>> +}; >>>> >>>> +struct nfs_pageio_descriptor { >>>> + unsigned char pg_moreio : 1; >>>> struct inode *pg_inode; >>>> const struct nfs_pageio_ops *pg_ops; >>>> const struct nfs_rw_ops *pg_rw_ops; >>>> @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { >>>> struct pnfs_layout_segment *pg_lseg; >>>> struct nfs_direct_req *pg_dreq; >>>> void *pg_layout_private; >>>> + unsigned int pg_bsize; /* default bsize for mirrors */ >>>> + >>>> + u32 pg_mirror_count; >>>> + struct nfs_pgio_mirror *pg_mirrors; >>>> + struct nfs_pgio_mirror pg_mirrors_static[1]; >>>> + struct nfs_pgio_mirror *pg_mirrors_dynamic; >>>> + u32 pg_mirror_idx; /* current mirror */ >>>> }; >>>> >>>> +/* arbitrarily selected limit to number of mirrors */ >>>> +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 >>>> + >>>> #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) >>>> >>>> extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, >>>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h >>>> index 5bc99f0..6400a1e 100644 >>>> --- a/include/linux/nfs_xdr.h >>>> +++ b/include/linux/nfs_xdr.h >>>> @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { >>>> struct nfs_page_array page_array; >>>> struct nfs_client *ds_clp; /* pNFS data server */ >>>> int ds_commit_idx; /* ds index if ds_clp is set */ >>>> + int pgio_mirror_idx;/* mirror index in pgio layer */ >>>> }; >>>> >>>> struct nfs_mds_commit_info { >>>> >>> >>> -- >>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >>> the body of a message to majordomo@vger.kernel.org >>> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > -- To unsubscribe from this list: send the line "unsubscribe 
linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ee41d7..0178d4f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) dreq->error = hdr->error; - else - dreq->count += hdr->good_bytes; + else { + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; + } spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->error = hdr->error; } if (dreq->error == 0) { - dreq->count += hdr->good_bytes; + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 05f9a87..ef1c703 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); int nfs_key_timeout_notify(struct file *filp, struct inode *inode); bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index d007780..9a5f2ee 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; unsigned 
int size; size = pnfs_generic_pg_test(pgio, prev, req); - if (!size || pgio->pg_count + req->wb_bytes > + if (!size || mirror->pg_count + req->wb_bytes > (unsigned long)pgio->pg_layout_private) return 0; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1c03187..eec12b7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)) { - hdr->req = nfs_list_entry(desc->pg_list.next); + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + hdr->req = nfs_list_entry(mirror->pg_list.next); hdr->inode = desc->pg_inode; hdr->cred = hdr->req->wb_context->cred; hdr->io_start = req_offset(hdr->req); - hdr->good_bytes = desc->pg_count; + hdr->good_bytes = mirror->pg_count; hdr->dreq = desc->pg_dreq; hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); + + hdr->pgio_mirror_idx = desc->pg_mirror_idx; } EXPORT_SYMBOL_GPL(nfs_pgheader_init); @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { - if (desc->pg_count > desc->pg_bsize) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + if (mirror->pg_count > mirror->pg_bsize) { /* should never happen */ WARN_ON_ONCE(1); return 0; @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * Limit the request size so that we can still allocate a page array * for it without upsetting the slab allocator. 
*/ - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * sizeof(struct page) > PAGE_SIZE) return 0; - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror; + u32 midx; + set_bit(NFS_IOHDR_REDO, &hdr->flags); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + /* TODO: Make sure it's right to clean up all mirrors here + * and not just hdr->pgio_mirror_idx */ + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + } return -ENOMEM; } @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) hdr->completion_ops->completion(hdr); } +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, + unsigned int bsize) +{ + INIT_LIST_HEAD(&mirror->pg_list); + mirror->pg_bytes_written = 0; + mirror->pg_count = 0; + mirror->pg_bsize = bsize; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, size_t bsize, int io_flags) { - INIT_LIST_HEAD(&desc->pg_list); - desc->pg_bytes_written = 0; - desc->pg_count = 0; - desc->pg_bsize = bsize; - desc->pg_base = 0; + struct nfs_pgio_mirror *new; + int i; + desc->pg_moreio = 0; - desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_completion_ops = compl_ops; @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_dreq = NULL; 
desc->pg_layout_private = NULL; + desc->pg_bsize = bsize; + + desc->pg_mirror_count = 1; + desc->pg_mirror_idx = 0; + + if (pg_ops->pg_get_mirror_count) { + /* until we have a request, we don't have an lseg and no + * idea how many mirrors there will be */ + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); + desc->pg_mirrors_dynamic = new; + desc->pg_mirrors = new; + + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); + } else { + desc->pg_mirrors_dynamic = NULL; + desc->pg_mirrors = desc->pg_mirrors_static; + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); + } } EXPORT_SYMBOL_GPL(nfs_pageio_init); @@ -738,14 +782,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *req; struct page **pages, *last_page; - struct list_head *head = &desc->pg_list; + struct list_head *head = &mirror->pg_list; struct nfs_commit_info cinfo; unsigned int pagecount, pageused; - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -781,12 +827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror; struct nfs_pgio_header *hdr; int ret; + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; 
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + /* TODO: make sure this is right with mirroring - or + * should it back out all mirrors? */ + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return ret; } +/* + * nfs_pageio_setup_mirroring - determine if mirroring is to be used + * by calling the pg_get_mirror_count op + */ +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + int mirror_count = 1; + + if (!pgio->pg_ops->pg_get_mirror_count) + return 0; + + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) + return -EINVAL; + + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) + return -EINVAL; + + pgio->pg_mirror_count = mirror_count; + + return 0; +} + +/* + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) + */ +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_mirror_count = 1; + pgio->pg_mirror_idx = 0; +} + +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_mirror_count = 1; + pgio->pg_mirror_idx = 0; + pgio->pg_mirrors = pgio->pg_mirrors_static; + kfree(pgio->pg_mirrors_dynamic); + pgio->pg_mirrors_dynamic = NULL; +} + static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { @@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *prev = NULL; - if (desc->pg_count != 0) { - prev = nfs_list_entry(desc->pg_list.prev); + + if 
(mirror->pg_count != 0) { + prev = nfs_list_entry(mirror->pg_list.prev); } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); - desc->pg_base = req->wb_pgbase; + mirror->pg_base = req->wb_pgbase; } if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; nfs_list_remove_request(req); - nfs_list_add_request(req, &desc->pg_list); - desc->pg_count += req->wb_bytes; + nfs_list_add_request(req, &mirror->pg_list); + mirror->pg_count += req->wb_bytes; return 1; } @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, */ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { - if (!list_empty(&desc->pg_list)) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + if (!list_empty(&mirror->pg_list)) { int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; else - desc->pg_bytes_written += desc->pg_count; + mirror->pg_bytes_written += mirror->pg_count; } - if (list_empty(&desc->pg_list)) { - desc->pg_count = 0; - desc->pg_base = 0; + if (list_empty(&mirror->pg_list)) { + mirror->pg_count = 0; + mirror->pg_base = 0; } } @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *subreq; unsigned int bytes_left = 0; unsigned int offset, pgbase; + WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); + nfs_page_group_lock(req, false); subreq = req; @@ -938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_pageio_doio(desc); if (desc->pg_error < 0) return 0; - if (desc->pg_recoalesce) + if (mirror->pg_recoalesce) return 0; /* retry add_request for this subreq */ nfs_page_group_lock(req, false); @@ -976,14 +1080,16 @@ err_ptr: static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) { + struct 
nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; LIST_HEAD(head); do { - list_splice_init(&desc->pg_list, &head); - desc->pg_bytes_written -= desc->pg_count; - desc->pg_count = 0; - desc->pg_base = 0; - desc->pg_recoalesce = 0; + list_splice_init(&mirror->pg_list, &head); + mirror->pg_bytes_written -= mirror->pg_count; + mirror->pg_count = 0; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; + desc->pg_moreio = 0; while (!list_empty(&head)) { @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) return 0; break; } - } while (desc->pg_recoalesce); + } while (mirror->pg_recoalesce); return 1; } -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { int ret; @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, break; ret = nfs_do_recoalesce(desc); } while (ret); + return ret; } +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + u32 midx; + unsigned int pgbase, offset, bytes; + struct nfs_page *dupreq, *lastreq; + + pgbase = req->wb_pgbase; + offset = req->wb_offset; + bytes = req->wb_bytes; + + nfs_pageio_setup_mirroring(desc, req); + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + if (midx) { + nfs_page_group_lock(req, false); + + /* find the last request */ + for (lastreq = req->wb_head; + lastreq->wb_this_page != req->wb_head; + lastreq = lastreq->wb_this_page) + ; + + dupreq = nfs_create_request(req->wb_context, + req->wb_page, lastreq, pgbase, bytes); + + if (IS_ERR(dupreq)) { + nfs_page_group_unlock(req); + return 0; + } + + nfs_lock_request(dupreq); + nfs_page_group_unlock(req); + dupreq->wb_offset = offset; + dupreq->wb_index = req->wb_index; + } else + dupreq = req; + + desc->pg_mirror_idx = midx; + if (!nfs_pageio_add_request_mirror(desc, dupreq)) + return 0; + } + + return 1; +} + +/* + * 
nfs_pageio_complete_mirror - Complete I/O on the current mirror of an + * nfs_pageio_descriptor + * @desc: pointer to io descriptor + */ +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, + u32 mirror_idx) +{ + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; + u32 restore_idx = desc->pg_mirror_idx; + + desc->pg_mirror_idx = mirror_idx; + for (;;) { + nfs_pageio_doio(desc); + if (!mirror->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } + desc->pg_mirror_idx = restore_idx; +} + /* * nfs_pageio_resend - Transfer requests to new descriptor and resend * @hdr - the pgio header to move request from @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - for (;;) { - nfs_pageio_doio(desc); - if (!desc->pg_recoalesce) - break; - if (!nfs_do_recoalesce(desc)) - break; - } + u32 midx; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) + nfs_pageio_complete_mirror(desc, midx); if (desc->pg_ops->pg_cleanup) desc->pg_ops->pg_cleanup(desc); + nfs_pageio_cleanup_mirroring(desc); } /** @@ -1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) */ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) { - if (!list_empty(&desc->pg_list)) { - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); - if (index != prev->wb_index + 1) - nfs_pageio_complete(desc); + struct nfs_pgio_mirror *mirror; + struct nfs_page *prev; + u32 midx; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + if (!list_empty(&mirror->pg_list)) { + prev = nfs_list_entry(mirror->pg_list.prev); + if (index != prev->wb_index + 1) + nfs_pageio_complete_mirror(desc, midx); + } } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2da2e77..5f7c422 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); * of bytes (maximum 
@req->wb_bytes) that can be coalesced. */ size_t -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req) { unsigned int size; u64 seg_end, req_start, seg_left; @@ -1729,10 +1729,12 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_write_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (!ret) pnfs_do_write(desc, hdr, desc->pg_ioflags); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); @@ -1839,10 +1844,13 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_read_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } 
nfs_pgio_data_destroy(hdr); } @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 092ab49..568ecf0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; + + /* read path should never have more than one mirror */ + WARN_ON_ONCE(pgio->pg_mirror_count != 1); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *new; unsigned int len; struct nfs_pageio_descriptor pgio; + struct nfs_pgio_mirror *pgm; len = nfs_page_length(page); if (len == 0) @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); - NFS_I(inode)->read_io += pgio.pg_bytes_written; + + /* It doesn't make sense to do mirrored reads! 
*/ + WARN_ON_ONCE(pgio.pg_mirror_count != 1); + + pgm = &pgio.pg_mirrors[0]; + NFS_I(inode)->read_io += pgm->pg_bytes_written; + return 0; } @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { struct nfs_pageio_descriptor pgio; + struct nfs_pgio_mirror *pgm; struct nfs_readdesc desc = { .pgio = &pgio, }; @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); - nfs_pageio_complete(&pgio); - NFS_I(inode)->read_io += pgio.pg_bytes_written; - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* It doesn't make sense to do mirrored reads! */ + WARN_ON_ONCE(pgio.pg_mirror_count != 1); + + pgm = &pgio.pg_mirrors[0]; + NFS_I(inode)->read_io += pgm->pg_bytes_written; + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; nfs_add_stats(inode, NFSIOS_READPAGES, npages); read_complete: put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index db802d9..2f6ee8e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, - 0); + hdr->pgio_mirror_idx); goto next; } remove_req: @@ -1305,8 +1305,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; + + nfs_pageio_stop_mirroring(pgio); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h 
index 479c566..3eb072d 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -58,6 +58,8 @@ struct nfs_pageio_ops { size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); int (*pg_doio)(struct nfs_pageio_descriptor *); + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, + struct nfs_page *); void (*pg_cleanup)(struct nfs_pageio_descriptor *); }; @@ -74,15 +76,17 @@ struct nfs_rw_ops { struct rpc_task_setup *, int); }; -struct nfs_pageio_descriptor { +struct nfs_pgio_mirror { struct list_head pg_list; unsigned long pg_bytes_written; size_t pg_count; size_t pg_bsize; unsigned int pg_base; - unsigned char pg_moreio : 1, - pg_recoalesce : 1; + unsigned char pg_recoalesce : 1; +}; +struct nfs_pageio_descriptor { + unsigned char pg_moreio : 1; struct inode *pg_inode; const struct nfs_pageio_ops *pg_ops; const struct nfs_rw_ops *pg_rw_ops; @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; void *pg_layout_private; + unsigned int pg_bsize; /* default bsize for mirrors */ + + u32 pg_mirror_count; + struct nfs_pgio_mirror *pg_mirrors; + struct nfs_pgio_mirror pg_mirrors_static[1]; + struct nfs_pgio_mirror *pg_mirrors_dynamic; + u32 pg_mirror_idx; /* current mirror */ }; +/* arbitrarily selected limit to number of mirrors */ +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 + #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5bc99f0..6400a1e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_idx; /* ds index if ds_clp is set */ + int pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info {