Message ID | 1374267830-30154-5-git-send-email-bjschuma@netapp.com (mailing list archive) |
---|---|
State | New, archived |
On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: > From: Bryan Schumaker <bjschuma@netapp.com> > > Rather than performing the copy right away, schedule it to run later and > reply to the client. Later, send a callback to notify the client that > the copy has finished. I believe you need to implement the referring triple support described in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race described in http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 . I see cb_delay initialized below, but not otherwise used. Am I missing anything? What about OFFLOAD_STATUS and OFFLOAD_ABORT? In some common cases the reply will be very quick, and we might be better off handling it synchronously. Could we implement a heuristic like "copy synchronously if the filesystem has special support or the range is less than the maximum iosize, otherwise copy asynchronously"? --b. > Signed-off-by: Bryan Schumaker <bjschuma@netapp.com> > --- > fs/nfsd/nfs4callback.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4proc.c | 59 +++++++++++++++------ > fs/nfsd/nfs4state.c | 11 ++++ > fs/nfsd/state.h | 21 ++++++++ > fs/nfsd/xdr4cb.h | 9 ++++ > 5 files changed, 221 insertions(+), 15 deletions(-) > > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c > index 7f05cd1..8f797e1 100644 > --- a/fs/nfsd/nfs4callback.c > +++ b/fs/nfsd/nfs4callback.c > @@ -52,6 +52,9 @@ enum { > NFSPROC4_CLNT_CB_NULL = 0, > NFSPROC4_CLNT_CB_RECALL, > NFSPROC4_CLNT_CB_SEQUENCE, > + > + /* NFS v4.2 callback */ > + NFSPROC4_CLNT_CB_OFFLOAD, > }; > > struct nfs4_cb_compound_hdr { > @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 { > OP_CB_WANTS_CANCELLED = 12, > OP_CB_NOTIFY_LOCK = 13, > OP_CB_NOTIFY_DEVICEID = 14, > + OP_CB_OFFLOAD = 15, > OP_CB_ILLEGAL = 10044 > }; > > @@ -469,6 +473,31 @@ out_default: > return nfs_cb_stat_to_errno(nfserr); > } > > +static void encode_cb_offload4args(struct xdr_stream *xdr, > + const struct nfs4_cb_offload *offload, > + struct nfs4_cb_compound_hdr *hdr) > +{ > + __be32 *p; > + > + if (hdr->minorversion < 2) > + return; > + > + encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD); > + encode_nfs_fh4(xdr, &offload->co_dst_fh); > + encode_stateid4(xdr, &offload->co_stid->sc_stateid); > + > + p = xdr_reserve_space(xdr, 4); > + *p = cpu_to_be32(1); > + encode_stateid4(xdr, &offload->co_stid->sc_stateid); > + > + p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); > + p = xdr_encode_hyper(p, offload->co_count); > + *p++ = cpu_to_be32(offload->co_stable_how); > + xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE); > + > + hdr->nops++; > +} > + > /* > * NFSv4.0 and NFSv4.1 XDR encode functions > * > @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, > encode_cb_nops(&hdr); > } > > +/* > + * CB_OFFLOAD > + */ > +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr, > + const struct nfsd4_callback *cb) > +{ > + const struct nfs4_cb_offload *args = cb->cb_op; > + struct nfs4_cb_compound_hdr hdr = { > + .ident = cb->cb_clp->cl_cb_ident, > + .minorversion = cb->cb_minorversion, > + }; > + > + encode_cb_compound4args(xdr, &hdr); > + encode_cb_sequence4args(xdr, cb, &hdr); > + encode_cb_offload4args(xdr, args, &hdr); > + encode_cb_nops(&hdr); > +} > > /* > * NFSv4.0 and NFSv4.1 XDR decode functions > @@ -552,6 +598,36 @@ out: > } > > /* > + * CB_OFFLOAD > + */ > +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr, > + struct 
nfsd4_callback *cb) > +{ > + struct nfs4_cb_compound_hdr hdr; > + enum nfsstat4 nfserr; > + int status; > + > + status = decode_cb_compound4res(xdr, &hdr); > + if (unlikely(status)) > + goto out; > + > + if (cb != NULL) { > + status = decode_cb_sequence4res(xdr, cb); > + if (unlikely(status)) > + goto out; > + } > + > + status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr); > + if (unlikely(status)) > + goto out; > + if (unlikely(nfserr != NFS4_OK)) > + status = nfs_cb_stat_to_errno(nfserr); > + > +out: > + return status; > +} > + > +/* > * RPC procedure tables > */ > #define PROC(proc, call, argtype, restype) \ > @@ -568,6 +644,7 @@ out: > static struct rpc_procinfo nfs4_cb_procedures[] = { > PROC(CB_NULL, NULL, cb_null, cb_null), > PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), > + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), > }; > > static struct rpc_version nfs_cb_version4 = { > @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb) > INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > } > > +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb) > +{ > + INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc); > +} > + > void nfsd4_cb_recall(struct nfs4_delegation *dp) > { > struct nfsd4_callback *cb = &dp->dl_recall; > @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) > > run_nfsd4_cb(&dp->dl_recall); > } > + > +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata) > +{ > + struct nfsd4_callback *cb = calldata; > + struct nfs4_client *clp = cb->cb_clp; > + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; > + > + nfsd4_cb_done(task, calldata); > + > + if (current_rpc_client != task->tk_client) > + return; > + > + if (cb->cb_done) > + return; > + > + if (task->tk_status != 0) > + nfsd4_mark_cb_down(clp, task->tk_status); > + cb->cb_done = true; > +} > + > +static void nfsd4_cb_offload_release(void *calldata) > +{ > + struct nfsd4_callback *cb = calldata; > + struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback); > + > + if (cb->cb_done) { > + nfs4_free_offload_stateid(offload->co_stid); > + kfree(offload); > + } > +} > + > +static const struct rpc_call_ops nfsd4_cb_offload_ops = { > + .rpc_call_prepare = nfsd4_cb_prepare, > + .rpc_call_done = nfsd4_cb_offload_done, > + .rpc_release = nfsd4_cb_offload_release, > +}; > + > +void nfsd4_cb_offload(struct nfs4_cb_offload *offload) > +{ > + struct nfsd4_callback *cb = &offload->co_callback; > + > + cb->cb_op = offload; > + cb->cb_clp = offload->co_stid->sc_client; > + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD]; > + cb->cb_msg.rpc_argp = cb; > + cb->cb_msg.rpc_resp = cb; > + > + cb->cb_ops = &nfsd4_cb_offload_ops; > + > + INIT_LIST_HEAD(&cb->cb_per_client); > + cb->cb_done = true; > + > + run_nfsd4_cb(cb); > +} > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index d4584ea..66a787f 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -35,6 +35,7 @@ > #include <linux/file.h> > #include <linux/slab.h> > > +#include "state.h" > #include "idmap.h" > #include "cache.h" > #include "xdr4.h" > @@ -1062,29 +1063,57 @@ out: > return status; > } > > +static void > +nfsd4_copy_async(struct work_struct *w) > +{ > + __be32 status; > + struct nfs4_cb_offload *offload; > + > + offload = container_of(w, struct nfs4_cb_offload, co_work); > + status = nfsd_copy_range(offload->co_src_file, offload->co_src_pos, > + offload->co_dst_file, offload->co_dst_pos, > + offload->co_count); > + > + if (status 
== nfs_ok) { > + offload->co_stable_how = NFS_FILE_SYNC; > + gen_boot_verifier(&offload->co_verifier, offload->co_net); > + fput(offload->co_src_file); > + fput(offload->co_dst_file); > + } > + nfsd4_cb_offload(offload); > +} > + > static __be32 > nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > struct nfsd4_copy *copy) > { > - __be32 status; > struct file *src = NULL, *dst = NULL; > + struct nfs4_cb_offload *offload; > > - status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst); > - if (status) > - return status; > - > - status = nfsd_copy_range(src, copy->cp_src_pos, > - dst, copy->cp_dst_pos, > - copy->cp_count); > + if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst)) > + return nfserr_jukebox; > > - if (status == nfs_ok) { > - copy->cp_res.wr_stateid = NULL; > - copy->cp_res.wr_bytes_written = copy->cp_count; > - copy->cp_res.wr_stable_how = NFS_FILE_SYNC; > - gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); > - } > + offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL); > + if (!offload) > + return nfserr_jukebox; > > - return status; > + offload->co_src_file = get_file(src); > + offload->co_dst_file = get_file(dst); > + offload->co_src_pos = copy->cp_src_pos; > + offload->co_dst_pos = copy->cp_dst_pos; > + offload->co_count = copy->cp_count; > + offload->co_stid = nfs4_alloc_offload_stateid(cstate->session->se_client); > + offload->co_net = SVC_NET(rqstp); > + INIT_WORK(&offload->co_work, nfsd4_copy_async); > + nfsd4_init_callback(&offload->co_callback); > + memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh)); > + > + copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid; > + copy->cp_res.wr_bytes_written = 0; > + copy->cp_res.wr_stable_how = NFS_UNSTABLE; > + > + schedule_work(&offload->co_work); > + return nfs_ok; > } > > /* This routine never returns NFS_OK! 
If there are no other errors, it > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index c4e270e..582edb5 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) > return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); > } > > +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp) > +{ > + return nfs4_alloc_stid(clp, stateid_slab); > +} > + > static struct nfs4_delegation * > alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) > { > @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp) > kmem_cache_free(stateid_slab, stp); > } > > +void nfs4_free_offload_stateid(struct nfs4_stid *stid) > +{ > + remove_stid(stid); > + kmem_cache_free(stateid_slab, stid); > +} > + > static void release_lock_stateid(struct nfs4_ol_stateid *stp) > { > struct file *file; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 2478805..56682fb 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -70,6 +70,7 @@ struct nfsd4_callback { > struct rpc_message cb_msg; > const struct rpc_call_ops *cb_ops; > struct work_struct cb_work; > + struct delayed_work cb_delay; > bool cb_done; > }; > > @@ -101,6 +102,22 @@ struct nfs4_delegation { > struct nfsd4_callback dl_recall; > }; > > +struct nfs4_cb_offload { > + struct file *co_src_file; > + struct file *co_dst_file; > + u64 co_src_pos; > + u64 co_dst_pos; > + u64 co_count; > + u32 co_stable_how; > + struct knfsd_fh co_dst_fh; > + nfs4_verifier co_verifier; > + struct net *co_net; > + > + struct nfs4_stid *co_stid; > + struct work_struct co_work; > + struct nfsd4_callback co_callback; > +}; > + > /* client delegation callback info */ > struct nfs4_cb_conn { > /* SETCLIENTID info */ > @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *); > extern void nfs4_free_lockowner(struct nfs4_lockowner *); > extern int set_callback_cred(void); > extern void nfsd4_init_callback(struct nfsd4_callback *); > +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *); > extern void nfsd4_probe_callback(struct nfs4_client *clp); > extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); > extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); > extern void nfsd4_cb_recall(struct nfs4_delegation *dp); > +extern void nfsd4_cb_offload(struct nfs4_cb_offload *); > extern int nfsd4_create_callback_queue(void); > extern void nfsd4_destroy_callback_queue(void); > extern void nfsd4_shutdown_callback(struct nfs4_client *); > @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, > struct nfsd_net *nn); > extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); > extern void put_client_renew(struct nfs4_client *clp); > +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *); > +extern void nfs4_free_offload_stateid(struct nfs4_stid *); > > /* nfs4recover operations */ > extern int nfsd4_client_tracking_init(struct net *net); > diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h > index c5c55df..75b0ef7 100644 > --- a/fs/nfsd/xdr4cb.h > +++ b/fs/nfsd/xdr4cb.h > @@ -21,3 +21,12 @@ > #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ > cb_sequence_dec_sz + \ > op_dec_sz) > + > +#define NFS4_enc_cb_offload_sz (cb_compound_enc_hdr_sz + \ > + cb_sequence_enc_sz + \ > + 1 + enc_stateid_sz + 2 + 1 + \ > + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) > + 
> +#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ > + cb_sequence_dec_sz + \ > + op_dec_sz) > -- > 1.8.3.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
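A rough sketch of the heuristic suggested above, deciding between the existing synchronous path and the asynchronous one based on the requested range. svc_max_payload() stands in for "maximum iosize", and the nfsd4_do_sync_copy()/nfsd4_do_async_copy() helpers are hypothetical wrappers around the two code paths in this thread, not functions from the posted patch:

static __be32
nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	   struct nfsd4_copy *copy)
{
	struct file *src = NULL, *dst = NULL;
	__be32 status;

	status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst);
	if (status)
		return status;

	/* Small ranges: do the copy now and reply with the result. */
	if (copy->cp_count <= svc_max_payload(rqstp))
		return nfsd4_do_sync_copy(rqstp, copy, src, dst);	/* hypothetical */

	/* Large ranges: return a stateid and finish via CB_OFFLOAD later. */
	return nfsd4_do_async_copy(rqstp, cstate, copy, src, dst);	/* hypothetical */
}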
On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >> From: Bryan Schumaker <bjschuma@netapp.com> >> >> Rather than performing the copy right away, schedule it to run later and >> reply to the client. Later, send a callback to notify the client that >> the copy has finished. > > I believe you need to implement the referring triple support described > in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > described in > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > . I'll re-read and re-write. > > I see cb_delay initialized below, but not otherwise used. Am I missing > anything? Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > > What about OFFLOAD_STATUS and OFFLOAD_ABORT? I haven't thought out those too much... I haven't thought about a use for them on the client yet. > > In some common cases the reply will be very quick, and we might be > better off handling it synchronously. Could we implement a heuristic > like "copy synchronously if the filesystem has special support or the > range is less than the maximum iosize, otherwise copy asynchronously"? I'm sure that can be done, I'm just not sure how to do it yet... > > --b. > > >> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com> >> --- >> fs/nfsd/nfs4callback.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfs4proc.c | 59 +++++++++++++++------ >> fs/nfsd/nfs4state.c | 11 ++++ >> fs/nfsd/state.h | 21 ++++++++ >> fs/nfsd/xdr4cb.h | 9 ++++ >> 5 files changed, 221 insertions(+), 15 deletions(-) >> >> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c >> index 7f05cd1..8f797e1 100644 >> --- a/fs/nfsd/nfs4callback.c >> +++ b/fs/nfsd/nfs4callback.c >> @@ -52,6 +52,9 @@ enum { >> NFSPROC4_CLNT_CB_NULL = 0, >> NFSPROC4_CLNT_CB_RECALL, >> NFSPROC4_CLNT_CB_SEQUENCE, >> + >> + /* NFS v4.2 callback */ >> + NFSPROC4_CLNT_CB_OFFLOAD, >> }; >> >> struct nfs4_cb_compound_hdr { >> @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 { >> OP_CB_WANTS_CANCELLED = 12, >> OP_CB_NOTIFY_LOCK = 13, >> OP_CB_NOTIFY_DEVICEID = 14, >> + OP_CB_OFFLOAD = 15, >> OP_CB_ILLEGAL = 10044 >> }; >> >> @@ -469,6 +473,31 @@ out_default: >> return nfs_cb_stat_to_errno(nfserr); >> } >> >> +static void encode_cb_offload4args(struct xdr_stream *xdr, >> + const struct nfs4_cb_offload *offload, >> + struct nfs4_cb_compound_hdr *hdr) >> +{ >> + __be32 *p; >> + >> + if (hdr->minorversion < 2) >> + return; >> + >> + encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD); >> + encode_nfs_fh4(xdr, &offload->co_dst_fh); >> + encode_stateid4(xdr, &offload->co_stid->sc_stateid); >> + >> + p = xdr_reserve_space(xdr, 4); >> + *p = cpu_to_be32(1); >> + encode_stateid4(xdr, &offload->co_stid->sc_stateid); >> + >> + p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); >> + p = xdr_encode_hyper(p, offload->co_count); >> + *p++ = cpu_to_be32(offload->co_stable_how); >> + xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE); >> + >> + hdr->nops++; >> +} >> + >> /* >> * NFSv4.0 and NFSv4.1 XDR encode functions >> * >> @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, >> encode_cb_nops(&hdr); >> } >> >> +/* >> + * CB_OFFLOAD >> + */ >> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr, >> + const struct nfsd4_callback *cb) >> +{ >> + const struct 
nfs4_cb_offload *args = cb->cb_op; >> + struct nfs4_cb_compound_hdr hdr = { >> + .ident = cb->cb_clp->cl_cb_ident, >> + .minorversion = cb->cb_minorversion, >> + }; >> + >> + encode_cb_compound4args(xdr, &hdr); >> + encode_cb_sequence4args(xdr, cb, &hdr); >> + encode_cb_offload4args(xdr, args, &hdr); >> + encode_cb_nops(&hdr); >> +} >> >> /* >> * NFSv4.0 and NFSv4.1 XDR decode functions >> @@ -552,6 +598,36 @@ out: >> } >> >> /* >> + * CB_OFFLOAD >> + */ >> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr, >> + struct nfsd4_callback *cb) >> +{ >> + struct nfs4_cb_compound_hdr hdr; >> + enum nfsstat4 nfserr; >> + int status; >> + >> + status = decode_cb_compound4res(xdr, &hdr); >> + if (unlikely(status)) >> + goto out; >> + >> + if (cb != NULL) { >> + status = decode_cb_sequence4res(xdr, cb); >> + if (unlikely(status)) >> + goto out; >> + } >> + >> + status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr); >> + if (unlikely(status)) >> + goto out; >> + if (unlikely(nfserr != NFS4_OK)) >> + status = nfs_cb_stat_to_errno(nfserr); >> + >> +out: >> + return status; >> +} >> + >> +/* >> * RPC procedure tables >> */ >> #define PROC(proc, call, argtype, restype) \ >> @@ -568,6 +644,7 @@ out: >> static struct rpc_procinfo nfs4_cb_procedures[] = { >> PROC(CB_NULL, NULL, cb_null, cb_null), >> PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), >> + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), >> }; >> >> static struct rpc_version nfs_cb_version4 = { >> @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb) >> INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); >> } >> >> +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb) >> +{ >> + INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc); >> +} >> + >> void nfsd4_cb_recall(struct nfs4_delegation *dp) >> { >> struct nfsd4_callback *cb = &dp->dl_recall; >> @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) >> >> run_nfsd4_cb(&dp->dl_recall); >> } >> + >> +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata) >> +{ >> + struct nfsd4_callback *cb = calldata; >> + struct nfs4_client *clp = cb->cb_clp; >> + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; >> + >> + nfsd4_cb_done(task, calldata); >> + >> + if (current_rpc_client != task->tk_client) >> + return; >> + >> + if (cb->cb_done) >> + return; >> + >> + if (task->tk_status != 0) >> + nfsd4_mark_cb_down(clp, task->tk_status); >> + cb->cb_done = true; >> +} >> + >> +static void nfsd4_cb_offload_release(void *calldata) >> +{ >> + struct nfsd4_callback *cb = calldata; >> + struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback); >> + >> + if (cb->cb_done) { >> + nfs4_free_offload_stateid(offload->co_stid); >> + kfree(offload); >> + } >> +} >> + >> +static const struct rpc_call_ops nfsd4_cb_offload_ops = { >> + .rpc_call_prepare = nfsd4_cb_prepare, >> + .rpc_call_done = nfsd4_cb_offload_done, >> + .rpc_release = nfsd4_cb_offload_release, >> +}; >> + >> +void nfsd4_cb_offload(struct nfs4_cb_offload *offload) >> +{ >> + struct nfsd4_callback *cb = &offload->co_callback; >> + >> + cb->cb_op = offload; >> + cb->cb_clp = offload->co_stid->sc_client; >> + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD]; >> + cb->cb_msg.rpc_argp = cb; >> + cb->cb_msg.rpc_resp = cb; >> + >> + cb->cb_ops = &nfsd4_cb_offload_ops; >> + >> + INIT_LIST_HEAD(&cb->cb_per_client); >> + cb->cb_done = true; >> + >> + run_nfsd4_cb(cb); >> +} >> diff --git a/fs/nfsd/nfs4proc.c 
b/fs/nfsd/nfs4proc.c >> index d4584ea..66a787f 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -35,6 +35,7 @@ >> #include <linux/file.h> >> #include <linux/slab.h> >> >> +#include "state.h" >> #include "idmap.h" >> #include "cache.h" >> #include "xdr4.h" >> @@ -1062,29 +1063,57 @@ out: >> return status; >> } >> >> +static void >> +nfsd4_copy_async(struct work_struct *w) >> +{ >> + __be32 status; >> + struct nfs4_cb_offload *offload; >> + >> + offload = container_of(w, struct nfs4_cb_offload, co_work); >> + status = nfsd_copy_range(offload->co_src_file, offload->co_src_pos, >> + offload->co_dst_file, offload->co_dst_pos, >> + offload->co_count); >> + >> + if (status == nfs_ok) { >> + offload->co_stable_how = NFS_FILE_SYNC; >> + gen_boot_verifier(&offload->co_verifier, offload->co_net); >> + fput(offload->co_src_file); >> + fput(offload->co_dst_file); >> + } >> + nfsd4_cb_offload(offload); >> +} >> + >> static __be32 >> nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> struct nfsd4_copy *copy) >> { >> - __be32 status; >> struct file *src = NULL, *dst = NULL; >> + struct nfs4_cb_offload *offload; >> >> - status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst); >> - if (status) >> - return status; >> - >> - status = nfsd_copy_range(src, copy->cp_src_pos, >> - dst, copy->cp_dst_pos, >> - copy->cp_count); >> + if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst)) >> + return nfserr_jukebox; >> >> - if (status == nfs_ok) { >> - copy->cp_res.wr_stateid = NULL; >> - copy->cp_res.wr_bytes_written = copy->cp_count; >> - copy->cp_res.wr_stable_how = NFS_FILE_SYNC; >> - gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); >> - } >> + offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL); >> + if (!offload) >> + return nfserr_jukebox; >> >> - return status; >> + offload->co_src_file = get_file(src); >> + offload->co_dst_file = get_file(dst); >> + offload->co_src_pos = copy->cp_src_pos; >> + offload->co_dst_pos = copy->cp_dst_pos; >> + offload->co_count = copy->cp_count; >> + offload->co_stid = nfs4_alloc_offload_stateid(cstate->session->se_client); >> + offload->co_net = SVC_NET(rqstp); >> + INIT_WORK(&offload->co_work, nfsd4_copy_async); >> + nfsd4_init_callback(&offload->co_callback); >> + memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh)); >> + >> + copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid; >> + copy->cp_res.wr_bytes_written = 0; >> + copy->cp_res.wr_stable_how = NFS_UNSTABLE; >> + >> + schedule_work(&offload->co_work); >> + return nfs_ok; >> } >> >> /* This routine never returns NFS_OK! 
If there are no other errors, it >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index c4e270e..582edb5 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) >> return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); >> } >> >> +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp) >> +{ >> + return nfs4_alloc_stid(clp, stateid_slab); >> +} >> + >> static struct nfs4_delegation * >> alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) >> { >> @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp) >> kmem_cache_free(stateid_slab, stp); >> } >> >> +void nfs4_free_offload_stateid(struct nfs4_stid *stid) >> +{ >> + remove_stid(stid); >> + kmem_cache_free(stateid_slab, stid); >> +} >> + >> static void release_lock_stateid(struct nfs4_ol_stateid *stp) >> { >> struct file *file; >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index 2478805..56682fb 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -70,6 +70,7 @@ struct nfsd4_callback { >> struct rpc_message cb_msg; >> const struct rpc_call_ops *cb_ops; >> struct work_struct cb_work; >> + struct delayed_work cb_delay; >> bool cb_done; >> }; >> >> @@ -101,6 +102,22 @@ struct nfs4_delegation { >> struct nfsd4_callback dl_recall; >> }; >> >> +struct nfs4_cb_offload { >> + struct file *co_src_file; >> + struct file *co_dst_file; >> + u64 co_src_pos; >> + u64 co_dst_pos; >> + u64 co_count; >> + u32 co_stable_how; >> + struct knfsd_fh co_dst_fh; >> + nfs4_verifier co_verifier; >> + struct net *co_net; >> + >> + struct nfs4_stid *co_stid; >> + struct work_struct co_work; >> + struct nfsd4_callback co_callback; >> +}; >> + >> /* client delegation callback info */ >> struct nfs4_cb_conn { >> /* SETCLIENTID info */ >> @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *); >> extern void nfs4_free_lockowner(struct nfs4_lockowner *); >> extern int set_callback_cred(void); >> extern void nfsd4_init_callback(struct nfsd4_callback *); >> +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *); >> extern void nfsd4_probe_callback(struct nfs4_client *clp); >> extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); >> extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); >> extern void nfsd4_cb_recall(struct nfs4_delegation *dp); >> +extern void nfsd4_cb_offload(struct nfs4_cb_offload *); >> extern int nfsd4_create_callback_queue(void); >> extern void nfsd4_destroy_callback_queue(void); >> extern void nfsd4_shutdown_callback(struct nfs4_client *); >> @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, >> struct nfsd_net *nn); >> extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); >> extern void put_client_renew(struct nfs4_client *clp); >> +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *); >> +extern void nfs4_free_offload_stateid(struct nfs4_stid *); >> >> /* nfs4recover operations */ >> extern int nfsd4_client_tracking_init(struct net *net); >> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h >> index c5c55df..75b0ef7 100644 >> --- a/fs/nfsd/xdr4cb.h >> +++ b/fs/nfsd/xdr4cb.h >> @@ -21,3 +21,12 @@ >> #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ >> cb_sequence_dec_sz + \ >> op_dec_sz) >> + >> +#define NFS4_enc_cb_offload_sz (cb_compound_enc_hdr_sz + \ >> + 
cb_sequence_enc_sz + \ >> + 1 + enc_stateid_sz + 2 + 1 + \ >> + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) >> + >> +#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ >> + cb_sequence_dec_sz + \ >> + op_dec_sz) >> -- >> 1.8.3.3 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: > On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > > On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: > >> From: Bryan Schumaker <bjschuma@netapp.com> > >> > >> Rather than performing the copy right away, schedule it to run later and > >> reply to the client. Later, send a callback to notify the client that > >> the copy has finished. > > > > I believe you need to implement the referring triple support described > > in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > > described in > > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > > . > > I'll re-read and re-write. > > > > > I see cb_delay initialized below, but not otherwise used. Am I missing > > anything? > > Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > > > > > What about OFFLOAD_STATUS and OFFLOAD_ABORT? > > I haven't thought out those too much... I haven't thought about a use for them on the client yet. If it might be a long-running copy, I assume the client needs the ability to abort if the caller is killed. (Dumb question: what happens on the network partition? Does the server abort the copy when it expires the client state?) In any case, http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 says "If a server's COPY operation returns a stateid, then the server MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and OFFLOAD_STATUS." So even if we've no use for them on the client then we still need to implement them (and probably just write a basic pynfs test). Either that or update the spec. > > In some common cases the reply will be very quick, and we might be > > better off handling it synchronously. Could we implement a heuristic > > like "copy synchronously if the filesystem has special support or the > > range is less than the maximum iosize, otherwise copy asynchronously"? > > I'm sure that can be done, I'm just not sure how to do it yet... OK, thanks. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
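For reference, minimal server-side stubs for the two mandatory operations could look roughly like this. This is a sketch only: struct nfsd4_offload_status and find_offload_copy() are assumed names that the posted patch does not define, and the locking/refcounting against the CB_OFFLOAD callback path is glossed over:

/* Assumed argument/response structure, not part of the posted patch. */
struct nfsd4_offload_status {
	stateid_t	osa_stateid;	/* which offloaded copy */
	u64		osr_count;	/* reply: bytes copied so far */
};

static __be32
nfsd4_offload_status(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
		     struct nfsd4_offload_status *os)
{
	struct nfs4_cb_offload *offload;

	/* find_offload_copy() is a hypothetical stateid lookup. */
	offload = find_offload_copy(cstate, &os->osa_stateid);
	if (!offload)
		return nfserr_bad_stateid;

	/* A real version would track how far nfsd4_copy_async() has gotten. */
	os->osr_count = offload->co_count;
	return nfs_ok;
}

static __be32
nfsd4_offload_abort(struct svc_rqst *rqstp,
		    struct nfsd4_compound_state *cstate,
		    struct nfsd4_offload_status *os)
{
	struct nfs4_cb_offload *offload;

	offload = find_offload_copy(cstate, &os->osa_stateid);
	if (!offload)
		return nfserr_bad_stateid;

	cancel_work_sync(&offload->co_work);	/* stop the pending async copy */
	nfs4_free_offload_stateid(offload->co_stid);
	kfree(offload);
	return nfs_ok;
}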
On 07/22/2013 03:30 PM, J. Bruce Fields wrote: > On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: >> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: >>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >>>> From: Bryan Schumaker <bjschuma@netapp.com> >>>> >>>> Rather than performing the copy right away, schedule it to run later and >>>> reply to the client. Later, send a callback to notify the client that >>>> the copy has finished. >>> >>> I believe you need to implement the referring triple support described >>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race >>> described in >>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>> . >> >> I'll re-read and re-write. >> >>> >>> I see cb_delay initialized below, but not otherwise used. Am I missing >>> anything? >> >> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( >> >>> >>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? >> >> I haven't thought out those too much... I haven't thought about a use for them on the client yet. > > If it might be a long-running copy, I assume the client needs the > ability to abort if the caller is killed. > > (Dumb question: what happens on the network partition? Does the server > abort the copy when it expires the client state?) > > In any case, > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > says "If a server's COPY operation returns a stateid, then the server > MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and > OFFLOAD_STATUS." > > So even if we've no use for them on the client then we still need to > implement them (and probably just write a basic pynfs test). Either > that or update the spec. Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. - Bryan > >>> In some common cases the reply will be very quick, and we might be >>> better off handling it synchronously. Could we implement a heuristic >>> like "copy synchronously if the filesystem has special support or the >>> range is less than the maximum iosize, otherwise copy asynchronously"? >> >> I'm sure that can be done, I'm just not sure how to do it yet... > > OK, thanks. > > --b. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: > On 07/22/2013 03:30 PM, J. Bruce Fields wrote: > > On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: > >> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > >>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: > >>>> From: Bryan Schumaker <bjschuma@netapp.com> > >>>> > >>>> Rather than performing the copy right away, schedule it to run later and > >>>> reply to the client. Later, send a callback to notify the client that > >>>> the copy has finished. > >>> > >>> I believe you need to implement the referring triple support described > >>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > >>> described in > >>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>> . > >> > >> I'll re-read and re-write. > >> > >>> > >>> I see cb_delay initialized below, but not otherwise used. Am I missing > >>> anything? > >> > >> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > >> > >>> > >>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? > >> > >> I haven't thought out those too much... I haven't thought about a use for them on the client yet. > > > > If it might be a long-running copy, I assume the client needs the > > ability to abort if the caller is killed. > > > > (Dumb question: what happens on the network partition? Does the server > > abort the copy when it expires the client state?) > > > > In any case, > > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > > says "If a server's COPY operation returns a stateid, then the server > > MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and > > OFFLOAD_STATUS." > > > > So even if we've no use for them on the client then we still need to > > implement them (and probably just write a basic pynfs test). Either > > that or update the spec. > > Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. I can't remember--does the spec give the server a clear way to bail out and tell the client to fall back on a normal copy in cases where the server knows the copy could take an unreasonable amount of time? --b. > > - Bryan > > > > >>> In some common cases the reply will be very quick, and we might be > >>> better off handling it synchronously. Could we implement a heuristic > >>> like "copy synchronously if the filesystem has special support or the > >>> range is less than the maximum iosize, otherwise copy asynchronously"? > >> > >> I'm sure that can be done, I'm just not sure how to do it yet... > > > > OK, thanks. > > > > --b. > > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 07/22/2013 03:43 PM, J. Bruce Fields wrote: > On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: >> On 07/22/2013 03:30 PM, J. Bruce Fields wrote: >>> On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: >>>> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: >>>>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >>>>>> From: Bryan Schumaker <bjschuma@netapp.com> >>>>>> >>>>>> Rather than performing the copy right away, schedule it to run later and >>>>>> reply to the client. Later, send a callback to notify the client that >>>>>> the copy has finished. >>>>> >>>>> I believe you need to implement the referring triple support described >>>>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race >>>>> described in >>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>> . >>>> >>>> I'll re-read and re-write. >>>> >>>>> >>>>> I see cb_delay initialized below, but not otherwise used. Am I missing >>>>> anything? >>>> >>>> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( >>>> >>>>> >>>>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? >>>> >>>> I haven't thought out those too much... I haven't thought about a use for them on the client yet. >>> >>> If it might be a long-running copy, I assume the client needs the >>> ability to abort if the caller is killed. >>> >>> (Dumb question: what happens on the network partition? Does the server >>> abort the copy when it expires the client state?) >>> >>> In any case, >>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>> says "If a server's COPY operation returns a stateid, then the server >>> MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and >>> OFFLOAD_STATUS." >>> >>> So even if we've no use for them on the client then we still need to >>> implement them (and probably just write a basic pynfs test). Either >>> that or update the spec. >> >> Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. > > I can't remember--does the spec give the server a clear way to bail out > and tell the client to fall back on a normal copy in cases where the > server knows the copy could take an unreasonable amount of time? > > --b. I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? > >> >> - Bryan >> >>> >>>>> In some common cases the reply will be very quick, and we might be >>>>> better off handling it synchronously. Could we implement a heuristic >>>>> like "copy synchronously if the filesystem has special support or the >>>>> range is less than the maximum iosize, otherwise copy asynchronously"? >>>> >>>> I'm sure that can be done, I'm just not sure how to do it yet... >>> >>> OK, thanks. >>> >>> --b. >>> >> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 22, 2013 at 03:54:00PM -0400, Bryan Schumaker wrote: > On 07/22/2013 03:43 PM, J. Bruce Fields wrote: > > On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: > >> On 07/22/2013 03:30 PM, J. Bruce Fields wrote: > >>> On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: > >>>> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > >>>>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: > >>>>>> From: Bryan Schumaker <bjschuma@netapp.com> > >>>>>> > >>>>>> Rather than performing the copy right away, schedule it to run later and > >>>>>> reply to the client. Later, send a callback to notify the client that > >>>>>> the copy has finished. > >>>>> > >>>>> I believe you need to implement the referring triple support described > >>>>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > >>>>> described in > >>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>>>> . > >>>> > >>>> I'll re-read and re-write. > >>>> > >>>>> > >>>>> I see cb_delay initialized below, but not otherwise used. Am I missing > >>>>> anything? > >>>> > >>>> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > >>>> > >>>>> > >>>>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? > >>>> > >>>> I haven't thought out those too much... I haven't thought about a use for them on the client yet. > >>> > >>> If it might be a long-running copy, I assume the client needs the > >>> ability to abort if the caller is killed. > >>> > >>> (Dumb question: what happens on the network partition? Does the server > >>> abort the copy when it expires the client state?) > >>> > >>> In any case, > >>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>> says "If a server's COPY operation returns a stateid, then the server > >>> MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and > >>> OFFLOAD_STATUS." > >>> > >>> So even if we've no use for them on the client then we still need to > >>> implement them (and probably just write a basic pynfs test). Either > >>> that or update the spec. > >> > >> Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. > > > > I can't remember--does the spec give the server a clear way to bail out > > and tell the client to fall back on a normal copy in cases where the > > server knows the copy could take an unreasonable amount of time? > > > > --b. > > I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? Mybe not, but if the copy will take a minute, then we don't want to tie up an rpc slot for a minute. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 07/22/2013 08:55 PM, J. Bruce Fields wrote: > On Mon, Jul 22, 2013 at 03:54:00PM -0400, Bryan Schumaker wrote: >> On 07/22/2013 03:43 PM, J. Bruce Fields wrote: >>> On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: >>>> On 07/22/2013 03:30 PM, J. Bruce Fields wrote: >>>>> On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: >>>>>> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: >>>>>>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com> >>>>>>>> >>>>>>>> Rather than performing the copy right away, schedule it to run later and >>>>>>>> reply to the client. Later, send a callback to notify the client that >>>>>>>> the copy has finished. >>>>>>> I believe you need to implement the referring triple support described >>>>>>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race >>>>>>> described in >>>>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>>>> . >>>>>> I'll re-read and re-write. >>>>>> >>>>>>> I see cb_delay initialized below, but not otherwise used. Am I missing >>>>>>> anything? >>>>>> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( >>>>>> >>>>>>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? >>>>>> I haven't thought out those too much... I haven't thought about a use for them on the client yet. >>>>> If it might be a long-running copy, I assume the client needs the >>>>> ability to abort if the caller is killed. >>>>> >>>>> (Dumb question: what happens on the network partition? Does the server >>>>> abort the copy when it expires the client state?) >>>>> >>>>> In any case, >>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>> says "If a server's COPY operation returns a stateid, then the server >>>>> MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and >>>>> OFFLOAD_STATUS." >>>>> >>>>> So even if we've no use for them on the client then we still need to >>>>> implement them (and probably just write a basic pynfs test). Either >>>>> that or update the spec. >>>> Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. >>> I can't remember--does the spec give the server a clear way to bail out >>> and tell the client to fall back on a normal copy in cases where the >>> server knows the copy could take an unreasonable amount of time? >>> >>> --b. >> I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? > Mybe not, but if the copy will take a minute, then we don't want to tie > up an rpc slot for a minute. > > --b. I think that we need to be able to handle copies that would take a lot longer than just a minute - this offload could take a very long time I assume depending on the size of the data getting copied and the back end storage device.... ric -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 05, 2013 at 09:38:20AM +0100, Ric Wheeler wrote: > On 07/22/2013 08:55 PM, J. Bruce Fields wrote: > >On Mon, Jul 22, 2013 at 03:54:00PM -0400, Bryan Schumaker wrote: > >>On 07/22/2013 03:43 PM, J. Bruce Fields wrote: > >>>On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: > >>>>On 07/22/2013 03:30 PM, J. Bruce Fields wrote: > >>>>>On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: > >>>>>>On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > >>>>>>>On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: > >>>>>>>>From: Bryan Schumaker <bjschuma@netapp.com> > >>>>>>>> > >>>>>>>>Rather than performing the copy right away, schedule it to run later and > >>>>>>>>reply to the client. Later, send a callback to notify the client that > >>>>>>>>the copy has finished. > >>>>>>>I believe you need to implement the referring triple support described > >>>>>>>in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > >>>>>>>described in > >>>>>>>http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>>>>>>. > >>>>>>I'll re-read and re-write. > >>>>>> > >>>>>>>I see cb_delay initialized below, but not otherwise used. Am I missing > >>>>>>>anything? > >>>>>>Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > >>>>>> > >>>>>>>What about OFFLOAD_STATUS and OFFLOAD_ABORT? > >>>>>>I haven't thought out those too much... I haven't thought about a use for them on the client yet. > >>>>>If it might be a long-running copy, I assume the client needs the > >>>>>ability to abort if the caller is killed. > >>>>> > >>>>>(Dumb question: what happens on the network partition? Does the server > >>>>>abort the copy when it expires the client state?) > >>>>> > >>>>>In any case, > >>>>>http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>>>>says "If a server's COPY operation returns a stateid, then the server > >>>>>MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and > >>>>>OFFLOAD_STATUS." > >>>>> > >>>>>So even if we've no use for them on the client then we still need to > >>>>>implement them (and probably just write a basic pynfs test). Either > >>>>>that or update the spec. > >>>>Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. > >>>I can't remember--does the spec give the server a clear way to bail out > >>>and tell the client to fall back on a normal copy in cases where the > >>>server knows the copy could take an unreasonable amount of time? > >>> > >>>--b. > >>I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? > >Mybe not, but if the copy will take a minute, then we don't want to tie > >up an rpc slot for a minute. > > > >--b. > > I think that we need to be able to handle copies that would take a > lot longer than just a minute - this offload could take a very long > time I assume depending on the size of the data getting copied and > the back end storage device.... Bryan suggested in offline discussion that one possibility might be to copy, say, at most a gigabyte at a time before returning and making the client continue the copy. Where for "a gigabyte" read, "some amount that doesn't take too long to copy but is still enough to allow close to full bandwidth". 
Hopefully that's an easy number to find. But based on http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 the COPY operation isn't designed for that--it doesn't give the option of returning bytes_copied in the successful case. Maybe we should fix that in the spec, or maybe we just need to implement the asynchronous case. I guess it depends on which is easier, a) implementing the asynchronous case (and the referring-triple support to fix the COPY/callback races), or b) implementing this sort of "short copy" loop in a way that gives good performance. On the client side it's clearly a) since you're forced to handle that case anyway. (Unless we argue that *all* copies should work that way, and that the spec should ditch the asynchronous case.) On the server side, b) looks easier. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
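A sketch of option (b), assuming the protocol allowed a successful COPY to report a short wr_count. The one-gigabyte cap is only the placeholder value discussed above, and nfsd_copy_range() is the helper from the posted series:

#define MAX_COPY_BYTES	(1024 * 1024 * 1024ULL)	/* "a gigabyte", illustrative */

static __be32
nfsd4_do_short_copy(struct svc_rqst *rqstp, struct nfsd4_copy *copy,
		    struct file *src, struct file *dst)
{
	u64 count = min_t(u64, copy->cp_count, MAX_COPY_BYTES);
	__be32 status;

	status = nfsd_copy_range(src, copy->cp_src_pos,
				 dst, copy->cp_dst_pos, count);
	if (status == nfs_ok) {
		copy->cp_res.wr_stateid = NULL;
		copy->cp_res.wr_bytes_written = count;	/* may be < cp_count */
		copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
		gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
	}
	return status;
}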
On 08/05/2013 03:41 PM, J. Bruce Fields wrote: > On Mon, Aug 05, 2013 at 09:38:20AM +0100, Ric Wheeler wrote: >> On 07/22/2013 08:55 PM, J. Bruce Fields wrote: >>> On Mon, Jul 22, 2013 at 03:54:00PM -0400, Bryan Schumaker wrote: >>>> On 07/22/2013 03:43 PM, J. Bruce Fields wrote: >>>>> On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: >>>>>> On 07/22/2013 03:30 PM, J. Bruce Fields wrote: >>>>>>> On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: >>>>>>>> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: >>>>>>>>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >>>>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com> >>>>>>>>>> >>>>>>>>>> Rather than performing the copy right away, schedule it to run later and >>>>>>>>>> reply to the client. Later, send a callback to notify the client that >>>>>>>>>> the copy has finished. >>>>>>>>> I believe you need to implement the referring triple support described >>>>>>>>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race >>>>>>>>> described in >>>>>>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>>>>>> . >>>>>>>> I'll re-read and re-write. >>>>>>>> >>>>>>>>> I see cb_delay initialized below, but not otherwise used. Am I missing >>>>>>>>> anything? >>>>>>>> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( >>>>>>>> >>>>>>>>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? >>>>>>>> I haven't thought out those too much... I haven't thought about a use for them on the client yet. >>>>>>> If it might be a long-running copy, I assume the client needs the >>>>>>> ability to abort if the caller is killed. >>>>>>> >>>>>>> (Dumb question: what happens on the network partition? Does the server >>>>>>> abort the copy when it expires the client state?) >>>>>>> >>>>>>> In any case, >>>>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>>>> says "If a server's COPY operation returns a stateid, then the server >>>>>>> MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and >>>>>>> OFFLOAD_STATUS." >>>>>>> >>>>>>> So even if we've no use for them on the client then we still need to >>>>>>> implement them (and probably just write a basic pynfs test). Either >>>>>>> that or update the spec. >>>>>> Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. >>>>> I can't remember--does the spec give the server a clear way to bail out >>>>> and tell the client to fall back on a normal copy in cases where the >>>>> server knows the copy could take an unreasonable amount of time? >>>>> >>>>> --b. >>>> I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? >>> Mybe not, but if the copy will take a minute, then we don't want to tie >>> up an rpc slot for a minute. >>> >>> --b. >> I think that we need to be able to handle copies that would take a >> lot longer than just a minute - this offload could take a very long >> time I assume depending on the size of the data getting copied and >> the back end storage device.... > Bryan suggested in offline discussion that one possibility might be to > copy, say, at most a gigabyte at a time before returning and making the > client continue the copy. 
> > Where for "a gigabyte" read, "some amount that doesn't take too long to > copy but is still enough to allow close to full bandwidth". Hopefully > that's an easy number to find. > > But based on > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > the COPY operation isn't designed for that--it doesn't give the option > of returning bytes_copied in the successful case. > > Maybe we should fix that in the spec, or maybe we just need to implement > the asynchronous case. I guess it depends on which is easier, > > a) implementing the asynchronous case (and the referring-triple > support to fix the COPY/callback races), or > b) implementing this sort of "short copy" loop in a way that gives > good performance. > > On the client side it's clearly a) since you're forced to handle that > case anyway. (Unless we argue that *all* copies should work that way, > and that the spec should ditch the asynchronous case.) On the server > side, b) looks easier. > > --b. I am not sure that 1GB/time is enough - for a lot of servers, you could do an enormous range since no data is actually moved inside of the target (just pointers updated like in reflinked files for example).... ric -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
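To illustrate that point: a server could let a clone-capable filesystem take the whole range in one pointer-update operation and only fall back to a bounded data copy otherwise. The ->clone_range() file operation below is purely hypothetical -- no such VFS hook existed at the time -- and the error handling is simplified:

static __be32
nfsd4_try_clone(struct file *src, struct file *dst, struct nfsd4_copy *copy)
{
	const struct file_operations *fop = dst->f_op;

	if (!fop->clone_range)	/* hypothetical per-fs hook */
		return nfserr_notsupp;

	if (fop->clone_range(src, copy->cp_src_pos,
			     dst, copy->cp_dst_pos, copy->cp_count))
		return nfserr_notsupp;	/* caller falls back to chunked copy */

	copy->cp_res.wr_bytes_written = copy->cp_count;
	return nfs_ok;
}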
On 08/05/2013 10:41 AM, J. Bruce Fields wrote: > On Mon, Aug 05, 2013 at 09:38:20AM +0100, Ric Wheeler wrote: >> On 07/22/2013 08:55 PM, J. Bruce Fields wrote: >>> On Mon, Jul 22, 2013 at 03:54:00PM -0400, Bryan Schumaker wrote: >>>> On 07/22/2013 03:43 PM, J. Bruce Fields wrote: >>>>> On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: >>>>>> On 07/22/2013 03:30 PM, J. Bruce Fields wrote: >>>>>>> On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: >>>>>>>> On 07/22/2013 02:50 PM, J. Bruce Fields wrote: >>>>>>>>> On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >>>>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com> >>>>>>>>>> >>>>>>>>>> Rather than performing the copy right away, schedule it to run later and >>>>>>>>>> reply to the client. Later, send a callback to notify the client that >>>>>>>>>> the copy has finished. >>>>>>>>> I believe you need to implement the referring triple support described >>>>>>>>> in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race >>>>>>>>> described in >>>>>>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>>>>>> . >>>>>>>> I'll re-read and re-write. >>>>>>>> >>>>>>>>> I see cb_delay initialized below, but not otherwise used. Am I missing >>>>>>>>> anything? >>>>>>>> Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( >>>>>>>> >>>>>>>>> What about OFFLOAD_STATUS and OFFLOAD_ABORT? >>>>>>>> I haven't thought out those too much... I haven't thought about a use for them on the client yet. >>>>>>> If it might be a long-running copy, I assume the client needs the >>>>>>> ability to abort if the caller is killed. >>>>>>> >>>>>>> (Dumb question: what happens on the network partition? Does the server >>>>>>> abort the copy when it expires the client state?) >>>>>>> >>>>>>> In any case, >>>>>>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 >>>>>>> says "If a server's COPY operation returns a stateid, then the server >>>>>>> MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and >>>>>>> OFFLOAD_STATUS." >>>>>>> >>>>>>> So even if we've no use for them on the client then we still need to >>>>>>> implement them (and probably just write a basic pynfs test). Either >>>>>>> that or update the spec. >>>>>> Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. >>>>> I can't remember--does the spec give the server a clear way to bail out >>>>> and tell the client to fall back on a normal copy in cases where the >>>>> server knows the copy could take an unreasonable amount of time? >>>>> >>>>> --b. >>>> I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? >>> Mybe not, but if the copy will take a minute, then we don't want to tie >>> up an rpc slot for a minute. >>> >>> --b. >> >> I think that we need to be able to handle copies that would take a >> lot longer than just a minute - this offload could take a very long >> time I assume depending on the size of the data getting copied and >> the back end storage device.... > > Bryan suggested in offline discussion that one possibility might be to > copy, say, at most a gigabyte at a time before returning and making the > client continue the copy. 
> > Where for "a gigabyte" read, "some amount that doesn't take too long to > copy but is still enough to allow close to full bandwidth". Hopefully > that's an easy number to find. > > But based on > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > the COPY operation isn't designed for that--it doesn't give the option > of returning bytes_copied in the successful case. Wouldn't the wr_count field in a write_response4 struct be the bytes copied? I'm working on a patch for putting a limit on the amount copied - is there a "1 gigabyte in bytes" constant somewhere? - Bryan > > Maybe we should fix that in the spec, or maybe we just need to implement > the asynchronous case. I guess it depends on which is easier, > > a) implementing the asynchronous case (and the referring-triple > support to fix the COPY/callback races), or > b) implementing this sort of "short copy" loop in a way that gives > good performance. > > On the client side it's clearly a) since you're forced to handle that > case anyway. (Unless we argue that *all* copies should work that way, > and that the spec should ditch the asynchronous case.) On the server > side, b) looks easier. > > --b. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
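On the constant question: the kernel's include/linux/sizes.h should already provide SZ_1G, or an open-coded shift works just as well; the NFSD_COPY_MAX_SYNC name below is made up for illustration only:

#include <linux/sizes.h>	/* SZ_1G = 0x40000000 */

/* Hypothetical name; caps how much a single synchronous COPY will move. */
#define NFSD_COPY_MAX_SYNC	SZ_1G
/* Equivalent without sizes.h: */
/* #define NFSD_COPY_MAX_SYNC	(1ULL << 30) */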
On Mon, 2013-08-05 at 10:41 -0400, J. Bruce Fields wrote: > On Mon, Aug 05, 2013 at 09:38:20AM +0100, Ric Wheeler wrote: > > I think that we need to be able to handle copies that would take a > > lot longer than just a minute - this offload could take a very long > > time I assume depending on the size of the data getting copied and > > the back end storage device.... > > Bryan suggested in offline discussion that one possibility might be to > copy, say, at most a gigabyte at a time before returning and making the > client continue the copy. > > Where for "a gigabyte" read, "some amount that doesn't take too long to > copy but is still enough to allow close to full bandwidth". Hopefully > that's an easy number to find. > > But based on > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > the COPY operation isn't designed for that--it doesn't give the option > of returning bytes_copied in the successful case. The reason is that the spec writers did not want to force the server to copy the data in sequential order (or any other particular order for that matter). If the copy was short, then the client can't know which bytes were copied; they could be at the beginning of the file, in the middle, or even the very end. Basically, it needs to redo the entire copy in order to be certain. > Maybe we should fix that in the spec, or maybe we just need to implement > the asynchronous case. I guess it depends on which is easier, > > a) implementing the asynchronous case (and the referring-triple > support to fix the COPY/callback races), or > b) implementing this sort of "short copy" loop in a way that gives > good performance. > > On the client side it's clearly a) since you're forced to handle that > case anyway. (Unless we argue that *all* copies should work that way, > and that the spec should ditch the asynchronous case.) On the server > side, b) looks easier. > > --b. -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@netapp.com www.netapp.com
-- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the
body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 05, 2013 at 03:44:18PM +0100, Ric Wheeler wrote: > On 08/05/2013 03:41 PM, J. Bruce Fields wrote: > >On Mon, Aug 05, 2013 at 09:38:20AM +0100, Ric Wheeler wrote: > >>On 07/22/2013 08:55 PM, J. Bruce Fields wrote: > >>>On Mon, Jul 22, 2013 at 03:54:00PM -0400, Bryan Schumaker wrote: > >>>>On 07/22/2013 03:43 PM, J. Bruce Fields wrote: > >>>>>On Mon, Jul 22, 2013 at 03:37:00PM -0400, Bryan Schumaker wrote: > >>>>>>On 07/22/2013 03:30 PM, J. Bruce Fields wrote: > >>>>>>>On Mon, Jul 22, 2013 at 03:17:29PM -0400, Bryan Schumaker wrote: > >>>>>>>>On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > >>>>>>>>>On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: > >>>>>>>>>>From: Bryan Schumaker <bjschuma@netapp.com> > >>>>>>>>>> > >>>>>>>>>>Rather than performing the copy right away, schedule it to run later and > >>>>>>>>>>reply to the client. Later, send a callback to notify the client that > >>>>>>>>>>the copy has finished. > >>>>>>>>>I believe you need to implement the referring triple support described > >>>>>>>>>in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > >>>>>>>>>described in > >>>>>>>>>http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>>>>>>>>. > >>>>>>>>I'll re-read and re-write. > >>>>>>>> > >>>>>>>>>I see cb_delay initialized below, but not otherwise used. Am I missing > >>>>>>>>>anything? > >>>>>>>>Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > >>>>>>>> > >>>>>>>>>What about OFFLOAD_STATUS and OFFLOAD_ABORT? > >>>>>>>>I haven't thought out those too much... I haven't thought about a use for them on the client yet. > >>>>>>>If it might be a long-running copy, I assume the client needs the > >>>>>>>ability to abort if the caller is killed. > >>>>>>> > >>>>>>>(Dumb question: what happens on the network partition? Does the server > >>>>>>>abort the copy when it expires the client state?) > >>>>>>> > >>>>>>>In any case, > >>>>>>>http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > >>>>>>>says "If a server's COPY operation returns a stateid, then the server > >>>>>>>MUST also support these operations: CB_OFFLOAD, OFFLOAD_ABORT, and > >>>>>>>OFFLOAD_STATUS." > >>>>>>> > >>>>>>>So even if we've no use for them on the client then we still need to > >>>>>>>implement them (and probably just write a basic pynfs test). Either > >>>>>>>that or update the spec. > >>>>>>Fair enough. I'll think it out and do something! Easy solution: save this patch for later and only support the sync version of copy for the final version of this patch series. > >>>>>I can't remember--does the spec give the server a clear way to bail out > >>>>>and tell the client to fall back on a normal copy in cases where the > >>>>>server knows the copy could take an unreasonable amount of time? > >>>>> > >>>>>--b. > >>>>I don't think so. Is there ever a case where copying over the network would be slower than copying on the server? > >>>Mybe not, but if the copy will take a minute, then we don't want to tie > >>>up an rpc slot for a minute. > >>> > >>>--b. > >>I think that we need to be able to handle copies that would take a > >>lot longer than just a minute - this offload could take a very long > >>time I assume depending on the size of the data getting copied and > >>the back end storage device.... 
> >Bryan suggested in offline discussion that one possibility might be to > >copy, say, at most a gigabyte at a time before returning and making the > >client continue the copy. > > > >Where for "a gigabyte" read, "some amount that doesn't take too long to > >copy but is still enough to allow close to full bandwidth". Hopefully > >that's an easy number to find. > > > >But based on > >http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > >the COPY operation isn't designed for that--it doesn't give the option > >of returning bytes_copied in the successful case. > > > >Maybe we should fix that in the spec, or maybe we just need to implement > >the asynchronous case. I guess it depends on which is easier, > > > > a) implementing the asynchronous case (and the referring-triple > > support to fix the COPY/callback races), or > > b) implementing this sort of "short copy" loop in a way that gives > > good performance. > > > >On the client side it's clearly a) since you're forced to handle that > >case anyway. (Unless we argue that *all* copies should work that way, > >and that the spec should ditch the asynchronous case.) On the server > >side, b) looks easier. > > > >--b. > > I am not sure that 1GB/time is enough - for a lot of servers, you > could do an enormous range since no data is actually moved inside of > the target (just pointers updated like in reflinked files for > example).... Right, but the short copy return would be optional on the server's part--so the client would request the whole range, and the server could copy it all in the quick update-some-pointers case while copying only the first gigabyte (or whatever) in the "dumb" read-write-loop case. But in the "dumb" case the server still needs a large enough range to make its IO efficient and to amortize the cost of the extra round trips, since the client is forced to serialize the COPY requests by the need to see the bytes_copied result. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
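For reference, option (b) on the server might look roughly like the sketch below, in the context of fs/nfsd/nfs4proc.c. nfsd_copy_range_limited() is a hypothetical helper -- the nfsd_copy_range() in this series returns only a status, not a byte count -- and a successful short reply is exactly what the current -19 draft does not permit, which is the point under discussion:

/* "A gigabyte", tunable; hypothetical constant for this sketch. */
#define NFSD_COPY_MAX_SYNC	(1ULL << 30)

static __be32
nfsd4_copy_bounded(struct svc_rqst *rqstp, struct file *src,
		   struct file *dst, struct nfsd4_copy *copy)
{
	u64 count = min_t(u64, copy->cp_count, NFSD_COPY_MAX_SYNC);
	u64 copied = 0;
	__be32 status;

	/* Hypothetical: copy sequentially, report how much was moved. */
	status = nfsd_copy_range_limited(src, copy->cp_src_pos,
					 dst, copy->cp_dst_pos,
					 count, &copied);
	if (status != nfs_ok)
		return status;

	copy->cp_res.wr_stateid = NULL;			/* synchronous reply */
	copy->cp_res.wr_bytes_written = copied;		/* client resumes from here */
	copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
	gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
	return nfs_ok;
}

The serialization cost mentioned above falls out of this shape: the client cannot send the next COPY until it has seen wr_bytes_written from the previous one.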
On Mon, Aug 05, 2013 at 02:50:38PM +0000, Myklebust, Trond wrote: > On Mon, 2013-08-05 at 10:41 -0400, J. Bruce Fields wrote: > > Bryan suggested in offline discussion that one possibility might be to > > copy, say, at most a gigabyte at a time before returning and making the > > client continue the copy. > > > > Where for "a gigabyte" read, "some amount that doesn't take too long to > > copy but is still enough to allow close to full bandwidth". Hopefully > > that's an easy number to find. > > > > But based on > > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > > the COPY operation isn't designed for that--it doesn't give the option > > of returning bytes_copied in the successful case. > > The reason is that the spec writers did not want to force the server to > copy the data in sequential order (or any other particular order for > that matter). Well, servers would still have the option not to return success unless the whole copy succeeded, so I'm not sure this *forces* servers to do sequential copies. (Unless we also got rid of the callback.) --b. > > If the copy was short, then the client can't know which bytes were > copied; they could be at the beginning of the file, in the middle, or > even the very end. Basically, it needs to redo the entire copy in order > to be certain. > > > Maybe we should fix that in the spec, or maybe we just need to implement > > the asynchronous case. I guess it depends on which is easier, > > > > a) implementing the asynchronous case (and the referring-triple > > support to fix the COPY/callback races), or > > b) implementing this sort of "short copy" loop in a way that gives > > good performance. > > > > On the client side it's clearly a) since you're forced to handle that > > case anyway. (Unless we argue that *all* copies should work that way, > > and that the spec should ditch the asynchronous case.) On the server > > side, b) looks easier. > > > > --b. > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@netapp.com > www.netapp.com > N?????r??y????b?X???v?^?)?{.n?+????{???"??^n?r???z???h?????&???G???h?(????j"???m??????z?????f???h???~?m -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Aug 5, 2013, at 2:11 PM, "J. Bruce Fields" <bfields@fieldses.org> wrote: > On Mon, Aug 05, 2013 at 02:50:38PM +0000, Myklebust, Trond wrote: >> On Mon, 2013-08-05 at 10:41 -0400, J. Bruce Fields wrote: >>> Bryan suggested in offline discussion that one possibility might be to >>> copy, say, at most a gigabyte at a time before returning and making the >>> client continue the copy. >>> >>> Where for "a gigabyte" read, "some amount that doesn't take too long to >>> copy but is still enough to allow close to full bandwidth". Hopefully >>> that's an easy number to find. >>> >>> But based on >>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 >>> the COPY operation isn't designed for that--it doesn't give the option >>> of returning bytes_copied in the successful case. >> >> The reason is that the spec writers did not want to force the server to >> copy the data in sequential order (or any other particular order for >> that matter). > > Well, servers would still have the option not to return success unless > the whole copy succeeded, so I'm not sure this *forces* servers to do > sequential copies. > > (Unless we also got rid of the callback.) If the client initiates a full-file copy and the operation fails, I would think that the client itself can try copying sufficiently large chunks of the file via separate individual COPY operations. If any of those operations fails, then the client can fall back again to a traditional over-the-wire copy operation. > --b. > >> >> If the copy was short, then the client can't know which bytes were >> copied; they could be at the beginning of the file, in the middle, or >> even the very end. Basically, it needs to redo the entire copy in order >> to be certain. >> >>> Maybe we should fix that in the spec, or maybe we just need to implement >>> the asynchronous case. I guess it depends on which is easier, >>> >>> a) implementing the asynchronous case (and the referring-triple >>> support to fix the COPY/callback races), or >>> b) implementing this sort of "short copy" loop in a way that gives >>> good performance. >>> >>> On the client side it's clearly a) since you're forced to handle that >>> case anyway. (Unless we argue that *all* copies should work that way, >>> and that the spec should ditch the asynchronous case.) On the server >>> side, b) looks easier. >>> >>> --b. >> >> -- >> Trond Myklebust >> Linux NFS client maintainer >> >> NetApp >> Trond.Myklebust@netapp.com >> www.netapp.com >> N?????r??y????b?X???v?^?)?{.n?+????{???"??^n?r???z???h?????&???G???h?(????j"???m??????z?????f???h???~?m > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
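The chunk-and-fall-back flow being described would look something like the loop below on the client; nfs42_proc_copy() is an invented placeholder for "send one COPY for this range", and nothing like it exists in the current client:

#define NFS_COPY_CHUNK	(1ULL << 30)	/* hypothetical chunk size */

static int nfs_copy_file_chunked(struct file *src, struct file *dst, u64 count)
{
	u64 pos = 0;

	while (pos < count) {
		u64 len = min_t(u64, count - pos, NFS_COPY_CHUNK);
		int err = nfs42_proc_copy(src, pos, dst, pos, len);

		if (err)
			return err;	/* caller falls back to a read/write copy */
		pos += len;
	}
	return 0;
}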
On Mon, 2013-08-05 at 14:17 -0400, Chuck Lever wrote: > On Aug 5, 2013, at 2:11 PM, "J. Bruce Fields" <bfields@fieldses.org> wrote: > > > On Mon, Aug 05, 2013 at 02:50:38PM +0000, Myklebust, Trond wrote: > >> On Mon, 2013-08-05 at 10:41 -0400, J. Bruce Fields wrote: > >>> Bryan suggested in offline discussion that one possibility might be to > >>> copy, say, at most a gigabyte at a time before returning and making the > >>> client continue the copy. > >>> > >>> Where for "a gigabyte" read, "some amount that doesn't take too long to > >>> copy but is still enough to allow close to full bandwidth". Hopefully > >>> that's an easy number to find. > >>> > >>> But based on > >>> http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > >>> the COPY operation isn't designed for that--it doesn't give the option > >>> of returning bytes_copied in the successful case. > >> > >> The reason is that the spec writers did not want to force the server to > >> copy the data in sequential order (or any other particular order for > >> that matter). > > > > Well, servers would still have the option not to return success unless > > the whole copy succeeded, so I'm not sure this *forces* servers to do > > sequential copies. > > > > (Unless we also got rid of the callback.) > > If the client initiates a full-file copy and the operation fails, I would think that the client itself can try copying sufficiently large chunks of the file via separate individual COPY operations. If any of those operations fails, then the client can fall back again to a traditional over-the-wire copy operation. How does the client determine what constitutes a "sufficiently large chunk" in the mind of the server, and why do we want to add that functionality in the first place? Fallback to traditional copy in the case where the server doesn't support offload is fine, but all these screwball special cases are not. We already have sync vs async. Now you want to add chunked sync and chunked async too? NACK... -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@netapp.com www.netapp.com
On Mon, Aug 05, 2013 at 02:11:21PM -0400, J. Bruce Fields wrote: > On Mon, Aug 05, 2013 at 02:50:38PM +0000, Myklebust, Trond wrote: > > On Mon, 2013-08-05 at 10:41 -0400, J. Bruce Fields wrote: > > > Bryan suggested in offline discussion that one possibility might be to > > > copy, say, at most a gigabyte at a time before returning and making the > > > client continue the copy. > > > > > > Where for "a gigabyte" read, "some amount that doesn't take too long to > > > copy but is still enough to allow close to full bandwidth". Hopefully > > > that's an easy number to find. > > > > > > But based on > > > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-14.1.2 > > > the COPY operation isn't designed for that--it doesn't give the option > > > of returning bytes_copied in the successful case. > > > > The reason is that the spec writers did not want to force the server to > > copy the data in sequential order (or any other particular order for > > that matter). > > Well, servers would still have the option not to return success unless > the whole copy succeeded, so I'm not sure this *forces* servers to do > sequential copies. Uh, sorry, I was confused, I missed the write_response4 in the COPY result entirely. Yeah obviously that's useless. (Why's it there anyway? No client or application is going to care about anything other than whether it's 0 or not, right?) So maybe it would be useful to add a way for a server to optionally communicate a sequential bytes_written, I don't know. Without that, at least, I think the only reasonable implementation of "dumb" server-side copies will need to implement the asynchronous case (and referring triples). Which might be worth doing. But for the first cut maybe we should instead *only* implement this on btrfs (or whoever else can do quick copies). --b. > > (Unless we also got rid of the callback.) > > If the copy was short, then the client can't know which bytes were > > copied; they could be at the beginning of the file, in the middle, or > > even the very end. Basically, it needs to redo the entire copy in order > > to be certain. > > > > > Maybe we should fix that in the spec, or maybe we just need to implement > > > the asynchronous case. I guess it depends on which is easier, > > > > > > a) implementing the asynchronous case (and the referring-triple > > > support to fix the COPY/callback races), or > > > b) implementing this sort of "short copy" loop in a way that gives > > > good performance. > > > > > > On the client side it's clearly a) since you're forced to handle that > > > case anyway. (Unless we argue that *all* copies should work that way, > > > and that the spec should ditch the asynchronous case.) On the server > > > side, b) looks easier. > > > > > > --b. > > > > -- > > Trond Myklebust > > Linux NFS client maintainer > > > > NetApp > > Trond.Myklebust@netapp.com > > www.netapp.com > > N?????r??y????b?X???v?^?)?{.n?+????{???"??^n?r???z???h?????&???G???h?(????j"???m??????z?????f???h???~?m -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
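A first cut of the "only on btrfs (or whoever else can do quick copies)" approach might be gated like the sketch below. Both helpers named here are placeholders rather than anything in this series, and returning nfserr_notsupp assumes the client then falls back to an ordinary over-the-wire copy:

static __be32
nfsd4_try_fast_copy(struct file *src, struct file *dst,
		    struct nfsd4_copy *copy)
{
	/* Placeholder capability check: can the exported filesystem
	 * clone this file pair in (roughly) constant time? */
	if (!nfsd_file_can_clone(src, dst))
		return nfserr_notsupp;	/* client does a normal copy instead */

	/* Placeholder clone/reflink primitive: completes quickly
	 * regardless of cp_count, so a synchronous reply is safe. */
	return nfsd_clone_range(src, copy->cp_src_pos,
				dst, copy->cp_dst_pos, copy->cp_count);
}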
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7f05cd1..8f797e1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -52,6 +52,9 @@ enum { NFSPROC4_CLNT_CB_NULL = 0, NFSPROC4_CLNT_CB_RECALL, NFSPROC4_CLNT_CB_SEQUENCE, + + /* NFS v4.2 callback */ + NFSPROC4_CLNT_CB_OFFLOAD, }; struct nfs4_cb_compound_hdr { @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 { OP_CB_WANTS_CANCELLED = 12, OP_CB_NOTIFY_LOCK = 13, OP_CB_NOTIFY_DEVICEID = 14, + OP_CB_OFFLOAD = 15, OP_CB_ILLEGAL = 10044 }; @@ -469,6 +473,31 @@ out_default: return nfs_cb_stat_to_errno(nfserr); } +static void encode_cb_offload4args(struct xdr_stream *xdr, + const struct nfs4_cb_offload *offload, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 *p; + + if (hdr->minorversion < 2) + return; + + encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, &offload->co_dst_fh); + encode_stateid4(xdr, &offload->co_stid->sc_stateid); + + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(1); + encode_stateid4(xdr, &offload->co_stid->sc_stateid); + + p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); + p = xdr_encode_hyper(p, offload->co_count); + *p++ = cpu_to_be32(offload->co_stable_how); + xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE); + + hdr->nops++; +} + /* * NFSv4.0 and NFSv4.1 XDR encode functions * @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } +/* + * CB_OFFLOAD + */ +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfsd4_callback *cb) +{ + const struct nfs4_cb_offload *args = cb->cb_op; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_offload4args(xdr, args, &hdr); + encode_cb_nops(&hdr); +} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -552,6 +598,36 @@ out: } /* + * CB_OFFLOAD + */ +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + struct nfs4_cb_compound_hdr hdr; + enum nfsstat4 nfserr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + goto out; + + if (cb != NULL) { + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status)) + goto out; + } + + status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr); + if (unlikely(status)) + goto out; + if (unlikely(nfserr != NFS4_OK)) + status = nfs_cb_stat_to_errno(nfserr); + +out: + return status; +} + +/* * RPC procedure tables */ #define PROC(proc, call, argtype, restype) \ @@ -568,6 +644,7 @@ out: static struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_NULL, NULL, cb_null, cb_null), PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), }; static struct rpc_version nfs_cb_version4 = { @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb) INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); } +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb) +{ + INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc); +} + void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) run_nfsd4_cb(&dp->dl_recall); } + +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_client *clp = 
cb->cb_clp; + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; + + nfsd4_cb_done(task, calldata); + + if (current_rpc_client != task->tk_client) + return; + + if (cb->cb_done) + return; + + if (task->tk_status != 0) + nfsd4_mark_cb_down(clp, task->tk_status); + cb->cb_done = true; +} + +static void nfsd4_cb_offload_release(void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback); + + if (cb->cb_done) { + nfs4_free_offload_stateid(offload->co_stid); + kfree(offload); + } +} + +static const struct rpc_call_ops nfsd4_cb_offload_ops = { + .rpc_call_prepare = nfsd4_cb_prepare, + .rpc_call_done = nfsd4_cb_offload_done, + .rpc_release = nfsd4_cb_offload_release, +}; + +void nfsd4_cb_offload(struct nfs4_cb_offload *offload) +{ + struct nfsd4_callback *cb = &offload->co_callback; + + cb->cb_op = offload; + cb->cb_clp = offload->co_stid->sc_client; + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD]; + cb->cb_msg.rpc_argp = cb; + cb->cb_msg.rpc_resp = cb; + + cb->cb_ops = &nfsd4_cb_offload_ops; + + INIT_LIST_HEAD(&cb->cb_per_client); + cb->cb_done = true; + + run_nfsd4_cb(cb); +} diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d4584ea..66a787f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -35,6 +35,7 @@ #include <linux/file.h> #include <linux/slab.h> +#include "state.h" #include "idmap.h" #include "cache.h" #include "xdr4.h" @@ -1062,29 +1063,57 @@ out: return status; } +static void +nfsd4_copy_async(struct work_struct *w) +{ + __be32 status; + struct nfs4_cb_offload *offload; + + offload = container_of(w, struct nfs4_cb_offload, co_work); + status = nfsd_copy_range(offload->co_src_file, offload->co_src_pos, + offload->co_dst_file, offload->co_dst_pos, + offload->co_count); + + if (status == nfs_ok) { + offload->co_stable_how = NFS_FILE_SYNC; + gen_boot_verifier(&offload->co_verifier, offload->co_net); + fput(offload->co_src_file); + fput(offload->co_dst_file); + } + nfsd4_cb_offload(offload); +} + static __be32 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_copy *copy) { - __be32 status; struct file *src = NULL, *dst = NULL; + struct nfs4_cb_offload *offload; - status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst); - if (status) - return status; - - status = nfsd_copy_range(src, copy->cp_src_pos, - dst, copy->cp_dst_pos, - copy->cp_count); + if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst)) + return nfserr_jukebox; - if (status == nfs_ok) { - copy->cp_res.wr_stateid = NULL; - copy->cp_res.wr_bytes_written = copy->cp_count; - copy->cp_res.wr_stable_how = NFS_FILE_SYNC; - gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); - } + offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL); + if (!offload) + return nfserr_jukebox; - return status; + offload->co_src_file = get_file(src); + offload->co_dst_file = get_file(dst); + offload->co_src_pos = copy->cp_src_pos; + offload->co_dst_pos = copy->cp_dst_pos; + offload->co_count = copy->cp_count; + offload->co_stid = nfs4_alloc_offload_stateid(cstate->session->se_client); + offload->co_net = SVC_NET(rqstp); + INIT_WORK(&offload->co_work, nfsd4_copy_async); + nfsd4_init_callback(&offload->co_callback); + memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh)); + + copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid; + copy->cp_res.wr_bytes_written = 0; + copy->cp_res.wr_stable_how = NFS_UNSTABLE; + + schedule_work(&offload->co_work); + 
return nfs_ok; } /* This routine never returns NFS_OK! If there are no other errors, it diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c4e270e..582edb5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); } +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp) +{ + return nfs4_alloc_stid(clp, stateid_slab); +} + static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp) kmem_cache_free(stateid_slab, stp); } +void nfs4_free_offload_stateid(struct nfs4_stid *stid) +{ + remove_stid(stid); + kmem_cache_free(stateid_slab, stid); +} + static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct file *file; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2478805..56682fb 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -70,6 +70,7 @@ struct nfsd4_callback { struct rpc_message cb_msg; const struct rpc_call_ops *cb_ops; struct work_struct cb_work; + struct delayed_work cb_delay; bool cb_done; }; @@ -101,6 +102,22 @@ struct nfs4_delegation { struct nfsd4_callback dl_recall; }; +struct nfs4_cb_offload { + struct file *co_src_file; + struct file *co_dst_file; + u64 co_src_pos; + u64 co_dst_pos; + u64 co_count; + u32 co_stable_how; + struct knfsd_fh co_dst_fh; + nfs4_verifier co_verifier; + struct net *co_net; + + struct nfs4_stid *co_stid; + struct work_struct co_work; + struct nfsd4_callback co_callback; +}; + /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *); extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); extern void nfsd4_init_callback(struct nfsd4_callback *); +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); +extern void nfsd4_cb_offload(struct nfs4_cb_offload *); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); extern void put_client_renew(struct nfs4_client *clp); +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *); +extern void nfs4_free_offload_stateid(struct nfs4_stid *); /* nfs4recover operations */ extern int nfsd4_client_tracking_init(struct net *net); diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index c5c55df..75b0ef7 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -21,3 +21,12 @@ #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) + +#define NFS4_enc_cb_offload_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_stateid_sz + 2 + 1 + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) + +#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz)