Message ID | 1303320392-21068-1-git-send-email-bhalevy@panasas.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, 2011-04-20 at 20:26 +0300, Benny Halevy wrote: > Signed-off-by: Alexandros Batsakis <batsakis@netapp.com> > Signed-off-by: Andy Adamson <andros@citi.umich.edu> > Signed-off-by: Andy Adamson <andros@netapp.com> > Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> > Signed-off-by: Fred Isaman <iisaman@citi.umich.edu> > Signed-off-by: Fred Isaman <iisaman@netapp.com> > Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> > Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn> > Signed-off-by: Benny Halevy <bhalevy@panasas.com> > --- > fs/nfs/inode.c | 3 +- > fs/nfs/nfs4proc.c | 97 ++++++++++++++++++++++++++++++++++++++ > fs/nfs/nfs4xdr.c | 118 ++++++++++++++++++++++++++++++++++++++++++++-- > fs/nfs/pnfs.c | 96 ++++++++++++++++++++++++++++++++++++++ > fs/nfs/pnfs.h | 22 +++++++++ > include/linux/nfs4.h | 1 + > include/linux/nfs_xdr.h | 23 +++++++++ > 7 files changed, 353 insertions(+), 7 deletions(-) > > diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c > index 57bb31a..73a2529 100644 > --- a/fs/nfs/inode.c > +++ b/fs/nfs/inode.c > @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) > */ > void nfs4_evict_inode(struct inode *inode) > { > - pnfs_destroy_layout(NFS_I(inode)); > + pnfs_return_layout(inode, NULL, true); Why does this want to come before the call to truncate_inode_pages()? Is there any reason not to put pnfs_return_layout() and pnfs_destroy_layout into a single helper here? > truncate_inode_pages(&inode->i_data, 0); > end_writeback(inode); > + pnfs_destroy_layout(NFS_I(inode)); > /* If we are holding a delegation, return it! */ > nfs_inode_return_delegation_noreclaim(inode); > /* First call standard NFS clear_inode() code */ > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index 9bf41ea..b03defb 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -5662,6 +5662,103 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) > return status; > } > > +static void > +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) > +{ > + struct nfs4_layoutreturn *lrp = calldata; > + > + dprintk("--> %s\n", __func__); > + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, > + &lrp->res.seq_res, 0, task)) > + return; > + rpc_call_start(task); > +} > + > +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) > +{ > + struct nfs4_layoutreturn *lrp = calldata; > + struct nfs_server *server; > + > + dprintk("--> %s\n", __func__); > + > + if (!nfs4_sequence_done(task, &lrp->res.seq_res)) > + return; > + > + if (lrp->args.return_type == RETURN_FILE) > + server = NFS_SERVER(lrp->args.inode); > + else > + server = NULL; > + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { > + nfs_restart_rpc(task, lrp->clp); > + return; > + } > + if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) { > + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; > + > + spin_lock(&lo->plh_inode->i_lock); > + if (lrp->res.lrs_present) > + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); > + else > + BUG_ON(!list_empty(&lo->plh_segs)); > + spin_unlock(&lo->plh_inode->i_lock); > + } > + dprintk("<-- %s\n", __func__); > +} > + > +static void nfs4_layoutreturn_release(void *calldata) > +{ > + struct nfs4_layoutreturn *lrp = calldata; > + > + dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type); > + if (lrp->args.return_type == RETURN_FILE) { > + struct inode *ino = lrp->args.inode; > + struct pnfs_layout_hdr *lo = NFS_I(ino)->layout; > + > + put_layout_hdr(lo); > + } > + kfree(calldata); > + dprintk("<-- %s\n", __func__); > +} > + > +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { > + .rpc_call_prepare = nfs4_layoutreturn_prepare, > + .rpc_call_done = nfs4_layoutreturn_done, > + .rpc_release = nfs4_layoutreturn_release, > +}; > + > +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync) Why the 'issync' parameter? > +{ > + struct rpc_task *task; > + struct rpc_message msg = { > + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], > + .rpc_argp = &lrp->args, > + .rpc_resp = &lrp->res, > + }; > + struct rpc_task_setup task_setup_data = { > + .rpc_client = lrp->clp->cl_rpcclient, > + .rpc_message = &msg, > + .callback_ops = &nfs4_layoutreturn_call_ops, > + .callback_data = lrp, > + .flags = RPC_TASK_ASYNC, > + }; > + int status = 0; > + > + dprintk("--> %s\n", __func__); > + task = rpc_run_task(&task_setup_data); > + if (IS_ERR(task)) > + return PTR_ERR(task); > + if (!issync) > + goto out; > + status = nfs4_wait_for_completion_rpc_task(task); > + if (status != 0) > + goto out; > + status = task->tk_status; > +out: > + dprintk("<-- %s\n", __func__); > + rpc_put_task(task); > + return status; > +} > + > static int > _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) > { > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c > index dddfb57..53ea3e5 100644 > --- a/fs/nfs/nfs4xdr.c > +++ b/fs/nfs/nfs4xdr.c > @@ -338,7 +338,12 @@ static int nfs4_stat_to_errno(int); > 1 /* layoutupdate4 layout type */ + \ > 1 /* NULL filelayout layoutupdate4 payload */) > #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) > - > +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ > + encode_stateid_maxsz + \ > + 1 /* FIXME: opaque lrf_body always empty at > + *the moment */) > +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ > + 1 + decode_stateid_maxsz) > #else /* CONFIG_NFS_V4_1 */ > #define encode_sequence_maxsz 0 > #define decode_sequence_maxsz 0 > @@ -760,7 +765,14 @@ static int nfs4_stat_to_errno(int); > decode_putfh_maxsz + \ > decode_layoutcommit_maxsz + \ > decode_getattr_maxsz) > - > +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ > + encode_sequence_maxsz + \ > + encode_putfh_maxsz + \ > + encode_layoutreturn_maxsz) > +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ > + decode_sequence_maxsz + \ > + decode_putfh_maxsz + \ > + decode_layoutreturn_maxsz) > > const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + > compound_encode_hdr_maxsz + > @@ -1890,6 +1902,37 @@ encode_layoutcommit(struct xdr_stream *xdr, > hdr->replen += decode_layoutcommit_maxsz; > return 0; > } > + > +static void > +encode_layoutreturn(struct xdr_stream *xdr, > + const struct nfs4_layoutreturn_args *args, > + struct compound_hdr *hdr) > +{ > + nfs4_stateid stateid; > + __be32 *p; > + > + p = reserve_space(xdr, 20); > + *p++ = cpu_to_be32(OP_LAYOUTRETURN); > + *p++ = cpu_to_be32(args->reclaim); > + *p++ = cpu_to_be32(args->layout_type); > + *p++ = cpu_to_be32(args->range.iomode); > + *p = cpu_to_be32(args->return_type); > + if (args->return_type == RETURN_FILE) { > + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); > + p = xdr_encode_hyper(p, args->range.offset); > + p = xdr_encode_hyper(p, args->range.length); > + spin_lock(&args->inode->i_lock); > + memcpy(stateid.data, NFS_I(args->inode)->layout->plh_stateid.data, > + NFS4_STATEID_SIZE); > + spin_unlock(&args->inode->i_lock); > + p = xdr_encode_opaque_fixed(p, &stateid.data, > + NFS4_STATEID_SIZE); > + p = reserve_space(xdr, 4); > + *p = cpu_to_be32(0); > + } > + hdr->nops++; > + hdr->replen += decode_layoutreturn_maxsz; > +} > #endif /* CONFIG_NFS_V4_1 */ > > /* > @@ -2707,9 +2750,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, > /* > * Encode LAYOUTCOMMIT request > */ > -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, > - struct xdr_stream *xdr, > - struct nfs4_layoutcommit_args *args) > +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, > + struct xdr_stream *xdr, > + struct nfs4_layoutcommit_args *args) > { > struct compound_hdr hdr = { > .minorversion = nfs4_xdr_minorversion(&args->seq_args), > @@ -2721,7 +2764,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, > encode_layoutcommit(xdr, args, &hdr); > encode_getfattr(xdr, args->bitmask, &hdr); > encode_nops(&hdr); > - return 0; > +} > + > +/* > + * Encode LAYOUTRETURN request > + */ > +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, > + struct xdr_stream *xdr, > + struct nfs4_layoutreturn_args *args) > +{ > + struct compound_hdr hdr = { > + .minorversion = nfs4_xdr_minorversion(&args->seq_args), > + }; > + > + encode_compound_hdr(xdr, req, &hdr); > + encode_sequence(xdr, &args->seq_args, &hdr); > + encode_putfh(xdr, NFS_FH(args->inode), &hdr); > + encode_layoutreturn(xdr, args, &hdr); > + encode_nops(&hdr); > } > #endif /* CONFIG_NFS_V4_1 */ > > @@ -5202,6 +5262,27 @@ out_overflow: > return -EIO; > } > > +static int decode_layoutreturn(struct xdr_stream *xdr, > + struct nfs4_layoutreturn_res *res) > +{ > + __be32 *p; > + int status; > + > + status = decode_op_hdr(xdr, OP_LAYOUTRETURN); > + if (status) > + return status; > + p = xdr_inline_decode(xdr, 4); > + if (unlikely(!p)) > + goto out_overflow; > + res->lrs_present = be32_to_cpup(p); > + if (res->lrs_present) > + status = decode_stateid(xdr, &res->stateid); > + return status; > +out_overflow: > + print_overflow_msg(__func__, xdr); > + return -EIO; > +} > + > static int decode_layoutcommit(struct xdr_stream *xdr, > struct rpc_rqst *req, > struct nfs4_layoutcommit_res *res) > @@ -6319,6 +6400,30 @@ out: > } > > /* > + * Decode LAYOUTRETURN response > + */ > +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, > + struct xdr_stream *xdr, > + struct nfs4_layoutreturn_res *res) > +{ > + struct compound_hdr hdr; > + int status; > + > + status = decode_compound_hdr(xdr, &hdr); > + if (status) > + goto out; > + status = decode_sequence(xdr, &res->seq_res, rqstp); > + if (status) > + goto out; > + status = decode_putfh(xdr); > + if (status) > + goto out; > + status = decode_layoutreturn(xdr, res); > +out: > + return status; > +} > + > +/* > * Decode LAYOUTCOMMIT response > */ > static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, > @@ -6544,6 +6649,7 @@ struct rpc_procinfo nfs4_procedures[] = { > PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), > PROC(LAYOUTGET, enc_layoutget, dec_layoutget), > PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), > + PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), > #endif /* CONFIG_NFS_V4_1 */ > }; > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index d9ab972..89e7725 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -321,6 +321,36 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, > return invalid - removed; > } > > +/* Returns false if there was nothing to do, true otherwise */ > +static bool > +pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, > + struct pnfs_layout_range *range) > +{ > + struct pnfs_layout_segment *lseg, *next; > + bool rv = false; > + > + dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n", > + __func__, lo, range->offset, range->length, range->iomode); > + assert_spin_locked(&lo->plh_inode->i_lock); Not needed in the case of nfs4_evict_inode. > + if (list_empty(&lo->plh_segs)) { > + if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) > + put_layout_hdr_locked(lo); > + return 0; > + } > + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) > + if (should_free_lseg(lseg->pls_range.iomode, range->iomode)) { > + dprintk("%s: freeing lseg %p iomode %d " > + "offset %llu length %llu\n", __func__, > + lseg, lseg->pls_range.iomode, > + lseg->pls_range.offset, > + lseg->pls_range.length); > + mark_lseg_invalid(lseg, tmp_list); > + rv = true; > + } > + dprintk("%s:Return %d\n", __func__, rv); > + return rv; > +} > + > /* note free_me must contain lsegs from a single layout_hdr */ > void > pnfs_free_lseg_list(struct list_head *free_me) > @@ -539,6 +569,72 @@ out_err_free: > return NULL; > } > > +static int > +return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait) > +{ > + struct nfs4_layoutreturn *lrp; > + struct nfs_server *server = NFS_SERVER(ino); > + int status = -ENOMEM; > + > + dprintk("--> %s\n", __func__); > + > + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); > + if (lrp == NULL) { > + put_layout_hdr(NFS_I(ino)->layout); > + goto out; > + } > + lrp->args.reclaim = 0; > + lrp->args.layout_type = server->pnfs_curr_ld->id; > + lrp->args.return_type = RETURN_FILE; > + lrp->args.range = *range; > + lrp->args.inode = ino; > + lrp->clp = server->nfs_client; > + > + status = nfs4_proc_layoutreturn(lrp, wait); > +out: > + dprintk("<-- %s status: %d\n", __func__, status); > + return status; > +} > + > +/* Initiates a LAYOUTRETURN(FILE) */ > +int > +_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait) What are the 'range' and 'wait' parameters for? We don't have any users other than nfs4_evict_inode. > +{ > + struct pnfs_layout_hdr *lo = NULL; > + struct nfs_inode *nfsi = NFS_I(ino); > + struct pnfs_layout_range arg; > + LIST_HEAD(tmp_list); > + int status = 0; > + > + dprintk("--> %s\n", __func__); > + > + arg.iomode = range ? range->iomode : IOMODE_ANY; > + arg.offset = 0; > + arg.length = NFS4_MAX_UINT64; > + > + spin_lock(&ino->i_lock); > + lo = nfsi->layout; > + if (!lo || !pnfs_clear_lseg_list(lo, &tmp_list, &arg)) { > + spin_unlock(&ino->i_lock); > + dprintk("%s: no layout segments to return\n", __func__); > + goto out; > + } > + /* Reference matched in nfs4_layoutreturn_release */ > + get_layout_hdr(lo); > + spin_unlock(&ino->i_lock); > + pnfs_free_lseg_list(&tmp_list); > + > + /* Return layout even if layoutcommit fails */ > + status = pnfs_layoutcommit_inode(ino, wait); Why is this needed? Again, by the time we get to nfs4_evict_inode, the inode is guaranteed to be clean. > + if (status) > + dprintk("%s: layoutcommit failed, status=%d. Returning layout anyway\n", > + __func__, status); > + status = return_layout(ino, &arg, wait); > +out: > + dprintk("<-- %s status: %d\n", __func__, status); > + return status; > +} > + > bool pnfs_roc(struct inode *ino) > { > struct pnfs_layout_hdr *lo; > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > index 4cb0a0d..a308f3c 100644 > --- a/fs/nfs/pnfs.h > +++ b/fs/nfs/pnfs.h > @@ -123,6 +123,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); > extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, > struct pnfs_device *dev); > extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); > +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait); > > /* pnfs.c */ > void get_layout_hdr(struct pnfs_layout_hdr *lo); > @@ -158,6 +159,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); > bool pnfs_roc_drain(struct inode *ino, u32 *barrier); > void pnfs_set_layoutcommit(struct nfs_write_data *wdata); > int pnfs_layoutcommit_inode(struct inode *inode, bool sync); > +int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *, bool wait); > > static inline int lo_fail_bit(u32 iomode) > { > @@ -226,6 +228,19 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) > put_lseg(req->wb_commit_lseg); > } > > +static inline int pnfs_return_layout(struct inode *ino, > + struct pnfs_layout_range *range, > + bool wait) > +{ > + struct nfs_inode *nfsi = NFS_I(ino); > + struct nfs_server *nfss = NFS_SERVER(ino); > + > + if (pnfs_enabled_sb(nfss) && nfsi->layout) > + return _pnfs_return_layout(ino, range, wait); > + > + return 0; > +} > + > #else /* CONFIG_NFS_V4_1 */ > > static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) > @@ -267,6 +282,13 @@ pnfs_try_to_write_data(struct nfs_write_data *data, > return PNFS_NOT_ATTEMPTED; > } > > +static inline int pnfs_return_layout(struct inode *ino, > + struct pnfs_layout_range *range, > + bool wait) > +{ > + return 0; > +} > + > static inline bool > pnfs_roc(struct inode *ino) > { > diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h > index 178fafe..9376eaf 100644 > --- a/include/linux/nfs4.h > +++ b/include/linux/nfs4.h > @@ -562,6 +562,7 @@ enum { > NFSPROC4_CLNT_LAYOUTGET, > NFSPROC4_CLNT_GETDEVICEINFO, > NFSPROC4_CLNT_LAYOUTCOMMIT, > + NFSPROC4_CLNT_LAYOUTRETURN, > }; > > /* nfs41 types */ > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h > index 78b101e..455ddfb 100644 > --- a/include/linux/nfs_xdr.h > +++ b/include/linux/nfs_xdr.h > @@ -266,6 +266,29 @@ struct nfs4_layoutcommit_data { > struct nfs4_layoutcommit_res res; > }; > > +struct nfs4_layoutreturn_args { > + __u32 reclaim; > + __u32 layout_type; > + __u32 return_type; Why do we need a 'return_type'? As far as I can see, this will always be RETURN_FILE. > + struct pnfs_layout_range range; > + struct inode *inode; > + struct nfs4_sequence_args seq_args; > +}; > + > +struct nfs4_layoutreturn_res { > + struct nfs4_sequence_res seq_res; > + u32 lrs_present; > + nfs4_stateid stateid; > +}; > + > +struct nfs4_layoutreturn { > + struct nfs4_layoutreturn_args args; > + struct nfs4_layoutreturn_res res; > + struct rpc_cred *cred; > + struct nfs_client *clp; > + int rpc_status; > +}; > + > /* > * Arguments to the open call. > */
On 2011-04-20 22:53, Trond Myklebust wrote: > On Wed, 2011-04-20 at 20:26 +0300, Benny Halevy wrote: >> @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) >> */ >> void nfs4_evict_inode(struct inode *inode) >> { >> - pnfs_destroy_layout(NFS_I(inode)); >> + pnfs_return_layout(inode, NULL, true); > > Why does this want to come before the call to truncate_inode_pages()? Actually, I don't see any good reason. :-/ > Is there any reason not to put pnfs_return_layout() and > pnfs_destroy_layout into a single helper here? > Looks like an overkill to me for this one call site. >> truncate_inode_pages(&inode->i_data, 0); >> end_writeback(inode); >> + pnfs_destroy_layout(NFS_I(inode)); >> /* If we are holding a delegation, return it! */ >> nfs_inode_return_delegation_noreclaim(inode); >> /* First call standard NFS clear_inode() code */ >> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c >> index 9bf41ea..b03defb 100644 >> --- a/fs/nfs/nfs4proc.c >> +++ b/fs/nfs/nfs4proc.c >> @@ -5662,6 +5662,103 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) >> return status; >> } >> >> +static void >> +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) >> +{ >> + struct nfs4_layoutreturn *lrp = calldata; >> + >> + dprintk("--> %s\n", __func__); >> + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, >> + &lrp->res.seq_res, 0, task)) >> + return; >> + rpc_call_start(task); >> +} >> + >> +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) >> +{ >> + struct nfs4_layoutreturn *lrp = calldata; >> + struct nfs_server *server; >> + >> + dprintk("--> %s\n", __func__); >> + >> + if (!nfs4_sequence_done(task, &lrp->res.seq_res)) >> + return; >> + >> + if (lrp->args.return_type == RETURN_FILE) >> + server = NFS_SERVER(lrp->args.inode); >> + else >> + server = NULL; >> + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { >> + nfs_restart_rpc(task, lrp->clp); >> + return; >> + } >> + if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) { >> + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; >> + >> + spin_lock(&lo->plh_inode->i_lock); >> + if (lrp->res.lrs_present) >> + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); >> + else >> + BUG_ON(!list_empty(&lo->plh_segs)); >> + spin_unlock(&lo->plh_inode->i_lock); >> + } >> + dprintk("<-- %s\n", __func__); >> +} >> + >> +static void nfs4_layoutreturn_release(void *calldata) >> +{ >> + struct nfs4_layoutreturn *lrp = calldata; >> + >> + dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type); >> + if (lrp->args.return_type == RETURN_FILE) { >> + struct inode *ino = lrp->args.inode; >> + struct pnfs_layout_hdr *lo = NFS_I(ino)->layout; >> + >> + put_layout_hdr(lo); >> + } >> + kfree(calldata); >> + dprintk("<-- %s\n", __func__); >> +} >> + >> +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { >> + .rpc_call_prepare = nfs4_layoutreturn_prepare, >> + .rpc_call_done = nfs4_layoutreturn_done, >> + .rpc_release = nfs4_layoutreturn_release, >> +}; >> + >> +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync) > > Why the 'issync' parameter? > Hmm, you're right. Currently all call sites use it as sync. >> +{ >> + struct rpc_task *task; >> + struct rpc_message msg = { >> + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], >> + .rpc_argp = &lrp->args, >> + .rpc_resp = &lrp->res, >> + }; >> + struct rpc_task_setup task_setup_data = { >> + .rpc_client = lrp->clp->cl_rpcclient, >> + .rpc_message = &msg, >> + .callback_ops = &nfs4_layoutreturn_call_ops, >> + .callback_data = lrp, >> + .flags = RPC_TASK_ASYNC, >> + }; >> + int status = 0; >> + >> + dprintk("--> %s\n", __func__); >> + task = rpc_run_task(&task_setup_data); >> + if (IS_ERR(task)) >> + return PTR_ERR(task); >> + if (!issync) >> + goto out; >> + status = nfs4_wait_for_completion_rpc_task(task); >> + if (status != 0) >> + goto out; >> + status = task->tk_status; >> +out: >> + dprintk("<-- %s\n", __func__); >> + rpc_put_task(task); >> + return status; >> +} >> + >> static int >> _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) >> { >> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c >> index dddfb57..53ea3e5 100644 >> --- a/fs/nfs/nfs4xdr.c >> +++ b/fs/nfs/nfs4xdr.c >> @@ -338,7 +338,12 @@ static int nfs4_stat_to_errno(int); >> 1 /* layoutupdate4 layout type */ + \ >> 1 /* NULL filelayout layoutupdate4 payload */) >> #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) >> - >> +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ >> + encode_stateid_maxsz + \ >> + 1 /* FIXME: opaque lrf_body always empty at >> + *the moment */) >> +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ >> + 1 + decode_stateid_maxsz) >> #else /* CONFIG_NFS_V4_1 */ >> #define encode_sequence_maxsz 0 >> #define decode_sequence_maxsz 0 >> @@ -760,7 +765,14 @@ static int nfs4_stat_to_errno(int); >> decode_putfh_maxsz + \ >> decode_layoutcommit_maxsz + \ >> decode_getattr_maxsz) >> - >> +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ >> + encode_sequence_maxsz + \ >> + encode_putfh_maxsz + \ >> + encode_layoutreturn_maxsz) >> +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ >> + decode_sequence_maxsz + \ >> + decode_putfh_maxsz + \ >> + decode_layoutreturn_maxsz) >> >> const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + >> compound_encode_hdr_maxsz + >> @@ -1890,6 +1902,37 @@ encode_layoutcommit(struct xdr_stream *xdr, >> hdr->replen += decode_layoutcommit_maxsz; >> return 0; >> } >> + >> +static void >> +encode_layoutreturn(struct xdr_stream *xdr, >> + const struct nfs4_layoutreturn_args *args, >> + struct compound_hdr *hdr) >> +{ >> + nfs4_stateid stateid; >> + __be32 *p; >> + >> + p = reserve_space(xdr, 20); >> + *p++ = cpu_to_be32(OP_LAYOUTRETURN); >> + *p++ = cpu_to_be32(args->reclaim); >> + *p++ = cpu_to_be32(args->layout_type); >> + *p++ = cpu_to_be32(args->range.iomode); >> + *p = cpu_to_be32(args->return_type); >> + if (args->return_type == RETURN_FILE) { >> + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); >> + p = xdr_encode_hyper(p, args->range.offset); >> + p = xdr_encode_hyper(p, args->range.length); >> + spin_lock(&args->inode->i_lock); >> + memcpy(stateid.data, NFS_I(args->inode)->layout->plh_stateid.data, >> + NFS4_STATEID_SIZE); >> + spin_unlock(&args->inode->i_lock); >> + p = xdr_encode_opaque_fixed(p, &stateid.data, >> + NFS4_STATEID_SIZE); >> + p = reserve_space(xdr, 4); >> + *p = cpu_to_be32(0); >> + } >> + hdr->nops++; >> + hdr->replen += decode_layoutreturn_maxsz; >> +} >> #endif /* CONFIG_NFS_V4_1 */ >> >> /* >> @@ -2707,9 +2750,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, >> /* >> * Encode LAYOUTCOMMIT request >> */ >> -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, >> - struct xdr_stream *xdr, >> - struct nfs4_layoutcommit_args *args) >> +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, >> + struct xdr_stream *xdr, >> + struct nfs4_layoutcommit_args *args) >> { >> struct compound_hdr hdr = { >> .minorversion = nfs4_xdr_minorversion(&args->seq_args), >> @@ -2721,7 +2764,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, >> encode_layoutcommit(xdr, args, &hdr); >> encode_getfattr(xdr, args->bitmask, &hdr); >> encode_nops(&hdr); >> - return 0; >> +} >> + >> +/* >> + * Encode LAYOUTRETURN request >> + */ >> +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, >> + struct xdr_stream *xdr, >> + struct nfs4_layoutreturn_args *args) >> +{ >> + struct compound_hdr hdr = { >> + .minorversion = nfs4_xdr_minorversion(&args->seq_args), >> + }; >> + >> + encode_compound_hdr(xdr, req, &hdr); >> + encode_sequence(xdr, &args->seq_args, &hdr); >> + encode_putfh(xdr, NFS_FH(args->inode), &hdr); >> + encode_layoutreturn(xdr, args, &hdr); >> + encode_nops(&hdr); >> } >> #endif /* CONFIG_NFS_V4_1 */ >> >> @@ -5202,6 +5262,27 @@ out_overflow: >> return -EIO; >> } >> >> +static int decode_layoutreturn(struct xdr_stream *xdr, >> + struct nfs4_layoutreturn_res *res) >> +{ >> + __be32 *p; >> + int status; >> + >> + status = decode_op_hdr(xdr, OP_LAYOUTRETURN); >> + if (status) >> + return status; >> + p = xdr_inline_decode(xdr, 4); >> + if (unlikely(!p)) >> + goto out_overflow; >> + res->lrs_present = be32_to_cpup(p); >> + if (res->lrs_present) >> + status = decode_stateid(xdr, &res->stateid); >> + return status; >> +out_overflow: >> + print_overflow_msg(__func__, xdr); >> + return -EIO; >> +} >> + >> static int decode_layoutcommit(struct xdr_stream *xdr, >> struct rpc_rqst *req, >> struct nfs4_layoutcommit_res *res) >> @@ -6319,6 +6400,30 @@ out: >> } >> >> /* >> + * Decode LAYOUTRETURN response >> + */ >> +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, >> + struct xdr_stream *xdr, >> + struct nfs4_layoutreturn_res *res) >> +{ >> + struct compound_hdr hdr; >> + int status; >> + >> + status = decode_compound_hdr(xdr, &hdr); >> + if (status) >> + goto out; >> + status = decode_sequence(xdr, &res->seq_res, rqstp); >> + if (status) >> + goto out; >> + status = decode_putfh(xdr); >> + if (status) >> + goto out; >> + status = decode_layoutreturn(xdr, res); >> +out: >> + return status; >> +} >> + >> +/* >> * Decode LAYOUTCOMMIT response >> */ >> static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, >> @@ -6544,6 +6649,7 @@ struct rpc_procinfo nfs4_procedures[] = { >> PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), >> PROC(LAYOUTGET, enc_layoutget, dec_layoutget), >> PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), >> + PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), >> #endif /* CONFIG_NFS_V4_1 */ >> }; >> >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c >> index d9ab972..89e7725 100644 >> --- a/fs/nfs/pnfs.c >> +++ b/fs/nfs/pnfs.c >> @@ -321,6 +321,36 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, >> return invalid - removed; >> } >> >> +/* Returns false if there was nothing to do, true otherwise */ >> +static bool >> +pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, >> + struct pnfs_layout_range *range) >> +{ >> + struct pnfs_layout_segment *lseg, *next; >> + bool rv = false; >> + >> + dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n", >> + __func__, lo, range->offset, range->length, range->iomode); >> + assert_spin_locked(&lo->plh_inode->i_lock); > > Not needed in the case of nfs4_evict_inode. > True. I can remove the assert but the lock better be taken if called while the inode is alive. >> + if (list_empty(&lo->plh_segs)) { >> + if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) >> + put_layout_hdr_locked(lo); >> + return 0; >> + } >> + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) >> + if (should_free_lseg(lseg->pls_range.iomode, range->iomode)) { >> + dprintk("%s: freeing lseg %p iomode %d " >> + "offset %llu length %llu\n", __func__, >> + lseg, lseg->pls_range.iomode, >> + lseg->pls_range.offset, >> + lseg->pls_range.length); >> + mark_lseg_invalid(lseg, tmp_list); >> + rv = true; >> + } >> + dprintk("%s:Return %d\n", __func__, rv); >> + return rv; >> +} >> + >> /* note free_me must contain lsegs from a single layout_hdr */ >> void >> pnfs_free_lseg_list(struct list_head *free_me) >> @@ -539,6 +569,72 @@ out_err_free: >> return NULL; >> } >> >> +static int >> +return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait) >> +{ >> + struct nfs4_layoutreturn *lrp; >> + struct nfs_server *server = NFS_SERVER(ino); >> + int status = -ENOMEM; >> + >> + dprintk("--> %s\n", __func__); >> + >> + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); >> + if (lrp == NULL) { >> + put_layout_hdr(NFS_I(ino)->layout); >> + goto out; >> + } >> + lrp->args.reclaim = 0; >> + lrp->args.layout_type = server->pnfs_curr_ld->id; >> + lrp->args.return_type = RETURN_FILE; >> + lrp->args.range = *range; >> + lrp->args.inode = ino; >> + lrp->clp = server->nfs_client; >> + >> + status = nfs4_proc_layoutreturn(lrp, wait); >> +out: >> + dprintk("<-- %s status: %d\n", __func__, status); >> + return status; >> +} >> + >> +/* Initiates a LAYOUTRETURN(FILE) */ >> +int >> +_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait) > > What are the 'range' and 'wait' parameters for? We don't have any users > other than nfs4_evict_inode. > OK, I'll introduce these with their usage. >> +{ >> + struct pnfs_layout_hdr *lo = NULL; >> + struct nfs_inode *nfsi = NFS_I(ino); >> + struct pnfs_layout_range arg; >> + LIST_HEAD(tmp_list); >> + int status = 0; >> + >> + dprintk("--> %s\n", __func__); >> + >> + arg.iomode = range ? range->iomode : IOMODE_ANY; >> + arg.offset = 0; >> + arg.length = NFS4_MAX_UINT64; >> + >> + spin_lock(&ino->i_lock); >> + lo = nfsi->layout; >> + if (!lo || !pnfs_clear_lseg_list(lo, &tmp_list, &arg)) { >> + spin_unlock(&ino->i_lock); >> + dprintk("%s: no layout segments to return\n", __func__); >> + goto out; >> + } >> + /* Reference matched in nfs4_layoutreturn_release */ >> + get_layout_hdr(lo); >> + spin_unlock(&ino->i_lock); >> + pnfs_free_lseg_list(&tmp_list); >> + >> + /* Return layout even if layoutcommit fails */ >> + status = pnfs_layoutcommit_inode(ino, wait); > > Why is this needed? Again, by the time we get to nfs4_evict_inode, the > inode is guaranteed to be clean. > OK, let me just add a WARN_ON if we get here and layout commit is required. Does that work for you? >> + if (status) >> + dprintk("%s: layoutcommit failed, status=%d. Returning layout anyway\n", >> + __func__, status); >> + status = return_layout(ino, &arg, wait); >> +out: >> + dprintk("<-- %s status: %d\n", __func__, status); >> + return status; >> +} >> + >> bool pnfs_roc(struct inode *ino) >> { >> struct pnfs_layout_hdr *lo; >> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h >> index 4cb0a0d..a308f3c 100644 >> --- a/fs/nfs/pnfs.h >> +++ b/fs/nfs/pnfs.h >> @@ -123,6 +123,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); >> extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, >> struct pnfs_device *dev); >> extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); >> +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait); >> >> /* pnfs.c */ >> void get_layout_hdr(struct pnfs_layout_hdr *lo); >> @@ -158,6 +159,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); >> bool pnfs_roc_drain(struct inode *ino, u32 *barrier); >> void pnfs_set_layoutcommit(struct nfs_write_data *wdata); >> int pnfs_layoutcommit_inode(struct inode *inode, bool sync); >> +int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *, bool wait); >> >> static inline int lo_fail_bit(u32 iomode) >> { >> @@ -226,6 +228,19 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) >> put_lseg(req->wb_commit_lseg); >> } >> >> +static inline int pnfs_return_layout(struct inode *ino, >> + struct pnfs_layout_range *range, >> + bool wait) >> +{ >> + struct nfs_inode *nfsi = NFS_I(ino); >> + struct nfs_server *nfss = NFS_SERVER(ino); >> + >> + if (pnfs_enabled_sb(nfss) && nfsi->layout) >> + return _pnfs_return_layout(ino, range, wait); >> + >> + return 0; >> +} >> + >> #else /* CONFIG_NFS_V4_1 */ >> >> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) >> @@ -267,6 +282,13 @@ pnfs_try_to_write_data(struct nfs_write_data *data, >> return PNFS_NOT_ATTEMPTED; >> } >> >> +static inline int pnfs_return_layout(struct inode *ino, >> + struct pnfs_layout_range *range, >> + bool wait) >> +{ >> + return 0; >> +} >> + >> static inline bool >> pnfs_roc(struct inode *ino) >> { >> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h >> index 178fafe..9376eaf 100644 >> --- a/include/linux/nfs4.h >> +++ b/include/linux/nfs4.h >> @@ -562,6 +562,7 @@ enum { >> NFSPROC4_CLNT_LAYOUTGET, >> NFSPROC4_CLNT_GETDEVICEINFO, >> NFSPROC4_CLNT_LAYOUTCOMMIT, >> + NFSPROC4_CLNT_LAYOUTRETURN, >> }; >> >> /* nfs41 types */ >> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h >> index 78b101e..455ddfb 100644 >> --- a/include/linux/nfs_xdr.h >> +++ b/include/linux/nfs_xdr.h >> @@ -266,6 +266,29 @@ struct nfs4_layoutcommit_data { >> struct nfs4_layoutcommit_res res; >> }; >> >> +struct nfs4_layoutreturn_args { >> + __u32 reclaim; >> + __u32 layout_type; >> + __u32 return_type; > > Why do we need a 'return_type'? As far as I can see, this will always be > RETURN_FILE. > Yeah, we can remove this for now. Benny >> + struct pnfs_layout_range range; >> + struct inode *inode; >> + struct nfs4_sequence_args seq_args; >> +}; >> + >> +struct nfs4_layoutreturn_res { >> + struct nfs4_sequence_res seq_res; >> + u32 lrs_present; >> + nfs4_stateid stateid; >> +}; >> + >> +struct nfs4_layoutreturn { >> + struct nfs4_layoutreturn_args args; >> + struct nfs4_layoutreturn_res res; >> + struct rpc_cred *cred; >> + struct nfs_client *clp; >> + int rpc_status; >> +}; >> + >> /* >> * Arguments to the open call. >> */ > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31a..73a2529 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1424,9 +1424,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_evict_inode(struct inode *inode) { - pnfs_destroy_layout(NFS_I(inode)); + pnfs_return_layout(inode, NULL, true); truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); + pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! */ nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9bf41ea..b03defb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5662,6 +5662,103 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) return status; } +static void +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s\n", __func__); + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, + &lrp->res.seq_res, 0, task)) + return; + rpc_call_start(task); +} + +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + struct nfs_server *server; + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &lrp->res.seq_res)) + return; + + if (lrp->args.return_type == RETURN_FILE) + server = NFS_SERVER(lrp->args.inode); + else + server = NULL; + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + nfs_restart_rpc(task, lrp->clp); + return; + } + if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) { + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + + spin_lock(&lo->plh_inode->i_lock); + if (lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + else + BUG_ON(!list_empty(&lo->plh_segs)); + spin_unlock(&lo->plh_inode->i_lock); + } + dprintk("<-- %s\n", __func__); +} + +static void nfs4_layoutreturn_release(void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type); + if (lrp->args.return_type == RETURN_FILE) { + struct inode *ino = lrp->args.inode; + struct pnfs_layout_hdr *lo = NFS_I(ino)->layout; + + put_layout_hdr(lo); + } + kfree(calldata); + dprintk("<-- %s\n", __func__); +} + +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { + .rpc_call_prepare = nfs4_layoutreturn_prepare, + .rpc_call_done = nfs4_layoutreturn_done, + .rpc_release = nfs4_layoutreturn_release, +}; + +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], + .rpc_argp = &lrp->args, + .rpc_resp = &lrp->res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = lrp->clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_layoutreturn_call_ops, + .callback_data = lrp, + .flags = RPC_TASK_ASYNC, + }; + int status = 0; + + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + if (!issync) + goto out; + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) + goto out; + status = task->tk_status; +out: + dprintk("<-- %s\n", __func__); + rpc_put_task(task); + return status; +} + static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index dddfb57..53ea3e5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -338,7 +338,12 @@ static int nfs4_stat_to_errno(int); 1 /* layoutupdate4 layout type */ + \ 1 /* NULL filelayout layoutupdate4 payload */) #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) - +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + 1 /* FIXME: opaque lrf_body always empty at + *the moment */) +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ + 1 + decode_stateid_maxsz) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -760,7 +765,14 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_layoutcommit_maxsz + \ decode_getattr_maxsz) - +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_layoutreturn_maxsz) +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutreturn_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1890,6 +1902,37 @@ encode_layoutcommit(struct xdr_stream *xdr, hdr->replen += decode_layoutcommit_maxsz; return 0; } + +static void +encode_layoutreturn(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct compound_hdr *hdr) +{ + nfs4_stateid stateid; + __be32 *p; + + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_LAYOUTRETURN); + *p++ = cpu_to_be32(args->reclaim); + *p++ = cpu_to_be32(args->layout_type); + *p++ = cpu_to_be32(args->range.iomode); + *p = cpu_to_be32(args->return_type); + if (args->return_type == RETURN_FILE) { + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); + p = xdr_encode_hyper(p, args->range.offset); + p = xdr_encode_hyper(p, args->range.length); + spin_lock(&args->inode->i_lock); + memcpy(stateid.data, NFS_I(args->inode)->layout->plh_stateid.data, + NFS4_STATEID_SIZE); + spin_unlock(&args->inode->i_lock); + p = xdr_encode_opaque_fixed(p, &stateid.data, + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); + } + hdr->nops++; + hdr->replen += decode_layoutreturn_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2707,9 +2750,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, /* * Encode LAYOUTCOMMIT request */ -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_layoutcommit_args *args) +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutcommit_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2721,7 +2764,24 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, encode_layoutcommit(xdr, args, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; +} + +/* + * Encode LAYOUTRETURN request + */ +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + encode_layoutreturn(xdr, args, &hdr); + encode_nops(&hdr); } #endif /* CONFIG_NFS_V4_1 */ @@ -5202,6 +5262,27 @@ out_overflow: return -EIO; } +static int decode_layoutreturn(struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_LAYOUTRETURN); + if (status) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->lrs_present = be32_to_cpup(p); + if (res->lrs_present) + status = decode_stateid(xdr, &res->stateid); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_layoutcommit(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_layoutcommit_res *res) @@ -6319,6 +6400,30 @@ out: } /* + * Decode LAYOUTRETURN response + */ +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_layoutreturn(xdr, res); +out: + return status; +} + +/* * Decode LAYOUTCOMMIT response */ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, @@ -6544,6 +6649,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), + PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d9ab972..89e7725 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -321,6 +321,36 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return invalid - removed; } +/* Returns false if there was nothing to do, true otherwise */ +static bool +pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, + struct pnfs_layout_range *range) +{ + struct pnfs_layout_segment *lseg, *next; + bool rv = false; + + dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n", + __func__, lo, range->offset, range->length, range->iomode); + assert_spin_locked(&lo->plh_inode->i_lock); + if (list_empty(&lo->plh_segs)) { + if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) + put_layout_hdr_locked(lo); + return 0; + } + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + if (should_free_lseg(lseg->pls_range.iomode, range->iomode)) { + dprintk("%s: freeing lseg %p iomode %d " + "offset %llu length %llu\n", __func__, + lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, + lseg->pls_range.length); + mark_lseg_invalid(lseg, tmp_list); + rv = true; + } + dprintk("%s:Return %d\n", __func__, rv); + return rv; +} + /* note free_me must contain lsegs from a single layout_hdr */ void pnfs_free_lseg_list(struct list_head *free_me) @@ -539,6 +569,72 @@ out_err_free: return NULL; } +static int +return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait) +{ + struct nfs4_layoutreturn *lrp; + struct nfs_server *server = NFS_SERVER(ino); + int status = -ENOMEM; + + dprintk("--> %s\n", __func__); + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (lrp == NULL) { + put_layout_hdr(NFS_I(ino)->layout); + goto out; + } + lrp->args.reclaim = 0; + lrp->args.layout_type = server->pnfs_curr_ld->id; + lrp->args.return_type = RETURN_FILE; + lrp->args.range = *range; + lrp->args.inode = ino; + lrp->clp = server->nfs_client; + + status = nfs4_proc_layoutreturn(lrp, wait); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + +/* Initiates a LAYOUTRETURN(FILE) */ +int +_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, bool wait) +{ + struct pnfs_layout_hdr *lo = NULL; + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_range arg; + LIST_HEAD(tmp_list); + int status = 0; + + dprintk("--> %s\n", __func__); + + arg.iomode = range ? range->iomode : IOMODE_ANY; + arg.offset = 0; + arg.length = NFS4_MAX_UINT64; + + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (!lo || !pnfs_clear_lseg_list(lo, &tmp_list, &arg)) { + spin_unlock(&ino->i_lock); + dprintk("%s: no layout segments to return\n", __func__); + goto out; + } + /* Reference matched in nfs4_layoutreturn_release */ + get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + + /* Return layout even if layoutcommit fails */ + status = pnfs_layoutcommit_inode(ino, wait); + if (status) + dprintk("%s: layoutcommit failed, status=%d. Returning layout anyway\n", + __func__, status); + status = return_layout(ino, &arg, wait); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4cb0a0d..a308f3c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -123,6 +123,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); @@ -158,6 +159,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *, bool wait); static inline int lo_fail_bit(u32 iomode) { @@ -226,6 +228,19 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) put_lseg(req->wb_commit_lseg); } +static inline int pnfs_return_layout(struct inode *ino, + struct pnfs_layout_range *range, + bool wait) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_server *nfss = NFS_SERVER(ino); + + if (pnfs_enabled_sb(nfss) && nfsi->layout) + return _pnfs_return_layout(ino, range, wait); + + return 0; +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -267,6 +282,13 @@ pnfs_try_to_write_data(struct nfs_write_data *data, return PNFS_NOT_ATTEMPTED; } +static inline int pnfs_return_layout(struct inode *ino, + struct pnfs_layout_range *range, + bool wait) +{ + return 0; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 178fafe..9376eaf 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -562,6 +562,7 @@ enum { NFSPROC4_CLNT_LAYOUTGET, NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, + NFSPROC4_CLNT_LAYOUTRETURN, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 78b101e..455ddfb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -266,6 +266,29 @@ struct nfs4_layoutcommit_data { struct nfs4_layoutcommit_res res; }; +struct nfs4_layoutreturn_args { + __u32 reclaim; + __u32 layout_type; + __u32 return_type; + struct pnfs_layout_range range; + struct inode *inode; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutreturn_res { + struct nfs4_sequence_res seq_res; + u32 lrs_present; + nfs4_stateid stateid; +}; + +struct nfs4_layoutreturn { + struct nfs4_layoutreturn_args args; + struct nfs4_layoutreturn_res res; + struct rpc_cred *cred; + struct nfs_client *clp; + int rpc_status; +}; + /* * Arguments to the open call. */