Message ID | 1382972247-1108-5-git-send-email-bjschuma@netapp.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Oct 28, 2013 at 10:57:26AM -0400, Anna Schumaker wrote: > This patch adds in the SEEK operation used by clients doing an llseek on > a file to find either hole or data sections in a file. I'm faking the > "allocated" status right now, since I haven't quite figured out how to > tell if a range is allocated on disk yet. > > This patch is missing correctly determining the "allocated" status of > the HOLE / DATA range. I expect I'll need to learn all about how fiemap > works before properly setting these values. > > Signed-off-by: Anna Schumaker <bjschuma@netapp.com> > --- > fs/nfsd/nfs4proc.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4xdr.c | 40 ++++++++++++++++++++++++++++++++++++++-- > fs/nfsd/xdr4.h | 15 +++++++++++++++ > include/linux/nfs4.h | 2 +- > 4 files changed, 98 insertions(+), 3 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 3210c6f..bc45ed2 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1064,6 +1064,45 @@ nfsd4_write_plus(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > return nfserr_union_notsupp; > } > > +static __be32 > +nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > + struct nfsd4_seek *seek) > +{ > + struct file *file; > + loff_t pos, end_pos; > + __be32 status; > + > + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, > + &seek->seek_stateid, > + RD_STATE | WR_STATE, &file); Note nfs4_preprocess_stateid_op requires the caller to hold the state lock. Andyou want to either hold it till you're done using "file" or take a reference on file before dropping it. --b. > + if (status != nfs_ok) > + return status; > + > + if (seek->seek_whence == NFS4_CONTENT_DATA) { > + pos = vfs_llseek(file, seek->seek_offset, SEEK_DATA); > + end_pos = vfs_llseek(file, pos, SEEK_HOLE); > + seek->seek_allocated = true; > + } else { > + pos = vfs_llseek(file, seek->seek_offset, SEEK_HOLE); > + end_pos = vfs_llseek(file, pos, SEEK_DATA); > + seek->seek_allocated = false; > + } > + > + if (pos < 0) > + return nfserrno(pos); > + else if (end_pos == -ENXIO) { > + seek->seek_length = 0; > + seek->seek_eof = true; > + } else if (end_pos < 0) > + return nfserrno(end_pos); > + else > + seek->seek_length = end_pos - pos; > + > + seek->seek_offset = pos; > + > + return nfs_ok; > +} > + > /* This routine never returns NFS_OK! If there are no other errors, it > * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the > * attributes matched. VERIFY is implemented by mapping NFSERR_SAME > @@ -1885,6 +1924,11 @@ static struct nfsd4_operation nfsd4_ops[] = { > .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, > .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid, > }, > + > + [OP_SEEK] = { > + .op_func = (nfsd4op_func)nfsd4_seek, > + .op_name = "OP_SEEK", > + }, > }; > > #ifdef NFSD_DEBUG > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 1e4e9af..8434740 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -1511,6 +1511,22 @@ nfsd4_decode_write_plus(struct nfsd4_compoundargs *argp, > } > > static __be32 > +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) > +{ > + DECODE_HEAD; > + > + status = nfsd4_decode_stateid(argp, &seek->seek_stateid); > + if (status) > + return status; > + > + READ_BUF(12); > + READ64(seek->seek_offset); > + READ32(seek->seek_whence); > + > + DECODE_TAIL; > +} > + > +static __be32 > nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) > { > return nfs_ok; > @@ -1693,7 +1709,7 @@ static nfsd4_dec nfsd42_dec_ops[] = { > [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, > [OP_WRITE_PLUS] = (nfsd4_dec)nfsd4_decode_write_plus, > [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, > - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_notsupp, > + [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, > [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, > }; > > @@ -3650,6 +3666,26 @@ nfsd4_encode_write_plus(struct nfsd4_compoundres *resp, __be32 nfserr, > } > > static __be32 > +nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, > + struct nfsd4_seek *seek) > +{ > + __be32 *p; > + > + if (nfserr) > + return nfserr; > + > + RESERVE_SPACE(28); > + WRITE32(seek->seek_eof); > + WRITE32(seek->seek_whence); > + WRITE64(seek->seek_offset); > + WRITE64(seek->seek_length); > + WRITE32(seek->seek_allocated); > + ADJUST_ARGS(); > + > + return nfserr; > +} > + > +static __be32 > nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) > { > return nfserr; > @@ -3730,7 +3766,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { > [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, > [OP_WRITE_PLUS] = (nfsd4_enc)nfsd4_encode_write_plus, > [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, > - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_noop, > + [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, > [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, > }; > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index aaef9ab..ae9debc 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -451,6 +451,20 @@ struct nfsd4_write_plus { > struct nfsd42_write_res wp_res; > }; > > +struct nfsd4_seek { > + /* request */ > + stateid_t seek_stateid; > + > + /* Shared between request and response */ > + u64 seek_offset; > + u32 seek_whence; > + > + /* response */ > + u32 seek_eof; > + u64 seek_length; > + u32 seek_allocated; > +}; > + > struct nfsd4_op { > int opnum; > __be32 status; > @@ -499,6 +513,7 @@ struct nfsd4_op { > > /* NFSv4.2 */ > struct nfsd4_write_plus write_plus; > + struct nfsd4_seek seek; > } u; > struct nfs4_replay * replay; > }; > diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h > index 55ed991..81d6b09 100644 > --- a/include/linux/nfs4.h > +++ b/include/linux/nfs4.h > @@ -128,7 +128,7 @@ enum nfs_opnum4 { > Needs to be updated if more operations are defined in future.*/ > > #define FIRST_NFS4_OP OP_ACCESS > -#define LAST_NFS4_OP OP_WRITE_PLUS > +#define LAST_NFS4_OP OP_SEEK > > enum nfsstat4 { > NFS4_OK = 0, > -- > 1.8.4.1 > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Oct 28, 2013 at 10:57:26AM -0400, Anna Schumaker wrote: > This patch adds in the SEEK operation used by clients doing an llseek on > a file to find either hole or data sections in a file. I'm faking the > "allocated" status right now, since I haven't quite figured out how to > tell if a range is allocated on disk yet. > > This patch is missing correctly determining the "allocated" status of > the HOLE / DATA range. I expect I'll need to learn all about how fiemap > works before properly setting these values. What is the definition of allocated in this context? Specificly how does it different from meaning of allocated as used by SEEK_DATA? Going out to fiemap is something we should absolutely avoid. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon 28 Oct 2013 05:51:26 PM EDT, J. Bruce Fields wrote: > On Mon, Oct 28, 2013 at 10:57:26AM -0400, Anna Schumaker wrote: >> This patch adds in the SEEK operation used by clients doing an llseek on >> a file to find either hole or data sections in a file. I'm faking the >> "allocated" status right now, since I haven't quite figured out how to >> tell if a range is allocated on disk yet. >> >> This patch is missing correctly determining the "allocated" status of >> the HOLE / DATA range. I expect I'll need to learn all about how fiemap >> works before properly setting these values. >> >> Signed-off-by: Anna Schumaker <bjschuma@netapp.com> >> --- >> fs/nfsd/nfs4proc.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfs4xdr.c | 40 ++++++++++++++++++++++++++++++++++++++-- >> fs/nfsd/xdr4.h | 15 +++++++++++++++ >> include/linux/nfs4.h | 2 +- >> 4 files changed, 98 insertions(+), 3 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 3210c6f..bc45ed2 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -1064,6 +1064,45 @@ nfsd4_write_plus(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> return nfserr_union_notsupp; >> } >> >> +static __be32 >> +nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> + struct nfsd4_seek *seek) >> +{ >> + struct file *file; >> + loff_t pos, end_pos; >> + __be32 status; >> + >> + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, >> + &seek->seek_stateid, >> + RD_STATE | WR_STATE, &file); > > Note nfs4_preprocess_stateid_op requires the caller to hold the state > lock. Andyou want to either hold it till you're done using "file" or > take a reference on file before dropping it. That's useful to know. I'll fix it up here, and anywhere else I call nfs4_preprocess_stateid_op()! > > --b. > >> + if (status != nfs_ok) >> + return status; >> + >> + if (seek->seek_whence == NFS4_CONTENT_DATA) { >> + pos = vfs_llseek(file, seek->seek_offset, SEEK_DATA); >> + end_pos = vfs_llseek(file, pos, SEEK_HOLE); >> + seek->seek_allocated = true; >> + } else { >> + pos = vfs_llseek(file, seek->seek_offset, SEEK_HOLE); >> + end_pos = vfs_llseek(file, pos, SEEK_DATA); >> + seek->seek_allocated = false; >> + } >> + >> + if (pos < 0) >> + return nfserrno(pos); >> + else if (end_pos == -ENXIO) { >> + seek->seek_length = 0; >> + seek->seek_eof = true; >> + } else if (end_pos < 0) >> + return nfserrno(end_pos); >> + else >> + seek->seek_length = end_pos - pos; >> + >> + seek->seek_offset = pos; >> + >> + return nfs_ok; >> +} >> + >> /* This routine never returns NFS_OK! If there are no other errors, it >> * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the >> * attributes matched. VERIFY is implemented by mapping NFSERR_SAME >> @@ -1885,6 +1924,11 @@ static struct nfsd4_operation nfsd4_ops[] = { >> .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, >> .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid, >> }, >> + >> + [OP_SEEK] = { >> + .op_func = (nfsd4op_func)nfsd4_seek, >> + .op_name = "OP_SEEK", >> + }, >> }; >> >> #ifdef NFSD_DEBUG >> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >> index 1e4e9af..8434740 100644 >> --- a/fs/nfsd/nfs4xdr.c >> +++ b/fs/nfsd/nfs4xdr.c >> @@ -1511,6 +1511,22 @@ nfsd4_decode_write_plus(struct nfsd4_compoundargs *argp, >> } >> >> static __be32 >> +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) >> +{ >> + DECODE_HEAD; >> + >> + status = nfsd4_decode_stateid(argp, &seek->seek_stateid); >> + if (status) >> + return status; >> + >> + READ_BUF(12); >> + READ64(seek->seek_offset); >> + READ32(seek->seek_whence); >> + >> + DECODE_TAIL; >> +} >> + >> +static __be32 >> nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) >> { >> return nfs_ok; >> @@ -1693,7 +1709,7 @@ static nfsd4_dec nfsd42_dec_ops[] = { >> [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, >> [OP_WRITE_PLUS] = (nfsd4_dec)nfsd4_decode_write_plus, >> [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, >> - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_notsupp, >> + [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, >> [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, >> }; >> >> @@ -3650,6 +3666,26 @@ nfsd4_encode_write_plus(struct nfsd4_compoundres *resp, __be32 nfserr, >> } >> >> static __be32 >> +nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, >> + struct nfsd4_seek *seek) >> +{ >> + __be32 *p; >> + >> + if (nfserr) >> + return nfserr; >> + >> + RESERVE_SPACE(28); >> + WRITE32(seek->seek_eof); >> + WRITE32(seek->seek_whence); >> + WRITE64(seek->seek_offset); >> + WRITE64(seek->seek_length); >> + WRITE32(seek->seek_allocated); >> + ADJUST_ARGS(); >> + >> + return nfserr; >> +} >> + >> +static __be32 >> nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) >> { >> return nfserr; >> @@ -3730,7 +3766,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { >> [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, >> [OP_WRITE_PLUS] = (nfsd4_enc)nfsd4_encode_write_plus, >> [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, >> - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_noop, >> + [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, >> [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, >> }; >> >> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >> index aaef9ab..ae9debc 100644 >> --- a/fs/nfsd/xdr4.h >> +++ b/fs/nfsd/xdr4.h >> @@ -451,6 +451,20 @@ struct nfsd4_write_plus { >> struct nfsd42_write_res wp_res; >> }; >> >> +struct nfsd4_seek { >> + /* request */ >> + stateid_t seek_stateid; >> + >> + /* Shared between request and response */ >> + u64 seek_offset; >> + u32 seek_whence; >> + >> + /* response */ >> + u32 seek_eof; >> + u64 seek_length; >> + u32 seek_allocated; >> +}; >> + >> struct nfsd4_op { >> int opnum; >> __be32 status; >> @@ -499,6 +513,7 @@ struct nfsd4_op { >> >> /* NFSv4.2 */ >> struct nfsd4_write_plus write_plus; >> + struct nfsd4_seek seek; >> } u; >> struct nfs4_replay * replay; >> }; >> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h >> index 55ed991..81d6b09 100644 >> --- a/include/linux/nfs4.h >> +++ b/include/linux/nfs4.h >> @@ -128,7 +128,7 @@ enum nfs_opnum4 { >> Needs to be updated if more operations are defined in future.*/ >> >> #define FIRST_NFS4_OP OP_ACCESS >> -#define LAST_NFS4_OP OP_WRITE_PLUS >> +#define LAST_NFS4_OP OP_SEEK >> >> enum nfsstat4 { >> NFS4_OK = 0, >> -- >> 1.8.4.1 >> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue 29 Oct 2013 03:35:58 AM EDT, Christoph Hellwig wrote: > On Mon, Oct 28, 2013 at 10:57:26AM -0400, Anna Schumaker wrote: >> This patch adds in the SEEK operation used by clients doing an llseek on >> a file to find either hole or data sections in a file. I'm faking the >> "allocated" status right now, since I haven't quite figured out how to >> tell if a range is allocated on disk yet. >> >> This patch is missing correctly determining the "allocated" status of >> the HOLE / DATA range. I expect I'll need to learn all about how fiemap >> works before properly setting these values. > > What is the definition of allocated in this context? Specificly how > does it different from meaning of allocated as used by SEEK_DATA? From what I can tell, I think the allocated flag will just tell the clients if all the blocks exist on disk or not. Is there a way to have a hole with allocated blocks? Or maybe it's supposed to represent partially allocated blocks? I checked the draft, and it doesn't actually say what they expect allocated to represent... > > Going out to fiemap is something we should absolutely avoid. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Oct 29, 2013 at 09:00:48AM -0400, Anna Schumaker wrote: > From what I can tell, I think the allocated flag will just tell the > clients if all the blocks exist on disk or not. Is there a way to have > a hole with allocated blocks? Or maybe it's supposed to represent > partially allocated blocks? I checked the draft, and it doesn't > actually say what they expect allocated to represent... You can have preallocated space, which behaves like a hole in that reads return zeroes. As far as SEEK_DATA/SEEK_HOLE is concerned we tend to treat them as holes when we can as that's what the users expect. Note that at least the pnfs block rfc has a special state representing this kind of preallocated space. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Oct 29, 2013 at 06:07:21AM -0700, Christoph Hellwig wrote: > On Tue, Oct 29, 2013 at 09:00:48AM -0400, Anna Schumaker wrote: > > From what I can tell, I think the allocated flag will just tell the > > clients if all the blocks exist on disk or not. Is there a way to have > > a hole with allocated blocks? Or maybe it's supposed to represent > > partially allocated blocks? I checked the draft, and it doesn't > > actually say what they expect allocated to represent... > > You can have preallocated space, which behaves like a hole in that > reads return zeroes. As far as SEEK_DATA/SEEK_HOLE is concerned we > tend to treat them as holes when we can as that's what the users expect. > > Note that at least the pnfs block rfc has a special state representing > this kind of preallocated space. http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-20#section-5.2 E.g. "Hole: A byte range within a Sparse file that contains regions of all zeroes. For block-based file systems, this could also be an unallocated region of the file." So it sounds like a hole can either be allocated or not, which seems to agree with what you expect? I haven't read these parts of the spec yet at all.... --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Btw, I just noticed that current nfs mainline fails xfstests 286 which test SEEK_DATA/SEEK_HOLE. That sounds like a useful test case for your patches. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/31/2013 12:04 PM, Christoph Hellwig wrote: > Btw, I just noticed that current nfs mainline fails xfstests 286 which > test SEEK_DATA/SEEK_HOLE. That sounds like a useful test case for your > patches. > Yeah, I've been using it to test this. I'm having trouble getting the last few parts of both test 11 and 12 to pass, which is better than what's happening now! Anna -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 10/31/2013 12:04 PM, Christoph Hellwig wrote: > Btw, I just noticed that current nfs mainline fails xfstests 286 which > test SEEK_DATA/SEEK_HOLE. That sounds like a useful test case for your > patches. > I read too quickly. I've been using 285 to test against, but I'll see what 286 gets me! Thanks for the tip :) -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Oct 31, 2013 at 12:17:30PM -0400, Anna Schumaker wrote: > On 10/31/2013 12:04 PM, Christoph Hellwig wrote: > > Btw, I just noticed that current nfs mainline fails xfstests 286 which > > test SEEK_DATA/SEEK_HOLE. That sounds like a useful test case for your > > patches. > > > > I read too quickly. I've been using 285 to test against, but I'll see what 286 gets me! Thanks for the tip :) I actually meant 285. 286 tests SEEK_HOLE as well but passes already with the dumb default behaviour. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Oct 29, 2013 at 09:30:06AM -0400, J. Bruce Fields wrote:
> I haven't read these parts of the spec yet at all....
Btw, is there a way do to something like a git blame for the spec? I'd
really love to confront people personally with some of the odd bits they
came up with.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Nov 02, 2013 at 06:48:37AM -0700, Christoph Hellwig wrote: > On Tue, Oct 29, 2013 at 09:30:06AM -0400, J. Bruce Fields wrote: > > I haven't read these parts of the spec yet at all.... > > Btw, is there a way do to something like a git blame for the spec? I'd > really love to confront people personally with some of the odd bits they > came up with. It *is* in git, so you can literally run git blame: https://github.com/loghyr/NFSv4.2 But it'll probably credit almost everything to Tom and he's typically just the messenger.... Just direct gripes to nfsv4@ietf.org. They also take patches. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Nov 02, 2013 at 10:37:29AM -0400, J. Bruce Fields wrote: > On Sat, Nov 02, 2013 at 06:48:37AM -0700, Christoph Hellwig wrote: > > On Tue, Oct 29, 2013 at 09:30:06AM -0400, J. Bruce Fields wrote: > > > I haven't read these parts of the spec yet at all.... > > > > Btw, is there a way do to something like a git blame for the spec? I'd > > really love to confront people personally with some of the odd bits they > > came up with. > > It *is* in git, so you can literally run git blame: > > https://github.com/loghyr/NFSv4.2 > > But it'll probably credit almost everything to Tom and he's typically > just the messenger.... Indeed, a quick try attributes everything to him. No good way to figure out who is to blame for the whole WRITE PLUS idiocy. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Nov 02, 2013 at 07:41:07AM -0700, Christoph Hellwig wrote: > On Sat, Nov 02, 2013 at 10:37:29AM -0400, J. Bruce Fields wrote: > > On Sat, Nov 02, 2013 at 06:48:37AM -0700, Christoph Hellwig wrote: > > > On Tue, Oct 29, 2013 at 09:30:06AM -0400, J. Bruce Fields wrote: > > > > I haven't read these parts of the spec yet at all.... > > > > > > Btw, is there a way do to something like a git blame for the spec? I'd > > > really love to confront people personally with some of the odd bits they > > > came up with. > > > > It *is* in git, so you can literally run git blame: > > > > https://github.com/loghyr/NFSv4.2 > > > > But it'll probably credit almost everything to Tom and he's typically > > just the messenger.... > > Indeed, a quick try attributes everything to him. No good way to figure > out who is to blame for the whole WRITE PLUS idiocy. Imagine it was me, if it helps. The important thing is that we understand what should be fixed. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 04, 2013 at 11:46:58AM -0500, J. Bruce Fields wrote: > Imagine it was me, if it helps. > > The important thing is that we understand what should be fixed. The important part is how someone got the idea for the pattern writing and the weird ADBs from and why they believe they should overload a single command for all of them. And apparently reqular data writes as well, even if I haven't found that part of the spec yet. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Nov 4, 2013, at 9:05 AM, Christoph Hellwig <hch@infradead.org> wrote: > On Mon, Nov 04, 2013 at 11:46:58AM -0500, J. Bruce Fields wrote: >> Imagine it was me, if it helps. >> >> The important thing is that we understand what should be fixed. > > The important part is how someone got the idea for the pattern writing > and the weird ADBs from The original proposal is http://tools.ietf.org/html/draft-eisler-nfsv4-enterprise-apps-01 and has been somewhat modified in its expression in NFSv4.2. > and why they believe they should overload a > single command for all of them. I believe that file initialization is closer in semantics to COPY_OFFLOAD than it is to WRITE, and I find it awkward to use the WRITE_PLUS operation for initialization. But I could never make a strong technical argument why they should not be joined, other than "Clutter!". > And apparently reqular data writes as > well, even if I haven't found that part of the spec yet. http://www.ietf.org/id/draft-ietf-nfsv4-minorversion2-20.txt is the latest version I can find. The usual process is to explain what needs to be fixed (ie, make a problem statement) as Bruce said. That is often accompanied by an alternate design proposal that addresses your issues. It is appropriate to address your comments to the group, and not to a single person. As in most online communities, ad hominem and confrontation are not appreciated. -- Chuck Lever chuck[dot]lever[at]oracle[dot]com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Nov 4, 2013, at 8:46 AM, J. Bruce Fields <bfields@fieldses.org> wrote: > On Sat, Nov 02, 2013 at 07:41:07AM -0700, Christoph Hellwig wrote: >> On Sat, Nov 02, 2013 at 10:37:29AM -0400, J. Bruce Fields wrote: >>> On Sat, Nov 02, 2013 at 06:48:37AM -0700, Christoph Hellwig wrote: >>>> On Tue, Oct 29, 2013 at 09:30:06AM -0400, J. Bruce Fields wrote: >>>>> I haven't read these parts of the spec yet at all.... >>>> >>>> Btw, is there a way do to something like a git blame for the spec? I'd >>>> really love to confront people personally with some of the odd bits they >>>> came up with. >>> >>> It *is* in git, so you can literally run git blame: >>> >>> https://github.com/loghyr/NFSv4.2 >>> >>> But it'll probably credit almost everything to Tom and he's typically >>> just the messenger.... >> >> Indeed, a quick try attributes everything to him. No good way to figure >> out who is to blame for the whole WRITE PLUS idiocy. > > Imagine it was me, if it helps. > Nah, most likely me, no imagination needed. We were also looking at bringing larger WRITEs during 4.3 and I wanted to have WRITE_PLUS already in place for that. > The important thing is that we understand what should be fixed. > > --b. > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 04, 2013 at 01:56:39PM -0800, Thomas Haynes wrote: > Nah, most likely me, no imagination needed. > > We were also looking at bringing larger WRITEs during 4.3 and I wanted > to have WRITE_PLUS already in place for that. What exactly does a WRITE PLUS with various non-related overloads in 4.2 buy you for introducing large writes in 4.3? -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Nov 4, 2013, at 5:03 PM, Christoph Hellwig <hch@infradead.org> wrote: > On Mon, Nov 04, 2013 at 01:56:39PM -0800, Thomas Haynes wrote: >> Nah, most likely me, no imagination needed. >> >> We were also looking at bringing larger WRITEs during 4.3 and I wanted >> to have WRITE_PLUS already in place for that. > > What exactly does a WRITE PLUS with various non-related overloads in 4.2 > buy you for introducing large writes in 4.3? In 4.3, READ_PLUS would be extended for large reads. We would at that time need a WRITE_PLUS to be extended for large writes. The relationship falls from having WRITE_PLUS handle all of the variants handled by READ_PLUS. It needs to accept all of them because it does not know what will be returned. We did not want many new operators and also wanted the operators to be extensible. With this approach, you can define a new arm of the discriminated union, not have to implement it, and not burn an operator. Some of the history is captured here: http://www.ietf.org/mail-archive/web/nfsv4/current/msg11235.html http://www.ietf.org/mail-archive/web/nfsv4/current/msg11470.html http://www.ietf.org/proceedings/84/slides/slides-84-nfsv4-1.pdf (slide 6) It doesn't capture the intent of NFS4ERR_UNION_NOTSUPP in this decision. 11.1.1.1. NFS4ERR_UNION_NOTSUPP (Error Code 10090) One of the arguments to the operation is a discriminated union and while the server supports the given operation, it does not support the selected arm of the discriminated union. For an example, see READ_PLUS (Section 14.10). -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Nov 05, 2013 at 02:07:15AM +0000, Haynes, Tom wrote: > We did not want many new operators and also wanted the operators to > be extensible. With this approach, you can define a new arm of the > discriminated union, not have to implement it, and not burn an > operator. That seems very much like a non-argument. If saving operators was a good argument the NFS operations should be MULTIPLEX1 with many sub opcodes, followed by MULTIPLEX2 once it fills up. > Some of the history is captured here: > > http://www.ietf.org/mail-archive/web/nfsv4/current/msg11235.html That mail seems to draw the wrong conclusion that a hole punching or a preallocation are equivalent to a server side copy from /dev/zero. Treating a pattern write as a server side copy is fine and I'd fully support that. Hole punching and preallocation on the other hand are primarily metadata operations that reserve or free space. They only happen to zero out the range as zero is the most convenient well known pattern to avoid stale data exposure. > http://www.ietf.org/mail-archive/web/nfsv4/current/msg11470.html I think Chuck's reply summarizes very well why a pattern initialization should not be mixed with an actual data write. > http://www.ietf.org/proceedings/84/slides/slides-84-nfsv4-1.pdf (slide 6) That side seems to inadvertently sum up a lot of what's wrong with merging hole punching and preallocations into some form of super write: - COMMIT doesn't really apply to pure metadata operations like a hole punch and preallocation, so fitting it into a WRITE that expects COMMIT causes all kinds of problems (as we saw in the thread about Annas implementation). - requiring the server to be able to handle offloads for these operations does not make any sense, because they are again very quick metadata operation, and not long running operation like a pattern initialization on the server. Note that the arbitrary pattern initialization over multiple blocks is a very different operation from a space allocation even if the latter happens to also zero the range as a side effect. > > It doesn't capture the intent of NFS4ERR_UNION_NOTSUPP in > this decision. > > 11.1.1.1. NFS4ERR_UNION_NOTSUPP (Error Code 10090) > > One of the arguments to the operation is a discriminated union and > while the server supports the given operation, it does not support > the selected arm of the discriminated union. For an example, see > READ_PLUS (Section 14.10). Btw, there is an odd use of this error in 14.7.3.: "WRITE_PLUS has to support all of the errors which are returned by WRITE plus NFS4ERR_UNION_NOTSUPP. If the client asks for a hole and the server does not support that arm of the discriminated union, but does support one or more additional arms, it can signal to the client that it supports the operation, but not the arm with NFS4ERR_UNION_NOTSUPP." This does not specicly writes but appears to assume hole punching is the only optional arm. On the other hand to me it appears the only interesting arm, with the data arm buying nothing over WRITE in 4.2 and thus being entirely superflous, and ADHs being a complicated map to Unix filesystems on the backend. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Nov 04, 2013 at 09:22:11AM -0800, Chuck Lever wrote: > The original proposal is > > http://tools.ietf.org/html/draft-eisler-nfsv4-enterprise-apps-01 > > and has been somewhat modified in its expression in NFSv4.2. The INITIALIZE operation in that proposal look like a sane translation of the SCSI WRITE SAME commands to NFS, but I have no idea how that eveloved into the monster of ADHs in the NFS4.2 draft. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3210c6f..bc45ed2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1064,6 +1064,45 @@ nfsd4_write_plus(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_union_notsupp; } +static __be32 +nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_seek *seek) +{ + struct file *file; + loff_t pos, end_pos; + __be32 status; + + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, + &seek->seek_stateid, + RD_STATE | WR_STATE, &file); + if (status != nfs_ok) + return status; + + if (seek->seek_whence == NFS4_CONTENT_DATA) { + pos = vfs_llseek(file, seek->seek_offset, SEEK_DATA); + end_pos = vfs_llseek(file, pos, SEEK_HOLE); + seek->seek_allocated = true; + } else { + pos = vfs_llseek(file, seek->seek_offset, SEEK_HOLE); + end_pos = vfs_llseek(file, pos, SEEK_DATA); + seek->seek_allocated = false; + } + + if (pos < 0) + return nfserrno(pos); + else if (end_pos == -ENXIO) { + seek->seek_length = 0; + seek->seek_eof = true; + } else if (end_pos < 0) + return nfserrno(end_pos); + else + seek->seek_length = end_pos - pos; + + seek->seek_offset = pos; + + return nfs_ok; +} + /* This routine never returns NFS_OK! If there are no other errors, it * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the * attributes matched. VERIFY is implemented by mapping NFSERR_SAME @@ -1885,6 +1924,11 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid, }, + + [OP_SEEK] = { + .op_func = (nfsd4op_func)nfsd4_seek, + .op_name = "OP_SEEK", + }, }; #ifdef NFSD_DEBUG diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1e4e9af..8434740 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1511,6 +1511,22 @@ nfsd4_decode_write_plus(struct nfsd4_compoundargs *argp, } static __be32 +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &seek->seek_stateid); + if (status) + return status; + + READ_BUF(12); + READ64(seek->seek_offset); + READ32(seek->seek_whence); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) { return nfs_ok; @@ -1693,7 +1709,7 @@ static nfsd4_dec nfsd42_dec_ops[] = { [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_WRITE_PLUS] = (nfsd4_dec)nfsd4_decode_write_plus, [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, }; @@ -3650,6 +3666,26 @@ nfsd4_encode_write_plus(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 +nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_seek *seek) +{ + __be32 *p; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(28); + WRITE32(seek->seek_eof); + WRITE32(seek->seek_whence); + WRITE64(seek->seek_offset); + WRITE64(seek->seek_length); + WRITE32(seek->seek_allocated); + ADJUST_ARGS(); + + return nfserr; +} + +static __be32 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) { return nfserr; @@ -3730,7 +3766,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, [OP_WRITE_PLUS] = (nfsd4_enc)nfsd4_encode_write_plus, [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, }; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index aaef9ab..ae9debc 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -451,6 +451,20 @@ struct nfsd4_write_plus { struct nfsd42_write_res wp_res; }; +struct nfsd4_seek { + /* request */ + stateid_t seek_stateid; + + /* Shared between request and response */ + u64 seek_offset; + u32 seek_whence; + + /* response */ + u32 seek_eof; + u64 seek_length; + u32 seek_allocated; +}; + struct nfsd4_op { int opnum; __be32 status; @@ -499,6 +513,7 @@ struct nfsd4_op { /* NFSv4.2 */ struct nfsd4_write_plus write_plus; + struct nfsd4_seek seek; } u; struct nfs4_replay * replay; }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 55ed991..81d6b09 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -128,7 +128,7 @@ enum nfs_opnum4 { Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS -#define LAST_NFS4_OP OP_WRITE_PLUS +#define LAST_NFS4_OP OP_SEEK enum nfsstat4 { NFS4_OK = 0,
This patch adds in the SEEK operation used by clients doing an llseek on a file to find either hole or data sections in a file. I'm faking the "allocated" status right now, since I haven't quite figured out how to tell if a range is allocated on disk yet. This patch is missing correctly determining the "allocated" status of the HOLE / DATA range. I expect I'll need to learn all about how fiemap works before properly setting these values. Signed-off-by: Anna Schumaker <bjschuma@netapp.com> --- fs/nfsd/nfs4proc.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 40 ++++++++++++++++++++++++++++++++++++++-- fs/nfsd/xdr4.h | 15 +++++++++++++++ include/linux/nfs4.h | 2 +- 4 files changed, 98 insertions(+), 3 deletions(-)