Message ID: 1426540688-32095-4-git-send-email-Anna.Schumaker@Netapp.com (mailing list archive)
State: New, archived
On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: > This patch implements sending an array of segments back to the client. > Clients should be prepared to handle multiple segment reads to make this > useful. We try to splice the first data segment into the XDR result, > and remaining segments are encoded directly. I'm still interested in what would happen if we started with an implementation like: - if the entire requested range falls within a hole, return that single hole. - otherwise, just treat the thing as one big data segment. That would provide a benefit in the case there are large-ish holes with minimal impact otherwise. (Though patches for full support are still useful even if only for client-testing purposes.) --b. > > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com> > --- > fs/nfsd/nfs4proc.c | 4 ++-- > fs/nfsd/nfs4xdr.c | 35 ++++++++++++++++++++++++----------- > 2 files changed, 26 insertions(+), 13 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index e9f4d8f..6801973 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1862,8 +1862,8 @@ static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op > { > u32 maxcount = svc_max_payload(rqstp); > u32 rlen = min(op->u.read.rd_length, maxcount); > - /* enough extra xdr space for encoding either a hole or data segment. */ > - u32 xdr = 5; > + /* Extra xdr padding for encoding multiple segments. */ > + u32 xdr = 20; > > return (op_encode_hdr_size + 2 + xdr + XDR_QUADLEN(rlen)) * sizeof(__be32); > } > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 799d52c..5eaecd2 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4117,7 +4117,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, > > static __be32 > nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *read, > - struct file *file) > + struct file *file, loff_t hole_pos) > { > __be32 *p, err; > unsigned long maxcount; > @@ -4128,20 +4128,26 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *r > return nfserr_resource; > xdr_commit_encode(xdr); > > + if (hole_pos <= read->rd_offset) > + hole_pos = i_size_read(file_inode(file)); > + > maxcount = svc_max_payload(resp->rqstp); > maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); > maxcount = min_t(unsigned long, maxcount, read->rd_length); > + maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset); > > if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) > err = nfsd4_encode_splice_read(resp, read, file, &maxcount); > else > err = nfsd4_encode_readv(resp, read, file, &maxcount); > + clear_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); > > *p++ = cpu_to_be32(NFS4_CONTENT_DATA); > p = xdr_encode_hyper(p, read->rd_offset); > *p++ = cpu_to_be32(maxcount); > > read->rd_offset += maxcount; > + read->rd_length -= maxcount; > return err; > } > > @@ -4156,7 +4162,7 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *r > if (data_pos == -ENXIO) > data_pos = i_size_read(file_inode(file)); > if (data_pos <= read->rd_offset) > - return nfsd4_encode_read_plus_data(resp, read, file); > + return nfsd4_encode_read_plus_data(resp, read, file, 0); > > maxcount = data_pos - read->rd_offset; > p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8); > @@ -4165,6 +4171,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *r > p = xdr_encode_hyper(p, maxcount); > > 
read->rd_offset += maxcount; > + if (maxcount > read->rd_length) > + read->rd_length = 0; > + else > + read->rd_length -= maxcount; > return nfs_ok; > } > > @@ -4197,17 +4207,20 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, > goto err_truncate; > } > > - hole_pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); > - if (hole_pos == -ENXIO) > - goto out_encode; > + do { > + hole_pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); > + if (hole_pos == -ENXIO) > + break; > > - if (hole_pos == read->rd_offset) > - err = nfsd4_encode_read_plus_hole(resp, read, file); > - else > - err = nfsd4_encode_read_plus_data(resp, read, file); > - segments++; > + if (hole_pos == read->rd_offset) > + err = nfsd4_encode_read_plus_hole(resp, read, file); > + else > + err = nfsd4_encode_read_plus_data(resp, read, file, hole_pos); > + if (err) > + break; > + segments++; > + } while (read->rd_length > 0); > > -out_encode: > eof = (read->rd_offset >= i_size_read(file_inode(file))); > *p++ = cpu_to_be32(eof); > *p++ = cpu_to_be32(segments); > -- > 2.3.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
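To make the minimal scheme suggested above concrete, here is a rough userspace illustration of the decision it implies. This is not the nfsd code; it is a hypothetical helper built on lseek(), and it assumes the exported filesystem supports SEEK_HOLE/SEEK_DATA:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>

enum segment_type { SEG_DATA, SEG_HOLE };

/*
 * Classify a requested [offset, offset + length) range the way the minimal
 * scheme above would: report a single HOLE segment only when the whole range
 * falls within a hole, otherwise treat the whole range as one DATA segment.
 * (EOF handling is ignored here; a real server also caps length at i_size.)
 */
static enum segment_type classify_range(int fd, off_t offset, off_t length)
{
    off_t data_pos = lseek(fd, offset, SEEK_DATA);

    if (data_pos == -1 && errno == ENXIO)
        return SEG_HOLE;    /* no data at or after offset */
    if (data_pos == -1)
        return SEG_DATA;    /* SEEK_DATA unsupported: be conservative */
    if (data_pos >= offset + length)
        return SEG_HOLE;    /* next data begins after the requested range */
    return SEG_DATA;        /* some data inside the range */
}

int main(int argc, char *argv[])
{
    if (argc != 4)
        errx(1, "usage: %s <file> <offset> <length>", argv[0]);
    int fd = open(argv[1], O_RDONLY);
    if (fd == -1)
        err(1, "open");
    printf("%s\n", classify_range(fd, atoll(argv[2]), atoll(argv[3])) ==
            SEG_HOLE ? "HOLE" : "DATA");
    return 0;
}

With this scheme every reply carries exactly one segment, so the extra xdr padding the patch above reserves for multiple segments would not be needed.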
On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: > On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: > > This patch implements sending an array of segments back to the client. > > Clients should be prepared to handle multiple segment reads to make this > > useful. We try to splice the first data segment into the XDR result, > > and remaining segments are encoded directly. > > I'm still interested in what would happen if we started with an > implementation like: > > - if the entire requested range falls within a hole, return that > single hole. > - otherwise, just treat the thing as one big data segment. > > That would provide a benefit in the case there are large-ish holes > with minimal impact otherwise. > > (Though patches for full support are still useful even if only for > client-testing purposes.) Also, looks like xvs_io -c "fiemap -v" <file> will give hole sizes for a given <file>. (Thanks, esandeen.) Running that on a few of my test vm images shows a fair number of large (hundreds of megs) files, which suggests identifying only >=rwsize holes might still be useful. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
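For anyone who wants the same hole-size numbers programmatically rather than through the fiemap query above, a small sketch using the FIEMAP ioctl follows. It assumes the file's extents fit in a single ioctl call and ignores unwritten extents; MAX_EXTENTS is an arbitrary limit:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>
#include <linux/fiemap.h>
#include <err.h>

#define MAX_EXTENTS 1024    /* assume the whole file maps in one call */

int main(int argc, char *argv[])
{
    struct fiemap *fm;
    struct stat st;
    unsigned long long covered = 0;
    unsigned int i;
    int fd;

    if (argc != 2)
        errx(1, "usage: %s <file>", argv[0]);
    fd = open(argv[1], O_RDONLY);
    if (fd == -1 || fstat(fd, &st) == -1)
        err(1, "%s", argv[1]);

    fm = calloc(1, sizeof(*fm) + MAX_EXTENTS * sizeof(struct fiemap_extent));
    if (!fm)
        err(1, "calloc");
    fm->fm_start = 0;
    fm->fm_length = FIEMAP_MAX_OFFSET;
    fm->fm_flags = FIEMAP_FLAG_SYNC;
    fm->fm_extent_count = MAX_EXTENTS;

    if (ioctl(fd, FS_IOC_FIEMAP, fm) == -1)
        err(1, "FS_IOC_FIEMAP");

    /* Sum the logical bytes covered by extents; the remainder is holes. */
    for (i = 0; i < fm->fm_mapped_extents; i++)
        covered += fm->fm_extents[i].fe_length;

    printf("file size:  %lld\n", (long long)st.st_size);
    printf("mapped:     %llu\n", covered);
    printf("hole bytes: %lld\n", (long long)st.st_size - (long long)covered);
    return 0;
}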
On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: > On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: > > On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: > > > This patch implements sending an array of segments back to the client. > > > Clients should be prepared to handle multiple segment reads to make this > > > useful. We try to splice the first data segment into the XDR result, > > > and remaining segments are encoded directly. > > > > I'm still interested in what would happen if we started with an > > implementation like: > > > > - if the entire requested range falls within a hole, return that > > single hole. > > - otherwise, just treat the thing as one big data segment. > > > > That would provide a benefit in the case there are large-ish holes > > with minimal impact otherwise. > > > > (Though patches for full support are still useful even if only for > > client-testing purposes.) > > Also, looks like > > xvs_io -c "fiemap -v" <file> > > will give hole sizes for a given <file>. (Thanks, esandeen.) Running > that on a few of my test vm images shows a fair number of large > (hundreds of megs) files, which suggests identifying only >=rwsize holes > might still be useful.

Just for fun.... I wrote the following test program and ran it on my collection of testing vm's. Some looked like this:

f21-1.qcow2
144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2
total hole bytes: 8443252736 (98%)
in aligned 1MB chunks: 8428453888 (98%)

So, basically, read_plus would save transferring most of the data even when only handling 1MB holes.

But some looked like this:

501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img
total hole bytes: 8077516800 (94%)
in aligned 1MB chunks: 0 (0%)

So the READ_PLUS that caught every hole might save a lot, the one that only caught 1MB holes wouldn't help at all. And there were lots of examples in between those two extremes.

(But, check my math, I haven't tested this carefully.)

--b.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>

long round_up(long n, long b)
{
    return ((n + b - 1)/b) * b;
}

long round_down(long n, long b)
{
    return (n/b) * b;
}

long hbytes = 0;
long rplusbytes = 0;

void do_stats(off_t hole_start, off_t hole_end)
{
    off_t hole_start_up, hole_end_down;

    hole_start_up = round_up(hole_start, 1024*1024);
    hole_end_down = round_down(hole_end, 1024*1024);

    hbytes += hole_end - hole_start;
    if (hole_start_up < hole_end_down)
        rplusbytes += hole_end_down - hole_start_up;
}

int main(int argc, char *argv[])
{
    off_t hole_start, hole_end;
    int fd;
    char *name;

    /* Map out holes with SEEK_HOLE, SEEK_DATA */
    /* Useful statistics:
     *  - what percentage of file is in holes?
     *  - what percentage of file would be skipped if we read it
     *    sequentially in 1MB chunks?
     */

    if (argc != 2)
        errx(1, "usage: %s <filename>\n", argv[0]);
    name = argv[1];
    fd = open(name, O_RDONLY);
    if (fd == -1)
        err(1, "open");

    hole_end = 0;
    while (1) {
        hole_start = lseek(fd, hole_end, SEEK_HOLE);
        if (hole_start == -1)
            err(1, "lseek");
        hole_end = lseek(fd, hole_start, SEEK_DATA);
        if (hole_end == -1) {
            if (errno == ENXIO)
                break;
            err(1, "lseek");
        }
        do_stats(hole_start, hole_end);
    }
    hole_end = lseek(fd, 0, SEEK_END);
    do_stats(hole_start, hole_end);
    printf("total hole bytes: %ld (%.0f%%)\n", hbytes,
        100 * (float)hbytes/hole_end);
    printf("in aligned 1MB chunks: %ld (%.0f%%)\n", rplusbytes,
        100 * (float)rplusbytes/hole_end);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: >> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: >>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: >>>> This patch implements sending an array of segments back to the client. >>>> Clients should be prepared to handle multiple segment reads to make this >>>> useful. We try to splice the first data segment into the XDR result, >>>> and remaining segments are encoded directly. >>> >>> I'm still interested in what would happen if we started with an >>> implementation like: >>> >>> - if the entire requested range falls within a hole, return that >>> single hole. >>> - otherwise, just treat the thing as one big data segment. >>> >>> That would provide a benefit in the case there are large-ish holes >>> with minimal impact otherwise. >>> >>> (Though patches for full support are still useful even if only for >>> client-testing purposes.) >> >> Also, looks like >> >> xvs_io -c "fiemap -v" <file> >> >> will give hole sizes for a given <file>. (Thanks, esandeen.) Running >> that on a few of my test vm images shows a fair number of large >> (hundreds of megs) files, which suggests identifying only >=rwsize holes >> might still be useful. > > Just for fun.... I wrote the following test program and ran it on my > collection of testing vm's. Some looked like this: > > f21-1.qcow2 > 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2 > total hole bytes: 8443252736 (98%) > in aligned 1MB chunks: 8428453888 (98%) > > So, basically, read_plus would save transferring most of the data even > when only handling 1MB holes. > > But some looked like this: > > 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img > total hole bytes: 8077516800 (94%) > in aligned 1MB chunks: 0 (0%) > > So the READ_PLUS that caught every hole might save a lot, the one that > only caught 1MB holes wouldn't help at all. > > And there were lots of examples in between those two extremes.

I tested with three different 512 MB files: 100% data, 100% hole, and alternating every megabyte. The results were surprising:

      |  v4.1  |  v4.2
------+--------+--------
data  | 0.685s |  0.714s
hole  | 0.485s | 15.547s
mixed | 1.283s |  0.448s

From what I can tell, the 100% hole case takes so long because of the SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this out to trick the function into thinking that the entire file was already a hole, and runtime dropped to the levels of v4.1 and v4.2. I wonder if this is filesystem dependent? My server is exporting ext4.

Anna

> 
> (But, check my math, I haven't tested this carefully.)
> 
> --b.
> > #define _GNU_SOURCE > #include <stdio.h> > #include <sys/types.h> > #include <sys/stat.h> > #include <fcntl.h> > #include <unistd.h> > #include <errno.h> > #include <err.h> > > long round_up(long n, long b) > { > return ((n + b - 1)/b) * b; > } > > long round_down(long n, long b) > { > return (n/b) * b; > } > > long hbytes = 0; > long rplusbytes = 0; > > do_stats(off_t hole_start, off_t hole_end) > { > off_t hole_start_up, hole_end_down; > > hole_start_up = round_up(hole_start, 1024*1024); > hole_end_down = round_down(hole_end, 1024*1024); > > hbytes += hole_end - hole_start; > if (hole_start_up < hole_end_down) > rplusbytes += hole_end_down - hole_start_up; > } > > int main(int argc, char *argv[]) > { > off_t hole_start, hole_end; > int fd; > char *name; > > /* Map out holes with SEEK_HOLE, SEEK_DATA */ > /* Useful statistics: > * - what percentage of file is in holes? > * - what percentage of file would be skipped if we read it > * sequentially in 1MB chunks? > */ > > if (argc != 2) > errx(1, "usage: %s <filename>\n", argv[0]); > name = argv[1]; > fd = open(name, O_RDONLY); > if (fd == -1) > err(1, "open"); > > hole_end = 0; > while (1) { > hole_start = lseek(fd, hole_end, SEEK_HOLE); > if (hole_start == -1) > err(1, "lseek"); > hole_end = lseek(fd, hole_start, SEEK_DATA); > if (hole_end == -1) { > if (errno == ENXIO) > break; > err(1, "lseek"); > } > do_stats(hole_start, hole_end); > } > hole_end = lseek(fd, 0, SEEK_END); > do_stats(hole_start, hole_end); > printf("total hole bytes: %ld (%.0f%)\n", hbytes, > 100 * (float)hbytes/hole_end); > printf("in aligned 1MB chunks: %ld (%.0f%)\n", rplusbytes, > 100 * (float)rplusbytes/hole_end); > } > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: > On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > > On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: > >> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: > >>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: > >>>> This patch implements sending an array of segments back to the client. > >>>> Clients should be prepared to handle multiple segment reads to make this > >>>> useful. We try to splice the first data segment into the XDR result, > >>>> and remaining segments are encoded directly. > >>> > >>> I'm still interested in what would happen if we started with an > >>> implementation like: > >>> > >>> - if the entire requested range falls within a hole, return that > >>> single hole. > >>> - otherwise, just treat the thing as one big data segment. > >>> > >>> That would provide a benefit in the case there are large-ish holes > >>> with minimal impact otherwise. > >>> > >>> (Though patches for full support are still useful even if only for > >>> client-testing purposes.) > >> > >> Also, looks like > >> > >> xvs_io -c "fiemap -v" <file> > >> > >> will give hole sizes for a given <file>. (Thanks, esandeen.) Running > >> that on a few of my test vm images shows a fair number of large > >> (hundreds of megs) files, which suggests identifying only >=rwsize holes > >> might still be useful. > > > > Just for fun.... I wrote the following test program and ran it on my > > collection of testing vm's. Some looked like this: > > > > f21-1.qcow2 > > 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2 > > total hole bytes: 8443252736 (98%) > > in aligned 1MB chunks: 8428453888 (98%) > > > > So, basically, read_plus would save transferring most of the data even > > when only handling 1MB holes. > > > > But some looked like this: > > > > 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img > > total hole bytes: 8077516800 (94%) > > in aligned 1MB chunks: 0 (0%) > > > > So the READ_PLUS that caught every hole might save a lot, the one that > > only caught 1MB holes wouldn't help at all. > > > > And there were lots of examples in between those two extremes. > > I tested with three different 512 MB files: 100% data, 100% hole, and alternating every megabyte. The results were surprising: > > | v4.1 | v4.2 > ----------------------- > data | 0.685s | 0.714s > hole | 0.485s | 15.547s > mixed | 1.283s | 0.448 > > >From what I can tell, the 100% hole case takes so long because of the > >SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this out to > >trick the function into thinking that the entire file was already a > >hole, and runtime dropped to the levels of v4.1 and v4.2. Wait, that 15s is due to just one SEEK_DATA? > I wonder > >if this is filesystem dependent? My server is exporting ext4. Sounds like just a bug. I've been doing lots of lseek(.,.,SEEK_DATA) on both ext4 and xfs without seeing anything that weird. I believe it does return -ENXIO in the case SEEK_DATA is called at an offset beyond which there's no more data. At least that's what I saw in userspace. So maybe your code just isn't handling that case correctly? --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
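For reference, the userspace behaviour described above: lseek(fd, off, SEEK_DATA) fails with ENXIO when there is no data at or after off, and a hole encoder has to treat that as "hole extends to EOF" and stop, rather than probing the same range again. A minimal userspace sketch of that handling (not the nfsd code):

#define _GNU_SOURCE
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>

/*
 * Find where the data after 'offset' begins.  Returns the file size when
 * there is no more data (SEEK_DATA => ENXIO), so a caller can emit a single
 * hole segment up to EOF and stop, instead of re-probing the same range.
 */
static off_t next_data(int fd, off_t offset)
{
    struct stat st;
    off_t pos = lseek(fd, offset, SEEK_DATA);

    if (pos != -1)
        return pos;
    if (errno == ENXIO && fstat(fd, &st) == 0)
        return st.st_size;
    return -1;    /* real error */
}

The v3 patch above already maps the ENXIO case to i_size_read(); the open question is why ext4 takes so long to answer the query in the first place.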
On 03/18/2015 02:55 PM, J. Bruce Fields wrote: > On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: >> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: >>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: >>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: >>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: >>>>>> This patch implements sending an array of segments back to the client. >>>>>> Clients should be prepared to handle multiple segment reads to make this >>>>>> useful. We try to splice the first data segment into the XDR result, >>>>>> and remaining segments are encoded directly. >>>>> >>>>> I'm still interested in what would happen if we started with an >>>>> implementation like: >>>>> >>>>> - if the entire requested range falls within a hole, return that >>>>> single hole. >>>>> - otherwise, just treat the thing as one big data segment. >>>>> >>>>> That would provide a benefit in the case there are large-ish holes >>>>> with minimal impact otherwise. >>>>> >>>>> (Though patches for full support are still useful even if only for >>>>> client-testing purposes.) >>>> >>>> Also, looks like >>>> >>>> xvs_io -c "fiemap -v" <file> >>>> >>>> will give hole sizes for a given <file>. (Thanks, esandeen.) Running >>>> that on a few of my test vm images shows a fair number of large >>>> (hundreds of megs) files, which suggests identifying only >=rwsize holes >>>> might still be useful. >>> >>> Just for fun.... I wrote the following test program and ran it on my >>> collection of testing vm's. Some looked like this: >>> >>> f21-1.qcow2 >>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2 >>> total hole bytes: 8443252736 (98%) >>> in aligned 1MB chunks: 8428453888 (98%) >>> >>> So, basically, read_plus would save transferring most of the data even >>> when only handling 1MB holes. >>> >>> But some looked like this: >>> >>> 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img >>> total hole bytes: 8077516800 (94%) >>> in aligned 1MB chunks: 0 (0%) >>> >>> So the READ_PLUS that caught every hole might save a lot, the one that >>> only caught 1MB holes wouldn't help at all. >>> >>> And there were lots of examples in between those two extremes. >> >> I tested with three different 512 MB files: 100% data, 100% hole, and alternating every megabyte. The results were surprising: >> >> | v4.1 | v4.2 >> ----------------------- >> data | 0.685s | 0.714s >> hole | 0.485s | 15.547s >> mixed | 1.283s | 0.448 >> >> >From what I can tell, the 100% hole case takes so long because of the >>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this out to >>> trick the function into thinking that the entire file was already a >>> hole, and runtime dropped to the levels of v4.1 and v4.2. > > Wait, that 15s is due to just one SEEK_DATA? The server is returning a larger hole than the client can read at once, so there are several SEEK_DATA calls made to verify that there are no data segments before the end of the file. > >> I wonder >>> if this is filesystem dependent? My server is exporting ext4. > > Sounds like just a bug. I've been doing lots of lseek(.,.,SEEK_DATA) on > both ext4 and xfs without seeing anything that weird. It looks like something weird on ext4. 
I switched my exported filesystem to xfs:

      |  v4.1  |  v4.2
------+--------+-------
data  | 0.764s | 1.343s
hole  | 0.572s | 0.205s
mixed | 0.634s | 0.472s

I bumped up the test to 1G files:

      |  v4.1  |  v4.2
------+--------+-------
data  | 1.578s | 1.743s
hole  | 1.241s | 0.443s
mixed | 1.884s | 0.913s

Let me know if I should test anything larger!

Anna

> 
> I believe it does return -ENXIO in the case SEEK_DATA is called at an
> offset beyond which there's no more data. At least that's what I saw in
> userspace. So maybe your code just isn't handling that case correctly?
> 
> --b.
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: > On 03/18/2015 02:55 PM, J. Bruce Fields wrote: > > On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: > >> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > >>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: > >>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: > >>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: > >>>>>> This patch implements sending an array of segments back to the client. > >>>>>> Clients should be prepared to handle multiple segment reads to make this > >>>>>> useful. We try to splice the first data segment into the XDR result, > >>>>>> and remaining segments are encoded directly. > >>>>> > >>>>> I'm still interested in what would happen if we started with an > >>>>> implementation like: > >>>>> > >>>>> - if the entire requested range falls within a hole, return that > >>>>> single hole. > >>>>> - otherwise, just treat the thing as one big data segment. > >>>>> > >>>>> That would provide a benefit in the case there are large-ish holes > >>>>> with minimal impact otherwise. > >>>>> > >>>>> (Though patches for full support are still useful even if only for > >>>>> client-testing purposes.) > >>>> > >>>> Also, looks like > >>>> > >>>> xvs_io -c "fiemap -v" <file> > >>>> > >>>> will give hole sizes for a given <file>. (Thanks, esandeen.) Running > >>>> that on a few of my test vm images shows a fair number of large > >>>> (hundreds of megs) files, which suggests identifying only >=rwsize holes > >>>> might still be useful. > >>> > >>> Just for fun.... I wrote the following test program and ran it on my > >>> collection of testing vm's. Some looked like this: > >>> > >>> f21-1.qcow2 > >>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2 > >>> total hole bytes: 8443252736 (98%) > >>> in aligned 1MB chunks: 8428453888 (98%) > >>> > >>> So, basically, read_plus would save transferring most of the data even > >>> when only handling 1MB holes. > >>> > >>> But some looked like this: > >>> > >>> 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img > >>> total hole bytes: 8077516800 (94%) > >>> in aligned 1MB chunks: 0 (0%) > >>> > >>> So the READ_PLUS that caught every hole might save a lot, the one that > >>> only caught 1MB holes wouldn't help at all. > >>> > >>> And there were lots of examples in between those two extremes. > >> > >> I tested with three different 512 MB files: 100% data, 100% hole, and alternating every megabyte. The results were surprising: > >> > >> | v4.1 | v4.2 > >> ----------------------- > >> data | 0.685s | 0.714s > >> hole | 0.485s | 15.547s > >> mixed | 1.283s | 0.448 > >> > >> >From what I can tell, the 100% hole case takes so long because of the > >>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this out to > >>> trick the function into thinking that the entire file was already a > >>> hole, and runtime dropped to the levels of v4.1 and v4.2. > > > > Wait, that 15s is due to just one SEEK_DATA? > > The server is returning a larger hole than the client can read at once, so there are several SEEK_DATA calls made to verify that there are no data segments before the end of the file. > > > > >> I wonder > >>> if this is filesystem dependent? My server is exporting ext4. > > > > Sounds like just a bug. I've been doing lots of lseek(.,.,SEEK_DATA) on > > both ext4 and xfs without seeing anything that weird. > > It looks like something weird on ext4. 
I switched my exported filesystem to xfs: Huh. Maybe we should report a bug.... > > | v4.1 | v4.2 > ------+--------+------- > data | 0.764s | 1.343s That's too bad. Non-sparse files are surely still a common case and we'd like to not see a slowdown there.... I wonder if we can figure out where it's coming from? > hole | 0.572s | 0.205s > mixed | 0.634s | 0.472s > > > I bumped up the test to 1G files: > > | v4.1 | v4.2 > ------+--------+------- > data | 1.578s | 1.743s > hole | 1.241s | 0.443s > mixed | 1.884s | 0.913s > > Let me know if I should test anything larger! The other thing I'd be interested in would be a "mixed" case that alternates every 4k. That will test the worst case where we we do a 1MB read and get back only a 4k hole. Aligned 1MB holes are somewhat of a best case. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
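One way to build the worst-case file suggested above is to write and skip alternating 4k chunks; a sketch follows. It assumes a filesystem with a 4k block size that actually turns the skipped ranges into holes, and the file size and fill byte are arbitrary:

#define _FILE_OFFSET_BITS 64
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <err.h>

#define CHUNK 4096

int main(int argc, char *argv[])
{
    char buf[CHUNK];
    long i, chunks = 128 * 1024;    /* 128K chunks at an 8K stride = 1G file */
    int fd;

    if (argc != 2)
        errx(1, "usage: %s <file>", argv[0]);
    fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1)
        err(1, "open");
    memset(buf, 'x', sizeof(buf));

    /* Write 4k of data, then skip 4k, so data and holes alternate. */
    for (i = 0; i < chunks; i++) {
        if (pwrite(fd, buf, CHUNK, i * 2L * CHUNK) != CHUNK)
            err(1, "pwrite");
    }
    /* Extend past the last write so the file ends on a hole. */
    if (ftruncate(fd, chunks * 2L * CHUNK) == -1)
        err(1, "ftruncate");
    return 0;
}

Reading that file back over v4.1 and v4.2 would show the per-segment overhead in the worst case described above, where each 1MB read returns many small hole and data segments.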
On 03/18/2015 04:55 PM, J. Bruce Fields wrote: > On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: >> On 03/18/2015 02:55 PM, J. Bruce Fields wrote: >>> On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: >>>> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: >>>>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: >>>>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: >>>>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: >>>>>>>> This patch implements sending an array of segments back to the client. >>>>>>>> Clients should be prepared to handle multiple segment reads to make this >>>>>>>> useful. We try to splice the first data segment into the XDR result, >>>>>>>> and remaining segments are encoded directly. >>>>>>> >>>>>>> I'm still interested in what would happen if we started with an >>>>>>> implementation like: >>>>>>> >>>>>>> - if the entire requested range falls within a hole, return that >>>>>>> single hole. >>>>>>> - otherwise, just treat the thing as one big data segment. >>>>>>> >>>>>>> That would provide a benefit in the case there are large-ish holes >>>>>>> with minimal impact otherwise. >>>>>>> >>>>>>> (Though patches for full support are still useful even if only for >>>>>>> client-testing purposes.) >>>>>> >>>>>> Also, looks like >>>>>> >>>>>> xvs_io -c "fiemap -v" <file> >>>>>> >>>>>> will give hole sizes for a given <file>. (Thanks, esandeen.) Running >>>>>> that on a few of my test vm images shows a fair number of large >>>>>> (hundreds of megs) files, which suggests identifying only >=rwsize holes >>>>>> might still be useful. >>>>> >>>>> Just for fun.... I wrote the following test program and ran it on my >>>>> collection of testing vm's. Some looked like this: >>>>> >>>>> f21-1.qcow2 >>>>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2 >>>>> total hole bytes: 8443252736 (98%) >>>>> in aligned 1MB chunks: 8428453888 (98%) >>>>> >>>>> So, basically, read_plus would save transferring most of the data even >>>>> when only handling 1MB holes. >>>>> >>>>> But some looked like this: >>>>> >>>>> 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img >>>>> total hole bytes: 8077516800 (94%) >>>>> in aligned 1MB chunks: 0 (0%) >>>>> >>>>> So the READ_PLUS that caught every hole might save a lot, the one that >>>>> only caught 1MB holes wouldn't help at all. >>>>> >>>>> And there were lots of examples in between those two extremes. >>>> >>>> I tested with three different 512 MB files: 100% data, 100% hole, and alternating every megabyte. The results were surprising: >>>> >>>> | v4.1 | v4.2 >>>> ----------------------- >>>> data | 0.685s | 0.714s >>>> hole | 0.485s | 15.547s >>>> mixed | 1.283s | 0.448 >>>> >>>> >From what I can tell, the 100% hole case takes so long because of the >>>>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this out to >>>>> trick the function into thinking that the entire file was already a >>>>> hole, and runtime dropped to the levels of v4.1 and v4.2. >>> >>> Wait, that 15s is due to just one SEEK_DATA? >> >> The server is returning a larger hole than the client can read at once, so there are several SEEK_DATA calls made to verify that there are no data segments before the end of the file. >> >>> >>>> I wonder >>>>> if this is filesystem dependent? My server is exporting ext4. >>> >>> Sounds like just a bug. I've been doing lots of lseek(.,.,SEEK_DATA) on >>> both ext4 and xfs without seeing anything that weird. 
>> >> It looks like something weird on ext4. I switched my exported filesystem to xfs: > > Huh. Maybe we should report a bug.... > >> >> | v4.1 | v4.2 >> ------+--------+------- >> data | 0.764s | 1.343s > > That's too bad. Non-sparse files are surely still a common case and > we'd like to not see a slowdown there.... I wonder if we can figure out > where it's coming from? That's a good question, especially since the 1G file didn't double this time. Maybe a VM quirk? > >> hole | 0.572s | 0.205s >> mixed | 0.634s | 0.472s >> >> >> I bumped up the test to 1G files: >> >> | v4.1 | v4.2 >> ------+--------+------- >> data | 1.578s | 1.743s >> hole | 1.241s | 0.443s >> mixed | 1.884s | 0.913s >> >> Let me know if I should test anything larger! > > The other thing I'd be interested in would be a "mixed" case that > alternates every 4k. That will test the worst case where we we do a 1MB > read and get back only a 4k hole. Aligned 1MB holes are somewhat of a > best case. I probably won't get a chance to test this until I'm back from my vacation, but I'll keep the suggestion in mind! Anna > > --b. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 18, 2015 at 05:03:32PM -0400, Anna Schumaker wrote: > On 03/18/2015 04:55 PM, J. Bruce Fields wrote: > > On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: > >> On 03/18/2015 02:55 PM, J. Bruce Fields wrote: > >>> On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: > >>>> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > >>>>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: > >>>>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields wrote: > >>>>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker wrote: > >>>>>>>> This patch implements sending an array of segments back to the client. > >>>>>>>> Clients should be prepared to handle multiple segment reads to make this > >>>>>>>> useful. We try to splice the first data segment into the XDR result, > >>>>>>>> and remaining segments are encoded directly. > >>>>>>> > >>>>>>> I'm still interested in what would happen if we started with an > >>>>>>> implementation like: > >>>>>>> > >>>>>>> - if the entire requested range falls within a hole, return that > >>>>>>> single hole. > >>>>>>> - otherwise, just treat the thing as one big data segment. > >>>>>>> > >>>>>>> That would provide a benefit in the case there are large-ish holes > >>>>>>> with minimal impact otherwise. > >>>>>>> > >>>>>>> (Though patches for full support are still useful even if only for > >>>>>>> client-testing purposes.) > >>>>>> > >>>>>> Also, looks like > >>>>>> > >>>>>> xvs_io -c "fiemap -v" <file> > >>>>>> > >>>>>> will give hole sizes for a given <file>. (Thanks, esandeen.) Running > >>>>>> that on a few of my test vm images shows a fair number of large > >>>>>> (hundreds of megs) files, which suggests identifying only >=rwsize holes > >>>>>> might still be useful. > >>>>> > >>>>> Just for fun.... I wrote the following test program and ran it on my > >>>>> collection of testing vm's. Some looked like this: > >>>>> > >>>>> f21-1.qcow2 > >>>>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 f21-1.qcow2 > >>>>> total hole bytes: 8443252736 (98%) > >>>>> in aligned 1MB chunks: 8428453888 (98%) > >>>>> > >>>>> So, basically, read_plus would save transferring most of the data even > >>>>> when only handling 1MB holes. > >>>>> > >>>>> But some looked like this: > >>>>> > >>>>> 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 rhel6-1-1.img > >>>>> total hole bytes: 8077516800 (94%) > >>>>> in aligned 1MB chunks: 0 (0%) > >>>>> > >>>>> So the READ_PLUS that caught every hole might save a lot, the one that > >>>>> only caught 1MB holes wouldn't help at all. > >>>>> > >>>>> And there were lots of examples in between those two extremes. > >>>> > >>>> I tested with three different 512 MB files: 100% data, 100% hole, and alternating every megabyte. The results were surprising: > >>>> > >>>> | v4.1 | v4.2 > >>>> ----------------------- > >>>> data | 0.685s | 0.714s > >>>> hole | 0.485s | 15.547s > >>>> mixed | 1.283s | 0.448 > >>>> > >>>> >From what I can tell, the 100% hole case takes so long because of the > >>>>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this out to > >>>>> trick the function into thinking that the entire file was already a > >>>>> hole, and runtime dropped to the levels of v4.1 and v4.2. > >>> > >>> Wait, that 15s is due to just one SEEK_DATA? > >> > >> The server is returning a larger hole than the client can read at once, so there are several SEEK_DATA calls made to verify that there are no data segments before the end of the file. 
> >> > >>> > >>>> I wonder > >>>>> if this is filesystem dependent? My server is exporting ext4. > >>> > >>> Sounds like just a bug. I've been doing lots of lseek(.,.,SEEK_DATA) on > >>> both ext4 and xfs without seeing anything that weird. > >> > >> It looks like something weird on ext4. I switched my exported filesystem to xfs: > > > > Huh. Maybe we should report a bug.... > > > >> > >> | v4.1 | v4.2 > >> ------+--------+------- > >> data | 0.764s | 1.343s > > > > That's too bad. Non-sparse files are surely still a common case and > > we'd like to not see a slowdown there.... I wonder if we can figure out > > where it's coming from? > > That's a good question, especially since the 1G file didn't double this time. Maybe a VM quirk? We definitely need to figure it out, I think. If we can't make READ_PLUS perform as well as READ (or very close to it) in the non-sparse case then I don't think we'll want it, and as Trond suggested we may want to consider something more fiemap-like instead. I don't know, maybe the client could try to be clever and only use READ_PLUS if the space_used/size ratio is lower than some threshhold, but it could get a little complicated to tune. It's annoying that asking "does this range contain zeroes" is actually taking longer than just reading the whole range.... --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
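The "clever client" idea above could reduce to comparing allocated space against file size before choosing READ_PLUS; a hypothetical sketch, with a made-up 75% threshold that would need the tuning mentioned above:

#include <sys/stat.h>
#include <stdbool.h>

/*
 * Hypothetical client-side heuristic: only bother with READ_PLUS when the
 * file looks reasonably sparse.  st_blocks is in 512-byte units; the 75%
 * threshold is arbitrary and would need tuning.
 */
static bool file_looks_sparse(const struct stat *st)
{
    long long allocated = (long long)st->st_blocks * 512;

    if (st->st_size == 0)
        return false;
    return allocated * 4 < st->st_size * 3;    /* allocated < 75% of size */
}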
On Thu, Mar 19, 2015 at 08:00:05AM -0700, Marc Eshel wrote: > linux-nfs-owner@vger.kernel.org wrote on 03/18/2015 02:11:44 PM: > > > From: "J. Bruce Fields" <bfields@fieldses.org> > > To: Anna Schumaker <Anna.Schumaker@netapp.com> > > Cc: linux-nfs@vger.kernel.org > > Date: 03/18/2015 02:14 PM > > Subject: Re: [PATCH v3 3/3] NFSD: Add support for encoding multiple > segments > > Sent by: linux-nfs-owner@vger.kernel.org > > > > On Wed, Mar 18, 2015 at 05:03:32PM -0400, Anna Schumaker wrote: > > > On 03/18/2015 04:55 PM, J. Bruce Fields wrote: > > > > On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: > > > >> On 03/18/2015 02:55 PM, J. Bruce Fields wrote: > > > >>> On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: > > > >>>> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > > > >>>>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: > > > >>>>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields > wrote: > > > >>>>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker > wrote: > > > >>>>>>>> This patch implements sending an array of segments back > > to the client. > > > >>>>>>>> Clients should be prepared to handle multiple segment > > reads to make this > > > >>>>>>>> useful. We try to splice the first data segment into the > > XDR result, > > > >>>>>>>> and remaining segments are encoded directly. > > > >>>>>>> > > > >>>>>>> I'm still interested in what would happen if we started with > an > > > >>>>>>> implementation like: > > > >>>>>>> > > > >>>>>>> - if the entire requested range falls within a hole, return > that > > > >>>>>>> single hole. > > > >>>>>>> - otherwise, just treat the thing as one big data segment. > > > >>>>>>> > > > >>>>>>> That would provide a benefit in the case there are large-ish > holes > > > >>>>>>> with minimal impact otherwise. > > > >>>>>>> > > > >>>>>>> (Though patches for full support are still useful even if only > for > > > >>>>>>> client-testing purposes.) > > > >>>>>> > > > >>>>>> Also, looks like > > > >>>>>> > > > >>>>>> xvs_io -c "fiemap -v" <file> > > > >>>>>> > > > >>>>>> will give hole sizes for a given <file>. (Thanks, > > esandeen.) Running > > > >>>>>> that on a few of my test vm images shows a fair number of large > > > >>>>>> (hundreds of megs) files, which suggests identifying only > > >=rwsize holes > > > >>>>>> might still be useful. > > > >>>>> > > > >>>>> Just for fun.... I wrote the following test program and ran it > on my > > > >>>>> collection of testing vm's. Some looked like this: > > > >>>>> > > > >>>>> f21-1.qcow2 > > > >>>>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 > f21-1.qcow2 > > > >>>>> total hole bytes: 8443252736 (98%) > > > >>>>> in aligned 1MB chunks: 8428453888 (98%) > > > >>>>> > > > >>>>> So, basically, read_plus would save transferring most of thedata > even > > > >>>>> when only handling 1MB holes. > > > >>>>> > > > >>>>> But some looked like this: > > > >>>>> > > > >>>>> 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 > > rhel6-1-1.img > > > >>>>> total hole bytes: 8077516800 (94%) > > > >>>>> in aligned 1MB chunks: 0 (0%) > > > >>>>> > > > >>>>> So the READ_PLUS that caught every hole might save a lot, the > one that > > > >>>>> only caught 1MB holes wouldn't help at all. > > > >>>>> > > > >>>>> And there were lots of examples in between those two extremes. > > > >>>> > > > >>>> I tested with three different 512 MB files: 100% data, 100% > > hole, and alternating every megabyte. 
The results were surprising: > > > >>>> > > > >>>> | v4.1 | v4.2 > > > >>>> ----------------------- > > > >>>> data | 0.685s | 0.714s > > > >>>> hole | 0.485s | 15.547s > > > >>>> mixed | 1.283s | 0.448 > > > >>>> > > > >>>> >From what I can tell, the 100% hole case takes so long because > of the > > > >>>>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this > out to > > > >>>>> trick the function into thinking that the entire file was > already a > > > >>>>> hole, and runtime dropped to the levels of v4.1 and v4.2. > > > >>> > > > >>> Wait, that 15s is due to just one SEEK_DATA? > > > >> > > > >> The server is returning a larger hole than the client can read > > at once, so there are several SEEK_DATA calls made to verify that > > there are no data segments before the end of the file. > > > >> > > > >>> > > > >>>> I wonder > > > >>>>> if this is filesystem dependent? My server is exporting ext4. > > > >>> > > > >>> Sounds like just a bug. I've been doing lots of > lseek(.,.,SEEK_DATA) on > > > >>> both ext4 and xfs without seeing anything that weird. > > > >> > > > >> It looks like something weird on ext4. I switched my exported > > filesystem to xfs: > > > > > > > > Huh. Maybe we should report a bug.... > > > > > > > >> > > > >> | v4.1 | v4.2 > > > >> ------+--------+------- > > > >> data | 0.764s | 1.343s > > > > > > > > That's too bad. Non-sparse files are surely still a common case and > > > > we'd like to not see a slowdown there.... I wonder if we can figure > out > > > > where it's coming from? > > > > > > That's a good question, especially since the 1G file didn't double > > this time. Maybe a VM quirk? > > > > We definitely need to figure it out, I think. If we can't make > > READ_PLUS perform as well as READ (or very close to it) in the > > non-sparse case then I don't think we'll want it, and as Trond suggested > > we may want to consider something more fiemap-like instead. > > Testing Anna's NFS client with the Ganesha NFS server and GPFS file system > shows the same numbers for READ with v4.1 and READ_PUSE with v4.2 of a > data file. Using sparse files READ_PLUS is 5 times faster than READ. Thanks! Is it possible to report the exact numbers? Is Ganesha also implementing READ_PLUS with SEEK_HOLE/SEEK_DATA? If so then maybe the difference is the filesystem. Might be interesting to run the same sort of test with ganesha exporting xfs and/or knfsd exporting GPFS. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
linux-nfs-owner@vger.kernel.org wrote on 03/19/2015 08:36:27 AM: > From: "J. Bruce Fields" <bfields@fieldses.org> > To: Marc Eshel/Almaden/IBM@IBMUS > Cc: Anna Schumaker <Anna.Schumaker@netapp.com>, linux- > nfs@vger.kernel.org, linux-nfs-owner@vger.kernel.org > Date: 03/19/2015 08:36 AM > Subject: Re: [PATCH v3 3/3] NFSD: Add support for encoding multiple segments > Sent by: linux-nfs-owner@vger.kernel.org > > On Thu, Mar 19, 2015 at 08:00:05AM -0700, Marc Eshel wrote: > > linux-nfs-owner@vger.kernel.org wrote on 03/18/2015 02:11:44 PM: > > > > > From: "J. Bruce Fields" <bfields@fieldses.org> > > > To: Anna Schumaker <Anna.Schumaker@netapp.com> > > > Cc: linux-nfs@vger.kernel.org > > > Date: 03/18/2015 02:14 PM > > > Subject: Re: [PATCH v3 3/3] NFSD: Add support for encoding multiple > > segments > > > Sent by: linux-nfs-owner@vger.kernel.org > > > > > > On Wed, Mar 18, 2015 at 05:03:32PM -0400, Anna Schumaker wrote: > > > > On 03/18/2015 04:55 PM, J. Bruce Fields wrote: > > > > > On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: > > > > >> On 03/18/2015 02:55 PM, J. Bruce Fields wrote: > > > > >>> On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker wrote: > > > > >>>> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > > > > >>>>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields wrote: > > > > >>>>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields > > wrote: > > > > >>>>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker > > wrote: > > > > >>>>>>>> This patch implements sending an array of segments back > > > to the client. > > > > >>>>>>>> Clients should be prepared to handle multiple segment > > > reads to make this > > > > >>>>>>>> useful. We try to splice the first data segment into the > > > XDR result, > > > > >>>>>>>> and remaining segments are encoded directly. > > > > >>>>>>> > > > > >>>>>>> I'm still interested in what would happen if we started with > > an > > > > >>>>>>> implementation like: > > > > >>>>>>> > > > > >>>>>>> - if the entire requested range falls within a hole, return > > that > > > > >>>>>>> single hole. > > > > >>>>>>> - otherwise, just treat the thing as one big data segment. > > > > >>>>>>> > > > > >>>>>>> That would provide a benefit in the case there are large-ish > > holes > > > > >>>>>>> with minimal impact otherwise. > > > > >>>>>>> > > > > >>>>>>> (Though patches for full support are still useful even if only > > for > > > > >>>>>>> client-testing purposes.) > > > > >>>>>> > > > > >>>>>> Also, looks like > > > > >>>>>> > > > > >>>>>> xvs_io -c "fiemap -v" <file> > > > > >>>>>> > > > > >>>>>> will give hole sizes for a given <file>. (Thanks, > > > esandeen.) Running > > > > >>>>>> that on a few of my test vm images shows a fair number of large > > > > >>>>>> (hundreds of megs) files, which suggests identifying only > > > >=rwsize holes > > > > >>>>>> might still be useful. > > > > >>>>> > > > > >>>>> Just for fun.... I wrote the following test program and ran it > > on my > > > > >>>>> collection of testing vm's. Some looked like this: > > > > >>>>> > > > > >>>>> f21-1.qcow2 > > > > >>>>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 > > f21-1.qcow2 > > > > >>>>> total hole bytes: 8443252736 (98%) > > > > >>>>> in aligned 1MB chunks: 8428453888 (98%) > > > > >>>>> > > > > >>>>> So, basically, read_plus would save transferring most of thedata > > even > > > > >>>>> when only handling 1MB holes. > > > > >>>>> > > > > >>>>> But some looked like this: > > > > >>>>> > > > > >>>>> 501524 -rw-------. 
1 qemu qemu 8589934592 May 20 2014 > > > rhel6-1-1.img > > > > >>>>> total hole bytes: 8077516800 (94%) > > > > >>>>> in aligned 1MB chunks: 0 (0%) > > > > >>>>> > > > > >>>>> So the READ_PLUS that caught every hole might save a lot, the > > one that > > > > >>>>> only caught 1MB holes wouldn't help at all. > > > > >>>>> > > > > >>>>> And there were lots of examples in between those two extremes. > > > > >>>> > > > > >>>> I tested with three different 512 MB files: 100% data, 100% > > > hole, and alternating every megabyte. The results were surprising: > > > > >>>> > > > > >>>> | v4.1 | v4.2 > > > > >>>> ----------------------- > > > > >>>> data | 0.685s | 0.714s > > > > >>>> hole | 0.485s | 15.547s > > > > >>>> mixed | 1.283s | 0.448 > > > > >>>> > > > > >>>> >From what I can tell, the 100% hole case takes so long because > > of the > > > > >>>>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took this > > out to > > > > >>>>> trick the function into thinking that the entire file was > > already a > > > > >>>>> hole, and runtime dropped to the levels of v4.1 and v4.2. > > > > >>> > > > > >>> Wait, that 15s is due to just one SEEK_DATA? > > > > >> > > > > >> The server is returning a larger hole than the client can read > > > at once, so there are several SEEK_DATA calls made to verify that > > > there are no data segments before the end of the file. > > > > >> > > > > >>> > > > > >>>> I wonder > > > > >>>>> if this is filesystem dependent? My server is exporting ext4. > > > > >>> > > > > >>> Sounds like just a bug. I've been doing lots of > > lseek(.,.,SEEK_DATA) on > > > > >>> both ext4 and xfs without seeing anything that weird. > > > > >> > > > > >> It looks like something weird on ext4. I switched my exported > > > filesystem to xfs: > > > > > > > > > > Huh. Maybe we should report a bug.... > > > > > > > > > >> > > > > >> | v4.1 | v4.2 > > > > >> ------+--------+------- > > > > >> data | 0.764s | 1.343s > > > > > > > > > > That's too bad. Non-sparse files are surely still a common case and > > > > > we'd like to not see a slowdown there.... I wonder if we can figure > > out > > > > > where it's coming from? > > > > > > > > That's a good question, especially since the 1G file didn't double > > > this time. Maybe a VM quirk? > > > > > > We definitely need to figure it out, I think. If we can't make > > > READ_PLUS perform as well as READ (or very close to it) in the > > > non-sparse case then I don't think we'll want it, and as Trond suggested > > > we may want to consider something more fiemap-like instead. > > > > Testing Anna's NFS client with the Ganesha NFS server and GPFS file system > > shows the same numbers for READ with v4.1 and READ_PLUS with v4.2 of a > > data file. Using sparse files READ_PLUS is 5 times faster than READ. > > Thanks! Is it possible to report the exact numbers? This is a copy of a 100M file. [root@fin16 ~]# umount /mnt [root@fin16 ~]# mount -t nfs4 -o minorversion=1 9.1.74.120:/gpfsA /mnt [root@fin16 ~]# time cp /mnt/100M /dev/null real 0m1.597s user 0m0.000s sys 0m0.062s [root@fin16 ~]# umount /mnt [root@fin16 ~]# mount -t nfs4 -o minorversion=2 9.1.74.120:/gpfsA /mnt [root@fin16 ~]# time cp /mnt/100M /dev/null real 0m1.595s user 0m0.002s sys 0m0.057s > > Is Ganesha also implementing READ_PLUS with SEEK_HOLE/SEEK_DATA? If so > then maybe the difference is the filesystem. Might be interesting to > run the same sort of test with ganesha exporting xfs and/or knfsd > exporting GPFS. 
GPFS did not implement it using SEEK; it just calls the fs read, and if there is no data the fs returns an ENODATA return code. It is not yet implemented on other FSALs > > --b. > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Maybe this is a question for xfs developers. So, we have a new READ_PLUS call that's basically just a version of READ optimized for sparse files: http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-33#section-15.10 It allows an NFS server to return either file data (like a normal READ call) or, at the server's discretion, records saying "this range of the data is all zeroes". Anna tried implementing READ_PLUS for knfsd using vfs_llseek(.,.,SEEK_HOLE) followed by an ordinary read if that determines we're not at a hole. (Very) preliminary results suggest that's slower than a plain READ for an xfs file with no holes. (And *much* slower in the ext4 case for some reason.) Is that expected, and should we be doing this some other way instead? --b. On Thu, Mar 19, 2015 at 09:28:09AM -0700, Marc Eshel wrote: > linux-nfs-owner@vger.kernel.org wrote on 03/19/2015 08:36:27 AM: > > > From: "J. Bruce Fields" <bfields@fieldses.org> > > To: Marc Eshel/Almaden/IBM@IBMUS > > Cc: Anna Schumaker <Anna.Schumaker@netapp.com>, linux- > > nfs@vger.kernel.org, linux-nfs-owner@vger.kernel.org > > Date: 03/19/2015 08:36 AM > > Subject: Re: [PATCH v3 3/3] NFSD: Add support for encoding multiple > segments > > Sent by: linux-nfs-owner@vger.kernel.org > > > > On Thu, Mar 19, 2015 at 08:00:05AM -0700, Marc Eshel wrote: > > > linux-nfs-owner@vger.kernel.org wrote on 03/18/2015 02:11:44 PM: > > > > From: "J. Bruce Fields" <bfields@fieldses.org> > > > > On Wed, Mar 18, 2015 at 05:03:32PM -0400, Anna Schumaker wrote: > > > > > On 03/18/2015 04:55 PM, J. Bruce Fields wrote: > > > > > > On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: > > > > > >> | v4.1 | v4.2 > > > > > >> ------+--------+------- > > > > > >> data | 0.764s | 1.343s > > > > > > > > > > > > That's too bad. Non-sparse files are surely still a common case > and > > > > > > we'd like to not see a slowdown there.... I wonder if we can > figure > > > out > > > > > > where it's coming from? > > > > > > > > > > That's a good question, especially since the 1G file didn't double > > > > this time. Maybe a VM quirk? > > > > > > > > We definitely need to figure it out, I think. If we can't make > > > > READ_PLUS perform as well as READ (or very close to it) in the > > > > non-sparse case then I don't think we'll want it, and as Trond > suggested > > > > we may want to consider something more fiemap-like instead. > > > > > > Testing Anna's NFS client with the Ganesha NFS server and GPFS file > system > > > shows the same numbers for READ with v4.1 and READ_PLUS with v4.2 of a > > > > data file. Using sparse files READ_PLUS is 5 times faster than READ. > > > > Thanks! Is it possible to report the exact numbers? > > This is a copy of a 100M file. > > [root@fin16 ~]# umount /mnt > [root@fin16 ~]# mount -t nfs4 -o minorversion=1 9.1.74.120:/gpfsA /mnt > [root@fin16 ~]# time cp /mnt/100M /dev/null > > real 0m1.597s > user 0m0.000s > sys 0m0.062s > [root@fin16 ~]# umount /mnt > [root@fin16 ~]# mount -t nfs4 -o minorversion=2 9.1.74.120:/gpfsA /mnt > [root@fin16 ~]# time cp /mnt/100M /dev/null > > real 0m1.595s > user 0m0.002s > sys 0m0.057s > > > > > Is Ganesha also implementing READ_PLUS with SEEK_HOLE/SEEK_DATA? If so > > then maybe the difference is the filesystem. Might be interesting to > > run the same sort of test with ganesha exporting xfs and/or knfsd > > exporting GPFS. > > GPFS did not implement it using SEEK it just calls the fs read and if > there is no data the fs returns ENODATA return code. 
It is not yet > implemented on other FSLAs > > > > > --b. > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > On Thu, Mar 19, 2015 at 09:28:09AM -0700, Marc Eshel wrote: > linux-nfs-owner@vger.kernel.org wrote on 03/19/2015 08:36:27 AM: > > > From: "J. Bruce Fields" <bfields@fieldses.org> > > To: Marc Eshel/Almaden/IBM@IBMUS > > Cc: Anna Schumaker <Anna.Schumaker@netapp.com>, linux- > > nfs@vger.kernel.org, linux-nfs-owner@vger.kernel.org > > Date: 03/19/2015 08:36 AM > > Subject: Re: [PATCH v3 3/3] NFSD: Add support for encoding multiple > segments > > Sent by: linux-nfs-owner@vger.kernel.org > > > > On Thu, Mar 19, 2015 at 08:00:05AM -0700, Marc Eshel wrote: > > > linux-nfs-owner@vger.kernel.org wrote on 03/18/2015 02:11:44 PM: > > > > > > > From: "J. Bruce Fields" <bfields@fieldses.org> > > > > To: Anna Schumaker <Anna.Schumaker@netapp.com> > > > > Cc: linux-nfs@vger.kernel.org > > > > Date: 03/18/2015 02:14 PM > > > > Subject: Re: [PATCH v3 3/3] NFSD: Add support for encoding multiple > > > segments > > > > Sent by: linux-nfs-owner@vger.kernel.org > > > > > > > > On Wed, Mar 18, 2015 at 05:03:32PM -0400, Anna Schumaker wrote: > > > > > On 03/18/2015 04:55 PM, J. Bruce Fields wrote: > > > > > > On Wed, Mar 18, 2015 at 04:39:24PM -0400, Anna Schumaker wrote: > > > > > >> On 03/18/2015 02:55 PM, J. Bruce Fields wrote: > > > > > >>> On Wed, Mar 18, 2015 at 02:16:29PM -0400, Anna Schumaker > wrote: > > > > > >>>> On 03/17/2015 05:36 PM, J. Bruce Fields wrote: > > > > > >>>>> On Tue, Mar 17, 2015 at 04:07:38PM -0400, J. Bruce Fields > wrote: > > > > > >>>>>> On Tue, Mar 17, 2015 at 03:56:33PM -0400, J. Bruce Fields > > > wrote: > > > > > >>>>>>> On Mon, Mar 16, 2015 at 05:18:08PM -0400, Anna Schumaker > > > wrote: > > > > > >>>>>>>> This patch implements sending an array of segments back > > > > to the client. > > > > > >>>>>>>> Clients should be prepared to handle multiple segment > > > > reads to make this > > > > > >>>>>>>> useful. We try to splice the first data segment into the > > > > XDR result, > > > > > >>>>>>>> and remaining segments are encoded directly. > > > > > >>>>>>> > > > > > >>>>>>> I'm still interested in what would happen if we started > with > > > an > > > > > >>>>>>> implementation like: > > > > > >>>>>>> > > > > > >>>>>>> - if the entire requested range falls within a hole, > return > > > that > > > > > >>>>>>> single hole. > > > > > >>>>>>> - otherwise, just treat the thing as one big data > segment. > > > > > >>>>>>> > > > > > >>>>>>> That would provide a benefit in the case there are > large-ish > > > holes > > > > > >>>>>>> with minimal impact otherwise. > > > > > >>>>>>> > > > > > >>>>>>> (Though patches for full support are still useful even if > only > > > for > > > > > >>>>>>> client-testing purposes.) > > > > > >>>>>> > > > > > >>>>>> Also, looks like > > > > > >>>>>> > > > > > >>>>>> xvs_io -c "fiemap -v" <file> > > > > > >>>>>> > > > > > >>>>>> will give hole sizes for a given <file>. (Thanks, > > > > esandeen.) Running > > > > > >>>>>> that on a few of my test vm images shows a fair number of > large > > > > > >>>>>> (hundreds of megs) files, which suggests identifying only > > > > >=rwsize holes > > > > > >>>>>> might still be useful. > > > > > >>>>> > > > > > >>>>> Just for fun.... I wrote the following test program and ran > it > > > on my > > > > > >>>>> collection of testing vm's. 
Some looked like this: > > > > > >>>>> > > > > > >>>>> f21-1.qcow2 > > > > > >>>>> 144784 -rw-------. 1 qemu qemu 8591507456 Mar 16 10:13 > > > f21-1.qcow2 > > > > > >>>>> total hole bytes: 8443252736 (98%) > > > > > >>>>> in aligned 1MB chunks: 8428453888 (98%) > > > > > >>>>> > > > > > >>>>> So, basically, read_plus would save transferring most of > thedata > > > even > > > > > >>>>> when only handling 1MB holes. > > > > > >>>>> > > > > > >>>>> But some looked like this: > > > > > >>>>> > > > > > >>>>> 501524 -rw-------. 1 qemu qemu 8589934592 May 20 2014 > > > > rhel6-1-1.img > > > > > >>>>> total hole bytes: 8077516800 (94%) > > > > > >>>>> in aligned 1MB chunks: 0 (0%) > > > > > >>>>> > > > > > >>>>> So the READ_PLUS that caught every hole might save a lot, > the > > > one that > > > > > >>>>> only caught 1MB holes wouldn't help at all. > > > > > >>>>> > > > > > >>>>> And there were lots of examples in between those two > extremes. > > > > > >>>> > > > > > >>>> I tested with three different 512 MB files: 100% data, 100% > > > > hole, and alternating every megabyte. The results were surprising: > > > > > >>>> > > > > > >>>> | v4.1 | v4.2 > > > > > >>>> ----------------------- > > > > > >>>> data | 0.685s | 0.714s > > > > > >>>> hole | 0.485s | 15.547s > > > > > >>>> mixed | 1.283s | 0.448 > > > > > >>>> > > > > > >>>> >From what I can tell, the 100% hole case takes so long > because > > > of the > > > > > >>>>> SEEK_DATA call in nfsd4_encode_read_plus_hole(). I took > this > > > out to > > > > > >>>>> trick the function into thinking that the entire file was > > > already a > > > > > >>>>> hole, and runtime dropped to the levels of v4.1 and v4.2. > > > > > >>> > > > > > >>> Wait, that 15s is due to just one SEEK_DATA? > > > > > >> > > > > > >> The server is returning a larger hole than the client can read > > > > at once, so there are several SEEK_DATA calls made to verify that > > > > there are no data segments before the end of the file. > > > > > >> > > > > > >>> > > > > > >>>> I wonder > > > > > >>>>> if this is filesystem dependent? My server is exporting > ext4. > > > > > >>> > > > > > >>> Sounds like just a bug. I've been doing lots of > > > lseek(.,.,SEEK_DATA) on > > > > > >>> both ext4 and xfs without seeing anything that weird. > > > > > >> > > > > > >> It looks like something weird on ext4. I switched my exported > > > > filesystem to xfs: > > > > > > > > > > > > Huh. Maybe we should report a bug.... > > > > > > > > > > > >> > > > > > >> | v4.1 | v4.2 > > > > > >> ------+--------+------- > > > > > >> data | 0.764s | 1.343s > > > > > > > > > > > > That's too bad. Non-sparse files are surely still a common case > and > > > > > > we'd like to not see a slowdown there.... I wonder if we can > figure > > > out > > > > > > where it's coming from? > > > > > > > > > > That's a good question, especially since the 1G file didn't double > > > > this time. Maybe a VM quirk? > > > > > > > > We definitely need to figure it out, I think. If we can't make > > > > READ_PLUS perform as well as READ (or very close to it) in the > > > > non-sparse case then I don't think we'll want it, and as Trond > suggested > > > > we may want to consider something more fiemap-like instead. > > > > > > Testing Anna's NFS client with the Ganesha NFS server and GPFS file > system > > > shows the same numbers for READ with v4.1 and READ_PLUS with v4.2 of a > > > > data file. Using sparse files READ_PLUS is 5 times faster than READ. > > > > Thanks! Is it possible to report the exact numbers? 
> > This is a copy of a 100M file. > > [root@fin16 ~]# umount /mnt > [root@fin16 ~]# mount -t nfs4 -o minorversion=1 9.1.74.120:/gpfsA /mnt > [root@fin16 ~]# time cp /mnt/100M /dev/null > > real 0m1.597s > user 0m0.000s > sys 0m0.062s > [root@fin16 ~]# umount /mnt > [root@fin16 ~]# mount -t nfs4 -o minorversion=2 9.1.74.120:/gpfsA /mnt > [root@fin16 ~]# time cp /mnt/100M /dev/null > > real 0m1.595s > user 0m0.002s > sys 0m0.057s > > > > > Is Ganesha also implementing READ_PLUS with SEEK_HOLE/SEEK_DATA? If so > > then maybe the difference is the filesystem. Might be interesting to > > run the same sort of test with ganesha exporting xfs and/or knfsd > > exporting GPFS. > > GPFS did not implement it using SEEK it just calls the fs read and if > there is no data the fs returns ENODATA return code. It is not yet > implemented on other FSLAs > > > > > --b. > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Mar 20, 2015 at 11:17:18AM -0400, J. Bruce Fields wrote: > Maybe this is a question for xfs developers. > > So, we have a new READ_PLUS call that's basically just a version of READ > optimized for sparse files: > > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-33#section-15.10 > > It allows an NFS server to return either file data (like a normal READ > call) or, at the server's discretion, records saying "this range of the > data is all zeroes". > > Anna tried implementing READ_PLUS for knfsd using > vfs_llseek(.,.,SEEK_HOLE) followed by an ordinary read if that > determines we're not at a hole. > > (Very) preliminary results suggest that's slower than a plain READ for > an xfs file with no holes. (And *much* slower in the ext4 case for some > reason.) It should be a fairly cheap operation, and does extent tree operations that are pretty similar to an (uncached) read. Do you have profiles? > Is that expected, and should we be doing this some other way instead? Are the reads cached or uncached? If they are from pagecache, just copying the zeroes is pretty much unbeatable compared to extent tree lookups, so we'd need a new page flag (difficult..) to see that a page is a hole (and then it would only work for the whole page), but for uncached reads an optimization would be to tell a read that it's an NFS READ_PLUS so that it could just read until it reaches a hole, and then we'd need some way to communicate the hole size (or just fall back to SEEK_HOLE for that case). -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
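For reference, the SEEK_HOLE/SEEK_DATA probing discussed above can be sketched in a few lines of userspace C. This is only an illustration of the pattern (the file name and offsets are arbitrary), not the nfsd code itself, which does the equivalent on the server side with vfs_llseek():

/*
 * Illustration only: classify the byte at 'offset' in a file as hole
 * or data using lseek(SEEK_HOLE)/lseek(SEEK_DATA).
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>

int main(int argc, char **argv)
{
	off_t offset = 0, next;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file> [offset]\n", argv[0]);
		return 1;
	}
	if (argc > 2)
		offset = atoll(argv[2]);

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* SEEK_HOLE finds the next hole at or after 'offset' (EOF counts
	 * as a hole), so getting 'offset' back means we are in a hole. */
	next = lseek(fd, offset, SEEK_HOLE);
	if (next < 0) {
		perror("lseek");
		return 1;
	}

	if (next == offset) {
		/* In a hole: find where data resumes, if anywhere. */
		next = lseek(fd, offset, SEEK_DATA);
		if (next < 0 && errno == ENXIO)
			printf("hole from %lld to end of file\n",
			       (long long)offset);
		else
			printf("hole from %lld to %lld\n",
			       (long long)offset, (long long)next);
	} else {
		printf("data from %lld to %lld\n",
		       (long long)offset, (long long)next);
	}

	close(fd);
	return 0;
}

Run against a sparse file, this reports where the first hole or data extent begins; per-call it is cheap, but the discussion above is about how it compares to simply copying cached zeroes out of the pagecache.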
On Fri, Mar 20, 2015 at 09:23:03AM -0700, Christoph Hellwig wrote: > On Fri, Mar 20, 2015 at 11:17:18AM -0400, J. Bruce Fields wrote: > > Maybe this is a question for xfs developers. > > > > So, we have a new READ_PLUS call that's basically just a version of READ > > optimized for sparse files: > > > > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-33#section-15.10 > > > > It allows an NFS server to return either file data (like a normal READ > > call) or, at the server's discretion, records saying "this range of the > > data is all zeroes". > > > > Anna tried implementing READ_PLUS for knfsd using > > vfs_llseek(.,.,SEEK_HOLE) followed by an ordinary read if that > > determines we're not at a hole. > > > > (Very) preliminary results suggest that's slower than a plain READ for > > an xfs file with no holes. (And *much* slower in the ext4 case for some > > reason.) > > It should be a fairly cheap operastion, and does extent tree operations > that are pretty similar to an (uncached) read. Do you have profiles? > > > Is that expected, and should we be doing this some other way instead? > > Are the read cached or uncached? I don't know, and don't have profiles. I'll either try to reproduce or wait till Anna's back from vacation. > If they are from pagecache just copying the zeroes is pretty much > unbeatable compared to extent tree lookups, so we'd need a new page > flag (difficult..) to see that a page is a hole (and then it would > only work for the whole page), but for uncached reads an optimization > would be to tell a read that it's an NFS READ_PLUS so that it could > just read until it reach a hole, and then we'd need some way to > communicate the hole size (or just fall back to SEEK_HOLE for that > case). Ugh, OK. We'll do some more tests before coming back to ask about that.... --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Mar 20, 2015 at 2:26 PM, J. Bruce Fields <bfields@fieldses.org> wrote: > On Fri, Mar 20, 2015 at 09:23:03AM -0700, Christoph Hellwig wrote: >> On Fri, Mar 20, 2015 at 11:17:18AM -0400, J. Bruce Fields wrote: >> > Maybe this is a question for xfs developers. >> > >> > So, we have a new READ_PLUS call that's basically just a version of READ >> > optimized for sparse files: >> > >> > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-33#section-15.10 >> > >> > It allows an NFS server to return either file data (like a normal READ >> > call) or, at the server's discretion, records saying "this range of the >> > data is all zeroes". >> > >> > Anna tried implementing READ_PLUS for knfsd using >> > vfs_llseek(.,.,SEEK_HOLE) followed by an ordinary read if that >> > determines we're not at a hole. >> > >> > (Very) preliminary results suggest that's slower than a plain READ for >> > an xfs file with no holes. (And *much* slower in the ext4 case for some >> > reason.) >> >> It should be a fairly cheap operastion, and does extent tree operations >> that are pretty similar to an (uncached) read. Do you have profiles? >> >> > Is that expected, and should we be doing this some other way instead? >> >> Are the read cached or uncached? > > I don't know, and don't have profiles. I'll either try to reproduce or > wait till Anna's back from vacation. I'm using whatever functions NFSD already uses for reading files, which I expect go through the VFS. Is there a flag that controls cache behavior? > >> If they are from pagecache just copying the zeroes is pretty much >> unbeatable compared to extent tree lookups, so we'd need a new page >> flag (difficult..) to see that a page is a hole (and then it would >> only work for the whole page), but for uncached reads an optimization >> would be to tell a read that it's an NFS READ_PLUS so that it could >> just read until it reach a hole, and then we'd need some way to >> communicate the hole size (or just fall back to SEEK_HOLE for that >> case). > > Ugh, OK. We'll do some more tests before coming back to ask about > that.... I only had time for the one run, so I'll do more trials and see if that one read is always so long. I'm still hoping it was something in the way my VM was scheduling its tasks! Anna > > --b. > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Mar 24, 2015 at 08:43:31AM -0400, Anna Schumaker wrote: > > I don't know, and don't have profiles. I'll either try to reproduce or > > wait till Anna's back from vacation. > > I'm using whatever functions NFSD already uses for reading files, > which I expect go through the VFS. Is there a flag that controls > cache behavior? There's the O_DIRECT flag, but that's not what I mean. If you just wrote to it, it's a cached read; if you unmounted the filesystem after writing, or did an echo to /proc/sys/vm/drop_caches, you get uncached read behavior. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/24/2015 01:49 PM, Christoph Hellwig wrote: > On Tue, Mar 24, 2015 at 08:43:31AM -0400, Anna Schumaker wrote: >>> I don't know, and don't have profiles. I'll either try to reproduce or >>> wait till Anna's back from vacation. >> >> I'm using whatever functions NFSD already uses for reading files, >> which I expect go through the VFS. Is there a flag that controls >> cache behavior? > > There's the O_DIRECT flag, but that's not what I mean. If you just > wrote to it it's a cached read, if you did unmount the filesystem after > writing, or did an echo to /proc/sys/vm/drop_caches you get uncached > read behavior. Oh, I'm doing uncached reads for my tests. I'm collecting updated numbers now! Anna > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: ########################## # # # Without READ_PLUS # # # ########################## NFS v4.1: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | |---------|---------|---------|---------|---------|---------|---------| ####################### # # # With READ_PLUS # # # ####################### NFS v4.1: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | |---------|---------|---------|---------|---------|---------|---------| On 03/24/2015 01:49 PM, Christoph Hellwig wrote: > On Tue, Mar 24, 2015 at 08:43:31AM -0400, Anna Schumaker wrote: >>> I don't know, and don't have profiles. I'll either try to reproduce or >>> wait till Anna's back from vacation. >> >> I'm using whatever functions NFSD already uses for reading files, >> which I expect go through the VFS. Is there a flag that controls >> cache behavior? > > There's the O_DIRECT flag, but that's not what I mean. If you just > wrote to it it's a cached read, if you did unmount the filesystem after > writing, or did an echo to /proc/sys/vm/drop_caches you get uncached > read behavior. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
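The thread doesn't spell out how the 5G test files were generated; one possible way to produce the alternating hole/data layout described above is a short C program like the following (the file name, fill byte, and sizes are illustrative assumptions, not the actual test setup):

/*
 * Hypothetical generator for the "mixed" test file: 5G total, with
 * data in every other 4K chunk and holes in between.
 */
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define CHUNK   4096
#define NCHUNKS ((5LL * 1024 * 1024 * 1024) / CHUNK)	/* 5G / 4K */

int main(void)
{
	char buf[CHUNK];
	long long i;
	int fd;

	memset(buf, 'a', sizeof(buf));
	fd = open("mixed-5g", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0)
		return 1;

	/* Write the even chunks; the odd chunks are never written and
	 * so stay as holes in the underlying filesystem. */
	for (i = 0; i < NCHUNKS; i += 2) {
		if (pwrite(fd, buf, CHUNK, i * CHUNK) != CHUNK)
			return 1;
	}

	/* Extend to exactly 5G so the final unwritten chunk is a hole. */
	if (ftruncate(fd, NCHUNKS * CHUNK) < 0)
		return 1;

	close(fd);
	return 0;
}

The all-data and all-hole cases are the degenerate versions of the same idea: write every chunk, or simply ftruncate() a new file to 5G without writing anything.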
On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: > Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: > > ########################## > # # > # Without READ_PLUS # > # # > ########################## > > > NFS v4.1: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | > | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | > | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > NFS v4.2: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | > | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | > | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > > ####################### > # # > # With READ_PLUS # > # # > ####################### > > > NFS v4.1: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | > | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | > | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > NFS v4.2: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | > | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | > | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | > |---------|---------|---------|---------|---------|---------|---------| > So there is a clear win in the 100% hole case here, but otherwise the statistical fluctuations are dominating the numbers. Can you get us a little more stats and then perhaps run the results through nfsometer? > > > On 03/24/2015 01:49 PM, Christoph Hellwig wrote: >> On Tue, Mar 24, 2015 at 08:43:31AM -0400, Anna Schumaker wrote: >>>> I don't know, and don't have profiles. I'll either try to reproduce or >>>> wait till Anna's back from vacation. >>> >>> I'm using whatever functions NFSD already uses for reading files, >>> which I expect go through the VFS. Is there a flag that controls >>> cache behavior? >> >> There's the O_DIRECT flag, but that's not what I mean. If you just >> wrote to it it's a cached read, if you did unmount the filesystem after >> writing, or did an echo to /proc/sys/vm/drop_caches you get uncached >> read behavior. >> > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/26/2015 11:32 AM, Trond Myklebust wrote: > On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker > <Anna.Schumaker@netapp.com> wrote: >> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >> >> ########################## >> # # >> # Without READ_PLUS # >> # # >> ########################## >> >> >> NFS v4.1: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> >> >> NFS v4.2: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> >> >> >> ####################### >> # # >> # With READ_PLUS # >> # # >> ####################### >> >> >> NFS v4.1: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> >> >> NFS v4.2: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >> |---------|---------|---------|---------|---------|---------|---------| >> > > So there is a clear win in the 100% hole case here, but otherwise the > statistical fluctuations are dominating the numbers. Can you get us a > little more stats and then perhaps run the results through nfsometer? Sure! Do you want any information besides runtime? Anna > >> >> >> On 03/24/2015 01:49 PM, Christoph Hellwig wrote: >>> On Tue, Mar 24, 2015 at 08:43:31AM -0400, Anna Schumaker wrote: >>>>> I don't know, and don't have profiles. I'll either try to reproduce or >>>>> wait till Anna's back from vacation. >>>> >>>> I'm using whatever functions NFSD already uses for reading files, >>>> which I expect go through the VFS. Is there a flag that controls >>>> cache behavior? >>> >>> There's the O_DIRECT flag, but that's not what I mean. If you just >>> wrote to it it's a cached read, if you did unmount the filesystem after >>> writing, or did an echo to /proc/sys/vm/drop_caches you get uncached >>> read behavior. 
>>> >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: > On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker > <Anna.Schumaker@netapp.com> wrote: > > Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: > > > > ########################## > > # # > > # Without READ_PLUS # > > # # > > ########################## > > > > > > NFS v4.1: > > Trial > > |---------|---------|---------|---------|---------|---------|---------| > > | | 1 | 2 | 3 | 4 | 5 | Average | > > |---------|---------|---------|---------|---------|---------|---------| > > | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | > > | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | > > | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | > > |---------|---------|---------|---------|---------|---------|---------| > > > > > > > > > > NFS v4.2: > > Trial > > |---------|---------|---------|---------|---------|---------|---------| > > | | 1 | 2 | 3 | 4 | 5 | Average | > > |---------|---------|---------|---------|---------|---------|---------| > > | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | > > | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | > > | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | > > |---------|---------|---------|---------|---------|---------|---------| > > > > > > > > > > > > ####################### > > # # > > # With READ_PLUS # > > # # > > ####################### > > > > > > NFS v4.1: > > Trial > > |---------|---------|---------|---------|---------|---------|---------| > > | | 1 | 2 | 3 | 4 | 5 | Average | > > |---------|---------|---------|---------|---------|---------|---------| > > | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | > > | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | > > | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | > > |---------|---------|---------|---------|---------|---------|---------| > > > > > > > > > > NFS v4.2: > > Trial > > |---------|---------|---------|---------|---------|---------|---------| > > | | 1 | 2 | 3 | 4 | 5 | Average | > > |---------|---------|---------|---------|---------|---------|---------| > > | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | > > | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | > > | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | > > |---------|---------|---------|---------|---------|---------|---------| > > > > So there is a clear win in the 100% hole case here, but otherwise the > statistical fluctuations are dominating the numbers. Can you get us a > little more stats and then perhaps run the results through nfsometer? Also, could you describe the setup (are these still kvm's), and how you're clearing the cache between runs? --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/26/2015 11:38 AM, J. Bruce Fields wrote: > On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >> <Anna.Schumaker@netapp.com> wrote: >>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>> >>> ########################## >>> # # >>> # Without READ_PLUS # >>> # # >>> ########################## >>> >>> >>> NFS v4.1: >>> Trial >>> |---------|---------|---------|---------|---------|---------|---------| >>> | | 1 | 2 | 3 | 4 | 5 | Average | >>> |---------|---------|---------|---------|---------|---------|---------| >>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>> |---------|---------|---------|---------|---------|---------|---------| >>> >>> >>> >>> >>> NFS v4.2: >>> Trial >>> |---------|---------|---------|---------|---------|---------|---------| >>> | | 1 | 2 | 3 | 4 | 5 | Average | >>> |---------|---------|---------|---------|---------|---------|---------| >>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>> |---------|---------|---------|---------|---------|---------|---------| >>> >>> >>> >>> >>> >>> ####################### >>> # # >>> # With READ_PLUS # >>> # # >>> ####################### >>> >>> >>> NFS v4.1: >>> Trial >>> |---------|---------|---------|---------|---------|---------|---------| >>> | | 1 | 2 | 3 | 4 | 5 | Average | >>> |---------|---------|---------|---------|---------|---------|---------| >>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>> |---------|---------|---------|---------|---------|---------|---------| >>> >>> >>> >>> >>> NFS v4.2: >>> Trial >>> |---------|---------|---------|---------|---------|---------|---------| >>> | | 1 | 2 | 3 | 4 | 5 | Average | >>> |---------|---------|---------|---------|---------|---------|---------| >>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>> |---------|---------|---------|---------|---------|---------|---------| >>> >> >> So there is a clear win in the 100% hole case here, but otherwise the >> statistical fluctuations are dominating the numbers. Can you get us a >> little more stats and then perhaps run the results through nfsometer? > > Also, could you describe the setup (are these still kvm's), and how > you're clearing the cache between runs? These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. > > --b. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: > On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>> <Anna.Schumaker@netapp.com> wrote: >>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>> >>>> ########################## >>>> # # >>>> # Without READ_PLUS # >>>> # # >>>> ########################## >>>> >>>> >>>> NFS v4.1: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> >>>> >>>> NFS v4.2: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> >>>> >>>> >>>> ####################### >>>> # # >>>> # With READ_PLUS # >>>> # # >>>> ####################### >>>> >>>> >>>> NFS v4.1: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> >>>> >>>> NFS v4.2: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>> >>> So there is a clear win in the 100% hole case here, but otherwise the >>> statistical fluctuations are dominating the numbers. Can you get us a >>> little more stats and then perhaps run the results through nfsometer? >> >> Also, could you describe the setup (are these still kvm's), and how >> you're clearing the cache between runs? > > These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. I agree that you have to use the 'drop_caches' interface on the server, but why not just use O_DIRECT on the clients?
On 03/26/2015 12:06 PM, Trond Myklebust wrote: > On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker > <Anna.Schumaker@netapp.com> wrote: >> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>> <Anna.Schumaker@netapp.com> wrote: >>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>> >>>>> ########################## >>>>> # # >>>>> # Without READ_PLUS # >>>>> # # >>>>> ########################## >>>>> >>>>> >>>>> NFS v4.1: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>>> >>>>> >>>>> >>>>> NFS v4.2: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> ####################### >>>>> # # >>>>> # With READ_PLUS # >>>>> # # >>>>> ####################### >>>>> >>>>> >>>>> NFS v4.1: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>>> >>>>> >>>>> >>>>> NFS v4.2: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>> >>>> So there is a clear win in the 100% hole case here, but otherwise the >>>> statistical fluctuations are dominating the numbers. Can you get us a >>>> little more stats and then perhaps run the results through nfsometer? >>> >>> Also, could you describe the setup (are these still kvm's), and how >>> you're clearing the cache between runs? >> >> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. 
> > I agree that you have to use the 'drop_caches' interface on the > server, but why not just use O_DIRECT on the clients? I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! Anna > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
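A hand-rolled O_DIRECT reader along the lines mentioned here might look roughly like the sketch below (hypothetical; the tests further down ended up using dd instead). The main wrinkle with O_DIRECT is that the buffer, transfer size, and file offset all need to be suitably aligned, hence posix_memalign() and a fixed 128K block size matching the NFS rsize:

/*
 * Hypothetical O_DIRECT reader: read a file in 128K chunks and throw
 * the data away, roughly what "dd iflag=direct ... of=/dev/null" does.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>

#define BLOCK (128 * 1024)	/* matches the 128K NFS rsize used below */

int main(int argc, char **argv)
{
	void *buf;
	ssize_t n;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	if (posix_memalign(&buf, 4096, BLOCK) != 0)
		return 1;

	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	while ((n = read(fd, buf, BLOCK)) > 0)
		;	/* discard, like redirecting to /dev/null */
	if (n < 0)
		perror("read");

	close(fd);
	free(buf);
	return 0;
}

dd with iflag=direct and bs=128K performs essentially the same sequence of aligned direct reads, which is what the next round of numbers uses.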
On Thu, Mar 26, 2015 at 11:47:03AM -0400, Anna Schumaker wrote: > On 03/26/2015 11:38 AM, J. Bruce Fields wrote: > > On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: > >> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker > >> <Anna.Schumaker@netapp.com> wrote: > >>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: > >>> > >>> ########################## > >>> # # > >>> # Without READ_PLUS # > >>> # # > >>> ########################## > >>> > >>> > >>> NFS v4.1: > >>> Trial > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | > >>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | > >>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> > >>> > >>> > >>> > >>> NFS v4.2: > >>> Trial > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | > >>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | > >>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> > >>> > >>> > >>> > >>> > >>> ####################### > >>> # # > >>> # With READ_PLUS # > >>> # # > >>> ####################### > >>> > >>> > >>> NFS v4.1: > >>> Trial > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | > >>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | > >>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> > >>> > >>> > >>> > >>> NFS v4.2: > >>> Trial > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | > >>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | > >>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | > >>> |---------|---------|---------|---------|---------|---------|---------| > >>> > >> > >> So there is a clear win in the 100% hole case here, but otherwise the > >> statistical fluctuations are dominating the numbers. Can you get us a > >> little more stats and then perhaps run the results through nfsometer? > > > > Also, could you describe the setup (are these still kvm's), and how > > you're clearing the cache between runs? > > These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. What sort of device is the exported xfs filesystem on? 
(Can't there be a second level of caching on the guest, depending on how it's set up?) Can we get results on bare metal? (The kvm test might be a good worst-case for read_plus, as I'd expect bandwidth to be relatively high compared to the cost of the extra memcpy's or seek calls. But it also seems more complicated.) --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: > On 03/26/2015 12:06 PM, Trond Myklebust wrote: >> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >> <Anna.Schumaker@netapp.com> wrote: >>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>> >>>>>> ########################## >>>>>> # # >>>>>> # Without READ_PLUS # >>>>>> # # >>>>>> ########################## >>>>>> >>>>>> >>>>>> NFS v4.1: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> NFS v4.2: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> ####################### >>>>>> # # >>>>>> # With READ_PLUS # >>>>>> # # >>>>>> ####################### >>>>>> >>>>>> >>>>>> NFS v4.1: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> NFS v4.2: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>> >>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>> little more stats and then perhaps run the results through nfsometer? >>>> >>>> Also, could you describe the setup (are these still kvm's), and how >>>> you're clearing the cache between runs? >>> >>> These are still KVMs and my server is exporting an xfs filesystem. 
I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. >> >> I agree that you have to use the 'drop_caches' interface on the >> server, but why not just use O_DIRECT on the clients? > > I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! > 'dd' can do that for you if the appropriate incantations are performed.
On 03/26/2015 12:13 PM, Trond Myklebust wrote: > On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker > <Anna.Schumaker@netapp.com> wrote: >> On 03/26/2015 12:06 PM, Trond Myklebust wrote: >>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >>> <Anna.Schumaker@netapp.com> wrote: >>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>>> >>>>>>> ########################## >>>>>>> # # >>>>>>> # Without READ_PLUS # >>>>>>> # # >>>>>>> ########################## >>>>>>> >>>>>>> >>>>>>> NFS v4.1: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> NFS v4.2: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> ####################### >>>>>>> # # >>>>>>> # With READ_PLUS # >>>>>>> # # >>>>>>> ####################### >>>>>>> >>>>>>> >>>>>>> NFS v4.1: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> NFS v4.2: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>> >>>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>>> little more stats and then perhaps run the results through nfsometer? 
>>>>> >>>>> Also, could you describe the setup (are these still kvm's), and how >>>>> you're clearing the cache between runs? >>>> >>>> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. >>> >>> I agree that you have to use the 'drop_caches' interface on the >>> server, but why not just use O_DIRECT on the clients? >> >> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! >> > > 'dd' can do that for you if the appropriate incantations are performed. Got it. I'll sacrifice a goat to 'dd' and rerun the tests with O_DIRECT! > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/26/2015 12:11 PM, J. Bruce Fields wrote: > On Thu, Mar 26, 2015 at 11:47:03AM -0400, Anna Schumaker wrote: >> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>> <Anna.Schumaker@netapp.com> wrote: >>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>> >>>>> ########################## >>>>> # # >>>>> # Without READ_PLUS # >>>>> # # >>>>> ########################## >>>>> >>>>> >>>>> NFS v4.1: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>>> >>>>> >>>>> >>>>> NFS v4.2: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> ####################### >>>>> # # >>>>> # With READ_PLUS # >>>>> # # >>>>> ####################### >>>>> >>>>> >>>>> NFS v4.1: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>>> >>>>> >>>>> >>>>> NFS v4.2: >>>>> Trial >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>> >>>> >>>> So there is a clear win in the 100% hole case here, but otherwise the >>>> statistical fluctuations are dominating the numbers. Can you get us a >>>> little more stats and then perhaps run the results through nfsometer? >>> >>> Also, could you describe the setup (are these still kvm's), and how >>> you're clearing the cache between runs? >> >> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. 
> > What sort of device is the exported xfs filesystem on? (Can't there > be a second level of caching on the guest, depending on how it's set > up?) My host is a macbook pro running Archlinux, and I have all my virtio disks set to "cache mode = none". Let me know if you were asking something different! > > Can we get results on bare metal? (The kvm test might be a good > worst-case for read_plus, as I'd expect bandwidth to be relatively high > compared to the cost of the extra memcpy's or seek calls. But it also > seems more complicated.) I do all of my testing on kvm these days! I'll see how difficult it is to setup refind with a custom kernel to test between my laptop and my desktop (or I could run the test between my raspberry pis!) Anna > > --b. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: dd if=/nfs/file iflag=direct of=/dev/null bs=128K Mixed file performance was awful, so I reran without direct IO enabled for comparison: dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. ########################## # # # Without READ_PLUS # # # ########################## NFS v4.1, iflag=direct: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2, iflag=direct: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.1, iflag=nocache oflag=nocache: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2, iflag=nocache oflag=nocache: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | |---------|---------|---------|---------|---------|---------|---------| ####################### # # # With READ_PLUS # # # ####################### NFS v4.1, iflag=direct: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | | Hole | 7.954s | 8.486s | 7.762s | 7.969s | 8.299s | 8.094s | | Mixed | 19.037s | 18.323s | 18.965s | 18.156s | 19.185s | 18.733s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2, iflag=direct: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 11.923s | 10.026s | 10.222s | 12.387s | 11.431s | 11.198s | | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | |---------|---------|---------|---------|---------|---------|---------| NFS 
v4.1, iflag=nocache oflag=nocache: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2, iflag=nocache oflag=nocache: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | |---------|---------|---------|---------|---------|---------|---------| On 03/26/2015 12:13 PM, Trond Myklebust wrote: > On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker > <Anna.Schumaker@netapp.com> wrote: >> On 03/26/2015 12:06 PM, Trond Myklebust wrote: >>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >>> <Anna.Schumaker@netapp.com> wrote: >>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>>> >>>>>>> ########################## >>>>>>> # # >>>>>>> # Without READ_PLUS # >>>>>>> # # >>>>>>> ########################## >>>>>>> >>>>>>> >>>>>>> NFS v4.1: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> NFS v4.2: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> ####################### >>>>>>> # # >>>>>>> # With READ_PLUS # >>>>>>> # # >>>>>>> ####################### >>>>>>> >>>>>>> >>>>>>> NFS v4.1: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | 
>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> NFS v4.2: >>>>>>> Trial >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>> >>>>>> >>>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>>> little more stats and then perhaps run the results through nfsometer? >>>>> >>>>> Also, could you describe the setup (are these still kvm's), and how >>>>> you're clearing the cache between runs? >>>> >>>> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. >>> >>> I agree that you have to use the 'drop_caches' interface on the >>> server, but why not just use O_DIRECT on the clients? >> >> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! >> > > 'dd' can do that for you if the appropriate incantations are performed. > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Mar 27, 2015 at 3:04 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote: > I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: > dd if=/nfs/file iflag=direct of=/dev/null bs=128K > > Mixed file performance was awful, so I reran without direct IO enabled for comparison: > dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K > > bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. > > > ########################## > # # > # Without READ_PLUS # > # # > ########################## > > > NFS v4.1, iflag=direct: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | > | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | > | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2, iflag=direct: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | > | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | > | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > NFS v4.1, iflag=nocache oflag=nocache: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | > | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | > | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2, iflag=nocache oflag=nocache: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | > | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | > | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > > > ####################### > # # > # With READ_PLUS # > # # > ####################### > > > NFS v4.1, iflag=direct: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | > | Hole | 7.954s | 8.486s | 7.762s | 7.969s | 8.299s | 8.094s | > | Mixed | 19.037s | 18.323s | 18.965s | 18.156s | 19.185s | 18.733s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2, iflag=direct: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 11.923s | 
10.026s | 10.222s | 12.387s | 11.431s | 11.198s | > | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | > | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | That's a bit nasty. Any idea what is going on with the Mixed case here? > |---------|---------|---------|---------|---------|---------|---------| > > > > > NFS v4.1, iflag=nocache oflag=nocache: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | > | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | > | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2, iflag=nocache oflag=nocache: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | > | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | > | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | > |---------|---------|---------|---------|---------|---------|---------| > > > On 03/26/2015 12:13 PM, Trond Myklebust wrote: >> On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker >> <Anna.Schumaker@netapp.com> wrote: >>> On 03/26/2015 12:06 PM, Trond Myklebust wrote: >>>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >>>> <Anna.Schumaker@netapp.com> wrote: >>>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. 
I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>>>> >>>>>>>> ########################## >>>>>>>> # # >>>>>>>> # Without READ_PLUS # >>>>>>>> # # >>>>>>>> ########################## >>>>>>>> >>>>>>>> >>>>>>>> NFS v4.1: >>>>>>>> Trial >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> NFS v4.2: >>>>>>>> Trial >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> ####################### >>>>>>>> # # >>>>>>>> # With READ_PLUS # >>>>>>>> # # >>>>>>>> ####################### >>>>>>>> >>>>>>>> >>>>>>>> NFS v4.1: >>>>>>>> Trial >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> NFS v4.2: >>>>>>>> Trial >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>> >>>>>>> >>>>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>>>> little more stats and then perhaps run the results through nfsometer? >>>>>> >>>>>> Also, could you describe the setup (are these still kvm's), and how >>>>>> you're clearing the cache between runs? >>>>> >>>>> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. >>>> >>>> I agree that you have to use the 'drop_caches' interface on the >>>> server, but why not just use O_DIRECT on the clients? >>> >>> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. 
I can write something to read files with O_DIRECT if that would be more useful! >>> >> >> 'dd' can do that for you if the appropriate incantations are performed. >> > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/27/2015 04:22 PM, Trond Myklebust wrote: > On Fri, Mar 27, 2015 at 3:04 PM, Anna Schumaker > <Anna.Schumaker@netapp.com> wrote: >> I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: >> dd if=/nfs/file iflag=direct of=/dev/null bs=128K >> >> Mixed file performance was awful, so I reran without direct IO enabled for comparison: >> dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K >> >> bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. >> >> >> ########################## >> # # >> # Without READ_PLUS # >> # # >> ########################## >> >> >> NFS v4.1, iflag=direct: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | >> | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | >> | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> NFS v4.2, iflag=direct: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | >> | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | >> | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> >> >> NFS v4.1, iflag=nocache oflag=nocache: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | >> | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | >> | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> NFS v4.2, iflag=nocache oflag=nocache: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | >> | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | >> | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> >> >> >> >> ####################### >> # # >> # With READ_PLUS # >> # # >> ####################### >> >> >> NFS v4.1, iflag=direct: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | >> | Hole | 7.954s | 8.486s | 7.762s | 7.969s | 8.299s | 8.094s | >> | Mixed | 19.037s | 18.323s | 18.965s | 18.156s | 19.185s | 18.733s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> NFS v4.2, iflag=direct: >> Trial >> 
|---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 11.923s | 10.026s | 10.222s | 12.387s | 11.431s | 11.198s | >> | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | >> | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | > > That's a bit nasty. Any idea what is going on with the Mixed case here? Not offhand, but my first guess would be something to do with extra seeks to find how long each hole and data segment is. Anna > >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> >> >> NFS v4.1, iflag=nocache oflag=nocache: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | >> | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | >> | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> NFS v4.2, iflag=nocache oflag=nocache: >> Trial >> |---------|---------|---------|---------|---------|---------|---------| >> | | 1 | 2 | 3 | 4 | 5 | Average | >> |---------|---------|---------|---------|---------|---------|---------| >> | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | >> | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | >> | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | >> |---------|---------|---------|---------|---------|---------|---------| >> >> >> On 03/26/2015 12:13 PM, Trond Myklebust wrote: >>> On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker >>> <Anna.Schumaker@netapp.com> wrote: >>>> On 03/26/2015 12:06 PM, Trond Myklebust wrote: >>>>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. 
I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>>>>> >>>>>>>>> ########################## >>>>>>>>> # # >>>>>>>>> # Without READ_PLUS # >>>>>>>>> # # >>>>>>>>> ########################## >>>>>>>>> >>>>>>>>> >>>>>>>>> NFS v4.1: >>>>>>>>> Trial >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> NFS v4.2: >>>>>>>>> Trial >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> ####################### >>>>>>>>> # # >>>>>>>>> # With READ_PLUS # >>>>>>>>> # # >>>>>>>>> ####################### >>>>>>>>> >>>>>>>>> >>>>>>>>> NFS v4.1: >>>>>>>>> Trial >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> NFS v4.2: >>>>>>>>> Trial >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>> >>>>>>>> >>>>>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>>>>> little more stats and then perhaps run the results through nfsometer? >>>>>>> >>>>>>> Also, could you describe the setup (are these still kvm's), and how >>>>>>> you're clearing the cache between runs? >>>>>> >>>>>> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. >>>>> >>>>> I agree that you have to use the 'drop_caches' interface on the >>>>> server, but why not just use O_DIRECT on the clients? 
>>>> >>>> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! >>>> >>> >>> 'dd' can do that for you if the appropriate incantations are performed. >>> >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
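One rough way to check the extra-seek theory is to look at how many separate
data extents the mixed file has on the server side, since every data/hole
boundary is another SEEK_HOLE/SEEK_DATA round trip for the READ_PLUS encoder.
A sketch, assuming the exported file lives at a made-up path /export/mixed-5g;
a 5G file alternating 4K data and 4K holes should report on the order of
650,000 extents:

    # list the file's extents (FIEMAP); the summary line gives the extent count
    filefrag -v /export/mixed-5g | tail -n 3

    # allocated size vs apparent size confirms the holes are real
    du -h /export/mixed-5g
    du -h --apparent-size /export/mixed-5g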
On Fri, Mar 27, 2015 at 04:46:55PM -0400, Anna Schumaker wrote: > On 03/27/2015 04:22 PM, Trond Myklebust wrote: > > On Fri, Mar 27, 2015 at 3:04 PM, Anna Schumaker > > <Anna.Schumaker@netapp.com> wrote: > >> I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: > >> dd if=/nfs/file iflag=direct of=/dev/null bs=128K > >> > >> Mixed file performance was awful, so I reran without direct IO enabled for comparison: > >> dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K > >> > >> bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. > >> > >> > >> ########################## > >> # # > >> # Without READ_PLUS # > >> # # > >> ########################## > >> > >> > >> NFS v4.1, iflag=direct: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | > >> | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | > >> | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> NFS v4.2, iflag=direct: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | > >> | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | > >> | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> > >> > >> NFS v4.1, iflag=nocache oflag=nocache: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | > >> | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | > >> | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> NFS v4.2, iflag=nocache oflag=nocache: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | > >> | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | > >> | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> > >> > >> > >> > >> ####################### > >> # # > >> # With READ_PLUS # > >> # # > >> ####################### > >> > >> > >> NFS v4.1, iflag=direct: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | > >> | Hole | 7.954s | 8.486s | 7.762s | 7.969s | 8.299s | 8.094s | > >> | Mixed | 19.037s | 
18.323s | 18.965s | 18.156s | 19.185s | 18.733s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> NFS v4.2, iflag=direct: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 11.923s | 10.026s | 10.222s | 12.387s | 11.431s | 11.198s | > >> | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | > >> | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | > > > > That's a bit nasty. Any idea what is going on with the Mixed case here? > > Not offhand, but my first guess would be something to do with extra seeks to find how long each hole and data segment is. Remind us what "mixed" means? (I think you were alternating, but how large is each segment?) --b. > > Anna > > > > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> > >> > >> NFS v4.1, iflag=nocache oflag=nocache: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | > >> | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | > >> | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> NFS v4.2, iflag=nocache oflag=nocache: > >> Trial > >> |---------|---------|---------|---------|---------|---------|---------| > >> | | 1 | 2 | 3 | 4 | 5 | Average | > >> |---------|---------|---------|---------|---------|---------|---------| > >> | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | > >> | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | > >> | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | > >> |---------|---------|---------|---------|---------|---------|---------| > >> > >> > >> On 03/26/2015 12:13 PM, Trond Myklebust wrote: > >>> On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker > >>> <Anna.Schumaker@netapp.com> wrote: > >>>> On 03/26/2015 12:06 PM, Trond Myklebust wrote: > >>>>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker > >>>>> <Anna.Schumaker@netapp.com> wrote: > >>>>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: > >>>>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: > >>>>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker > >>>>>>>> <Anna.Schumaker@netapp.com> wrote: > >>>>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. 
I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: > >>>>>>>>> > >>>>>>>>> ########################## > >>>>>>>>> # # > >>>>>>>>> # Without READ_PLUS # > >>>>>>>>> # # > >>>>>>>>> ########################## > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> NFS v4.1: > >>>>>>>>> Trial > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | > >>>>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | > >>>>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> NFS v4.2: > >>>>>>>>> Trial > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | > >>>>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | > >>>>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> ####################### > >>>>>>>>> # # > >>>>>>>>> # With READ_PLUS # > >>>>>>>>> # # > >>>>>>>>> ####################### > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> NFS v4.1: > >>>>>>>>> Trial > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | > >>>>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | > >>>>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> NFS v4.2: > >>>>>>>>> Trial > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | > >>>>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | > >>>>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | > >>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>> > >>>>>>>> > >>>>>>>> So there is a clear win in the 100% hole case here, but otherwise the > >>>>>>>> statistical fluctuations are dominating the numbers. Can you get us a > >>>>>>>> little more stats and then perhaps run the results through nfsometer? > >>>>>>> > >>>>>>> Also, could you describe the setup (are these still kvm's), and how > >>>>>>> you're clearing the cache between runs? > >>>>>> > >>>>>> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. 
> >>>>> > >>>>> I agree that you have to use the 'drop_caches' interface on the > >>>>> server, but why not just use O_DIRECT on the clients? > >>>> > >>>> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! > >>>> > >>> > >>> 'dd' can do that for you if the appropriate incantations are performed. > >>> > >> > >> -- > >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > >> the body of a message to majordomo@vger.kernel.org > >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/27/2015 04:54 PM, J. Bruce Fields wrote: > On Fri, Mar 27, 2015 at 04:46:55PM -0400, Anna Schumaker wrote: >> On 03/27/2015 04:22 PM, Trond Myklebust wrote: >>> On Fri, Mar 27, 2015 at 3:04 PM, Anna Schumaker >>> <Anna.Schumaker@netapp.com> wrote: >>>> I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: >>>> dd if=/nfs/file iflag=direct of=/dev/null bs=128K >>>> >>>> Mixed file performance was awful, so I reran without direct IO enabled for comparison: >>>> dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K >>>> >>>> bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. >>>> >>>> >>>> ########################## >>>> # # >>>> # Without READ_PLUS # >>>> # # >>>> ########################## >>>> >>>> >>>> NFS v4.1, iflag=direct: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | >>>> | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | >>>> | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> NFS v4.2, iflag=direct: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | >>>> | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | >>>> | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> >>>> >>>> NFS v4.1, iflag=nocache oflag=nocache: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | >>>> | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | >>>> | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> NFS v4.2, iflag=nocache oflag=nocache: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | >>>> | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | >>>> | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> >>>> >>>> >>>> >>>> ####################### >>>> # # >>>> # With READ_PLUS # >>>> # # >>>> ####################### >>>> >>>> >>>> NFS v4.1, iflag=direct: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | >>>> | Hole | 7.954s | 8.486s | 7.762s | 
7.969s | 8.299s | 8.094s | >>>> | Mixed | 19.037s | 18.323s | 18.965s | 18.156s | 19.185s | 18.733s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> NFS v4.2, iflag=direct: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 11.923s | 10.026s | 10.222s | 12.387s | 11.431s | 11.198s | >>>> | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | >>>> | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | >>> >>> That's a bit nasty. Any idea what is going on with the Mixed case here? >> >> Not offhand, but my first guess would be something to do with extra seeks to find how long each hole and data segment is. > > Remind us what "mixed" means? (I think you were alternating, but how > large is each segment?) "Mixed" is alternating 4K segments. > > --b. > >> >> Anna >> >>> >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> >>>> >>>> NFS v4.1, iflag=nocache oflag=nocache: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | >>>> | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | >>>> | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> NFS v4.2, iflag=nocache oflag=nocache: >>>> Trial >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | >>>> | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | >>>> | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | >>>> |---------|---------|---------|---------|---------|---------|---------| >>>> >>>> >>>> On 03/26/2015 12:13 PM, Trond Myklebust wrote: >>>>> On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker >>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>> On 03/26/2015 12:06 PM, Trond Myklebust wrote: >>>>>>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>>>>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>>>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. 
I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>>>>>>> >>>>>>>>>>> ########################## >>>>>>>>>>> # # >>>>>>>>>>> # Without READ_PLUS # >>>>>>>>>>> # # >>>>>>>>>>> ########################## >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> NFS v4.1: >>>>>>>>>>> Trial >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> NFS v4.2: >>>>>>>>>>> Trial >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> ####################### >>>>>>>>>>> # # >>>>>>>>>>> # With READ_PLUS # >>>>>>>>>>> # # >>>>>>>>>>> ####################### >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> NFS v4.1: >>>>>>>>>>> Trial >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> NFS v4.2: >>>>>>>>>>> Trial >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>> >>>>>>>>>> >>>>>>>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>>>>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>>>>>>> little more stats and then perhaps run the results through nfsometer? >>>>>>>>> >>>>>>>>> Also, could you describe the setup (are these still kvm's), and how >>>>>>>>> you're clearing the cache between runs? >>>>>>>> >>>>>>>> These are still KVMs and my server is exporting an xfs filesystem. I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. 
>>>>>>> >>>>>>> I agree that you have to use the 'drop_caches' interface on the >>>>>>> server, but why not just use O_DIRECT on the clients? >>>>>> >>>>>> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! >>>>>> >>>>> >>>>> 'dd' can do that for you if the appropriate incantations are performed. >>>>> >>>> >>>> -- >>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >>>> the body of a message to majordomo@vger.kernel.org >>>> More majordomo info at http://vger.kernel.org/majordomo-info.html >>> >>> >>> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
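A file with that layout can be reproduced with a plain shell loop.  This is
only a sketch: the path, size, and which half starts as data are guesses, and
it assumes a filesystem block size of 4K or smaller so the skipped blocks
really become holes:

    # write 4K of data into every other 4K block, leaving holes in between
    # (slow but simple: one dd invocation per data block)
    blocks=$((1024 * 1024 * 1024 / 4096))      # 262144 4K blocks in 1G
    for ((i = 0; i < blocks; i += 2)); do
        dd if=/dev/urandom of=mixed-1g bs=4K count=1 seek=$i conv=notrunc 2>/dev/null
    done
    truncate -s 1G mixed-1g                    # keep the trailing block as a hole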
On Fri, Mar 27, 2015 at 04:55:26PM -0400, Anna Schumaker wrote: > On 03/27/2015 04:54 PM, J. Bruce Fields wrote: > > On Fri, Mar 27, 2015 at 04:46:55PM -0400, Anna Schumaker wrote: > >> On 03/27/2015 04:22 PM, Trond Myklebust wrote: > >>> On Fri, Mar 27, 2015 at 3:04 PM, Anna Schumaker > >>> <Anna.Schumaker@netapp.com> wrote: > >>>> I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: > >>>> dd if=/nfs/file iflag=direct of=/dev/null bs=128K > >>>> > >>>> Mixed file performance was awful, so I reran without direct IO enabled for comparison: > >>>> dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K > >>>> > >>>> bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. > >>>> > >>>> > >>>> ########################## > >>>> # # > >>>> # Without READ_PLUS # > >>>> # # > >>>> ########################## > >>>> > >>>> > >>>> NFS v4.1, iflag=direct: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | > >>>> | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | > >>>> | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> NFS v4.2, iflag=direct: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | > >>>> | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | > >>>> | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> > >>>> > >>>> NFS v4.1, iflag=nocache oflag=nocache: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | > >>>> | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | > >>>> | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> NFS v4.2, iflag=nocache oflag=nocache: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | > >>>> | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | > >>>> | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> > >>>> ####################### > >>>> # # > >>>> # With READ_PLUS # > >>>> # # > >>>> ####################### > >>>> > >>>> > >>>> NFS v4.1, iflag=direct: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> 
| | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | > >>>> | Hole | 7.954s | 8.486s | 7.762s | 7.969s | 8.299s | 8.094s | > >>>> | Mixed | 19.037s | 18.323s | 18.965s | 18.156s | 19.185s | 18.733s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> NFS v4.2, iflag=direct: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 11.923s | 10.026s | 10.222s | 12.387s | 11.431s | 11.198s | > >>>> | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | > >>>> | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | > >>> > >>> That's a bit nasty. Any idea what is going on with the Mixed case here? > >> > >> Not offhand, but my first guess would be something to do with extra seeks to find how long each hole and data segment is. > > > > Remind us what "mixed" means? (I think you were alternating, but how > > large is each segment?) > > "Mixed" is alternating 4K segments. So it's probably doing 128/4 = 32 reads where previously one was necessary. You could confirm that by looking at the READ counts in /proc/self/mountstats. With odirect turned off maybe that's hidden by readahead? --b. > > > > > --b. > > > >> > >> Anna > >> > >>> > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> > >>>> > >>>> NFS v4.1, iflag=nocache oflag=nocache: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | > >>>> | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | > >>>> | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> NFS v4.2, iflag=nocache oflag=nocache: > >>>> Trial > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | > >>>> | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | > >>>> | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | > >>>> |---------|---------|---------|---------|---------|---------|---------| > >>>> > >>>> > >>>> On 03/26/2015 12:13 PM, Trond Myklebust wrote: > >>>>> On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker > >>>>> <Anna.Schumaker@netapp.com> wrote: > >>>>>> On 03/26/2015 12:06 PM, Trond Myklebust wrote: > >>>>>>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker > >>>>>>> <Anna.Schumaker@netapp.com> wrote: > >>>>>>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: > >>>>>>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: > >>>>>>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker > >>>>>>>>>> <Anna.Schumaker@netapp.com> wrote: > >>>>>>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. 
I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: > >>>>>>>>>>> > >>>>>>>>>>> ########################## > >>>>>>>>>>> # # > >>>>>>>>>>> # Without READ_PLUS # > >>>>>>>>>>> # # > >>>>>>>>>>> ########################## > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> NFS v4.1: > >>>>>>>>>>> Trial > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | > >>>>>>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | > >>>>>>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> NFS v4.2: > >>>>>>>>>>> Trial > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | > >>>>>>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | > >>>>>>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> ####################### > >>>>>>>>>>> # # > >>>>>>>>>>> # With READ_PLUS # > >>>>>>>>>>> # # > >>>>>>>>>>> ####################### > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> NFS v4.1: > >>>>>>>>>>> Trial > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | > >>>>>>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | > >>>>>>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> > >>>>>>>>>>> NFS v4.2: > >>>>>>>>>>> Trial > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | > >>>>>>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | > >>>>>>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | > >>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| > >>>>>>>>>>> > >>>>>>>>>> > >>>>>>>>>> So there is a clear win in the 100% hole case here, but otherwise the > >>>>>>>>>> statistical fluctuations are dominating the numbers. Can you get us a > >>>>>>>>>> little more stats and then perhaps run the results through nfsometer? > >>>>>>>>> > >>>>>>>>> Also, could you describe the setup (are these still kvm's), and how > >>>>>>>>> you're clearing the cache between runs? > >>>>>>>> > >>>>>>>> These are still KVMs and my server is exporting an xfs filesystem. 
I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. > >>>>>>> > >>>>>>> I agree that you have to use the 'drop_caches' interface on the > >>>>>>> server, but why not just use O_DIRECT on the clients? > >>>>>> > >>>>>> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! > >>>>>> > >>>>> > >>>>> 'dd' can do that for you if the appropriate incantations are performed. > >>>>> > >>>> > >>>> -- > >>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > >>>> the body of a message to majordomo@vger.kernel.org > >>>> More majordomo info at http://vger.kernel.org/majordomo-info.html > >>> > >>> > >>> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
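Checking that is straightforward from the client: per-operation counts,
including READ, are listed in /proc/self/mountstats for each NFS mount, and
nfsstat gives a similar summary.  A sketch, assuming a single NFS mount and
the same dd invocation used earlier in the thread:

    grep 'READ:' /proc/self/mountstats         # op count before the run
    dd if=/nfs/file iflag=direct of=/dev/null bs=128K
    grep 'READ:' /proc/self/mountstats         # op count afterwards

    nfsstat -c                                 # per-op client RPC counts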
On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote:
> So there is a clear win in the 100% hole case here, but otherwise the
> statistical fluctuations are dominating the numbers. Can you get us a
> little more stats and then perhaps run the results through nfsometer?

And that's just the uncached reads if I understand the thread correctly. The cached case isn't uncommon in real life, so regressing it isn't really an option either.
I just ran some more tests comparing the directio case across different filesystem types. These tests used three 1G files: 100% data, 100% hole, and mixed file with alternating 4k data and hole segments. The mixed case seems to be consistently slower compared to NFS v4.1, and I'm at a loss for anything I could do to make it faster. Here are my numbers: ########### # # # XFS # # # ########### NFS v4.1: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 1.883s | 1.808s | 1.781s | 1.685s | 1.591s | 1.746s | | Hole | 1.815s | 1.635s | 1.682s | 1.698s | 1.653s | 1.697s | | Mixed | 2.089s | 2.024s | 1.970s | 1.925s | 2.049s | 2.011s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 1.849s | 1.879s | 1.852s | 1.799s | 1.781s | 1.832s | | Hole | 0.668s | 0.600s | 0.611s | 0.619s | 0.617s | 0.623s | | Mixed | 5.913s | 5.811s | 5.952s | 5.962s | 5.806s | 5.889s | |---------|---------|---------|---------|---------|---------|---------| ############ # # # EXT4 # # # ############ NFS v4.1: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 2.637s | 1.823s | 1.792s | 1.816s | 2.000s | 2.014s | | Hole | 1.734s | 1.743s | 1.709s | 1.761s | 1.871s | 1.764s | | Mixed | 5.465s | 2.158s | 2.254s | 2.676s | 2.422s | 2.995s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 1.934s | 1.783s | 1.800s | 2.010s | 1.982s | 1.902s | | Hole | 63.568s | 63.423s | 64.671s | 66.190s | 65.985s | 64.767s | | Mixed | 6.010s | 5.798s | 6.146s | 6.460s | 6.720s | 6.225s | |---------|---------|---------|---------|---------|---------|---------| ############# # # # BTRFS # # # ############# NFS v4.1: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 2.386s | 1.952s | 1.832s | 1.818s | 1.826s | 1.963s | | Hole | 1.759s | 1.717s | 1.754s | 1.621s | 1.708s | 1.712s | | Mixed | 2.889s | 2.272s | 2.778s | 2.277s | 2.255s | 2.494s | |---------|---------|---------|---------|---------|---------|---------| NFS v4.2: Trial |---------|---------|---------|---------|---------|---------|---------| | | 1 | 2 | 3 | 4 | 5 | Average | |---------|---------|---------|---------|---------|---------|---------| | Data | 2.586s | 1.816s | 2.022s | 1.862s | 1.975s | 2.052s | | Hole | 0.646s | 0.659s | 0.669s | 0.628s | 0.605s | 0.641s | | Mixed | 8.555s | 8.553s | 7.904s | 8.567s | 8.286s | 8.373s | |---------|---------|---------|---------|---------|---------|---------| On 03/27/2015 05:08 PM, J. Bruce Fields wrote: > On Fri, Mar 27, 2015 at 04:55:26PM -0400, Anna Schumaker wrote: >> On 03/27/2015 04:54 PM, J. 
Bruce Fields wrote: >>> On Fri, Mar 27, 2015 at 04:46:55PM -0400, Anna Schumaker wrote: >>>> On 03/27/2015 04:22 PM, Trond Myklebust wrote: >>>>> On Fri, Mar 27, 2015 at 3:04 PM, Anna Schumaker >>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>> I did two separate dd tests with the same 5G file from yesterday, and still using the same virtual machines. First, I ran dd using direct IO for reads: >>>>>> dd if=/nfs/file iflag=direct of=/dev/null bs=128K >>>>>> >>>>>> Mixed file performance was awful, so I reran without direct IO enabled for comparison: >>>>>> dd if=/nfs/file iflag=nocache of=/dev/null oflag=nocache bs=128K >>>>>> >>>>>> bs=128K sets the block size used by dd to the NFS rsize, without this dd will only read 512 bytes at a time and take forever to complete. >>>>>> >>>>>> >>>>>> ########################## >>>>>> # # >>>>>> # Without READ_PLUS # >>>>>> # # >>>>>> ########################## >>>>>> >>>>>> >>>>>> NFS v4.1, iflag=direct: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 11.704s | 11.055s | 11.329s | 11.453s | 10.741s | 11.256s | >>>>>> | Hole | 9.839s | 9.326s | 9.381s | 9.430s | 8.875s | 9.370s | >>>>>> | Mixed | 19.150s | 19.468s | 18.650s | 18.537s | 19.312s | 19.023s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> NFS v4.2, iflag=direct: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 10.927s | 10.885s | 11.114s | 11.283s | 10.371s | 10.916s | >>>>>> | Hole | 9.515s | 9.039s | 9.116s | 8.867s | 8.905s | 9.088s | >>>>>> | Mixed | 19.149s | 18.656s | 19.400s | 18.834s | 20.041s | 19.216s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> NFS v4.1, iflag=nocache oflag=nocache: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 6.808s | 6.698s | 7.482s | 6.761s | 7.235s | 6.995s | >>>>>> | Hole | 5.350s | 5.148s | 5.161s | 5.070s | 5.089s | 5.164s | >>>>>> | Mixed | 9.316s | 8.731s | 9.072s | 9.145s | 8.627s | 8.978s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> NFS v4.2, iflag=nocache oflag=nocache: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 6.686s | 6.848s | 6.876s | 6.799s | 7.815s | 7.004s | >>>>>> | Hole | 5.092s | 5.330s | 5.050s | 5.280s | 5.030s | 5.156s | >>>>>> | Mixed | 8.142s | 7.897s | 8.040s | 7.960s | 8.050s | 8.018s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> ####################### >>>>>> # # >>>>>> # With READ_PLUS # >>>>>> # # >>>>>> ####################### >>>>>> >>>>>> >>>>>> NFS v4.1, iflag=direct: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> 
|---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 9.464s | 10.181s | 10.048s | 9.452s | 10.795s | 9.988s | >>>>>> | Hole | 7.954s | 8.486s | 7.762s | 7.969s | 8.299s | 8.094s | >>>>>> | Mixed | 19.037s | 18.323s | 18.965s | 18.156s | 19.185s | 18.733s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> NFS v4.2, iflag=direct: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 11.923s | 10.026s | 10.222s | 12.387s | 11.431s | 11.198s | >>>>>> | Hole | 3.247s | 3.155s | 3.191s | 3.243s | 3.202s | 3.208s | >>>>>> | Mixed | 54.677s | 54.697s | 52.978s | 53.704s | 54.054s | 54.022s | >>>>> >>>>> That's a bit nasty. Any idea what is going on with the Mixed case here? >>>> >>>> Not offhand, but my first guess would be something to do with extra seeks to find how long each hole and data segment is. >>> >>> Remind us what "mixed" means? (I think you were alternating, but how >>> large is each segment?) >> >> "Mixed" is alternating 4K segments. > > So it's probably doing 128/4 = 32 reads where previously one was > necessary. You could confirm that by looking at the READ counts in > /proc/self/mountstats. With odirect turned off maybe that's hidden by > readahead? > > --b. > >> >>> >>> --b. >>> >>>> >>>> Anna >>>> >>>>> >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> NFS v4.1, iflag=nocache oflag=nocache: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 6.788s | 6.802s | 6.750s | 6.756s | 6.852s | 6.790s | >>>>>> | Hole | 5.143s | 5.165s | 5.104s | 5.154s | 5.116s | 5.136s | >>>>>> | Mixed | 7.902s | 7.693s | 9.169s | 8.186s | 9.157s | 8.421s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> NFS v4.2, iflag=nocache oflag=nocache: >>>>>> Trial >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> | Data | 6.897s | 6.862s | 7.054s | 6.961s | 7.081s | 6.971s | >>>>>> | Hole | 1.690s | 1.673s | 1.553s | 1.554s | 1.490s | 1.592s | >>>>>> | Mixed | 9.009s | 7.840s | 7.661s | 8.945s | 7.649s | 8.221s | >>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>> >>>>>> >>>>>> On 03/26/2015 12:13 PM, Trond Myklebust wrote: >>>>>>> On Thu, Mar 26, 2015 at 12:11 PM, Anna Schumaker >>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>> On 03/26/2015 12:06 PM, Trond Myklebust wrote: >>>>>>>>> On Thu, Mar 26, 2015 at 11:47 AM, Anna Schumaker >>>>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>>>> On 03/26/2015 11:38 AM, J. Bruce Fields wrote: >>>>>>>>>>> On Thu, Mar 26, 2015 at 11:32:25AM -0400, Trond Myklebust wrote: >>>>>>>>>>>> On Thu, Mar 26, 2015 at 11:21 AM, Anna Schumaker >>>>>>>>>>>> <Anna.Schumaker@netapp.com> wrote: >>>>>>>>>>>>> Here are my updated numbers! I tested with files 5G in size: one 100% data, one 100% hole, and one alternating between hole and data every 4K. 
I collected data for both v4.1 and v4.2 with and without the READ_PLUS patches: >>>>>>>>>>>>> >>>>>>>>>>>>> ########################## >>>>>>>>>>>>> # # >>>>>>>>>>>>> # Without READ_PLUS # >>>>>>>>>>>>> # # >>>>>>>>>>>>> ########################## >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> NFS v4.1: >>>>>>>>>>>>> Trial >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | Data | 8.723s | 7.243s | 8.252s | 6.997s | 6.980s | 7.639s | >>>>>>>>>>>>> | Hole | 5.271s | 5.224s | 5.060s | 4.897s | 5.321s | 5.155s | >>>>>>>>>>>>> | Mixed | 8.050s | 10.057s | 7.919s | 8.060s | 9.557s | 8.729s | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> NFS v4.2: >>>>>>>>>>>>> Trial >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | Data | 6.707s | 7.070s | 6.722s | 6.761s | 6.810s | 6.814s | >>>>>>>>>>>>> | Hole | 5.152s | 5.149s | 5.213s | 5.206s | 5.312s | 5.206s | >>>>>>>>>>>>> | Mixed | 7.979s | 7.985s | 8.177s | 7.772s | 8.280s | 8.039s | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> ####################### >>>>>>>>>>>>> # # >>>>>>>>>>>>> # With READ_PLUS # >>>>>>>>>>>>> # # >>>>>>>>>>>>> ####################### >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> NFS v4.1: >>>>>>>>>>>>> Trial >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | Data | 9.082s | 7.008s | 7.116s | 6.771s | 7.902s | 7.576s | >>>>>>>>>>>>> | Hole | 5.333s | 5.358s | 5.380s | 5.161s | 5.282s | 5.303s | >>>>>>>>>>>>> | Mixed | 8.189s | 8.308s | 9.540s | 7.937s | 8.420s | 8.479s | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>> NFS v4.2: >>>>>>>>>>>>> Trial >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | | 1 | 2 | 3 | 4 | 5 | Average | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> | Data | 7.033s | 6.829s | 7.025s | 6.873s | 7.134s | 6.979s | >>>>>>>>>>>>> | Hole | 1.794s | 1.800s | 1.905s | 1.811s | 1.725s | 1.807s | >>>>>>>>>>>>> | Mixed | 7.590s | 8.777s | 9.423s | 10.366s | 8.024s | 8.836s | >>>>>>>>>>>>> |---------|---------|---------|---------|---------|---------|---------| >>>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>> So there is a clear win in the 100% hole case here, but otherwise the >>>>>>>>>>>> statistical fluctuations are dominating the numbers. Can you get us a >>>>>>>>>>>> little more stats and then perhaps run the results through nfsometer? >>>>>>>>>>> >>>>>>>>>>> Also, could you describe the setup (are these still kvm's), and how >>>>>>>>>>> you're clearing the cache between runs? >>>>>>>>>> >>>>>>>>>> These are still KVMs and my server is exporting an xfs filesystem. 
I clear caches by running "echo 3 > /proc/sys/vm/drop_caches" on the server before every read, and I remount my client after reading each set of three files once. >>>>>>>>> >>>>>>>>> I agree that you have to use the 'drop_caches' interface on the >>>>>>>>> server, but why not just use O_DIRECT on the clients? >>>>>>>> >>>>>>>> I've been reading by using cat from my test shell script: `time cat /nfs/file > /dev/null`. I can write something to read files with O_DIRECT if that would be more useful! >>>>>>>> >>>>>>> >>>>>>> 'dd' can do that for you if the appropriate incantations are performed. >>>>>>> >>>>>> >>>>>> -- >>>>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >>>>>> the body of a message to majordomo@vger.kernel.org >>>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html >>>>> >>>>> >>>>> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
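For reproducibility, a minimal sketch of how the hole-only and mixed test files described in this thread can be laid out. The file names, the 1G size, the fill byte, and the use of pwrite()/ftruncate() are assumptions for illustration; whether the unwritten ranges are actually stored as holes depends on the exported filesystem:

#define _GNU_SOURCE
#include <sys/types.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <err.h>

#define FILE_SIZE (1024LL * 1024 * 1024)        /* 1G, as in the tests above */
#define CHUNK     4096                          /* 4k data/hole alternation */

int main(void)
{
        char data[CHUNK];
        off_t off;
        int fd;

        memset(data, 0xaa, sizeof(data));

        /* 100% hole: set the length and allocate nothing. */
        fd = open("hole-file", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd == -1 || ftruncate(fd, FILE_SIZE) == -1)
                err(1, "hole-file");
        close(fd);

        /* Mixed: write every other 4k chunk, leave the gaps unwritten. */
        fd = open("mixed-file", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd == -1)
                err(1, "mixed-file");
        for (off = 0; off < FILE_SIZE; off += 2 * CHUNK)
                if (pwrite(fd, data, CHUNK, off) != CHUNK)
                        err(1, "pwrite");
        if (ftruncate(fd, FILE_SIZE) == -1)
                err(1, "ftruncate");
        close(fd);
        return 0;
}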
On Wed, Apr 15, 2015 at 03:32:02PM -0400, Anna Schumaker wrote: > I just ran some more tests comparing the directio case across > different filesystem types. These tests used three 1G files: 100% > data, 100% hole, and mixed file with alternating 4k data and hole > segments. The mixed case seems to be consistently slower compared to > NFS v4.1, and I'm at a loss for anything I could do to make it faster. > Here are my numbers: Have you tried the implementation we discussed that always returns a single segment covering the whole requested range, by treating holes as data if necessary when they don't cover the whole range? (Also: I assume it's the same as before, but: when you post test results, could you repost if necessary: - what the actual test is - what the hardware/software setup is on client and server so that we have reproduceable results for posterity's sake.) Interesting that "Mixed" is a little slower even before READ_PLUS. And I guess we should really report this to ext4 people, looks like they may have a bug. --b. > > ########### > # # > # XFS # > # # > ########### > > > NFS v4.1: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 1.883s | 1.808s | 1.781s | 1.685s | 1.591s | 1.746s | > | Hole | 1.815s | 1.635s | 1.682s | 1.698s | 1.653s | 1.697s | > | Mixed | 2.089s | 2.024s | 1.970s | 1.925s | 2.049s | 2.011s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 1.849s | 1.879s | 1.852s | 1.799s | 1.781s | 1.832s | > | Hole | 0.668s | 0.600s | 0.611s | 0.619s | 0.617s | 0.623s | > | Mixed | 5.913s | 5.811s | 5.952s | 5.962s | 5.806s | 5.889s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > > ############ > # # > # EXT4 # > # # > ############ > > > NFS v4.1: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 2.637s | 1.823s | 1.792s | 1.816s | 2.000s | 2.014s | > | Hole | 1.734s | 1.743s | 1.709s | 1.761s | 1.871s | 1.764s | > | Mixed | 5.465s | 2.158s | 2.254s | 2.676s | 2.422s | 2.995s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 1.934s | 1.783s | 1.800s | 2.010s | 1.982s | 1.902s | > | Hole | 63.568s | 63.423s | 64.671s | 66.190s | 65.985s | 64.767s | > | Mixed | 6.010s | 5.798s | 6.146s | 6.460s | 6.720s | 6.225s | > |---------|---------|---------|---------|---------|---------|---------| > > > > > > ############# > # # > # BTRFS # > # # > ############# > > > NFS v4.1: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 2.386s | 1.952s | 1.832s | 1.818s | 1.826s | 1.963s | > | Hole | 1.759s | 1.717s | 1.754s | 1.621s | 1.708s | 1.712s | > | Mixed | 2.889s | 2.272s | 2.778s | 2.277s | 2.255s | 2.494s | > 
|---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 2.586s | 1.816s | 2.022s | 1.862s | 1.975s | 2.052s | > | Hole | 0.646s | 0.659s | 0.669s | 0.628s | 0.605s | 0.641s | > | Mixed | 8.555s | 8.553s | 7.904s | 8.567s | 8.286s | 8.373s | > |---------|---------|---------|---------|---------|---------|---------| -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
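To make the single-segment fallback suggested above concrete, here is a userspace sketch of the decision it implies, using SEEK_DATA. The helper name and the userspace framing are assumptions for illustration only; in nfsd the equivalent check would sit in the READ_PLUS encoder and use vfs_llseek(), as in the patch at the end of this thread:

#define _GNU_SOURCE
#include <unistd.h>
#include <errno.h>

enum seg_type { SEG_DATA, SEG_HOLE };

/*
 * Return SEG_HOLE only if the whole range [offset, offset + len) falls
 * within a hole; otherwise treat the entire range as one data segment,
 * even if parts of it are sparse.
 */
static enum seg_type classify_range(int fd, off_t offset, off_t len)
{
        off_t data_pos = lseek(fd, offset, SEEK_DATA);

        /* ENXIO: no data at or beyond offset, so the range is all hole.
         * On any other error, fall back to treating the range as data. */
        if (data_pos == (off_t)-1)
                return errno == ENXIO ? SEG_HOLE : SEG_DATA;

        /* The next data starts past the requested range: all hole. */
        if (data_pos >= offset + len)
                return SEG_HOLE;

        /* Otherwise encode the whole range as a single data segment. */
        return SEG_DATA;
}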
On Wed, Apr 15, 2015 at 03:56:14PM -0400, J. Bruce Fields wrote:
> On Wed, Apr 15, 2015 at 03:32:02PM -0400, Anna Schumaker wrote:
> > I just ran some more tests comparing the directio case across
> > different filesystem types. These tests used three 1G files: 100%
> > data, 100% hole, and mixed file with alternating 4k data and hole
> > segments. The mixed case seems to be consistently slower compared to
> > NFS v4.1, and I'm at a loss for anything I could do to make it faster.
> > Here are my numbers:
>
> Have you tried the implementation we discussed that always returns a
> single segment covering the whole requested range, by treating holes as
> data if necessary when they don't cover the whole range?
>
> (Also: I assume it's the same as before, but: when you post test
> results, could you repost if necessary:
>
> - what the actual test is
> - what the hardware/software setup is on client and server
>
> so that we have reproduceable results for posterity's sake.)
>
> Interesting that "Mixed" is a little slower even before READ_PLUS.
>
> And I guess we should really report this to ext4 people, looks like they
> may have a bug.

FWIW, this is what I was using to test SEEK_HOLE/SEEK_DATA and map out
holes on files on my local disk. Might be worth checking whether the
ext4 slowdowns are reproduceable just with something like this, to rule
out protocol problems.

--b.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>

long round_up(long n, long b)
{
        return ((n + b - 1)/b) * b;
}

long round_down(long n, long b)
{
        return (n/b) * b;
}

long hbytes = 0;
long rplusbytes = 0;
long num_holes = 0;

void do_stats(off_t hole_start, off_t hole_end)
{
        off_t hole_start_up, hole_end_down;

        hole_start_up = round_up(hole_start, 1024*1024);
        hole_end_down = round_down(hole_end, 1024*1024);

        hbytes += hole_end - hole_start;
        if (hole_start < hole_end)
                num_holes++;
        if (hole_start_up < hole_end_down)
                rplusbytes += hole_end_down - hole_start_up;
}

int main(int argc, char *argv[])
{
        off_t hole_start, hole_end;
        int fd;
        char *name;

        /* Map out holes with SEEK_HOLE, SEEK_DATA */
        /* Useful statistics:
         *      - what percentage of file is in holes?
         *      - what percentage of file would be skipped if we read it
         *        sequentially in 1MB chunks?
         */

        if (argc != 2)
                errx(1, "usage: %s <filename>\n", argv[0]);
        name = argv[1];

        fd = open(name, O_RDONLY);
        if (fd == -1)
                err(1, "open");
        hole_end = 0;
        while (1) {
                hole_start = lseek(fd, hole_end, SEEK_HOLE);
                if (hole_start == -1)
                        err(1, "lseek");
                hole_end = lseek(fd, hole_start, SEEK_DATA);
                if (hole_end == -1) {
                        if (errno == ENXIO)
                                break;
                        err(1, "lseek");
                }
                do_stats(hole_start, hole_end);
        }
        hole_end = lseek(fd, 0, SEEK_END);
        do_stats(hole_start, hole_end);
        printf("%ld holes\n", num_holes);
        printf("total hole bytes: %ld (%.0f%%)\n", hbytes,
                        100 * (float)hbytes/hole_end);
        printf("in aligned 1MB chunks: %ld (%.0f%%)\n", rplusbytes,
                        100 * (float)rplusbytes/hole_end);
}
On Wed, Apr 15, 2015 at 04:00:16PM -0400, J. Bruce Fields wrote:
> On Wed, Apr 15, 2015 at 03:56:14PM -0400, J. Bruce Fields wrote:
> > On Wed, Apr 15, 2015 at 03:32:02PM -0400, Anna Schumaker wrote:
> > > I just ran some more tests comparing the directio case across
> > > different filesystem types. These tests used three 1G files: 100%
> > > data, 100% hole, and mixed file with alternating 4k data and hole
> > > segments. The mixed case seems to be consistently slower compared to
> > > NFS v4.1, and I'm at a loss for anything I could do to make it faster.
> > > Here are my numbers:
> >
> > Have you tried the implementation we discussed that always returns a
> > single segment covering the whole requested range, by treating holes as
> > data if necessary when they don't cover the whole range?
> >
> > (Also: I assume it's the same as before, but: when you post test
> > results, could you repost if necessary:
> >
> > - what the actual test is
> > - what the hardware/software setup is on client and server
> >
> > so that we have reproduceable results for posterity's sake.)
> >
> > Interesting that "Mixed" is a little slower even before READ_PLUS.
> >
> > And I guess we should really report this to ext4 people, looks like they
> > may have a bug.
>
> FWIW, this is what I was using to test SEEK_HOLE/SEEK_DATA and map out
> holes on files on my local disk. Might be worth checking whether the
> ext4 slowdowns are reproduceable just with something like this, to rule
> out protocol problems.

Wheel reinvention. :)

$ rm -f /mnt/scratch/bar
$ for i in `seq 20 -2 0`; do
> sudo xfs_io -f -c "pwrite $((i * 8192)) 4096" /mnt/scratch/bar
> done
.....
$ sync
$ sudo xfs_io -c "seek -ar 0" /mnt/scratch/bar
Whence  Result
DATA    0
HOLE    4096
DATA    16384
HOLE    20480
DATA    32768
HOLE    36864
DATA    49152
HOLE    53248
DATA    65536
HOLE    69632
DATA    81920
HOLE    86016
DATA    98304
HOLE    102400
DATA    114688
HOLE    118784
DATA    131072
HOLE    135168
DATA    147456
HOLE    151552
DATA    163840
HOLE    167936
$

-Dave.
On Wed, Apr 15, 2015 at 03:32:02PM -0400, Anna Schumaker wrote: > I just ran some more tests comparing the directio case across > different filesystem types. These tests used three 1G files: > 100% data, 100% hole, and mixed file with alternating 4k data and > hole segments. The mixed case seems to be consistently slower > compared to NFS v4.1, and I'm at a loss for anything I could do to > make it faster. Here are my numbers: > > ########### > # # > # XFS # > # # > ########### > > > NFS v4.1: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 1.883s | 1.808s | 1.781s | 1.685s | 1.591s | 1.746s | > | Hole | 1.815s | 1.635s | 1.682s | 1.698s | 1.653s | 1.697s | > | Mixed | 2.089s | 2.024s | 1.970s | 1.925s | 2.049s | 2.011s | > |---------|---------|---------|---------|---------|---------|---------| > > > NFS v4.2: > Trial > |---------|---------|---------|---------|---------|---------|---------| > | | 1 | 2 | 3 | 4 | 5 | Average | > |---------|---------|---------|---------|---------|---------|---------| > | Data | 1.849s | 1.879s | 1.852s | 1.799s | 1.781s | 1.832s | > | Hole | 0.668s | 0.600s | 0.611s | 0.619s | 0.617s | 0.623s | > | Mixed | 5.913s | 5.811s | 5.952s | 5.962s | 5.806s | 5.889s | > |---------|---------|---------|---------|---------|---------|---------| What that says to me is that the READ_PLUS when there are (worst case) mixed holes is either burning a lot more CPU than we expected or it is serialising somewhere (not sure where, everything in XFS should be shared locks on read/seek). Can you run a perf profile (even just a snapshot from perf top) on the server so we can see a bit about what is happening on the CPU for the different workloads? Cheers, Dave.
On Thu, Apr 16, 2015 at 08:50:02AM +1000, Dave Chinner wrote: > On Wed, Apr 15, 2015 at 04:00:16PM -0400, J. Bruce Fields wrote: > > On Wed, Apr 15, 2015 at 03:56:14PM -0400, J. Bruce Fields wrote: > > > On Wed, Apr 15, 2015 at 03:32:02PM -0400, Anna Schumaker wrote: > > > > I just ran some more tests comparing the directio case across > > > > different filesystem types. These tests used three 1G files: 100% > > > > data, 100% hole, and mixed file with alternating 4k data and hole > > > > segments. The mixed case seems to be consistently slower compared to > > > > NFS v4.1, and I'm at a loss for anything I could do to make it faster. > > > > Here are my numbers: > > > > > > Have you tried the implementation we discussed that always returns a > > > single segment covering the whole requested range, by treating holes as > > > data if necessary when they don't cover the whole range? Uh, sorry, I forgot, I think you're running with the patches that support full multi-segment READ_PLUS on both sides so there's not that issue with multiplying RPC's in this case. Still, might be interesting to compare. And wouldn't hurt to remind us of these details when you repost this stuff to help keep my forgetful self going in circles. > > > (Also: I assume it's the same as before, but: when you post test > > > results, could you repost if necessary: > > > > > > - what the actual test is > > > - what the hardware/software setup is on client and server > > > > > > so that we have reproduceable results for posterity's sake.) > > > > > > Interesting that "Mixed" is a little slower even before READ_PLUS. > > > > > > And I guess we should really report this to ext4 people, looks like they > > > may have a bug. > > > > FWIW, this is what I was using to test SEEK_HOLE/SEEK_DATA and map out > > holes on files on my local disk. Might be worth checking whether the > > ext4 slowdowns are reproduceable just with something like this, to rule > > out protocol problems. > > Wheel reinvention. :) xfs_io appears to have a lot of wheels. OK, I'll go read that man page one of these days. --b. > > $ rm -f /mnt/scratch/bar > $ for i in `seq 20 -2 0`; do > > sudo xfs_io -f -c "pwrite $((i * 8192)) 4096" /mnt/scratch/bar > > done > ..... > $ sync > $ sudo xfs_io -c "seek -ar 0" /mnt/scratch/bar > Whence Result > DATA 0 > HOLE 4096 > DATA 16384 > HOLE 20480 > DATA 32768 > HOLE 36864 > DATA 49152 > HOLE 53248 > DATA 65536 > HOLE 69632 > DATA 81920 > HOLE 86016 > DATA 98304 > HOLE 102400 > DATA 114688 > HOLE 118784 > DATA 131072 > HOLE 135168 > DATA 147456 > HOLE 151552 > DATA 163840 > HOLE 167936 > $ > > -Dave. > -- > Dave Chinner > david@fromorbit.com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e9f4d8f..6801973 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1862,8 +1862,8 @@ static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op
 {
         u32 maxcount = svc_max_payload(rqstp);
         u32 rlen = min(op->u.read.rd_length, maxcount);
-        /* enough extra xdr space for encoding either a hole or data segment. */
-        u32 xdr = 5;
+        /* Extra xdr padding for encoding multiple segments. */
+        u32 xdr = 20;
 
         return (op_encode_hdr_size + 2 + xdr + XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 799d52c..5eaecd2 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4117,7 +4117,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
-                struct file *file)
+                struct file *file, loff_t hole_pos)
 {
         __be32 *p, err;
         unsigned long maxcount;
@@ -4128,20 +4128,26 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *r
                 return nfserr_resource;
         xdr_commit_encode(xdr);
 
+        if (hole_pos <= read->rd_offset)
+                hole_pos = i_size_read(file_inode(file));
+
         maxcount = svc_max_payload(resp->rqstp);
         maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
         maxcount = min_t(unsigned long, maxcount, read->rd_length);
+        maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset);
 
         if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
                 err = nfsd4_encode_splice_read(resp, read, file, &maxcount);
         else
                 err = nfsd4_encode_readv(resp, read, file, &maxcount);
+        clear_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
 
         *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
         p = xdr_encode_hyper(p, read->rd_offset);
         *p++ = cpu_to_be32(maxcount);
 
         read->rd_offset += maxcount;
+        read->rd_length -= maxcount;
         return err;
 }
 
@@ -4156,7 +4162,7 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *r
         if (data_pos == -ENXIO)
                 data_pos = i_size_read(file_inode(file));
         if (data_pos <= read->rd_offset)
-                return nfsd4_encode_read_plus_data(resp, read, file);
+                return nfsd4_encode_read_plus_data(resp, read, file, 0);
 
         maxcount = data_pos - read->rd_offset;
         p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
@@ -4165,6 +4171,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *r
         p = xdr_encode_hyper(p, maxcount);
 
         read->rd_offset += maxcount;
+        if (maxcount > read->rd_length)
+                read->rd_length = 0;
+        else
+                read->rd_length -= maxcount;
         return nfs_ok;
 }
 
@@ -4197,17 +4207,20 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
                 goto err_truncate;
         }
 
-        hole_pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
-        if (hole_pos == -ENXIO)
-                goto out_encode;
+        do {
+                hole_pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+                if (hole_pos == -ENXIO)
+                        break;
 
-        if (hole_pos == read->rd_offset)
-                err = nfsd4_encode_read_plus_hole(resp, read, file);
-        else
-                err = nfsd4_encode_read_plus_data(resp, read, file);
-        segments++;
+                if (hole_pos == read->rd_offset)
+                        err = nfsd4_encode_read_plus_hole(resp, read, file);
+                else
+                        err = nfsd4_encode_read_plus_data(resp, read, file, hole_pos);
+                if (err)
+                        break;
+                segments++;
+        } while (read->rd_length > 0);
 
-out_encode:
         eof = (read->rd_offset >= i_size_read(file_inode(file)));
         *p++ = cpu_to_be32(eof);
         *p++ = cpu_to_be32(segments);
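As a way to visualize what the encoder loop above produces, here is a userspace model that walks a byte range with SEEK_HOLE/SEEK_DATA and prints the DATA/HOLE segments a multi-segment READ_PLUS reply would carry. It is only a sketch: the program and its arguments are made up for illustration, EOF is simply treated as the end of the final hole, and none of the XDR or rsize limits enforced by the server code are modeled:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>

int main(int argc, char *argv[])
{
        off_t offset, end, hole, data;
        int fd;

        if (argc != 4)
                errx(1, "usage: %s <file> <offset> <length>", argv[0]);

        fd = open(argv[1], O_RDONLY);
        if (fd == -1)
                err(1, "open");

        offset = atoll(argv[2]);
        end = offset + atoll(argv[3]);

        while (offset < end) {
                /* Where does the next hole start (EOF counts as a hole)? */
                hole = lseek(fd, offset, SEEK_HOLE);
                if (hole == (off_t)-1) {
                        if (errno == ENXIO)     /* offset is at or past EOF */
                                break;
                        err(1, "lseek");
                }
                if (hole == offset) {
                        /* Sitting in a hole: it runs until the next data. */
                        data = lseek(fd, offset, SEEK_DATA);
                        if (data == (off_t)-1) {
                                if (errno != ENXIO)
                                        err(1, "lseek");
                                data = end;     /* hole runs to EOF */
                        }
                        if (data > end)
                                data = end;
                        printf("HOLE offset=%lld length=%lld\n",
                               (long long)offset, (long long)(data - offset));
                        offset = data;
                } else {
                        /* Sitting in data: it runs until the next hole. */
                        if (hole > end)
                                hole = end;
                        printf("DATA offset=%lld length=%lld\n",
                               (long long)offset, (long long)(hole - offset));
                        offset = hole;
                }
        }
        return 0;
}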
This patch implements sending an array of segments back to the client. Clients should be prepared to handle multiple segment reads to make this useful. We try to splice the first data segment into the XDR result, and remaining segments are encoded directly.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfsd/nfs4proc.c |  4 ++--
 fs/nfsd/nfs4xdr.c  | 35 ++++++++++++++++++++++++-----------
 2 files changed, 26 insertions(+), 13 deletions(-)