From patchwork Sun Jun 12 23:43:55 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jim Rees X-Patchwork-Id: 873302 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.4) with ESMTP id p5CNhq9n007930 for ; Sun, 12 Jun 2011 23:44:15 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754345Ab1FLXn7 (ORCPT ); Sun, 12 Jun 2011 19:43:59 -0400 Received: from int-mailstore01.merit.edu ([207.75.116.232]:60576 "EHLO int-mailstore01.merit.edu" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754233Ab1FLXn6 (ORCPT ); Sun, 12 Jun 2011 19:43:58 -0400 Received: from localhost (localhost.localdomain [127.0.0.1]) by int-mailstore01.merit.edu (Postfix) with ESMTP id EEB98308EFBD; Sun, 12 Jun 2011 19:43:57 -0400 (EDT) X-Virus-Scanned: amavisd-new at int-mailstore01.merit.edu Received: from int-mailstore01.merit.edu ([127.0.0.1]) by localhost (int-mailstore01.merit.edu [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id FmV-DCEZQ6bo; Sun, 12 Jun 2011 19:43:57 -0400 (EDT) Received: from merit.edu (74-126-0-171.static.123.net [74.126.0.171]) by int-mailstore01.merit.edu (Postfix) with ESMTPSA id 8AAB3308EFA1; Sun, 12 Jun 2011 19:43:56 -0400 (EDT) X-Mailbox-Line: From ea5e2bf59d726fbfec6f63af9eda83dbc15cb96d Mon Sep 17 00:00:00 2001 Message-Id: In-Reply-To: References: Date: Sun, 12 Jun 2011 19:43:55 -0400 Subject: [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation From: Jim Rees To: linux-nfs@vger.kernel.org Cc: peter honeyman Sender: linux-nfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Sun, 12 Jun 2011 23:44:16 +0000 (UTC) From: Peng Tao Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Reported-by: Alexandros Batsakis Signed-off-by: Andy Adamson Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Peng Tao --- fs/nfs/file.c | 26 ++++++++++- fs/nfs/pnfs.c | 41 +++++++++++++++++ fs/nfs/pnfs.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 12 +++-- include/linux/nfs_fs.h | 3 +- 5 files changed, 189 insertions(+), 8 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2f093ed..1768762 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -384,12 +384,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; int once_thru = 0; + struct pnfs_layout_segment *lseg; dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); - + lseg = pnfs_update_layout(mapping->host, + nfs_file_open_context(file), + pos, len, IOMODE_RW, GFP_NOFS); start: /* * Prevent starvation issues if someone is doing a consistency @@ -409,6 +412,9 @@ start: if (ret) { unlock_page(page); page_cache_release(page); + *pagep = NULL; + *fsdata = NULL; + goto out; } else if (!once_thru && nfs_want_read_modify_write(file, page, pos, len)) { once_thru = 1; @@ -417,6 +423,12 @@ start: if (!ret) goto start; } + ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata); + out: + if (ret) { + put_lseg(lseg); + *fsdata = NULL; + } return ret; } @@ -426,6 +438,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, { unsigned offset = pos & (PAGE_CACHE_SIZE - 1); int status; + struct pnfs_layout_segment *lseg; dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", file->f_path.dentry->d_parent->d_name.name, @@ -452,10 +465,17 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, zero_user_segment(page, pglen, PAGE_CACHE_SIZE); } - status = nfs_updatepage(file, page, offset, copied); + lseg = nfs4_pull_lseg_from_fsdata(file, fsdata); + status = pnfs_write_end(file, page, pos, len, copied, lseg); + if (status) + goto out; + status = nfs_updatepage(file, page, offset, copied, lseg, fsdata); +out: unlock_page(page); page_cache_release(page); + pnfs_write_end_cleanup(file, fsdata); + put_lseg(lseg); if (status < 0) return status; @@ -577,7 +597,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_LOCKED; if (nfs_flush_incompatible(filp, page) == 0 && - nfs_updatepage(filp, page, 0, pagelen) == 0) + nfs_updatepage(filp, page, 0, pagelen, NULL, NULL) == 0) goto out; ret = VM_FAULT_SIGBUS; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f03a5e0..e693718 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1138,6 +1138,41 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, } /* + * This gives the layout driver an opportunity to read in page "around" + * the data to be written. It returns 0 on success, otherwise an error code + * which will either be passed up to user, or ignored if + * some previous part of write succeeded. + * Note the range [pos, pos+len-1] is entirely within the page. + */ +int _pnfs_write_begin(struct inode *inode, struct page *page, + loff_t pos, unsigned len, + struct pnfs_layout_segment *lseg, + struct pnfs_fsdata **fsdata) +{ + struct pnfs_fsdata *data; + int status = 0; + + dprintk("--> %s: pos=%llu len=%u\n", + __func__, (unsigned long long)pos, len); + data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out; + } + data->lseg = lseg; /* refcount passed into data to be managed there */ + status = NFS_SERVER(inode)->pnfs_curr_ld->write_begin( + lseg, page, pos, len, data); + if (status) { + kfree(data); + data = NULL; + } +out: + *fsdata = data; + dprintk("<-- %s: status=%d\n", __func__, status); + return status; +} + +/* * Called by non rpc-based layout drivers */ int @@ -1237,6 +1272,12 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); +void pnfs_free_fsdata(struct pnfs_fsdata *fsdata) +{ + /* lseg refcounting handled directly in nfs_write_end */ + kfree(fsdata); +} + /* * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a3fc0f2..525ec55 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -54,6 +54,12 @@ enum pnfs_try_status { PNFS_NOT_ATTEMPTED = 1, }; +struct pnfs_fsdata { + struct pnfs_layout_segment *lseg; + int bypass_eof; + void *private; +}; + #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" @@ -106,6 +112,14 @@ struct pnfs_layoutdriver_type { */ enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); + int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page, + loff_t pos, unsigned count, + struct pnfs_fsdata *fsdata); + int (*write_end)(struct inode *inode, struct page *page, loff_t pos, + unsigned count, unsigned copied, + struct pnfs_layout_segment *lseg); + void (*write_end_cleanup)(struct file *filp, + struct pnfs_fsdata *fsdata); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -175,6 +189,7 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +void pnfs_free_fsdata(struct pnfs_fsdata *fsdata); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); @@ -186,6 +201,10 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, struct nfs4_state *open_state); +int _pnfs_write_begin(struct inode *inode, struct page *page, + loff_t pos, unsigned len, + struct pnfs_layout_segment *lseg, + struct pnfs_fsdata **fsdata); int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range); @@ -287,6 +306,13 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) put_lseg(req->wb_commit_lseg); } +static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg, + struct pnfs_fsdata *fsdata) +{ + return !fsdata || ((struct pnfs_layout_segment *)fsdata == lseg) || + !fsdata->bypass_eof; +} + /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -297,6 +323,49 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) PNFS_LAYOUTRET_ON_SETATTR; } +static inline int pnfs_write_begin(struct file *filp, struct page *page, + loff_t pos, unsigned len, + struct pnfs_layout_segment *lseg, + void **fsdata) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_server *nfss = NFS_SERVER(inode); + int status = 0; + + *fsdata = lseg; + if (lseg && nfss->pnfs_curr_ld->write_begin) + status = _pnfs_write_begin(inode, page, pos, len, lseg, + (struct pnfs_fsdata **) fsdata); + return status; +} + +/* CAREFUL - what happens if copied < len??? */ +static inline int pnfs_write_end(struct file *filp, struct page *page, + loff_t pos, unsigned len, unsigned copied, + struct pnfs_layout_segment *lseg) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_server *nfss = NFS_SERVER(inode); + + if (nfss->pnfs_curr_ld && nfss->pnfs_curr_ld->write_end) + return nfss->pnfs_curr_ld->write_end(inode, page, pos, len, + copied, lseg); + else + return 0; +} + +static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata) +{ + struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode); + + if (fsdata && nfss->pnfs_curr_ld) { + if (nfss->pnfs_curr_ld->write_end_cleanup) + nfss->pnfs_curr_ld->write_end_cleanup(filp, fsdata); + if (nfss->pnfs_curr_ld->write_begin) + pnfs_free_fsdata(fsdata); + } +} + static inline int pnfs_return_layout(struct inode *ino) { struct nfs_inode *nfsi = NFS_I(ino); @@ -317,6 +386,19 @@ static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, pgio->pg_test = ld->pg_test; } +static inline struct pnfs_layout_segment * +nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata) +{ + if (fsdata) { + struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode); + + if (nfss->pnfs_curr_ld && nfss->pnfs_curr_ld->write_begin) + return ((struct pnfs_fsdata *) fsdata)->lseg; + return (struct pnfs_layout_segment *)fsdata; + } + return NULL; +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -345,6 +427,12 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, return NULL; } +static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg, + struct pnfs_fsdata *fsdata) +{ + return 1; +} + static inline enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *data, const struct rpc_call_ops *call_ops) @@ -364,6 +452,26 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline int pnfs_write_begin(struct file *filp, struct page *page, + loff_t pos, unsigned len, + struct pnfs_layout_segment *lseg, + void **fsdata) +{ + *fsdata = NULL; + return 0; +} + +static inline int pnfs_write_end(struct file *filp, struct page *page, + loff_t pos, unsigned len, unsigned copied, + struct pnfs_layout_segment *lseg) +{ + return 0; +} + +static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata) +{ +} + static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) { @@ -435,6 +543,13 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) { } + +static inline struct pnfs_layout_segment * +nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata) +{ + return NULL; +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e268e3b..75e2a6b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -673,7 +673,9 @@ out: } static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) + unsigned int offset, unsigned int count, + struct pnfs_layout_segment *lseg, void *fsdata) + { struct nfs_page *req; @@ -681,7 +683,8 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, if (IS_ERR(req)) return PTR_ERR(req); /* Update file length */ - nfs_grow_file(page, offset, count); + if (pnfs_grow_ok(lseg, fsdata)) + nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); nfs_mark_request_dirty(req); nfs_clear_page_tag_locked(req); @@ -734,7 +737,8 @@ static int nfs_write_pageuptodate(struct page *page, struct inode *inode) * things with a page scheduled for an RPC call (e.g. invalidate it). */ int nfs_updatepage(struct file *file, struct page *page, - unsigned int offset, unsigned int count) + unsigned int offset, unsigned int count, + struct pnfs_layout_segment *lseg, void *fsdata) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = page->mapping->host; @@ -759,7 +763,7 @@ int nfs_updatepage(struct file *file, struct page *page, offset = 0; } - status = nfs_writepage_setup(ctx, page, offset, count); + status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata); if (status < 0) nfs_set_pageerror(page); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1b93b9c..be1ac1d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -510,7 +510,8 @@ extern int nfs_congestion_kb; extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); -extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); +extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int, + struct pnfs_layout_segment *, void *); extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); /*