Message ID | 1435094920-22442-3-git-send-email-mfasheh@suse.de (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Tue, Jun 23, 2015 at 02:28:37PM -0700, Mark Fasheh wrote: > ->readpage() does page_lock() before extent_lock(), we do the opposite in > extent-same. We want to reverse the order in btrfs_extent_same() but it's > not quite straightforward since the page locks are taken inside btrfs_cmp_data(). > > So I split btrfs_cmp_data() into 3 parts with a small context structure that > is passed between them. The first, btrfs_cmp_data_prepare() gathers up the > pages needed (taking page lock as required) and puts them on our context > structure. At this point, we are safe to lock the extent range. Afterwards, > we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free() > to clean up our context. Reviewed-by: Liu Bo <bo.li.liu@oracle.com> > > Signed-off-by: Mark Fasheh <mfasheh@suse.de> > Reviewed-by: David Sterba <dsterba@suse.cz> > --- > fs/btrfs/ioctl.c | 148 +++++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 117 insertions(+), 31 deletions(-) > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index 2deea1f..b899584 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -2755,14 +2755,11 @@ out: > return ret; > } > > -static struct page *extent_same_get_page(struct inode *inode, u64 off) > +static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) > { > struct page *page; > - pgoff_t index; > struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; > > - index = off >> PAGE_CACHE_SHIFT; > - > page = grab_cache_page(inode->i_mapping, index); > if (!page) > return NULL; > @@ -2783,6 +2780,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) > return page; > } > > +static int gather_extent_pages(struct inode *inode, struct page **pages, > + int num_pages, u64 off) > +{ > + int i; > + pgoff_t index = off >> PAGE_CACHE_SHIFT; > + > + for (i = 0; i < num_pages; i++) { > + pages[i] = extent_same_get_page(inode, index + i); > + if (!pages[i]) > + return -ENOMEM; > + } > + return 0; > +} > + > static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) > { > /* do any pending delalloc/csum calc on src, one way or > @@ -2808,52 +2819,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) > } > } > > -static void btrfs_double_unlock(struct inode *inode1, u64 loff1, > - struct inode *inode2, u64 loff2, u64 len) > +static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) > { > - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); > - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); > - > mutex_unlock(&inode1->i_mutex); > mutex_unlock(&inode2->i_mutex); > } > > -static void btrfs_double_lock(struct inode *inode1, u64 loff1, > - struct inode *inode2, u64 loff2, u64 len) > +static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) > +{ > + if (inode1 < inode2) > + swap(inode1, inode2); > + > + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); > + if (inode1 != inode2) > + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); > +} > + > +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, > + struct inode *inode2, u64 loff2, u64 len) > +{ > + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); > + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); > +} > + > +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, > + struct inode *inode2, u64 loff2, u64 len) > { > if (inode1 < inode2) { > swap(inode1, inode2); > swap(loff1, loff2); > } > - > - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); > lock_extent_range(inode1, loff1, len); > - if (inode1 != inode2) { > - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); > + if (inode1 != inode2) > lock_extent_range(inode2, loff2, len); > +} > + > +struct cmp_pages { > + int num_pages; > + struct page **src_pages; > + struct page **dst_pages; > +}; > + > +static void btrfs_cmp_data_free(struct cmp_pages *cmp) > +{ > + int i; > + struct page *pg; > + > + for (i = 0; i < cmp->num_pages; i++) { > + pg = cmp->src_pages[i]; > + if (pg) > + page_cache_release(pg); > + pg = cmp->dst_pages[i]; > + if (pg) > + page_cache_release(pg); > + } > + kfree(cmp->src_pages); > + kfree(cmp->dst_pages); > +} > + > +static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, > + struct inode *dst, u64 dst_loff, > + u64 len, struct cmp_pages *cmp) > +{ > + int ret; > + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; > + struct page **src_pgarr, **dst_pgarr; > + > + /* > + * We must gather up all the pages before we initiate our > + * extent locking. We use an array for the page pointers. Size > + * of the array is bounded by len, which is in turn bounded by > + * BTRFS_MAX_DEDUPE_LEN. > + */ > + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); > + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); > + if (!src_pgarr || !dst_pgarr) { > + kfree(src_pgarr); > + kfree(dst_pgarr); > + return -ENOMEM; > } > + cmp->num_pages = num_pages; > + cmp->src_pages = src_pgarr; > + cmp->dst_pages = dst_pgarr; > + > + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); > + if (ret) > + goto out; > + > + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); > + > +out: > + if (ret) > + btrfs_cmp_data_free(cmp); > + return 0; > } > > static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, > - u64 dst_loff, u64 len) > + u64 dst_loff, u64 len, struct cmp_pages *cmp) > { > int ret = 0; > + int i; > struct page *src_page, *dst_page; > unsigned int cmp_len = PAGE_CACHE_SIZE; > void *addr, *dst_addr; > > + i = 0; > while (len) { > if (len < PAGE_CACHE_SIZE) > cmp_len = len; > > - src_page = extent_same_get_page(src, loff); > - if (!src_page) > - return -EINVAL; > - dst_page = extent_same_get_page(dst, dst_loff); > - if (!dst_page) { > - page_cache_release(src_page); > - return -EINVAL; > - } > + BUG_ON(i >= cmp->num_pages); > + > + src_page = cmp->src_pages[i]; > + dst_page = cmp->dst_pages[i]; > + > addr = kmap_atomic(src_page); > dst_addr = kmap_atomic(dst_page); > > @@ -2865,15 +2944,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, > > kunmap_atomic(addr); > kunmap_atomic(dst_addr); > - page_cache_release(src_page); > - page_cache_release(dst_page); > > if (ret) > break; > > - loff += cmp_len; > - dst_loff += cmp_len; > len -= cmp_len; > + i++; > } > > return ret; > @@ -2904,6 +2980,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, > { > int ret; > u64 len = olen; > + struct cmp_pages cmp; > > /* > * btrfs_clone() can't handle extents in the same file > @@ -2916,7 +2993,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, > if (len == 0) > return 0; > > - btrfs_double_lock(src, loff, dst, dst_loff, len); > + btrfs_double_inode_lock(src, dst); > > ret = extent_same_check_offsets(src, loff, &len, olen); > if (ret) > @@ -2933,13 +3010,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, > goto out_unlock; > } > > + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); > + if (ret) > + goto out_unlock; > + > + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); > + > /* pass original length for comparison so we stay within i_size */ > - ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); > + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); > if (ret == 0) > ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); > > + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); > + > + btrfs_cmp_data_free(&cmp); > out_unlock: > - btrfs_double_unlock(src, loff, dst, dst_loff, len); > + btrfs_double_inode_unlock(src, dst); > > return ret; > } > -- > 2.1.2 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2deea1f..b899584 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2755,14 +2755,11 @@ out: return ret; } -static struct page *extent_same_get_page(struct inode *inode, u64 off) +static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - pgoff_t index; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - index = off >> PAGE_CACHE_SHIFT; - page = grab_cache_page(inode->i_mapping, index); if (!page) return NULL; @@ -2783,6 +2780,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) return page; } +static int gather_extent_pages(struct inode *inode, struct page **pages, + int num_pages, u64 off) +{ + int i; + pgoff_t index = off >> PAGE_CACHE_SHIFT; + + for (i = 0; i < num_pages; i++) { + pages[i] = extent_same_get_page(inode, index + i); + if (!pages[i]) + return -ENOMEM; + } + return 0; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2808,52 +2819,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static void btrfs_double_unlock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) { - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); - mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode2->i_mutex); } -static void btrfs_double_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 < inode2) + swap(inode1, inode2); + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + if (inode1 != inode2) + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); +} + +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) { if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); lock_extent_range(inode1, loff1, len); - if (inode1 != inode2) { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + if (inode1 != inode2) lock_extent_range(inode2, loff2, len); +} + +struct cmp_pages { + int num_pages; + struct page **src_pages; + struct page **dst_pages; +}; + +static void btrfs_cmp_data_free(struct cmp_pages *cmp) +{ + int i; + struct page *pg; + + for (i = 0; i < cmp->num_pages; i++) { + pg = cmp->src_pages[i]; + if (pg) + page_cache_release(pg); + pg = cmp->dst_pages[i]; + if (pg) + page_cache_release(pg); + } + kfree(cmp->src_pages); + kfree(cmp->dst_pages); +} + +static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, + struct inode *dst, u64 dst_loff, + u64 len, struct cmp_pages *cmp) +{ + int ret; + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + struct page **src_pgarr, **dst_pgarr; + + /* + * We must gather up all the pages before we initiate our + * extent locking. We use an array for the page pointers. Size + * of the array is bounded by len, which is in turn bounded by + * BTRFS_MAX_DEDUPE_LEN. + */ + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + if (!src_pgarr || !dst_pgarr) { + kfree(src_pgarr); + kfree(dst_pgarr); + return -ENOMEM; } + cmp->num_pages = num_pages; + cmp->src_pages = src_pgarr; + cmp->dst_pages = dst_pgarr; + + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); + if (ret) + goto out; + + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); + +out: + if (ret) + btrfs_cmp_data_free(cmp); + return 0; } static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, - u64 dst_loff, u64 len) + u64 dst_loff, u64 len, struct cmp_pages *cmp) { int ret = 0; + int i; struct page *src_page, *dst_page; unsigned int cmp_len = PAGE_CACHE_SIZE; void *addr, *dst_addr; + i = 0; while (len) { if (len < PAGE_CACHE_SIZE) cmp_len = len; - src_page = extent_same_get_page(src, loff); - if (!src_page) - return -EINVAL; - dst_page = extent_same_get_page(dst, dst_loff); - if (!dst_page) { - page_cache_release(src_page); - return -EINVAL; - } + BUG_ON(i >= cmp->num_pages); + + src_page = cmp->src_pages[i]; + dst_page = cmp->dst_pages[i]; + addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -2865,15 +2944,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, kunmap_atomic(addr); kunmap_atomic(dst_addr); - page_cache_release(src_page); - page_cache_release(dst_page); if (ret) break; - loff += cmp_len; - dst_loff += cmp_len; len -= cmp_len; + i++; } return ret; @@ -2904,6 +2980,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, { int ret; u64 len = olen; + struct cmp_pages cmp; /* * btrfs_clone() can't handle extents in the same file @@ -2916,7 +2993,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (len == 0) return 0; - btrfs_double_lock(src, loff, dst, dst_loff, len); + btrfs_double_inode_lock(src, dst); ret = extent_same_check_offsets(src, loff, &len, olen); if (ret) @@ -2933,13 +3010,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); + if (ret) + goto out_unlock; + + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ - ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + + btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_unlock(src, loff, dst, dst_loff, len); + btrfs_double_inode_unlock(src, dst); return ret; }