Message ID | 0f9937165d8fc1b8b6332ac97e59593022e9fa5b.1418173063.git.osandov@osandov.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Dec 09, 2014 at 05:45:48PM -0800, Omar Sandoval wrote: > +static void __clear_swapfile_extents(struct inode *inode) > +{ > + u64 isize = inode->i_size; > + struct extent_map *em; > + u64 start, len; > + > + start = 0; > + while (start < isize) { > + len = isize - start; > + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); > + if (IS_ERR(em)) > + return; This could transiently fail if there's no memory to allocate the em, and would leak the following extents. > + > + clear_bit(EXTENT_FLAG_SWAPFILE, &em->flags); > + > + start = extent_map_end(em); > + free_extent_map(em); > + } > +} > + > +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, > + sector_t *span) > +{ > + struct inode *inode = file_inode(file); > + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; > + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; > + int ret = 0; > + u64 isize = inode->i_size; > + struct extent_state *cached_state = NULL; > + struct extent_map *em; > + u64 start, len; > + > + if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) { > + /* Can't do direct I/O on a compressed file. */ > + btrfs_err(fs_info, "swapfile is compressed"); > + return -EINVAL; > + } > + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) { > + /* > + * Going through the copy-on-write path while swapping pages > + * in/out and doing a bunch of allocations could stress the > + * memory management code that got us there in the first place, > + * and that's sure to be a bad time. > + */ > + btrfs_err(fs_info, "swapfile is copy-on-write"); > + return -EINVAL; > + } > + > + lock_extent_bits(io_tree, 0, isize - 1, 0, &cached_state); > + > + /* > + * All of the extents must be allocated and support direct I/O. Inline > + * extents and compressed extents fall back to buffered I/O, so those > + * are no good. Additionally, all of the extents must be safe for nocow. 
> + */ > + atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); > + start = 0; > + while (start < isize) { > + len = isize - start; > + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); > + if (IS_ERR(em)) { IS_ERR_OR_NULL(em) From now on the em is valid and has to be free_extent_map()ed ... > + ret = PTR_ERR(em); > + goto out; > + } > + > + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags) || > + em->block_start == EXTENT_MAP_HOLE) { > + btrfs_err(fs_info, "swapfile has holes"); > + ret = -EINVAL; ... and all the error branches would miss it. > + goto out; > + } > + if (em->block_start == EXTENT_MAP_INLINE) { > + /* > + * It's unlikely we'll ever actually find ourselves > + * here, as a file small enough to fit inline won't be > + * big enough to store more than the swap header, but in > + * case something changes in the future, let's catch it > + * here rather than later. > + */ > + btrfs_err(fs_info, "swapfile is inline"); > + ret = -EINVAL; here > + goto out; > + } > + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { > + btrfs_err(fs_info, "swapfile is compresed"); > + ret = -EINVAL; here > + goto out; > + } > + ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL); > + if (ret < 0) { here > + goto out; > + } else if (ret == 1) { > + ret = 0; > + } else { > + btrfs_err(fs_info, "swapfile has extent requiring COW (%llu-%llu)", > + start, start + len - 1); > + ret = -EINVAL; here > + goto out; > + } > + > + set_bit(EXTENT_FLAG_SWAPFILE, &em->flags); > + > + start = extent_map_end(em); > + free_extent_map(em); > + } > + > +out: > + if (ret) { should be fixed by: if (!IS_ERR_OR_NULL(em)) free_extent_map(em); > + __clear_swapfile_extents(inode); > + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles); > + } > + unlock_extent_cached(io_tree, 0, isize - 1, &cached_state, GFP_NOFS); > + return ret; > +} -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
On Fri, Dec 12, 2014 at 11:51:22AM +0100, David Sterba wrote: > On Tue, Dec 09, 2014 at 05:45:48PM -0800, Omar Sandoval wrote: > > +static void __clear_swapfile_extents(struct inode *inode) > > +{ > > + u64 isize = inode->i_size; > > + struct extent_map *em; > > + u64 start, len; > > + > > + start = 0; > > + while (start < isize) { > > + len = isize - start; > > + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); > > + if (IS_ERR(em)) > > + return; > > This could transiently fail if there's no memory to allocate the em, and > would leak the following extents. > This leak I was aware of, and at the time I didn't see a good way to get around it. After all, if we can't get the current extent, there's no way to iterate through the rest of them. Now I see that instead of doing this at the btrfs_get_extent level, I can just go through all of the extent_maps in the extent_map_tree. > > + > > + clear_bit(EXTENT_FLAG_SWAPFILE, &em->flags); > > + > > + start = extent_map_end(em); > > + free_extent_map(em); > > + } > > +} > > + > > +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, > > + sector_t *span) > > +{ > > + struct inode *inode = file_inode(file); > > + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; > > + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; > > + int ret = 0; > > + u64 isize = inode->i_size; > > + struct extent_state *cached_state = NULL; > > + struct extent_map *em; > > + u64 start, len; > > + > > + if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) { > > + /* Can't do direct I/O on a compressed file. */ > > + btrfs_err(fs_info, "swapfile is compressed"); > > + return -EINVAL; > > + } > > + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) { > > + /* > > + * Going through the copy-on-write path while swapping pages > > + * in/out and doing a bunch of allocations could stress the > > + * memory management code that got us there in the first place, > > + * and that's sure to be a bad time. 
> > + */ > > + btrfs_err(fs_info, "swapfile is copy-on-write"); > > + return -EINVAL; > > + } > > + > > + lock_extent_bits(io_tree, 0, isize - 1, 0, &cached_state); > > + > > + /* > > + * All of the extents must be allocated and support direct I/O. Inline > > + * extents and compressed extents fall back to buffered I/O, so those > > + * are no good. Additionally, all of the extents must be safe for nocow. > > + */ > > + atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); > > + start = 0; > > + while (start < isize) { > > + len = isize - start; > > + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); > > + if (IS_ERR(em)) { > > IS_ERR_OR_NULL(em) > > From now on the em is valid and has to be free_extent_map()ed ... > > > + ret = PTR_ERR(em); > > + goto out; > > + } > > + > > + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags) || > > + em->block_start == EXTENT_MAP_HOLE) { > > + btrfs_err(fs_info, "swapfile has holes"); > > + ret = -EINVAL; > > ... and all the error branches would miss it. > > > + goto out; > > + } > > + if (em->block_start == EXTENT_MAP_INLINE) { > > + /* > > + * It's unlikely we'll ever actually find ourselves > > + * here, as a file small enough to fit inline won't be > > + * big enough to store more than the swap header, but in > > + * case something changes in the future, let's catch it > > + * here rather than later. 
> > + */ > > + btrfs_err(fs_info, "swapfile is inline"); > > + ret = -EINVAL; > > here > > > + goto out; > > + } > > + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { > > + btrfs_err(fs_info, "swapfile is compresed"); > > + ret = -EINVAL; > > here > > > + goto out; > > + } > > + ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL); > > + if (ret < 0) { > > here > > > + goto out; > > + } else if (ret == 1) { > > + ret = 0; > > + } else { > > + btrfs_err(fs_info, "swapfile has extent requiring COW (%llu-%llu)", > > + start, start + len - 1); > > + ret = -EINVAL; > > here > > > + goto out; > > + } > > + > > + set_bit(EXTENT_FLAG_SWAPFILE, &em->flags); > > + > > + start = extent_map_end(em); > > + free_extent_map(em); > > + } > > + > > +out: > > + if (ret) { > > should be fixed by: > > if (!IS_ERR_OR_NULL(em)) > free_extent_map(em); > > > + __clear_swapfile_extents(inode); > > + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles); > > + } > > + unlock_extent_cached(io_tree, 0, isize - 1, &cached_state, GFP_NOFS); > > + return ret; > > +} This leak I completely missed. Thanks.
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c2dfb2..76b58d7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7191,6 +7191,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * this will cow the extent, reset the len in case we changed * it above */ + WARN_ON_ONCE(IS_SWAPFILE(inode)); len = bh_result->b_size; free_extent_map(em); em = btrfs_new_extent_direct(inode, start, len); @@ -9443,6 +9444,134 @@ out_inode: } +static void __clear_swapfile_extents(struct inode *inode) +{ + u64 isize = inode->i_size; + struct extent_map *em; + u64 start, len; + + start = 0; + while (start < isize) { + len = isize - start; + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + if (IS_ERR(em)) + return; + + clear_bit(EXTENT_FLAG_SWAPFILE, &em->flags); + + start = extent_map_end(em); + free_extent_map(em); + } +} + +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + struct inode *inode = file_inode(file); + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int ret = 0; + u64 isize = inode->i_size; + struct extent_state *cached_state = NULL; + struct extent_map *em; + u64 start, len; + + if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) { + /* Can't do direct I/O on a compressed file. */ + btrfs_err(fs_info, "swapfile is compressed"); + return -EINVAL; + } + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) { + /* + * Going through the copy-on-write path while swapping pages + * in/out and doing a bunch of allocations could stress the + * memory management code that got us there in the first place, + * and that's sure to be a bad time. + */ + btrfs_err(fs_info, "swapfile is copy-on-write"); + return -EINVAL; + } + + lock_extent_bits(io_tree, 0, isize - 1, 0, &cached_state); + + /* + * All of the extents must be allocated and support direct I/O. 
Inline + * extents and compressed extents fall back to buffered I/O, so those + * are no good. Additionally, all of the extents must be safe for nocow. + */ + atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); + start = 0; + while (start < isize) { + len = isize - start; + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags) || + em->block_start == EXTENT_MAP_HOLE) { + btrfs_err(fs_info, "swapfile has holes"); + ret = -EINVAL; + goto out; + } + if (em->block_start == EXTENT_MAP_INLINE) { + /* + * It's unlikely we'll ever actually find ourselves + * here, as a file small enough to fit inline won't be + * big enough to store more than the swap header, but in + * case something changes in the future, let's catch it + * here rather than later. + */ + btrfs_err(fs_info, "swapfile is inline"); + ret = -EINVAL; + goto out; + } + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { + btrfs_err(fs_info, "swapfile is compresed"); + ret = -EINVAL; + goto out; + } + ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL); + if (ret < 0) { + goto out; + } else if (ret == 1) { + ret = 0; + } else { + btrfs_err(fs_info, "swapfile has extent requiring COW (%llu-%llu)", + start, start + len - 1); + ret = -EINVAL; + goto out; + } + + set_bit(EXTENT_FLAG_SWAPFILE, &em->flags); + + start = extent_map_end(em); + free_extent_map(em); + } + +out: + if (ret) { + __clear_swapfile_extents(inode); + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles); + } + unlock_extent_cached(io_tree, 0, isize - 1, &cached_state, GFP_NOFS); + return ret; +} + +static void btrfs_swap_deactivate(struct file *file) +{ + struct inode *inode = file_inode(file); + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_state *cached_state = NULL; + u64 isize = inode->i_size; + + lock_extent_bits(io_tree, 0, isize - 1, 0, &cached_state); + __clear_swapfile_extents(inode); + 
unlock_extent_cached(io_tree, 0, isize - 1, &cached_state, GFP_NOFS); + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles); +} + static const struct inode_operations btrfs_dir_inode_operations = { .getattr = btrfs_getattr, .lookup = btrfs_lookup, @@ -9520,6 +9649,8 @@ static const struct address_space_operations btrfs_aops = { .releasepage = btrfs_releasepage, .set_page_dirty = btrfs_set_page_dirty, .error_remove_page = generic_error_remove_page, + .swap_activate = btrfs_swap_activate, + .swap_deactivate = btrfs_swap_deactivate, }; static const struct address_space_operations btrfs_symlink_aops = {
Implement the swap file a_ops on btrfs. Activation does two things: 1. Checks for a usable swap file: it must be fully allocated (no holes), support direct I/O (so no compressed or inline extents) and must be eligible for nocow in its entirety in order to avoid doing a bunch of allocations for a COW when we're already low on memory. 2. Pins the extent maps in memory with EXTENT_FLAG_SWAPFILE. Deactivation unpins all of the extent maps. Signed-off-by: Omar Sandoval <osandov@osandov.com> --- fs/btrfs/inode.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+)