@@ -115,9 +115,11 @@ static inline u32 count_max_extents(u64 size, u64 max_extent_size)
*/
enum btrfs_metadata_reserve_type {
BTRFS_RESERVE_NORMAL,
+ BTRFS_RESERVE_COMPRESS, /* max extent size is SZ_128K */
};
u64 btrfs_max_extent_size(enum btrfs_metadata_reserve_type reserve_type);
+int inode_need_compress(struct inode *inode);
struct btrfs_mapping_tree {
struct extent_map_tree map_tree;
@@ -5989,6 +5989,8 @@ u64 btrfs_max_extent_size(enum btrfs_metadata_reserve_type reserve_type)
{
if (reserve_type == BTRFS_RESERVE_NORMAL)
return BTRFS_MAX_EXTENT_SIZE;
+ else if (reserve_type == BTRFS_RESERVE_COMPRESS)
+ return SZ_128K;
ASSERT(0);
return BTRFS_MAX_EXTENT_SIZE;
@@ -597,7 +597,7 @@ static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
btrfs_debug_check_extent_io_range(tree, start, end);
if (bits & EXTENT_DELALLOC)
- bits |= EXTENT_NORESERVE;
+ bits |= EXTENT_NORESERVE | EXTENT_COMPRESS;
if (delete)
bits |= ~EXTENT_CTLBITS;
@@ -736,6 +736,60 @@ static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
}
+/*
+ * Drop the surplus outstanding extents of one @len-byte range that was
+ * counted with btrfs_max_extent_size(@reserve_type) as its extent unit and
+ * is now to be counted with BTRFS_MAX_EXTENT_SIZE instead.  Nothing is
+ * changed when the old unit does not yield more extents than the new one.
+ */
+static void adjust_one_outstanding_extent(struct inode *inode, u64 len,
+		enum btrfs_metadata_reserve_type reserve_type)
+{
+	u64 old_unit = btrfs_max_extent_size(reserve_type);
+	unsigned int counted, wanted;
+
+	/* Round up: a partially filled unit still counts as one extent. */
+	counted = div64_u64(len + old_unit - 1, old_unit);
+	wanted = div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
+			   BTRFS_MAX_EXTENT_SIZE);
+
+	if (counted > wanted) {
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->outstanding_extents -= counted - wanted;
+		spin_unlock(&BTRFS_I(inode)->lock);
+	}
+}
+
+/*
+ * An extent carrying the EXTENT_COMPRESS flag has its number of outstanding
+ * extents calculated in 128KB units.  If it later does not go through the
+ * compress path, that number has to be adjusted.
+ *
+ * Starting at the node tree_search() returns for @start, visit each extent
+ * state whose start does not exceed @end, clear EXTENT_COMPRESS on it and
+ * adjust the inode's outstanding_extents for the state's length.
+ */
+void adjust_outstanding_extents(struct inode *inode, u64 start, u64 end,
+		enum btrfs_metadata_reserve_type reserve_type)
+{
+	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+	struct extent_state *state;
+	struct rb_node *node;
+
+	spin_lock(&tree->lock);
+	for (node = tree_search(tree, start); node; node = rb_next(node)) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->start > end)
+			break;
+		/*
+		 * The whole range is locked, so clearing the
+		 * EXTENT_COMPRESS flag here is safe.
+		 */
+		state->state &= ~EXTENT_COMPRESS;
+		adjust_one_outstanding_extent(inode,
+				state->end - state->start + 1, reserve_type);
+	}
+	spin_unlock(&tree->lock);
+}
+
static void wait_on_state(struct extent_io_tree *tree,
struct extent_state *state)
__releases(tree->lock)
@@ -1488,6 +1542,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
u64 cur_start = *start;
u64 found = 0;
u64 total_bytes = 0;
+ unsigned int pre_state;
spin_lock(&tree->lock);
@@ -1505,7 +1560,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
while (1) {
state = rb_entry(node, struct extent_state, rb_node);
if (found && (state->start != cur_start ||
- (state->state & EXTENT_BOUNDARY))) {
+ (state->state & EXTENT_BOUNDARY) ||
+ (state->state ^ pre_state) & EXTENT_COMPRESS)) {
goto out;
}
if (!(state->state & EXTENT_DELALLOC)) {
@@ -1521,6 +1577,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
found++;
*end = state->end;
cur_start = state->end + 1;
+ pre_state = state->state;
node = rb_next(node);
total_bytes += state->end - state->start + 1;
if (total_bytes >= max_bytes)
@@ -23,6 +23,7 @@
#define EXTENT_QGROUP_RESERVED (1U << 16)
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
#define EXTENT_DELALLOC_NEW (1U << 18)
+#define EXTENT_COMPRESS (1U << 19)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
@@ -301,6 +302,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
+enum btrfs_metadata_reserve_type;
+void adjust_outstanding_extents(struct inode *inode, u64 start, u64 end,
+ enum btrfs_metadata_reserve_type reserve_type);
+
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
@@ -1602,6 +1602,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (!pages)
return -ENOMEM;
+ if (inode_need_compress(inode))
+ reserve_type = BTRFS_RESERVE_COMPRESS;
+
while (iov_iter_count(i) > 0) {
size_t offset = pos & (PAGE_SIZE - 1);
size_t sector_offset;
@@ -393,7 +393,7 @@ static noinline int add_async_extent(struct async_cow *cow,
return 0;
}
-static inline int inode_need_compress(struct inode *inode)
+int inode_need_compress(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -730,6 +730,17 @@ static noinline void submit_compressed_extents(struct inode *inode,
async_extent->start +
async_extent->ram_size - 1);
+ /*
+ * We used 128KB as the max extent size to calculate the
+ * number of outstanding extents for this extent before.
+ * Now it will go through uncompressed IO, so we need to
+ * use 128MB as the max extent size to re-calculate the
+ * number of outstanding extents for this extent.
+ */
+ adjust_outstanding_extents(inode, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1,
+ BTRFS_RESERVE_COMPRESS);
/* allocate blocks */
ret = cow_file_range(inode, async_cow->locked_page,
async_extent->start,
@@ -1173,7 +1184,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+ unsigned long *nr_written,
+ enum btrfs_metadata_reserve_type reserve_type)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct async_cow *async_cow;
@@ -1191,10 +1203,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow->locked_page = locked_page;
async_cow->start = start;
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(fs_info, FORCE_COMPRESS))
- cur_end = end;
- else
+ cur_end = end;
+ if (reserve_type == BTRFS_RESERVE_COMPRESS)
cur_end = min(end, start + SZ_512K - 1);
async_cow->end = cur_end;
@@ -1571,21 +1581,37 @@ static int run_delalloc_range(void *private_data, struct page *locked_page,
struct inode *inode = private_data;
int ret;
int force_cow = need_force_cow(inode, start, end);
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ int need_compress;
+ enum btrfs_metadata_reserve_type reserve_type = BTRFS_RESERVE_NORMAL;
+
+ need_compress = test_range_bit(io_tree, start, end,
+ EXTENT_COMPRESS, 1, NULL);
+ if (need_compress)
+ reserve_type = BTRFS_RESERVE_COMPRESS;
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+ if (need_compress)
+ adjust_outstanding_extents(inode, start, end,
+ reserve_type);
+
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 1, nr_written);
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+ if (need_compress)
+ adjust_outstanding_extents(inode, start, end,
+ reserve_type);
+
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode)) {
+ } else if (!need_compress) {
ret = cow_file_range(inode, locked_page, start, end, end,
page_started, nr_written, 1, NULL);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
ret = cow_file_range_async(inode, locked_page, start, end,
- page_started, nr_written);
+ page_started, nr_written, reserve_type);
}
if (ret)
btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
@@ -1607,6 +1633,8 @@ static void btrfs_split_extent_hook(void *private_data,
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
return;
+ if (orig->state & EXTENT_COMPRESS)
+ reserve_type = BTRFS_RESERVE_COMPRESS;
max_extent_size = btrfs_max_extent_size(reserve_type);
size = orig->end - orig->start + 1;
@@ -1654,6 +1682,8 @@ static void btrfs_merge_extent_hook(void *private_data,
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
return;
+ if (other->state & EXTENT_COMPRESS)
+ reserve_type = BTRFS_RESERVE_COMPRESS;
max_extent_size = btrfs_max_extent_size(reserve_type);
if (new->start > other->start)
@@ -1770,6 +1800,8 @@ static void btrfs_set_bit_hook(void *private_data,
BTRFS_RESERVE_NORMAL;
bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+ if (*bits & EXTENT_COMPRESS)
+ reserve_type = BTRFS_RESERVE_COMPRESS;
max_extent_size = btrfs_max_extent_size(reserve_type);
num_extents = count_max_extents(len, max_extent_size);
@@ -1835,6 +1867,8 @@ static void btrfs_clear_bit_hook(void *private_data,
struct btrfs_root *root = inode->root;
bool do_list = !btrfs_is_free_space_inode(inode);
+ if (state->state & EXTENT_COMPRESS)
+ reserve_type = BTRFS_RESERVE_COMPRESS;
max_extent_size = btrfs_max_extent_size(reserve_type);
num_extents = count_max_extents(len, max_extent_size);
@@ -2046,18 +2080,30 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
return 0;
}
+/*
+ * Normally reserve_type should be BTRFS_RESERVE_NORMAL, but if a data range
+ * will go through the compress path, set reserve_type to
+ * BTRFS_RESERVE_COMPRESS. Note: the EXTENT_COMPRESS bit set on the range
+ * and reserve_type must stay consistent.
+ */
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state,
enum btrfs_metadata_reserve_type reserve_type)
{
int ret;
+ unsigned int bits;
u64 max_extent_size = btrfs_max_extent_size(reserve_type);
u64 num_extents = div64_u64(end - start + max_extent_size,
max_extent_size);
+ /* compression path */
+ if (reserve_type == BTRFS_RESERVE_COMPRESS)
+ bits = EXTENT_DELALLOC | EXTENT_COMPRESS | EXTENT_UPTODATE;
+ else
+ bits = EXTENT_DELALLOC | EXTENT_UPTODATE;
+
WARN_ON((end & (PAGE_SIZE - 1)) == 0);
- ret = set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
- cached_state);
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, start, end,
+ bits, NULL, cached_state, GFP_NOFS);
/*
* btrfs_delalloc_reserve_metadata() will first add number of
@@ -2084,14 +2130,20 @@ int btrfs_set_extent_defrag(struct inode *inode, u64 start, u64 end,
enum btrfs_metadata_reserve_type reserve_type)
{
int ret;
+ unsigned int bits;
u64 max_extent_size = btrfs_max_extent_size(reserve_type);
u64 num_extents = div64_u64(end - start + max_extent_size,
max_extent_size);
WARN_ON((end & (PAGE_SIZE - 1)) == 0);
- ret = set_extent_defrag(&BTRFS_I(inode)->io_tree, start, end,
- cached_state);
+ if (reserve_type == BTRFS_RESERVE_COMPRESS)
+ bits = EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG |
+ EXTENT_COMPRESS;
+ else
+ bits = EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG;
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, start, end,
+ bits, NULL, cached_state, GFP_NOFS);
if (ret == 0 && !btrfs_is_free_space_inode(BTRFS_I(inode))) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents -= num_extents;
@@ -2151,6 +2203,8 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
goto again;
}
+ if (inode_need_compress(inode))
+ reserve_type = BTRFS_RESERVE_COMPRESS;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
PAGE_SIZE, reserve_type);
if (ret) {
@@ -3062,8 +3116,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans->block_rsv = &fs_info->delalloc_block_rsv;
- if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
+ if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) {
compress_type = ordered_extent->compress_type;
+ reserve_type = BTRFS_RESERVE_COMPRESS;
+ }
+
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
@@ -4860,6 +4917,9 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
u64 block_end;
enum btrfs_metadata_reserve_type reserve_type = BTRFS_RESERVE_NORMAL;
+ if (inode_need_compress(inode))
+ reserve_type = BTRFS_RESERVE_COMPRESS;
+
if ((offset & (blocksize - 1)) == 0 &&
(!len || ((len & (blocksize - 1)) == 0)))
goto out;
@@ -9154,6 +9214,8 @@ int btrfs_page_mkwrite(struct vm_fault *vmf)
page_end = page_start + PAGE_SIZE - 1;
end = page_end;
+ if (inode_need_compress(inode))
+ reserve_type = BTRFS_RESERVE_COMPRESS;
/*
* Reserving delalloc space after obtaining the page lock can lead to
* deadlock. For example, if a dirty page is locked by this function
@@ -1137,6 +1137,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
+ if (inode_need_compress(inode))
+ reserve_type = BTRFS_RESERVE_COMPRESS;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT, reserve_type);
@@ -3188,6 +3188,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!cluster->nr)
return 0;
+ if (inode_need_compress(inode))
+ reserve_type = BTRFS_RESERVE_COMPRESS;
+
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
return -ENOMEM;