From patchwork Wed Mar 16 08:51:13 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Li Zefan X-Patchwork-Id: 638961 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p2G8mtR0017327 for ; Wed, 16 Mar 2011 08:48:55 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751856Ab1CPIsx (ORCPT ); Wed, 16 Mar 2011 04:48:53 -0400 Received: from cn.fujitsu.com ([222.73.24.84]:51837 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1751636Ab1CPIsw (ORCPT ); Wed, 16 Mar 2011 04:48:52 -0400 Received: from tang.cn.fujitsu.com (tang.cn.fujitsu.com [10.167.250.3]) by song.cn.fujitsu.com (Postfix) with ESMTP id 20B07170080 for ; Wed, 16 Mar 2011 16:48:46 +0800 (CST) Received: from mailserver.fnst.cn.fujitus.com (tang.cn.fujitsu.com [127.0.0.1]) by tang.cn.fujitsu.com (8.14.3/8.13.1) with ESMTP id p2G8giqT015083 for ; Wed, 16 Mar 2011 16:42:44 +0800 Received: from [10.167.225.230] ([10.167.225.230]) by mailserver.fnst.cn.fujitus.com (Lotus Domino Release 8.5.1FP4) with ESMTP id 2011031616472641-28523 ; Wed, 16 Mar 2011 16:47:26 +0800 Message-ID: <4D807A01.7030701@cn.fujitsu.com> Date: Wed, 16 Mar 2011 16:51:13 +0800 From: Li Zefan User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.9) Gecko/20100921 Fedora/3.1.4-1.fc14 Thunderbird/3.1.4 MIME-Version: 1.0 To: "linux-btrfs@vger.kernel.org" Subject: [PATCH 7/7] Btrfs: Support reading/writing on disk free ino cache References: <4D807977.1040506@cn.fujitsu.com> In-Reply-To: <4D807977.1040506@cn.fujitsu.com> X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.1FP4|July 25, 2010) at 2011-03-16 16:47:26, Serialize by Router on mailserver/fnst(Release 8.5.1FP4|July 25, 2010) at 2011-03-16 16:47:26, Serialize complete at 2011-03-16 16:47:26 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: 
X-Mailing-List: linux-btrfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Wed, 16 Mar 2011 08:48:56 +0000 (UTC) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 40f38f7..5217c81 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -103,6 +103,12 @@ struct btrfs_ordered_sum; /* For storing free space cache */ #define BTRFS_FREE_SPACE_OBJECTID -11ULL +/* + * The inode number assigned to the special inode for storing + * free ino cache + */ +#define BTRFS_FREE_INO_OBJECTID -12ULL + /* dummy objectid represents multiple objectids */ #define BTRFS_MULTIPLE_OBJECTIDS -255ULL @@ -1109,6 +1115,7 @@ struct btrfs_root { wait_queue_head_t cache_wait; struct btrfs_free_space_ctl *free_ino_pinned; u64 cache_progress; + struct inode *cache_inode; struct mutex log_mutex; wait_queue_head_t log_writer_wait; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5c92066..9e8a449 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2403,6 +2403,7 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) static void free_fs_root(struct btrfs_root *root) { + iput(root->cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (root->anon_super.s_dev) { down_write(&root->anon_super.s_umount); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3ad4621..104f2eb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3122,7 +3122,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - if (root == root->fs_info->tree_root) { + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { alloc_chunk = 0; committed = 1; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 327e249..3c9e041 100644 --- 
a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -206,7 +206,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, return ret; } - return btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); + return ret; } static int readahead_cache(struct inode *inode) @@ -504,6 +505,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_CLEAR; spin_unlock(&block_group->lock); + ret = 0; printk(KERN_ERR "btrfs: failed to load free space cache " "for block group %llu\n", block_group->key.objectid); @@ -817,6 +819,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&block_group->lock); + ret = 0; printk(KERN_ERR "btrfs: failed to write free space cache " "for block group %llu\n", block_group->key.objectid); @@ -2264,3 +2267,95 @@ out: return ino; } + +struct inode *lookup_free_ino_inode(struct btrfs_root *root, + struct btrfs_path *path) +{ + struct inode *inode = NULL; + + spin_lock(&root->cache_lock); + if (root->cache_inode) + inode = igrab(root->cache_inode); + spin_unlock(&root->cache_lock); + if (inode) + return inode; + + inode = __lookup_free_space_inode(root, path, 0); + if (IS_ERR(inode)) + return inode; + + spin_lock(&root->cache_lock); + if (!root->fs_info->closing) + root->cache_inode = igrab(inode); + spin_unlock(&root->cache_lock); + + return inode; +} + +int create_free_ino_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path) +{ + return __create_free_space_inode(root, trans, path, + BTRFS_FREE_INO_OBJECTID, 0); +} + +int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +{ + struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; + struct btrfs_path *path; + struct inode *inode; + int ret = 0; + u64 root_gen = btrfs_root_generation(&root->root_item); + + /* + * If 
we're unmounting then just return, since this does a search on the + * normal root and not the commit root and we could deadlock. + */ + smp_mb(); + if (fs_info->closing) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return 0; + + inode = lookup_free_ino_inode(root, path); + if (IS_ERR(inode)) + goto out; + + if (root_gen != BTRFS_I(inode)->generation) + goto out_put; + + ret = __load_free_space_cache(root, inode, ctl, path, 0); + + if (ret < 0) + printk(KERN_ERR "btrfs: failed to load free ino cache for " + "root %llu\n", root->root_key.objectid); +out_put: + iput(inode); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_write_out_ino_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path) +{ + struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; + struct inode *inode; + int ret; + + inode = lookup_free_ino_inode(root, path); + if (IS_ERR(inode)) + return 0; + + ret = __btrfs_write_out_cache(root, inode, ctl, trans, path, 0); + if (ret < 0) + printk(KERN_ERR "btrfs: failed to write free ino cache " + "for root %llu\n", root->root_key.objectid); + + iput(inode); + return ret; +} diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 282eeda..c805ebc 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -65,6 +65,17 @@ int btrfs_write_out_cache(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); +struct inode *lookup_free_ino_inode(struct btrfs_root *root, + struct btrfs_path *path); +int create_free_ino_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path); +int load_free_ino_cache(struct btrfs_fs_info *fs_info, + struct btrfs_root *root); +int btrfs_write_out_ino_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path); + void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); int __btrfs_add_free_space(struct 
btrfs_free_space_ctl *ctl, u64 bytenr, u64 size); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cfa9f5e..e94e5b4 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -137,6 +137,7 @@ out: static void start_caching(struct btrfs_root *root) { struct task_struct *tsk; + int ret; spin_lock(&root->cache_lock); if (root->cached != BTRFS_CACHE_NO) { @@ -147,6 +148,14 @@ static void start_caching(struct btrfs_root *root) root->cached = BTRFS_CACHE_STARTED; spin_unlock(&root->cache_lock); + ret = load_free_ino_cache(root->fs_info, root); + if (ret == 1) { + spin_lock(&root->cache_lock); + root->cached = BTRFS_CACHE_FINISHED; + spin_unlock(&root->cache_lock); + return; + } + tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", root->root_key.objectid); BUG_ON(IS_ERR(tsk)); @@ -352,6 +361,84 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root) pinned->op = &pinned_free_ino_op; } +int btrfs_save_ino_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans) +{ + struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; + struct btrfs_path *path; + struct inode *inode; + u64 alloc_hint = 0; + int ret; + int prealloc; + bool retry = false; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +again: + inode = lookup_free_ino_inode(root, path); + if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { + ret = PTR_ERR(inode); + goto out; + } + + if (IS_ERR(inode)) { + BUG_ON(retry); + retry = true; + + ret = create_free_ino_inode(root, trans, path); + if (ret) + goto out; + goto again; + } + + BTRFS_I(inode)->generation = 0; + ret = btrfs_update_inode(trans, root, inode); + WARN_ON(ret); + + if (i_size_read(inode) > 0) { + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + if (ret) + goto out_put; + } + + spin_lock(&root->cache_lock); + if (root->cached != BTRFS_CACHE_FINISHED) { + ret = -1; + spin_unlock(&root->cache_lock); + goto out_put; + } + spin_unlock(&root->cache_lock); + + spin_lock(&ctl->tree_lock); + 
prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; + prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE); + prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE; + spin_unlock(&ctl->tree_lock); + + /* Just to make sure we have enough space */ + prealloc += 8 * PAGE_CACHE_SIZE; + + ret = btrfs_check_data_free_space(inode, prealloc); + if (ret) + goto out_put; + + ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, + prealloc, prealloc, &alloc_hint); + if (ret) + goto out_put; + btrfs_free_reserved_data_space(inode, prealloc); + +out_put: + iput(inode); +out: + if (ret == 0) + ret = btrfs_write_out_ino_cache(root, trans, path); + + btrfs_free_path(path); + return ret; +} + static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h index eb91845..ddb347b 100644 --- a/fs/btrfs/inode-map.h +++ b/fs/btrfs/inode-map.h @@ -5,6 +5,8 @@ void btrfs_init_free_ino_ctl(struct btrfs_root *root); void btrfs_unpin_free_ino(struct btrfs_root *root); void btrfs_return_ino(struct btrfs_root *root, u64 objectid); int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid); +int btrfs_save_ino_cache(struct btrfs_root *root, + struct btrfs_trans_handle *trans); int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d96d858..595646a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -740,6 +740,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, return alloc_hint; } +static inline bool is_free_space_inode(struct btrfs_root *root, + struct inode *inode) +{ + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + return true; + return false; +} + /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. 
The basic idea is to @@ -772,7 +781,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(root == root->fs_info->tree_root); + BUG_ON(is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -1043,17 +1052,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, int type; int nocow; int check_prev = 1; - bool nolock = false; + bool nolock; u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); BUG_ON(!path); - if (root == root->fs_info->tree_root) { - nolock = true; + + nolock = is_free_space_inode(root, inode); + + if (nolock) trans = btrfs_join_transaction_nolock(root, 1); - } else { + else trans = btrfs_join_transaction(root, 1); - } BUG_ON(IS_ERR(trans)); cow_start = (u64)-1; @@ -1310,8 +1320,7 @@ static int btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - int do_list = (root->root_key.objectid != - BTRFS_ROOT_TREE_OBJECTID); + bool do_list = !is_free_space_inode(root, inode); if (*bits & EXTENT_FIRST_DELALLOC) *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1344,8 +1353,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - int do_list = (root->root_key.objectid != - BTRFS_ROOT_TREE_OBJECTID); + bool do_list = !is_free_space_inode(root, inode); if (*bits & EXTENT_FIRST_DELALLOC) *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1452,7 +1460,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (root == root->fs_info->tree_root) + if (is_free_space_inode(root, inode)) ret = 
btrfs_bio_wq_end_io(root->fs_info, bio, 2); else ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); @@ -1692,7 +1700,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct extent_state *cached_state = NULL; int compress_type = 0; int ret; - bool nolock = false; + bool nolock; ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, end - start + 1); @@ -1700,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) return 0; BUG_ON(!ordered_extent); - nolock = (root == root->fs_info->tree_root); + nolock = is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); @@ -3418,7 +3426,9 @@ delete: if (path->slots[0] == 0 || path->slots[0] != pending_del_slot) { - if (root->ref_cows) { + if (root->ref_cows && + BTRFS_I(inode)->location.objectid != + BTRFS_FREE_INO_OBJECTID) { err = -EAGAIN; goto out; } @@ -3740,7 +3750,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - root == root->fs_info->tree_root)) + is_free_space_inode(root, inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -4363,7 +4373,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; smp_mb(); - nolock = (root->fs_info->closing && root == root->fs_info->tree_root); + if (root->fs_info->closing && is_free_space_inode(root, inode)) + nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { if (nolock) @@ -6755,7 +6766,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - root != root->fs_info->tree_root) + !is_free_space_inode(root, inode)) return 1; else return generic_drop_inode(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6f5a704..06825c5 100644 --- a/fs/btrfs/transaction.c +++ 
b/fs/btrfs/transaction.c @@ -752,6 +752,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, btrfs_update_reloc_root(trans, root); btrfs_orphan_commit_root(trans, root); + btrfs_save_ino_cache(root, trans); + if (root->commit_root != root->node) { mutex_lock(&root->fs_commit_mutex); switch_commit_root(root);