From patchwork Fri Oct 23 08:03:23 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Zhaolei X-Patchwork-Id: 7470621 Return-Path: X-Original-To: patchwork-linux-btrfs@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork2.web.kernel.org (Postfix) with ESMTP id CEEDBBEEA4 for ; Fri, 23 Oct 2015 08:05:44 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id C9C65208BC for ; Fri, 23 Oct 2015 08:05:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8B4BB208B3 for ; Fri, 23 Oct 2015 08:05:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751624AbbJWIFZ (ORCPT ); Fri, 23 Oct 2015 04:05:25 -0400 Received: from cn.fujitsu.com ([59.151.112.132]:4672 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1751097AbbJWIFN (ORCPT ); Fri, 23 Oct 2015 04:05:13 -0400 X-IronPort-AV: E=Sophos;i="5.15,520,1432569600"; d="scan'208";a="102188799" Received: from unknown (HELO edo.cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 23 Oct 2015 16:07:20 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (localhost.localdomain [127.0.0.1]) by edo.cn.fujitsu.com (8.14.3/8.13.1) with ESMTP id t9N84Uju010451 for ; Fri, 23 Oct 2015 16:04:30 +0800 Received: from localhost.localdomain (10.167.226.114) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server id 14.3.181.6; Fri, 23 Oct 2015 16:05:01 +0800 From: Zhao Lei To: CC: Zhao Lei Subject: [PATCH] btrfs: Remove code for no-cow in scrub/replace Date: Fri, 23 Oct 2015 16:03:23 +0800 Message-ID: <110ea4fe9c32048517e83616ba63dacefffef54d.1445587196.git.zhaolei@cn.fujitsu.com> X-Mailer: git-send-email 1.8.5.1 MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Spam-Status: No, score=-6.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Since we set source bg to readonly in scrub/replace, we don't need to consider confliction of no-cow write in scrub/replace operaion. This patch removes special code for no-cow mode in scrub/replace, reduced 670 lines. Tested by continuous xfstests in 5 days, include generic and btrfs groups with 10 mount options include nodatacow. Signed-off-by: Zhao Lei --- fs/btrfs/ctree.h | 1 - fs/btrfs/scrub.c | 669 ------------------------------------------------------- 2 files changed, 670 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe3..3387509 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1688,7 +1688,6 @@ struct btrfs_fs_info { int scrub_workers_refcnt; struct btrfs_workqueue *scrub_workers; struct btrfs_workqueue *scrub_wr_completion_workers; - struct btrfs_workqueue *scrub_nocow_workers; struct btrfs_workqueue *scrub_parity_workers; #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index d64f557..6027679 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -205,32 +205,6 @@ struct scrub_ctx { atomic_t refs; }; -struct scrub_fixup_nodatasum { - struct scrub_ctx *sctx; - struct btrfs_device *dev; - u64 logical; - struct btrfs_root *root; - struct btrfs_work work; - int mirror_num; -}; - -struct scrub_nocow_inode { - u64 inum; - u64 offset; - u64 root; - struct list_head list; -}; - -struct scrub_copy_nocow_ctx { - struct scrub_ctx *sctx; - u64 logical; - u64 len; - int mirror_num; - u64 physical_for_dev_replace; - struct list_head inodes; - struct btrfs_work work; -}; - struct scrub_warning { struct btrfs_path *path; u64 extent_item_size; @@ -242,8 +216,6 @@ struct scrub_warning { static void scrub_pending_bio_inc(struct scrub_ctx *sctx); static void scrub_pending_bio_dec(struct scrub_ctx *sctx); -static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); -static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct scrub_block *sblocks_for_recheck); @@ -298,13 +270,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, static void scrub_wr_submit(struct scrub_ctx *sctx); static void scrub_wr_bio_end_io(struct bio *bio, int err); static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); -static int write_page_nocow(struct scrub_ctx *sctx, - u64 physical_for_dev_replace, struct page *page); -static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, - struct scrub_copy_nocow_ctx *ctx); -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, - int mirror_num, u64 physical_for_dev_replace); -static void copy_nocow_pages_worker(struct btrfs_work *work); static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_put_ctx(struct scrub_ctx *sctx); @@ -355,60 +320,6 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) scrub_pause_off(fs_info); } -/* - * used for workers that require transaction commits (i.e., for the - * NOCOW case) - */ -static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) -{ - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - - atomic_inc(&sctx->refs); - /* - * increment scrubs_running to prevent cancel requests from - * completing as long as a worker is running. we must also - * increment scrubs_paused to prevent deadlocking on pause - * requests used for transactions commits (as the worker uses a - * transaction context). it is safe to regard the worker - * as paused for all matters practical. effectively, we only - * avoid cancellation requests from completing. - */ - mutex_lock(&fs_info->scrub_lock); - atomic_inc(&fs_info->scrubs_running); - atomic_inc(&fs_info->scrubs_paused); - mutex_unlock(&fs_info->scrub_lock); - - /* - * check if @scrubs_running=@scrubs_paused condition - * inside wait_event() is not an atomic operation. - * which means we may inc/dec @scrub_running/paused - * at any time. Let's wake up @scrub_pause_wait as - * much as we can to let commit transaction blocked less. - */ - wake_up(&fs_info->scrub_pause_wait); - - atomic_inc(&sctx->workers_pending); -} - -/* used for workers that require transaction commits */ -static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) -{ - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - - /* - * see scrub_pending_trans_workers_inc() why we're pretending - * to be paused in the scrub counters - */ - mutex_lock(&fs_info->scrub_lock); - atomic_dec(&fs_info->scrubs_running); - atomic_dec(&fs_info->scrubs_paused); - mutex_unlock(&fs_info->scrub_lock); - atomic_dec(&sctx->workers_pending); - wake_up(&fs_info->scrub_pause_wait); - wake_up(&sctx->list_wait); - scrub_put_ctx(sctx); -} - static void scrub_free_csums(struct scrub_ctx *sctx) { while (!list_empty(&sctx->csum_list)) { @@ -686,194 +597,6 @@ out: btrfs_free_path(path); } -static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) -{ - struct page *page = NULL; - unsigned long index; - struct scrub_fixup_nodatasum *fixup = fixup_ctx; - int ret; - int corrected = 0; - struct btrfs_key key; - struct inode *inode = NULL; - struct btrfs_fs_info *fs_info; - u64 end = offset + PAGE_SIZE - 1; - struct btrfs_root *local_root; - int srcu_index; - - key.objectid = root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - fs_info = fixup->root->fs_info; - srcu_index = srcu_read_lock(&fs_info->subvol_srcu); - - local_root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(local_root)) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return PTR_ERR(local_root); - } - - key.type = BTRFS_INODE_ITEM_KEY; - key.objectid = inum; - key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - index = offset >> PAGE_CACHE_SHIFT; - - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; - goto out; - } - - if (PageUptodate(page)) { - if (PageDirty(page)) { - /* - * we need to write the data to the defect sector. the - * data that was in that sector is not in memory, - * because the page was modified. we must not write the - * modified page to that sector. - * - * TODO: what could be done here: wait for the delalloc - * runner to write out that page (might involve - * COW) and see whether the sector is still - * referenced afterwards. - * - * For the meantime, we'll treat this error - * incorrectable, although there is a chance that a - * later scrub will find the bad sector again and that - * there's no dirty page in memory, then. - */ - ret = -EIO; - goto out; - } - ret = repair_io_failure(inode, offset, PAGE_SIZE, - fixup->logical, page, - offset - page_offset(page), - fixup->mirror_num); - unlock_page(page); - corrected = !ret; - } else { - /* - * we need to get good data first. the general readpage path - * will call repair_io_failure for us, we just have to make - * sure we read the bad mirror. - */ - ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - EXTENT_DAMAGED, GFP_NOFS); - if (ret) { - /* set_extent_bits should give proper error */ - WARN_ON(ret > 0); - if (ret > 0) - ret = -EFAULT; - goto out; - } - - ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page, - btrfs_get_extent, - fixup->mirror_num); - wait_on_page_locked(page); - - corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, - end, EXTENT_DAMAGED, 0, NULL); - if (!corrected) - clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - EXTENT_DAMAGED, GFP_NOFS); - } - -out: - if (page) - put_page(page); - - iput(inode); - - if (ret < 0) - return ret; - - if (ret == 0 && corrected) { - /* - * we only need to call readpage for one of the inodes belonging - * to this extent. so make iterate_extent_inodes stop - */ - return 1; - } - - return -EIO; -} - -static void scrub_fixup_nodatasum(struct btrfs_work *work) -{ - int ret; - struct scrub_fixup_nodatasum *fixup; - struct scrub_ctx *sctx; - struct btrfs_trans_handle *trans = NULL; - struct btrfs_path *path; - int uncorrectable = 0; - - fixup = container_of(work, struct scrub_fixup_nodatasum, work); - sctx = fixup->sctx; - - path = btrfs_alloc_path(); - if (!path) { - spin_lock(&sctx->stat_lock); - ++sctx->stat.malloc_errors; - spin_unlock(&sctx->stat_lock); - uncorrectable = 1; - goto out; - } - - trans = btrfs_join_transaction(fixup->root); - if (IS_ERR(trans)) { - uncorrectable = 1; - goto out; - } - - /* - * the idea is to trigger a regular read through the standard path. we - * read a page from the (failed) logical address by specifying the - * corresponding copynum of the failed sector. thus, that readpage is - * expected to fail. - * that is the point where on-the-fly error correction will kick in - * (once it's finished) and rewrite the failed sector if a good copy - * can be found. - */ - ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info, - path, scrub_fixup_readpage, - fixup); - if (ret < 0) { - uncorrectable = 1; - goto out; - } - WARN_ON(ret != 1); - - spin_lock(&sctx->stat_lock); - ++sctx->stat.corrected_errors; - spin_unlock(&sctx->stat_lock); - -out: - if (trans && !IS_ERR(trans)) - btrfs_end_transaction(trans, fixup->root); - if (uncorrectable) { - spin_lock(&sctx->stat_lock); - ++sctx->stat.uncorrectable_errors; - spin_unlock(&sctx->stat_lock); - btrfs_dev_replace_stats_inc( - &sctx->dev_root->fs_info->dev_replace. - num_uncorrectable_read_errors); - printk_ratelimited_in_rcu(KERN_ERR "BTRFS: " - "unable to fixup (nodatasum) error at logical %llu on dev %s\n", - fixup->logical, rcu_str_deref(fixup->dev->name)); - } - - btrfs_free_path(path); - kfree(fixup); - - scrub_pending_trans_workers_dec(sctx); -} - static inline void scrub_get_recover(struct scrub_recover *recover) { atomic_inc(&recover->refs); @@ -940,11 +663,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) csum = sblock_to_check->pagev[0]->csum; dev = sblock_to_check->pagev[0]->dev; - if (sctx->is_dev_replace && !is_metadata && !have_csum) { - sblocks_for_recheck = NULL; - goto nodatasum_case; - } - /* * read all mirrors one after the other. This includes to * re-read the extent or metadata block that failed (that was @@ -1058,36 +776,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) goto out; } - if (!is_metadata && !have_csum) { - struct scrub_fixup_nodatasum *fixup_nodatasum; - - WARN_ON(sctx->is_dev_replace); - -nodatasum_case: - - /* - * !is_metadata and !have_csum, this means that the data - * might not be COW'ed, that it might be modified - * concurrently. The general strategy to work on the - * commit root does not help in the case when COW is not - * used. - */ - fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); - if (!fixup_nodatasum) - goto did_not_correct_error; - fixup_nodatasum->sctx = sctx; - fixup_nodatasum->dev = dev; - fixup_nodatasum->logical = logical; - fixup_nodatasum->root = fs_info->extent_root; - fixup_nodatasum->mirror_num = failed_mirror_index + 1; - scrub_pending_trans_workers_inc(sctx); - btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, - scrub_fixup_nodatasum, NULL, NULL); - btrfs_queue_work(fs_info->scrub_workers, - &fixup_nodatasum->work); - goto out; - } - /* * now build and submit the bios for the other mirrors, check * checksums. @@ -2575,23 +2263,9 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, while (len) { u64 l = min_t(u64, len, blocksize); int have_csum = 0; - - if (flags & BTRFS_EXTENT_FLAG_DATA) { - /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); - if (have_csum == 0) - ++sctx->stat.no_csum; - if (sctx->is_dev_replace && !have_csum) { - ret = copy_nocow_pages(sctx, logical, l, - mirror_num, - physical_for_dev_replace); - goto behind_scrub_pages; - } - } ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, mirror_num, have_csum ? csum : NULL, 0, physical_for_dev_replace); -behind_scrub_pages: if (ret) return ret; len -= l; @@ -3762,10 +3436,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, if (!fs_info->scrub_wr_completion_workers) goto fail_scrub_wr_completion_workers; - fs_info->scrub_nocow_workers = - btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); - if (!fs_info->scrub_nocow_workers) - goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = btrfs_alloc_workqueue("btrfs-scrubparity", flags, max_active, 2); @@ -3776,8 +3446,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, return 0; fail_scrub_parity_workers: - btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); -fail_scrub_nocow_workers: btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); fail_scrub_wr_completion_workers: btrfs_destroy_workqueue(fs_info->scrub_workers); @@ -3790,7 +3458,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) if (--fs_info->scrub_workers_refcnt == 0) { btrfs_destroy_workqueue(fs_info->scrub_workers); btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); - btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); btrfs_destroy_workqueue(fs_info->scrub_parity_workers); } WARN_ON(fs_info->scrub_workers_refcnt < 0); @@ -4081,339 +3748,3 @@ static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) mutex_unlock(&wr_ctx->wr_lock); } -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, - int mirror_num, u64 physical_for_dev_replace) -{ - struct scrub_copy_nocow_ctx *nocow_ctx; - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - - nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS); - if (!nocow_ctx) { - spin_lock(&sctx->stat_lock); - sctx->stat.malloc_errors++; - spin_unlock(&sctx->stat_lock); - return -ENOMEM; - } - - scrub_pending_trans_workers_inc(sctx); - - nocow_ctx->sctx = sctx; - nocow_ctx->logical = logical; - nocow_ctx->len = len; - nocow_ctx->mirror_num = mirror_num; - nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, - copy_nocow_pages_worker, NULL, NULL); - INIT_LIST_HEAD(&nocow_ctx->inodes); - btrfs_queue_work(fs_info->scrub_nocow_workers, - &nocow_ctx->work); - - return 0; -} - -static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx) -{ - struct scrub_copy_nocow_ctx *nocow_ctx = ctx; - struct scrub_nocow_inode *nocow_inode; - - nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS); - if (!nocow_inode) - return -ENOMEM; - nocow_inode->inum = inum; - nocow_inode->offset = offset; - nocow_inode->root = root; - list_add_tail(&nocow_inode->list, &nocow_ctx->inodes); - return 0; -} - -#define COPY_COMPLETE 1 - -static void copy_nocow_pages_worker(struct btrfs_work *work) -{ - struct scrub_copy_nocow_ctx *nocow_ctx = - container_of(work, struct scrub_copy_nocow_ctx, work); - struct scrub_ctx *sctx = nocow_ctx->sctx; - u64 logical = nocow_ctx->logical; - u64 len = nocow_ctx->len; - int mirror_num = nocow_ctx->mirror_num; - u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; - int ret; - struct btrfs_trans_handle *trans = NULL; - struct btrfs_fs_info *fs_info; - struct btrfs_path *path; - struct btrfs_root *root; - int not_written = 0; - - fs_info = sctx->dev_root->fs_info; - root = fs_info->extent_root; - - path = btrfs_alloc_path(); - if (!path) { - spin_lock(&sctx->stat_lock); - sctx->stat.malloc_errors++; - spin_unlock(&sctx->stat_lock); - not_written = 1; - goto out; - } - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - not_written = 1; - goto out; - } - - ret = iterate_inodes_from_logical(logical, fs_info, path, - record_inode_for_nocow, nocow_ctx); - if (ret != 0 && ret != -ENOENT) { - btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, " - "phys %llu, len %llu, mir %u, ret %d", - logical, physical_for_dev_replace, len, mirror_num, - ret); - not_written = 1; - goto out; - } - - btrfs_end_transaction(trans, root); - trans = NULL; - while (!list_empty(&nocow_ctx->inodes)) { - struct scrub_nocow_inode *entry; - entry = list_first_entry(&nocow_ctx->inodes, - struct scrub_nocow_inode, - list); - list_del_init(&entry->list); - ret = copy_nocow_pages_for_inode(entry->inum, entry->offset, - entry->root, nocow_ctx); - kfree(entry); - if (ret == COPY_COMPLETE) { - ret = 0; - break; - } else if (ret) { - break; - } - } -out: - while (!list_empty(&nocow_ctx->inodes)) { - struct scrub_nocow_inode *entry; - entry = list_first_entry(&nocow_ctx->inodes, - struct scrub_nocow_inode, - list); - list_del_init(&entry->list); - kfree(entry); - } - if (trans && !IS_ERR(trans)) - btrfs_end_transaction(trans, root); - if (not_written) - btrfs_dev_replace_stats_inc(&fs_info->dev_replace. - num_uncorrectable_read_errors); - - btrfs_free_path(path); - kfree(nocow_ctx); - - scrub_pending_trans_workers_dec(sctx); -} - -static int check_extent_to_block(struct inode *inode, u64 start, u64 len, - u64 logical) -{ - struct extent_state *cached_state = NULL; - struct btrfs_ordered_extent *ordered; - struct extent_io_tree *io_tree; - struct extent_map *em; - u64 lockstart = start, lockend = start + len - 1; - int ret = 0; - - io_tree = &BTRFS_I(inode)->io_tree; - - lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, lockstart, len); - if (ordered) { - btrfs_put_ordered_extent(ordered); - ret = 1; - goto out_unlock; - } - - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out_unlock; - } - - /* - * This extent does not actually cover the logical extent anymore, - * move on to the next inode. - */ - if (em->block_start > logical || - em->block_start + em->block_len < logical + len) { - free_extent_map(em); - ret = 1; - goto out_unlock; - } - free_extent_map(em); - -out_unlock: - unlock_extent_cached(io_tree, lockstart, lockend, &cached_state, - GFP_NOFS); - return ret; -} - -static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, - struct scrub_copy_nocow_ctx *nocow_ctx) -{ - struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; - struct btrfs_key key; - struct inode *inode; - struct page *page; - struct btrfs_root *local_root; - struct extent_io_tree *io_tree; - u64 physical_for_dev_replace; - u64 nocow_ctx_logical; - u64 len = nocow_ctx->len; - unsigned long index; - int srcu_index; - int ret = 0; - int err = 0; - - key.objectid = root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - srcu_index = srcu_read_lock(&fs_info->subvol_srcu); - - local_root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(local_root)) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return PTR_ERR(local_root); - } - - key.type = BTRFS_INODE_ITEM_KEY; - key.objectid = inum; - key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - /* Avoid truncate/dio/punch hole.. */ - mutex_lock(&inode->i_mutex); - inode_dio_wait(inode); - - physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; - io_tree = &BTRFS_I(inode)->io_tree; - nocow_ctx_logical = nocow_ctx->logical; - - ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical); - if (ret) { - ret = ret > 0 ? 0 : ret; - goto out; - } - - while (len >= PAGE_CACHE_SIZE) { - index = offset >> PAGE_CACHE_SHIFT; -again: - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - btrfs_err(fs_info, "find_or_create_page() failed"); - ret = -ENOMEM; - goto out; - } - - if (PageUptodate(page)) { - if (PageDirty(page)) - goto next_page; - } else { - ClearPageError(page); - err = extent_read_full_page(io_tree, page, - btrfs_get_extent, - nocow_ctx->mirror_num); - if (err) { - ret = err; - goto next_page; - } - - lock_page(page); - /* - * If the page has been remove from the page cache, - * the data on it is meaningless, because it may be - * old one, the new data may be written into the new - * page in the page cache. - */ - if (page->mapping != inode->i_mapping) { - unlock_page(page); - page_cache_release(page); - goto again; - } - if (!PageUptodate(page)) { - ret = -EIO; - goto next_page; - } - } - - ret = check_extent_to_block(inode, offset, len, - nocow_ctx_logical); - if (ret) { - ret = ret > 0 ? 0 : ret; - goto next_page; - } - - err = write_page_nocow(nocow_ctx->sctx, - physical_for_dev_replace, page); - if (err) - ret = err; -next_page: - unlock_page(page); - page_cache_release(page); - - if (ret) - break; - - offset += PAGE_CACHE_SIZE; - physical_for_dev_replace += PAGE_CACHE_SIZE; - nocow_ctx_logical += PAGE_CACHE_SIZE; - len -= PAGE_CACHE_SIZE; - } - ret = COPY_COMPLETE; -out: - mutex_unlock(&inode->i_mutex); - iput(inode); - return ret; -} - -static int write_page_nocow(struct scrub_ctx *sctx, - u64 physical_for_dev_replace, struct page *page) -{ - struct bio *bio; - struct btrfs_device *dev; - int ret; - - dev = sctx->wr_ctx.tgtdev; - if (!dev) - return -EIO; - if (!dev->bdev) { - printk_ratelimited(KERN_WARNING - "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); - return -EIO; - } - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if (!bio) { - spin_lock(&sctx->stat_lock); - sctx->stat.malloc_errors++; - spin_unlock(&sctx->stat_lock); - return -ENOMEM; - } - bio->bi_iter.bi_size = 0; - bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; - bio->bi_bdev = dev->bdev; - ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); - if (ret != PAGE_CACHE_SIZE) { -leave_with_eio: - bio_put(bio); - btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - return -EIO; - } - - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) - goto leave_with_eio; - - bio_put(bio); - return 0; -}