Message ID | 9abbfc83c08b2cea215f870f26c553b58fbabeab.1677048584.git.wqu@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] btrfs: make dev-replace properly follow its read mode | expand |
Hi Qu, Thank you for the patch! Yet something to improve: [auto build test ERROR on kdave/for-next] [also build test ERROR on linus/master v6.2] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Qu-Wenruo/btrfs-make-dev-replace-properly-follow-its-read-mode/20230222-150629 base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next patch link: https://lore.kernel.org/r/9abbfc83c08b2cea215f870f26c553b58fbabeab.1677048584.git.wqu%40suse.com patch subject: [PATCH v4] btrfs: make dev-replace properly follow its read mode config: riscv-randconfig-r036-20230222 (https://download.01.org/0day-ci/archive/20230222/202302221728.8obJ8jgw-lkp@intel.com/config) compiler: riscv64-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/07c729ddc3a8f3074e5f61c4def8836bdfc37f73 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Qu-Wenruo/btrfs-make-dev-replace-properly-follow-its-read-mode/20230222-150629 git checkout 07c729ddc3a8f3074e5f61c4def8836bdfc37f73 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=riscv olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash fs/btrfs/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> | Link: https://lore.kernel.org/oe-kbuild-all/202302221728.8obJ8jgw-lkp@intel.com/ All errors (new ones prefixed by >>): fs/btrfs/scrub.c: In function 'scrub_find_good_copy': >> fs/btrfs/scrub.c:2762:49: error: 'struct btrfs_io_context' has no member named 'replace_nr_stripes' 2762 | for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { | ^~ fs/btrfs/scrub.c:2773:49: error: 'struct btrfs_io_context' has no member named 'replace_nr_stripes' 2773 | for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { | ^~ vim +2762 fs/btrfs/scrub.c 2704 2705 static bool should_use_device(struct btrfs_fs_info *fs_info, 2706 struct btrfs_device *dev, 2707 bool follow_replace_read_mode) 2708 { 2709 struct btrfs_device *replace_srcdev = fs_info->dev_replace.srcdev; 2710 struct btrfs_device *replace_tgtdev = fs_info->dev_replace.tgtdev; 2711 2712 if (!dev->bdev) 2713 return false; 2714 2715 /* 2716 * We're doing scrub/replace, if it's pure scrub, no tgtdev should be 2717 * here. 2718 * If it's replace, we're going to write data to tgtdev, thus the current 2719 * data of the tgtdev is all garbage, thus we can not use it at all. 2720 */ 2721 if (dev == replace_tgtdev) 2722 return false; 2723 2724 /* No need to follow replace read policy, any existing device is fine. */ 2725 if (!follow_replace_read_mode) 2726 return true; 2727 2728 /* Need to follow the policy. */ 2729 if (fs_info->dev_replace.cont_reading_from_srcdev_mode == 2730 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID) 2731 return dev != replace_srcdev; 2732 return true; 2733 } 2734 static int scrub_find_good_copy(struct btrfs_fs_info *fs_info, 2735 u64 extent_logical, u32 extent_len, 2736 u64 *extent_physical, 2737 struct btrfs_device **extent_dev, 2738 int *extent_mirror_num) 2739 { 2740 u64 mapped_length; 2741 struct btrfs_io_context *bioc = NULL; 2742 int ret; 2743 int i; 2744 2745 mapped_length = extent_len; 2746 ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, 2747 extent_logical, &mapped_length, &bioc, 0); 2748 if (ret || !bioc || mapped_length < extent_len) { 2749 btrfs_put_bioc(bioc); 2750 btrfs_err_rl(fs_info, "btrfs_map_block() failed for logical %llu: %d", 2751 extent_logical, ret); 2752 return -EIO; 2753 } 2754 2755 /* 2756 * First loop to exclude all missing devices and the source 2757 * device if needed. 2758 * And we don't want to use target device as mirror either, 2759 * as we're doing the replace, the target device range 2760 * contains nothing. 2761 */ > 2762 for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { 2763 struct btrfs_io_stripe *stripe = &bioc->stripes[i]; 2764 2765 if (!should_use_device(fs_info, stripe->dev, true)) 2766 continue; 2767 goto found; 2768 } 2769 /* 2770 * We didn't find any alternative mirrors, we have to break our 2771 * replace read mode, or we can not read at all. 2772 */ 2773 for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { 2774 struct btrfs_io_stripe *stripe = &bioc->stripes[i]; 2775 2776 if (!should_use_device(fs_info, stripe->dev, false)) 2777 continue; 2778 goto found; 2779 } 2780 2781 btrfs_err_rl(fs_info, "failed to find any live mirror for logical %llu", 2782 extent_logical); 2783 return -EIO; 2784 2785 found: 2786 *extent_physical = bioc->stripes[i].physical; 2787 *extent_mirror_num = i + 1; 2788 *extent_dev = bioc->stripes[i].dev; 2789 btrfs_put_bioc(bioc); 2790 return 0; 2791 } 2792
Hi Qu, Thank you for the patch! Yet something to improve: [auto build test ERROR on kdave/for-next] [also build test ERROR on linus/master v6.2] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Qu-Wenruo/btrfs-make-dev-replace-properly-follow-its-read-mode/20230222-150629 base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next patch link: https://lore.kernel.org/r/9abbfc83c08b2cea215f870f26c553b58fbabeab.1677048584.git.wqu%40suse.com patch subject: [PATCH v4] btrfs: make dev-replace properly follow its read mode config: hexagon-randconfig-r045-20230222 (https://download.01.org/0day-ci/archive/20230222/202302221749.qDyhdaZ9-lkp@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project db89896bbbd2251fff457699635acbbedeead27f) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/07c729ddc3a8f3074e5f61c4def8836bdfc37f73 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Qu-Wenruo/btrfs-make-dev-replace-properly-follow-its-read-mode/20230222-150629 git checkout 07c729ddc3a8f3074e5f61c4def8836bdfc37f73 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash fs/btrfs/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> | Link: https://lore.kernel.org/oe-kbuild-all/202302221749.qDyhdaZ9-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from fs/btrfs/scrub.c:6: In file included from include/linux/blkdev.h:9: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __raw_readb(PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) ^ In file included from fs/btrfs/scrub.c:6: In file included from include/linux/blkdev.h:9: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu' #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) ^ In file included from fs/btrfs/scrub.c:6: In file included from include/linux/blkdev.h:9: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writeb(value, PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ >> fs/btrfs/scrub.c:2762:44: error: no member named 'replace_nr_stripes' in 'struct btrfs_io_context' for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { ~~~~ ^ fs/btrfs/scrub.c:2773:44: error: no member named 'replace_nr_stripes' in 'struct btrfs_io_context' for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { ~~~~ ^ 6 warnings and 2 errors generated. vim +2762 fs/btrfs/scrub.c 2704 2705 static bool should_use_device(struct btrfs_fs_info *fs_info, 2706 struct btrfs_device *dev, 2707 bool follow_replace_read_mode) 2708 { 2709 struct btrfs_device *replace_srcdev = fs_info->dev_replace.srcdev; 2710 struct btrfs_device *replace_tgtdev = fs_info->dev_replace.tgtdev; 2711 2712 if (!dev->bdev) 2713 return false; 2714 2715 /* 2716 * We're doing scrub/replace, if it's pure scrub, no tgtdev should be 2717 * here. 2718 * If it's replace, we're going to write data to tgtdev, thus the current 2719 * data of the tgtdev is all garbage, thus we can not use it at all. 2720 */ 2721 if (dev == replace_tgtdev) 2722 return false; 2723 2724 /* No need to follow replace read policy, any existing device is fine. */ 2725 if (!follow_replace_read_mode) 2726 return true; 2727 2728 /* Need to follow the policy. */ 2729 if (fs_info->dev_replace.cont_reading_from_srcdev_mode == 2730 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID) 2731 return dev != replace_srcdev; 2732 return true; 2733 } 2734 static int scrub_find_good_copy(struct btrfs_fs_info *fs_info, 2735 u64 extent_logical, u32 extent_len, 2736 u64 *extent_physical, 2737 struct btrfs_device **extent_dev, 2738 int *extent_mirror_num) 2739 { 2740 u64 mapped_length; 2741 struct btrfs_io_context *bioc = NULL; 2742 int ret; 2743 int i; 2744 2745 mapped_length = extent_len; 2746 ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, 2747 extent_logical, &mapped_length, &bioc, 0); 2748 if (ret || !bioc || mapped_length < extent_len) { 2749 btrfs_put_bioc(bioc); 2750 btrfs_err_rl(fs_info, "btrfs_map_block() failed for logical %llu: %d", 2751 extent_logical, ret); 2752 return -EIO; 2753 } 2754 2755 /* 2756 * First loop to exclude all missing devices and the source 2757 * device if needed. 2758 * And we don't want to use target device as mirror either, 2759 * as we're doing the replace, the target device range 2760 * contains nothing. 2761 */ > 2762 for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { 2763 struct btrfs_io_stripe *stripe = &bioc->stripes[i]; 2764 2765 if (!should_use_device(fs_info, stripe->dev, true)) 2766 continue; 2767 goto found; 2768 } 2769 /* 2770 * We didn't find any alternative mirrors, we have to break our 2771 * replace read mode, or we can not read at all. 2772 */ 2773 for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { 2774 struct btrfs_io_stripe *stripe = &bioc->stripes[i]; 2775 2776 if (!should_use_device(fs_info, stripe->dev, false)) 2777 continue; 2778 goto found; 2779 } 2780 2781 btrfs_err_rl(fs_info, "failed to find any live mirror for logical %llu", 2782 extent_logical); 2783 return -EIO; 2784 2785 found: 2786 *extent_physical = bioc->stripes[i].physical; 2787 *extent_mirror_num = i + 1; 2788 *extent_dev = bioc->stripes[i].dev; 2789 btrfs_put_bioc(bioc); 2790 return 0; 2791 } 2792
Hi Qu, Thank you for the patch! Yet something to improve: [auto build test ERROR on kdave/for-next] [also build test ERROR on linus/master v6.2 next-20230222] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Qu-Wenruo/btrfs-make-dev-replace-properly-follow-its-read-mode/20230222-150629 base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next patch link: https://lore.kernel.org/r/9abbfc83c08b2cea215f870f26c553b58fbabeab.1677048584.git.wqu%40suse.com patch subject: [PATCH v4] btrfs: make dev-replace properly follow its read mode config: s390-randconfig-r013-20230222 (https://download.01.org/0day-ci/archive/20230222/202302221816.1fqvNytO-lkp@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project db89896bbbd2251fff457699635acbbedeead27f) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install s390 cross compiling tool for clang build # apt-get install binutils-s390x-linux-gnu # https://github.com/intel-lab-lkp/linux/commit/07c729ddc3a8f3074e5f61c4def8836bdfc37f73 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Qu-Wenruo/btrfs-make-dev-replace-properly-follow-its-read-mode/20230222-150629 git checkout 07c729ddc3a8f3074e5f61c4def8836bdfc37f73 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=s390 olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot <lkp@intel.com> | Link: https://lore.kernel.org/oe-kbuild-all/202302221816.1fqvNytO-lkp@intel.com/ All errors (new ones prefixed by >>): >> fs/btrfs/scrub.c:2762:44: error: no member named 'replace_nr_stripes' in 'struct btrfs_io_context' for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { ~~~~ ^ fs/btrfs/scrub.c:2773:44: error: no member named 'replace_nr_stripes' in 'struct btrfs_io_context' for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { ~~~~ ^ 2 errors generated. vim +2762 fs/btrfs/scrub.c 2704 2705 static bool should_use_device(struct btrfs_fs_info *fs_info, 2706 struct btrfs_device *dev, 2707 bool follow_replace_read_mode) 2708 { 2709 struct btrfs_device *replace_srcdev = fs_info->dev_replace.srcdev; 2710 struct btrfs_device *replace_tgtdev = fs_info->dev_replace.tgtdev; 2711 2712 if (!dev->bdev) 2713 return false; 2714 2715 /* 2716 * We're doing scrub/replace, if it's pure scrub, no tgtdev should be 2717 * here. 2718 * If it's replace, we're going to write data to tgtdev, thus the current 2719 * data of the tgtdev is all garbage, thus we can not use it at all. 2720 */ 2721 if (dev == replace_tgtdev) 2722 return false; 2723 2724 /* No need to follow replace read policy, any existing device is fine. */ 2725 if (!follow_replace_read_mode) 2726 return true; 2727 2728 /* Need to follow the policy. */ 2729 if (fs_info->dev_replace.cont_reading_from_srcdev_mode == 2730 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID) 2731 return dev != replace_srcdev; 2732 return true; 2733 } 2734 static int scrub_find_good_copy(struct btrfs_fs_info *fs_info, 2735 u64 extent_logical, u32 extent_len, 2736 u64 *extent_physical, 2737 struct btrfs_device **extent_dev, 2738 int *extent_mirror_num) 2739 { 2740 u64 mapped_length; 2741 struct btrfs_io_context *bioc = NULL; 2742 int ret; 2743 int i; 2744 2745 mapped_length = extent_len; 2746 ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, 2747 extent_logical, &mapped_length, &bioc, 0); 2748 if (ret || !bioc || mapped_length < extent_len) { 2749 btrfs_put_bioc(bioc); 2750 btrfs_err_rl(fs_info, "btrfs_map_block() failed for logical %llu: %d", 2751 extent_logical, ret); 2752 return -EIO; 2753 } 2754 2755 /* 2756 * First loop to exclude all missing devices and the source 2757 * device if needed. 2758 * And we don't want to use target device as mirror either, 2759 * as we're doing the replace, the target device range 2760 * contains nothing. 2761 */ > 2762 for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { 2763 struct btrfs_io_stripe *stripe = &bioc->stripes[i]; 2764 2765 if (!should_use_device(fs_info, stripe->dev, true)) 2766 continue; 2767 goto found; 2768 } 2769 /* 2770 * We didn't find any alternative mirrors, we have to break our 2771 * replace read mode, or we can not read at all. 2772 */ 2773 for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { 2774 struct btrfs_io_stripe *stripe = &bioc->stripes[i]; 2775 2776 if (!should_use_device(fs_info, stripe->dev, false)) 2777 continue; 2778 goto found; 2779 } 2780 2781 btrfs_err_rl(fs_info, "failed to find any live mirror for logical %llu", 2782 extent_logical); 2783 return -EIO; 2784 2785 found: 2786 *extent_physical = bioc->stripes[i].physical; 2787 *extent_mirror_num = i + 1; 2788 *extent_dev = bioc->stripes[i].dev; 2789 btrfs_put_bioc(bioc); 2790 return 0; 2791 } 2792
On Wed, Feb 22, 2023 at 03:04:37PM +0800, Qu Wenruo wrote: > [BUG] > Although dev replace ioctl has a way to specify the mode on whether we > should read from the source device, it's not properly followed. > > # mkfs.btrfs -f -d raid1 -m raid1 $dev1 $dev2 > # mount $dev1 $mnt > # xfs_io -f -c "pwrite 0 32M" $mnt/file > # sync > # btrfs replace start -r -f 1 $dev3 $mnt > > And one extra trace is added to scrub_submit(), showing the detail about > the bio: > > btrfs-1115669 [005] ..... 5437.027093: scrub_submit.part.0: devid=1 logical=22036480 phy=22036480 len=16384 > btrfs-1115669 [005] ..... 5437.027372: scrub_submit.part.0: devid=1 logical=30457856 phy=30457856 len=32768 > btrfs-1115669 [005] ..... 5437.027440: scrub_submit.part.0: devid=1 logical=30507008 phy=30507008 len=49152 > btrfs-1115669 [005] ..... 5437.027487: scrub_submit.part.0: devid=1 logical=30605312 phy=30605312 len=32768 > btrfs-1115669 [005] ..... 5437.027556: scrub_submit.part.0: devid=1 logical=30703616 phy=30703616 len=65536 > btrfs-1115669 [005] ..... 5437.028186: scrub_submit.part.0: devid=1 logical=298844160 phy=298844160 len=131072 > ... > btrfs-1115669 [005] ..... 5437.076243: scrub_submit.part.0: devid=1 logical=322961408 phy=322961408 len=131072 > btrfs-1115669 [005] ..... 5437.076248: scrub_submit.part.0: devid=1 logical=323092480 phy=323092480 len=131072 > > One can see that all the read are submitted to devid 1, even we have > specified "-r" option to avoid read from the source device. > > [CAUSE] > The dev-replace read mode is only set but not followed by scrub code > at all. > > In fact, only common read path is properly following the read mode, > but scrub itself has its own read path, thus not following the mode. > > [FIX] > Here we enhance scrub_find_good_copy() to also follow the read mode. > > The idea is pretty simple, in the first loop, we avoid the following > devices: > > - Missing devices > This is the existing condition > > - The source device if the replace wants to avoid it. > > And if above loop found no candidate (e.g. replace a single device), > then we discard the 2nd condition, and try again. > > Since we're here, also enhance the function scrub_find_good_copy() by: > > - Remove the forward declaration > > - Makes it return int > To indicates errors, e.g. no good mirror found. > > - Add extra error messages > > Now with the same trace, "btrfs replace start -r" works as expected: > > btrfs-1121013 [000] ..... 5991.905971: scrub_submit.part.0: devid=2 logical=22036480 phy=1064960 len=16384 > btrfs-1121013 [000] ..... 5991.906276: scrub_submit.part.0: devid=2 logical=30457856 phy=9486336 len=32768 > btrfs-1121013 [000] ..... 5991.906365: scrub_submit.part.0: devid=2 logical=30507008 phy=9535488 len=49152 > btrfs-1121013 [000] ..... 5991.906423: scrub_submit.part.0: devid=2 logical=30605312 phy=9633792 len=32768 > btrfs-1121013 [000] ..... 5991.906504: scrub_submit.part.0: devid=2 logical=30703616 phy=9732096 len=65536 > btrfs-1121013 [000] ..... 5991.907314: scrub_submit.part.0: devid=2 logical=298844160 phy=277872640 len=131072 > btrfs-1121013 [000] ..... 5991.907575: scrub_submit.part.0: devid=2 logical=298975232 phy=278003712 len=131072 > btrfs-1121013 [000] ..... 5991.907822: scrub_submit.part.0: devid=2 logical=299106304 phy=278134784 len=131072 > ... > btrfs-1121013 [000] ..... 5991.947417: scrub_submit.part.0: devid=2 logical=318504960 phy=297533440 len=131072 > btrfs-1121013 [000] ..... 5991.947664: scrub_submit.part.0: devid=2 logical=318636032 phy=297664512 len=131072 > btrfs-1121013 [000] ..... 5991.947920: scrub_submit.part.0: devid=2 logical=318767104 phy=297795584 len=131072 > > Reviewed-by: Anand Jain <anand.jain@oracle.com> > Signed-off-by: Qu Wenruo <wqu@suse.com> > --- > Changelog: > v2: > - Rename "replace read policy" to "replace read mode" in comments > This is avoid the confusion with the existing read policy. > No behavior change. > > v3: > - Avoid using different mirrors if our profile is RAID56 > RAID56 doesn't contain extra copies, they rebuilt data from P/Q. > Thus for RAID56, we can not directly select another stripe and > use it as a read source. > > v4: > - Fix the failure in btrfs/027 > The change in v3 is not enough for RAID56, as for missing data stripe > dev, we still can not use other mirrors. > This fix gives RAID56 higher priority than missing data stripe device. I didn't do an in-depth review so please send v5 if needed, now added to misc-next, thanks.
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ee3fe6c291fe..d7363b0ff327 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -423,11 +423,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len, static void scrub_bio_end_io(struct bio *bio); static void scrub_bio_end_io_worker(struct work_struct *work); static void scrub_block_complete(struct scrub_block *sblock); -static void scrub_find_good_copy(struct btrfs_fs_info *fs_info, - u64 extent_logical, u32 extent_len, - u64 *extent_physical, - struct btrfs_device **extent_dev, - int *extent_mirror_num); static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx, struct scrub_sector *sector); static void scrub_wr_submit(struct scrub_ctx *sctx); @@ -2709,6 +2704,112 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) return 1; } +static bool should_use_device(struct btrfs_fs_info *fs_info, + struct btrfs_device *dev, + bool follow_replace_read_mode) +{ + struct btrfs_device *replace_srcdev = fs_info->dev_replace.srcdev; + struct btrfs_device *replace_tgtdev = fs_info->dev_replace.tgtdev; + + if (!dev->bdev) + return false; + + /* + * We're doing scrub/replace, if it's pure scrub, no tgtdev should be + * here. + * If it's replace, we're going to write data to tgtdev, thus the current + * data of the tgtdev is all garbage, thus we can not use it at all. + */ + if (dev == replace_tgtdev) + return false; + + /* No need to follow replace read policy, any existing device is fine. */ + if (!follow_replace_read_mode) + return true; + + /* Need to follow the policy. */ + if (fs_info->dev_replace.cont_reading_from_srcdev_mode == + BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID) + return dev != replace_srcdev; + return true; +} +static int scrub_find_good_copy(struct btrfs_fs_info *fs_info, + u64 extent_logical, u32 extent_len, + u64 *extent_physical, + struct btrfs_device **extent_dev, + int *extent_mirror_num) +{ + u64 mapped_length; + struct btrfs_io_context *bioc = NULL; + int ret; + int i; + + mapped_length = extent_len; + ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, + extent_logical, &mapped_length, &bioc, 0); + if (ret || !bioc || mapped_length < extent_len) { + btrfs_put_bioc(bioc); + btrfs_err_rl(fs_info, "btrfs_map_block() failed for logical %llu: %d", + extent_logical, ret); + return -EIO; + } + + /* + * First loop to exclude all missing devices and the source + * device if needed. + * And we don't want to use target device as mirror either, + * as we're doing the replace, the target device range + * contains nothing. + */ + for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { + struct btrfs_io_stripe *stripe = &bioc->stripes[i]; + + if (!should_use_device(fs_info, stripe->dev, true)) + continue; + goto found; + } + /* + * We didn't find any alternative mirrors, we have to break our + * replace read mode, or we can not read at all. + */ + for (i = 0; i < bioc->num_stripes - bioc->replace_nr_stripes; i++) { + struct btrfs_io_stripe *stripe = &bioc->stripes[i]; + + if (!should_use_device(fs_info, stripe->dev, false)) + continue; + goto found; + } + + btrfs_err_rl(fs_info, "failed to find any live mirror for logical %llu", + extent_logical); + return -EIO; + +found: + *extent_physical = bioc->stripes[i].physical; + *extent_mirror_num = i + 1; + *extent_dev = bioc->stripes[i].dev; + btrfs_put_bioc(bioc); + return 0; +} + +static bool scrub_need_different_mirror(struct scrub_ctx *sctx, + struct map_lookup *map, + struct btrfs_device *dev) +{ + /* + * For RAID56, all the extra mirrors are rebuilt from other P/Q, + * can not utilize other mirrors direct. + */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) + return false; + + if (!dev->bdev) + return true; + + return sctx->fs_info->dev_replace.cont_reading_from_srcdev_mode == + BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID; +} + /* scrub extent tries to collect up to 64 kB for each bio */ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, u64 logical, u32 len, @@ -2746,17 +2847,15 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, } /* - * For dev-replace case, we can have @dev being a missing device. - * Regular scrub will avoid its execution on missing device at all, - * as that would trigger tons of read error. - * - * Reading from missing device will cause read error counts to - * increase unnecessarily. - * So here we change the read source to a good mirror. + * For dev-replace case, we can have @dev being a missing device, or + * we want to avoid read from the source device if possible. */ - if (sctx->is_dev_replace && !dev->bdev) - scrub_find_good_copy(sctx->fs_info, logical, len, &src_physical, - &src_dev, &src_mirror); + if (sctx->is_dev_replace && scrub_need_different_mirror(sctx, map, dev)) { + ret = scrub_find_good_copy(sctx->fs_info, logical, len, + &src_physical, &src_dev, &src_mirror); + if (ret < 0) + return ret; + } while (len) { u32 l = min(len, blocksize); int have_csum = 0; @@ -4544,28 +4643,3 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; } - -static void scrub_find_good_copy(struct btrfs_fs_info *fs_info, - u64 extent_logical, u32 extent_len, - u64 *extent_physical, - struct btrfs_device **extent_dev, - int *extent_mirror_num) -{ - u64 mapped_length; - struct btrfs_io_context *bioc = NULL; - int ret; - - mapped_length = extent_len; - ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical, - &mapped_length, &bioc, 0); - if (ret || !bioc || mapped_length < extent_len || - !bioc->stripes[0].dev->bdev) { - btrfs_put_bioc(bioc); - return; - } - - *extent_physical = bioc->stripes[0].physical; - *extent_mirror_num = bioc->mirror_num; - *extent_dev = bioc->stripes[0].dev; - btrfs_put_bioc(bioc); -}