diff mbox

Btrfs-progs: fsck: disable --init-extent-tree option when using snapshots

Message ID

1394447961-27981-1-git-send-email-wangsl.fnst@cn.fujitsu.com (mailing list archive)

State

Under Review, archived

Headers

show

From: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH] Btrfs-progs: fsck: disable --init-extent-tree option when
	using snapshots
Date: Mon, 10 Mar 2014 18:39:21 +0800
Message-Id: <1394447961-27981-1-git-send-email-wangsl.fnst@cn.fujitsu.com>
Sender: linux-btrfs-owner@vger.kernel.org
Precedence: bulk

Commit Message

Wang Shilong March 10, 2014, 10:39 a.m. UTC

We haven't supported to rebuild extent tree if there are any *FULL BACKREF*
with broken filesystem, disable this option when detecting snapshots.

Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
---
 cmds-check.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

Comments

Wang Shilong March 10, 2014, 12:12 p.m. UTC | #1

Hi Josef,

As i haven't thought any better ideas to rebuild extent tree which contains
extent that owns 'FULL BACKREF' flag.

Considering an extent's refs can be equal or more than 1 if this extent has
*FULL BACKREF* flag, so we could not make sure an extent's flag by only
searching fs/file tree any more.

So until now, i just disable this option if snapshots exists, please correct me
if i miss something here. Or you have any better ideas to solve this problem.~_~


Thanks,
Wang
2014-03-10 18:39 GMT+08:00 Wang Shilong <wangsl.fnst@cn.fujitsu.com>:
> We haven't supported to rebuild extent tree if there are any *FULL BACKREF*
> with broken filesystem, disable this option when detecting snapshots.
>
> Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
> ---
>  cmds-check.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
>
> diff --git a/cmds-check.c b/cmds-check.c
> index d1cafe1..ddee897 100644
> --- a/cmds-check.c
> +++ b/cmds-check.c
> @@ -6143,6 +6143,56 @@ static int reset_block_groups(struct btrfs_fs_info *fs_info)
>         return 0;
>  }
>
> +static int is_snapshot_exist(struct btrfs_fs_info *fs_info)
> +{
> +       struct btrfs_root *root = fs_info->tree_root;
> +       struct btrfs_path *path;
> +       struct extent_buffer *leaf;
> +       struct btrfs_key key;
> +       int ret;
> +       int found = 0;
> +
> +       path = btrfs_alloc_path();
> +       if (!path)
> +               return -ENOMEM;
> +
> +       key.objectid = BTRFS_FIRST_FREE_OBJECTID;
> +       key.type = BTRFS_ROOT_ITEM_KEY;
> +       key.offset = 0;
> +
> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +       if (ret < 0)
> +               goto out;
> +
> +       while (1) {
> +               if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
> +                       ret = btrfs_next_leaf(root, path);
> +                       if (ret)
> +                               goto out;
> +               }
> +               leaf = path->nodes[0];
> +               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
> +
> +               if (key.type != BTRFS_ROOT_ITEM_KEY ||
> +                   key.objectid < BTRFS_FIRST_FREE_OBJECTID) {
> +                       path->slots[0]++;
> +                       continue;
> +               }
> +               if (key.offset > 0) {
> +                       found = 1;
> +                       break;
> +               }
> +               path->slots[0]++;
> +       }
> +out:
> +       btrfs_free_path(path);
> +       if (found)
> +               return 1;
> +       else if (ret >= 0)
> +               return 0;
> +       return ret;
> +}
> +
>  static int reset_balance(struct btrfs_trans_handle *trans,
>                          struct btrfs_fs_info *fs_info)
>  {
> @@ -6537,6 +6587,18 @@ int cmd_check(int argc, char **argv)
>                 ret = -EIO;
>                 goto close_out;
>         }
> +       if (init_extent_tree) {
> +               ret = is_snapshot_exist(info);
> +               if (ret < 0) {
> +                       fprintf(stderr, "ERROR: fail to check if there are snapshots in btrfs filesystem\n");
> +                       ret = 1;
> +                       goto close_out;
> +               } else if (ret) {
> +                       fprintf(stderr, "Snapshots detected, unable to rebuild extent tree for such case.\n");
> +                       ret = 1;
> +                       goto close_out;
> +               }
> +       }
>
>         if (init_extent_tree || init_csum_tree) {
>                 struct btrfs_trans_handle *trans;
> --
> 1.9.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Josef Bacik March 10, 2014, 3:50 p.m. UTC | #2

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/10/2014 08:12 AM, Shilong Wang wrote:
> Hi Josef,
> 
> As i haven't thought any better ideas to rebuild extent tree which
> contains extent that owns 'FULL BACKREF' flag.
> 
> Considering an extent's refs can be equal or more than 1 if this
> extent has *FULL BACKREF* flag, so we could not make sure an
> extent's flag by only searching fs/file tree any more.
> 
> So until now, i just disable this option if snapshots exists,
> please correct me if i miss something here. Or you have any better
> ideas to solve this problem.~_~
> 
> 

I thought the fsck stuff rebuilds full backref refs properly, does it
not?  If it doesn't we need to fix that, however I'm fine with
disabling the option if snapshots exist for the time being.  Thanks,

Josef
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/

iQIcBAEBAgAGBQJTHd9NAAoJEANb+wAKly3BYCYP/0iTaaa7w0SnfXtgjoVyX+nT
+e0Pa46zeKzpTujotCDb9E/2PBesCAvA4Psog3rkfsqJ2nXN9cERN4E6/JG4nAHh
Hv4KPo+w+tCkC4U2wSoDivYrVk9G5SH25ewkgW6iheSYNIlm+PLbOQz9DzGjCFDp
51J9tG5E010siOyhlLCyGj8ZTj+gXuoQVWKCS8dOpCLMrbYYjMDXa562hqWaLoS/
t3eSfP7Tnnpl43NiMZI4fWrzmlFa5lba5iJmG59FeyiseRH4Zrhee4St1L1xDL5A
/6f3tJJT7DJjRRJFv0nJAOvOPyFaK8bMaYmOQJg6VrhcyPKM3BxBVEab3HrmQ7jt
LCMWobpIcM7e5BugmbTGGsFymhv05SQgvYGzpzRVXdsSzqubuqTcXwloNU5RyyFF
sXT9IiW9wAibHe7mDN7V6nfo1bVfHsjvSVi1rqz4/zFOWyh8oqxfEhxUJIWhfFsn
j0WJevvqKnjBJujyyuQpL13tzh69qei0AHOEme3R46BSRMnyuacy/WOeyo4VXPcn
0GIeWbngAIWF/quhoQGkvofRMlPgftiDge8uz9pbm3IEKeiP9dQ/HvKsIHMKjnKW
3dEBvMV/CSUQNek4VjO1ALefTRZQvJVL8Wxdij4W+djJw/uVX7fOhuqdkqyfM3FY
CKSB3HUSUtDCammsvgQA
=OT98
-----END PGP SIGNATURE-----
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong March 11, 2014, 1:23 a.m. UTC | #3

On 03/10/2014 11:50 PM, Josef Bacik wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> On 03/10/2014 08:12 AM, Shilong Wang wrote:
>> Hi Josef,
>>
>> As i haven't thought any better ideas to rebuild extent tree which
>> contains extent that owns 'FULL BACKREF' flag.
>>
>> Considering an extent's refs can be equal or more than 1 if this
>> extent has *FULL BACKREF* flag, so we could not make sure an
>> extent's flag by only searching fs/file tree any more.
>>
>> So until now, i just disable this option if snapshots exists,
>> please correct me if i miss something here. Or you have any better
>> ideas to solve this problem.~_~
>>
>>
> I thought the fsck stuff rebuilds full backref refs properly, does it
> not?  If it doesn't we need to fix that, however I'm fine with
> disabling the option if snapshots exist for the time being.  Thanks,
If there are no snapshots, --init-extent-tree can works as expected.
I just have not thought a better idea to rebuild extent tree if we do have
snapshots which means we may have an extent with *FULL BACKREF*
flag.

Thanks,
Wang
>
> Josef
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1
> Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/
>
> iQIcBAEBAgAGBQJTHd9NAAoJEANb+wAKly3BYCYP/0iTaaa7w0SnfXtgjoVyX+nT
> +e0Pa46zeKzpTujotCDb9E/2PBesCAvA4Psog3rkfsqJ2nXN9cERN4E6/JG4nAHh
> Hv4KPo+w+tCkC4U2wSoDivYrVk9G5SH25ewkgW6iheSYNIlm+PLbOQz9DzGjCFDp
> 51J9tG5E010siOyhlLCyGj8ZTj+gXuoQVWKCS8dOpCLMrbYYjMDXa562hqWaLoS/
> t3eSfP7Tnnpl43NiMZI4fWrzmlFa5lba5iJmG59FeyiseRH4Zrhee4St1L1xDL5A
> /6f3tJJT7DJjRRJFv0nJAOvOPyFaK8bMaYmOQJg6VrhcyPKM3BxBVEab3HrmQ7jt
> LCMWobpIcM7e5BugmbTGGsFymhv05SQgvYGzpzRVXdsSzqubuqTcXwloNU5RyyFF
> sXT9IiW9wAibHe7mDN7V6nfo1bVfHsjvSVi1rqz4/zFOWyh8oqxfEhxUJIWhfFsn
> j0WJevvqKnjBJujyyuQpL13tzh69qei0AHOEme3R46BSRMnyuacy/WOeyo4VXPcn
> 0GIeWbngAIWF/quhoQGkvofRMlPgftiDge8uz9pbm3IEKeiP9dQ/HvKsIHMKjnKW
> 3dEBvMV/CSUQNek4VjO1ALefTRZQvJVL8Wxdij4W+djJw/uVX7fOhuqdkqyfM3FY
> CKSB3HUSUtDCammsvgQA
> =OT98
> -----END PGP SIGNATURE-----
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Marc MERLIN March 14, 2014, 2:19 a.m. UTC | #4

On Sat, Mar 08, 2014 at 09:53:50PM +0000, Hugo Mills wrote:
>    Is there anything that can be done about the issues of btrfs send
> blocking? I've been writing a backup script (slowly), and several
> times I've managed to hit a situation where large chunks of the
> machine grind to a complete halt in D state because the backup script
> has jammed up.
 
Ah, we're doing the exact same thing then :)

>    Now, I'm aware that you can't send and receive to the same
> filesystem at the same time, and that's a restriction I can live with.
> However, having things that aren't related to the backup process
> suddenly stop working because the backup script is trying to log its
> progress to the same FS it's backing up is... umm... somewhat vexing,
> to say the least.

Mmmh, my backup doesn't log to disk, just in a screen bufffer, but I've seen
extensive hangs too, and my 6TB send/receive priming has been taking 6 days
on local disks. I think it stops all the time due to locks.
But as per my other message below, it's very bad when it deadlocks other
filesystems not involved in the backup, like my root filesystem.

See the other thread I appended to before seeing your message:
Subject: Re: 3.14.0-rc3: btrfs send/receive blocks btrfs IO on other devices (near deadlocks)                                

attached below.
I'll be happy to try new stuff, but I want that 6 day running send/receive
to finish first. It took so long that I don't want to do it again :)

Marc

On Thu, Mar 13, 2014 at 06:48:13PM -0700, Marc MERLIN wrote:
> Can anyone comment on this.
> 
> Are others seeing some btrfs operations on filesystem/diskA hang/deadlock
> other btrfs operations on filesystem/diskB ?
> 
> I just spent time fixing near data corruption in one of my systems due to
> a 7h delay between when the timestamp was written and the actual data was
> written, and traced it down to a btrfs hang that should never have happened
> on that filesystem.
> 
> Surely, it's not a single queue for all filesystem and devices, right?
> 
> If not, does anyone know what bugs I've been hitting then?
> 
> Is the full report below I spent quite a while getting together for you :)
> useful in any way to see where the hangs are?
> 
> To be honest, I'm looking at moving some important filesystems back to ext4
> because I can't afford such long hangs on my root filesystem when I have a
> media device that is doing heavy btrfs IO or a send/receive.
> 
> Mmmh, is it maybe just btrfs send/receive that is taking a btrfs-wide lock?
> Or btrfs scrub maybe?
> 
> Thanks,
> Marc
> 
> On Wed, Mar 12, 2014 at 08:18:08AM -0700, Marc MERLIN wrote:
> > I have a file server with 4 cpu cores and 5 btrfs devices:
> > Label: btrfs_boot  uuid: e4c1daa8-9c39-4a59-b0a9-86297d397f3b
> >         Total devices 1 FS bytes used 48.92GiB
> >         devid    1 size 79.93GiB used 73.04GiB path /dev/mapper/cryptroot
> > 
> > Label: varlocalspace  uuid: 9f46dbe2-1344-44c3-b0fb-af2888c34f18
> >         Total devices 1 FS bytes used 1.10TiB
> >         devid    1 size 1.63TiB used 1.50TiB path /dev/mapper/cryptraid0
> > 
> > Label: btrfs_pool1  uuid: 6358304a-2234-4243-b02d-4944c9af47d7
> >         Total devices 1 FS bytes used 7.16TiB
> >         devid    1 size 14.55TiB used 7.50TiB path /dev/mapper/dshelf1
> > 
> > Label: btrfs_pool2  uuid: cb9df6d3-a528-4afc-9a45-4fed5ec358d6
> >         Total devices 1 FS bytes used 3.34TiB
> >         devid    1 size 7.28TiB used 3.42TiB path /dev/mapper/dshelf2
> > 
> > Label: bigbackup  uuid: 024ba4d0-dacb-438d-9f1b-eeb34083fe49
> >         Total devices 5 FS bytes used 6.02TiB
> >         devid    1 size 1.82TiB used 1.43TiB path /dev/dm-9
> >         devid    2 size 1.82TiB used 1.43TiB path /dev/dm-6
> >         devid    3 size 1.82TiB used 1.43TiB path /dev/dm-5
> >         devid    4 size 1.82TiB used 1.43TiB path /dev/dm-7
> >         devid    5 size 1.82TiB used 1.43TiB path /dev/dm-8
> > 
> > 
> > I have a very long running btrfs send/receive from btrfs_pool1 to bigbackup
> > (long running meaning that it's been slowly copying over 5 days)
> > 
> > The problem is that this is blocking IO to btrfs_pool2 which is using
> > totally different drives.
> > By blocking IO I mean that IO to pool2 kind of works sometimes, and
> > hangs for very long times at other times.
> > 
> > It looks as if one rsync to btrfs_pool2 or one piece of IO hangs on a shared lock
> > and once that happens, all IO to btrfs_pool2 stops for a long time.
> > It does recover eventually without reboot, but the wait times are ridiculous (it 
> > could be 1H or more).
> > 
> > As I write this, I have a killall -9 rsync that waited for over 10mn before
> > these processes would finally die:
> > 23555       07:36 wait_current_trans.isra.15     rsync -av -SH --delete (...)
> > 23556       07:36 exit                           [rsync] <defunct>
> > 25387  2-04:41:22 wait_current_trans.isra.15     rsync --password-file  (...)
> > 27481       31:26 wait_current_trans.isra.15     rsync --password-file  (...)
> > 29268    04:41:34 wait_current_trans.isra.15     rsync --password-file  (...)
> > 29343    04:41:31 exit                           [rsync] <defunct>
> > 29492    04:41:27 wait_current_trans.isra.15     rsync --password-file  (...)
> > 
> > 14559    07:14:49 wait_current_trans.isra.15     cp -i -al current 20140312-feisty
> > 
> > This is all stuck in btrfs kernel code.
> > If someeone wants sysrq-w, there it is.
> > http://marc.merlins.org/tmp/btrfs_full.txt
> > 
> > A quick summary:
> > SysRq : Show Blocked State
> >   task                        PC stack   pid father
> > btrfs-cleaner   D ffff8802126b0840     0  3332      2 0x00000000
> >  ffff8800c5dc9d00 0000000000000046 ffff8800c5dc9fd8 ffff8800c69f6310
> >  00000000000141c0 ffff8800c69f6310 ffff88017574c170 ffff880211e671e8
> >  0000000000000000 ffff880211e67000 ffff8801e5936e20 ffff8800c5dc9d10
> > Call Trace:
> >  [<ffffffff8160b0d9>] schedule+0x73/0x75
> >  [<ffffffff8122a3c7>] wait_current_trans.isra.15+0x98/0xf4
> >  [<ffffffff81085062>] ? finish_wait+0x65/0x65
> >  [<ffffffff8122b86c>] start_transaction+0x48e/0x4f2
> >  [<ffffffff8122bc4f>] ? __btrfs_end_transaction+0x2a1/0x2c6
> >  [<ffffffff8122b8eb>] btrfs_start_transaction+0x1b/0x1d
> >  [<ffffffff8121c5cd>] btrfs_drop_snapshot+0x443/0x610
> >  [<ffffffff8160d7b3>] ? _raw_spin_unlock+0x17/0x2a
> >  [<ffffffff81074efb>] ? finish_task_switch+0x51/0xdb
> >  [<ffffffff8160afbf>] ? __schedule+0x537/0x5de
> >  [<ffffffff8122c08d>] btrfs_clean_one_deleted_snapshot+0x103/0x10f
> >  [<ffffffff81224859>] cleaner_kthread+0x103/0x136
> >  [<ffffffff81224756>] ? btrfs_alloc_root+0x26/0x26
> >  [<ffffffff8106bc1b>] kthread+0xae/0xb6
> >  [<ffffffff8106bb6d>] ? __kthread_parkme+0x61/0x61
> >  [<ffffffff816141bc>] ret_from_fork+0x7c/0xb0
> >  [<ffffffff8106bb6d>] ? __kthread_parkme+0x61/0x61
> > btrfs-transacti D ffff88021387eb00     0  3333      2 0x00000000
> >  ffff8800c5dcb890 0000000000000046 ffff8800c5dcbfd8 ffff88021387e5d0
> >  00000000000141c0 ffff88021387e5d0 ffff88021f2141c0 ffff88021387e5d0
> >  ffff8800c5dcb930 ffffffff810fe574 0000000000000002 ffff8800c5dcb8a0
> > Call Trace:
> >  [<ffffffff810fe574>] ? wait_on_page_read+0x3c/0x3c
> >  [<ffffffff8160b0d9>] schedule+0x73/0x75
> >  [<ffffffff8160b27e>] io_schedule+0x60/0x7a
> >  [<ffffffff810fe582>] sleep_on_page+0xe/0x12
> >  [<ffffffff8160b510>] __wait_on_bit+0x48/0x7a
> >  [<ffffffff810fe522>] wait_on_page_bit+0x7a/0x7c
> >  [<ffffffff81085096>] ? autoremove_wake_function+0x34/0x34
> >  [<ffffffff81245c70>] read_extent_buffer_pages+0x1bf/0x204
> >  [<ffffffff81223710>] ? free_root_pointers+0x5b/0x5b
> >  [<ffffffff81224412>] btree_read_extent_buffer_pages.constprop.45+0x66/0x100
> >  [<ffffffff81225367>] read_tree_block+0x2f/0x47
> >  [<ffffffff8120e4b6>] read_block_for_search.isra.26+0x24a/0x287
> >  [<ffffffff8120fcf7>] btrfs_search_slot+0x4f4/0x6bb
> >  [<ffffffff81214c3d>] lookup_inline_extent_backref+0xda/0x3fb
> >  [<ffffffff812167e1>] __btrfs_free_extent+0xf4/0x712
> >  [<ffffffff8121ba57>] __btrfs_run_delayed_refs+0x939/0xbdf
> >  [<ffffffff8121d896>] btrfs_run_delayed_refs+0x81/0x18f
> >  [<ffffffff8122af3e>] btrfs_commit_transaction+0x3a9/0x849
> >  [<ffffffff81085062>] ? finish_wait+0x65/0x65
> >  [<ffffffff81227598>] transaction_kthread+0xf8/0x1ab
> >  [<ffffffff812274a0>] ? btrfs_cleanup_transaction+0x43f/0x43f
> >  [<ffffffff8106bc1b>] kthread+0xae/0xb6
> >  [<ffffffff8106bb6d>] ? __kthread_parkme+0x61/0x61
> >  [<ffffffff816141bc>] ret_from_fork+0x7c/0xb0
> >  [<ffffffff8106bb6d>] ? __kthread_parkme+0x61/0x61
> > 
> > 
> > Worse, taking that dump gave me:
> > gargamel:/etc/udev/rules.d# echo w > /proc/sysrq-trigger 
> > Message from syslogd@gargamel at Mar 12 07:13:16 ...
> >  kernel:[1234536.531251] BUG: soft lockup - CPU#1 stuck for 22s! [mysqld:12552]
> > Message from syslogd@gargamel at Mar 12 07:13:16 ...
> >  kernel:[1234536.559276] BUG: soft lockup - CPU#2 stuck for 22s! [mysqld:4955]
> > Message from syslogd@gargamel at Mar 12 07:13:16 ...
> >  kernel:[1234540.538636] BUG: soft lockup - CPU#0 stuck for 22s! [kswapd0:48]
> > (there are more in the logs attached on what those lockups are)
> > 
> > Thankfully my btrfs send/receive is still working and should finish
> > today, but the amount of time it's been taking has been painful, and
> > the effect it's been having on the rest of my system, making it hang
> > or rendering its devices unusable for long periods of time, has been
> > punishing.
> > 
> > Can someone figure out from the kernel logs what is causing those near deadlocks?
> > 
> > Actually this was so bad apparently that sysrq w didn't even all make it
> > to syslog/disk (also on btrfs) but thankfully I got it on serial console.
> > 
> > 
> > I also found this suring sysrq. Shouldit be reported to someone else?
> > INFO: rcu_preempt detected stalls on CPUs/tasks:
> > 	3: (1 GPs behind) idle=395/140000000000000/0 softirq=284540927/284540928 last_accelerate: ed62/2821, nonlazy_posted: 1, ..
> > 	(detected by 0, t=15002 jiffies, g=100566635, c=100566634, q=87438)
> > sending NMI to all CPUs:
> > NMI backtrace for cpu 3
> > CPU: 3 PID: 21730 Comm: bash Not tainted 3.14.0-rc3-amd64-i915-preempt-20140216 #2
> > Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3806 08/20/2012
> > task: ffff88001cf3a710 ti: ffff880037f78000 task.ti: ffff880037f78000
> > RIP: 0010:[<ffffffff81309a80>]  [<ffffffff81309a80>] paravirt_read_tsc+0x0/0xd
> > RSP: 0018:ffff880037f79ac0  EFLAGS: 00000046
> > RAX: 0000000000000003 RBX: 0000000094b043ff RCX: 0000000000000000
> > RDX: 0000000000000004 RSI: 00000000000003fd RDI: 0000000000000001
> > RBP: ffff880037f79ae8 R08: ffffffff81cf24d0 R09: 00000000fffffffe
> > R10: 0000000000001a18 R11: 0000000000000000 R12: 00000000000009fb
> > R13: 0000000000000003 R14: 0000000094b047d7 R15: 0000000000000036
> > FS:  0000000000000000(0000) GS:ffff88021f380000(0063) knlGS:00000000f754b6c0
> > CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
> > CR2: 00000000ffc13412 CR3: 0000000145622000 CR4: 00000000000407e0
> > Stack:
> >  ffffffff81309b59 ffffffff81f27560 00000000000026f0 0000000000000020
> >  ffffffff813c9e1b ffff880037f79af8 ffffffff81309ac9 ffff880037f79b08
> >  ffffffff81309aef ffff880037f79b30 ffffffff813c9cc6 ffffffff81f27560
> > Call Trace:
> >  [<ffffffff81309b59>] ? delay_tsc+0x3d/0xa4
> >  [<ffffffff813c9e1b>] ? serial8250_console_write+0x10d/0x10d
> >  [<ffffffff81309ac9>] __delay+0xf/0x11
> >  [<ffffffff81309aef>] __const_udelay+0x24/0x26
> >  [<ffffffff813c9cc6>] wait_for_xmitr+0x49/0x91
> >  [<ffffffff813c9e37>] serial8250_console_putchar+0x1c/0x2e
> >  [<ffffffff813c5d97>] uart_console_write+0x3f/0x54
> >  [<ffffffff813c9dc8>] serial8250_console_write+0xba/0x10d
> >  [<ffffffff8109363b>] call_console_drivers.constprop.6+0xbc/0xf0
> >  [<ffffffff81093bf7>] console_unlock+0x269/0x302
> >  [<ffffffff8109405e>] vprintk_emit+0x3ce/0x3ff
> >  [<ffffffff81604702>] printk+0x54/0x56
> >  [<ffffffff81089799>] ? arch_local_irq_save+0x15/0x1b
> >  [<ffffffff8108752e>] print_cfs_rq+0x4fc/0xd71
> >  [<ffffffff81080fff>] print_cfs_stats+0x5a/0x9e
> >  [<ffffffff81086c65>] print_cpu+0x538/0x8e3
> >  [<ffffffff81087f7e>] sysrq_sched_debug_show+0x1f/0x42
> >  [<ffffffff81078874>] show_state_filter+0x92/0x9f
> >  [<ffffffff813b7c7a>] sysrq_handle_showstate_blocked+0x13/0x15
> >  [<ffffffff813b82c3>] __handle_sysrq+0xa0/0x138
> >  [<ffffffff813b8630>] write_sysrq_trigger+0x28/0x37
> >  [<ffffffff811a565a>] proc_reg_write+0x5a/0x7c
> >  [<ffffffff81155417>] vfs_write+0xab/0x107
> >  [<ffffffff81155b19>] SyS_write+0x46/0x79
> >  [<ffffffff81615f6c>] sysenter_dispatch+0x7/0x21
> > Code: 89 e5 e8 a2 fe ff ff 89 c2 66 31 c0 c1 e2 10 01 d0 15 ff ff 00 00 f7 d0 c1 e8 10 5d c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 e8 9c c4 d0 ff 66 90 5d c3 66 66 66 66 90 55 48 89 
> > 
> > 
> > Thanks,
> > Marc
> > -- 
> > "A mouse is a device used to point at the xterm you want to type in" - A.S.R.
> > Microsoft is to operating systems ....
> >                                       .... what McDonalds is to gourmet cooking
> > Home page: http://marc.merlins.org/                         | PGP 1024R/763BE901
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> 
> -- 
> "A mouse is a device used to point at the xterm you want to type in" - A.S.R.
> Microsoft is to operating systems ....
>                                       .... what McDonalds is to gourmet cooking
> Home page: http://marc.merlins.org/  
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

Wang Shilong March 14, 2014, 1:36 p.m. UTC | #5

Hi Josef,

Just ping this again.

Did you have any good ideas to rebuild extent tree if broken filesystem
is filled with snapshots.?

I was working on this recently, i was blocked that i can not verify if an extent
is *FULL BACKREF* mode or not. As a *FULL BACKREF* extent's refs can be 1
and more than 1..

I am willing to test  codes or have a try if you could give me some advice etc.

-Wang

> On 03/10/2014 11:50 PM, Josef Bacik wrote:
>> -----BEGIN PGP SIGNED MESSAGE-----
>> Hash: SHA1
>> 
>> On 03/10/2014 08:12 AM, Shilong Wang wrote:
>>> Hi Josef,
>>> 
>>> As i haven't thought any better ideas to rebuild extent tree which
>>> contains extent that owns 'FULL BACKREF' flag.
>>> 
>>> Considering an extent's refs can be equal or more than 1 if this
>>> extent has *FULL BACKREF* flag, so we could not make sure an
>>> extent's flag by only searching fs/file tree any more.
>>> 
>>> So until now, i just disable this option if snapshots exists,
>>> please correct me if i miss something here. Or you have any better
>>> ideas to solve this problem.~_~
>>> 
>>> 
>> I thought the fsck stuff rebuilds full backref refs properly, does it
>> not?  If it doesn't we need to fix that, however I'm fine with
>> disabling the option if snapshots exist for the time being.  Thanks,
> If there are no snapshots, --init-extent-tree can works as expected.
> I just have not thought a better idea to rebuild extent tree if we do have
> snapshots which means we may have an extent with *FULL BACKREF*
> flag.
> 
> Thanks,
> Wang
>> 
>> Josef
>> -----BEGIN PGP SIGNATURE-----
>> Version: GnuPG v1
>> Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/
>> 
>> iQIcBAEBAgAGBQJTHd9NAAoJEANb+wAKly3BYCYP/0iTaaa7w0SnfXtgjoVyX+nT
>> +e0Pa46zeKzpTujotCDb9E/2PBesCAvA4Psog3rkfsqJ2nXN9cERN4E6/JG4nAHh
>> Hv4KPo+w+tCkC4U2wSoDivYrVk9G5SH25ewkgW6iheSYNIlm+PLbOQz9DzGjCFDp
>> 51J9tG5E010siOyhlLCyGj8ZTj+gXuoQVWKCS8dOpCLMrbYYjMDXa562hqWaLoS/
>> t3eSfP7Tnnpl43NiMZI4fWrzmlFa5lba5iJmG59FeyiseRH4Zrhee4St1L1xDL5A
>> /6f3tJJT7DJjRRJFv0nJAOvOPyFaK8bMaYmOQJg6VrhcyPKM3BxBVEab3HrmQ7jt
>> LCMWobpIcM7e5BugmbTGGsFymhv05SQgvYGzpzRVXdsSzqubuqTcXwloNU5RyyFF
>> sXT9IiW9wAibHe7mDN7V6nfo1bVfHsjvSVi1rqz4/zFOWyh8oqxfEhxUJIWhfFsn
>> j0WJevvqKnjBJujyyuQpL13tzh69qei0AHOEme3R46BSRMnyuacy/WOeyo4VXPcn
>> 0GIeWbngAIWF/quhoQGkvofRMlPgftiDge8uz9pbm3IEKeiP9dQ/HvKsIHMKjnKW
>> 3dEBvMV/CSUQNek4VjO1ALefTRZQvJVL8Wxdij4W+djJw/uVX7fOhuqdkqyfM3FY
>> CKSB3HUSUtDCammsvgQA
>> =OT98
>> -----END PGP SIGNATURE-----
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Josef Bacik March 14, 2014, 2:36 p.m. UTC | #6

On 03/14/2014 09:36 AM, Wang Shilong wrote:
> Hi Josef,
> 
> Just ping this again.
> 
> Did you have any good ideas to rebuild extent tree if broken
> filesystem is filled with snapshots.?
> 
> I was working on this recently, i was blocked that i can not verify
> if an extent is *FULL BACKREF* mode or not. As a *FULL BACKREF*
> extent's refs can be 1 and more than 1..
> 
> I am willing to test  codes or have a try if you could give me some
> advice etc.
> 

Full backrefs aren't too hard.  Basically all you have to do is walk
down the fs tree and keep track of btrfs_header_owner(eb) for
everything we walk into.  If btrfs_header_owner(eb) == root->objectid
for the tree we are walking down then we need a ye olde normal backref
for this block.  If btrfs_header_owner(eb) != root->objectid we _may_
need a full backref, it depends on who owns the parent block.  The
following may be incomplete, I'm kind of sick

1) We walk down the original tree, every eb we encounter has
btrfs_header_owner(eb) == root->objectid.  We add normal references
for this root (BTRFS_TREE_BLOCK_REF_KEY) for this root.  World peace
is achieved.

2) We walk down the snapshotted tree.  Say we didn't change anything
at all, it was just a clean snapshot and then boom.  So the
btrfs_header_owner(root->node) == root->objectid, so normal backref.
We walk down to the next level, where btrfs_header_owner(eb) !=
root->objectid, but the level above did, so we add normal refs for all
of these blocks.  We go down the next level, now our
btrfs_header_owner(parent) != root->objectid and
btrfs_header_owner(eb) != root->objectid.  This is where we need to
now go back and see if btrfs_header_owner(eb) currently has a ref on
eb.  If it does we are done, move on to the next block in this same
level, we don't have to go further down.

3) Harder case, we snapshotted and then changed things in the original
root.  Do the same thing as in step 2, but now we get down to
btrfs_header_level(eb) != root->objectid && btrfs_header_level(parent)
!= root->objectid.  We lookup the references we have for eb and notice
that btrfs_header_owner(eb) no longer refers to eb.  So now we must
set FULL_BACKREF on this extent reference and add a
SHARED_BLOCK_REF_KEY for this eb using the parent->start as the
offset.  And we need to keep walking down and doing the same thing
until we either hit level 0 or btrfs_header_owner(eb) has a ref on the
block.

4) Not really a whole special case, just something to keep in mind, if
btrfs_header_owner(parent) == root->objectid but
btrfs_header_owner(eb) != root->objectid that means we have a normal
TREE_BLOCK_REF on eb, it's only when the parent doesn't match our
current root that it's a problem.

Does that make sense?  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong March 17, 2014, 12:21 p.m. UTC | #7

Hi Josef,

Thanks for your information ^_^, i have just finished codes and it passed
my simple test and i will do more tests about rebuilding extent tree
with snapshots before
i send out patches.

Thanks,
Wang

2014-03-14 22:36 GMT+08:00 Josef Bacik <jbacik@fb.com>:
> On 03/14/2014 09:36 AM, Wang Shilong wrote:
>> Hi Josef,
>>
>> Just ping this again.
>>
>> Did you have any good ideas to rebuild extent tree if broken
>> filesystem is filled with snapshots.?
>>
>> I was working on this recently, i was blocked that i can not verify
>> if an extent is *FULL BACKREF* mode or not. As a *FULL BACKREF*
>> extent's refs can be 1 and more than 1..
>>
>> I am willing to test  codes or have a try if you could give me some
>> advice etc.
>>
>
> Full backrefs aren't too hard.  Basically all you have to do is walk
> down the fs tree and keep track of btrfs_header_owner(eb) for
> everything we walk into.  If btrfs_header_owner(eb) == root->objectid
> for the tree we are walking down then we need a ye olde normal backref
> for this block.  If btrfs_header_owner(eb) != root->objectid we _may_
> need a full backref, it depends on who owns the parent block.  The
> following may be incomplete, I'm kind of sick
>
> 1) We walk down the original tree, every eb we encounter has
> btrfs_header_owner(eb) == root->objectid.  We add normal references
> for this root (BTRFS_TREE_BLOCK_REF_KEY) for this root.  World peace
> is achieved.
>
> 2) We walk down the snapshotted tree.  Say we didn't change anything
> at all, it was just a clean snapshot and then boom.  So the
> btrfs_header_owner(root->node) == root->objectid, so normal backref.
> We walk down to the next level, where btrfs_header_owner(eb) !=
> root->objectid, but the level above did, so we add normal refs for all
> of these blocks.  We go down the next level, now our
> btrfs_header_owner(parent) != root->objectid and
> btrfs_header_owner(eb) != root->objectid.  This is where we need to
> now go back and see if btrfs_header_owner(eb) currently has a ref on
> eb.  If it does we are done, move on to the next block in this same
> level, we don't have to go further down.
>
> 3) Harder case, we snapshotted and then changed things in the original
> root.  Do the same thing as in step 2, but now we get down to
> btrfs_header_level(eb) != root->objectid && btrfs_header_level(parent)
> != root->objectid.  We lookup the references we have for eb and notice
> that btrfs_header_owner(eb) no longer refers to eb.  So now we must
> set FULL_BACKREF on this extent reference and add a
> SHARED_BLOCK_REF_KEY for this eb using the parent->start as the
> offset.  And we need to keep walking down and doing the same thing
> until we either hit level 0 or btrfs_header_owner(eb) has a ref on the
> block.
>
> 4) Not really a whole special case, just something to keep in mind, if
> btrfs_header_owner(parent) == root->objectid but
> btrfs_header_owner(eb) != root->objectid that means we have a normal
> TREE_BLOCK_REF on eb, it's only when the parent doesn't match our
> current root that it's a problem.
>
>
> Does that make sense?  Thanks,
>
> Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff mbox

Patch

diff --git a/cmds-check.c b/cmds-check.c
index d1cafe1..ddee897 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -6143,6 +6143,56 @@  static int reset_block_groups(struct btrfs_fs_info *fs_info)
 	return 0;
 }
 
+static int is_snapshot_exist(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *root = fs_info->tree_root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	int ret;
+	int found = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	while (1) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret)
+				goto out;
+		}
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+		if (key.type != BTRFS_ROOT_ITEM_KEY ||
+		    key.objectid < BTRFS_FIRST_FREE_OBJECTID) {
+			path->slots[0]++;
+			continue;
+		}
+		if (key.offset > 0) {
+			found = 1;
+			break;
+		}
+		path->slots[0]++;
+	}
+out:
+	btrfs_free_path(path);
+	if (found)
+		return 1;
+	else if (ret >= 0)
+		return 0;
+	return ret;
+}
+
 static int reset_balance(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info)
 {
@@ -6537,6 +6587,18 @@  int cmd_check(int argc, char **argv)
 		ret = -EIO;
 		goto close_out;
 	}
+	if (init_extent_tree) {
+		ret = is_snapshot_exist(info);
+		if (ret < 0) {
+			fprintf(stderr, "ERROR: fail to check if there are snapshots in btrfs filesystem\n");
+			ret = 1;
+			goto close_out;
+		} else if (ret) {
+			fprintf(stderr, "Snapshots detected, unable to rebuild extent tree for such case.\n");
+			ret = 1;
+			goto close_out;
+		}
+	}
 
 	if (init_extent_tree || init_csum_tree) {
 		struct btrfs_trans_handle *trans;