Message ID | 20160516210655.sfsjtj5oleeksaxq@floor.thefacebook.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
On Mon, May 16, 2016 at 05:06:55PM -0400, Chris Mason wrote: > Hi everyone, > > I've tried to cc most of the recent reports of this warning. It was > pretty hard to track down, but I've got a test program for it now. My > goal is to change xfs_io to add the madvise loop under > --do-horrible-things, instead of adding yet another special doio.c > function to xfstests. [...] > And now for the patch: [...] I've applied the patch and hammered it with real workloads (mostly sbuild). They used to trigger the warning nearly immediately, with the patch all seemed ok -- so there is an improvement. I then tried your test case, and alas: [13603.998647] ------------[ cut here ]------------ [13603.998668] WARNING: CPU: 0 PID: 11291 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0x63/0x80 [13603.998673] Modules linked in: vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) nvidia(PO) [last unloaded: vboxdrv] [13603.998692] CPU: 0 PID: 11291 Comm: kworker/u12:15 Tainted: P O 4.6.0+ #1 [13603.998697] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 [13603.998706] Workqueue: btrfs-delalloc btrfs_delalloc_helper [13603.998715] 0000000000000000 ffffffff813817a0 0000000000000000 0000000000000000 [13603.998722] ffffffff8108ce82 000000000000b000 ffff8802226e2e00 000000000000afff [13603.998728] ffff88022b42501c 000000000000b000 ffff8802224b5000 ffffffff81286fb3 [13603.998734] Call Trace: [13603.998745] [<ffffffff813817a0>] ? dump_stack+0x46/0x66 [13603.998753] [<ffffffff8108ce82>] ? __warn+0xe2/0x100 [13603.998762] [<ffffffff81286fb3>] ? btrfs_free_reserved_data_space_noquota+0x63/0x80 [13603.998770] [<ffffffff812a2350>] ? btrfs_clear_bit_hook+0x2d0/0x330 [13603.998776] [<ffffffff812be660>] ? clear_state_bit+0x60/0x1e0 [13603.998782] [<ffffffff812be90f>] ? __clear_extent_bit+0x12f/0x390 [13603.998788] [<ffffffff812bf6dd>] ? extent_clear_unlock_delalloc+0x5d/0x220 [13603.998795] [<ffffffff8118cea7>] ? cache_grow+0x147/0x220 [13603.998800] [<ffffffff8118d98c>] ? kmem_cache_alloc+0x12c/0x3e0 [13603.998808] [<ffffffff811c22d0>] ? igrab+0x10/0x40 [13603.998815] [<ffffffff812baf57>] ? __btrfs_add_ordered_extent+0x1c7/0x2d0 [13603.998822] [<ffffffff812a8e8f>] ? submit_compressed_extents+0x41f/0x4e0 [13603.998830] [<ffffffff812d1c86>] ? btrfs_scrubparity_helper+0x136/0x1c0 [13603.998837] [<ffffffff810a2bad>] ? process_one_work+0x1bd/0x400 [13603.998843] [<ffffffff810a2e32>] ? worker_thread+0x42/0x4b0 [13603.998850] [<ffffffff810a2df0>] ? process_one_work+0x400/0x400 [13603.998856] [<ffffffff810a8268>] ? kthread+0xb8/0xd0 [13603.998863] [<ffffffff81747d02>] ? ret_from_fork+0x22/0x40 [13603.998868] [<ffffffff810a81b0>] ? kthread_worker_fn+0x160/0x160 [13603.998873] ---[ end trace 359dcd25d6847cee ]--- [13603.999155] ------------[ cut here ]------------ [13603.999167] WARNING: CPU: 0 PID: 11291 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0x63/0x80 [13603.999170] Modules linked in: vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) nvidia(PO) [last unloaded: vboxdrv] [13603.999183] CPU: 0 PID: 11291 Comm: kworker/u12:15 Tainted: P W O 4.6.0+ #1 [13603.999186] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 [13603.999193] Workqueue: btrfs-delalloc btrfs_delalloc_helper [13603.999196] 0000000000000000 ffffffff813817a0 0000000000000000 0000000000000000 [13603.999202] ffffffff8108ce82 0000000000005000 ffff8802226e2e00 0000000000004fff [13603.999208] ffff88022b42501c 0000000000005000 ffff8802224b5000 ffffffff81286fb3 [13603.999214] Call Trace: [13603.999220] [<ffffffff813817a0>] ? dump_stack+0x46/0x66 [13603.999226] [<ffffffff8108ce82>] ? __warn+0xe2/0x100 [13603.999234] [<ffffffff81286fb3>] ? btrfs_free_reserved_data_space_noquota+0x63/0x80 [13603.999241] [<ffffffff812a2350>] ? btrfs_clear_bit_hook+0x2d0/0x330 [13603.999247] [<ffffffff812be660>] ? clear_state_bit+0x60/0x1e0 [13603.999252] [<ffffffff812be90f>] ? __clear_extent_bit+0x12f/0x390 [13603.999258] [<ffffffff812bf6dd>] ? extent_clear_unlock_delalloc+0x5d/0x220 [13603.999264] [<ffffffff8118d98c>] ? kmem_cache_alloc+0x12c/0x3e0 [13603.999271] [<ffffffff811c22d0>] ? igrab+0x10/0x40 [13603.999278] [<ffffffff812baf57>] ? __btrfs_add_ordered_extent+0x1c7/0x2d0 [13603.999284] [<ffffffff812a8e8f>] ? submit_compressed_extents+0x41f/0x4e0 [13603.999292] [<ffffffff812d1c86>] ? btrfs_scrubparity_helper+0x136/0x1c0 [13603.999299] [<ffffffff810a2bad>] ? process_one_work+0x1bd/0x400 [13603.999305] [<ffffffff810a2e32>] ? worker_thread+0x42/0x4b0 [13603.999312] [<ffffffff810a2df0>] ? process_one_work+0x400/0x400 [13603.999317] [<ffffffff810a8268>] ? kthread+0xb8/0xd0 [13603.999323] [<ffffffff81747d02>] ? ret_from_fork+0x22/0x40 [13603.999329] [<ffffffff810a81b0>] ? kthread_worker_fn+0x160/0x160 [13603.999333] ---[ end trace 359dcd25d6847cef ]--- [13634.053259] ------------[ cut here ]------------ [13634.053280] WARNING: CPU: 3 PID: 11291 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0x63/0x80 [13634.053285] Modules linked in: vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) nvidia(PO) [last unloaded: vboxdrv] [13634.053303] CPU: 3 PID: 11291 Comm: kworker/u12:15 Tainted: P W O 4.6.0+ #1 [13634.053308] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 [13634.053317] Workqueue: btrfs-delalloc btrfs_delalloc_helper [13634.053321] 0000000000000000 ffffffff813817a0 0000000000000000 0000000000000000 [13634.053329] ffffffff8108ce82 0000000000003000 ffff8802226e2e00 0000000000002fff [13634.053335] ffff88022ecda41c 0000000000003000 ffff880227035800 ffffffff81286fb3 [13634.053341] Call Trace: [13634.053351] [<ffffffff813817a0>] ? dump_stack+0x46/0x66 [13634.053359] [<ffffffff8108ce82>] ? __warn+0xe2/0x100 [13634.053368] [<ffffffff81286fb3>] ? btrfs_free_reserved_data_space_noquota+0x63/0x80 [13634.053376] [<ffffffff812a2350>] ? btrfs_clear_bit_hook+0x2d0/0x330 [13634.053382] [<ffffffff812be660>] ? clear_state_bit+0x60/0x1e0 [13634.053388] [<ffffffff812be90f>] ? __clear_extent_bit+0x12f/0x390 [13634.053394] [<ffffffff812bf6dd>] ? extent_clear_unlock_delalloc+0x5d/0x220 [13634.053401] [<ffffffff8118d98c>] ? kmem_cache_alloc+0x12c/0x3e0 [13634.053408] [<ffffffff811c22d0>] ? igrab+0x10/0x40 [13634.053416] [<ffffffff812baf57>] ? __btrfs_add_ordered_extent+0x1c7/0x2d0 [13634.053422] [<ffffffff812a8e8f>] ? submit_compressed_extents+0x41f/0x4e0 [13634.053430] [<ffffffff812d1c86>] ? btrfs_scrubparity_helper+0x136/0x1c0 [13634.053437] [<ffffffff810a2bad>] ? process_one_work+0x1bd/0x400 [13634.053444] [<ffffffff810a2e32>] ? worker_thread+0x42/0x4b0 [13634.053450] [<ffffffff810a2df0>] ? process_one_work+0x400/0x400 [13634.053456] [<ffffffff810a8268>] ? kthread+0xb8/0xd0 [13634.053464] [<ffffffff81747d02>] ? ret_from_fork+0x22/0x40 [13634.053470] [<ffffffff810a81b0>] ? kthread_worker_fn+0x160/0x160 [13634.053474] ---[ end trace 359dcd25d6847cf0 ]--- [13646.118422] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes Here's another run on an untainted kernel built with frame pointers etc: ./dammitdave foo [ 236.500257] ------------[ cut here ]------------ [ 236.500280] WARNING: CPU: 3 PID: 2940 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0xdd/0x160 [ 236.500285] Modules linked in: [ 236.500295] CPU: 3 PID: 2940 Comm: dammit Not tainted 4.6.0debug+ #1 [ 236.500301] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 [ 236.500306] 0000000000000000 0000000042ec2fb0 ffff88022d5ffbd8 ffffffff816b1920 [ 236.500315] ffffffff81f86e3f 0000000042ec2fb0 0000000000000000 0000000000000000 [ 236.500322] ffff88022d5ffc20 ffffffff81118c2c ffff88022dfd4000 0000108900000000 [ 236.500330] Call Trace: [ 236.500342] [<ffffffff816b1920>] dump_stack+0x60/0xa0 [ 236.500352] [<ffffffff81118c2c>] __warn+0x10c/0x150 [ 236.500360] [<ffffffff81118d78>] warn_slowpath_null+0x18/0x20 [ 236.500368] [<ffffffff814e389d>] btrfs_free_reserved_data_space_noquota+0xdd/0x160 [ 236.500376] [<ffffffff814e3942>] btrfs_free_reserved_data_space+0x22/0x40 [ 236.500385] [<ffffffff8152eb78>] __btrfs_buffered_write+0x748/0xa20 [ 236.500394] [<ffffffff81624bdc>] ? security_inode_need_killpriv+0x3c/0x60 [ 236.500401] [<ffffffff815345ef>] btrfs_file_write_iter+0x4ff/0xb90 [ 236.500410] [<ffffffff81341847>] __vfs_write+0x117/0x1d0 [ 236.500417] [<ffffffff8134317d>] vfs_write+0xdd/0x290 [ 236.500425] [<ffffffff813748bd>] ? __fget_light+0x4d/0x120 [ 236.500432] [<ffffffff813453ee>] SyS_pwrite64+0x9e/0xc0 [ 236.500441] [<ffffffff81db989f>] entry_SYSCALL_64_fastpath+0x17/0x93 [ 236.500446] ---[ end trace 7df747a6a0962ae6 ]--- rm foo [ 323.851851] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes
On Tue, May 17, 2016 at 05:14:51PM +0200, Adam Borowski wrote: > On Mon, May 16, 2016 at 05:06:55PM -0400, Chris Mason wrote: > > Hi everyone, > > > > I've tried to cc most of the recent reports of this warning. It was > > pretty hard to track down, but I've got a test program for it now. My > > goal is to change xfs_io to add the madvise loop under > > --do-horrible-things, instead of adding yet another special doio.c > > function to xfstests. > [...] > > And now for the patch: > [...] > > > I've applied the patch and hammered it with real workloads (mostly sbuild). > They used to trigger the warning nearly immediately, with the patch all > seemed ok -- so there is an improvement. Thanks for trying these out. > > I then tried your test case, and alas: > > Here's another run on an untainted kernel built with frame pointers etc: > > ./dammitdave foo > [ 236.500257] ------------[ cut here ]------------ > [ 236.500280] WARNING: CPU: 3 PID: 2940 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0xdd/0x160 > [ 236.500285] Modules linked in: > [ 236.500295] CPU: 3 PID: 2940 Comm: dammit Not tainted 4.6.0debug+ #1 > [ 236.500301] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 > [ 236.500306] 0000000000000000 0000000042ec2fb0 ffff88022d5ffbd8 ffffffff816b1920 > [ 236.500315] ffffffff81f86e3f 0000000042ec2fb0 0000000000000000 0000000000000000 > [ 236.500322] ffff88022d5ffc20 ffffffff81118c2c ffff88022dfd4000 0000108900000000 > [ 236.500330] Call Trace: > [ 236.500342] [<ffffffff816b1920>] dump_stack+0x60/0xa0 > [ 236.500352] [<ffffffff81118c2c>] __warn+0x10c/0x150 > [ 236.500360] [<ffffffff81118d78>] warn_slowpath_null+0x18/0x20 > [ 236.500368] [<ffffffff814e389d>] btrfs_free_reserved_data_space_noquota+0xdd/0x160 > [ 236.500376] [<ffffffff814e3942>] btrfs_free_reserved_data_space+0x22/0x40 > [ 236.500385] [<ffffffff8152eb78>] __btrfs_buffered_write+0x748/0xa20 > [ 236.500394] [<ffffffff81624bdc>] ? security_inode_need_killpriv+0x3c/0x60 > [ 236.500401] [<ffffffff815345ef>] btrfs_file_write_iter+0x4ff/0xb90 > [ 236.500410] [<ffffffff81341847>] __vfs_write+0x117/0x1d0 > [ 236.500417] [<ffffffff8134317d>] vfs_write+0xdd/0x290 > [ 236.500425] [<ffffffff813748bd>] ? __fget_light+0x4d/0x120 > [ 236.500432] [<ffffffff813453ee>] SyS_pwrite64+0x9e/0xc0 > [ 236.500441] [<ffffffff81db989f>] entry_SYSCALL_64_fastpath+0x17/0x93 > [ 236.500446] ---[ end trace 7df747a6a0962ae6 ]--- > rm foo > [ 323.851851] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes Hmmm, some of your traces mentioned compression, do you have compression enabled? I'll try to reproduce here, but could you try the same test on v4.5? Also, if you can gdb your vmlinux (or btrfs.ko) and find this line: gdb> list *__btrfs_buffered_write+0x748 It'll help. Thanks! -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 17, 2016 at 12:23:44PM -0400, Chris Mason wrote: > On Tue, May 17, 2016 at 05:14:51PM +0200, Adam Borowski wrote: > > On Mon, May 16, 2016 at 05:06:55PM -0400, Chris Mason wrote: > > > And now for the patch: > > [...] > > > > I then tried your test case, and alas: > > > > Here's another run on an untainted kernel built with frame pointers etc: > > > > ./dammitdave foo > > [ 236.500257] ------------[ cut here ]------------ > > [ 236.500280] WARNING: CPU: 3 PID: 2940 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > [ 236.500285] Modules linked in: > > [ 236.500295] CPU: 3 PID: 2940 Comm: dammit Not tainted 4.6.0debug+ #1 > > [ 236.500301] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 > > [ 236.500306] 0000000000000000 0000000042ec2fb0 ffff88022d5ffbd8 ffffffff816b1920 > > [ 236.500315] ffffffff81f86e3f 0000000042ec2fb0 0000000000000000 0000000000000000 > > [ 236.500322] ffff88022d5ffc20 ffffffff81118c2c ffff88022dfd4000 0000108900000000 > > [ 236.500330] Call Trace: > > [ 236.500342] [<ffffffff816b1920>] dump_stack+0x60/0xa0 > > [ 236.500352] [<ffffffff81118c2c>] __warn+0x10c/0x150 > > [ 236.500360] [<ffffffff81118d78>] warn_slowpath_null+0x18/0x20 > > [ 236.500368] [<ffffffff814e389d>] btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > [ 236.500376] [<ffffffff814e3942>] btrfs_free_reserved_data_space+0x22/0x40 > > [ 236.500385] [<ffffffff8152eb78>] __btrfs_buffered_write+0x748/0xa20 > > [ 236.500394] [<ffffffff81624bdc>] ? security_inode_need_killpriv+0x3c/0x60 > > [ 236.500401] [<ffffffff815345ef>] btrfs_file_write_iter+0x4ff/0xb90 > > [ 236.500410] [<ffffffff81341847>] __vfs_write+0x117/0x1d0 > > [ 236.500417] [<ffffffff8134317d>] vfs_write+0xdd/0x290 > > [ 236.500425] [<ffffffff813748bd>] ? __fget_light+0x4d/0x120 > > [ 236.500432] [<ffffffff813453ee>] SyS_pwrite64+0x9e/0xc0 > > [ 236.500441] [<ffffffff81db989f>] entry_SYSCALL_64_fastpath+0x17/0x93 > > [ 236.500446] ---[ end trace 7df747a6a0962ae6 ]--- > > rm foo > > [ 323.851851] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes > > Hmmm, some of your traces mentioned compression, do you have compression > enabled? Yeah, I mount with noatime,compress=lzo. > I'll try to reproduce here, but could you try the same test on v4.5? I've ran it for half an hour on vanilla 4.5.4 without any patches, no failures of any kind. > Also, if you can gdb your vmlinux (or btrfs.ko) and find this line: > > gdb> list *__btrfs_buffered_write+0x748 0xffffffff8152eb78 is in __btrfs_buffered_write (fs/btrfs/file.c:1564). 1559 1560 reserve_metadata: 1561 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); 1562 if (ret) { 1563 if (!only_release_metadata) 1564 btrfs_free_reserved_data_space(inode, pos, 1565 write_bytes); 1566 else 1567 btrfs_end_write_no_snapshoting(root); 1568 break; Meow!
On Tue, May 17, 2016 at 10:47:27PM +0200, Adam Borowski wrote: > On Tue, May 17, 2016 at 12:23:44PM -0400, Chris Mason wrote: > > On Tue, May 17, 2016 at 05:14:51PM +0200, Adam Borowski wrote: > > > On Mon, May 16, 2016 at 05:06:55PM -0400, Chris Mason wrote: > > > > And now for the patch: > > > [...] > > > > > > I then tried your test case, and alas: > > > > > > Here's another run on an untainted kernel built with frame pointers etc: > > > > > > ./dammitdave foo > > > [ 236.500257] ------------[ cut here ]------------ > > > [ 236.500280] WARNING: CPU: 3 PID: 2940 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > > [ 236.500285] Modules linked in: > > > [ 236.500295] CPU: 3 PID: 2940 Comm: dammit Not tainted 4.6.0debug+ #1 > > > [ 236.500301] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 > > > [ 236.500306] 0000000000000000 0000000042ec2fb0 ffff88022d5ffbd8 ffffffff816b1920 > > > [ 236.500315] ffffffff81f86e3f 0000000042ec2fb0 0000000000000000 0000000000000000 > > > [ 236.500322] ffff88022d5ffc20 ffffffff81118c2c ffff88022dfd4000 0000108900000000 > > > [ 236.500330] Call Trace: > > > [ 236.500342] [<ffffffff816b1920>] dump_stack+0x60/0xa0 > > > [ 236.500352] [<ffffffff81118c2c>] __warn+0x10c/0x150 > > > [ 236.500360] [<ffffffff81118d78>] warn_slowpath_null+0x18/0x20 > > > [ 236.500368] [<ffffffff814e389d>] btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > > [ 236.500376] [<ffffffff814e3942>] btrfs_free_reserved_data_space+0x22/0x40 > > > [ 236.500385] [<ffffffff8152eb78>] __btrfs_buffered_write+0x748/0xa20 > > > [ 236.500394] [<ffffffff81624bdc>] ? security_inode_need_killpriv+0x3c/0x60 > > > [ 236.500401] [<ffffffff815345ef>] btrfs_file_write_iter+0x4ff/0xb90 > > > [ 236.500410] [<ffffffff81341847>] __vfs_write+0x117/0x1d0 > > > [ 236.500417] [<ffffffff8134317d>] vfs_write+0xdd/0x290 > > > [ 236.500425] [<ffffffff813748bd>] ? __fget_light+0x4d/0x120 > > > [ 236.500432] [<ffffffff813453ee>] SyS_pwrite64+0x9e/0xc0 > > > [ 236.500441] [<ffffffff81db989f>] entry_SYSCALL_64_fastpath+0x17/0x93 > > > [ 236.500446] ---[ end trace 7df747a6a0962ae6 ]--- > > > rm foo > > > [ 323.851851] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes > > > > Hmmm, some of your traces mentioned compression, do you have compression > > enabled? > > Yeah, I mount with noatime,compress=lzo. > > > I'll try to reproduce here, but could you try the same test on v4.5? > > I've ran it for half an hour on vanilla 4.5.4 without any patches, no > failures of any kind. > > > Also, if you can gdb your vmlinux (or btrfs.ko) and find this line: > > > > gdb> list *__btrfs_buffered_write+0x748 > > 0xffffffff8152eb78 is in __btrfs_buffered_write (fs/btrfs/file.c:1564). > 1559 > 1560 reserve_metadata: > 1561 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); > 1562 if (ret) { > 1563 if (!only_release_metadata) > 1564 btrfs_free_reserved_data_space(inode, pos, > 1565 write_bytes); > 1566 else > 1567 btrfs_end_write_no_snapshoting(root); > 1568 break; > Aha, so you're getting an enospc with compression on and the accounting doesn't line up. I should be able to trigger this. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 17, 2016 at 05:00:49PM -0400, Chris Mason wrote: > On Tue, May 17, 2016 at 10:47:27PM +0200, Adam Borowski wrote: > > > Hmmm, some of your traces mentioned compression, do you have compression > > > enabled? > > > > Yeah, I mount with noatime,compress=lzo. > > > > > Also, if you can gdb your vmlinux (or btrfs.ko) and find this line: > > > > > > gdb> list *__btrfs_buffered_write+0x748 > > > > 0xffffffff8152eb78 is in __btrfs_buffered_write (fs/btrfs/file.c:1564). > > 1559 > > 1560 reserve_metadata: > > 1561 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); > > 1562 if (ret) { > > 1563 if (!only_release_metadata) > > 1564 btrfs_free_reserved_data_space(inode, pos, > > 1565 write_bytes); > > 1566 else > > 1567 btrfs_end_write_no_snapshoting(root); > > 1568 break; > > > > Aha, so you're getting an enospc with compression on and the accounting > doesn't line up. I should be able to trigger this. This partition was fully balanced just a few days ago so I didn't even think to look at df. [~]# df -h /dev/sda1 216G 161G 53G 76% / [~]# btrfs files df / Data, single: total=209.12GiB, used=156.46GiB System, single: total=32.00MiB, used=48.00KiB Metadata, single: total=6.00GiB, used=3.51GiB GlobalReserve, single: total=512.00MiB, used=0.00B However, after partially balancing it a bit: [~]# btrfs files df / Data, single: total=172.00GiB, used=156.03GiB System, single: total=32.00MiB, used=48.00KiB Metadata, single: total=5.00GiB, used=3.50GiB GlobalReserve, single: total=512.00MiB, used=0.00B I can't reproduce the failure anymore.
On Tue, May 17, 2016 at 11:57:07PM +0200, Adam Borowski wrote: > On Tue, May 17, 2016 at 05:00:49PM -0400, Chris Mason wrote: > > On Tue, May 17, 2016 at 10:47:27PM +0200, Adam Borowski wrote: > > > > Hmmm, some of your traces mentioned compression, do you have compression > > > > enabled? > > > > > > Yeah, I mount with noatime,compress=lzo. > > > > > > > Also, if you can gdb your vmlinux (or btrfs.ko) and find this line: > > > > > > > > gdb> list *__btrfs_buffered_write+0x748 > > > > > > 0xffffffff8152eb78 is in __btrfs_buffered_write (fs/btrfs/file.c:1564). > > > 1559 > > > 1560 reserve_metadata: > > > 1561 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); > > > 1562 if (ret) { > > > 1563 if (!only_release_metadata) > > > 1564 btrfs_free_reserved_data_space(inode, pos, > > > 1565 write_bytes); > > > 1566 else > > > 1567 btrfs_end_write_no_snapshoting(root); > > > 1568 break; > > > > > > > Aha, so you're getting an enospc with compression on and the accounting > > doesn't line up. I should be able to trigger this. > > This partition was fully balanced just a few days ago so I didn't even think > to look at df. > > [~]# df -h > /dev/sda1 216G 161G 53G 76% / > > [~]# btrfs files df / > Data, single: total=209.12GiB, used=156.46GiB > System, single: total=32.00MiB, used=48.00KiB > Metadata, single: total=6.00GiB, used=3.51GiB > GlobalReserve, single: total=512.00MiB, used=0.00B > > However, after partially balancing it a bit: > > [~]# btrfs files df / > Data, single: total=172.00GiB, used=156.03GiB > System, single: total=32.00MiB, used=48.00KiB > Metadata, single: total=5.00GiB, used=3.50GiB > GlobalReserve, single: total=512.00MiB, used=0.00B > > I can't reproduce the failure anymore. Yeah, I'm not sure yet if this is related to the page faulting problem, but Dave Jones triggered this error back in v4.4. I'll still fix it, it should be an alignment issue with write_bytes. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 17, 2016 at 10:47:27PM +0200, Adam Borowski wrote: > On Tue, May 17, 2016 at 12:23:44PM -0400, Chris Mason wrote: > > On Tue, May 17, 2016 at 05:14:51PM +0200, Adam Borowski wrote: > > > On Mon, May 16, 2016 at 05:06:55PM -0400, Chris Mason wrote: > > > > And now for the patch: > > > [...] > > > > > > I then tried your test case, and alas: > > > > > > Here's another run on an untainted kernel built with frame pointers etc: > > > > > > ./dammitdave foo > > > [ 236.500257] ------------[ cut here ]------------ > > > [ 236.500280] WARNING: CPU: 3 PID: 2940 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > > [ 236.500285] Modules linked in: > > > [ 236.500295] CPU: 3 PID: 2940 Comm: dammit Not tainted 4.6.0debug+ #1 > > > [ 236.500301] Hardware name: System manufacturer System Product Name/M4A77T, BIOS 2401 05/18/2011 > > > [ 236.500306] 0000000000000000 0000000042ec2fb0 ffff88022d5ffbd8 ffffffff816b1920 > > > [ 236.500315] ffffffff81f86e3f 0000000042ec2fb0 0000000000000000 0000000000000000 > > > [ 236.500322] ffff88022d5ffc20 ffffffff81118c2c ffff88022dfd4000 0000108900000000 > > > [ 236.500330] Call Trace: > > > [ 236.500342] [<ffffffff816b1920>] dump_stack+0x60/0xa0 > > > [ 236.500352] [<ffffffff81118c2c>] __warn+0x10c/0x150 > > > [ 236.500360] [<ffffffff81118d78>] warn_slowpath_null+0x18/0x20 > > > [ 236.500368] [<ffffffff814e389d>] btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > > [ 236.500376] [<ffffffff814e3942>] btrfs_free_reserved_data_space+0x22/0x40 > > > [ 236.500385] [<ffffffff8152eb78>] __btrfs_buffered_write+0x748/0xa20 > > > [ 236.500394] [<ffffffff81624bdc>] ? security_inode_need_killpriv+0x3c/0x60 > > > [ 236.500401] [<ffffffff815345ef>] btrfs_file_write_iter+0x4ff/0xb90 > > > [ 236.500410] [<ffffffff81341847>] __vfs_write+0x117/0x1d0 > > > [ 236.500417] [<ffffffff8134317d>] vfs_write+0xdd/0x290 > > > [ 236.500425] [<ffffffff813748bd>] ? __fget_light+0x4d/0x120 > > > [ 236.500432] [<ffffffff813453ee>] SyS_pwrite64+0x9e/0xc0 > > > [ 236.500441] [<ffffffff81db989f>] entry_SYSCALL_64_fastpath+0x17/0x93 > > > [ 236.500446] ---[ end trace 7df747a6a0962ae6 ]--- > > > rm foo > > > [ 323.851851] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes > > > > Hmmm, some of your traces mentioned compression, do you have compression > > enabled? > > Yeah, I mount with noatime,compress=lzo. > > > I'll try to reproduce here, but could you try the same test on v4.5? > > I've ran it for half an hour on vanilla 4.5.4 without any patches, no > failures of any kind. > > > Also, if you can gdb your vmlinux (or btrfs.ko) and find this line: > > > > gdb> list *__btrfs_buffered_write+0x748 > > 0xffffffff8152eb78 is in __btrfs_buffered_write (fs/btrfs/file.c:1564). > 1559 > 1560 reserve_metadata: > 1561 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); > 1562 if (ret) { > 1563 if (!only_release_metadata) > 1564 btrfs_free_reserved_data_space(inode, pos, > 1565 write_bytes); > 1566 else > 1567 btrfs_end_write_no_snapshoting(root); > 1568 break; > Hmpf, even with forcing btrfs_delalloc_reserve_metadata to randomly fail, I can't trigger this warning. Something else is going on. For now I'm going to send in the fault patch, I'm confident this new warning is something unrelated. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 19, 2016 at 04:10:20PM -0400, Chris Mason wrote: > On Tue, May 17, 2016 at 10:47:27PM +0200, Adam Borowski wrote: > > On Tue, May 17, 2016 at 12:23:44PM -0400, Chris Mason wrote: > > > On Tue, May 17, 2016 at 05:14:51PM +0200, Adam Borowski wrote: > > > > On Mon, May 16, 2016 at 05:06:55PM -0400, Chris Mason wrote: > > > > > And now for the patch: > > > > [...] > > > > > > > > I then tried your test case, and alas: > > > > > > > > Here's another run on an untainted kernel built with frame pointers etc: > > > > > > > > ./dammitdave foo > > > > [ 236.500280] WARNING: CPU: 3 PID: 2940 at fs/btrfs/extent-tree.c:4233 btrfs_free_reserved_data_space_noquota+0xdd/0x160 > > > > rm foo > > > > [ 323.851851] BTRFS info (device sda1): btrfs_destroy_inode: leftover csum_bytes > > > > > > Hmmm, some of your traces mentioned compression, do you have compression > > > enabled? > > > > Yeah, I mount with noatime,compress=lzo. > > > > > I'll try to reproduce here, but could you try the same test on v4.5? > > > > I've ran it for half an hour on vanilla 4.5.4 without any patches, no > > failures of any kind. > > > > > gdb> list *__btrfs_buffered_write+0x748 > > > > 0xffffffff8152eb78 is in __btrfs_buffered_write (fs/btrfs/file.c:1564). > > Hmpf, even with forcing btrfs_delalloc_reserve_metadata to randomly > fail, I can't trigger this warning. Something else is going on. > > For now I'm going to send in the fault patch, I'm confident this new > warning is something unrelated. After having balanced my sda1, I can't reproduce this anymore, even after having re-allocated all chunks. As the original bug produced different results (like, no warnings other than btrfs_destroy_inode:csum_bytes), it indeed looks like an unrelated regression of some kind. I re-checked both real loads and dammitdave yesterday: * 4.5.4 works ok * 4.6.0 without your patch warns btrfs_destroy_inode * 4.6.0 with the patch works ok thus, it at least fixes that one. Tested-by: Adam Borowski <kilobyte@angband.pl>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3ed795e..ab7c70e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1597,6 +1597,13 @@ again: copied = btrfs_copy_from_user(pos, write_bytes, pages, i); + num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + reserve_bytes); + dirty_sectors = round_up(copied + sector_offset, + root->sectorsize); + dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, + dirty_sectors); + /* * if we have trouble faulting in the pages, fall * back to one page at a time @@ -1606,6 +1613,7 @@ again: if (copied == 0) { force_page_uptodate = true; + dirty_sectors = 0; dirty_pages = 0; } else { force_page_uptodate = false; @@ -1616,20 +1624,19 @@ again: /* * If we had a short copy we need to release the excess delaloc * bytes we reserved. We need to increment outstanding_extents - * because btrfs_delalloc_release_space will decrement it, but + * because btrfs_delalloc_release_space and + * btrfs_delalloc_release_metadata will decrement it, but * we still have an outstanding extent for the chunk we actually * managed to copy. */ - num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, - reserve_bytes); - dirty_sectors = round_up(copied + sector_offset, - root->sectorsize); - dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, - dirty_sectors); - if (num_sectors > dirty_sectors) { - release_bytes = (write_bytes - copied) - & ~((u64)root->sectorsize - 1); + /* + * we round down because we don't want to count + * any partial blocks actually send through the + * IO machines + */ + release_bytes = round_down(release_bytes - copied, + root->sectorsize); if (copied > 0) { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++;
Hi everyone, I've tried to cc most of the recent reports of this warning. It was pretty hard to track down, but I've got a test program for it now. My goal is to change xfs_io to add the madvise loop under --do-horrible-things, instead of adding yet another special doio.c function to xfstests. I've run this through both my test case, and Dave's trinity code. And now for the patch: When btrfs_copy_from_user isn't able to copy all of the pages, we need to adjust our accounting to reflect the work that was actually done. Commit 2e78c927d79 changed around the decisions a little and we ended up skipping the accounting adjustments some of the time. This commit makes sure that when we don't copy anything at all, we still hop into the adjustments, and switches to release_bytes instead of write_bytes, since write_bytes isn't aligned. The accounting errors led to warnings during btrfs_destroy_inode: [ 70.847532] WARNING: CPU: 10 PID: 514 at fs/btrfs/inode.c:9350 btrfs_destroy_inode+0x2b3/0x2c0 [ 70.847536] Modules linked in: i2c_piix4 virtio_net i2c_core input_leds button led_class serio_raw acpi_cpufreq sch_fq_codel autofs4 virtio_blk [ 70.847538] CPU: 10 PID: 514 Comm: umount Tainted: G W 4.6.0-rc6_00062_g2997da1-dirty #23 [ 70.847539] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 04/01/2014 [ 70.847542] 0000000000000000 ffff880ff5cafab8 ffffffff8149d5e9 0000000000000202 [ 70.847543] 0000000000000000 0000000000000000 0000000000000000 ffff880ff5cafb08 [ 70.847547] ffffffff8107bdfd ffff880ff5cafaf8 000024868120013d ffff880ff5cafb28 [ 70.847547] Call Trace: [ 70.847550] [<ffffffff8149d5e9>] dump_stack+0x51/0x78 [ 70.847551] [<ffffffff8107bdfd>] __warn+0xfd/0x120 [ 70.847553] [<ffffffff8107be3d>] warn_slowpath_null+0x1d/0x20 [ 70.847555] [<ffffffff8139c9e3>] btrfs_destroy_inode+0x2b3/0x2c0 [ 70.847556] [<ffffffff812003a1>] ? __destroy_inode+0x71/0x140 [ 70.847558] [<ffffffff812004b3>] destroy_inode+0x43/0x70 [ 70.847559] [<ffffffff810b7b5f>] ? wake_up_bit+0x2f/0x40 [ 70.847560] [<ffffffff81200c68>] evict+0x148/0x1d0 [ 70.847562] [<ffffffff81398ade>] ? start_transaction+0x3de/0x460 [ 70.847564] [<ffffffff81200d49>] dispose_list+0x59/0x80 [ 70.847565] [<ffffffff81201ba0>] evict_inodes+0x180/0x190 [ 70.847566] [<ffffffff812191ff>] ? __sync_filesystem+0x3f/0x50 [ 70.847568] [<ffffffff811e95f8>] generic_shutdown_super+0x48/0x100 [ 70.847569] [<ffffffff810b75c0>] ? woken_wake_function+0x20/0x20 [ 70.847571] [<ffffffff811e9796>] kill_anon_super+0x16/0x30 [ 70.847573] [<ffffffff81365cde>] btrfs_kill_super+0x1e/0x130 [ 70.847574] [<ffffffff811e99be>] deactivate_locked_super+0x4e/0x90 [ 70.847576] [<ffffffff811e9e61>] deactivate_super+0x51/0x70 [ 70.847577] [<ffffffff8120536f>] cleanup_mnt+0x3f/0x80 [ 70.847579] [<ffffffff81205402>] __cleanup_mnt+0x12/0x20 [ 70.847581] [<ffffffff81098358>] task_work_run+0x68/0xa0 [ 70.847582] [<ffffffff810022b6>] exit_to_usermode_loop+0xd6/0xe0 [ 70.847583] [<ffffffff81002e1d>] do_syscall_64+0xbd/0x170 [ 70.847586] [<ffffffff817d4dbc>] entry_SYSCALL64_slow_path+0x25/0x25 This is the test program I used to force short returns from btrfs_copy_from_user void *dontneed(void *arg) { char *p = arg; int ret; while(1) { ret = madvise(p, BUFSIZE/4, MADV_DONTNEED); if (ret) { perror("madvise"); exit(1); } } } int main(int ac, char **av) { int ret; int fd; char *filename; unsigned long offset; char *buf; int i; pthread_t tid; if (ac != 2) { fprintf(stderr, "usage: dammitdave filename\n"); exit(1); } buf = mmap(NULL, BUFSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { perror("mmap"); exit(1); } memset(buf, 'a', BUFSIZE); filename = av[1]; ret = pthread_create(&tid, NULL, dontneed, buf); if (ret) { fprintf(stderr, "error %d from pthread_create\n", ret); exit(1); } ret = pthread_detach(tid); if (ret) { fprintf(stderr, "pthread detach failed %d\n", ret); exit(1); } while (1) { fd = open(filename, O_RDWR | O_CREAT, 0600); if (fd < 0) { perror("open"); exit(1); } for (i = 0; i < ROUNDS; i++) { int this_write = BUFSIZE; offset = rand() % MAXSIZE; ret = pwrite(fd, buf, this_write, offset); if (ret < 0) { perror("pwrite"); exit(1); } else if (ret != this_write) { fprintf(stderr, "short write to %s offset %lu ret %d\n", filename, offset, ret); exit(1); } if (i == ROUNDS - 1) { ret = sync_file_range(fd, offset, 4096, SYNC_FILE_RANGE_WRITE); if (ret < 0) { perror("sync_file_range"); exit(1); } } } ret = ftruncate(fd, 0); if (ret < 0) { perror("ftruncate"); exit(1); } ret = close(fd); if (ret) { perror("close"); exit(1); } ret = unlink(filename); if (ret) { perror("unlink"); exit(1); } } return 0; } Signed-off-by: Chris Mason <clm@fb.com> Reported-by: Dave Jones <dsj@fb.com> Fixes: 2e78c927d79333f299a8ac81c2fd2952caeef335 cc: stable@vger.kernel.org # v4.6 --- fs/btrfs/file.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-)