Message ID: 20170818070516.GA15291@infradead.org (mailing list archive)
State: Deferred, archived
On Fri, Aug 18, 2017 at 12:05:16AM -0700, Christoph Hellwig wrote: > On Thu, Aug 17, 2017 at 04:31:29PM -0700, Darrick J. Wong wrote: > > Hi all, > > > > This RFC combines all the random little fixes and improvements to the > > verifiers that we've been talking about for the past month or so into a > > single patch series! > > > > We start by refactoring the long format btree block header verifier into > > a single helper functionn and de-macroing dir block verifiers to make > > them less shouty. Next, we change verifier functions to return the > > approximate instruction pointer of the faulting test so that we can > > report more precise fault information to dmesg/tracepoints. > > Just jumping here quickly because I don't have time for a detailed > review: > > How good does this instruction pointer thing resolved to the actual > issue? Ugh, it's terrible once you turn on the optimizer. if (!xfs_sb_version_hascrc(&mp->m_sb)) return __this_address; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) return __this_address; if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) return __this_address; return NULL; becomes: if (!xfs_sb_version_hascrc(&mp->m_sb)) goto out; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) goto out; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) goto out; if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) goto out; return NULL; out: return __this_address; ...which is totally worthless, unless we want to compile all the verifier functions with __attribute__((optimize("O0"))), which is bogus. <sigh> Back to the drawing board on that one. --D > I'm currently watching a customer issue where a write verifier > triggers, and I gave them a patch to add a debug print to every failing > statement, including printing out the mismatch values if it's not > simply a binary comparism. I though about preparing that patch as > well as others for mainline. Here is the one I have at the moment: > > --- > From 6c5e2efc6f857228461d439feb3c98be58fb9744 Mon Sep 17 00:00:00 2001 > From: Christoph Hellwig <hch@lst.de> > Date: Sat, 5 Aug 2017 16:34:15 +0200 > Subject: xfs: print verbose information on dir leaf verifier failures > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/xfs/libxfs/xfs_dir2_leaf.c | 33 ++++++++++++++++++++++++++------- > 1 file changed, 26 insertions(+), 7 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c > index b887fb2a2bcf..4386c68f72c6 100644 > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c > @@ -113,27 +113,37 @@ xfs_dir3_leaf_check_int( > * Should factor in the size of the bests table as well. > * We can deduce a value for that from di_size. > */ > - if (hdr->count > ops->leaf_max_ents(geo)) > + if (hdr->count > ops->leaf_max_ents(geo)) { > + xfs_warn(mp, "count (%d) above max (%d)\n", > + hdr->count, ops->leaf_max_ents(geo)); > return false; > + } > > /* Leaves and bests don't overlap in leaf format. */ > if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || > hdr->magic == XFS_DIR3_LEAF1_MAGIC) && > - (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) > + (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) { > + xfs_warn(mp, "ents overlappings bests\n"); > return false; > + } > > /* Check hash value order, count stale entries. 
*/ > for (i = stale = 0; i < hdr->count; i++) { > if (i + 1 < hdr->count) { > if (be32_to_cpu(ents[i].hashval) > > - be32_to_cpu(ents[i + 1].hashval)) > + be32_to_cpu(ents[i + 1].hashval)) { > + xfs_warn(mp, "broken hash order\n"); > return false; > + } > } > if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) > stale++; > } > - if (hdr->stale != stale) > + if (hdr->stale != stale) { > + xfs_warn(mp, "incorrect stalte count (%d, expected %d)\n", > + hdr->stale, stale); > return false; > + } > return true; > } > > @@ -159,12 +169,21 @@ xfs_dir3_leaf_verify( > magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC > : XFS_DIR3_LEAFN_MAGIC; > > - if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) > + if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) { > + xfs_warn(mp, "incorrect magic number (0x%hx, expected 0x%hx)\n", > + leaf3->info.hdr.magic, magic3); > return false; > - if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) > + } > + if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) { > + xfs_warn(mp, "incorrect uuid, (%pUb, expected %pUb)\n", > + &leaf3->info.uuid, &mp->m_sb.sb_meta_uuid); > return false; > - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) > + } > + if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) { > + xfs_warn(mp, "incorrect blkno, (%lld, expected %lld)\n", > + be64_to_cpu(leaf3->info.blkno), bp->b_bn); > return false; > + } > if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) > return false; > } else { > -- > 2.11.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
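The failure mode described above comes from GCC's labels-as-values extension: __this_address takes the address of a local label, and the optimizer is free to merge the otherwise-identical return paths so that every failed check reports the same address. Below is a minimal user-space sketch of that mechanism (not XFS code; check_value and the test values are made up for illustration). Building it with gcc -O0 and again with gcc -O2 shows how the distinct failure addresses can collapse into one.

#include <stdio.h>

/* Same shape as the macro in the series: yield the address of a local label. */
#define __this_address	({ __label__ __here; __here: &&__here; })

__attribute__((noinline))
static void *check_value(int v)
{
	if (v < 0)
		return __this_address;	/* failing-check address #1 */
	if (v > 100)
		return __this_address;	/* failing-check address #2 */
	return NULL;			/* all checks passed */
}

int main(int argc, char **argv)
{
	/*
	 * Derive the inputs from argc so the calls cannot be folded away at
	 * compile time.  At -O0 the two failure paths print different
	 * addresses; at -O2 gcc may merge them into one shared return tail,
	 * which is exactly the problem described in the mail above.
	 */
	printf("negative : %p\n", check_value(-argc));
	printf("too big  : %p\n", check_value(argc + 200));
	printf("in range : %p\n", check_value(argc));
	return 0;
}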
On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > On Fri, Aug 18, 2017 at 12:05:16AM -0700, Christoph Hellwig wrote: > > On Thu, Aug 17, 2017 at 04:31:29PM -0700, Darrick J. Wong wrote: > > > Hi all, > > > > > > This RFC combines all the random little fixes and improvements to the > > > verifiers that we've been talking about for the past month or so into a > > > single patch series! > > > > > > We start by refactoring the long format btree block header verifier into > > > a single helper functionn and de-macroing dir block verifiers to make > > > them less shouty. Next, we change verifier functions to return the > > > approximate instruction pointer of the faulting test so that we can > > > report more precise fault information to dmesg/tracepoints. > > > > Just jumping here quickly because I don't have time for a detailed > > review: > > > > How good does this instruction pointer thing resolved to the actual > > issue? > > Ugh, it's terrible once you turn on the optimizer. > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > return __this_address; > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > return __this_address; > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > return __this_address; > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > return __this_address; > return NULL; > > becomes: > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > goto out; > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > goto out; > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > goto out; > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > goto out; > return NULL; > out: > return __this_address; > > ...which is totally worthless, unless we want to compile all the verifier > functions with __attribute__((optimize("O0"))), which is bogus. > > <sigh> Back to the drawing board on that one. Ok, there's /slightly/ less awful way to prevent gcc from optimizing the verifier function to the point of imprecise pointer value, but it involves writing to a volatile int: /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ extern volatile int xfs_lineno; #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) <grumble> Yucky, but it more or less works. --D > > > I'm currently watching a customer issue where a write verifier > > triggers, and I gave them a patch to add a debug print to every failing > > statement, including printing out the mismatch values if it's not > > simply a binary comparism. I though about preparing that patch as > > well as others for mainline. Here is the one I have at the moment: > > > > --- > > From 6c5e2efc6f857228461d439feb3c98be58fb9744 Mon Sep 17 00:00:00 2001 > > From: Christoph Hellwig <hch@lst.de> > > Date: Sat, 5 Aug 2017 16:34:15 +0200 > > Subject: xfs: print verbose information on dir leaf verifier failures > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > --- > > fs/xfs/libxfs/xfs_dir2_leaf.c | 33 ++++++++++++++++++++++++++------- > > 1 file changed, 26 insertions(+), 7 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c > > index b887fb2a2bcf..4386c68f72c6 100644 > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c > > @@ -113,27 +113,37 @@ xfs_dir3_leaf_check_int( > > * Should factor in the size of the bests table as well. > > * We can deduce a value for that from di_size. 
> > */ > > - if (hdr->count > ops->leaf_max_ents(geo)) > > + if (hdr->count > ops->leaf_max_ents(geo)) { > > + xfs_warn(mp, "count (%d) above max (%d)\n", > > + hdr->count, ops->leaf_max_ents(geo)); > > return false; > > + } > > > > /* Leaves and bests don't overlap in leaf format. */ > > if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || > > hdr->magic == XFS_DIR3_LEAF1_MAGIC) && > > - (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) > > + (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) { > > + xfs_warn(mp, "ents overlappings bests\n"); > > return false; > > + } > > > > /* Check hash value order, count stale entries. */ > > for (i = stale = 0; i < hdr->count; i++) { > > if (i + 1 < hdr->count) { > > if (be32_to_cpu(ents[i].hashval) > > > - be32_to_cpu(ents[i + 1].hashval)) > > + be32_to_cpu(ents[i + 1].hashval)) { > > + xfs_warn(mp, "broken hash order\n"); > > return false; > > + } > > } > > if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) > > stale++; > > } > > - if (hdr->stale != stale) > > + if (hdr->stale != stale) { > > + xfs_warn(mp, "incorrect stalte count (%d, expected %d)\n", > > + hdr->stale, stale); > > return false; > > + } > > return true; > > } > > > > @@ -159,12 +169,21 @@ xfs_dir3_leaf_verify( > > magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC > > : XFS_DIR3_LEAFN_MAGIC; > > > > - if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) > > + if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) { > > + xfs_warn(mp, "incorrect magic number (0x%hx, expected 0x%hx)\n", > > + leaf3->info.hdr.magic, magic3); > > return false; > > - if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) > > + } > > + if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) { > > + xfs_warn(mp, "incorrect uuid, (%pUb, expected %pUb)\n", > > + &leaf3->info.uuid, &mp->m_sb.sb_meta_uuid); > > return false; > > - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) > > + } > > + if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) { > > + xfs_warn(mp, "incorrect blkno, (%lld, expected %lld)\n", > > + be64_to_cpu(leaf3->info.blkno), bp->b_bn); > > return false; > > + } > > if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) > > return false; > > } else { > > -- > > 2.11.0 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 18, 2017 at 11:45:11AM -0700, Darrick J. Wong wrote: > On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > > On Fri, Aug 18, 2017 at 12:05:16AM -0700, Christoph Hellwig wrote: > > > On Thu, Aug 17, 2017 at 04:31:29PM -0700, Darrick J. Wong wrote: > > > > Hi all, > > > > > > > > This RFC combines all the random little fixes and improvements to the > > > > verifiers that we've been talking about for the past month or so into a > > > > single patch series! > > > > > > > > We start by refactoring the long format btree block header verifier into > > > > a single helper functionn and de-macroing dir block verifiers to make > > > > them less shouty. Next, we change verifier functions to return the > > > > approximate instruction pointer of the faulting test so that we can > > > > report more precise fault information to dmesg/tracepoints. > > > > > > Just jumping here quickly because I don't have time for a detailed > > > review: > > > > > > How good does this instruction pointer thing resolved to the actual > > > issue? > > > > Ugh, it's terrible once you turn on the optimizer. > > > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > > return __this_address; > > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > > return __this_address; > > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > > return __this_address; > > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > > return __this_address; > > return NULL; > > > > becomes: > > > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > > goto out; > > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > > goto out; > > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > > goto out; > > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > > goto out; > > return NULL; > > out: > > return __this_address; > > > > ...which is totally worthless, unless we want to compile all the verifier > > functions with __attribute__((optimize("O0"))), which is bogus. > > > > <sigh> Back to the drawing board on that one. > > Ok, there's /slightly/ less awful way to prevent gcc from optimizing the > verifier function to the point of imprecise pointer value, but it involves > writing to a volatile int: > > /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ > extern volatile int xfs_lineno; > #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) > > <grumble> Yucky, but it more or less works. Demonstration on a filesystem with a corrupt refcountbt root: # dmesg & # mount /dev/sdf /opt XFS (sdf): EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk! XFS (sdf): EXPERIMENTAL reflink feature enabled. Use at your own risk! XFS (sdf): Mounting V5 Filesystem XFS (sdf): Starting recovery (logdev: internal) XFS (sdf): Ending recovery (logdev: internal) XFS (sdf): Metadata corruption detected at xfs_btree_sblock_v5hdr_verify+0x7e/0xc0 [xfs], xfs_refcountbt block 0x230 XFS (sdf): Unmount and run xfs_repair <snip> mount: mount /dev/sdf on /opt failed: Structure needs cleaning # gdb /usr/lib/debug/lib/modules/4.13.0-rc5-xfsx/vmlinux /proc/kcore <snip> (gdb) l *(xfs_btree_sblock_v5hdr_verify+0x7e) 0xffffffffa021cc4e is in xfs_btree_sblock_v5hdr_verify (fs/xfs/libxfs/xfs_btree.c:4656). 4651 fs/xfs/libxfs/xfs_btree.c: No such file or directory. 
(gdb) quit # gdb --args xfs_db /dev/sdf <snip> (gdb) run <snip> xfs_db> agf 0 xfs_db> addr refcntroot Metadata corruption detected at 0x449d68, xfs_refcountbt block 0x230/0x1000 xfs_db> ^Z Program received signal SIGTSTP, Stopped (user). 0x00007f3e83045500 in __read_nocancel () at ../sysdeps/unix/syscall-template.S:84 84 ../sysdeps/unix/syscall-template.S: No such file or directory. (gdb) l *(0x449d68) 0x449d68 is in xfs_btree_sblock_v5hdr_verify (xfs_btree.c:4656). 4651 xfs_btree.c: No such file or directory. xfs_btree.c: 4645:void * 4646:xfs_btree_sblock_v5hdr_verify( 4647: struct xfs_buf *bp) 4648:{ 4649: struct xfs_mount *mp = bp->b_target->bt_mount; 4650: struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4651: struct xfs_perag *pag = bp->b_pag; 4652: 4653: if (!xfs_sb_version_hascrc(&mp->m_sb)) 4654: return __this_address; 4655: if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4656: return __this_address; 4657: if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4658: return __this_address; 4659: if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4660: return __this_address; 4661: return NULL; 4662: } So assuming that the volatile int stuff isn't too horrifyingly gross, it actually /does/ allow us to pinpoint exactly which test tripped the verifier. --D > > --D > > > > > > I'm currently watching a customer issue where a write verifier > > > triggers, and I gave them a patch to add a debug print to every failing > > > statement, including printing out the mismatch values if it's not > > > simply a binary comparism. I though about preparing that patch as > > > well as others for mainline. Here is the one I have at the moment: > > > > > > --- > > > From 6c5e2efc6f857228461d439feb3c98be58fb9744 Mon Sep 17 00:00:00 2001 > > > From: Christoph Hellwig <hch@lst.de> > > > Date: Sat, 5 Aug 2017 16:34:15 +0200 > > > Subject: xfs: print verbose information on dir leaf verifier failures > > > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > > --- > > > fs/xfs/libxfs/xfs_dir2_leaf.c | 33 ++++++++++++++++++++++++++------- > > > 1 file changed, 26 insertions(+), 7 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c > > > index b887fb2a2bcf..4386c68f72c6 100644 > > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c > > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c > > > @@ -113,27 +113,37 @@ xfs_dir3_leaf_check_int( > > > * Should factor in the size of the bests table as well. > > > * We can deduce a value for that from di_size. > > > */ > > > - if (hdr->count > ops->leaf_max_ents(geo)) > > > + if (hdr->count > ops->leaf_max_ents(geo)) { > > > + xfs_warn(mp, "count (%d) above max (%d)\n", > > > + hdr->count, ops->leaf_max_ents(geo)); > > > return false; > > > + } > > > > > > /* Leaves and bests don't overlap in leaf format. */ > > > if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || > > > hdr->magic == XFS_DIR3_LEAF1_MAGIC) && > > > - (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) > > > + (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) { > > > + xfs_warn(mp, "ents overlappings bests\n"); > > > return false; > > > + } > > > > > > /* Check hash value order, count stale entries. 
*/ > > > for (i = stale = 0; i < hdr->count; i++) { > > > if (i + 1 < hdr->count) { > > > if (be32_to_cpu(ents[i].hashval) > > > > - be32_to_cpu(ents[i + 1].hashval)) > > > + be32_to_cpu(ents[i + 1].hashval)) { > > > + xfs_warn(mp, "broken hash order\n"); > > > return false; > > > + } > > > } > > > if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) > > > stale++; > > > } > > > - if (hdr->stale != stale) > > > + if (hdr->stale != stale) { > > > + xfs_warn(mp, "incorrect stalte count (%d, expected %d)\n", > > > + hdr->stale, stale); > > > return false; > > > + } > > > return true; > > > } > > > > > > @@ -159,12 +169,21 @@ xfs_dir3_leaf_verify( > > > magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC > > > : XFS_DIR3_LEAFN_MAGIC; > > > > > > - if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) > > > + if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) { > > > + xfs_warn(mp, "incorrect magic number (0x%hx, expected 0x%hx)\n", > > > + leaf3->info.hdr.magic, magic3); > > > return false; > > > - if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) > > > + } > > > + if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) { > > > + xfs_warn(mp, "incorrect uuid, (%pUb, expected %pUb)\n", > > > + &leaf3->info.uuid, &mp->m_sb.sb_meta_uuid); > > > return false; > > > - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) > > > + } > > > + if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) { > > > + xfs_warn(mp, "incorrect blkno, (%lld, expected %lld)\n", > > > + be64_to_cpu(leaf3->info.blkno), bp->b_bn); > > > return false; > > > + } > > > if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) > > > return false; > > > } else { > > > -- > > > 2.11.0 > > > > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > > the body of a message to majordomo@vger.kernel.org > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
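For context, the dmesg line in the demonstration above ("Metadata corruption detected at xfs_btree_sblock_v5hdr_verify+0x7e/0xc0 [xfs]") is what printk produces when the returned address is formatted as a kernel symbol. A rough sketch of how a caller might report the address handed back by a __this_address-style verifier is below; the xfs_failaddr_t typedef and the report_corruption() helper are assumptions for illustration, not existing XFS interfaces.

typedef void *xfs_failaddr_t;	/* assumed: address of the failing check */

static void
report_corruption(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	xfs_failaddr_t		fa)
{
	/* %pS resolves the code address to symbol+offset, as in the demo */
	xfs_alert(mp, "Metadata corruption detected at %pS, %s block 0x%llx",
			fa, bp->b_ops->name, (unsigned long long)bp->b_bn);
}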
On Fri, Aug 18, 2017 at 11:45:11AM -0700, Darrick J. Wong wrote: > On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > > On Fri, Aug 18, 2017 at 12:05:16AM -0700, Christoph Hellwig wrote: > > > On Thu, Aug 17, 2017 at 04:31:29PM -0700, Darrick J. Wong wrote: > > > > Hi all, > > > > > > > > This RFC combines all the random little fixes and improvements to the > > > > verifiers that we've been talking about for the past month or so into a > > > > single patch series! > > > > > > > > We start by refactoring the long format btree block header verifier into > > > > a single helper functionn and de-macroing dir block verifiers to make > > > > them less shouty. Next, we change verifier functions to return the > > > > approximate instruction pointer of the faulting test so that we can > > > > report more precise fault information to dmesg/tracepoints. > > > > > > Just jumping here quickly because I don't have time for a detailed > > > review: > > > > > > How good does this instruction pointer thing resolved to the actual > > > issue? > > > > Ugh, it's terrible once you turn on the optimizer. > > > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > > return __this_address; > > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > > return __this_address; > > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > > return __this_address; > > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > > return __this_address; > > return NULL; > > > > becomes: > > > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > > goto out; > > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > > goto out; > > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > > goto out; > > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > > goto out; > > return NULL; > > out: > > return __this_address; > > > > ...which is totally worthless, unless we want to compile all the verifier > > functions with __attribute__((optimize("O0"))), which is bogus. > > > > <sigh> Back to the drawing board on that one. > > Ok, there's /slightly/ less awful way to prevent gcc from optimizing the > verifier function to the point of imprecise pointer value, but it involves > writing to a volatile int: > > /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ > extern volatile int xfs_lineno; > #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) > > <grumble> Yucky, but it more or less works. Can you declare the label as volatile, like you can an asm statement to prevent the compiler from optimising out asm statements? Even so, given the yuckiness is very isolated and should only affect the slow path code, I can live with this. Cheers, Dave.
On Sat, Aug 19, 2017 at 10:33:00AM +1000, Dave Chinner wrote: > On Fri, Aug 18, 2017 at 11:45:11AM -0700, Darrick J. Wong wrote: > > On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > > > On Fri, Aug 18, 2017 at 12:05:16AM -0700, Christoph Hellwig wrote: > > > > On Thu, Aug 17, 2017 at 04:31:29PM -0700, Darrick J. Wong wrote: > > > > > Hi all, > > > > > > > > > > This RFC combines all the random little fixes and improvements to the > > > > > verifiers that we've been talking about for the past month or so into a > > > > > single patch series! > > > > > > > > > > We start by refactoring the long format btree block header verifier into > > > > > a single helper functionn and de-macroing dir block verifiers to make > > > > > them less shouty. Next, we change verifier functions to return the > > > > > approximate instruction pointer of the faulting test so that we can > > > > > report more precise fault information to dmesg/tracepoints. > > > > > > > > Just jumping here quickly because I don't have time for a detailed > > > > review: > > > > > > > > How good does this instruction pointer thing resolved to the actual > > > > issue? > > > > > > Ugh, it's terrible once you turn on the optimizer. > > > > > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > > > return __this_address; > > > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > > > return __this_address; > > > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > > > return __this_address; > > > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > > > return __this_address; > > > return NULL; > > > > > > becomes: > > > > > > if (!xfs_sb_version_hascrc(&mp->m_sb)) > > > goto out; > > > if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) > > > goto out; > > > if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) > > > goto out; > > > if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) > > > goto out; > > > return NULL; > > > out: > > > return __this_address; > > > > > > ...which is totally worthless, unless we want to compile all the verifier > > > functions with __attribute__((optimize("O0"))), which is bogus. > > > > > > <sigh> Back to the drawing board on that one. > > > > Ok, there's /slightly/ less awful way to prevent gcc from optimizing the > > verifier function to the point of imprecise pointer value, but it involves > > writing to a volatile int: > > > > /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ > > extern volatile int xfs_lineno; > > #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) > > > > <grumble> Yucky, but it more or less works. > > Can you declare the label as volatile, like you can an asm > statement to prevent the compiler from optimising out asm > statements? > > Even so, given the yuckiness is very isolated and should only affect > the slow path code, I can live with this. Hmmm. I can't declare the label as volatile, but I /can/ inject asm volatile("") and that seems to prevent gcc from moving code hunks around: #define __this_address ({ __label__ __here; __here: asm volatile(""); &&__here; }) --D > > Cheers, > > Dave. 
> -- > Dave Chinner > david@fromorbit.com > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 18, 2017 at 05:58:16PM -0700, Darrick J. Wong wrote: > On Sat, Aug 19, 2017 at 10:33:00AM +1000, Dave Chinner wrote: > > On Fri, Aug 18, 2017 at 11:45:11AM -0700, Darrick J. Wong wrote: > > > On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > > > > ...which is totally worthless, unless we want to compile all the verifier > > > > functions with __attribute__((optimize("O0"))), which is bogus. > > > > > > > > <sigh> Back to the drawing board on that one. > > > > > > Ok, there's /slightly/ less awful way to prevent gcc from optimizing the > > > verifier function to the point of imprecise pointer value, but it involves > > > writing to a volatile int: > > > > > > /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ > > > extern volatile int xfs_lineno; > > > #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) > > > > > > <grumble> Yucky, but it more or less works. > > > > Can you declare the label as volatile, like you can an asm > > statement to prevent the compiler from optimising out asm > > statements? > > > > Even so, given the yuckiness is very isolated and should only affect > > the slow path code, I can live with this. > > Hmmm. I can't declare the label as volatile, but I /can/ inject > asm volatile("") and that seems to prevent gcc from moving code hunks > around: > > #define __this_address ({ __label__ __here; __here: asm volatile(""); &&__here; }) That seems cleaner to me, and I /think/ the gcc manual says it won't remove such statements, but it also says: Under certain circumstances, GCC may duplicate (or remove duplicates of) your assembly code when optimizing. So I have no real idea whether this is going to be robust or not. I'm not a gcc/asm expert at all (that stuff is mostly black magic to me). Cheers, Dave.
On Sat, Aug 19, 2017 at 11:12:46AM +1000, Dave Chinner wrote: > On Fri, Aug 18, 2017 at 05:58:16PM -0700, Darrick J. Wong wrote: > > On Sat, Aug 19, 2017 at 10:33:00AM +1000, Dave Chinner wrote: > > > On Fri, Aug 18, 2017 at 11:45:11AM -0700, Darrick J. Wong wrote: > > > > On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > > > > > ...which is totally worthless, unless we want to compile all the verifier > > > > > functions with __attribute__((optimize("O0"))), which is bogus. > > > > > > > > > > <sigh> Back to the drawing board on that one. > > > > > > > > Ok, there's /slightly/ less awful way to prevent gcc from optimizing the > > > > verifier function to the point of imprecise pointer value, but it involves > > > > writing to a volatile int: > > > > > > > > /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ > > > > extern volatile int xfs_lineno; > > > > #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) > > > > > > > > <grumble> Yucky, but it more or less works. > > > > > > Can you declare the label as volatile, like you can an asm > > > statement to prevent the compiler from optimising out asm > > > statements? > > > > > > Even so, given the yuckiness is very isolated and should only affect > > > the slow path code, I can live with this. > > > > Hmmm. I can't declare the label as volatile, but I /can/ inject > > asm volatile("") and that seems to prevent gcc from moving code hunks > > around: > > > > #define __this_address ({ __label__ __here; __here: asm volatile(""); &&__here; }) > > That seems cleaner to me, and I /think/ the gcc manual says it won't > remove such statements, but it also says: > > Under certain circumstances, GCC may duplicate (or remove duplicates > of) your assembly code when optimizing. > > So I have no real idea whether this is going to be robust or not. > I'm not a gcc/asm expert at all (that stuff is mostly black magic > to me). Same here. I figure if we start getting complaints about totally wacko function pointers in the dmesg/xfsrepair output, we can put the set-a-volatile-int cobwebs back in. --D > > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 18, 2017 at 06:17:21PM -0700, Darrick J. Wong wrote: > On Sat, Aug 19, 2017 at 11:12:46AM +1000, Dave Chinner wrote: > > On Fri, Aug 18, 2017 at 05:58:16PM -0700, Darrick J. Wong wrote: > > > On Sat, Aug 19, 2017 at 10:33:00AM +1000, Dave Chinner wrote: > > > > On Fri, Aug 18, 2017 at 11:45:11AM -0700, Darrick J. Wong wrote: > > > > > On Fri, Aug 18, 2017 at 10:06:07AM -0700, Darrick J. Wong wrote: > > > > > > ...which is totally worthless, unless we want to compile all the verifier > > > > > > functions with __attribute__((optimize("O0"))), which is bogus. > > > > > > > > > > > > <sigh> Back to the drawing board on that one. > > > > > > > > > > Ok, there's /slightly/ less awful way to prevent gcc from optimizing the > > > > > verifier function to the point of imprecise pointer value, but it involves > > > > > writing to a volatile int: > > > > > > > > > > /* stupidly prevent gcc from over-optimizing getting the instruction ptr */ > > > > > extern volatile int xfs_lineno; > > > > > #define __this_address ({ __label__ __here; __here: xfs_lineno = __LINE__; &&__here; }) > > > > > > > > > > <grumble> Yucky, but it more or less works. > > > > > > > > Can you declare the label as volatile, like you can an asm > > > > statement to prevent the compiler from optimising out asm > > > > statements? > > > > > > > > Even so, given the yuckiness is very isolated and should only affect > > > > the slow path code, I can live with this. > > > > > > Hmmm. I can't declare the label as volatile, but I /can/ inject > > > asm volatile("") and that seems to prevent gcc from moving code hunks > > > around: > > > > > > #define __this_address ({ __label__ __here; __here: asm volatile(""); &&__here; }) > > > > That seems cleaner to me, and I /think/ the gcc manual says it won't > > remove such statements, but it also says: > > > > Under certain circumstances, GCC may duplicate (or remove duplicates > > of) your assembly code when optimizing. > > > > So I have no real idea whether this is going to be robust or not. > > I'm not a gcc/asm expert at all (that stuff is mostly black magic > > to me). > > Same here. I figure if we start getting complaints about totally wacko > function pointers in the dmesg/xfsrepair output, we can put the > set-a-volatile-int cobwebs back in. Doh, it's taking me longer to remember things I forgot 10+ years ago these days... From include/linux/gcc-compiler.h: /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") So I think we can just dump a barrier() call in the macro and it should work without us having to care about it. Still need a comment to explain why the barrier is there, though... Cheers, Dave.
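Putting Dave's barrier() suggestion together with the earlier experiments, the macro might end up looking like the sketch below. It assumes barrier() from include/linux/compiler.h is visible at the definition site; treat it as one plausible spelling rather than the final patch.

/*
 * Return a void * that identifies the failing check.  The local label gives
 * a per-call-site address, and barrier() (gcc's
 * __asm__ __volatile__("": : :"memory")) discourages the optimizer from
 * merging the otherwise-identical failure returns into one shared tail.
 */
#define __this_address	({ __label__ __here; __here: barrier(); &&__here; })

Each verifier test then stays in the "if (bad) return __this_address;" form quoted earlier in the thread, e.g. in xfs_btree_sblock_v5hdr_verify().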
So what do you think of the version that adds real printks for each condition including more details like the one verifier I did below? Probably needs some unlikely annotations, though. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > So what do you think of the version that adds real printks for > each condition including more details like the one verifier I > did below? Probably needs some unlikely annotations, though. Given that there was another resend of the series I'd be really curious about the answer to this? -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > So what do you think of the version that adds real printks for > > each condition including more details like the one verifier I > > did below? Probably needs some unlikely annotations, though. > > Given that there was another resend of the series I'd be really > curious about the answer to this? Oh! Sorry, I lost the thread and forgot to reply. :( For debug kernels, I think it's definitely valuable to us to enhance the corruption reports by printing out the expected value(s) and the observed value so that we can pinpoint quickly exactly which test failed and why. I'm not as strongly convinced that it's worth it to put all those strings into release builds and have to carry those around in memory. Then again, on my rc7 dev build the strings only contribute about 120K to a 1.4MB .ko file so it might not be a big deal. I also think it would be useful for xfs_verifier_error to print more of the corrupted buffer. --D > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > So what do you think of the version that adds real printks for > > each condition including more details like the one verifier I > > did below? Probably needs some unlikely annotations, though. > > Given that there was another resend of the series I'd be really > curious about the answer to this? If we increase the size of the hexdump on error, then most of the specific numbers in the print statements can be pulled from the hexdump. And if the verifier tells us exactly what check failed, we don't have to decode the entire hexdump to know what field was out of band. Perhaps what we should think about here is adding a mode to xfs_db to decode the hexdump into structured output so we don't have to manually decode the hex dumps ever again.... Cheers, Dave.
On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: > On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > > On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > > So what do you think of the version that adds real printks for > > > each condition including more details like the one verifier I > > > did below? Probably needs some unlikely annotations, though. > > > > Given that there was another resend of the series I'd be really > > curious about the answer to this? > > If we increase the size of the hexdump on error, then most of the > specific numbers in the print statements can be pulled from the > hexdump. And if the verifier tells us exactly what check failed, > we don't have to decode the entire hexdump to know what field was > out of band. How much do we increase the size of the hexdump? 64 -> 128? Or whatever the structure header size is? How about if xfs_error_level >= XFS_ERRORLEVEL_HIGH then we dump the entire buffer? > Perhaps what we should think about here is adding a mode to xfs_db > to decode the hexdump into structured output so we don't have to > manually decode the hex dumps ever again.... Seems useful, yes. --D > > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Aug 30, 2017 at 05:10:09PM -0700, Darrick J. Wong wrote: > On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: > > On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > > > On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > > > So what do you think of the version that adds real printks for > > > > each condition including more details like the one verifier I > > > > did below? Probably needs some unlikely annotations, though. > > > > > > Given that there was another resend of the series I'd be really > > > curious about the answer to this? > > > > If we increase the size of the hexdump on error, then most of the > > specific numbers in the print statements can be pulled from the > > hexdump. And if the verifier tells us exactly what check failed, > > we don't have to decode the entire hexdump to know what field was > > out of band. > > How much do we increase the size of the hexdump? 64 -> 128? Or > whatever the structure header size is? I choose 64 because it captured the primary header for most structures for CRC enabled filesystems, so it would have owner/crc/uuid/etc in it. I wasn't really trying to capture the object specific metadata in it, but increasing to 128 bytes would capture most of that block headers, too. Won't really help with inodes, though, as the core is 176 bytes and the owner/crc stuff is at the end.... > How about if xfs_error_level >= > XFS_ERRORLEVEL_HIGH then we dump the entire buffer? Excellent idea. We can easily capture the entire output for corruptions the users can easily trip over. Maybe put in the short dump a line "turn error level up to 11 to get a full dump of the corruption"? Cheers, Dave.
On 8/30/17 9:43 PM, Dave Chinner wrote: > On Wed, Aug 30, 2017 at 05:10:09PM -0700, Darrick J. Wong wrote: >> On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: >>> On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: >>>> On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: >>>>> So what do you think of the version that adds real printks for >>>>> each condition including more details like the one verifier I >>>>> did below? Probably needs some unlikely annotations, though. >>>> >>>> Given that there was another resend of the series I'd be really >>>> curious about the answer to this? >>> >>> If we increase the size of the hexdump on error, then most of the >>> specific numbers in the print statements can be pulled from the >>> hexdump. And if the verifier tells us exactly what check failed, >>> we don't have to decode the entire hexdump to know what field was >>> out of band. >> >> How much do we increase the size of the hexdump? 64 -> 128? Or >> whatever the structure header size is? > > I choose 64 because it captured the primary header for most > structures for CRC enabled filesystems, so it would have > owner/crc/uuid/etc in it. I wasn't really trying to capture the > object specific metadata in it, but increasing to 128 bytes would > capture most of that block headers, too. Won't really help with > inodes, though, as the core is 176 bytes and the owner/crc stuff is > at the end.... > >> How about if xfs_error_level >= >> XFS_ERRORLEVEL_HIGH then we dump the entire buffer? > > Excellent idea. We can easily capture the entire output for > corruptions the users can easily trip over. Maybe put in the short > dump a line "turn error level up to 11 to get a full dump of the > corruption"? Yep, the thing about "more info only if you tune it" is that nobody will know to tune it. Unless you printk that info... Of course nobody will know what "turn error up ..." means, either. Hm, at one point I had a patch to add object size to the xfs_buf_ops struct and print that many bytes, but can't find it now :/ (not that it was very complicated...) Anyway, point is making it vary with the size of the object wouldn't be too hard. -Eric -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
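Eric's idea of keying the dump length off the buffer ops could look roughly like the sketch below. The dump_size field and the xfs_dump_corrupt_buf() helper are hypothetical names used for illustration; only the existing name/verify_read/verify_write members of struct xfs_buf_ops are real.

struct xfs_buf_ops {
	char		*name;
	void		(*verify_read)(struct xfs_buf *);
	void		(*verify_write)(struct xfs_buf *);
	unsigned int	dump_size;	/* hypothetical: bytes to hex-dump on failure */
};

static void
xfs_dump_corrupt_buf(
	struct xfs_buf		*bp)
{
	unsigned int		len = bp->b_ops->dump_size;

	if (!len)
		len = 64;		/* fall back to the current default */
	xfs_hex_dump(bp->b_addr, min_t(unsigned int, len, BBTOB(bp->b_length)));
}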
On Wed, Aug 30, 2017 at 10:05:25PM -0500, Eric Sandeen wrote: > On 8/30/17 9:43 PM, Dave Chinner wrote: > > On Wed, Aug 30, 2017 at 05:10:09PM -0700, Darrick J. Wong wrote: > >> On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: > >>> On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > >>>> On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > >>>>> So what do you think of the version that adds real printks for > >>>>> each condition including more details like the one verifier I > >>>>> did below? Probably needs some unlikely annotations, though. > >>>> > >>>> Given that there was another resend of the series I'd be really > >>>> curious about the answer to this? > >>> > >>> If we increase the size of the hexdump on error, then most of the > >>> specific numbers in the print statements can be pulled from the > >>> hexdump. And if the verifier tells us exactly what check failed, > >>> we don't have to decode the entire hexdump to know what field was > >>> out of band. > >> > >> How much do we increase the size of the hexdump? 64 -> 128? Or > >> whatever the structure header size is? > > > > I choose 64 because it captured the primary header for most > > structures for CRC enabled filesystems, so it would have > > owner/crc/uuid/etc in it. I wasn't really trying to capture the > > object specific metadata in it, but increasing to 128 bytes would > > capture most of that block headers, too. Won't really help with > > inodes, though, as the core is 176 bytes and the owner/crc stuff is > > at the end.... > > > >> How about if xfs_error_level >= > >> XFS_ERRORLEVEL_HIGH then we dump the entire buffer? > > > > Excellent idea. We can easily capture the entire output for > > corruptions the users can easily trip over. Maybe put in the short > > dump a line "turn error level up to 11 to get a full dump of the > > corruption"? > > Yep, the thing about "more info only if you tune it" is that nobody > will know to tune it. Unless you printk that info... > > Of course nobody will know what "turn error up ..." means, either. Sure, I was just paraphrasing how an error message might look. A few quick coats of paint on the bikeshed will result in something like: "If this is a recurring error, please set /proc/sys/fs/xfs/error_level to ...." > Hm, at one point I had a patch to add object size to the > xfs_buf_ops struct and print that many bytes, but can't find it now :/ > (not that it was very complicated...) > > Anyway, point is making it vary with the size of the object wouldn't > be too hard. Probably not, but it is complicated by the fact we have a couple of different ways of dumping corruption errors. e.g. inode verifier warnings are dumped through XFS_CORRUPTION_ERROR() rather than xfs_verifier_error() as they are not buffer based verifiers. Other things like log record CRC failures are hard coded to dump 32 bytes, as is xlog_print_trans() on transaction overruns.... That's not a show stopper, but it would be nice to have consistent behaviour across all the mechanisms we use to dump object data that failed verification... Cheers, Dave.
On Thu, Aug 31, 2017 at 01:27:36PM +1000, Dave Chinner wrote: > On Wed, Aug 30, 2017 at 10:05:25PM -0500, Eric Sandeen wrote: > > On 8/30/17 9:43 PM, Dave Chinner wrote: > > > On Wed, Aug 30, 2017 at 05:10:09PM -0700, Darrick J. Wong wrote: > > >> On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: > > >>> On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > > >>>> On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > >>>>> So what do you think of the version that adds real printks for > > >>>>> each condition including more details like the one verifier I > > >>>>> did below? Probably needs some unlikely annotations, though. > > >>>> > > >>>> Given that there was another resend of the series I'd be really > > >>>> curious about the answer to this? > > >>> > > >>> If we increase the size of the hexdump on error, then most of the > > >>> specific numbers in the print statements can be pulled from the > > >>> hexdump. And if the verifier tells us exactly what check failed, > > >>> we don't have to decode the entire hexdump to know what field was > > >>> out of band. > > >> > > >> How much do we increase the size of the hexdump? 64 -> 128? Or > > >> whatever the structure header size is? > > > > > > I choose 64 because it captured the primary header for most > > > structures for CRC enabled filesystems, so it would have > > > owner/crc/uuid/etc in it. I wasn't really trying to capture the > > > object specific metadata in it, but increasing to 128 bytes would > > > capture most of that block headers, too. Won't really help with > > > inodes, though, as the core is 176 bytes and the owner/crc stuff is > > > at the end.... > > > > > >> How about if xfs_error_level >= > > >> XFS_ERRORLEVEL_HIGH then we dump the entire buffer? > > > > > > Excellent idea. We can easily capture the entire output for > > > corruptions the users can easily trip over. Maybe put in the short > > > dump a line "turn error level up to 11 to get a full dump of the > > > corruption"? > > > > Yep, the thing about "more info only if you tune it" is that nobody > > will know to tune it. Unless you printk that info... > > > > Of course nobody will know what "turn error up ..." means, either. > > Sure, I was just paraphrasing how an error message might look. A > few quick coats of paint on the bikeshed will result in something > like: > > "If this is a recurring error, please set > /proc/sys/fs/xfs/error_level to ...." > > > Hm, at one point I had a patch to add object size to the > > xfs_buf_ops struct and print that many bytes, but can't find it now :/ > > (not that it was very complicated...) > > > > Anyway, point is making it vary with the size of the object wouldn't > > be too hard. > > Probably not, but it is complicated by the fact we have a couple of > different ways of dumping corruption errors. e.g. inode verifier > warnings are dumped through XFS_CORRUPTION_ERROR() rather than > xfs_verifier_error() as they are not buffer based verifiers. Other > things like log record CRC failures are hard coded to dump 32 bytes, > as is xlog_print_trans() on transaction overruns.... > > That's not a show stopper, but it would be nice to have consistent > behaviour across all the mechanisms we use to dump object data that > failed verification... /me wonders if it'd suffice just to add an xfs_params value in /proc, set its default to 128 bytes, and make the corruption reporters query the xfs_param. Then we could tell users to set it to some magic value (-1? 0?) to get the entire buffer. 
I just had another thought -- what if we always dump the whole buffer if the corruption would result in fs shutdown? --D > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Aug 30, 2017 at 10:44:43PM -0700, Darrick J. Wong wrote: > On Thu, Aug 31, 2017 at 01:27:36PM +1000, Dave Chinner wrote: > > On Wed, Aug 30, 2017 at 10:05:25PM -0500, Eric Sandeen wrote: > > > On 8/30/17 9:43 PM, Dave Chinner wrote: > > > > On Wed, Aug 30, 2017 at 05:10:09PM -0700, Darrick J. Wong wrote: > > > >> On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: > > > >>> On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > > > >>>> On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > > >>>>> So what do you think of the version that adds real printks for > > > >>>>> each condition including more details like the one verifier I > > > >>>>> did below? Probably needs some unlikely annotations, though. > > > >>>> > > > >>>> Given that there was another resend of the series I'd be really > > > >>>> curious about the answer to this? > > > >>> > > > >>> If we increase the size of the hexdump on error, then most of the > > > >>> specific numbers in the print statements can be pulled from the > > > >>> hexdump. And if the verifier tells us exactly what check failed, > > > >>> we don't have to decode the entire hexdump to know what field was > > > >>> out of band. > > > >> > > > >> How much do we increase the size of the hexdump? 64 -> 128? Or > > > >> whatever the structure header size is? > > > > > > > > I choose 64 because it captured the primary header for most > > > > structures for CRC enabled filesystems, so it would have > > > > owner/crc/uuid/etc in it. I wasn't really trying to capture the > > > > object specific metadata in it, but increasing to 128 bytes would > > > > capture most of that block headers, too. Won't really help with > > > > inodes, though, as the core is 176 bytes and the owner/crc stuff is > > > > at the end.... > > > > > > > >> How about if xfs_error_level >= > > > >> XFS_ERRORLEVEL_HIGH then we dump the entire buffer? > > > > > > > > Excellent idea. We can easily capture the entire output for > > > > corruptions the users can easily trip over. Maybe put in the short > > > > dump a line "turn error level up to 11 to get a full dump of the > > > > corruption"? > > > > > > Yep, the thing about "more info only if you tune it" is that nobody > > > will know to tune it. Unless you printk that info... > > > > > > Of course nobody will know what "turn error up ..." means, either. > > > > Sure, I was just paraphrasing how an error message might look. A > > few quick coats of paint on the bikeshed will result in something > > like: > > > > "If this is a recurring error, please set > > /proc/sys/fs/xfs/error_level to ...." > > > > > Hm, at one point I had a patch to add object size to the > > > xfs_buf_ops struct and print that many bytes, but can't find it now :/ > > > (not that it was very complicated...) > > > > > > Anyway, point is making it vary with the size of the object wouldn't > > > be too hard. > > > > Probably not, but it is complicated by the fact we have a couple of > > different ways of dumping corruption errors. e.g. inode verifier > > warnings are dumped through XFS_CORRUPTION_ERROR() rather than > > xfs_verifier_error() as they are not buffer based verifiers. Other > > things like log record CRC failures are hard coded to dump 32 bytes, > > as is xlog_print_trans() on transaction overruns.... > > > > That's not a show stopper, but it would be nice to have consistent > > behaviour across all the mechanisms we use to dump object data that > > failed verification... 
> > /me wonders if it'd suffice just to add an xfs_params value in /proc, > set its default to 128 bytes, and make the corruption reporters query > the xfs_param. Then we could tell users to set it to some magic value > (-1? 0?) to get the entire buffer. Let's avoid adding a new proc entries to configure error verbosity when we already have a proc entry that controls error verbosity.... > I just had another thought -- what if we always dump the whole buffer if > the corruption would result in fs shutdown? How do you know that a verifier failure (or any specific call to XFS_CORRUPTION_ERROR) is going to result in a filesystem shutdown? Cheers, Dave.
On Fri, Sep 01, 2017 at 09:37:26AM +1000, Dave Chinner wrote: > On Wed, Aug 30, 2017 at 10:44:43PM -0700, Darrick J. Wong wrote: > > On Thu, Aug 31, 2017 at 01:27:36PM +1000, Dave Chinner wrote: > > > On Wed, Aug 30, 2017 at 10:05:25PM -0500, Eric Sandeen wrote: > > > > On 8/30/17 9:43 PM, Dave Chinner wrote: > > > > > On Wed, Aug 30, 2017 at 05:10:09PM -0700, Darrick J. Wong wrote: > > > > >> On Wed, Aug 30, 2017 at 08:22:47AM +1000, Dave Chinner wrote: > > > > >>> On Tue, Aug 29, 2017 at 08:11:59AM -0700, Christoph Hellwig wrote: > > > > >>>> On Mon, Aug 21, 2017 at 01:13:33AM -0700, Christoph Hellwig wrote: > > > > >>>>> So what do you think of the version that adds real printks for > > > > >>>>> each condition including more details like the one verifier I > > > > >>>>> did below? Probably needs some unlikely annotations, though. > > > > >>>> > > > > >>>> Given that there was another resend of the series I'd be really > > > > >>>> curious about the answer to this? > > > > >>> > > > > >>> If we increase the size of the hexdump on error, then most of the > > > > >>> specific numbers in the print statements can be pulled from the > > > > >>> hexdump. And if the verifier tells us exactly what check failed, > > > > >>> we don't have to decode the entire hexdump to know what field was > > > > >>> out of band. > > > > >> > > > > >> How much do we increase the size of the hexdump? 64 -> 128? Or > > > > >> whatever the structure header size is? > > > > > > > > > > I choose 64 because it captured the primary header for most > > > > > structures for CRC enabled filesystems, so it would have > > > > > owner/crc/uuid/etc in it. I wasn't really trying to capture the > > > > > object specific metadata in it, but increasing to 128 bytes would > > > > > capture most of that block headers, too. Won't really help with > > > > > inodes, though, as the core is 176 bytes and the owner/crc stuff is > > > > > at the end.... > > > > > > > > > >> How about if xfs_error_level >= > > > > >> XFS_ERRORLEVEL_HIGH then we dump the entire buffer? > > > > > > > > > > Excellent idea. We can easily capture the entire output for > > > > > corruptions the users can easily trip over. Maybe put in the short > > > > > dump a line "turn error level up to 11 to get a full dump of the > > > > > corruption"? > > > > > > > > Yep, the thing about "more info only if you tune it" is that nobody > > > > will know to tune it. Unless you printk that info... > > > > > > > > Of course nobody will know what "turn error up ..." means, either. > > > > > > Sure, I was just paraphrasing how an error message might look. A > > > few quick coats of paint on the bikeshed will result in something > > > like: > > > > > > "If this is a recurring error, please set > > > /proc/sys/fs/xfs/error_level to ...." > > > > > > > Hm, at one point I had a patch to add object size to the > > > > xfs_buf_ops struct and print that many bytes, but can't find it now :/ > > > > (not that it was very complicated...) > > > > > > > > Anyway, point is making it vary with the size of the object wouldn't > > > > be too hard. > > > > > > Probably not, but it is complicated by the fact we have a couple of > > > different ways of dumping corruption errors. e.g. inode verifier > > > warnings are dumped through XFS_CORRUPTION_ERROR() rather than > > > xfs_verifier_error() as they are not buffer based verifiers. Other > > > things like log record CRC failures are hard coded to dump 32 bytes, > > > as is xlog_print_trans() on transaction overruns.... 
> > > > > > That's not a show stopper, but it would be nice to have consistent > > > behaviour across all the mechanisms we use to dump object data that > > > failed verification... > > > > /me wonders if it'd suffice just to add an xfs_params value in /proc, > > set its default to 128 bytes, and make the corruption reporters query > > the xfs_param. Then we could tell users to set it to some magic value > > (-1? 0?) to get the entire buffer. > > Let's avoid adding a new proc entries to configure error verbosity > when we already have a proc entry that controls error verbosity.... Fair enough. > > I just had another thought -- what if we always dump the whole buffer if > > the corruption would result in fs shutdown? > > How do you know that a verifier failure (or any specific call to > XFS_CORRUPTION_ERROR) is going to result in a filesystem shutdown? Nuts. For a minute there I thought that if we were trying to get/read a buffer we'd have assigned it to a transaction before calling the verifier, but that's not true. Oh well. I'll start with a simpler patch to increase the dump size if xfs_error_level is high. --D > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
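The "simpler patch" mentioned above could amount to a fragment like the one below in xfs_verifier_error(): keep a short prefix dump by default and dump the whole buffer once the error level is raised. xfs_error_level, XFS_ERRLEVEL_LOW and XFS_ERRLEVEL_HIGH are the existing tunables; the 128-byte default is only an illustrative choice, not the value the series settled on.

	if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
		/* whole buffer at maximum verbosity, a short prefix otherwise */
		int	len = xfs_error_level >= XFS_ERRLEVEL_HIGH ?
				BBTOB(bp->b_length) : 128;

		xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", len);
		xfs_hex_dump(xfs_buf_offset(bp, 0), len);
	}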
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index b887fb2a2bcf..4386c68f72c6 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -113,27 +113,37 @@ xfs_dir3_leaf_check_int(
 	 * Should factor in the size of the bests table as well.
 	 * We can deduce a value for that from di_size.
 	 */
-	if (hdr->count > ops->leaf_max_ents(geo))
+	if (hdr->count > ops->leaf_max_ents(geo)) {
+		xfs_warn(mp, "count (%d) above max (%d)\n",
+			hdr->count, ops->leaf_max_ents(geo));
 		return false;
+	}
 
 	/* Leaves and bests don't overlap in leaf format. */
 	if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
 	     hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
-	    (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
+	    (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) {
+		xfs_warn(mp, "ents overlappings bests\n");
 		return false;
+	}
 
 	/* Check hash value order, count stale entries. */
 	for (i = stale = 0; i < hdr->count; i++) {
 		if (i + 1 < hdr->count) {
 			if (be32_to_cpu(ents[i].hashval) >
-			    be32_to_cpu(ents[i + 1].hashval))
+			    be32_to_cpu(ents[i + 1].hashval)) {
+				xfs_warn(mp, "broken hash order\n");
 				return false;
+			}
 		}
 		if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 			stale++;
 	}
-	if (hdr->stale != stale)
+	if (hdr->stale != stale) {
+		xfs_warn(mp, "incorrect stalte count (%d, expected %d)\n",
+			hdr->stale, stale);
 		return false;
+	}
 	return true;
 }
 
@@ -159,12 +169,21 @@ xfs_dir3_leaf_verify(
 		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
 							 : XFS_DIR3_LEAFN_MAGIC;
 
-		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
+		if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) {
+			xfs_warn(mp, "incorrect magic number (0x%hx, expected 0x%hx)\n",
+				leaf3->info.hdr.magic, magic3);
 			return false;
-		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
+		}
+		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) {
+			xfs_warn(mp, "incorrect uuid, (%pUb, expected %pUb)\n",
+				&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
 			return false;
-		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
+		}
+		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) {
+			xfs_warn(mp, "incorrect blkno, (%lld, expected %lld)\n",
+				be64_to_cpu(leaf3->info.blkno), bp->b_bn);
 			return false;
+		}
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
 			return false;
 	} else {