diff mbox series

[v3] btrfs: add fs state details to error messages.

Message ID 8a2a73ab4b48a4e73d24cf7f10cc0fe245d50a84.1645562216.git.sweettea-kernel@dorminy.me (mailing list archive)
State New, archived
Headers show
Series [v3] btrfs: add fs state details to error messages. | expand

Commit Message

Sweet Tea Dorminy Feb. 22, 2022, 8:42 p.m. UTC
When a filesystem goes read-only due to an error, multiple errors tend
to be reported, some of which are knock-on failures. Logging fs_states,
in btrfs_handle_fs_error() and btrfs_printk() helps distinguish the
first error from subsequent messages which may only exist due to an
error state.

Under the new format, most initial errors will look like:
`BTRFS: error (device loop0) in ...`
while subsequent errors will begin with:
`error (device loop0: state E) in ...`

An initial transaction abort error will look like
`error (device loop0: state A) in ...`
and subsequent messages will contain
`(device loop0: state EA) in ...`

Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
---
v3:
  - Reworked btrfs_state_to_string to use an array mapping all states
    to various error chars, or nothing, explicitly. Added error logging
    for more states, as requested.
  - Consolidated buffer length definition

v2: 
  - Changed btrfs_state_to_string() for clarity
  - Removed superfluous whitespace change
  - https://lore.kernel.org/linux-btrfs/084c136c6bb2d20ca0e91af7ded48306d52bb910.1645210326.git.sweettea-kernel@dorminy.me/

v1:
  - https://lore.kernel.org/linux-btrfs/20220212191042.94954-1-sweettea-kernel@dorminy.me/

 fs/btrfs/ctree.h |  2 ++
 fs/btrfs/super.c | 62 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 56 insertions(+), 8 deletions(-)

Comments

David Sterba Feb. 23, 2022, 3:24 p.m. UTC | #1
On Tue, Feb 22, 2022 at 03:42:28PM -0500, Sweet Tea Dorminy wrote:
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -66,6 +66,46 @@ static struct file_system_type btrfs_root_fs_type;
>  
>  static int btrfs_remount(struct super_block *sb, int *flags, char *data);
>  
> +#define STATE_STRING_PREFACE ": state "
> +#define STATE_STRING_BUF_LEN \
> +	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
> +
> +/* Characters to print to indicate error conditions. RO is not an error. */
> +static const char * const fs_state_strings[] = {
> +	[BTRFS_FS_STATE_ERROR]			= "E",
> +	[BTRFS_FS_STATE_REMOUNTING]		= "M",
> +	[BTRFS_FS_STATE_RO]			= NULL,
> +	[BTRFS_FS_STATE_TRANS_ABORTED]		= "A",
> +	[BTRFS_FS_STATE_DEV_REPLACING]		= "P",
> +	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= NULL,
> +	[BTRFS_FS_STATE_NO_CSUMS]		= NULL,
> +	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= "L",

Yeah that's the idea with the table, but I think you don't need to use
strings, it should be sufficient to use chars, and 0 works for the empty
ones.  The way you did it consumes more memory and has indirection with
the pointers to the actual single letter strings.

I'm not sure if we want the non-error states like remounting or
replacing, but actually why not, even if it's a transient state it's
another piece of information that could be useful eventually.
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8992e0096163..3db337cd015a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -148,6 +148,8 @@  enum {
 
 	/* Indicates there was an error cleaning up a log tree. */
 	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+
+	BTRFS_FS_STATE_COUNT,
 };
 
 #define BTRFS_BACKREF_REV_MAX		256
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4d947ba32da9..3fab9e46e80c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,46 @@  static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
+#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_BUF_LEN \
+	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/* Characters to print to indicate error conditions. RO is not an error. */
+static const char * const fs_state_strings[] = {
+	[BTRFS_FS_STATE_ERROR]			= "E",
+	[BTRFS_FS_STATE_REMOUNTING]		= "M",
+	[BTRFS_FS_STATE_RO]			= NULL,
+	[BTRFS_FS_STATE_TRANS_ABORTED]		= "A",
+	[BTRFS_FS_STATE_DEV_REPLACING]		= "P",
+	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= NULL,
+	[BTRFS_FS_STATE_NO_CSUMS]		= NULL,
+	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= "L",
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned int bit;
+	unsigned int states_printed = 0;
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	for_each_set_bit(bit, &info->fs_state, sizeof(info->fs_state)) {
+		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+		if ((bit < BTRFS_FS_STATE_COUNT) && (fs_state_strings[bit] != NULL)) {
+			*curr++ = fs_state_strings[bit][0];
+			states_printed++;
+		}
+	}
+
+	/* If no states were printed, reset the buffer */
+	if (!states_printed)
+		curr = buf;
+
+	*curr++ = '\0';
+}
+
 /*
  * Generally the error codes correspond to their respective errors, but there
  * are a few special cases.
@@ -128,6 +168,7 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 {
 	struct super_block *sb = fs_info->sb;
 #ifdef CONFIG_PRINTK
+	char statestr[STATE_STRING_BUF_LEN];
 	const char *errstr;
 #endif
 
@@ -140,6 +181,7 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 
 #ifdef CONFIG_PRINTK
 	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -148,12 +190,12 @@  void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 		vaf.fmt = fmt;
 		vaf.va = &args;
 
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
-			sb->s_id, function, line, errno, errstr, &vaf);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
-			sb->s_id, function, line, errno, errstr);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
 	}
 #endif
 
@@ -240,11 +282,15 @@  void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
 	vaf.va = &args;
 
 	if (__ratelimit(ratelimit)) {
-		if (fs_info)
-			printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-				fs_info->sb->s_id, &vaf);
-		else
+		if (fs_info) {
+			char statestr[STATE_STRING_BUF_LEN];
+
+			btrfs_state_to_string(fs_info, statestr);
+			printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+				fs_info->sb->s_id, statestr, &vaf);
+		} else {
 			printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+		}
 	}
 
 	va_end(args);