diff mbox series

btrfs: remove hole from struct btrfs_delayed_node

Message ID 008db737b88fdf9993be37ff44edc89e31a3677a.1730808362.git.fdmanana@suse.com (mailing list archive)
State New
Headers show
Series btrfs: remove hole from struct btrfs_delayed_node | expand

Commit Message

Filipe Manana Nov. 5, 2024, 12:08 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

On x86_64 and a release kernel, there's a 4-byte hole in the structure
after the ref count field:

  struct btrfs_delayed_node {
          u64                        inode_id;             /*     0     8 */
          u64                        bytes_reserved;       /*     8     8 */
          struct btrfs_root *        root;                 /*    16     8 */
          struct list_head           n_list;               /*    24    16 */
          struct list_head           p_list;               /*    40    16 */
          struct rb_root_cached      ins_root;             /*    56    16 */
          /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
          struct rb_root_cached      del_root;             /*    72    16 */
          struct mutex               mutex;                /*    88    32 */
          struct btrfs_inode_item    inode_item;           /*   120   160 */
          /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */
          refcount_t                 refs;                 /*   280     4 */

          /* XXX 4 bytes hole, try to pack */

          u64                        index_cnt;            /*   288     8 */
          long unsigned int          flags;                /*   296     8 */
          int                        count;                /*   304     4 */
          u32                        curr_index_batch_size; /*   308     4 */
          u32                        index_item_leaves;    /*   312     4 */

          /* size: 320, cachelines: 5, members: 15 */
          /* sum members: 312, holes: 1, sum holes: 4 */
          /* padding: 4 */
  };

Move the 'count' field, which is 4 bytes long, to just below the ref count
field, so we eliminate the hole and reduce the structure size from 320
bytes down to 312 bytes:

  struct btrfs_delayed_node {
          u64                        inode_id;             /*     0     8 */
          u64                        bytes_reserved;       /*     8     8 */
          struct btrfs_root *        root;                 /*    16     8 */
          struct list_head           n_list;               /*    24    16 */
          struct list_head           p_list;               /*    40    16 */
          struct rb_root_cached      ins_root;             /*    56    16 */
          /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
          struct rb_root_cached      del_root;             /*    72    16 */
          struct mutex               mutex;                /*    88    32 */
          struct btrfs_inode_item    inode_item;           /*   120   160 */
          /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */
          refcount_t                 refs;                 /*   280     4 */
          int                        count;                /*   284     4 */
          u64                        index_cnt;            /*   288     8 */
          long unsigned int          flags;                /*   296     8 */
          u32                        curr_index_batch_size; /*   304     4 */
          u32                        index_item_leaves;    /*   308     4 */

          /* size: 312, cachelines: 5, members: 15 */
          /* last cacheline: 56 bytes */
  };

This now allows having 13 delayed nodes per 4K page instead of 12.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/delayed-inode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

David Sterba Nov. 5, 2024, 5:49 p.m. UTC | #1
On Tue, Nov 05, 2024 at 12:08:49PM +0000, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> On x86_64 and a release kernel, there's a 4 bytes hole in the structure
> after the ref count field:
> 
>   struct btrfs_delayed_node {
>           u64                        inode_id;             /*     0     8 */
>           u64                        bytes_reserved;       /*     8     8 */
>           struct btrfs_root *        root;                 /*    16     8 */
>           struct list_head           n_list;               /*    24    16 */
>           struct list_head           p_list;               /*    40    16 */
>           struct rb_root_cached      ins_root;             /*    56    16 */
>           /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>           struct rb_root_cached      del_root;             /*    72    16 */
>           struct mutex               mutex;                /*    88    32 */
>           struct btrfs_inode_item    inode_item;           /*   120   160 */
>           /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */
>           refcount_t                 refs;                 /*   280     4 */
> 
>           /* XXX 4 bytes hole, try to pack */
> 
>           u64                        index_cnt;            /*   288     8 */
>           long unsigned int          flags;                /*   296     8 */
>           int                        count;                /*   304     4 */
>           u32                        curr_index_batch_size; /*   308     4 */
>           u32                        index_item_leaves;    /*   312     4 */
> 
>           /* size: 320, cachelines: 5, members: 15 */
>           /* sum members: 312, holes: 1, sum holes: 4 */
>           /* padding: 4 */
>   };
> 
> Move the 'count' field, which is 4 bytes long, to just below the ref count
> field, so we eliminate the hole and reduce the structure size from 320
> bytes down to 312 bytes:
> 
>   struct btrfs_delayed_node {
>           u64                        inode_id;             /*     0     8 */
>           u64                        bytes_reserved;       /*     8     8 */
>           struct btrfs_root *        root;                 /*    16     8 */
>           struct list_head           n_list;               /*    24    16 */
>           struct list_head           p_list;               /*    40    16 */
>           struct rb_root_cached      ins_root;             /*    56    16 */
>           /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>           struct rb_root_cached      del_root;             /*    72    16 */
>           struct mutex               mutex;                /*    88    32 */
>           struct btrfs_inode_item    inode_item;           /*   120   160 */
>           /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */
>           refcount_t                 refs;                 /*   280     4 */
>           int                        count;                /*   284     4 */
>           u64                        index_cnt;            /*   288     8 */
>           long unsigned int          flags;                /*   296     8 */
>           u32                        curr_index_batch_size; /*   304     4 */
>           u32                        index_item_leaves;    /*   308     4 */
> 
>           /* size: 312, cachelines: 5, members: 15 */
>           /* last cacheline: 56 bytes */
>   };
> 
> This now allows to have 13 delayed nodes per 4K page instead of 12.
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>

Reviewed-by: David Sterba <dsterba@suse.com>
Qu Wenruo Nov. 5, 2024, 8:50 p.m. UTC | #2
在 2024/11/5 22:38, fdmanana@kernel.org 写道:
> From: Filipe Manana <fdmanana@suse.com>
>
> On x86_64 and a release kernel, there's a 4 bytes hole in the structure
> after the ref count field:
>
>    struct btrfs_delayed_node {
>            u64                        inode_id;             /*     0     8 */
>            u64                        bytes_reserved;       /*     8     8 */
>            struct btrfs_root *        root;                 /*    16     8 */
>            struct list_head           n_list;               /*    24    16 */
>            struct list_head           p_list;               /*    40    16 */
>            struct rb_root_cached      ins_root;             /*    56    16 */
>            /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>            struct rb_root_cached      del_root;             /*    72    16 */
>            struct mutex               mutex;                /*    88    32 */
>            struct btrfs_inode_item    inode_item;           /*   120   160 */
>            /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */
>            refcount_t                 refs;                 /*   280     4 */
>
>            /* XXX 4 bytes hole, try to pack */
>
>            u64                        index_cnt;            /*   288     8 */
>            long unsigned int          flags;                /*   296     8 */
>            int                        count;                /*   304     4 */
>            u32                        curr_index_batch_size; /*   308     4 */
>            u32                        index_item_leaves;    /*   312     4 */
>
>            /* size: 320, cachelines: 5, members: 15 */
>            /* sum members: 312, holes: 1, sum holes: 4 */
>            /* padding: 4 */
>    };
>
> Move the 'count' field, which is 4 bytes long, to just below the ref count
> field, so we eliminate the hole and reduce the structure size from 320
> bytes down to 312 bytes:
>
>    struct btrfs_delayed_node {
>            u64                        inode_id;             /*     0     8 */
>            u64                        bytes_reserved;       /*     8     8 */
>            struct btrfs_root *        root;                 /*    16     8 */
>            struct list_head           n_list;               /*    24    16 */
>            struct list_head           p_list;               /*    40    16 */
>            struct rb_root_cached      ins_root;             /*    56    16 */
>            /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>            struct rb_root_cached      del_root;             /*    72    16 */
>            struct mutex               mutex;                /*    88    32 */
>            struct btrfs_inode_item    inode_item;           /*   120   160 */
>            /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */
>            refcount_t                 refs;                 /*   280     4 */
>            int                        count;                /*   284     4 */
>            u64                        index_cnt;            /*   288     8 */
>            long unsigned int          flags;                /*   296     8 */
>            u32                        curr_index_batch_size; /*   304     4 */
>            u32                        index_item_leaves;    /*   308     4 */
>
>            /* size: 312, cachelines: 5, members: 15 */
>            /* last cacheline: 56 bytes */
>    };
>
> This now allows to have 13 delayed nodes per 4K page instead of 12.
>
> Signed-off-by: Filipe Manana <fdmanana@suse.com>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu
> ---
>   fs/btrfs/delayed-inode.h | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
> index 7cfefdfe54ea..f4d9feac0d0e 100644
> --- a/fs/btrfs/delayed-inode.h
> +++ b/fs/btrfs/delayed-inode.h
> @@ -64,9 +64,9 @@ struct btrfs_delayed_node {
>   	struct mutex mutex;
>   	struct btrfs_inode_item inode_item;
>   	refcount_t refs;
> +	int count;
>   	u64 index_cnt;
>   	unsigned long flags;
> -	int count;
>   	/*
>   	 * The size of the next batch of dir index items to insert (if this
>   	 * node is from a directory inode). Protected by @mutex.
diff mbox series

Patch

diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 7cfefdfe54ea..f4d9feac0d0e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -64,9 +64,9 @@  struct btrfs_delayed_node {
 	struct mutex mutex;
 	struct btrfs_inode_item inode_item;
 	refcount_t refs;
+	int count;
 	u64 index_cnt;
 	unsigned long flags;
-	int count;
 	/*
 	 * The size of the next batch of dir index items to insert (if this
 	 * node is from a directory inode). Protected by @mutex.