diff mbox series

btrfs: Adjust the layout of the btrfs_inode structure to save memory.

Message ID 20240603153410.79244-1-sunjunchao2870@gmail.com (mailing list archive)
State New, archived
Headers show
Series btrfs: Adjust the layout of the btrfs_inode structure to save memory. | expand

Commit Message

Julian Sun June 3, 2024, 3:34 p.m. UTC
Using pahole, we can see that there are some padding holes in
the current btrfs_inode structure. Adjusting the layout of
btrfs_inode can reduce these holes, decreasing the size of
the structure by 8 bytes (although there are still 5 bytes of padding).

Here is the output generated by pahole:

        u8                         defrag_compress;      /*    26     1 */

        /* XXX 5 bytes hole, try to pack */

        spinlock_t                 lock;                 /*    32    64 */
	...
        unsigned int               outstanding_extents;  /*   432     4 */

        /* XXX 4 bytes hole, try to pack */

        spinlock_t                 ordered_tree_lock;    /*   440    64 */
	...
        u64                        i_otime_sec;          /*   800     8 */
        u32                        i_otime_nsec;         /*   808     4 */

        /* XXX 4 bytes hole, try to pack */

        struct list_head           delayed_iput;         /*   816    16 */

Signed-off-by: Junchao Sun <sunjunchao2870@gmail.com>
---
 fs/btrfs/btrfs_inode.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

Comments

Filipe Manana June 3, 2024, 3:46 p.m. UTC | #1
On Mon, Jun 3, 2024 at 4:34 PM Junchao Sun <sunjunchao2870@gmail.com> wrote:
>
> Using pahole, we can see that there are some padding holes in
> the current btrfs_inode structure. Adjusting the layout of
> btrfs_inode can reduce these holes, decreasing the size of
> the structure by 8 bytes (although there are still 5 bytes of padding).
>
> Here is the output generated by pahole:
>
>         u8                         defrag_compress;      /*    26     1 */
>
>         /* XXX 5 bytes hole, try to pack */
>
>         spinlock_t                 lock;                 /*    32    64 */
>         ...
>         unsigned int               outstanding_extents;  /*   432     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         spinlock_t                 ordered_tree_lock;    /*   440    64 */
>         ...
>         u64                        i_otime_sec;          /*   800     8 */
>         u32                        i_otime_nsec;         /*   808     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct list_head           delayed_iput;         /*   816    16 */

What branch is this based on?

On for-next, which is what you should be using, it doesn't help reduce
the size of the structure, it remains at 1024 bytes.
And as far as I can see, it also doesn't result in better locality
(fields used together now in the same cache line).

And it's just moving the hole from one place to another.

Before the patch, pahole on for-next gives:

struct btrfs_inode {
        struct btrfs_root *        root;                 /*     0     8 */
        u8                         prop_compress;        /*     8     1 */
        u8                         defrag_compress;      /*     9     1 */

        /* XXX 2 bytes hole, try to pack */

        spinlock_t                 lock;                 /*    12     4 */
        struct extent_map_tree     extent_tree;          /*    16    32 */
        struct extent_io_tree      io_tree;              /*    48    24 */
        /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
        struct extent_io_tree *    file_extent_tree;     /*    72     8 */
        struct mutex               log_mutex;            /*    80    32 */
        unsigned int               outstanding_extents;  /*   112     4 */
        spinlock_t                 ordered_tree_lock;    /*   116     4 */
        struct rb_root             ordered_tree;         /*   120     8 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        struct rb_node *           ordered_tree_last;    /*   128     8 */
        struct list_head           delalloc_inodes;      /*   136    16 */
        long unsigned int          runtime_flags;        /*   152     8 */
        u64                        generation;           /*   160     8 */
        u64                        last_trans;           /*   168     8 */
        u64                        logged_trans;         /*   176     8 */
        int                        last_sub_trans;       /*   184     4 */
        int                        last_log_commit;      /*   188     4 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        union {
                u64                delalloc_bytes;       /*   192     8 */
                u64                first_dir_index_to_log; /*   192     8 */
        };                                               /*   192     8 */
        union {
                u64                new_delalloc_bytes;   /*   200     8 */
                u64                last_dir_index_offset; /*   200     8 */
        };                                               /*   200     8 */
        union {
                u64                defrag_bytes;         /*   208     8 */
                u64                reloc_block_group_start; /*   208     8 */
        };                                               /*   208     8 */
        u64                        disk_i_size;          /*   216     8 */
        union {
                u64                index_cnt;            /*   224     8 */
                u64                csum_bytes;           /*   224     8 */
        };                                               /*   224     8 */
        u64                        dir_index;            /*   232     8 */
        u64                        last_unlink_trans;    /*   240     8 */
        union {
                u64                last_reflink_trans;   /*   248     8 */
                u64                ref_root_id;          /*   248     8 */
        };                                               /*   248     8 */
        /* --- cacheline 4 boundary (256 bytes) --- */
        u32                        flags;                /*   256     4 */
        u32                        ro_flags;             /*   260     4 */
        struct btrfs_block_rsv     block_rsv;            /*   264    48 */
        struct btrfs_delayed_node * delayed_node;        /*   312     8 */
        /* --- cacheline 5 boundary (320 bytes) --- */
        u64                        i_otime_sec;          /*   320     8 */
        u32                        i_otime_nsec;         /*   328     4 */

        /* XXX 4 bytes hole, try to pack */

        struct list_head           delayed_iput;         /*   336    16 */
        struct rw_semaphore        i_mmap_lock;          /*   352    40 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        struct inode               vfs_inode
__attribute__((__aligned__(8))); /*   392   632 */

        /* size: 1024, cachelines: 16, members: 36 */
        /* sum members: 1018, holes: 2, sum holes: 6 */
        /* forced alignments: 1 */
} __attribute__((__aligned__(8)));

After the patch is gives:

struct btrfs_inode {
        struct btrfs_root *        root;                 /*     0     8 */
        u8                         prop_compress;        /*     8     1 */
        u8                         defrag_compress;      /*     9     1 */

        /* XXX 2 bytes hole, try to pack */

        spinlock_t                 lock;                 /*    12     4 */
        struct extent_map_tree     extent_tree;          /*    16    32 */
        struct extent_io_tree      io_tree;              /*    48    24 */
        /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
        struct extent_io_tree *    file_extent_tree;     /*    72     8 */
        struct mutex               log_mutex;            /*    80    32 */
        spinlock_t                 ordered_tree_lock;    /*   112     4 */

        /* XXX 4 bytes hole, try to pack */

        struct rb_root             ordered_tree;         /*   120     8 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        struct rb_node *           ordered_tree_last;    /*   128     8 */
        struct list_head           delalloc_inodes;      /*   136    16 */
        long unsigned int          runtime_flags;        /*   152     8 */
        u64                        generation;           /*   160     8 */
        u64                        last_trans;           /*   168     8 */
        u64                        logged_trans;         /*   176     8 */
        int                        last_sub_trans;       /*   184     4 */
        int                        last_log_commit;      /*   188     4 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        union {
                u64                delalloc_bytes;       /*   192     8 */
                u64                first_dir_index_to_log; /*   192     8 */
        };                                               /*   192     8 */
        union {
                u64                new_delalloc_bytes;   /*   200     8 */
                u64                last_dir_index_offset; /*   200     8 */
        };                                               /*   200     8 */
        union {
                u64                defrag_bytes;         /*   208     8 */
                u64                reloc_block_group_start; /*   208     8 */
        };                                               /*   208     8 */
        u64                        disk_i_size;          /*   216     8 */
        union {
                u64                index_cnt;            /*   224     8 */
                u64                csum_bytes;           /*   224     8 */
        };                                               /*   224     8 */
        u64                        dir_index;            /*   232     8 */
        u64                        last_unlink_trans;    /*   240     8 */
       union {
                u64                last_reflink_trans;   /*   248     8 */
                u64                ref_root_id;          /*   248     8 */
        };                                               /*   248     8 */
        /* --- cacheline 4 boundary (256 bytes) --- */
        u32                        flags;                /*   256     4 */
        u32                        ro_flags;             /*   260     4 */
        struct btrfs_block_rsv     block_rsv;            /*   264    48 */
        struct btrfs_delayed_node * delayed_node;        /*   312     8 */
        /* --- cacheline 5 boundary (320 bytes) --- */
        u64                        i_otime_sec;          /*   320     8 */
        u32                        i_otime_nsec;         /*   328     4 */
        unsigned int               outstanding_extents;  /*   332     4 */
        struct list_head           delayed_iput;         /*   336    16 */
        struct rw_semaphore        i_mmap_lock;          /*   352    40 */
        /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
        struct inode               vfs_inode
__attribute__((__aligned__(8))); /*   392   632 */

        /* size: 1024, cachelines: 16, members: 36 */
        /* sum members: 1018, holes: 2, sum holes: 6 */
        /* forced alignments: 1 */
} __attribute__((__aligned__(8)));

So no gains at all as far as I can see.

Thanks.

>
> Signed-off-by: Junchao Sun <sunjunchao2870@gmail.com>
> ---
>  fs/btrfs/btrfs_inode.h | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index 7f7c5a92d2b8..184c31bbf2df 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -118,14 +118,6 @@ struct btrfs_inode {
>         /* held while logging the inode in tree-log.c */
>         struct mutex log_mutex;
>
> -       /*
> -        * Counters to keep track of the number of extent item's we may use due
> -        * to delalloc and such.  outstanding_extents is the number of extent
> -        * items we think we'll end up using, and reserved_extents is the number
> -        * of extent items we've reserved metadata for. Protected by 'lock'.
> -        */
> -       unsigned outstanding_extents;
> -
>         /* used to order data wrt metadata */
>         spinlock_t ordered_tree_lock;
>         struct rb_root ordered_tree;
> @@ -260,6 +252,14 @@ struct btrfs_inode {
>         u64 i_otime_sec;
>         u32 i_otime_nsec;
>
> +       /*
> +        * Counters to keep track of the number of extent item's we may use due
> +        * to delalloc and such.  outstanding_extents is the number of extent
> +        * items we think we'll end up using, and reserved_extents is the number
> +        * of extent items we've reserved metadata for. Protected by 'lock'.
> +        */
> +       unsigned outstanding_extents;
> +
>         /* Hook into fs_info->delayed_iputs */
>         struct list_head delayed_iput;
>
> --
> 2.39.2
>
>
Julian Sun June 3, 2024, 4:13 p.m. UTC | #2
Filipe Manana <fdmanana@kernel.org> 于2024年6月3日周一 11:47写道:
>
> On Mon, Jun 3, 2024 at 4:34 PM Junchao Sun <sunjunchao2870@gmail.com> wrote:
> >
> > Using pahole, we can see that there are some padding holes in
> > the current btrfs_inode structure. Adjusting the layout of
> > btrfs_inode can reduce these holes, decreasing the size of
> > the structure by 8 bytes (although there are still 5 bytes of padding).
> >
> > Here is the output generated by pahole:
> >
> >         u8                         defrag_compress;      /*    26     1 */
> >
> >         /* XXX 5 bytes hole, try to pack */
> >
> >         spinlock_t                 lock;                 /*    32    64 */
> >         ...
> >         unsigned int               outstanding_extents;  /*   432     4 */
> >
> >         /* XXX 4 bytes hole, try to pack */
> >
> >         spinlock_t                 ordered_tree_lock;    /*   440    64 */
> >         ...
> >         u64                        i_otime_sec;          /*   800     8 */
> >         u32                        i_otime_nsec;         /*   808     4 */
> >
> >         /* XXX 4 bytes hole, try to pack */
> >
> >         struct list_head           delayed_iput;         /*   816    16 */
>
>
> > What branch is this based on?
> >
> > On for-next, which is what you should be using, it doesn't help reduce
> > the size of the structure, it remains at 1024 bytes.
> > And as far as I can see, it also doesn't result in better locality
> > (fields used together now in the same cache line).
> >
> > And it's just moving the hole from one place to another.

Oh, I was not using the newest commit. I checkouted to the newest
commit, the same thing has been done by 398fb9131f3("btrfs: reorder
btrfs_inode to fill gaps").
Sorry for the noise and thanks for your review!
>
> Before the patch, pahole on for-next gives:
>
> struct btrfs_inode {
>         struct btrfs_root *        root;                 /*     0     8 */
>         u8                         prop_compress;        /*     8     1 */
>         u8                         defrag_compress;      /*     9     1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         spinlock_t                 lock;                 /*    12     4 */
>         struct extent_map_tree     extent_tree;          /*    16    32 */
>         struct extent_io_tree      io_tree;              /*    48    24 */
>         /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>         struct extent_io_tree *    file_extent_tree;     /*    72     8 */
>         struct mutex               log_mutex;            /*    80    32 */
>         unsigned int               outstanding_extents;  /*   112     4 */
>         spinlock_t                 ordered_tree_lock;    /*   116     4 */
>         struct rb_root             ordered_tree;         /*   120     8 */
>         /* --- cacheline 2 boundary (128 bytes) --- */
>         struct rb_node *           ordered_tree_last;    /*   128     8 */
>         struct list_head           delalloc_inodes;      /*   136    16 */
>         long unsigned int          runtime_flags;        /*   152     8 */
>         u64                        generation;           /*   160     8 */
>         u64                        last_trans;           /*   168     8 */
>         u64                        logged_trans;         /*   176     8 */
>         int                        last_sub_trans;       /*   184     4 */
>         int                        last_log_commit;      /*   188     4 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         union {
>                 u64                delalloc_bytes;       /*   192     8 */
>                 u64                first_dir_index_to_log; /*   192     8 */
>         };                                               /*   192     8 */
>         union {
>                 u64                new_delalloc_bytes;   /*   200     8 */
>                 u64                last_dir_index_offset; /*   200     8 */
>         };                                               /*   200     8 */
>         union {
>                 u64                defrag_bytes;         /*   208     8 */
>                 u64                reloc_block_group_start; /*   208     8 */
>         };                                               /*   208     8 */
>         u64                        disk_i_size;          /*   216     8 */
>         union {
>                 u64                index_cnt;            /*   224     8 */
>                 u64                csum_bytes;           /*   224     8 */
>         };                                               /*   224     8 */
>         u64                        dir_index;            /*   232     8 */
>         u64                        last_unlink_trans;    /*   240     8 */
>         union {
>                 u64                last_reflink_trans;   /*   248     8 */
>                 u64                ref_root_id;          /*   248     8 */
>         };                                               /*   248     8 */
>         /* --- cacheline 4 boundary (256 bytes) --- */
>         u32                        flags;                /*   256     4 */
>         u32                        ro_flags;             /*   260     4 */
>         struct btrfs_block_rsv     block_rsv;            /*   264    48 */
>         struct btrfs_delayed_node * delayed_node;        /*   312     8 */
>         /* --- cacheline 5 boundary (320 bytes) --- */
>         u64                        i_otime_sec;          /*   320     8 */
>         u32                        i_otime_nsec;         /*   328     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct list_head           delayed_iput;         /*   336    16 */
>         struct rw_semaphore        i_mmap_lock;          /*   352    40 */
>         /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
>         struct inode               vfs_inode
> __attribute__((__aligned__(8))); /*   392   632 */
>
>         /* size: 1024, cachelines: 16, members: 36 */
>         /* sum members: 1018, holes: 2, sum holes: 6 */
>         /* forced alignments: 1 */
> } __attribute__((__aligned__(8)));
>
> After the patch is gives:
>
> struct btrfs_inode {
>         struct btrfs_root *        root;                 /*     0     8 */
>         u8                         prop_compress;        /*     8     1 */
>         u8                         defrag_compress;      /*     9     1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         spinlock_t                 lock;                 /*    12     4 */
>         struct extent_map_tree     extent_tree;          /*    16    32 */
>         struct extent_io_tree      io_tree;              /*    48    24 */
>         /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>         struct extent_io_tree *    file_extent_tree;     /*    72     8 */
>         struct mutex               log_mutex;            /*    80    32 */
>         spinlock_t                 ordered_tree_lock;    /*   112     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct rb_root             ordered_tree;         /*   120     8 */
>         /* --- cacheline 2 boundary (128 bytes) --- */
>         struct rb_node *           ordered_tree_last;    /*   128     8 */
>         struct list_head           delalloc_inodes;      /*   136    16 */
>         long unsigned int          runtime_flags;        /*   152     8 */
>         u64                        generation;           /*   160     8 */
>         u64                        last_trans;           /*   168     8 */
>         u64                        logged_trans;         /*   176     8 */
>         int                        last_sub_trans;       /*   184     4 */
>         int                        last_log_commit;      /*   188     4 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         union {
>                 u64                delalloc_bytes;       /*   192     8 */
>                 u64                first_dir_index_to_log; /*   192     8 */
>         };                                               /*   192     8 */
>         union {
>                 u64                new_delalloc_bytes;   /*   200     8 */
>                 u64                last_dir_index_offset; /*   200     8 */
>         };                                               /*   200     8 */
>         union {
>                 u64                defrag_bytes;         /*   208     8 */
>                 u64                reloc_block_group_start; /*   208     8 */
>         };                                               /*   208     8 */
>         u64                        disk_i_size;          /*   216     8 */
>         union {
>                 u64                index_cnt;            /*   224     8 */
>                 u64                csum_bytes;           /*   224     8 */
>         };                                               /*   224     8 */
>         u64                        dir_index;            /*   232     8 */
>         u64                        last_unlink_trans;    /*   240     8 */
>        union {
>                 u64                last_reflink_trans;   /*   248     8 */
>                 u64                ref_root_id;          /*   248     8 */
>         };                                               /*   248     8 */
>         /* --- cacheline 4 boundary (256 bytes) --- */
>         u32                        flags;                /*   256     4 */
>         u32                        ro_flags;             /*   260     4 */
>         struct btrfs_block_rsv     block_rsv;            /*   264    48 */
>         struct btrfs_delayed_node * delayed_node;        /*   312     8 */
>         /* --- cacheline 5 boundary (320 bytes) --- */
>         u64                        i_otime_sec;          /*   320     8 */
>         u32                        i_otime_nsec;         /*   328     4 */
>         unsigned int               outstanding_extents;  /*   332     4 */
>         struct list_head           delayed_iput;         /*   336    16 */
>         struct rw_semaphore        i_mmap_lock;          /*   352    40 */
>         /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
>         struct inode               vfs_inode
> __attribute__((__aligned__(8))); /*   392   632 */
>
>         /* size: 1024, cachelines: 16, members: 36 */
>         /* sum members: 1018, holes: 2, sum holes: 6 */
>         /* forced alignments: 1 */
> } __attribute__((__aligned__(8)));
>
> So no gains at all as far as I can see.
>
> Thanks.
>
> >
> > Signed-off-by: Junchao Sun <sunjunchao2870@gmail.com>
> > ---
> >  fs/btrfs/btrfs_inode.h | 16 ++++++++--------
> >  1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> > index 7f7c5a92d2b8..184c31bbf2df 100644
> > --- a/fs/btrfs/btrfs_inode.h
> > +++ b/fs/btrfs/btrfs_inode.h
> > @@ -118,14 +118,6 @@ struct btrfs_inode {
> >         /* held while logging the inode in tree-log.c */
> >         struct mutex log_mutex;
> >
> > -       /*
> > -        * Counters to keep track of the number of extent item's we may use due
> > -        * to delalloc and such.  outstanding_extents is the number of extent
> > -        * items we think we'll end up using, and reserved_extents is the number
> > -        * of extent items we've reserved metadata for. Protected by 'lock'.
> > -        */
> > -       unsigned outstanding_extents;
> > -
> >         /* used to order data wrt metadata */
> >         spinlock_t ordered_tree_lock;
> >         struct rb_root ordered_tree;
> > @@ -260,6 +252,14 @@ struct btrfs_inode {
> >         u64 i_otime_sec;
> >         u32 i_otime_nsec;
> >
> > +       /*
> > +        * Counters to keep track of the number of extent item's we may use due
> > +        * to delalloc and such.  outstanding_extents is the number of extent
> > +        * items we think we'll end up using, and reserved_extents is the number
> > +        * of extent items we've reserved metadata for. Protected by 'lock'.
> > +        */
> > +       unsigned outstanding_extents;
> > +
> >         /* Hook into fs_info->delayed_iputs */
> >         struct list_head delayed_iput;
> >
> > --
> > 2.39.2
> >
> >
diff mbox series

Patch

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7f7c5a92d2b8..184c31bbf2df 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -118,14 +118,6 @@  struct btrfs_inode {
 	/* held while logging the inode in tree-log.c */
 	struct mutex log_mutex;
 
-	/*
-	 * Counters to keep track of the number of extent item's we may use due
-	 * to delalloc and such.  outstanding_extents is the number of extent
-	 * items we think we'll end up using, and reserved_extents is the number
-	 * of extent items we've reserved metadata for. Protected by 'lock'.
-	 */
-	unsigned outstanding_extents;
-
 	/* used to order data wrt metadata */
 	spinlock_t ordered_tree_lock;
 	struct rb_root ordered_tree;
@@ -260,6 +252,14 @@  struct btrfs_inode {
 	u64 i_otime_sec;
 	u32 i_otime_nsec;
 
+	/*
+	 * Counters to keep track of the number of extent item's we may use due
+	 * to delalloc and such.  outstanding_extents is the number of extent
+	 * items we think we'll end up using, and reserved_extents is the number
+	 * of extent items we've reserved metadata for. Protected by 'lock'.
+	 */
+	unsigned outstanding_extents;
+
 	/* Hook into fs_info->delayed_iputs */
 	struct list_head delayed_iput;