Message ID | 008db737b88fdf9993be37ff44edc89e31a3677a.1730808362.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | btrfs: remove hole from struct btrfs_delayed_node | expand |
On Tue, Nov 05, 2024 at 12:08:49PM +0000, fdmanana@kernel.org wrote: > From: Filipe Manana <fdmanana@suse.com> > > On x86_64 and a release kernel, there's a 4 bytes hole in the structure > after the ref count field: > > struct btrfs_delayed_node { > u64 inode_id; /* 0 8 */ > u64 bytes_reserved; /* 8 8 */ > struct btrfs_root * root; /* 16 8 */ > struct list_head n_list; /* 24 16 */ > struct list_head p_list; /* 40 16 */ > struct rb_root_cached ins_root; /* 56 16 */ > /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */ > struct rb_root_cached del_root; /* 72 16 */ > struct mutex mutex; /* 88 32 */ > struct btrfs_inode_item inode_item; /* 120 160 */ > /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */ > refcount_t refs; /* 280 4 */ > > /* XXX 4 bytes hole, try to pack */ > > u64 index_cnt; /* 288 8 */ > long unsigned int flags; /* 296 8 */ > int count; /* 304 4 */ > u32 curr_index_batch_size; /* 308 4 */ > u32 index_item_leaves; /* 312 4 */ > > /* size: 320, cachelines: 5, members: 15 */ > /* sum members: 312, holes: 1, sum holes: 4 */ > /* padding: 4 */ > }; > > Move the 'count' field, which is 4 bytes long, to just below the ref count > field, so we eliminate the hole and reduce the structure size from 320 > bytes down to 312 bytes: > > struct btrfs_delayed_node { > u64 inode_id; /* 0 8 */ > u64 bytes_reserved; /* 8 8 */ > struct btrfs_root * root; /* 16 8 */ > struct list_head n_list; /* 24 16 */ > struct list_head p_list; /* 40 16 */ > struct rb_root_cached ins_root; /* 56 16 */ > /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */ > struct rb_root_cached del_root; /* 72 16 */ > struct mutex mutex; /* 88 32 */ > struct btrfs_inode_item inode_item; /* 120 160 */ > /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */ > refcount_t refs; /* 280 4 */ > int count; /* 284 4 */ > u64 index_cnt; /* 288 8 */ > long unsigned int flags; /* 296 8 */ > u32 curr_index_batch_size; /* 304 4 */ > u32 index_item_leaves; /* 308 4 */ > > /* 
size: 312, cachelines: 5, members: 15 */ > /* last cacheline: 56 bytes */ > }; > > This now allows having 13 delayed nodes per 4K page instead of 12. > > Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: David Sterba <dsterba@suse.com>
在 2024/11/5 22:38, fdmanana@kernel.org 写道: > From: Filipe Manana <fdmanana@suse.com> > > On x86_64 and a release kernel, there's a 4 bytes hole in the structure > after the ref count field: > > struct btrfs_delayed_node { > u64 inode_id; /* 0 8 */ > u64 bytes_reserved; /* 8 8 */ > struct btrfs_root * root; /* 16 8 */ > struct list_head n_list; /* 24 16 */ > struct list_head p_list; /* 40 16 */ > struct rb_root_cached ins_root; /* 56 16 */ > /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */ > struct rb_root_cached del_root; /* 72 16 */ > struct mutex mutex; /* 88 32 */ > struct btrfs_inode_item inode_item; /* 120 160 */ > /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */ > refcount_t refs; /* 280 4 */ > > /* XXX 4 bytes hole, try to pack */ > > u64 index_cnt; /* 288 8 */ > long unsigned int flags; /* 296 8 */ > int count; /* 304 4 */ > u32 curr_index_batch_size; /* 308 4 */ > u32 index_item_leaves; /* 312 4 */ > > /* size: 320, cachelines: 5, members: 15 */ > /* sum members: 312, holes: 1, sum holes: 4 */ > /* padding: 4 */ > }; > > Move the 'count' field, which is 4 bytes long, to just below the ref count > field, so we eliminate the hole and reduce the structure size from 320 > bytes down to 312 bytes: > > struct btrfs_delayed_node { > u64 inode_id; /* 0 8 */ > u64 bytes_reserved; /* 8 8 */ > struct btrfs_root * root; /* 16 8 */ > struct list_head n_list; /* 24 16 */ > struct list_head p_list; /* 40 16 */ > struct rb_root_cached ins_root; /* 56 16 */ > /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */ > struct rb_root_cached del_root; /* 72 16 */ > struct mutex mutex; /* 88 32 */ > struct btrfs_inode_item inode_item; /* 120 160 */ > /* --- cacheline 4 boundary (256 bytes) was 24 bytes ago --- */ > refcount_t refs; /* 280 4 */ > int count; /* 284 4 */ > u64 index_cnt; /* 288 8 */ > long unsigned int flags; /* 296 8 */ > u32 curr_index_batch_size; /* 304 4 */ > u32 index_item_leaves; /* 308 4 */ > > /* size: 312, cachelines: 5, 
members: 15 */ > /* last cacheline: 56 bytes */ > }; > > This now allows having 13 delayed nodes per 4K page instead of 12. > > Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: Qu Wenruo <wqu@suse.com> Thanks, Qu > --- > fs/btrfs/delayed-inode.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h > index 7cfefdfe54ea..f4d9feac0d0e 100644 > --- a/fs/btrfs/delayed-inode.h > +++ b/fs/btrfs/delayed-inode.h > @@ -64,9 +64,9 @@ struct btrfs_delayed_node { > struct mutex mutex; > struct btrfs_inode_item inode_item; > refcount_t refs; > + int count; > u64 index_cnt; > unsigned long flags; > - int count; > /* > * The size of the next batch of dir index items to insert (if this > * node is from a directory inode). Protected by @mutex.
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 7cfefdfe54ea..f4d9feac0d0e 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -64,9 +64,9 @@ struct btrfs_delayed_node { struct mutex mutex; struct btrfs_inode_item inode_item; refcount_t refs; + int count; u64 index_cnt; unsigned long flags; - int count; /* * The size of the next batch of dir index items to insert (if this * node is from a directory inode). Protected by @mutex.