Message ID | c9b508651bafb53e98b9f194271d4b7b2d309cba.1694126893.git.dsterba@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Cleanups and struct packing | expand |
On 2023/9/8 07:09, David Sterba wrote: > We can reduce two members' size that in turn reduce size of struct > btrfs_ref from 64 to 56 bytes. As the structure is often used as a local > variable several functions reduce their stack usage. > > - make enum btrfs_ref_type packed, there are only 4 values > > - switch action and its values to a packed enum > > Final structure layout: > > struct btrfs_ref { > enum btrfs_ref_type type; /* 0 1 */ > enum btrfs_delayed_ref_action action; /* 1 1 */ Considering both type and action have only 4 values, we can further pack them into a single u8. Although this means we can not directly use enum type, but u8 type instead. Thanks, Qu > bool skip_qgroup; /* 2 1 */ > > /* XXX 5 bytes hole, try to pack */ > > u64 bytenr; /* 8 8 */ > u64 len; /* 16 8 */ > u64 parent; /* 24 8 */ > union { > struct btrfs_data_ref data_ref; /* 32 24 */ > struct btrfs_tree_ref tree_ref; /* 32 16 */ > }; /* 32 24 */ > > /* size: 56, cachelines: 1, members: 7 */ > /* sum members: 51, holes: 1, sum holes: 5 */ > /* last cacheline: 56 bytes */ > }; > > Signed-off-by: David Sterba <dsterba@suse.com> > --- > fs/btrfs/delayed-ref.h | 18 ++++++++++++------ > 1 file changed, 12 insertions(+), 6 deletions(-) > > diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h > index b8e14b0ba5f1..224278d4516f 100644 > --- a/fs/btrfs/delayed-ref.h > +++ b/fs/btrfs/delayed-ref.h > @@ -9,10 +9,16 @@ > #include <linux/refcount.h> > > /* these are the possible values of struct btrfs_delayed_ref_node->action */ > -#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ > -#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ > -#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ > -#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ > +enum btrfs_delayed_ref_action { > + /* Add one backref to the tree */ > + BTRFS_ADD_DELAYED_REF = 1, > + /* Delete one backref from the tree */ > + BTRFS_DROP_DELAYED_REF, > + /* Record a full extent allocation */ > + BTRFS_ADD_DELAYED_EXTENT, > + /* Not changing ref count on head ref */ > + BTRFS_UPDATE_DELAYED_HEAD, > +} __packed; > > struct btrfs_delayed_ref_node { > struct rb_node ref_node; > @@ -183,7 +189,7 @@ enum btrfs_ref_type { > BTRFS_REF_DATA, > BTRFS_REF_METADATA, > BTRFS_REF_LAST, > -}; > +} __packed; > > struct btrfs_data_ref { > /* For EXTENT_DATA_REF */ > @@ -223,7 +229,7 @@ struct btrfs_tree_ref { > > struct btrfs_ref { > enum btrfs_ref_type type; > - int action; > + enum btrfs_delayed_ref_action action; > > /* > * Whether this extent should go through qgroup record.
On Fri, Sep 08, 2023 at 08:06:04AM +0800, Qu Wenruo wrote: > > > On 2023/9/8 07:09, David Sterba wrote: > > We can reduce two members' size that in turn reduce size of struct > > btrfs_ref from 64 to 56 bytes. As the structure is often used as a local > > variable several functions reduce their stack usage. > > > > - make enum btrfs_ref_type packed, there are only 4 values > > > > - switch action and its values to a packed enum > > > > Final structure layout: > > > > struct btrfs_ref { > > enum btrfs_ref_type type; /* 0 1 */ > > enum btrfs_delayed_ref_action action; /* 1 1 */ > > Considering both type and action have only 4 values, we can further pack > them into a single u8. > > Although this means we can not directly use enum type, but u8 type instead. I consider byte as good enough for enums or bounded types, unless there's a really good reason to do some ultra packing like in case we could drop a gap of like 5-7 bytes due to alignment. In case of btrfs_ref it does not help. With explict bit width this does not reduce the size (still 56 bytes) and only increase code size: struct btrfs_ref { - enum btrfs_ref_type type; /* 0 1 */ - enum btrfs_delayed_ref_action action; /* 1 1 */ - bool skip_qgroup; /* 2 1 */ + enum btrfs_ref_type type:2; /* 0: 0 1 */ + enum btrfs_delayed_ref_action action:3; /* 0: 2 1 */ + bool skip_qgroup:1; /* 0: 5 1 */ - /* XXX 5 bytes hole, try to pack */ + /* XXX 2 bits hole, try to pack */ + /* XXX 7 bytes hole, try to pack */ u64 bytenr; /* 8 8 */ u64 len; /* 16 8 */ @@ -3116,7 +3117,8 @@ struct btrfs_ref { }; /* 32 24 */ /* size: 56, cachelines: 1, members: 7 */ - /* sum members: 51, holes: 1, sum holes: 5 */ + /* sum members: 48, holes: 1, sum holes: 7 */ + /* sum bitfield members: 6 bits, bit holes: 1, sum bit holes: 2 bits */ /* last cacheline: 56 bytes */ }; Code increase: text data bss dec hex filename 1268477 29813 16088 1314378 140e4a pre/btrfs.ko 1268915 29813 16088 1314816 141000 post/btrfs.ko DELTA: +438 which means the simple byte comparisons are turned into a few instructions that first need to mask off the other values, eg. from btrfs_inc_extent_ref(): test %al,%al -> mov %eax,%edx and $0x3,%edx so it needs more registers and may prevent some optimizations.
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index b8e14b0ba5f1..224278d4516f 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -9,10 +9,16 @@ #include <linux/refcount.h> /* these are the possible values of struct btrfs_delayed_ref_node->action */ -#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ -#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ -#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ -#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ +enum btrfs_delayed_ref_action { + /* Add one backref to the tree */ + BTRFS_ADD_DELAYED_REF = 1, + /* Delete one backref from the tree */ + BTRFS_DROP_DELAYED_REF, + /* Record a full extent allocation */ + BTRFS_ADD_DELAYED_EXTENT, + /* Not changing ref count on head ref */ + BTRFS_UPDATE_DELAYED_HEAD, +} __packed; struct btrfs_delayed_ref_node { struct rb_node ref_node; @@ -183,7 +189,7 @@ enum btrfs_ref_type { BTRFS_REF_DATA, BTRFS_REF_METADATA, BTRFS_REF_LAST, -}; +} __packed; struct btrfs_data_ref { /* For EXTENT_DATA_REF */ @@ -223,7 +229,7 @@ struct btrfs_tree_ref { struct btrfs_ref { enum btrfs_ref_type type; - int action; + enum btrfs_delayed_ref_action action; /* * Whether this extent should go through qgroup record.
We can reduce two members' size that in turn reduce size of struct btrfs_ref from 64 to 56 bytes. As the structure is often used as a local variable several functions reduce their stack usage. - make enum btrfs_ref_type packed, there are only 4 values - switch action and its values to a packed enum Final structure layout: struct btrfs_ref { enum btrfs_ref_type type; /* 0 1 */ enum btrfs_delayed_ref_action action; /* 1 1 */ bool skip_qgroup; /* 2 1 */ /* XXX 5 bytes hole, try to pack */ u64 bytenr; /* 8 8 */ u64 len; /* 16 8 */ u64 parent; /* 24 8 */ union { struct btrfs_data_ref data_ref; /* 32 24 */ struct btrfs_tree_ref tree_ref; /* 32 16 */ }; /* 32 24 */ /* size: 56, cachelines: 1, members: 7 */ /* sum members: 51, holes: 1, sum holes: 5 */ /* last cacheline: 56 bytes */ }; Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/delayed-ref.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-)