@@ -137,29 +137,29 @@ struct receive_queue {
struct napi_struct napi;
+ /* Name of this receive queue: input.$index */
+ char name[40];
+
struct bpf_prog __rcu *xdp_prog;
struct virtnet_rq_stats stats;
+ /* RX: fragments + linear part + virtio header */
+ struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+ /* Page frag for packet buffer allocation. */
+ struct page_frag alloc_frag;
+
/* Chain pages by the private ptr. */
struct page *pages;
/* Average packet length for mergeable receive buffers. */
struct ewma_pkt_len mrg_avg_pkt_len;
- /* Page frag for packet buffer allocation. */
- struct page_frag alloc_frag;
-
- /* RX: fragments + linear part + virtio header */
- struct scatterlist sg[MAX_SKB_FRAGS + 2];
+ struct xdp_rxq_info xdp_rxq;
/* Min single buffer size for mergeable buffers case. */
unsigned int min_buf_len;
-
- /* Name of this receive queue: input.$index */
- char name[40];
-
- struct xdp_rxq_info xdp_rxq;
};
/* Control VQ buffers: protected by the rtnl lock */
@@ -202,33 +202,33 @@ struct virtnet_info {
/* Host can handle any s/g split between our header and packet data */
bool any_header_sg;
+ /* Has the affinity hint been set for virtqueues? */
+ bool affinity_hint_set;
+
/* Packet virtio header size */
u8 hdr_len;
- /* Work struct for refilling if we run low on memory. */
- struct delayed_work refill;
+ struct control_buf *ctrl;
/* Work struct for config space updates */
struct work_struct config_work;
- /* Does the affinity hint is set for virtqueues? */
- bool affinity_hint_set;
-
/* CPU hotplug instances for online & dead */
struct hlist_node node;
struct hlist_node node_dead;
- struct control_buf *ctrl;
-
- /* Ethtool settings */
- u8 duplex;
- u32 speed;
-
unsigned long guest_offloads;
unsigned long guest_offloads_capable;
/* failover when STANDBY feature enabled */
struct failover *failover;
+
+ /* Work struct for refilling if we run low on memory. */
+ struct delayed_work refill;
+
+ /* Ethtool settings */
+ u32 speed;
+ u8 duplex;
};
struct padded_vnet_hdr {
Analysis of the structure virtnet_info using pahole gives the following stats.

    /* size: 256, cachelines: 4, members: 25 */
    /* sum members: 245, holes: 3, sum holes: 11 */
    /* paddings: 1, sum paddings: 4 */

Reordering the members of virtnet_info packs the byte holes in the middle of
the structure, reduces the size required by the structure by 8 bytes, and also
allows members to be stored without unnecessarily overstepping the boundaries
of a cacheline (for a cacheline size of 64 bytes).

Analysis using pahole after reordering the members gives the following stats.

    /* size: 248, cachelines: 4, members: 25 */
    /* padding: 3 */
    /* paddings: 1, sum paddings: 4 */
    /* last cacheline: 56 bytes */

Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
---
The complete analysis done by pahole can be found below.

Before the change:

struct virtnet_info {
    struct virtio_device *     vdev;                   /*     0     8 */
    struct virtqueue *         cvq;                    /*     8     8 */
    struct net_device *        dev;                    /*    16     8 */
    struct send_queue *        sq;                     /*    24     8 */
    struct receive_queue *     rq;                     /*    32     8 */
    unsigned int               status;                 /*    40     4 */
    u16                        max_queue_pairs;        /*    44     2 */
    u16                        curr_queue_pairs;       /*    46     2 */
    u16                        xdp_queue_pairs;        /*    48     2 */
    bool                       big_packets;            /*    50     1 */
    bool                       mergeable_rx_bufs;      /*    51     1 */
    bool                       has_cvq;                /*    52     1 */
    bool                       any_header_sg;          /*    53     1 */
    u8                         hdr_len;                /*    54     1 */

    /* XXX 1 byte hole, try to pack */

    struct delayed_work        refill;                 /*    56    88 */

    /* XXX last struct has 4 bytes of padding */

    /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
    struct work_struct         config_work;            /*   144    32 */
    bool                       affinity_hint_set;      /*   176     1 */

    /* XXX 7 bytes hole, try to pack */

    struct hlist_node          node;                   /*   184    16 */
    /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
    struct hlist_node          node_dead;              /*   200    16 */
    struct control_buf *       ctrl;                   /*   216     8 */
    u8                         duplex;                 /*   224     1 */

    /* XXX 3 bytes hole, try to pack */

    u32                        speed;                  /*   228     4 */
    long unsigned int          guest_offloads;         /*   232     8 */
    long unsigned int          guest_offloads_capable; /*   240     8 */
    struct failover *          failover;               /*   248     8 */

    /* size: 256, cachelines: 4, members: 25 */
    /* sum members: 245, holes: 3, sum holes: 11 */
    /* paddings: 1, sum paddings: 4 */
};

After the change:

struct virtnet_info {
    struct virtio_device *     vdev;                   /*     0     8 */
    struct virtqueue *         cvq;                    /*     8     8 */
    struct net_device *        dev;                    /*    16     8 */
    struct send_queue *        sq;                     /*    24     8 */
    struct receive_queue *     rq;                     /*    32     8 */
    unsigned int               status;                 /*    40     4 */
    u16                        max_queue_pairs;        /*    44     2 */
    u16                        curr_queue_pairs;       /*    46     2 */
    u16                        xdp_queue_pairs;        /*    48     2 */
    bool                       big_packets;            /*    50     1 */
    bool                       mergeable_rx_bufs;      /*    51     1 */
    bool                       has_cvq;                /*    52     1 */
    bool                       any_header_sg;          /*    53     1 */
    bool                       affinity_hint_set;      /*    54     1 */
    u8                         hdr_len;                /*    55     1 */
    struct control_buf *       ctrl;                   /*    56     8 */
    /* --- cacheline 1 boundary (64 bytes) --- */
    struct work_struct         config_work;            /*    64    32 */
    struct hlist_node          node;                   /*    96    16 */
    struct hlist_node          node_dead;              /*   112    16 */
    /* --- cacheline 2 boundary (128 bytes) --- */
    long unsigned int          guest_offloads;         /*   128     8 */
    long unsigned int          guest_offloads_capable; /*   136     8 */
    struct failover *          failover;               /*   144     8 */
    struct delayed_work        refill;                 /*   152    88 */

    /* XXX last struct has 4 bytes of padding */

    /* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */
    u32                        speed;                  /*   240     4 */
    u8                         duplex;                 /*   244     1 */

    /* size: 248, cachelines: 4, members: 25 */
    /* padding: 3 */
    /* paddings: 1, sum paddings: 4 */
    /* last cacheline: 56 bytes */
};

It can be seen that the size has been reduced by 8 bytes, and the holes have
been eliminated as well. Also, more members of virtnet_info are accommodated
within one cacheline (without unnecessarily crossing over the cacheline
boundary).

 drivers/net/virtio_net.c | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)