Message ID | 20231019174944.3376335-2-sdf@google.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | xsk: TX metadata | expand |
On Friday, October 20, 2023 1:50 AM Stanislav Fomichev <sdf@google.com> wrote: >For zerocopy mode, tx_desc->addr can point to the arbitrary offset >and carry some TX metadata in the headroom. For copy mode, there >is no way currently to populate skb metadata. > >Introduce new tx_metadata_len umem config option that indicates how many >bytes to treat as metadata. Metadata bytes come prior to tx_desc address >(same as in RX case). > >The size of the metadata has the same constraints as XDP: >- less than 256 bytes >- 4-byte aligned >- non-zero > >This data is not interpreted in any way right now. > >Signed-off-by: Stanislav Fomichev <sdf@google.com> LGTM. Reviewed-by: Song Yoong Siang <yoong.siang.song@intel.com> >--- > include/net/xdp_sock.h | 1 + > include/net/xsk_buff_pool.h | 1 + > include/uapi/linux/if_xdp.h | 1 + > net/xdp/xdp_umem.c | 4 ++++ > net/xdp/xsk.c | 12 +++++++++++- > net/xdp/xsk_buff_pool.c | 1 + > net/xdp/xsk_queue.h | 17 ++++++++++------- > tools/include/uapi/linux/if_xdp.h | 1 + > 8 files changed, 30 insertions(+), 8 deletions(-) > >diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h >index 7dd0df2f6f8e..5ae88a00f34a 100644 >--- a/include/net/xdp_sock.h >+++ b/include/net/xdp_sock.h >@@ -30,6 +30,7 @@ struct xdp_umem { > struct user_struct *user; > refcount_t users; > u8 flags; >+ u8 tx_metadata_len; > bool zc; > struct page **pgs; > int id; >diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h >index b0bdff26fc88..1985ffaf9b0c 100644 >--- a/include/net/xsk_buff_pool.h >+++ b/include/net/xsk_buff_pool.h >@@ -77,6 +77,7 @@ struct xsk_buff_pool { > u32 chunk_size; > u32 chunk_shift; > u32 frame_len; >+ u8 tx_metadata_len; /* inherited from umem */ > u8 cached_need_wakeup; > bool uses_need_wakeup; > bool dma_need_sync; >diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h >index 8d48863472b9..2ecf79282c26 100644 >--- a/include/uapi/linux/if_xdp.h >+++ b/include/uapi/linux/if_xdp.h >@@ -76,6 +76,7 @@ struct xdp_umem_reg { > __u32 chunk_size; > __u32 headroom; > __u32 flags; >+ __u32 tx_metadata_len; > }; > > struct xdp_statistics { >diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c >index 06cead2b8e34..333f3d53aad4 100644 >--- a/net/xdp/xdp_umem.c >+++ b/net/xdp/xdp_umem.c >@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct >xdp_umem_reg *mr) > if (headroom >= chunk_size - XDP_PACKET_HEADROOM) > return -EINVAL; > >+ if (mr->tx_metadata_len > 256 || mr->tx_metadata_len % 4) >+ return -EINVAL; >+ > umem->size = size; > umem->headroom = headroom; > umem->chunk_size = chunk_size; >@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct >xdp_umem_reg *mr) > umem->pgs = NULL; > umem->user = NULL; > umem->flags = mr->flags; >+ umem->tx_metadata_len = mr->tx_metadata_len; > > INIT_LIST_HEAD(&umem->xsk_dma_list); > refcount_set(&umem->users, 1); >diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c >index ba070fd37d24..ba4c77a24a83 100644 >--- a/net/xdp/xsk.c >+++ b/net/xdp/xsk.c >@@ -1265,6 +1265,14 @@ struct xdp_umem_reg_v1 { > __u32 headroom; > }; > >+struct xdp_umem_reg_v2 { >+ __u64 addr; /* Start of packet data area */ >+ __u64 len; /* Length of packet data area */ >+ __u32 chunk_size; >+ __u32 headroom; >+ __u32 flags; >+}; >+ > static int xsk_setsockopt(struct socket *sock, int level, int optname, > sockptr_t optval, unsigned int optlen) > { >@@ -1308,8 +1316,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int >optname, > > if (optlen < sizeof(struct xdp_umem_reg_v1)) > return -EINVAL; >- else if (optlen < sizeof(mr)) >+ else if (optlen < sizeof(struct xdp_umem_reg_v2)) > mr_size = sizeof(struct xdp_umem_reg_v1); >+ else if (optlen < sizeof(mr)) >+ mr_size = sizeof(struct xdp_umem_reg_v2); > > if (copy_from_sockptr(&mr, optval, mr_size)) > return -EFAULT; >diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c >index 49cb9f9a09be..386eddcdf837 100644 >--- a/net/xdp/xsk_buff_pool.c >+++ b/net/xdp/xsk_buff_pool.c >@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct >xdp_sock *xs, > XDP_PACKET_HEADROOM; > pool->umem = umem; > pool->addrs = umem->addrs; >+ pool->tx_metadata_len = umem->tx_metadata_len; > INIT_LIST_HEAD(&pool->free_list); > INIT_LIST_HEAD(&pool->xskb_list); > INIT_LIST_HEAD(&pool->xsk_tx_list); >diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h >index 13354a1e4280..c74a1372bcb9 100644 >--- a/net/xdp/xsk_queue.h >+++ b/net/xdp/xsk_queue.h >@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) > static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, > struct xdp_desc *desc) > { >- u64 offset = desc->addr & (pool->chunk_size - 1); >+ u64 addr = desc->addr - pool->tx_metadata_len; >+ u64 len = desc->len + pool->tx_metadata_len; >+ u64 offset = addr & (pool->chunk_size - 1); > > if (!desc->len) > return false; > >- if (offset + desc->len > pool->chunk_size) >+ if (offset + len > pool->chunk_size) > return false; > >- if (desc->addr >= pool->addrs_cnt) >+ if (addr >= pool->addrs_cnt) > return false; > > if (xp_unused_options_set(desc->options)) >@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct >xsk_buff_pool *pool, > static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, > struct xdp_desc *desc) > { >- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); >+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool- >>tx_metadata_len; >+ u64 len = desc->len + pool->tx_metadata_len; > > if (!desc->len) > return false; > >- if (desc->len > pool->chunk_size) >+ if (len > pool->chunk_size) > return false; > >- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || >- xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) >+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || >+ xp_desc_crosses_non_contig_pg(pool, addr, len)) > return false; > > if (xp_unused_options_set(desc->options)) >diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h >index 73a47da885dc..34411a2e5b6c 100644 >--- a/tools/include/uapi/linux/if_xdp.h >+++ b/tools/include/uapi/linux/if_xdp.h >@@ -76,6 +76,7 @@ struct xdp_umem_reg { > __u32 chunk_size; > __u32 headroom; > __u32 flags; >+ __u32 tx_metadata_len; > }; > > struct xdp_statistics { >-- >2.42.0.655.g421f12c284-goog
On Thu, 19 Oct 2023 10:49:34 -0700 Stanislav Fomichev wrote:
> - 4-byte aligned
But there is an 8B field in it. Won't this trap on some funky
architecture of yore?
On Thu, 19 Oct 2023 at 19:50, Stanislav Fomichev <sdf@google.com> wrote: > > For zerocopy mode, tx_desc->addr can point to the arbitrary offset nit: the -> an > and carry some TX metadata in the headroom. For copy mode, there > is no way currently to populate skb metadata. > > Introduce new tx_metadata_len umem config option that indicates how many > bytes to treat as metadata. Metadata bytes come prior to tx_desc address > (same as in RX case). > > The size of the metadata has the same constraints as XDP: > - less than 256 bytes > - 4-byte aligned > - non-zero > > This data is not interpreted in any way right now. > > Signed-off-by: Stanislav Fomichev <sdf@google.com> > --- > include/net/xdp_sock.h | 1 + > include/net/xsk_buff_pool.h | 1 + > include/uapi/linux/if_xdp.h | 1 + > net/xdp/xdp_umem.c | 4 ++++ > net/xdp/xsk.c | 12 +++++++++++- > net/xdp/xsk_buff_pool.c | 1 + > net/xdp/xsk_queue.h | 17 ++++++++++------- > tools/include/uapi/linux/if_xdp.h | 1 + > 8 files changed, 30 insertions(+), 8 deletions(-) > > diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h > index 7dd0df2f6f8e..5ae88a00f34a 100644 > --- a/include/net/xdp_sock.h > +++ b/include/net/xdp_sock.h > @@ -30,6 +30,7 @@ struct xdp_umem { > struct user_struct *user; > refcount_t users; > u8 flags; > + u8 tx_metadata_len; > bool zc; > struct page **pgs; > int id; > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h > index b0bdff26fc88..1985ffaf9b0c 100644 > --- a/include/net/xsk_buff_pool.h > +++ b/include/net/xsk_buff_pool.h > @@ -77,6 +77,7 @@ struct xsk_buff_pool { > u32 chunk_size; > u32 chunk_shift; > u32 frame_len; > + u8 tx_metadata_len; /* inherited from umem */ > u8 cached_need_wakeup; > bool uses_need_wakeup; > bool dma_need_sync; > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h > index 8d48863472b9..2ecf79282c26 100644 > --- a/include/uapi/linux/if_xdp.h > +++ b/include/uapi/linux/if_xdp.h > @@ -76,6 +76,7 @@ struct xdp_umem_reg { > __u32 chunk_size; > __u32 headroom; > __u32 flags; > + __u32 tx_metadata_len; > }; > > struct xdp_statistics { > diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c > index 06cead2b8e34..333f3d53aad4 100644 > --- a/net/xdp/xdp_umem.c > +++ b/net/xdp/xdp_umem.c > @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) > if (headroom >= chunk_size - XDP_PACKET_HEADROOM) > return -EINVAL; > > + if (mr->tx_metadata_len > 256 || mr->tx_metadata_len % 4) > + return -EINVAL; Should be >= 256 since the final internal destination is a u8 and the documentation says "should be less than 256 bytes". > + > umem->size = size; > umem->headroom = headroom; > umem->chunk_size = chunk_size; > @@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) > umem->pgs = NULL; > umem->user = NULL; > umem->flags = mr->flags; > + umem->tx_metadata_len = mr->tx_metadata_len; > > INIT_LIST_HEAD(&umem->xsk_dma_list); > refcount_set(&umem->users, 1); > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c > index ba070fd37d24..ba4c77a24a83 100644 > --- a/net/xdp/xsk.c > +++ b/net/xdp/xsk.c > @@ -1265,6 +1265,14 @@ struct xdp_umem_reg_v1 { > __u32 headroom; > }; > > +struct xdp_umem_reg_v2 { > + __u64 addr; /* Start of packet data area */ > + __u64 len; /* Length of packet data area */ > + __u32 chunk_size; > + __u32 headroom; > + __u32 flags; > +}; > + > static int xsk_setsockopt(struct socket *sock, int level, int optname, > sockptr_t optval, unsigned int optlen) > { > @@ -1308,8 +1316,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, > > if (optlen < sizeof(struct xdp_umem_reg_v1)) > return -EINVAL; > - else if (optlen < sizeof(mr)) > + else if (optlen < sizeof(struct xdp_umem_reg_v2)) > mr_size = sizeof(struct xdp_umem_reg_v1); > + else if (optlen < sizeof(mr)) > + mr_size = sizeof(struct xdp_umem_reg_v2); > > if (copy_from_sockptr(&mr, optval, mr_size)) > return -EFAULT; > diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c > index 49cb9f9a09be..386eddcdf837 100644 > --- a/net/xdp/xsk_buff_pool.c > +++ b/net/xdp/xsk_buff_pool.c > @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, > XDP_PACKET_HEADROOM; > pool->umem = umem; > pool->addrs = umem->addrs; > + pool->tx_metadata_len = umem->tx_metadata_len; > INIT_LIST_HEAD(&pool->free_list); > INIT_LIST_HEAD(&pool->xskb_list); > INIT_LIST_HEAD(&pool->xsk_tx_list); > diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h > index 13354a1e4280..c74a1372bcb9 100644 > --- a/net/xdp/xsk_queue.h > +++ b/net/xdp/xsk_queue.h > @@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) > static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, > struct xdp_desc *desc) > { > - u64 offset = desc->addr & (pool->chunk_size - 1); > + u64 addr = desc->addr - pool->tx_metadata_len; > + u64 len = desc->len + pool->tx_metadata_len; > + u64 offset = addr & (pool->chunk_size - 1); > > if (!desc->len) > return false; > > - if (offset + desc->len > pool->chunk_size) > + if (offset + len > pool->chunk_size) > return false; > > - if (desc->addr >= pool->addrs_cnt) > + if (addr >= pool->addrs_cnt) > return false; > > if (xp_unused_options_set(desc->options)) > @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, > static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, > struct xdp_desc *desc) > { > - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); > + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; > + u64 len = desc->len + pool->tx_metadata_len; > > if (!desc->len) > return false; > > - if (desc->len > pool->chunk_size) > + if (len > pool->chunk_size) > return false; > > - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || > - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) > + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || > + xp_desc_crosses_non_contig_pg(pool, addr, len)) > return false; > > if (xp_unused_options_set(desc->options)) > diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h > index 73a47da885dc..34411a2e5b6c 100644 > --- a/tools/include/uapi/linux/if_xdp.h > +++ b/tools/include/uapi/linux/if_xdp.h > @@ -76,6 +76,7 @@ struct xdp_umem_reg { > __u32 chunk_size; > __u32 headroom; > __u32 flags; > + __u32 tx_metadata_len; > }; > > struct xdp_statistics { > -- > 2.42.0.655.g421f12c284-goog > >
On 10/20, Jakub Kicinski wrote: > On Thu, 19 Oct 2023 10:49:34 -0700 Stanislav Fomichev wrote: > > - 4-byte aligned > > But there is an 8B field in it. Won't this trap on some funky > architecture of yore? Hmm, good point, will update this to 8 byte alignment and will use 8-bit timestamp as a reason.
On 10/23, Magnus Karlsson wrote: > On Thu, 19 Oct 2023 at 19:50, Stanislav Fomichev <sdf@google.com> wrote: > > > > For zerocopy mode, tx_desc->addr can point to the arbitrary offset > > nit: the -> an Thanks! > > and carry some TX metadata in the headroom. For copy mode, there > > is no way currently to populate skb metadata. > > > > Introduce new tx_metadata_len umem config option that indicates how many > > bytes to treat as metadata. Metadata bytes come prior to tx_desc address > > (same as in RX case). > > > > The size of the metadata has the same constraints as XDP: > > - less than 256 bytes > > - 4-byte aligned > > - non-zero > > > > This data is not interpreted in any way right now. > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com> > > --- > > include/net/xdp_sock.h | 1 + > > include/net/xsk_buff_pool.h | 1 + > > include/uapi/linux/if_xdp.h | 1 + > > net/xdp/xdp_umem.c | 4 ++++ > > net/xdp/xsk.c | 12 +++++++++++- > > net/xdp/xsk_buff_pool.c | 1 + > > net/xdp/xsk_queue.h | 17 ++++++++++------- > > tools/include/uapi/linux/if_xdp.h | 1 + > > 8 files changed, 30 insertions(+), 8 deletions(-) > > > > diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h > > index 7dd0df2f6f8e..5ae88a00f34a 100644 > > --- a/include/net/xdp_sock.h > > +++ b/include/net/xdp_sock.h > > @@ -30,6 +30,7 @@ struct xdp_umem { > > struct user_struct *user; > > refcount_t users; > > u8 flags; > > + u8 tx_metadata_len; > > bool zc; > > struct page **pgs; > > int id; > > diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h > > index b0bdff26fc88..1985ffaf9b0c 100644 > > --- a/include/net/xsk_buff_pool.h > > +++ b/include/net/xsk_buff_pool.h > > @@ -77,6 +77,7 @@ struct xsk_buff_pool { > > u32 chunk_size; > > u32 chunk_shift; > > u32 frame_len; > > + u8 tx_metadata_len; /* inherited from umem */ > > u8 cached_need_wakeup; > > bool uses_need_wakeup; > > bool dma_need_sync; > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h > > index 8d48863472b9..2ecf79282c26 100644 > > --- a/include/uapi/linux/if_xdp.h > > +++ b/include/uapi/linux/if_xdp.h > > @@ -76,6 +76,7 @@ struct xdp_umem_reg { > > __u32 chunk_size; > > __u32 headroom; > > __u32 flags; > > + __u32 tx_metadata_len; > > }; > > > > struct xdp_statistics { > > diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c > > index 06cead2b8e34..333f3d53aad4 100644 > > --- a/net/xdp/xdp_umem.c > > +++ b/net/xdp/xdp_umem.c > > @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) > > if (headroom >= chunk_size - XDP_PACKET_HEADROOM) > > return -EINVAL; > > > > + if (mr->tx_metadata_len > 256 || mr->tx_metadata_len % 4) > > + return -EINVAL; > > Should be >= 256 since the final internal destination is a u8 and the > documentation says "should be less than 256 bytes". Thanks, will fix.
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 7dd0df2f6f8e..5ae88a00f34a 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -30,6 +30,7 @@ struct xdp_umem { struct user_struct *user; refcount_t users; u8 flags; + u8 tx_metadata_len; bool zc; struct page **pgs; int id; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index b0bdff26fc88..1985ffaf9b0c 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -77,6 +77,7 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; bool dma_need_sync; diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 8d48863472b9..2ecf79282c26 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 06cead2b8e34..333f3d53aad4 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; + if (mr->tx_metadata_len > 256 || mr->tx_metadata_len % 4) + return -EINVAL; + umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; @@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; + umem->tx_metadata_len = mr->tx_metadata_len; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ba070fd37d24..ba4c77a24a83 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1265,6 +1265,14 @@ struct xdp_umem_reg_v1 { __u32 headroom; }; +struct xdp_umem_reg_v2 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1308,8 +1316,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(struct xdp_umem_reg_v1)) return -EINVAL; - else if (optlen < sizeof(mr)) + else if (optlen < sizeof(struct xdp_umem_reg_v2)) mr_size = sizeof(struct xdp_umem_reg_v1); + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v2); if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09be..386eddcdf837 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; + pool->tx_metadata_len = umem->tx_metadata_len; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 13354a1e4280..c74a1372bcb9 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 offset = desc->addr & (pool->chunk_size - 1); + u64 addr = desc->addr - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; + u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; - if (offset + desc->len > pool->chunk_size) + if (offset + len > pool->chunk_size) return false; - if (desc->addr >= pool->addrs_cnt) + if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; - if (desc->len > pool->chunk_size) + if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 73a47da885dc..34411a2e5b6c 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics {
For zerocopy mode, tx_desc->addr can point to the arbitrary offset and carry some TX metadata in the headroom. For copy mode, there is no way currently to populate skb metadata. Introduce new tx_metadata_len umem config option that indicates how many bytes to treat as metadata. Metadata bytes come prior to tx_desc address (same as in RX case). The size of the metadata has the same constraints as XDP: - less than 256 bytes - 4-byte aligned - non-zero This data is not interpreted in any way right now. Signed-off-by: Stanislav Fomichev <sdf@google.com> --- include/net/xdp_sock.h | 1 + include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 1 + net/xdp/xdp_umem.c | 4 ++++ net/xdp/xsk.c | 12 +++++++++++- net/xdp/xsk_buff_pool.c | 1 + net/xdp/xsk_queue.h | 17 ++++++++++------- tools/include/uapi/linux/if_xdp.h | 1 + 8 files changed, 30 insertions(+), 8 deletions(-)