@@ -51,6 +51,7 @@ struct xdp_sock {
struct list_head flush_node;
struct xsk_buff_pool *pool;
u16 queue_id;
+ u8 tx_metadata_len;
bool zc;
bool sg;
enum {
@@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from xsk_sock */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
@@ -69,6 +69,7 @@ struct xdp_mmap_offsets {
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
#define XDP_OPTIONS 8
+#define XDP_TX_METADATA_LEN 9
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -1337,6 +1337,26 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
mutex_unlock(&xs->mutex);
return err;
}
+ case XDP_TX_METADATA_LEN:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+ if (!val || val > 256 || val % 4)
+ return -EINVAL;
+
+ mutex_lock(&xs->mutex);
+ if (xs->state != XSK_READY) {
+ mutex_unlock(&xs->mutex);
+ return -EBUSY;
+ }
+ xs->tx_metadata_len = val;
+ mutex_unlock(&xs->mutex);
+ return 0;
+ }
default:
break;
}
@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = xs->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
For zerocopy mode, tx_desc->addr can point to the arbitrary offset and carry some TX metadata in the headroom. For copy mode, there is no way currently to populate skb metadata. Introduce new XDP_TX_METADATA_LEN that indicates how many bytes to treat as metadata. Metadata bytes come prior to tx_desc address (same as in RX case). The size of the metadata has the same constraints as XDP: - less than 256 bytes - 4-byte aligned - non-zero This data is not interpreted in any way right now. Signed-off-by: Stanislav Fomichev <sdf@google.com> --- include/net/xdp_sock.h | 1 + include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 1 + net/xdp/xsk.c | 20 ++++++++++++++++++++ net/xdp/xsk_buff_pool.c | 1 + net/xdp/xsk_queue.h | 17 ++++++++++------- 6 files changed, 34 insertions(+), 7 deletions(-)