@@ -26,6 +26,10 @@
#include <linux/module.h>
#include <net/ip6_checksum.h>
+#include <linux/filter.h>
+#include <linux/bpf_trace.h>
+#include <linux/netlink.h>
+#include <net/xdp.h>
#include "vmxnet3_int.h"
@@ -47,6 +51,8 @@ static int enable_mq = 1;
static void
vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, const u8 *mac);
+static int
+vmxnet3_xdp_headroom(struct vmxnet3_adapter *adapter);
/*
* Enable/Disable the given intr
@@ -592,6 +598,9 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
rbi->len,
GFP_KERNEL);
+ if (adapter->xdp_enabled && rbi->skb)
+ skb_reserve(rbi->skb, XDP_PACKET_HEADROOM);
+
if (unlikely(rbi->skb == NULL)) {
rq->stats.rx_buf_alloc_failure++;
break;
@@ -1387,6 +1396,182 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb,
return (hlen + (hdr.tcp->doff << 2));
}
+static int
+vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+ struct xdp_frame *xdpf,
+ struct sk_buff *skb,
+ struct vmxnet3_tx_queue *tq);
+
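+/* Transmit an XDP_TX frame on the TX queue picked from the current CPU
+ * (cpu modulo the number of TX queues). The queue's netdev TX lock
+ * serializes this path with the regular start_xmit path.
+ */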
+static int
+vmxnet3_xdp_xmit_back(struct vmxnet3_adapter *adapter,
+ struct xdp_frame *xdpf,
+ struct sk_buff *skb)
+{
+ struct vmxnet3_tx_queue *tq;
+ struct netdev_queue *nq;
+ int err = 0, cpu;
+ int tq_number;
+
+ tq_number = adapter->num_tx_queues;
+ cpu = smp_processor_id();
+ tq = &adapter->tx_queue[cpu % tq_number];
+ nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+ __netif_tx_lock(nq, cpu);
+
+ err = vmxnet3_xdp_xmit_frame(adapter, xdpf, skb, tq);
+
+ __netif_tx_unlock(nq);
+
+ return err;
+}
+
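+/* Build a single SOP/EOP TX descriptor for one XDP frame: map the frame
+ * for DMA, fill the descriptor, flip its generation bit over to the
+ * device, and ring the TXPROD doorbell once txThreshold descriptors
+ * have been deferred.
+ */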
+static int
+vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
+ struct xdp_frame *xdpf,
+ struct sk_buff *skb,
+ struct vmxnet3_tx_queue *tq)
+{
+ struct vmxnet3_tx_ctx ctx;
+ struct vmxnet3_tx_buf_info *tbi = NULL;
+ union Vmxnet3_GenericDesc *gdesc;
+ int tx_num_deferred;
+ u32 buf_size;
+ u32 dw2;
+ int ret = 0;
+
+ if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
+ tq->stats.tx_ring_full++;
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
+ dw2 |= xdpf->len;
+ ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
+ gdesc = ctx.sop_txd;
+
+ buf_size = xdpf->len;
+ tbi = tq->buf_info + tq->tx_ring.next2fill;
+ tbi->map_type = VMXNET3_MAP_SINGLE;
+ tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
+ xdpf->data, buf_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ tbi->len = buf_size;
+
+ gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
+ BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
+
+ gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
+ gdesc->dword[2] = cpu_to_le32(dw2);
+
+ /* Setup the EOP desc */
+ gdesc->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
+
+ gdesc->txd.om = 0;
+ gdesc->txd.msscof = 0;
+ gdesc->txd.hlen = 0;
+ gdesc->txd.ti = 0;
+
+ tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
+ tq->shared->txNumDeferred = cpu_to_le32(tx_num_deferred + 1);
+ tx_num_deferred++;
+
+ vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
+
+ /* set the last buf_info for the pkt */
+ tbi->skb = skb;
+ tbi->sop_idx = ctx.sop_txd - tq->tx_ring.base;
+
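+ /* Make sure the descriptor is fully written before its
+ * generation bit hands it over to the device.
+ */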
+ dma_wmb();
+ gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
+ VMXNET3_TXD_GEN);
+ if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
+ tq->shared->txNumDeferred = 0;
+ VMXNET3_WRITE_BAR0_REG(adapter,
+ VMXNET3_REG_TXPROD + tq->qid * 8,
+ tq->tx_ring.next2fill);
+ }
+exit:
+ return ret;
+}
+
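+/* Run the attached XDP program on one completed RX frame. The frame was
+ * received XDP_PACKET_HEADROOM bytes into the buffer, so the program
+ * sees the full headroom. Returns the XDP verdict; on XDP_PASS the skb
+ * is adjusted for any head/tail moves the program made.
+ */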
+static int
+vmxnet3_run_xdp(struct vmxnet3_rx_queue *rq, struct sk_buff *skb,
+ int frame_sz, bool *need_xdp_flush)
+{
+ struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
+ int delta, delta_len, xdp_metasize;
+ int headroom = XDP_PACKET_HEADROOM;
+ struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
+ struct xdp_buff xdp;
+ void *orig_data;
+ void *buf_hard_start;
+ u32 act;
+
+ xdp_prog = READ_ONCE(rq->xdp_bpf_prog);
+ if (!xdp_prog)
+ return XDP_PASS;
+
+ buf_hard_start = skb->data - headroom;
+ xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
+ xdp_prepare_buff(&xdp, buf_hard_start,
+ headroom, skb->len, true);
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ rq->stats.xdp_packets++;
+
+ switch (act) {
+ case XDP_DROP:
+ if (ctx->skb)
+ dev_kfree_skb(ctx->skb);
+ ctx->skb = NULL;
+ rq->stats.xdp_drops++;
+ break;
+ case XDP_PASS:
+ /* Recalculate length in case the program changed it. */
+ delta = xdp.data - orig_data;
+ skb_reserve(skb, delta);
+ delta_len = (xdp.data_end - xdp.data) - skb->len;
+ xdp_metasize = xdp.data - xdp.data_meta;
+
+ skb_metadata_set(skb, xdp_metasize);
+ if (delta_len > 0)
+ skb_put(skb, delta_len);
+ else if (delta_len < 0)
+ __skb_trim(skb, skb->len + delta_len);
+ break;
+ case XDP_TX:
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (!xdpf || vmxnet3_xdp_xmit_back(rq->adapter, xdpf, skb)) {
+ dev_kfree_skb(ctx->skb);
+ rq->stats.xdp_drops++;
+ } else {
+ rq->stats.xdp_tx++;
+ }
+ ctx->skb = NULL;
+ break;
+ case XDP_ABORTED:
+ ctx->skb = NULL;
+ trace_xdp_exception(rq->adapter->netdev, xdp_prog, act);
+ rq->stats.xdp_aborted++;
+ break;
+ case XDP_REDIRECT: /* Not supported. */
+ default:
+ bpf_warn_invalid_xdp_action(rq->adapter->netdev,
+ xdp_prog, act);
+ if (ctx->skb)
+ dev_kfree_skb(ctx->skb);
+ ctx->skb = NULL;
+ rq->stats.xdp_drops++;
+ break;
+ return act;
+}
+
static int
vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
struct vmxnet3_adapter *adapter, int quota)
@@ -1404,6 +1589,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
struct Vmxnet3_RxDesc rxCmdDesc;
struct Vmxnet3_RxCompDesc rxComp;
#endif
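+ /* Plumbing for xdp_do_flush(); stays false for now since
+ * XDP_REDIRECT is not supported.
+ */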
+ bool need_xdp_flush = false;
+
vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
&rxComp);
while (rcd->gen == rq->comp_ring.gen) {
@@ -1469,7 +1656,9 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
rxDataRingUsed =
VMXNET3_RX_DATA_RING(adapter, rcd->rqID);
- len = rxDataRingUsed ? rcd->len : rbi->len;
+ len = rxDataRingUsed ?
+ rcd->len + vmxnet3_xdp_headroom(adapter) :
+ rbi->len;
new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
len);
if (new_skb == NULL) {
@@ -1483,6 +1672,9 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
goto rcd_done;
}
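+ /* Reserve the same XDP headroom in the replacement buffer
+ * so the next frame received into it can be run through
+ * the program as well.
+ */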
+ if (adapter->xdp_enabled)
+ skb_reserve(new_skb, XDP_PACKET_HEADROOM);
+
if (rxDataRingUsed) {
size_t sz;
@@ -1620,8 +1812,30 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
}
}
-
skb = ctx->skb;
+
+ if (rcd->sop && rcd->eop) {
+ int act;
+
+ if (!READ_ONCE(rq->xdp_bpf_prog))
+ goto skip_xdp;
+
+ act = vmxnet3_run_xdp(rq, skb, rbi->len,
+ &need_xdp_flush);
+ if (act != XDP_PASS)
+ goto rcd_done;
+ }
+skip_xdp:
if (rcd->eop) {
u32 mtu = adapter->netdev->mtu;
skb->len += skb->data_len;
@@ -1775,6 +1989,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
rq->comp_ring.gen = VMXNET3_INIT_GEN;
rq->comp_ring.next2proc = 0;
}
@@ -1787,6 +2002,32 @@ vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
}
+static void
+vmxnet3_unregister_xdp_rxq(struct vmxnet3_rx_queue *rq)
+{
+ xdp_rxq_info_unreg_mem_model(&rq->xdp_rxq);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+}
+
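+/* Register the queue's xdp_rxq_info with the XDP core so helpers such
+ * as xdp_convert_buff_to_frame() know which RX queue a buffer belongs
+ * to. No special memory model is used, hence MEM_TYPE_PAGE_SHARED,
+ * the default refcount-based model.
+ */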
+static int
+vmxnet3_register_xdp_rxq(struct vmxnet3_rx_queue *rq,
+ struct vmxnet3_adapter *adapter)
+{
+ int err;
+
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, adapter->netdev, rq->qid, 0);
+ if (err < 0)
+ return err;
+
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err < 0) {
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ return err;
+ }
+ return 0;
+}
static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
struct vmxnet3_adapter *adapter)
@@ -1831,6 +2072,8 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
kfree(rq->buf_info[0]);
rq->buf_info[0] = NULL;
rq->buf_info[1] = NULL;
+
+ vmxnet3_unregister_xdp_rxq(rq);
}
static void
@@ -1892,6 +2135,10 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
}
vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
+ /* Always register, even when no XDP program is attached. */
+ if (vmxnet3_register_xdp_rxq(rq, adapter))
+ return -EINVAL;
+
/* reset the comp ring */
rq->comp_ring.next2proc = 0;
memset(rq->comp_ring.base, 0, rq->comp_ring.size *
@@ -2593,7 +2840,8 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
if (adapter->netdev->features & NETIF_F_RXCSUM)
devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
- if (adapter->netdev->features & NETIF_F_LRO) {
+ if ((adapter->netdev->features & NETIF_F_LRO) &&
+ !adapter->xdp_enabled) {
devRead->misc.uptFeatures |= UPT1_F_LRO;
devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
}
@@ -3033,6 +3281,14 @@ vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
pci_disable_device(adapter->pdev);
}
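+/* Extra buffer space needed when XDP is enabled: XDP_PACKET_HEADROOM in
+ * front of the frame plus tailroom for a skb_shared_info, as XDP frame
+ * conversion expects (see VMXNET3_XDP_ROOM).
+ */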
+static int
+vmxnet3_xdp_headroom(struct vmxnet3_adapter *adapter)
+{
+ return adapter->xdp_enabled ? VMXNET3_XDP_ROOM : 0;
+}
static void
vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
@@ -3043,7 +3299,8 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
VMXNET3_MAX_ETH_HDR_SIZE) {
adapter->skb_buf_size = adapter->netdev->mtu +
- VMXNET3_MAX_ETH_HDR_SIZE;
+ VMXNET3_MAX_ETH_HDR_SIZE +
+ vmxnet3_xdp_headroom(adapter);
if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
@@ -3564,6 +3821,99 @@ vmxnet3_reset_work(struct work_struct *data)
clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
}
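+/* Point every RX queue at the new program and flip the adapter-wide
+ * xdp_enabled flag that the buffer-sizing and LRO paths key off.
+ */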
+static void
+vmxnet3_xdp_exchange_program(struct vmxnet3_adapter *adapter,
+ struct bpf_prog *prog)
+{
+ struct vmxnet3_rx_queue *rq;
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ rq = &adapter->rx_queue[i];
+ WRITE_ONCE(rq->xdp_bpf_prog, prog);
+ }
+ adapter->xdp_enabled = !!prog;
+}
+
+static int
+vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf,
+ struct netlink_ext_ack *extack)
+{
+ struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+ struct bpf_prog *new_bpf_prog = bpf->prog;
+ struct bpf_prog *old_bpf_prog;
+ bool use_dataring;
+ bool need_update;
+ bool running;
+ int err = 0;
+
+ if (new_bpf_prog && netdev->mtu > VMXNET3_XDP_MAX_MTU) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
+ return -EOPNOTSUPP;
+ }
+
+ use_dataring = VMXNET3_RX_DATA_RING(adapter, 0);
+ if (new_bpf_prog && use_dataring) {
+ NL_SET_ERR_MSG_MOD(extack, "RX data ring not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ old_bpf_prog = READ_ONCE(adapter->rx_queue[0].xdp_bpf_prog);
+ if (!new_bpf_prog && !old_bpf_prog) {
+ adapter->xdp_enabled = false;
+ return 0;
+ }
+ running = netif_running(netdev);
+ need_update = !!old_bpf_prog != !!new_bpf_prog;
+
+ if (running && need_update)
+ vmxnet3_quiesce_dev(adapter);
+
+ vmxnet3_xdp_exchange_program(adapter, new_bpf_prog);
+ if (old_bpf_prog)
+ bpf_prog_put(old_bpf_prog);
+
+ if (running && need_update) {
+ vmxnet3_reset_dev(adapter);
+ vmxnet3_rq_destroy_all(adapter);
+ vmxnet3_adjust_rx_ring_size(adapter);
+ err = vmxnet3_rq_create_all(adapter);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "failed to re-create rx queues for XDP.");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ err = vmxnet3_activate_dev(adapter);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "failed to activate device for XDP.");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
+ }
+out:
+ return err;
+}
+
+/* Entry point the kernel uses to install or remove an eBPF program. */
+static int
+vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return vmxnet3_xdp_set(netdev, bpf, bpf->extack);
+ default:
+ return -EINVAL;
+ }
+}
static int
vmxnet3_probe_device(struct pci_dev *pdev,
@@ -3586,6 +3936,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = vmxnet3_netpoll,
#endif
+ .ndo_bpf = vmxnet3_xdp,
};
int err;
u32 ver;
@@ -3901,6 +4252,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
goto err_register;
}
+ adapter->xdp_enabled = false;
vmxnet3_check_link(adapter, false);
return 0;
@@ -106,6 +106,16 @@ vmxnet3_rq_driver_stats[] = {
drop_fcs) },
{ " rx buf alloc fail", offsetof(struct vmxnet3_rq_driver_stats,
rx_buf_alloc_failure) },
+ { " xdp packets", offsetof(struct vmxnet3_rq_driver_stats,
+ xdp_packets) },
+ { " xdp tx", offsetof(struct vmxnet3_rq_driver_stats,
+ xdp_tx) },
+ { " xdp redirects", offsetof(struct vmxnet3_rq_driver_stats,
+ xdp_redirects) },
+ { " xdp drops", offsetof(struct vmxnet3_rq_driver_stats,
+ xdp_drops) },
+ { " xdp aborted", offsetof(struct vmxnet3_rq_driver_stats,
+ xdp_aborted) },
};
/* global stats maintained by the driver */
@@ -56,6 +56,8 @@
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/log2.h>
+#include <linux/bpf.h>
+#include <linux/skbuff.h>
#include "vmxnet3_defs.h"
@@ -285,6 +287,12 @@ struct vmxnet3_rq_driver_stats {
u64 drop_err;
u64 drop_fcs;
u64 rx_buf_alloc_failure;
+
+ u64 xdp_packets; /* Total packets processed by XDP. */
+ u64 xdp_tx;
+ u64 xdp_redirects;
+ u64 xdp_drops;
+ u64 xdp_aborted;
};
struct vmxnet3_rx_data_ring {
@@ -307,6 +315,8 @@ struct vmxnet3_rx_queue {
struct vmxnet3_rx_buf_info *buf_info[2];
struct Vmxnet3_RxQueueCtrl *shared;
struct vmxnet3_rq_driver_stats stats;
+ struct bpf_prog *xdp_bpf_prog;
+ struct xdp_rxq_info xdp_rxq;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define VMXNET3_DEVICE_MAX_TX_QUEUES 32
@@ -415,6 +425,7 @@ struct vmxnet3_adapter {
u16 tx_prod_offset;
u16 rx_prod_offset;
u16 rx_prod2_offset;
+ bool xdp_enabled;
};
#define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \
@@ -457,6 +468,11 @@ struct vmxnet3_adapter {
#define VMXNET3_MAX_ETH_HDR_SIZE 22
#define VMXNET3_MAX_SKB_BUF_SIZE (3*1024)
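+/* XDP needs XDP_PACKET_HEADROOM in front of the frame and room for a
+ * skb_shared_info behind it, so the largest MTU usable with XDP is the
+ * 3 KB single-buffer size minus that overhead.
+ */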
+#define VMXNET3_XDP_ROOM (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \
+ XDP_PACKET_HEADROOM)
+#define VMXNET3_XDP_MAX_MTU (VMXNET3_MAX_SKB_BUF_SIZE - VMXNET3_XDP_ROOM)
+
#define VMXNET3_GET_RING_IDX(adapter, rqID) \
((rqID >= adapter->num_rx_queues && \
rqID < 2 * adapter->num_rx_queues) ? 1 : 0) \