
[net-next,v5,3/6] octeontx2-pf: AF_XDP zero copy receive support

Message ID 20250206085034.1978172-4-sumang@marvell.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series Add af_xdp support for cn10k

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 15 of 15 maintainers
netdev/build_clang fail Errors and warnings before: 68 this patch: 69
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Suman Ghosh Feb. 6, 2025, 8:50 a.m. UTC
This patch adds AF_XDP zero-copy support for CN10K.
This patch specifically adds receive side support. In this approach, once
an XDP program with zero-copy support is enabled on a specific RX queue,
that receive queue is disabled/detached from the existing kernel buffer
pool and re-assigned to the UMEM memory.
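
For context, the userspace side that exercises this path is the standard
AF_XDP zero-copy bind. A minimal sketch, assuming libxdp's xsk helpers
(xsk_umem__create()/xsk_socket__create()); the names and sizes below are
illustrative only and not part of this patch:

	#include <sys/mman.h>
	#include <linux/if_xdp.h>
	#include <xdp/xsk.h>

	#define NUM_FRAMES	4096
	#define FRAME_SIZE	XSK_UMEM__DEFAULT_FRAME_SIZE

	static struct xsk_socket *bind_zc_socket(const char *ifname, __u32 qid)
	{
		/* The rings are registered into the umem/socket, so they
		 * must outlive this function; static keeps the sketch short.
		 */
		static struct xsk_ring_prod fq, tx;
		static struct xsk_ring_cons cq, rx;
		struct xsk_socket *xsk;
		struct xsk_umem *umem;
		void *bufs;

		/* UMEM backing store; in zero-copy mode the NIC DMAs
		 * directly into these frames.
		 */
		bufs = mmap(NULL, NUM_FRAMES * FRAME_SIZE,
			    PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (bufs == MAP_FAILED)
			return NULL;
		if (xsk_umem__create(&umem, bufs, NUM_FRAMES * FRAME_SIZE,
				     &fq, &cq, NULL))
			return NULL;

		struct xsk_socket_config cfg = {
			.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
			.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
			/* Request zero-copy: the driver detaches qid from
			 * its kernel pool and refills it from this UMEM.
			 */
			.bind_flags = XDP_ZEROCOPY | XDP_USE_NEED_WAKEUP,
		};
		if (xsk_socket__create(&xsk, ifname, qid, umem, &rx, &tx, &cfg))
			return NULL;
		return xsk;
	}

After the bind, descriptors posted on the fill ring point straight at UMEM
frames, which is what the new otx2_xsk_pool_alloc_buf() path hands to the
NPA aura in place of page-pool buffers.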

Signed-off-by: Suman Ghosh <sumang@marvell.com>
---
 .../ethernet/marvell/octeontx2/nic/Makefile   |   2 +-
 .../ethernet/marvell/octeontx2/nic/cn10k.c    |   6 +-
 .../marvell/octeontx2/nic/otx2_common.c       | 112 ++++++++---
 .../marvell/octeontx2/nic/otx2_common.h       |   6 +-
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  |  25 ++-
 .../marvell/octeontx2/nic/otx2_txrx.c         |  81 ++++++--
 .../marvell/octeontx2/nic/otx2_txrx.h         |   6 +
 .../ethernet/marvell/octeontx2/nic/otx2_vf.c  |  12 +-
 .../ethernet/marvell/octeontx2/nic/otx2_xsk.c | 182 ++++++++++++++++++
 .../ethernet/marvell/octeontx2/nic/otx2_xsk.h |  21 ++
 .../ethernet/marvell/octeontx2/nic/qos_sq.c   |   2 +-
 11 files changed, 392 insertions(+), 63 deletions(-)
 create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
 create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h

Comments

Simon Horman Feb. 10, 2025, 5:56 p.m. UTC | #1
On Thu, Feb 06, 2025 at 02:20:31PM +0530, Suman Ghosh wrote:
> This patch adds AF_XDP zero-copy support for CN10K.
> This patch specifically adds receive side support. In this approach, once
> an XDP program with zero-copy support is enabled on a specific RX queue,
> that receive queue is disabled/detached from the existing kernel buffer
> pool and re-assigned to the UMEM memory.
> 
> Signed-off-by: Suman Ghosh <sumang@marvell.com>

...

> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c

...

> @@ -124,7 +127,8 @@ int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
>  			break;
>  		}
>  		cq->pool_ptrs--;
> -		ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
> +		ptrs[num_ptrs] = pool->xsk_pool ? (u64)bufptr : (u64)bufptr + OTX2_HEAD_ROOM;

Please consider limiting lines to 80 columns wide or less in Networking
code where it can be done without reducing readability (subjective to be
sure).
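
e.g. it wraps cleanly as (sketch):

	ptrs[num_ptrs] = pool->xsk_pool ? (u64)bufptr :
			 (u64)bufptr + OTX2_HEAD_ROOM;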

> +
>  		num_ptrs++;
>  		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
>  			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,

...

> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c

...

> @@ -1312,8 +1326,8 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
>  
>  	/* Free SQB and RQB pointers from the aura pool */
>  	for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
> -		iova = otx2_aura_allocptr(pfvf, pool_id);
>  		pool = &pfvf->qset.pool[pool_id];
> +		iova = otx2_aura_allocptr(pfvf, pool_id);
>  		while (iova) {
>  			if (type == AURA_NIX_RQ)
>  				iova -= OTX2_HEAD_ROOM;

This hunk seems unnecessary.

...

> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c

...

> @@ -529,9 +530,10 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p
>  int otx2_napi_handler(struct napi_struct *napi, int budget)
>  {
>  	struct otx2_cq_queue *rx_cq = NULL;
> +	struct otx2_cq_queue *cq = NULL;
>  	struct otx2_cq_poll *cq_poll;
>  	int workdone = 0, cq_idx, i;
> -	struct otx2_cq_queue *cq;
> +	struct otx2_pool *pool;
>  	struct otx2_qset *qset;
>  	struct otx2_nic *pfvf;
>  	int filled_cnt = -1;
> @@ -556,6 +558,7 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
>  
>  	if (rx_cq && rx_cq->pool_ptrs)
>  		filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq);
> +
>  	/* Clear the IRQ */
>  	otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0));
>  

> @@ -568,20 +571,31 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
>  		if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED)
>  			otx2_adjust_adaptive_coalese(pfvf, cq_poll);
>  
> +		if (likely(cq))
> +			pool = &pfvf->qset.pool[cq->cq_idx];

pool is initialised conditionally here.

> +
>  		if (unlikely(!filled_cnt)) {
>  			struct refill_work *work;
>  			struct delayed_work *dwork;
>  
> -			work = &pfvf->refill_wrk[cq->cq_idx];
> -			dwork = &work->pool_refill_work;
> -			/* Schedule a task if no other task is running */
> -			if (!cq->refill_task_sched) {
> -				work->napi = napi;
> -				cq->refill_task_sched = true;
> -				schedule_delayed_work(dwork,
> -						      msecs_to_jiffies(100));
> +			if (likely(cq)) {

And here it is assumed that the same condition may not be met.

> +				work = &pfvf->refill_wrk[cq->cq_idx];
> +				dwork = &work->pool_refill_work;
> +				/* Schedule a task if no other task is running */
> +				if (!cq->refill_task_sched) {
> +					work->napi = napi;
> +					cq->refill_task_sched = true;
> +					schedule_delayed_work(dwork,
> +							      msecs_to_jiffies(100));
> +				}
>  			}
> +			/* Call for wake-up for not able to fill buffers */
> +			if (pool->xsk_pool)

> +				xsk_set_rx_need_wakeup(pool->xsk_pool);

But here pool is dereferenced without being guarded by the same
condition. This seems inconsistent.

>  		} else {
> +			/* Clear wake-up, since buffers are filled successfully */
> +			if (pool->xsk_pool)
> +				xsk_clear_rx_need_wakeup(pool->xsk_pool);

And it is not obvious to me (or Smatch, which flagged this one) that
pool is initialised here.

>  			/* Re-enable interrupts */
>  			otx2_write64(pfvf,
>  				     NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx),
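
FWIW, one shape that keeps the initialisation and both uses consistent
(a sketch of what a v6 could do, not the author's actual fix):

	struct otx2_pool *pool = NULL;

	if (cq)
		pool = &pfvf->qset.pool[cq->cq_idx];

	if (unlikely(!filled_cnt)) {
		if (cq) {
			/* schedule pool_refill_work as above */
		}
		/* Ask userspace for a wake-up; buffers were not filled */
		if (pool && pool->xsk_pool)
			xsk_set_rx_need_wakeup(pool->xsk_pool);
	} else {
		if (pool && pool->xsk_pool)
			xsk_clear_rx_need_wakeup(pool->xsk_pool);
		/* re-enable interrupts */
	}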

...

> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> index e926c6ce96cf..e43ecfb633f8 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> @@ -722,15 +722,25 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (err)
>  		goto err_shutdown_tc;
>  
> +	vf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);
> +	if (!vf->af_xdp_zc_qidx) {
> +		err = -ENOMEM;
> +		goto err_af_xdp_zc;
> +	}
> +
>  #ifdef CONFIG_DCB
>  	err = otx2_dcbnl_set_ops(netdev);
>  	if (err)
> -		goto err_shutdown_tc;
> +		goto err_dcbnl_set_ops;
>  #endif
>  	otx2_qos_init(vf, qos_txqs);
>  
>  	return 0;
>  
> +err_dcbnl_set_ops:
> +	bitmap_free(vf->af_xdp_zc_qidx);
> +err_af_xdp_zc:
> +	otx2_unregister_dl(vf);

Please consider naming the labels above after what they do rather
than where they come from, as seems to be the case for the existing
labels below, and is preferred in Networking code.

>  err_shutdown_tc:
>  	otx2_shutdown_tc(vf);
>  err_unreg_netdev:
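
i.e. something like (label names are hypothetical):

err_free_zc_bmap:
	bitmap_free(vf->af_xdp_zc_qidx);
err_unreg_devlink:
	otx2_unregister_dl(vf);
err_shutdown_tc:
	otx2_shutdown_tc(vf);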

> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c

...

> +static int otx2_xsk_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id)
> +{
> +	struct nix_cn10k_aq_enq_req *cn10k_rq_aq;
> +	struct npa_aq_enq_req *aura_aq;
> +	struct npa_aq_enq_req *pool_aq;
> +	struct nix_aq_enq_req *rq_aq;
> +
> +	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
> +		cn10k_rq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
> +		if (!cn10k_rq_aq)
> +			return -ENOMEM;
> +		cn10k_rq_aq->qidx = qidx;
> +		cn10k_rq_aq->rq.ena = 0;
> +		cn10k_rq_aq->rq_mask.ena = 1;
> +		cn10k_rq_aq->ctype = NIX_AQ_CTYPE_RQ;
> +		cn10k_rq_aq->op = NIX_AQ_INSTOP_WRITE;
> +	} else {
> +		rq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
> +		if (!rq_aq)
> +			return -ENOMEM;
> +		rq_aq->qidx = qidx;
> +		rq_aq->sq.ena = 0;
> +		rq_aq->sq_mask.ena = 1;
> +		rq_aq->ctype = NIX_AQ_CTYPE_RQ;
> +		rq_aq->op = NIX_AQ_INSTOP_WRITE;
> +	}
> +
> +	aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
> +	if (!aura_aq) {
> +		otx2_mbox_reset(&pfvf->mbox.mbox, 0);
> +		return -ENOMEM;

It's not a big deal, but FWIW I would have used a goto label here.

> +	}
> +
> +	aura_aq->aura_id = aura_id;
> +	aura_aq->aura.ena = 0;
> +	aura_aq->aura_mask.ena = 1;
> +	aura_aq->ctype = NPA_AQ_CTYPE_AURA;
> +	aura_aq->op = NPA_AQ_INSTOP_WRITE;
> +
> +	pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
> +	if (!pool_aq) {
> +		otx2_mbox_reset(&pfvf->mbox.mbox, 0);
> +		return -ENOMEM;

And re-used it here.

> +	}
> +
> +	pool_aq->aura_id = aura_id;
> +	pool_aq->pool.ena = 0;
> +	pool_aq->pool_mask.ena = 1;
> +
> +	pool_aq->ctype = NPA_AQ_CTYPE_POOL;
> +	pool_aq->op = NPA_AQ_INSTOP_WRITE;
> +
> +	return otx2_sync_mbox_msg(&pfvf->mbox);
> +}
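
i.e. roughly (sketch):

	aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
	if (!aura_aq)
		goto err_reset_mbox;
	/* ... fill in aura_aq ... */

	pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
	if (!pool_aq)
		goto err_reset_mbox;
	/* ... fill in pool_aq ... */

	return otx2_sync_mbox_msg(&pfvf->mbox);

err_reset_mbox:
	otx2_mbox_reset(&pfvf->mbox.mbox, 0);
	return -ENOMEM;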

...
Suman Ghosh Feb. 12, 2025, 7:32 a.m. UTC | #2
>> @@ -124,7 +127,8 @@ int cn10k_refill_pool_ptrs(void *dev, struct
>otx2_cq_queue *cq)
>>  			break;
>>  		}
>>  		cq->pool_ptrs--;
>> -		ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
>> +		ptrs[num_ptrs] = pool->xsk_pool ? (u64)bufptr : (u64)bufptr +
>> +OTX2_HEAD_ROOM;
>
>Please consider limiting lines to 80 columns wide or less in Networking
>code where it can be done without reducing readability (subjective to be
>sure).
[Suman] ack
>
>> +
>>  		num_ptrs++;
>>  		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
>>  			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
>
>...
>
>> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
>> b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
>
>...
>
>> @@ -1312,8 +1326,8 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf,
>> int type)
>>
>>  	/* Free SQB and RQB pointers from the aura pool */
>>  	for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
>> -		iova = otx2_aura_allocptr(pfvf, pool_id);
>>  		pool = &pfvf->qset.pool[pool_id];
>> +		iova = otx2_aura_allocptr(pfvf, pool_id);
>>  		while (iova) {
>>  			if (type == AURA_NIX_RQ)
>>  				iova -= OTX2_HEAD_ROOM;
>
>This hunk seems unnecessary.
[Suman] ack
>
>...
>
>> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
>> b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
>
>...
>
>> @@ -529,9 +530,10 @@ static void otx2_adjust_adaptive_coalese(struct
>> otx2_nic *pfvf, struct otx2_cq_p  int otx2_napi_handler(struct
>> napi_struct *napi, int budget)  {
>>  	struct otx2_cq_queue *rx_cq = NULL;
>> +	struct otx2_cq_queue *cq = NULL;
>>  	struct otx2_cq_poll *cq_poll;
>>  	int workdone = 0, cq_idx, i;
>> -	struct otx2_cq_queue *cq;
>> +	struct otx2_pool *pool;
>>  	struct otx2_qset *qset;
>>  	struct otx2_nic *pfvf;
>>  	int filled_cnt = -1;
>> @@ -556,6 +558,7 @@ int otx2_napi_handler(struct napi_struct *napi,
>> int budget)
>>
>>  	if (rx_cq && rx_cq->pool_ptrs)
>>  		filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq);
>> +
>>  	/* Clear the IRQ */
>>  	otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx),
>BIT_ULL(0));
>>
>
>> @@ -568,20 +571,31 @@ int otx2_napi_handler(struct napi_struct *napi,
>int budget)
>>  		if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED)
>>  			otx2_adjust_adaptive_coalese(pfvf, cq_poll);
>>
>> +		if (likely(cq))
>> +			pool = &pfvf->qset.pool[cq->cq_idx];
>
>pool is initialised conditionally here.
[Suman] ack, will update in v6
>
>> +
>>  		if (unlikely(!filled_cnt)) {
>>  			struct refill_work *work;
>>  			struct delayed_work *dwork;
>>
>> -			work = &pfvf->refill_wrk[cq->cq_idx];
>> -			dwork = &work->pool_refill_work;
>> -			/* Schedule a task if no other task is running */
>> -			if (!cq->refill_task_sched) {
>> -				work->napi = napi;
>> -				cq->refill_task_sched = true;
>> -				schedule_delayed_work(dwork,
>> -						      msecs_to_jiffies(100));
>> +			if (likely(cq)) {
>
>And here it is assumed that the same condition may not be met.
[Suman] ack, will update in v6
>
>> +				work = &pfvf->refill_wrk[cq->cq_idx];
>> +				dwork = &work->pool_refill_work;
>> +				/* Schedule a task if no other task is running */
>> +				if (!cq->refill_task_sched) {
>> +					work->napi = napi;
>> +					cq->refill_task_sched = true;
>> +					schedule_delayed_work(dwork,
>> +							      msecs_to_jiffies(100));
>> +				}
>>  			}
>> +			/* Call for wake-up for not able to fill buffers */
>> +			if (pool->xsk_pool)
>
>> +				xsk_set_rx_need_wakeup(pool->xsk_pool);
>
>But here pool is dereferenced without being guarded by the same
>condition. This seems inconsistent.
[Suman] ack, will update in v6
>
>>  		} else {
>> +			/* Clear wake-up, since buffers are filled successfully
>*/
>> +			if (pool->xsk_pool)
>> +				xsk_clear_rx_need_wakeup(pool->xsk_pool);
>
>And it is not obvious to me (or Smatch, which flagged this one) that
>pool is initialised here.
[Suman] ack, will update in v6
>
>>  			/* Re-enable interrupts */
>>  			otx2_write64(pfvf,
>>  				     NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx),
>
>...
>
>> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
>> b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
>> index e926c6ce96cf..e43ecfb633f8 100644
>> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
>> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
>> @@ -722,15 +722,25 @@ static int otx2vf_probe(struct pci_dev *pdev,
>const struct pci_device_id *id)
>>  	if (err)
>>  		goto err_shutdown_tc;
>>
>> +	vf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);
>> +	if (!vf->af_xdp_zc_qidx) {
>> +		err = -ENOMEM;
>> +		goto err_af_xdp_zc;
>> +	}
>> +
>>  #ifdef CONFIG_DCB
>>  	err = otx2_dcbnl_set_ops(netdev);
>>  	if (err)
>> -		goto err_shutdown_tc;
>> +		goto err_dcbnl_set_ops;
>>  #endif
>>  	otx2_qos_init(vf, qos_txqs);
>>
>>  	return 0;
>>
>> +err_dcbnl_set_ops:
>> +	bitmap_free(vf->af_xdp_zc_qidx);
>> +err_af_xdp_zc:
>> +	otx2_unregister_dl(vf);
>
>Please consider naming the labels above after what they do rather than
>where they come from, as seems to be the case for the existing labels
>below, and is preferred in Networking code.
[Suman] ack, will update in v6
>
>>  err_shutdown_tc:
>>  	otx2_shutdown_tc(vf);
>>  err_unreg_netdev:
>
>> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
>> b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
>
>...
>
>> +static int otx2_xsk_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int
>> +aura_id) {
>> +	struct nix_cn10k_aq_enq_req *cn10k_rq_aq;
>> +	struct npa_aq_enq_req *aura_aq;
>> +	struct npa_aq_enq_req *pool_aq;
>> +	struct nix_aq_enq_req *rq_aq;
>> +
>> +	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
>> +		cn10k_rq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf-
>>mbox);
>> +		if (!cn10k_rq_aq)
>> +			return -ENOMEM;
>> +		cn10k_rq_aq->qidx = qidx;
>> +		cn10k_rq_aq->rq.ena = 0;
>> +		cn10k_rq_aq->rq_mask.ena = 1;
>> +		cn10k_rq_aq->ctype = NIX_AQ_CTYPE_RQ;
>> +		cn10k_rq_aq->op = NIX_AQ_INSTOP_WRITE;
>> +	} else {
>> +		rq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
>> +		if (!rq_aq)
>> +			return -ENOMEM;
>> +		rq_aq->qidx = qidx;
>> +		rq_aq->sq.ena = 0;
>> +		rq_aq->sq_mask.ena = 1;
>> +		rq_aq->ctype = NIX_AQ_CTYPE_RQ;
>> +		rq_aq->op = NIX_AQ_INSTOP_WRITE;
>> +	}
>> +
>> +	aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
>> +	if (!aura_aq) {
>> +		otx2_mbox_reset(&pfvf->mbox.mbox, 0);
>> +		return -ENOMEM;
>
>It's not a big deal, but FWIW I would have used a goto label here.
[Suman] ack
>
>> +	}
>> +
>> +	aura_aq->aura_id = aura_id;
>> +	aura_aq->aura.ena = 0;
>> +	aura_aq->aura_mask.ena = 1;
>> +	aura_aq->ctype = NPA_AQ_CTYPE_AURA;
>> +	aura_aq->op = NPA_AQ_INSTOP_WRITE;
>> +
>> +	pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
>> +	if (!pool_aq) {
>> +		otx2_mbox_reset(&pfvf->mbox.mbox, 0);
>> +		return -ENOMEM;
>
>And re-used it here.
[Suman] ack
>
>> +	}
>> +
>> +	pool_aq->aura_id = aura_id;
>> +	pool_aq->pool.ena = 0;
>> +	pool_aq->pool_mask.ena = 1;
>> +
>> +	pool_aq->ctype = NPA_AQ_CTYPE_POOL;
>> +	pool_aq->op = NPA_AQ_INSTOP_WRITE;
>> +
>> +	return otx2_sync_mbox_msg(&pfvf->mbox); }
>
>...

Patch

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index cb6513ab35e7..69e0778f9ac1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -9,7 +9,7 @@  obj-$(CONFIG_RVU_ESWITCH) += rvu_rep.o
 
 rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
                otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
-               otx2_devlink.o qos_sq.o qos.o
+               otx2_devlink.o qos_sq.o qos.o otx2_xsk.o
 rvu_nicvf-y := otx2_vf.o
 rvu_rep-y := rep.o
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index a15cc86635d6..9a2865c60850 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -112,9 +112,12 @@  int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 	struct otx2_nic *pfvf = dev;
 	int cnt = cq->pool_ptrs;
 	u64 ptrs[NPA_MAX_BURST];
+	struct otx2_pool *pool;
 	dma_addr_t bufptr;
 	int num_ptrs = 1;
 
+	pool = &pfvf->qset.pool[cq->cq_idx];
+
 	/* Refill pool with new buffers */
 	while (cq->pool_ptrs) {
 		if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
@@ -124,7 +127,8 @@  int cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
 			break;
 		}
 		cq->pool_ptrs--;
-		ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
+		ptrs[num_ptrs] = pool->xsk_pool ? (u64)bufptr : (u64)bufptr + OTX2_HEAD_ROOM;
+
 		num_ptrs++;
 		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
 			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 161cf33ef89e..b31eccb03cc3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -17,6 +17,7 @@ 
 #include "otx2_common.h"
 #include "otx2_struct.h"
 #include "cn10k.h"
+#include "otx2_xsk.h"
 
 static bool otx2_is_pfc_enabled(struct otx2_nic *pfvf)
 {
@@ -549,10 +550,13 @@  static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
 }
 
 static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-			     dma_addr_t *dma)
+			     dma_addr_t *dma, int qidx, int idx)
 {
 	u8 *buf;
 
+	if (pool->xsk_pool)
+		return otx2_xsk_pool_alloc_buf(pfvf, pool, dma, idx);
+
 	if (pool->page_pool)
 		return otx2_alloc_pool_buf(pfvf, pool, dma);
 
@@ -571,12 +575,12 @@  static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
 }
 
 int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-		    dma_addr_t *dma)
+		    dma_addr_t *dma, int qidx, int idx)
 {
 	int ret;
 
 	local_bh_disable();
-	ret = __otx2_alloc_rbuf(pfvf, pool, dma);
+	ret = __otx2_alloc_rbuf(pfvf, pool, dma, qidx, idx);
 	local_bh_enable();
 	return ret;
 }
@@ -584,7 +588,8 @@  int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
 int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
 		      dma_addr_t *dma)
 {
-	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma)))
+	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma,
+				       cq->cq_idx, cq->pool_ptrs - 1)))
 		return -ENOMEM;
 	return 0;
 }
@@ -884,7 +889,7 @@  void otx2_sqb_flush(struct otx2_nic *pfvf)
 #define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */
 #define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */
 
-static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
+int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
 {
 	struct otx2_qset *qset = &pfvf->qset;
 	struct nix_aq_enq_req *aq;
@@ -1041,7 +1046,7 @@  int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
 
 }
 
-static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
+int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
 {
 	struct otx2_qset *qset = &pfvf->qset;
 	int err, pool_id, non_xdp_queues;
@@ -1057,11 +1062,18 @@  static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
 		cq->cint_idx = qidx;
 		cq->cqe_cnt = qset->rqe_cnt;
 		if (pfvf->xdp_prog) {
-			pool = &qset->pool[qidx];
 			xdp_rxq_info_reg(&cq->xdp_rxq, pfvf->netdev, qidx, 0);
-			xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
-						   MEM_TYPE_PAGE_POOL,
-						   pool->page_pool);
+			pool = &qset->pool[qidx];
+			if (pool->xsk_pool) {
+				xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
+							   MEM_TYPE_XSK_BUFF_POOL,
+							   NULL);
+				xsk_pool_set_rxq_info(pool->xsk_pool, &cq->xdp_rxq);
+			} else if (pool->page_pool) {
+				xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
+							   MEM_TYPE_PAGE_POOL,
+							   pool->page_pool);
+			}
 		}
 	} else if (qidx < non_xdp_queues) {
 		cq->cq_type = CQ_TX;
@@ -1281,9 +1293,10 @@  void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
 
 	pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
 	page = virt_to_head_page(phys_to_virt(pa));
-
 	if (pool->page_pool) {
 		page_pool_put_full_page(pool->page_pool, page, true);
+	} else if (pool->xsk_pool) {
+		/* Note: No way of identifying xdp_buff */
 	} else {
 		dma_unmap_page_attrs(pfvf->dev, iova, size,
 				     DMA_FROM_DEVICE,
@@ -1298,6 +1311,7 @@  void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
 	int pool_id, pool_start = 0, pool_end = 0, size = 0;
 	struct otx2_pool *pool;
 	u64 iova;
+	int idx;
 
 	if (type == AURA_NIX_SQ) {
 		pool_start = otx2_get_pool_idx(pfvf, type, 0);
@@ -1312,8 +1326,8 @@  void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
 
 	/* Free SQB and RQB pointers from the aura pool */
 	for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
-		iova = otx2_aura_allocptr(pfvf, pool_id);
 		pool = &pfvf->qset.pool[pool_id];
+		iova = otx2_aura_allocptr(pfvf, pool_id);
 		while (iova) {
 			if (type == AURA_NIX_RQ)
 				iova -= OTX2_HEAD_ROOM;
@@ -1322,6 +1336,13 @@  void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
 
 			iova = otx2_aura_allocptr(pfvf, pool_id);
 		}
+
+		for (idx = 0 ; idx < pool->xdp_cnt; idx++) {
+			if (!pool->xdp[idx])
+				continue;
+
+			xsk_buff_free(pool->xdp[idx]);
+		}
 	}
 }
 
@@ -1338,7 +1359,8 @@  void otx2_aura_pool_free(struct otx2_nic *pfvf)
 		qmem_free(pfvf->dev, pool->stack);
 		qmem_free(pfvf->dev, pool->fc_addr);
 		page_pool_destroy(pool->page_pool);
-		pool->page_pool = NULL;
+		devm_kfree(pfvf->dev, pool->xdp);
+		pool->xsk_pool = NULL;
 	}
 	devm_kfree(pfvf->dev, pfvf->qset.pool);
 	pfvf->qset.pool = NULL;
@@ -1425,6 +1447,7 @@  int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 		   int stack_pages, int numptrs, int buf_size, int type)
 {
 	struct page_pool_params pp_params = { 0 };
+	struct xsk_buff_pool *xsk_pool;
 	struct npa_aq_enq_req *aq;
 	struct otx2_pool *pool;
 	int err;
@@ -1468,21 +1491,35 @@  int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 	aq->ctype = NPA_AQ_CTYPE_POOL;
 	aq->op = NPA_AQ_INSTOP_INIT;
 
-	if (type != AURA_NIX_RQ) {
-		pool->page_pool = NULL;
+	if (type != AURA_NIX_RQ)
+		return 0;
+
+	if (!test_bit(pool_id, pfvf->af_xdp_zc_qidx)) {
+		pp_params.order = get_order(buf_size);
+		pp_params.flags = PP_FLAG_DMA_MAP;
+		pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
+		pp_params.nid = NUMA_NO_NODE;
+		pp_params.dev = pfvf->dev;
+		pp_params.dma_dir = DMA_FROM_DEVICE;
+		pool->page_pool = page_pool_create(&pp_params);
+		if (IS_ERR(pool->page_pool)) {
+			netdev_err(pfvf->netdev, "Creation of page pool failed\n");
+			return PTR_ERR(pool->page_pool);
+		}
 		return 0;
 	}
 
-	pp_params.order = get_order(buf_size);
-	pp_params.flags = PP_FLAG_DMA_MAP;
-	pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
-	pp_params.nid = NUMA_NO_NODE;
-	pp_params.dev = pfvf->dev;
-	pp_params.dma_dir = DMA_FROM_DEVICE;
-	pool->page_pool = page_pool_create(&pp_params);
-	if (IS_ERR(pool->page_pool)) {
-		netdev_err(pfvf->netdev, "Creation of page pool failed\n");
-		return PTR_ERR(pool->page_pool);
+	/* Set XSK pool to support AF_XDP zero-copy */
+	xsk_pool = xsk_get_pool_from_qid(pfvf->netdev, pool_id);
+	if (xsk_pool) {
+		pool->xsk_pool = xsk_pool;
+		pool->xdp_cnt = numptrs;
+		pool->xdp = devm_kcalloc(pfvf->dev,
+					 numptrs, sizeof(struct xdp_buff *), GFP_KERNEL);
+		if (IS_ERR(pool->xdp)) {
+			netdev_err(pfvf->netdev, "Creation of xsk pool failed\n");
+			return PTR_ERR(pool->xdp);
+		}
 	}
 
 	return 0;
@@ -1543,9 +1580,18 @@  int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
 		}
 
 		for (ptr = 0; ptr < num_sqbs; ptr++) {
-			err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
-			if (err)
+			err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
+			if (err) {
+				if (pool->xsk_pool) {
+					ptr--;
+					while (ptr >= 0) {
+						xsk_buff_free(pool->xdp[ptr]);
+						ptr--;
+					}
+				}
 				goto err_mem;
+			}
+
 			pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
 			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
 		}
@@ -1595,11 +1641,19 @@  int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
 	/* Allocate pointers and free them to aura/pool */
 	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
 		pool = &pfvf->qset.pool[pool_id];
+
 		for (ptr = 0; ptr < num_ptrs; ptr++) {
-			err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
-			if (err)
+			err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
+			if (err) {
+				if (pool->xsk_pool) {
+					while (ptr)
+						xsk_buff_free(pool->xdp[--ptr]);
+				}
 				return -ENOMEM;
+			}
+
 			pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
+						   pool->xsk_pool ? bufptr :
 						   bufptr + OTX2_HEAD_ROOM);
 		}
 	}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index d5fbccb289df..60508971b62f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -532,6 +532,8 @@  struct otx2_nic {
 
 	/* Inline ipsec */
 	struct cn10k_ipsec	ipsec;
+	/* af_xdp zero-copy */
+	unsigned long		*af_xdp_zc_qidx;
 };
 
 static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -1003,7 +1005,7 @@  void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq);
 void otx2_free_pending_sqe(struct otx2_nic *pfvf);
 void otx2_sqb_flush(struct otx2_nic *pfvf);
 int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-		    dma_addr_t *dma);
+		    dma_addr_t *dma, int qidx, int idx);
 int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
 void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
 int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
@@ -1033,6 +1035,8 @@  void otx2_pfaf_mbox_destroy(struct otx2_nic *pf);
 void otx2_disable_mbox_intr(struct otx2_nic *pf);
 void otx2_disable_napi(struct otx2_nic *pf);
 irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq);
+int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura);
+int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx);
 
 /* RSS configuration APIs*/
 int otx2_rss_init(struct otx2_nic *pfvf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 4347a3c95350..188ab6b6fb16 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -27,6 +27,7 @@ 
 #include "qos.h"
 #include <rvu_trace.h>
 #include "cn10k_ipsec.h"
+#include "otx2_xsk.h"
 
 #define DRV_NAME	"rvu_nicpf"
 #define DRV_STRING	"Marvell RVU NIC Physical Function Driver"
@@ -1662,9 +1663,7 @@  void otx2_free_hw_resources(struct otx2_nic *pf)
 	struct nix_lf_free_req *free_req;
 	struct mbox *mbox = &pf->mbox;
 	struct otx2_cq_queue *cq;
-	struct otx2_pool *pool;
 	struct msg_req *req;
-	int pool_id;
 	int qidx;
 
 	/* Ensure all SQE are processed */
@@ -1705,13 +1704,6 @@  void otx2_free_hw_resources(struct otx2_nic *pf)
 	/* Free RQ buffer pointers*/
 	otx2_free_aura_ptr(pf, AURA_NIX_RQ);
 
-	for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) {
-		pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx);
-		pool = &pf->qset.pool[pool_id];
-		page_pool_destroy(pool->page_pool);
-		pool->page_pool = NULL;
-	}
-
 	otx2_free_cq_res(pf);
 
 	/* Free all ingress bandwidth profiles allocated */
@@ -2788,6 +2780,8 @@  static int otx2_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return otx2_xdp_setup(pf, xdp->prog);
+	case XDP_SETUP_XSK_POOL:
+		return otx2_xsk_pool_setup(pf, xdp->xsk.pool, xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}
@@ -2865,6 +2859,7 @@  static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_set_vf_vlan	= otx2_set_vf_vlan,
 	.ndo_get_vf_config	= otx2_get_vf_config,
 	.ndo_bpf		= otx2_xdp,
+	.ndo_xsk_wakeup		= otx2_xsk_wakeup,
 	.ndo_xdp_xmit           = otx2_xdp_xmit,
 	.ndo_setup_tc		= otx2_setup_tc,
 	.ndo_set_vf_trust	= otx2_ndo_set_vf_trust,
@@ -3203,16 +3198,26 @@  static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	/* Enable link notifications */
 	otx2_cgx_config_linkevents(pf, true);
 
+	pf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);
+	if (!pf->af_xdp_zc_qidx) {
+		err = -ENOMEM;
+		goto err_af_xdp_zc;
+	}
+
 #ifdef CONFIG_DCB
 	err = otx2_dcbnl_set_ops(netdev);
 	if (err)
-		goto err_pf_sriov_init;
+		goto err_dcbnl_set_ops;
 #endif
 
 	otx2_qos_init(pf, qos_txqs);
 
 	return 0;
 
+err_dcbnl_set_ops:
+	bitmap_free(pf->af_xdp_zc_qidx);
+err_af_xdp_zc:
+	otx2_sriov_vfcfg_cleanup(pf);
 err_pf_sriov_init:
 	otx2_shutdown_tc(pf);
 err_mcam_flow_del:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index d46f05993d3f..44137160bdf6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -12,6 +12,7 @@ 
 #include <linux/bpf_trace.h>
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
+#include <net/xdp.h>
 
 #include "otx2_reg.h"
 #include "otx2_common.h"
@@ -529,9 +530,10 @@  static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p
 int otx2_napi_handler(struct napi_struct *napi, int budget)
 {
 	struct otx2_cq_queue *rx_cq = NULL;
+	struct otx2_cq_queue *cq = NULL;
 	struct otx2_cq_poll *cq_poll;
 	int workdone = 0, cq_idx, i;
-	struct otx2_cq_queue *cq;
+	struct otx2_pool *pool;
 	struct otx2_qset *qset;
 	struct otx2_nic *pfvf;
 	int filled_cnt = -1;
@@ -556,6 +558,7 @@  int otx2_napi_handler(struct napi_struct *napi, int budget)
 
 	if (rx_cq && rx_cq->pool_ptrs)
 		filled_cnt = pfvf->hw_ops->refill_pool_ptrs(pfvf, rx_cq);
+
 	/* Clear the IRQ */
 	otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0));
 
@@ -568,20 +571,31 @@  int otx2_napi_handler(struct napi_struct *napi, int budget)
 		if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED)
 			otx2_adjust_adaptive_coalese(pfvf, cq_poll);
 
+		if (likely(cq))
+			pool = &pfvf->qset.pool[cq->cq_idx];
+
 		if (unlikely(!filled_cnt)) {
 			struct refill_work *work;
 			struct delayed_work *dwork;
 
-			work = &pfvf->refill_wrk[cq->cq_idx];
-			dwork = &work->pool_refill_work;
-			/* Schedule a task if no other task is running */
-			if (!cq->refill_task_sched) {
-				work->napi = napi;
-				cq->refill_task_sched = true;
-				schedule_delayed_work(dwork,
-						      msecs_to_jiffies(100));
+			if (likely(cq)) {
+				work = &pfvf->refill_wrk[cq->cq_idx];
+				dwork = &work->pool_refill_work;
+				/* Schedule a task if no other task is running */
+				if (!cq->refill_task_sched) {
+					work->napi = napi;
+					cq->refill_task_sched = true;
+					schedule_delayed_work(dwork,
+							      msecs_to_jiffies(100));
+				}
 			}
+			/* Call for wake-up for not able to fill buffers */
+			if (pool->xsk_pool)
+				xsk_set_rx_need_wakeup(pool->xsk_pool);
 		} else {
+			/* Clear wake-up, since buffers are filled successfully */
+			if (pool->xsk_pool)
+				xsk_clear_rx_need_wakeup(pool->xsk_pool);
 			/* Re-enable interrupts */
 			otx2_write64(pfvf,
 				     NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx),
@@ -1232,15 +1246,19 @@  void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int q
 	u16 pool_id;
 	u64 iova;
 
-	if (pfvf->xdp_prog)
+	pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
+	pool = &pfvf->qset.pool[pool_id];
+
+	if (pfvf->xdp_prog) {
+		if (pool->page_pool)
+			xdp_rxq_info_unreg_mem_model(&cq->xdp_rxq);
+
 		xdp_rxq_info_unreg(&cq->xdp_rxq);
+	}
 
 	if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
 		return;
 
-	pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
-	pool = &pfvf->qset.pool[pool_id];
-
 	while (cq->pend_cqe) {
 		cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
 		processed_cqe++;
@@ -1424,17 +1442,28 @@  static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 				     struct otx2_cq_queue *cq,
 				     bool *need_xdp_flush)
 {
+	struct xdp_buff xdp, *xsk_buff = NULL;
 	unsigned char *hard_start;
 	struct otx2_pool *pool;
 	struct xdp_frame *xdpf;
 	int qidx = cq->cq_idx;
-	struct xdp_buff xdp;
 	struct page *page;
 	u64 iova, pa;
 	u32 act;
 	int err;
 
 	pool = &pfvf->qset.pool[qidx];
+
+	if (pool->xsk_pool) {
+		xsk_buff = pool->xdp[--cq->rbpool->xdp_top];
+		if (!xsk_buff)
+			return false;
+
+		xsk_buff->data_end = xsk_buff->data + cqe->sg.seg_size;
+		act = bpf_prog_run_xdp(prog, xsk_buff);
+		goto handle_xdp_verdict;
+	}
+
 	iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM;
 	pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
 	page = virt_to_page(phys_to_virt(pa));
@@ -1447,6 +1476,7 @@  static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 
 	act = bpf_prog_run_xdp(prog, &xdp);
 
+handle_xdp_verdict:
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -1458,6 +1488,15 @@  static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 					      cqe->sg.seg_size, qidx, XDP_TX);
 	case XDP_REDIRECT:
 		cq->pool_ptrs++;
+		if (xsk_buff) {
+			err = xdp_do_redirect(pfvf->netdev, xsk_buff, prog);
+			if (!err) {
+				*need_xdp_flush = true;
+				return true;
+			}
+			return false;
+		}
+
 		err = xdp_do_redirect(pfvf->netdev, &xdp, prog);
 		if (!err) {
 			*need_xdp_flush = true;
@@ -1473,17 +1512,21 @@  static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 		bpf_warn_invalid_xdp_action(pfvf->netdev, prog, act);
 		break;
 	case XDP_ABORTED:
+		if (xsk_buff)
+			xsk_buff_free(xsk_buff);
 		trace_xdp_exception(pfvf->netdev, prog, act);
 		break;
 	case XDP_DROP:
 		cq->pool_ptrs++;
-		if (page->pp) {
+		if (xsk_buff) {
+			xsk_buff_free(xsk_buff);
+		} else if (page->pp) {
 			page_pool_recycle_direct(pool->page_pool, page);
-			return true;
+		} else {
+			otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
+					    DMA_FROM_DEVICE);
+			put_page(page);
 		}
-		otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
-				    DMA_FROM_DEVICE);
-		put_page(page);
 		return true;
 	}
 	return false;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 92e1e84cad75..8f346fbc8221 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -12,6 +12,7 @@ 
 #include <linux/iommu.h>
 #include <linux/if_vlan.h>
 #include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
 
 #define LBK_CHAN_BASE	0x000
 #define SDP_CHAN_BASE	0x700
@@ -128,7 +129,11 @@  struct otx2_pool {
 	struct qmem		*stack;
 	struct qmem		*fc_addr;
 	struct page_pool	*page_pool;
+	struct xsk_buff_pool	*xsk_pool;
+	struct xdp_buff		**xdp;
+	u16			xdp_cnt;
 	u16			rbsize;
+	u16			xdp_top;
 };
 
 struct otx2_cq_queue {
@@ -145,6 +150,7 @@  struct otx2_cq_queue {
 	void			*cqe_base;
 	struct qmem		*cqe;
 	struct otx2_pool	*rbpool;
+	bool			xsk_zc_en;
 	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_aligned_in_smp;
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index e926c6ce96cf..e43ecfb633f8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -722,15 +722,25 @@  static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto err_shutdown_tc;
 
+	vf->af_xdp_zc_qidx = bitmap_zalloc(qcount, GFP_KERNEL);
+	if (!vf->af_xdp_zc_qidx) {
+		err = -ENOMEM;
+		goto err_af_xdp_zc;
+	}
+
 #ifdef CONFIG_DCB
 	err = otx2_dcbnl_set_ops(netdev);
 	if (err)
-		goto err_shutdown_tc;
+		goto err_dcbnl_set_ops;
 #endif
 	otx2_qos_init(vf, qos_txqs);
 
 	return 0;
 
+err_dcbnl_set_ops:
+	bitmap_free(vf->af_xdp_zc_qidx);
+err_af_xdp_zc:
+	otx2_unregister_dl(vf);
 err_shutdown_tc:
 	otx2_shutdown_tc(vf);
 err_unreg_netdev:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
new file mode 100644
index 000000000000..69098c6a6fed
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c
@@ -0,0 +1,182 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2024 Marvell.
+ *
+ */
+
+#include <linux/bpf_trace.h>
+#include <linux/stringify.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "otx2_common.h"
+#include "otx2_xsk.h"
+
+int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			    dma_addr_t *dma, int idx)
+{
+	struct xdp_buff *xdp;
+	int delta;
+
+	xdp = xsk_buff_alloc(pool->xsk_pool);
+	if (!xdp)
+		return -ENOMEM;
+
+	pool->xdp[pool->xdp_top++] = xdp;
+	*dma = OTX2_DATA_ALIGN(xsk_buff_xdp_get_dma(xdp));
+	/* Adjust xdp->data for unaligned addresses */
+	delta = *dma - xsk_buff_xdp_get_dma(xdp);
+	xdp->data += delta;
+
+	return 0;
+}
+
+static int otx2_xsk_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id)
+{
+	struct nix_cn10k_aq_enq_req *cn10k_rq_aq;
+	struct npa_aq_enq_req *aura_aq;
+	struct npa_aq_enq_req *pool_aq;
+	struct nix_aq_enq_req *rq_aq;
+
+	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
+		cn10k_rq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
+		if (!cn10k_rq_aq)
+			return -ENOMEM;
+		cn10k_rq_aq->qidx = qidx;
+		cn10k_rq_aq->rq.ena = 0;
+		cn10k_rq_aq->rq_mask.ena = 1;
+		cn10k_rq_aq->ctype = NIX_AQ_CTYPE_RQ;
+		cn10k_rq_aq->op = NIX_AQ_INSTOP_WRITE;
+	} else {
+		rq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+		if (!rq_aq)
+			return -ENOMEM;
+		rq_aq->qidx = qidx;
+		rq_aq->sq.ena = 0;
+		rq_aq->sq_mask.ena = 1;
+		rq_aq->ctype = NIX_AQ_CTYPE_RQ;
+		rq_aq->op = NIX_AQ_INSTOP_WRITE;
+	}
+
+	aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+	if (!aura_aq) {
+		otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+		return -ENOMEM;
+	}
+
+	aura_aq->aura_id = aura_id;
+	aura_aq->aura.ena = 0;
+	aura_aq->aura_mask.ena = 1;
+	aura_aq->ctype = NPA_AQ_CTYPE_AURA;
+	aura_aq->op = NPA_AQ_INSTOP_WRITE;
+
+	pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+	if (!pool_aq) {
+		otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+		return -ENOMEM;
+	}
+
+	pool_aq->aura_id = aura_id;
+	pool_aq->pool.ena = 0;
+	pool_aq->pool_mask.ena = 1;
+
+	pool_aq->ctype = NPA_AQ_CTYPE_POOL;
+	pool_aq->op = NPA_AQ_INSTOP_WRITE;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+static void otx2_clean_up_rq(struct otx2_nic *pfvf, int qidx)
+{
+	struct otx2_qset *qset = &pfvf->qset;
+	struct otx2_cq_queue *cq;
+	struct otx2_pool *pool;
+	u64 iova;
+
+	/* If the DOWN flag is set SQs are already freed */
+	if (pfvf->flags & OTX2_FLAG_INTF_DOWN)
+		return;
+
+	cq = &qset->cq[qidx];
+	if (cq)
+		otx2_cleanup_rx_cqes(pfvf, cq, qidx);
+
+	pool = &pfvf->qset.pool[qidx];
+	iova = otx2_aura_allocptr(pfvf, qidx);
+	while (iova) {
+		iova -= OTX2_HEAD_ROOM;
+		otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize);
+		iova = otx2_aura_allocptr(pfvf, qidx);
+	}
+
+	mutex_lock(&pfvf->mbox.lock);
+	otx2_xsk_ctx_disable(pfvf, qidx, qidx);
+	mutex_unlock(&pfvf->mbox.lock);
+}
+
+int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qidx)
+{
+	u16 rx_queues = pf->hw.rx_queues;
+	u16 tx_queues = pf->hw.tx_queues;
+	int err;
+
+	if (qidx >= rx_queues || qidx >= tx_queues)
+		return -EINVAL;
+
+	err = xsk_pool_dma_map(pool, pf->dev, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	if (err)
+		return err;
+
+	set_bit(qidx, pf->af_xdp_zc_qidx);
+	otx2_clean_up_rq(pf, qidx);
+	/* Kick start the NAPI context so that receiving will start */
+	return otx2_xsk_wakeup(pf->netdev, qidx, XDP_WAKEUP_RX);
+}
+
+int otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qidx)
+{
+	struct net_device *netdev = pf->netdev;
+	struct xsk_buff_pool *pool;
+
+	pool = xsk_get_pool_from_qid(netdev, qidx);
+	if (!pool)
+		return -EINVAL;
+
+	otx2_clean_up_rq(pf, qidx);
+	clear_bit(qidx, pf->af_xdp_zc_qidx);
+	xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+
+	return 0;
+}
+
+int otx2_xsk_pool_setup(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qidx)
+{
+	if (pool)
+		return otx2_xsk_pool_enable(pf, pool, qidx);
+
+	return otx2_xsk_pool_disable(pf, qidx);
+}
+
+int otx2_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct otx2_nic *pf = netdev_priv(dev);
+	struct otx2_cq_poll *cq_poll = NULL;
+	struct otx2_qset *qset = &pf->qset;
+
+	if (pf->flags & OTX2_FLAG_INTF_DOWN)
+		return -ENETDOWN;
+
+	if (queue_id >= pf->hw.rx_queues)
+		return -EINVAL;
+
+	cq_poll = &qset->napi[queue_id];
+	if (!cq_poll)
+		return -EINVAL;
+
+	/* Trigger interrupt */
+	if (!napi_if_scheduled_mark_missed(&cq_poll->napi))
+		otx2_write64(pf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), BIT_ULL(0));
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h
new file mode 100644
index 000000000000..022b3433edbb
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.h
@@ -0,0 +1,21 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU PF/VF Netdev Devlink
+ *
+ * Copyright (C) 2024 Marvell.
+ *
+ */
+
+#ifndef	OTX2_XSK_H
+#define	OTX2_XSK_H
+
+struct otx2_nic;
+struct xsk_buff_pool;
+
+int otx2_xsk_pool_setup(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qid);
+int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qid);
+int otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qid);
+int otx2_xsk_pool_alloc_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			    dma_addr_t *dma, int idx);
+int otx2_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+
+#endif /* OTX2_XSK_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
index 9d887bfc3108..c5dbae0e513b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
@@ -82,7 +82,7 @@  static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx)
 	}
 
 	for (ptr = 0; ptr < num_sqbs; ptr++) {
-		err = otx2_alloc_rbuf(pfvf, pool, &bufptr);
+		err = otx2_alloc_rbuf(pfvf, pool, &bufptr, pool_id, ptr);
 		if (err)
 			goto sqb_free;
 		pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);