diff mbox series

[net,v3,2/2] mlx5: fix possible ptp queue fifo use-after-free

Message ID 20230126010206.13483-3-vfedorenko@novek.ru (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series mlx5: ptp fifo bugfixes | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 0 this patch: 15
netdev/cc_maintainers fail 1 blamed authors not CCed: tariqt@nvidia.com; 9 maintainers not CCed: richardcochran@gmail.com lkayal@nvidia.com leon@kernel.org davem@davemloft.net pabeni@redhat.com linux-rdma@vger.kernel.org tariqt@nvidia.com maximmi@nvidia.com edumazet@google.com
netdev/build_clang fail Errors and warnings before: 0 this patch: 15
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn fail Errors and warnings before: 0 this patch: 15
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Vadim Fedorenko Jan. 26, 2023, 1:02 a.m. UTC
From: Vadim Fedorenko <vadfed@meta.com>

Fifo pointers were not checked during push and pop operations and this
could potentially lead to use-after-free or skb leak under heavy PTP
traffic.

Also there were OOO cqe spotted which lead to drain of the queue and
use-after-free because of lack of fifo pointers check. Special check
is added to avoid resync operation if SKB could not exist in the fifo
because of OOO cqe (skb_id must be between consumer and producer index).

Fixes: 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port timestamp")
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 23 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  7 +++++-
 2 files changed, 23 insertions(+), 7 deletions(-)

Comments

Rahul Rameshbabu Jan. 26, 2023, 1:09 a.m. UTC | #1
On Thu, 26 Jan, 2023 04:02:06 +0300 Vadim Fedorenko <vfedorenko@novek.ru> wrote:
> From: Vadim Fedorenko <vadfed@meta.com>
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> index 15a5a57b47b8..6e559b856afb 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> @@ -289,14 +289,19 @@ struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
>  static inline
>  void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>  {
> -	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
> +	struct sk_buff **skb_item;
>  
> +	WARN_ONCE(mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");

I think you meant 'WARN_ONCE(!mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");'?

It is only safe to push in the fifo when the fifo has room. Therefore,
we should warn when a push is attempted with no more room in the fifo.
Does this warning, as is, not trigger for you during testing in normal
conditions?

> +	skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>  	*skb_item = skb;
>  }
Rahul Rameshbabu Jan. 26, 2023, 1:27 a.m. UTC | #2
On Thu, 26 Jan, 2023 04:02:06 +0300 Vadim Fedorenko <vfedorenko@novek.ru> wrote:
> From: Vadim Fedorenko <vadfed@meta.com>
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index b72de2b520ec..4ac7483dcbcc 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -94,14 +94,23 @@ static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
>  
>  	ptpsq->cq_stats->resync_event++;
>  
> -	while (skb_cc != skb_id) {
> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
> +		pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");

Lets use mlx5_core_err_rl(ptpsq->txqsq.mdev, "out-of-order ptp cqe\n") instead?

> +		return false;
> +	}
> +
> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>  		skb_tstamp_tx(skb, &hwts);
>  		ptpsq->cq_stats->resync_cqe++;
>  		napi_consume_skb(skb, budget);
>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  	}
Tariq Toukan Jan. 26, 2023, 6:53 a.m. UTC | #3
On 26/01/2023 3:02, Vadim Fedorenko wrote:
> From: Vadim Fedorenko <vadfed@meta.com>
> 
> Fifo pointers were not checked during push and pop operations and this
> could potentially lead to use-after-free or skb leak under heavy PTP
> traffic.
> 
> Also there were OOO cqe spotted which lead to drain of the queue and
> use-after-free because of lack of fifo pointers check. Special check
> is added to avoid resync operation if SKB could not exist in the fifo
> because of OOO cqe (skb_id must be between consumer and producer index).
> 

Hi,

Let's hold on with this patch.
I don't think we understand the root cause. I'm also not sure this patch 
doesn't degrade the successful flow. See comment below.

We don't expect an xmit operation coming from the kernel while the TXQ 
is stopped. This might be the reason for the fifo overflow. Does it 
happen? If so, let's understand why and fix.

Your fix to mlx5e_skb_fifo_has_room() should help with preventing the 
fifo overflow. Does the issue still occur even after your patch [1]?

Also, it's not easy to decisively determine that a CQE arrived OOO. I 
doubt this can happen. The SQ is cyclic and works in-order. It's more 
probably a full cycle of lost CQEs.

BTW, what value do you see in your environment for
MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter) ?

Thanks,
Tariq

[1] [PATCH net v3 1/2] mlx5: fix skb leak while fifo resync and push

> Fixes: 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port timestamp")
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>   .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 23 ++++++++++++++-----
>   .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  7 +++++-
>   2 files changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index b72de2b520ec..4ac7483dcbcc 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -86,7 +86,7 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>   	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>   }
>   
> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
>   					     u16 skb_id, int budget)
>   {
>   	struct skb_shared_hwtstamps hwts = {};
> @@ -94,14 +94,23 @@ static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
>   
>   	ptpsq->cq_stats->resync_event++;
>   
> -	while (skb_cc != skb_id) {
> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)

This can give false positives near the edge of the fifo (wraparound).

> +		pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
> +		return false;
> +	}
> +
> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>   		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>   		skb_tstamp_tx(skb, &hwts);
>   		ptpsq->cq_stats->resync_cqe++;
>   		napi_consume_skb(skb, budget);
>   		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>   	}
> +
> +	if (!skb)
> +		return false;
> +
> +	return true;
>   }
>   
>   static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> @@ -111,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>   	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>   	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>   	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb = NULL;
>   	ktime_t hwtstamp;
>   
>   	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
> @@ -120,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>   		goto out;
>   	}
>   
> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget)) {
> +		goto out;
> +	}
>   
>   	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>   	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> index 15a5a57b47b8..6e559b856afb 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> @@ -289,14 +289,19 @@ struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
>   static inline
>   void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>   {
> -	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
> +	struct sk_buff **skb_item;
>   
> +	WARN_ONCE(mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");
> +	skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>   	*skb_item = skb;
>   }
>   
>   static inline
>   struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
>   {
> +	if (*fifo->pc == *fifo->cc)
> +		return NULL;
> +
>   	return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
>   }
>
Vadim Fedorenko Jan. 26, 2023, 10:53 a.m. UTC | #4
On 26/01/2023 01:09, Rahul Rameshbabu wrote:
> On Thu, 26 Jan, 2023 04:02:06 +0300 Vadim Fedorenko <vfedorenko@novek.ru> wrote:
>> From: Vadim Fedorenko <vadfed@meta.com>
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> index 15a5a57b47b8..6e559b856afb 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> @@ -289,14 +289,19 @@ struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
>>   static inline
>>   void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>>   {
>> -	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>> +	struct sk_buff **skb_item;
>>   
>> +	WARN_ONCE(mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");
> 
> I think you meant 'WARN_ONCE(!mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");'?
> 
Yes, you are absolutely right, mistyping during re-arrange.
Will improve in the next spin.

> It is only safe to push in the fifo when the fifo has room. Therefore,
> we should warn when a push is attempted with no more room in the fifo.
> Does this warning, as is, not trigger for you during testing in normal
> conditions?
> 
>> +	skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>   	*skb_item = skb;
>>   }
Vadim Fedorenko Jan. 26, 2023, 10:53 a.m. UTC | #5
On 26/01/2023 01:27, Rahul Rameshbabu wrote:
> On Thu, 26 Jan, 2023 04:02:06 +0300 Vadim Fedorenko <vfedorenko@novek.ru> wrote:
>> From: Vadim Fedorenko <vadfed@meta.com>
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> index b72de2b520ec..4ac7483dcbcc 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> @@ -94,14 +94,23 @@ static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
>>   
>>   	ptpsq->cq_stats->resync_event++;
>>   
>> -	while (skb_cc != skb_id) {
>> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
>> +		pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
> 
> Lets use mlx5_core_err_rl(ptpsq->txqsq.mdev, "out-of-order ptp cqe\n") instead?

Sure, thanks for suggestion.

> 
>> +		return false;
>> +	}
>> +
>> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>>   		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>>   		skb_tstamp_tx(skb, &hwts);
>>   		ptpsq->cq_stats->resync_cqe++;
>>   		napi_consume_skb(skb, budget);
>>   		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   	}
Vadim Fedorenko Jan. 26, 2023, 1:22 p.m. UTC | #6
On 26/01/2023 06:53, Tariq Toukan wrote:
> 
> 
> On 26/01/2023 3:02, Vadim Fedorenko wrote:
>> From: Vadim Fedorenko <vadfed@meta.com>
>>
>> Fifo pointers were not checked during push and pop operations and this
>> could potentially lead to use-after-free or skb leak under heavy PTP
>> traffic.
>>
>> Also there were OOO cqe spotted which lead to drain of the queue and
>> use-after-free because of lack of fifo pointers check. Special check
>> is added to avoid resync operation if SKB could not exist in the fifo
>> because of OOO cqe (skb_id must be between consumer and producer index).
>>
> 
> Hi,
> 
> Let's hold on with this patch.
> I don't think we understand the root cause. I'm also not sure this patch 
> doesn't degrade the successful flow. See comment below.
> 
> We don't expect an xmit operation coming from the kernel while the TXQ 
> is stopped. This might be the reason for the fifo overflow. Does it 
> happen? If so, let's understand why and fix.
> 
> Your fix to mlx5e_skb_fifo_has_room() should help with preventing the 
> fifo overflow. Does the issue still occur even after your patch [1]?

Well, I do agree that there should be no overflow after the first patch. 
I added WARN_ONCE just to be sure that future changes will not trigger
overflow. But I'm OK to remove it if we are confident enough.

> Also, it's not easy to decisively determine that a CQE arrived OOO. I 
> doubt this can happen. The SQ is cyclic and works in-order. It's more 
> probably a full cycle of lost CQEs.

I have shown logs of OOO CQEs in previous version, but I can show it 
once again:

<idle>-0       [000] ..s..  2306.825713: mlx5e_ptp_ts_cqe_drop: mlx5: 
ptp ts cqe drop detected, skb_cc = 185, skb_id = 186
<idle>-0       [000] ..s..  2306.825719: 
mlx5e_ptp_skb_fifo_ts_cqe_resync: mlx5: ptp ts cqe resync, skb_cc = 186, 
skb_id = 186, cpuid = 8
<idle>-0       [000] ..s..  2306.825730: mlx5e_ptp_handle_ts_cqe: mlx5: 
ptp handle ts cqe, skb_cc = 187, skb_id = 185
<idle>-0       [000] ..s..  2306.825730: mlx5e_ptp_ts_cqe_drop: mlx5: 
ptp ts cqe drop detected, skb_cc = 187, skb_id = 185
<idle>-0       [000] ..s..  2306.825747: mlx5e_ptp_handle_ts_cqe: mlx5: 
ptp handle ts cqe, skb_cc = 187, skb_id = 187
<idle>-0       [000] ..s..  2306.825932: mlx5e_ptp_handle_ts_cqe: mlx5: 
ptp handle ts cqe, skb_cc = 188, skb_id = 188
<idle>-0       [000] ..s..  2306.825948: mlx5e_ptp_handle_ts_cqe: mlx5: 
ptp handle ts cqe, skb_cc = 189, skb_id = 189
<idle>-0       [000] ..s..  2306.825965: mlx5e_ptp_handle_ts_cqe: mlx5: 
ptp handle ts cqe, skb_cc = 190, skb_id = 190

In this example skb_cc is masked value, not the full value of the 
counter, but it still shows the problem. We can see that CQE with 
skb_id=186 has arrived when skb_cc was 185. That triggered resync, which 
flushed skb_id 185 from the queue. Then skb_id 186 was processed and 
after that skb_id 185 has arrived out-of-order. With current patch 
applied, this OOO CQE was simply skipped in resync. Then skb_ids 
187,188,189 and 190 have arrived in order.

Without current patch applied, second resync (when triggered by skb_id 
185) will trash all SKBs in the queue because there is no such id in the 
queue. An even more, without frist patch applied, it will trash all the 
queue until cc index overlaps and gets to the requested SKB. But this 
leads to use-after-free (skb_tstamp_tx(skb, &hwts)) and double-free for 
the last element later in napi_consume_skb.

If need more information I'm happy to gather it.

> 
> BTW, what value do you see in your environment for
> MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter) ?

In our setup: ts_cqe_metadata_size2wqe_counter = 8
> 
> Thanks,
> Tariq
> 
> [1] [PATCH net v3 1/2] mlx5: fix skb leak while fifo resync and push
> 
>> Fixes: 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port 
>> timestamp")
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>>   .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 23 ++++++++++++++-----
>>   .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  7 +++++-
>>   2 files changed, 23 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> index b72de2b520ec..4ac7483dcbcc 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> @@ -86,7 +86,7 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq 
>> *ptpsq, u16 skb_cc, u16 skb
>>       return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>>   }
>> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq 
>> *ptpsq, u16 skb_cc,
>> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq 
>> *ptpsq, u16 skb_cc,
>>                            u16 skb_id, int budget)
>>   {
>>       struct skb_shared_hwtstamps hwts = {};
>> @@ -94,14 +94,23 @@ static void 
>> mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
>>       ptpsq->cq_stats->resync_event++;
>> -    while (skb_cc != skb_id) {
>> -        skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>> +    if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
> 
> This can give false positives near the edge of the fifo (wraparound).

Can you please provide values that will trigger false positive here? I 
explained the reasoning of such if statement to Jakub in the previous 
version and I'm happy to improve this check.

> 
>> +        pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
>> +        return false;
>> +    }
>> +
>> +    while (skb_cc != skb_id && (skb = 
>> mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>>           hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>>           skb_tstamp_tx(skb, &hwts);
>>           ptpsq->cq_stats->resync_cqe++;
>>           napi_consume_skb(skb, budget);
>>           skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>       }
>> +
>> +    if (!skb)
>> +        return false;
>> +
>> +    return true;
>>   }
>>   static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>> @@ -111,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
>> mlx5e_ptpsq *ptpsq,
>>       u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>>       u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>       struct mlx5e_txqsq *sq = &ptpsq->txqsq;
>> -    struct sk_buff *skb;
>> +    struct sk_buff *skb = NULL;
>>       ktime_t hwtstamp;
>>       if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
>> @@ -120,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct 
>> mlx5e_ptpsq *ptpsq,
>>           goto out;
>>       }
>> -    if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
>> -        mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
>> +    if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
>> +        !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, 
>> budget)) {
>> +        goto out;
>> +    }
>>       skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>>       hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, 
>> get_cqe_ts(cqe));
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> index 15a5a57b47b8..6e559b856afb 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> @@ -289,14 +289,19 @@ struct sk_buff **mlx5e_skb_fifo_get(struct 
>> mlx5e_skb_fifo *fifo, u16 i)
>>   static inline
>>   void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff 
>> *skb)
>>   {
>> -    struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>> +    struct sk_buff **skb_item;
>> +    WARN_ONCE(mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");
>> +    skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>       *skb_item = skb;
>>   }
>>   static inline
>>   struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
>>   {
>> +    if (*fifo->pc == *fifo->cc)
>> +        return NULL;
>> +
>>       return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
>>   }
kernel test robot Jan. 28, 2023, 4:42 p.m. UTC | #7
Hi Vadim,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
patch link:    https://lore.kernel.org/r/20230126010206.13483-3-vfedorenko%40novek.ru
patch subject: [PATCH net v3 2/2] mlx5: fix possible ptp queue fifo use-after-free
config: i386-randconfig-a015 (https://download.01.org/0day-ci/archive/20230129/202301290020.2pCidjI4-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2516a9785583b92ac82262a813203de696096ccd
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
        git checkout 2516a9785583b92ac82262a813203de696096ccd
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/net/ethernet/mellanox/mlx5/core/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:93:18: warning: unused variable 'skb' [-Wunused-variable]
           struct sk_buff *skb;
                           ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:92:30: warning: unused variable 'hwts' [-Wunused-variable]
           struct skb_shared_hwtstamps hwts = {};
                                       ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:102:2: error: expected identifier or '('
           while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
           ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:110:2: error: expected identifier or '('
           if (!skb)
           ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:113:2: error: expected identifier or '('
           return true;
           ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:114:1: error: extraneous closing brace ('}')
   }
   ^
   2 warnings and 4 errors generated.


vim +/skb +93 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c

58a518948f6015 Aya Levin       2022-07-04   88  
2516a9785583b9 Vadim Fedorenko 2023-01-26   89  static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
83256998eee9fa Vadim Fedorenko 2023-01-26   90  					     u16 skb_id, int budget)
58a518948f6015 Aya Levin       2022-07-04   91  {
58a518948f6015 Aya Levin       2022-07-04  @92  	struct skb_shared_hwtstamps hwts = {};
58a518948f6015 Aya Levin       2022-07-04  @93  	struct sk_buff *skb;
58a518948f6015 Aya Levin       2022-07-04   94  
58a518948f6015 Aya Levin       2022-07-04   95  	ptpsq->cq_stats->resync_event++;
58a518948f6015 Aya Levin       2022-07-04   96  
2516a9785583b9 Vadim Fedorenko 2023-01-26   97  	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
2516a9785583b9 Vadim Fedorenko 2023-01-26   98  		pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
2516a9785583b9 Vadim Fedorenko 2023-01-26   99  		return false;
2516a9785583b9 Vadim Fedorenko 2023-01-26  100  	}
2516a9785583b9 Vadim Fedorenko 2023-01-26  101  
2516a9785583b9 Vadim Fedorenko 2023-01-26  102  	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
58a518948f6015 Aya Levin       2022-07-04  103  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
58a518948f6015 Aya Levin       2022-07-04  104  		skb_tstamp_tx(skb, &hwts);
58a518948f6015 Aya Levin       2022-07-04  105  		ptpsq->cq_stats->resync_cqe++;
83256998eee9fa Vadim Fedorenko 2023-01-26  106  		napi_consume_skb(skb, budget);
58a518948f6015 Aya Levin       2022-07-04  107  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
58a518948f6015 Aya Levin       2022-07-04  108  	}
2516a9785583b9 Vadim Fedorenko 2023-01-26  109  
2516a9785583b9 Vadim Fedorenko 2023-01-26  110  	if (!skb)
2516a9785583b9 Vadim Fedorenko 2023-01-26  111  		return false;
2516a9785583b9 Vadim Fedorenko 2023-01-26  112  
2516a9785583b9 Vadim Fedorenko 2023-01-26  113  	return true;
58a518948f6015 Aya Levin       2022-07-04  114  }
58a518948f6015 Aya Levin       2022-07-04  115
kernel test robot Jan. 28, 2023, 5:55 p.m. UTC | #8
Hi Vadim,

I love your patch! Yet something to improve:

[auto build test ERROR on net/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
patch link:    https://lore.kernel.org/r/20230126010206.13483-3-vfedorenko%40novek.ru
patch subject: [PATCH net v3 2/2] mlx5: fix possible ptp queue fifo use-after-free
config: openrisc-randconfig-r014-20230123 (https://download.01.org/0day-ci/archive/20230129/202301290138.HFiMVZzA-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2516a9785583b92ac82262a813203de696096ccd
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
        git checkout 2516a9785583b92ac82262a813203de696096ccd
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=openrisc olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=openrisc SHELL=/bin/bash drivers/net/ethernet/mellanox/mlx5/core/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/build_bug.h:5,
                    from include/linux/container_of.h:5,
                    from include/linux/list.h:5,
                    from include/linux/timer.h:5,
                    from include/linux/netdevice.h:24,
                    from include/linux/if_vlan.h:10,
                    from drivers/net/ethernet/mellanox/mlx5/core/en.h:35,
                    from drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h:7,
                    from drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:4:
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c: In function 'mlx5e_ptp_skb_fifo_ts_cqe_resync':
>> include/linux/compiler.h:56:23: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
      56 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
         |                       ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:97:9: note: in expansion of macro 'if'
      97 |         if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
         |         ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:99:17: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'
      99 |                 return false;
         |                 ^~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:93:25: warning: unused variable 'skb' [-Wunused-variable]
      93 |         struct sk_buff *skb;
         |                         ^~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:92:37: warning: unused variable 'hwts' [-Wunused-variable]
      92 |         struct skb_shared_hwtstamps hwts = {};
         |                                     ^~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c: At top level:
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:102:9: error: expected identifier or '(' before 'while'
     102 |         while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
         |         ^~~~~
>> include/linux/compiler.h:56:23: error: expected identifier or '(' before 'if'
      56 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
         |                       ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:110:9: note: in expansion of macro 'if'
     110 |         if (!skb)
         |         ^~
>> include/linux/compiler.h:72:2: error: expected identifier or '(' before ')' token
      72 | })
         |  ^
   include/linux/compiler.h:58:69: note: in expansion of macro '__trace_if_value'
      58 | #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
         |                                                                     ^~~~~~~~~~~~~~~~
   include/linux/compiler.h:56:28: note: in expansion of macro '__trace_if_var'
      56 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
         |                            ^~~~~~~~~~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:110:9: note: in expansion of macro 'if'
     110 |         if (!skb)
         |         ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:113:9: error: expected identifier or '(' before 'return'
     113 |         return true;
         |         ^~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:114:1: error: expected identifier or '(' before '}' token
     114 | }
         | ^


vim +56 include/linux/compiler.h

2bcd521a684cc9 Steven Rostedt 2008-11-21  50  
2bcd521a684cc9 Steven Rostedt 2008-11-21  51  #ifdef CONFIG_PROFILE_ALL_BRANCHES
2bcd521a684cc9 Steven Rostedt 2008-11-21  52  /*
2bcd521a684cc9 Steven Rostedt 2008-11-21  53   * "Define 'is'", Bill Clinton
2bcd521a684cc9 Steven Rostedt 2008-11-21  54   * "Define 'if'", Steven Rostedt
2bcd521a684cc9 Steven Rostedt 2008-11-21  55   */
a15fd609ad53a6 Linus Torvalds 2019-03-20 @56  #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
a15fd609ad53a6 Linus Torvalds 2019-03-20  57  
a15fd609ad53a6 Linus Torvalds 2019-03-20  58  #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
a15fd609ad53a6 Linus Torvalds 2019-03-20  59  
a15fd609ad53a6 Linus Torvalds 2019-03-20  60  #define __trace_if_value(cond) ({			\
2bcd521a684cc9 Steven Rostedt 2008-11-21  61  	static struct ftrace_branch_data		\
e04462fb82f8dd Miguel Ojeda   2018-09-03  62  		__aligned(4)				\
33def8498fdde1 Joe Perches    2020-10-21  63  		__section("_ftrace_branch")		\
a15fd609ad53a6 Linus Torvalds 2019-03-20  64  		__if_trace = {				\
2bcd521a684cc9 Steven Rostedt 2008-11-21  65  			.func = __func__,		\
2bcd521a684cc9 Steven Rostedt 2008-11-21  66  			.file = __FILE__,		\
2bcd521a684cc9 Steven Rostedt 2008-11-21  67  			.line = __LINE__,		\
2bcd521a684cc9 Steven Rostedt 2008-11-21  68  		};					\
a15fd609ad53a6 Linus Torvalds 2019-03-20  69  	(cond) ?					\
a15fd609ad53a6 Linus Torvalds 2019-03-20  70  		(__if_trace.miss_hit[1]++,1) :		\
a15fd609ad53a6 Linus Torvalds 2019-03-20  71  		(__if_trace.miss_hit[0]++,0);		\
a15fd609ad53a6 Linus Torvalds 2019-03-20 @72  })
a15fd609ad53a6 Linus Torvalds 2019-03-20  73
kernel test robot Jan. 28, 2023, 9:42 p.m. UTC | #9
Hi Vadim,

I love your patch! Yet something to improve:

[auto build test ERROR on net/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
patch link:    https://lore.kernel.org/r/20230126010206.13483-3-vfedorenko%40novek.ru
patch subject: [PATCH net v3 2/2] mlx5: fix possible ptp queue fifo use-after-free
config: i386-randconfig-a015 (https://download.01.org/0day-ci/archive/20230129/202301290528.ZvflGIBO-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2516a9785583b92ac82262a813203de696096ccd
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
        git checkout 2516a9785583b92ac82262a813203de696096ccd
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:93:18: warning: unused variable 'skb' [-Wunused-variable]
           struct sk_buff *skb;
                           ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:92:30: warning: unused variable 'hwts' [-Wunused-variable]
           struct skb_shared_hwtstamps hwts = {};
                                       ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:102:2: error: expected identifier or '('
           while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
           ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:110:2: error: expected identifier or '('
           if (!skb)
           ^
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:113:2: error: expected identifier or '('
           return true;
           ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:114:1: error: extraneous closing brace ('}')
   }
   ^
   2 warnings and 4 errors generated.


vim +102 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c

    88	
    89	static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
    90						     u16 skb_id, int budget)
    91	{
    92		struct skb_shared_hwtstamps hwts = {};
    93		struct sk_buff *skb;
    94	
    95		ptpsq->cq_stats->resync_event++;
    96	
    97		if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
    98			pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
    99			return false;
   100		}
   101	
 > 102		while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
   103			hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
   104			skb_tstamp_tx(skb, &hwts);
   105			ptpsq->cq_stats->resync_cqe++;
   106			napi_consume_skb(skb, budget);
   107			skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
   108		}
   109	
   110		if (!skb)
   111			return false;
   112	
   113		return true;
 > 114	}
   115
kernel test robot Jan. 29, 2023, 12:05 a.m. UTC | #10
Hi Vadim,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
patch link:    https://lore.kernel.org/r/20230126010206.13483-3-vfedorenko%40novek.ru
patch subject: [PATCH net v3 2/2] mlx5: fix possible ptp queue fifo use-after-free
config: alpha-allmodconfig (https://download.01.org/0day-ci/archive/20230129/202301290714.hjuYGOXr-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2516a9785583b92ac82262a813203de696096ccd
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
        git checkout 2516a9785583b92ac82262a813203de696096ccd
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=alpha olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=alpha SHELL=/bin/bash drivers/bluetooth/ drivers/net/ethernet/mediatek/ drivers/net/ethernet/mellanox/mlx5/core/ drivers/pci/controller/ drivers/power/supply/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c: In function 'mlx5e_ptp_skb_fifo_ts_cqe_resync':
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:97:9: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
      97 |         if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
         |         ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:99:17: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'
      99 |                 return false;
         |                 ^~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:93:25: warning: unused variable 'skb' [-Wunused-variable]
      93 |         struct sk_buff *skb;
         |                         ^~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:92:37: warning: unused variable 'hwts' [-Wunused-variable]
      92 |         struct skb_shared_hwtstamps hwts = {};
         |                                     ^~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c: At top level:
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:102:9: error: expected identifier or '(' before 'while'
     102 |         while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
         |         ^~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:110:9: error: expected identifier or '(' before 'if'
     110 |         if (!skb)
         |         ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:113:9: error: expected identifier or '(' before 'return'
     113 |         return true;
         |         ^~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:114:1: error: expected identifier or '(' before '}' token
     114 | }
         | ^


vim +/if +97 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c

    88	
    89	static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
    90						     u16 skb_id, int budget)
    91	{
    92		struct skb_shared_hwtstamps hwts = {};
    93		struct sk_buff *skb;
    94	
    95		ptpsq->cq_stats->resync_event++;
    96	
  > 97		if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
    98			pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
    99			return false;
   100		}
   101	
   102		while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
   103			hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
   104			skb_tstamp_tx(skb, &hwts);
   105			ptpsq->cq_stats->resync_cqe++;
   106			napi_consume_skb(skb, budget);
   107			skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
   108		}
   109	
   110		if (!skb)
   111			return false;
   112	
   113		return true;
   114	}
   115
kernel test robot Jan. 29, 2023, 12:32 p.m. UTC | #11
Hi Vadim,

I love your patch! Yet something to improve:

[auto build test ERROR on net/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
patch link:    https://lore.kernel.org/r/20230126010206.13483-3-vfedorenko%40novek.ru
patch subject: [PATCH net v3 2/2] mlx5: fix possible ptp queue fifo use-after-free
config: alpha-allmodconfig (https://download.01.org/0day-ci/archive/20230129/202301292055.CXz5AB3g-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2516a9785583b92ac82262a813203de696096ccd
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Vadim-Fedorenko/mlx5-fix-skb-leak-while-fifo-resync-and-push/20230128-120805
        git checkout 2516a9785583b92ac82262a813203de696096ccd
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=alpha olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=alpha SHELL=/bin/bash drivers/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c: In function 'mlx5e_ptp_skb_fifo_ts_cqe_resync':
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:97:9: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
      97 |         if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
         |         ^~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:99:17: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'
      99 |                 return false;
         |                 ^~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:93:25: warning: unused variable 'skb' [-Wunused-variable]
      93 |         struct sk_buff *skb;
         |                         ^~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:92:37: warning: unused variable 'hwts' [-Wunused-variable]
      92 |         struct skb_shared_hwtstamps hwts = {};
         |                                     ^~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c: At top level:
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:102:9: error: expected identifier or '(' before 'while'
     102 |         while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
         |         ^~~~~
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:110:9: error: expected identifier or '(' before 'if'
     110 |         if (!skb)
         |         ^~
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:113:9: error: expected identifier or '(' before 'return'
     113 |         return true;
         |         ^~~~~~
>> drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c:114:1: error: expected identifier or '(' before '}' token
     114 | }
         | ^


vim +102 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c

    88	
    89	static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
    90						     u16 skb_id, int budget)
    91	{
    92		struct skb_shared_hwtstamps hwts = {};
    93		struct sk_buff *skb;
    94	
    95		ptpsq->cq_stats->resync_event++;
    96	
    97		if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
    98			pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
    99			return false;
   100		}
   101	
 > 102		while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
   103			hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
   104			skb_tstamp_tx(skb, &hwts);
   105			ptpsq->cq_stats->resync_cqe++;
   106			napi_consume_skb(skb, budget);
   107			skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
   108		}
   109	
 > 110		if (!skb)
   111			return false;
   112	
 > 113		return true;
 > 114	}
   115
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index b72de2b520ec..4ac7483dcbcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -86,7 +86,7 @@  static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
 	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
 }
 
-static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
+static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc,
 					     u16 skb_id, int budget)
 {
 	struct skb_shared_hwtstamps hwts = {};
@@ -94,14 +94,23 @@  static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_
 
 	ptpsq->cq_stats->resync_event++;
 
-	while (skb_cc != skb_id) {
-		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id)
+		pr_err_ratelimited("mlx5e: out-of-order ptp cqe\n");
+		return false;
+	}
+
+	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
 		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
 		skb_tstamp_tx(skb, &hwts);
 		ptpsq->cq_stats->resync_cqe++;
 		napi_consume_skb(skb, budget);
 		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 	}
+
+	if (!skb)
+		return false;
+
+	return true;
 }
 
 static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
@@ -111,7 +120,7 @@  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
 	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
 	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	ktime_t hwtstamp;
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
@@ -120,8 +129,10 @@  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 		goto out;
 	}
 
-	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
-		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget);
+	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
+	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id, budget)) {
+		goto out;
+	}
 
 	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
 	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 15a5a57b47b8..6e559b856afb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -289,14 +289,19 @@  struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i)
 static inline
 void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
 {
-	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
+	struct sk_buff **skb_item;
 
+	WARN_ONCE(mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");
+	skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
 	*skb_item = skb;
 }
 
 static inline
 struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
 {
+	if (*fifo->pc == *fifo->cc)
+		return NULL;
+
 	return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
 }