diff mbox series

[net-next,v4,1/2] bnxt_en: cache only 24 bits of hw counter

Message ID 20241029205453.2290688-1-vadfed@meta.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v4,1/2] bnxt_en: cache only 24 bits of hw counter | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: edumazet@google.com
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-10-31--09-00 (tests: 779)

Commit Message

Vadim Fedorenko Oct. 29, 2024, 8:54 p.m. UTC
This hardware can provide only 48 bits of cycle counter. We can leave
only 24 bits in the cache to extend RX timestamps from 32 bits to 48
bits. Lower 8 bits of the cached value will be used to check for
roll-over while extending to full 48 bits.
This change makes cache writes atomic even on 32 bit platforms and we
can simply use READ_ONCE()/WRITE_ONCE() pair and remove spinlock. The
configuration structure will be also reduced by 4 bytes.

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c |  8 ++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 18 +++---------------
 2 files changed, 7 insertions(+), 19 deletions(-)

Comments

Michael Chan Oct. 30, 2024, 4:53 p.m. UTC | #1
On Tue, Oct 29, 2024 at 1:55 PM Vadim Fedorenko <vadfed@meta.com> wrote:
>
> This hardware can provide only 48 bits of cycle counter. We can leave
> only 24 bits in the cache to extend RX timestamps from 32 bits to 48
> bits. Lower 8 bits of the cached value will be used to check for
> roll-over while extending to full 48 bits.
> This change makes cache writes atomic even on 32 bit platforms and we
> can simply use READ_ONCE()/WRITE_ONCE() pair and remove spinlock. The
> configuration structure will be also reduced by 4 bytes.
>
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>

Thanks.
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Jakub Kicinski Nov. 3, 2024, 7:22 p.m. UTC | #2
On Tue, 29 Oct 2024 13:54:52 -0700 Vadim Fedorenko wrote:
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
> index fa514be87650..820c7e83e586 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
> @@ -106,7 +106,7 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp)
>  	if (!ptp)
>  		return;
>  	spin_lock_irqsave(&ptp->ptp_lock, flags);
> -	WRITE_ONCE(ptp->old_time, ptp->current_time);
> +	WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));

the casts to u32 seem unnecessary since write to u32 will truncate
the value, anyway, and they make the lines go over 80 columns

>  	bnxt_refclk_read(bp, NULL, &ptp->current_time);
>  	spin_unlock_irqrestore(&ptp->ptp_lock, flags);
>  }
> @@ -174,7 +174,7 @@ void bnxt_ptp_update_current_time(struct bnxt *bp)
>  	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
>  
>  	bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
> -	WRITE_ONCE(ptp->old_time, ptp->current_time);
> +	WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));
>  }
>  
>  static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
> @@ -813,7 +813,7 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
>  	if (!ptp)
>  		return -ENODEV;
>  
> -	BNXT_READ_TIME64(ptp, time, ptp->old_time);
> +	time = (u64)(READ_ONCE(ptp->old_time) << BNXT_HI_TIMER_SHIFT);

And this cast looks misplaced, I presume you want the shift to operate
on 64b. The way this is written the shift will be truncated to 32b,
and then we will promote, with top 32b being all 0.
Vadim Fedorenko Nov. 3, 2024, 9:17 p.m. UTC | #3
On 03/11/2024 19:22, Jakub Kicinski wrote:
> On Tue, 29 Oct 2024 13:54:52 -0700 Vadim Fedorenko wrote:
>> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
>> index fa514be87650..820c7e83e586 100644
>> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
>> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
>> @@ -106,7 +106,7 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp)
>>   	if (!ptp)
>>   		return;
>>   	spin_lock_irqsave(&ptp->ptp_lock, flags);
>> -	WRITE_ONCE(ptp->old_time, ptp->current_time);
>> +	WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));
> 
> the casts to u32 seem unnecessary since write to u32 will truncate
> the value, anyway, and they make the lines go over 80 columns
> 
>>   	bnxt_refclk_read(bp, NULL, &ptp->current_time);
>>   	spin_unlock_irqrestore(&ptp->ptp_lock, flags);
>>   }
>> @@ -174,7 +174,7 @@ void bnxt_ptp_update_current_time(struct bnxt *bp)
>>   	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
>>   
>>   	bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
>> -	WRITE_ONCE(ptp->old_time, ptp->current_time);
>> +	WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));
>>   }
>>   
>>   static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
>> @@ -813,7 +813,7 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
>>   	if (!ptp)
>>   		return -ENODEV;
>>   
>> -	BNXT_READ_TIME64(ptp, time, ptp->old_time);
>> +	time = (u64)(READ_ONCE(ptp->old_time) << BNXT_HI_TIMER_SHIFT);
> 
> And this cast looks misplaced, I presume you want the shift to operate
> on 64b. The way this is written the shift will be truncated to 32b,
> and then we will promote, with top 32b being all 0.

Indeed, this cast is misplaced. I'll do v5 soon, thanks!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index fa514be87650..820c7e83e586 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -106,7 +106,7 @@  static void bnxt_ptp_get_current_time(struct bnxt *bp)
 	if (!ptp)
 		return;
 	spin_lock_irqsave(&ptp->ptp_lock, flags);
-	WRITE_ONCE(ptp->old_time, ptp->current_time);
+	WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));
 	bnxt_refclk_read(bp, NULL, &ptp->current_time);
 	spin_unlock_irqrestore(&ptp->ptp_lock, flags);
 }
@@ -174,7 +174,7 @@  void bnxt_ptp_update_current_time(struct bnxt *bp)
 	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
 
 	bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time);
-	WRITE_ONCE(ptp->old_time, ptp->current_time);
+	WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));
 }
 
 static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta)
@@ -813,7 +813,7 @@  int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
 	if (!ptp)
 		return -ENODEV;
 
-	BNXT_READ_TIME64(ptp, time, ptp->old_time);
+	time = (u64)(READ_ONCE(ptp->old_time) << BNXT_HI_TIMER_SHIFT);
 	*ts = (time & BNXT_HI_TIMER_MASK) | pkt_ts;
 	if (pkt_ts < (time & BNXT_LO_TIMER_MASK))
 		*ts += BNXT_LO_TIMER_MASK + 1;
@@ -1079,7 +1079,7 @@  int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
 
 		spin_lock_irqsave(&ptp->ptp_lock, flags);
 		bnxt_refclk_read(bp, NULL, &ptp->current_time);
-		WRITE_ONCE(ptp->old_time, ptp->current_time);
+		WRITE_ONCE(ptp->old_time, (u32)(ptp->current_time >> BNXT_HI_TIMER_SHIFT));
 		spin_unlock_irqrestore(&ptp->ptp_lock, flags);
 		ptp_schedule_worker(ptp->ptp_clock, 0);
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index f322466ecad3..3ac5cbc1c5c4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -21,6 +21,7 @@ 
 #define BNXT_DEVCLK_FREQ	1000000
 #define BNXT_LO_TIMER_MASK	0x0000ffffffffUL
 #define BNXT_HI_TIMER_MASK	0xffff00000000UL
+#define BNXT_HI_TIMER_SHIFT	24
 
 #define BNXT_PTP_DFLT_TX_TMO	1000 /* ms */
 #define BNXT_PTP_QTS_TIMEOUT	1000
@@ -106,10 +107,11 @@  struct bnxt_ptp_cfg {
 	/* serialize ts tx request queuing */
 	spinlock_t		ptp_tx_lock;
 	u64			current_time;
-	u64			old_time;
 	unsigned long		next_period;
 	unsigned long		next_overflow_check;
 	u32			cmult;
+	/* cache of upper 24 bits of cyclecoutner. 8 bits are used to check for roll-over */
+	u32			old_time;
 	/* a 23b shift cyclecounter will overflow in ~36 mins.  Check overflow every 18 mins. */
 	#define BNXT_PHC_OVERFLOW_PERIOD	(18 * 60 * HZ)
 
@@ -145,20 +147,6 @@  struct bnxt_ptp_cfg {
 	struct bnxt_ptp_stats	stats;
 };
 
-#if BITS_PER_LONG == 32
-#define BNXT_READ_TIME64(ptp, dst, src)				\
-do {								\
-	unsigned long flags;					\
-								\
-	spin_lock_irqsave(&(ptp)->ptp_lock, flags);		\
-	(dst) = (src);						\
-	spin_unlock_irqrestore(&(ptp)->ptp_lock, flags);	\
-} while (0)
-#else
-#define BNXT_READ_TIME64(ptp, dst, src)		\
-	((dst) = READ_ONCE(src))
-#endif
-
 #define BNXT_PTP_INC_TX_AVAIL(ptp)		\
 do {						\
 	spin_lock_bh(&(ptp)->ptp_tx_lock);	\