diff mbox series

[v2,net-next,1/2] bnx2x: Utilize firmware 7.13.21.0

Message ID 20211217165552.746-1-manishc@marvell.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series [v2,net-next,1/2] bnx2x: Utilize firmware 7.13.21.0 | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 101 this patch: 101
netdev/cc_maintainers warning 2 maintainers not CCed: skalluru@marvell.com davem@davemloft.net
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 101 this patch: 101
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Manish Chopra Dec. 17, 2021, 4:55 p.m. UTC
This new firmware addresses few important issues and enhancements
as mentioned below -

- Support direct invalidation of FP HSI Ver per function ID, required for
  invalidating FP HSI Ver prior to each VF start, as there is no VF start
- BRB hardware block parity error detection support for the driver
- Fix the FCOE underrun flow
- Fix PSOD during FCoE BFS over the NIC ports after preboot driver
- Maintains backward compatibility

This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Alok Prasad <palok@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
---

v1->v2:
------------
* Modified the patch such that driver to be backward compatible
  with older firmware too (New fw v7.13.21.0 on linux-firmware.git
  enables driver to maintain backward compatibility)
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
 .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75 +++++++++++++++-------
 5 files changed, 69 insertions(+), 28 deletions(-)

Comments

Paul Menzel March 9, 2022, 4:15 p.m. UTC | #1
Dear Manish,


Am 17.12.21 um 17:55 schrieb Manish Chopra:
> This new firmware addresses few important issues and enhancements
> as mentioned below -
> 
> - Support direct invalidation of FP HSI Ver per function ID, required for
>    invalidating FP HSI Ver prior to each VF start, as there is no VF start
> - BRB hardware block parity error detection support for the driver
> - Fix the FCOE underrun flow
> - Fix PSOD during FCoE BFS over the NIC ports after preboot driver
> - Maintains backward compatibility
> 
> This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
> 
> Signed-off-by: Manish Chopra <manishc@marvell.com>
> Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
> Signed-off-by: Alok Prasad <palok@marvell.com>
> Signed-off-by: Ariel Elior <aelior@marvell.com>
> ---
> 
> v1->v2:
> ------------
> * Modified the patch such that driver to be backward compatible
>    with older firmware too (New fw v7.13.21.0 on linux-firmware.git
>    enables driver to maintain backward compatibility)
> ---
>   drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
>   .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75 +++++++++++++++-------
>   5 files changed, 69 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> index 2b06d78b..a19dd67 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> @@ -1850,6 +1850,14 @@ struct bnx2x {
>   
>   	/* Vxlan/Geneve related information */
>   	u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
> +
> +#define FW_CAP_INVALIDATE_VF_FP_HSI	BIT(0)
> +	u32 fw_cap;
> +
> +	u32 fw_major;
> +	u32 fw_minor;
> +	u32 fw_rev;
> +	u32 fw_eng;
>   };
>   
>   /* Tx queues may be less or equal to Rx queues */
> @@ -2525,5 +2533,6 @@ enum {
>    * Meant for implicit re-load flows.
>    */
>   int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
> -
> +int bnx2x_init_firmware(struct bnx2x *bp);
> +void bnx2x_release_firmware(struct bnx2x *bp);
>   #endif /* bnx2x.h */
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> index 54a2334..8d36ebb 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
>   	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
>   	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
>   		/* build my FW version dword */
> -		u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
> -			(BCM_5710_FW_MINOR_VERSION << 8) +
> -			(BCM_5710_FW_REVISION_VERSION << 16) +
> -			(BCM_5710_FW_ENGINEERING_VERSION << 24);
> +		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
> +				(bp->fw_rev << 16) + (bp->fw_eng << 24);
>   
>   		/* read loaded FW from chip */
>   		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> index 3f84352..a84d015 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> @@ -241,6 +241,8 @@
>   	IRO[221].m2))
>   #define XSTORM_VF_TO_PF_OFFSET(funcId) \
>   	(IRO[48].base + ((funcId) * IRO[48].m1))
> +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)	\
> +	(IRO[386].base + ((fid) * IRO[386].m1))
>   #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
>   
>   /* eth hsi version */
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> index 622fadc..611efee 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> @@ -3024,7 +3024,8 @@ struct afex_stats {
>   
>   #define BCM_5710_FW_MAJOR_VERSION			7
>   #define BCM_5710_FW_MINOR_VERSION			13
> -#define BCM_5710_FW_REVISION_VERSION		15
> +#define BCM_5710_FW_REVISION_VERSION		21
> +#define BCM_5710_FW_REVISION_VERSION_V15	15
>   #define BCM_5710_FW_ENGINEERING_VERSION		0
>   #define BCM_5710_FW_COMPILE_FLAGS			1
>   
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> index aec666e..125dafe 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> @@ -74,9 +74,19 @@
>   	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
>   	__stringify(BCM_5710_FW_REVISION_VERSION) "."	\
>   	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
> +
> +#define FW_FILE_VERSION_V15				\
> +	__stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
> +	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
> +	__stringify(BCM_5710_FW_REVISION_VERSION_V15) "."	\
> +	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
> +
>   #define FW_FILE_NAME_E1		"bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
>   #define FW_FILE_NAME_E1H	"bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
>   #define FW_FILE_NAME_E2		"bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
> +#define FW_FILE_NAME_E1_V15	"bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw"
> +#define FW_FILE_NAME_E1H_V15	"bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw"
> +#define FW_FILE_NAME_E2_V15	"bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw"
>   
>   /* Time in jiffies before concluding the transmitter is hung */
>   #define TX_TIMEOUT		(5*HZ)
> @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
>   		  CHIP_IS_E1(bp) ? "everest1" :
>   		  CHIP_IS_E1H(bp) ? "everest1h" :
>   		  CHIP_IS_E2(bp) ? "everest2" : "everest3",
> -		  BCM_5710_FW_MAJOR_VERSION,
> -		  BCM_5710_FW_MINOR_VERSION,
> -		  BCM_5710_FW_REVISION_VERSION);
> +		  bp->fw_major, bp->fw_minor, bp->fw_rev);
>   
>   	return rc;
>   }
> @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>   
>   	bnx2x_read_fwinfo(bp);
>   
> +	if (IS_PF(bp)) {
> +		rc = bnx2x_init_firmware(bp);
> +
> +		if (rc) {
> +			bnx2x_free_mem_bp(bp);
> +			return rc;
> +		}
> +	}
> +
>   	func = BP_FUNC(bp);
>   
>   	/* need to reset chip if undi was active */
> @@ -12320,6 +12337,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>   
>   		rc = bnx2x_prev_unload(bp);
>   		if (rc) {
> +			bnx2x_release_firmware(bp);
>   			bnx2x_free_mem_bp(bp);
>   			return rc;
>   		}
> @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct bnx2x *bp)
>   	/* Check FW version */
>   	offset = be32_to_cpu(fw_hdr->fw_version.offset);
>   	fw_ver = firmware->data + offset;
> -	if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
> -	    (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
> -	    (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
> -	    (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
> +	if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
> +	    fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
>   		BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n",
> -		       fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
> -		       BCM_5710_FW_MAJOR_VERSION,
> -		       BCM_5710_FW_MINOR_VERSION,
> -		       BCM_5710_FW_REVISION_VERSION,
> -		       BCM_5710_FW_ENGINEERING_VERSION);
> +			  fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
> +			  bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
>   		return -EINVAL;
>   	}
>   
> @@ -13404,34 +13417,51 @@ static void be16_to_cpu_n(const u8 *_source, u8 *_target, u32 n)
>   	     (u8 *)bp->arr, len);					\
>   } while (0)
>   
> -static int bnx2x_init_firmware(struct bnx2x *bp)
> +int bnx2x_init_firmware(struct bnx2x *bp)
>   {
> -	const char *fw_file_name;
> +	const char *fw_file_name, *fw_file_name_v15;
>   	struct bnx2x_fw_file_hdr *fw_hdr;
>   	int rc;
>   
>   	if (bp->firmware)
>   		return 0;
>   
> -	if (CHIP_IS_E1(bp))
> +	if (CHIP_IS_E1(bp)) {
>   		fw_file_name = FW_FILE_NAME_E1;
> -	else if (CHIP_IS_E1H(bp))
> +		fw_file_name_v15 = FW_FILE_NAME_E1_V15;
> +	} else if (CHIP_IS_E1H(bp)) {
>   		fw_file_name = FW_FILE_NAME_E1H;
> -	else if (!CHIP_IS_E1x(bp))
> +		fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
> +	} else if (!CHIP_IS_E1x(bp)) {
>   		fw_file_name = FW_FILE_NAME_E2;
> -	else {
> +		fw_file_name_v15 = FW_FILE_NAME_E2_V15;
> +	} else {
>   		BNX2X_ERR("Unsupported chip revision\n");
>   		return -EINVAL;
>   	}
> +
>   	BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
>   
>   	rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
>   	if (rc) {
> -		BNX2X_ERR("Can't load firmware file %s\n",
> -			  fw_file_name);
> -		goto request_firmware_exit;
> +		BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
> +
> +		/* try to load prev version */
> +		rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
> +
> +		if (rc)
> +			goto request_firmware_exit;
> +
> +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
> +	} else {
> +		bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
> +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
>   	}
>   
> +	bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
> +	bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
> +	bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
> +
>   	rc = bnx2x_check_firmware(bp);
>   	if (rc) {
>   		BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
> @@ -13487,7 +13517,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
>   	return rc;
>   }
>   
> -static void bnx2x_release_firmware(struct bnx2x *bp)
> +void bnx2x_release_firmware(struct bnx2x *bp)
>   {
>   	kfree(bp->init_ops_offsets);
>   	kfree(bp->init_ops);
> @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
>   	return 0;
>   
>   init_one_freemem:
> +	bnx2x_release_firmware(bp);
>   	bnx2x_free_mem_bp(bp);
>   
>   init_one_exit:

This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize 
firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the stable 
series, for example, Linux v5.10.95.

Due to CVE-2022-0847 (Dirty Pipe) [1] we updated systems from, for 
example, Linux 5.10.24 to Linux v5.10.103 and noticed that the Broadcom 
network devices failed to initialize.

```
[   20.477325] bnx2 0000:02:00.0 eth0: Broadcom NetXtreme II BCM5709 
1000Base-T (C0) PCI Express found at mem c6000000, IRQ 41, node addr 
c8:1f:66:cf:34:45
[   20.491782] bnx2 0000:02:00.1 eth1: Broadcom NetXtreme II BCM5709 
1000Base-T (C0) PCI Express found at mem c8000000, IRQ 42, node addr 
c8:1f:66:cf:34:47
[   20.506223] bnx2 0000:03:00.0 eth2: Broadcom NetXtreme II BCM5709 
1000Base-T (C0) PCI Express found at mem ca000000, IRQ 43, node addr 
c8:1f:66:cf:34:49
[   20.520644] bnx2 0000:03:00.1 eth3: Broadcom NetXtreme II BCM5709 
1000Base-T (C0) PCI Express found at mem cc000000, IRQ 44, node addr 
c8:1f:66:cf:34:4b
[   20.534985] bnx2x 0000:45:00.0: msix capability found
[   20.540342] bnx2x 0000:45:00.0: part number 
394D4342-31373735-31314131-473331
[   20.548605] bnx2x 0000:45:00.0: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
[   20.558373] bnx2x 0000:45:00.0: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
[   20.568319] bnx2x: probe of 0000:45:00.0 failed with error -2
[   20.574148] bnx2x 0000:45:00.1: msix capability found
[   20.579470] bnx2x 0000:45:00.1: part number 
394D4342-31373735-31314131-473331
[   20.587708] bnx2x 0000:45:00.1: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
[   20.597479] bnx2x 0000:45:00.1: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
[   20.607355] bnx2x: probe of 0000:45:00.1 failed with error -2
[   20.613179] bnx2x 0000:46:00.0: msix capability found
[   20.618501] bnx2x 0000:46:00.0: part number 
394D4342-31373735-31314131-473331
[   20.626805] bnx2x 0000:46:00.0: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
[   20.636580] bnx2x 0000:46:00.0: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
[   20.646453] bnx2x: probe of 0000:46:00.0 failed with error -2
[   20.652279] bnx2x 0000:46:00.1: msix capability found
[   20.657593] bnx2x 0000:46:00.1: part number 
394D4342-31373735-31314131-473331
[   20.665813] bnx2x 0000:46:00.1: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
[   20.675590] bnx2x 0000:46:00.1: Direct firmware load for 
bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
[   20.685457] bnx2x: probe of 0000:46:00.1 failed with error -2
```

Due to reasons, we do not have firmware in the initrd, and the firmware 
files are only the root partition in `/lib/firmware`.

Logging in using other means¹, and removing the PCI devices, and 
rescanning brought the devices online.

     $ echo 1 | sudo tee /sys/bus/pci/devices/0000\:{45,46}\:00.{0,1}/remove
     $ echo 1 | sudo tee /sys/bus/pci/rescan

Adding the firmware files to the initrd also fixes the issue.

I didn’t bisect the change, but the refactoring of the init methods 
looks like the reason for the change of behavior. As it’s not documented 
in the commit message, was that intentional? I think it breaks Linux’ no 
regression rule, and the commit (and follow-ups) should be reverted in 
Linux v5.17 and backed out from the stable series.


Kind regards,

Paul


¹ Why can’t even without proper firmware, the card at least initialize 
to have a network connection even if it’s degraded?
Paul Menzel March 9, 2022, 4:18 p.m. UTC | #2
[Use correct regression address]

Am 09.03.22 um 17:15 schrieb Paul Menzel:
> Dear Manish,
> 
> 
> Am 17.12.21 um 17:55 schrieb Manish Chopra:
>> This new firmware addresses few important issues and enhancements
>> as mentioned below -
>>
>> - Support direct invalidation of FP HSI Ver per function ID, required for
>>    invalidating FP HSI Ver prior to each VF start, as there is no VF start
>> - BRB hardware block parity error detection support for the driver
>> - Fix the FCOE underrun flow
>> - Fix PSOD during FCoE BFS over the NIC ports after preboot driver
>> - Maintains backward compatibility
>>
>> This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
>>
>> Signed-off-by: Manish Chopra <manishc@marvell.com>
>> Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
>> Signed-off-by: Alok Prasad <palok@marvell.com>
>> Signed-off-by: Ariel Elior <aelior@marvell.com>
>> ---
>>
>> v1->v2:
>> ------------
>> * Modified the patch such that driver to be backward compatible
>>    with older firmware too (New fw v7.13.21.0 on linux-firmware.git
>>    enables driver to maintain backward compatibility)
>> ---
>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
>>   .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75 +++++++++++++++-------
>>   5 files changed, 69 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>> index 2b06d78b..a19dd67 100644
>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>> @@ -1850,6 +1850,14 @@ struct bnx2x {
>>       /* Vxlan/Geneve related information */
>>       u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
>> +
>> +#define FW_CAP_INVALIDATE_VF_FP_HSI    BIT(0)
>> +    u32 fw_cap;
>> +
>> +    u32 fw_major;
>> +    u32 fw_minor;
>> +    u32 fw_rev;
>> +    u32 fw_eng;
>>   };
>>   /* Tx queues may be less or equal to Rx queues */
>> @@ -2525,5 +2533,6 @@ enum {
>>    * Meant for implicit re-load flows.
>>    */
>>   int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
>> -
>> +int bnx2x_init_firmware(struct bnx2x *bp);
>> +void bnx2x_release_firmware(struct bnx2x *bp);
>>   #endif /* bnx2x.h */
>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>> index 54a2334..8d36ebb 100644
>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>> @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 
>> load_code, bool print_err)
>>       if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
>>           load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
>>           /* build my FW version dword */
>> -        u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
>> -            (BCM_5710_FW_MINOR_VERSION << 8) +
>> -            (BCM_5710_FW_REVISION_VERSION << 16) +
>> -            (BCM_5710_FW_ENGINEERING_VERSION << 24);
>> +        u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
>> +                (bp->fw_rev << 16) + (bp->fw_eng << 24);
>>           /* read loaded FW from chip */
>>           u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>> index 3f84352..a84d015 100644
>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>> @@ -241,6 +241,8 @@
>>       IRO[221].m2))
>>   #define XSTORM_VF_TO_PF_OFFSET(funcId) \
>>       (IRO[48].base + ((funcId) * IRO[48].m1))
>> +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)    \
>> +    (IRO[386].base + ((fid) * IRO[386].m1))
>>   #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
>>   /* eth hsi version */
>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h 
>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>> index 622fadc..611efee 100644
>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>> @@ -3024,7 +3024,8 @@ struct afex_stats {
>>   #define BCM_5710_FW_MAJOR_VERSION            7
>>   #define BCM_5710_FW_MINOR_VERSION            13
>> -#define BCM_5710_FW_REVISION_VERSION        15
>> +#define BCM_5710_FW_REVISION_VERSION        21
>> +#define BCM_5710_FW_REVISION_VERSION_V15    15
>>   #define BCM_5710_FW_ENGINEERING_VERSION        0
>>   #define BCM_5710_FW_COMPILE_FLAGS            1
>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>> index aec666e..125dafe 100644
>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>> @@ -74,9 +74,19 @@
>>       __stringify(BCM_5710_FW_MINOR_VERSION) "."    \
>>       __stringify(BCM_5710_FW_REVISION_VERSION) "."    \
>>       __stringify(BCM_5710_FW_ENGINEERING_VERSION)
>> +
>> +#define FW_FILE_VERSION_V15                \
>> +    __stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
>> +    __stringify(BCM_5710_FW_MINOR_VERSION) "."    \
>> +    __stringify(BCM_5710_FW_REVISION_VERSION_V15) "."    \
>> +    __stringify(BCM_5710_FW_ENGINEERING_VERSION)
>> +
>>   #define FW_FILE_NAME_E1        "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
>>   #define FW_FILE_NAME_E1H    "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
>>   #define FW_FILE_NAME_E2        "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
>> +#define FW_FILE_NAME_E1_V15    "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 
>> ".fw"
>> +#define FW_FILE_NAME_E1H_V15    "bnx2x/bnx2x-e1h-" 
>> FW_FILE_VERSION_V15 ".fw"
>> +#define FW_FILE_NAME_E2_V15    "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 
>> ".fw"
>>   /* Time in jiffies before concluding the transmitter is hung */
>>   #define TX_TIMEOUT        (5*HZ)
>> @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
>>             CHIP_IS_E1(bp) ? "everest1" :
>>             CHIP_IS_E1H(bp) ? "everest1h" :
>>             CHIP_IS_E2(bp) ? "everest2" : "everest3",
>> -          BCM_5710_FW_MAJOR_VERSION,
>> -          BCM_5710_FW_MINOR_VERSION,
>> -          BCM_5710_FW_REVISION_VERSION);
>> +          bp->fw_major, bp->fw_minor, bp->fw_rev);
>>       return rc;
>>   }
>> @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>>       bnx2x_read_fwinfo(bp);
>> +    if (IS_PF(bp)) {
>> +        rc = bnx2x_init_firmware(bp);
>> +
>> +        if (rc) {
>> +            bnx2x_free_mem_bp(bp);
>> +            return rc;
>> +        }
>> +    }
>> +
>>       func = BP_FUNC(bp);
>>       /* need to reset chip if undi was active */
>> @@ -12320,6 +12337,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>>           rc = bnx2x_prev_unload(bp);
>>           if (rc) {
>> +            bnx2x_release_firmware(bp);
>>               bnx2x_free_mem_bp(bp);
>>               return rc;
>>           }
>> @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct bnx2x 
>> *bp)
>>       /* Check FW version */
>>       offset = be32_to_cpu(fw_hdr->fw_version.offset);
>>       fw_ver = firmware->data + offset;
>> -    if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
>> -        (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
>> -        (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
>> -        (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
>> +    if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
>> +        fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
>>           BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be 
>> %d.%d.%d.%d\n",
>> -               fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
>> -               BCM_5710_FW_MAJOR_VERSION,
>> -               BCM_5710_FW_MINOR_VERSION,
>> -               BCM_5710_FW_REVISION_VERSION,
>> -               BCM_5710_FW_ENGINEERING_VERSION);
>> +              fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
>> +              bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
>>           return -EINVAL;
>>       }
>> @@ -13404,34 +13417,51 @@ static void be16_to_cpu_n(const u8 *_source, 
>> u8 *_target, u32 n)
>>            (u8 *)bp->arr, len);                    \
>>   } while (0)
>> -static int bnx2x_init_firmware(struct bnx2x *bp)
>> +int bnx2x_init_firmware(struct bnx2x *bp)
>>   {
>> -    const char *fw_file_name;
>> +    const char *fw_file_name, *fw_file_name_v15;
>>       struct bnx2x_fw_file_hdr *fw_hdr;
>>       int rc;
>>       if (bp->firmware)
>>           return 0;
>> -    if (CHIP_IS_E1(bp))
>> +    if (CHIP_IS_E1(bp)) {
>>           fw_file_name = FW_FILE_NAME_E1;
>> -    else if (CHIP_IS_E1H(bp))
>> +        fw_file_name_v15 = FW_FILE_NAME_E1_V15;
>> +    } else if (CHIP_IS_E1H(bp)) {
>>           fw_file_name = FW_FILE_NAME_E1H;
>> -    else if (!CHIP_IS_E1x(bp))
>> +        fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
>> +    } else if (!CHIP_IS_E1x(bp)) {
>>           fw_file_name = FW_FILE_NAME_E2;
>> -    else {
>> +        fw_file_name_v15 = FW_FILE_NAME_E2_V15;
>> +    } else {
>>           BNX2X_ERR("Unsupported chip revision\n");
>>           return -EINVAL;
>>       }
>> +
>>       BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
>>       rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
>>       if (rc) {
>> -        BNX2X_ERR("Can't load firmware file %s\n",
>> -              fw_file_name);
>> -        goto request_firmware_exit;
>> +        BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
>> +
>> +        /* try to load prev version */
>> +        rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
>> +
>> +        if (rc)
>> +            goto request_firmware_exit;
>> +
>> +        bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
>> +    } else {
>> +        bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
>> +        bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
>>       }
>> +    bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
>> +    bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
>> +    bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
>> +
>>       rc = bnx2x_check_firmware(bp);
>>       if (rc) {
>>           BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
>> @@ -13487,7 +13517,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
>>       return rc;
>>   }
>> -static void bnx2x_release_firmware(struct bnx2x *bp)
>> +void bnx2x_release_firmware(struct bnx2x *bp)
>>   {
>>       kfree(bp->init_ops_offsets);
>>       kfree(bp->init_ops);
>> @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
>>       return 0;
>>   init_one_freemem:
>> +    bnx2x_release_firmware(bp);
>>       bnx2x_free_mem_bp(bp);
>>   init_one_exit:
> 
> This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize 
> firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the stable 
> series, for example, Linux v5.10.95.
> 
> Due to CVE-2022-0847 (Dirty Pipe) [1] we updated systems from, for 
> example, Linux 5.10.24 to Linux v5.10.103 and noticed that the Broadcom 
> network devices failed to initialize.
> 
> ```
> [   20.477325] bnx2 0000:02:00.0 eth0: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem c6000000, IRQ 41, node addr c8:1f:66:cf:34:45
> [   20.491782] bnx2 0000:02:00.1 eth1: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem c8000000, IRQ 42, node addr c8:1f:66:cf:34:47
> [   20.506223] bnx2 0000:03:00.0 eth2: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem ca000000, IRQ 43, node addr c8:1f:66:cf:34:49
> [   20.520644] bnx2 0000:03:00.1 eth3: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem cc000000, IRQ 44, node addr c8:1f:66:cf:34:4b
> [   20.534985] bnx2x 0000:45:00.0: msix capability found
> [   20.540342] bnx2x 0000:45:00.0: part number 394D4342-31373735-31314131-473331
> [   20.548605] bnx2x 0000:45:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.558373] bnx2x 0000:45:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.568319] bnx2x: probe of 0000:45:00.0 failed with error -2
> [   20.574148] bnx2x 0000:45:00.1: msix capability found
> [   20.579470] bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
> [   20.587708] bnx2x 0000:45:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.597479] bnx2x 0000:45:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.607355] bnx2x: probe of 0000:45:00.1 failed with error -2
> [   20.613179] bnx2x 0000:46:00.0: msix capability found
> [   20.618501] bnx2x 0000:46:00.0: part number 394D4342-31373735-31314131-473331
> [   20.626805] bnx2x 0000:46:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.636580] bnx2x 0000:46:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.646453] bnx2x: probe of 0000:46:00.0 failed with error -2
> [   20.652279] bnx2x 0000:46:00.1: msix capability found
> [   20.657593] bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
> [   20.665813] bnx2x 0000:46:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.675590] bnx2x 0000:46:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.685457] bnx2x: probe of 0000:46:00.1 failed with error -2
> ```
> 
> Due to reasons, we do not have firmware in the initrd, and the firmware 
> files are only the root partition in `/lib/firmware`.
> 
> Logging in using other means¹, and removing the PCI devices, and 
> rescanning brought the devices online.
> 
>      $ echo 1 | sudo tee /sys/bus/pci/devices/0000\:{45,46}\:00.{0,1}/remove
>      $ echo 1 | sudo tee /sys/bus/pci/rescan
> 
> Adding the firmware files to the initrd also fixes the issue.
> 
> I didn’t bisect the change, but the refactoring of the init methods 
> looks like the reason for the change of behavior. As it’s not documented 
> in the commit message, was that intentional? I think it breaks Linux’ no 
> regression rule, and the commit (and follow-ups) should be reverted in 
> Linux v5.17 and backed out from the stable series.
> 
> 
> Kind regards,
> 
> Paul
> 
> 
> ¹ Why can’t even without proper firmware, the card at least initialize 
> to have a network connection even if it’s degraded?
Jakub Kicinski March 9, 2022, 4:35 p.m. UTC | #3
On Wed, 9 Mar 2022 17:18:30 +0100 Paul Menzel wrote:
> > This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize 
> > firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the stable 
> > series, for example, Linux v5.10.95.

SMH, I must have missed this getting backported in the rush of merge
window backports :(

Yes, please revert.
Manish Chopra March 9, 2022, 4:46 p.m. UTC | #4
> -----Original Message-----
> From: Paul Menzel <pmenzel@molgen.mpg.de>
> Sent: Wednesday, March 9, 2022 9:45 PM
> To: Manish Chopra <manishc@marvell.com>
> Cc: kuba@kernel.org; netdev@vger.kernel.org; Ariel Elior
> <aelior@marvell.com>; Alok Prasad <palok@marvell.com>; Prabhakar
> Kushwaha <pkushwaha@marvell.com>; David S. Miller
> <davem@davemloft.net>; Greg KH <gregkh@linuxfoundation.org>;
> stable@vger.kernel.org; it+netdev@molgen.mpg.de;
> regressions@leemhuis.info; Linus Torvalds <torvalds@linux-foundation.org>
> Subject: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware 7.13.21.0
> 
> External Email
> 
> ----------------------------------------------------------------------
> Dear Manish,
> 
> 
> Am 17.12.21 um 17:55 schrieb Manish Chopra:
> > This new firmware addresses few important issues and enhancements as
> > mentioned below -
> >
> > - Support direct invalidation of FP HSI Ver per function ID, required for
> >    invalidating FP HSI Ver prior to each VF start, as there is no VF
> > start
> > - BRB hardware block parity error detection support for the driver
> > - Fix the FCOE underrun flow
> > - Fix PSOD during FCoE BFS over the NIC ports after preboot driver
> > - Maintains backward compatibility
> >
> > This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
> >
> > Signed-off-by: Manish Chopra <manishc@marvell.com>
> > Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
> > Signed-off-by: Alok Prasad <palok@marvell.com>
> > Signed-off-by: Ariel Elior <aelior@marvell.com>
> > ---
> >
> > v1->v2:
> > ------------
> > * Modified the patch such that driver to be backward compatible
> >    with older firmware too (New fw v7.13.21.0 on linux-firmware.git
> >    enables driver to maintain backward compatibility)
> > ---
> >   drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
> >   drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
> >   .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
> >   drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
> >   drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75
> +++++++++++++++-------
> >   5 files changed, 69 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> > b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> > index 2b06d78b..a19dd67 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> > @@ -1850,6 +1850,14 @@ struct bnx2x {
> >
> >   	/* Vxlan/Geneve related information */
> >   	u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
> > +
> > +#define FW_CAP_INVALIDATE_VF_FP_HSI	BIT(0)
> > +	u32 fw_cap;
> > +
> > +	u32 fw_major;
> > +	u32 fw_minor;
> > +	u32 fw_rev;
> > +	u32 fw_eng;
> >   };
> >
> >   /* Tx queues may be less or equal to Rx queues */ @@ -2525,5 +2533,6
> > @@ enum {
> >    * Meant for implicit re-load flows.
> >    */
> >   int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
> > -
> > +int bnx2x_init_firmware(struct bnx2x *bp); void
> > +bnx2x_release_firmware(struct bnx2x *bp);
> >   #endif /* bnx2x.h */
> > diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > index 54a2334..8d36ebb 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp,
> u32 load_code, bool print_err)
> >   	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
> >   	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
> >   		/* build my FW version dword */
> > -		u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
> > -			(BCM_5710_FW_MINOR_VERSION << 8) +
> > -			(BCM_5710_FW_REVISION_VERSION << 16) +
> > -			(BCM_5710_FW_ENGINEERING_VERSION << 24);
> > +		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
> > +				(bp->fw_rev << 16) + (bp->fw_eng << 24);
> >
> >   		/* read loaded FW from chip */
> >   		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); diff --git
> > a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> > b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> > index 3f84352..a84d015 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> > @@ -241,6 +241,8 @@
> >   	IRO[221].m2))
> >   #define XSTORM_VF_TO_PF_OFFSET(funcId) \
> >   	(IRO[48].base + ((funcId) * IRO[48].m1))
> > +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)
> 	\
> > +	(IRO[386].base + ((fid) * IRO[386].m1))
> >   #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
> >
> >   /* eth hsi version */
> > diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> > b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> > index 622fadc..611efee 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> > @@ -3024,7 +3024,8 @@ struct afex_stats {
> >
> >   #define BCM_5710_FW_MAJOR_VERSION			7
> >   #define BCM_5710_FW_MINOR_VERSION			13
> > -#define BCM_5710_FW_REVISION_VERSION		15
> > +#define BCM_5710_FW_REVISION_VERSION		21
> > +#define BCM_5710_FW_REVISION_VERSION_V15	15
> >   #define BCM_5710_FW_ENGINEERING_VERSION		0
> >   #define BCM_5710_FW_COMPILE_FLAGS			1
> >
> > diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> > b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> > index aec666e..125dafe 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> > @@ -74,9 +74,19 @@
> >   	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
> >   	__stringify(BCM_5710_FW_REVISION_VERSION) "."	\
> >   	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
> > +
> > +#define FW_FILE_VERSION_V15				\
> > +	__stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
> > +	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
> > +	__stringify(BCM_5710_FW_REVISION_VERSION_V15) "."	\
> > +	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
> > +
> >   #define FW_FILE_NAME_E1		"bnx2x/bnx2x-e1-" FW_FILE_VERSION
> ".fw"
> >   #define FW_FILE_NAME_E1H	"bnx2x/bnx2x-e1h-"
> FW_FILE_VERSION ".fw"
> >   #define FW_FILE_NAME_E2		"bnx2x/bnx2x-e2-" FW_FILE_VERSION
> ".fw"
> > +#define FW_FILE_NAME_E1_V15	"bnx2x/bnx2x-e1-"
> FW_FILE_VERSION_V15 ".fw"
> > +#define FW_FILE_NAME_E1H_V15	"bnx2x/bnx2x-e1h-"
> FW_FILE_VERSION_V15 ".fw"
> > +#define FW_FILE_NAME_E2_V15	"bnx2x/bnx2x-e2-"
> FW_FILE_VERSION_V15 ".fw"
> >
> >   /* Time in jiffies before concluding the transmitter is hung */
> >   #define TX_TIMEOUT		(5*HZ)
> > @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
> >   		  CHIP_IS_E1(bp) ? "everest1" :
> >   		  CHIP_IS_E1H(bp) ? "everest1h" :
> >   		  CHIP_IS_E2(bp) ? "everest2" : "everest3",
> > -		  BCM_5710_FW_MAJOR_VERSION,
> > -		  BCM_5710_FW_MINOR_VERSION,
> > -		  BCM_5710_FW_REVISION_VERSION);
> > +		  bp->fw_major, bp->fw_minor, bp->fw_rev);
> >
> >   	return rc;
> >   }
> > @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
> >
> >   	bnx2x_read_fwinfo(bp);
> >
> > +	if (IS_PF(bp)) {
> > +		rc = bnx2x_init_firmware(bp);
> > +
> > +		if (rc) {
> > +			bnx2x_free_mem_bp(bp);
> > +			return rc;
> > +		}
> > +	}
> > +
> >   	func = BP_FUNC(bp);
> >
> >   	/* need to reset chip if undi was active */ @@ -12320,6 +12337,7 @@
> > static int bnx2x_init_bp(struct bnx2x *bp)
> >
> >   		rc = bnx2x_prev_unload(bp);
> >   		if (rc) {
> > +			bnx2x_release_firmware(bp);
> >   			bnx2x_free_mem_bp(bp);
> >   			return rc;
> >   		}
> > @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct
> bnx2x *bp)
> >   	/* Check FW version */
> >   	offset = be32_to_cpu(fw_hdr->fw_version.offset);
> >   	fw_ver = firmware->data + offset;
> > -	if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
> > -	    (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
> > -	    (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
> > -	    (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
> > +	if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
> > +	    fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
> >   		BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be
> %d.%d.%d.%d\n",
> > -		       fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
> > -		       BCM_5710_FW_MAJOR_VERSION,
> > -		       BCM_5710_FW_MINOR_VERSION,
> > -		       BCM_5710_FW_REVISION_VERSION,
> > -		       BCM_5710_FW_ENGINEERING_VERSION);
> > +			  fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
> > +			  bp->fw_major, bp->fw_minor, bp->fw_rev, bp-
> >fw_eng);
> >   		return -EINVAL;
> >   	}
> >
> > @@ -13404,34 +13417,51 @@ static void be16_to_cpu_n(const u8
> *_source, u8 *_target, u32 n)
> >   	     (u8 *)bp->arr, len);					\
> >   } while (0)
> >
> > -static int bnx2x_init_firmware(struct bnx2x *bp)
> > +int bnx2x_init_firmware(struct bnx2x *bp)
> >   {
> > -	const char *fw_file_name;
> > +	const char *fw_file_name, *fw_file_name_v15;
> >   	struct bnx2x_fw_file_hdr *fw_hdr;
> >   	int rc;
> >
> >   	if (bp->firmware)
> >   		return 0;
> >
> > -	if (CHIP_IS_E1(bp))
> > +	if (CHIP_IS_E1(bp)) {
> >   		fw_file_name = FW_FILE_NAME_E1;
> > -	else if (CHIP_IS_E1H(bp))
> > +		fw_file_name_v15 = FW_FILE_NAME_E1_V15;
> > +	} else if (CHIP_IS_E1H(bp)) {
> >   		fw_file_name = FW_FILE_NAME_E1H;
> > -	else if (!CHIP_IS_E1x(bp))
> > +		fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
> > +	} else if (!CHIP_IS_E1x(bp)) {
> >   		fw_file_name = FW_FILE_NAME_E2;
> > -	else {
> > +		fw_file_name_v15 = FW_FILE_NAME_E2_V15;
> > +	} else {
> >   		BNX2X_ERR("Unsupported chip revision\n");
> >   		return -EINVAL;
> >   	}
> > +
> >   	BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
> >
> >   	rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev-
> >dev);
> >   	if (rc) {
> > -		BNX2X_ERR("Can't load firmware file %s\n",
> > -			  fw_file_name);
> > -		goto request_firmware_exit;
> > +		BNX2X_DEV_INFO("Trying to load older fw %s\n",
> fw_file_name_v15);
> > +
> > +		/* try to load prev version */
> > +		rc = request_firmware(&bp->firmware, fw_file_name_v15,
> > +&bp->pdev->dev);
> > +
> > +		if (rc)
> > +			goto request_firmware_exit;
> > +
> > +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
> > +	} else {
> > +		bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
> > +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
> >   	}
> >
> > +	bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
> > +	bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
> > +	bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
> > +
> >   	rc = bnx2x_check_firmware(bp);
> >   	if (rc) {
> >   		BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); @@
> -13487,7
> > +13517,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
> >   	return rc;
> >   }
> >
> > -static void bnx2x_release_firmware(struct bnx2x *bp)
> > +void bnx2x_release_firmware(struct bnx2x *bp)
> >   {
> >   	kfree(bp->init_ops_offsets);
> >   	kfree(bp->init_ops);
> > @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
> >   	return 0;
> >
> >   init_one_freemem:
> > +	bnx2x_release_firmware(bp);
> >   	bnx2x_free_mem_bp(bp);
> >
> >   init_one_exit:
> 
> This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize
> firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the stable
> series, for example, Linux v5.10.95.
> 
> Due to CVE-2022-0847 (Dirty Pipe) [1] we updated systems from, for example,
> Linux 5.10.24 to Linux v5.10.103 and noticed that the Broadcom network
> devices failed to initialize.
> 
> ```
> [   20.477325] bnx2 0000:02:00.0 eth0: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem c6000000, IRQ 41, node addr
> c8:1f:66:cf:34:45
> [   20.491782] bnx2 0000:02:00.1 eth1: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem c8000000, IRQ 42, node addr
> c8:1f:66:cf:34:47
> [   20.506223] bnx2 0000:03:00.0 eth2: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem ca000000, IRQ 43, node addr
> c8:1f:66:cf:34:49
> [   20.520644] bnx2 0000:03:00.1 eth3: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem cc000000, IRQ 44, node addr
> c8:1f:66:cf:34:4b
> [   20.534985] bnx2x 0000:45:00.0: msix capability found
> [   20.540342] bnx2x 0000:45:00.0: part number
> 394D4342-31373735-31314131-473331
> [   20.548605] bnx2x 0000:45:00.0: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.558373] bnx2x 0000:45:00.0: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.568319] bnx2x: probe of 0000:45:00.0 failed with error -2
> [   20.574148] bnx2x 0000:45:00.1: msix capability found
> [   20.579470] bnx2x 0000:45:00.1: part number
> 394D4342-31373735-31314131-473331
> [   20.587708] bnx2x 0000:45:00.1: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.597479] bnx2x 0000:45:00.1: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.607355] bnx2x: probe of 0000:45:00.1 failed with error -2
> [   20.613179] bnx2x 0000:46:00.0: msix capability found
> [   20.618501] bnx2x 0000:46:00.0: part number
> 394D4342-31373735-31314131-473331
> [   20.626805] bnx2x 0000:46:00.0: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.636580] bnx2x 0000:46:00.0: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.646453] bnx2x: probe of 0000:46:00.0 failed with error -2
> [   20.652279] bnx2x 0000:46:00.1: msix capability found
> [   20.657593] bnx2x 0000:46:00.1: part number
> 394D4342-31373735-31314131-473331
> [   20.665813] bnx2x 0000:46:00.1: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
> [   20.675590] bnx2x 0000:46:00.1: Direct firmware load for
> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
> [   20.685457] bnx2x: probe of 0000:46:00.1 failed with error -2
> ```
> 
> Due to reasons, we do not have firmware in the initrd, and the firmware files
> are only the root partition in `/lib/firmware`.
> 
> Logging in using other means¹, and removing the PCI devices, and rescanning
> brought the devices online.
> 
>      $ echo 1 | sudo tee /sys/bus/pci/devices/0000\:{45,46}\:00.{0,1}/remove
>      $ echo 1 | sudo tee /sys/bus/pci/rescan
> 
> Adding the firmware files to the initrd also fixes the issue.
> 
> I didn’t bisect the change, but the refactoring of the init methods looks like the
> reason for the change of behavior. As it’s not documented in the commit
> message, was that intentional? I think it breaks Linux’ no regression rule, and
> the commit (and follow-ups) should be reverted in Linux v5.17 and backed out
> from the stable series.
> 
> 
Hi Paul,

The whole motivation of sending this v2 was to make FW backward compatible with older FW so that it doesn't break the system if someone updates the kernel,
however the initrd case was slipped which was fixed recently with below commit and should already be added to affected stable branches. You may want to apply this commit to resolve the problem.

commit e13ad1443684f7afaff24cf207e85e97885256bd
Author: Manish Chopra <manishc@marvell.com>
Date:   Wed Feb 23 00:57:20 2022 -0800

    bnx2x: fix driver load from initrd

    Commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") added
    new firmware support in the driver with maintaining older firmware
    compatibility. However, older firmware was not added in MODULE_FIRMWARE()
    which caused missing firmware files in initrd image leading to driver load
    failure from initrd. This patch adds MODULE_FIRMWARE() for older firmware
    version to have firmware files included in initrd.

    Fixes: b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")
    Link: https://bugzilla.kernel.org/show_bug.cgi?id=215627
    Signed-off-by: Manish Chopra <manishc@marvell.com>
    Signed-off-by: Alok Prasad <palok@marvell.com>
    Signed-off-by: Ariel Elior <aelior@marvell.com>
    Link: https://lore.kernel.org/r/20220223085720.12021-1-manishc@marvell.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Thanks,
Manish 


> Kind regards,
> 
> Paul
> 
> 
> ¹ Why can’t even without proper firmware, the card at least initialize to have a
> network connection even if it’s degraded?
Paul Menzel March 9, 2022, 6 p.m. UTC | #5
Dear Manish,


Am 09.03.22 um 17:46 schrieb Manish Chopra:
>> -----Original Message-----
>> From: Paul Menzel <pmenzel@molgen.mpg.de>
>> Sent: Wednesday, March 9, 2022 9:45 PM

[…]

>> Dear Manish,
>>
>>
>> Am 17.12.21 um 17:55 schrieb Manish Chopra:
>>> This new firmware addresses few important issues and enhancements as
>>> mentioned below -
>>>
>>> - Support direct invalidation of FP HSI Ver per function ID, required for
>>>   invalidating FP HSI Ver prior to each VF start, as there is no VF start
>>> - BRB hardware block parity error detection support for the driver
>>> - Fix the FCOE underrun flow
>>> - Fix PSOD during FCoE BFS over the NIC ports after preboot driver
>>> - Maintains backward compatibility
>>>
>>> This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
>>>
>>> Signed-off-by: Manish Chopra <manishc@marvell.com>
>>> Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
>>> Signed-off-by: Alok Prasad <palok@marvell.com>
>>> Signed-off-by: Ariel Elior <aelior@marvell.com>
>>> ---
>>>
>>> v1->v2:
>>> ------------
>>> * Modified the patch such that driver to be backward compatible
>>>     with older firmware too (New fw v7.13.21.0 on linux-firmware.git
>>>     enables driver to maintain backward compatibility)
>>> ---
>>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
>>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
>>>    .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
>>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
>>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75 +++++++++++++++-------
>>>    5 files changed, 69 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> index 2b06d78b..a19dd67 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> @@ -1850,6 +1850,14 @@ struct bnx2x {
>>>
>>>    	/* Vxlan/Geneve related information */
>>>    	u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
>>> +
>>> +#define FW_CAP_INVALIDATE_VF_FP_HSI	BIT(0)
>>> +	u32 fw_cap;
>>> +
>>> +	u32 fw_major;
>>> +	u32 fw_minor;
>>> +	u32 fw_rev;
>>> +	u32 fw_eng;
>>>    };
>>>
>>>    /* Tx queues may be less or equal to Rx queues */ @@ -2525,5 +2533,6
>>> @@ enum {
>>>     * Meant for implicit re-load flows.
>>>     */
>>>    int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
>>> -
>>> +int bnx2x_init_firmware(struct bnx2x *bp); void
>>> +bnx2x_release_firmware(struct bnx2x *bp);
>>>    #endif /* bnx2x.h */
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> index 54a2334..8d36ebb 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp,
>> u32 load_code, bool print_err)
>>>    	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
>>>    	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
>>>    		/* build my FW version dword */
>>> -		u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
>>> -			(BCM_5710_FW_MINOR_VERSION << 8) +
>>> -			(BCM_5710_FW_REVISION_VERSION << 16) +
>>> -			(BCM_5710_FW_ENGINEERING_VERSION << 24);
>>> +		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
>>> +				(bp->fw_rev << 16) + (bp->fw_eng << 24);
>>>
>>>    		/* read loaded FW from chip */
>>>    		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); diff --git
>>> a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> index 3f84352..a84d015 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> @@ -241,6 +241,8 @@
>>>    	IRO[221].m2))
>>>    #define XSTORM_VF_TO_PF_OFFSET(funcId) \
>>>    	(IRO[48].base + ((funcId) * IRO[48].m1))
>>> +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)
>> 	\
>>> +	(IRO[386].base + ((fid) * IRO[386].m1))
>>>    #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
>>>
>>>    /* eth hsi version */
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> index 622fadc..611efee 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> @@ -3024,7 +3024,8 @@ struct afex_stats {
>>>
>>>    #define BCM_5710_FW_MAJOR_VERSION			7
>>>    #define BCM_5710_FW_MINOR_VERSION			13
>>> -#define BCM_5710_FW_REVISION_VERSION		15
>>> +#define BCM_5710_FW_REVISION_VERSION		21
>>> +#define BCM_5710_FW_REVISION_VERSION_V15	15
>>>    #define BCM_5710_FW_ENGINEERING_VERSION		0
>>>    #define BCM_5710_FW_COMPILE_FLAGS			1
>>>
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> index aec666e..125dafe 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> @@ -74,9 +74,19 @@
>>>    	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
>>>    	__stringify(BCM_5710_FW_REVISION_VERSION) "."	\
>>>    	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
>>> +
>>> +#define FW_FILE_VERSION_V15				\
>>> +	__stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
>>> +	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
>>> +	__stringify(BCM_5710_FW_REVISION_VERSION_V15) "."	\
>>> +	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
>>> +
>>>    #define FW_FILE_NAME_E1		"bnx2x/bnx2x-e1-" FW_FILE_VERSION
>> ".fw"
>>>    #define FW_FILE_NAME_E1H	"bnx2x/bnx2x-e1h-"
>> FW_FILE_VERSION ".fw"
>>>    #define FW_FILE_NAME_E2		"bnx2x/bnx2x-e2-" FW_FILE_VERSION
>> ".fw"
>>> +#define FW_FILE_NAME_E1_V15	"bnx2x/bnx2x-e1-"
>> FW_FILE_VERSION_V15 ".fw"
>>> +#define FW_FILE_NAME_E1H_V15	"bnx2x/bnx2x-e1h-"
>> FW_FILE_VERSION_V15 ".fw"
>>> +#define FW_FILE_NAME_E2_V15	"bnx2x/bnx2x-e2-"
>> FW_FILE_VERSION_V15 ".fw"
>>>
>>>    /* Time in jiffies before concluding the transmitter is hung */
>>>    #define TX_TIMEOUT		(5*HZ)
>>> @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
>>>    		  CHIP_IS_E1(bp) ? "everest1" :
>>>    		  CHIP_IS_E1H(bp) ? "everest1h" :
>>>    		  CHIP_IS_E2(bp) ? "everest2" : "everest3",
>>> -		  BCM_5710_FW_MAJOR_VERSION,
>>> -		  BCM_5710_FW_MINOR_VERSION,
>>> -		  BCM_5710_FW_REVISION_VERSION);
>>> +		  bp->fw_major, bp->fw_minor, bp->fw_rev);
>>>
>>>    	return rc;
>>>    }
>>> @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>>>
>>>    	bnx2x_read_fwinfo(bp);
>>>
>>> +	if (IS_PF(bp)) {
>>> +		rc = bnx2x_init_firmware(bp);
>>> +
>>> +		if (rc) {
>>> +			bnx2x_free_mem_bp(bp);
>>> +			return rc;
>>> +		}
>>> +	}
>>> +
>>>    	func = BP_FUNC(bp);
>>>
>>>    	/* need to reset chip if undi was active */ @@ -12320,6 +12337,7 @@
>>> static int bnx2x_init_bp(struct bnx2x *bp)
>>>
>>>    		rc = bnx2x_prev_unload(bp);
>>>    		if (rc) {
>>> +			bnx2x_release_firmware(bp);
>>>    			bnx2x_free_mem_bp(bp);
>>>    			return rc;
>>>    		}
>>> @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct
>> bnx2x *bp)
>>>    	/* Check FW version */
>>>    	offset = be32_to_cpu(fw_hdr->fw_version.offset);
>>>    	fw_ver = firmware->data + offset;
>>> -	if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
>>> -	    (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
>>> -	    (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
>>> -	    (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
>>> +	if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
>>> +	    fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
>>>    		BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be
>> %d.%d.%d.%d\n",
>>> -		       fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
>>> -		       BCM_5710_FW_MAJOR_VERSION,
>>> -		       BCM_5710_FW_MINOR_VERSION,
>>> -		       BCM_5710_FW_REVISION_VERSION,
>>> -		       BCM_5710_FW_ENGINEERING_VERSION);
>>> +			  fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
>>> +			  bp->fw_major, bp->fw_minor, bp->fw_rev, bp-
>>> fw_eng);
>>>    		return -EINVAL;
>>>    	}
>>>
>>> @@ -13404,34 +13417,51 @@ static void be16_to_cpu_n(const u8
>> *_source, u8 *_target, u32 n)
>>>    	     (u8 *)bp->arr, len);					\
>>>    } while (0)
>>>
>>> -static int bnx2x_init_firmware(struct bnx2x *bp)
>>> +int bnx2x_init_firmware(struct bnx2x *bp)
>>>    {
>>> -	const char *fw_file_name;
>>> +	const char *fw_file_name, *fw_file_name_v15;
>>>    	struct bnx2x_fw_file_hdr *fw_hdr;
>>>    	int rc;
>>>
>>>    	if (bp->firmware)
>>>    		return 0;
>>>
>>> -	if (CHIP_IS_E1(bp))
>>> +	if (CHIP_IS_E1(bp)) {
>>>    		fw_file_name = FW_FILE_NAME_E1;
>>> -	else if (CHIP_IS_E1H(bp))
>>> +		fw_file_name_v15 = FW_FILE_NAME_E1_V15;
>>> +	} else if (CHIP_IS_E1H(bp)) {
>>>    		fw_file_name = FW_FILE_NAME_E1H;
>>> -	else if (!CHIP_IS_E1x(bp))
>>> +		fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
>>> +	} else if (!CHIP_IS_E1x(bp)) {
>>>    		fw_file_name = FW_FILE_NAME_E2;
>>> -	else {
>>> +		fw_file_name_v15 = FW_FILE_NAME_E2_V15;
>>> +	} else {
>>>    		BNX2X_ERR("Unsupported chip revision\n");
>>>    		return -EINVAL;
>>>    	}
>>> +
>>>    	BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
>>>
>>>    	rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
>>>    	if (rc) {
>>> -		BNX2X_ERR("Can't load firmware file %s\n",
>>> -			  fw_file_name);
>>> -		goto request_firmware_exit;
>>> +		BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
>>> +
>>> +		/* try to load prev version */
>>> +		rc = request_firmware(&bp->firmware, fw_file_name_v15, +&bp->pdev->dev);
>>> +
>>> +		if (rc)
>>> +			goto request_firmware_exit;
>>> +
>>> +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
>>> +	} else {
>>> +		bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
>>> +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
>>>    	}
>>>
>>> +	bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
>>> +	bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
>>> +	bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
>>> +
>>>    	rc = bnx2x_check_firmware(bp);
>>>    	if (rc) {
>>>    		BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); @@
>> -13487,7
>>> +13517,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
>>>    	return rc;
>>>    }
>>>
>>> -static void bnx2x_release_firmware(struct bnx2x *bp)
>>> +void bnx2x_release_firmware(struct bnx2x *bp)
>>>    {
>>>    	kfree(bp->init_ops_offsets);
>>>    	kfree(bp->init_ops);
>>> @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
>>>    	return 0;
>>>
>>>    init_one_freemem:
>>> +	bnx2x_release_firmware(bp);
>>>    	bnx2x_free_mem_bp(bp);
>>>
>>>    init_one_exit:
>>
>> This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize
>> firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the stable
>> series, for example, Linux v5.10.95.
>>
>> Due to CVE-2022-0847 (Dirty Pipe) [1] we updated systems from, for example,
>> Linux 5.10.24 to Linux v5.10.103 and noticed that the Broadcom network
>> devices failed to initialize.
>>
>> ```
>> [   20.477325] bnx2 0000:02:00.0 eth0: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem c6000000, IRQ 41, node addr c8:1f:66:cf:34:45
>> [   20.491782] bnx2 0000:02:00.1 eth1: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem c8000000, IRQ 42, node addr c8:1f:66:cf:34:47
>> [   20.506223] bnx2 0000:03:00.0 eth2: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem ca000000, IRQ 43, node addr c8:1f:66:cf:34:49
>> [   20.520644] bnx2 0000:03:00.1 eth3: Broadcom NetXtreme II BCM5709 1000Base-T (C0) PCI Express found at mem cc000000, IRQ 44, node addr c8:1f:66:cf:34:4b
>> [   20.534985] bnx2x 0000:45:00.0: msix capability found
>> [   20.540342] bnx2x 0000:45:00.0: part number 394D4342-31373735-31314131-473331
>> [   20.548605] bnx2x 0000:45:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.558373] bnx2x 0000:45:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.568319] bnx2x: probe of 0000:45:00.0 failed with error -2
>> [   20.574148] bnx2x 0000:45:00.1: msix capability found
>> [   20.579470] bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
>> [   20.587708] bnx2x 0000:45:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.597479] bnx2x 0000:45:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.607355] bnx2x: probe of 0000:45:00.1 failed with error -2
>> [   20.613179] bnx2x 0000:46:00.0: msix capability found
>> [   20.618501] bnx2x 0000:46:00.0: part number 394D4342-31373735-31314131-473331
>> [   20.626805] bnx2x 0000:46:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.636580] bnx2x 0000:46:00.0: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.646453] bnx2x: probe of 0000:46:00.0 failed with error -2
>> [   20.652279] bnx2x 0000:46:00.1: msix capability found
>> [   20.657593] bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
>> [   20.665813] bnx2x 0000:46:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.675590] bnx2x 0000:46:00.1: Direct firmware load for bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.685457] bnx2x: probe of 0000:46:00.1 failed with error -2
>> ```
>>
>> Due to reasons, we do not have firmware in the initrd, and the firmware files
>> are only the root partition in `/lib/firmware`.
>>
>> Logging in using other means¹, and removing the PCI devices, and rescanning
>> brought the devices online.
>>
>>       $ echo 1 | sudo tee /sys/bus/pci/devices/0000\:{45,46}\:00.{0,1}/remove
>>       $ echo 1 | sudo tee /sys/bus/pci/rescan
>>
>> Adding the firmware files to the initrd also fixes the issue.
>>
>> I didn’t bisect the change, but the refactoring of the init methods looks like the
>> reason for the change of behavior. As it’s not documented in the commit
>> message, was that intentional? I think it breaks Linux’ no regression rule, and
>> the commit (and follow-ups) should be reverted in Linux v5.17 and backed out
>> from the stable series.

> The whole motivation of sending this v2 was to make FW backward compatible with older FW so that it doesn't break the system if someone updates the kernel,
> however the initrd case was slipped which was fixed recently with below commit and should already be added to affected stable branches. You may want to apply this commit to resolve the problem.
> 
> commit e13ad1443684f7afaff24cf207e85e97885256bd
> Author: Manish Chopra <manishc@marvell.com>
> Date:   Wed Feb 23 00:57:20 2022 -0800
> 
>      bnx2x: fix driver load from initrd
> 
>      Commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") added
>      new firmware support in the driver with maintaining older firmware
>      compatibility. However, older firmware was not added in MODULE_FIRMWARE()
>      which caused missing firmware files in initrd image leading to driver load
>      failure from initrd. This patch adds MODULE_FIRMWARE() for older firmware
>      version to have firmware files included in initrd.
> 
>      Fixes: b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")
>      Link: https://bugzilla.kernel.org/show_bug.cgi?id=215627
>      Signed-off-by: Manish Chopra <manishc@marvell.com>
>      Signed-off-by: Alok Prasad <palok@marvell.com>
>      Signed-off-by: Ariel Elior <aelior@marvell.com>
>      Link: https://lore.kernel.org/r/20220223085720.12021-1-manishc@marvell.com
>      Signed-off-by: Jakub Kicinski <kuba@kernel.org>

I am aware of this commit, and it’s in Linux 5.10.103, we built and 
used. It’s not about firmware version 7.13.15.0 not being loaded, but as 
written, about the code being changed, so the firmware files are 
required earlier now. For unknown reasons to me `bnx2x_init_firmware()` 
is now also called in `bnx2x_init_bp()`, which is probably an earlier 
code path. (I do not know the driver, so please take everything I write 
with a grain of salt.)

[…]


Kind regards,

Paul


>> ¹ Why can’t even without proper firmware, the card at least initialize to have a
>> network connection even if it’s degraded?
Manish Chopra March 9, 2022, 7:22 p.m. UTC | #6
Hi Paul,

> -----Original Message-----
> From: Paul Menzel <pmenzel@molgen.mpg.de>
> Sent: Wednesday, March 9, 2022 11:31 PM
> To: Manish Chopra <manishc@marvell.com>
> Cc: kuba@kernel.org; netdev@vger.kernel.org; Ariel Elior
> <aelior@marvell.com>; Alok Prasad <palok@marvell.com>; Prabhakar
> Kushwaha <pkushwaha@marvell.com>; David S. Miller
> <davem@davemloft.net>; Greg KH <gregkh@linuxfoundation.org>;
> stable@vger.kernel.org; it+netdev@molgen.mpg.de; Linus Torvalds
> <torvalds@linux-foundation.org>; regressions@lists.linux.dev
> Subject: Re: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware
> 7.13.21.0
> 
> Dear Manish,
> 
> 
> Am 09.03.22 um 17:46 schrieb Manish Chopra:
> >> -----Original Message-----
> >> From: Paul Menzel <pmenzel@molgen.mpg.de>
> >> Sent: Wednesday, March 9, 2022 9:45 PM
> 
> […]
> 
> >> Dear Manish,
> >>
> >>
> >> Am 17.12.21 um 17:55 schrieb Manish Chopra:
> >>> This new firmware addresses few important issues and enhancements as
> >>> mentioned below -
> >>>
> >>> - Support direct invalidation of FP HSI Ver per function ID, required for
> >>>   invalidating FP HSI Ver prior to each VF start, as there is no VF
> >>> start
> >>> - BRB hardware block parity error detection support for the driver
> >>> - Fix the FCOE underrun flow
> >>> - Fix PSOD during FCoE BFS over the NIC ports after preboot driver
> >>> - Maintains backward compatibility
> >>>
> >>> This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
> >>>
> >>> Signed-off-by: Manish Chopra <manishc@marvell.com>
> >>> Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
> >>> Signed-off-by: Alok Prasad <palok@marvell.com>
> >>> Signed-off-by: Ariel Elior <aelior@marvell.com>
> >>> ---
> >>>
> >>> v1->v2:
> >>> ------------
> >>> * Modified the patch such that driver to be backward compatible
> >>>     with older firmware too (New fw v7.13.21.0 on linux-firmware.git
> >>>     enables driver to maintain backward compatibility)
> >>> ---
> >>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
> >>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
> >>>    .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
> >>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
> >>>    drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75
> +++++++++++++++-------
> >>>    5 files changed, 69 insertions(+), 28 deletions(-)
> >>>
> >>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> >>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> >>> index 2b06d78b..a19dd67 100644
> >>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> >>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> >>> @@ -1850,6 +1850,14 @@ struct bnx2x {
> >>>
> >>>    	/* Vxlan/Geneve related information */
> >>>    	u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
> >>> +
> >>> +#define FW_CAP_INVALIDATE_VF_FP_HSI	BIT(0)
> >>> +	u32 fw_cap;
> >>> +
> >>> +	u32 fw_major;
> >>> +	u32 fw_minor;
> >>> +	u32 fw_rev;
> >>> +	u32 fw_eng;
> >>>    };
> >>>
> >>>    /* Tx queues may be less or equal to Rx queues */ @@ -2525,5
> >>> +2533,6 @@ enum {
> >>>     * Meant for implicit re-load flows.
> >>>     */
> >>>    int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
> >>> -
> >>> +int bnx2x_init_firmware(struct bnx2x *bp); void
> >>> +bnx2x_release_firmware(struct bnx2x *bp);
> >>>    #endif /* bnx2x.h */
> >>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> >>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> >>> index 54a2334..8d36ebb 100644
> >>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> >>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> >>> @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp,
> >> u32 load_code, bool print_err)
> >>>    	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
> >>>    	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
> >>>    		/* build my FW version dword */
> >>> -		u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
> >>> -			(BCM_5710_FW_MINOR_VERSION << 8) +
> >>> -			(BCM_5710_FW_REVISION_VERSION << 16) +
> >>> -			(BCM_5710_FW_ENGINEERING_VERSION << 24);
> >>> +		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
> >>> +				(bp->fw_rev << 16) + (bp->fw_eng << 24);
> >>>
> >>>    		/* read loaded FW from chip */
> >>>    		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); diff --git
> >>> a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> >>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> >>> index 3f84352..a84d015 100644
> >>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> >>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
> >>> @@ -241,6 +241,8 @@
> >>>    	IRO[221].m2))
> >>>    #define XSTORM_VF_TO_PF_OFFSET(funcId) \
> >>>    	(IRO[48].base + ((funcId) * IRO[48].m1))
> >>> +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)
> >> 	\
> >>> +	(IRO[386].base + ((fid) * IRO[386].m1))
> >>>    #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
> >>>
> >>>    /* eth hsi version */
> >>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> >>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> >>> index 622fadc..611efee 100644
> >>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> >>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
> >>> @@ -3024,7 +3024,8 @@ struct afex_stats {
> >>>
> >>>    #define BCM_5710_FW_MAJOR_VERSION			7
> >>>    #define BCM_5710_FW_MINOR_VERSION			13
> >>> -#define BCM_5710_FW_REVISION_VERSION		15
> >>> +#define BCM_5710_FW_REVISION_VERSION		21
> >>> +#define BCM_5710_FW_REVISION_VERSION_V15	15
> >>>    #define BCM_5710_FW_ENGINEERING_VERSION		0
> >>>    #define BCM_5710_FW_COMPILE_FLAGS			1
> >>>
> >>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> >>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> >>> index aec666e..125dafe 100644
> >>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> >>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> >>> @@ -74,9 +74,19 @@
> >>>    	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
> >>>    	__stringify(BCM_5710_FW_REVISION_VERSION) "."	\
> >>>    	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
> >>> +
> >>> +#define FW_FILE_VERSION_V15				\
> >>> +	__stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
> >>> +	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
> >>> +	__stringify(BCM_5710_FW_REVISION_VERSION_V15) "."	\
> >>> +	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
> >>> +
> >>>    #define FW_FILE_NAME_E1		"bnx2x/bnx2x-e1-"
> FW_FILE_VERSION
> >> ".fw"
> >>>    #define FW_FILE_NAME_E1H	"bnx2x/bnx2x-e1h-"
> >> FW_FILE_VERSION ".fw"
> >>>    #define FW_FILE_NAME_E2		"bnx2x/bnx2x-e2-"
> FW_FILE_VERSION
> >> ".fw"
> >>> +#define FW_FILE_NAME_E1_V15	"bnx2x/bnx2x-e1-"
> >> FW_FILE_VERSION_V15 ".fw"
> >>> +#define FW_FILE_NAME_E1H_V15	"bnx2x/bnx2x-e1h-"
> >> FW_FILE_VERSION_V15 ".fw"
> >>> +#define FW_FILE_NAME_E2_V15	"bnx2x/bnx2x-e2-"
> >> FW_FILE_VERSION_V15 ".fw"
> >>>
> >>>    /* Time in jiffies before concluding the transmitter is hung */
> >>>    #define TX_TIMEOUT		(5*HZ)
> >>> @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
> >>>    		  CHIP_IS_E1(bp) ? "everest1" :
> >>>    		  CHIP_IS_E1H(bp) ? "everest1h" :
> >>>    		  CHIP_IS_E2(bp) ? "everest2" : "everest3",
> >>> -		  BCM_5710_FW_MAJOR_VERSION,
> >>> -		  BCM_5710_FW_MINOR_VERSION,
> >>> -		  BCM_5710_FW_REVISION_VERSION);
> >>> +		  bp->fw_major, bp->fw_minor, bp->fw_rev);
> >>>
> >>>    	return rc;
> >>>    }
> >>> @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
> >>>
> >>>    	bnx2x_read_fwinfo(bp);
> >>>
> >>> +	if (IS_PF(bp)) {
> >>> +		rc = bnx2x_init_firmware(bp);
> >>> +
> >>> +		if (rc) {
> >>> +			bnx2x_free_mem_bp(bp);
> >>> +			return rc;
> >>> +		}
> >>> +	}
> >>> +
> >>>    	func = BP_FUNC(bp);
> >>>
> >>>    	/* need to reset chip if undi was active */ @@ -12320,6 +12337,7
> >>> @@ static int bnx2x_init_bp(struct bnx2x *bp)
> >>>
> >>>    		rc = bnx2x_prev_unload(bp);
> >>>    		if (rc) {
> >>> +			bnx2x_release_firmware(bp);
> >>>    			bnx2x_free_mem_bp(bp);
> >>>    			return rc;
> >>>    		}
> >>> @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct
> >> bnx2x *bp)
> >>>    	/* Check FW version */
> >>>    	offset = be32_to_cpu(fw_hdr->fw_version.offset);
> >>>    	fw_ver = firmware->data + offset;
> >>> -	if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
> >>> -	    (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
> >>> -	    (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
> >>> -	    (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
> >>> +	if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
> >>> +	    fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
> >>>    		BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be
> >> %d.%d.%d.%d\n",
> >>> -		       fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
> >>> -		       BCM_5710_FW_MAJOR_VERSION,
> >>> -		       BCM_5710_FW_MINOR_VERSION,
> >>> -		       BCM_5710_FW_REVISION_VERSION,
> >>> -		       BCM_5710_FW_ENGINEERING_VERSION);
> >>> +			  fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
> >>> +			  bp->fw_major, bp->fw_minor, bp->fw_rev, bp-
> >>> fw_eng);
> >>>    		return -EINVAL;
> >>>    	}
> >>>
> >>> @@ -13404,34 +13417,51 @@ static void be16_to_cpu_n(const u8
> >> *_source, u8 *_target, u32 n)
> >>>    	     (u8 *)bp->arr, len);					\
> >>>    } while (0)
> >>>
> >>> -static int bnx2x_init_firmware(struct bnx2x *bp)
> >>> +int bnx2x_init_firmware(struct bnx2x *bp)
> >>>    {
> >>> -	const char *fw_file_name;
> >>> +	const char *fw_file_name, *fw_file_name_v15;
> >>>    	struct bnx2x_fw_file_hdr *fw_hdr;
> >>>    	int rc;
> >>>
> >>>    	if (bp->firmware)
> >>>    		return 0;
> >>>
> >>> -	if (CHIP_IS_E1(bp))
> >>> +	if (CHIP_IS_E1(bp)) {
> >>>    		fw_file_name = FW_FILE_NAME_E1;
> >>> -	else if (CHIP_IS_E1H(bp))
> >>> +		fw_file_name_v15 = FW_FILE_NAME_E1_V15;
> >>> +	} else if (CHIP_IS_E1H(bp)) {
> >>>    		fw_file_name = FW_FILE_NAME_E1H;
> >>> -	else if (!CHIP_IS_E1x(bp))
> >>> +		fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
> >>> +	} else if (!CHIP_IS_E1x(bp)) {
> >>>    		fw_file_name = FW_FILE_NAME_E2;
> >>> -	else {
> >>> +		fw_file_name_v15 = FW_FILE_NAME_E2_V15;
> >>> +	} else {
> >>>    		BNX2X_ERR("Unsupported chip revision\n");
> >>>    		return -EINVAL;
> >>>    	}
> >>> +
> >>>    	BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
> >>>
> >>>    	rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev-
> >dev);
> >>>    	if (rc) {
> >>> -		BNX2X_ERR("Can't load firmware file %s\n",
> >>> -			  fw_file_name);
> >>> -		goto request_firmware_exit;
> >>> +		BNX2X_DEV_INFO("Trying to load older fw %s\n",
> fw_file_name_v15);
> >>> +
> >>> +		/* try to load prev version */
> >>> +		rc = request_firmware(&bp->firmware, fw_file_name_v15,
> >>> ++&bp->pdev->dev);
> >>> +
> >>> +		if (rc)
> >>> +			goto request_firmware_exit;
> >>> +
> >>> +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
> >>> +	} else {
> >>> +		bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
> >>> +		bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
> >>>    	}
> >>>
> >>> +	bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
> >>> +	bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
> >>> +	bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
> >>> +
> >>>    	rc = bnx2x_check_firmware(bp);
> >>>    	if (rc) {
> >>>    		BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); @@
> >> -13487,7
> >>> +13517,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
> >>>    	return rc;
> >>>    }
> >>>
> >>> -static void bnx2x_release_firmware(struct bnx2x *bp)
> >>> +void bnx2x_release_firmware(struct bnx2x *bp)
> >>>    {
> >>>    	kfree(bp->init_ops_offsets);
> >>>    	kfree(bp->init_ops);
> >>> @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev
> *pdev,
> >>>    	return 0;
> >>>
> >>>    init_one_freemem:
> >>> +	bnx2x_release_firmware(bp);
> >>>    	bnx2x_free_mem_bp(bp);
> >>>
> >>>    init_one_exit:
> >>
> >> This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize
> >> firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the
> >> stable series, for example, Linux v5.10.95.
> >>
> >> Due to CVE-2022-0847 (Dirty Pipe) [1] we updated systems from, for
> >> example, Linux 5.10.24 to Linux v5.10.103 and noticed that the
> >> Broadcom network devices failed to initialize.
> >>
> >> ```
> >> [   20.477325] bnx2 0000:02:00.0 eth0: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem c6000000, IRQ 41, node addr
> c8:1f:66:cf:34:45
> >> [   20.491782] bnx2 0000:02:00.1 eth1: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem c8000000, IRQ 42, node addr
> c8:1f:66:cf:34:47
> >> [   20.506223] bnx2 0000:03:00.0 eth2: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem ca000000, IRQ 43, node addr
> c8:1f:66:cf:34:49
> >> [   20.520644] bnx2 0000:03:00.1 eth3: Broadcom NetXtreme II BCM5709
> 1000Base-T (C0) PCI Express found at mem cc000000, IRQ 44, node addr
> c8:1f:66:cf:34:4b
> >> [   20.534985] bnx2x 0000:45:00.0: msix capability found
> >> [   20.540342] bnx2x 0000:45:00.0: part number 394D4342-31373735-
> 31314131-473331
> >> [   20.548605] bnx2x 0000:45:00.0: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.21.0.fw failed with error -2
> >> [   20.558373] bnx2x 0000:45:00.0: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.15.0.fw failed with error -2
> >> [   20.568319] bnx2x: probe of 0000:45:00.0 failed with error -2
> >> [   20.574148] bnx2x 0000:45:00.1: msix capability found
> >> [   20.579470] bnx2x 0000:45:00.1: part number 394D4342-31373735-
> 31314131-473331
> >> [   20.587708] bnx2x 0000:45:00.1: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.21.0.fw failed with error -2
> >> [   20.597479] bnx2x 0000:45:00.1: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.15.0.fw failed with error -2
> >> [   20.607355] bnx2x: probe of 0000:45:00.1 failed with error -2
> >> [   20.613179] bnx2x 0000:46:00.0: msix capability found
> >> [   20.618501] bnx2x 0000:46:00.0: part number 394D4342-31373735-
> 31314131-473331
> >> [   20.626805] bnx2x 0000:46:00.0: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.21.0.fw failed with error -2
> >> [   20.636580] bnx2x 0000:46:00.0: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.15.0.fw failed with error -2
> >> [   20.646453] bnx2x: probe of 0000:46:00.0 failed with error -2
> >> [   20.652279] bnx2x 0000:46:00.1: msix capability found
> >> [   20.657593] bnx2x 0000:46:00.1: part number 394D4342-31373735-
> 31314131-473331
> >> [   20.665813] bnx2x 0000:46:00.1: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.21.0.fw failed with error -2
> >> [   20.675590] bnx2x 0000:46:00.1: Direct firmware load for bnx2x/bnx2x-
> e1h-7.13.15.0.fw failed with error -2
> >> [   20.685457] bnx2x: probe of 0000:46:00.1 failed with error -2
> >> ```
> >>
> >> Due to reasons, we do not have firmware in the initrd, and the
> >> firmware files are only the root partition in `/lib/firmware`.
> >>
> >> Logging in using other means¹, and removing the PCI devices, and
> >> rescanning brought the devices online.
> >>
> >>       $ echo 1 | sudo tee
> /sys/bus/pci/devices/0000\:{45,46}\:00.{0,1}/remove
> >>       $ echo 1 | sudo tee /sys/bus/pci/rescan
> >>
> >> Adding the firmware files to the initrd also fixes the issue.
> >>
> >> I didn’t bisect the change, but the refactoring of the init methods
> >> looks like the reason for the change of behavior. As it’s not
> >> documented in the commit message, was that intentional? I think it
> >> breaks Linux’ no regression rule, and the commit (and follow-ups)
> >> should be reverted in Linux v5.17 and backed out from the stable series.
> 
> > The whole motivation of sending this v2 was to make FW backward
> > compatible with older FW so that it doesn't break the system if someone
> updates the kernel, however the initrd case was slipped which was fixed
> recently with below commit and should already be added to affected stable
> branches. You may want to apply this commit to resolve the problem.
> >
> > commit e13ad1443684f7afaff24cf207e85e97885256bd
> > Author: Manish Chopra <manishc@marvell.com>
> > Date:   Wed Feb 23 00:57:20 2022 -0800
> >
> >      bnx2x: fix driver load from initrd
> >
> >      Commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") added
> >      new firmware support in the driver with maintaining older firmware
> >      compatibility. However, older firmware was not added in
> MODULE_FIRMWARE()
> >      which caused missing firmware files in initrd image leading to driver load
> >      failure from initrd. This patch adds MODULE_FIRMWARE() for older
> firmware
> >      version to have firmware files included in initrd.
> >
> >      Fixes: b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")
> >      Link: https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__bugzilla.kernel.org_show-5Fbug.cgi-3Fid-
> 3D215627&d=DwIDaQ&c=nKjWec2b6R0mOyPaz7xtfQ&r=bMTgx2X48QVXyX
> OEL8ALyI4dsWoR-m74c5n3d-
> ruJI8&m=DWtPUJhj8ALDm3F8Z6yRzueXgOQYUZjabDdRRZgAxN107KS-
> brGjhwlenYylwJfh&s=FOolAGCSdsIl3ADjsbNpyuvYJS9UFqNOdf1pNXO5Y90&e
> =
> >      Signed-off-by: Manish Chopra <manishc@marvell.com>
> >      Signed-off-by: Alok Prasad <palok@marvell.com>
> >      Signed-off-by: Ariel Elior <aelior@marvell.com>
> >      Link: https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__lore.kernel.org_r_20220223085720.12021-2D1-2Dmanishc-
> 40marvell.com&d=DwIDaQ&c=nKjWec2b6R0mOyPaz7xtfQ&r=bMTgx2X48QV
> XyXOEL8ALyI4dsWoR-m74c5n3d-
> ruJI8&m=DWtPUJhj8ALDm3F8Z6yRzueXgOQYUZjabDdRRZgAxN107KS-
> brGjhwlenYylwJfh&s=kWJAQqsp93gdiS4He5uT7oprXMx3WHE3pYMFb9pSJW
> 4&e=
> >      Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> 
> I am aware of this commit, and it’s in Linux 5.10.103, we built and used. It’s
> not about firmware version 7.13.15.0 not being loaded, but as written, about
> the code being changed, so the firmware files are required earlier now. For
> unknown reasons to me `bnx2x_init_firmware()` is now also called in
> `bnx2x_init_bp()`, which is probably an earlier code path. (I do not know the
> driver, so please take everything I write with a grain of salt.)
> 

This move was intentional, as follow up driver flow [bnx2x_compare_fw_ver()] needs to know which exact FW version (newer or older fw version which will be decided at run time now)
the function is supposed to be run with in order to compare against already loaded FW on the adapter to decide on function probe/init failure (as opposed to earlier where driver was
always stick to the one specific/fixed firmware version). So for that reason I chose the right place to invoke the bnx2x_init_firmware() during the probe early instead of later stage.

Thanks,
Manish
Linus Torvalds March 9, 2022, 7:24 p.m. UTC | #7
On Wed, Mar 9, 2022 at 11:22 AM Manish Chopra <manishc@marvell.com> wrote:
>
> This move was intentional, as follow up driver flow [bnx2x_compare_fw_ver()] needs to know which exact FW version (newer or older fw version which will be decided at run time now)
> the function is supposed to be run with in order to compare against already loaded FW on the adapter to decide on function probe/init failure (as opposed to earlier where driver was
> always stick to the one specific/fixed firmware version). So for that reason I chose the right place to invoke the bnx2x_init_firmware() during the probe early instead of later stage.

.. but since that fundamentally DOES NOT WORK, we'll clearly have to
revert that change.

Firmware loading cannot happen early in boot. End of story. You need
to delay firmware loading until the device is actually opened.

                                 Linus
Manish Chopra March 9, 2022, 7:46 p.m. UTC | #8
> -----Original Message-----
> From: Linus Torvalds <torvalds@linux-foundation.org>
> Sent: Thursday, March 10, 2022 12:55 AM
> To: Manish Chopra <manishc@marvell.com>
> Cc: Paul Menzel <pmenzel@molgen.mpg.de>; kuba@kernel.org;
> netdev@vger.kernel.org; Ariel Elior <aelior@marvell.com>; Alok Prasad
> <palok@marvell.com>; Prabhakar Kushwaha <pkushwaha@marvell.com>;
> David S. Miller <davem@davemloft.net>; Greg KH
> <gregkh@linuxfoundation.org>; stable@vger.kernel.org;
> it+netdev@molgen.mpg.de; regressions@lists.linux.dev
> Subject: Re: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware
> 7.13.21.0
> 
> On Wed, Mar 9, 2022 at 11:22 AM Manish Chopra <manishc@marvell.com>
> wrote:
> >
> > This move was intentional, as follow up driver flow
> > [bnx2x_compare_fw_ver()] needs to know which exact FW version (newer
> > or older fw version which will be decided at run time now) the function is
> supposed to be run with in order to compare against already loaded FW on
> the adapter to decide on function probe/init failure (as opposed to earlier
> where driver was always stick to the one specific/fixed firmware version). So
> for that reason I chose the right place to invoke the bnx2x_init_firmware()
> during the probe early instead of later stage.
> 
> .. but since that fundamentally DOES NOT WORK, we'll clearly have to revert
> that change.
> 
> Firmware loading cannot happen early in boot. End of story. You need to delay
> firmware loading until the device is actually opened.
> 
>                                  Linus

Hello Linus,

This has not changed anything functionally from driver/device perspective, FW is still being loaded only when device is opened.
bnx2x_init_firmware() [I guess, perhaps the name is misleading] just request_firmware() to prepare the metadata to be used when device will be opened.

Thanks,
Manish
Linus Torvalds March 9, 2022, 10:18 p.m. UTC | #9
On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com> wrote:
>
> This has not changed anything functionally from driver/device perspective, FW is still being loaded only when device is opened.
> bnx2x_init_firmware() [I guess, perhaps the name is misleading] just request_firmware() to prepare the metadata to be used when device will be opened.

So how do you explain the report by Paul Menzel that things used to
work and no longer work now?

You can't do request_firmware() early. When you actually then push the
firmware to the device is immaterial - but request_firmware() has to
be done after the system is up and running.

                 Linus
Thorsten Leemhuis March 10, 2022, 10:13 a.m. UTC | #10
[TLDR: I'm adding the regression report below to regzbot, the Linux
kernel regression tracking bot; all text you find below is compiled from
a few templates paragraphs you might have encountered already already
from similar mails.]

Hi, this is your Linux kernel regression tracker. Top-posting for once,
to make this easily accessible to everyone.

To be sure below issue doesn't fall through the cracks unnoticed, I'm
adding it to regzbot, my Linux kernel regression tracking bot:

#regzbot ^introduced b7a49f73059fe6147b6b7
#regzbot title firmware now as to be in the initramfs
#regzbot ignore-activity

If it turns out this isn't a regression, free free to remove it from the
tracking by sending a reply to this thread containing a paragraph like
"#regzbot invalid: reason why this is invalid" (without the quotes).

Reminder for developers: when fixing the issue, please add a 'Link:'
tags pointing to the report (the mail quoted above) using
lore.kernel.org/r/, as explained in
'Documentation/process/submitting-patches.rst' and
'Documentation/process/5.Posting.rst'. Regzbot needs them to
automatically connect reports with fixes, but they are useful in
general, too.

Sending this just to the lists, as this seems to be handled already.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)

P.S.: As the Linux kernel's regression tracker I'm getting a lot of
reports on my table. I can only look briefly into most of them and lack
knowledge about most of the areas they concern. I thus unfortunately
will sometimes get things wrong or miss something important. I hope
that's not the case here; if you think it is, don't hesitate to tell me
in a public reply, it's in everyone's interest to set the public record
straight.


On 09.03.22 17:18, Paul Menzel wrote:
> [Use correct regression address]
> 
> Am 09.03.22 um 17:15 schrieb Paul Menzel:
>> Dear Manish,
>>
>>
>> Am 17.12.21 um 17:55 schrieb Manish Chopra:
>>> This new firmware addresses few important issues and enhancements
>>> as mentioned below -
>>>
>>> - Support direct invalidation of FP HSI Ver per function ID, required
>>> for
>>>    invalidating FP HSI Ver prior to each VF start, as there is no VF
>>> start
>>> - BRB hardware block parity error detection support for the driver
>>> - Fix the FCOE underrun flow
>>> - Fix PSOD during FCoE BFS over the NIC ports after preboot driver
>>> - Maintains backward compatibility
>>>
>>> This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
>>>
>>> Signed-off-by: Manish Chopra <manishc@marvell.com>
>>> Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
>>> Signed-off-by: Alok Prasad <palok@marvell.com>
>>> Signed-off-by: Ariel Elior <aelior@marvell.com>
>>> ---
>>>
>>> v1->v2:
>>> ------------
>>> * Modified the patch such that driver to be backward compatible
>>>    with older firmware too (New fw v7.13.21.0 on linux-firmware.git
>>>    enables driver to maintain backward compatibility)
>>> ---
>>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x.h        | 11 +++-
>>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  6 +-
>>>   .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h    |  2 +
>>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h    |  3 +-
>>>   drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 75
>>> +++++++++++++++-------
>>>   5 files changed, 69 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> index 2b06d78b..a19dd67 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
>>> @@ -1850,6 +1850,14 @@ struct bnx2x {
>>>       /* Vxlan/Geneve related information */
>>>       u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
>>> +
>>> +#define FW_CAP_INVALIDATE_VF_FP_HSI    BIT(0)
>>> +    u32 fw_cap;
>>> +
>>> +    u32 fw_major;
>>> +    u32 fw_minor;
>>> +    u32 fw_rev;
>>> +    u32 fw_eng;
>>>   };
>>>   /* Tx queues may be less or equal to Rx queues */
>>> @@ -2525,5 +2533,6 @@ enum {
>>>    * Meant for implicit re-load flows.
>>>    */
>>>   int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
>>> -
>>> +int bnx2x_init_firmware(struct bnx2x *bp);
>>> +void bnx2x_release_firmware(struct bnx2x *bp);
>>>   #endif /* bnx2x.h */
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> index 54a2334..8d36ebb 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
>>> @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32
>>> load_code, bool print_err)
>>>       if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
>>>           load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
>>>           /* build my FW version dword */
>>> -        u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
>>> -            (BCM_5710_FW_MINOR_VERSION << 8) +
>>> -            (BCM_5710_FW_REVISION_VERSION << 16) +
>>> -            (BCM_5710_FW_ENGINEERING_VERSION << 24);
>>> +        u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
>>> +                (bp->fw_rev << 16) + (bp->fw_eng << 24);
>>>           /* read loaded FW from chip */
>>>           u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> index 3f84352..a84d015 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
>>> @@ -241,6 +241,8 @@
>>>       IRO[221].m2))
>>>   #define XSTORM_VF_TO_PF_OFFSET(funcId) \
>>>       (IRO[48].base + ((funcId) * IRO[48].m1))
>>> +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)    \
>>> +    (IRO[386].base + ((fid) * IRO[386].m1))
>>>   #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
>>>   /* eth hsi version */
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> index 622fadc..611efee 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
>>> @@ -3024,7 +3024,8 @@ struct afex_stats {
>>>   #define BCM_5710_FW_MAJOR_VERSION            7
>>>   #define BCM_5710_FW_MINOR_VERSION            13
>>> -#define BCM_5710_FW_REVISION_VERSION        15
>>> +#define BCM_5710_FW_REVISION_VERSION        21
>>> +#define BCM_5710_FW_REVISION_VERSION_V15    15
>>>   #define BCM_5710_FW_ENGINEERING_VERSION        0
>>>   #define BCM_5710_FW_COMPILE_FLAGS            1
>>> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> index aec666e..125dafe 100644
>>> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
>>> @@ -74,9 +74,19 @@
>>>       __stringify(BCM_5710_FW_MINOR_VERSION) "."    \
>>>       __stringify(BCM_5710_FW_REVISION_VERSION) "."    \
>>>       __stringify(BCM_5710_FW_ENGINEERING_VERSION)
>>> +
>>> +#define FW_FILE_VERSION_V15                \
>>> +    __stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
>>> +    __stringify(BCM_5710_FW_MINOR_VERSION) "."    \
>>> +    __stringify(BCM_5710_FW_REVISION_VERSION_V15) "."    \
>>> +    __stringify(BCM_5710_FW_ENGINEERING_VERSION)
>>> +
>>>   #define FW_FILE_NAME_E1        "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
>>>   #define FW_FILE_NAME_E1H    "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
>>>   #define FW_FILE_NAME_E2        "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
>>> +#define FW_FILE_NAME_E1_V15    "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15
>>> ".fw"
>>> +#define FW_FILE_NAME_E1H_V15    "bnx2x/bnx2x-e1h-"
>>> FW_FILE_VERSION_V15 ".fw"
>>> +#define FW_FILE_NAME_E2_V15    "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15
>>> ".fw"
>>>   /* Time in jiffies before concluding the transmitter is hung */
>>>   #define TX_TIMEOUT        (5*HZ)
>>> @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp)
>>>             CHIP_IS_E1(bp) ? "everest1" :
>>>             CHIP_IS_E1H(bp) ? "everest1h" :
>>>             CHIP_IS_E2(bp) ? "everest2" : "everest3",
>>> -          BCM_5710_FW_MAJOR_VERSION,
>>> -          BCM_5710_FW_MINOR_VERSION,
>>> -          BCM_5710_FW_REVISION_VERSION);
>>> +          bp->fw_major, bp->fw_minor, bp->fw_rev);
>>>       return rc;
>>>   }
>>> @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>>>       bnx2x_read_fwinfo(bp);
>>> +    if (IS_PF(bp)) {
>>> +        rc = bnx2x_init_firmware(bp);
>>> +
>>> +        if (rc) {
>>> +            bnx2x_free_mem_bp(bp);
>>> +            return rc;
>>> +        }
>>> +    }
>>> +
>>>       func = BP_FUNC(bp);
>>>       /* need to reset chip if undi was active */
>>> @@ -12320,6 +12337,7 @@ static int bnx2x_init_bp(struct bnx2x *bp)
>>>           rc = bnx2x_prev_unload(bp);
>>>           if (rc) {
>>> +            bnx2x_release_firmware(bp);
>>>               bnx2x_free_mem_bp(bp);
>>>               return rc;
>>>           }
>>> @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct
>>> bnx2x *bp)
>>>       /* Check FW version */
>>>       offset = be32_to_cpu(fw_hdr->fw_version.offset);
>>>       fw_ver = firmware->data + offset;
>>> -    if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
>>> -        (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
>>> -        (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
>>> -        (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
>>> +    if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
>>> +        fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
>>>           BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be
>>> %d.%d.%d.%d\n",
>>> -               fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
>>> -               BCM_5710_FW_MAJOR_VERSION,
>>> -               BCM_5710_FW_MINOR_VERSION,
>>> -               BCM_5710_FW_REVISION_VERSION,
>>> -               BCM_5710_FW_ENGINEERING_VERSION);
>>> +              fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
>>> +              bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
>>>           return -EINVAL;
>>>       }
>>> @@ -13404,34 +13417,51 @@ static void be16_to_cpu_n(const u8
>>> *_source, u8 *_target, u32 n)
>>>            (u8 *)bp->arr, len);                    \
>>>   } while (0)
>>> -static int bnx2x_init_firmware(struct bnx2x *bp)
>>> +int bnx2x_init_firmware(struct bnx2x *bp)
>>>   {
>>> -    const char *fw_file_name;
>>> +    const char *fw_file_name, *fw_file_name_v15;
>>>       struct bnx2x_fw_file_hdr *fw_hdr;
>>>       int rc;
>>>       if (bp->firmware)
>>>           return 0;
>>> -    if (CHIP_IS_E1(bp))
>>> +    if (CHIP_IS_E1(bp)) {
>>>           fw_file_name = FW_FILE_NAME_E1;
>>> -    else if (CHIP_IS_E1H(bp))
>>> +        fw_file_name_v15 = FW_FILE_NAME_E1_V15;
>>> +    } else if (CHIP_IS_E1H(bp)) {
>>>           fw_file_name = FW_FILE_NAME_E1H;
>>> -    else if (!CHIP_IS_E1x(bp))
>>> +        fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
>>> +    } else if (!CHIP_IS_E1x(bp)) {
>>>           fw_file_name = FW_FILE_NAME_E2;
>>> -    else {
>>> +        fw_file_name_v15 = FW_FILE_NAME_E2_V15;
>>> +    } else {
>>>           BNX2X_ERR("Unsupported chip revision\n");
>>>           return -EINVAL;
>>>       }
>>> +
>>>       BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
>>>       rc = request_firmware(&bp->firmware, fw_file_name,
>>> &bp->pdev->dev);
>>>       if (rc) {
>>> -        BNX2X_ERR("Can't load firmware file %s\n",
>>> -              fw_file_name);
>>> -        goto request_firmware_exit;
>>> +        BNX2X_DEV_INFO("Trying to load older fw %s\n",
>>> fw_file_name_v15);
>>> +
>>> +        /* try to load prev version */
>>> +        rc = request_firmware(&bp->firmware, fw_file_name_v15,
>>> &bp->pdev->dev);
>>> +
>>> +        if (rc)
>>> +            goto request_firmware_exit;
>>> +
>>> +        bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
>>> +    } else {
>>> +        bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
>>> +        bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
>>>       }
>>> +    bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
>>> +    bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
>>> +    bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
>>> +
>>>       rc = bnx2x_check_firmware(bp);
>>>       if (rc) {
>>>           BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
>>> @@ -13487,7 +13517,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
>>>       return rc;
>>>   }
>>> -static void bnx2x_release_firmware(struct bnx2x *bp)
>>> +void bnx2x_release_firmware(struct bnx2x *bp)
>>>   {
>>>       kfree(bp->init_ops_offsets);
>>>       kfree(bp->init_ops);
>>> @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev,
>>>       return 0;
>>>   init_one_freemem:
>>> +    bnx2x_release_firmware(bp);
>>>       bnx2x_free_mem_bp(bp);
>>>   init_one_exit:
>>
>> This change was added to Linux in commit b7a49f73059f (bnx2x: Utilize
>> firmware 7.13.21.0) [1] to Linux v5.17-rc1, and backported to the
>> stable series, for example, Linux v5.10.95.
>>
>> Due to CVE-2022-0847 (Dirty Pipe) [1] we updated systems from, for
>> example, Linux 5.10.24 to Linux v5.10.103 and noticed that the
>> Broadcom network devices failed to initialize.
>>
>> ```
>> [   20.477325] bnx2 0000:02:00.0 eth0: Broadcom NetXtreme II BCM5709
>> 1000Base-T (C0) PCI Express found at mem c6000000, IRQ 41, node addr
>> c8:1f:66:cf:34:45
>> [   20.491782] bnx2 0000:02:00.1 eth1: Broadcom NetXtreme II BCM5709
>> 1000Base-T (C0) PCI Express found at mem c8000000, IRQ 42, node addr
>> c8:1f:66:cf:34:47
>> [   20.506223] bnx2 0000:03:00.0 eth2: Broadcom NetXtreme II BCM5709
>> 1000Base-T (C0) PCI Express found at mem ca000000, IRQ 43, node addr
>> c8:1f:66:cf:34:49
>> [   20.520644] bnx2 0000:03:00.1 eth3: Broadcom NetXtreme II BCM5709
>> 1000Base-T (C0) PCI Express found at mem cc000000, IRQ 44, node addr
>> c8:1f:66:cf:34:4b
>> [   20.534985] bnx2x 0000:45:00.0: msix capability found
>> [   20.540342] bnx2x 0000:45:00.0: part number
>> 394D4342-31373735-31314131-473331
>> [   20.548605] bnx2x 0000:45:00.0: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.558373] bnx2x 0000:45:00.0: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.568319] bnx2x: probe of 0000:45:00.0 failed with error -2
>> [   20.574148] bnx2x 0000:45:00.1: msix capability found
>> [   20.579470] bnx2x 0000:45:00.1: part number
>> 394D4342-31373735-31314131-473331
>> [   20.587708] bnx2x 0000:45:00.1: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.597479] bnx2x 0000:45:00.1: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.607355] bnx2x: probe of 0000:45:00.1 failed with error -2
>> [   20.613179] bnx2x 0000:46:00.0: msix capability found
>> [   20.618501] bnx2x 0000:46:00.0: part number
>> 394D4342-31373735-31314131-473331
>> [   20.626805] bnx2x 0000:46:00.0: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.636580] bnx2x 0000:46:00.0: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.646453] bnx2x: probe of 0000:46:00.0 failed with error -2
>> [   20.652279] bnx2x 0000:46:00.1: msix capability found
>> [   20.657593] bnx2x 0000:46:00.1: part number
>> 394D4342-31373735-31314131-473331
>> [   20.665813] bnx2x 0000:46:00.1: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.21.0.fw failed with error -2
>> [   20.675590] bnx2x 0000:46:00.1: Direct firmware load for
>> bnx2x/bnx2x-e1h-7.13.15.0.fw failed with error -2
>> [   20.685457] bnx2x: probe of 0000:46:00.1 failed with error -2
>> ```
>>
>> Due to reasons, we do not have firmware in the initrd, and the
>> firmware files are only the root partition in `/lib/firmware`.
>>
>> Logging in using other means¹, and removing the PCI devices, and
>> rescanning brought the devices online.
>>
>>      $ echo 1 | sudo tee
>> /sys/bus/pci/devices/0000\:{45,46}\:00.{0,1}/remove
>>      $ echo 1 | sudo tee /sys/bus/pci/rescan
>>
>> Adding the firmware files to the initrd also fixes the issue.
>>
>> I didn’t bisect the change, but the refactoring of the init methods
>> looks like the reason for the change of behavior. As it’s not
>> documented in the commit message, was that intentional? I think it
>> breaks Linux’ no regression rule, and the commit (and follow-ups)
>> should be reverted in Linux v5.17 and backed out from the stable series.
>>
>>
>> Kind regards,
>>
>> Paul
>>
>>
>> ¹ Why can’t even without proper firmware, the card at least initialize
>> to have a network connection even if it’s degraded?
> 
>
Jakub Kicinski March 10, 2022, 6:52 p.m. UTC | #11
On Wed, 9 Mar 2022 14:18:23 -0800 Linus Torvalds wrote:
> On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com> wrote:
> >
> > This has not changed anything functionally from driver/device perspective, FW is still being loaded only when device is opened.
> > bnx2x_init_firmware() [I guess, perhaps the name is misleading] just request_firmware() to prepare the metadata to be used when device will be opened.  
> 
> So how do you explain the report by Paul Menzel that things used to
> work and no longer work now?
> 
> You can't do request_firmware() early. When you actually then push the
> firmware to the device is immaterial - but request_firmware() has to
> be done after the system is up and running.

Alright, I don't see a revert in my inbox, so let me double check.

Linus, is my understanding correct that our PR today should revert
the patches in question [1]? Or just drop them from stable for now, 
and give Manish and team a week or two to figure out a fix?

Manish, assuming it's the former - would you be able to provide 
a revert within a couple of hours (and I mean  "couple" in the 
British sense of roughly two)?

[1] patches in question:
 e13ad1443684 ("bnx2x: fix driver load from initrd")
 802d4d207e75 ("bnx2x: Invalidate fastpath HSI version for VFs")
 b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")
Ariel Elior March 10, 2022, 7:14 p.m. UTC | #12
> On Wed, 9 Mar 2022 14:18:23 -0800 Linus Torvalds wrote:
> > On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com>
> wrote:
> > >
> > > This has not changed anything functionally from driver/device
> perspective, FW is still being loaded only when device is opened.
> > > bnx2x_init_firmware() [I guess, perhaps the name is misleading] just
> request_firmware() to prepare the metadata to be used when device will be
> opened.
> >
> > So how do you explain the report by Paul Menzel that things used to
> > work and no longer work now?
> >
> > You can't do request_firmware() early. When you actually then push the
> > firmware to the device is immaterial - but request_firmware() has to
> > be done after the system is up and running.
> 
> Alright, I don't see a revert in my inbox, so let me double check.
> 
> Linus, is my understanding correct that our PR today should revert
> the patches in question [1]? Or just drop them from stable for now,
> and give Manish and team a week or two to figure out a fix?
> 
> Manish, assuming it's the former - would you be able to provide
> a revert within a couple of hours (and I mean  "couple" in the
> British sense of roughly two)?
> 
> [1] patches in question:
>  e13ad1443684 ("bnx2x: fix driver load from initrd")
>  802d4d207e75 ("bnx2x: Invalidate fastpath HSI version for VFs")
>  b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")

Jackub, assuming former, we won't be able to provide anything in a couple of hours
(already past midnight in Manish's part of the world).

Linus, does "you can't request FW before system is up and running" translate to
"don't request FW at probe" or "don't request FW in initrd?"
Or perhaps something else?

Thanks,
Ariel
Jakub Kicinski March 10, 2022, 7:42 p.m. UTC | #13
On Thu, 10 Mar 2022 19:14:14 +0000 Ariel Elior wrote:
> Jackub, assuming former, we won't be able to provide anything in a couple of hours
> (already past midnight in Manish's part of the world).

No worries, I can take care of it myself, given the short notice.
Manish Chopra March 11, 2022, 12:11 p.m. UTC | #14
> -----Original Message-----
> From: Linus Torvalds <torvalds@linux-foundation.org>
> Sent: Thursday, March 10, 2022 3:48 AM
> To: Manish Chopra <manishc@marvell.com>
> Cc: Paul Menzel <pmenzel@molgen.mpg.de>; kuba@kernel.org;
> netdev@vger.kernel.org; Ariel Elior <aelior@marvell.com>; Alok Prasad
> <palok@marvell.com>; Prabhakar Kushwaha <pkushwaha@marvell.com>;
> David S. Miller <davem@davemloft.net>; Greg KH
> <gregkh@linuxfoundation.org>; stable@vger.kernel.org;
> it+netdev@molgen.mpg.de; regressions@lists.linux.dev
> Subject: Re: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware
> 7.13.21.0
> 
> On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com>
> wrote:
> >
> > This has not changed anything functionally from driver/device perspective,
> FW is still being loaded only when device is opened.
> > bnx2x_init_firmware() [I guess, perhaps the name is misleading] just
> request_firmware() to prepare the metadata to be used when device will be
> opened.
> 
> So how do you explain the report by Paul Menzel that things used to work and
> no longer work now?
> 

The issue which Paul mentioned had to do with "/lib/firmware/bnx2x/* file not found" when driver probes, which was introduced by the patch in subject,
And the commit e13ad1443684 ("bnx2x: fix driver load from initrd") fixes this issue. So things should work as it is with the mentioned fixed commit.
The only discussion led by this problem now is why the request_firmware() was moved early on [from open() to probe()] by the patch in subject.
I explained the intention to do this in my earlier emails and let me add more details below - 

Note that we have just moved request_firmware() logic, *not* something significant which has to do with actual FW loading or device initialization from the
FW file data which could cause significant functional change for this device/driver, FW load/init part still stays in open flow.

Before the patch in subject, driver used to only work with fixed/specific FW version file whose version was statically known to the driver function at probe() time to take
some decision to fail the function probe early in the system if the function is supposed to run with a FW version which is not the same version loaded on the device by another PF (different ENV).
Now when we sent this new FW patch (in subject) then we got feedback from community to maintain backward compatibility with older FW versions as well and we did it in same v2 patch legitimately,
just that now we can work with both older or newer FW file so we need this run time FW version information to cache (based on request_firmware() return success value for an old FW file or new FW file)
which will be used in follow up probe() flows to decide the function probe failure early If there could be FW version mismatches against the loaded FW on the device by other PFs already

So we need to understand why we should not call request_firmware() in probe or at least what's really harmful in doing that in probe() if some of the follow up probe flows needs
some of the metadata info (like the run time FW versions info in this case which we get based on request_firmware() return value), we could avoid this but we don't want
to add some ugly/unsuitable file APIs checks to know which FW version file is available on the file system if there is already an API request_firmware() available for this to be used.

Please let us know. Thanks.

> You can't do request_firmware() early. When you actually then push the
> firmware to the device is immaterial - but request_firmware() has to be done
> after the system is up and running.
> 
>                  Linus
Greg KH March 11, 2022, 12:52 p.m. UTC | #15
On Fri, Mar 11, 2022 at 12:11:45PM +0000, Manish Chopra wrote:
> > -----Original Message-----
> > From: Linus Torvalds <torvalds@linux-foundation.org>
> > Sent: Thursday, March 10, 2022 3:48 AM
> > To: Manish Chopra <manishc@marvell.com>
> > Cc: Paul Menzel <pmenzel@molgen.mpg.de>; kuba@kernel.org;
> > netdev@vger.kernel.org; Ariel Elior <aelior@marvell.com>; Alok Prasad
> > <palok@marvell.com>; Prabhakar Kushwaha <pkushwaha@marvell.com>;
> > David S. Miller <davem@davemloft.net>; Greg KH
> > <gregkh@linuxfoundation.org>; stable@vger.kernel.org;
> > it+netdev@molgen.mpg.de; regressions@lists.linux.dev
> > Subject: Re: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware
> > 7.13.21.0
> > 
> > On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com>
> > wrote:
> > >
> > > This has not changed anything functionally from driver/device perspective,
> > FW is still being loaded only when device is opened.
> > > bnx2x_init_firmware() [I guess, perhaps the name is misleading] just
> > request_firmware() to prepare the metadata to be used when device will be
> > opened.
> > 
> > So how do you explain the report by Paul Menzel that things used to work and
> > no longer work now?
> > 
> 
> The issue which Paul mentioned had to do with "/lib/firmware/bnx2x/* file not found" when driver probes, which was introduced by the patch in subject,
> And the commit e13ad1443684 ("bnx2x: fix driver load from initrd") fixes this issue. So things should work as it is with the mentioned fixed commit.
> The only discussion led by this problem now is why the request_firmware() was moved early on [from open() to probe()] by the patch in subject.
> I explained the intention to do this in my earlier emails and let me add more details below - 
> 
> Note that we have just moved request_firmware() logic, *not* something significant which has to do with actual FW loading or device initialization from the
> FW file data which could cause significant functional change for this device/driver, FW load/init part still stays in open flow.
> 
> Before the patch in subject, driver used to only work with fixed/specific FW version file whose version was statically known to the driver function at probe() time to take
> some decision to fail the function probe early in the system if the function is supposed to run with a FW version which is not the same version loaded on the device by another PF (different ENV).
> Now when we sent this new FW patch (in subject) then we got feedback from community to maintain backward compatibility with older FW versions as well and we did it in same v2 patch legitimately,
> just that now we can work with both older or newer FW file so we need this run time FW version information to cache (based on request_firmware() return success value for an old FW file or new FW file)
> which will be used in follow up probe() flows to decide the function probe failure early If there could be FW version mismatches against the loaded FW on the device by other PFs already
> 
> So we need to understand why we should not call request_firmware() in probe or at least what's really harmful in doing that in probe() if some of the follow up probe flows needs
> some of the metadata info (like the run time FW versions info in this case which we get based on request_firmware() return value), we could avoid this but we don't want
> to add some ugly/unsuitable file APIs checks to know which FW version file is available on the file system if there is already an API request_firmware() available for this to be used.
> 
> Please let us know. Thanks.

I think you are asking "why can't we call request_firmware() at probe
time?", right?

If so, try it and see why it fails.  Build the driver into the kernel
image, do not use an initramfs, and see what happens.

Again, wait until open() to call it, then you have a working userspace,
including a filesystem where the firmware actually will be.  Before
then, you might not.

thanks,

greg k-h
Paul Menzel March 11, 2022, 1 p.m. UTC | #16
Dear Manish,


As a side note, it’d be great if you could use an email client, better 
supporting quoting.


Am 11.03.22 um 13:11 schrieb Manish Chopra:
>> -----Original Message-----
>> From: Linus Torvalds <torvalds@linux-foundation.org>
>> Sent: Thursday, March 10, 2022 3:48 AM

[…]

>> On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com>
>> wrote:
>>>
>>> This has not changed anything functionally from driver/device perspective,
>>> FW is still being loaded only when device is opened.
>>> bnx2x_init_firmware() [I guess, perhaps the name is misleading] just
>>> request_firmware() to prepare the metadata to be used when device will be
>>> opened.
>>
>> So how do you explain the report by Paul Menzel that things used to work and
>> no longer work now?
> 
> The issue which Paul mentioned had to do with "/lib/firmware/bnx2x/*
> file not found" when driver probes, which was introduced by the patch
> in subject, And the commit e13ad1443684 ("bnx2x: fix driver load from
> initrd") fixes this issue. So things should work as it is with the
> mentioned fixed commit.
No, your statement is incorrect. I already corrected it in a previous 
reply. The commit you mentioned was backported to 5.10.103. As we used 
that version, your commit was present.

> The only discussion led by this problem now is why the
> request_firmware() was moved early on [from open() to probe()] by the
> patch in subject. I explained the intention to do this in my earlier
> emails and let me add more details below -
> 
> Note that we have just moved request_firmware() logic, *not*
> something significant which has to do with actual FW loading or
> device initialization from the FW file data which could cause
> significant functional change for this device/driver, FW load/init
> part still stays in open flow.
> 
> Before the patch in subject, driver used to only work with
> fixed/specific FW version file whose version was statically known to
> the driver function at probe() time to take some decision to fail the
> function probe early in the system if the function is supposed to run
> with a FW version which is not the same version loaded on the device
> by another PF (different ENV). Now when we sent this new FW patch (in
> subject) then we got feedback from community to maintain backward
> compatibility with older FW versions as well and we did it in same v2
> patch legitimately, just that now we can work with both older or
> newer FW file so we need this run time FW version information to
> cache (based on request_firmware() return success value for an old FW
> file or new FW file) which will be used in follow up probe() flows to
> decide the function probe failure early If there could be FW version
> mismatches against the loaded FW on the device by other PFs already
> 
> So we need to understand why we should not call request_firmware() in
> probe or at least what's really harmful in doing that in probe() if
> some of the follow up probe flows needs some of the metadata info
> (like the run time FW versions info in this case which we get based
> on request_firmware() return value), we could avoid this but we don't
> want to add some ugly/unsuitable file APIs checks to know which FW
> version file is available on the file system if there is already an
> API request_firmware() available for this to be used.
Your patches broke loading the driver, and as a result – as seen from 
the pastes I provided – the network devices were not functional.

> Please let us know. Thanks.
> 
>> You can't do request_firmware() early. When you actually then push the
>> firmware to the device is immaterial - but request_firmware() has to be done
>> after the system is up and running.


Kind regards,

Paul
Donald Buczek March 14, 2022, 2:36 p.m. UTC | #17
Dear Manish,

On 3/11/22 1:11 PM, Manish Chopra wrote:
>> -----Original Message-----
>> From: Linus Torvalds <torvalds@linux-foundation.org>
>> Sent: Thursday, March 10, 2022 3:48 AM
>> To: Manish Chopra <manishc@marvell.com>
>> Cc: Paul Menzel <pmenzel@molgen.mpg.de>; kuba@kernel.org;
>> netdev@vger.kernel.org; Ariel Elior <aelior@marvell.com>; Alok Prasad
>> <palok@marvell.com>; Prabhakar Kushwaha <pkushwaha@marvell.com>;
>> David S. Miller <davem@davemloft.net>; Greg KH
>> <gregkh@linuxfoundation.org>; stable@vger.kernel.org;
>> it+netdev@molgen.mpg.de; regressions@lists.linux.dev
>> Subject: Re: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware
>> 7.13.21.0
>>
>> On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra <manishc@marvell.com>
>> wrote:
>>>
>>> This has not changed anything functionally from driver/device perspective,
>> FW is still being loaded only when device is opened.
>>> bnx2x_init_firmware() [I guess, perhaps the name is misleading] just
>> request_firmware() to prepare the metadata to be used when device will be
>> opened.
>>
>> So how do you explain the report by Paul Menzel that things used to work and
>> no longer work now?
>>
> 
> The issue which Paul mentioned had to do with "/lib/firmware/bnx2x/* file not found" when driver probes, which was introduced by the patch in subject,
> And the commit e13ad1443684 ("bnx2x: fix driver load from initrd") fixes this issue. So things should work as it is with the mentioned fixed commit.
> The only discussion led by this problem now is why the request_firmware() was moved early on [from open() to probe()] by the patch in subject.
> I explained the intention to do this in my earlier emails and let me add more details below -
> 
> Note that we have just moved request_firmware() logic, *not* something significant which has to do with actual FW loading or device initialization from the
> FW file data which could cause significant functional change for this device/driver, FW load/init part still stays in open flow.
> 
> Before the patch in subject, driver used to only work with fixed/specific FW version file whose version was statically known to the driver function at probe() time to take
> some decision to fail the function probe early in the system if the function is supposed to run with a FW version which is not the same version loaded on the device by another PF (different ENV).
> Now when we sent this new FW patch (in subject) then we got feedback from community to maintain backward compatibility with older FW versions as well and we did it in same v2 patch legitimately,
> just that now we can work with both older or newer FW file so we need this run time FW version information to cache (based on request_firmware() return success value for an old FW file or new FW file)
> which will be used in follow up probe() flows to decide the function probe failure early If there could be FW version mismatches against the loaded FW on the device by other PFs already

There might be something more wrong with the patch in the subject: The usability of the ports from a single card (with older firmware?) now depends on the order the ports are enabled (first port enabled is working, second port enabled is not working, driver complaining about a firmware mismatch).

In the following examples, the driver was not built-in to the kernel but loaded from the root filesystem instead, so there is no initramfs related problem here.

For the records:

root@ira:~# dmesg|grep bnx2x
[   18.749871] bnx2x 0000:45:00.0: msix capability found
[   18.766534] bnx2x 0000:45:00.0: part number 394D4342-31373735-31314131-473331
[   18.799198] bnx2x 0000:45:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
[   18.807638] bnx2x 0000:45:00.1: msix capability found
[   18.824509] bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
[   18.857171] bnx2x 0000:45:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
[   18.865619] bnx2x 0000:46:00.0: msix capability found
[   18.882636] bnx2x 0000:46:00.0: part number 394D4342-31373735-31314131-473331
[   18.915196] bnx2x 0000:46:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
[   18.923636] bnx2x 0000:46:00.1: msix capability found
[   18.940505] bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
[   18.973167] bnx2x 0000:46:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
[   46.480660] bnx2x 0000:45:00.0 net04: renamed from eth4
[   46.494677] bnx2x 0000:45:00.1 net05: renamed from eth5
[   46.508544] bnx2x 0000:46:00.0 net06: renamed from eth6
[   46.524641] bnx2x 0000:46:00.1 net07: renamed from eth7
root@ira:~# ls /lib/firmware/bnx2x/
bnx2x-e1-6.0.34.0.fw   bnx2x-e1-7.13.1.0.fw   bnx2x-e1-7.8.2.0.fw     bnx2x-e1h-7.12.30.0.fw  bnx2x-e1h-7.8.19.0.fw  bnx2x-e2-7.10.51.0.fw  bnx2x-e2-7.8.17.0.fw
bnx2x-e1-6.2.5.0.fw    bnx2x-e1-7.13.11.0.fw  bnx2x-e1h-6.0.34.0.fw   bnx2x-e1h-7.13.1.0.fw   bnx2x-e1h-7.8.2.0.fw   bnx2x-e2-7.12.30.0.fw  bnx2x-e2-7.8.19.0.fw
bnx2x-e1-6.2.9.0.fw    bnx2x-e1-7.13.15.0.fw  bnx2x-e1h-6.2.5.0.fw    bnx2x-e1h-7.13.11.0.fw  bnx2x-e2-6.0.34.0.fw   bnx2x-e2-7.13.1.0.fw   bnx2x-e2-7.8.2.0.fw
bnx2x-e1-7.0.20.0.fw   bnx2x-e1-7.13.21.0.fw  bnx2x-e1h-6.2.9.0.fw    bnx2x-e1h-7.13.15.0.fw  bnx2x-e2-6.2.5.0.fw    bnx2x-e2-7.13.11.0.fw
bnx2x-e1-7.0.23.0.fw   bnx2x-e1-7.2.16.0.fw   bnx2x-e1h-7.0.20.0.fw   bnx2x-e1h-7.13.21.0.fw  bnx2x-e2-6.2.9.0.fw    bnx2x-e2-7.13.15.0.fw
bnx2x-e1-7.0.29.0.fw   bnx2x-e1-7.2.51.0.fw   bnx2x-e1h-7.0.23.0.fw   bnx2x-e1h-7.2.16.0.fw   bnx2x-e2-7.0.20.0.fw   bnx2x-e2-7.13.21.0.fw
bnx2x-e1-7.10.51.0.fw  bnx2x-e1-7.8.17.0.fw   bnx2x-e1h-7.0.29.0.fw   bnx2x-e1h-7.2.51.0.fw   bnx2x-e2-7.0.23.0.fw   bnx2x-e2-7.2.16.0.fw
bnx2x-e1-7.12.30.0.fw  bnx2x-e1-7.8.19.0.fw   bnx2x-e1h-7.10.51.0.fw  bnx2x-e1h-7.8.17.0.fw   bnx2x-e2-7.0.29.0.fw   bnx2x-e2-7.2.51.0.fw

Now with v5.10.95, the first kernel of the series which includes fdcfabd0952d ("bnx2x: Utilize firmware 7.13.21.0") and later:

root@ira:~# dmesg -w &
[...]
root@ira:~# ip link set net04 up
[   88.504536] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
root@ira:~# ip link set net05 up
[   90.825820] bnx2x: [bnx2x_compare_fw_ver:2380(net05)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
RTNETLINK answers: Device or resource busy
root@ira:~# ip link set net04 down
root@ira:~# ip link set net05 down
root@ira:~# ip link set net05 up
[  114.462448] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67
root@ira:~# ip link set net04 up
[  117.247763] bnx2x: [bnx2x_compare_fw_ver:2380(net04)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
RTNETLINK answers: Device or resource busy

With v5.10.94, both ports work fine:

root@ira:~# dmesg -w &
[...]
root@ira:~# ip link set net04 up
[  133.126647] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
root@ira:~# ip link set net05 up
[  136.215169] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67

Best
   Donald


> So we need to understand why we should not call request_firmware() in probe or at least what's really harmful in doing that in probe() if some of the follow up probe flows needs
> some of the metadata info (like the run time FW versions info in this case which we get based on request_firmware() return value), we could avoid this but we don't want
> to add some ugly/unsuitable file APIs checks to know which FW version file is available on the file system if there is already an API request_firmware() available for this to be used.
> 
> Please let us know. Thanks.
> 
>> You can't do request_firmware() early. When you actually then push the
>> firmware to the device is immaterial - but request_firmware() has to be done
>> after the system is up and running.
>>
>>                   Linus
>
Paul Menzel March 14, 2022, 3:07 p.m. UTC | #18
[Use Jakub’s current address]

Dear Manish,


Am 14.03.22 um 15:36 schrieb Donald Buczek:

> On 3/11/22 1:11 PM, Manish Chopra wrote:
>>> -----Original Message-----
>>> From: Linus Torvalds <torvalds@linux-foundation.org>
>>> Sent: Thursday, March 10, 2022 3:48 AM

[…]

>>> On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra wrote:
>>>>
>>>> This has not changed anything functionally from driver/device 
>>>> perspective,
>>> FW is still being loaded only when device is opened.
>>>> bnx2x_init_firmware() [I guess, perhaps the name is misleading] just
>>> request_firmware() to prepare the metadata to be used when device 
>>> will be opened.
>>>
>>> So how do you explain the report by Paul Menzel that things used to 
>>> work and no longer work now?
>>>
>>
>> The issue which Paul mentioned had to do with "/lib/firmware/bnx2x/* 
>> file not found" when driver probes, which was introduced by the patch 
>> in subject,
>> And the commit e13ad1443684 ("bnx2x: fix driver load from initrd") 
>> fixes this issue. So things should work as it is with the mentioned 
>> fixed commit.
>> The only discussion led by this problem now is why the 
>> request_firmware() was moved early on [from open() to probe()] by the 
>> patch in subject.
>> I explained the intention to do this in my earlier emails and let me 
>> add more details below -
>>
>> Note that we have just moved request_firmware() logic, *not* something 
>> significant which has to do with actual FW loading or device 
>> initialization from the
>> FW file data which could cause significant functional change for this 
>> device/driver, FW load/init part still stays in open flow.
>>
>> Before the patch in subject, driver used to only work with 
>> fixed/specific FW version file whose version was statically known to 
>> the driver function at probe() time to take
>> some decision to fail the function probe early in the system if the 
>> function is supposed to run with a FW version which is not the same 
>> version loaded on the device by another PF (different ENV).
>> Now when we sent this new FW patch (in subject) then we got feedback 
>> from community to maintain backward compatibility with older FW 
>> versions as well and we did it in same v2 patch legitimately,
>> just that now we can work with both older or newer FW file so we need 
>> this run time FW version information to cache (based on 
>> request_firmware() return success value for an old FW file or new FW 
>> file)
>> which will be used in follow up probe() flows to decide the function 
>> probe failure early If there could be FW version mismatches against 
>> the loaded FW on the device by other PFs already
> 
> There might be something more wrong with the patch in the subject: The 
> usability of the ports from a single card (with older firmware?) now 
> depends on the order the ports are enabled (first port enabled is 
> working, second port enabled is not working, driver complaining about a 
> firmware mismatch).
> 
> In the following examples, the driver was not built-in to the kernel but 
> loaded from the root filesystem instead, so there is no initramfs 
> related problem here.
> 
> For the records:
> 
> root@ira:~# dmesg|grep bnx2x
> [   18.749871] bnx2x 0000:45:00.0: msix capability found
> [   18.766534] bnx2x 0000:45:00.0: part number 394D4342-31373735-31314131-473331
> [   18.799198] bnx2x 0000:45:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> [   18.807638] bnx2x 0000:45:00.1: msix capability found
> [   18.824509] bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
> [   18.857171] bnx2x 0000:45:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> [   18.865619] bnx2x 0000:46:00.0: msix capability found
> [   18.882636] bnx2x 0000:46:00.0: part number 394D4342-31373735-31314131-473331
> [   18.915196] bnx2x 0000:46:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> [   18.923636] bnx2x 0000:46:00.1: msix capability found
> [   18.940505] bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
> [   18.973167] bnx2x 0000:46:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> [   46.480660] bnx2x 0000:45:00.0 net04: renamed from eth4
> [   46.494677] bnx2x 0000:45:00.1 net05: renamed from eth5
> [   46.508544] bnx2x 0000:46:00.0 net06: renamed from eth6
> [   46.524641] bnx2x 0000:46:00.1 net07: renamed from eth7
> root@ira:~# ls /lib/firmware/bnx2x/
> bnx2x-e1-6.0.34.0.fw   bnx2x-e1-7.13.1.0.fw   bnx2x-e1-7.8.2.0.fw     
> bnx2x-e1h-7.12.30.0.fw  bnx2x-e1h-7.8.19.0.fw  bnx2x-e2-7.10.51.0.fw  bnx2x-e2-7.8.17.0.fw
> bnx2x-e1-6.2.5.0.fw    bnx2x-e1-7.13.11.0.fw  bnx2x-e1h-6.0.34.0.fw   
> bnx2x-e1h-7.13.1.0.fw   bnx2x-e1h-7.8.2.0.fw   bnx2x-e2-7.12.30.0.fw  bnx2x-e2-7.8.19.0.fw
> bnx2x-e1-6.2.9.0.fw    bnx2x-e1-7.13.15.0.fw  bnx2x-e1h-6.2.5.0.fw    
> bnx2x-e1h-7.13.11.0.fw  bnx2x-e2-6.0.34.0.fw   bnx2x-e2-7.13.1.0.fw   bnx2x-e2-7.8.2.0.fw
> bnx2x-e1-7.0.20.0.fw   bnx2x-e1-7.13.21.0.fw  bnx2x-e1h-6.2.9.0.fw    
> bnx2x-e1h-7.13.15.0.fw  bnx2x-e2-6.2.5.0.fw    bnx2x-e2-7.13.11.0.fw
> bnx2x-e1-7.0.23.0.fw   bnx2x-e1-7.2.16.0.fw   bnx2x-e1h-7.0.20.0.fw   
> bnx2x-e1h-7.13.21.0.fw  bnx2x-e2-6.2.9.0.fw    bnx2x-e2-7.13.15.0.fw
> bnx2x-e1-7.0.29.0.fw   bnx2x-e1-7.2.51.0.fw   bnx2x-e1h-7.0.23.0.fw   
> bnx2x-e1h-7.2.16.0.fw   bnx2x-e2-7.0.20.0.fw   bnx2x-e2-7.13.21.0.fw
> bnx2x-e1-7.10.51.0.fw  bnx2x-e1-7.8.17.0.fw   bnx2x-e1h-7.0.29.0.fw   
> bnx2x-e1h-7.2.51.0.fw   bnx2x-e2-7.0.23.0.fw   bnx2x-e2-7.2.16.0.fw
> bnx2x-e1-7.12.30.0.fw  bnx2x-e1-7.8.19.0.fw   bnx2x-e1h-7.10.51.0.fw  
> bnx2x-e1h-7.8.17.0.fw   bnx2x-e2-7.0.29.0.fw   bnx2x-e2-7.2.51.0.fw
> 
> Now with v5.10.95, the first kernel of the series which includes 
> fdcfabd0952d ("bnx2x: Utilize firmware 7.13.21.0") and later:
> 
> root@ira:~# dmesg -w &
> [...]
> root@ira:~# ip link set net04 up
> [   88.504536] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
> root@ira:~# ip link set net05 up
> [   90.825820] bnx2x: [bnx2x_compare_fw_ver:2380(net05)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
> RTNETLINK answers: Device or resource busy
> root@ira:~# ip link set net04 down
> root@ira:~# ip link set net05 down
> root@ira:~# ip link set net05 up
> [  114.462448] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67
> root@ira:~# ip link set net04 up
> [  117.247763] bnx2x: [bnx2x_compare_fw_ver:2380(net04)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
> RTNETLINK answers: Device or resource busy
> 
> With v5.10.94, both ports work fine:
> 
> root@ira:~# dmesg -w &
> [...]
> root@ira:~# ip link set net04 up
> [  133.126647] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
> root@ira:~# ip link set net05 up
> [  136.215169] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67

One additional note, that it’s totally unclear to us, where FW version 
120d07 in the error message comes from. It maps to 7.13.18.0, which is 
nowhere to be found and too new to be on the cards EEPROM, which should 
be from 2013 or so.


Kind regards,

Paul


>> So we need to understand why we should not call request_firmware() in 
>> probe or at least what's really harmful in doing that in probe() if 
>> some of the follow up probe flows needs
>> some of the metadata info (like the run time FW versions info in this 
>> case which we get based on request_firmware() return value), we could 
>> avoid this but we don't want
>> to add some ugly/unsuitable file APIs checks to know which FW version 
>> file is available on the file system if there is already an API 
>> request_firmware() available for this to be used.
>>
>> Please let us know. Thanks.
>>
>>> You can't do request_firmware() early. When you actually then push the
>>> firmware to the device is immaterial - but request_firmware() has to 
>>> be done
>>> after the system is up and running.
>>>
>>>                   Linus
Jakub Kicinski March 15, 2022, 2:57 a.m. UTC | #19
On Mon, 14 Mar 2022 16:07:08 +0100 Paul Menzel wrote:
> > There might be something more wrong with the patch in the subject: The 
> > usability of the ports from a single card (with older firmware?) now 
> > depends on the order the ports are enabled (first port enabled is 
> > working, second port enabled is not working, driver complaining about a 
> > firmware mismatch).
> > 
> > In the following examples, the driver was not built-in to the kernel but 
> > loaded from the root filesystem instead, so there is no initramfs 
> > related problem here.
> > 
> > For the records:
> > 
> > root@ira:~# dmesg|grep bnx2x
> > [   18.749871] bnx2x 0000:45:00.0: msix capability found
> > [   18.766534] bnx2x 0000:45:00.0: part number 394D4342-31373735-31314131-473331
> > [   18.799198] bnx2x 0000:45:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> > [   18.807638] bnx2x 0000:45:00.1: msix capability found
> > [   18.824509] bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
> > [   18.857171] bnx2x 0000:45:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> > [   18.865619] bnx2x 0000:46:00.0: msix capability found
> > [   18.882636] bnx2x 0000:46:00.0: part number 394D4342-31373735-31314131-473331
> > [   18.915196] bnx2x 0000:46:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> > [   18.923636] bnx2x 0000:46:00.1: msix capability found
> > [   18.940505] bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
> > [   18.973167] bnx2x 0000:46:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
> > [   46.480660] bnx2x 0000:45:00.0 net04: renamed from eth4
> > [   46.494677] bnx2x 0000:45:00.1 net05: renamed from eth5
> > [   46.508544] bnx2x 0000:46:00.0 net06: renamed from eth6
> > [   46.524641] bnx2x 0000:46:00.1 net07: renamed from eth7
> > root@ira:~# ls /lib/firmware/bnx2x/
> > bnx2x-e1-6.0.34.0.fw   bnx2x-e1-7.13.1.0.fw   bnx2x-e1-7.8.2.0.fw     
> > bnx2x-e1h-7.12.30.0.fw  bnx2x-e1h-7.8.19.0.fw  bnx2x-e2-7.10.51.0.fw  bnx2x-e2-7.8.17.0.fw
> > bnx2x-e1-6.2.5.0.fw    bnx2x-e1-7.13.11.0.fw  bnx2x-e1h-6.0.34.0.fw   
> > bnx2x-e1h-7.13.1.0.fw   bnx2x-e1h-7.8.2.0.fw   bnx2x-e2-7.12.30.0.fw  bnx2x-e2-7.8.19.0.fw
> > bnx2x-e1-6.2.9.0.fw    bnx2x-e1-7.13.15.0.fw  bnx2x-e1h-6.2.5.0.fw    
> > bnx2x-e1h-7.13.11.0.fw  bnx2x-e2-6.0.34.0.fw   bnx2x-e2-7.13.1.0.fw   bnx2x-e2-7.8.2.0.fw
> > bnx2x-e1-7.0.20.0.fw   bnx2x-e1-7.13.21.0.fw  bnx2x-e1h-6.2.9.0.fw    
> > bnx2x-e1h-7.13.15.0.fw  bnx2x-e2-6.2.5.0.fw    bnx2x-e2-7.13.11.0.fw
> > bnx2x-e1-7.0.23.0.fw   bnx2x-e1-7.2.16.0.fw   bnx2x-e1h-7.0.20.0.fw   
> > bnx2x-e1h-7.13.21.0.fw  bnx2x-e2-6.2.9.0.fw    bnx2x-e2-7.13.15.0.fw
> > bnx2x-e1-7.0.29.0.fw   bnx2x-e1-7.2.51.0.fw   bnx2x-e1h-7.0.23.0.fw   
> > bnx2x-e1h-7.2.16.0.fw   bnx2x-e2-7.0.20.0.fw   bnx2x-e2-7.13.21.0.fw
> > bnx2x-e1-7.10.51.0.fw  bnx2x-e1-7.8.17.0.fw   bnx2x-e1h-7.0.29.0.fw   
> > bnx2x-e1h-7.2.51.0.fw   bnx2x-e2-7.0.23.0.fw   bnx2x-e2-7.2.16.0.fw
> > bnx2x-e1-7.12.30.0.fw  bnx2x-e1-7.8.19.0.fw   bnx2x-e1h-7.10.51.0.fw  
> > bnx2x-e1h-7.8.17.0.fw   bnx2x-e2-7.0.29.0.fw   bnx2x-e2-7.2.51.0.fw
> > 
> > Now with v5.10.95, the first kernel of the series which includes 
> > fdcfabd0952d ("bnx2x: Utilize firmware 7.13.21.0") and later:
> > 
> > root@ira:~# dmesg -w &
> > [...]
> > root@ira:~# ip link set net04 up
> > [   88.504536] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
> > root@ira:~# ip link set net05 up
> > [   90.825820] bnx2x: [bnx2x_compare_fw_ver:2380(net05)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
> > RTNETLINK answers: Device or resource busy
> > root@ira:~# ip link set net04 down
> > root@ira:~# ip link set net05 down
> > root@ira:~# ip link set net05 up
> > [  114.462448] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67
> > root@ira:~# ip link set net04 up
> > [  117.247763] bnx2x: [bnx2x_compare_fw_ver:2380(net04)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
> > RTNETLINK answers: Device or resource busy
> > 
> > With v5.10.94, both ports work fine:
> > 
> > root@ira:~# dmesg -w &
> > [...]
> > root@ira:~# ip link set net04 up
> > [  133.126647] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
> > root@ira:~# ip link set net05 up
> > [  136.215169] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67  
> 
> One additional note, that it’s totally unclear to us, where FW version 
> 120d07 in the error message comes from. It maps to 7.13.18.0, which is 
> nowhere to be found and too new to be on the cards EEPROM, which should 
> be from 2013 or so.

Hrm, any chance an out-of-tree driver or another OS loaded it?
Does the dmesg indicate that the host loaded the FW at all?
Looks like upstream went from .15 to .21, .18 was never in the picture.

Also, will the revert work for you?
Manish Chopra March 16, 2022, 11:23 a.m. UTC | #20
Hello Paul,

> -----Original Message-----
> From: Paul Menzel <pmenzel@molgen.mpg.de>
> Sent: Monday, March 14, 2022 8:37 PM
> To: Manish Chopra <manishc@marvell.com>
> Cc: Donald Buczek <buczek@molgen.mpg.de>; Linus Torvalds
> <torvalds@linux-foundation.org>; Jakub Kicinski <kuba@kernel.org>;
> netdev@vger.kernel.org; Ariel Elior <aelior@marvell.com>; Alok Prasad
> <palok@marvell.com>; Prabhakar Kushwaha <pkushwaha@marvell.com>;
> David S. Miller <davem@davemloft.net>; Greg KH
> <gregkh@linuxfoundation.org>; stable@vger.kernel.org;
> it+netdev@molgen.mpg.de; regressions@lists.linux.dev
> Subject: Re: [EXT] Re: [PATCH v2 net-next 1/2] bnx2x: Utilize firmware
> 7.13.21.0
> 
> [Use Jakub’s current address]
> 
> Dear Manish,
> 
> 
> Am 14.03.22 um 15:36 schrieb Donald Buczek:
> 
> > On 3/11/22 1:11 PM, Manish Chopra wrote:
> >>> -----Original Message-----
> >>> From: Linus Torvalds <torvalds@linux-foundation.org>
> >>> Sent: Thursday, March 10, 2022 3:48 AM
> 
> […]
> 
> >>> On Wed, Mar 9, 2022 at 11:46 AM Manish Chopra wrote:
> >>>>
> >>>> This has not changed anything functionally from driver/device
> >>>> perspective,
> >>> FW is still being loaded only when device is opened.
> >>>> bnx2x_init_firmware() [I guess, perhaps the name is misleading]
> >>>> just
> >>> request_firmware() to prepare the metadata to be used when device
> >>> will be opened.
> >>>
> >>> So how do you explain the report by Paul Menzel that things used to
> >>> work and no longer work now?
> >>>
> >>
> >> The issue which Paul mentioned had to do with "/lib/firmware/bnx2x/*
> >> file not found" when driver probes, which was introduced by the patch
> >> in subject, And the commit e13ad1443684 ("bnx2x: fix driver load from
> >> initrd") fixes this issue. So things should work as it is with the
> >> mentioned fixed commit.
> >> The only discussion led by this problem now is why the
> >> request_firmware() was moved early on [from open() to probe()] by the
> >> patch in subject.
> >> I explained the intention to do this in my earlier emails and let me
> >> add more details below -
> >>
> >> Note that we have just moved request_firmware() logic, *not*
> >> something significant which has to do with actual FW loading or
> >> device initialization from the FW file data which could cause
> >> significant functional change for this device/driver, FW load/init
> >> part still stays in open flow.
> >>
> >> Before the patch in subject, driver used to only work with
> >> fixed/specific FW version file whose version was statically known to
> >> the driver function at probe() time to take some decision to fail the
> >> function probe early in the system if the function is supposed to run
> >> with a FW version which is not the same version loaded on the device
> >> by another PF (different ENV).
> >> Now when we sent this new FW patch (in subject) then we got feedback
> >> from community to maintain backward compatibility with older FW
> >> versions as well and we did it in same v2 patch legitimately, just
> >> that now we can work with both older or newer FW file so we need this
> >> run time FW version information to cache (based on
> >> request_firmware() return success value for an old FW file or new FW
> >> file)
> >> which will be used in follow up probe() flows to decide the function
> >> probe failure early If there could be FW version mismatches against
> >> the loaded FW on the device by other PFs already
> >
> > There might be something more wrong with the patch in the subject: The
> > usability of the ports from a single card (with older firmware?) now
> > depends on the order the ports are enabled (first port enabled is
> > working, second port enabled is not working, driver complaining about
> > a firmware mismatch).
> >
> > In the following examples, the driver was not built-in to the kernel
> > but loaded from the root filesystem instead, so there is no initramfs
> > related problem here.
> >
> > For the records:
> >
> > root@ira:~# dmesg|grep bnx2x
> > [   18.749871] bnx2x 0000:45:00.0: msix capability found [
> > 18.766534] bnx2x 0000:45:00.0: part number
> > 394D4342-31373735-31314131-473331 [   18.799198] bnx2x 0000:45:00.0:
> > 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link) [
> > 18.807638] bnx2x 0000:45:00.1: msix capability found [   18.824509]
> > bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
> [
> > 18.857171] bnx2x 0000:45:00.1: 32.000 Gb/s available PCIe bandwidth
> > (5.0 GT/s PCIe x8 link) [   18.865619] bnx2x 0000:46:00.0: msix
> > capability found [   18.882636] bnx2x 0000:46:00.0: part number
> > 394D4342-31373735-31314131-473331 [   18.915196] bnx2x 0000:46:00.0:
> > 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link) [
> > 18.923636] bnx2x 0000:46:00.1: msix capability found [   18.940505]
> > bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
> [
> > 18.973167] bnx2x 0000:46:00.1: 32.000 Gb/s available PCIe bandwidth
> > (5.0 GT/s PCIe x8 link) [   46.480660] bnx2x 0000:45:00.0 net04:
> > renamed from eth4 [   46.494677] bnx2x 0000:45:00.1 net05: renamed
> > from eth5 [   46.508544] bnx2x 0000:46:00.0 net06: renamed from eth6
> [   46.524641] bnx2x 0000:46:00.1 net07: renamed from eth7 root@ira:~# ls
> /lib/firmware/bnx2x/
> > bnx2x-e1-6.0.34.0.fw   bnx2x-e1-7.13.1.0.fw   bnx2x-e1-7.8.2.0.fw
> > bnx2x-e1h-7.12.30.0.fw  bnx2x-e1h-7.8.19.0.fw  bnx2x-e2-
> 7.10.51.0.fw  bnx2x-e2-7.8.17.0.fw
> > bnx2x-e1-6.2.5.0.fw    bnx2x-e1-7.13.11.0.fw  bnx2x-e1h-6.0.34.0.fw
> > bnx2x-e1h-7.13.1.0.fw   bnx2x-e1h-7.8.2.0.fw   bnx2x-e2-
> 7.12.30.0.fw  bnx2x-e2-7.8.19.0.fw
> > bnx2x-e1-6.2.9.0.fw    bnx2x-e1-7.13.15.0.fw  bnx2x-e1h-6.2.5.0.fw
> > bnx2x-e1h-7.13.11.0.fw  bnx2x-e2-6.0.34.0.fw   bnx2x-e2-
> 7.13.1.0.fw   bnx2x-e2-7.8.2.0.fw
> > bnx2x-e1-7.0.20.0.fw   bnx2x-e1-7.13.21.0.fw  bnx2x-e1h-6.2.9.0.fw
> > bnx2x-e1h-7.13.15.0.fw  bnx2x-e2-6.2.5.0.fw    bnx2x-e2-7.13.11.0.fw
> > bnx2x-e1-7.0.23.0.fw   bnx2x-e1-7.2.16.0.fw   bnx2x-e1h-7.0.20.0.fw
> > bnx2x-e1h-7.13.21.0.fw  bnx2x-e2-6.2.9.0.fw    bnx2x-e2-7.13.15.0.fw
> > bnx2x-e1-7.0.29.0.fw   bnx2x-e1-7.2.51.0.fw   bnx2x-e1h-7.0.23.0.fw
> > bnx2x-e1h-7.2.16.0.fw   bnx2x-e2-7.0.20.0.fw   bnx2x-e2-7.13.21.0.fw
> > bnx2x-e1-7.10.51.0.fw  bnx2x-e1-7.8.17.0.fw   bnx2x-e1h-7.0.29.0.fw
> > bnx2x-e1h-7.2.51.0.fw   bnx2x-e2-7.0.23.0.fw   bnx2x-e2-7.2.16.0.fw
> > bnx2x-e1-7.12.30.0.fw  bnx2x-e1-7.8.19.0.fw   bnx2x-e1h-7.10.51.0.fw
> > bnx2x-e1h-7.8.17.0.fw   bnx2x-e2-7.0.29.0.fw   bnx2x-e2-7.2.51.0.fw
> >
> > Now with v5.10.95, the first kernel of the series which includes
> > fdcfabd0952d ("bnx2x: Utilize firmware 7.13.21.0") and later:
> >
> > root@ira:~# dmesg -w &
> > [...]
> > root@ira:~# ip link set net04 up
> > [   88.504536] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47
> > fp[0] 49 ... fp[7] 56 root@ira:~# ip link set net05 up [   90.825820]
> > bnx2x: [bnx2x_compare_fw_ver:2380(net05)]bnx2x with FW 120d07 was
> > already loaded which mismatches my 150d07 FW. Aborting RTNETLINK
> > answers: Device or resource busy root@ira:~# ip link set net04 down
> > root@ira:~# ip link set net05 down root@ira:~# ip link set net05 up [
> > 114.462448] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0]
> > 60 ... fp[7] 67 root@ira:~# ip link set net04 up [  117.247763] bnx2x:
> > [bnx2x_compare_fw_ver:2380(net04)]bnx2x with FW 120d07 was already
> > loaded which mismatches my 150d07 FW. Aborting RTNETLINK answers:
> > Device or resource busy
> >
> > With v5.10.94, both ports work fine:
> >
> > root@ira:~# dmesg -w &
> > [...]
> > root@ira:~# ip link set net04 up
> > [  133.126647] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47
> > fp[0] 49 ... fp[7] 56 root@ira:~# ip link set net05 up [  136.215169]
> > bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7]
> > 67
> 
> One additional note, that it’s totally unclear to us, where FW version
> 120d07 in the error message comes from. It maps to 7.13.18.0, which is
> nowhere to be found and too new to be on the cards EEPROM, which should
> be from 2013 or so.
> 

I could reproduce the earlier issue (about FW file load failure you have reported) on my 5.14.x based kernel with driver built-in the kernel (CONFIG_BNX2X=y),
which was caused due to commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")

# dmesg -T | grep "Direct firmware"
[Wed Mar 16 14:11:25 2022] bnx2x 0000:13:00.0: Direct firmware load for bnx2x/bnx2x-e2-7.13.21.0.fw failed with error -2
[Wed Mar 16 14:11:25 2022] bnx2x 0000:13:00.0: Direct firmware load for bnx2x/bnx2x-e2-7.13.15.0.fw failed with error -2
[Wed Mar 16 14:11:25 2022] bnx2x 0000:13:00.1: Direct firmware load for bnx2x/bnx2x-e2-7.13.21.0.fw failed with error -2
[Wed Mar 16 14:11:25 2022] bnx2x 0000:13:00.1: Direct firmware load for bnx2x/bnx2x-e2-7.13.15.0.fw failed with error -2

After I have re-installed the kernel with the fix (which I have sent to you and list as RFC today) applied and performed cold boot/power cycle
of the the server, Later...

# dmesg -T | grep "Direct firmware"

# ethtool -i ens3f0
driver: bnx2x
version: 5.14.0+
firmware-version: mbi 7.19.2 bc 7.16.5 phy 1.34
expansion-rom-version:
bus-info: 0000:13:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: yes
supports-register-dump: yes
supports-priv-flags: yes

(I have configured ethtool with "--disable-netlink" to allow setting these message level, otherwise you will run into the other issue you have reported about it)
# ./ethtool -s ens3f0 msglvl 0x0100000
# ./ethtool -s ens3f1 msglvl 0x0100000

# ifconfig ens3f0 up
# ifconfig ens3f1 up
# ifconfig ens3f0 down
# ifconfig ens3f1 down

# dmesg -T | grep fw
# ifconfig ens3f0 up
# ifconfig ens3f1 up

# dmesg -T | grep fw
[Wed Mar 16 15:19:13 2022] bnx2x: [bnx2x_alloc_fw_stats_mem:2270(ens3f0)]stats fw_stats_num 10, vf headroom 0, num_groups 1
[Wed Mar 16 15:19:13 2022] bnx2x: [bnx2x_alloc_fw_stats_mem:2302(ens3f0)]statistics request base address set to 1 a078e000
[Wed Mar 16 15:19:13 2022] bnx2x: [bnx2x_alloc_fw_stats_mem:2305(ens3f0)]statistics data base address set to 1 a078e110
[Wed Mar 16 15:19:16 2022] bnx2x: [bnx2x_alloc_fw_stats_mem:2270(ens3f1)]stats fw_stats_num 12, vf headroom 0, num_groups 1
[Wed Mar 16 15:19:16 2022] bnx2x: [bnx2x_alloc_fw_stats_mem:2302(ens3f1)]statistics request base address set to 1 d1b6000
[Wed Mar 16 15:19:16 2022] bnx2x: [bnx2x_alloc_fw_stats_mem:2305(ens3f1)]statistics data base address set to 1 d1b6110

I suggest you to re-install the kernel with that fix applied and test in your environment after cold boot/power cycle the server so that it starts with clean state of the devices
---------------------------------------------------

Regarding the odd/mismatched FW version you reported recently, we believe it has nothing to do with the patch in subject,
perhaps it's some residue from earlier OOB modules or PMDs ?, Note that they don't use/load the firmware from /lib/firmware/bnx2x
but they have whole firmware in-built within the module and the version you mentioned seems something from oob component.
That's why you are not able to locate them in /lib/firmware/bnx2x/

Please check/scan the system environment if there are any OOB modules/PMDs installed or running on the same adapter by any chance ?
Maybe perhaps re-installing the kernel with fix and making a cold boot/power cycle of the server will make this issue go away too.? 

Even after all these if it still report about the odd loaded firmware, it should be fine informatively as the driver fix now also relax
the strict FW versions comparisons (against already loaded_fw by any chance) to allow these close oob firmwares to be backward
compatible instead failing the device load abruptly.

BTW,

1. Can you please provide the complete system logs (/var/log/messages or dmesg -T) and other relevant info (like lspci, ip link show etc.) for any issues ? 
2. Does system has only these two NIC controllers with total 4 PCI functions (two to each) ? No any other PCI functions on any of these controllers used in some different environment ?

Thanks !
Paul Menzel March 16, 2022, 5:14 p.m. UTC | #21
Dear Jakub,


Am 15.03.22 um 03:57 schrieb Jakub Kicinski:
> On Mon, 14 Mar 2022 16:07:08 +0100 Paul Menzel wrote:
>>> There might be something more wrong with the patch in the subject: The
>>> usability of the ports from a single card (with older firmware?) now
>>> depends on the order the ports are enabled (first port enabled is
>>> working, second port enabled is not working, driver complaining about a
>>> firmware mismatch).
>>>
>>> In the following examples, the driver was not built-in to the kernel but
>>> loaded from the root filesystem instead, so there is no initramfs
>>> related problem here.
>>>
>>> For the records:
>>>
>>> root@ira:~# dmesg|grep bnx2x
>>> [   18.749871] bnx2x 0000:45:00.0: msix capability found
>>> [   18.766534] bnx2x 0000:45:00.0: part number 394D4342-31373735-31314131-473331
>>> [   18.799198] bnx2x 0000:45:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
>>> [   18.807638] bnx2x 0000:45:00.1: msix capability found
>>> [   18.824509] bnx2x 0000:45:00.1: part number 394D4342-31373735-31314131-473331
>>> [   18.857171] bnx2x 0000:45:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
>>> [   18.865619] bnx2x 0000:46:00.0: msix capability found
>>> [   18.882636] bnx2x 0000:46:00.0: part number 394D4342-31373735-31314131-473331
>>> [   18.915196] bnx2x 0000:46:00.0: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
>>> [   18.923636] bnx2x 0000:46:00.1: msix capability found
>>> [   18.940505] bnx2x 0000:46:00.1: part number 394D4342-31373735-31314131-473331
>>> [   18.973167] bnx2x 0000:46:00.1: 32.000 Gb/s available PCIe bandwidth (5.0 GT/s PCIe x8 link)
>>> [   46.480660] bnx2x 0000:45:00.0 net04: renamed from eth4
>>> [   46.494677] bnx2x 0000:45:00.1 net05: renamed from eth5
>>> [   46.508544] bnx2x 0000:46:00.0 net06: renamed from eth6
>>> [   46.524641] bnx2x 0000:46:00.1 net07: renamed from eth7
>>> root@ira:~# ls /lib/firmware/bnx2x/
>>> bnx2x-e1-6.0.34.0.fw   bnx2x-e1-7.13.1.0.fw   bnx2x-e1-7.8.2.0.fw
>>> bnx2x-e1h-7.12.30.0.fw  bnx2x-e1h-7.8.19.0.fw  bnx2x-e2-7.10.51.0.fw  bnx2x-e2-7.8.17.0.fw
>>> bnx2x-e1-6.2.5.0.fw    bnx2x-e1-7.13.11.0.fw  bnx2x-e1h-6.0.34.0.fw
>>> bnx2x-e1h-7.13.1.0.fw   bnx2x-e1h-7.8.2.0.fw   bnx2x-e2-7.12.30.0.fw  bnx2x-e2-7.8.19.0.fw
>>> bnx2x-e1-6.2.9.0.fw    bnx2x-e1-7.13.15.0.fw  bnx2x-e1h-6.2.5.0.fw
>>> bnx2x-e1h-7.13.11.0.fw  bnx2x-e2-6.0.34.0.fw   bnx2x-e2-7.13.1.0.fw   bnx2x-e2-7.8.2.0.fw
>>> bnx2x-e1-7.0.20.0.fw   bnx2x-e1-7.13.21.0.fw  bnx2x-e1h-6.2.9.0.fw
>>> bnx2x-e1h-7.13.15.0.fw  bnx2x-e2-6.2.5.0.fw    bnx2x-e2-7.13.11.0.fw
>>> bnx2x-e1-7.0.23.0.fw   bnx2x-e1-7.2.16.0.fw   bnx2x-e1h-7.0.20.0.fw
>>> bnx2x-e1h-7.13.21.0.fw  bnx2x-e2-6.2.9.0.fw    bnx2x-e2-7.13.15.0.fw
>>> bnx2x-e1-7.0.29.0.fw   bnx2x-e1-7.2.51.0.fw   bnx2x-e1h-7.0.23.0.fw
>>> bnx2x-e1h-7.2.16.0.fw   bnx2x-e2-7.0.20.0.fw   bnx2x-e2-7.13.21.0.fw
>>> bnx2x-e1-7.10.51.0.fw  bnx2x-e1-7.8.17.0.fw   bnx2x-e1h-7.0.29.0.fw
>>> bnx2x-e1h-7.2.51.0.fw   bnx2x-e2-7.0.23.0.fw   bnx2x-e2-7.2.16.0.fw
>>> bnx2x-e1-7.12.30.0.fw  bnx2x-e1-7.8.19.0.fw   bnx2x-e1h-7.10.51.0.fw
>>> bnx2x-e1h-7.8.17.0.fw   bnx2x-e2-7.0.29.0.fw   bnx2x-e2-7.2.51.0.fw
>>>
>>> Now with v5.10.95, the first kernel of the series which includes
>>> fdcfabd0952d ("bnx2x: Utilize firmware 7.13.21.0") and later:
>>>
>>> root@ira:~# dmesg -w &
>>> [...]
>>> root@ira:~# ip link set net04 up
>>> [   88.504536] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
>>> root@ira:~# ip link set net05 up
>>> [   90.825820] bnx2x: [bnx2x_compare_fw_ver:2380(net05)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
>>> RTNETLINK answers: Device or resource busy
>>> root@ira:~# ip link set net04 down
>>> root@ira:~# ip link set net05 down
>>> root@ira:~# ip link set net05 up
>>> [  114.462448] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67
>>> root@ira:~# ip link set net04 up
>>> [  117.247763] bnx2x: [bnx2x_compare_fw_ver:2380(net04)]bnx2x with FW 120d07 was already loaded which mismatches my 150d07 FW. Aborting
>>> RTNETLINK answers: Device or resource busy
>>>
>>> With v5.10.94, both ports work fine:
>>>
>>> root@ira:~# dmesg -w &
>>> [...]
>>> root@ira:~# ip link set net04 up
>>> [  133.126647] bnx2x 0000:45:00.0 net04: using MSI-X  IRQs: sp 47  fp[0] 49 ... fp[7] 56
>>> root@ira:~# ip link set net05 up
>>> [  136.215169] bnx2x 0000:45:00.1 net05: using MSI-X  IRQs: sp 58  fp[0] 60 ... fp[7] 67
>>
>> One additional note, that it’s totally unclear to us, where FW version
>> 120d07 in the error message comes from. It maps to 7.13.18.0, which is
>> nowhere to be found and too new to be on the cards EEPROM, which should
>> be from 2013 or so.
> 
> Hrm, any chance an out-of-tree driver or another OS loaded it?

No, no such chance. The system firmware was not touched in the last five 
years, and there is no other OS.

> Does the dmesg indicate that the host loaded the FW at all?

Unfortunately, no such Linux log messages exist (in the code).

> Looks like upstream went from .15 to .21, .18 was never in the picture.

Yes, that is the strange thing. Is there any chance, that the `.18` is 
in the firmware file somehow, and was forgotten to be updated to `.21`?

```
$ md5sum /lib/firmware/bnx2x/*{15,21}*
4137c813f3ff937f01fddf13c309d972  /lib/firmware/bnx2x/bnx2x-e1-7.13.15.0.fw
075539e1072908a0c86ba352c1130f60  /lib/firmware/bnx2x/bnx2x-e1h-7.13.15.0.fw
e59925dedf7ed95679e629dfeeaf5803  /lib/firmware/bnx2x/bnx2x-e2-7.13.15.0.fw
a470a0d77930ef72ac7b725a5ff447fe  /lib/firmware/bnx2x/bnx2x-e1-7.13.21.0.fw
f02012b118664c0579ab58757f276982  /lib/firmware/bnx2x/bnx2x-e1h-7.13.21.0.fw
47667a7bf156d1e531fde42d800e4a66  /lib/firmware/bnx2x/bnx2x-e2-7.13.21.0.fw
```

> Also, will the revert work for you?

I tested reverting the three patches you also listed on top of 5.10.103, 
and that restored the working behavior. Manish’s patch *[RFC net] bnx2x: 
fix built-in kernel driver load failure* also fixes the issue.


Kind regards,

Paul
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 2b06d78b..a19dd67 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1850,6 +1850,14 @@  struct bnx2x {
 
 	/* Vxlan/Geneve related information */
 	u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
+
+#define FW_CAP_INVALIDATE_VF_FP_HSI	BIT(0)
+	u32 fw_cap;
+
+	u32 fw_major;
+	u32 fw_minor;
+	u32 fw_rev;
+	u32 fw_eng;
 };
 
 /* Tx queues may be less or equal to Rx queues */
@@ -2525,5 +2533,6 @@  enum {
  * Meant for implicit re-load flows.
  */
 int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
-
+int bnx2x_init_firmware(struct bnx2x *bp);
+void bnx2x_release_firmware(struct bnx2x *bp);
 #endif /* bnx2x.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 54a2334..8d36ebb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2365,10 +2365,8 @@  int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
 	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
 	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
 		/* build my FW version dword */
-		u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
-			(BCM_5710_FW_MINOR_VERSION << 8) +
-			(BCM_5710_FW_REVISION_VERSION << 16) +
-			(BCM_5710_FW_ENGINEERING_VERSION << 24);
+		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
+				(bp->fw_rev << 16) + (bp->fw_eng << 24);
 
 		/* read loaded FW from chip */
 		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
index 3f84352..a84d015 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
@@ -241,6 +241,8 @@ 
 	IRO[221].m2))
 #define XSTORM_VF_TO_PF_OFFSET(funcId) \
 	(IRO[48].base + ((funcId) * IRO[48].m1))
+#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)	\
+	(IRO[386].base + ((fid) * IRO[386].m1))
 #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
 
 /* eth hsi version */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 622fadc..611efee 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -3024,7 +3024,8 @@  struct afex_stats {
 
 #define BCM_5710_FW_MAJOR_VERSION			7
 #define BCM_5710_FW_MINOR_VERSION			13
-#define BCM_5710_FW_REVISION_VERSION		15
+#define BCM_5710_FW_REVISION_VERSION		21
+#define BCM_5710_FW_REVISION_VERSION_V15	15
 #define BCM_5710_FW_ENGINEERING_VERSION		0
 #define BCM_5710_FW_COMPILE_FLAGS			1
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index aec666e..125dafe 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -74,9 +74,19 @@ 
 	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
 	__stringify(BCM_5710_FW_REVISION_VERSION) "."	\
 	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
+
+#define FW_FILE_VERSION_V15				\
+	__stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
+	__stringify(BCM_5710_FW_MINOR_VERSION) "."	\
+	__stringify(BCM_5710_FW_REVISION_VERSION_V15) "."	\
+	__stringify(BCM_5710_FW_ENGINEERING_VERSION)
+
 #define FW_FILE_NAME_E1		"bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
 #define FW_FILE_NAME_E1H	"bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
 #define FW_FILE_NAME_E2		"bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
+#define FW_FILE_NAME_E1_V15	"bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw"
+#define FW_FILE_NAME_E1H_V15	"bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw"
+#define FW_FILE_NAME_E2_V15	"bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw"
 
 /* Time in jiffies before concluding the transmitter is hung */
 #define TX_TIMEOUT		(5*HZ)
@@ -747,9 +757,7 @@  static int bnx2x_mc_assert(struct bnx2x *bp)
 		  CHIP_IS_E1(bp) ? "everest1" :
 		  CHIP_IS_E1H(bp) ? "everest1h" :
 		  CHIP_IS_E2(bp) ? "everest2" : "everest3",
-		  BCM_5710_FW_MAJOR_VERSION,
-		  BCM_5710_FW_MINOR_VERSION,
-		  BCM_5710_FW_REVISION_VERSION);
+		  bp->fw_major, bp->fw_minor, bp->fw_rev);
 
 	return rc;
 }
@@ -12308,6 +12316,15 @@  static int bnx2x_init_bp(struct bnx2x *bp)
 
 	bnx2x_read_fwinfo(bp);
 
+	if (IS_PF(bp)) {
+		rc = bnx2x_init_firmware(bp);
+
+		if (rc) {
+			bnx2x_free_mem_bp(bp);
+			return rc;
+		}
+	}
+
 	func = BP_FUNC(bp);
 
 	/* need to reset chip if undi was active */
@@ -12320,6 +12337,7 @@  static int bnx2x_init_bp(struct bnx2x *bp)
 
 		rc = bnx2x_prev_unload(bp);
 		if (rc) {
+			bnx2x_release_firmware(bp);
 			bnx2x_free_mem_bp(bp);
 			return rc;
 		}
@@ -13317,16 +13335,11 @@  static int bnx2x_check_firmware(struct bnx2x *bp)
 	/* Check FW version */
 	offset = be32_to_cpu(fw_hdr->fw_version.offset);
 	fw_ver = firmware->data + offset;
-	if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
-	    (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
-	    (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
-	    (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
+	if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
+	    fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
 		BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n",
-		       fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
-		       BCM_5710_FW_MAJOR_VERSION,
-		       BCM_5710_FW_MINOR_VERSION,
-		       BCM_5710_FW_REVISION_VERSION,
-		       BCM_5710_FW_ENGINEERING_VERSION);
+			  fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
+			  bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
 		return -EINVAL;
 	}
 
@@ -13404,34 +13417,51 @@  static void be16_to_cpu_n(const u8 *_source, u8 *_target, u32 n)
 	     (u8 *)bp->arr, len);					\
 } while (0)
 
-static int bnx2x_init_firmware(struct bnx2x *bp)
+int bnx2x_init_firmware(struct bnx2x *bp)
 {
-	const char *fw_file_name;
+	const char *fw_file_name, *fw_file_name_v15;
 	struct bnx2x_fw_file_hdr *fw_hdr;
 	int rc;
 
 	if (bp->firmware)
 		return 0;
 
-	if (CHIP_IS_E1(bp))
+	if (CHIP_IS_E1(bp)) {
 		fw_file_name = FW_FILE_NAME_E1;
-	else if (CHIP_IS_E1H(bp))
+		fw_file_name_v15 = FW_FILE_NAME_E1_V15;
+	} else if (CHIP_IS_E1H(bp)) {
 		fw_file_name = FW_FILE_NAME_E1H;
-	else if (!CHIP_IS_E1x(bp))
+		fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
+	} else if (!CHIP_IS_E1x(bp)) {
 		fw_file_name = FW_FILE_NAME_E2;
-	else {
+		fw_file_name_v15 = FW_FILE_NAME_E2_V15;
+	} else {
 		BNX2X_ERR("Unsupported chip revision\n");
 		return -EINVAL;
 	}
+
 	BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
 
 	rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
 	if (rc) {
-		BNX2X_ERR("Can't load firmware file %s\n",
-			  fw_file_name);
-		goto request_firmware_exit;
+		BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
+
+		/* try to load prev version */
+		rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
+
+		if (rc)
+			goto request_firmware_exit;
+
+		bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
+	} else {
+		bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
+		bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
 	}
 
+	bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
+	bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
+	bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
+
 	rc = bnx2x_check_firmware(bp);
 	if (rc) {
 		BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
@@ -13487,7 +13517,7 @@  static int bnx2x_init_firmware(struct bnx2x *bp)
 	return rc;
 }
 
-static void bnx2x_release_firmware(struct bnx2x *bp)
+void bnx2x_release_firmware(struct bnx2x *bp)
 {
 	kfree(bp->init_ops_offsets);
 	kfree(bp->init_ops);
@@ -14004,6 +14034,7 @@  static int bnx2x_init_one(struct pci_dev *pdev,
 	return 0;
 
 init_one_freemem:
+	bnx2x_release_firmware(bp);
 	bnx2x_free_mem_bp(bp);
 
 init_one_exit: