Message ID | 20230316113640.499267-1-ogabbay@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/10] accel/habanalabs: align to latest firmware specs | expand |
On 16/03/2023 13:36, Oded Gabbay wrote: > Copy the most up-to-date interface files to the firmware. > > Signed-off-by: Oded Gabbay <ogabbay@kernel.org> > --- > drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- > .../habanalabs/include/common/cpucp_if.h | 51 ++++++++++++++++++- > .../habanalabs/include/common/hl_boot_if.h | 47 +++++------------ > .../include/gaudi2/gaudi2_async_events.h | 4 +- > .../habanalabs/include/gaudi2/gaudi2_fw_if.h | 5 +- > 5 files changed, 69 insertions(+), 40 deletions(-) > > diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c > index 8943dc9872da..21cf7180fe9f 100644 > --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c > +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c > @@ -9784,7 +9784,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent > break; > > case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: > - case GAUDI2_EVENT_DEV_RESET_REQ: > + case GAUDI2_EVENT_CPU_DEV_RESET_REQ: > event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; > error_count = GAUDI2_NA_EVENT_CAUSE; > is_critical = true; > diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h > index d713252a4f13..bb65b9e2b424 100644 > --- a/drivers/accel/habanalabs/include/common/cpucp_if.h > +++ b/drivers/accel/habanalabs/include/common/cpucp_if.h > @@ -33,6 +33,10 @@ > #define PLL_MAP_MAX_BITS 128 > #define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) > > +enum eq_event_id { > + EQ_EVENT_NIC_STS_REQUEST = 0, > +}; > + > /* > * info of the pkt queue pointers in the first async occurrence > */ > @@ -354,9 +358,48 @@ struct hl_eq_addr_dec_intr_data { > __u8 pad[7]; > }; > > +enum hl_mme_acc_err_type { > + MME_ACC_WBC_ERR_RESP_LEGACY, > + MME_ACC_WBC_ERR_RESP_SET0_CH0, > + MME_ACC_WBC_ERR_RESP_SET0_CH1, > + MME_ACC_WBC_ERR_RESP_SET1_CH0, > + MME_ACC_WBC_ERR_RESP_SET1_CH1, > + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET0_CH0, > + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET0_CH1, > + 
MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET0_CH0, > + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET0_CH1, > + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET0_CH0, > + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET0_CH1, > + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET0_CH0, > + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET0_CH1, > + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET1_CH0, > + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET1_CH1, > + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET1_CH0, > + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET1_CH1, > + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET1_CH0, > + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET1_CH1, > + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET1_CH0, > + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET1_CH1, > + MME_ACC_AP_STS_SRC_DNRM, > + MME_ACC_AP_STS_SRC_INF, > + MME_ACC_AP_STS_SRC_NINF, > + MME_ACC_AP_STS_SRC_NAN, > + MME_ACC_AP_STS_RES_INF, > + MME_ACC_AP_STS_RES_NINF, > + MME_ACC_AP_STS_RES_NAN > +}; > + > +struct hl_eq_mme_acc_data { > + __u8 mme_id; > + __u8 err_type; /* enum hl_mme_acc_err_type */ > + __le16 ctx_id; > + __u8 pad[4]; > +}; > + > struct hl_eq_entry { > struct hl_eq_header hdr; > union { > + __le64 data_placeholder; > struct hl_eq_ecc_data ecc_data; > struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Gaudi1 HBM */ > struct hl_eq_sm_sei_data sm_sei_data; > @@ -661,6 +704,9 @@ enum pq_init_status { > * CPUCP_PACKET_ACTIVE_STATUS_SET - > * LKD sends FW indication whether device is free or in use, this indication is reported > * also to the BMC. > + * > + * CPUCP_PACKET_REGISTER_INTERRUPTS - > + * Packet to register interrupts indicating LKD is ready to receive events from FW. 
> */ > > enum cpucp_packet_id { > @@ -725,6 +771,8 @@ enum cpucp_packet_id { > CPUCP_PACKET_RESERVED9, /* not used */ > CPUCP_PACKET_RESERVED10, /* not used */ > CPUCP_PACKET_RESERVED11, /* not used */ > + CPUCP_PACKET_RESERVED12, /* internal */ > + CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */ > CPUCP_PACKET_ID_MAX /* must be last */ > }; > > @@ -1127,6 +1175,7 @@ struct cpucp_security_info { > * (0 = functional 1 = binned) > * @interposer_version: Interposer version programmed in eFuse > * @substrate_version: Substrate version programmed in eFuse > + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. > * @fw_os_version: Firmware OS Version > */ > struct cpucp_info { > @@ -1154,7 +1203,7 @@ struct cpucp_info { > __u8 substrate_version; > __u8 reserved2; > struct cpucp_security_info sec_info; > - __le32 reserved3; > + __le32 fw_hbm_region_size; > __u8 pll_map[PLL_MAP_LEN]; > __le64 mme_binning_mask; > __u8 fw_os_version[VERSION_MAX_LEN]; > diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h > index 2256add057c5..c58d76a2705c 100644 > --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h > +++ b/drivers/accel/habanalabs/include/common/hl_boot_if.h > @@ -770,15 +770,23 @@ enum hl_components { > HL_COMPONENTS_ARMCP, > HL_COMPONENTS_CPLD, > HL_COMPONENTS_UBOOT, > + HL_COMPONENTS_FUSE, > HL_COMPONENTS_MAX_NUM = 16 > }; > > +#define NAME_MAX_LEN 32 /* bytes */ > +struct hl_module_data { > + __u8 name[NAME_MAX_LEN]; > + __u8 version[VERSION_MAX_LEN]; > +}; > + > /** > * struct hl_component_versions - versions associated with hl component. > * @struct_size: size of all the struct (including dynamic size of modules). > * @modules_offset: offset of the modules field in this struct. > * @component: version of the component itself. > * @fw_os: Firmware OS Version. > + * @comp_name: Name of the component. 
> * @modules_mask: i'th bit (from LSB) is a flag - on if module i in enum > * hl_modules is used. > * @modules_counter: number of set bits in modules_mask. > @@ -791,45 +799,14 @@ struct hl_component_versions { > __le16 modules_offset; > __u8 component[VERSION_MAX_LEN]; > __u8 fw_os[VERSION_MAX_LEN]; > + __u8 comp_name[NAME_MAX_LEN]; > __le16 modules_mask; > __u8 modules_counter; > __u8 reserved[1]; > - __u8 modules[][VERSION_MAX_LEN]; > -}; > - > -/** > - * struct hl_fw_versions - all versions (fuse, cpucp's components with their > - * modules) > - * @struct_size: size of all the struct (including dynamic size of components). > - * @components_offset: offset of the components field in this struct. > - * @fuse: silicon production FUSE information. > - * @components_mask: i'th bit (from LSB) is a flag - on if component i in enum > - * hl_components is used. > - * @components_counter: number of set bits in components_mask. > - * @reserved: reserved for future use. > - * @components: versions of hl components. Index i corresponds to the i'th bit > - * that is *on* in components_mask. For example, if > - * components_mask=0b101, then *components represents arcpid and > - * *(hl_component_versions*)((char*)components + 1') represents > - * preboot, where 1' = components[0].struct_size. 
> - */ > -struct hl_fw_versions { > - __le16 struct_size; > - __le16 components_offset; > - __u8 fuse[VERSION_MAX_LEN]; > - __le16 components_mask; > - __u8 components_counter; > - __u8 reserved[1]; > - struct hl_component_versions components[]; > + struct hl_module_data modules[]; > }; > > -/* Max size of struct hl_component_versions */ > -#define HL_COMPONENT_VERSIONS_MAX_SIZE \ > - (sizeof(struct hl_component_versions) + HL_MODULES_MAX_NUM * \ > - VERSION_MAX_LEN) > - > -/* Max size of struct hl_fw_versions */ > -#define HL_FW_VERSIONS_MAX_SIZE (sizeof(struct hl_fw_versions) + \ > - HL_COMPONENTS_MAX_NUM * HL_COMPONENT_VERSIONS_MAX_SIZE) > +/* Max size of fit size */ > +#define HL_FW_VERSIONS_FIT_SIZE 4096 > > #endif /* HL_BOOT_IF_H */ > diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h > index 50852cc80373..f661068d0c5f 100644 > --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h > +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h > @@ -1,6 +1,6 @@ > /* SPDX-License-Identifier: GPL-2.0 > * > - * Copyright 2018-2021 HabanaLabs, Ltd. > + * Copyright 2018-2022 HabanaLabs, Ltd. > * All Rights Reserved. 
> * > */ > @@ -958,7 +958,7 @@ enum gaudi2_async_event_id { > GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318, > GAUDI2_EVENT_ARC_DCCM_FULL = 1319, > GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320, > - GAUDI2_EVENT_DEV_RESET_REQ = 1321, > + GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321, > GAUDI2_EVENT_SIZE, > }; > > diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h > index 82f3ca2a3966..8522f24deac0 100644 > --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h > +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h > @@ -63,7 +63,10 @@ struct gaudi2_cold_rst_data { > u32 fake_sig_validation_en : 1; > u32 bist_skip_enable : 1; > u32 bist_need_iatu_config : 1; > - u32 reserved : 24; > + u32 fake_bis_compliant : 1; > + u32 wd_rst_cause_arm : 1; > + u32 wd_rst_cause_arcpid : 1; > + u32 reserved : 21; > }; > __le32 data; > }; Reviewed-by: Ofir Bitton <obitton@habana.ai>
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 8943dc9872da..21cf7180fe9f 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -9784,7 +9784,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: - case GAUDI2_EVENT_DEV_RESET_REQ: + case GAUDI2_EVENT_CPU_DEV_RESET_REQ: event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; error_count = GAUDI2_NA_EVENT_CAUSE; is_critical = true; diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h index d713252a4f13..bb65b9e2b424 100644 --- a/drivers/accel/habanalabs/include/common/cpucp_if.h +++ b/drivers/accel/habanalabs/include/common/cpucp_if.h @@ -33,6 +33,10 @@ #define PLL_MAP_MAX_BITS 128 #define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) +enum eq_event_id { + EQ_EVENT_NIC_STS_REQUEST = 0, +}; + /* * info of the pkt queue pointers in the first async occurrence */ @@ -354,9 +358,48 @@ struct hl_eq_addr_dec_intr_data { __u8 pad[7]; }; +enum hl_mme_acc_err_type { + MME_ACC_WBC_ERR_RESP_LEGACY, + MME_ACC_WBC_ERR_RESP_SET0_CH0, + MME_ACC_WBC_ERR_RESP_SET0_CH1, + MME_ACC_WBC_ERR_RESP_SET1_CH0, + MME_ACC_WBC_ERR_RESP_SET1_CH1, + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET0_CH0, + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET0_CH1, + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET0_CH0, + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET0_CH1, + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET0_CH0, + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET0_CH1, + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET0_CH0, + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET0_CH1, + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET1_CH0, + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET1_CH1, + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET1_CH0, + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET1_CH1, + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET1_CH0, + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET1_CH1, + 
MME_ACC_WBC_BUSER_RR_DBG_ERR_SET1_CH0, + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET1_CH1, + MME_ACC_AP_STS_SRC_DNRM, + MME_ACC_AP_STS_SRC_INF, + MME_ACC_AP_STS_SRC_NINF, + MME_ACC_AP_STS_SRC_NAN, + MME_ACC_AP_STS_RES_INF, + MME_ACC_AP_STS_RES_NINF, + MME_ACC_AP_STS_RES_NAN +}; + +struct hl_eq_mme_acc_data { + __u8 mme_id; + __u8 err_type; /* enum hl_mme_acc_err_type */ + __le16 ctx_id; + __u8 pad[4]; +}; + struct hl_eq_entry { struct hl_eq_header hdr; union { + __le64 data_placeholder; struct hl_eq_ecc_data ecc_data; struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Gaudi1 HBM */ struct hl_eq_sm_sei_data sm_sei_data; @@ -661,6 +704,9 @@ enum pq_init_status { * CPUCP_PACKET_ACTIVE_STATUS_SET - * LKD sends FW indication whether device is free or in use, this indication is reported * also to the BMC. + * + * CPUCP_PACKET_REGISTER_INTERRUPTS - + * Packet to register interrupts indicating LKD is ready to receive events from FW. */ enum cpucp_packet_id { @@ -725,6 +771,8 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED9, /* not used */ CPUCP_PACKET_RESERVED10, /* not used */ CPUCP_PACKET_RESERVED11, /* not used */ + CPUCP_PACKET_RESERVED12, /* internal */ + CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */ CPUCP_PACKET_ID_MAX /* must be last */ }; @@ -1127,6 +1175,7 @@ struct cpucp_security_info { * (0 = functional 1 = binned) * @interposer_version: Interposer version programmed in eFuse * @substrate_version: Substrate version programmed in eFuse + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. 
* @fw_os_version: Firmware OS Version */ struct cpucp_info { @@ -1154,7 +1203,7 @@ struct cpucp_info { __u8 substrate_version; __u8 reserved2; struct cpucp_security_info sec_info; - __le32 reserved3; + __le32 fw_hbm_region_size; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; __u8 fw_os_version[VERSION_MAX_LEN]; diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h index 2256add057c5..c58d76a2705c 100644 --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h +++ b/drivers/accel/habanalabs/include/common/hl_boot_if.h @@ -770,15 +770,23 @@ enum hl_components { HL_COMPONENTS_ARMCP, HL_COMPONENTS_CPLD, HL_COMPONENTS_UBOOT, + HL_COMPONENTS_FUSE, HL_COMPONENTS_MAX_NUM = 16 }; +#define NAME_MAX_LEN 32 /* bytes */ +struct hl_module_data { + __u8 name[NAME_MAX_LEN]; + __u8 version[VERSION_MAX_LEN]; +}; + /** * struct hl_component_versions - versions associated with hl component. * @struct_size: size of all the struct (including dynamic size of modules). * @modules_offset: offset of the modules field in this struct. * @component: version of the component itself. * @fw_os: Firmware OS Version. + * @comp_name: Name of the component. * @modules_mask: i'th bit (from LSB) is a flag - on if module i in enum * hl_modules is used. * @modules_counter: number of set bits in modules_mask. @@ -791,45 +799,14 @@ struct hl_component_versions { __le16 modules_offset; __u8 component[VERSION_MAX_LEN]; __u8 fw_os[VERSION_MAX_LEN]; + __u8 comp_name[NAME_MAX_LEN]; __le16 modules_mask; __u8 modules_counter; __u8 reserved[1]; - __u8 modules[][VERSION_MAX_LEN]; -}; - -/** - * struct hl_fw_versions - all versions (fuse, cpucp's components with their - * modules) - * @struct_size: size of all the struct (including dynamic size of components). - * @components_offset: offset of the components field in this struct. - * @fuse: silicon production FUSE information. 
- * @components_mask: i'th bit (from LSB) is a flag - on if component i in enum - * hl_components is used. - * @components_counter: number of set bits in components_mask. - * @reserved: reserved for future use. - * @components: versions of hl components. Index i corresponds to the i'th bit - * that is *on* in components_mask. For example, if - * components_mask=0b101, then *components represents arcpid and - * *(hl_component_versions*)((char*)components + 1') represents - * preboot, where 1' = components[0].struct_size. - */ -struct hl_fw_versions { - __le16 struct_size; - __le16 components_offset; - __u8 fuse[VERSION_MAX_LEN]; - __le16 components_mask; - __u8 components_counter; - __u8 reserved[1]; - struct hl_component_versions components[]; + struct hl_module_data modules[]; }; -/* Max size of struct hl_component_versions */ -#define HL_COMPONENT_VERSIONS_MAX_SIZE \ - (sizeof(struct hl_component_versions) + HL_MODULES_MAX_NUM * \ - VERSION_MAX_LEN) - -/* Max size of struct hl_fw_versions */ -#define HL_FW_VERSIONS_MAX_SIZE (sizeof(struct hl_fw_versions) + \ - HL_COMPONENTS_MAX_NUM * HL_COMPONENT_VERSIONS_MAX_SIZE) +/* Max size of fit size */ +#define HL_FW_VERSIONS_FIT_SIZE 4096 #endif /* HL_BOOT_IF_H */ diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h index 50852cc80373..f661068d0c5f 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2018-2021 HabanaLabs, Ltd. + * Copyright 2018-2022 HabanaLabs, Ltd. * All Rights Reserved. 
* */ @@ -958,7 +958,7 @@ enum gaudi2_async_event_id { GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318, GAUDI2_EVENT_ARC_DCCM_FULL = 1319, GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320, - GAUDI2_EVENT_DEV_RESET_REQ = 1321, + GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321, GAUDI2_EVENT_SIZE, }; diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h index 82f3ca2a3966..8522f24deac0 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h @@ -63,7 +63,10 @@ struct gaudi2_cold_rst_data { u32 fake_sig_validation_en : 1; u32 bist_skip_enable : 1; u32 bist_need_iatu_config : 1; - u32 reserved : 24; + u32 fake_bis_compliant : 1; + u32 wd_rst_cause_arm : 1; + u32 wd_rst_cause_arcpid : 1; + u32 reserved : 21; }; __le32 data; };
Copy the most up-to-date firmware interface files into the driver. Signed-off-by: Oded Gabbay <ogabbay@kernel.org> --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- .../habanalabs/include/common/cpucp_if.h | 51 ++++++++++++++++++- .../habanalabs/include/common/hl_boot_if.h | 47 +++++------------ .../include/gaudi2/gaudi2_async_events.h | 4 +- .../habanalabs/include/gaudi2/gaudi2_fw_if.h | 5 +- 5 files changed, 69 insertions(+), 40 deletions(-)