Message ID | 20230322091958.13103-9-vburru@marvell.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | octeon_ep: deferred probe and mailbox | expand |
Context | Check | Description |
---|---|---|
netdev/series_format | success | Posting correctly formatted |
netdev/tree_selection | success | Clearly marked for net-next |
netdev/fixes_present | success | Fixes tag not required for -next series |
netdev/header_inline | success | No static functions without inline keyword in header files |
netdev/build_32bit | success | Errors and warnings before: 18 this patch: 18 |
netdev/cc_maintainers | success | CCed 7 of 7 maintainers |
netdev/build_clang | success | Errors and warnings before: 18 this patch: 18 |
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer |
netdev/deprecated_api | success | None detected |
netdev/check_selftest | success | No net selftest shell script |
netdev/verify_fixes | success | No Fixes tag |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 18 this patch: 18 |
netdev/checkpatch | warning | WARNING: line length of 84 exceeds 80 columns |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/source_inline | success | Was 0 now: 0 |
On Wed, Mar 22, 2023 at 02:19:57AM -0700, Veerasenareddy Burru wrote: > Monitor periodic heartbeat messages from device firmware. > Presence of heartbeat indicates the device is active and running. > If the heartbeat is missed for configured interval indicates > firmware has crashed and device is unusable; in this case, PF driver > stops and uninitialize the device. > > Signed-off-by: Veerasenareddy Burru <vburru@marvell.com> > Signed-off-by: Abhijit Ayarekar <aayarekar@marvell.com> > --- > v3 -> v4: > * 0007-xxx.patch in v3 is 0008-xxx.patch in v4. > > v2 -> v3: > * 0009-xxx.patch in v2 is now 0007-xxx.patch in v3 due to > 0007 and 0008.patch from v2 are removed in v3. > > v1 -> v2: > * no change > > .../marvell/octeon_ep/octep_cn9k_pf.c | 9 ++++ > .../ethernet/marvell/octeon_ep/octep_config.h | 6 +++ > .../ethernet/marvell/octeon_ep/octep_main.c | 45 ++++++++++++++++++- > .../ethernet/marvell/octeon_ep/octep_main.h | 7 +++ > .../marvell/octeon_ep/octep_regs_cn9k_pf.h | 2 + > 5 files changed, 67 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > index e2503c9bc8a1..90c3a419932d 100644 > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > @@ -16,6 +16,9 @@ > #define CTRL_MBOX_MAX_PF 128 > #define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF)) > > +#define FW_HB_INTERVAL_IN_SECS 1 > +#define FW_HB_MISS_COUNT 10 > + > /* Names of Hardware non-queue generic interrupts */ > static char *cn93_non_ioq_msix_names[] = { > "epf_ire_rint", > @@ -249,6 +252,10 @@ static void octep_init_config_cn93_pf(struct octep_device *oct) > conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + > (0x400000ull * 7) + > (link * CTRL_MBOX_SZ); > + > + conf->hb_interval = FW_HB_INTERVAL_IN_SECS; > + conf->max_hb_miss_cnt = FW_HB_MISS_COUNT; > + > } > > /* Setup registers for a hardware Tx 
Queue */ > @@ -383,6 +390,8 @@ static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct) > octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0); > if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) > queue_work(octep_wq, &oct->ctrl_mbox_task); > + else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) > + atomic_set(&oct->hb_miss_cnt, 0); > > handled = true; > } > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > index f208f3f9a447..df7cd39d9fce 100644 > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > @@ -200,5 +200,11 @@ struct octep_config { > > /* ctrl mbox config */ > struct octep_ctrl_mbox_config ctrl_mbox_cfg; > + > + /* Configured maximum heartbeat miss count */ > + u32 max_hb_miss_cnt; > + > + /* Configured firmware heartbeat interval in secs */ > + u32 hb_interval; > }; > #endif /* _OCTEP_CONFIG_H_ */ > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > index ba0d5fe3081d..415dd06ff344 100644 > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > @@ -901,6 +901,38 @@ static void octep_intr_poll_task(struct work_struct *work) > msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS)); > } > > +/** > + * octep_hb_timeout_task - work queue task to check firmware heartbeat. > + * > + * @work: pointer to hb work_struct > + * > + * Check for heartbeat miss count. Uninitialize oct device if miss count > + * exceeds configured max heartbeat miss count. 
> + * > + **/ > +static void octep_hb_timeout_task(struct work_struct *work) > +{ > + struct octep_device *oct = container_of(work, struct octep_device, > + hb_task.work); > + > + int miss_cnt; > + > + atomic_inc(&oct->hb_miss_cnt); > + miss_cnt = atomic_read(&oct->hb_miss_cnt); miss_cnt = atomic_inc_return(&oct->hb_miss_cnt); > + if (miss_cnt < oct->conf->max_hb_miss_cnt) { How is this heartbeat working? You increment on every entry to octep_hb_timeout_task(), After max_hb_miss_cnt invocations, you will stop your device. Thanks > + queue_delayed_work(octep_wq, &oct->hb_task, > + msecs_to_jiffies(oct->conf->hb_interval * 1000)); > + return; > + } > + > + dev_err(&oct->pdev->dev, "Missed %u heartbeats. Uninitializing\n", > + miss_cnt); > + rtnl_lock(); > + if (netif_running(oct->netdev)) > + octep_stop(oct->netdev); > + rtnl_unlock(); > +} > + > /** > * octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages. > * > @@ -938,7 +970,7 @@ static const char *octep_devid_to_str(struct octep_device *oct) > int octep_device_setup(struct octep_device *oct) > { > struct pci_dev *pdev = oct->pdev; > - int i; > + int i, ret; > > /* allocate memory for oct->conf */ > oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL); > @@ -973,7 +1005,15 @@ int octep_device_setup(struct octep_device *oct) > > oct->pkind = CFG_GET_IQ_PKIND(oct->conf); > > - return octep_ctrl_net_init(oct); > + ret = octep_ctrl_net_init(oct); > + if (ret) > + return ret; > + > + atomic_set(&oct->hb_miss_cnt, 0); > + INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task); > + queue_delayed_work(octep_wq, &oct->hb_task, > + msecs_to_jiffies(oct->conf->hb_interval * 1000)); > + return 0; > > unsupported_dev: > for (i = 0; i < OCTEP_MMIO_REGIONS; i++) > @@ -1002,6 +1042,7 @@ static void octep_device_cleanup(struct octep_device *oct) > } > > octep_ctrl_net_uninit(oct); > + cancel_delayed_work_sync(&oct->hb_task); > > oct->hw_ops.soft_reset(oct); > for (i = 0; i < OCTEP_MMIO_REGIONS; i++) { > diff --git 
a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > index 836d990ba3fa..e0907a719133 100644 > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > @@ -280,6 +280,13 @@ struct octep_device { > bool poll_non_ioq_intr; > /* Work entry to poll non-ioq interrupts */ > struct delayed_work intr_poll_task; > + > + /* Firmware heartbeat timer */ > + struct timer_list hb_timer; > + /* Firmware heartbeat miss count tracked by timer */ > + atomic_t hb_miss_cnt; > + /* Task to reset device on heartbeat miss */ > + struct delayed_work hb_task; > }; > > static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct) > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > index 0466fd9a002d..b25c3093dc7b 100644 > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > @@ -367,5 +367,7 @@ > > /* bit 0 for control mbox interrupt */ > #define CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0) > +/* bit 1 for firmware heartbeat interrupt */ > +#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) > > #endif /* _OCTEP_REGS_CN9K_PF_H_ */ > -- > 2.36.0 >
> -----Original Message----- > From: Leon Romanovsky <leon@kernel.org> > Sent: Thursday, March 23, 2023 3:47 AM > To: Veerasenareddy Burru <vburru@marvell.com> > Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Abhijit Ayarekar > <aayarekar@marvell.com>; Sathesh B Edara <sedara@marvell.com>; > Satananda Burla <sburla@marvell.com>; linux-doc@vger.kernel.org; David S. > Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; > Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com> > Subject: [EXT] Re: [PATCH net-next v4 8/8] octeon_ep: add heartbeat > monitor > > External Email > > ---------------------------------------------------------------------- > On Wed, Mar 22, 2023 at 02:19:57AM -0700, Veerasenareddy Burru wrote: > > Monitor periodic heartbeat messages from device firmware. > > Presence of heartbeat indicates the device is active and running. > > If the heartbeat is missed for configured interval indicates firmware > > has crashed and device is unusable; in this case, PF driver stops and > > uninitialize the device. > > > > Signed-off-by: Veerasenareddy Burru <vburru@marvell.com> > > Signed-off-by: Abhijit Ayarekar <aayarekar@marvell.com> > > --- > > v3 -> v4: > > * 0007-xxx.patch in v3 is 0008-xxx.patch in v4. > > > > v2 -> v3: > > * 0009-xxx.patch in v2 is now 0007-xxx.patch in v3 due to > > 0007 and 0008.patch from v2 are removed in v3. 
> > > > v1 -> v2: > > * no change > > > > .../marvell/octeon_ep/octep_cn9k_pf.c | 9 ++++ > > .../ethernet/marvell/octeon_ep/octep_config.h | 6 +++ > > .../ethernet/marvell/octeon_ep/octep_main.c | 45 > ++++++++++++++++++- > > .../ethernet/marvell/octeon_ep/octep_main.h | 7 +++ > > .../marvell/octeon_ep/octep_regs_cn9k_pf.h | 2 + > > 5 files changed, 67 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > > b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > > index e2503c9bc8a1..90c3a419932d 100644 > > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c > > @@ -16,6 +16,9 @@ > > #define CTRL_MBOX_MAX_PF 128 > > #define CTRL_MBOX_SZ ((size_t)(0x400000 / > CTRL_MBOX_MAX_PF)) > > > > +#define FW_HB_INTERVAL_IN_SECS 1 > > +#define FW_HB_MISS_COUNT 10 > > + > > /* Names of Hardware non-queue generic interrupts */ static char > > *cn93_non_ioq_msix_names[] = { > > "epf_ire_rint", > > @@ -249,6 +252,10 @@ static void octep_init_config_cn93_pf(struct > octep_device *oct) > > conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct- > >mmio[2].hw_addr + > > (0x400000ull * 7) + > > (link * CTRL_MBOX_SZ); > > + > > + conf->hb_interval = FW_HB_INTERVAL_IN_SECS; > > + conf->max_hb_miss_cnt = FW_HB_MISS_COUNT; > > + > > } > > > > /* Setup registers for a hardware Tx Queue */ @@ -383,6 +390,8 @@ > > static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device > *oct) > > octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0); > > if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) > > queue_work(octep_wq, &oct->ctrl_mbox_task); > > + else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) > > + atomic_set(&oct->hb_miss_cnt, 0); > > > > handled = true; > > } > > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > > b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > > index f208f3f9a447..df7cd39d9fce 100644 > > --- 
a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h > > @@ -200,5 +200,11 @@ struct octep_config { > > > > /* ctrl mbox config */ > > struct octep_ctrl_mbox_config ctrl_mbox_cfg; > > + > > + /* Configured maximum heartbeat miss count */ > > + u32 max_hb_miss_cnt; > > + > > + /* Configured firmware heartbeat interval in secs */ > > + u32 hb_interval; > > }; > > #endif /* _OCTEP_CONFIG_H_ */ > > diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > > b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > > index ba0d5fe3081d..415dd06ff344 100644 > > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c > > @@ -901,6 +901,38 @@ static void octep_intr_poll_task(struct work_struct > *work) > > > msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS)); > > } > > > > +/** > > + * octep_hb_timeout_task - work queue task to check firmware > heartbeat. > > + * > > + * @work: pointer to hb work_struct > > + * > > + * Check for heartbeat miss count. Uninitialize oct device if miss > > +count > > + * exceeds configured max heartbeat miss count. > > + * > > + **/ > > +static void octep_hb_timeout_task(struct work_struct *work) { > > + struct octep_device *oct = container_of(work, struct octep_device, > > + hb_task.work); > > + > > + int miss_cnt; > > + > > + atomic_inc(&oct->hb_miss_cnt); > > + miss_cnt = atomic_read(&oct->hb_miss_cnt); > > miss_cnt = atomic_inc_return(&oct->hb_miss_cnt); > Thanks for the feedback. Will fix it. > > + if (miss_cnt < oct->conf->max_hb_miss_cnt) { > > How is this heartbeat working? You increment on every entry to > octep_hb_timeout_task(), After max_hb_miss_cnt invocations, you will stop > your device. > > Thanks > Yes, device will be stopped after max_hb_miss_cnt heartbeats are missed. 
> > + queue_delayed_work(octep_wq, &oct->hb_task, > > + msecs_to_jiffies(oct->conf->hb_interval * > 1000)); > > + return; > > + } > > + > > + dev_err(&oct->pdev->dev, "Missed %u heartbeats. Uninitializing\n", > > + miss_cnt); > > + rtnl_lock(); > > + if (netif_running(oct->netdev)) > > + octep_stop(oct->netdev); > > + rtnl_unlock(); > > +} > > + > > /** > > * octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages. > > * > > @@ -938,7 +970,7 @@ static const char *octep_devid_to_str(struct > > octep_device *oct) int octep_device_setup(struct octep_device *oct) > > { > > struct pci_dev *pdev = oct->pdev; > > - int i; > > + int i, ret; > > > > /* allocate memory for oct->conf */ > > oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL); @@ -973,7 > > +1005,15 @@ int octep_device_setup(struct octep_device *oct) > > > > oct->pkind = CFG_GET_IQ_PKIND(oct->conf); > > > > - return octep_ctrl_net_init(oct); > > + ret = octep_ctrl_net_init(oct); > > + if (ret) > > + return ret; > > + > > + atomic_set(&oct->hb_miss_cnt, 0); > > + INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task); > > + queue_delayed_work(octep_wq, &oct->hb_task, > > + msecs_to_jiffies(oct->conf->hb_interval * 1000)); > > + return 0; > > > > unsupported_dev: > > for (i = 0; i < OCTEP_MMIO_REGIONS; i++) @@ -1002,6 +1042,7 @@ > > static void octep_device_cleanup(struct octep_device *oct) > > } > > > > octep_ctrl_net_uninit(oct); > > + cancel_delayed_work_sync(&oct->hb_task); > > > > oct->hw_ops.soft_reset(oct); > > for (i = 0; i < OCTEP_MMIO_REGIONS; i++) { diff --git > > a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > > b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > > index 836d990ba3fa..e0907a719133 100644 > > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h > > @@ -280,6 +280,13 @@ struct octep_device { > > bool poll_non_ioq_intr; > > /* Work entry to poll non-ioq interrupts */ > > struct delayed_work 
intr_poll_task; > > + > > + /* Firmware heartbeat timer */ > > + struct timer_list hb_timer; > > + /* Firmware heartbeat miss count tracked by timer */ > > + atomic_t hb_miss_cnt; > > + /* Task to reset device on heartbeat miss */ > > + struct delayed_work hb_task; > > }; > > > > static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct) diff > > --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > > b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > > index 0466fd9a002d..b25c3093dc7b 100644 > > --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > > +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h > > @@ -367,5 +367,7 @@ > > > > /* bit 0 for control mbox interrupt */ > > #define CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0) > > +/* bit 1 for firmware heartbeat interrupt */ > > +#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) > > > > #endif /* _OCTEP_REGS_CN9K_PF_H_ */ > > -- > > 2.36.0 > >
On Thu, Mar 23, 2023 at 06:14:10PM +0000, Veerasenareddy Burru wrote: > > > > -----Original Message----- > > From: Leon Romanovsky <leon@kernel.org> > > Sent: Thursday, March 23, 2023 3:47 AM > > To: Veerasenareddy Burru <vburru@marvell.com> > > Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Abhijit Ayarekar > > <aayarekar@marvell.com>; Sathesh B Edara <sedara@marvell.com>; > > Satananda Burla <sburla@marvell.com>; linux-doc@vger.kernel.org; David S. > > Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; > > Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com> > > Subject: [EXT] Re: [PATCH net-next v4 8/8] octeon_ep: add heartbeat > > monitor > > > > External Email > > > > ---------------------------------------------------------------------- > > On Wed, Mar 22, 2023 at 02:19:57AM -0700, Veerasenareddy Burru wrote: > > > Monitor periodic heartbeat messages from device firmware. > > > Presence of heartbeat indicates the device is active and running. > > > If the heartbeat is missed for configured interval indicates firmware > > > has crashed and device is unusable; in this case, PF driver stops and > > > uninitialize the device. > > > > > > Signed-off-by: Veerasenareddy Burru <vburru@marvell.com> > > > Signed-off-by: Abhijit Ayarekar <aayarekar@marvell.com> > > > --- > > > v3 -> v4: > > > * 0007-xxx.patch in v3 is 0008-xxx.patch in v4. > > > > > > v2 -> v3: > > > * 0009-xxx.patch in v2 is now 0007-xxx.patch in v3 due to > > > 0007 and 0008.patch from v2 are removed in v3. > > > > > > v1 -> v2: > > > * no change <...> > > > + struct octep_device *oct = container_of(work, struct octep_device, > > > + hb_task.work); > > > + > > > + int miss_cnt; > > > + > > > + atomic_inc(&oct->hb_miss_cnt); > > > + miss_cnt = atomic_read(&oct->hb_miss_cnt); > > > > miss_cnt = atomic_inc_return(&oct->hb_miss_cnt); > > > > Thanks for the feedback. Will fix it. 
> > > > + if (miss_cnt < oct->conf->max_hb_miss_cnt) { > > > > How is this heartbeat working? You increment on every entry to > > octep_hb_timeout_task(), After max_hb_miss_cnt invocations, you will stop > > your device. > > > > Thanks > > > > Yes, device will be stopped after max_hb_miss_cnt heartbeats are missed. If I read code correctly, device will stop after octep_hb_timeout_task() calls which happens every msecs_to_jiffies(oct->conf->hb_interval * 1000). You don't cancel/reschedule job if timeout doesn't happen. Thanks > > > > + queue_delayed_work(octep_wq, &oct->hb_task, > > > + msecs_to_jiffies(oct->conf->hb_interval * > > 1000));
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c index e2503c9bc8a1..90c3a419932d 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c @@ -16,6 +16,9 @@ #define CTRL_MBOX_MAX_PF 128 #define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF)) +#define FW_HB_INTERVAL_IN_SECS 1 +#define FW_HB_MISS_COUNT 10 + /* Names of Hardware non-queue generic interrupts */ static char *cn93_non_ioq_msix_names[] = { "epf_ire_rint", @@ -249,6 +252,10 @@ static void octep_init_config_cn93_pf(struct octep_device *oct) conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + (0x400000ull * 7) + (link * CTRL_MBOX_SZ); + + conf->hb_interval = FW_HB_INTERVAL_IN_SECS; + conf->max_hb_miss_cnt = FW_HB_MISS_COUNT; + } /* Setup registers for a hardware Tx Queue */ @@ -383,6 +390,8 @@ static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct) octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0); if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) queue_work(octep_wq, &oct->ctrl_mbox_task); + else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) + atomic_set(&oct->hb_miss_cnt, 0); handled = true; } diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h index f208f3f9a447..df7cd39d9fce 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h @@ -200,5 +200,11 @@ struct octep_config { /* ctrl mbox config */ struct octep_ctrl_mbox_config ctrl_mbox_cfg; + + /* Configured maximum heartbeat miss count */ + u32 max_hb_miss_cnt; + + /* Configured firmware heartbeat interval in secs */ + u32 hb_interval; }; #endif /* _OCTEP_CONFIG_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index ba0d5fe3081d..415dd06ff344 
100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -901,6 +901,38 @@ static void octep_intr_poll_task(struct work_struct *work) msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS)); } +/** + * octep_hb_timeout_task - work queue task to check firmware heartbeat. + * + * @work: pointer to hb work_struct + * + * Check for heartbeat miss count. Uninitialize oct device if miss count + * exceeds configured max heartbeat miss count. + * + **/ +static void octep_hb_timeout_task(struct work_struct *work) +{ + struct octep_device *oct = container_of(work, struct octep_device, + hb_task.work); + + int miss_cnt; + + atomic_inc(&oct->hb_miss_cnt); + miss_cnt = atomic_read(&oct->hb_miss_cnt); + if (miss_cnt < oct->conf->max_hb_miss_cnt) { + queue_delayed_work(octep_wq, &oct->hb_task, + msecs_to_jiffies(oct->conf->hb_interval * 1000)); + return; + } + + dev_err(&oct->pdev->dev, "Missed %u heartbeats. Uninitializing\n", + miss_cnt); + rtnl_lock(); + if (netif_running(oct->netdev)) + octep_stop(oct->netdev); + rtnl_unlock(); +} + /** * octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages. 
* @@ -938,7 +970,7 @@ static const char *octep_devid_to_str(struct octep_device *oct) int octep_device_setup(struct octep_device *oct) { struct pci_dev *pdev = oct->pdev; - int i; + int i, ret; /* allocate memory for oct->conf */ oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL); @@ -973,7 +1005,15 @@ int octep_device_setup(struct octep_device *oct) oct->pkind = CFG_GET_IQ_PKIND(oct->conf); - return octep_ctrl_net_init(oct); + ret = octep_ctrl_net_init(oct); + if (ret) + return ret; + + atomic_set(&oct->hb_miss_cnt, 0); + INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task); + queue_delayed_work(octep_wq, &oct->hb_task, + msecs_to_jiffies(oct->conf->hb_interval * 1000)); + return 0; unsupported_dev: for (i = 0; i < OCTEP_MMIO_REGIONS; i++) @@ -1002,6 +1042,7 @@ static void octep_device_cleanup(struct octep_device *oct) } octep_ctrl_net_uninit(oct); + cancel_delayed_work_sync(&oct->hb_task); oct->hw_ops.soft_reset(oct); for (i = 0; i < OCTEP_MMIO_REGIONS; i++) { diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index 836d990ba3fa..e0907a719133 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -280,6 +280,13 @@ struct octep_device { bool poll_non_ioq_intr; /* Work entry to poll non-ioq interrupts */ struct delayed_work intr_poll_task; + + /* Firmware heartbeat timer */ + struct timer_list hb_timer; + /* Firmware heartbeat miss count tracked by timer */ + atomic_t hb_miss_cnt; + /* Task to reset device on heartbeat miss */ + struct delayed_work hb_task; }; static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index 0466fd9a002d..b25c3093dc7b 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ 
-367,5 +367,7 @@ /* bit 0 for control mbox interrupt */ #define CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0) +/* bit 1 for firmware heartbeat interrupt */ +#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) #endif /* _OCTEP_REGS_CN9K_PF_H_ */