Message ID:   20240904054815.1341712-3-jitendra.vegiraju@broadcom.com (mailing list archive)
State:        Changes Requested
Delegated to: Netdev Maintainers
Series:       net: stmmac: Add PCI driver support for BCM8958x
On Tue, Sep 03, 2024 at 10:48:12PM -0700, jitendra.vegiraju@broadcom.com wrote: > From: Jitendra Vegiraju <jitendra.vegiraju@broadcom.com> > > The BCM8958x uses early adopter version of DWC_xgmac version 4.00a for > Ethernet MAC. The DW25GMAC introduced in this version adds new DMA > architecture called Hyper-DMA (HDMA) for virtualization scalability. > This is realized by decoupling physical DMA channels(PDMA) from potentially > large number of virtual DMA channels (VDMA). The VDMAs are software > abstractions that map to PDMAs for frame transmission and reception. > > Define new macros DW25GMAC_CORE_4_00 and DW25GMAC_ID to identify 25GMAC > device. > To support the new HDMA architecture, a new instance of stmmac_dma_ops > dw25gmac400_dma_ops is added. > Most of the other dma operation functions in existing dwxgamc2_dma.c file > are reused where applicable. > Added setup function for DW25GMAC's stmmac_hwif_entry in stmmac core. > > Signed-off-by: Jitendra Vegiraju <jitendra.vegiraju@broadcom.com> > --- > drivers/net/ethernet/stmicro/stmmac/Makefile | 2 +- > drivers/net/ethernet/stmicro/stmmac/common.h | 4 + > .../net/ethernet/stmicro/stmmac/dw25gmac.c | 224 ++++++++++++++++++ > .../net/ethernet/stmicro/stmmac/dw25gmac.h | 92 +++++++ > .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 1 + > .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 43 ++++ > .../ethernet/stmicro/stmmac/dwxgmac2_dma.c | 31 +++ > drivers/net/ethernet/stmicro/stmmac/hwif.h | 1 + > 8 files changed, 397 insertions(+), 1 deletion(-) > create mode 100644 drivers/net/ethernet/stmicro/stmmac/dw25gmac.c > create mode 100644 drivers/net/ethernet/stmicro/stmmac/dw25gmac.h > > diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile > index c2f0e91f6bf8..967e8a9aa432 100644 > --- a/drivers/net/ethernet/stmicro/stmmac/Makefile > +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile > @@ -6,7 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ > mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \ > dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ > stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ > - stmmac_xdp.o stmmac_est.o \ > + stmmac_xdp.o stmmac_est.o dw25gmac.o \ > $(stmmac-y) > > stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o > diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h > index 684489156dce..9a747b89ba51 100644 > --- a/drivers/net/ethernet/stmicro/stmmac/common.h > +++ b/drivers/net/ethernet/stmicro/stmmac/common.h > @@ -38,9 +38,11 @@ > #define DWXGMAC_CORE_2_10 0x21 > #define DWXGMAC_CORE_2_20 0x22 > #define DWXLGMAC_CORE_2_00 0x20 > +#define DW25GMAC_CORE_4_00 0x40 > > /* Device ID */ > #define DWXGMAC_ID 0x76 > +#define DW25GMAC_ID 0x55 > #define DWXLGMAC_ID 0x27 > > #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ > @@ -563,6 +565,7 @@ struct mac_link { > u32 speed2500; > u32 speed5000; > u32 speed10000; > + u32 speed25000; > } xgmii; > struct { > u32 speed25000; > @@ -621,6 +624,7 @@ int dwmac100_setup(struct stmmac_priv *priv); > int dwmac1000_setup(struct stmmac_priv *priv); > int dwmac4_setup(struct stmmac_priv *priv); > int dwxgmac2_setup(struct stmmac_priv *priv); > +int dw25gmac_setup(struct stmmac_priv *priv); > int dwxlgmac2_setup(struct stmmac_priv *priv); > > void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], > diff --git a/drivers/net/ethernet/stmicro/stmmac/dw25gmac.c 
b/drivers/net/ethernet/stmicro/stmmac/dw25gmac.c > new file mode 100644 > index 000000000000..adb33700ffbb > --- /dev/null > +++ b/drivers/net/ethernet/stmicro/stmmac/dw25gmac.c > @@ -0,0 +1,224 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (c) 2024 Broadcom Corporation > + */ > +#include "stmmac.h" > +#include "dwxgmac2.h" > +#include "dw25gmac.h" > + > +static u32 decode_vdma_count(u32 regval) > +{ > + /* compressed encoding for vdma count > + * regval: VDMA count > + * 0-15 : 1 - 16 > + * 16-19 : 20, 24, 28, 32 > + * 20-23 : 40, 48, 56, 64 > + * 24-27 : 80, 96, 112, 128 > + */ > + if (regval < 16) > + return regval + 1; > + return (4 << ((regval - 16) / 4)) * ((regval % 4) + 5); The shortest code isn't always the best one. This one gives me a headache in trying to decipher whether it really matches to what is described in the comment. What about just: if (regval < 16) /* Direct mapping */ return regval + 1; else if (regval < 20) /* 20, 24, 28, 32 */ return 20 + (regval - 16) * 4; else if (regval < 24) /* 40, 48, 56, 64 */ return 40 + (regval - 20) * 8; else if (regval < 28) /* 80, 96, 112, 128 */ return 80 + (regval - 24) * 16; ? > +} > + > +static void dw25gmac_read_hdma_limits(void __iomem *ioaddr, > + struct stmmac_hdma_cfg *hdma) > +{ > + u32 hw_cap; > + > + /* Get VDMA/PDMA counts from HW */ > + hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > + hdma->tx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > + hw_cap)); > + hdma->rx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > + hw_cap)); > + hdma->tx_pdmas = FIELD_GET(XGMAC_HWFEAT_TXQCNT, hw_cap) + 1; > + hdma->rx_pdmas = FIELD_GET(XGMAC_HWFEAT_RXQCNT, hw_cap) + 1; Hmm, these are the Tx/Rx DMA-channels and Tx/Rx MTL-queues count fields. Can't you just use the dma_features::{number_tx_channel,number_tx_queues} and dma_features::{number_rx_channel,number_rx_queues} fields to store the retrieved data? Moreover why not to add the code above to the dwxgmac2_get_hw_feature() method? > +} > + > +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv) > +{ > + struct plat_stmmacenet_data *plat = priv->plat; > + struct device *dev = priv->device; > + struct stmmac_hdma_cfg *hdma; > + int i; > + > + hdma = devm_kzalloc(dev, > + sizeof(*plat->dma_cfg->hdma_cfg), > + GFP_KERNEL); > + if (!hdma) > + return -ENOMEM; > + > + dw25gmac_read_hdma_limits(priv->ioaddr, hdma); > + > + hdma->tvdma_tc = devm_kzalloc(dev, > + sizeof(*hdma->tvdma_tc) * hdma->tx_vdmas, > + GFP_KERNEL); > + if (!hdma->tvdma_tc) > + return -ENOMEM; > + > + hdma->rvdma_tc = devm_kzalloc(dev, > + sizeof(*hdma->rvdma_tc) * hdma->rx_vdmas, > + GFP_KERNEL); > + if (!hdma->rvdma_tc) > + return -ENOMEM; > + > + hdma->tpdma_tc = devm_kzalloc(dev, > + sizeof(*hdma->tpdma_tc) * hdma->tx_pdmas, > + GFP_KERNEL); > + if (!hdma->tpdma_tc) > + return -ENOMEM; > + > + hdma->rpdma_tc = devm_kzalloc(dev, > + sizeof(*hdma->rpdma_tc) * hdma->rx_pdmas, > + GFP_KERNEL); > + if (!hdma->rpdma_tc) > + return -ENOMEM; > + > + /* Initialize one-to-one mapping for each of the used queues */ > + for (i = 0; i < plat->tx_queues_to_use; i++) { > + hdma->tvdma_tc[i] = i; > + hdma->tpdma_tc[i] = i; > + } > + for (i = 0; i < plat->rx_queues_to_use; i++) { > + hdma->rvdma_tc[i] = i; > + hdma->rpdma_tc[i] = i; > + } So the Traffic Class ID is initialized for the tx_queues_to_use/rx_queues_to_use number of channels only, right? What about the Virtual and Physical DMA-channels with numbers greater than these values? 
> + plat->dma_cfg->hdma_cfg = hdma; > + > + return 0; > +} > + > +static int rd_dma_ch_ind(void __iomem *ioaddr, u8 mode, u32 channel) > +{ > + u32 reg_val = 0; > + > + reg_val |= FIELD_PREP(XXVGMAC_MODE_SELECT, mode); > + reg_val |= FIELD_PREP(XXVGMAC_ADDR_OFFSET, channel); > + reg_val |= XXVGMAC_CMD_TYPE | XXVGMAC_OB; > + writel(reg_val, ioaddr + XXVGMAC_DMA_CH_IND_CONTROL); > + return readl(ioaddr + XXVGMAC_DMA_CH_IND_DATA); > +} > + > +static void wr_dma_ch_ind(void __iomem *ioaddr, u8 mode, u32 channel, u32 val) > +{ > + u32 reg_val = 0; > + > + writel(val, ioaddr + XXVGMAC_DMA_CH_IND_DATA); > + reg_val |= FIELD_PREP(XXVGMAC_MODE_SELECT, mode); > + reg_val |= FIELD_PREP(XXVGMAC_ADDR_OFFSET, channel); > + reg_val |= XGMAC_OB; > + writel(reg_val, ioaddr + XXVGMAC_DMA_CH_IND_CONTROL); > +} > + > +static void xgmac4_tp2tc_map(void __iomem *ioaddr, u8 pdma_ch, u32 tc_num) > +{ > + u32 val = 0; > + > + val = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, pdma_ch); > + val &= ~XXVGMAC_TP2TCMP; > + val |= FIELD_PREP(XXVGMAC_TP2TCMP, tc_num); > + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, pdma_ch, val); > +} > + > +static void xgmac4_rp2tc_map(void __iomem *ioaddr, u8 pdma_ch, u32 tc_num) > +{ > + u32 val = 0; > + > + val = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, pdma_ch); > + val &= ~XXVGMAC_RP2TCMP; > + val |= FIELD_PREP(XXVGMAC_RP2TCMP, tc_num); > + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, pdma_ch, val); > +} > + > +void dw25gmac_dma_init(void __iomem *ioaddr, > + struct stmmac_dma_cfg *dma_cfg) > +{ > + u32 value; > + u32 i; > + > + value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); > + value &= ~(XGMAC_AAL | XGMAC_EAME); > + if (dma_cfg->aal) > + value |= XGMAC_AAL; > + if (dma_cfg->eame) > + value |= XGMAC_EAME; > + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); > + > + for (i = 0; i < dma_cfg->hdma_cfg->tx_vdmas; i++) { > + value = rd_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i); > + value &= ~XXVGMAC_TXDCSZ; > + value |= FIELD_PREP(XXVGMAC_TXDCSZ, > + XXVGMAC_TXDCSZ_256BYTES); > + value &= ~XXVGMAC_TDPS; > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_TDPS_HALF); > + wr_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i, value); > + } > + > + for (i = 0; i < dma_cfg->hdma_cfg->rx_vdmas; i++) { > + value = rd_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i); > + value &= ~XXVGMAC_RXDCSZ; > + value |= FIELD_PREP(XXVGMAC_RXDCSZ, > + XXVGMAC_RXDCSZ_256BYTES); > + value &= ~XXVGMAC_RDPS; > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_RDPS_HALF); > + wr_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i, value); > + } > + > + for (i = 0; i < dma_cfg->hdma_cfg->tx_pdmas; i++) { > + value = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i); > + value &= ~(XXVGMAC_TXPBL | XXVGMAC_TPBLX8_MODE); > + if (dma_cfg->pblx8) > + value |= XXVGMAC_TPBLX8_MODE; > + value |= FIELD_PREP(XXVGMAC_TXPBL, dma_cfg->pbl); > + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i, value); > + xgmac4_tp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->tpdma_tc[i]); > + } > + > + for (i = 0; i < dma_cfg->hdma_cfg->rx_pdmas; i++) { > + value = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i); > + value &= ~(XXVGMAC_RXPBL | XXVGMAC_RPBLX8_MODE); > + if (dma_cfg->pblx8) > + value |= XXVGMAC_RPBLX8_MODE; > + value |= FIELD_PREP(XXVGMAC_RXPBL, dma_cfg->pbl); > + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i, value); > + xgmac4_rp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->rpdma_tc[i]); What if tx_pdmas doesn't match plat_stmmacenet_data::tx_queues_to_use and rx_pdmas doesn't match to plat_stmmacenet_data::rx_queues_to_use? 
If they don't then you'll get out of the initialized tpdma_tc/rpdma_tc fields and these channels will be pre-initialized with the zero TC. Is that what expected? I doubt so. > + } > +} > + > +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, > + void __iomem *ioaddr, > + struct stmmac_dma_cfg *dma_cfg, > + dma_addr_t dma_addr, u32 chan) > +{ > + u32 value; > + > + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > + value &= ~XXVGMAC_TVDMA2TCMP; > + value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, > + dma_cfg->hdma_cfg->tvdma_tc[chan]); > + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); Please note this will have only first plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} VDMA channels initialized. Don't you have much more than just 4 channels? > + > + writel(upper_32_bits(dma_addr), > + ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); > + writel(lower_32_bits(dma_addr), > + ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); > +} > + > +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, > + void __iomem *ioaddr, > + struct stmmac_dma_cfg *dma_cfg, > + dma_addr_t dma_addr, u32 chan) > +{ > + u32 value; > + > + value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > + value &= ~XXVGMAC_RVDMA2TCMP; > + value |= FIELD_PREP(XXVGMAC_RVDMA2TCMP, > + dma_cfg->hdma_cfg->rvdma_tc[chan]); > + writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); The same question. > + > + writel(upper_32_bits(dma_addr), > + ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); > + writel(lower_32_bits(dma_addr), > + ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); > +} These methods are called for each plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} DMA-channel/Queue. The static mapping means you'll have each PDMA/Queue assigned a static traffic class ID corresponding to the channel ID. Meanwhile the VDMA channels are supposed to be initialized with the TC ID corresponding to the matching PDMA ID. The TC ID in this case is passed as the DMA/Queue channel ID. Then the Tx/Rx DMA-channels init methods can be converted to: dw25gmac_dma_init_Xx_chan(chan) { /* Map each chan-th VDMA to the single chan PDMA by assigning * the static TC ID. */ for (i = chan; i < Xx_vdmas; i += (Xx_vdmas / Xx_queues_to_use)) { /* Initialize VDMA channels */ XXVGMAC_TVDMA2TCMP = chan; } /* Assign the static TC ID to the specified PDMA channel */ xgmac4_rp2tc_map(chan, chan) } , where X={t,r}. Thus you can redistribute the loops implemented in dw25gmac_dma_init() to the respective Tx/Rx DMA-channel init methods. Am I missing something? -Serge() > [...]
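For reference, the readable form suggested above drops straight into the patch as a self-contained helper. This is only a sketch restating the encoding table from the patch comment; the trailing return for out-of-range field values is an assumption, since neither the patch nor the review says what the hardware reports beyond 27:

/* Decode the compressed VDMA count encoding from XGMAC_HW_FEATURE2:
 *   0-15  -> 1..16 (direct mapping)
 *   16-19 -> 20, 24, 28, 32
 *   20-23 -> 40, 48, 56, 64
 *   24-27 -> 80, 96, 112, 128
 */
static u32 decode_vdma_count(u32 regval)
{
	if (regval < 16)	/* Direct mapping */
		return regval + 1;
	else if (regval < 20)	/* 20, 24, 28, 32 */
		return 20 + (regval - 16) * 4;
	else if (regval < 24)	/* 40, 48, 56, 64 */
		return 40 + (regval - 20) * 8;
	else if (regval < 28)	/* 80, 96, 112, 128 */
		return 80 + (regval - 24) * 16;

	return 0;	/* Assumed: treat reserved encodings as no VDMAs */
}

Both forms agree across the whole 0..27 range, e.g. regval = 21 yields 40 + (21 - 20) * 8 = 48 from the branches above and (4 << 1) * 6 = 48 from the original one-liner, so the change is purely about readability.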
Hi Serge, On Tue, Sep 10, 2024 at 12:25 PM Serge Semin <fancer.lancer@gmail.com> wrote: > > > +static u32 decode_vdma_count(u32 regval) > > +{ > > + /* compressed encoding for vdma count > > + * regval: VDMA count > > + * 0-15 : 1 - 16 > > + * 16-19 : 20, 24, 28, 32 > > + * 20-23 : 40, 48, 56, 64 > > + * 24-27 : 80, 96, 112, 128 > > + */ > > + if (regval < 16) > > + return regval + 1; > > + return (4 << ((regval - 16) / 4)) * ((regval % 4) + 5); > > The shortest code isn't always the best one. This one gives me a > headache in trying to decipher whether it really matches to what is > described in the comment. What about just: > > if (regval < 16) /* Direct mapping */ > return regval + 1; > else if (regval < 20) /* 20, 24, 28, 32 */ > return 20 + (regval - 16) * 4; > else if (regval < 24) /* 40, 48, 56, 64 */ > return 40 + (regval - 20) * 8; > else if (regval < 28) /* 80, 96, 112, 128 */ > return 80 + (regval - 24) * 16; > > ? Couldn't agree more :) Thanks, I will replace it with your code, which is definitely more readable. > > > +} > > + > > +static void dw25gmac_read_hdma_limits(void __iomem *ioaddr, > > + struct stmmac_hdma_cfg *hdma) > > +{ > > + u32 hw_cap; > > + > > + /* Get VDMA/PDMA counts from HW */ > > + hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > > > > + hdma->tx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > > + hw_cap)); > > + hdma->rx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > > + hw_cap)); > > + hdma->tx_pdmas = FIELD_GET(XGMAC_HWFEAT_TXQCNT, hw_cap) + 1; > > + hdma->rx_pdmas = FIELD_GET(XGMAC_HWFEAT_RXQCNT, hw_cap) + 1; > > Hmm, these are the Tx/Rx DMA-channels and Tx/Rx MTL-queues count > fields. Can't you just use the > dma_features::{number_tx_channel,number_tx_queues} and > dma_features::{number_rx_channel,number_rx_queues} fields to store the > retrieved data? > > Moreover why not to add the code above to the dwxgmac2_get_hw_feature() method? > Thanks, I missed the reuse of existing fields. However, since the VDMA count has a slightly bigger bitmask, we need to extract VDMA channel count as per DW25GMAC spec. Instead of duplicating dwxgmac2_get_hw_feature(), should we add wrapper for dw25gmac, something like the following? 
dw25gmac_get_hw_feature(ioaddr, dma_cap) { u32 hw_cap; int rc; rc = dwxgmac2_get_hw_feature(ioaddr, dma_cap); /* Get VDMA counts from HW */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); dma_cap->num_tx_channels = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, hw_cap)); dma_cap->num_rx_channels = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, hw_cap)); return rc; } > > +} > > + > > +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv) > > +{ > > + struct plat_stmmacenet_data *plat = priv->plat; > > + struct device *dev = priv->device; > > + struct stmmac_hdma_cfg *hdma; > > + int i; > > + > > + hdma = devm_kzalloc(dev, > > + sizeof(*plat->dma_cfg->hdma_cfg), > > + GFP_KERNEL); > > + if (!hdma) > > + return -ENOMEM; > > + > > + dw25gmac_read_hdma_limits(priv->ioaddr, hdma); > > + > > + hdma->tvdma_tc = devm_kzalloc(dev, > > + sizeof(*hdma->tvdma_tc) * hdma->tx_vdmas, > > + GFP_KERNEL); > > + if (!hdma->tvdma_tc) > > + return -ENOMEM; > > + > > + hdma->rvdma_tc = devm_kzalloc(dev, > > + sizeof(*hdma->rvdma_tc) * hdma->rx_vdmas, > > + GFP_KERNEL); > > + if (!hdma->rvdma_tc) > > + return -ENOMEM; > > + > > + hdma->tpdma_tc = devm_kzalloc(dev, > > + sizeof(*hdma->tpdma_tc) * hdma->tx_pdmas, > > + GFP_KERNEL); > > + if (!hdma->tpdma_tc) > > + return -ENOMEM; > > + > > + hdma->rpdma_tc = devm_kzalloc(dev, > > + sizeof(*hdma->rpdma_tc) * hdma->rx_pdmas, > > + GFP_KERNEL); > > + if (!hdma->rpdma_tc) > > + return -ENOMEM; > > + > > > + /* Initialize one-to-one mapping for each of the used queues */ > > + for (i = 0; i < plat->tx_queues_to_use; i++) { > > + hdma->tvdma_tc[i] = i; > > + hdma->tpdma_tc[i] = i; > > + } > > + for (i = 0; i < plat->rx_queues_to_use; i++) { > > + hdma->rvdma_tc[i] = i; > > + hdma->rpdma_tc[i] = i; > > + } > > So the Traffic Class ID is initialized for the > tx_queues_to_use/rx_queues_to_use number of channels only, right? What > about the Virtual and Physical DMA-channels with numbers greater than > these values? > You have brought up a question that applies to remaining comments in this file as well. How the VDMA/PDMA mapping is used depends on the device/glue driver. For example in our SoC the remaining VDMAs are meant to be used with SRIOV virtual functions and not all of them are available for physical function. Since additional VDMAs/PDMAs remain unused in hardware I let them stay at their default values. No traffic is expected to be mapped to unused V/PDMAs. I couldn't think of a reason for it to be an issue from a driver perspective. Please let me know, if I am missing something or we need to address a use case with bigger scope. The responses for following comments also depend on what approach we take here. 
> > + plat->dma_cfg->hdma_cfg = hdma; > > + > > + return 0; > > +} > > + > > + > > +void dw25gmac_dma_init(void __iomem *ioaddr, > > + struct stmmac_dma_cfg *dma_cfg) > > +{ > > + u32 value; > > + u32 i; > > + > > + value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); > > + value &= ~(XGMAC_AAL | XGMAC_EAME); > > + if (dma_cfg->aal) > > + value |= XGMAC_AAL; > > + if (dma_cfg->eame) > > + value |= XGMAC_EAME; > > + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); > > + > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_vdmas; i++) { > > + value = rd_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i); > > + value &= ~XXVGMAC_TXDCSZ; > > + value |= FIELD_PREP(XXVGMAC_TXDCSZ, > > + XXVGMAC_TXDCSZ_256BYTES); > > + value &= ~XXVGMAC_TDPS; > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_TDPS_HALF); > > + wr_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i, value); > > + } > > + > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_vdmas; i++) { > > + value = rd_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i); > > + value &= ~XXVGMAC_RXDCSZ; > > + value |= FIELD_PREP(XXVGMAC_RXDCSZ, > > + XXVGMAC_RXDCSZ_256BYTES); > > + value &= ~XXVGMAC_RDPS; > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_RDPS_HALF); > > + wr_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i, value); > > + } > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_pdmas; i++) { > > + value = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i); > > + value &= ~(XXVGMAC_TXPBL | XXVGMAC_TPBLX8_MODE); > > + if (dma_cfg->pblx8) > > + value |= XXVGMAC_TPBLX8_MODE; > > + value |= FIELD_PREP(XXVGMAC_TXPBL, dma_cfg->pbl); > > + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i, value); > > + xgmac4_tp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->tpdma_tc[i]); > > + } > > + > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_pdmas; i++) { > > + value = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i); > > + value &= ~(XXVGMAC_RXPBL | XXVGMAC_RPBLX8_MODE); > > + if (dma_cfg->pblx8) > > + value |= XXVGMAC_RPBLX8_MODE; > > + value |= FIELD_PREP(XXVGMAC_RXPBL, dma_cfg->pbl); > > + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i, value); > > + xgmac4_rp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->rpdma_tc[i]); > > What if tx_pdmas doesn't match plat_stmmacenet_data::tx_queues_to_use > and rx_pdmas doesn't match to plat_stmmacenet_data::rx_queues_to_use? > > If they don't then you'll get out of the initialized tpdma_tc/rpdma_tc > fields and these channels will be pre-initialized with the zero TC. Is > that what expected? I doubt so. > As mentioned in the previous response the remaining resources are unused and no traffic is mapped to those resources. > > + } > > +} > > + > > > +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, > > + void __iomem *ioaddr, > > + struct stmmac_dma_cfg *dma_cfg, > > + dma_addr_t dma_addr, u32 chan) > > +{ > > + u32 value; > > + > > > + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > + value &= ~XXVGMAC_TVDMA2TCMP; > > + value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, > > + dma_cfg->hdma_cfg->tvdma_tc[chan]); > > + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > Please note this will have only first > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} VDMA > channels initialized. Don't you have much more than just 4 channels? > Yes, there are 32 VDMA channels on this device. In our application the additional channels are partitioned for use with SRIOV virtual functions. Similar to PDMA comment above, the additional VDMAs are not enabled, and left in default state. My thinking is, when another 25gmac device comes along that requires a different mapping we may need to add the ability to set the mapping in glue driver. 
We can support this by adding a check in dw25gmac_setup() @@ -1708,8 +1708,10 @@ int dw25gmac_setup(struct stmmac_priv *priv) mac->mii.clk_csr_shift = 19; mac->mii.clk_csr_mask = GENMASK(21, 19); - /* Allocate and initialize hdma mapping */ - return dw25gmac_hdma_cfg_init(priv); + /* Allocate and initialize hdma mapping, if not done by glue driver. */ + if (!priv->plat->dma_cfg->hdma_cfg) + return dw25gmac_hdma_cfg_init(priv); + return 0; } > > + > > + writel(upper_32_bits(dma_addr), > > + ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); > > + writel(lower_32_bits(dma_addr), > > + ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); > > +} > > + > > +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, > > + void __iomem *ioaddr, > > + struct stmmac_dma_cfg *dma_cfg, > > + dma_addr_t dma_addr, u32 chan) > > +{ > > + u32 value; > > + > > > + value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > + value &= ~XXVGMAC_RVDMA2TCMP; > > + value |= FIELD_PREP(XXVGMAC_RVDMA2TCMP, > > + dma_cfg->hdma_cfg->rvdma_tc[chan]); > > + writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > The same question. > > > + > > + writel(upper_32_bits(dma_addr), > > + ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); > > + writel(lower_32_bits(dma_addr), > > + ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); > > +} > > These methods are called for each > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} > DMA-channel/Queue. The static mapping means you'll have each > PDMA/Queue assigned a static traffic class ID corresponding to the > channel ID. Meanwhile the VDMA channels are supposed to be initialized > with the TC ID corresponding to the matching PDMA ID. > > The TC ID in this case is passed as the DMA/Queue channel ID. Then the > Tx/Rx DMA-channels init methods can be converted to: > > dw25gmac_dma_init_Xx_chan(chan) > { > /* Map each chan-th VDMA to the single chan PDMA by assigning > * the static TC ID. > */ > for (i = chan; i < Xx_vdmas; i += (Xx_vdmas / Xx_queues_to_use)) { > /* Initialize VDMA channels */ > XXVGMAC_TVDMA2TCMP = chan; > } > > /* Assign the static TC ID to the specified PDMA channel */ > xgmac4_rp2tc_map(chan, chan) > } > > , where X={t,r}. > > Thus you can redistribute the loops implemented in dw25gmac_dma_init() > to the respective Tx/Rx DMA-channel init methods. > > Am I missing something? I think your visualization of HDMA may be going beyond the application I understand. We are allocating a VDMA for each of the TX/RX channels. The use of additional VDMAs depends on how the device is partitioned for virtualization. In the non-SRIOV case the remaining VDMAs will remain unused. Please let me know if I missed your question. > > -Serge() > > > [...]
Hi Serge, On Mon, Sep 16, 2024 at 4:32 PM Jitendra Vegiraju <jitendra.vegiraju@broadcom.com> wrote: > > Hi Serge, > > On Tue, Sep 10, 2024 at 12:25 PM Serge Semin <fancer.lancer@gmail.com> wrote: > > > > > +static u32 decode_vdma_count(u32 regval) > > > +{ > > > + /* compressed encoding for vdma count > > > + * regval: VDMA count > > > + * 0-15 : 1 - 16 > > > + * 16-19 : 20, 24, 28, 32 > > > + * 20-23 : 40, 48, 56, 64 > > > + * 24-27 : 80, 96, 112, 128 > > > + */ > > > + if (regval < 16) > > > + return regval + 1; > > > + return (4 << ((regval - 16) / 4)) * ((regval % 4) + 5); > > > > The shortest code isn't always the best one. This one gives me a > > headache in trying to decipher whether it really matches to what is > > described in the comment. What about just: > > > > if (regval < 16) /* Direct mapping */ > > return regval + 1; > > else if (regval < 20) /* 20, 24, 28, 32 */ > > return 20 + (regval - 16) * 4; > > else if (regval < 24) /* 40, 48, 56, 64 */ > > return 40 + (regval - 20) * 8; > > else if (regval < 28) /* 80, 96, 112, 128 */ > > return 80 + (regval - 24) * 16; > > > > ? > Couldn't agree more :) > Thanks, I will replace it with your code, which is definitely more readable. > > > > > > +} > > > + > > > +static void dw25gmac_read_hdma_limits(void __iomem *ioaddr, > > > + struct stmmac_hdma_cfg *hdma) > > > +{ > > > + u32 hw_cap; > > > + > > > + /* Get VDMA/PDMA counts from HW */ > > > + hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > > > > > > > + hdma->tx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > > > + hw_cap)); > > > + hdma->rx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > > > + hw_cap)); > > > + hdma->tx_pdmas = FIELD_GET(XGMAC_HWFEAT_TXQCNT, hw_cap) + 1; > > > + hdma->rx_pdmas = FIELD_GET(XGMAC_HWFEAT_RXQCNT, hw_cap) + 1; > > > > Hmm, these are the Tx/Rx DMA-channels and Tx/Rx MTL-queues count > > fields. Can't you just use the > > dma_features::{number_tx_channel,number_tx_queues} and > > dma_features::{number_rx_channel,number_rx_queues} fields to store the > > retrieved data? > > > > Moreover why not to add the code above to the dwxgmac2_get_hw_feature() method? > > > Thanks, I missed the reuse of existing fields. > However, since the VDMA count has a slightly bigger bitmask, we need to extract > VDMA channel count as per DW25GMAC spec. > Instead of duplicating dwxgmac2_get_hw_feature(), should we add wrapper for > dw25gmac, something like the following? 
> dw25gmac_get_hw_feature(ioaddr, dma_cap) > { > u32 hw_cap; > int rc; > rc = dwxgmac2_get_hw_feature(ioaddr, dma_cap); > /* Get VDMA counts from HW */ > hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > dma_cap->num_tx_channels = > decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > hw_cap)); > dma_cap->num_rx_channels = > decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > hw_cap)); > return rc; > } > > > > +} > > > + > > > +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv) > > > +{ > > > + struct plat_stmmacenet_data *plat = priv->plat; > > > + struct device *dev = priv->device; > > > + struct stmmac_hdma_cfg *hdma; > > > + int i; > > > + > > > + hdma = devm_kzalloc(dev, > > > + sizeof(*plat->dma_cfg->hdma_cfg), > > > + GFP_KERNEL); > > > + if (!hdma) > > > + return -ENOMEM; > > > + > > > + dw25gmac_read_hdma_limits(priv->ioaddr, hdma); > > > + > > > + hdma->tvdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->tvdma_tc) * hdma->tx_vdmas, > > > + GFP_KERNEL); > > > + if (!hdma->tvdma_tc) > > > + return -ENOMEM; > > > + > > > + hdma->rvdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->rvdma_tc) * hdma->rx_vdmas, > > > + GFP_KERNEL); > > > + if (!hdma->rvdma_tc) > > > + return -ENOMEM; > > > + > > > + hdma->tpdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->tpdma_tc) * hdma->tx_pdmas, > > > + GFP_KERNEL); > > > + if (!hdma->tpdma_tc) > > > + return -ENOMEM; > > > + > > > + hdma->rpdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->rpdma_tc) * hdma->rx_pdmas, > > > + GFP_KERNEL); > > > + if (!hdma->rpdma_tc) > > > + return -ENOMEM; > > > + > > > > > + /* Initialize one-to-one mapping for each of the used queues */ > > > + for (i = 0; i < plat->tx_queues_to_use; i++) { > > > + hdma->tvdma_tc[i] = i; > > > + hdma->tpdma_tc[i] = i; > > > + } > > > + for (i = 0; i < plat->rx_queues_to_use; i++) { > > > + hdma->rvdma_tc[i] = i; > > > + hdma->rpdma_tc[i] = i; > > > + } > > > > So the Traffic Class ID is initialized for the > > tx_queues_to_use/rx_queues_to_use number of channels only, right? What > > about the Virtual and Physical DMA-channels with numbers greater than > > these values? > > > You have brought up a question that applies to remaining comments in > this file as well. > How the VDMA/PDMA mapping is used depends on the device/glue driver. > For example in > our SoC the remaining VDMAs are meant to be used with SRIOV virtual > functions and not > all of them are available for physical function. > Since additional VDMAs/PDMAs remain unused in hardware I let them stay at their > default values. No traffic is expected to be mapped to unused V/PDMAs. > I couldn't think of a reason for it to be an issue from a driver perspective. > Please let me know, if I am missing something or we need to address a > use case with bigger scope. > The responses for following comments also depend on what approach we take here. 
> > > > + plat->dma_cfg->hdma_cfg = hdma; > > > + > > > + return 0; > > > +} > > > + > > > + > > > +void dw25gmac_dma_init(void __iomem *ioaddr, > > > + struct stmmac_dma_cfg *dma_cfg) > > > +{ > > > + u32 value; > > > + u32 i; > > > + > > > + value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); > > > + value &= ~(XGMAC_AAL | XGMAC_EAME); > > > + if (dma_cfg->aal) > > > + value |= XGMAC_AAL; > > > + if (dma_cfg->eame) > > > + value |= XGMAC_EAME; > > > + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); > > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_vdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i); > > > + value &= ~XXVGMAC_TXDCSZ; > > > + value |= FIELD_PREP(XXVGMAC_TXDCSZ, > > > + XXVGMAC_TXDCSZ_256BYTES); > > > + value &= ~XXVGMAC_TDPS; > > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_TDPS_HALF); > > > + wr_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i, value); > > > + } > > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_vdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i); > > > + value &= ~XXVGMAC_RXDCSZ; > > > + value |= FIELD_PREP(XXVGMAC_RXDCSZ, > > > + XXVGMAC_RXDCSZ_256BYTES); > > > + value &= ~XXVGMAC_RDPS; > > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_RDPS_HALF); > > > + wr_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i, value); > > > + } > > > + > > > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_pdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i); > > > + value &= ~(XXVGMAC_TXPBL | XXVGMAC_TPBLX8_MODE); > > > + if (dma_cfg->pblx8) > > > + value |= XXVGMAC_TPBLX8_MODE; > > > + value |= FIELD_PREP(XXVGMAC_TXPBL, dma_cfg->pbl); > > > + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i, value); > > > + xgmac4_tp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->tpdma_tc[i]); > > > + } > > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_pdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i); > > > + value &= ~(XXVGMAC_RXPBL | XXVGMAC_RPBLX8_MODE); > > > + if (dma_cfg->pblx8) > > > + value |= XXVGMAC_RPBLX8_MODE; > > > + value |= FIELD_PREP(XXVGMAC_RXPBL, dma_cfg->pbl); > > > + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i, value); > > > + xgmac4_rp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->rpdma_tc[i]); > > > > What if tx_pdmas doesn't match plat_stmmacenet_data::tx_queues_to_use > > and rx_pdmas doesn't match to plat_stmmacenet_data::rx_queues_to_use? > > > > If they don't then you'll get out of the initialized tpdma_tc/rpdma_tc > > fields and these channels will be pre-initialized with the zero TC. Is > > that what expected? I doubt so. > > > As mentioned in the previous response the remaining resources are unused > and no traffic is mapped to those resources. > > > > + } > > > +} > > > + > > > > > +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, > > > + void __iomem *ioaddr, > > > + struct stmmac_dma_cfg *dma_cfg, > > > + dma_addr_t dma_addr, u32 chan) > > > +{ > > > + u32 value; > > > + > > > > > + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > > + value &= ~XXVGMAC_TVDMA2TCMP; > > > + value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, > > > + dma_cfg->hdma_cfg->tvdma_tc[chan]); > > > + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > > > Please note this will have only first > > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} VDMA > > channels initialized. Don't you have much more than just 4 channels? > > > Yes, there are 32 VDMA channels on this device. In our application the > additional channels are partitioned for use with SRIOV virtual functions. 
> Similar to PDMA comment above, the additional VDMAs are not enabled, > and left in default state. > My thinking is, when another 25gmac device comes along that requires a > different mapping we may need to add the ability to set the mapping in > glue driver. > We can support this by adding a check in dw25gmac_setup() > @@ -1708,8 +1708,10 @@ int dw25gmac_setup(struct stmmac_priv *priv) > mac->mii.clk_csr_shift = 19; > mac->mii.clk_csr_mask = GENMASK(21, 19); > > - /* Allocate and initialize hdma mapping */ > - return dw25gmac_hdma_cfg_init(priv); > + /* Allocate and initialize hdma mapping, if not done by glue driver. */ > + if (!priv->plat->dma_cfg->hdma_cfg) > + return dw25gmac_hdma_cfg_init(priv); > + return 0; > } > > > > + > > > + writel(upper_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); > > > + writel(lower_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); > > > +} > > > + > > > +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, > > > + void __iomem *ioaddr, > > > + struct stmmac_dma_cfg *dma_cfg, > > > + dma_addr_t dma_addr, u32 chan) > > > +{ > > > + u32 value; > > > + > > > > > + value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > > + value &= ~XXVGMAC_RVDMA2TCMP; > > > + value |= FIELD_PREP(XXVGMAC_RVDMA2TCMP, > > > + dma_cfg->hdma_cfg->rvdma_tc[chan]); > > > + writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > > > The same question. > > > > > + > > > + writel(upper_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); > > > + writel(lower_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); > > > +} > > > > These methods are called for each > > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} > > DMA-channel/Queue. The static mapping means you'll have each > > PDMA/Queue assigned a static traffic class ID corresponding to the > > channel ID. Meanwhile the VDMA channels are supposed to be initialized > > with the TC ID corresponding to the matching PDMA ID. > > > > The TC ID in this case is passed as the DMA/Queue channel ID. Then the > > Tx/Rx DMA-channels init methods can be converted to: > > > > dw25gmac_dma_init_Xx_chan(chan) > > { > > /* Map each chan-th VDMA to the single chan PDMA by assigning > > * the static TC ID. > > */ > > for (i = chan; i < Xx_vdmas; i += (Xx_vdmas / Xx_queues_to_use)) { > > /* Initialize VDMA channels */ > > XXVGMAC_TVDMA2TCMP = chan; > > } > > > > /* Assign the static TC ID to the specified PDMA channel */ > > xgmac4_rp2tc_map(chan, chan) > > } > > > > , where X={t,r}. > > > > Thus you can redistribute the loops implemented in dw25gmac_dma_init() > > to the respective Tx/Rx DMA-channel init methods. > > > > Am I missing something? > I think your visualization of HDMA may be going beyond the application > I understand. > We are allocating a VDMA for each of the TX/RX channels. The use of > additional VDMAs > depends on how the device is partitioned for virtualization. > In the non-SRIOV case the remaining VDMAs will remain unused. > Please let me know if I missed your question. > > > > -Serge() > > > > > [...] When you get a chance, I would like to get your input on the approach we need to take to incrementally add dw25gmac support. In the last conversation there were some open questions around the case of initializing unused VDMA channels and related combination scenarios. The hdma mapping provides flexibility for virtualization. However, our SoC device cannot use all VDMAs with one PCI function. 
The VDMAs are partitioned for SRIOV use in the firmware. This SoC defaults to 8 functions
with 4 VDMA channels each. The initial effort is to support one PCI physical function with
4 VDMA channels.

Also, currently the stmmac driver has an inferred one-to-one relation between netif channels
and physical DMAs. It would be a complex change to support each VDMA as its own netif channel
and map fewer physical DMAs. Hence, for the initial submission a one-to-one mapping is assumed.

As you mentioned, a static one-to-one VDMA-TC-PDMA mapping doesn't require the additional
complexity of managing these mappings as proposed in the current patch series with
*struct stmmac_hdma_cfg*.

To introduce dw25gmac incrementally, I am thinking of two approaches:
1. Take the current patch series forward using *struct stmmac_hdma_cfg*, keeping the unused
   VDMAs in their default state. We need to fix the initialization loops to only initialize
   the VDMAs and PDMAs being used.
2. Simplify the initial patch by removing *struct stmmac_hdma_cfg* from the patch series and
   still use a static VDMA-TC-PDMA mapping.

Please share your thoughts. If it helps, I can send a patch series with option 2 above after
addressing all other review comments.

Appreciate your guidance!

-Jitendra
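To make option 2 concrete, a rough sketch of the per-channel Tx init with a fixed chan -> TC -> chan mapping and no struct stmmac_hdma_cfg could look as follows. The register macros and the xgmac4_tp2tc_map() helper are the ones from the posted patch; folding the PDMA mapping into this function (rather than into dw25gmac_dma_init()) is only an illustration of the simplification under discussion, not the final code:

void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv,
			       void __iomem *ioaddr,
			       struct stmmac_dma_cfg *dma_cfg,
			       dma_addr_t dma_addr, u32 chan)
{
	u32 value;

	/* Statically map the chan-th Tx VDMA to traffic class 'chan' */
	value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
	value &= ~XXVGMAC_TVDMA2TCMP;
	value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, chan);
	writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));

	/* Map the chan-th Tx PDMA to the same traffic class */
	xgmac4_tp2tc_map(ioaddr, chan, chan);

	/* Program the descriptor ring base address as before */
	writel(upper_32_bits(dma_addr),
	       ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan));
	writel(lower_32_bits(dma_addr),
	       ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan));
}

The Rx side would mirror this with XGMAC_DMA_CH_RX_CONTROL, XXVGMAC_RVDMA2TCMP and xgmac4_rp2tc_map(); VDMAs and PDMAs beyond tx_queues_to_use/rx_queues_to_use stay in their reset state, as described in the email above.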
Hi Jitendra

On Fri, Oct 04, 2024 at 09:05:36AM GMT, Jitendra Vegiraju wrote:
>
> When you get a chance, I would like to get your input on the approach we need
> to take to incrementally add dw25gmac support.
>

Sorry for the delay with response. I've been quite busy lately. I'll get back to this
patch set review early next week and give you my thoughts regarding all your questions.

-Serge(y)
On Mon, Sep 16, 2024 at 04:32:33PM GMT, Jitendra Vegiraju wrote: > Hi Serge, > > On Tue, Sep 10, 2024 at 12:25 PM Serge Semin <fancer.lancer@gmail.com> wrote: > > > > > +static u32 decode_vdma_count(u32 regval) > > > +{ > > > + /* compressed encoding for vdma count > > > + * regval: VDMA count > > > + * 0-15 : 1 - 16 > > > + * 16-19 : 20, 24, 28, 32 > > > + * 20-23 : 40, 48, 56, 64 > > > + * 24-27 : 80, 96, 112, 128 > > > + */ > > > + if (regval < 16) > > > + return regval + 1; > > > + return (4 << ((regval - 16) / 4)) * ((regval % 4) + 5); > > > > The shortest code isn't always the best one. This one gives me a > > headache in trying to decipher whether it really matches to what is > > described in the comment. What about just: > > > > if (regval < 16) /* Direct mapping */ > > return regval + 1; > > else if (regval < 20) /* 20, 24, 28, 32 */ > > return 20 + (regval - 16) * 4; > > else if (regval < 24) /* 40, 48, 56, 64 */ > > return 40 + (regval - 20) * 8; > > else if (regval < 28) /* 80, 96, 112, 128 */ > > return 80 + (regval - 24) * 16; > > > > ? > Couldn't agree more :) > Thanks, I will replace it with your code, which is definitely more readable. > > > > > > +} > > > + > > > +static void dw25gmac_read_hdma_limits(void __iomem *ioaddr, > > > + struct stmmac_hdma_cfg *hdma) > > > +{ > > > + u32 hw_cap; > > > + > > > + /* Get VDMA/PDMA counts from HW */ > > > + hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > > > > > > > + hdma->tx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > > > + hw_cap)); > > > + hdma->rx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > > > + hw_cap)); > > > + hdma->tx_pdmas = FIELD_GET(XGMAC_HWFEAT_TXQCNT, hw_cap) + 1; > > > + hdma->rx_pdmas = FIELD_GET(XGMAC_HWFEAT_RXQCNT, hw_cap) + 1; > > > > Hmm, these are the Tx/Rx DMA-channels and Tx/Rx MTL-queues count > > fields. Can't you just use the > > dma_features::{number_tx_channel,number_tx_queues} and > > dma_features::{number_rx_channel,number_rx_queues} fields to store the > > retrieved data? > > > > Moreover why not to add the code above to the dwxgmac2_get_hw_feature() method? > > > Thanks, I missed the reuse of existing fields. > However, since the VDMA count has a slightly bigger bitmask, we need to extract > VDMA channel count as per DW25GMAC spec. > Instead of duplicating dwxgmac2_get_hw_feature(), should we add wrapper for > dw25gmac, something like the following? Yeah, and there is the encoded fields value. Your suggestion sounds reasonable. 
> dw25gmac_get_hw_feature(ioaddr, dma_cap) > { > u32 hw_cap; > int rc; s/rc/ret + newline > rc = dwxgmac2_get_hw_feature(ioaddr, dma_cap); newline > /* Get VDMA counts from HW */ > hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > dma_cap->num_tx_channels = > decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > hw_cap)); > dma_cap->num_rx_channels = > decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > hw_cap)); newline > return rc; > } > > > > +} > > > + > > > +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv) > > > +{ > > > + struct plat_stmmacenet_data *plat = priv->plat; > > > + struct device *dev = priv->device; > > > + struct stmmac_hdma_cfg *hdma; > > > + int i; > > > + > > > + hdma = devm_kzalloc(dev, > > > + sizeof(*plat->dma_cfg->hdma_cfg), > > > + GFP_KERNEL); > > > + if (!hdma) > > > + return -ENOMEM; > > > + > > > + dw25gmac_read_hdma_limits(priv->ioaddr, hdma); > > > + > > > + hdma->tvdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->tvdma_tc) * hdma->tx_vdmas, > > > + GFP_KERNEL); > > > + if (!hdma->tvdma_tc) > > > + return -ENOMEM; > > > + > > > + hdma->rvdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->rvdma_tc) * hdma->rx_vdmas, > > > + GFP_KERNEL); > > > + if (!hdma->rvdma_tc) > > > + return -ENOMEM; > > > + > > > + hdma->tpdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->tpdma_tc) * hdma->tx_pdmas, > > > + GFP_KERNEL); > > > + if (!hdma->tpdma_tc) > > > + return -ENOMEM; > > > + > > > + hdma->rpdma_tc = devm_kzalloc(dev, > > > + sizeof(*hdma->rpdma_tc) * hdma->rx_pdmas, > > > + GFP_KERNEL); > > > + if (!hdma->rpdma_tc) > > > + return -ENOMEM; > > > + > > > > > + /* Initialize one-to-one mapping for each of the used queues */ > > > + for (i = 0; i < plat->tx_queues_to_use; i++) { > > > + hdma->tvdma_tc[i] = i; > > > + hdma->tpdma_tc[i] = i; > > > + } > > > + for (i = 0; i < plat->rx_queues_to_use; i++) { > > > + hdma->rvdma_tc[i] = i; > > > + hdma->rpdma_tc[i] = i; > > > + } > > > > So the Traffic Class ID is initialized for the > > tx_queues_to_use/rx_queues_to_use number of channels only, right? What > > about the Virtual and Physical DMA-channels with numbers greater than > > these values? > > > You have brought up a question that applies to remaining comments in > this file as well. > How the VDMA/PDMA mapping is used depends on the device/glue driver. > For example in > our SoC the remaining VDMAs are meant to be used with SRIOV virtual > functions and not > all of them are available for physical function. > Since additional VDMAs/PDMAs remain unused in hardware I let them stay at their > default values. No traffic is expected to be mapped to unused V/PDMAs. > I couldn't think of a reason for it to be an issue from a driver perspective. > Please let me know, if I am missing something or we need to address a > use case with bigger scope. > The responses for following comments also depend on what approach we take here. If they are unused, then I suggest to follow the KISS principle. See my last comment for details. 
> > > > + plat->dma_cfg->hdma_cfg = hdma; > > > + > > > + return 0; > > > +} > > > + > > > + > > > +void dw25gmac_dma_init(void __iomem *ioaddr, > > > + struct stmmac_dma_cfg *dma_cfg) > > > +{ > > > + u32 value; > > > + u32 i; > > > + > > > + value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); > > > + value &= ~(XGMAC_AAL | XGMAC_EAME); > > > + if (dma_cfg->aal) > > > + value |= XGMAC_AAL; > > > + if (dma_cfg->eame) > > > + value |= XGMAC_EAME; > > > + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); > > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_vdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i); > > > + value &= ~XXVGMAC_TXDCSZ; > > > + value |= FIELD_PREP(XXVGMAC_TXDCSZ, > > > + XXVGMAC_TXDCSZ_256BYTES); > > > + value &= ~XXVGMAC_TDPS; > > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_TDPS_HALF); > > > + wr_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i, value); > > > + } > > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_vdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i); > > > + value &= ~XXVGMAC_RXDCSZ; > > > + value |= FIELD_PREP(XXVGMAC_RXDCSZ, > > > + XXVGMAC_RXDCSZ_256BYTES); > > > + value &= ~XXVGMAC_RDPS; > > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_RDPS_HALF); > > > + wr_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i, value); > > > + } > > > + > > > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_pdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i); > > > + value &= ~(XXVGMAC_TXPBL | XXVGMAC_TPBLX8_MODE); > > > + if (dma_cfg->pblx8) > > > + value |= XXVGMAC_TPBLX8_MODE; > > > + value |= FIELD_PREP(XXVGMAC_TXPBL, dma_cfg->pbl); > > > + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i, value); > > > + xgmac4_tp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->tpdma_tc[i]); > > > + } > > > + > > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_pdmas; i++) { > > > + value = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i); > > > + value &= ~(XXVGMAC_RXPBL | XXVGMAC_RPBLX8_MODE); > > > + if (dma_cfg->pblx8) > > > + value |= XXVGMAC_RPBLX8_MODE; > > > + value |= FIELD_PREP(XXVGMAC_RXPBL, dma_cfg->pbl); > > > + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i, value); > > > + xgmac4_rp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->rpdma_tc[i]); > > > > What if tx_pdmas doesn't match plat_stmmacenet_data::tx_queues_to_use > > and rx_pdmas doesn't match to plat_stmmacenet_data::rx_queues_to_use? > > > > If they don't then you'll get out of the initialized tpdma_tc/rpdma_tc > > fields and these channels will be pre-initialized with the zero TC. Is > > that what expected? I doubt so. > > > As mentioned in the previous response the remaining resources are unused > and no traffic is mapped to those resources. > > > > + } > > > +} > > > + > > > > > +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, > > > + void __iomem *ioaddr, > > > + struct stmmac_dma_cfg *dma_cfg, > > > + dma_addr_t dma_addr, u32 chan) > > > +{ > > > + u32 value; > > > + > > > > > + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > > + value &= ~XXVGMAC_TVDMA2TCMP; > > > + value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, > > > + dma_cfg->hdma_cfg->tvdma_tc[chan]); > > > + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > > > Please note this will have only first > > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} VDMA > > channels initialized. Don't you have much more than just 4 channels? > > > Yes, there are 32 VDMA channels on this device. In our application the > additional channels are partitioned for use with SRIOV virtual functions. 
> Similar to PDMA comment above, the additional VDMAs are not enabled, > and left in default state. > My thinking is, when another 25gmac device comes along that requires a > different mapping we may need to add the ability to set the mapping in > glue driver. > We can support this by adding a check in dw25gmac_setup() > @@ -1708,8 +1708,10 @@ int dw25gmac_setup(struct stmmac_priv *priv) > mac->mii.clk_csr_shift = 19; > mac->mii.clk_csr_mask = GENMASK(21, 19); > > - /* Allocate and initialize hdma mapping */ > - return dw25gmac_hdma_cfg_init(priv); > + /* Allocate and initialize hdma mapping, if not done by glue driver. */ > + if (!priv->plat->dma_cfg->hdma_cfg) > + return dw25gmac_hdma_cfg_init(priv); > + return 0; > } So the use-case is actually hypothetical. Then I don't see a point in adding the stmmac_hdma_cfg structure. See my last comment for details. > > > > + > > > + writel(upper_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); > > > + writel(lower_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); > > > +} > > > + > > > +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, > > > + void __iomem *ioaddr, > > > + struct stmmac_dma_cfg *dma_cfg, > > > + dma_addr_t dma_addr, u32 chan) > > > +{ > > > + u32 value; > > > + > > > > > + value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > > + value &= ~XXVGMAC_RVDMA2TCMP; > > > + value |= FIELD_PREP(XXVGMAC_RVDMA2TCMP, > > > + dma_cfg->hdma_cfg->rvdma_tc[chan]); > > > + writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > > > The same question. > > > > > + > > > + writel(upper_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); > > > + writel(lower_32_bits(dma_addr), > > > + ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); > > > +} > > > > These methods are called for each > > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} > > DMA-channel/Queue. The static mapping means you'll have each > > PDMA/Queue assigned a static traffic class ID corresponding to the > > channel ID. Meanwhile the VDMA channels are supposed to be initialized > > with the TC ID corresponding to the matching PDMA ID. > > > > The TC ID in this case is passed as the DMA/Queue channel ID. Then the > > Tx/Rx DMA-channels init methods can be converted to: > > > > dw25gmac_dma_init_Xx_chan(chan) > > { > > /* Map each chan-th VDMA to the single chan PDMA by assigning > > * the static TC ID. > > */ > > for (i = chan; i < Xx_vdmas; i += (Xx_vdmas / Xx_queues_to_use)) { > > /* Initialize VDMA channels */ > > XXVGMAC_TVDMA2TCMP = chan; > > } > > > > /* Assign the static TC ID to the specified PDMA channel */ > > xgmac4_rp2tc_map(chan, chan) > > } > > > > , where X={t,r}. > > > > Thus you can redistribute the loops implemented in dw25gmac_dma_init() > > to the respective Tx/Rx DMA-channel init methods. > > > > Am I missing something? > I think your visualization of HDMA may be going beyond the application > I understand. > We are allocating a VDMA for each of the TX/RX channels. The use of > additional VDMAs > depends on how the device is partitioned for virtualization. > In the non-SRIOV case the remaining VDMAs will remain unused. > Please let me know if I missed your question. So you say that you need a 1:1 mapping between First-VDMAs:PDMAs/Queues, and there are only tx_queues_to_use/rx_queues_to_use pairs utilized. Right? 
If so I don't really see a need in implementing the overcomplicated solution you suggest, especially seeing the core driver already supports an infrastructure for the DMA-Queue managing: 1. Rx path: dwxgmac2_map_mtl_to_dma() - set VDMA-Rx-Queue TC dwxgmac2_rx_queue_enable() 2. Tx path: dwxgmac2_dma_tx_mode() - set VDMA-Tx-Queue TC In the first case the mapping can be changed via the MTL DT-nodes. In the second case the mapping is one-on-one static. Thus you'll be able to keep the dw25gmac_dma_init_tx_chan()/dw25gmac_dma_init_rx_chan() method simple initializing the PBL/Descriptor/etc-related stuff only, as it's done for the DW QoS Eth and XGMAC/XLGMAC IP-cores. You won't need to introduce a new structure stmmac_hdma_cfg, especially either seeing it is anyway redundant for your use-case. BTW could you clarify: 1. is the TCes specified for the VDMA/PDMA-queue mapping the same as the TCs assigned to the Tx-Queues in the MTL_TxQ0_Operation_Mode register? If they aren't, then what is the relation between them? 2. Similarly, if there is a TC-based Rx VDMA/Queue mapping, then what's the function of the MTL_RxQ_DMA_MAP* registers? -Serge(y) > > > > -Serge() > > > > > [...]
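Folding the style notes above into the wrapper proposed earlier, and using the dma_features field names referenced at the start of the review (number_tx_channel/number_rx_channel) instead of the placeholder names, gives roughly the following sketch. It assumes the wrapper sits next to dwxgmac2_get_hw_feature() (e.g. in dwxgmac2_dma.c) so the existing helper can be called directly:

static int dw25gmac_get_hw_feature(void __iomem *ioaddr,
				   struct dma_features *dma_cap)
{
	u32 hw_cap;
	int ret;

	ret = dwxgmac2_get_hw_feature(ioaddr, dma_cap);

	/* DW25GMAC uses wider HW_FEATURE2 fields for the VDMA counts,
	 * so re-read them and override the channel counts.
	 */
	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2);
	dma_cap->number_tx_channel =
		decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, hw_cap));
	dma_cap->number_rx_channel =
		decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, hw_cap));

	return ret;
}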
Hi Jitendra On Fri, Oct 04, 2024 at 09:05:36AM GMT, Jitendra Vegiraju wrote: > Hi Serge, > > On Mon, Sep 16, 2024 at 4:32 PM Jitendra Vegiraju > <jitendra.vegiraju@broadcom.com> wrote: > > > ... > > When you get a chance, I would like to get your input on the approach we need > to take to incrementally add dw25gmac support. > > In the last conversation there were some open questions around the case of > initializing unused VDMA channels and related combination scenarios. > > The hdma mapping provides flexibility for virtualization. However, our > SoC device cannot use all VDMAs with one PCI function. The VDMAs are > partitioned for SRIOV use in the firmware. This SoC defaults to 8 functions > with 4 VDMA channels each. The initial effort is to support one PCI physical > function with 4 VDMA channels. > Also, currently the stmmac driver has inferred one-to-one relation between > netif channels and physical DMAs. It would be a complex change to support > each VDMA as its own netif channel and mapping fewer physical DMAs. > Hence, for initial submission one-to-one mapping is assumed. > > As you mentioned, a static one-to-one mapping of VDMA-TC-PDMA doesn't > require the additional complexity of managing these mappings as proposed > in the current patch series with *struct stmmac_hdma_cfg*. > > To introduce dw25gmac incrementally, I am thinking of two approaches, > 1. Take the current patch series forward using *struct stmmac_hdma_cfg*, > keeping the unused VDMAs in default state. We need to fix the > initialization > loops to only initialize the VDMA and PDMAs being used. > 2. Simplify the initial patch by removing *struct hdma_cfg* from the patch > series and still use static VDMA-TC-PDMA mapping. > Please share your thoughts. > If it helps, I can send patch series with option 2 above after > addressing all other > review comments. IMO approach 2 seems more preferable. Please find my comments to your previous email in this thread: https://lore.kernel.org/netdev/sn5epdl4jdwj4t6mo55w4poz6vkdcuzceezsmpb7447hmaj2ot@gmlxst7gdcix/ > > Appreciate your guidance! Always welcome. Thank you for submitting the patches and your patience to proceed with the review process. -Serge(y) > -Jitendra
Hi Serge, Thanks for the feedback. On Thu, Oct 10, 2024 at 4:55 PM Serge Semin <fancer.lancer@gmail.com> wrote: > > On Mon, Sep 16, 2024 at 04:32:33PM GMT, Jitendra Vegiraju wrote: > > Hi Serge, > > > > On Tue, Sep 10, 2024 at 12:25 PM Serge Semin <fancer.lancer@gmail.com> wrote: > > > > > > > +static u32 decode_vdma_count(u32 regval) > > > > +{ > > > > + /* compressed encoding for vdma count > > > > + * regval: VDMA count > > > > + * 0-15 : 1 - 16 > > > > + * 16-19 : 20, 24, 28, 32 > > > > + * 20-23 : 40, 48, 56, 64 > > > > + * 24-27 : 80, 96, 112, 128 > > > > + */ > > > > + if (regval < 16) > > > > + return regval + 1; > > > > + return (4 << ((regval - 16) / 4)) * ((regval % 4) + 5); > > > > > > The shortest code isn't always the best one. This one gives me a > > > headache in trying to decipher whether it really matches to what is > > > described in the comment. What about just: > > > > > > if (regval < 16) /* Direct mapping */ > > > return regval + 1; > > > else if (regval < 20) /* 20, 24, 28, 32 */ > > > return 20 + (regval - 16) * 4; > > > else if (regval < 24) /* 40, 48, 56, 64 */ > > > return 40 + (regval - 20) * 8; > > > else if (regval < 28) /* 80, 96, 112, 128 */ > > > return 80 + (regval - 24) * 16; > > > > > > ? > > Couldn't agree more :) > > Thanks, I will replace it with your code, which is definitely more readable. > > > > > > > > > +} > > > > + > > > > +static void dw25gmac_read_hdma_limits(void __iomem *ioaddr, > > > > + struct stmmac_hdma_cfg *hdma) > > > > +{ > > > > + u32 hw_cap; > > > > + > > > > + /* Get VDMA/PDMA counts from HW */ > > > > + hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > > > > > > > > > > + hdma->tx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > > > > + hw_cap)); > > > > + hdma->rx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > > > > + hw_cap)); > > > > + hdma->tx_pdmas = FIELD_GET(XGMAC_HWFEAT_TXQCNT, hw_cap) + 1; > > > > + hdma->rx_pdmas = FIELD_GET(XGMAC_HWFEAT_RXQCNT, hw_cap) + 1; > > > > > > Hmm, these are the Tx/Rx DMA-channels and Tx/Rx MTL-queues count > > > fields. Can't you just use the > > > dma_features::{number_tx_channel,number_tx_queues} and > > > dma_features::{number_rx_channel,number_rx_queues} fields to store the > > > retrieved data? > > > > > > Moreover why not to add the code above to the dwxgmac2_get_hw_feature() method? > > > > > Thanks, I missed the reuse of existing fields. > > > However, since the VDMA count has a slightly bigger bitmask, we need to extract > > VDMA channel count as per DW25GMAC spec. > > Instead of duplicating dwxgmac2_get_hw_feature(), should we add wrapper for > > dw25gmac, something like the following? > > Yeah, and there is the encoded fields value. Your suggestion sounds > reasonable. 
> > > dw25gmac_get_hw_feature(ioaddr, dma_cap) > > { > > u32 hw_cap; > > > int rc; > > s/rc/ret > > + newline > > > rc = dwxgmac2_get_hw_feature(ioaddr, dma_cap); > > newline > > > /* Get VDMA counts from HW */ > > hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); > > dma_cap->num_tx_channels = > > decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, > > hw_cap)); > > dma_cap->num_rx_channels = > > decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, > > hw_cap)); > > newline > Ack > > return rc; > > } > > > > > > +} > > > > + > > > > +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv) > > > > +{ > > > > + struct plat_stmmacenet_data *plat = priv->plat; > > > > + struct device *dev = priv->device; > > > > + struct stmmac_hdma_cfg *hdma; > > > > + int i; > > > > + > > > > + hdma = devm_kzalloc(dev, > > > > + sizeof(*plat->dma_cfg->hdma_cfg), > > > > + GFP_KERNEL); > > > > + if (!hdma) > > > > + return -ENOMEM; > > > > + > > > > + dw25gmac_read_hdma_limits(priv->ioaddr, hdma); > > > > + > > > > + hdma->tvdma_tc = devm_kzalloc(dev, > > > > + sizeof(*hdma->tvdma_tc) * hdma->tx_vdmas, > > > > + GFP_KERNEL); > > > > + if (!hdma->tvdma_tc) > > > > + return -ENOMEM; > > > > + > > > > + hdma->rvdma_tc = devm_kzalloc(dev, > > > > + sizeof(*hdma->rvdma_tc) * hdma->rx_vdmas, > > > > + GFP_KERNEL); > > > > + if (!hdma->rvdma_tc) > > > > + return -ENOMEM; > > > > + > > > > + hdma->tpdma_tc = devm_kzalloc(dev, > > > > + sizeof(*hdma->tpdma_tc) * hdma->tx_pdmas, > > > > + GFP_KERNEL); > > > > + if (!hdma->tpdma_tc) > > > > + return -ENOMEM; > > > > + > > > > + hdma->rpdma_tc = devm_kzalloc(dev, > > > > + sizeof(*hdma->rpdma_tc) * hdma->rx_pdmas, > > > > + GFP_KERNEL); > > > > + if (!hdma->rpdma_tc) > > > > + return -ENOMEM; > > > > + > > > > > > > + /* Initialize one-to-one mapping for each of the used queues */ > > > > + for (i = 0; i < plat->tx_queues_to_use; i++) { > > > > + hdma->tvdma_tc[i] = i; > > > > + hdma->tpdma_tc[i] = i; > > > > + } > > > > + for (i = 0; i < plat->rx_queues_to_use; i++) { > > > > + hdma->rvdma_tc[i] = i; > > > > + hdma->rpdma_tc[i] = i; > > > > + } > > > > > > So the Traffic Class ID is initialized for the > > > tx_queues_to_use/rx_queues_to_use number of channels only, right? What > > > about the Virtual and Physical DMA-channels with numbers greater than > > > these values? > > > > > > You have brought up a question that applies to remaining comments in > > this file as well. > > How the VDMA/PDMA mapping is used depends on the device/glue driver. > > For example in > > our SoC the remaining VDMAs are meant to be used with SRIOV virtual > > functions and not > > all of them are available for physical function. > > Since additional VDMAs/PDMAs remain unused in hardware I let them stay at their > > default values. No traffic is expected to be mapped to unused V/PDMAs. > > I couldn't think of a reason for it to be an issue from a driver perspective. > > Please let me know, if I am missing something or we need to address a > > use case with bigger scope. > > The responses for following comments also depend on what approach we take here. > > If they are unused, then I suggest to follow the KISS principle. See > my last comment for details. 
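Putting the pieces agreed above into one sketch: the readable decoding that
replaces the arithmetic one-liner, plus the wrapper with the review comments
applied (s/rc/ret, blank lines, existing struct dma_features field names).
This assumes the wrapper lives where it can see both dwxgmac2_get_hw_feature()
and decode_vdma_count() (e.g. dwxgmac2_dma.c); the final clamp is an assumption,
since the encoding only defines values 0-27:

/* Decode the compressed VDMA count encoding from XGMAC_HW_FEATURE2 */
static u32 decode_vdma_count(u32 regval)
{
	if (regval < 16)		/* Direct mapping: 1..16 */
		return regval + 1;
	else if (regval < 20)		/* 20, 24, 28, 32 */
		return 20 + (regval - 16) * 4;
	else if (regval < 24)		/* 40, 48, 56, 64 */
		return 40 + (regval - 20) * 8;
	else if (regval < 28)		/* 80, 96, 112, 128 */
		return 80 + (regval - 24) * 16;

	return 128;			/* Assumption: clamp reserved encodings */
}

static int dw25gmac_get_hw_feature(void __iomem *ioaddr,
				   struct dma_features *dma_cap)
{
	u32 hw_cap;
	int ret;

	ret = dwxgmac2_get_hw_feature(ioaddr, dma_cap);

	/* Override the channel counts with the decoded VDMA counts */
	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2);
	dma_cap->number_tx_channel =
		decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, hw_cap));
	dma_cap->number_rx_channel =
		decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, hw_cap));

	return ret;
}

The dw25gmac400_dma_ops table would then presumably point .get_hw_feature at
this wrapper instead of dwxgmac2_get_hw_feature().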
> > > > > > > + plat->dma_cfg->hdma_cfg = hdma; > > > > + > > > > + return 0; > > > > +} > > > > + > > > > + > > > > +void dw25gmac_dma_init(void __iomem *ioaddr, > > > > + struct stmmac_dma_cfg *dma_cfg) > > > > +{ > > > > + u32 value; > > > > + u32 i; > > > > + > > > > + value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); > > > > + value &= ~(XGMAC_AAL | XGMAC_EAME); > > > > + if (dma_cfg->aal) > > > > + value |= XGMAC_AAL; > > > > + if (dma_cfg->eame) > > > > + value |= XGMAC_EAME; > > > > + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); > > > > + > > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_vdmas; i++) { > > > > + value = rd_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i); > > > > + value &= ~XXVGMAC_TXDCSZ; > > > > + value |= FIELD_PREP(XXVGMAC_TXDCSZ, > > > > + XXVGMAC_TXDCSZ_256BYTES); > > > > + value &= ~XXVGMAC_TDPS; > > > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_TDPS_HALF); > > > > + wr_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i, value); > > > > + } > > > > + > > > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_vdmas; i++) { > > > > + value = rd_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i); > > > > + value &= ~XXVGMAC_RXDCSZ; > > > > + value |= FIELD_PREP(XXVGMAC_RXDCSZ, > > > > + XXVGMAC_RXDCSZ_256BYTES); > > > > + value &= ~XXVGMAC_RDPS; > > > > + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_RDPS_HALF); > > > > + wr_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i, value); > > > > + } > > > > + > > > > > > > + for (i = 0; i < dma_cfg->hdma_cfg->tx_pdmas; i++) { > > > > + value = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i); > > > > + value &= ~(XXVGMAC_TXPBL | XXVGMAC_TPBLX8_MODE); > > > > + if (dma_cfg->pblx8) > > > > + value |= XXVGMAC_TPBLX8_MODE; > > > > + value |= FIELD_PREP(XXVGMAC_TXPBL, dma_cfg->pbl); > > > > + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i, value); > > > > + xgmac4_tp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->tpdma_tc[i]); > > > > + } > > > > + > > > > + for (i = 0; i < dma_cfg->hdma_cfg->rx_pdmas; i++) { > > > > + value = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i); > > > > + value &= ~(XXVGMAC_RXPBL | XXVGMAC_RPBLX8_MODE); > > > > + if (dma_cfg->pblx8) > > > > + value |= XXVGMAC_RPBLX8_MODE; > > > > + value |= FIELD_PREP(XXVGMAC_RXPBL, dma_cfg->pbl); > > > > + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i, value); > > > > + xgmac4_rp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->rpdma_tc[i]); > > > > > > What if tx_pdmas doesn't match plat_stmmacenet_data::tx_queues_to_use > > > and rx_pdmas doesn't match to plat_stmmacenet_data::rx_queues_to_use? > > > > > > If they don't then you'll get out of the initialized tpdma_tc/rpdma_tc > > > fields and these channels will be pre-initialized with the zero TC. Is > > > that what expected? I doubt so. > > > > > As mentioned in the previous response the remaining resources are unused > > and no traffic is mapped to those resources. > > > > > > + } > > > > +} > > > > + > > > > > > > +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, > > > > + void __iomem *ioaddr, > > > > + struct stmmac_dma_cfg *dma_cfg, > > > > + dma_addr_t dma_addr, u32 chan) > > > > +{ > > > > + u32 value; > > > > + > > > > > > > + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > > > + value &= ~XXVGMAC_TVDMA2TCMP; > > > > + value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, > > > > + dma_cfg->hdma_cfg->tvdma_tc[chan]); > > > > + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); > > > > > > Please note this will have only first > > > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} VDMA > > > channels initialized. Don't you have much more than just 4 channels? 
> > > > > Yes, there are 32 VDMA channels on this device. In our application the > > additional channels are partitioned for use with SRIOV virtual functions. > > Similar to PDMA comment above, the additional VDMAs are not enabled, > > and left in default state. > > My thinking is, when another 25gmac device comes along that requires a > > different mapping we may need to add the ability to set the mapping in > > glue driver. > > We can support this by adding a check in dw25gmac_setup() > > @@ -1708,8 +1708,10 @@ int dw25gmac_setup(struct stmmac_priv *priv) > > mac->mii.clk_csr_shift = 19; > > mac->mii.clk_csr_mask = GENMASK(21, 19); > > > > - /* Allocate and initialize hdma mapping */ > > - return dw25gmac_hdma_cfg_init(priv); > > + /* Allocate and initialize hdma mapping, if not done by glue driver. */ > > + if (!priv->plat->dma_cfg->hdma_cfg) > > + return dw25gmac_hdma_cfg_init(priv); > > + return 0; > > } > > So the use-case is actually hypothetical. Then I don't see a point in > adding the stmmac_hdma_cfg structure. See my last comment for details. > Yes, It's better to not over-complicate the patch for the hypothetical case. I will remove the new struct in next update. > > > > > > + > > > > + writel(upper_32_bits(dma_addr), > > > > + ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); > > > > + writel(lower_32_bits(dma_addr), > > > > + ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); > > > > +} > > > > + > > > > +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, > > > > + void __iomem *ioaddr, > > > > + struct stmmac_dma_cfg *dma_cfg, > > > > + dma_addr_t dma_addr, u32 chan) > > > > +{ > > > > + u32 value; > > > > + > > > > > > > + value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > > > + value &= ~XXVGMAC_RVDMA2TCMP; > > > > + value |= FIELD_PREP(XXVGMAC_RVDMA2TCMP, > > > > + dma_cfg->hdma_cfg->rvdma_tc[chan]); > > > > + writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); > > > > > > The same question. > > > > > > > + > > > > + writel(upper_32_bits(dma_addr), > > > > + ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); > > > > + writel(lower_32_bits(dma_addr), > > > > + ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); > > > > +} > > > > > > These methods are called for each > > > plat_stmmacenet_data::{tx_queues_to_use,rx_queues_to_use} > > > DMA-channel/Queue. The static mapping means you'll have each > > > PDMA/Queue assigned a static traffic class ID corresponding to the > > > channel ID. Meanwhile the VDMA channels are supposed to be initialized > > > with the TC ID corresponding to the matching PDMA ID. > > > > > > The TC ID in this case is passed as the DMA/Queue channel ID. Then the > > > Tx/Rx DMA-channels init methods can be converted to: > > > > > > dw25gmac_dma_init_Xx_chan(chan) > > > { > > > /* Map each chan-th VDMA to the single chan PDMA by assigning > > > * the static TC ID. > > > */ > > > for (i = chan; i < Xx_vdmas; i += (Xx_vdmas / Xx_queues_to_use)) { > > > /* Initialize VDMA channels */ > > > XXVGMAC_TVDMA2TCMP = chan; > > > } > > > > > > /* Assign the static TC ID to the specified PDMA channel */ > > > xgmac4_rp2tc_map(chan, chan) > > > } > > > > > > , where X={t,r}. > > > > > > Thus you can redistribute the loops implemented in dw25gmac_dma_init() > > > to the respective Tx/Rx DMA-channel init methods. > > > > > > Am I missing something? > > I think your visualization of HDMA may be going beyond the application > > I understand. > > We are allocating a VDMA for each of the TX/RX channels. 
> > The use of additional VDMAs depends on how the device is partitioned for
> > virtualization. In the non-SRIOV case the remaining VDMAs will remain
> > unused.
> > Please let me know if I missed your question.
>
> So you say that you need a 1:1 mapping between First-VDMAs:PDMAs/Queues,
> and there are only tx_queues_to_use/rx_queues_to_use pairs utilized.
> Right? If so, I don't really see a need to implement the overcomplicated
> solution you suggest, especially seeing the core driver already supports
> an infrastructure for DMA-Queue management:
> 1. Rx path: dwxgmac2_map_mtl_to_dma() - set VDMA-Rx-Queue TC
>    dwxgmac2_rx_queue_enable()
> 2. Tx path: dwxgmac2_dma_tx_mode() - set VDMA-Tx-Queue TC
>
> In the first case the mapping can be changed via the MTL DT-nodes. In
> the second case the mapping is one-to-one static. Thus you'll be able
> to keep the dw25gmac_dma_init_tx_chan()/dw25gmac_dma_init_rx_chan()
> methods simple, initializing only the PBL/descriptor/etc. related stuff,
> as it's done for the DW QoS Eth and XGMAC/XLGMAC IP-cores. You won't
> need to introduce the new stmmac_hdma_cfg structure, especially seeing
> it is redundant for your use-case anyway.
>
> BTW, could you clarify:
>
> 1. Are the TCs specified for the VDMA/PDMA-queue mapping the same as
>    the TCs assigned to the Tx-Queues in the MTL_TxQ0_Operation_Mode
>    register? If they aren't, then what is the relation between them?
>
In the register documentation for the HDMA XGMAC IP, the Q2TCMAP field in
MTL_TxQ0_Operation_Mode is marked as *reserved* and is ignored. The
VDMA->TC->PDMA mapping is used for DMA scheduling. This static mapping can
be done in dw25gmac_dma_init_tx_chan() and dw25gmac_dma_init_rx_chan().

> 2. Similarly, if there is a TC-based Rx VDMA/Queue mapping, then
>    what's the function of the MTL_RxQ_DMA_MAP* registers?

In the RX direction, MTL_RxQ_DMA_MAP* decides the VDMA channel for a given
RXQ. The TC for a VDMA channel is decided by the VDMA/TC mapping. Then the
TC-to-PDMA mapping is used to pick the PDMA for the actual DMA operation.

>
> -Serge(y)
>
> >
> > > -Serge()
> > >
> > > > [...]
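To make the agreed direction concrete, a minimal sketch of the Tx channel
init with the static per-channel mapping folded in. It reuses the helpers and
field names from the posted patch (xgmac4_tp2tc_map(), XXVGMAC_TVDMA2TCMP),
assumes struct stmmac_hdma_cfg has been dropped, and leaves VDMAs/PDMAs beyond
tx_queues_to_use untouched; the Rx counterpart would mirror it with
XXVGMAC_RVDMA2TCMP and xgmac4_rp2tc_map():

void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv,
			       void __iomem *ioaddr,
			       struct stmmac_dma_cfg *dma_cfg,
			       dma_addr_t dma_addr, u32 chan)
{
	u32 value;

	/* Static mapping: VDMA 'chan' -> TC 'chan' -> PDMA 'chan' */
	value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));
	value &= ~XXVGMAC_TVDMA2TCMP;
	value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, chan);
	writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan));

	/* Assign the same static TC ID to the matching PDMA channel */
	xgmac4_tp2tc_map(ioaddr, chan, chan);

	writel(upper_32_bits(dma_addr),
	       ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan));
	writel(lower_32_bits(dma_addr),
	       ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan));
}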
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index c2f0e91f6bf8..967e8a9aa432 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -6,7 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \ dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ - stmmac_xdp.o stmmac_est.o \ + stmmac_xdp.o stmmac_est.o dw25gmac.o \ $(stmmac-y) stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 684489156dce..9a747b89ba51 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -38,9 +38,11 @@ #define DWXGMAC_CORE_2_10 0x21 #define DWXGMAC_CORE_2_20 0x22 #define DWXLGMAC_CORE_2_00 0x20 +#define DW25GMAC_CORE_4_00 0x40 /* Device ID */ #define DWXGMAC_ID 0x76 +#define DW25GMAC_ID 0x55 #define DWXLGMAC_ID 0x27 #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ @@ -563,6 +565,7 @@ struct mac_link { u32 speed2500; u32 speed5000; u32 speed10000; + u32 speed25000; } xgmii; struct { u32 speed25000; @@ -621,6 +624,7 @@ int dwmac100_setup(struct stmmac_priv *priv); int dwmac1000_setup(struct stmmac_priv *priv); int dwmac4_setup(struct stmmac_priv *priv); int dwxgmac2_setup(struct stmmac_priv *priv); +int dw25gmac_setup(struct stmmac_priv *priv); int dwxlgmac2_setup(struct stmmac_priv *priv); void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], diff --git a/drivers/net/ethernet/stmicro/stmmac/dw25gmac.c b/drivers/net/ethernet/stmicro/stmmac/dw25gmac.c new file mode 100644 index 000000000000..adb33700ffbb --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dw25gmac.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Broadcom Corporation + */ +#include "stmmac.h" +#include "dwxgmac2.h" +#include "dw25gmac.h" + +static u32 decode_vdma_count(u32 regval) +{ + /* compressed encoding for vdma count + * regval: VDMA count + * 0-15 : 1 - 16 + * 16-19 : 20, 24, 28, 32 + * 20-23 : 40, 48, 56, 64 + * 24-27 : 80, 96, 112, 128 + */ + if (regval < 16) + return regval + 1; + return (4 << ((regval - 16) / 4)) * ((regval % 4) + 5); +} + +static void dw25gmac_read_hdma_limits(void __iomem *ioaddr, + struct stmmac_hdma_cfg *hdma) +{ + u32 hw_cap; + + /* Get VDMA/PDMA counts from HW */ + hw_cap = readl(ioaddr + XGMAC_HW_FEATURE2); + hdma->tx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_TXCNT, + hw_cap)); + hdma->rx_vdmas = decode_vdma_count(FIELD_GET(XXVGMAC_HWFEAT_VDMA_RXCNT, + hw_cap)); + hdma->tx_pdmas = FIELD_GET(XGMAC_HWFEAT_TXQCNT, hw_cap) + 1; + hdma->rx_pdmas = FIELD_GET(XGMAC_HWFEAT_RXQCNT, hw_cap) + 1; +} + +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv) +{ + struct plat_stmmacenet_data *plat = priv->plat; + struct device *dev = priv->device; + struct stmmac_hdma_cfg *hdma; + int i; + + hdma = devm_kzalloc(dev, + sizeof(*plat->dma_cfg->hdma_cfg), + GFP_KERNEL); + if (!hdma) + return -ENOMEM; + + dw25gmac_read_hdma_limits(priv->ioaddr, hdma); + + hdma->tvdma_tc = devm_kzalloc(dev, + sizeof(*hdma->tvdma_tc) * hdma->tx_vdmas, + GFP_KERNEL); + if (!hdma->tvdma_tc) + return -ENOMEM; + + hdma->rvdma_tc = devm_kzalloc(dev, + sizeof(*hdma->rvdma_tc) * hdma->rx_vdmas, + GFP_KERNEL); + if (!hdma->rvdma_tc) + return 
-ENOMEM; + + hdma->tpdma_tc = devm_kzalloc(dev, + sizeof(*hdma->tpdma_tc) * hdma->tx_pdmas, + GFP_KERNEL); + if (!hdma->tpdma_tc) + return -ENOMEM; + + hdma->rpdma_tc = devm_kzalloc(dev, + sizeof(*hdma->rpdma_tc) * hdma->rx_pdmas, + GFP_KERNEL); + if (!hdma->rpdma_tc) + return -ENOMEM; + + /* Initialize one-to-one mapping for each of the used queues */ + for (i = 0; i < plat->tx_queues_to_use; i++) { + hdma->tvdma_tc[i] = i; + hdma->tpdma_tc[i] = i; + } + for (i = 0; i < plat->rx_queues_to_use; i++) { + hdma->rvdma_tc[i] = i; + hdma->rpdma_tc[i] = i; + } + plat->dma_cfg->hdma_cfg = hdma; + + return 0; +} + +static int rd_dma_ch_ind(void __iomem *ioaddr, u8 mode, u32 channel) +{ + u32 reg_val = 0; + + reg_val |= FIELD_PREP(XXVGMAC_MODE_SELECT, mode); + reg_val |= FIELD_PREP(XXVGMAC_ADDR_OFFSET, channel); + reg_val |= XXVGMAC_CMD_TYPE | XXVGMAC_OB; + writel(reg_val, ioaddr + XXVGMAC_DMA_CH_IND_CONTROL); + return readl(ioaddr + XXVGMAC_DMA_CH_IND_DATA); +} + +static void wr_dma_ch_ind(void __iomem *ioaddr, u8 mode, u32 channel, u32 val) +{ + u32 reg_val = 0; + + writel(val, ioaddr + XXVGMAC_DMA_CH_IND_DATA); + reg_val |= FIELD_PREP(XXVGMAC_MODE_SELECT, mode); + reg_val |= FIELD_PREP(XXVGMAC_ADDR_OFFSET, channel); + reg_val |= XGMAC_OB; + writel(reg_val, ioaddr + XXVGMAC_DMA_CH_IND_CONTROL); +} + +static void xgmac4_tp2tc_map(void __iomem *ioaddr, u8 pdma_ch, u32 tc_num) +{ + u32 val = 0; + + val = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, pdma_ch); + val &= ~XXVGMAC_TP2TCMP; + val |= FIELD_PREP(XXVGMAC_TP2TCMP, tc_num); + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, pdma_ch, val); +} + +static void xgmac4_rp2tc_map(void __iomem *ioaddr, u8 pdma_ch, u32 tc_num) +{ + u32 val = 0; + + val = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, pdma_ch); + val &= ~XXVGMAC_RP2TCMP; + val |= FIELD_PREP(XXVGMAC_RP2TCMP, tc_num); + wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, pdma_ch, val); +} + +void dw25gmac_dma_init(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg) +{ + u32 value; + u32 i; + + value = readl(ioaddr + XGMAC_DMA_SYSBUS_MODE); + value &= ~(XGMAC_AAL | XGMAC_EAME); + if (dma_cfg->aal) + value |= XGMAC_AAL; + if (dma_cfg->eame) + value |= XGMAC_EAME; + writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); + + for (i = 0; i < dma_cfg->hdma_cfg->tx_vdmas; i++) { + value = rd_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i); + value &= ~XXVGMAC_TXDCSZ; + value |= FIELD_PREP(XXVGMAC_TXDCSZ, + XXVGMAC_TXDCSZ_256BYTES); + value &= ~XXVGMAC_TDPS; + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_TDPS_HALF); + wr_dma_ch_ind(ioaddr, MODE_TXDESCCTRL, i, value); + } + + for (i = 0; i < dma_cfg->hdma_cfg->rx_vdmas; i++) { + value = rd_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i); + value &= ~XXVGMAC_RXDCSZ; + value |= FIELD_PREP(XXVGMAC_RXDCSZ, + XXVGMAC_RXDCSZ_256BYTES); + value &= ~XXVGMAC_RDPS; + value |= FIELD_PREP(XXVGMAC_TDPS, XXVGMAC_RDPS_HALF); + wr_dma_ch_ind(ioaddr, MODE_RXDESCCTRL, i, value); + } + + for (i = 0; i < dma_cfg->hdma_cfg->tx_pdmas; i++) { + value = rd_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i); + value &= ~(XXVGMAC_TXPBL | XXVGMAC_TPBLX8_MODE); + if (dma_cfg->pblx8) + value |= XXVGMAC_TPBLX8_MODE; + value |= FIELD_PREP(XXVGMAC_TXPBL, dma_cfg->pbl); + wr_dma_ch_ind(ioaddr, MODE_TXEXTCFG, i, value); + xgmac4_tp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->tpdma_tc[i]); + } + + for (i = 0; i < dma_cfg->hdma_cfg->rx_pdmas; i++) { + value = rd_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i); + value &= ~(XXVGMAC_RXPBL | XXVGMAC_RPBLX8_MODE); + if (dma_cfg->pblx8) + value |= XXVGMAC_RPBLX8_MODE; + value |= FIELD_PREP(XXVGMAC_RXPBL, dma_cfg->pbl); + 
wr_dma_ch_ind(ioaddr, MODE_RXEXTCFG, i, value); + xgmac4_rp2tc_map(ioaddr, i, dma_cfg->hdma_cfg->rpdma_tc[i]); + } +} + +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + dma_addr_t dma_addr, u32 chan) +{ + u32 value; + + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); + value &= ~XXVGMAC_TVDMA2TCMP; + value |= FIELD_PREP(XXVGMAC_TVDMA2TCMP, + dma_cfg->hdma_cfg->tvdma_tc[chan]); + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); + + writel(upper_32_bits(dma_addr), + ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); + writel(lower_32_bits(dma_addr), + ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); +} + +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + dma_addr_t dma_addr, u32 chan) +{ + u32 value; + + value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); + value &= ~XXVGMAC_RVDMA2TCMP; + value |= FIELD_PREP(XXVGMAC_RVDMA2TCMP, + dma_cfg->hdma_cfg->rvdma_tc[chan]); + writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); + + writel(upper_32_bits(dma_addr), + ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); + writel(lower_32_bits(dma_addr), + ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dw25gmac.h b/drivers/net/ethernet/stmicro/stmmac/dw25gmac.h new file mode 100644 index 000000000000..76c80a7ed025 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dw25gmac.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2024 Broadcom Corporation + * DW25GMAC definitions. + */ +#ifndef __STMMAC_DW25GMAC_H__ +#define __STMMAC_DW25GMAC_H__ + +/* Hardware features */ +#define XXVGMAC_HWFEAT_VDMA_RXCNT GENMASK(16, 12) +#define XXVGMAC_HWFEAT_VDMA_TXCNT GENMASK(22, 18) + +/* DMA Indirect Registers*/ +#define XXVGMAC_DMA_CH_IND_CONTROL 0x00003080 +#define XXVGMAC_MODE_SELECT GENMASK(27, 24) +enum dma_ch_ind_modes { + MODE_TXEXTCFG = 0x0, /* Tx Extended Config */ + MODE_RXEXTCFG = 0x1, /* Rx Extended Config */ + MODE_TXDBGSTS = 0x2, /* Tx Debug Status */ + MODE_RXDBGSTS = 0x3, /* Rx Debug Status */ + MODE_TXDESCCTRL = 0x4, /* Tx Descriptor control */ + MODE_RXDESCCTRL = 0x5, /* Rx Descriptor control */ +}; + +#define XXVGMAC_ADDR_OFFSET GENMASK(14, 8) +#define XXVGMAC_AUTO_INCR GENMASK(5, 4) +#define XXVGMAC_CMD_TYPE BIT(1) +#define XXVGMAC_OB BIT(0) +#define XXVGMAC_DMA_CH_IND_DATA 0x00003084 + +/* TX Config definitions */ +#define XXVGMAC_TXPBL GENMASK(29, 24) +#define XXVGMAC_TPBLX8_MODE BIT(19) +#define XXVGMAC_TP2TCMP GENMASK(18, 16) +#define XXVGMAC_ORRQ GENMASK(13, 8) + +/* RX Config definitions */ +#define XXVGMAC_RXPBL GENMASK(29, 24) +#define XXVGMAC_RPBLX8_MODE BIT(19) +#define XXVGMAC_RP2TCMP GENMASK(18, 16) +#define XXVGMAC_OWRQ GENMASK(13, 8) + +/* Tx Descriptor control */ +#define XXVGMAC_TXDCSZ GENMASK(2, 0) +#define XXVGMAC_TXDCSZ_0BYTES 0 +#define XXVGMAC_TXDCSZ_64BYTES 1 +#define XXVGMAC_TXDCSZ_128BYTES 2 +#define XXVGMAC_TXDCSZ_256BYTES 3 +#define XXVGMAC_TDPS GENMASK(5, 3) +#define XXVGMAC_TDPS_ZERO 0 +#define XXVGMAC_TDPS_1_8TH 1 +#define XXVGMAC_TDPS_1_4TH 2 +#define XXVGMAC_TDPS_HALF 3 +#define XXVGMAC_TDPS_3_4TH 4 + +/* Rx Descriptor control */ +#define XXVGMAC_RXDCSZ GENMASK(2, 0) +#define XXVGMAC_RXDCSZ_0BYTES 0 +#define XXVGMAC_RXDCSZ_64BYTES 1 +#define XXVGMAC_RXDCSZ_128BYTES 2 +#define XXVGMAC_RXDCSZ_256BYTES 3 +#define XXVGMAC_RDPS GENMASK(5, 3) +#define XXVGMAC_RDPS_ZERO 0 +#define XXVGMAC_RDPS_1_8TH 1 +#define XXVGMAC_RDPS_1_4TH 2 +#define XXVGMAC_RDPS_HALF 3 
+#define XXVGMAC_RDPS_3_4TH 4 + +/* DWCXG_DMA_CH(#i) Registers*/ +#define XXVGMAC_DSL GENMASK(20, 18) +#define XXVGMAC_MSS GENMASK(13, 0) +#define XXVGMAC_TFSEL GENMASK(30, 29) +#define XXVGMAC_TQOS GENMASK(27, 24) +#define XXVGMAC_IPBL BIT(15) +#define XXVGMAC_TVDMA2TCMP GENMASK(6, 4) +#define XXVGMAC_RPF BIT(31) +#define XXVGMAC_RVDMA2TCMP GENMASK(30, 28) +#define XXVGMAC_RQOS GENMASK(27, 24) + +int dw25gmac_hdma_cfg_init(struct stmmac_priv *priv); + +void dw25gmac_dma_init(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg); + +void dw25gmac_dma_init_tx_chan(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + dma_addr_t dma_addr, u32 chan); +void dw25gmac_dma_init_rx_chan(struct stmmac_priv *priv, + void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, + dma_addr_t dma_addr, u32 chan); +#endif /* __STMMAC_DW25GMAC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 6a2c7d22df1e..c9424c5a6ce5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -17,6 +17,7 @@ #define XGMAC_CONFIG_SS_OFF 29 #define XGMAC_CONFIG_SS_MASK GENMASK(31, 29) #define XGMAC_CONFIG_SS_10000 (0x0 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_25000 (0x1 << XGMAC_CONFIG_SS_OFF) #define XGMAC_CONFIG_SS_2500_GMII (0x2 << XGMAC_CONFIG_SS_OFF) #define XGMAC_CONFIG_SS_1000_GMII (0x3 << XGMAC_CONFIG_SS_OFF) #define XGMAC_CONFIG_SS_100_MII (0x4 << XGMAC_CONFIG_SS_OFF) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index cbf2dd976ab1..98b94b6e54db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -11,6 +11,7 @@ #include "stmmac_ptp.h" #include "dwxlgmac2.h" #include "dwxgmac2.h" +#include "dw25gmac.h" static void dwxgmac2_core_init(struct mac_device_info *hw, struct net_device *dev) @@ -1669,6 +1670,48 @@ int dwxgmac2_setup(struct stmmac_priv *priv) return 0; } +int dw25gmac_setup(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + dev_info(priv->device, "\tDW25GMAC\n"); + + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; + mac->mcast_bits_log2 = 0; + + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + + mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_1000FD | MAC_2500FD | MAC_5000FD | + MAC_10000FD | MAC_25000FD; + mac->link.duplex = 0; + mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; + mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; + mac->link.speed1000 = XGMAC_CONFIG_SS_1000_GMII; + mac->link.speed2500 = XGMAC_CONFIG_SS_2500_GMII; + mac->link.xgmii.speed2500 = XGMAC_CONFIG_SS_2500; + mac->link.xgmii.speed5000 = XGMAC_CONFIG_SS_5000; + mac->link.xgmii.speed10000 = XGMAC_CONFIG_SS_10000; + mac->link.xgmii.speed25000 = XGMAC_CONFIG_SS_25000; + mac->link.speed_mask = XGMAC_CONFIG_SS_MASK; + + mac->mii.addr = XGMAC_MDIO_ADDR; + mac->mii.data = XGMAC_MDIO_DATA; + mac->mii.addr_shift = 16; + mac->mii.addr_mask = GENMASK(20, 16); + mac->mii.reg_shift = 0; + mac->mii.reg_mask = GENMASK(15, 0); + mac->mii.clk_csr_shift = 19; + mac->mii.clk_csr_mask = GENMASK(21, 19); + + /* Allocate and initialize hdma mapping */ + return dw25gmac_hdma_cfg_init(priv); +} + int dwxlgmac2_setup(struct 
stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7840bc403788..02abfdd40270 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -7,6 +7,7 @@ #include <linux/iopoll.h> #include "stmmac.h" #include "dwxgmac2.h" +#include "dw25gmac.h" static int dwxgmac2_dma_reset(void __iomem *ioaddr) { @@ -641,3 +642,33 @@ const struct stmmac_dma_ops dwxgmac210_dma_ops = { .enable_sph = dwxgmac2_enable_sph, .enable_tbs = dwxgmac2_enable_tbs, }; + +const struct stmmac_dma_ops dw25gmac400_dma_ops = { + .reset = dwxgmac2_dma_reset, + .init = dw25gmac_dma_init, + .init_chan = dwxgmac2_dma_init_chan, + .init_rx_chan = dw25gmac_dma_init_rx_chan, + .init_tx_chan = dw25gmac_dma_init_tx_chan, + .axi = dwxgmac2_dma_axi, + .dump_regs = dwxgmac2_dma_dump_regs, + .dma_rx_mode = dwxgmac2_dma_rx_mode, + .dma_tx_mode = dwxgmac2_dma_tx_mode, + .enable_dma_irq = dwxgmac2_enable_dma_irq, + .disable_dma_irq = dwxgmac2_disable_dma_irq, + .start_tx = dwxgmac2_dma_start_tx, + .stop_tx = dwxgmac2_dma_stop_tx, + .start_rx = dwxgmac2_dma_start_rx, + .stop_rx = dwxgmac2_dma_stop_rx, + .dma_interrupt = dwxgmac2_dma_interrupt, + .get_hw_feature = dwxgmac2_get_hw_feature, + .rx_watchdog = dwxgmac2_rx_watchdog, + .set_rx_ring_len = dwxgmac2_set_rx_ring_len, + .set_tx_ring_len = dwxgmac2_set_tx_ring_len, + .set_rx_tail_ptr = dwxgmac2_set_rx_tail_ptr, + .set_tx_tail_ptr = dwxgmac2_set_tx_tail_ptr, + .enable_tso = dwxgmac2_enable_tso, + .qmode = dwxgmac2_qmode, + .set_bfsize = dwxgmac2_set_bfsize, + .enable_sph = dwxgmac2_enable_sph, + .enable_tbs = dwxgmac2_enable_tbs, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 7e90f34b8c88..9764eadf72c2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -682,6 +682,7 @@ extern const struct stmmac_desc_ops dwxgmac210_desc_ops; extern const struct stmmac_mmc_ops dwmac_mmc_ops; extern const struct stmmac_mmc_ops dwxgmac_mmc_ops; extern const struct stmmac_est_ops dwmac510_est_ops; +extern const struct stmmac_dma_ops dw25gmac400_dma_ops; #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */
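The cover text mentions wiring a DW25GMAC stmmac_hwif_entry into the stmmac
core; that change is not part of this patch. For orientation only, a rough
sketch of how such an entry could plug the new dma ops and setup() in,
modelled on the existing XGMAC/XLGMAC entries in hwif.c; the exact field set
is abbreviated and unverified, so treat every line below as an assumption:

	/* Hypothetical hwif.c entry, modelled on the DWXGMAC/DWXLGMAC ones */
	{
		.gmac = false,
		.gmac4 = false,
		.xgmac = true,
		.min_id = DW25GMAC_CORE_4_00,
		.dev_id = DW25GMAC_ID,
		.regs = {
			.ptp_off = PTP_XGMAC_OFFSET,
			.mmc_off = MMC_XGMAC_OFFSET,
		},
		.desc = &dwxgmac210_desc_ops,
		.dma = &dw25gmac400_dma_ops,
		.mac = &dwxgmac210_ops,
		.hwtimestamp = &stmmac_ptp,
		.mmc = &dwxgmac_mmc_ops,
		.setup = dw25gmac_setup,
		/* remaining ops (mode/tc/est/quirks) as for the XGMAC entry */
	},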