diff mbox series

PCI: dw-rockchip: Configure max payload size on host init

Message ID 20250416151926.140202-1-18255117159@163.com (mailing list archive)
State New
Headers show
Series PCI: dw-rockchip: Configure max payload size on host init | expand

Commit Message

Hans Zhang April 16, 2025, 3:19 p.m. UTC
The RK3588's PCIe controller defaults to a 128-byte max payload size,
but its hardware capability actually supports 256 bytes. This results
in suboptimal performance with devices that support larger payloads.

Signed-off-by: Hans Zhang <18255117159@163.com>
---
 drivers/pci/controller/dwc/pcie-dw-rockchip.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)


base-commit: a24588245776dafc227243a01bfbeb8a59bafba9

Comments

Bjorn Helgaas April 16, 2025, 8:40 p.m. UTC | #1
On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
> The RK3588's PCIe controller defaults to a 128-byte max payload size,
> but its hardware capability actually supports 256 bytes. This results
> in suboptimal performance with devices that support larger payloads.
> 
> Signed-off-by: Hans Zhang <18255117159@163.com>
> ---
>  drivers/pci/controller/dwc/pcie-dw-rockchip.c | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> index c624b7ebd118..5bbb536a2576 100644
> --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> @@ -477,6 +477,22 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
>  	return IRQ_HANDLED;
>  }
>  
> +static void rockchip_pcie_set_max_payload(struct rockchip_pcie *rockchip)
> +{
> +	struct dw_pcie *pci = &rockchip->pci;
> +	u32 dev_cap, dev_ctrl;
> +	u16 offset;
> +
> +	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
> +	dev_cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCAP);
> +	dev_cap &= PCI_EXP_DEVCAP_PAYLOAD;
> +
> +	dev_ctrl = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
> +	dev_ctrl &= ~PCI_EXP_DEVCTL_PAYLOAD;
> +	dev_ctrl |= dev_cap << 5;
> +	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, dev_ctrl);
> +}

I can't really complain too much about this since meson does basically
the same thing, but there are some things I don't like about this:

  - I don't think it's safe to set MPS higher in all cases.  If we set
    the Root Port MPS=256, and an Endpoint only supports MPS=128, the
    Endpoint may do a 256-byte DMA read (assuming its MRRS>=256).  In
    that case the RP may respond with a 256-byte payload the Endpoint
    can't handle.  The generic code in pci_configure_mps() might be
    smart enough to avoid that situation, but I'm not confident about
    it.  Maybe I could be convinced.

  - There's nothing rockchip-specific about this.

  - It's very similar to meson_set_max_payload(), so it'd be nice to
    share that code somehow.

  - The commit log is specific about Max_Payload_Size Supported being
    256 bytes, but the patch actually reads the value from Device
    Capabilities.

  - I'd like to see FIELD_PREP()/FIELD_GET() used when possible.
    PCIE_LTSSM_STATUS_MASK is probably the only other place.

These would be material for a separate patch:

  - The #defines for register offsets and bits are kind of a mess,
    e.g., PCIE_SMLH_LINKUP, PCIE_RDLH_LINKUP, PCIE_LINKUP,
    PCIE_L0S_ENTRY, and PCIE_LTSSM_STATUS_MASK are in
    PCIE_CLIENT_LTSSM_STATUS, but you couldn't tell that from the
    names, and they're not even defined together.

  - Same for PCIE_RDLH_LINK_UP_CHGED, PCIE_LINK_REQ_RST_NOT_INT,
    PCIE_RDLH_LINK_UP_CHGED, which are in
    PCIE_CLIENT_INTR_STATUS_MISC.

  - PCIE_LTSSM_ENABLE_ENHANCE is apparently in
    PCIE_CLIENT_HOT_RESET_CTRL?  Sure wouldn't guess that from the
    names or the order of #defines.

  - PCIE_CLIENT_GENERAL_DEBUG isn't used at all.

>  static int rockchip_pcie_configure_rc(struct platform_device *pdev,
>  				      struct rockchip_pcie *rockchip)
>  {
> @@ -511,6 +527,8 @@ static int rockchip_pcie_configure_rc(struct platform_device *pdev,
>  	pp->ops = &rockchip_pcie_host_ops;
>  	pp->use_linkup_irq = true;
>  
> +	rockchip_pcie_set_max_payload(rockchip);
> +
>  	ret = dw_pcie_host_init(pp);
>  	if (ret) {
>  		dev_err(dev, "failed to initialize host\n");
> 
> base-commit: a24588245776dafc227243a01bfbeb8a59bafba9
> -- 
> 2.25.1
>
Hans Zhang April 17, 2025, 2:19 a.m. UTC | #2
On 2025/4/17 04:40, Bjorn Helgaas wrote:
> On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
>> The RK3588's PCIe controller defaults to a 128-byte max payload size,
>> but its hardware capability actually supports 256 bytes. This results
>> in suboptimal performance with devices that support larger payloads.
>>
>> Signed-off-by: Hans Zhang <18255117159@163.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-dw-rockchip.c | 18 ++++++++++++++++++
>>   1 file changed, 18 insertions(+)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
>> index c624b7ebd118..5bbb536a2576 100644
>> --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
>> +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
>> @@ -477,6 +477,22 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
>>   	return IRQ_HANDLED;
>>   }
>>   
>> +static void rockchip_pcie_set_max_payload(struct rockchip_pcie *rockchip)
>> +{
>> +	struct dw_pcie *pci = &rockchip->pci;
>> +	u32 dev_cap, dev_ctrl;
>> +	u16 offset;
>> +
>> +	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>> +	dev_cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCAP);
>> +	dev_cap &= PCI_EXP_DEVCAP_PAYLOAD;
>> +
>> +	dev_ctrl = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
>> +	dev_ctrl &= ~PCI_EXP_DEVCTL_PAYLOAD;
>> +	dev_ctrl |= dev_cap << 5;
>> +	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, dev_ctrl);
>> +}
> 
> I can't really complain too much about this since meson does basically
> the same thing, but there are some things I don't like about this:
> 
>    - I don't think it's safe to set MPS higher in all cases.  If we set
>      the Root Port MPS=256, and an Endpoint only supports MPS=128, the
>      Endpoint may do a 256-byte DMA read (assuming its MRRS>=256).  In
>      that case the RP may respond with a 256-byte payload the Endpoint
>      can't handle.  The generic code in pci_configure_mps() might be
>      smart enough to avoid that situation, but I'm not confident about
>      it.  Maybe I could be convinced.
> 

Dear Bjorn,

Thank you very much for your reply. If we set the Root Port MPS=256 and
an Endpoint only supports MPS=128, the Root Port is eventually also set
to MPS=128 by pci_configure_mps().


lspci information before the patch was applied:

root@firefly:~# lspci -vvv
00:00.0 PCI bridge: Fuzhou Rockchip Electronics Co., Ltd Device 3588 
(rev 01) (prog-if 00 [Normal decode])
         Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR+ FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0
         Interrupt: pin A routed to IRQ 73
         Bus: primary=00, secondary=01, subordinate=ff, sec-latency=0
         I/O behind bridge: 0000f000-00000fff [disabled]
         Memory behind bridge: f0200000-f02fffff [size=1M]
         Prefetchable memory behind bridge: 
00000000fff00000-00000000000fffff [disabled]
         Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- <SERR- <PERR-
         Expansion ROM at f0300000 [virtual] [disabled] [size=64K]
         BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16- MAbort- >Reset- 
FastB2B-
                 PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
         Capabilities: [40] Power Management version 3
                 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA 
PME(D0+,D1+,D2-,D3hot+,D3cold-)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [50] MSI: Enable+ Count=16/32 Maskable+ 64bit+
                 Address: 00000000fe670040  Data: 0000
                 Masking: fffffeff  Pending: 00000000
         Capabilities: [70] Express (v2) Root Port (Slot-), MSI 08
                 DevCap: MaxPayload 256 bytes, PhantFunc 0
                         ExtTag+ RBE+
                 DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
                         RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop-
                         MaxPayload 128 bytes, MaxReadReq 512 bytes


01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd 
Device a80c (prog-if 02 [NVM Express])
         Subsystem: Samsung Electronics Co Ltd Device a801
         Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR- FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0
         Interrupt: pin A routed to IRQ 72
         Region 0: Memory at f0200000 (64-bit, non-prefetchable) [size=16K]
         Capabilities: [40] Power Management version 3
                 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA 
PME(D0-,D1-,D2-,D3hot-,D3cold-)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [50] MSI: Enable- Count=1/32 Maskable- 64bit+
                 Address: 0000000000000000  Data: 0000
         Capabilities: [70] Express (v2) Endpoint, MSI 00
                 DevCap: MaxPayload 512 bytes, PhantFunc 0, Latency L0s 
unlimited, L1 unlimited
                         ExtTag+ AttnBtn- AttnInd- PwrInd- RBE+ FLReset+ 
SlotPowerLimit 0.000W
                 DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
                         RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop+ 
FLReset-
                         MaxPayload 128 bytes, MaxReadReq 512 bytes



lspci information after the patch was applied:
root@firefly:~# lspci -vvv
00:00.0 PCI bridge: Fuzhou Rockchip Electronics Co., Ltd Device 3588 
(rev 01) (prog-if 00 [Normal decode])
         Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR+ FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0
         Interrupt: pin A routed to IRQ 73
         Bus: primary=00, secondary=01, subordinate=ff, sec-latency=0
         I/O behind bridge: 0000f000-00000fff [disabled]
         Memory behind bridge: f0200000-f02fffff [size=1M]
         Prefetchable memory behind bridge: 
00000000fff00000-00000000000fffff [disabled]
         Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- <SERR- <PERR-
         Expansion ROM at f0300000 [virtual] [disabled] [size=64K]
         BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16- MAbort- >Reset- 
FastB2B-
                 PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
         Capabilities: [40] Power Management version 3
                 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA 
PME(D0+,D1+,D2-,D3hot+,D3cold-)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [50] MSI: Enable+ Count=16/32 Maskable+ 64bit+
                 Address: 00000000fe670040  Data: 0000
                 Masking: fffffeff  Pending: 00000000
         Capabilities: [70] Express (v2) Root Port (Slot-), MSI 08
                 DevCap: MaxPayload 256 bytes, PhantFunc 0
                         ExtTag+ RBE+
                 DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
                         RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop-
                         MaxPayload 256 bytes, MaxReadReq 512 bytes

01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd 
Device a80c (prog-if 02 [NVM Express])
         Subsystem: Samsung Electronics Co Ltd Device a801
         Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR- FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0
         Interrupt: pin A routed to IRQ 72
         Region 0: Memory at f0200000 (64-bit, non-prefetchable) [size=16K]
         Capabilities: [40] Power Management version 3
                 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA 
PME(D0-,D1-,D2-,D3hot-,D3cold-)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [50] MSI: Enable- Count=1/32 Maskable- 64bit+
                 Address: 0000000000000000  Data: 0000
         Capabilities: [70] Express (v2) Endpoint, MSI 00
                 DevCap: MaxPayload 512 bytes, PhantFunc 0, Latency L0s 
unlimited, L1 unlimited
                         ExtTag+ AttnBtn- AttnInd- PwrInd- RBE+ FLReset+ 
SlotPowerLimit 0.000W
                 DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
                         RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop+ 
FLReset-
                         MaxPayload 256 bytes, MaxReadReq 512 bytes



Here are my tests and the NVMe SSD worked fine.

root@firefly:~# df -h
Filesystem           Size  Used Avail Use% Mounted on
......
/dev/nvme0n1         916G   28K  870G   1% /root/nvme
......
root@firefly:~#
root@firefly:~# ls -l nvme/
total 16
drwx------ 2 root root 16384 Dec 24 06:34 lost+found
root@firefly:~#
root@firefly:~# cd nvme/
root@firefly:~/nvme# time dd if=/dev/zero of=test bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.875072 s, 1.2 GB/s

real    0m0.881s
user    0m0.001s
sys     0m0.874s
root@firefly:~/nvme# ls -l
total 1048596
drwx------ 2 root root      16384 Dec 24 06:34 lost+found
-rw-r--r-- 1 root root 1073741824 Apr 17 02:11 test
root@firefly:~/nvme# time cp -rf test test1

real    0m0.901s
user    0m0.005s
sys     0m0.889s
root@firefly:~/nvme# ls -lh
total 2.1G
drwx------ 2 root root  16K Dec 24 06:34 lost+found
-rw-r--r-- 1 root root 1.0G Apr 17 02:11 test
-rw-r--r-- 1 root root 1.0G Apr 17 02:12 test1



>    - There's nothing rockchip-specific about this.
> 
>    - It's very similar to meson_set_max_payload(), so it'd be nice to
>      share that code somehow.

The next version will be added to DWC.

> 
>    - The commit log is specific about Max_Payload_Size Supported being
>      256 bytes, but the patch actually reads the value from Device
>      Capabilities.
The commit log will be modified.

> 
>    - I'd like to see FIELD_PREP()/FIELD_GET() used when possible.
>      PCIE_LTSSM_STATUS_MASK is probably the only other place.
> 

Will change.

> These would be material for a separate patch:
> 

Thank you very much for the reminder and advice. I will submit a
separate patch for these changes.

>    - The #defines for register offsets and bits are kind of a mess,
>      e.g., PCIE_SMLH_LINKUP, PCIE_RDLH_LINKUP, PCIE_LINKUP,
>      PCIE_L0S_ENTRY, and PCIE_LTSSM_STATUS_MASK are in
>      PCIE_CLIENT_LTSSM_STATUS, but you couldn't tell that from the
>      names, and they're not even defined together.
> 
>    - Same for PCIE_RDLH_LINK_UP_CHGED, PCIE_LINK_REQ_RST_NOT_INT,
>      PCIE_RDLH_LINK_UP_CHGED, which are in
>      PCIE_CLIENT_INTR_STATUS_MISC.
> 
>    - PCIE_LTSSM_ENABLE_ENHANCE is apparently in
>      PCIE_CLIENT_HOT_RESET_CTRL?  Sure wouldn't guess that from the
>      names or the order of #defines.
> 
>    - PCIE_CLIENT_GENERAL_DEBUG isn't used at all.

Will delete.


Best regards,
Hans

> 
>>   static int rockchip_pcie_configure_rc(struct platform_device *pdev,
>>   				      struct rockchip_pcie *rockchip)
>>   {
>> @@ -511,6 +527,8 @@ static int rockchip_pcie_configure_rc(struct platform_device *pdev,
>>   	pp->ops = &rockchip_pcie_host_ops;
>>   	pp->use_linkup_irq = true;
>>   
>> +	rockchip_pcie_set_max_payload(rockchip);
>> +
>>   	ret = dw_pcie_host_init(pp);
>>   	if (ret) {
>>   		dev_err(dev, "failed to initialize host\n");
>>
>> base-commit: a24588245776dafc227243a01bfbeb8a59bafba9
>> -- 
>> 2.25.1
>>
Niklas Cassel April 17, 2025, 6:01 a.m. UTC | #3
On Thu, Apr 17, 2025 at 10:19:10AM +0800, Hans Zhang wrote:
> On 2025/4/17 04:40, Bjorn Helgaas wrote:
> > On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
> > > The RK3588's PCIe controller defaults to a 128-byte max payload size,
> > > but its hardware capability actually supports 256 bytes. This results
> > > in suboptimal performance with devices that support larger payloads.
> > > 
> > > Signed-off-by: Hans Zhang <18255117159@163.com>
> > > ---
> > >   drivers/pci/controller/dwc/pcie-dw-rockchip.c | 18 ++++++++++++++++++
> > >   1 file changed, 18 insertions(+)
> > > 
> > > diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> > > index c624b7ebd118..5bbb536a2576 100644
> > > --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> > > +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> > > @@ -477,6 +477,22 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
> > >   	return IRQ_HANDLED;
> > >   }
> > > +static void rockchip_pcie_set_max_payload(struct rockchip_pcie *rockchip)
> > > +{
> > > +	struct dw_pcie *pci = &rockchip->pci;
> > > +	u32 dev_cap, dev_ctrl;
> > > +	u16 offset;
> > > +
> > > +	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
> > > +	dev_cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCAP);
> > > +	dev_cap &= PCI_EXP_DEVCAP_PAYLOAD;
> > > +
> > > +	dev_ctrl = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
> > > +	dev_ctrl &= ~PCI_EXP_DEVCTL_PAYLOAD;
> > > +	dev_ctrl |= dev_cap << 5;
> > > +	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, dev_ctrl);
> > > +}
> > 
> > I can't really complain too much about this since meson does basically
> > the same thing, but there are some things I don't like about this:
> > 
> >    - I don't think it's safe to set MPS higher in all cases.  If we set
> >      the Root Port MPS=256, and an Endpoint only supports MPS=128, the
> >      Endpoint may do a 256-byte DMA read (assuming its MRRS>=256).  In
> >      that case the RP may respond with a 256-byte payload the Endpoint
> >      can't handle.  The generic code in pci_configure_mps() might be
> >      smart enough to avoid that situation, but I'm not confident about
> >      it.  Maybe I could be convinced.
> > 
> 
> Dear Bjorn,
> 
> Thank you very much for your reply. If we set the Root Port MPS=256, and an
> Endpoint only supports MPS=128. Finally, Root Port is also set to MPS=128 in
> pci_configure_mps.

In your example below, the Endpoint has:
 DevCap: MaxPayload 512 bytes

So at least your example can't be used to prove this specific point.
But perhaps you just wanted to show that your Max Payload Size increase
actually works?


Kind regards,
Niklas
Hans Zhang April 17, 2025, 6:47 a.m. UTC | #4
On 2025/4/17 14:01, Niklas Cassel wrote:
> On Thu, Apr 17, 2025 at 10:19:10AM +0800, Hans Zhang wrote:
>> On 2025/4/17 04:40, Bjorn Helgaas wrote:
>>> On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
>>>> The RK3588's PCIe controller defaults to a 128-byte max payload size,
>>>> but its hardware capability actually supports 256 bytes. This results
>>>> in suboptimal performance with devices that support larger payloads.
>>>>
>>>> Signed-off-by: Hans Zhang <18255117159@163.com>
>>>> ---
>>>>    drivers/pci/controller/dwc/pcie-dw-rockchip.c | 18 ++++++++++++++++++
>>>>    1 file changed, 18 insertions(+)
>>>>
>>>> diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
>>>> index c624b7ebd118..5bbb536a2576 100644
>>>> --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
>>>> +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
>>>> @@ -477,6 +477,22 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
>>>>    	return IRQ_HANDLED;
>>>>    }
>>>> +static void rockchip_pcie_set_max_payload(struct rockchip_pcie *rockchip)
>>>> +{
>>>> +	struct dw_pcie *pci = &rockchip->pci;
>>>> +	u32 dev_cap, dev_ctrl;
>>>> +	u16 offset;
>>>> +
>>>> +	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>>>> +	dev_cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCAP);
>>>> +	dev_cap &= PCI_EXP_DEVCAP_PAYLOAD;
>>>> +
>>>> +	dev_ctrl = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
>>>> +	dev_ctrl &= ~PCI_EXP_DEVCTL_PAYLOAD;
>>>> +	dev_ctrl |= dev_cap << 5;
>>>> +	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, dev_ctrl);
>>>> +}
>>>
>>> I can't really complain too much about this since meson does basically
>>> the same thing, but there are some things I don't like about this:
>>>
>>>     - I don't think it's safe to set MPS higher in all cases.  If we set
>>>       the Root Port MPS=256, and an Endpoint only supports MPS=128, the
>>>       Endpoint may do a 256-byte DMA read (assuming its MRRS>=256).  In
>>>       that case the RP may respond with a 256-byte payload the Endpoint
>>>       can't handle.  The generic code in pci_configure_mps() might be
>>>       smart enough to avoid that situation, but I'm not confident about
>>>       it.  Maybe I could be convinced.
>>>
>>
>> Dear Bjorn,
>>
>> Thank you very much for your reply. If we set the Root Port MPS=256, and an
>> Endpoint only supports MPS=128. Finally, Root Port is also set to MPS=128 in
>> pci_configure_mps.
> 
> In you example below, the Endpoint has:
>   DevCap: MaxPayload 512 bytes
> 
> So at least your example can't be used to prove this specific point.
> But perhaps you just wanted to show that your Max Payload Size increase
> actually works?
> 

Dear Niklas,

Do you have an Endpoint with MPS=128? If so, you can also help verify 
the logic of the pci_configure_mps function. I don't have an Endpoint 
with MPS=128 here.


The processing logic of the pci_configure_mps function has been verified 
on our own SOC platform. Please refer to the following log.
Our Root Port will set MPS=512.


0002:30:00.0 PCI bridge: Device 1f6c:0001 (prog-if 00 [Normal decode])
         Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR- FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0
         Interrupt: pin A routed to IRQ 167
         Bus: primary=30, secondary=31, subordinate=5f, sec-latency=0
         I/O behind bridge: 300000-300fff [size=4K] [16-bit]
         Memory behind bridge: 38300000-383fffff [size=1M] [32-bit]
         Prefetchable memory behind bridge: 
00000000fff00000-00000000000fffff [disabled] [64-bit]
         Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- <SERR- <PERR-
         Expansion ROM at 38200000 [virtual] [disabled] [size=1M]
         BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16- MAbort- >Reset- 
FastB2B-
                 PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
         Capabilities: [80] Power Management version 3
                 Flags: PMEClk- DSI- D1+ D2- AuxCurrent=0mA 
PME(D0+,D1+,D2-,D3hot+,D3cold-)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [90] MSI: Enable+ Count=1/32 Maskable+ 64bit+
                 Address: 000000000e060040  Data: 0000
                 Masking: fffffffe  Pending: 00000000
         Capabilities: [b0] MSI-X: Enable- Count=2 Masked-
                 Vector table: BAR=0 offset=00000040
                 PBA: BAR=0 offset=00000040
         Capabilities: [c0] Express (v2) Root Port (Slot-), MSI 00
                 DevCap: MaxPayload 512 bytes, PhantFunc 0
                         ExtTag- RBE+
                 DevCtl: CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+
                         RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
                         MaxPayload 256 bytes, MaxReadReq 1024 bytes


0002:31:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. 
RTL8125 2.5GbE Controller (rev 05)
         Subsystem: Realtek Semiconductor Co., Ltd. RTL8125 2.5GbE 
Controller
         Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR- FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0, Cache Line Size: 64 bytes
         Interrupt: pin A routed to IRQ 166
         Region 0: I/O ports at 300000 [size=256]
         Region 2: Memory at 38300000 (64-bit, non-prefetchable) [size=64K]
         Region 4: Memory at 38310000 (64-bit, non-prefetchable) [size=16K]
         Capabilities: [40] Power Management version 3
                 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA 
PME(D0+,D1+,D2+,D3hot+,D3cold+)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [50] MSI: Enable- Count=1/1 Maskable+ 64bit+
                 Address: 0000000000000000  Data: 0000
                 Masking: 00000000  Pending: 00000000
         Capabilities: [70] Express (v2) Endpoint, MSI 01
                 DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s 
<512ns, L1 <64us
                         ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- 
SlotPowerLimit 0W
                 DevCtl: CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+
                         RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop-
                         MaxPayload 256 bytes, MaxReadReq 2048 bytes


hans@hans:~$ iperf3 -s
-----------------------------------------------------------
Server listening on 5201
-----------------------------------------------------------
Accepted connection from ethernet_ip, port 47114
[  5] local ubuntu_host_ip port 5201 connected to ethernet_ip port 47122
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-1.00   sec   108 MBytes   902 Mbits/sec
[  5]   1.00-2.00   sec   112 MBytes   941 Mbits/sec
[  5]   2.00-3.00   sec   112 MBytes   941 Mbits/sec
[  5]   3.00-4.00   sec   112 MBytes   941 Mbits/sec
[  5]   4.00-5.00   sec   112 MBytes   941 Mbits/sec
[  5]   5.00-6.00   sec   112 MBytes   941 Mbits/sec
[  5]   6.00-7.00   sec   112 MBytes   941 Mbits/sec
[  5]   7.00-8.00   sec   112 MBytes   941 Mbits/sec
[  5]   8.00-9.00   sec   112 MBytes   941 Mbits/sec
[  5]   9.00-10.00  sec   112 MBytes   941 Mbits/sec
[  5]  10.00-10.04  sec  4.92 MBytes   941 Mbits/sec
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-10.04  sec  1.10 GBytes   938 Mbits/sec 
receiver
-----------------------------------------------------------


root@cix-localhost:~# iperf3 -c ubuntu_host_ip
Connecting to host ubuntu_host_ip, port 5201
[  5] local ethernet_ip port 47122 connected to ubuntu_host_ip port 5201
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-1.00   sec   114 MBytes   958 Mbits/sec    0    484 KBytes
[  5]   1.00-2.00   sec   113 MBytes   946 Mbits/sec    0    535 KBytes
[  5]   2.00-3.00   sec   112 MBytes   936 Mbits/sec    0    559 KBytes
[  5]   3.00-4.00   sec   113 MBytes   946 Mbits/sec    0    587 KBytes
[  5]   4.00-5.00   sec   112 MBytes   939 Mbits/sec    0    587 KBytes
[  5]   5.00-6.00   sec   113 MBytes   948 Mbits/sec    0    587 KBytes
[  5]   6.00-7.00   sec   112 MBytes   936 Mbits/sec    0    587 KBytes
[  5]   7.00-8.00   sec   112 MBytes   939 Mbits/sec    0    587 KBytes
[  5]   8.00-9.00   sec   112 MBytes   942 Mbits/sec    0    619 KBytes
[  5]   9.00-10.00  sec   113 MBytes   945 Mbits/sec    0    677 KBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec  1.10 GBytes   944 Mbits/sec    0             sender
[  5]   0.00-10.04  sec  1.10 GBytes   938 Mbits/sec 
receiver

Best regards,
Hans
Niklas Cassel April 17, 2025, 6:53 a.m. UTC | #5
On Thu, Apr 17, 2025 at 02:47:23PM +0800, Hans Zhang wrote:
> On 2025/4/17 14:01, Niklas Cassel wrote:
> > On Thu, Apr 17, 2025 at 10:19:10AM +0800, Hans Zhang wrote:
> > > On 2025/4/17 04:40, Bjorn Helgaas wrote:
> > > > On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
> > > > > The RK3588's PCIe controller defaults to a 128-byte max payload size,
> > > > > but its hardware capability actually supports 256 bytes. This results
> > > > > in suboptimal performance with devices that support larger payloads.
> > > > > 
> > > > > Signed-off-by: Hans Zhang <18255117159@163.com>
> > > > > ---
> > > > >    drivers/pci/controller/dwc/pcie-dw-rockchip.c | 18 ++++++++++++++++++
> > > > >    1 file changed, 18 insertions(+)
> > > > > 
> > > > > diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> > > > > index c624b7ebd118..5bbb536a2576 100644
> > > > > --- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> > > > > +++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
> > > > > @@ -477,6 +477,22 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
> > > > >    	return IRQ_HANDLED;
> > > > >    }
> > > > > +static void rockchip_pcie_set_max_payload(struct rockchip_pcie *rockchip)
> > > > > +{
> > > > > +	struct dw_pcie *pci = &rockchip->pci;
> > > > > +	u32 dev_cap, dev_ctrl;
> > > > > +	u16 offset;
> > > > > +
> > > > > +	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
> > > > > +	dev_cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCAP);
> > > > > +	dev_cap &= PCI_EXP_DEVCAP_PAYLOAD;
> > > > > +
> > > > > +	dev_ctrl = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
> > > > > +	dev_ctrl &= ~PCI_EXP_DEVCTL_PAYLOAD;
> > > > > +	dev_ctrl |= dev_cap << 5;
> > > > > +	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, dev_ctrl);
> > > > > +}
> > > > 
> > > > I can't really complain too much about this since meson does basically
> > > > the same thing, but there are some things I don't like about this:
> > > > 
> > > >     - I don't think it's safe to set MPS higher in all cases.  If we set
> > > >       the Root Port MPS=256, and an Endpoint only supports MPS=128, the
> > > >       Endpoint may do a 256-byte DMA read (assuming its MRRS>=256).  In
> > > >       that case the RP may respond with a 256-byte payload the Endpoint
> > > >       can't handle.  The generic code in pci_configure_mps() might be
> > > >       smart enough to avoid that situation, but I'm not confident about
> > > >       it.  Maybe I could be convinced.
> > > > 
> > > 
> > > Dear Bjorn,
> > > 
> > > Thank you very much for your reply. If we set the Root Port MPS=256, and an
> > > Endpoint only supports MPS=128. Finally, Root Port is also set to MPS=128 in
> > > pci_configure_mps.
> > 
> > In you example below, the Endpoint has:
> >   DevCap: MaxPayload 512 bytes
> > 
> > So at least your example can't be used to prove this specific point.
> > But perhaps you just wanted to show that your Max Payload Size increase
> > actually works?
> > 
> 
> Dear Niklas,
> 
> Do you have an Endpoint with MPS=128? If so, you can also help verify the
> logic of the pci_configure_mps function. I don't have an Endpoint with
> MPS=128 here.

I imagine that it would be trivial to test with a PCIe controller running
in endpoint mode with the PCI endpoint subsystem in the kernel.
(As you should be able to set CAP.MPS before starting link training.)


> The processing logic of the pci_configure_mps function has been verified on
> our own SOC platform. Please refer to the following log.
> Our Root Port will set MPS=512.

(snip)

Ok, since it works to downgrade 512B to 256B, I would imagine that it also
would downgrade to 128B properly.


Kind regards,
Niklas
Niklas Cassel April 17, 2025, 7:04 a.m. UTC | #6
Hello Hans,

On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
> The RK3588's PCIe controller defaults to a 128-byte max payload size,
> but its hardware capability actually supports 256 bytes. This results
> in suboptimal performance with devices that support larger payloads.

Patch looks good to me, but please always reference the TRM when you can.

Before this patch:
		DevCap: MaxPayload 256 bytes
		DevCtl: MaxPayload 128 bytes


As per rk3588 TRM, section "11.4.3.8 DSP_PCIE_CAP Detail Registers Description"

DevCap is per the register description of DSP_PCIE_CAP_DEVICE_CAPABILITIES_REG,
field PCIE_CAP_MAX_PAYLOAD_SIZE.
Which claims that the value after reset is 0x1 (256B).

DevCtl is per the register description of
DSP_PCIE_CAP_DEVICE_CONTROL_DEVICE_STATUS, field PCIE_CAP_MAX_PAYLOAD_SIZE_CS.
Which claims that the reset value is 0x0 (128B).

Both of these match the values above.

As per the description of PCIE_CAP_MAX_PAYLOAD_SIZE_CS:
"Permissible values that
can be programmed are indicated by the Max_Payload_Size
Supported field (PCIE_CAP_MAX_PAYLOAD_SIZE) in the Device
Capabilities (DEVICE_CAPABILITIES_REG) register (for more
details, see section 7.5.3.3 of PCI Express Base Specification)."

So your patch looks good.

I guess I'm mostly surprised that e.g. pci_configure_mps() does not
already set DevCtl to the max(DevCap.MPS of the host, DevCap.MPS of the
endpoint).

Apparently pci_configure_mps() only decreases MPS from the reset values?
It never increases it?


Kind regards,
Niklas
Shawn Lin April 17, 2025, 7:08 a.m. UTC | #7
在 2025/04/17 星期四 15:04, Niklas Cassel 写道:
> Hello Hans,
> 
> On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
>> The RK3588's PCIe controller defaults to a 128-byte max payload size,
>> but its hardware capability actually supports 256 bytes. This results
>> in suboptimal performance with devices that support larger payloads.
> 
> Patch looks good to me, but please always reference the TRM when you can.
> 
> Before this patch:
> 		DevCap: MaxPayload 256 bytes
> 		DevCtl: MaxPayload 128 bytes
> 
> 
> As per rk3588 TRM, section "11.4.3.8 DSP_PCIE_CAP Detail Registers Description"
> 
> DevCap is per the register description of DSP_PCIE_CAP_DEVICE_CAPABILITIES_REG,
> field PCIE_CAP_MAX_PAYLOAD_SIZE.
> Which claims that the value after reset is 0x1 (256B).
> 
> DevCtl is per the register description of
> DSP_PCIE_CAP_DEVICE_CONTROL_DEVICE_STATUS, field PCIE_CAP_MAX_PAYLOAD_SIZE_CS.
> Which claims that the reset value is 0x0 (128B).
> 
> Both of these match the values above.
> 
> As per the description of PCIE_CAP_MAX_PAYLOAD_SIZE_CS:
> "Permissible values that
> can be programmed are indicated by the Max_Payload_Size
> Supported field (PCIE_CAP_MAX_PAYLOAD_SIZE) in the Device
> Capabilities (DEVICE_CAPABILITIES_REG) register (for more
> details, see section 7.5.3.3 of PCI Express Base Specification)."
> 
> So your patch looks good.
> 
> I guess I'm mostly surprised that the e.g. pci_configure_mps() does not
> already set DevCtl to the max(DevCap.MPS of the host, DevCap.MPS of the
> endpoint).
> 
> Apparently pci_configure_mps() only decreases MPS from the reset values?
> It never increases it?
> 

Actually it does:

https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L4757

> 
> Kind regards,
> Niklas
> 
>
Niklas Cassel April 17, 2025, 7:22 a.m. UTC | #8
On Thu, Apr 17, 2025 at 03:08:34PM +0800, Shawn Lin wrote:
> 在 2025/04/17 星期四 15:04, Niklas Cassel 写道:
> > Hello Hans,
> > 
> > On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
> > > The RK3588's PCIe controller defaults to a 128-byte max payload size,
> > > but its hardware capability actually supports 256 bytes. This results
> > > in suboptimal performance with devices that support larger payloads.
> > 
> > Patch looks good to me, but please always reference the TRM when you can.
> > 
> > Before this patch:
> > 		DevCap: MaxPayload 256 bytes
> > 		DevCtl: MaxPayload 128 bytes
> > 
> > 
> > As per rk3588 TRM, section "11.4.3.8 DSP_PCIE_CAP Detail Registers Description"
> > 
> > DevCap is per the register description of DSP_PCIE_CAP_DEVICE_CAPABILITIES_REG,
> > field PCIE_CAP_MAX_PAYLOAD_SIZE.
> > Which claims that the value after reset is 0x1 (256B).
> > 
> > DevCtl is per the register description of
> > DSP_PCIE_CAP_DEVICE_CONTROL_DEVICE_STATUS, field PCIE_CAP_MAX_PAYLOAD_SIZE_CS.
> > Which claims that the reset value is 0x0 (128B).
> > 
> > Both of these match the values above.
> > 
> > As per the description of PCIE_CAP_MAX_PAYLOAD_SIZE_CS:
> > "Permissible values that
> > can be programmed are indicated by the Max_Payload_Size
> > Supported field (PCIE_CAP_MAX_PAYLOAD_SIZE) in the Device
> > Capabilities (DEVICE_CAPABILITIES_REG) register (for more
> > details, see section 7.5.3.3 of PCI Express Base Specification)."
> > 
> > So your patch looks good.
> > 
> > I guess I'm mostly surprised that the e.g. pci_configure_mps() does not
> > already set DevCtl to the max(DevCap.MPS of the host, DevCap.MPS of the
> > endpoint).
> > 
> > Apparently pci_configure_mps() only decreases MPS from the reset values?
> > It never increases it?
> > 
> 
> Actually it does:
> 
> https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L4757

If that is the case, then explain the before/after with Hans's lspci output here:
https://lore.kernel.org/linux-pci/bb40385c-6839-484c-90b2-d6c7ecb95ba9@163.com/

His patch changes the default value of DevCtl.MPS (from 128B to 256B), but if
pci_configure_mps() can bump DevCtl.MPS to a higher value, his patch should not
be needed, since the EP (an NVMe SSD in his case) has DevCap.MPS 512B, and the
RC itself has DevCap.MPS 256B.

Seems like we are missing something here.


Kind regards,
Niklas
Shawn Lin April 17, 2025, 7:25 a.m. UTC | #9
在 2025/04/17 星期四 15:22, Niklas Cassel 写道:
> On Thu, Apr 17, 2025 at 03:08:34PM +0800, Shawn Lin wrote:
>> 在 2025/04/17 星期四 15:04, Niklas Cassel 写道:
>>> Hello Hans,
>>>
>>> On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
>>>> The RK3588's PCIe controller defaults to a 128-byte max payload size,
>>>> but its hardware capability actually supports 256 bytes. This results
>>>> in suboptimal performance with devices that support larger payloads.
>>>
>>> Patch looks good to me, but please always reference the TRM when you can.
>>>
>>> Before this patch:
>>> 		DevCap: MaxPayload 256 bytes
>>> 		DevCtl: MaxPayload 128 bytes
>>>
>>>
>>> As per rk3588 TRM, section "11.4.3.8 DSP_PCIE_CAP Detail Registers Description"
>>>
>>> DevCap is per the register description of DSP_PCIE_CAP_DEVICE_CAPABILITIES_REG,
>>> field PCIE_CAP_MAX_PAYLOAD_SIZE.
>>> Which claims that the value after reset is 0x1 (256B).
>>>
>>> DevCtl is per the register description of
>>> DSP_PCIE_CAP_DEVICE_CONTROL_DEVICE_STATUS, field PCIE_CAP_MAX_PAYLOAD_SIZE_CS.
>>> Which claims that the reset value is 0x0 (128B).
>>>
>>> Both of these match the values above.
>>>
>>> As per the description of PCIE_CAP_MAX_PAYLOAD_SIZE_CS:
>>> "Permissible values that
>>> can be programmed are indicated by the Max_Payload_Size
>>> Supported field (PCIE_CAP_MAX_PAYLOAD_SIZE) in the Device
>>> Capabilities (DEVICE_CAPABILITIES_REG) register (for more
>>> details, see section 7.5.3.3 of PCI Express Base Specification)."
>>>
>>> So your patch looks good.
>>>
>>> I guess I'm mostly surprised that the e.g. pci_configure_mps() does not
>>> already set DevCtl to the max(DevCap.MPS of the host, DevCap.MPS of the
>>> endpoint).
>>>
>>> Apparently pci_configure_mps() only decreases MPS from the reset values?
>>> It never increases it?
>>>
>>
>> Actually it does:
>>
>> https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L4757
> 
> If that is the case, then explain the before/after with Hans lspci output here:
> https://lore.kernel.org/linux-pci/bb40385c-6839-484c-90b2-d6c7ecb95ba9@163.com/
> 
> His patch changes the default value of DevCtl.MPS (from 128B to 256B), but if
> pci_configure_mps() can bump DevCtl.MPS to a higher value, his patch should not
> be needed, since the EP (an NVMe SSD in his case) has DevCap.MPS 512B, and the
> RC itself has DevCap.MPS 256B.
> 
> Seems like we are missing something here.

So Hans, could you please help set pci=pcie_bus_safe or
pci=pcie_bus_perf in your cmdline, and see how the lspci dump differs
without your patch?

> 
> 
> Kind regards,
> Niklas
> 
>
Niklas Cassel April 17, 2025, 7:48 a.m. UTC | #10
On Thu, Apr 17, 2025 at 03:25:06PM +0800, Shawn Lin wrote:
> 在 2025/04/17 星期四 15:22, Niklas Cassel 写道:
> > On Thu, Apr 17, 2025 at 03:08:34PM +0800, Shawn Lin wrote:
> > > 在 2025/04/17 星期四 15:04, Niklas Cassel 写道:
> > > > Hello Hans,
> > > > 
> > > > On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
> > > > > The RK3588's PCIe controller defaults to a 128-byte max payload size,
> > > > > but its hardware capability actually supports 256 bytes. This results
> > > > > in suboptimal performance with devices that support larger payloads.
> > > > 
> > > > Patch looks good to me, but please always reference the TRM when you can.
> > > > 
> > > > Before this patch:
> > > > 		DevCap: MaxPayload 256 bytes
> > > > 		DevCtl: MaxPayload 128 bytes
> > > > 
> > > > 
> > > > As per rk3588 TRM, section "11.4.3.8 DSP_PCIE_CAP Detail Registers Description"
> > > > 
> > > > DevCap is per the register description of DSP_PCIE_CAP_DEVICE_CAPABILITIES_REG,
> > > > field PCIE_CAP_MAX_PAYLOAD_SIZE.
> > > > Which claims that the value after reset is 0x1 (256B).
> > > > 
> > > > DevCtl is per the register description of
> > > > DSP_PCIE_CAP_DEVICE_CONTROL_DEVICE_STATUS, field PCIE_CAP_MAX_PAYLOAD_SIZE_CS.
> > > > Which claims that the reset value is 0x0 (128B).
> > > > 
> > > > Both of these match the values above.
> > > > 
> > > > As per the description of PCIE_CAP_MAX_PAYLOAD_SIZE_CS:
> > > > "Permissible values that
> > > > can be programmed are indicated by the Max_Payload_Size
> > > > Supported field (PCIE_CAP_MAX_PAYLOAD_SIZE) in the Device
> > > > Capabilities (DEVICE_CAPABILITIES_REG) register (for more
> > > > details, see section 7.5.3.3 of PCI Express Base Specification)."
> > > > 
> > > > So your patch looks good.
> > > > 
> > > > I guess I'm mostly surprised that the e.g. pci_configure_mps() does not
> > > > already set DevCtl to the max(DevCap.MPS of the host, DevCap.MPS of the
> > > > endpoint).
> > > > 
> > > > Apparently pci_configure_mps() only decreases MPS from the reset values?
> > > > It never increases it?
> > > > 
> > > 
> > > Actually it does:
> > > 
> > > https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L4757
> > 
> > If that is the case, then explain the before/after with Hans lspci output here:
> > https://lore.kernel.org/linux-pci/bb40385c-6839-484c-90b2-d6c7ecb95ba9@163.com/
> > 
> > His patch changes the default value of DevCtl.MPS (from 128B to 256B), but if
> > pci_configure_mps() can bump DevCtl.MPS to a higher value, his patch should not
> > be needed, since the EP (an NVMe SSD in his case) has DevCap.MPS 512B, and the
> > RC itself has DevCap.MPS 256B.
> > 
> > Seems like we are missing something here.
> 
> So Hans, could you please help set pci=pcie_bus_safe or
> pci=pcie_bus_perf in your cmdline, and see how lspci dump different
> without your patch?

It seems that the default MPS strategy can be set using Kconfigs:
https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/pci.c#L126-L136
https://github.com/torvalds/linux/blob/v6.15-rc2/include/linux/pci.h#L1110-L1116

Note that these Kconfigs are hidden behind CONFIG_EXPERT.
So unless you have explicitly set one of these Kconfigs, the default should be:
PCIE_BUS_DEFAULT,	/* Ensure MPS matches upstream bridge */


Kind regards,
Niklas
Hans Zhang April 17, 2025, 8:07 a.m. UTC | #11
On 2025/4/17 15:48, Niklas Cassel wrote:
> On Thu, Apr 17, 2025 at 03:25:06PM +0800, Shawn Lin wrote:
>> 在 2025/04/17 星期四 15:22, Niklas Cassel 写道:
>>> On Thu, Apr 17, 2025 at 03:08:34PM +0800, Shawn Lin wrote:
>>>> 在 2025/04/17 星期四 15:04, Niklas Cassel 写道:
>>>>> Hello Hans,
>>>>>
>>>>> On Wed, Apr 16, 2025 at 11:19:26PM +0800, Hans Zhang wrote:
>>>>>> The RK3588's PCIe controller defaults to a 128-byte max payload size,
>>>>>> but its hardware capability actually supports 256 bytes. This results
>>>>>> in suboptimal performance with devices that support larger payloads.
>>>>>
>>>>> Patch looks good to me, but please always reference the TRM when you can.
>>>>>
>>>>> Before this patch:
>>>>> 		DevCap: MaxPayload 256 bytes
>>>>> 		DevCtl: MaxPayload 128 bytes
>>>>>
>>>>>
>>>>> As per rk3588 TRM, section "11.4.3.8 DSP_PCIE_CAP Detail Registers Description"
>>>>>
>>>>> DevCap is per the register description of DSP_PCIE_CAP_DEVICE_CAPABILITIES_REG,
>>>>> field PCIE_CAP_MAX_PAYLOAD_SIZE.
>>>>> Which claims that the value after reset is 0x1 (256B).
>>>>>
>>>>> DevCtl is per the register description of
>>>>> DSP_PCIE_CAP_DEVICE_CONTROL_DEVICE_STATUS, field PCIE_CAP_MAX_PAYLOAD_SIZE_CS.
>>>>> Which claims that the reset value is 0x0 (128B).
>>>>>
>>>>> Both of these match the values above.
>>>>>
>>>>> As per the description of PCIE_CAP_MAX_PAYLOAD_SIZE_CS:
>>>>> "Permissible values that
>>>>> can be programmed are indicated by the Max_Payload_Size
>>>>> Supported field (PCIE_CAP_MAX_PAYLOAD_SIZE) in the Device
>>>>> Capabilities (DEVICE_CAPABILITIES_REG) register (for more
>>>>> details, see section 7.5.3.3 of PCI Express Base Specification)."
>>>>>
>>>>> So your patch looks good.
>>>>>
>>>>> I guess I'm mostly surprised that the e.g. pci_configure_mps() does not
>>>>> already set DevCtl to the max(DevCap.MPS of the host, DevCap.MPS of the
>>>>> endpoint).
>>>>>
>>>>> Apparently pci_configure_mps() only decreases MPS from the reset values?
>>>>> It never increases it?
>>>>>
>>>>
>>>> Actually it does:
>>>>
>>>> https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L4757
>>>
>>> If that is the case, then explain the before/after with Hans lspci output here:
>>> https://lore.kernel.org/linux-pci/bb40385c-6839-484c-90b2-d6c7ecb95ba9@163.com/
>>>
>>> His patch changes the default value of DevCtl.MPS (from 128B to 256B), but if
>>> pci_configure_mps() can bump DevCtl.MPS to a higher value, his patch should not
>>> be needed, since the EP (an NVMe SSD in his case) has DevCap.MPS 512B, and the
>>> RC itself has DevCap.MPS 256B.
>>>
>>> Seems like we are missing something here.
>>
>> So Hans, could you please help set pci=pcie_bus_safe or
>> pci=pcie_bus_perf in your cmdline, and see how lspci dump different
>> without your patch?
> 
> It seems that the default MPS strategy can be set using Kconfigs:
> https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/pci.c#L126-L136
> https://github.com/torvalds/linux/blob/v6.15-rc2/include/linux/pci.h#L1110-L1116
> 
> Note that the these Kconfigs are hidden behind CONFIG_EXPERT.
> So unless you have explicitly set one of these Kconfigs, the default should be:
> PCIE_BUS_DEFAULT,	/* Ensure MPS matches upstream bridge */


Hi Niklas and Shawn,

Thank you very much for your discussion and reply.

I tested it on RK3588 and our platform. By setting pci=pcie_bus_safe, 
the maximum MPS will be automatically matched in the end.

So is my patch no longer needed? For RK3588, does the customer have to 
configure CONFIG_PCIE_BUS_SAFE or pci=pcie_bus_safe?

Also, for pci-meson.c, can the meson_set_max_payload be deleted?


Best regards,
Hans
Niklas Cassel April 17, 2025, 8:39 a.m. UTC | #12
On Thu, Apr 17, 2025 at 04:07:51PM +0800, Hans Zhang wrote:
> On 2025/4/17 15:48, Niklas Cassel wrote:
> 
> Hi Niklas and Shawn,
> 
> Thank you very much for your discussion and reply.
> 
> I tested it on RK3588 and our platform. By setting pci=pcie_bus_safe, the
> maximum MPS will be automatically matched in the end.
> 
> So is my patch no longer needed? For RK3588, does the customer have to
> configure CONFIG_PCIE_BUS_SAFE or pci=pcie_bus_safe?
> 
> Also, for pci-meson.c, can the meson_set_max_payload be deleted?

I think the only reason why this works is because
pcie_bus_configure_settings(), in the case of
pcie_bus_config == PCIE_BUS_SAFE, will walk the bus and set MPS in
the bridge to the lowest of the downstream devices:
https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/probe.c#L2994-L2999


So Hans, if you look at lspci for the other RCs/bridges that don't
have any downstream devices connected, do they also show DevCtl.MPS 256B
or do they still show 128B ?


One could argue that for all policies (except for maybe PCIE_BUS_TUNE_OFF),
pcie_bus_configure_settings() should start off by initializing DevCtl.MPS to
DevCap.MPS (for the bridge itself), and after that pcie_bus_configure_settings()
can override it depending on policy, e.g. set MPS to 128B in case of
pcie_bus_config == PCIE_BUS_PEER2PEER, or walk the bus in case of
pcie_bus_config == PCIE_BUS_SAFE.

That way, we should be able to remove the setting for pci-meson.c as well.

Bjorn, thoughts?


Kind regards,
Niklas
Hans Zhang April 17, 2025, 9:48 a.m. UTC | #13
On 2025/4/17 16:39, Niklas Cassel wrote:
> On Thu, Apr 17, 2025 at 04:07:51PM +0800, Hans Zhang wrote:
>> On 2025/4/17 15:48, Niklas Cassel wrote:
>>
>> Hi Niklas and Shawn,
>>
>> Thank you very much for your discussion and reply.
>>
>> I tested it on RK3588 and our platform. By setting pci=pcie_bus_safe, the
>> maximum MPS will be automatically matched in the end.
>>
>> So is my patch no longer needed? For RK3588, does the customer have to
>> configure CONFIG_PCIE_BUS_SAFE or pci=pcie_bus_safe?
>>
>> Also, for pci-meson.c, can the meson_set_max_payload be deleted?
> 
> I think the only reason why this works is because
> pcie_bus_configure_settings(), in the case of
> pcie_bus_config == PCIE_BUS_SAFE, will walk the bus and set MPS in
> the bridge to the lowest of the downstream devices:
> https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/probe.c#L2994-L2999
> 
> 
> So Hans, if you look at lspci for the other RCs/bridges that don't
> have any downstream devices connected, do they also show DevCtl.MPS 256B
> or do they still show 128B ?
> 

Hi Niklas,

It will show DevCtl.MPS 256B.


root@firefly:~# lspci
00:00.0 PCI bridge: Fuzhou Rockchip Electronics Co., Ltd Device 3588 
(rev 01)
root@firefly:~# lspci -vvv
00:00.0 PCI bridge: Fuzhou Rockchip Electronics Co., Ltd Device 3588 
(rev 01) (prog-if 00 [Normal decode])
         Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- 
ParErr- Stepping- SERR+ FastB2B- DisINTx+
         Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- >SERR- <PERR- INTx-
         Latency: 0
         Interrupt: pin A routed to IRQ 79
         Bus: primary=00, secondary=01, subordinate=ff, sec-latency=0
         I/O behind bridge: 0000f000-00000fff [disabled]
         Memory behind bridge: fff00000-000fffff [disabled]
         Prefetchable memory behind bridge: 
00000000fff00000-00000000000fffff [disabled]
         Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- 
<TAbort- <MAbort- <SERR- <PERR-
         Expansion ROM at f0200000 [virtual] [disabled] [size=64K]
         BridgeCtl: Parity- SERR+ NoISA- VGA- VGA16- MAbort- >Reset- 
FastB2B-
                 PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
         Capabilities: [40] Power Management version 3
                 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA 
PME(D0+,D1+,D2-,D3hot+,D3cold-)
                 Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
         Capabilities: [50] MSI: Enable+ Count=16/32 Maskable+ 64bit+
                 Address: 00000000fe670040  Data: 0000
                 Masking: fffffeff  Pending: 00000000
         Capabilities: [70] Express (v2) Root Port (Slot-), MSI 08
                 DevCap: MaxPayload 256 bytes, PhantFunc 0
                         ExtTag+ RBE+
                 DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
                         RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop-
                         MaxPayload 256 bytes, MaxReadReq 512 bytes

Best regards,
Hans

> 
> One could argue that for all policies (execept for maybe PCIE_BUS_TUNE_OFF),
> pcie_bus_configure_settings() should start off by initializing DevCtl.MPS to
> DevCap.MPS (for the bridge itself), and after that pcie_bus_configure_settings()
> can override it depending on policy, e.g. set MPS to 128B in case of
> pcie_bus_config == PCIE_BUS_PEER2PEER, or walk the bus in case of
> pcie_bus_config == PCIE_BUS_SAFE.
> 
> That way, we should be able to remove the setting for pci-meson.c as well.
> 
> Bjorn, thoughts?
> 
> 
> Kind regards,
> Niklas
Niklas Cassel April 17, 2025, 9:54 a.m. UTC | #14
On Thu, Apr 17, 2025 at 05:48:04PM +0800, Hans Zhang wrote:
> 
> 
> On 2025/4/17 16:39, Niklas Cassel wrote:
> > On Thu, Apr 17, 2025 at 04:07:51PM +0800, Hans Zhang wrote:
> > > On 2025/4/17 15:48, Niklas Cassel wrote:
> > > 
> > > Hi Niklas and Shawn,
> > > 
> > > Thank you very much for your discussion and reply.
> > > 
> > > I tested it on RK3588 and our platform. By setting pci=pcie_bus_safe, the
> > > maximum MPS will be automatically matched in the end.
> > > 
> > > So is my patch no longer needed? For RK3588, does the customer have to
> > > configure CONFIG_PCIE_BUS_SAFE or pci=pcie_bus_safe?
> > > 
> > > Also, for pci-meson.c, can the meson_set_max_payload be deleted?
> > 
> > I think the only reason why this works is because
> > pcie_bus_configure_settings(), in the case of
> > pcie_bus_config == PCIE_BUS_SAFE, will walk the bus and set MPS in
> > the bridge to the lowest of the downstream devices:
> > https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/probe.c#L2994-L2999
> > 
> > 
> > So Hans, if you look at lspci for the other RCs/bridges that don't
> > have any downstream devices connected, do they also show DevCtl.MPS 256B
> > or do they still show 128B ?
> > 
> 
> Hi Niklas,
> 
> It will show DevCtl.MPS 256B.

Ok.

I guess that just means that the bridge itself is included in pci_walk_bus().

Let's wait and see what people think about my proposal earlier in the thread,
or if someone can think of something better.


Kind regards,
Niklas
Bjorn Helgaas April 17, 2025, 4:52 p.m. UTC | #15
On Thu, Apr 17, 2025 at 10:39:49AM +0200, Niklas Cassel wrote:
> On Thu, Apr 17, 2025 at 04:07:51PM +0800, Hans Zhang wrote:
> > On 2025/4/17 15:48, Niklas Cassel wrote:
> > 
> > Hi Niklas and Shawn,
> > 
> > Thank you very much for your discussion and reply.
> > 
> > I tested it on RK3588 and our platform. By setting pci=pcie_bus_safe, the
> > maximum MPS will be automatically matched in the end.
> > 
> > So is my patch no longer needed? For RK3588, does the customer have to
> > configure CONFIG_PCIE_BUS_SAFE or pci=pcie_bus_safe?
> > 
> > Also, for pci-meson.c, can the meson_set_max_payload be deleted?
> 
> I think the only reason why this works is because
> pcie_bus_configure_settings(), in the case of
> pcie_bus_config == PCIE_BUS_SAFE, will walk the bus and set MPS in
> the bridge to the lowest of the downstream devices:
> https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/probe.c#L2994-L2999
> 
> So Hans, if you look at lspci for the other RCs/bridges that don't
> have any downstream devices connected, do they also show DevCtl.MPS 256B
> or do they still show 128B ?
> 
> One could argue that for all policies (execept for maybe PCIE_BUS_TUNE_OFF),
> pcie_bus_configure_settings() should start off by initializing DevCtl.MPS to
> DevCap.MPS (for the bridge itself), and after that pcie_bus_configure_settings()
> can override it depending on policy, e.g. set MPS to 128B in case of
> pcie_bus_config == PCIE_BUS_PEER2PEER, or walk the bus in case of
> pcie_bus_config == PCIE_BUS_SAFE.
> 
> That way, we should be able to remove the setting for pci-meson.c as well.

Thanks, I came here to say basically the same thing.  Ideally I think
the generic code in pcie_bus_configure_settings() should be able to
increase MPS or decrease it such that neither meson_set_max_payload()
nor rockchip_pcie_set_max_payload() is required.

However, the requirement to pick a Kconfig setting makes it a mess.  I
would love to get rid of those Kconfig symbols.  I don't like the
command-line parameters either, but it would definitely be an
improvement if we could nuke the Kconfig symbols and rely on the
command-line parameters.

It's also a problem when devices are hot-added after the hierarchy has
already been set up because the new device might not work correctly in
the existing config.

It's a hard problem to solve.

For new platforms without an install base, maybe it would be easier to
rely on the command-line parameters since there aren't a bunch of
users that would have to change the Kconfig.

Bjorn
Hans Zhang April 18, 2025, 12:33 p.m. UTC | #16
On 2025/4/18 00:52, Bjorn Helgaas wrote:
> On Thu, Apr 17, 2025 at 10:39:49AM +0200, Niklas Cassel wrote:
>> On Thu, Apr 17, 2025 at 04:07:51PM +0800, Hans Zhang wrote:
>>> On 2025/4/17 15:48, Niklas Cassel wrote:
>>>
>>> Hi Niklas and Shawn,
>>>
>>> Thank you very much for your discussion and reply.
>>>
>>> I tested it on RK3588 and our platform. By setting pci=pcie_bus_safe, the
>>> maximum MPS will be automatically matched in the end.
>>>
>>> So is my patch no longer needed? For RK3588, does the customer have to
>>> configure CONFIG_PCIE_BUS_SAFE or pci=pcie_bus_safe?
>>>
>>> Also, for pci-meson.c, can the meson_set_max_payload be deleted?
>>
>> I think the only reason why this works is because
>> pcie_bus_configure_settings(), in the case of
>> pcie_bus_config == PCIE_BUS_SAFE, will walk the bus and set MPS in
>> the bridge to the lowest of the downstream devices:
>> https://github.com/torvalds/linux/blob/v6.15-rc2/drivers/pci/probe.c#L2994-L2999
>>
>> So Hans, if you look at lspci for the other RCs/bridges that don't
>> have any downstream devices connected, do they also show DevCtl.MPS 256B
>> or do they still show 128B ?
>>
>> One could argue that for all policies (execept for maybe PCIE_BUS_TUNE_OFF),
>> pcie_bus_configure_settings() should start off by initializing DevCtl.MPS to
>> DevCap.MPS (for the bridge itself), and after that pcie_bus_configure_settings()
>> can override it depending on policy, e.g. set MPS to 128B in case of
>> pcie_bus_config == PCIE_BUS_PEER2PEER, or walk the bus in case of
>> pcie_bus_config == PCIE_BUS_SAFE.
>>
>> That way, we should be able to remove the setting for pci-meson.c as well.
> 
> Thanks, I came here to say basically the same thing.  Ideally I think
> the generic code in pcie_bus_configure_settings() should be able to
> increase MPS or decrease it such that neither meson_set_max_payload()
> nor rockchip_pcie_set_max_payload() is required.
> 
> However, the requirement to pick a Kconfig setting makes it a mess.  I
> would love to get rid of those Kconfig symbols.  I don't like the
> command-line parameters either, but it would definitely be an
> improvement if we could nuke the Kconfig symbols and rely on the
> command-line parameters.
> 
> It's also a problem when devices are hot-added after the hierarchy has
> already been set up because the new device might not work correctly in
> the existing config.
> 
> It's a hard problem to solve.
> 
> For new platforms without an install base, maybe it would be easier to
> rely on the command-line parameters since there aren't a bunch of
> users that would have to change the Kconfig.
> 

Dear Bjorn,

Thank you for your reply. Niklas and I attempted to modify the relevant 
logic in drivers/pci/probe.c and found that there was a lot of code 
judging the global variable pcie_bus_config. At present, there is no 
good method. I will keep trying.

I wonder if you have any good suggestions? It seems that the code logic 
regarding pcie_bus_config is a little complicated and cannot be modified 
for the time being?

Best regards,
Hans
Niklas Cassel April 18, 2025, 2:55 p.m. UTC | #17
On 18 April 2025 14:33:08 CEST, Hans Zhang <18255117159@163.com> wrote:
>
>Dear Bjorn,
>
>Thanks your for reply. Niklas and I attempted to modify the relevant logic in drivers/pci/probe.c and found that there was a lot of code judging the global variable pcie_bus_config. At present, there is no good method. I will keep trying.
>
>I wonder if you have any good suggestions? It seems that the code logic regarding pcie_bus_config is a little complicated and cannot be modified for the time being?


Hans,

If:

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 364fa2a514f8..2e1c92fdd577 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2983,6 +2983,13 @@ void pcie_bus_configure_settings(struct pci_bus *bus)
         if (!pci_is_pcie(bus->self))
                 return;
  +       /*
+        * Start off with DevCtl.MPS == DevCap.MPS, unless PCIE_BUS_TUNE_OFF.
+        * This might get overridden by an MPS strategy below.
+        */
+       if (pcie_bus_config != PCIE_BUS_TUNE_OFF)
+               smpss = pcie_get_mps(bus->self);
+
         /*
          * FIXME - Peer to peer DMA is possible, though the endpoint would need
          * to be aware of the MPS of the destination.  To work around this,



does not work, can't you modify the code slightly so that it works?

I haven't tried myself, but considering that it works when walking the bus, it seems that it should be possible to get something working.


Kind regards,
Niklas
Bjorn Helgaas April 18, 2025, 4:21 p.m. UTC | #18
On Fri, Apr 18, 2025 at 04:55:13PM +0200, Niklas Cassel wrote:
> On 18 April 2025 14:33:08 CEST, Hans Zhang <18255117159@163.com> wrote:
> >Thanks your for reply. Niklas and I attempted to modify the
> >relevant logic in drivers/pci/probe.c and found that there was a
> >lot of code judging the global variable pcie_bus_config. At
> >present, there is no good method. I will keep trying.
> >
> >I wonder if you have any good suggestions? It seems that the code
> >logic regarding pcie_bus_config is a little complicated and cannot
> >be modified for the time being?
> 
> If:
> 
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 364fa2a514f8..2e1c92fdd577 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2983,6 +2983,13 @@ void pcie_bus_configure_settings(struct pci_bus *bus)
>          if (!pci_is_pcie(bus->self))
>                  return;
>   +       /*
> +        * Start off with DevCtl.MPS == DevCap.MPS, unless PCIE_BUS_TUNE_OFF.
> +        * This might get overriden by a MPS strategy below.
> +        */
> +       if (pcie_bus_config != PCIE_BUS_TUNE_OFF)
> +               smpss = pcie_get_mps(bus->self);
> +
>          /*
>           * FIXME - Peer to peer DMA is possible, though the endpoint would need
>           * to be aware of the MPS of the destination.  To work around this,
> 
> 
> 
> does not work, can't you modify the code slightly so that it works?
> 
> I haven't tried myself, but considering that it works when walking
> the bus, it seems that it should be possible to get something
> working.

Thanks, Niklas, this seems like a reasonable place to start.
Hopefully we can drop the controller-specific quirks since there's
nothing controller-specific about them.

Bjorn
Hans Zhang April 18, 2025, 5:21 p.m. UTC | #19
On 2025/4/18 22:55, Niklas Cassel wrote:
> 
> 
> On 18 April 2025 14:33:08 CEST, Hans Zhang <18255117159@163.com> wrote:
>>
>> Dear Bjorn,
>>
>> Thanks your for reply. Niklas and I attempted to modify the relevant logic in drivers/pci/probe.c and found that there was a lot of code judging the global variable pcie_bus_config. At present, there is no good method. I will keep trying.
>>
>> I wonder if you have any good suggestions? It seems that the code logic regarding pcie_bus_config is a little complicated and cannot be modified for the time being?
> 
> 
> Hans,
> 
> If:
> 
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 364fa2a514f8..2e1c92fdd577 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2983,6 +2983,13 @@ void pcie_bus_configure_settings(struct pci_bus *bus)
>           if (!pci_is_pcie(bus->self))
>                   return;
>    +       /*
> +        * Start off with DevCtl.MPS == DevCap.MPS, unless PCIE_BUS_TUNE_OFF.
> +        * This might get overriden by a MPS strategy below.
> +        */
> +       if (pcie_bus_config != PCIE_BUS_TUNE_OFF)
> +               smpss = pcie_get_mps(bus->self);
> +

Dear Niklas,

Thank you very much for your reply and thoughts.

pcie_get_mps: Returns maximum payload size in bytes

I guess you want to obtain the DevCap MPS. But the purpose of the smpss 
variable is to save the DevCtl MPS.

>           /*
>            * FIXME - Peer to peer DMA is possible, though the endpoint would need
>            * to be aware of the MPS of the destination.  To work around this,
> 
> 
> 
> does not work, can't you modify the code slightly so that it works?
> 
> I haven't tried myself, but considering that it works when walking the bus, it seems that it should be possible to get something working.
> 


After making the following modifications, my testing shows that it works
as expected. In case I have overlooked something, could Bjorn and Niklas
please review my changes?

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 364fa2a514f8..5b54f1b0a91d 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2951,8 +2951,7 @@ static int pcie_bus_configure_set(struct pci_dev 
*dev, void *data)
         if (!pci_is_pcie(dev))
                 return 0;

-       if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
-           pcie_bus_config == PCIE_BUS_DEFAULT)
+       if (pcie_bus_config == PCIE_BUS_TUNE_OFF)
                 return 0;

         mps = 128 << *(u8 *)data;
@@ -2991,7 +2990,8 @@ void pcie_bus_configure_settings(struct pci_bus *bus)
         if (pcie_bus_config == PCIE_BUS_PEER2PEER)
                 smpss = 0;

-       if (pcie_bus_config == PCIE_BUS_SAFE) {
+       if ((pcie_bus_config == PCIE_BUS_SAFE) ||
+           (pcie_bus_config != PCIE_BUS_TUNE_OFF)) {
                 smpss = bus->self->pcie_mpss;

                 pcie_find_smpss(bus->self, &smpss);


Best regards,
Hans
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index c624b7ebd118..5bbb536a2576 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -477,6 +477,22 @@  static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static void rockchip_pcie_set_max_payload(struct rockchip_pcie *rockchip)
+{
+	struct dw_pcie *pci = &rockchip->pci;
+	u32 dev_cap, dev_ctrl;
+	u16 offset;
+
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	dev_cap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCAP);
+	dev_cap &= PCI_EXP_DEVCAP_PAYLOAD;
+
+	dev_ctrl = dw_pcie_readl_dbi(pci, offset + PCI_EXP_DEVCTL);
+	dev_ctrl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+	dev_ctrl |= dev_cap << 5;
+	dw_pcie_writel_dbi(pci, offset + PCI_EXP_DEVCTL, dev_ctrl);
+}
+
 static int rockchip_pcie_configure_rc(struct platform_device *pdev,
 				      struct rockchip_pcie *rockchip)
 {
@@ -511,6 +527,8 @@  static int rockchip_pcie_configure_rc(struct platform_device *pdev,
 	pp->ops = &rockchip_pcie_host_ops;
 	pp->use_linkup_irq = true;
 
+	rockchip_pcie_set_max_payload(rockchip);
+
 	ret = dw_pcie_host_init(pp);
 	if (ret) {
 		dev_err(dev, "failed to initialize host\n");