Message ID | 20230308090313.1653-5-damien.lemoal@opensource.wdc.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PCI endpoint fixes and improvements | expand |
On Wed, Mar 08, 2023 at 06:03:01PM +0900, Damien Le Moal wrote: > pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not > handling DMA transfer completion correctly, leading to completion > notifications to the RP side that are too early. This problem can be Can you say RC which stands for Root Complex instead of RP? > detected when the RP side is running an IOMMU with messages such as: > > pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT > domain=0x001c address=0xfff00000 flags=0x0000] > > When running the pcitest.sh tests: the address used for a previous > test transfer generates the above error while the next test transfer is > running. > > Fix this by testing the dma transfer status in > pci_epf_test_dma_callback() and notifying the completion only when the > transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in > pci_epf_test_data_transfer(), be paranoid and check again the transfer > status and always call dmaengine_terminate_sync() before returning. > > Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities") > Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com> > --- > drivers/pci/endpoint/functions/pci-epf-test.c | 40 ++++++++++++++----- > 1 file changed, 29 insertions(+), 11 deletions(-) > > diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c > index d65419735d2e..557fbb91c729 100644 > --- a/drivers/pci/endpoint/functions/pci-epf-test.c > +++ b/drivers/pci/endpoint/functions/pci-epf-test.c > @@ -54,6 +54,9 @@ struct pci_epf_test { > struct delayed_work cmd_handler; > struct dma_chan *dma_chan_tx; > struct dma_chan *dma_chan_rx; > + struct dma_chan *transfer_chan; > + dma_cookie_t transfer_cookie; > + enum dma_status transfer_status; > struct completion transfer_complete; > bool dma_supported; > bool dma_private; > @@ -85,8 +88,14 @@ static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 }; > static void pci_epf_test_dma_callback(void *param) > { > struct pci_epf_test *epf_test = param; > - > - complete(&epf_test->transfer_complete); > + struct dma_tx_state state; > + > + epf_test->transfer_status = > + dmaengine_tx_status(epf_test->transfer_chan, > + epf_test->transfer_cookie, &state); > + if (epf_test->transfer_status == DMA_COMPLETE || > + epf_test->transfer_status == DMA_ERROR) > + complete(&epf_test->transfer_complete); > } > > /** > @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, > struct dma_async_tx_descriptor *tx; > struct dma_slave_config sconf = {}; > struct device *dev = &epf->dev; > - dma_cookie_t cookie; > int ret; > > if (IS_ERR_OR_NULL(chan)) { > @@ -152,25 +160,35 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, > } > > reinit_completion(&epf_test->transfer_complete); > + epf_test->transfer_chan = chan; > tx->callback = pci_epf_test_dma_callback; > tx->callback_param = epf_test; > - cookie = tx->tx_submit(tx); > + epf_test->transfer_cookie = tx->tx_submit(tx); > > - ret = dma_submit_error(cookie); > + ret = dma_submit_error(epf_test->transfer_cookie); > if (ret) { > - dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie); > - return -EIO; > + dev_err(dev, "Failed to do DMA tx_submit %d\n", ret); > + goto terminate; > } > > dma_async_issue_pending(chan); > ret = wait_for_completion_interruptible(&epf_test->transfer_complete); > if (ret < 0) { > - dmaengine_terminate_sync(chan); > - dev_err(dev, "DMA wait_for_completion_timeout\n"); > - return -ETIMEDOUT; > + dev_err(dev, "DMA wait_for_completion interrupted\n"); > + goto terminate; > } > > - return 0; > + if (epf_test->transfer_status == DMA_ERROR) { > + dev_err(dev, "DMA transfer failed\n"); > + ret = -EIO; > + } > + > + WARN_ON(epf_test->transfer_status != DMA_COMPLETE); Why do you need this check? Even if required, WARN_ON is superfluous here. Thanks, Mani > + > +terminate: > + dmaengine_terminate_sync(chan); > + > + return ret; > } > > struct epf_dma_filter { > -- > 2.39.2 >
On 2023/03/16 0:20, Manivannan Sadhasivam wrote: >> @@ -152,25 +160,35 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, >> } >> >> reinit_completion(&epf_test->transfer_complete); >> + epf_test->transfer_chan = chan; >> tx->callback = pci_epf_test_dma_callback; >> tx->callback_param = epf_test; >> - cookie = tx->tx_submit(tx); >> + epf_test->transfer_cookie = tx->tx_submit(tx); >> >> - ret = dma_submit_error(cookie); >> + ret = dma_submit_error(epf_test->transfer_cookie); >> if (ret) { >> - dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie); >> - return -EIO; >> + dev_err(dev, "Failed to do DMA tx_submit %d\n", ret); >> + goto terminate; >> } >> >> dma_async_issue_pending(chan); >> ret = wait_for_completion_interruptible(&epf_test->transfer_complete); >> if (ret < 0) { >> - dmaengine_terminate_sync(chan); >> - dev_err(dev, "DMA wait_for_completion_timeout\n"); >> - return -ETIMEDOUT; >> + dev_err(dev, "DMA wait_for_completion interrupted\n"); >> + goto terminate; >> } >> >> - return 0; >> + if (epf_test->transfer_status == DMA_ERROR) { >> + dev_err(dev, "DMA transfer failed\n"); >> + ret = -EIO; >> + } >> + >> + WARN_ON(epf_test->transfer_status != DMA_COMPLETE); > > Why do you need this check? Even if required, WARN_ON is superfluous here. The check is needed to return -EIO if there was a problem with the transfer, because wait_for_completion_interruptible() does not notify such errors (it only notifies timeouts). And yes, the WARN can go away. Will remove it.
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index d65419735d2e..557fbb91c729 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -54,6 +54,9 @@ struct pci_epf_test { struct delayed_work cmd_handler; struct dma_chan *dma_chan_tx; struct dma_chan *dma_chan_rx; + struct dma_chan *transfer_chan; + dma_cookie_t transfer_cookie; + enum dma_status transfer_status; struct completion transfer_complete; bool dma_supported; bool dma_private; @@ -85,8 +88,14 @@ static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 }; static void pci_epf_test_dma_callback(void *param) { struct pci_epf_test *epf_test = param; - - complete(&epf_test->transfer_complete); + struct dma_tx_state state; + + epf_test->transfer_status = + dmaengine_tx_status(epf_test->transfer_chan, + epf_test->transfer_cookie, &state); + if (epf_test->transfer_status == DMA_COMPLETE || + epf_test->transfer_status == DMA_ERROR) + complete(&epf_test->transfer_complete); } /** @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, struct dma_async_tx_descriptor *tx; struct dma_slave_config sconf = {}; struct device *dev = &epf->dev; - dma_cookie_t cookie; int ret; if (IS_ERR_OR_NULL(chan)) { @@ -152,25 +160,35 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, } reinit_completion(&epf_test->transfer_complete); + epf_test->transfer_chan = chan; tx->callback = pci_epf_test_dma_callback; tx->callback_param = epf_test; - cookie = tx->tx_submit(tx); + epf_test->transfer_cookie = tx->tx_submit(tx); - ret = dma_submit_error(cookie); + ret = dma_submit_error(epf_test->transfer_cookie); if (ret) { - dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie); - return -EIO; + dev_err(dev, "Failed to do DMA tx_submit %d\n", ret); + goto terminate; } dma_async_issue_pending(chan); ret = wait_for_completion_interruptible(&epf_test->transfer_complete); if (ret < 0) { - dmaengine_terminate_sync(chan); - dev_err(dev, "DMA wait_for_completion_timeout\n"); - return -ETIMEDOUT; + dev_err(dev, "DMA wait_for_completion interrupted\n"); + goto terminate; } - return 0; + if (epf_test->transfer_status == DMA_ERROR) { + dev_err(dev, "DMA transfer failed\n"); + ret = -EIO; + } + + WARN_ON(epf_test->transfer_status != DMA_COMPLETE); + +terminate: + dmaengine_terminate_sync(chan); + + return ret; } struct epf_dma_filter {
pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not handling DMA transfer completion correctly, leading to completion notifications to the RP side that are too early. This problem can be detected when the RP side is running an IOMMU with messages such as: pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x001c address=0xfff00000 flags=0x0000] When running the pcitest.sh tests: the address used for a previous test transfer generates the above error while the next test transfer is running. Fix this by testing the dma transfer status in pci_epf_test_dma_callback() and notifying the completion only when the transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in pci_epf_test_data_transfer(), be paranoid and check again the transfer status and always call dmaengine_terminate_sync() before returning. Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities") Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com> --- drivers/pci/endpoint/functions/pci-epf-test.c | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-)