
[v2,1/3] drm/prime: Iterate SG DMA addresses separately

Message ID 73ac5c65-1dbd-8711-5770-75d79389bf44@arm.com (mailing list archive)
State New, archived

Commit Message

Robin Murphy April 17, 2018, 6:22 p.m. UTC
On 17/04/18 17:29, Christian König wrote:
> On 17.04.2018 at 17:58, Robin Murphy wrote:
>> For dma_map_sg(), DMA API implementations are free to merge consecutive
>> segments into a single DMA mapping if conditions are suitable, thus the
>> resulting DMA addresses which drm_prime_sg_to_page_addr_arrays()
>> iterates may be packed into fewer entries than ttm->sg->nents implies.
>>
>> The current implementation does not account for this, meaning that its
>> callers either have to reject the 0 < count < nents case or risk getting
>> bogus DMA addresses beyond the first segment. Fortunately this is quite
>> easy to handle without having to rejig structures to also store the
>> mapped count, since the total DMA length should still be equal to the
>> total buffer length. All we need is a second scatterlist cursor to
>> iterate through the DMA addresses independently of the page addresses.
>>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> 
> Reviewed-by: Christian König <christian.koenig@amd.com> for the whole 
> series.

Thanks Christian.
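
To spell out the "second cursor" idea from the commit message: walk the 
page segments and the DMA segments with independent cursors, advancing 
the DMA cursor only once the current (possibly merged) mapping is used 
up. The sketch below is an illustrative reconstruction rather than the 
patch itself: the function name is invented, pages/addrs are assumed 
non-NULL, and segment lengths are assumed page-aligned as they are for 
TTM buffers.

#include <linux/mm.h>
#include <linux/scatterlist.h>

static void sg_to_page_and_dma_arrays(struct sg_table *sgt,
				      struct page **pages,
				      dma_addr_t *addrs)
{
	struct scatterlist *sg, *sg_dma = NULL;
	unsigned int dma_left = 0, i, j, n, index = 0;
	dma_addr_t dma_addr = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		n = sg->length >> PAGE_SHIFT;
		for (j = 0; j < n; j++) {
			/* Refill the DMA cursor lazily, so a merged
			 * mapping simply spans several pages */
			if (!dma_left) {
				sg_dma = sg_dma ? sg_next(sg_dma) : sgt->sgl;
				dma_addr = sg_dma_address(sg_dma);
				dma_left = sg_dma_len(sg_dma);
			}
			pages[index] = nth_page(sg_page(sg), j);
			addrs[index] = dma_addr;
			index++;
			dma_addr += PAGE_SIZE;
			dma_left -= PAGE_SIZE;
		}
	}
}

Because the total DMA length still equals the total buffer length, the 
DMA cursor runs out exactly when the pages do, even when the mapped 
count is smaller than sgt->nents.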

FWIW, the following *completely untested* hack should in theory give the 
AMD IOMMU similar segment-merging behaviour to the arm64 IOMMU DMA ops, 
if that helps widen the scope for testing/investigation (I have neither 
an AMD/ATI graphics card nor a PCIe-capable arm64 box to hand just at 
the moment).

Robin.

----->8-----
  	domain = get_domain(dev);
@@ -2535,7 +2535,28 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
  		s->dma_length   = s->length;
  	}

-	return nelems;
+	d = sglist;
+	max_seg = dma_get_max_seg_size(dev);
+	count = 1;
+	nelems -= 1;
+	for_each_sg(sg_next(sglist), s, nelems, i) {
+		dma_addr_t s_dma_addr = s->dma_address;
+		unsigned int s_dma_len = s->dma_length;
+
+		s->dma_address = 0;
+		s->dma_length = 0;
+		if (s_dma_addr == d->dma_address + d->dma_length &&
+		    d->dma_length + s_dma_len <= max_seg) {
+			d->dma_length += s_dma_len;
+		} else {
+			d = sg_next(d);
+			d->dma_address = s_dma_addr;
+			d->dma_length = s_dma_len;
+			count++;
+		}
+	}
+
+	return count;

  out_unmap:
  	pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",

Patch

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2a99f0f14795..60b0e495b567 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2489,11 +2489,11 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
  		  int nelems, enum dma_data_direction direction,
  		  unsigned long attrs)
  {
-	int mapped_pages = 0, npages = 0, prot = 0, i;
+	int mapped_pages = 0, npages = 0, prot = 0, i, count;
  	struct protection_domain *domain;
  	struct dma_ops_domain *dma_dom;
-	struct scatterlist *s;
-	unsigned long address;
+	struct scatterlist *s, *d;
+	unsigned long address, max_seg;
  	u64 dma_mask;