[v3,8/8] nouveau/svm: Implement atomic SVM access

Message ID	20210226071832.31547-9-apopple@nvidia.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=kY6J=H4=kvack.org=owner-linux-mm@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org A367E64EDB TLS: TLSv1.2, AES256-SHA) id <B6038a0f60000>; Thu, 25 Feb 2021 23:19:18 -0800 From: Alistair Popple <apopple@nvidia.com> To: <linux-mm@kvack.org>, <nouveau@lists.freedesktop.org>, <bskeggs@redhat.com>, <akpm@linux-foundation.org> CC: <linux-doc@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <dri-devel@lists.freedesktop.org>, <jhubbard@nvidia.com>, <rcampbell@nvidia.com>, <jglisse@redhat.com>, <jgg@nvidia.com>, <hch@infradead.org>, <daniel@ffwll.ch>, Alistair Popple <apopple@nvidia.com> Subject: [PATCH v3 8/8] nouveau/svm: Implement atomic SVM access Date: Fri, 26 Feb 2021 18:18:32 +1100 Message-ID: <20210226071832.31547-9-apopple@nvidia.com> In-Reply-To: <20210226071832.31547-1-apopple@nvidia.com> References: <20210226071832.31547-1-apopple@nvidia.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain Received-SPF: none (nvidia.com>: No applicable sender policy available) receiver=imf03; identity=mailfrom; envelope-from="<apopple@nvidia.com>"; helo=hqnvemgate24.nvidia.com; client-ip=216.228.121.143 Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	Add support for SVM atomics in Nouveau \| expand [v3,0/8] Add support for SVM atomics in Nouveau [v3,1/8] mm: Remove special swap entry functions [v3,2/8] mm/swapops: Rework swap entry manipulation code [v3,3/8] mm/rmap: Split try_to_munlock from try_to_unmap [v3,4/8] mm/rmap: Split migration into its own function [v3,5/8] mm: Device exclusive memory access [v3,6/8] mm: Selftests for exclusive device memory [v3,7/8] nouveau/svm: Refactor nouveau_range_fault [v3,8/8] nouveau/svm: Implement atomic SVM access

diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index d6dd40f21eed..9c7ff56831c5 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -77,6 +77,7 @@ struct nvif_vmm_pfnmap_v0 { #define NVIF_VMM_PFNMAP_V0_APER 0x00000000000000f0ULL #define NVIF_VMM_PFNMAP_V0_HOST 0x0000000000000000ULL #define NVIF_VMM_PFNMAP_V0_VRAM 0x0000000000000010ULL +#define NVIF_VMM_PFNMAP_V0_A 0x0000000000000004ULL #define NVIF_VMM_PFNMAP_V0_W 0x0000000000000002ULL #define NVIF_VMM_PFNMAP_V0_V 0x0000000000000001ULL #define NVIF_VMM_PFNMAP_V0_NONE 0x0000000000000000ULL diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index cd7b47c946cf..630c4a7bcb55 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -35,6 +35,7 @@ #include <linux/sched/mm.h> #include <linux/sort.h> #include <linux/hmm.h> +#include <linux/rmap.h> struct nouveau_svm { struct nouveau_drm *drm; @@ -421,9 +422,9 @@ nouveau_svm_fault_cmp(const void *a, const void *b) return ret; if ((ret = (s64)fa->addr - fb->addr)) return ret; - /*XXX: atomic? */ - return (fa->access == 0 || fa->access == 3) - - (fb->access == 0 || fb->access == 3); + /* Atomic access (2) has highest priority */ + return (-1*(fa->access == 2) + (fa->access == 0 || fa->access == 3)) - + (-1*(fb->access == 2) + (fb->access == 0 || fb->access == 3)); } static void @@ -555,10 +556,58 @@ static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W; } +static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, + struct nouveau_drm *drm, + struct nouveau_pfnmap_args *args, u32 size, + unsigned long hmm_flags, struct mm_struct *mm) +{ + struct page *page; + unsigned long start = args->p.addr; + struct vm_area_struct *vma; + int ret = 0; + + mmap_read_lock(mm); + vma = find_vma_intersection(mm, start, start + size); + if (!vma || !(vma->vm_flags & VM_WRITE)) { + ret = -EPERM; + goto out; + } + + make_device_exclusive_range(mm, start, start + PAGE_SIZE, &page); + if (!page) { + ret = -EINVAL; + goto out; + } + + /* Map the page on the GPU. */ + args->p.page = 12; + args->p.size = PAGE_SIZE; + args->p.addr = start; + args->p.phys[0] = page_to_phys(page) | + NVIF_VMM_PFNMAP_V0_V | + NVIF_VMM_PFNMAP_V0_W | + NVIF_VMM_PFNMAP_V0_A | + NVIF_VMM_PFNMAP_V0_HOST; + + mutex_lock(&svmm->mutex); + svmm->vmm->vmm.object.client->super = true; + ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); + svmm->vmm->vmm.object.client->super = false; + mutex_unlock(&svmm->mutex); + + set_page_dirty(page); + unlock_page(page); + put_page(page); + +out: + mmap_read_unlock(mm); + return ret; +} + static int nouveau_range_fault(struct nouveau_svmm *svmm, struct nouveau_drm *drm, struct nouveau_pfnmap_args *args, u32 size, - unsigned long hmm_flags, + unsigned long hmm_flags, int atomic, struct svm_notifier *notifier) { unsigned long timeout = @@ -608,12 +657,18 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, break; } - nouveau_hmm_convert_pfn(drm, &range, args); + if (atomic) { + mutex_unlock(&svmm->mutex); + ret = nouveau_atomic_range_fault(svmm, drm, args, + size, hmm_flags, mm); + } else { + nouveau_hmm_convert_pfn(drm, &range, args); - svmm->vmm->vmm.object.client->super = true; - ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); - svmm->vmm->vmm.object.client->super = false; - mutex_unlock(&svmm->mutex); + svmm->vmm->vmm.object.client->super = true; + ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); + svmm->vmm->vmm.object.client->super = false; + mutex_unlock(&svmm->mutex); + } out: mmu_interval_notifier_remove(&notifier->notifier); @@ -637,7 +692,7 @@ nouveau_svm_fault(struct nvif_notify *notify) unsigned long hmm_flags; u64 inst, start, limit; int fi, fn; - int replay = 0, ret; + int replay = 0, atomic = 0, ret; /* Parse available fault buffer entries into a cache, and update * the GET pointer so HW can reuse the entries. @@ -718,12 +773,15 @@ nouveau_svm_fault(struct nvif_notify *notify) /* * Determine required permissions based on GPU fault * access flags. - * XXX: atomic? */ switch (buffer->fault[fi]->access) { case 0: /* READ. */ hmm_flags = HMM_PFN_REQ_FAULT; break; + case 2: /* ATOMIC. */ + hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE; + atomic = true; + break; case 3: /* PREFETCH. */ hmm_flags = 0; break; @@ -740,7 +798,7 @@ nouveau_svm_fault(struct nvif_notify *notify) notifier.svmm = svmm; ret = nouveau_range_fault(svmm, svm->drm, &args.i, - sizeof(args), hmm_flags, &notifier); + sizeof(args), hmm_flags, atomic, &notifier); mmput(mm); limit = args.i.p.addr + args.i.p.size; @@ -760,7 +818,11 @@ nouveau_svm_fault(struct nvif_notify *notify) !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) || (buffer->fault[fi]->access != 0 /* READ. */ && buffer->fault[fi]->access != 3 /* PREFETCH. */ && - !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W))) + !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) || + (buffer->fault[fi]->access != 0 /* READ. */ && + buffer->fault[fi]->access != 1 /* WRITE. */ && + buffer->fault[fi]->access != 3 /* PREFETCH. */ && + !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A))) break; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index a2b179568970..f6188aa9171c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -178,6 +178,7 @@ void nvkm_vmm_unmap_region(struct nvkm_vmm *, struct nvkm_vma *); #define NVKM_VMM_PFN_APER 0x00000000000000f0ULL #define NVKM_VMM_PFN_HOST 0x0000000000000000ULL #define NVKM_VMM_PFN_VRAM 0x0000000000000010ULL +#define NVKM_VMM_PFN_A 0x0000000000000004ULL #define NVKM_VMM_PFN_W 0x0000000000000002ULL #define NVKM_VMM_PFN_V 0x0000000000000001ULL #define NVKM_VMM_PFN_NONE 0x0000000000000000ULL diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 236db5570771..f02abd9cb4dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -88,6 +88,9 @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, if (!(*map->pfn & NVKM_VMM_PFN_W)) data |= BIT_ULL(6); /* RO. */ + if (!(*map->pfn & NVKM_VMM_PFN_A)) + data |= BIT_ULL(7); /* Atomic disable. */ + if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) { addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT; addr = dma_map_page(dev, pfn_to_page(addr), 0, @@ -322,6 +325,9 @@ gp100_vmm_pd0_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, if (!(*map->pfn & NVKM_VMM_PFN_W)) data |= BIT_ULL(6); /* RO. */ + if (!(*map->pfn & NVKM_VMM_PFN_A)) + data |= BIT_ULL(7); /* Atomic disable. */ + if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) { addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT; addr = dma_map_page(dev, pfn_to_page(addr), 0,

[v3,8/8] nouveau/svm: Implement atomic SVM access

Commit Message

Patch