Message ID | 20191028201032.6352-15-jgg@ziepe.ca (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Consolidate the mmu notifier interval_tree and locking | expand |
Hi Jason, I did quick test after merging amd-staging-drm-next with the mmu_notifier branch, which includes this set changes. The test result has different failures, app stuck intermittently, GUI no display etc. I am understanding the changes and will try to figure out the cause. Regards, Philip On 2019-10-28 4:10 p.m., Jason Gunthorpe wrote: > From: Jason Gunthorpe <jgg@mellanox.com> > > Convert the collision-retry lock around hmm_range_fault to use the one now > provided by the mmu_range notifier. > > Although this driver does not seem to use the collision retry lock that > hmm provides correctly, it can still be converted over to use the > mmu_range_notifier api instead of hmm_mirror without too much trouble. > > This also deletes another place where a driver is associating additional > data (struct amdgpu_mn) with a mmu_struct. > > Cc: Alex Deucher <alexander.deucher@amd.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: David (ChunMing) Zhou <David1.Zhou@amd.com> > Cc: amd-gfx@lists.freedesktop.org > Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> > --- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 148 ++---------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 49 ------ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 ++++----- > 5 files changed, 66 insertions(+), 225 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 47700302a08b7f..1bcedb9b477dce 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1738,6 +1738,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > return ret; > } > > + /* > + * FIXME: Cannot ignore the return code, must hold > + * notifier_lock > + */ > amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > > /* Mark the BO as valid unless it was invalidated > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 2e53feed40e230..76771f5f0b60ab 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -607,8 +607,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > e->tv.num_shared = 2; > > amdgpu_bo_list_get_list(p->bo_list, &p->validated); > - if (p->bo_list->first_userptr != p->bo_list->num_entries) > - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); > > INIT_LIST_HEAD(&duplicates); > amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); > @@ -1291,11 +1289,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > if (r) > goto error_unlock; > > - /* No memory allocation is allowed while holding the mn lock. > - * p->mn is hold until amdgpu_cs_submit is finished and fence is added > - * to BOs. > + /* No memory allocation is allowed while holding the notifier lock. > + * The lock is held until amdgpu_cs_submit is finished and fence is > + * added to BOs. > */ > - amdgpu_mn_lock(p->mn); > + mutex_lock(&p->adev->notifier_lock); > > /* If userptr are invalidated after amdgpu_cs_parser_bos(), return > * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. > @@ -1338,13 +1336,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); > > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); > - amdgpu_mn_unlock(p->mn); > + mutex_unlock(&p->adev->notifier_lock); > > return 0; > > error_abort: > drm_sched_job_cleanup(&job->base); > - amdgpu_mn_unlock(p->mn); > + mutex_unlock(&p->adev->notifier_lock); > > error_unlock: > amdgpu_job_free(job); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index 4ffd7b90f4d907..cb718a064eb491 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > @@ -50,28 +50,6 @@ > #include "amdgpu.h" > #include "amdgpu_amdkfd.h" > > -/** > - * amdgpu_mn_lock - take the write side lock for this notifier > - * > - * @mn: our notifier > - */ > -void amdgpu_mn_lock(struct amdgpu_mn *mn) > -{ > - if (mn) > - down_write(&mn->lock); > -} > - > -/** > - * amdgpu_mn_unlock - drop the write side lock for this notifier > - * > - * @mn: our notifier > - */ > -void amdgpu_mn_unlock(struct amdgpu_mn *mn) > -{ > - if (mn) > - up_write(&mn->lock); > -} > - > /** > * amdgpu_mn_invalidate_gfx - callback to notify about mm change > * > @@ -82,12 +60,19 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) > * potentially dirty. > */ > static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, > - const struct mmu_notifier_range *range) > + const struct mmu_notifier_range *range, > + unsigned long cur_seq) > { > struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > long r; > > + /* > + * FIXME: Must hold some lock shared with > + * amdgpu_ttm_tt_get_user_pages_done() > + */ > + mmu_range_set_seq(mrn, cur_seq); > + > /* FIXME: Is this necessary? */ > if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > range->end)) > @@ -119,11 +104,18 @@ static const struct mmu_range_notifier_ops amdgpu_mn_gfx_ops = { > * evicting all user-mode queues of the process. > */ > static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn, > - const struct mmu_notifier_range *range) > + const struct mmu_notifier_range *range, > + unsigned long cur_seq) > { > struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > + /* > + * FIXME: Must hold some lock shared with > + * amdgpu_ttm_tt_get_user_pages_done() > + */ > + mmu_range_set_seq(mrn, cur_seq); > + > /* FIXME: Is this necessary? */ > if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > range->end)) > @@ -143,92 +135,6 @@ static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = { > .invalidate = amdgpu_mn_invalidate_hsa, > }; > > -static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror, > - const struct mmu_notifier_range *update) > -{ > - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - > - if (!mmu_notifier_range_blockable(update)) > - return false; > - > - down_read(&amn->lock); > - up_read(&amn->lock); > - return 0; > -} > - > -/* Low bits of any reasonable mm pointer will be unused due to struct > - * alignment. Use these bits to make a unique key from the mm pointer > - * and notifier type. > - */ > -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) > - > -static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { > - [AMDGPU_MN_TYPE_GFX] = { > - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, > - }, > - [AMDGPU_MN_TYPE_HSA] = { > - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, > - }, > -}; > - > -/** > - * amdgpu_mn_get - create HMM mirror context > - * > - * @adev: amdgpu device pointer > - * @type: type of MMU notifier context > - * > - * Creates a HMM mirror context for current->mm. > - */ > -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > - enum amdgpu_mn_type type) > -{ > - struct mm_struct *mm = current->mm; > - struct amdgpu_mn *amn; > - unsigned long key = AMDGPU_MN_KEY(mm, type); > - int r; > - > - mutex_lock(&adev->mn_lock); > - if (down_write_killable(&mm->mmap_sem)) { > - mutex_unlock(&adev->mn_lock); > - return ERR_PTR(-EINTR); > - } > - > - hash_for_each_possible(adev->mn_hash, amn, node, key) > - if (AMDGPU_MN_KEY(amn->mirror.hmm->mmu_notifier.mm, > - amn->type) == key) > - goto release_locks; > - > - amn = kzalloc(sizeof(*amn), GFP_KERNEL); > - if (!amn) { > - amn = ERR_PTR(-ENOMEM); > - goto release_locks; > - } > - > - amn->adev = adev; > - init_rwsem(&amn->lock); > - amn->type = type; > - > - amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; > - r = hmm_mirror_register(&amn->mirror, mm); > - if (r) > - goto free_amn; > - > - hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); > - > -release_locks: > - up_write(&mm->mmap_sem); > - mutex_unlock(&adev->mn_lock); > - > - return amn; > - > -free_amn: > - up_write(&mm->mmap_sem); > - mutex_unlock(&adev->mn_lock); > - kfree(amn); > - > - return ERR_PTR(r); > -} > - > /** > * amdgpu_mn_register - register a BO for notifier updates > * > @@ -263,25 +169,3 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) > mmu_range_notifier_remove(&bo->notifier); > bo->notifier.mm = NULL; > } > - > -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ > -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > - (1 << 0), /* HMM_PFN_VALID */ > - (1 << 1), /* HMM_PFN_WRITE */ > - 0 /* HMM_PFN_DEVICE_PRIVATE */ > -}; > - > -static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { > - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ > - 0, /* HMM_PFN_NONE */ > - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ > -}; > - > -void amdgpu_hmm_init_range(struct hmm_range *range) > -{ > - if (range) { > - range->flags = hmm_range_flags; > - range->values = hmm_range_values; > - range->pfn_shift = PAGE_SHIFT; > - } > -} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > index d73ab2947b22b2..a292238f75ebae 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > @@ -30,59 +30,10 @@ > #include <linux/workqueue.h> > #include <linux/interval_tree.h> > > -enum amdgpu_mn_type { > - AMDGPU_MN_TYPE_GFX, > - AMDGPU_MN_TYPE_HSA, > -}; > - > -/** > - * struct amdgpu_mn > - * > - * @adev: amdgpu device pointer > - * @type: type of MMU notifier > - * @work: destruction work item > - * @node: hash table node to find structure by adev and mn > - * @lock: rw semaphore protecting the notifier nodes > - * @mirror: HMM mirror function support > - * > - * Data for each amdgpu device and process address space. > - */ > -struct amdgpu_mn { > - /* constant after initialisation */ > - struct amdgpu_device *adev; > - enum amdgpu_mn_type type; > - > - /* only used on destruction */ > - struct work_struct work; > - > - /* protected by adev->mn_lock */ > - struct hlist_node node; > - > - /* objects protected by lock */ > - struct rw_semaphore lock; > - > -#ifdef CONFIG_HMM_MIRROR > - /* HMM mirror */ > - struct hmm_mirror mirror; > -#endif > -}; > - > #if defined(CONFIG_HMM_MIRROR) > -void amdgpu_mn_lock(struct amdgpu_mn *mn); > -void amdgpu_mn_unlock(struct amdgpu_mn *mn); > -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > - enum amdgpu_mn_type type); > int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); > void amdgpu_mn_unregister(struct amdgpu_bo *bo); > -void amdgpu_hmm_init_range(struct hmm_range *range); > #else > -static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} > -static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} > -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > - enum amdgpu_mn_type type) > -{ > - return NULL; > -} > static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) > { > DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index c0e41f1f0c2365..65d9824b54f2a9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -773,6 +773,20 @@ struct amdgpu_ttm_tt { > #endif > }; > > +#ifdef CONFIG_DRM_AMDGPU_USERPTR > +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ > +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > + (1 << 0), /* HMM_PFN_VALID */ > + (1 << 1), /* HMM_PFN_WRITE */ > + 0 /* HMM_PFN_DEVICE_PRIVATE */ > +}; > + > +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { > + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ > + 0, /* HMM_PFN_NONE */ > + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ > +}; > + > /** > * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user > * memory and start HMM tracking CPU page table update > @@ -780,29 +794,27 @@ struct amdgpu_ttm_tt { > * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only > * once afterwards to stop HMM tracking > */ > -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) > - > -#define MAX_RETRY_HMM_RANGE_FAULT 16 > - > int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > { > - struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; > struct ttm_tt *ttm = bo->tbo.ttm; > struct amdgpu_ttm_tt *gtt = (void *)ttm; > struct mm_struct *mm; > + struct hmm_range *range; > unsigned long start = gtt->userptr; > struct vm_area_struct *vma; > - struct hmm_range *range; > unsigned long i; > - uint64_t *pfns; > int r = 0; > > - if (unlikely(!mirror)) { > - DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); > + mm = bo->notifier.mm; > + if (unlikely(!mm)) { > + DRM_DEBUG_DRIVER("BO is not registered?\n"); > return -EFAULT; > } > > - mm = mirror->hmm->mmu_notifier.mm; > + /* Another get_user_pages is running at the same time?? */ > + if (WARN_ON(gtt->range)) > + return -EFAULT; > + > if (!mmget_not_zero(mm)) /* Happens during process shutdown */ > return -ESRCH; > > @@ -811,30 +823,24 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > r = -ENOMEM; > goto out; > } > + range->notifier = &bo->notifier; > + range->flags = hmm_range_flags; > + range->values = hmm_range_values; > + range->pfn_shift = PAGE_SHIFT; > + range->start = bo->notifier.interval_tree.start; > + range->end = bo->notifier.interval_tree.last + 1; > + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; > + if (!amdgpu_ttm_tt_is_readonly(ttm)) > + range->default_flags |= range->flags[HMM_PFN_WRITE]; > > - pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); > - if (unlikely(!pfns)) { > + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), > + GFP_KERNEL); > + if (unlikely(!range->pfns)) { > r = -ENOMEM; > goto out_free_ranges; > } > > - amdgpu_hmm_init_range(range); > - range->default_flags = range->flags[HMM_PFN_VALID]; > - range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? > - 0 : range->flags[HMM_PFN_WRITE]; > - range->pfn_flags_mask = 0; > - range->pfns = pfns; > - range->start = start; > - range->end = start + ttm->num_pages * PAGE_SIZE; > - > - hmm_range_register(range, mirror); > - > - /* > - * Just wait for range to be valid, safe to ignore return value as we > - * will use the return value of hmm_range_fault() below under the > - * mmap_sem to ascertain the validity of the range. > - */ > - hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); > + range->notifier_seq = mmu_range_read_begin(&bo->notifier); > > down_read(&mm->mmap_sem); > vma = find_vma(mm, start); > @@ -855,10 +861,10 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > goto out_free_pfns; > > for (i = 0; i < ttm->num_pages; i++) { > - pages[i] = hmm_device_entry_to_page(range, pfns[i]); > + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); > if (unlikely(!pages[i])) { > pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", > - i, pfns[i]); > + i, range->pfns[i]); > r = -ENOMEM; > > goto out_free_pfns; > @@ -873,8 +879,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > out_unlock: > up_read(&mm->mmap_sem); > out_free_pfns: > - hmm_range_unregister(range); > - kvfree(pfns); > + kvfree(range->pfns); > out_free_ranges: > kfree(range); > out: > @@ -903,9 +908,8 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) > "No user pages to check\n"); > > if (gtt->range) { > - r = hmm_range_valid(gtt->range); > - hmm_range_unregister(gtt->range); > - > + r = mmu_range_read_retry(gtt->range->notifier, > + gtt->range->notifier_seq); > kvfree(gtt->range->pfns); > kfree(gtt->range); > gtt->range = NULL; >
On Tue, Oct 29, 2019 at 07:22:37PM +0000, Yang, Philip wrote: > Hi Jason, > > I did quick test after merging amd-staging-drm-next with the > mmu_notifier branch, which includes this set changes. The test result > has different failures, app stuck intermittently, GUI no display etc. I > am understanding the changes and will try to figure out the cause. Thanks! I'm not surprised by this given how difficult this patch was to make. Let me know if I can assist in any way Please ensure to run with lockdep enabled.. Your symptops sounds sort of like deadlocking? Regards, Jason
On 2019-10-29 3:25 p.m., Jason Gunthorpe wrote: > On Tue, Oct 29, 2019 at 07:22:37PM +0000, Yang, Philip wrote: >> Hi Jason, >> >> I did quick test after merging amd-staging-drm-next with the >> mmu_notifier branch, which includes this set changes. The test result >> has different failures, app stuck intermittently, GUI no display etc. I >> am understanding the changes and will try to figure out the cause. > > Thanks! I'm not surprised by this given how difficult this patch was > to make. Let me know if I can assist in any way > > Please ensure to run with lockdep enabled.. Your symptops sounds sort > of like deadlocking? > Hi Jason, Attached patch fix several issues in amdgpu driver, maybe you can squash this into patch 14. With this is done, patch 12, 13, 14 is Reviewed-by and Tested-by Philip Yang <philip.yang@amd.com> Regards, Philip > Regards, > Jason >
On Fri, Nov 01, 2019 at 02:44:51PM +0000, Yang, Philip wrote: > > > On 2019-10-29 3:25 p.m., Jason Gunthorpe wrote: > > On Tue, Oct 29, 2019 at 07:22:37PM +0000, Yang, Philip wrote: > >> Hi Jason, > >> > >> I did quick test after merging amd-staging-drm-next with the > >> mmu_notifier branch, which includes this set changes. The test result > >> has different failures, app stuck intermittently, GUI no display etc. I > >> am understanding the changes and will try to figure out the cause. > > > > Thanks! I'm not surprised by this given how difficult this patch was > > to make. Let me know if I can assist in any way > > > > Please ensure to run with lockdep enabled.. Your symptops sounds sort > > of like deadlocking? > > > Hi Jason, > > Attached patch fix several issues in amdgpu driver, maybe you can squash > this into patch 14. With this is done, patch 12, 13, 14 is Reviewed-by > and Tested-by Philip Yang <philip.yang@amd.com> Wow, this is great thanks! Can you clarify what the problems you found were? Was the bug the 'return !r' below? I'll also add your signed off by Here are some remarks: > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index cb718a064eb4..c8bbd06f1009 100644 > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > @@ -67,21 +67,15 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, > struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > long r; > > - /* > - * FIXME: Must hold some lock shared with > - * amdgpu_ttm_tt_get_user_pages_done() > - */ > - mmu_range_set_seq(mrn, cur_seq); > + mutex_lock(&adev->notifier_lock); > > - /* FIXME: Is this necessary? */ > - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > - range->end)) > - return true; > + mmu_range_set_seq(mrn, cur_seq); > > - if (!mmu_notifier_range_blockable(range)) > + if (!mmu_notifier_range_blockable(range)) { > + mutex_unlock(&adev->notifier_lock); > return false; This test for range_blockable should be before mutex_lock, I can move it up Also, do you know if notifier_lock is held while calling amdgpu_ttm_tt_get_user_pages_done()? Can we add a 'lock assert held' to amdgpu_ttm_tt_get_user_pages_done()? > @@ -854,12 +853,20 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > r = -EPERM; > goto out_unlock; > } > + up_read(&mm->mmap_sem); > + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); > + > +retry: > + range->notifier_seq = mmu_range_read_begin(&bo->notifier); > > + down_read(&mm->mmap_sem); > r = hmm_range_fault(range, 0); > up_read(&mm->mmap_sem); > - > - if (unlikely(r < 0)) > + if (unlikely(r <= 0)) { > + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) > + goto retry; > goto out_free_pfns; > + } This isn't really right, a retry loop like this needs to go all the way to mmu_range_read_retry() and done under the notifier_lock. ie mmu_range_read_retry() can fail just as likely as hmm_range_fault() can, and drivers are supposed to retry in both cases, with a single timeout. AFAICT it is a major bug that many places ignore the return code of amdgpu_ttm_tt_get_user_pages_done() ??? However, this is all pre-existing bugs, so I'm OK go ahead with this patch as modified. I advise AMD to make a followup patch .. I'll add a FIXME note to this effect. > for (i = 0; i < ttm->num_pages; i++) { > pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); > @@ -916,7 +923,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) > gtt->range = NULL; > } > > - return r; > + return !r; Ah is this the major error? hmm_range_valid() is inverted vs mmu_range_read_retry()? > } > #endif > > @@ -997,10 +1004,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) > sg_free_table(ttm->sg); > > #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) > - if (gtt->range && > - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, > - gtt->range->pfns[0])) > - WARN_ONCE(1, "Missing get_user_page_done\n"); > + if (gtt->range) { > + unsigned long i; > + > + for (i = 0; i < ttm->num_pages; i++) { > + if (ttm->pages[i] != > + hmm_device_entry_to_page(gtt->range, > + gtt->range->pfns[i])) > + break; > + } > + > + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); > + } Is this related/necessary? I can put it in another patch if it is just debugging improvement? Please advise Thanks a lot, Jason
On 2019-11-01 11:12 a.m., Jason Gunthorpe wrote: > On Fri, Nov 01, 2019 at 02:44:51PM +0000, Yang, Philip wrote: >> >> >> On 2019-10-29 3:25 p.m., Jason Gunthorpe wrote: >>> On Tue, Oct 29, 2019 at 07:22:37PM +0000, Yang, Philip wrote: >>>> Hi Jason, >>>> >>>> I did quick test after merging amd-staging-drm-next with the >>>> mmu_notifier branch, which includes this set changes. The test result >>>> has different failures, app stuck intermittently, GUI no display etc. I >>>> am understanding the changes and will try to figure out the cause. >>> >>> Thanks! I'm not surprised by this given how difficult this patch was >>> to make. Let me know if I can assist in any way >>> >>> Please ensure to run with lockdep enabled.. Your symptops sounds sort >>> of like deadlocking? >>> >> Hi Jason, >> >> Attached patch fix several issues in amdgpu driver, maybe you can squash >> this into patch 14. With this is done, patch 12, 13, 14 is Reviewed-by >> and Tested-by Philip Yang <philip.yang@amd.com> > > Wow, this is great thanks! Can you clarify what the problems you found > were? Was the bug the 'return !r' below? > Yes. return !r is critical one, and retry if hmm_range_fault return -EBUSY is needed too. > I'll also add your signed off by > > Here are some remarks: > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >> index cb718a064eb4..c8bbd06f1009 100644 >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >> @@ -67,21 +67,15 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, >> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); >> long r; >> >> - /* >> - * FIXME: Must hold some lock shared with >> - * amdgpu_ttm_tt_get_user_pages_done() >> - */ >> - mmu_range_set_seq(mrn, cur_seq); >> + mutex_lock(&adev->notifier_lock); >> >> - /* FIXME: Is this necessary? */ >> - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, >> - range->end)) >> - return true; >> + mmu_range_set_seq(mrn, cur_seq); >> >> - if (!mmu_notifier_range_blockable(range)) >> + if (!mmu_notifier_range_blockable(range)) { >> + mutex_unlock(&adev->notifier_lock); >> return false; > > This test for range_blockable should be before mutex_lock, I can move > it up > yes, thanks. > Also, do you know if notifier_lock is held while calling > amdgpu_ttm_tt_get_user_pages_done()? Can we add a 'lock assert held' > to amdgpu_ttm_tt_get_user_pages_done()? > gpu side hold notifier_lock but kfd side doesn't. kfd side doesn't check amdgpu_ttm_tt_get_user_pages_done/mmu_range_read_retry return value but check mem->invalid flag which is updated from invalidate callback. It takes more time to change, I will come to another patch to fix it later. >> @@ -854,12 +853,20 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) >> r = -EPERM; >> goto out_unlock; >> } >> + up_read(&mm->mmap_sem); >> + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); >> + >> +retry: >> + range->notifier_seq = mmu_range_read_begin(&bo->notifier); >> >> + down_read(&mm->mmap_sem); >> r = hmm_range_fault(range, 0); >> up_read(&mm->mmap_sem); >> - >> - if (unlikely(r < 0)) >> + if (unlikely(r <= 0)) { >> + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) >> + goto retry; >> goto out_free_pfns; >> + } > > This isn't really right, a retry loop like this needs to go all the > way to mmu_range_read_retry() and done under the notifier_lock. ie > mmu_range_read_retry() can fail just as likely as hmm_range_fault() > can, and drivers are supposed to retry in both cases, with a single > timeout. > For gpu, check mmu_range_read_retry return value under the notifier_lock to do retry is in seperate location, not in same retry loop. > AFAICT it is a major bug that many places ignore the return code of > amdgpu_ttm_tt_get_user_pages_done() ??? > For kfd, explained above. > However, this is all pre-existing bugs, so I'm OK go ahead with this > patch as modified. I advise AMD to make a followup patch .. > yes, I will. > I'll add a FIXME note to this effect. > >> for (i = 0; i < ttm->num_pages; i++) { >> pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); >> @@ -916,7 +923,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) >> gtt->range = NULL; >> } >> >> - return r; >> + return !r; > > Ah is this the major error? hmm_range_valid() is inverted vs > mmu_range_read_retry()? > yes. >> } >> #endif >> >> @@ -997,10 +1004,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) >> sg_free_table(ttm->sg); >> >> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) >> - if (gtt->range && >> - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, >> - gtt->range->pfns[0])) >> - WARN_ONCE(1, "Missing get_user_page_done\n"); >> + if (gtt->range) { >> + unsigned long i; >> + >> + for (i = 0; i < ttm->num_pages; i++) { >> + if (ttm->pages[i] != >> + hmm_device_entry_to_page(gtt->range, >> + gtt->range->pfns[i])) >> + break; >> + } >> + >> + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); >> + } > > Is this related/necessary? I can put it in another patch if it is just > debugging improvement? Please advise > I see this WARN backtrace now, but I didn't see it before. This is somehow related. > Thanks a lot, > Jason >
On Fri, Nov 01, 2019 at 03:59:26PM +0000, Yang, Philip wrote: > > This test for range_blockable should be before mutex_lock, I can move > > it up > > > yes, thanks. Okay, I wrote it like this: if (mmu_notifier_range_blockable(range)) mutex_lock(&adev->notifier_lock); else if (!mutex_trylock(&adev->notifier_lock)) return false; > > Also, do you know if notifier_lock is held while calling > > amdgpu_ttm_tt_get_user_pages_done()? Can we add a 'lock assert held' > > to amdgpu_ttm_tt_get_user_pages_done()? > > gpu side hold notifier_lock but kfd side doesn't. kfd side doesn't check > amdgpu_ttm_tt_get_user_pages_done/mmu_range_read_retry return value but > check mem->invalid flag which is updated from invalidate callback. It > takes more time to change, I will come to another patch to fix it later. Ah.. confusing, OK, I'll let you sort that > > However, this is all pre-existing bugs, so I'm OK go ahead with this > > patch as modified. I advise AMD to make a followup patch .. > > > yes, I will. While you are here, this is also wrong: int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { down_read(&mm->mmap_sem); r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); if (unlikely(r <= 0)) { if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) goto retry; goto out_free_pfns; } for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); It is not allowed to read the results of hmm_range_fault() outside locking, and in particular, we can't convert to a struct page. This must be done inside the notifier_lock, after checking mmu_range_read_retry(), all handling of the struct page must be structured like that. > >> @@ -997,10 +1004,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) > >> sg_free_table(ttm->sg); > >> > >> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) > >> - if (gtt->range && > >> - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, > >> - gtt->range->pfns[0])) > >> - WARN_ONCE(1, "Missing get_user_page_done\n"); > >> + if (gtt->range) { > >> + unsigned long i; > >> + > >> + for (i = 0; i < ttm->num_pages; i++) { > >> + if (ttm->pages[i] != > >> + hmm_device_entry_to_page(gtt->range, > >> + gtt->range->pfns[i])) > >> + break; > >> + } > >> + > >> + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); > >> + } > > > > Is this related/necessary? I can put it in another patch if it is just > > debugging improvement? Please advise > > > I see this WARN backtrace now, but I didn't see it before. This is > somehow related. Hm, might be instructive to learn what is going on.. Thanks, Jason
On Fri, Nov 01, 2019 at 02:44:51PM +0000, Yang, Philip wrote: > @@ -854,12 +853,20 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > r = -EPERM; > goto out_unlock; > } > + up_read(&mm->mmap_sem); > + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); > + > +retry: > + range->notifier_seq = mmu_range_read_begin(&bo->notifier); > > + down_read(&mm->mmap_sem); > r = hmm_range_fault(range, 0); > up_read(&mm->mmap_sem); > - > - if (unlikely(r < 0)) > + if (unlikely(r <= 0)) { > + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) > + goto retry; > goto out_free_pfns; > + } I was reflecting on why this suddently became necessary, and I think what might be happening is that hmm_range_fault() is trigging invalidations as it runs (ie it is faulting in pages or something) and that in turn causes the mrn to need retry. The hmm version of this had a bug where a full invalidate_range_start/end pair would not trigger retry, so this this didn't happen. This is unfortunate as the retry is unnecessary, but at this time I can't think of a good way to separate an ignorable synchronous invalidation caused by hmm_range_fault from an async one that cannot be ignored.. A basic fix would be to not update the mrq seq in the notifier if the invalidate is triggered by hmm_range_fault, but that seems difficult to determine.. Any thoughts Jerome? Jason
On 2019-11-01 1:42 p.m., Jason Gunthorpe wrote: > On Fri, Nov 01, 2019 at 03:59:26PM +0000, Yang, Philip wrote: >>> This test for range_blockable should be before mutex_lock, I can move >>> it up >>> >> yes, thanks. > > Okay, I wrote it like this: > > if (mmu_notifier_range_blockable(range)) > mutex_lock(&adev->notifier_lock); > else if (!mutex_trylock(&adev->notifier_lock)) > return false; > >>> Also, do you know if notifier_lock is held while calling >>> amdgpu_ttm_tt_get_user_pages_done()? Can we add a 'lock assert held' >>> to amdgpu_ttm_tt_get_user_pages_done()? >> >> gpu side hold notifier_lock but kfd side doesn't. kfd side doesn't check >> amdgpu_ttm_tt_get_user_pages_done/mmu_range_read_retry return value but >> check mem->invalid flag which is updated from invalidate callback. It >> takes more time to change, I will come to another patch to fix it later. > > Ah.. confusing, OK, I'll let you sort that > >>> However, this is all pre-existing bugs, so I'm OK go ahead with this >>> patch as modified. I advise AMD to make a followup patch .. >>> >> yes, I will. > > While you are here, this is also wrong: > > int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > { > down_read(&mm->mmap_sem); > r = hmm_range_fault(range, 0); > up_read(&mm->mmap_sem); > if (unlikely(r <= 0)) { > if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) > goto retry; > goto out_free_pfns; > } > > for (i = 0; i < ttm->num_pages; i++) { > pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); > > It is not allowed to read the results of hmm_range_fault() outside > locking, and in particular, we can't convert to a struct page. > > This must be done inside the notifier_lock, after checking > mmu_range_read_retry(), all handling of the struct page must be > structured like that. > Below change will fix this, then driver will call mmu_range_read_retry second time using same range->notifier_seq to check if range is invalidated inside amdgpu_cs_submit, this looks ok for me. @@ -868,6 +869,13 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) goto out_free_pfns; } + mutex_lock(&adev->notifier_lock); + + if (mmu_range_read_retry(&bo->notifier, range->notifier_seq)) { + mutex_unlock(&adev->notifier_lock); + goto retry; + } + for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { @@ -875,10 +883,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) i, range->pfns[i]); r = -ENOMEM; + mutex_unlock(&adev->notifier_lock); goto out_free_pfns; } } + mutex_unlock(&adev->notifier_lock); gtt->range = range; mmput(mm); Philip >>>> @@ -997,10 +1004,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) >>>> sg_free_table(ttm->sg); >>>> >>>> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) >>>> - if (gtt->range && >>>> - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, >>>> - gtt->range->pfns[0])) >>>> - WARN_ONCE(1, "Missing get_user_page_done\n"); >>>> + if (gtt->range) { >>>> + unsigned long i; >>>> + >>>> + for (i = 0; i < ttm->num_pages; i++) { >>>> + if (ttm->pages[i] != >>>> + hmm_device_entry_to_page(gtt->range, >>>> + gtt->range->pfns[i])) >>>> + break; >>>> + } >>>> + >>>> + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); >>>> + } >>> >>> Is this related/necessary? I can put it in another patch if it is just >>> debugging improvement? Please advise >>> >> I see this WARN backtrace now, but I didn't see it before. This is >> somehow related. > > Hm, might be instructive to learn what is going on.. > > Thanks, > Jason >
Sorry, resend patch, the one in previous email missed couple of lines duo to copy/paste. On 2019-11-01 3:45 p.m., Yang, Philip wrote: > > > On 2019-11-01 1:42 p.m., Jason Gunthorpe wrote: >> On Fri, Nov 01, 2019 at 03:59:26PM +0000, Yang, Philip wrote: >>>> This test for range_blockable should be before mutex_lock, I can move >>>> it up >>>> >>> yes, thanks. >> >> Okay, I wrote it like this: >> >> if (mmu_notifier_range_blockable(range)) >> mutex_lock(&adev->notifier_lock); >> else if (!mutex_trylock(&adev->notifier_lock)) >> return false; >> >>>> Also, do you know if notifier_lock is held while calling >>>> amdgpu_ttm_tt_get_user_pages_done()? Can we add a 'lock assert held' >>>> to amdgpu_ttm_tt_get_user_pages_done()? >>> >>> gpu side hold notifier_lock but kfd side doesn't. kfd side doesn't check >>> amdgpu_ttm_tt_get_user_pages_done/mmu_range_read_retry return value but >>> check mem->invalid flag which is updated from invalidate callback. It >>> takes more time to change, I will come to another patch to fix it later. >> >> Ah.. confusing, OK, I'll let you sort that >> >>>> However, this is all pre-existing bugs, so I'm OK go ahead with this >>>> patch as modified. I advise AMD to make a followup patch .. >>>> >>> yes, I will. >> >> While you are here, this is also wrong: >> >> int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) >> { >> down_read(&mm->mmap_sem); >> r = hmm_range_fault(range, 0); >> up_read(&mm->mmap_sem); >> if (unlikely(r <= 0)) { >> if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) >> goto retry; >> goto out_free_pfns; >> } >> >> for (i = 0; i < ttm->num_pages; i++) { >> pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); >> >> It is not allowed to read the results of hmm_range_fault() outside >> locking, and in particular, we can't convert to a struct page. >> >> This must be done inside the notifier_lock, after checking >> mmu_range_read_retry(), all handling of the struct page must be >> structured like that. >> > Below change will fix this, then driver will call mmu_range_read_retry > second time using same range->notifier_seq to check if range is > invalidated inside amdgpu_cs_submit, this looks ok for me. > @@ -797,6 +797,7 @@ static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { */ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned long start = gtt->userptr; @@ -868,6 +869,13 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) goto out_free_pfns; } + mutex_lock(&adev->notifier_lock); + + if (mmu_range_read_retry(&bo->notifier, range->notifier_seq)) { + mutex_unlock(&adev->notifier_lock); + goto retry; + } + for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { @@ -875,10 +883,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) i, range->pfns[i]); r = -ENOMEM; + mutex_unlock(&adev->notifier_lock); goto out_free_pfns; } } + mutex_unlock(&adev->notifier_lock); gtt->range = range; mmput(mm); > > Philip > >>>>> @@ -997,10 +1004,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) >>>>> sg_free_table(ttm->sg); >>>>> >>>>> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) >>>>> - if (gtt->range && >>>>> - ttm->pages[0] == hmm_device_entry_to_page(gtt->range, >>>>> - gtt->range->pfns[0])) >>>>> - WARN_ONCE(1, "Missing get_user_page_done\n"); >>>>> + if (gtt->range) { >>>>> + unsigned long i; >>>>> + >>>>> + for (i = 0; i < ttm->num_pages; i++) { >>>>> + if (ttm->pages[i] != >>>>> + hmm_device_entry_to_page(gtt->range, >>>>> + gtt->range->pfns[i])) >>>>> + break; >>>>> + } >>>>> + >>>>> + WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); >>>>> + } >>>> >>>> Is this related/necessary? I can put it in another patch if it is just >>>> debugging improvement? Please advise >>>> >>> I see this WARN backtrace now, but I didn't see it before. This is >>> somehow related. >> >> Hm, might be instructive to learn what is going on.. >> >> Thanks, >> Jason >> > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx >
On Fri, Nov 01, 2019 at 07:45:22PM +0000, Yang, Philip wrote: > > This must be done inside the notifier_lock, after checking > > mmu_range_read_retry(), all handling of the struct page must be > > structured like that. > > > Below change will fix this, then driver will call mmu_range_read_retry > second time using same range->notifier_seq to check if range is > invalidated inside amdgpu_cs_submit, this looks ok for me. Lets defer this to some patch trying to fix it, I find it hard to follow.. > @@ -868,6 +869,13 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo > *bo, struct page **pages) > goto out_free_pfns; > } > > + mutex_lock(&adev->notifier_lock); > + > + if (mmu_range_read_retry(&bo->notifier, range->notifier_seq)) { > + mutex_unlock(&adev->notifier_lock); > + goto retry; > + } > + > for (i = 0; i < ttm->num_pages; i++) { > pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); > if (unlikely(!pages[i])) { > @@ -875,10 +883,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo > *bo, struct page **pages) > i, range->pfns[i]); > r = -ENOMEM; > > + mutex_unlock(&adev->notifier_lock); > goto out_free_pfns; > } > } Well, maybe? The question now is what happens to 'pages' ? With this arrangment the driver cannot touch 'pages' without also again going under the lock and checking retry. If it doesn't touch it, then lets just move this device_entry_to_page to a more appropriate place? I'd prefer it if the driver could be structured in the normal way, with a clear locked region where the page list is handled.. Jason
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 47700302a08b7f..1bcedb9b477dce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1738,6 +1738,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return ret; } + /* + * FIXME: Cannot ignore the return code, must hold + * notifier_lock + */ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2e53feed40e230..76771f5f0b60ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -607,8 +607,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); - if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -1291,11 +1289,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock. - * p->mn is hold until amdgpu_cs_submit is finished and fence is added - * to BOs. + /* No memory allocation is allowed while holding the notifier lock. + * The lock is held until amdgpu_cs_submit is finished and fence is + * added to BOs. */ - amdgpu_mn_lock(p->mn); + mutex_lock(&p->adev->notifier_lock); /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. @@ -1338,13 +1336,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); return 0; error_abort: drm_sched_job_cleanup(&job->base); - amdgpu_mn_unlock(p->mn); + mutex_unlock(&p->adev->notifier_lock); error_unlock: amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4ffd7b90f4d907..cb718a064eb491 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -50,28 +50,6 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" -/** - * amdgpu_mn_lock - take the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_lock(struct amdgpu_mn *mn) -{ - if (mn) - down_write(&mn->lock); -} - -/** - * amdgpu_mn_unlock - drop the write side lock for this notifier - * - * @mn: our notifier - */ -void amdgpu_mn_unlock(struct amdgpu_mn *mn) -{ - if (mn) - up_write(&mn->lock); -} - /** * amdgpu_mn_invalidate_gfx - callback to notify about mm change * @@ -82,12 +60,19 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) * potentially dirty. */ static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, - const struct mmu_notifier_range *range) + const struct mmu_notifier_range *range, + unsigned long cur_seq) { struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; + /* + * FIXME: Must hold some lock shared with + * amdgpu_ttm_tt_get_user_pages_done() + */ + mmu_range_set_seq(mrn, cur_seq); + /* FIXME: Is this necessary? */ if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, range->end)) @@ -119,11 +104,18 @@ static const struct mmu_range_notifier_ops amdgpu_mn_gfx_ops = { * evicting all user-mode queues of the process. */ static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn, - const struct mmu_notifier_range *range) + const struct mmu_notifier_range *range, + unsigned long cur_seq) { struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* + * FIXME: Must hold some lock shared with + * amdgpu_ttm_tt_get_user_pages_done() + */ + mmu_range_set_seq(mrn, cur_seq); + /* FIXME: Is this necessary? */ if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, range->end)) @@ -143,92 +135,6 @@ static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = { .invalidate = amdgpu_mn_invalidate_hsa, }; -static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - - if (!mmu_notifier_range_blockable(update)) - return false; - - down_read(&amn->lock); - up_read(&amn->lock); - return 0; -} - -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - -static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { - [AMDGPU_MN_TYPE_GFX] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, - }, - [AMDGPU_MN_TYPE_HSA] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, - }, -}; - -/** - * amdgpu_mn_get - create HMM mirror context - * - * @adev: amdgpu device pointer - * @type: type of MMU notifier context - * - * Creates a HMM mirror context for current->mm. - */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - struct mm_struct *mm = current->mm; - struct amdgpu_mn *amn; - unsigned long key = AMDGPU_MN_KEY(mm, type); - int r; - - mutex_lock(&adev->mn_lock); - if (down_write_killable(&mm->mmap_sem)) { - mutex_unlock(&adev->mn_lock); - return ERR_PTR(-EINTR); - } - - hash_for_each_possible(adev->mn_hash, amn, node, key) - if (AMDGPU_MN_KEY(amn->mirror.hmm->mmu_notifier.mm, - amn->type) == key) - goto release_locks; - - amn = kzalloc(sizeof(*amn), GFP_KERNEL); - if (!amn) { - amn = ERR_PTR(-ENOMEM); - goto release_locks; - } - - amn->adev = adev; - init_rwsem(&amn->lock); - amn->type = type; - - amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; - r = hmm_mirror_register(&amn->mirror, mm); - if (r) - goto free_amn; - - hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); - -release_locks: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - - return amn; - -free_amn: - up_write(&mm->mmap_sem); - mutex_unlock(&adev->mn_lock); - kfree(amn); - - return ERR_PTR(r); -} - /** * amdgpu_mn_register - register a BO for notifier updates * @@ -263,25 +169,3 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mmu_range_notifier_remove(&bo->notifier); bo->notifier.mm = NULL; } - -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { - (1 << 0), /* HMM_PFN_VALID */ - (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ -}; - -static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ - 0, /* HMM_PFN_NONE */ - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ -}; - -void amdgpu_hmm_init_range(struct hmm_range *range) -{ - if (range) { - range->flags = hmm_range_flags; - range->values = hmm_range_values; - range->pfn_shift = PAGE_SHIFT; - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d73ab2947b22b2..a292238f75ebae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,59 +30,10 @@ #include <linux/workqueue.h> #include <linux/interval_tree.h> -enum amdgpu_mn_type { - AMDGPU_MN_TYPE_GFX, - AMDGPU_MN_TYPE_HSA, -}; - -/** - * struct amdgpu_mn - * - * @adev: amdgpu device pointer - * @type: type of MMU notifier - * @work: destruction work item - * @node: hash table node to find structure by adev and mn - * @lock: rw semaphore protecting the notifier nodes - * @mirror: HMM mirror function support - * - * Data for each amdgpu device and process address space. - */ -struct amdgpu_mn { - /* constant after initialisation */ - struct amdgpu_device *adev; - enum amdgpu_mn_type type; - - /* only used on destruction */ - struct work_struct work; - - /* protected by adev->mn_lock */ - struct hlist_node node; - - /* objects protected by lock */ - struct rw_semaphore lock; - -#ifdef CONFIG_HMM_MIRROR - /* HMM mirror */ - struct hmm_mirror mirror; -#endif -}; - #if defined(CONFIG_HMM_MIRROR) -void amdgpu_mn_lock(struct amdgpu_mn *mn); -void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); -void amdgpu_hmm_init_range(struct hmm_range *range); #else -static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} -static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, - enum amdgpu_mn_type type) -{ - return NULL; -} static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c0e41f1f0c2365..65d9824b54f2a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -773,6 +773,20 @@ struct amdgpu_ttm_tt { #endif }; +#ifdef CONFIG_DRM_AMDGPU_USERPTR +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + /** * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update @@ -780,29 +794,27 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - -#define MAX_RETRY_HMM_RANGE_FAULT 16 - int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { - struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm; + struct hmm_range *range; unsigned long start = gtt->userptr; struct vm_area_struct *vma; - struct hmm_range *range; unsigned long i; - uint64_t *pfns; int r = 0; - if (unlikely(!mirror)) { - DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); + mm = bo->notifier.mm; + if (unlikely(!mm)) { + DRM_DEBUG_DRIVER("BO is not registered?\n"); return -EFAULT; } - mm = mirror->hmm->mmu_notifier.mm; + /* Another get_user_pages is running at the same time?? */ + if (WARN_ON(gtt->range)) + return -EFAULT; + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; @@ -811,30 +823,24 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) r = -ENOMEM; goto out; } + range->notifier = &bo->notifier; + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->start = bo->notifier.interval_tree.start; + range->end = bo->notifier.interval_tree.last + 1; + range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!amdgpu_ttm_tt_is_readonly(ttm)) + range->default_flags |= range->flags[HMM_PFN_WRITE]; - pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) { + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), + GFP_KERNEL); + if (unlikely(!range->pfns)) { r = -ENOMEM; goto out_free_ranges; } - amdgpu_hmm_init_range(range); - range->default_flags = range->flags[HMM_PFN_VALID]; - range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - range->pfn_flags_mask = 0; - range->pfns = pfns; - range->start = start; - range->end = start + ttm->num_pages * PAGE_SIZE; - - hmm_range_register(range, mirror); - - /* - * Just wait for range to be valid, safe to ignore return value as we - * will use the return value of hmm_range_fault() below under the - * mmap_sem to ascertain the validity of the range. - */ - hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); + range->notifier_seq = mmu_range_read_begin(&bo->notifier); down_read(&mm->mmap_sem); vma = find_vma(mm, start); @@ -855,10 +861,10 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) goto out_free_pfns; for (i = 0; i < ttm->num_pages; i++) { - pages[i] = hmm_device_entry_to_page(range, pfns[i]); + pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); if (unlikely(!pages[i])) { pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", - i, pfns[i]); + i, range->pfns[i]); r = -ENOMEM; goto out_free_pfns; @@ -873,8 +879,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) out_unlock: up_read(&mm->mmap_sem); out_free_pfns: - hmm_range_unregister(range); - kvfree(pfns); + kvfree(range->pfns); out_free_ranges: kfree(range); out: @@ -903,9 +908,8 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) "No user pages to check\n"); if (gtt->range) { - r = hmm_range_valid(gtt->range); - hmm_range_unregister(gtt->range); - + r = mmu_range_read_retry(gtt->range->notifier, + gtt->range->notifier_seq); kvfree(gtt->range->pfns); kfree(gtt->range); gtt->range = NULL;