Message ID | 20191028201032.6352-14-jgg@ziepe.ca (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | Consolidate the mmu notifier interval_tree and locking | expand |
Am 28.10.19 um 21:10 schrieb Jason Gunthorpe: > From: Jason Gunthorpe <jgg@mellanox.com> > > Remove the interval tree in the driver and rely on the tree maintained by > the mmu_notifier for delivering mmu_notifier invalidation callbacks. > > For some reason amdgpu has a very complicated arrangement where it tries > to prevent duplicate entries in the interval_tree, this is not necessary, > each amdgpu_bo can be its own stand alone entry. interval_tree already > allows duplicates and overlaps in the tree. > > Also, there is no need to remove entries upon a release callback, the > mmu_range API safely allows objects to remain registered beyond the > lifetime of the mm. The driver only has to stop touching the pages during > release. > > Cc: Alex Deucher <alexander.deucher@amd.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: David (ChunMing) Zhou <David1.Zhou@amd.com> > Cc: amd-gfx@lists.freedesktop.org > Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 341 ++++-------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 - > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 13 +- > 6 files changed, 84 insertions(+), 282 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index bd37df5dd6d048..60591a5d420021 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1006,6 +1006,8 @@ struct amdgpu_device { > struct mutex lock_reset; > struct amdgpu_doorbell_index doorbell_index; > > + struct mutex notifier_lock; > + > int asic_reset_res; > struct work_struct xgmi_reset_work; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 6d021ecc8d598f..47700302a08b7f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -481,8 +481,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, > * > * Returns 0 for success, negative errno for errors. > */ > -static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, > - uint64_t user_addr) > +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) > { > struct amdkfd_process_info *process_info = mem->process_info; > struct amdgpu_bo *bo = mem->bo; > @@ -1195,7 +1194,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); > > if (user_addr) { > - ret = init_user_pages(*mem, current->mm, user_addr); > + ret = init_user_pages(*mem, user_addr); > if (ret) > goto allocate_init_user_pages_failed; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 5a1939dbd4e3e6..38f97998aaddb2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2633,6 +2633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, > mutex_init(&adev->virt.vf_errors.lock); > hash_init(adev->mn_hash); > mutex_init(&adev->lock_reset); > + mutex_init(&adev->notifier_lock); > mutex_init(&adev->virt.dpm_mutex); > mutex_init(&adev->psp.mutex); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index 31d4deb5d29484..4ffd7b90f4d907 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > @@ -50,66 +50,6 @@ > #include "amdgpu.h" > #include "amdgpu_amdkfd.h" > > -/** > - * struct amdgpu_mn_node > - * > - * @it: interval node defining start-last of the affected address range > - * @bos: list of all BOs in the affected address range > - * > - * Manages all BOs which are affected of a certain range of address space. > - */ > -struct amdgpu_mn_node { > - struct interval_tree_node it; > - struct list_head bos; > -}; > - > -/** > - * amdgpu_mn_destroy - destroy the HMM mirror > - * > - * @work: previously sheduled work item > - * > - * Lazy destroys the notifier from a work item > - */ > -static void amdgpu_mn_destroy(struct work_struct *work) > -{ > - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); > - struct amdgpu_device *adev = amn->adev; > - struct amdgpu_mn_node *node, *next_node; > - struct amdgpu_bo *bo, *next_bo; > - > - mutex_lock(&adev->mn_lock); > - down_write(&amn->lock); > - hash_del(&amn->node); > - rbtree_postorder_for_each_entry_safe(node, next_node, > - &amn->objects.rb_root, it.rb) { > - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { > - bo->mn = NULL; > - list_del_init(&bo->mn_list); > - } > - kfree(node); > - } > - up_write(&amn->lock); > - mutex_unlock(&adev->mn_lock); > - > - hmm_mirror_unregister(&amn->mirror); > - kfree(amn); > -} > - > -/** > - * amdgpu_hmm_mirror_release - callback to notify about mm destruction > - * > - * @mirror: the HMM mirror (mm) this callback is about > - * > - * Shedule a work item to lazy destroy HMM mirror. > - */ > -static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) > -{ > - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - > - INIT_WORK(&amn->work, amdgpu_mn_destroy); > - schedule_work(&amn->work); > -} > - > /** > * amdgpu_mn_lock - take the write side lock for this notifier > * > @@ -133,157 +73,86 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) > } > > /** > - * amdgpu_mn_read_lock - take the read side lock for this notifier > - * > - * @amn: our notifier > - */ > -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) > -{ > - if (blockable) > - down_read(&amn->lock); > - else if (!down_read_trylock(&amn->lock)) > - return -EAGAIN; > - > - return 0; > -} > - > -/** > - * amdgpu_mn_read_unlock - drop the read side lock for this notifier > - * > - * @amn: our notifier > - */ > -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) > -{ > - up_read(&amn->lock); > -} > - > -/** > - * amdgpu_mn_invalidate_node - unmap all BOs of a node > + * amdgpu_mn_invalidate_gfx - callback to notify about mm change > * > - * @node: the node with the BOs to unmap > - * @start: start of address range affected > - * @end: end of address range affected > + * @mrn: the range (mm) is about to update > + * @range: details on the invalidation > * > * Block for operations on BOs to finish and mark pages as accessed and > * potentially dirty. > */ > -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, > - unsigned long start, > - unsigned long end) > +static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, > + const struct mmu_notifier_range *range) > { > - struct amdgpu_bo *bo; > + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > long r; > > - list_for_each_entry(bo, &node->bos, mn_list) { > - > - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) > - continue; > - > - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, > - true, false, MAX_SCHEDULE_TIMEOUT); > - if (r <= 0) > - DRM_ERROR("(%ld) failed to wait for user bo\n", r); > - } > + /* FIXME: Is this necessary? */ Most likely not. Christian. > + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > + range->end)) > + return true; > + > + if (!mmu_notifier_range_blockable(range)) > + return false; > + > + mutex_lock(&adev->notifier_lock); > + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, > + MAX_SCHEDULE_TIMEOUT); > + mutex_unlock(&adev->notifier_lock); > + if (r <= 0) > + DRM_ERROR("(%ld) failed to wait for user bo\n", r); > + return true; > } > > +static const struct mmu_range_notifier_ops amdgpu_mn_gfx_ops = { > + .invalidate = amdgpu_mn_invalidate_gfx, > +}; > + > /** > - * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change > + * amdgpu_mn_invalidate_hsa - callback to notify about mm change > * > - * @mirror: the hmm_mirror (mm) is about to update > - * @update: the update start, end address > + * @mrn: the range (mm) is about to update > + * @range: details on the invalidation > * > - * Block for operations on BOs to finish and mark pages as accessed and > - * potentially dirty. > + * We temporarily evict the BO attached to this range. This necessitates > + * evicting all user-mode queues of the process. > */ > -static int > -amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, > - const struct mmu_notifier_range *update) > +static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn, > + const struct mmu_notifier_range *range) > { > - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - unsigned long start = update->start; > - unsigned long end = update->end; > - bool blockable = mmu_notifier_range_blockable(update); > - struct interval_tree_node *it; > - > - /* notification is exclusive, but interval is inclusive */ > - end -= 1; > - > - /* TODO we should be able to split locking for interval tree and > - * amdgpu_mn_invalidate_node > - */ > - if (amdgpu_mn_read_lock(amn, blockable)) > - return -EAGAIN; > - > - it = interval_tree_iter_first(&amn->objects, start, end); > - while (it) { > - struct amdgpu_mn_node *node; > - > - if (!blockable) { > - amdgpu_mn_read_unlock(amn); > - return -EAGAIN; > - } > + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > - node = container_of(it, struct amdgpu_mn_node, it); > - it = interval_tree_iter_next(it, start, end); > + /* FIXME: Is this necessary? */ > + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > + range->end)) > + return true; > > - amdgpu_mn_invalidate_node(node, start, end); > - } > + if (!mmu_notifier_range_blockable(range)) > + return false; > > - amdgpu_mn_read_unlock(amn); > + mutex_lock(&adev->notifier_lock); > + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); > + mutex_unlock(&adev->notifier_lock); > > - return 0; > + return true; > } > > -/** > - * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change > - * > - * @mirror: the hmm_mirror (mm) is about to update > - * @update: the update start, end address > - * > - * We temporarily evict all BOs between start and end. This > - * necessitates evicting all user-mode queues of the process. The BOs > - * are restorted in amdgpu_mn_invalidate_range_end_hsa. > - */ > -static int > -amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, > - const struct mmu_notifier_range *update) > +static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = { > + .invalidate = amdgpu_mn_invalidate_hsa, > +}; > + > +static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror, > + const struct mmu_notifier_range *update) > { > struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - unsigned long start = update->start; > - unsigned long end = update->end; > - bool blockable = mmu_notifier_range_blockable(update); > - struct interval_tree_node *it; > > - /* notification is exclusive, but interval is inclusive */ > - end -= 1; > - > - if (amdgpu_mn_read_lock(amn, blockable)) > - return -EAGAIN; > - > - it = interval_tree_iter_first(&amn->objects, start, end); > - while (it) { > - struct amdgpu_mn_node *node; > - struct amdgpu_bo *bo; > - > - if (!blockable) { > - amdgpu_mn_read_unlock(amn); > - return -EAGAIN; > - } > - > - node = container_of(it, struct amdgpu_mn_node, it); > - it = interval_tree_iter_next(it, start, end); > - > - list_for_each_entry(bo, &node->bos, mn_list) { > - struct kgd_mem *mem = bo->kfd_bo; > - > - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, > - start, end)) > - amdgpu_amdkfd_evict_userptr(mem, amn->mm); > - } > - } > - > - amdgpu_mn_read_unlock(amn); > + if (!mmu_notifier_range_blockable(update)) > + return false; > > + down_read(&amn->lock); > + up_read(&amn->lock); > return 0; > } > > @@ -295,12 +164,10 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, > > static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { > [AMDGPU_MN_TYPE_GFX] = { > - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, > - .release = amdgpu_hmm_mirror_release > + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, > }, > [AMDGPU_MN_TYPE_HSA] = { > - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, > - .release = amdgpu_hmm_mirror_release > + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, > }, > }; > > @@ -327,7 +194,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > } > > hash_for_each_possible(adev->mn_hash, amn, node, key) > - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) > + if (AMDGPU_MN_KEY(amn->mirror.hmm->mmu_notifier.mm, > + amn->type) == key) > goto release_locks; > > amn = kzalloc(sizeof(*amn), GFP_KERNEL); > @@ -337,10 +205,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > } > > amn->adev = adev; > - amn->mm = mm; > init_rwsem(&amn->lock); > amn->type = type; > - amn->objects = RB_ROOT_CACHED; > > amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; > r = hmm_mirror_register(&amn->mirror, mm); > @@ -369,100 +235,33 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > * @bo: amdgpu buffer object > * @addr: userptr addr we should monitor > * > - * Registers an HMM mirror for the given BO at the specified address. > + * Registers a mmu_notifier for the given BO at the specified address. > * Returns 0 on success, -ERRNO if anything goes wrong. > */ > int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) > { > - unsigned long end = addr + amdgpu_bo_size(bo) - 1; > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > - enum amdgpu_mn_type type = > - bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; > - struct amdgpu_mn *amn; > - struct amdgpu_mn_node *node = NULL, *new_node; > - struct list_head bos; > - struct interval_tree_node *it; > - > - amn = amdgpu_mn_get(adev, type); > - if (IS_ERR(amn)) > - return PTR_ERR(amn); > - > - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); > - if (!new_node) > - return -ENOMEM; > - > - INIT_LIST_HEAD(&bos); > - > - down_write(&amn->lock); > - > - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { > - kfree(node); > - node = container_of(it, struct amdgpu_mn_node, it); > - interval_tree_remove(&node->it, &amn->objects); > - addr = min(it->start, addr); > - end = max(it->last, end); > - list_splice(&node->bos, &bos); > - } > - > - if (!node) > - node = new_node; > + if (bo->kfd_bo) > + bo->notifier.ops = &amdgpu_mn_hsa_ops; > else > - kfree(new_node); > - > - bo->mn = amn; > - > - node->it.start = addr; > - node->it.last = end; > - INIT_LIST_HEAD(&node->bos); > - list_splice(&bos, &node->bos); > - list_add(&bo->mn_list, &node->bos); > + bo->notifier.ops = &amdgpu_mn_gfx_ops; > > - interval_tree_insert(&node->it, &amn->objects); > - > - up_write(&amn->lock); > - > - return 0; > + return mmu_range_notifier_insert(&bo->notifier, addr, > + amdgpu_bo_size(bo), current->mm); > } > > /** > - * amdgpu_mn_unregister - unregister a BO for HMM mirror updates > + * amdgpu_mn_unregister - unregister a BO for notifier updates > * > * @bo: amdgpu buffer object > * > - * Remove any registration of HMM mirror updates from the buffer object. > + * Remove any registration of mmu notifier updates from the buffer object. > */ > void amdgpu_mn_unregister(struct amdgpu_bo *bo) > { > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > - struct amdgpu_mn *amn; > - struct list_head *head; > - > - mutex_lock(&adev->mn_lock); > - > - amn = bo->mn; > - if (amn == NULL) { > - mutex_unlock(&adev->mn_lock); > + if (!bo->notifier.mm) > return; > - } > - > - down_write(&amn->lock); > - > - /* save the next list entry for later */ > - head = bo->mn_list.next; > - > - bo->mn = NULL; > - list_del_init(&bo->mn_list); > - > - if (list_empty(head)) { > - struct amdgpu_mn_node *node; > - > - node = container_of(head, struct amdgpu_mn_node, bos); > - interval_tree_remove(&node->it, &amn->objects); > - kfree(node); > - } > - > - up_write(&amn->lock); > - mutex_unlock(&adev->mn_lock); > + mmu_range_notifier_remove(&bo->notifier); > + bo->notifier.mm = NULL; > } > > /* flags used by HMM internal, not related to CPU/GPU PTE flags */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > index b8ed68943625c2..d73ab2947b22b2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > @@ -39,12 +39,10 @@ enum amdgpu_mn_type { > * struct amdgpu_mn > * > * @adev: amdgpu device pointer > - * @mm: process address space > * @type: type of MMU notifier > * @work: destruction work item > * @node: hash table node to find structure by adev and mn > * @lock: rw semaphore protecting the notifier nodes > - * @objects: interval tree containing amdgpu_mn_nodes > * @mirror: HMM mirror function support > * > * Data for each amdgpu device and process address space. > @@ -52,7 +50,6 @@ enum amdgpu_mn_type { > struct amdgpu_mn { > /* constant after initialisation */ > struct amdgpu_device *adev; > - struct mm_struct *mm; > enum amdgpu_mn_type type; > > /* only used on destruction */ > @@ -63,7 +60,6 @@ struct amdgpu_mn { > > /* objects protected by lock */ > struct rw_semaphore lock; > - struct rb_root_cached objects; > > #ifdef CONFIG_HMM_MIRROR > /* HMM mirror */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index 658f4c9779b704..4b44ab850f94c2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -30,6 +30,9 @@ > > #include <drm/amdgpu_drm.h> > #include "amdgpu.h" > +#ifdef CONFIG_MMU_NOTIFIER > +#include <linux/mmu_notifier.h> > +#endif > > #define AMDGPU_BO_INVALID_OFFSET LONG_MAX > #define AMDGPU_BO_MAX_PLACEMENTS 3 > @@ -100,10 +103,12 @@ struct amdgpu_bo { > struct ttm_bo_kmap_obj dma_buf_vmap; > struct amdgpu_mn *mn; > > - union { > - struct list_head mn_list; > - struct list_head shadow_list; > - }; > + > +#ifdef CONFIG_MMU_NOTIFIER > + struct mmu_range_notifier notifier; > +#endif > + > + struct list_head shadow_list; > > struct kgd_mem *kfd_bo; > };
On Tue, Oct 29, 2019 at 07:51:30AM +0000, Koenig, Christian wrote: > > +static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, > > + const struct mmu_notifier_range *range) > > { > > - struct amdgpu_bo *bo; > > + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > long r; > > > > - list_for_each_entry(bo, &node->bos, mn_list) { > > - > > - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) > > - continue; > > - > > - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, > > - true, false, MAX_SCHEDULE_TIMEOUT); > > - if (r <= 0) > > - DRM_ERROR("(%ld) failed to wait for user bo\n", r); > > - } > > + /* FIXME: Is this necessary? */ > > Most likely not. > > Christian. > > > + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > > + range->end)) > > + return true; So is the bo->tbo.mem.num_pages == bo->tbo.ttm.num_pages always? And userptr can't be zero here, or at least it doesn't matter if it is? > > +static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn, > > + const struct mmu_notifier_range *range) > > { > > - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > > - unsigned long start = update->start; > > - unsigned long end = update->end; > > - bool blockable = mmu_notifier_range_blockable(update); > > - struct interval_tree_node *it; > > - > > - /* notification is exclusive, but interval is inclusive */ > > - end -= 1; > > - > > - /* TODO we should be able to split locking for interval tree and > > - * amdgpu_mn_invalidate_node > > - */ > > - if (amdgpu_mn_read_lock(amn, blockable)) > > - return -EAGAIN; > > - > > - it = interval_tree_iter_first(&amn->objects, start, end); > > - while (it) { > > - struct amdgpu_mn_node *node; > > - > > - if (!blockable) { > > - amdgpu_mn_read_unlock(amn); > > - return -EAGAIN; > > - } > > + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > > > - node = container_of(it, struct amdgpu_mn_node, it); > > - it = interval_tree_iter_next(it, start, end); > > + /* FIXME: Is this necessary? */ > > + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > > + range->end)) > > + return true; > > > > - amdgpu_mn_invalidate_node(node, start, end); > > - } This one too right? Jason
On 2019-10-28 4:10 p.m., Jason Gunthorpe wrote: > From: Jason Gunthorpe <jgg@mellanox.com> > > Remove the interval tree in the driver and rely on the tree maintained by > the mmu_notifier for delivering mmu_notifier invalidation callbacks. > > For some reason amdgpu has a very complicated arrangement where it tries > to prevent duplicate entries in the interval_tree, this is not necessary, > each amdgpu_bo can be its own stand alone entry. interval_tree already > allows duplicates and overlaps in the tree. > > Also, there is no need to remove entries upon a release callback, the > mmu_range API safely allows objects to remain registered beyond the > lifetime of the mm. The driver only has to stop touching the pages during > release. > > Cc: Alex Deucher <alexander.deucher@amd.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: David (ChunMing) Zhou <David1.Zhou@amd.com> > Cc: amd-gfx@lists.freedesktop.org > Signed-off-by: Jason Gunthorpe <jgg@mellanox.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 341 ++++-------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 - > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 13 +- > 6 files changed, 84 insertions(+), 282 deletions(-) [snip] > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index 31d4deb5d29484..4ffd7b90f4d907 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c [snip] > @@ -50,66 +50,6 @@ > #include "amdgpu.h" > #include "amdgpu_amdkfd.h" > > -/** > - * struct amdgpu_mn_node > - * > - * @it: interval node defining start-last of the affected address range > - * @bos: list of all BOs in the affected address range > - * > - * Manages all BOs which are affected of a certain range of address space. > - */ > -struct amdgpu_mn_node { > - struct interval_tree_node it; > - struct list_head bos; > -}; > - > -/** > - * amdgpu_mn_destroy - destroy the HMM mirror > - * > - * @work: previously sheduled work item > - * > - * Lazy destroys the notifier from a work item > - */ > -static void amdgpu_mn_destroy(struct work_struct *work) > -{ > - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); > - struct amdgpu_device *adev = amn->adev; > - struct amdgpu_mn_node *node, *next_node; > - struct amdgpu_bo *bo, *next_bo; > - > - mutex_lock(&adev->mn_lock); > - down_write(&amn->lock); > - hash_del(&amn->node); > - rbtree_postorder_for_each_entry_safe(node, next_node, > - &amn->objects.rb_root, it.rb) { > - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { > - bo->mn = NULL; > - list_del_init(&bo->mn_list); > - } > - kfree(node); > - } > - up_write(&amn->lock); > - mutex_unlock(&adev->mn_lock); > - > - hmm_mirror_unregister(&amn->mirror); > - kfree(amn); > -} > - > -/** > - * amdgpu_hmm_mirror_release - callback to notify about mm destruction > - * > - * @mirror: the HMM mirror (mm) this callback is about > - * > - * Shedule a work item to lazy destroy HMM mirror. > - */ > -static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) > -{ > - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - > - INIT_WORK(&amn->work, amdgpu_mn_destroy); > - schedule_work(&amn->work); > -} > - > /** > * amdgpu_mn_lock - take the write side lock for this notifier > * > @@ -133,157 +73,86 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) > } > > /** > - * amdgpu_mn_read_lock - take the read side lock for this notifier > - * > - * @amn: our notifier > - */ > -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) > -{ > - if (blockable) > - down_read(&amn->lock); > - else if (!down_read_trylock(&amn->lock)) > - return -EAGAIN; > - > - return 0; > -} > - > -/** > - * amdgpu_mn_read_unlock - drop the read side lock for this notifier > - * > - * @amn: our notifier > - */ > -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) > -{ > - up_read(&amn->lock); > -} > - > -/** > - * amdgpu_mn_invalidate_node - unmap all BOs of a node > + * amdgpu_mn_invalidate_gfx - callback to notify about mm change > * > - * @node: the node with the BOs to unmap > - * @start: start of address range affected > - * @end: end of address range affected > + * @mrn: the range (mm) is about to update > + * @range: details on the invalidation > * > * Block for operations on BOs to finish and mark pages as accessed and > * potentially dirty. > */ > -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, > - unsigned long start, > - unsigned long end) > +static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, > + const struct mmu_notifier_range *range) > { > - struct amdgpu_bo *bo; > + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > long r; > > - list_for_each_entry(bo, &node->bos, mn_list) { > - > - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) > - continue; > - > - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, > - true, false, MAX_SCHEDULE_TIMEOUT); > - if (r <= 0) > - DRM_ERROR("(%ld) failed to wait for user bo\n", r); > - } > + /* FIXME: Is this necessary? */ > + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > + range->end)) > + return true; > + > + if (!mmu_notifier_range_blockable(range)) > + return false; > + > + mutex_lock(&adev->notifier_lock); > + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, > + MAX_SCHEDULE_TIMEOUT); > + mutex_unlock(&adev->notifier_lock); > + if (r <= 0) > + DRM_ERROR("(%ld) failed to wait for user bo\n", r); > + return true; > } > > +static const struct mmu_range_notifier_ops amdgpu_mn_gfx_ops = { > + .invalidate = amdgpu_mn_invalidate_gfx, > +}; > + > /** > - * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change > + * amdgpu_mn_invalidate_hsa - callback to notify about mm change > * > - * @mirror: the hmm_mirror (mm) is about to update > - * @update: the update start, end address > + * @mrn: the range (mm) is about to update > + * @range: details on the invalidation > * > - * Block for operations on BOs to finish and mark pages as accessed and > - * potentially dirty. > + * We temporarily evict the BO attached to this range. This necessitates > + * evicting all user-mode queues of the process. > */ > -static int > -amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, > - const struct mmu_notifier_range *update) > +static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn, > + const struct mmu_notifier_range *range) > { > - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - unsigned long start = update->start; > - unsigned long end = update->end; > - bool blockable = mmu_notifier_range_blockable(update); > - struct interval_tree_node *it; > - > - /* notification is exclusive, but interval is inclusive */ > - end -= 1; > - > - /* TODO we should be able to split locking for interval tree and > - * amdgpu_mn_invalidate_node > - */ > - if (amdgpu_mn_read_lock(amn, blockable)) > - return -EAGAIN; > - > - it = interval_tree_iter_first(&amn->objects, start, end); > - while (it) { > - struct amdgpu_mn_node *node; > - > - if (!blockable) { > - amdgpu_mn_read_unlock(amn); > - return -EAGAIN; > - } > + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > - node = container_of(it, struct amdgpu_mn_node, it); > - it = interval_tree_iter_next(it, start, end); > + /* FIXME: Is this necessary? */ > + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, > + range->end)) > + return true; > > - amdgpu_mn_invalidate_node(node, start, end); > - } > + if (!mmu_notifier_range_blockable(range)) > + return false; > > - amdgpu_mn_read_unlock(amn); > + mutex_lock(&adev->notifier_lock); > + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); > + mutex_unlock(&adev->notifier_lock); > > - return 0; > + return true; > } > > -/** > - * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change > - * > - * @mirror: the hmm_mirror (mm) is about to update > - * @update: the update start, end address > - * > - * We temporarily evict all BOs between start and end. This > - * necessitates evicting all user-mode queues of the process. The BOs > - * are restorted in amdgpu_mn_invalidate_range_end_hsa. > - */ > -static int > -amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, > - const struct mmu_notifier_range *update) > +static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = { > + .invalidate = amdgpu_mn_invalidate_hsa, > +}; > + > +static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror, > + const struct mmu_notifier_range *update) > { > struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > - unsigned long start = update->start; > - unsigned long end = update->end; > - bool blockable = mmu_notifier_range_blockable(update); > - struct interval_tree_node *it; > > - /* notification is exclusive, but interval is inclusive */ > - end -= 1; > - > - if (amdgpu_mn_read_lock(amn, blockable)) > - return -EAGAIN; > - > - it = interval_tree_iter_first(&amn->objects, start, end); > - while (it) { > - struct amdgpu_mn_node *node; > - struct amdgpu_bo *bo; > - > - if (!blockable) { > - amdgpu_mn_read_unlock(amn); > - return -EAGAIN; > - } > - > - node = container_of(it, struct amdgpu_mn_node, it); > - it = interval_tree_iter_next(it, start, end); > - > - list_for_each_entry(bo, &node->bos, mn_list) { > - struct kgd_mem *mem = bo->kfd_bo; > - > - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, > - start, end)) > - amdgpu_amdkfd_evict_userptr(mem, amn->mm); > - } > - } > - > - amdgpu_mn_read_unlock(amn); > + if (!mmu_notifier_range_blockable(update)) > + return false; This should return -EAGAIN. Not sure it matters much, because this whole function disappears in the next commit in the series. It seems to be only vestigial at this point. Regards, Felix > > + down_read(&amn->lock); > + up_read(&amn->lock); > return 0; > } > > @@ -295,12 +164,10 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, > > static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { > [AMDGPU_MN_TYPE_GFX] = { > - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, > - .release = amdgpu_hmm_mirror_release > + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, > }, > [AMDGPU_MN_TYPE_HSA] = { > - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, > - .release = amdgpu_hmm_mirror_release > + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, > }, > }; > > @@ -327,7 +194,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > } > > hash_for_each_possible(adev->mn_hash, amn, node, key) > - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) > + if (AMDGPU_MN_KEY(amn->mirror.hmm->mmu_notifier.mm, > + amn->type) == key) > goto release_locks; > > amn = kzalloc(sizeof(*amn), GFP_KERNEL); > @@ -337,10 +205,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > } > > amn->adev = adev; > - amn->mm = mm; > init_rwsem(&amn->lock); > amn->type = type; > - amn->objects = RB_ROOT_CACHED; > > amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; > r = hmm_mirror_register(&amn->mirror, mm); > @@ -369,100 +235,33 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > * @bo: amdgpu buffer object > * @addr: userptr addr we should monitor > * > - * Registers an HMM mirror for the given BO at the specified address. > + * Registers a mmu_notifier for the given BO at the specified address. > * Returns 0 on success, -ERRNO if anything goes wrong. > */ > int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) > { > - unsigned long end = addr + amdgpu_bo_size(bo) - 1; > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > - enum amdgpu_mn_type type = > - bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; > - struct amdgpu_mn *amn; > - struct amdgpu_mn_node *node = NULL, *new_node; > - struct list_head bos; > - struct interval_tree_node *it; > - > - amn = amdgpu_mn_get(adev, type); > - if (IS_ERR(amn)) > - return PTR_ERR(amn); > - > - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); > - if (!new_node) > - return -ENOMEM; > - > - INIT_LIST_HEAD(&bos); > - > - down_write(&amn->lock); > - > - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { > - kfree(node); > - node = container_of(it, struct amdgpu_mn_node, it); > - interval_tree_remove(&node->it, &amn->objects); > - addr = min(it->start, addr); > - end = max(it->last, end); > - list_splice(&node->bos, &bos); > - } > - > - if (!node) > - node = new_node; > + if (bo->kfd_bo) > + bo->notifier.ops = &amdgpu_mn_hsa_ops; > else > - kfree(new_node); > - > - bo->mn = amn; > - > - node->it.start = addr; > - node->it.last = end; > - INIT_LIST_HEAD(&node->bos); > - list_splice(&bos, &node->bos); > - list_add(&bo->mn_list, &node->bos); > + bo->notifier.ops = &amdgpu_mn_gfx_ops; > > - interval_tree_insert(&node->it, &amn->objects); > - > - up_write(&amn->lock); > - > - return 0; > + return mmu_range_notifier_insert(&bo->notifier, addr, > + amdgpu_bo_size(bo), current->mm); > } > > /** > - * amdgpu_mn_unregister - unregister a BO for HMM mirror updates > + * amdgpu_mn_unregister - unregister a BO for notifier updates > * > * @bo: amdgpu buffer object > * > - * Remove any registration of HMM mirror updates from the buffer object. > + * Remove any registration of mmu notifier updates from the buffer object. > */ > void amdgpu_mn_unregister(struct amdgpu_bo *bo) > { > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > - struct amdgpu_mn *amn; > - struct list_head *head; > - > - mutex_lock(&adev->mn_lock); > - > - amn = bo->mn; > - if (amn == NULL) { > - mutex_unlock(&adev->mn_lock); > + if (!bo->notifier.mm) > return; > - } > - > - down_write(&amn->lock); > - > - /* save the next list entry for later */ > - head = bo->mn_list.next; > - > - bo->mn = NULL; > - list_del_init(&bo->mn_list); > - > - if (list_empty(head)) { > - struct amdgpu_mn_node *node; > - > - node = container_of(head, struct amdgpu_mn_node, bos); > - interval_tree_remove(&node->it, &amn->objects); > - kfree(node); > - } > - > - up_write(&amn->lock); > - mutex_unlock(&adev->mn_lock); > + mmu_range_notifier_remove(&bo->notifier); > + bo->notifier.mm = NULL; > } > > /* flags used by HMM internal, not related to CPU/GPU PTE flags */ [snip]
On Tue, Oct 29, 2019 at 10:14:29PM +0000, Kuehling, Felix wrote: > > +static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = { > > + .invalidate = amdgpu_mn_invalidate_hsa, > > +}; > > + > > +static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror, > > + const struct mmu_notifier_range *update) > > { > > struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); > > - unsigned long start = update->start; > > - unsigned long end = update->end; > > - bool blockable = mmu_notifier_range_blockable(update); > > - struct interval_tree_node *it; > > > > - /* notification is exclusive, but interval is inclusive */ > > - end -= 1; > > - > > - if (amdgpu_mn_read_lock(amn, blockable)) > > - return -EAGAIN; > > - > > - it = interval_tree_iter_first(&amn->objects, start, end); > > - while (it) { > > - struct amdgpu_mn_node *node; > > - struct amdgpu_bo *bo; > > - > > - if (!blockable) { > > - amdgpu_mn_read_unlock(amn); > > - return -EAGAIN; > > - } > > - > > - node = container_of(it, struct amdgpu_mn_node, it); > > - it = interval_tree_iter_next(it, start, end); > > - > > - list_for_each_entry(bo, &node->bos, mn_list) { > > - struct kgd_mem *mem = bo->kfd_bo; > > - > > - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, > > - start, end)) > > - amdgpu_amdkfd_evict_userptr(mem, amn->mm); > > - } > > - } > > - > > - amdgpu_mn_read_unlock(amn); > > + if (!mmu_notifier_range_blockable(update)) > > + return false; > > This should return -EAGAIN. Not sure it matters much, because this whole > function disappears in the next commit in the series. It seems to be > only vestigial at this point. Right, the only reason it is still here is that I couldn't really tell if this: > > + down_read(&amn->lock); > > + up_read(&amn->lock); > > return 0; > > } Was serving as the 'driver lock' in the hmm scheme... If not then the whole thing should just be deleted at this point. I fixed the EAGAIN though Jason
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bd37df5dd6d048..60591a5d420021 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1006,6 +1006,8 @@ struct amdgpu_device { struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + struct mutex notifier_lock; + int asic_reset_res; struct work_struct xgmi_reset_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d021ecc8d598f..47700302a08b7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -481,8 +481,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, - uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -1195,7 +1194,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); + ret = init_user_pages(*mem, user_addr); if (ret) goto allocate_init_user_pages_failed; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5a1939dbd4e3e6..38f97998aaddb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2633,6 +2633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); + mutex_init(&adev->notifier_lock); mutex_init(&adev->virt.dpm_mutex); mutex_init(&adev->psp.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 31d4deb5d29484..4ffd7b90f4d907 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -50,66 +50,6 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" -/** - * struct amdgpu_mn_node - * - * @it: interval node defining start-last of the affected address range - * @bos: list of all BOs in the affected address range - * - * Manages all BOs which are affected of a certain range of address space. - */ -struct amdgpu_mn_node { - struct interval_tree_node it; - struct list_head bos; -}; - -/** - * amdgpu_mn_destroy - destroy the HMM mirror - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void amdgpu_mn_destroy(struct work_struct *work) -{ - struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = amn->adev; - struct amdgpu_mn_node *node, *next_node; - struct amdgpu_bo *bo, *next_bo; - - mutex_lock(&adev->mn_lock); - down_write(&amn->lock); - hash_del(&amn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &amn->objects.rb_root, it.rb) { - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); - - hmm_mirror_unregister(&amn->mirror); - kfree(amn); -} - -/** - * amdgpu_hmm_mirror_release - callback to notify about mm destruction - * - * @mirror: the HMM mirror (mm) this callback is about - * - * Shedule a work item to lazy destroy HMM mirror. - */ -static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) -{ - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - - INIT_WORK(&amn->work, amdgpu_mn_destroy); - schedule_work(&amn->work); -} - /** * amdgpu_mn_lock - take the write side lock for this notifier * @@ -133,157 +73,86 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_read_lock - take the read side lock for this notifier - * - * @amn: our notifier - */ -static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) -{ - if (blockable) - down_read(&amn->lock); - else if (!down_read_trylock(&amn->lock)) - return -EAGAIN; - - return 0; -} - -/** - * amdgpu_mn_read_unlock - drop the read side lock for this notifier - * - * @amn: our notifier - */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) -{ - up_read(&amn->lock); -} - -/** - * amdgpu_mn_invalidate_node - unmap all BOs of a node + * amdgpu_mn_invalidate_gfx - callback to notify about mm change * - * @node: the node with the BOs to unmap - * @start: start of address range affected - * @end: end of address range affected + * @mrn: the range (mm) is about to update + * @range: details on the invalidation * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, - unsigned long start, - unsigned long end) +static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn, + const struct mmu_notifier_range *range) { - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); long r; - list_for_each_entry(bo, &node->bos, mn_list) { - - if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) - continue; - - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, - true, false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) - DRM_ERROR("(%ld) failed to wait for user bo\n", r); - } + /* FIXME: Is this necessary? */ + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, + range->end)) + return true; + + if (!mmu_notifier_range_blockable(range)) + return false; + + mutex_lock(&adev->notifier_lock); + r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + mutex_unlock(&adev->notifier_lock); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); + return true; } +static const struct mmu_range_notifier_ops amdgpu_mn_gfx_ops = { + .invalidate = amdgpu_mn_invalidate_gfx, +}; + /** - * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change + * amdgpu_mn_invalidate_hsa - callback to notify about mm change * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address + * @mrn: the range (mm) is about to update + * @range: details on the invalidation * - * Block for operations on BOs to finish and mark pages as accessed and - * potentially dirty. + * We temporarily evict the BO attached to this range. This necessitates + * evicting all user-mode queues of the process. */ -static int -amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) +static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn, + const struct mmu_notifier_range *range) { - struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = mmu_notifier_range_blockable(update); - struct interval_tree_node *it; - - /* notification is exclusive, but interval is inclusive */ - end -= 1; - - /* TODO we should be able to split locking for interval tree and - * amdgpu_mn_invalidate_node - */ - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } + struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); + /* FIXME: Is this necessary? */ + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start, + range->end)) + return true; - amdgpu_mn_invalidate_node(node, start, end); - } + if (!mmu_notifier_range_blockable(range)) + return false; - amdgpu_mn_read_unlock(amn); + mutex_lock(&adev->notifier_lock); + amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); + mutex_unlock(&adev->notifier_lock); - return 0; + return true; } -/** - * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change - * - * @mirror: the hmm_mirror (mm) is about to update - * @update: the update start, end address - * - * We temporarily evict all BOs between start and end. This - * necessitates evicting all user-mode queues of the process. The BOs - * are restorted in amdgpu_mn_invalidate_range_end_hsa. - */ -static int -amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct mmu_notifier_range *update) +static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = { + .invalidate = amdgpu_mn_invalidate_hsa, +}; + +static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); - unsigned long start = update->start; - unsigned long end = update->end; - bool blockable = mmu_notifier_range_blockable(update); - struct interval_tree_node *it; - /* notification is exclusive, but interval is inclusive */ - end -= 1; - - if (amdgpu_mn_read_lock(amn, blockable)) - return -EAGAIN; - - it = interval_tree_iter_first(&amn->objects, start, end); - while (it) { - struct amdgpu_mn_node *node; - struct amdgpu_bo *bo; - - if (!blockable) { - amdgpu_mn_read_unlock(amn); - return -EAGAIN; - } - - node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); - - list_for_each_entry(bo, &node->bos, mn_list) { - struct kgd_mem *mem = bo->kfd_bo; - - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) - amdgpu_amdkfd_evict_userptr(mem, amn->mm); - } - } - - amdgpu_mn_read_unlock(amn); + if (!mmu_notifier_range_blockable(update)) + return false; + down_read(&amn->lock); + up_read(&amn->lock); return 0; } @@ -295,12 +164,10 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { [AMDGPU_MN_TYPE_GFX] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, - .release = amdgpu_hmm_mirror_release + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, }, [AMDGPU_MN_TYPE_HSA] = { - .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, - .release = amdgpu_hmm_mirror_release + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables, }, }; @@ -327,7 +194,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, } hash_for_each_possible(adev->mn_hash, amn, node, key) - if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) + if (AMDGPU_MN_KEY(amn->mirror.hmm->mmu_notifier.mm, + amn->type) == key) goto release_locks; amn = kzalloc(sizeof(*amn), GFP_KERNEL); @@ -337,10 +205,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, } amn->adev = adev; - amn->mm = mm; init_rwsem(&amn->lock); amn->type = type; - amn->objects = RB_ROOT_CACHED; amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; r = hmm_mirror_register(&amn->mirror, mm); @@ -369,100 +235,33 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an HMM mirror for the given BO at the specified address. + * Registers a mmu_notifier for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { - unsigned long end = addr + amdgpu_bo_size(bo) - 1; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - enum amdgpu_mn_type type = - bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *amn; - struct amdgpu_mn_node *node = NULL, *new_node; - struct list_head bos; - struct interval_tree_node *it; - - amn = amdgpu_mn_get(adev, type); - if (IS_ERR(amn)) - return PTR_ERR(amn); - - new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); - if (!new_node) - return -ENOMEM; - - INIT_LIST_HEAD(&bos); - - down_write(&amn->lock); - - while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { - kfree(node); - node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &amn->objects); - addr = min(it->start, addr); - end = max(it->last, end); - list_splice(&node->bos, &bos); - } - - if (!node) - node = new_node; + if (bo->kfd_bo) + bo->notifier.ops = &amdgpu_mn_hsa_ops; else - kfree(new_node); - - bo->mn = amn; - - node->it.start = addr; - node->it.last = end; - INIT_LIST_HEAD(&node->bos); - list_splice(&bos, &node->bos); - list_add(&bo->mn_list, &node->bos); + bo->notifier.ops = &amdgpu_mn_gfx_ops; - interval_tree_insert(&node->it, &amn->objects); - - up_write(&amn->lock); - - return 0; + return mmu_range_notifier_insert(&bo->notifier, addr, + amdgpu_bo_size(bo), current->mm); } /** - * amdgpu_mn_unregister - unregister a BO for HMM mirror updates + * amdgpu_mn_unregister - unregister a BO for notifier updates * * @bo: amdgpu buffer object * - * Remove any registration of HMM mirror updates from the buffer object. + * Remove any registration of mmu notifier updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *amn; - struct list_head *head; - - mutex_lock(&adev->mn_lock); - - amn = bo->mn; - if (amn == NULL) { - mutex_unlock(&adev->mn_lock); + if (!bo->notifier.mm) return; - } - - down_write(&amn->lock); - - /* save the next list entry for later */ - head = bo->mn_list.next; - - bo->mn = NULL; - list_del_init(&bo->mn_list); - - if (list_empty(head)) { - struct amdgpu_mn_node *node; - - node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &amn->objects); - kfree(node); - } - - up_write(&amn->lock); - mutex_unlock(&adev->mn_lock); + mmu_range_notifier_remove(&bo->notifier); + bo->notifier.mm = NULL; } /* flags used by HMM internal, not related to CPU/GPU PTE flags */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index b8ed68943625c2..d73ab2947b22b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -39,12 +39,10 @@ enum amdgpu_mn_type { * struct amdgpu_mn * * @adev: amdgpu device pointer - * @mm: process address space * @type: type of MMU notifier * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes - * @objects: interval tree containing amdgpu_mn_nodes * @mirror: HMM mirror function support * * Data for each amdgpu device and process address space. @@ -52,7 +50,6 @@ enum amdgpu_mn_type { struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; - struct mm_struct *mm; enum amdgpu_mn_type type; /* only used on destruction */ @@ -63,7 +60,6 @@ struct amdgpu_mn { /* objects protected by lock */ struct rw_semaphore lock; - struct rb_root_cached objects; #ifdef CONFIG_HMM_MIRROR /* HMM mirror */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 658f4c9779b704..4b44ab850f94c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -30,6 +30,9 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#ifdef CONFIG_MMU_NOTIFIER +#include <linux/mmu_notifier.h> +#endif #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 @@ -100,10 +103,12 @@ struct amdgpu_bo { struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; - union { - struct list_head mn_list; - struct list_head shadow_list; - }; + +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_range_notifier notifier; +#endif + + struct list_head shadow_list; struct kgd_mem *kfd_bo; };