Message ID | 20190614004450.20252-5-jgg@ziepe.ca (mailing list archive) |
---|---|
State | New, archived |
Series | mm/hmm: Various revisions from a locking/code review |
On Sat, Jun 15, 2019 at 07:12:11AM -0700, Christoph Hellwig wrote:
> > +        spin_lock(&mm->page_table_lock);
> > +        if (mm->hmm) {
> > +                if (kref_get_unless_zero(&mm->hmm->kref)) {
> > +                        spin_unlock(&mm->page_table_lock);
> > +                        return mm->hmm;
> > +                }
> > +        }
> > +        spin_unlock(&mm->page_table_lock);
>
> This could become:
>
>         spin_lock(&mm->page_table_lock);
>         hmm = mm->hmm;
>         if (hmm && kref_get_unless_zero(&hmm->kref))
>                 goto out_unlock;
>         spin_unlock(&mm->page_table_lock);
>
> as the last two lines of the function already drop the page_table_lock
> and then return hmm. Or drop the "hmm = mm->hmm" assignment above and
> return mm->hmm, as that should always be identical to hmm at the end,
> to save another line.

Yeah, I can fuss it some more.

> > +        /*
> > +         * The mm->hmm pointer is kept valid while notifier ops can be running
> > +         * so they don't have to deal with a NULL mm->hmm value
> > +         */
>
> The comment confuses me. How does the page_table_lock relate to
> possibly running notifiers, as I can't find that we take
> page_table_lock? Or is it just about the fact that we only clear
> mm->hmm in the free callback, and not in hmm_free?

It was late when I wrote this fixup; the comment is faulty, and there is
no reason to delay this until the SRCU cleanup at this point in the
series.

The ops all get their struct hmm from container_of; the only thing that
refers to mm->hmm is hmm_get_or_create().

I'll revise it tomorrow: the comment will go away, and the "= NULL" will
move to the release callback.

Jason
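[Editor's note: the pattern under discussion, looking up a shared object and taking a reference only if its refcount has not already dropped to zero, can be modeled outside the kernel. Below is a minimal, runnable userspace sketch of the idiom. All names are hypothetical; this is not the kernel code. A pthread mutex stands in for the page_table_lock and an atomic_int for struct kref.]

/*
 * Minimal userspace model of the "lookup + kref_get_unless_zero()"
 * idiom discussed above. Hypothetical names throughout.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        atomic_int refcount;            /* models struct kref */
};

static pthread_mutex_t lookup_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *registered;          /* models mm->hmm */

/* Models kref_get_unless_zero(): take a reference only if the object
 * has not already begun teardown (refcount reached zero). */
static int get_unless_zero(struct obj *o)
{
        int old = atomic_load(&o->refcount);

        while (old != 0)
                if (atomic_compare_exchange_weak(&o->refcount, &old, old + 1))
                        return 1;
        return 0;
}

static struct obj *lookup_and_get(void)
{
        struct obj *o;

        pthread_mutex_lock(&lookup_lock);
        o = registered;
        if (o && get_unless_zero(o)) {
                pthread_mutex_unlock(&lookup_lock);
                return o;       /* caller now owns a reference */
        }
        pthread_mutex_unlock(&lookup_lock);
        return NULL;            /* absent, or dying concurrently */
}

int main(void)
{
        registered = calloc(1, sizeof(*registered));
        atomic_store(&registered->refcount, 1);
        printf("lookup: %s\n", lookup_and_get() ? "got a reference" : "failed");
        return 0;
}

[Christoph's suggested shape is the same logic with the early unlock/return hoisted into a single shared out_unlock exit, so the lock/unlock pair is spelled out only once.]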
On Sat, Jun 15, 2019 at 07:12:11AM -0700, Christoph Hellwig wrote:
> > +        spin_lock(&mm->page_table_lock);
> > +        if (mm->hmm) {
> > +                if (kref_get_unless_zero(&mm->hmm->kref)) {
> > +                        spin_unlock(&mm->page_table_lock);
> > +                        return mm->hmm;
> > +                }
> > +        }
> > +        spin_unlock(&mm->page_table_lock);
>
> This could become:
>
>         spin_lock(&mm->page_table_lock);
>         hmm = mm->hmm;
>         if (hmm && kref_get_unless_zero(&hmm->kref))
>                 goto out_unlock;
>         spin_unlock(&mm->page_table_lock);
>
> as the last two lines of the function already drop the page_table_lock
> and then return hmm. Or drop the "hmm = mm->hmm" assignment above and
> return mm->hmm, as that should always be identical to hmm at the end,
> to save another line.
>
> > +        /*
> > +         * The mm->hmm pointer is kept valid while notifier ops can be running
> > +         * so they don't have to deal with a NULL mm->hmm value
> > +         */
>
> The comment confuses me. How does the page_table_lock relate to
> possibly running notifiers, as I can't find that we take
> page_table_lock? Or is it just about the fact that we only clear
> mm->hmm in the free callback, and not in hmm_free?

Revised with:

From bdc02a1d502db08457823e6b2b983861a3574a76 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 23 May 2019 10:24:13 -0300
Subject: [PATCH] mm/hmm: Simplify hmm_get_or_create and make it reliable

As coded this function can false-fail in various racy situations. Make
it reliable and simpler by running under the write side of the mmap_sem
and avoiding the false-failing compare/exchange pattern.

Due to the mmap_sem this no longer has to avoid racing with a 2nd
parallel hmm_get_or_create().

Unfortunately this still has to use the page_table_lock as the
non-sleeping lock protecting mm->hmm, since the contexts where we free
the hmm are incompatible with mmap_sem.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Tested-by: Philip Yang <Philip.Yang@amd.com>
---
v2:
- Fix error unwind of mmgrab (Jerome)
- Use hmm local instead of 2nd container_of (Jerome)
v3:
- Can't use mmap_sem in the SRCU callback, keep using the
  page_table_lock (Philip)
v4:
- Put the mm->hmm = NULL in the kref release, reduce LOC in
  hmm_get_or_create() (Christoph)
---
 mm/hmm.c | 77 ++++++++++++++++++++++----------------------------------
 1 file changed, 30 insertions(+), 47 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 080b17a2e87e2d..0423f4ca3a7e09 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -31,16 +31,6 @@
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
-static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
-{
-        struct hmm *hmm = READ_ONCE(mm->hmm);
-
-        if (hmm && kref_get_unless_zero(&hmm->kref))
-                return hmm;
-
-        return NULL;
-}
-
 /**
  * hmm_get_or_create - register HMM against an mm (HMM internal)
  *
@@ -55,11 +45,16 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
  */
 static struct hmm *hmm_get_or_create(struct mm_struct *mm)
 {
-        struct hmm *hmm = mm_get_hmm(mm);
-        bool cleanup = false;
+        struct hmm *hmm;
+
+        lockdep_assert_held_exclusive(&mm->mmap_sem);
 
-        if (hmm)
-                return hmm;
+        /* Abuse the page_table_lock to also protect mm->hmm. */
+        spin_lock(&mm->page_table_lock);
+        hmm = mm->hmm;
+        if (mm->hmm && kref_get_unless_zero(&mm->hmm->kref))
+                goto out_unlock;
+        spin_unlock(&mm->page_table_lock);
 
         hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
         if (!hmm)
@@ -74,57 +69,45 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm)
         hmm->notifiers = 0;
         hmm->dead = false;
         hmm->mm = mm;
-        mmgrab(hmm->mm);
 
-        spin_lock(&mm->page_table_lock);
-        if (!mm->hmm)
-                mm->hmm = hmm;
-        else
-                cleanup = true;
-        spin_unlock(&mm->page_table_lock);
+        hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+        if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
+                kfree(hmm);
+                return NULL;
+        }
 
-        if (cleanup)
-                goto error;
+        mmgrab(hmm->mm);
 
         /*
-         * We should only get here if hold the mmap_sem in write mode ie on
-         * registration of first mirror through hmm_mirror_register()
+         * We hold the exclusive mmap_sem here so we know that mm->hmm is
+         * still NULL or 0 kref, and is safe to update.
          */
-        hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
-        if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
-                goto error_mm;
-
-        return hmm;
-
-error_mm:
         spin_lock(&mm->page_table_lock);
-        if (mm->hmm == hmm)
-                mm->hmm = NULL;
+        mm->hmm = hmm;
+
+out_unlock:
         spin_unlock(&mm->page_table_lock);
-error:
-        mmdrop(hmm->mm);
-        kfree(hmm);
-        return NULL;
+        return hmm;
 }
 
 static void hmm_free_rcu(struct rcu_head *rcu)
 {
-        kfree(container_of(rcu, struct hmm, rcu));
+        struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+
+        mmdrop(hmm->mm);
+        kfree(hmm);
 }
 
 static void hmm_free(struct kref *kref)
 {
         struct hmm *hmm = container_of(kref, struct hmm, kref);
-        struct mm_struct *mm = hmm->mm;
 
-        mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+        spin_lock(&hmm->mm->page_table_lock);
+        if (hmm->mm->hmm == hmm)
+                hmm->mm->hmm = NULL;
+        spin_unlock(&hmm->mm->page_table_lock);
 
-        spin_lock(&mm->page_table_lock);
-        if (mm->hmm == hmm)
-                mm->hmm = NULL;
-        spin_unlock(&mm->page_table_lock);
-
-        mmdrop(hmm->mm);
+        mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
         mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu);
 }
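[Editor's note: for context on the new lockdep_assert_held_exclusive(), hmm_get_or_create() is reached from hmm_mirror_register(), whose contract in this series requires the caller to hold the mmap_sem in write mode. A driver-side caller would look roughly like the sketch below, written against the hmm_mirror API of this series; my_mirror_ops and my_driver_register are hypothetical names.]

/*
 * Sketch of a driver-side caller of hmm_mirror_register(); hypothetical
 * driver names. The write-side mmap_sem taken here is what the new
 * lockdep_assert_held_exclusive(&mm->mmap_sem) in hmm_get_or_create()
 * checks for.
 */
#include <linux/hmm.h>
#include <linux/mm.h>

static const struct hmm_mirror_ops my_mirror_ops = {
        /* .sync_cpu_device_pagetables = ..., .release = ... */
};

static int my_driver_register(struct mm_struct *mm,
                              struct hmm_mirror *mirror)
{
        int ret;

        mirror->ops = &my_mirror_ops;

        down_write(&mm->mmap_sem);      /* required before registering */
        ret = hmm_mirror_register(mirror, mm);
        up_write(&mm->mmap_sem);

        return ret;
}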
diff --git a/mm/hmm.c b/mm/hmm.c
index 080b17a2e87e2d..4c64d4c32f4825 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -31,16 +31,6 @@
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
-static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
-{
-        struct hmm *hmm = READ_ONCE(mm->hmm);
-
-        if (hmm && kref_get_unless_zero(&hmm->kref))
-                return hmm;
-
-        return NULL;
-}
-
 /**
  * hmm_get_or_create - register HMM against an mm (HMM internal)
  *
@@ -55,11 +45,19 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
  */
 static struct hmm *hmm_get_or_create(struct mm_struct *mm)
 {
-        struct hmm *hmm = mm_get_hmm(mm);
-        bool cleanup = false;
+        struct hmm *hmm;
 
-        if (hmm)
-                return hmm;
+        lockdep_assert_held_exclusive(&mm->mmap_sem);
+
+        /* Abuse the page_table_lock to also protect mm->hmm. */
+        spin_lock(&mm->page_table_lock);
+        if (mm->hmm) {
+                if (kref_get_unless_zero(&mm->hmm->kref)) {
+                        spin_unlock(&mm->page_table_lock);
+                        return mm->hmm;
+                }
+        }
+        spin_unlock(&mm->page_table_lock);
 
         hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
         if (!hmm)
@@ -74,57 +72,47 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm)
         hmm->notifiers = 0;
         hmm->dead = false;
         hmm->mm = mm;
-        mmgrab(hmm->mm);
 
-        spin_lock(&mm->page_table_lock);
-        if (!mm->hmm)
-                mm->hmm = hmm;
-        else
-                cleanup = true;
-        spin_unlock(&mm->page_table_lock);
+        hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+        if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
+                kfree(hmm);
+                return NULL;
+        }
 
-        if (cleanup)
-                goto error;
+        mmgrab(hmm->mm);
 
         /*
-         * We should only get here if hold the mmap_sem in write mode ie on
-         * registration of first mirror through hmm_mirror_register()
+         * We hold the exclusive mmap_sem here so we know that mm->hmm is
+         * still NULL or 0 kref, and is safe to update.
          */
-        hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
-        if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
-                goto error_mm;
-
-        return hmm;
-
-error_mm:
         spin_lock(&mm->page_table_lock);
-        if (mm->hmm == hmm)
-                mm->hmm = NULL;
+        mm->hmm = hmm;
         spin_unlock(&mm->page_table_lock);
-error:
-        mmdrop(hmm->mm);
-        kfree(hmm);
-        return NULL;
+        return hmm;
 }
 
 static void hmm_free_rcu(struct rcu_head *rcu)
 {
-        kfree(container_of(rcu, struct hmm, rcu));
+        struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+
+        /*
+         * The mm->hmm pointer is kept valid while notifier ops can be running
+         * so they don't have to deal with a NULL mm->hmm value
+         */
+        spin_lock(&hmm->mm->page_table_lock);
+        if (hmm->mm->hmm == hmm)
+                hmm->mm->hmm = NULL;
+        spin_unlock(&hmm->mm->page_table_lock);
+        mmdrop(hmm->mm);
+
+        kfree(hmm);
 }
 
 static void hmm_free(struct kref *kref)
 {
         struct hmm *hmm = container_of(kref, struct hmm, kref);
-        struct mm_struct *mm = hmm->mm;
-
-        mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
 
-        spin_lock(&mm->page_table_lock);
-        if (mm->hmm == hmm)
-                mm->hmm = NULL;
-        spin_unlock(&mm->page_table_lock);
-
-        mmdrop(hmm->mm);
+        mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
         mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu);
 }
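[Editor's note: to make the "false-fail" from the commit message concrete: in the removed code above, mm->hmm can still point at an hmm whose kref has already dropped to zero but which has not yet been unlinked. The lookup then fails, and the subsequent install-only-if-empty check also fails, so the caller gets NULL even though nothing is actually wrong. Below is a minimal, runnable userspace model of that window; hypothetical names, single-threaded for clarity.]

/*
 * Userspace model of the false failure the commit message describes.
 * The "dying" object stands for an hmm whose kref hit zero but whose
 * mm->hmm unlink has not happened yet.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        atomic_int refcount;            /* models struct kref */
};

static struct obj *registered;          /* models mm->hmm */

static struct obj *old_get_or_create(void)
{
        struct obj *o = registered;

        /* lookup fails: the object exists but is already at refcount 0 */
        if (o && atomic_load(&o->refcount) != 0)
                return o;

        o = calloc(1, sizeof(*o));
        atomic_store(&o->refcount, 1);

        /* install-only-if-empty: the dying object still occupies the
         * slot, so the install fails and we bail -- the false failure */
        if (registered) {
                free(o);
                return NULL;
        }
        registered = o;
        return o;
}

int main(void)
{
        /* a dying object: still linked, but refcount already zero */
        registered = calloc(1, sizeof(*registered));
        atomic_store(&registered->refcount, 0);

        printf("old scheme: %s\n",
               old_get_or_create() ? "got object" : "false-failed");
        return 0;
}

[Serializing the whole function under the write-side mmap_sem closes this window: concurrent creators are excluded entirely, and the only remaining concurrent actor, the freeing path, is excluded via the page_table_lock.]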