Message ID | 20221021163703.3218176-6-jthoughton@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | hugetlb: introduce HugeTLB high-granularity mapping | expand |
On Fri, Oct 21, 2022 at 04:36:21PM +0000, James Houghton wrote: > Currently hugetlb_vma_lock_alloc doesn't return anything, as there is no > need: if it fails, PMD sharing won't be enabled. However, HGM requires > that the VMA lock exists, so we need to verify that > hugetlb_vma_lock_alloc actually succeeded. If hugetlb_vma_lock_alloc > fails, then we can pass that up to the caller that is attempting to > enable HGM. > > Signed-off-by: James Houghton <jthoughton@google.com> > --- > mm/hugetlb.c | 16 +++++++++------- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 52cec5b0789e..dc82256b89dd 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -92,7 +92,7 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; > /* Forward declaration */ > static int hugetlb_acct_memory(struct hstate *h, long delta); > static void hugetlb_vma_lock_free(struct vm_area_struct *vma); > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma); > static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); > > static inline bool subpool_is_free(struct hugepage_subpool *spool) > @@ -7001,17 +7001,17 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) > } > } > > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > { > struct hugetlb_vma_lock *vma_lock; > > /* Only establish in (flags) sharable vmas */ > if (!vma || !(vma->vm_flags & VM_MAYSHARE)) > - return; > + return -EINVAL; > > - /* Should never get here with non-NULL vm_private_data */ > + /* We've already allocated the lock. */ > if (vma->vm_private_data) > - return; > + return 0; No objection on the patch itself, but I am just wondering what guarantees thread-safety for this function to not leak vm_private_data when two threads try to allocate at the same time. 
I think it should be the write mmap lock. I saw that in your latest code base there's: /* * We must hold the mmap lock for writing so that callers can rely on * hugetlb_hgm_enabled returning a consistent result while holding * the mmap lock for reading. */ mmap_assert_write_locked(vma->vm_mm); /* HugeTLB HGM requires the VMA lock to synchronize collapsing. */ ret = hugetlb_vma_data_alloc(vma); if (ret) return ret; So that's covered there. The remaining places are hugetlb_vm_op_open() and hugetlb_reserve_pages() and they both seem fine too: hugetlb_vm_op_open() is during mmap(), the latter has vma==NULL so allocation will be skipped. I'm wondering whether it would make sense to move this assert to be inside of hugetlb_vma_data_alloc() after the !vma check, or just add the same assert too but for a different reason. > > vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); > if (!vma_lock) { > @@ -7026,13 +7026,14 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > * allocation failure. > */ > pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); > - return; > + return -ENOMEM; > } > > kref_init(&vma_lock->refs); > init_rwsem(&vma_lock->rw_sema); > vma_lock->vma = vma; > vma->vm_private_data = vma_lock; > + return 0; > } > > /* > @@ -7160,8 +7161,9 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) > { > } > > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > { > + return 0; > } > > pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, > -- > 2.38.0.135.g90850a2211-goog > >
On Wed, Nov 16, 2022 at 9:08 AM Peter Xu <peterx@redhat.com> wrote: > > No objection on the patch itself, but I am just wondering what guarantees > thread-safety for this function to not leak vm_private_data when two > threads try to allocate at the same time. > > I think it should be the write mmap lock. I saw that in your latest code > base there's: > > /* > * We must hold the mmap lock for writing so that callers can rely on > * hugetlb_hgm_enabled returning a consistent result while holding > * the mmap lock for reading. > */ > mmap_assert_write_locked(vma->vm_mm); > > /* HugeTLB HGM requires the VMA lock to synchronize collapsing. */ > ret = hugetlb_vma_data_alloc(vma); > if (ret) > return ret; > > So that's covered there. The rest places are hugetlb_vm_op_open() and > hugetlb_reserve_pages() and they all seem fine too: hugetlb_vm_op_open() is > during mmap(), the latter has vma==NULL so allocation will be skipped. > > I'm wondering whether it would make sense to move this assert to be inside > of hugetlb_vma_data_alloc() after the !vma check, or just add the same > assert too but for different reason. I think leaving the assert here and adding a new assert inside hugetlb_vma_data_alloc() makes sense. Thanks Peter. - James > > > > > vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); > > if (!vma_lock) { > > @@ -7026,13 +7026,14 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > > * allocation failure. 
> > */ > > pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); > > - return; > > + return -ENOMEM; > > } > > > > kref_init(&vma_lock->refs); > > init_rwsem(&vma_lock->rw_sema); > > vma_lock->vma = vma; > > vma->vm_private_data = vma_lock; > > + return 0; > > } > > > > /* > > @@ -7160,8 +7161,9 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) > > { > > } > > > > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > > { > > + return 0; > > } > > > > pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, > > -- > > 2.38.0.135.g90850a2211-goog > > > > > > -- > Peter Xu >
On Fri, Oct 21, 2022 at 9:37 AM James Houghton <jthoughton@google.com> wrote: > > Currently hugetlb_vma_lock_alloc doesn't return anything, as there is no > need: if it fails, PMD sharing won't be enabled. However, HGM requires > that the VMA lock exists, so we need to verify that > hugetlb_vma_lock_alloc actually succeeded. If hugetlb_vma_lock_alloc > fails, then we can pass that up to the caller that is attempting to > enable HGM. > > Signed-off-by: James Houghton <jthoughton@google.com> > --- > mm/hugetlb.c | 16 +++++++++------- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 52cec5b0789e..dc82256b89dd 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -92,7 +92,7 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; > /* Forward declaration */ > static int hugetlb_acct_memory(struct hstate *h, long delta); > static void hugetlb_vma_lock_free(struct vm_area_struct *vma); > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma); > static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); > > static inline bool subpool_is_free(struct hugepage_subpool *spool) > @@ -7001,17 +7001,17 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) > } > } > > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > { > struct hugetlb_vma_lock *vma_lock; > > /* Only establish in (flags) sharable vmas */ > if (!vma || !(vma->vm_flags & VM_MAYSHARE)) > - return; > + return -EINVAL; > > - /* Should never get here with non-NULL vm_private_data */ > + /* We've already allocated the lock. */ > if (vma->vm_private_data) > - return; > + return 0; I would have expected -EEXIST here. Also even if the patch looks generally fine it's hard to provide Acked-by now. I need to look at the call site which is in another patch in the series. 
If there is an opportunity to squash changes to helpers and their call sites please do. > > vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); > if (!vma_lock) { > @@ -7026,13 +7026,14 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > * allocation failure. > */ > pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); > - return; > + return -ENOMEM; > } > > kref_init(&vma_lock->refs); > init_rwsem(&vma_lock->rw_sema); > vma_lock->vma = vma; > vma->vm_private_data = vma_lock; > + return 0; > } > > /* > @@ -7160,8 +7161,9 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) > { > } > > -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) > { > + return 0; > } > > pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, > -- > 2.38.0.135.g90850a2211-goog >
On 10/21/22 16:36, James Houghton wrote: > Currently hugetlb_vma_lock_alloc doesn't return anything, as there is no > need: if it fails, PMD sharing won't be enabled. However, HGM requires > that the VMA lock exists, so we need to verify that > hugetlb_vma_lock_alloc actually succeeded. If hugetlb_vma_lock_alloc > fails, then we can pass that up to the caller that is attempting to > enable HGM. No serious objections to this change ... However, there are currently only two places where hugetlb_vma_lock_alloc is called: hugetlb_reserve_pages and hugetlb_vm_op_open. hugetlb_reserve_pages is not an issue. Since hugetlb_vm_op_open (as a defined vm_operation) returns void, I am not sure how you plan to pass up an allocation failure. I suspect this will become evident in subsequent patches.
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 52cec5b0789e..dc82256b89dd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -92,7 +92,7 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; /* Forward declaration */ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); static inline bool subpool_is_free(struct hugepage_subpool *spool) @@ -7001,17 +7001,17 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) } } -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) { struct hugetlb_vma_lock *vma_lock; /* Only establish in (flags) sharable vmas */ if (!vma || !(vma->vm_flags & VM_MAYSHARE)) - return; + return -EINVAL; - /* Should never get here with non-NULL vm_private_data */ + /* We've already allocated the lock. */ if (vma->vm_private_data) - return; + return 0; vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); if (!vma_lock) { @@ -7026,13 +7026,14 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) * allocation failure. */ pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); - return; + return -ENOMEM; } kref_init(&vma_lock->refs); init_rwsem(&vma_lock->rw_sema); vma_lock->vma = vma; vma->vm_private_data = vma_lock; + return 0; } /* @@ -7160,8 +7161,9 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) { } -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +static int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) { + return 0; } pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
Currently hugetlb_vma_lock_alloc doesn't return anything, as there is no need: if it fails, PMD sharing won't be enabled. However, HGM requires that the VMA lock exists, so we need to verify that hugetlb_vma_lock_alloc actually succeeded. If hugetlb_vma_lock_alloc fails, then we can pass that up to the caller that is attempting to enable HGM. Signed-off-by: James Houghton <jthoughton@google.com> --- mm/hugetlb.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)