Message ID | 20190807233729.3899352-4-songliubraving@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | THP aware uprobe | expand |
On 08/07, Song Liu wrote: > > @@ -399,7 +399,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, > spin_unlock(ptl); > return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); > } > - if (flags & FOLL_SPLIT) { > + if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { > int ret; > page = pmd_page(*pmd); > if (is_huge_zero_page(page)) { > @@ -408,7 +408,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, > split_huge_pmd(vma, pmd, address); > if (pmd_trans_unstable(pmd)) > ret = -EBUSY; > - } else { > + } else if (flags & FOLL_SPLIT) { > if (unlikely(!try_get_page(page))) { > spin_unlock(ptl); > return ERR_PTR(-ENOMEM); > @@ -420,6 +420,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, > put_page(page); > if (pmd_none(*pmd)) > return no_page_table(vma, flags); > + } else { /* flags & FOLL_SPLIT_PMD */ > + spin_unlock(ptl); > + split_huge_pmd(vma, pmd, address); > + ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; > } Can't resist, let me repeat that I do not like this patch because imo it complicates this code for no reason. But I can't insist and of course I could miss something. Oleg.
> On Aug 8, 2019, at 9:37 AM, Oleg Nesterov <oleg@redhat.com> wrote: > > On 08/07, Song Liu wrote: >> >> @@ -399,7 +399,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, >> spin_unlock(ptl); >> return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); >> } >> - if (flags & FOLL_SPLIT) { >> + if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { >> int ret; >> page = pmd_page(*pmd); >> if (is_huge_zero_page(page)) { >> @@ -408,7 +408,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, >> split_huge_pmd(vma, pmd, address); >> if (pmd_trans_unstable(pmd)) >> ret = -EBUSY; >> - } else { >> + } else if (flags & FOLL_SPLIT) { >> if (unlikely(!try_get_page(page))) { >> spin_unlock(ptl); >> return ERR_PTR(-ENOMEM); >> @@ -420,6 +420,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, >> put_page(page); >> if (pmd_none(*pmd)) >> return no_page_table(vma, flags); >> + } else { /* flags & FOLL_SPLIT_PMD */ >> + spin_unlock(ptl); >> + split_huge_pmd(vma, pmd, address); >> + ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; >> } > > Can't resist, let me repeat that I do not like this patch because imo > it complicates this code for no reason. Personally, I don't think this is more complicated than your version. This patch is safe as it doesn't change any code for is_huge_zero_page() case. Also, if some code calls follow_pmd_mask() with flags contains both FOLL_SPLIT and FOLL_SPLIT_PMD, we should honor FOLL_SPLIT and split the huge page. Of course, there is no code that sets both flags. Does this resolve your concern here? Thanks, Song
On 08/08, Song Liu wrote: > > > On Aug 8, 2019, at 9:37 AM, Oleg Nesterov <oleg@redhat.com> wrote: > > > > On 08/07, Song Liu wrote: > >> > >> @@ -399,7 +399,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, > >> spin_unlock(ptl); > >> return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); > >> } > >> - if (flags & FOLL_SPLIT) { > >> + if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { > >> int ret; > >> page = pmd_page(*pmd); > >> if (is_huge_zero_page(page)) { > >> @@ -408,7 +408,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, > >> split_huge_pmd(vma, pmd, address); > >> if (pmd_trans_unstable(pmd)) > >> ret = -EBUSY; > >> - } else { > >> + } else if (flags & FOLL_SPLIT) { > >> if (unlikely(!try_get_page(page))) { > >> spin_unlock(ptl); > >> return ERR_PTR(-ENOMEM); > >> @@ -420,6 +420,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, > >> put_page(page); > >> if (pmd_none(*pmd)) > >> return no_page_table(vma, flags); > >> + } else { /* flags & FOLL_SPLIT_PMD */ > >> + spin_unlock(ptl); > >> + split_huge_pmd(vma, pmd, address); > >> + ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; > >> } > > > > Can't resist, let me repeat that I do not like this patch because imo > > it complicates this code for no reason. > > Personally, I don't think this is more complicated than your version. I do, but of course this is subjective. > Also, if some code calls follow_pmd_mask() with flags contains both > FOLL_SPLIT and FOLL_SPLIT_PMD, we should honor FOLL_SPLIT and split the > huge page. Heh. why not other way around? > Of course, there is no code that sets both flags. and of course, nobody should ever pass both FOLL_SPLIT and FOLL_SPLIT_PMD, perhaps this deserves a warning. Not to mention that it would be nice to kill FOLL_SPLIT which has a single user, but this is another story. Oleg.
> On Aug 9, 2019, at 9:35 AM, Oleg Nesterov <oleg@redhat.com> wrote: > > On 08/08, Song Liu wrote: >> >>> On Aug 8, 2019, at 9:37 AM, Oleg Nesterov <oleg@redhat.com> wrote: >>> >>> On 08/07, Song Liu wrote: >>>> >>>> @@ -399,7 +399,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, >>>> spin_unlock(ptl); >>>> return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); >>>> } >>>> - if (flags & FOLL_SPLIT) { >>>> + if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { >>>> int ret; >>>> page = pmd_page(*pmd); >>>> if (is_huge_zero_page(page)) { >>>> @@ -408,7 +408,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, >>>> split_huge_pmd(vma, pmd, address); >>>> if (pmd_trans_unstable(pmd)) >>>> ret = -EBUSY; >>>> - } else { >>>> + } else if (flags & FOLL_SPLIT) { >>>> if (unlikely(!try_get_page(page))) { >>>> spin_unlock(ptl); >>>> return ERR_PTR(-ENOMEM); >>>> @@ -420,6 +420,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, >>>> put_page(page); >>>> if (pmd_none(*pmd)) >>>> return no_page_table(vma, flags); >>>> + } else { /* flags & FOLL_SPLIT_PMD */ >>>> + spin_unlock(ptl); >>>> + split_huge_pmd(vma, pmd, address); >>>> + ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; >>>> } >>> >>> Can't resist, let me repeat that I do not like this patch because imo >>> it complicates this code for no reason. >> >> Personally, I don't think this is more complicated than your version. > > I do, but of course this is subjective. > >> Also, if some code calls follow_pmd_mask() with flags contains both >> FOLL_SPLIT and FOLL_SPLIT_PMD, we should honor FOLL_SPLIT and split the >> huge page. > > Heh. why not other way around? Because FOLL_SPLIT splits both the page and the pmd. FOLL_SPLIT_PMD only splits the pmd, so it is a subset of FOLL_SPLIT. When the user sets both, we should split both the page and the pmd. Thanks, Song
diff --git a/include/linux/mm.h b/include/linux/mm.h index f189176dabed..74db879711eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2614,6 +2614,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ #define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ +#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ /* * NOTE on FOLL_LONGTERM: diff --git a/mm/gup.c b/mm/gup.c index 98f13ab37bac..c20afe800b3f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -399,7 +399,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); } - if (flags & FOLL_SPLIT) { + if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { int ret; page = pmd_page(*pmd); if (is_huge_zero_page(page)) { @@ -408,7 +408,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, split_huge_pmd(vma, pmd, address); if (pmd_trans_unstable(pmd)) ret = -EBUSY; - } else { + } else if (flags & FOLL_SPLIT) { if (unlikely(!try_get_page(page))) { spin_unlock(ptl); return ERR_PTR(-ENOMEM); @@ -420,6 +420,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, put_page(page); if (pmd_none(*pmd)) return no_page_table(vma, flags); + } else { /* flags & FOLL_SPLIT_PMD */ + spin_unlock(ptl); + split_huge_pmd(vma, pmd, address); + ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; } return ret ? ERR_PTR(ret) :