Message ID | 631001ecc556c5e348ff4f47719334c31f7bd592.1681547405.git.lstoakes@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | remove the vmas parameter from GUP APIs | expand |
On 2023/04/15 18:08, Lorenzo Stoakes wrote: > @@ -475,10 +474,14 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, > gup_flags |= FOLL_SPLIT_PMD; > /* Read the page with vaddr into memory */ > ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, > - &old_page, &vma, NULL); > + &old_page, NULL); > if (ret <= 0) > return ret; > > + vma = vma_lookup(mm, vaddr); > + if (!vma) > + goto put_old; > + > ret = verify_opcode(old_page, vaddr, &opcode); > if (ret <= 0) > goto put_old; This conversion looks wrong. This causes returning a positive number when vma_lookup() returned NULL. * Return 0 (success) or a negative errno.
On Sat, Apr 15, 2023 at 06:52:41PM +0900, Tetsuo Handa wrote: > On 2023/04/15 18:08, Lorenzo Stoakes wrote: > > @@ -475,10 +474,14 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, > > gup_flags |= FOLL_SPLIT_PMD; > > /* Read the page with vaddr into memory */ > > ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, > > - &old_page, &vma, NULL); > > + &old_page, NULL); > > if (ret <= 0) > > return ret; > > > > + vma = vma_lookup(mm, vaddr); > > + if (!vma) > > + goto put_old; > > + > > ret = verify_opcode(old_page, vaddr, &opcode); > > if (ret <= 0) > > goto put_old; > > This conversion looks wrong. > This causes returning a positive number when vma_lookup() returned NULL. > > * Return 0 (success) or a negative errno. > In reality it shouldn't be possible for vma to return NULL, I'm adding the checks to be extra careful. In any case you're right, attaching a -fix patch to avoid spam:- ----8<---- From 0710d01ea69ad4e846fa1e56a40b253ff59714ac Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes <lstoakes@gmail.com> Date: Sat, 15 Apr 2023 11:09:15 +0100 Subject: [PATCH] mm/gup: remove vmas parameter from get_user_pages_remote() Correct vma NULL check as indicated by Tetsuo. Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com> --- kernel/events/uprobes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b21993cd2dcc..affe42a13eff 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -479,8 +479,10 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; vma = vma_lookup(mm, vaddr); - if (!vma) + if (!vma) { + ret = -EINVAL; goto put_old; + } ret = verify_opcode(old_page, vaddr, &opcode); if (ret <= 0)
On 2023/04/15 19:14, Lorenzo Stoakes wrote: > On Sat, Apr 15, 2023 at 06:52:41PM +0900, Tetsuo Handa wrote: >> On 2023/04/15 18:08, Lorenzo Stoakes wrote: >>> @@ -475,10 +474,14 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, >>> gup_flags |= FOLL_SPLIT_PMD; >>> /* Read the page with vaddr into memory */ >>> ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, >>> - &old_page, &vma, NULL); >>> + &old_page, NULL); >>> if (ret <= 0) >>> return ret; >>> >>> + vma = vma_lookup(mm, vaddr); >>> + if (!vma) >>> + goto put_old; >>> + >>> ret = verify_opcode(old_page, vaddr, &opcode); >>> if (ret <= 0) >>> goto put_old; >> >> This conversion looks wrong. >> This causes returning a positive number when vma_lookup() returned NULL. >> >> * Return 0 (success) or a negative errno. >> > > In reality it shouldn't be possible for vma to return NULL, I'm adding the > checks to be extra careful. > > In any case you're right, attaching a -fix patch to avoid spam:- If you want to return -EINVAL when vma_lookup() returned NULL for whatever unexpected reason, returning -EOPNOTSUPP in below path looks strange. > @@ -448,7 +448,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, > * would cause the existing tags to be cleared if the page > * was never mapped with PROT_MTE. > */ > - if (!(vma->vm_flags & VM_MTE)) { > + vma = vma_lookup(mm, addr); > + if (!vma || !(vma->vm_flags & VM_MTE)) { > ret = -EOPNOTSUPP; > put_page(page); > break; Also, > @@ -5591,7 +5591,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > struct page *page = NULL; > > ret = get_user_pages_remote(mm, addr, 1, > - gup_flags, &page, &vma, NULL); > + gup_flags, &page, NULL); > + vma = vma_lookup(mm, addr); > + > if (ret <= 0) { > #ifndef CONFIG_HAVE_IOREMAP_PROT > break; > @@ -5600,7 +5602,6 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > * Check if this is a VM_IO | VM_PFNMAP VMA, which > * we can access using slightly different code. > */ > - vma = vma_lookup(mm, addr); > if (!vma) > break; > if (vma->vm_ops && vma->vm_ops->access) > @@ -5617,11 +5618,11 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > bytes = PAGE_SIZE-offset; > > maddr = kmap(page); > - if (write) { > + if (write && vma) { > copy_to_user_page(vma, page, addr, > maddr + offset, buf, bytes); > set_page_dirty_lock(page); > - } else { > + } else if (vma) { > copy_from_user_page(vma, page, addr, > buf, maddr + offset, bytes); > } not calling copy_{from,to}_user_page() if vma == NULL is not sufficient for propagating an error to caller.
On Sat, Apr 15, 2023 at 07:36:06PM +0900, Tetsuo Handa wrote: > On 2023/04/15 19:14, Lorenzo Stoakes wrote: > > On Sat, Apr 15, 2023 at 06:52:41PM +0900, Tetsuo Handa wrote: > >> On 2023/04/15 18:08, Lorenzo Stoakes wrote: > >>> @@ -475,10 +474,14 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, > >>> gup_flags |= FOLL_SPLIT_PMD; > >>> /* Read the page with vaddr into memory */ > >>> ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, > >>> - &old_page, &vma, NULL); > >>> + &old_page, NULL); > >>> if (ret <= 0) > >>> return ret; > >>> > >>> + vma = vma_lookup(mm, vaddr); > >>> + if (!vma) > >>> + goto put_old; > >>> + > >>> ret = verify_opcode(old_page, vaddr, &opcode); > >>> if (ret <= 0) > >>> goto put_old; > >> > >> This conversion looks wrong. > >> This causes returning a positive number when vma_lookup() returned NULL. > >> > >> * Return 0 (success) or a negative errno. > >> > > > > In reality it shouldn't be possible for vma to return NULL, I'm adding the > > checks to be extra careful. > > > > In any case you're right, attaching a -fix patch to avoid spam:- > > If you want to return -EINVAL when vma_lookup() returned NULL for whatever > unexpected reason, returning -EOPNOTSUPP in below path looks strange. > This feels a little pedantic, this is not a condition that is expected to occur in practice, I'm not sure users will be writing code to differentiate between the two, and certainly vma being NULL implies MTE is not supported. To differentiate with minimal churn, I'll add a WARN_ON_ONCE() here and in each other case where an impossible condition arises as it would be indicative of a kernel bug. > > @@ -448,7 +448,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, > > * would cause the existing tags to be cleared if the page > > * was never mapped with PROT_MTE. > > */ > > - if (!(vma->vm_flags & VM_MTE)) { > > + vma = vma_lookup(mm, addr); > > + if (!vma || !(vma->vm_flags & VM_MTE)) { > > ret = -EOPNOTSUPP; > > put_page(page); > > break; > > Also, > > > @@ -5591,7 +5591,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > > struct page *page = NULL; > > > > ret = get_user_pages_remote(mm, addr, 1, > > - gup_flags, &page, &vma, NULL); > > + gup_flags, &page, NULL); > > + vma = vma_lookup(mm, addr); > > + > > if (ret <= 0) { > > #ifndef CONFIG_HAVE_IOREMAP_PROT > > break; > > @@ -5600,7 +5602,6 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > > * Check if this is a VM_IO | VM_PFNMAP VMA, which > > * we can access using slightly different code. > > */ > > - vma = vma_lookup(mm, addr); > > if (!vma) > > break; > > if (vma->vm_ops && vma->vm_ops->access) > > @@ -5617,11 +5618,11 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > > bytes = PAGE_SIZE-offset; > > > > maddr = kmap(page); > > - if (write) { > > + if (write && vma) { > > copy_to_user_page(vma, page, addr, > > maddr + offset, buf, bytes); > > set_page_dirty_lock(page); > > - } else { > > + } else if (vma) { > > copy_from_user_page(vma, page, addr, > > buf, maddr + offset, bytes); > > } > > not calling copy_{from,to}_user_page() if vma == NULL is not sufficient for > propagating an error to caller. > This is a product of wanting to avoid churn, again this condition is simply impossible. Also as a pedantic side note - the loop explicitly indicates no errors are propagated, so there is no need to do so. However, I want to be consistent with how I handle this and also I think it's sensible to add warnings for violation of this 'impossible' condition so I'll add a branch for it. Since I'd end up confusingly fixing up a fix-patch (and I want to change another patch in series to be consistent), I'll do a respin, apologies for spam in advance...
On 2023/04/15 20:27, Lorenzo Stoakes wrote: >>> @@ -5617,11 +5618,11 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, >>> bytes = PAGE_SIZE-offset; >>> >>> maddr = kmap(page); >>> - if (write) { >>> + if (write && vma) { >>> copy_to_user_page(vma, page, addr, >>> maddr + offset, buf, bytes); >>> set_page_dirty_lock(page); >>> - } else { >>> + } else if (vma) { >>> copy_from_user_page(vma, page, addr, >>> buf, maddr + offset, bytes); >>> } >> >> not calling copy_{from,to}_user_page() if vma == NULL is not sufficient for >> propagating an error to caller. >> > > This is a product of wanting to avoid churn, again this condition is simply > impossible. Also as a pedantic side note - the loop explicitly indicates no > errors are propagated, so there is no need to do so. Since __access_remote_vm() implicitly indicates an error via "return buf - old_buf;", "buf += bytes;" must not be executed if copy_{to,from}_user_page(bytes) was not called.
On Sat, Apr 15, 2023 at 08:40:44PM +0900, Tetsuo Handa wrote: > On 2023/04/15 20:27, Lorenzo Stoakes wrote: > >>> @@ -5617,11 +5618,11 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, > >>> bytes = PAGE_SIZE-offset; > >>> > >>> maddr = kmap(page); > >>> - if (write) { > >>> + if (write && vma) { > >>> copy_to_user_page(vma, page, addr, > >>> maddr + offset, buf, bytes); > >>> set_page_dirty_lock(page); > >>> - } else { > >>> + } else if (vma) { > >>> copy_from_user_page(vma, page, addr, > >>> buf, maddr + offset, bytes); > >>> } > >> > >> not calling copy_{from,to}_user_page() if vma == NULL is not sufficient for > >> propagating an error to caller. > >> > > > > This is a product of wanting to avoid churn, again this condition is simply > > impossible. Also as a pedantic side note - the loop explicitly indicates no > > errors are propagated, so there is no need to do so. > > Since __access_remote_vm() implicitly indicates an error via "return buf - old_buf;", > "buf += bytes;" must not be executed if copy_{to,from}_user_page(bytes) was not called. > > Yes, indeed, perhaps I wasn't clear, ack that we shouldn't increment buf if !vma, I already fixed this in the respin and additionally have added a warning here and in every instance of unexpected !vma. Will spam everybody with v3 once the allmodconfig build is complete... :)
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f5bcb0dc6267..d43a744d7919 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -419,7 +419,6 @@ long get_mte_ctrl(struct task_struct *task) static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, struct iovec *kiov, unsigned int gup_flags) { - struct vm_area_struct *vma; void __user *buf = kiov->iov_base; size_t len = kiov->iov_len; int ret; @@ -432,12 +431,13 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, return -EIO; while (len) { + struct vm_area_struct *vma; unsigned long tags, offset; void *maddr; struct page *page = NULL; ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, - &vma, NULL); + NULL); if (ret <= 0) break; @@ -448,7 +448,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, * would cause the existing tags to be cleared if the page * was never mapped with PROT_MTE. */ - if (!(vma->vm_flags & VM_MTE)) { + vma = vma_lookup(mm, addr); + if (!vma || !(vma->vm_flags & VM_MTE)) { ret = -EOPNOTSUPP; put_page(page); break; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9250fde1f97d..c19d0cb7d2f2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2777,7 +2777,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) mmap_read_lock(kvm->mm); get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, - &page, NULL, NULL); + &page, NULL); mmap_read_unlock(kvm->mm); return page; } diff --git a/fs/exec.c b/fs/exec.c index 87cf3a2f0e9a..d8d48ee15aac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -219,7 +219,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, */ mmap_read_lock(bprm->mm); ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags, - &page, NULL, NULL); + &page, NULL); mmap_read_unlock(bprm->mm); if (ret <= 0) return NULL; diff --git a/include/linux/mm.h b/include/linux/mm.h index ec9875c59f6d..1bfe73a2b6d3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2374,7 +2374,7 @@ extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr, long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked); + int *locked); long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 59887c69d54c..b21993cd2dcc 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -365,7 +365,6 @@ __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) { void *kaddr; struct page *page; - struct vm_area_struct *vma; int ret; short *ptr; @@ -373,7 +372,7 @@ __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) return -EINVAL; ret = get_user_pages_remote(mm, vaddr, 1, - FOLL_WRITE, &page, &vma, NULL); + FOLL_WRITE, &page, NULL); if (unlikely(ret <= 0)) { /* * We are asking for 1 page. If get_user_pages_remote() fails, @@ -475,10 +474,14 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, - &old_page, &vma, NULL); + &old_page, NULL); if (ret <= 0) return ret; + vma = vma_lookup(mm, vaddr); + if (!vma) + goto put_old; + ret = verify_opcode(old_page, vaddr, &opcode); if (ret <= 0) goto put_old; @@ -2027,8 +2030,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) * but we treat this as a 'remote' access since it is * essentially a kernel access to the memory. */ - result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page, - NULL, NULL); + result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page, NULL); if (result < 0) return result; diff --git a/mm/gup.c b/mm/gup.c index 931c805bc32b..9440aa54c741 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2165,8 +2165,6 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas, * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. - * @vmas: array of pointers to vmas corresponding to each page. - * Or NULL if the caller does not require them. * @locked: pointer to lock flag indicating whether lock is held and * subsequently whether VM_FAULT_RETRY functionality can be * utilised. Lock must initially be held. @@ -2181,8 +2179,6 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas, * * The caller is responsible for releasing returned @pages, via put_page(). * - * @vmas are valid only as long as mmap_lock is held. - * * Must be called with mmap_lock held for read or write. * * get_user_pages_remote walks a process's page tables and takes a reference @@ -2219,15 +2215,15 @@ static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas, long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) + int *locked) { int local_locked = 1; - if (!is_valid_gup_args(pages, vmas, locked, &gup_flags, + if (!is_valid_gup_args(pages, NULL, locked, &gup_flags, FOLL_TOUCH | FOLL_REMOTE)) return -EINVAL; - return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, NULL, locked ? locked : &local_locked, gup_flags); } @@ -2237,7 +2233,7 @@ EXPORT_SYMBOL(get_user_pages_remote); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) + int *locked) { return 0; } diff --git a/mm/memory.c b/mm/memory.c index 8ddb10199e8d..913e693322f2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5591,7 +5591,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, struct page *page = NULL; ret = get_user_pages_remote(mm, addr, 1, - gup_flags, &page, &vma, NULL); + gup_flags, &page, NULL); + vma = vma_lookup(mm, addr); + if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; @@ -5600,7 +5602,6 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, * Check if this is a VM_IO | VM_PFNMAP VMA, which * we can access using slightly different code. */ - vma = vma_lookup(mm, addr); if (!vma) break; if (vma->vm_ops && vma->vm_ops->access) @@ -5617,11 +5618,11 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, bytes = PAGE_SIZE-offset; maddr = kmap(page); - if (write) { + if (write && vma) { copy_to_user_page(vma, page, addr, maddr + offset, buf, bytes); set_page_dirty_lock(page); - } else { + } else if (vma) { copy_from_user_page(vma, page, addr, buf, maddr + offset, bytes); } diff --git a/mm/rmap.c b/mm/rmap.c index ba901c416785..756ea8a9bb90 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2324,7 +2324,7 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, npages = get_user_pages_remote(mm, start, npages, FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD, - pages, NULL, NULL); + pages, NULL); if (npages < 0) return npages; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 31af29f669d2..ac20c0bdff9d 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -916,7 +916,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, */ mmap_read_lock(bprm->mm); ret = get_user_pages_remote(bprm->mm, pos, 1, - FOLL_FORCE, &page, NULL, NULL); + FOLL_FORCE, &page, NULL); mmap_read_unlock(bprm->mm); if (ret <= 0) return false; diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 9bfe1d6f6529..e033c79d528e 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -61,8 +61,7 @@ static void async_pf_execute(struct work_struct *work) * access remotely. */ mmap_read_lock(mm); - get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, NULL, - &locked); + get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked); if (locked) mmap_read_unlock(mm);
The only instances of get_user_pages_remote() invocations which used the vmas parameter were for a single page which can instead simply look up the VMA directly. In particular:- - __update_ref_ctr() looked up the VMA but did nothing with it so we simply remove it. - __access_remote_vm() was already using vma_lookup() when the original lookup failed so by doing the lookup directly this also de-duplicates the code. This forms part of a broader set of patches intended to eliminate the vmas parameter altogether. Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com> --- arch/arm64/kernel/mte.c | 7 ++++--- arch/s390/kvm/interrupt.c | 2 +- fs/exec.c | 2 +- include/linux/mm.h | 2 +- kernel/events/uprobes.c | 12 +++++++----- mm/gup.c | 12 ++++-------- mm/memory.c | 9 +++++---- mm/rmap.c | 2 +- security/tomoyo/domain.c | 2 +- virt/kvm/async_pf.c | 3 +-- 10 files changed, 26 insertions(+), 27 deletions(-)