@@ -1556,10 +1556,10 @@ static void unmap_single_vma(struct mmu_gather *tlb,
if (end <= vma->vm_start)
return;
- if (vma->vm_file)
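+	/*
+	 * VM_DEAD vmas already had their uprobes torn down and PFN mappings
+	 * untracked under the write mmap_sem, so skip those steps here.
+	 */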
+ if (vma->vm_file && !(vma->vm_flags & VM_DEAD))
uprobe_munmap(vma, start, end);
- if (unlikely(vma->vm_flags & VM_PFNMAP))
+ if (unlikely(vma->vm_flags & VM_PFNMAP) && !(vma->vm_flags & VM_DEAD))
untrack_pfn(vma, 0, 0);
if (start != end) {
@@ -1577,7 +1577,19 @@ static void unmap_single_vma(struct mmu_gather *tlb,
*/
if (vma->vm_file) {
i_mmap_lock_write(vma->vm_file->f_mapping);
- __unmap_hugepage_range_final(tlb, vma, start, end, NULL);
+				if (vma->vm_flags & VM_DEAD) {
+					/*
+					 * The vma is being unmapped with
+					 * only the read mmap_sem held, so
+					 * vm_flags cannot be updated here;
+					 * VM_MAYSHARE was already cleared
+					 * with the exclusive lock held.
+					 */
+					__unmap_hugepage_range(tlb, vma,
+							start, end, NULL);
+				} else {
+					__unmap_hugepage_range_final(tlb, vma,
+							start, end, NULL);
+				}
i_mmap_unlock_write(vma->vm_file->f_mapping);
}
} else
@@ -2778,6 +2778,91 @@ static inline void munmap_mlock_vma(struct vm_area_struct *vma,
}
}
+/*
+ * Unmap a large mapping early, zapping the pages with only the read
+ * mmap_sem held.
+ *
+ * uf is the list used to collect userfaultfd unmap events.
+ */
+static int do_munmap_zap_early(struct mm_struct *mm, unsigned long start,
+ size_t len, struct list_head *uf)
+{
+ unsigned long end = 0;
+ struct vm_area_struct *start_vma = NULL, *prev, *vma;
+ int ret = 0;
+
+ if (!munmap_addr_sanity(start, len))
+ return -EINVAL;
+
+ len = PAGE_ALIGN(len);
+
+ end = start + len;
+
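+	/*
+	 * Only mappings of at least PUD_SIZE take the early-zap path; smaller
+	 * ones just fall through to the regular do_munmap() call below.
+	 */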
+ if (len >= PUD_SIZE) {
+		/*
+		 * The write mmap_sem is needed to split vmas and to set the
+		 * VM_DEAD flag.  Split the vmas up-front so there is nothing
+		 * messy to clean up if a later step fails.
+		 */
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+
+ ret = munmap_lookup_vma(mm, &start_vma, &prev, start, end);
+ if (ret != 1)
+ goto out;
+
+		/*
+		 * ret == 1 means there are vmas to unmap; any other value is
+		 * returned to the caller as-is.  Reset ret for the code below.
+		 */
+ ret = 0;
+
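+		/*
+		 * Queue userfaultfd unmap events for the affected vmas; they
+		 * are delivered by the caller once the unmap has completed.
+		 */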
+ if (unlikely(uf)) {
+ ret = userfaultfd_unmap_prep(start_vma, start, end, uf);
+ if (ret)
+ goto out;
+ }
+
+ /* Handle mlocked vmas */
+ if (mm->locked_vm)
+ munmap_mlock_vma(start_vma, end);
+
+		/*
+		 * Set the VM_DEAD flag before tearing the mappings down; a
+		 * page fault on a VM_DEAD vma triggers SIGSEGV.
+		 *
+		 * Also tear down uprobes, untrack PFN mappings and clear the
+		 * hugetlb VM_MAYSHARE flag in advance, since those steps are
+		 * skipped for VM_DEAD vmas later on.
+		 */
+ vma = start_vma;
+ for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+ vma->vm_flags |= VM_DEAD;
+
+ if (vma->vm_file)
+ uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+ if (unlikely(vma->vm_flags & VM_PFNMAP))
+ untrack_pfn(vma, 0, 0);
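+			/*
+			 * __unmap_hugepage_range_final() would normally clear
+			 * VM_MAYSHARE, but vm_flags cannot be updated with
+			 * only the read mmap_sem held, so clear it now while
+			 * the write lock is still held.
+			 */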
+ if (is_vm_hugetlb_page(vma))
+ vma->vm_flags &= ~VM_MAYSHARE;
+ }
+
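+		/*
+		 * Downgrade the write mmap_sem to read without dropping it:
+		 * writers such as another mmap() stay excluded, while page
+		 * faults and other readers may proceed during the zap.
+		 */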
+ downgrade_write(&mm->mmap_sem);
+
+		/* Zap the mappings with only the read mmap_sem held. */
+		zap_page_range(start_vma, start, len);
+		/* The early zap succeeded; drop the read lock. */
+		up_read(&mm->mmap_sem);
+ }
+
+	/* Take the write mmap_sem for vma cleanup or for the regular path. */
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+	/*
+	 * Use do_munmap() for the vma cleanup as well, rather than carrying
+	 * vma pointers over, since the address space may have changed under
+	 * our feet before the exclusive lock was retaken.
+	 */
+ ret = do_munmap(mm, start, len, uf);
+
+out:
+ up_write(&mm->mmap_sem);
+ return ret;
+}
+
/* Munmap is split into 2 main parts -- this part which finds
* what needs doing, and the areas themselves, which do the
* work. This now handles partial unmappings.
@@ -2836,6 +2921,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
return 0;
}
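+/*
+ * Counterpart of vm_munmap() that goes through the early-zap path for
+ * large mappings (see do_munmap_zap_early()).
+ */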
+static int vm_munmap_zap_early(unsigned long start, size_t len)
+{
+ int ret;
+ struct mm_struct *mm = current->mm;
+ LIST_HEAD(uf);
+
+ ret = do_munmap_zap_early(mm, start, len, &uf);
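+	/* Deliver any userfaultfd unmap events collected above. */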
+ userfaultfd_unmap_complete(mm, &uf);
+ return ret;
+}
+
int vm_munmap(unsigned long start, size_t len)
{
int ret;
@@ -2855,10 +2951,13 @@ int vm_munmap(unsigned long start, size_t len)
SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
profile_munmap(addr);
+#ifdef CONFIG_64BIT
+ return vm_munmap_zap_early(addr, len);
+#else
return vm_munmap(addr, len);
+#endif
}
-
/*
* Emulation of deprecated remap_file_pages() syscall.
*/