@@ -1845,6 +1845,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
struct mmu_notifier_range *range, pte_t **ptepp,
pmd_t **pmdpp, spinlock_t **ptlp);
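For orientation: copy_page_range() is called from dup_mmap() in kernel/fork.c when a process forks, while the new mshare_copy_ptes() is invoked from the mshare() syscall path (see the mm/mshare.c hunk below). A simplified, hypothetical sketch of the fork-side caller, with locking, error handling and most VMA bookkeeping elided, shows the calling convention the new helper mirrors:

	/*
	 * Simplified sketch of the existing caller in kernel/fork.c
	 * (dup_mmap()); locking, error handling and VMA tree insertion
	 * are elided. Not part of this patch.
	 */
	static int dup_mmap_sketch(struct mm_struct *mm, struct mm_struct *oldmm)
	{
		struct vm_area_struct *mpnt, *tmp;
		int retval = 0;

		for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
			tmp = vm_area_dup(mpnt);	/* child's copy of the VMA */
			if (!tmp)
				return -ENOMEM;
			tmp->vm_mm = mm;		/* retarget the copy at the child mm */
			/* ... link tmp into mm's VMA list and tree ... */
			retval = copy_page_range(tmp, mpnt);
			if (retval)
				break;
		}
		return retval;
	}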
@@ -1234,6 +1234,54 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
return 0;
}
+/*
+ * Copy PTEs for mshare'd pages.
+ * This code is based upon copy_page_range()
+ */
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
+{
+ pgd_t *src_pgd, *dst_pgd;
+ unsigned long next;
+ unsigned long addr = src_vma->vm_start;
+ unsigned long end = src_vma->vm_end;
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
+ struct mm_struct *src_mm = src_vma->vm_mm;
+ struct mmu_notifier_range range;
+ int ret = 0;
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+ 0, src_vma, src_mm, addr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ /*
+ * Disabling preemption is not needed for the write side, as
+ * the read side doesn't spin, but goes to the mmap_lock.
+ *
+ * Use the raw variant of the seqcount_t write API to avoid
+ * lockdep complaining about preemptibility.
+ */
+ mmap_assert_write_locked(src_mm);
+ raw_write_seqcount_begin(&src_mm->write_protect_seq);
+
+ dst_pgd = pgd_offset(dst_mm, addr);
+ src_pgd = pgd_offset(src_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(src_pgd))
+ continue;
+ if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd,
+ addr, next))) {
+ ret = -ENOMEM;
+ break;
+ }
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+ raw_write_seqcount_end(&src_mm->write_protect_seq);
+ mmu_notifier_invalidate_range_end(&range);
+
+ return ret;
+}
+
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
{
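The write_protect_seq sequence above is copied from copy_page_range(): bumping the count lets lockless GUP-fast detect that a PTE-copying writer ran concurrently and fall back to the slow path under mmap_lock. A rough sketch of the read side, loosely following internal_get_user_pages_fast() in mm/gup.c (names simplified; not part of this patch):

	/*
	 * Rough sketch of the lockless reader. An odd sequence count
	 * means a writer is in progress; a changed count afterwards
	 * means copy_page_range()/mshare_copy_ptes() ran while we
	 * walked the page tables, so the caller must retry under
	 * mmap_lock.
	 */
	static int gup_fast_sketch(struct mm_struct *mm, unsigned long start,
				   int nr_pages, struct page **pages)
	{
		unsigned int seq;
		int nr = 0;

		seq = raw_read_seqcount(&mm->write_protect_seq);
		if (seq & 1)			/* writer in progress */
			return -EAGAIN;

		/* ... walk page tables locklessly, filling pages[] ... */

		if (read_seqcount_retry(&mm->write_protect_seq, seq))
			return -EAGAIN;		/* raced with a writer */
		return nr;
	}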
@@ -385,7 +385,6 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
* Copy this vma over to host mm
*/
vma->vm_private_data = info;
- vma->vm_mm = new_mm;
vma->vm_flags |= VM_SHARED_PT;
new_vma = vm_area_dup(vma);
if (!new_vma) {
@@ -394,6 +393,7 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
err = -ENOMEM;
goto free_info;
}
+ new_vma->vm_mm = new_mm;
err = insert_vm_struct(new_mm, new_vma);
if (err) {
mmap_write_unlock(new_mm);
@@ -402,17 +402,13 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
goto free_info;
}
+ /* Copy over current PTEs */
+ err = mshare_copy_ptes(new_vma, vma);
+ if (err != 0)
+ goto free_info;
vma = vma->vm_next;
}
- /*
- * Copy over current PTEs
- */
- myaddr = addr;
- while (myaddr < new_mm->task_size) {
- *pgd_offset(new_mm, myaddr) = *pgd_offset(old_mm, myaddr);
- myaddr += PGDIR_SIZE;
- }
/*
 * TODO: Free the corresponding page table in calling
 * process
 */
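Taken together, the mm/mshare.c changes fix two problems: the donor's own VMA was previously retargeted at the host mm before vm_area_dup(), corrupting the caller's mm, and the raw PGD-entry copy aliased page tables at PGDIR_SIZE granularity with no mmu-notifier or seqcount coordination. A condensed, hypothetical view of the corrected per-VMA flow (setup_mshare_vma() is an illustrative name, not from the patch; locking and cleanup are elided):

	/*
	 * Hypothetical condensation of the fixed setup order. Only the
	 * duplicate VMA is pointed at the host mm, so the donor VMA is
	 * left untouched, and PTEs are copied under full notifier and
	 * seqcount coordination instead of aliasing raw PGD entries.
	 */
	static int setup_mshare_vma(struct mm_struct *new_mm,
				    struct vm_area_struct *vma)
	{
		struct vm_area_struct *new_vma;
		int err;

		vma->vm_flags |= VM_SHARED_PT;
		new_vma = vm_area_dup(vma);	/* still points at donor mm */
		if (!new_vma)
			return -ENOMEM;
		new_vma->vm_mm = new_mm;	/* retarget only the copy */
		err = insert_vm_struct(new_mm, new_vma);
		if (err)
			return err;
		return mshare_copy_ptes(new_vma, vma);	/* host <- donor PTEs */
	}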
VMAs for shared addresses are hosted by a separate host mm. Copy the
original PTEs from the donor process into the host mm so the PTEs are
maintained independently of the donor process.

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/mshare.c        | 14 +++++---------
 3 files changed, 55 insertions(+), 9 deletions(-)