Message ID | 1b0a0e8c9558766f13a64ae93092eb8c9ea7965d.1656531090.git.khalid.aziz@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add support for shared PTEs across processes | expand |
On Wed, Jun 29, 2022 at 04:53:58PM -0600, Khalid Aziz wrote: > Number of mappings of an mshare region should be tracked so it can > be removed when there are no more references to it and associated > file has been deleted. This add code to support the unlink operation > for associated file, remove the mshare region on file deletion if > refcount goes to zero, add munmap operation to maintain refcount > to mshare region and remove it on last munmap if file has been > deleted. > > Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> > --- > mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 63 insertions(+), 1 deletion(-) > > diff --git a/mm/mshare.c b/mm/mshare.c > index 088a6cab1e93..90ce0564a138 100644 > --- a/mm/mshare.c > +++ b/mm/mshare.c > @@ -29,6 +29,7 @@ static struct super_block *msharefs_sb; > struct mshare_data { > struct mm_struct *mm; > refcount_t refcnt; > + int deleted; > struct mshare_info *minfo; > }; > > @@ -48,6 +49,7 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov) > size_t ret; > struct mshare_info m_info; > > + mmap_read_lock(info->mm); > if (info->minfo != NULL) { > m_info.start = info->minfo->start; > m_info.size = info->minfo->size; > @@ -55,18 +57,42 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov) > m_info.start = 0; > m_info.size = 0; > } > + mmap_read_unlock(info->mm); > ret = copy_to_iter(&m_info, sizeof(m_info), iov); > if (!ret) > return -EFAULT; > return ret; > } > > +static void > +msharefs_close(struct vm_area_struct *vma) > +{ > + struct mshare_data *info = vma->vm_private_data; > + > + if (refcount_dec_and_test(&info->refcnt)) { > + mmap_read_lock(info->mm); > + if (info->deleted) { > + mmap_read_unlock(info->mm); > + mmput(info->mm); > + kfree(info->minfo); > + kfree(info); Aren't filesystems supposed to take care of disposing of the file data in destroy_inode? IIRC struct inode doesn't go away until all fds are closed, mappings are torn down, and there are no more references from dentries. I could be misremembering since it's been a few months since I went looking at the (VFS) inode lifecycle. > + } else { > + mmap_read_unlock(info->mm); > + } > + } > +} > + > +static const struct vm_operations_struct msharefs_vm_ops = { > + .close = msharefs_close, > +}; > + > static int > msharefs_mmap(struct file *file, struct vm_area_struct *vma) > { > struct mshare_data *info = file->private_data; > struct mm_struct *mm = info->mm; > > + mmap_write_lock(mm); > /* > * If this mshare region has been set up once already, bail out > */ > @@ -80,10 +106,14 @@ msharefs_mmap(struct file *file, struct vm_area_struct *vma) > mm->task_size = vma->vm_end - vma->vm_start; > if (!mm->task_size) > mm->task_size--; > + mmap_write_unlock(mm); > info->minfo->start = mm->mmap_base; > info->minfo->size = mm->task_size; > + info->deleted = 0; > + refcount_inc(&info->refcnt); > vma->vm_flags |= VM_SHARED_PT; > vma->vm_private_data = info; > + vma->vm_ops = &msharefs_vm_ops; > return 0; > } > > @@ -240,6 +270,38 @@ msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, > return ret; > } > > +static int > +msharefs_unlink(struct inode *dir, struct dentry *dentry) > +{ > + struct inode *inode = d_inode(dentry); > + struct mshare_data *info = inode->i_private; > + > + /* > + * Unmap the mshare region if it is still mapped in > + */ > + vm_munmap(info->minfo->start, info->minfo->size); > + > + /* > + * Mark msharefs file for deletion so it can not be opened > + * and used for mshare mappings any more > + */ > + simple_unlink(dir, dentry); > + mmap_write_lock(info->mm); > + info->deleted = 1; > + mmap_write_unlock(info->mm); What if the file is hardlinked? --D > + > + /* > + * Is this the last reference? If so, delete mshare region and > + * remove the file > + */ > + if (!refcount_dec_and_test(&info->refcnt)) { > + mmput(info->mm); > + kfree(info->minfo); > + kfree(info); > + } > + return 0; > +} > + > static const struct inode_operations msharefs_file_inode_ops = { > .setattr = simple_setattr, > .getattr = simple_getattr, > @@ -248,7 +310,7 @@ static const struct inode_operations msharefs_dir_inode_ops = { > .create = msharefs_create, > .lookup = simple_lookup, > .link = simple_link, > - .unlink = simple_unlink, > + .unlink = msharefs_unlink, > .mkdir = msharefs_mkdir, > .rmdir = simple_rmdir, > .mknod = msharefs_mknod, > -- > 2.32.0 >
On 6/30/22 15:50, Darrick J. Wong wrote: > On Wed, Jun 29, 2022 at 04:53:58PM -0600, Khalid Aziz wrote: >> Number of mappings of an mshare region should be tracked so it can >> be removed when there are no more references to it and associated >> file has been deleted. This add code to support the unlink operation >> for associated file, remove the mshare region on file deletion if >> refcount goes to zero, add munmap operation to maintain refcount >> to mshare region and remove it on last munmap if file has been >> deleted. >> >> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> >> --- >> mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 63 insertions(+), 1 deletion(-) >> >> diff --git a/mm/mshare.c b/mm/mshare.c >> index 088a6cab1e93..90ce0564a138 100644 >> --- a/mm/mshare.c >> +++ b/mm/mshare.c >> @@ -29,6 +29,7 @@ static struct super_block *msharefs_sb; >> struct mshare_data { >> struct mm_struct *mm; >> refcount_t refcnt; >> + int deleted; >> struct mshare_info *minfo; >> }; >> >> @@ -48,6 +49,7 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov) >> size_t ret; >> struct mshare_info m_info; >> >> + mmap_read_lock(info->mm); >> if (info->minfo != NULL) { >> m_info.start = info->minfo->start; >> m_info.size = info->minfo->size; >> @@ -55,18 +57,42 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov) >> m_info.start = 0; >> m_info.size = 0; >> } >> + mmap_read_unlock(info->mm); >> ret = copy_to_iter(&m_info, sizeof(m_info), iov); >> if (!ret) >> return -EFAULT; >> return ret; >> } >> >> +static void >> +msharefs_close(struct vm_area_struct *vma) >> +{ >> + struct mshare_data *info = vma->vm_private_data; >> + >> + if (refcount_dec_and_test(&info->refcnt)) { >> + mmap_read_lock(info->mm); >> + if (info->deleted) { >> + mmap_read_unlock(info->mm); >> + mmput(info->mm); >> + kfree(info->minfo); >> + kfree(info); > > Aren't filesystems supposed to take care of disposing of the file data > in destroy_inode? IIRC struct inode doesn't go away until all fds are > closed, mappings are torn down, and there are no more references from > dentries. I could be misremembering since it's been a few months since > I went looking at the (VFS) inode lifecycle. Documentation (vfs.rst) says - "this method is called by destroy_inode() to release resources allocated for struct inode. It is only required if ->alloc_inode was defined and simply undoes anything done by ->alloc_inode.". I am not defining alloc_inode, so I assumed I do not need to define destroy_inode and the standard destroy_inode will do the right thing since standard alloc_inode is being used. Are you suggesting per-region mshare_data should be freed in destroy_inode instead of in close? > >> + } else { >> + mmap_read_unlock(info->mm); >> + } >> + } >> +} >> + >> +static const struct vm_operations_struct msharefs_vm_ops = { >> + .close = msharefs_close, >> +}; >> + >> static int >> msharefs_mmap(struct file *file, struct vm_area_struct *vma) >> { >> struct mshare_data *info = file->private_data; >> struct mm_struct *mm = info->mm; >> >> + mmap_write_lock(mm); >> /* >> * If this mshare region has been set up once already, bail out >> */ >> @@ -80,10 +106,14 @@ msharefs_mmap(struct file *file, struct vm_area_struct *vma) >> mm->task_size = vma->vm_end - vma->vm_start; >> if (!mm->task_size) >> mm->task_size--; >> + mmap_write_unlock(mm); >> info->minfo->start = mm->mmap_base; >> info->minfo->size = mm->task_size; >> + info->deleted = 0; >> + refcount_inc(&info->refcnt); >> vma->vm_flags |= VM_SHARED_PT; >> vma->vm_private_data = info; >> + vma->vm_ops = &msharefs_vm_ops; >> return 0; >> } >> >> @@ -240,6 +270,38 @@ msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, >> return ret; >> } >> >> +static int >> +msharefs_unlink(struct inode *dir, struct dentry *dentry) >> +{ >> + struct inode *inode = d_inode(dentry); >> + struct mshare_data *info = inode->i_private; >> + >> + /* >> + * Unmap the mshare region if it is still mapped in >> + */ >> + vm_munmap(info->minfo->start, info->minfo->size); >> + >> + /* >> + * Mark msharefs file for deletion so it can not be opened >> + * and used for mshare mappings any more >> + */ >> + simple_unlink(dir, dentry); >> + mmap_write_lock(info->mm); >> + info->deleted = 1; >> + mmap_write_unlock(info->mm); > > What if the file is hardlinked? It looks like that is a bug currently. I need to account for that. Thanks! -- Khalid
diff --git a/mm/mshare.c b/mm/mshare.c index 088a6cab1e93..90ce0564a138 100644 --- a/mm/mshare.c +++ b/mm/mshare.c @@ -29,6 +29,7 @@ static struct super_block *msharefs_sb; struct mshare_data { struct mm_struct *mm; refcount_t refcnt; + int deleted; struct mshare_info *minfo; }; @@ -48,6 +49,7 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov) size_t ret; struct mshare_info m_info; + mmap_read_lock(info->mm); if (info->minfo != NULL) { m_info.start = info->minfo->start; m_info.size = info->minfo->size; @@ -55,18 +57,42 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov) m_info.start = 0; m_info.size = 0; } + mmap_read_unlock(info->mm); ret = copy_to_iter(&m_info, sizeof(m_info), iov); if (!ret) return -EFAULT; return ret; } +static void +msharefs_close(struct vm_area_struct *vma) +{ + struct mshare_data *info = vma->vm_private_data; + + if (refcount_dec_and_test(&info->refcnt)) { + mmap_read_lock(info->mm); + if (info->deleted) { + mmap_read_unlock(info->mm); + mmput(info->mm); + kfree(info->minfo); + kfree(info); + } else { + mmap_read_unlock(info->mm); + } + } +} + +static const struct vm_operations_struct msharefs_vm_ops = { + .close = msharefs_close, +}; + static int msharefs_mmap(struct file *file, struct vm_area_struct *vma) { struct mshare_data *info = file->private_data; struct mm_struct *mm = info->mm; + mmap_write_lock(mm); /* * If this mshare region has been set up once already, bail out */ @@ -80,10 +106,14 @@ msharefs_mmap(struct file *file, struct vm_area_struct *vma) mm->task_size = vma->vm_end - vma->vm_start; if (!mm->task_size) mm->task_size--; + mmap_write_unlock(mm); info->minfo->start = mm->mmap_base; info->minfo->size = mm->task_size; + info->deleted = 0; + refcount_inc(&info->refcnt); vma->vm_flags |= VM_SHARED_PT; vma->vm_private_data = info; + vma->vm_ops = &msharefs_vm_ops; return 0; } @@ -240,6 +270,38 @@ msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, return ret; } +static int +msharefs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + struct mshare_data *info = inode->i_private; + + /* + * Unmap the mshare region if it is still mapped in + */ + vm_munmap(info->minfo->start, info->minfo->size); + + /* + * Mark msharefs file for deletion so it can not be opened + * and used for mshare mappings any more + */ + simple_unlink(dir, dentry); + mmap_write_lock(info->mm); + info->deleted = 1; + mmap_write_unlock(info->mm); + + /* + * Is this the last reference? If so, delete mshare region and + * remove the file + */ + if (!refcount_dec_and_test(&info->refcnt)) { + mmput(info->mm); + kfree(info->minfo); + kfree(info); + } + return 0; +} + static const struct inode_operations msharefs_file_inode_ops = { .setattr = simple_setattr, .getattr = simple_getattr, @@ -248,7 +310,7 @@ static const struct inode_operations msharefs_dir_inode_ops = { .create = msharefs_create, .lookup = simple_lookup, .link = simple_link, - .unlink = simple_unlink, + .unlink = msharefs_unlink, .mkdir = msharefs_mkdir, .rmdir = simple_rmdir, .mknod = msharefs_mknod,
Number of mappings of an mshare region should be tracked so it can be removed when there are no more references to it and associated file has been deleted. This add code to support the unlink operation for associated file, remove the mshare region on file deletion if refcount goes to zero, add munmap operation to maintain refcount to mshare region and remove it on last munmap if file has been deleted. Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> --- mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-)