Message ID | 05649b455e2191642e85cc5522ef39ad49fdeca3.1656531090.git.khalid.aziz@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add support for shared PTEs across processes | expand |
On Wed, Jun 29, 2022 at 04:53:55PM -0600, Khalid Aziz wrote: > When a new file is created under msharefs, allocate a new mm_struct > that will hold the VMAs for mshare region. Also allocate structure > to defines the mshare region and add a read operation to the file > that returns this information about the mshare region. Currently > this information is returned as a struct: > > struct mshare_info { > unsigned long start; > unsigned long size; > }; > > This gives the start address for mshare region and its size. > > Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> > --- > include/uapi/linux/mman.h | 5 +++ > mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++- > 2 files changed, 68 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h > index f55bc680b5b0..56fe446e24b1 100644 > --- a/include/uapi/linux/mman.h > +++ b/include/uapi/linux/mman.h > @@ -41,4 +41,9 @@ > #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB > #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB > > +struct mshare_info { > + unsigned long start; > + unsigned long size; You might want to make these explicitly u64, since this is userspace ABI and you never know when someone will want to do something crazy like run 32-bit programs with mshare files. Also you might want to add some padding fields for flags, future expansion, etc. > +}; > + > #endif /* _UAPI_LINUX_MMAN_H */ > diff --git a/mm/mshare.c b/mm/mshare.c > index 2d5924d39221..d238b68b0576 100644 > --- a/mm/mshare.c > +++ b/mm/mshare.c > @@ -22,8 +22,14 @@ > #include <uapi/linux/magic.h> > #include <uapi/linux/limits.h> > #include <uapi/linux/mman.h> > +#include <linux/sched/mm.h> > > static struct super_block *msharefs_sb; > +struct mshare_data { > + struct mm_struct *mm; > + refcount_t refcnt; > + struct mshare_info *minfo; > +}; > > static const struct inode_operations msharefs_dir_inode_ops; > static const struct inode_operations msharefs_file_inode_ops; > @@ -34,8 +40,29 @@ msharefs_open(struct inode *inode, struct file *file) > return simple_open(inode, file); > } > > +static ssize_t > +msharefs_read(struct kiocb *iocb, struct iov_iter *iov) > +{ > + struct mshare_data *info = iocb->ki_filp->private_data; > + size_t ret; > + struct mshare_info m_info; > + > + if (info->minfo != NULL) { > + m_info.start = info->minfo->start; > + m_info.size = info->minfo->size; > + } else { > + m_info.start = 0; > + m_info.size = 0; Hmmm, read()ing out the shared mapping information. Heh. When does this case happen? Is it before anybody mmaps this file into an address space? > + } > + ret = copy_to_iter(&m_info, sizeof(m_info), iov); > + if (!ret) > + return -EFAULT; > + return ret; > +} > + > static const struct file_operations msharefs_file_operations = { > .open = msharefs_open, > + .read_iter = msharefs_read, > .llseek = no_llseek, > }; > > @@ -73,12 +100,43 @@ static struct dentry > return ERR_PTR(-ENOMEM); > } > > +static int > +msharefs_fill_mm(struct inode *inode) > +{ > + struct mm_struct *mm; > + struct mshare_data *info = NULL; > + int retval = 0; > + > + mm = mm_alloc(); > + if (!mm) { > + retval = -ENOMEM; > + goto err_free; > + } > + > + info = kzalloc(sizeof(*info), GFP_KERNEL); > + if (!info) { > + retval = -ENOMEM; > + goto err_free; > + } > + info->mm = mm; > + info->minfo = NULL; > + refcount_set(&info->refcnt, 1); > + inode->i_private = info; > + > + return 0; > + > +err_free: > + if (mm) > + mmput(mm); > + kfree(info); > + return retval; > +} > + > static struct inode > *msharefs_get_inode(struct super_block *sb, const struct inode *dir, > umode_t mode) > { > struct inode *inode = new_inode(sb); > - > if (inode) { > inode->i_ino = get_next_ino(); > inode_init_owner(&init_user_ns, inode, dir, mode); > @@ -89,6 +147,10 @@ static struct inode > case S_IFREG: > inode->i_op = &msharefs_file_inode_ops; > inode->i_fop = &msharefs_file_operations; > + if (msharefs_fill_mm(inode) != 0) { > + discard_new_inode(inode); > + inode = ERR_PTR(-ENOMEM); Is it intentional to clobber the msharefs_fill_mm return value and replace it with ENOMEM? --D > + } > break; > case S_IFDIR: > inode->i_op = &msharefs_dir_inode_ops; > -- > 2.32.0 >
On 6/30/22 15:27, Darrick J. Wong wrote: > On Wed, Jun 29, 2022 at 04:53:55PM -0600, Khalid Aziz wrote: >> When a new file is created under msharefs, allocate a new mm_struct >> that will hold the VMAs for mshare region. Also allocate structure >> to defines the mshare region and add a read operation to the file >> that returns this information about the mshare region. Currently >> this information is returned as a struct: >> >> struct mshare_info { >> unsigned long start; >> unsigned long size; >> }; >> >> This gives the start address for mshare region and its size. >> >> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> >> --- >> include/uapi/linux/mman.h | 5 +++ >> mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 68 insertions(+), 1 deletion(-) >> >> diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h >> index f55bc680b5b0..56fe446e24b1 100644 >> --- a/include/uapi/linux/mman.h >> +++ b/include/uapi/linux/mman.h >> @@ -41,4 +41,9 @@ >> #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB >> #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB >> >> +struct mshare_info { >> + unsigned long start; >> + unsigned long size; > > You might want to make these explicitly u64, since this is userspace > ABI and you never know when someone will want to do something crazy like > run 32-bit programs with mshare files. > > Also you might want to add some padding fields for flags, future > expansion, etc. That sounds like a good idea. I will queue it up for next version of patch series. > >> +}; >> + >> #endif /* _UAPI_LINUX_MMAN_H */ >> diff --git a/mm/mshare.c b/mm/mshare.c >> index 2d5924d39221..d238b68b0576 100644 >> --- a/mm/mshare.c >> +++ b/mm/mshare.c >> @@ -22,8 +22,14 @@ >> #include <uapi/linux/magic.h> >> #include <uapi/linux/limits.h> >> #include <uapi/linux/mman.h> >> +#include <linux/sched/mm.h> >> >> static struct super_block *msharefs_sb; >> +struct mshare_data { >> + struct mm_struct *mm; >> + refcount_t refcnt; >> + struct mshare_info *minfo; >> +}; >> >> static const struct inode_operations msharefs_dir_inode_ops; >> static const struct inode_operations msharefs_file_inode_ops; >> @@ -34,8 +40,29 @@ msharefs_open(struct inode *inode, struct file *file) >> return simple_open(inode, file); >> } >> >> +static ssize_t >> +msharefs_read(struct kiocb *iocb, struct iov_iter *iov) >> +{ >> + struct mshare_data *info = iocb->ki_filp->private_data; >> + size_t ret; >> + struct mshare_info m_info; >> + >> + if (info->minfo != NULL) { >> + m_info.start = info->minfo->start; >> + m_info.size = info->minfo->size; >> + } else { >> + m_info.start = 0; >> + m_info.size = 0; > > Hmmm, read()ing out the shared mapping information. Heh. > > When does this case happen? Is it before anybody mmaps this file into > an address space? > It can happen before or after the first mmap which will establish the start address and size. Hence I have to account for both cases. >> + } >> + ret = copy_to_iter(&m_info, sizeof(m_info), iov); >> + if (!ret) >> + return -EFAULT; >> + return ret; >> +} >> + >> static const struct file_operations msharefs_file_operations = { >> .open = msharefs_open, >> + .read_iter = msharefs_read, >> .llseek = no_llseek, >> }; >> >> @@ -73,12 +100,43 @@ static struct dentry >> return ERR_PTR(-ENOMEM); >> } >> >> +static int >> +msharefs_fill_mm(struct inode *inode) >> +{ >> + struct mm_struct *mm; >> + struct mshare_data *info = NULL; >> + int retval = 0; >> + >> + mm = mm_alloc(); >> + if (!mm) { >> + retval = -ENOMEM; >> + goto err_free; >> + } >> + >> + info = kzalloc(sizeof(*info), GFP_KERNEL); >> + if (!info) { >> + retval = -ENOMEM; >> + goto err_free; >> + } >> + info->mm = mm; >> + info->minfo = NULL; >> + refcount_set(&info->refcnt, 1); >> + inode->i_private = info; >> + >> + return 0; >> + >> +err_free: >> + if (mm) >> + mmput(mm); >> + kfree(info); >> + return retval; >> +} >> + >> static struct inode >> *msharefs_get_inode(struct super_block *sb, const struct inode *dir, >> umode_t mode) >> { >> struct inode *inode = new_inode(sb); >> - >> if (inode) { >> inode->i_ino = get_next_ino(); >> inode_init_owner(&init_user_ns, inode, dir, mode); >> @@ -89,6 +147,10 @@ static struct inode >> case S_IFREG: >> inode->i_op = &msharefs_file_inode_ops; >> inode->i_fop = &msharefs_file_operations; >> + if (msharefs_fill_mm(inode) != 0) { >> + discard_new_inode(inode); >> + inode = ERR_PTR(-ENOMEM); > > Is it intentional to clobber the msharefs_fill_mm return value and > replace it with ENOMEM? ENOMEM sounded like the right value to return from msharefs_get_inode() in case of failure. On the other hand, there isn't much of a reason to not just return the return value from msharefs_fill_mm(). I can change that. Thanks for the review. -- Khalid
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index f55bc680b5b0..56fe446e24b1 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -41,4 +41,9 @@ #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB +struct mshare_info { + unsigned long start; + unsigned long size; +}; + #endif /* _UAPI_LINUX_MMAN_H */ diff --git a/mm/mshare.c b/mm/mshare.c index 2d5924d39221..d238b68b0576 100644 --- a/mm/mshare.c +++ b/mm/mshare.c @@ -22,8 +22,14 @@ #include <uapi/linux/magic.h> #include <uapi/linux/limits.h> #include <uapi/linux/mman.h> +#include <linux/sched/mm.h> static struct super_block *msharefs_sb; +struct mshare_data { + struct mm_struct *mm; + refcount_t refcnt; + struct mshare_info *minfo; +}; static const struct inode_operations msharefs_dir_inode_ops; static const struct inode_operations msharefs_file_inode_ops; @@ -34,8 +40,29 @@ msharefs_open(struct inode *inode, struct file *file) return simple_open(inode, file); } +static ssize_t +msharefs_read(struct kiocb *iocb, struct iov_iter *iov) +{ + struct mshare_data *info = iocb->ki_filp->private_data; + size_t ret; + struct mshare_info m_info; + + if (info->minfo != NULL) { + m_info.start = info->minfo->start; + m_info.size = info->minfo->size; + } else { + m_info.start = 0; + m_info.size = 0; + } + ret = copy_to_iter(&m_info, sizeof(m_info), iov); + if (!ret) + return -EFAULT; + return ret; +} + static const struct file_operations msharefs_file_operations = { .open = msharefs_open, + .read_iter = msharefs_read, .llseek = no_llseek, }; @@ -73,12 +100,43 @@ static struct dentry return ERR_PTR(-ENOMEM); } +static int +msharefs_fill_mm(struct inode *inode) +{ + struct mm_struct *mm; + struct mshare_data *info = NULL; + int retval = 0; + + mm = mm_alloc(); + if (!mm) { + retval = -ENOMEM; + goto err_free; + } + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + retval = -ENOMEM; + goto err_free; + } + info->mm = mm; + info->minfo = NULL; + refcount_set(&info->refcnt, 1); + inode->i_private = info; + + return 0; + +err_free: + if (mm) + mmput(mm); + kfree(info); + return retval; +} + static struct inode *msharefs_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode) { struct inode *inode = new_inode(sb); - if (inode) { inode->i_ino = get_next_ino(); inode_init_owner(&init_user_ns, inode, dir, mode); @@ -89,6 +147,10 @@ static struct inode case S_IFREG: inode->i_op = &msharefs_file_inode_ops; inode->i_fop = &msharefs_file_operations; + if (msharefs_fill_mm(inode) != 0) { + discard_new_inode(inode); + inode = ERR_PTR(-ENOMEM); + } break; case S_IFDIR: inode->i_op = &msharefs_dir_inode_ops;
When a new file is created under msharefs, allocate a new mm_struct that will hold the VMAs for mshare region. Also allocate structure to defines the mshare region and add a read operation to the file that returns this information about the mshare region. Currently this information is returned as a struct: struct mshare_info { unsigned long start; unsigned long size; }; This gives the start address for mshare region and its size. Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> --- include/uapi/linux/mman.h | 5 +++ mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-)