diff mbox series

[v2,3/9] mm/mshare: make msharefs writable and support directories

Message ID 397ad80630444b90877625a1e94dd81392fc678e.1656531090.git.khalid.aziz@oracle.com (mailing list archive)
State New
Headers show
Series Add support for shared PTEs across processes | expand

Commit Message

Khalid Aziz June 29, 2022, 10:53 p.m. UTC
Make msharefs filesystem writable and allow creating directories
to support better access control to mshare'd regions defined in
msharefs.

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
---
 mm/mshare.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 186 insertions(+), 9 deletions(-)

Comments

Darrick J. Wong June 30, 2022, 9:34 p.m. UTC | #1
On Wed, Jun 29, 2022 at 04:53:54PM -0600, Khalid Aziz wrote:
> Make msharefs filesystem writable and allow creating directories
> to support better access control to mshare'd regions defined in
> msharefs.
> 
> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
> ---
>  mm/mshare.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 186 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/mshare.c b/mm/mshare.c
> index 3e448e11c742..2d5924d39221 100644
> --- a/mm/mshare.c
> +++ b/mm/mshare.c
> @@ -21,11 +21,21 @@
>  #include <linux/fileattr.h>
>  #include <uapi/linux/magic.h>
>  #include <uapi/linux/limits.h>
> +#include <uapi/linux/mman.h>
>  
>  static struct super_block *msharefs_sb;
>  
> +static const struct inode_operations msharefs_dir_inode_ops;
> +static const struct inode_operations msharefs_file_inode_ops;
> +
> +static int
> +msharefs_open(struct inode *inode, struct file *file)
> +{
> +	return simple_open(inode, file);
> +}
> +
>  static const struct file_operations msharefs_file_operations = {
> -	.open		= simple_open,
> +	.open		= msharefs_open,
>  	.llseek		= no_llseek,
>  };
>  
> @@ -42,6 +52,113 @@ msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
>  	return 0;
>  }
>  
> +static struct dentry
> +*msharefs_alloc_dentry(struct dentry *parent, const char *name)
> +{
> +	struct dentry *d;
> +	struct qstr q;
> +	int err;
> +
> +	q.name = name;
> +	q.len = strlen(name);
> +
> +	err = msharefs_d_hash(parent, &q);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	d = d_alloc(parent, &q);
> +	if (d)
> +		return d;
> +
> +	return ERR_PTR(-ENOMEM);
> +}
> +
> +static struct inode
> +*msharefs_get_inode(struct super_block *sb, const struct inode *dir,
> +			umode_t mode)
> +{
> +	struct inode *inode = new_inode(sb);
> +
> +	if (inode) {

Not sure why you wouldn't go with the less-indently version:

	if (!inode)
		return ERR_PTR(-ENOMEM);

	inode->i_ino = get_next_ino();
	<etc>

> +		inode->i_ino = get_next_ino();
> +		inode_init_owner(&init_user_ns, inode, dir, mode);
> +
> +		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
> +
> +		switch (mode & S_IFMT) {

Shouldn't we set the mode somewhere?

> +		case S_IFREG:
> +			inode->i_op = &msharefs_file_inode_ops;
> +			inode->i_fop = &msharefs_file_operations;
> +			break;
> +		case S_IFDIR:
> +			inode->i_op = &msharefs_dir_inode_ops;
> +			inode->i_fop = &simple_dir_operations;
> +			inc_nlink(inode);
> +			break;
> +		case S_IFLNK:
> +			inode->i_op = &page_symlink_inode_operations;
> +			break;
> +		default:
> +			discard_new_inode(inode);
> +			inode = NULL;
> +			break;
> +		}
> +	}
> +
> +	return inode;
> +}
> +
> +static int
> +msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
> +		struct dentry *dentry, umode_t mode, dev_t dev)
> +{
> +	struct inode *inode;
> +	int err = 0;
> +
> +	inode = msharefs_get_inode(dir->i_sb, dir, mode);
> +	if (IS_ERR(inode))
> +		return PTR_ERR(inode);

...and if @inode is NULL?

> +
> +	d_instantiate(dentry, inode);
> +	dget(dentry);
> +	dir->i_mtime = dir->i_ctime = current_time(dir);
> +
> +	return err;
> +}
> +
> +static int
> +msharefs_create(struct user_namespace *mnt_userns, struct inode *dir,
> +		struct dentry *dentry, umode_t mode, bool excl)
> +{
> +	return msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
> +}
> +
> +static int
> +msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
> +		struct dentry *dentry, umode_t mode)
> +{
> +	int ret = msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
> +
> +	if (!ret)
> +		inc_nlink(dir);
> +	return ret;
> +}
> +
> +static const struct inode_operations msharefs_file_inode_ops = {
> +	.setattr	= simple_setattr,
> +	.getattr	= simple_getattr,
> +};
> +static const struct inode_operations msharefs_dir_inode_ops = {
> +	.create		= msharefs_create,
> +	.lookup		= simple_lookup,
> +	.link		= simple_link,
> +	.unlink		= simple_unlink,
> +	.mkdir		= msharefs_mkdir,
> +	.rmdir		= simple_rmdir,
> +	.mknod		= msharefs_mknod,
> +	.rename		= simple_rename,
> +};
> +
>  static void
>  mshare_evict_inode(struct inode *inode)
>  {
> @@ -58,7 +175,7 @@ mshare_info_read(struct file *file, char __user *buf, size_t nbytes,
>  {
>  	char s[80];
>  
> -	sprintf(s, "%ld", PGDIR_SIZE);
> +	sprintf(s, "%ld\n", PGDIR_SIZE);

Changing this already?

>  	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
>  }
>  
> @@ -72,6 +189,38 @@ static const struct super_operations mshare_s_ops = {
>  	.evict_inode = mshare_evict_inode,
>  };
>  
> +static int
> +prepopulate_files(struct super_block *s, struct inode *dir,
> +			struct dentry *root, const struct tree_descr *files)
> +{
> +	int i;
> +	struct inode *inode;
> +	struct dentry *dentry;
> +
> +	for (i = 0; !files->name || files->name[0]; i++, files++) {
> +		if (!files->name)
> +			continue;

What ends the array?  NULL name or empty name?
Do we have to erase all of these when the fs gets unmounted?

--D

> +
> +		dentry = msharefs_alloc_dentry(root, files->name);
> +		if (!dentry)
> +			return -ENOMEM;
> +
> +		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
> +		if (!inode) {
> +			dput(dentry);
> +			return -ENOMEM;
> +		}
> +		inode->i_mode = S_IFREG | files->mode;
> +		inode->i_atime = inode->i_mtime = inode->i_ctime
> +			= current_time(inode);
> +		inode->i_fop = files->ops;
> +		inode->i_ino = i;
> +		d_add(dentry, inode);
> +	}
> +
> +	return 0;
> +}
> +
>  static int
>  msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
>  {
> @@ -79,21 +228,49 @@ msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
>  		[2] = { "mshare_info", &mshare_info_ops, 0444},
>  		{""},
>  	};
> -	int err;
> +	struct inode *inode;
> +	struct dentry *root;
> +	int err = 0;
>  
> -	err = simple_fill_super(sb, MSHARE_MAGIC, mshare_files);
> -	if (!err) {
> -		msharefs_sb = sb;
> -		sb->s_d_op = &msharefs_d_ops;
> -		sb->s_op = &mshare_s_ops;
> +	sb->s_blocksize		= PAGE_SIZE;
> +	sb->s_blocksize_bits	= PAGE_SHIFT;
> +	sb->s_magic		= MSHARE_MAGIC;
> +	sb->s_op		= &mshare_s_ops;
> +	sb->s_d_op		= &msharefs_d_ops;
> +	sb->s_time_gran		= 1;
> +
> +	inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777);
> +	if (!inode) {
> +		err = -ENOMEM;
> +		goto out;
>  	}
> +	inode->i_ino = 1;
> +	root = d_make_root(inode);
> +	if (!root) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	err = prepopulate_files(sb, inode, root, mshare_files);
> +	if (err < 0)
> +		goto clean_root;
> +
> +	sb->s_root = root;
> +	msharefs_sb = sb;
> +	return err;
> +
> +clean_root:
> +	d_genocide(root);
> +	shrink_dcache_parent(root);
> +	dput(root);
> +out:
>  	return err;
>  }
>  
>  static int
>  msharefs_get_tree(struct fs_context *fc)
>  {
> -	return get_tree_single(fc, msharefs_fill_super);
> +	return get_tree_nodev(fc, msharefs_fill_super);
>  }
>  
>  static const struct fs_context_operations msharefs_context_ops = {
> -- 
> 2.32.0
>
Khalid Aziz June 30, 2022, 10:49 p.m. UTC | #2
On 6/30/22 15:34, Darrick J. Wong wrote:
> On Wed, Jun 29, 2022 at 04:53:54PM -0600, Khalid Aziz wrote:
>> Make msharefs filesystem writable and allow creating directories
>> to support better access control to mshare'd regions defined in
>> msharefs.
>>
>> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
>> ---
>>   mm/mshare.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 186 insertions(+), 9 deletions(-)
>>
>> diff --git a/mm/mshare.c b/mm/mshare.c
>> index 3e448e11c742..2d5924d39221 100644
>> --- a/mm/mshare.c
>> +++ b/mm/mshare.c
>> @@ -21,11 +21,21 @@
>>   #include <linux/fileattr.h>
>>   #include <uapi/linux/magic.h>
>>   #include <uapi/linux/limits.h>
>> +#include <uapi/linux/mman.h>
>>   
>>   static struct super_block *msharefs_sb;
>>   
>> +static const struct inode_operations msharefs_dir_inode_ops;
>> +static const struct inode_operations msharefs_file_inode_ops;
>> +
>> +static int
>> +msharefs_open(struct inode *inode, struct file *file)
>> +{
>> +	return simple_open(inode, file);
>> +}
>> +
>>   static const struct file_operations msharefs_file_operations = {
>> -	.open		= simple_open,
>> +	.open		= msharefs_open,
>>   	.llseek		= no_llseek,
>>   };
>>   
>> @@ -42,6 +52,113 @@ msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
>>   	return 0;
>>   }
>>   
>> +static struct dentry
>> +*msharefs_alloc_dentry(struct dentry *parent, const char *name)
>> +{
>> +	struct dentry *d;
>> +	struct qstr q;
>> +	int err;
>> +
>> +	q.name = name;
>> +	q.len = strlen(name);
>> +
>> +	err = msharefs_d_hash(parent, &q);
>> +	if (err)
>> +		return ERR_PTR(err);
>> +
>> +	d = d_alloc(parent, &q);
>> +	if (d)
>> +		return d;
>> +
>> +	return ERR_PTR(-ENOMEM);
>> +}
>> +
>> +static struct inode
>> +*msharefs_get_inode(struct super_block *sb, const struct inode *dir,
>> +			umode_t mode)
>> +{
>> +	struct inode *inode = new_inode(sb);
>> +
>> +	if (inode) {
> 
> Not sure why you wouldn't go with the less-indently version:
> 
> 	if (!inode)
> 		return ERR_PTR(-ENOMEM);
> 
> 	inode->i_ino = get_next_ino();
> 	<etc>
> 

Yeah, good idea. I will change it.

>> +		inode->i_ino = get_next_ino();
>> +		inode_init_owner(&init_user_ns, inode, dir, mode);
>> +
>> +		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
>> +
>> +		switch (mode & S_IFMT) {
> 
> Shouldn't we set the mode somewhere?

mode is passed in as a parameter to msharefs_get_inode(), which uses this value to determine its actions.

> 
>> +		case S_IFREG:
>> +			inode->i_op = &msharefs_file_inode_ops;
>> +			inode->i_fop = &msharefs_file_operations;
>> +			break;
>> +		case S_IFDIR:
>> +			inode->i_op = &msharefs_dir_inode_ops;
>> +			inode->i_fop = &simple_dir_operations;
>> +			inc_nlink(inode);
>> +			break;
>> +		case S_IFLNK:
>> +			inode->i_op = &page_symlink_inode_operations;
>> +			break;
>> +		default:
>> +			discard_new_inode(inode);
>> +			inode = NULL;
>> +			break;
>> +		}
>> +	}
>> +
>> +	return inode;
>> +}
>> +
>> +static int
>> +msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
>> +		struct dentry *dentry, umode_t mode, dev_t dev)
>> +{
>> +	struct inode *inode;
>> +	int err = 0;
>> +
>> +	inode = msharefs_get_inode(dir->i_sb, dir, mode);
>> +	if (IS_ERR(inode))
>> +		return PTR_ERR(inode);
> 
> ...and if @inode is NULL?

Oh right, IS_ERR() does not check for NULL value. I will add a check for that and return ENOMEM.

> 
>> +
>> +	d_instantiate(dentry, inode);
>> +	dget(dentry);
>> +	dir->i_mtime = dir->i_ctime = current_time(dir);
>> +
>> +	return err;
>> +}
>> +
>> +static int
>> +msharefs_create(struct user_namespace *mnt_userns, struct inode *dir,
>> +		struct dentry *dentry, umode_t mode, bool excl)
>> +{
>> +	return msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
>> +}
>> +
>> +static int
>> +msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
>> +		struct dentry *dentry, umode_t mode)
>> +{
>> +	int ret = msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
>> +
>> +	if (!ret)
>> +		inc_nlink(dir);
>> +	return ret;
>> +}
>> +
>> +static const struct inode_operations msharefs_file_inode_ops = {
>> +	.setattr	= simple_setattr,
>> +	.getattr	= simple_getattr,
>> +};
>> +static const struct inode_operations msharefs_dir_inode_ops = {
>> +	.create		= msharefs_create,
>> +	.lookup		= simple_lookup,
>> +	.link		= simple_link,
>> +	.unlink		= simple_unlink,
>> +	.mkdir		= msharefs_mkdir,
>> +	.rmdir		= simple_rmdir,
>> +	.mknod		= msharefs_mknod,
>> +	.rename		= simple_rename,
>> +};
>> +
>>   static void
>>   mshare_evict_inode(struct inode *inode)
>>   {
>> @@ -58,7 +175,7 @@ mshare_info_read(struct file *file, char __user *buf, size_t nbytes,
>>   {
>>   	char s[80];
>>   
>> -	sprintf(s, "%ld", PGDIR_SIZE);
>> +	sprintf(s, "%ld\n", PGDIR_SIZE);
> 
> Changing this already?

Possibly. There is one suggestion to change it to PMD and it might be a better choice.

> 
>>   	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
>>   }
>>   
>> @@ -72,6 +189,38 @@ static const struct super_operations mshare_s_ops = {
>>   	.evict_inode = mshare_evict_inode,
>>   };
>>   
>> +static int
>> +prepopulate_files(struct super_block *s, struct inode *dir,
>> +			struct dentry *root, const struct tree_descr *files)
>> +{
>> +	int i;
>> +	struct inode *inode;
>> +	struct dentry *dentry;
>> +
>> +	for (i = 0; !files->name || files->name[0]; i++, files++) {
>> +		if (!files->name)
>> +			continue;
> 
> What ends the array?  NULL name or empty name?
> Do we have to erase all of these when the fs gets unmounted?

This code is very similar to simple_fill_super() and I reused the code from there. Inodes and dentries will need to be 
erased on unmount through evict_inode.

Thanks,
Khalid
Al Viro June 30, 2022, 11:09 p.m. UTC | #3
On Wed, Jun 29, 2022 at 04:53:54PM -0600, Khalid Aziz wrote:

> +static int
> +msharefs_open(struct inode *inode, struct file *file)
> +{
> +	return simple_open(inode, file);
> +}

Again, whatever for?

> +static struct dentry
> +*msharefs_alloc_dentry(struct dentry *parent, const char *name)
> +{
> +	struct dentry *d;
> +	struct qstr q;
> +	int err;
> +
> +	q.name = name;
> +	q.len = strlen(name);
> +
> +	err = msharefs_d_hash(parent, &q);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	d = d_alloc(parent, &q);
> +	if (d)
> +		return d;
> +
> +	return ERR_PTR(-ENOMEM);
> +}

And it's different from d_alloc_name() how, exactly?

> +		case S_IFLNK:
> +			inode->i_op = &page_symlink_inode_operations;
> +			break;

Really?  You've got symlinks here?

> +		default:
> +			discard_new_inode(inode);
> +			inode = NULL;

That's an odd way to spell BUG()...

> +static int
> +msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
> +		struct dentry *dentry, umode_t mode, dev_t dev)
> +{
> +	struct inode *inode;
> +	int err = 0;
> +
> +	inode = msharefs_get_inode(dir->i_sb, dir, mode);
> +	if (IS_ERR(inode))
> +		return PTR_ERR(inode);
> +
> +	d_instantiate(dentry, inode);
> +	dget(dentry);
> +	dir->i_mtime = dir->i_ctime = current_time(dir);
> +
> +	return err;
> +}

BTW, what's the point of having device nodes on that thing?

> +static int
> +msharefs_create(struct user_namespace *mnt_userns, struct inode *dir,
> +		struct dentry *dentry, umode_t mode, bool excl)
> +{
> +	return msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
> +}
> +
> +static int
> +msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
> +		struct dentry *dentry, umode_t mode)
> +{
> +	int ret = msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
> +
> +	if (!ret)
> +		inc_nlink(dir);
> +	return ret;
> +}
> +
> +static const struct inode_operations msharefs_file_inode_ops = {
> +	.setattr	= simple_setattr,
> +	.getattr	= simple_getattr,
> +};
> +static const struct inode_operations msharefs_dir_inode_ops = {
> +	.create		= msharefs_create,
> +	.lookup		= simple_lookup,
> +	.link		= simple_link,
> +	.unlink		= simple_unlink,
> +	.mkdir		= msharefs_mkdir,
> +	.rmdir		= simple_rmdir,
> +	.mknod		= msharefs_mknod,
> +	.rename		= simple_rename,
> +};
> +
>  static void
>  mshare_evict_inode(struct inode *inode)
>  {
> @@ -58,7 +175,7 @@ mshare_info_read(struct file *file, char __user *buf, size_t nbytes,
>  {
>  	char s[80];
>  
> -	sprintf(s, "%ld", PGDIR_SIZE);
> +	sprintf(s, "%ld\n", PGDIR_SIZE);
>  	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
>  }
>  
> @@ -72,6 +189,38 @@ static const struct super_operations mshare_s_ops = {
>  	.evict_inode = mshare_evict_inode,
>  };
>  
> +static int
> +prepopulate_files(struct super_block *s, struct inode *dir,
> +			struct dentry *root, const struct tree_descr *files)
> +{
> +	int i;
> +	struct inode *inode;
> +	struct dentry *dentry;
> +
> +	for (i = 0; !files->name || files->name[0]; i++, files++) {
> +		if (!files->name)
> +			continue;
> +
> +		dentry = msharefs_alloc_dentry(root, files->name);
> +		if (!dentry)
> +			return -ENOMEM;
> +
> +		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
> +		if (!inode) {
> +			dput(dentry);
> +			return -ENOMEM;
> +		}
> +		inode->i_mode = S_IFREG | files->mode;
> +		inode->i_atime = inode->i_mtime = inode->i_ctime
> +			= current_time(inode);
> +		inode->i_fop = files->ops;
> +		inode->i_ino = i;
> +		d_add(dentry, inode);
> +	}
> +
> +	return 0;
> +}

Looks remarkably similar to something I've seen somewhere... fs/libfs.c,
if I'm not mistaken...

Sarcasm aside, what's wrong with using simple_fill_super()?
Khalid Aziz July 2, 2022, 12:22 a.m. UTC | #4
On 6/30/22 17:09, Al Viro wrote:
> On Wed, Jun 29, 2022 at 04:53:54PM -0600, Khalid Aziz wrote:
> 
>> +static int
>> +msharefs_open(struct inode *inode, struct file *file)
>> +{
>> +	return simple_open(inode, file);
>> +}
> 
> Again, whatever for?
> 
>> +static struct dentry
>> +*msharefs_alloc_dentry(struct dentry *parent, const char *name)
>> +{
>> +	struct dentry *d;
>> +	struct qstr q;
>> +	int err;
>> +
>> +	q.name = name;
>> +	q.len = strlen(name);
>> +
>> +	err = msharefs_d_hash(parent, &q);
>> +	if (err)
>> +		return ERR_PTR(err);
>> +
>> +	d = d_alloc(parent, &q);
>> +	if (d)
>> +		return d;
>> +
>> +	return ERR_PTR(-ENOMEM);
>> +}
> 
> And it's different from d_alloc_name() how, exactly?

By making minor changes to my other code, I was able to use all of the standard functions you pointed out. That 
simplified my patch quite a bit. Thank you!

> 
>> +		case S_IFLNK:
>> +			inode->i_op = &page_symlink_inode_operations;
>> +			break;
> 
> Really?  You've got symlinks here?

I intended to support symlinks on msharefs but I am not sure if I see a use case at this time. I can drop support for 
symlinks and add it in future if there is a use case.

> 
>> +		default:
>> +			discard_new_inode(inode);
>> +			inode = NULL;
> 
> That's an odd way to spell BUG()...

I think what you are saying is this default case represents a bug and I should report it as such. Is that right, or 
should I not have a default case at all (which is what I am seeing in some of the other places)?

> 
>> +static int
>> +msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
>> +		struct dentry *dentry, umode_t mode, dev_t dev)
>> +{
>> +	struct inode *inode;
>> +	int err = 0;
>> +
>> +	inode = msharefs_get_inode(dir->i_sb, dir, mode);
>> +	if (IS_ERR(inode))
>> +		return PTR_ERR(inode);
>> +
>> +	d_instantiate(dentry, inode);
>> +	dget(dentry);
>> +	dir->i_mtime = dir->i_ctime = current_time(dir);
>> +
>> +	return err;
>> +}
> 
> BTW, what's the point of having device nodes on that thing?

There will be no device nodes on msharefs. Are you referring to the dev_t parameter in the msharefs_mknod() declaration? If 
so, I am following the prototype declaration for that function from fs.h:

         int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
                       umode_t,dev_t);

If I am misunderstanding, please correct me.

> 
>> +static int
>> +msharefs_create(struct user_namespace *mnt_userns, struct inode *dir,
>> +		struct dentry *dentry, umode_t mode, bool excl)
>> +{
>> +	return msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
>> +}
>> +
>> +static int
>> +msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
>> +		struct dentry *dentry, umode_t mode)
>> +{
>> +	int ret = msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
>> +
>> +	if (!ret)
>> +		inc_nlink(dir);
>> +	return ret;
>> +}
>> +
>> +static const struct inode_operations msharefs_file_inode_ops = {
>> +	.setattr	= simple_setattr,
>> +	.getattr	= simple_getattr,
>> +};
>> +static const struct inode_operations msharefs_dir_inode_ops = {
>> +	.create		= msharefs_create,
>> +	.lookup		= simple_lookup,
>> +	.link		= simple_link,
>> +	.unlink		= simple_unlink,
>> +	.mkdir		= msharefs_mkdir,
>> +	.rmdir		= simple_rmdir,
>> +	.mknod		= msharefs_mknod,
>> +	.rename		= simple_rename,
>> +};
>> +
>>   static void
>>   mshare_evict_inode(struct inode *inode)
>>   {
>> @@ -58,7 +175,7 @@ mshare_info_read(struct file *file, char __user *buf, size_t nbytes,
>>   {
>>   	char s[80];
>>   
>> -	sprintf(s, "%ld", PGDIR_SIZE);
>> +	sprintf(s, "%ld\n", PGDIR_SIZE);
>>   	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
>>   }
>>   
>> @@ -72,6 +189,38 @@ static const struct super_operations mshare_s_ops = {
>>   	.evict_inode = mshare_evict_inode,
>>   };
>>   
>> +static int
>> +prepopulate_files(struct super_block *s, struct inode *dir,
>> +			struct dentry *root, const struct tree_descr *files)
>> +{
>> +	int i;
>> +	struct inode *inode;
>> +	struct dentry *dentry;
>> +
>> +	for (i = 0; !files->name || files->name[0]; i++, files++) {
>> +		if (!files->name)
>> +			continue;
>> +
>> +		dentry = msharefs_alloc_dentry(root, files->name);
>> +		if (!dentry)
>> +			return -ENOMEM;
>> +
>> +		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
>> +		if (!inode) {
>> +			dput(dentry);
>> +			return -ENOMEM;
>> +		}
>> +		inode->i_mode = S_IFREG | files->mode;
>> +		inode->i_atime = inode->i_mtime = inode->i_ctime
>> +			= current_time(inode);
>> +		inode->i_fop = files->ops;
>> +		inode->i_ino = i;
>> +		d_add(dentry, inode);
>> +	}
>> +
>> +	return 0;
>> +}
> 
> Looks remarkably similar to something I've seen somewhere... fs/libfs.c,
> if I'm not mistaken...
> 
> Sarcasm aside, what's wrong with using simple_fill_super()?
I started out using simple_fill_super() in patch 1. I found that when I use simple_fill_super(), I end up with a 
filesystem in which userspace cannot create files. I looked at other code like shmfs and efivarfs and wrote similar 
code, which got me a writable filesystem. I might be missing something basic, and if there is a way to use 
simple_fill_super() and still support file creation from userspace, I would much rather use simple_fill_super().

Thanks,
Khalid
diff mbox series

Patch

diff --git a/mm/mshare.c b/mm/mshare.c
index 3e448e11c742..2d5924d39221 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -21,11 +21,21 @@ 
 #include <linux/fileattr.h>
 #include <uapi/linux/magic.h>
 #include <uapi/linux/limits.h>
+#include <uapi/linux/mman.h>
 
 static struct super_block *msharefs_sb;
 
+static const struct inode_operations msharefs_dir_inode_ops;
+static const struct inode_operations msharefs_file_inode_ops;
+
+static int
+msharefs_open(struct inode *inode, struct file *file)
+{
+	return simple_open(inode, file);
+}
+
 static const struct file_operations msharefs_file_operations = {
-	.open		= simple_open,
+	.open		= msharefs_open,
 	.llseek		= no_llseek,
 };
 
@@ -42,6 +52,113 @@  msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
 	return 0;
 }
 
+static struct dentry
+*msharefs_alloc_dentry(struct dentry *parent, const char *name)
+{
+	struct dentry *d;
+	struct qstr q;
+	int err;
+
+	q.name = name;
+	q.len = strlen(name);
+
+	err = msharefs_d_hash(parent, &q);
+	if (err)
+		return ERR_PTR(err);
+
+	d = d_alloc(parent, &q);
+	if (d)
+		return d;
+
+	return ERR_PTR(-ENOMEM);
+}
+
+static struct inode
+*msharefs_get_inode(struct super_block *sb, const struct inode *dir,
+			umode_t mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_ino = get_next_ino();
+		inode_init_owner(&init_user_ns, inode, dir, mode);
+
+		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+
+		switch (mode & S_IFMT) {
+		case S_IFREG:
+			inode->i_op = &msharefs_file_inode_ops;
+			inode->i_fop = &msharefs_file_operations;
+			break;
+		case S_IFDIR:
+			inode->i_op = &msharefs_dir_inode_ops;
+			inode->i_fop = &simple_dir_operations;
+			inc_nlink(inode);
+			break;
+		case S_IFLNK:
+			inode->i_op = &page_symlink_inode_operations;
+			break;
+		default:
+			discard_new_inode(inode);
+			inode = NULL;
+			break;
+		}
+	}
+
+	return inode;
+}
+
+static int
+msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+		struct dentry *dentry, umode_t mode, dev_t dev)
+{
+	struct inode *inode;
+	int err = 0;
+
+	inode = msharefs_get_inode(dir->i_sb, dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	dir->i_mtime = dir->i_ctime = current_time(dir);
+
+	return err;
+}
+
+static int
+msharefs_create(struct user_namespace *mnt_userns, struct inode *dir,
+		struct dentry *dentry, umode_t mode, bool excl)
+{
+	return msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
+}
+
+static int
+msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+		struct dentry *dentry, umode_t mode)
+{
+	int ret = msharefs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
+
+	if (!ret)
+		inc_nlink(dir);
+	return ret;
+}
+
+static const struct inode_operations msharefs_file_inode_ops = {
+	.setattr	= simple_setattr,
+	.getattr	= simple_getattr,
+};
+static const struct inode_operations msharefs_dir_inode_ops = {
+	.create		= msharefs_create,
+	.lookup		= simple_lookup,
+	.link		= simple_link,
+	.unlink		= simple_unlink,
+	.mkdir		= msharefs_mkdir,
+	.rmdir		= simple_rmdir,
+	.mknod		= msharefs_mknod,
+	.rename		= simple_rename,
+};
+
 static void
 mshare_evict_inode(struct inode *inode)
 {
@@ -58,7 +175,7 @@  mshare_info_read(struct file *file, char __user *buf, size_t nbytes,
 {
 	char s[80];
 
-	sprintf(s, "%ld", PGDIR_SIZE);
+	sprintf(s, "%ld\n", PGDIR_SIZE);
 	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
 }
 
@@ -72,6 +189,38 @@  static const struct super_operations mshare_s_ops = {
 	.evict_inode = mshare_evict_inode,
 };
 
+static int
+prepopulate_files(struct super_block *s, struct inode *dir,
+			struct dentry *root, const struct tree_descr *files)
+{
+	int i;
+	struct inode *inode;
+	struct dentry *dentry;
+
+	for (i = 0; !files->name || files->name[0]; i++, files++) {
+		if (!files->name)
+			continue;
+
+		dentry = msharefs_alloc_dentry(root, files->name);
+		if (!dentry)
+			return -ENOMEM;
+
+		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
+		if (!inode) {
+			dput(dentry);
+			return -ENOMEM;
+		}
+		inode->i_mode = S_IFREG | files->mode;
+		inode->i_atime = inode->i_mtime = inode->i_ctime
+			= current_time(inode);
+		inode->i_fop = files->ops;
+		inode->i_ino = i;
+		d_add(dentry, inode);
+	}
+
+	return 0;
+}
+
 static int
 msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
 {
@@ -79,21 +228,49 @@  msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
 		[2] = { "mshare_info", &mshare_info_ops, 0444},
 		{""},
 	};
-	int err;
+	struct inode *inode;
+	struct dentry *root;
+	int err = 0;
 
-	err = simple_fill_super(sb, MSHARE_MAGIC, mshare_files);
-	if (!err) {
-		msharefs_sb = sb;
-		sb->s_d_op = &msharefs_d_ops;
-		sb->s_op = &mshare_s_ops;
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
+	sb->s_magic		= MSHARE_MAGIC;
+	sb->s_op		= &mshare_s_ops;
+	sb->s_d_op		= &msharefs_d_ops;
+	sb->s_time_gran		= 1;
+
+	inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777);
+	if (!inode) {
+		err = -ENOMEM;
+		goto out;
 	}
+	inode->i_ino = 1;
+	root = d_make_root(inode);
+	if (!root) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = prepopulate_files(sb, inode, root, mshare_files);
+	if (err < 0)
+		goto clean_root;
+
+	sb->s_root = root;
+	msharefs_sb = sb;
+	return err;
+
+clean_root:
+	d_genocide(root);
+	shrink_dcache_parent(root);
+	dput(root);
+out:
 	return err;
 }
 
 static int
 msharefs_get_tree(struct fs_context *fc)
 {
-	return get_tree_single(fc, msharefs_fill_super);
+	return get_tree_nodev(fc, msharefs_fill_super);
 }
 
 static const struct fs_context_operations msharefs_context_ops = {