diff mbox

[RFC,02/13] ovl: redirect dir by file handle on copy up

Message ID 1492387183-18847-3-git-send-email-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Amir Goldstein April 16, 2017, 11:59 p.m. UTC
When mounted with mount option redirect_dir=fh,
every copy up of lower directory stores the lower
dir file handle in overlay.fh xattr of upper dir.

This method has some advantages over absolute path redirect:
- it is more compact in stored xattr size
- it is not limited by lengths of full paths
- lookup redirect is more efficient for very nested directories

It also has some disadvantages over absolute path redirect:
- it requires that all lower layers are on the same file system,
  which support exportfs ops
- file handles will become stale if overlay lower directories
  were to be copied to another location

Therefore, redirect by file handle is considered an optimization
and file handle is stored on copy up in addition to setting
redirect by path on rename.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/overlayfs/Kconfig     | 16 +++++++++
 fs/overlayfs/copy_up.c   | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h | 15 +++++++++
 fs/overlayfs/ovl_entry.h |  1 +
 fs/overlayfs/super.c     | 28 ++++++++++++++--
 fs/overlayfs/util.c      | 14 ++++++++
 6 files changed, 159 insertions(+), 2 deletions(-)

Comments

Rock Lee April 17, 2017, 1:33 p.m. UTC | #1
Hi, Amir:

> +static int ovl_set_redirect_fh(struct dentry *dentry, struct dentry *upper)
> +{
> +       int err;
> +       const struct ovl_fh *fh;
> +
> +       fh = ovl_get_redirect_fh(ovl_dentry_lower(dentry));
> +       err = PTR_ERR(fh);
> +       if (IS_ERR(fh))
> +               goto out_err;
> +
> +       err = ovl_do_setxattr(upper, OVL_XATTR_FH, fh, fh->len, 0);
> +       if (err)
> +               goto out_free;

I'm not quite sure, maybe you missed a 'free(fh)' here.

> +       return 0;
> +
> +out_free:
> +       kfree(fh);
> +out_err:
> +       if (err == -EOPNOTSUPP) {
> +               ovl_clear_redirect_fh(dentry->d_sb);
> +               return 0;
> +       }
> +       pr_warn_ratelimited("overlay: failed to set redirect fh (%i)\n", err);
> +       return err;
> +}
> +
Amir Goldstein April 17, 2017, 2:03 p.m. UTC | #2
On Mon, Apr 17, 2017 at 3:33 PM, Rock Lee <rockdotlee@gmail.com> wrote:
> Hi, Amir:
>
>> +static int ovl_set_redirect_fh(struct dentry *dentry, struct dentry *upper)
>> +{
>> +       int err;
>> +       const struct ovl_fh *fh;
>> +
>> +       fh = ovl_get_redirect_fh(ovl_dentry_lower(dentry));
>> +       err = PTR_ERR(fh);
>> +       if (IS_ERR(fh))
>> +               goto out_err;
>> +
>> +       err = ovl_do_setxattr(upper, OVL_XATTR_FH, fh, fh->len, 0);
>> +       if (err)
>> +               goto out_free;
>
> I'm not quite sure, maybe you missed a 'free(fh)' here.

It's right there at out_free :)

Thanks for reviewing!

>
>> +       return 0;
>> +
>> +out_free:
>> +       kfree(fh);
>> +out_err:
>> +       if (err == -EOPNOTSUPP) {
>> +               ovl_clear_redirect_fh(dentry->d_sb);
>> +               return 0;
>> +       }
>> +       pr_warn_ratelimited("overlay: failed to set redirect fh (%i)\n", err);
>> +       return err;
>> +}
>> +
>
>
>
>
>
>
> --
> Cheers,
> Rock
Vivek Goyal April 17, 2017, 7:49 p.m. UTC | #3
On Mon, Apr 17, 2017 at 02:59:32AM +0300, Amir Goldstein wrote:
> When mounted with mount option redirect_dir=fh,
> every copy up of lower directory stores the lower
> dir file handle in overlay.fh xattr of upper dir.
> 
> This method has some advantages over absolute path redirect:
> - it is more compact in stored xattr size
> - it is not limited by lengths of full paths
> - lookup redirect is more efficient for very nested directories
> 
> It also has some disadvantages over absolute path redirect:
> - it requires that all lower layers are on the same file system,
>   which support exportfs ops
> - file handles will become stale if overlay lower directories
>   were to be copied to another location
> 
> Therefore, redirect by file handle is considered an optimization
> and file handle is stored on copy up in addition to setting
> redirect by path on rename.

Hi Amir, 

If this is just an optimization to redirect, then why not store it
only during rename (like redirct path). 

Given you are storing it all the time for merged dir during copy up, 
looks like you plan to use it for other use cases too. May be it
makes sense to either mention those use cases here or in this patch
just create file handle over rename and then in later patches change
it with explanation.

Or I missed the intent completely?

> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/overlayfs/Kconfig     | 16 +++++++++
>  fs/overlayfs/copy_up.c   | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/overlayfs/overlayfs.h | 15 +++++++++
>  fs/overlayfs/ovl_entry.h |  1 +
>  fs/overlayfs/super.c     | 28 ++++++++++++++--
>  fs/overlayfs/util.c      | 14 ++++++++
>  6 files changed, 159 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
> index 0daac51..351b61a 100644
> --- a/fs/overlayfs/Kconfig
> +++ b/fs/overlayfs/Kconfig
> @@ -22,3 +22,19 @@ config OVERLAY_FS_REDIRECT_DIR
>  	  Note, that redirects are not backward compatible.  That is, mounting
>  	  an overlay which has redirects on a kernel that doesn't support this
>  	  feature will have unexpected results.
> +
> +config OVERLAY_FS_REDIRECT_FH
> +	bool "Overlayfs: turn on redirect by file handle by default"
> +	depends on OVERLAY_FS_REDIRECT_DIR
> +	help
> +	  If this config option is enabled then overlay filesystems will use
> +	  redirect by file handle for merged directories by default.  It is
> +	  also possible to turn on redirect by file handle globally with the
> +	  "redirect_fh=on" module option or on a filesystem instance basis
> +	  with the "redirect_dir=fh" mount option.
> +
> +	  Redirect by file handle provides faster lookup compared to redirect
> +	  by absolute path, but it only works when all layers are on the same
> +	  underlying file system that supports NFS export ops.  Redirect by
> +	  file handle breaks if layers are taken offline and copied to another
> +	  location.  In that case, lookup falls back to redirect by path.
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index 906ea6c..428dc26 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -20,6 +20,7 @@
>  #include <linux/namei.h>
>  #include <linux/fdtable.h>
>  #include <linux/ratelimit.h>
> +#include <linux/exportfs.h>
>  #include "overlayfs.h"
>  #include "ovl_entry.h"
>  
> @@ -232,6 +233,80 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
>  	return err;
>  }
>  
> +static struct ovl_fh *ovl_get_redirect_fh(struct dentry *lower)
> +{
> +	const struct export_operations *nop = lower->d_sb->s_export_op;
> +	struct ovl_fh *fh;
> +	int fh_type, fh_len, dwords;
> +	void *buf, *ret;
> +	int buflen = MAX_HANDLE_SZ;
> +
> +	/* Do not encode file handle if we cannot decode it later */
> +	if (!nop || !nop->fh_to_dentry) {
> +		pr_info("overlay: redirect by file handle not supported "
> +			"by lower - turning off redirect\n");

This message probably should go where you are actually turning off the
feature?

> +		return ERR_PTR(-EOPNOTSUPP);
> +	}
> +
> +	buf = kmalloc(buflen, GFP_TEMPORARY);
> +	if (!buf)
> +		return ERR_PTR(-ENOMEM);
> +
> +	fh = buf;
> +	dwords = (buflen - offsetof(struct ovl_fh, fid)) >> 2;
> +	fh_type = exportfs_encode_fh(lower,
> +				     (struct fid *)fh->fid,
> +				     &dwords, 0);
> +	fh_len = (dwords << 2) + offsetof(struct ovl_fh, fid);
> +
> +	ret = ERR_PTR(-EOVERFLOW);
> +	if (fh_len > buflen || fh_type <= 0 || fh_type == FILEID_INVALID)
> +		goto out;
> +
> +	fh->version = OVL_FH_VERSION;
> +	fh->magic = OVL_FH_MAGIC;
> +	fh->type = fh_type;
> +	fh->len = fh_len;
> +
> +	ret = kmalloc(fh_len, GFP_KERNEL);
> +	if (!ret) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto out;
> +	}
> +
> +	memcpy(ret, buf, fh_len);
> +out:
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static int ovl_set_redirect_fh(struct dentry *dentry, struct dentry *upper)
> +{
> +	int err;
> +	const struct ovl_fh *fh;
> +
> +	fh = ovl_get_redirect_fh(ovl_dentry_lower(dentry));
> +	err = PTR_ERR(fh);
> +	if (IS_ERR(fh))
> +		goto out_err;
> +
> +	err = ovl_do_setxattr(upper, OVL_XATTR_FH, fh, fh->len, 0);
> +	if (err)
> +		goto out_free;
> +
> +	return 0;
> +
> +out_free:
> +	kfree(fh);
> +out_err:
> +	if (err == -EOPNOTSUPP) {
> +		ovl_clear_redirect_fh(dentry->d_sb);
> +		return 0;
> +	}
> +	pr_warn_ratelimited("overlay: failed to set redirect fh (%i)\n", err);
> +	return err;
> +}
> +
>  static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
>  			      struct dentry *dentry, struct path *lowerpath,
>  			      struct kstat *stat, const char *link,
> @@ -316,6 +391,18 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
>  	if (err)
>  		goto out_cleanup;
>  
> +	if (S_ISDIR(stat->mode) &&
> +	    ovl_redirect_dir(dentry->d_sb) &&
> +	    ovl_redirect_fh(dentry->d_sb)) {

Do we need to check for redirect_dir. Looks like redirect_dir has to
be enabled if redirect_fh is enabled?

> +		/*
> +		 * Store file handle of lower dir in upper dir xattr to
> +		 * create a chain of file handles for merged dir stack
> +		 */
> +		err = ovl_set_redirect_fh(dentry, temp);
> +		if (err)
> +			goto out_cleanup;
> +	}
> +
>  	if (tmpfile)
>  		err = ovl_do_link(temp, udir, upper, true);
>  	else
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index c851158..49be199 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -20,6 +20,7 @@ enum ovl_path_type {
>  #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
>  #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
>  #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
> +#define OVL_XATTR_FH OVL_XATTR_PREFIX "fh"
>  
>  #define OVL_ISUPPER_MASK 1UL
>  
> @@ -146,6 +147,18 @@ static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
>  	return (struct inode *) (x & ~OVL_ISUPPER_MASK);
>  }
>  
> +/* redirect data format for redirect by file handle */
> +struct ovl_fh {
> +	unsigned char version;	/* 0 */
> +	unsigned char magic;	/* 0xfb */
> +	unsigned char len;	/* size of this header + size of fid */
> +	unsigned char type;	/* fid_type of fid */
> +	unsigned char fid[0];	/* file identifier */
> +} __packed;
> +
> +#define OVL_FH_VERSION	0
> +#define OVL_FH_MAGIC	0xfb
> +
>  /* util.c */
>  int ovl_want_write(struct dentry *dentry);
>  void ovl_drop_write(struct dentry *dentry);
> @@ -171,6 +184,8 @@ bool ovl_redirect_dir(struct super_block *sb);
>  void ovl_clear_redirect_dir(struct super_block *sb);
>  const char *ovl_dentry_get_redirect(struct dentry *dentry);
>  void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
> +bool ovl_redirect_fh(struct super_block *sb);
> +void ovl_clear_redirect_fh(struct super_block *sb);
>  void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
>  void ovl_inode_init(struct inode *inode, struct inode *realinode,
>  		    bool is_upper);
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index e294f22..cf22992 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -14,6 +14,7 @@ struct ovl_config {
>  	char *workdir;
>  	bool default_permissions;
>  	bool redirect_dir;
> +	bool redirect_fh;
>  };
>  
>  /* private information held for overlayfs's superblock */
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 79500d9..3036b2d 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -17,6 +17,7 @@
>  #include <linux/statfs.h>
>  #include <linux/seq_file.h>
>  #include <linux/posix_acl_xattr.h>
> +#include <linux/exportfs.h>
>  #include "overlayfs.h"
>  #include "ovl_entry.h"
>  
> @@ -34,6 +35,12 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
>  MODULE_PARM_DESC(ovl_redirect_dir_def,
>  		 "Default to on or off for the redirect_dir feature");
>  
> +static bool ovl_redirect_fh_def =
> +	    IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_FH);
> +module_param_named(redirect_fh, ovl_redirect_fh_def, bool, 0644);
> +MODULE_PARM_DESC(ovl_redirect_fh_def,
> +		 "Default to on or off for redirect by file handle");
> +
>  static void ovl_dentry_release(struct dentry *dentry)
>  {
>  	struct ovl_entry *oe = dentry->d_fsdata;
> @@ -246,9 +253,11 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
>  	}
>  	if (ufs->config.default_permissions)
>  		seq_puts(m, ",default_permissions");
> -	if (ufs->config.redirect_dir != ovl_redirect_dir_def)
> +	if (ufs->config.redirect_dir != ovl_redirect_dir_def ||
> +	    ufs->config.redirect_fh != ovl_redirect_fh_def)
>  		seq_printf(m, ",redirect_dir=%s",
> -			   ufs->config.redirect_dir ? "on" : "off");
> +			   !ufs->config.redirect_dir ? "off" :
> +			   ufs->config.redirect_fh ? "fh" : "on");
>  	return 0;
>  }
>  
> @@ -278,6 +287,7 @@ enum {
>  	OPT_DEFAULT_PERMISSIONS,
>  	OPT_REDIRECT_DIR_ON,
>  	OPT_REDIRECT_DIR_OFF,
> +	OPT_REDIRECT_DIR_FH,
>  	OPT_ERR,
>  };
>  
> @@ -288,6 +298,7 @@ static const match_table_t ovl_tokens = {
>  	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
>  	{OPT_REDIRECT_DIR_ON,		"redirect_dir=on"},
>  	{OPT_REDIRECT_DIR_OFF,		"redirect_dir=off"},
> +	{OPT_REDIRECT_DIR_FH,		"redirect_dir=fh"},
>  	{OPT_ERR,			NULL}
>  };
>  
> @@ -354,10 +365,17 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
>  
>  		case OPT_REDIRECT_DIR_ON:
>  			config->redirect_dir = true;
> +			config->redirect_fh = false;
>  			break;

Is this ->redirect_fh=false declarations needed? We do ufs allocation
using kzalloc() and that means all the fields are 0 value.
>  
>  		case OPT_REDIRECT_DIR_OFF:
>  			config->redirect_dir = false;
> +			config->redirect_fh = false;
> +			break;
> +
> +		case OPT_REDIRECT_DIR_FH:
> +			config->redirect_dir = true;
> +			config->redirect_fh = true;
>  			break;
>  
>  		default:
> @@ -754,6 +772,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>  
>  	init_waitqueue_head(&ufs->copyup_wq);
>  	ufs->config.redirect_dir = ovl_redirect_dir_def;
> +	ufs->config.redirect_fh = ovl_redirect_fh_def;
>  	err = ovl_parse_opt((char *) data, &ufs->config);
>  	if (err)
>  		goto out_free_config;
> @@ -934,6 +953,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>  	else
>  		sb->s_d_op = &ovl_dentry_operations;
>  
> +	/* Redirect by fhandle only if all layers on same sb with export ops */
> +	if (!ufs->same_sb || !ufs->same_sb->s_export_op ||
> +	    !ufs->same_sb->s_export_op->fh_to_dentry)
> +		ufs->config.redirect_fh = false;
> +
>  	ufs->creator_cred = cred = prepare_creds();
>  	if (!cred)
>  		goto out_put_lower_mnt;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index dcebef0..17530c5 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -215,6 +215,20 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
>  	oe->redirect = redirect;
>  }
>  
> +bool ovl_redirect_fh(struct super_block *sb)
> +{
> +	struct ovl_fs *ofs = sb->s_fs_info;
> +
> +	return ofs->config.redirect_fh;
> +}
> +
> +void ovl_clear_redirect_fh(struct super_block *sb)
> +{
> +	struct ovl_fs *ofs = sb->s_fs_info;
> +
> +	ofs->config.redirect_fh = false;
> +}
> +
>  void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
>  {
>  	struct ovl_entry *oe = dentry->d_fsdata;
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein April 17, 2017, 9:14 p.m. UTC | #4
On Mon, Apr 17, 2017 at 10:49 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Mon, Apr 17, 2017 at 02:59:32AM +0300, Amir Goldstein wrote:
>> When mounted with mount option redirect_dir=fh,
>> every copy up of lower directory stores the lower
>> dir file handle in overlay.fh xattr of upper dir.
>>
>> This method has some advantages over absolute path redirect:
>> - it is more compact in stored xattr size
>> - it is not limited by lengths of full paths
>> - lookup redirect is more efficient for very nested directories
>>
>> It also has some disadvantages over absolute path redirect:
>> - it requires that all lower layers are on the same file system,
>>   which support exportfs ops
>> - file handles will become stale if overlay lower directories
>>   were to be copied to another location
>>
>> Therefore, redirect by file handle is considered an optimization
>> and file handle is stored on copy up in addition to setting
>> redirect by path on rename.
>
> Hi Amir,
>
> If this is just an optimization to redirect, then why not store it
> only during rename (like redirct path).
>
> Given you are storing it all the time for merged dir during copy up,
> looks like you plan to use it for other use cases too. May be it
> makes sense to either mention those use cases here or in this patch
> just create file handle over rename and then in later patches change
> it with explanation.
>
> Or I missed the intent completely?
>


Hi Vivek,

Patches 2-3 (redirect fh for dirs) are resend of patches I have been
carrying for a while now for redirect dir of snapshots, so the commit
message is a bit out dated.
I guess it makes some sense to store fh on rename in this patch
and change it to store on copy up in patch 4, but I'll only make
this cosmetic effort if Miklos insists ;-)
Probably best if I just squash patch 4 into patch 2 and describe
the config option/module param/mount option for what they
are used for in this patch set and not only the redirect dir optimization.

>>
>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>> ---
>>  fs/overlayfs/Kconfig     | 16 +++++++++
>>  fs/overlayfs/copy_up.c   | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/overlayfs/overlayfs.h | 15 +++++++++
>>  fs/overlayfs/ovl_entry.h |  1 +
>>  fs/overlayfs/super.c     | 28 ++++++++++++++--
>>  fs/overlayfs/util.c      | 14 ++++++++
>>  6 files changed, 159 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
>> index 0daac51..351b61a 100644
>> --- a/fs/overlayfs/Kconfig
>> +++ b/fs/overlayfs/Kconfig
>> @@ -22,3 +22,19 @@ config OVERLAY_FS_REDIRECT_DIR
>>         Note, that redirects are not backward compatible.  That is, mounting
>>         an overlay which has redirects on a kernel that doesn't support this
>>         feature will have unexpected results.
>> +
>> +config OVERLAY_FS_REDIRECT_FH
>> +     bool "Overlayfs: turn on redirect by file handle by default"
>> +     depends on OVERLAY_FS_REDIRECT_DIR
>> +     help
>> +       If this config option is enabled then overlay filesystems will use
>> +       redirect by file handle for merged directories by default.  It is
>> +       also possible to turn on redirect by file handle globally with the
>> +       "redirect_fh=on" module option or on a filesystem instance basis
>> +       with the "redirect_dir=fh" mount option.
>> +
>> +       Redirect by file handle provides faster lookup compared to redirect
>> +       by absolute path, but it only works when all layers are on the same
>> +       underlying file system that supports NFS export ops.  Redirect by
>> +       file handle breaks if layers are taken offline and copied to another
>> +       location.  In that case, lookup falls back to redirect by path.
>> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
>> index 906ea6c..428dc26 100644
>> --- a/fs/overlayfs/copy_up.c
>> +++ b/fs/overlayfs/copy_up.c
>> @@ -20,6 +20,7 @@
>>  #include <linux/namei.h>
>>  #include <linux/fdtable.h>
>>  #include <linux/ratelimit.h>
>> +#include <linux/exportfs.h>
>>  #include "overlayfs.h"
>>  #include "ovl_entry.h"
>>
>> @@ -232,6 +233,80 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
>>       return err;
>>  }
>>
>> +static struct ovl_fh *ovl_get_redirect_fh(struct dentry *lower)
>> +{
>> +     const struct export_operations *nop = lower->d_sb->s_export_op;
>> +     struct ovl_fh *fh;
>> +     int fh_type, fh_len, dwords;
>> +     void *buf, *ret;
>> +     int buflen = MAX_HANDLE_SZ;
>> +
>> +     /* Do not encode file handle if we cannot decode it later */
>> +     if (!nop || !nop->fh_to_dentry) {
>> +             pr_info("overlay: redirect by file handle not supported "
>> +                     "by lower - turning off redirect\n");
>
> This message probably should go where you are actually turning off the
> feature?
>

This test should probably go away, or turn into a WARN_ON, because
I added a check for export ops in all layers at mount time.

>> +             return ERR_PTR(-EOPNOTSUPP);
>> +     }
>> +
>> +     buf = kmalloc(buflen, GFP_TEMPORARY);
>> +     if (!buf)
>> +             return ERR_PTR(-ENOMEM);
>> +
>> +     fh = buf;
>> +     dwords = (buflen - offsetof(struct ovl_fh, fid)) >> 2;
>> +     fh_type = exportfs_encode_fh(lower,
>> +                                  (struct fid *)fh->fid,
>> +                                  &dwords, 0);
>> +     fh_len = (dwords << 2) + offsetof(struct ovl_fh, fid);
>> +
>> +     ret = ERR_PTR(-EOVERFLOW);
>> +     if (fh_len > buflen || fh_type <= 0 || fh_type == FILEID_INVALID)
>> +             goto out;
>> +
>> +     fh->version = OVL_FH_VERSION;
>> +     fh->magic = OVL_FH_MAGIC;
>> +     fh->type = fh_type;
>> +     fh->len = fh_len;
>> +
>> +     ret = kmalloc(fh_len, GFP_KERNEL);
>> +     if (!ret) {
>> +             ret = ERR_PTR(-ENOMEM);
>> +             goto out;
>> +     }
>> +
>> +     memcpy(ret, buf, fh_len);
>> +out:
>> +     kfree(buf);
>> +     return ret;
>> +}
>> +
>> +static int ovl_set_redirect_fh(struct dentry *dentry, struct dentry *upper)
>> +{
>> +     int err;
>> +     const struct ovl_fh *fh;
>> +
>> +     fh = ovl_get_redirect_fh(ovl_dentry_lower(dentry));
>> +     err = PTR_ERR(fh);
>> +     if (IS_ERR(fh))
>> +             goto out_err;
>> +
>> +     err = ovl_do_setxattr(upper, OVL_XATTR_FH, fh, fh->len, 0);
>> +     if (err)
>> +             goto out_free;
>> +
>> +     return 0;
>> +
>> +out_free:
>> +     kfree(fh);
>> +out_err:
>> +     if (err == -EOPNOTSUPP) {
>> +             ovl_clear_redirect_fh(dentry->d_sb);
>> +             return 0;
>> +     }
>> +     pr_warn_ratelimited("overlay: failed to set redirect fh (%i)\n", err);
>> +     return err;
>> +}
>> +
>>  static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
>>                             struct dentry *dentry, struct path *lowerpath,
>>                             struct kstat *stat, const char *link,
>> @@ -316,6 +391,18 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
>>       if (err)
>>               goto out_cleanup;
>>
>> +     if (S_ISDIR(stat->mode) &&
>> +         ovl_redirect_dir(dentry->d_sb) &&
>> +         ovl_redirect_fh(dentry->d_sb)) {
>
> Do we need to check for redirect_dir. Looks like redirect_dir has to
> be enabled if redirect_fh is enabled?
>

No, we don't. I removed it in patch 4.
I am waiting for feedback from Miklos on my choice of module parameter
name/type and the mount options redirect_dir=off/on/fh
(it made sense when I wrote this patch but maybe not anymore)
I'll sort this out after we agree on these details.

>> +             /*
>> +              * Store file handle of lower dir in upper dir xattr to
>> +              * create a chain of file handles for merged dir stack
>> +              */
>> +             err = ovl_set_redirect_fh(dentry, temp);
>> +             if (err)
>> +                     goto out_cleanup;
>> +     }
>> +
>>       if (tmpfile)
>>               err = ovl_do_link(temp, udir, upper, true);
>>       else
>> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
>> index c851158..49be199 100644
>> --- a/fs/overlayfs/overlayfs.h
>> +++ b/fs/overlayfs/overlayfs.h
>> @@ -20,6 +20,7 @@ enum ovl_path_type {
>>  #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
>>  #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
>>  #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
>> +#define OVL_XATTR_FH OVL_XATTR_PREFIX "fh"
>>
>>  #define OVL_ISUPPER_MASK 1UL
>>
>> @@ -146,6 +147,18 @@ static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
>>       return (struct inode *) (x & ~OVL_ISUPPER_MASK);
>>  }
>>
>> +/* redirect data format for redirect by file handle */
>> +struct ovl_fh {
>> +     unsigned char version;  /* 0 */
>> +     unsigned char magic;    /* 0xfb */
>> +     unsigned char len;      /* size of this header + size of fid */
>> +     unsigned char type;     /* fid_type of fid */
>> +     unsigned char fid[0];   /* file identifier */
>> +} __packed;
>> +
>> +#define OVL_FH_VERSION       0
>> +#define OVL_FH_MAGIC 0xfb
>> +
>>  /* util.c */
>>  int ovl_want_write(struct dentry *dentry);
>>  void ovl_drop_write(struct dentry *dentry);
>> @@ -171,6 +184,8 @@ bool ovl_redirect_dir(struct super_block *sb);
>>  void ovl_clear_redirect_dir(struct super_block *sb);
>>  const char *ovl_dentry_get_redirect(struct dentry *dentry);
>>  void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
>> +bool ovl_redirect_fh(struct super_block *sb);
>> +void ovl_clear_redirect_fh(struct super_block *sb);
>>  void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
>>  void ovl_inode_init(struct inode *inode, struct inode *realinode,
>>                   bool is_upper);
>> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
>> index e294f22..cf22992 100644
>> --- a/fs/overlayfs/ovl_entry.h
>> +++ b/fs/overlayfs/ovl_entry.h
>> @@ -14,6 +14,7 @@ struct ovl_config {
>>       char *workdir;
>>       bool default_permissions;
>>       bool redirect_dir;
>> +     bool redirect_fh;
>>  };
>>
>>  /* private information held for overlayfs's superblock */
>> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
>> index 79500d9..3036b2d 100644
>> --- a/fs/overlayfs/super.c
>> +++ b/fs/overlayfs/super.c
>> @@ -17,6 +17,7 @@
>>  #include <linux/statfs.h>
>>  #include <linux/seq_file.h>
>>  #include <linux/posix_acl_xattr.h>
>> +#include <linux/exportfs.h>
>>  #include "overlayfs.h"
>>  #include "ovl_entry.h"
>>
>> @@ -34,6 +35,12 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
>>  MODULE_PARM_DESC(ovl_redirect_dir_def,
>>                "Default to on or off for the redirect_dir feature");
>>
>> +static bool ovl_redirect_fh_def =
>> +         IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_FH);
>> +module_param_named(redirect_fh, ovl_redirect_fh_def, bool, 0644);
>> +MODULE_PARM_DESC(ovl_redirect_fh_def,
>> +              "Default to on or off for redirect by file handle");
>> +
>>  static void ovl_dentry_release(struct dentry *dentry)
>>  {
>>       struct ovl_entry *oe = dentry->d_fsdata;
>> @@ -246,9 +253,11 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
>>       }
>>       if (ufs->config.default_permissions)
>>               seq_puts(m, ",default_permissions");
>> -     if (ufs->config.redirect_dir != ovl_redirect_dir_def)
>> +     if (ufs->config.redirect_dir != ovl_redirect_dir_def ||
>> +         ufs->config.redirect_fh != ovl_redirect_fh_def)
>>               seq_printf(m, ",redirect_dir=%s",
>> -                        ufs->config.redirect_dir ? "on" : "off");
>> +                        !ufs->config.redirect_dir ? "off" :
>> +                        ufs->config.redirect_fh ? "fh" : "on");
>>       return 0;
>>  }
>>
>> @@ -278,6 +287,7 @@ enum {
>>       OPT_DEFAULT_PERMISSIONS,
>>       OPT_REDIRECT_DIR_ON,
>>       OPT_REDIRECT_DIR_OFF,
>> +     OPT_REDIRECT_DIR_FH,
>>       OPT_ERR,
>>  };
>>
>> @@ -288,6 +298,7 @@ static const match_table_t ovl_tokens = {
>>       {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
>>       {OPT_REDIRECT_DIR_ON,           "redirect_dir=on"},
>>       {OPT_REDIRECT_DIR_OFF,          "redirect_dir=off"},
>> +     {OPT_REDIRECT_DIR_FH,           "redirect_dir=fh"},
>>       {OPT_ERR,                       NULL}
>>  };
>>
>> @@ -354,10 +365,17 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
>>
>>               case OPT_REDIRECT_DIR_ON:
>>                       config->redirect_dir = true;
>> +                     config->redirect_fh = false;
>>                       break;
>
> Is this ->redirect_fh=false declarations needed? We do ufs allocation
> using kzalloc() and that means all the fields are 0 value.

Unless some bozo comes along and does

mount -t overlay -o redirect_dir=fh,redirect_dir=on ...

We are not preventing this, we don't need to.

>>
>>               case OPT_REDIRECT_DIR_OFF:
>>                       config->redirect_dir = false;
>> +                     config->redirect_fh = false;
>> +                     break;
>> +
>> +             case OPT_REDIRECT_DIR_FH:
>> +                     config->redirect_dir = true;
>> +                     config->redirect_fh = true;
>>                       break;
>>
>>               default:
>> @@ -754,6 +772,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>>
>>       init_waitqueue_head(&ufs->copyup_wq);
>>       ufs->config.redirect_dir = ovl_redirect_dir_def;
>> +     ufs->config.redirect_fh = ovl_redirect_fh_def;
>>       err = ovl_parse_opt((char *) data, &ufs->config);
>>       if (err)
>>               goto out_free_config;
>> @@ -934,6 +953,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>>       else
>>               sb->s_d_op = &ovl_dentry_operations;
>>
>> +     /* Redirect by fhandle only if all layers on same sb with export ops */
>> +     if (!ufs->same_sb || !ufs->same_sb->s_export_op ||
>> +         !ufs->same_sb->s_export_op->fh_to_dentry)
>> +             ufs->config.redirect_fh = false;
>> +
>>       ufs->creator_cred = cred = prepare_creds();
>>       if (!cred)
>>               goto out_put_lower_mnt;
>> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
>> index dcebef0..17530c5 100644
>> --- a/fs/overlayfs/util.c
>> +++ b/fs/overlayfs/util.c
>> @@ -215,6 +215,20 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
>>       oe->redirect = redirect;
>>  }
>>
>> +bool ovl_redirect_fh(struct super_block *sb)
>> +{
>> +     struct ovl_fs *ofs = sb->s_fs_info;
>> +
>> +     return ofs->config.redirect_fh;
>> +}
>> +
>> +void ovl_clear_redirect_fh(struct super_block *sb)
>> +{
>> +     struct ovl_fs *ofs = sb->s_fs_info;
>> +
>> +     ofs->config.redirect_fh = false;
>> +}
>> +
>>  void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
>>  {
>>       struct ovl_entry *oe = dentry->d_fsdata;
>> --
>> 2.7.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Miklos Szeredi April 19, 2017, 3:16 p.m. UTC | #5
On Mon, Apr 17, 2017 at 1:59 AM, Amir Goldstein <amir73il@gmail.com> wrote:
> When mounted with mount option redirect_dir=fh,
> every copy up of lower directory stores the lower
> dir file handle in overlay.fh xattr of upper dir.

Is there a disadvantage to doing this unconditionally? (Same goes for patch 4)

Is there a disadvantage to doing this even for the non-samefs case?

Thanks,
Miklos
Amir Goldstein April 19, 2017, 3:27 p.m. UTC | #6
On Wed, Apr 19, 2017 at 6:16 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Mon, Apr 17, 2017 at 1:59 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>> When mounted with mount option redirect_dir=fh,
>> every copy up of lower directory stores the lower
>> dir file handle in overlay.fh xattr of upper dir.
>
> Is there a disadvantage to doing this unconditionally? (Same goes for patch 4)
>
> Is there a disadvantage to doing this even for the non-samefs case?
>

Not that I can think of. It's just that all the features that stable
inode brings
to the table only work for same_fs right now, but we can store overlay.fh
anyway.

When you get a chance please let me know your preference wrt
configuration options.
I don't see a reason to let the user turn off storing overlay.fh?
Is there a reason to let user turn off lookup by fh? for files?
(i.e. to preserve old weird behavior?)

Amir.
Miklos Szeredi April 19, 2017, 3:33 p.m. UTC | #7
On Wed, Apr 19, 2017 at 5:27 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Wed, Apr 19, 2017 at 6:16 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>> On Mon, Apr 17, 2017 at 1:59 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>>> When mounted with mount option redirect_dir=fh,
>>> every copy up of lower directory stores the lower
>>> dir file handle in overlay.fh xattr of upper dir.
>>
>> Is there a disadvantage to doing this unconditionally? (Same goes for patch 4)
>>
>> Is there a disadvantage to doing this even for the non-samefs case?
>>
>
> Not that I can think of. It's just that all the features that stable
> inode brings
> to the table only work for same_fs right now, but we can store overlay.fh
> anyway.
>
> When you get a chance please let me know your preference wrt
> configuration options.
> I don't see a reason to let the user turn off storing overlay.fh?
> Is there a reason to let user turn off lookup by fh? for files?
> (i.e. to preserve old weird behavior?)

The less config options the better.

We needed the redirect_dir=on/off thing because it's a backward
incompatible format change and the alternatives sucked more (i.e.
Linus didn't like them).

I don't see why we'd need to allow turning off backward compatible
changes (of course, there's a minimal overhead, but lets assume that
nobody cares about that, and if it turns out to be a problem, we'll
fix it).

Thanks,
Miklos

>
> Amir.
Amir Goldstein April 19, 2017, 3:43 p.m. UTC | #8
On Wed, Apr 19, 2017 at 6:33 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Wed, Apr 19, 2017 at 5:27 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>> On Wed, Apr 19, 2017 at 6:16 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>>> On Mon, Apr 17, 2017 at 1:59 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>>>> When mounted with mount option redirect_dir=fh,
>>>> every copy up of lower directory stores the lower
>>>> dir file handle in overlay.fh xattr of upper dir.
>>>
>>> Is there a disadvantage to doing this unconditionally? (Same goes for patch 4)
>>>
>>> Is there a disadvantage to doing this even for the non-samefs case?
>>>
>>
>> Not that I can think of. It's just that all the features that stable
>> inode brings
>> to the table only work for same_fs right now, but we can store overlay.fh
>> anyway.
>>
>> When you get a chance please let me know your preference wrt
>> configuration options.
>> I don't see a reason to let the user turn off storing overlay.fh?
>> Is there a reason to let user turn off lookup by fh? for files?
>> (i.e. to preserve old weird behavior?)
>
> The less config options the better.
>
> We needed the redirect_dir=on/off thing because it's a backward
> incompatible format change and the alternatives sucked more (i.e.
> Linus didn't like them).
>
> I don't see why we'd need to allow turning off backward compatible
> changes (of course, there's a minimal overhead, but lets assume that
> nobody cares about that, and if it turns out to be a problem, we'll
> fix it).
>

There is one slightly hackish reason for which I find redirect_dir=fh
mount option useful.
redirect_fh it automatically turned on|off depending on same_fs
configuration.
When I look at the mount options at /proc/mounts I can see
redirect_dir=fh|on which gives me an indication if I can
expect the stable inodes behavior.

This sort of info can be useful for support calls, but I guess it
belongs somewhere under sysfs or debugfs...

Amir.
Amir Goldstein April 20, 2017, 8:55 a.m. UTC | #9
On Wed, Apr 19, 2017 at 6:27 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Wed, Apr 19, 2017 at 6:16 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>> On Mon, Apr 17, 2017 at 1:59 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>>> When mounted with mount option redirect_dir=fh,
>>> every copy up of lower directory stores the lower
>>> dir file handle in overlay.fh xattr of upper dir.
>>
>> Is there a disadvantage to doing this unconditionally? (Same goes for patch 4)
>>
>> Is there a disadvantage to doing this even for the non-samefs case?
>>
>
> Not that I can think of. It's just that all the features that stable
> inode brings
> to the table only work for same_fs right now, but we can store overlay.fh
> anyway.
>

Sorry, I remembered what the reason was.

File handles are only unique for a single fs namespace.
Right now, the copy up stores the lower handle in overlay.fh
without storing lower layer number and/or lower fs device id.

The overlay.fh format has a header with version and magic:
struct ovl_fh {
        unsigned char version;  /* 0 */
        unsigned char magic;    /* 0xfb */

So we can easily extend this format later on to support non
same fs redirect_fh.

That said, to answer your question, there is no disadvantage to
storing the lower handle for non-samefs case, there is only
a problem with lookup in the case of several lower layers
no on the same fs.

So we can actually relax the samefs constrain for redirect_fh
to exclude upper layer and specifically, we can always use redirect_fh
in the single lower layer case.

There is one more advantage to storing overlay.fh unconditionally.
Even with multi non-samefs lower layers, the overlay.fh on upper
can be used to distinguish between "merge" and "pure" upper
for non dirs.

I would like to extend the meaning of OVL_TYPE_MERGE
to represent an upper non-dir that has been copied up.
Patch #5 already takes the first step and sets numlower = 1
for non-dirs that have been copied up on samefs.

The next steps would be:
1. remove d_is_dir() check from ovl_path_type()
    BTW, can you please explain this comment.
    I don't understand how this could be:

                /*
                 * Non-dir dentry can hold lower dentry from previous
                 * location.
                 */
                if (oe->numlower && d_is_dir(dentry))
                        type |= __OVL_PATH_MERGE;

2. check merge_or_lower() in ovl_rename() also for non-dir case
    this will cause setting overlay.redirect on non-dir
    and then hardlinks could work better when copying layers
    even without reverse mapping and recovery

3. set __OVL_PATH_MERGE flag during lookup for non-samefs
    In non-samefs case, __OVL_PATH_MERGE may not always be
    deduced from numlower, so need to use my patch that add
    ovl_update_type() [1]

[1] https://marc.info/?l=linux-unionfs&m=149079818122017&w=2

Amir.
Miklos Szeredi April 21, 2017, 3:02 p.m. UTC | #10
On Thu, Apr 20, 2017 at 10:55 AM, Amir Goldstein <amir73il@gmail.com> wrote:

> File handles are only unique for a single fs namespace.
> Right now, the copy up stores the lower handle in overlay.fh
> without storing lower layer number and/or lower fs device id.

Hmm, would need to be careful with the security implications of
decoding fh on the wrong fs.

>
> The overlay.fh format has a header with version and magic:
> struct ovl_fh {
>         unsigned char version;  /* 0 */
>         unsigned char magic;    /* 0xfb */
>
> So we can easily extend this format later on to support non
> same fs redirect_fh.
>
> That said, to answer your question, there is no disadvantage to
> storing the lower handle for non-samefs case, there is only
> a problem with lookup in the case of several lower layers
> no on the same fs.

Why?

> So we can actually relax the samefs constrain for redirect_fh
> to exclude upper layer and specifically, we can always use redirect_fh
> in the single lower layer case.
>
> There is one more advantage to storing overlay.fh unconditionally.
> Even with multi non-samefs lower layers, the overlay.fh on upper
> can be used to distinguish between "merge" and "pure" upper
> for non dirs.
>
> I would like to extend the meaning of OVL_TYPE_MERGE
> to represent an upper non-dir that has been copied up.
> Patch #5 already takes the first step and sets numlower = 1
> for non-dirs that have been copied up on samefs.
>
> The next steps would be:
> 1. remove d_is_dir() check from ovl_path_type()
>     BTW, can you please explain this comment.
>     I don't understand how this could be:
>
>                 /*
>                  * Non-dir dentry can hold lower dentry from previous
>                  * location.
>                  */
>                 if (oe->numlower && d_is_dir(dentry))
>                         type |= __OVL_PATH_MERGE;

I guess the comment says, that when a non-dir is copied up it will
have positive __upperdentry and a positive numlower (actually, always
one).

If you remove the d_is_dir() then it will change the meaning from
"merge" to "copied up".  The two meanings are distinct: consider the
ovl_clear_empty() case.  It starts with a merged dir and ends up with
an opaque one.  But the opaque one is still the same object as the
original, lower one, so both need to have the same overlay.fh
attribute.

>
> 2. check merge_or_lower() in ovl_rename() also for non-dir case
>     this will cause setting overlay.redirect on non-dir
>     and then hardlinks could work better when copying layers
>     even without reverse mapping and recovery
>
> 3. set __OVL_PATH_MERGE flag during lookup for non-samefs
>     In non-samefs case, __OVL_PATH_MERGE may not always be
>     deduced from numlower, so need to use my patch that add
>     ovl_update_type() [1]

So lets not confuse "merge" with "copied up".  The two are similar for
directories, but not the same, and "merge" doesn't make sense for
regular files.

Otherwise I think the above would be fine.

Thanks,
Miklos
Amir Goldstein April 21, 2017, 3:29 p.m. UTC | #11
On Fri, Apr 21, 2017 at 6:02 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Thu, Apr 20, 2017 at 10:55 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>
>> File handles are only unique for a single fs namespace.
>> Right now, the copy up stores the lower handle in overlay.fh
>> without storing lower layer number and/or lower fs device id.
>
> Hmm, would need to be careful with the security implications of
> decoding fh on the wrong fs.
>
>>
>> The overlay.fh format has a header with version and magic:
>> struct ovl_fh {
>>         unsigned char version;  /* 0 */
>>         unsigned char magic;    /* 0xfb */
>>
>> So we can easily extend this format later on to support non
>> same fs redirect_fh.
>>
>> That said, to answer your question, there is no disadvantage to
>> storing the lower handle for non-samefs case, there is only
>> a problem with lookup in the case of several lower layers
>> no on the same fs.
>
> Why?
>

Because decoding an fh takes a vfsmnt as input arg, looking up
by fh is implemented:

    for (i = 0; i < numlower; i++) {
          this = ovl_lookup_fh(lowerstack[i].mnt, d.fh);
          if (this == NULL)
               continue;
    }

and ovl_lookup_fh() even verifies ovl_dentry_under_mnt() for
accepting the decoded dentry.

So unless we are in the "same lower sb" case this lookup
can return a dentry from the wrong layer.

>> So we can actually relax the samefs constrain for redirect_fh
>> to exclude upper layer and specifically, we can always use redirect_fh
>> in the single lower layer case.
>>
>> There is one more advantage to storing overlay.fh unconditionally.
>> Even with multi non-samefs lower layers, the overlay.fh on upper
>> can be used to distinguish between "merge" and "pure" upper
>> for non dirs.
>>
>> I would like to extend the meaning of OVL_TYPE_MERGE
>> to represent an upper non-dir that has been copied up.
>> Patch #5 already takes the first step and sets numlower = 1
>> for non-dirs that have been copied up on samefs.
>>
>> The next steps would be:
>> 1. remove d_is_dir() check from ovl_path_type()
>>     BTW, can you please explain this comment.
>>     I don't understand how this could be:
>>
>>                 /*
>>                  * Non-dir dentry can hold lower dentry from previous
>>                  * location.
>>                  */
>>                 if (oe->numlower && d_is_dir(dentry))
>>                         type |= __OVL_PATH_MERGE;
>
> I guess the comment says, that when a non-dir is copied up it will
> have positive __upperdentry and a positive numlower (actually, always
> one).

Ok, I guess I wasn't sure what "previous location" was referring to
it implied rename.

>
> If you remove the d_is_dir() then it will change the meaning from
> "merge" to "copied up".  The two meanings are distinct: consider the
> ovl_clear_empty() case.  It starts with a merged dir and ends up with
> an opaque one.  But the opaque one is still the same object as the
> original, lower one, so both need to have the same overlay.fh
> attribute.
>

Well, technically, an opaque object should not have overlay.fh
but I see that it can happen with an unhashed just-deleted entry.
so yes, need to be able to make that distinction.

>>
>> 2. check merge_or_lower() in ovl_rename() also for non-dir case
>>     this will cause setting overlay.redirect on non-dir
>>     and then hardlinks could work better when copying layers
>>     even without reverse mapping and recovery
>>
>> 3. set __OVL_PATH_MERGE flag during lookup for non-samefs
>>     In non-samefs case, __OVL_PATH_MERGE may not always be
>>     deduced from numlower, so need to use my patch that add
>>     ovl_update_type() [1]
>
> So lets not confuse "merge" with "copied up".  The two are similar for
> directories, but not the same, and "merge" doesn't make sense for
> regular files.
>

It make sense if you consider that the overlay inode of non-dir is a merge
of the upper attributes and data and the lower ino (and maybe st_dev).

But it is a hack nonetheless, so I'll think up a new flag name for
COPIED_UP. suggestions are welcome.

Thanks,
Amir.
diff mbox

Patch

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 0daac51..351b61a 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -22,3 +22,19 @@  config OVERLAY_FS_REDIRECT_DIR
 	  Note, that redirects are not backward compatible.  That is, mounting
 	  an overlay which has redirects on a kernel that doesn't support this
 	  feature will have unexpected results.
+
+config OVERLAY_FS_REDIRECT_FH
+	bool "Overlayfs: turn on redirect by file handle by default"
+	depends on OVERLAY_FS_REDIRECT_DIR
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  redirect by file handle for merged directories by default.  It is
+	  also possible to turn on redirect by file handle globally with the
+	  "redirect_fh=on" module option or on a filesystem instance basis
+	  with the "redirect_dir=fh" mount option.
+
+	  Redirect by file handle provides faster lookup compared to redirect
+	  by absolute path, but it only works when all layers are on the same
+	  underlying file system that supports NFS export ops.  Redirect by
+	  file handle breaks if layers are taken offline and copied to another
+	  location.  In that case, lookup falls back to redirect by path.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 906ea6c..428dc26 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -20,6 +20,7 @@ 
 #include <linux/namei.h>
 #include <linux/fdtable.h>
 #include <linux/ratelimit.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -232,6 +233,80 @@  int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
+static struct ovl_fh *ovl_get_redirect_fh(struct dentry *lower)
+{
+	const struct export_operations *nop = lower->d_sb->s_export_op;
+	struct ovl_fh *fh;
+	int fh_type, fh_len, dwords;
+	void *buf, *ret;
+	int buflen = MAX_HANDLE_SZ;
+
+	/* Do not encode file handle if we cannot decode it later */
+	if (!nop || !nop->fh_to_dentry) {
+		pr_info("overlay: redirect by file handle not supported "
+			"by lower - turning off redirect\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	buf = kmalloc(buflen, GFP_TEMPORARY);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	fh = buf;
+	dwords = (buflen - offsetof(struct ovl_fh, fid)) >> 2;
+	fh_type = exportfs_encode_fh(lower,
+				     (struct fid *)fh->fid,
+				     &dwords, 0);
+	fh_len = (dwords << 2) + offsetof(struct ovl_fh, fid);
+
+	ret = ERR_PTR(-EOVERFLOW);
+	if (fh_len > buflen || fh_type <= 0 || fh_type == FILEID_INVALID)
+		goto out;
+
+	fh->version = OVL_FH_VERSION;
+	fh->magic = OVL_FH_MAGIC;
+	fh->type = fh_type;
+	fh->len = fh_len;
+
+	ret = kmalloc(fh_len, GFP_KERNEL);
+	if (!ret) {
+		ret = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	memcpy(ret, buf, fh_len);
+out:
+	kfree(buf);
+	return ret;
+}
+
+static int ovl_set_redirect_fh(struct dentry *dentry, struct dentry *upper)
+{
+	int err;
+	const struct ovl_fh *fh;
+
+	fh = ovl_get_redirect_fh(ovl_dentry_lower(dentry));
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh))
+		goto out_err;
+
+	err = ovl_do_setxattr(upper, OVL_XATTR_FH, fh, fh->len, 0);
+	if (err)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	kfree(fh);
+out_err:
+	if (err == -EOPNOTSUPP) {
+		ovl_clear_redirect_fh(dentry->d_sb);
+		return 0;
+	}
+	pr_warn_ratelimited("overlay: failed to set redirect fh (%i)\n", err);
+	return err;
+}
+
 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 			      struct dentry *dentry, struct path *lowerpath,
 			      struct kstat *stat, const char *link,
@@ -316,6 +391,18 @@  static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (err)
 		goto out_cleanup;
 
+	if (S_ISDIR(stat->mode) &&
+	    ovl_redirect_dir(dentry->d_sb) &&
+	    ovl_redirect_fh(dentry->d_sb)) {
+		/*
+		 * Store file handle of lower dir in upper dir xattr to
+		 * create a chain of file handles for merged dir stack
+		 */
+		err = ovl_set_redirect_fh(dentry, temp);
+		if (err)
+			goto out_cleanup;
+	}
+
 	if (tmpfile)
 		err = ovl_do_link(temp, udir, upper, true);
 	else
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index c851158..49be199 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -20,6 +20,7 @@  enum ovl_path_type {
 #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
 #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
 #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
+#define OVL_XATTR_FH OVL_XATTR_PREFIX "fh"
 
 #define OVL_ISUPPER_MASK 1UL
 
@@ -146,6 +147,18 @@  static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
 	return (struct inode *) (x & ~OVL_ISUPPER_MASK);
 }
 
+/* redirect data format for redirect by file handle */
+struct ovl_fh {
+	unsigned char version;	/* 0 */
+	unsigned char magic;	/* 0xfb */
+	unsigned char len;	/* size of this header + size of fid */
+	unsigned char type;	/* fid_type of fid */
+	unsigned char fid[0];	/* file identifier */
+} __packed;
+
+#define OVL_FH_VERSION	0
+#define OVL_FH_MAGIC	0xfb
+
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
@@ -171,6 +184,8 @@  bool ovl_redirect_dir(struct super_block *sb);
 void ovl_clear_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
+bool ovl_redirect_fh(struct super_block *sb);
+void ovl_clear_redirect_fh(struct super_block *sb);
 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
 void ovl_inode_init(struct inode *inode, struct inode *realinode,
 		    bool is_upper);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index e294f22..cf22992 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,7 @@  struct ovl_config {
 	char *workdir;
 	bool default_permissions;
 	bool redirect_dir;
+	bool redirect_fh;
 };
 
 /* private information held for overlayfs's superblock */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 79500d9..3036b2d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -17,6 +17,7 @@ 
 #include <linux/statfs.h>
 #include <linux/seq_file.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -34,6 +35,12 @@  module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(ovl_redirect_dir_def,
 		 "Default to on or off for the redirect_dir feature");
 
+static bool ovl_redirect_fh_def =
+	    IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_FH);
+module_param_named(redirect_fh, ovl_redirect_fh_def, bool, 0644);
+MODULE_PARM_DESC(ovl_redirect_fh_def,
+		 "Default to on or off for redirect by file handle");
+
 static void ovl_dentry_release(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
@@ -246,9 +253,11 @@  static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	}
 	if (ufs->config.default_permissions)
 		seq_puts(m, ",default_permissions");
-	if (ufs->config.redirect_dir != ovl_redirect_dir_def)
+	if (ufs->config.redirect_dir != ovl_redirect_dir_def ||
+	    ufs->config.redirect_fh != ovl_redirect_fh_def)
 		seq_printf(m, ",redirect_dir=%s",
-			   ufs->config.redirect_dir ? "on" : "off");
+			   !ufs->config.redirect_dir ? "off" :
+			   ufs->config.redirect_fh ? "fh" : "on");
 	return 0;
 }
 
@@ -278,6 +287,7 @@  enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_REDIRECT_DIR_ON,
 	OPT_REDIRECT_DIR_OFF,
+	OPT_REDIRECT_DIR_FH,
 	OPT_ERR,
 };
 
@@ -288,6 +298,7 @@  static const match_table_t ovl_tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_REDIRECT_DIR_ON,		"redirect_dir=on"},
 	{OPT_REDIRECT_DIR_OFF,		"redirect_dir=off"},
+	{OPT_REDIRECT_DIR_FH,		"redirect_dir=fh"},
 	{OPT_ERR,			NULL}
 };
 
@@ -354,10 +365,17 @@  static int ovl_parse_opt(char *opt, struct ovl_config *config)
 
 		case OPT_REDIRECT_DIR_ON:
 			config->redirect_dir = true;
+			config->redirect_fh = false;
 			break;
 
 		case OPT_REDIRECT_DIR_OFF:
 			config->redirect_dir = false;
+			config->redirect_fh = false;
+			break;
+
+		case OPT_REDIRECT_DIR_FH:
+			config->redirect_dir = true;
+			config->redirect_fh = true;
 			break;
 
 		default:
@@ -754,6 +772,7 @@  static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	init_waitqueue_head(&ufs->copyup_wq);
 	ufs->config.redirect_dir = ovl_redirect_dir_def;
+	ufs->config.redirect_fh = ovl_redirect_fh_def;
 	err = ovl_parse_opt((char *) data, &ufs->config);
 	if (err)
 		goto out_free_config;
@@ -934,6 +953,11 @@  static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	else
 		sb->s_d_op = &ovl_dentry_operations;
 
+	/* Redirect by fhandle only if all layers on same sb with export ops */
+	if (!ufs->same_sb || !ufs->same_sb->s_export_op ||
+	    !ufs->same_sb->s_export_op->fh_to_dentry)
+		ufs->config.redirect_fh = false;
+
 	ufs->creator_cred = cred = prepare_creds();
 	if (!cred)
 		goto out_put_lower_mnt;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index dcebef0..17530c5 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -215,6 +215,20 @@  void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 	oe->redirect = redirect;
 }
 
+bool ovl_redirect_fh(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->config.redirect_fh;
+}
+
+void ovl_clear_redirect_fh(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	ofs->config.redirect_fh = false;
+}
+
 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;