diff mbox series

[v2] statx: stx_subvol

Message ID 20240308022914.196982-1-kent.overstreet@linux.dev (mailing list archive)
State New, archived
Headers show
Series [v2] statx: stx_subvol | expand

Commit Message

Kent Overstreet March 8, 2024, 2:29 a.m. UTC
Add a new statx field for (sub)volume identifiers, as implemented by
btrfs and bcachefs.

This includes bcachefs support; we'll definitely want btrfs support as
well.

Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs.c          | 3 +++
 fs/stat.c                 | 1 +
 include/linux/stat.h      | 1 +
 include/uapi/linux/stat.h | 4 +++-
 4 files changed, 8 insertions(+), 1 deletion(-)

Comments

Neal Gompa March 8, 2024, 11:42 a.m. UTC | #1
On Thu, Mar 7, 2024 at 9:29 PM Kent Overstreet
<kent.overstreet@linux.dev> wrote:
>
> Add a new statx field for (sub)volume identifiers, as implemented by
> btrfs and bcachefs.
>
> This includes bcachefs support; we'll definitely want btrfs support as
> well.
>
> Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Cc: Josef Bacik <josef@toxicpanda.com>
> Cc: Miklos Szeredi <mszeredi@redhat.com>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: David Howells <dhowells@redhat.com>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>  fs/bcachefs/fs.c          | 3 +++
>  fs/stat.c                 | 1 +
>  include/linux/stat.h      | 1 +
>  include/uapi/linux/stat.h | 4 +++-
>  4 files changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> index 3f073845bbd7..6a542ed43e2c 100644
> --- a/fs/bcachefs/fs.c
> +++ b/fs/bcachefs/fs.c
> @@ -840,6 +840,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
>         stat->blksize   = block_bytes(c);
>         stat->blocks    = inode->v.i_blocks;
>
> +       stat->subvol    = inode->ei_subvol;
> +       stat->result_mask |= STATX_SUBVOL;
> +
>         if (request_mask & STATX_BTIME) {
>                 stat->result_mask |= STATX_BTIME;
>                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
> diff --git a/fs/stat.c b/fs/stat.c
> index 77cdc69eb422..70bd3e888cfa 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>         tmp.stx_mnt_id = stat->mnt_id;
>         tmp.stx_dio_mem_align = stat->dio_mem_align;
>         tmp.stx_dio_offset_align = stat->dio_offset_align;
> +       tmp.stx_subvol = stat->subvol;
>
>         return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
>  }
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index 52150570d37a..bf92441dbad2 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h
> @@ -53,6 +53,7 @@ struct kstat {
>         u32             dio_mem_align;
>         u32             dio_offset_align;
>         u64             change_cookie;
> +       u64             subvol;
>  };
>
>  /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> index 2f2ee82d5517..67626d535316 100644
> --- a/include/uapi/linux/stat.h
> +++ b/include/uapi/linux/stat.h
> @@ -126,8 +126,9 @@ struct statx {
>         __u64   stx_mnt_id;
>         __u32   stx_dio_mem_align;      /* Memory buffer alignment for direct I/O */
>         __u32   stx_dio_offset_align;   /* File offset alignment for direct I/O */
> +       __u64   stx_subvol;     /* Subvolume identifier */
>         /* 0xa0 */
> -       __u64   __spare3[12];   /* Spare space for future expansion */
> +       __u64   __spare3[11];   /* Spare space for future expansion */
>         /* 0x100 */
>  };
>
> @@ -155,6 +156,7 @@ struct statx {
>  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
>  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
>  #define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
> +#define STATX_SUBVOL           0x00008000U     /* Want/got stx_subvol */
>
>  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
>
> --
> 2.43.0
>
>

I think it's generally expected that patches that touch different
layers are split up. That is, we should have a patch that adds the
capability and a separate patch that enables it in bcachefs. This also
helps make it clearer to others how a new feature should be plumbed
into a filesystem.

I would prefer it to be split up in this manner for this reason.



--
真実はいつも一つ!/ Always, there's only one truth!
Kent Overstreet March 8, 2024, 4:34 p.m. UTC | #2
On Fri, Mar 08, 2024 at 06:42:27AM -0500, Neal Gompa wrote:
> On Thu, Mar 7, 2024 at 9:29 PM Kent Overstreet
> <kent.overstreet@linux.dev> wrote:
> >
> > Add a new statx field for (sub)volume identifiers, as implemented by
> > btrfs and bcachefs.
> >
> > This includes bcachefs support; we'll definitely want btrfs support as
> > well.
> >
> > Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > Cc: Josef Bacik <josef@toxicpanda.com>
> > Cc: Miklos Szeredi <mszeredi@redhat.com>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: David Howells <dhowells@redhat.com>
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > ---
> >  fs/bcachefs/fs.c          | 3 +++
> >  fs/stat.c                 | 1 +
> >  include/linux/stat.h      | 1 +
> >  include/uapi/linux/stat.h | 4 +++-
> >  4 files changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> > index 3f073845bbd7..6a542ed43e2c 100644
> > --- a/fs/bcachefs/fs.c
> > +++ b/fs/bcachefs/fs.c
> > @@ -840,6 +840,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
> >         stat->blksize   = block_bytes(c);
> >         stat->blocks    = inode->v.i_blocks;
> >
> > +       stat->subvol    = inode->ei_subvol;
> > +       stat->result_mask |= STATX_SUBVOL;
> > +
> >         if (request_mask & STATX_BTIME) {
> >                 stat->result_mask |= STATX_BTIME;
> >                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
> > diff --git a/fs/stat.c b/fs/stat.c
> > index 77cdc69eb422..70bd3e888cfa 100644
> > --- a/fs/stat.c
> > +++ b/fs/stat.c
> > @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> >         tmp.stx_mnt_id = stat->mnt_id;
> >         tmp.stx_dio_mem_align = stat->dio_mem_align;
> >         tmp.stx_dio_offset_align = stat->dio_offset_align;
> > +       tmp.stx_subvol = stat->subvol;
> >
> >         return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> >  }
> > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > index 52150570d37a..bf92441dbad2 100644
> > --- a/include/linux/stat.h
> > +++ b/include/linux/stat.h
> > @@ -53,6 +53,7 @@ struct kstat {
> >         u32             dio_mem_align;
> >         u32             dio_offset_align;
> >         u64             change_cookie;
> > +       u64             subvol;
> >  };
> >
> >  /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > index 2f2ee82d5517..67626d535316 100644
> > --- a/include/uapi/linux/stat.h
> > +++ b/include/uapi/linux/stat.h
> > @@ -126,8 +126,9 @@ struct statx {
> >         __u64   stx_mnt_id;
> >         __u32   stx_dio_mem_align;      /* Memory buffer alignment for direct I/O */
> >         __u32   stx_dio_offset_align;   /* File offset alignment for direct I/O */
> > +       __u64   stx_subvol;     /* Subvolume identifier */
> >         /* 0xa0 */
> > -       __u64   __spare3[12];   /* Spare space for future expansion */
> > +       __u64   __spare3[11];   /* Spare space for future expansion */
> >         /* 0x100 */
> >  };
> >
> > @@ -155,6 +156,7 @@ struct statx {
> >  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
> >  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
> >  #define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
> > +#define STATX_SUBVOL           0x00008000U     /* Want/got stx_subvol */
> >
> >  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
> >
> > --
> > 2.43.0
> >
> >
> 
> I think it's generally expected that patches that touch different
> layers are split up. That is, we should have a patch that adds the
> capability and a separate patch that enables it in bcachefs. This also
> helps make it clearer to others how a new feature should be plumbed
> into a filesystem.
> 
> I would prefer it to be split up in this manner for this reason.

I'll do it that way if the patch is big enough that it ought to be
split up. For something this small, seeing how it's used is relevant
context for both reviewers and people looking at it afterwards.
Neal Gompa March 8, 2024, 4:44 p.m. UTC | #3
On Fri, Mar 8, 2024 at 11:34 AM Kent Overstreet
<kent.overstreet@linux.dev> wrote:
>
> On Fri, Mar 08, 2024 at 06:42:27AM -0500, Neal Gompa wrote:
> > On Thu, Mar 7, 2024 at 9:29 PM Kent Overstreet
> > <kent.overstreet@linux.dev> wrote:
> > >
> > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > btrfs and bcachefs.
> > >
> > > This includes bcachefs support; we'll definitely want btrfs support as
> > > well.
> > >
> > > Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > Cc: Josef Bacik <josef@toxicpanda.com>
> > > Cc: Miklos Szeredi <mszeredi@redhat.com>
> > > Cc: Christian Brauner <brauner@kernel.org>
> > > Cc: David Howells <dhowells@redhat.com>
> > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > ---
> > >  fs/bcachefs/fs.c          | 3 +++
> > >  fs/stat.c                 | 1 +
> > >  include/linux/stat.h      | 1 +
> > >  include/uapi/linux/stat.h | 4 +++-
> > >  4 files changed, 8 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> > > index 3f073845bbd7..6a542ed43e2c 100644
> > > --- a/fs/bcachefs/fs.c
> > > +++ b/fs/bcachefs/fs.c
> > > @@ -840,6 +840,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
> > >         stat->blksize   = block_bytes(c);
> > >         stat->blocks    = inode->v.i_blocks;
> > >
> > > +       stat->subvol    = inode->ei_subvol;
> > > +       stat->result_mask |= STATX_SUBVOL;
> > > +
> > >         if (request_mask & STATX_BTIME) {
> > >                 stat->result_mask |= STATX_BTIME;
> > >                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
> > > diff --git a/fs/stat.c b/fs/stat.c
> > > index 77cdc69eb422..70bd3e888cfa 100644
> > > --- a/fs/stat.c
> > > +++ b/fs/stat.c
> > > @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> > >         tmp.stx_mnt_id = stat->mnt_id;
> > >         tmp.stx_dio_mem_align = stat->dio_mem_align;
> > >         tmp.stx_dio_offset_align = stat->dio_offset_align;
> > > +       tmp.stx_subvol = stat->subvol;
> > >
> > >         return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> > >  }
> > > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > > index 52150570d37a..bf92441dbad2 100644
> > > --- a/include/linux/stat.h
> > > +++ b/include/linux/stat.h
> > > @@ -53,6 +53,7 @@ struct kstat {
> > >         u32             dio_mem_align;
> > >         u32             dio_offset_align;
> > >         u64             change_cookie;
> > > +       u64             subvol;
> > >  };
> > >
> > >  /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > > index 2f2ee82d5517..67626d535316 100644
> > > --- a/include/uapi/linux/stat.h
> > > +++ b/include/uapi/linux/stat.h
> > > @@ -126,8 +126,9 @@ struct statx {
> > >         __u64   stx_mnt_id;
> > >         __u32   stx_dio_mem_align;      /* Memory buffer alignment for direct I/O */
> > >         __u32   stx_dio_offset_align;   /* File offset alignment for direct I/O */
> > > +       __u64   stx_subvol;     /* Subvolume identifier */
> > >         /* 0xa0 */
> > > -       __u64   __spare3[12];   /* Spare space for future expansion */
> > > +       __u64   __spare3[11];   /* Spare space for future expansion */
> > >         /* 0x100 */
> > >  };
> > >
> > > @@ -155,6 +156,7 @@ struct statx {
> > >  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
> > >  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
> > >  #define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
> > > +#define STATX_SUBVOL           0x00008000U     /* Want/got stx_subvol */
> > >
> > >  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
> > >
> > > --
> > > 2.43.0
> > >
> > >
> >
> > I think it's generally expected that patches that touch different
> > layers are split up. That is, we should have a patch that adds the
> > capability and a separate patch that enables it in bcachefs. This also
> > helps make it clearer to others how a new feature should be plumbed
> > into a filesystem.
> >
> > I would prefer it to be split up in this manner for this reason.
>
> I'll do it that way if the patch is big enough that it ought to be
> split up. For something this small, seeing how it's used is relevant
> context for both reviewers and people looking at it afterwards.
>

It needs to also be split up because fs/ and fs/bcachefs are
maintained differently. And while right now bcachefs is the only
consumer of the API, btrfs will add it right after it's committed, and
for people who are cherry-picking/backporting accordingly, having to
chop out part of a patch would be unpleasant.
Kent Overstreet March 8, 2024, 4:48 p.m. UTC | #4
On Fri, Mar 08, 2024 at 11:44:48AM -0500, Neal Gompa wrote:
> On Fri, Mar 8, 2024 at 11:34 AM Kent Overstreet
> <kent.overstreet@linux.dev> wrote:
> >
> > On Fri, Mar 08, 2024 at 06:42:27AM -0500, Neal Gompa wrote:
> > > On Thu, Mar 7, 2024 at 9:29 PM Kent Overstreet
> > > <kent.overstreet@linux.dev> wrote:
> > > >
> > > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > > btrfs and bcachefs.
> > > >
> > > > This includes bcachefs support; we'll definitely want btrfs support as
> > > > well.
> > > >
> > > > Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > > Cc: Josef Bacik <josef@toxicpanda.com>
> > > > Cc: Miklos Szeredi <mszeredi@redhat.com>
> > > > Cc: Christian Brauner <brauner@kernel.org>
> > > > Cc: David Howells <dhowells@redhat.com>
> > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > > ---
> > > >  fs/bcachefs/fs.c          | 3 +++
> > > >  fs/stat.c                 | 1 +
> > > >  include/linux/stat.h      | 1 +
> > > >  include/uapi/linux/stat.h | 4 +++-
> > > >  4 files changed, 8 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> > > > index 3f073845bbd7..6a542ed43e2c 100644
> > > > --- a/fs/bcachefs/fs.c
> > > > +++ b/fs/bcachefs/fs.c
> > > > @@ -840,6 +840,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
> > > >         stat->blksize   = block_bytes(c);
> > > >         stat->blocks    = inode->v.i_blocks;
> > > >
> > > > +       stat->subvol    = inode->ei_subvol;
> > > > +       stat->result_mask |= STATX_SUBVOL;
> > > > +
> > > >         if (request_mask & STATX_BTIME) {
> > > >                 stat->result_mask |= STATX_BTIME;
> > > >                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
> > > > diff --git a/fs/stat.c b/fs/stat.c
> > > > index 77cdc69eb422..70bd3e888cfa 100644
> > > > --- a/fs/stat.c
> > > > +++ b/fs/stat.c
> > > > @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> > > >         tmp.stx_mnt_id = stat->mnt_id;
> > > >         tmp.stx_dio_mem_align = stat->dio_mem_align;
> > > >         tmp.stx_dio_offset_align = stat->dio_offset_align;
> > > > +       tmp.stx_subvol = stat->subvol;
> > > >
> > > >         return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> > > >  }
> > > > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > > > index 52150570d37a..bf92441dbad2 100644
> > > > --- a/include/linux/stat.h
> > > > +++ b/include/linux/stat.h
> > > > @@ -53,6 +53,7 @@ struct kstat {
> > > >         u32             dio_mem_align;
> > > >         u32             dio_offset_align;
> > > >         u64             change_cookie;
> > > > +       u64             subvol;
> > > >  };
> > > >
> > > >  /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > > > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > > > index 2f2ee82d5517..67626d535316 100644
> > > > --- a/include/uapi/linux/stat.h
> > > > +++ b/include/uapi/linux/stat.h
> > > > @@ -126,8 +126,9 @@ struct statx {
> > > >         __u64   stx_mnt_id;
> > > >         __u32   stx_dio_mem_align;      /* Memory buffer alignment for direct I/O */
> > > >         __u32   stx_dio_offset_align;   /* File offset alignment for direct I/O */
> > > > +       __u64   stx_subvol;     /* Subvolume identifier */
> > > >         /* 0xa0 */
> > > > -       __u64   __spare3[12];   /* Spare space for future expansion */
> > > > +       __u64   __spare3[11];   /* Spare space for future expansion */
> > > >         /* 0x100 */
> > > >  };
> > > >
> > > > @@ -155,6 +156,7 @@ struct statx {
> > > >  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
> > > >  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
> > > >  #define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
> > > > +#define STATX_SUBVOL           0x00008000U     /* Want/got stx_subvol */
> > > >
> > > >  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
> > > >
> > > > --
> > > > 2.43.0
> > > >
> > > >
> > >
> > > I think it's generally expected that patches that touch different
> > > layers are split up. That is, we should have a patch that adds the
> > > capability and a separate patch that enables it in bcachefs. This also
> > > helps make it clearer to others how a new feature should be plumbed
> > > into a filesystem.
> > >
> > > I would prefer it to be split up in this manner for this reason.
> >
> > I'll do it that way if the patch is big enough that it ought to be
> > split up. For something this small, seeing how it's used is relevant
> > context for both reviewers and people looking at it afterwards.
> >
> 
> It needs to also be split up because fs/ and fs/bcachefs are
> maintained differently. And while right now bcachefs is the only
> consumer of the API, btrfs will add it right after it's committed, and
> for people who are cherry-picking/backporting accordingly, having to
> chop out part of a patch would be unpleasant.

It's a new feature, not a bugfix, this should never get backported. And
I the bcachefs maintainer wrote the patch, and I'm submitting it to the
VFS maintainer, so if it's fine with him it's fine with me.
Darrick J. Wong March 8, 2024, 4:56 p.m. UTC | #5
On Fri, Mar 08, 2024 at 11:48:31AM -0500, Kent Overstreet wrote:
> On Fri, Mar 08, 2024 at 11:44:48AM -0500, Neal Gompa wrote:
> > On Fri, Mar 8, 2024 at 11:34 AM Kent Overstreet
> > <kent.overstreet@linux.dev> wrote:
> > >
> > > On Fri, Mar 08, 2024 at 06:42:27AM -0500, Neal Gompa wrote:
> > > > On Thu, Mar 7, 2024 at 9:29 PM Kent Overstreet
> > > > <kent.overstreet@linux.dev> wrote:
> > > > >
> > > > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > > > btrfs and bcachefs.
> > > > >
> > > > > This includes bcachefs support; we'll definitely want btrfs support as
> > > > > well.
> > > > >
> > > > > Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
> > > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > > > Cc: Josef Bacik <josef@toxicpanda.com>
> > > > > Cc: Miklos Szeredi <mszeredi@redhat.com>
> > > > > Cc: Christian Brauner <brauner@kernel.org>
> > > > > Cc: David Howells <dhowells@redhat.com>
> > > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > > > ---
> > > > >  fs/bcachefs/fs.c          | 3 +++
> > > > >  fs/stat.c                 | 1 +
> > > > >  include/linux/stat.h      | 1 +
> > > > >  include/uapi/linux/stat.h | 4 +++-
> > > > >  4 files changed, 8 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> > > > > index 3f073845bbd7..6a542ed43e2c 100644
> > > > > --- a/fs/bcachefs/fs.c
> > > > > +++ b/fs/bcachefs/fs.c
> > > > > @@ -840,6 +840,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
> > > > >         stat->blksize   = block_bytes(c);
> > > > >         stat->blocks    = inode->v.i_blocks;
> > > > >
> > > > > +       stat->subvol    = inode->ei_subvol;
> > > > > +       stat->result_mask |= STATX_SUBVOL;
> > > > > +
> > > > >         if (request_mask & STATX_BTIME) {
> > > > >                 stat->result_mask |= STATX_BTIME;
> > > > >                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
> > > > > diff --git a/fs/stat.c b/fs/stat.c
> > > > > index 77cdc69eb422..70bd3e888cfa 100644
> > > > > --- a/fs/stat.c
> > > > > +++ b/fs/stat.c
> > > > > @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> > > > >         tmp.stx_mnt_id = stat->mnt_id;
> > > > >         tmp.stx_dio_mem_align = stat->dio_mem_align;
> > > > >         tmp.stx_dio_offset_align = stat->dio_offset_align;
> > > > > +       tmp.stx_subvol = stat->subvol;
> > > > >
> > > > >         return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> > > > >  }
> > > > > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > > > > index 52150570d37a..bf92441dbad2 100644
> > > > > --- a/include/linux/stat.h
> > > > > +++ b/include/linux/stat.h
> > > > > @@ -53,6 +53,7 @@ struct kstat {
> > > > >         u32             dio_mem_align;
> > > > >         u32             dio_offset_align;
> > > > >         u64             change_cookie;
> > > > > +       u64             subvol;
> > > > >  };
> > > > >
> > > > >  /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > > > > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > > > > index 2f2ee82d5517..67626d535316 100644
> > > > > --- a/include/uapi/linux/stat.h
> > > > > +++ b/include/uapi/linux/stat.h
> > > > > @@ -126,8 +126,9 @@ struct statx {
> > > > >         __u64   stx_mnt_id;
> > > > >         __u32   stx_dio_mem_align;      /* Memory buffer alignment for direct I/O */
> > > > >         __u32   stx_dio_offset_align;   /* File offset alignment for direct I/O */
> > > > > +       __u64   stx_subvol;     /* Subvolume identifier */
> > > > >         /* 0xa0 */
> > > > > -       __u64   __spare3[12];   /* Spare space for future expansion */
> > > > > +       __u64   __spare3[11];   /* Spare space for future expansion */
> > > > >         /* 0x100 */
> > > > >  };
> > > > >
> > > > > @@ -155,6 +156,7 @@ struct statx {
> > > > >  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
> > > > >  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
> > > > >  #define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
> > > > > +#define STATX_SUBVOL           0x00008000U     /* Want/got stx_subvol */
> > > > >
> > > > >  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
> > > > >
> > > > > --
> > > > > 2.43.0
> > > > >
> > > > >
> > > >
> > > > I think it's generally expected that patches that touch different
> > > > layers are split up. That is, we should have a patch that adds the
> > > > capability and a separate patch that enables it in bcachefs. This also
> > > > helps make it clearer to others how a new feature should be plumbed
> > > > into a filesystem.
> > > >
> > > > I would prefer it to be split up in this manner for this reason.
> > >
> > > I'll do it that way if the patch is big enough that it ought to be
> > > split up. For something this small, seeing how it's used is relevant
> > > context for both reviewers and people looking at it afterwards.
> > >
> > 
> > It needs to also be split up because fs/ and fs/bcachefs are
> > maintained differently. And while right now bcachefs is the only
> > consumer of the API, btrfs will add it right after it's committed, and
> > for people who are cherry-picking/backporting accordingly, having to
> > chop out part of a patch would be unpleasant.
> 
> It's a new feature, not a bugfix, this should never get backported. And
> I the bcachefs maintainer wrote the patch, and I'm submitting it to the
> VFS maintainer, so if it's fine with him it's fine with me.

But then how am I supposed to bikeshed the structure of the V2 patchset
by immediately asking you to recombine the patches and spit out a V3?

</sarcasm>

But, seriously, can you update the manpage too?  Is stx_subvol a u64
cookie where userspace mustn't try to read anything into its contents?
Just like st_ino and st_dev are (supposed) to be?

Should the XFS data and rt volumes be reported with different stx_vol
values?

--D
Kent Overstreet March 8, 2024, 5:13 p.m. UTC | #6
On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> On Fri, Mar 08, 2024 at 11:48:31AM -0500, Kent Overstreet wrote:
> > It's a new feature, not a bugfix, this should never get backported. And
> > I the bcachefs maintainer wrote the patch, and I'm submitting it to the
> > VFS maintainer, so if it's fine with him it's fine with me.
> 
> But then how am I supposed to bikeshed the structure of the V2 patchset
> by immediately asking you to recombine the patches and spit out a V3?
> 
> </sarcasm>
> 
> But, seriously, can you update the manpage too?

yeah, where's that at?

> Is stx_subvol a u64
> cookie where userspace mustn't try to read anything into its contents?
> Just like st_ino and st_dev are (supposed) to be?

Actually, that's up for debate. I'm considering having the readdir()
equivalent for walking subvolumes return subvolume IDs, and then there'd
be a separate call to open by ID.

Al's idea was to return open fds to child subvolumes, then userspace can
get the path from /proc; that's also a possibility.

The key thing is that with subvolumes it's actually possible to do an
open_by_id() call with correct security checks on pathwalking - because
we don't have hardlinks so there's no ambiguity.

Or we might do it getdents() style and return the path directly.

But I think userspace is going to want to work with the volume
identifiers directly, which is partly why I'm considering why other
options might be cleaner.

Another thing to consider: where we're going with this is giving
userspace a good efficient interrface for recursive tree traversal of
subvolumes, but it might not be a bad idea to do that for mountpoints as
well - similar problems, similar scalability issues that we might want
to solve eventually.

> Should the XFS data and rt volumes be reported with different stx_vol
> values?

Maybe?
Jeff Layton March 9, 2024, 11:46 a.m. UTC | #7
On Fri, 2024-03-08 at 12:13 -0500, Kent Overstreet wrote:
> On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> > On Fri, Mar 08, 2024 at 11:48:31AM -0500, Kent Overstreet wrote:
> > > It's a new feature, not a bugfix, this should never get backported. And
> > > I the bcachefs maintainer wrote the patch, and I'm submitting it to the
> > > VFS maintainer, so if it's fine with him it's fine with me.
> > 
> > But then how am I supposed to bikeshed the structure of the V2 patchset
> > by immediately asking you to recombine the patches and spit out a V3?
> > 
> > </sarcasm>
> > 
> > But, seriously, can you update the manpage too?
> 
> yeah, where's that at?
> 

    https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git


> > Is stx_subvol a u64
> > cookie where userspace mustn't try to read anything into its contents?
> > Just like st_ino and st_dev are (supposed) to be?
> 
> Actually, that's up for debate. I'm considering having the readdir()
> equivalent for walking subvolumes return subvolume IDs, and then there'd
> be a separate call to open by ID.
> 
> Al's idea was to return open fds to child subvolumes, then userspace can
> get the path from /proc; that's also a possibility.
> 
> The key thing is that with subvolumes it's actually possible to do an
> open_by_id() call with correct security checks on pathwalking - because
> we don't have hardlinks so there's no ambiguity.
> 
> Or we might do it getdents() style and return the path directly.
> 
> But I think userspace is going to want to work with the volume
> identifiers directly, which is partly why I'm considering why other
> options might be cleaner.
> 
> Another thing to consider: where we're going with this is giving
> userspace a good efficient interrface for recursive tree traversal of
> subvolumes, but it might not be a bad idea to do that for mountpoints as
> well - similar problems, similar scalability issues that we might want
> to solve eventually.
> 

All of that's fine, but Darrick's question is about whether we should
ensure that these IDs are considered _opaque_. I think they should be.

We don't want to anyone to fall into the trap of trying to convey extra
info to userland about the volumes via this value. It should only be
good for uniquely identifying the volume.

We'll also need to document the scope of uniqueness. I assume we'll want
to define this as only being unique within a single filesystem? IOW, if
I have 2 bcachefs filesystems that are on independent devices, these
values may collide? Someone wanting to uniquely identify a subvolume on
a system will need to check both the st_dev and the st_vol, correct?


> > Should the XFS data and rt volumes be reported with different stx_vol
> > values?
> 
> Maybe?
>
Kent Overstreet March 9, 2024, 12:15 p.m. UTC | #8
On Sat, Mar 09, 2024 at 06:46:54AM -0500, Jeff Layton wrote:
> On Fri, 2024-03-08 at 12:13 -0500, Kent Overstreet wrote:
> > On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> > > On Fri, Mar 08, 2024 at 11:48:31AM -0500, Kent Overstreet wrote:
> > > > It's a new feature, not a bugfix, this should never get backported. And
> > > > I the bcachefs maintainer wrote the patch, and I'm submitting it to the
> > > > VFS maintainer, so if it's fine with him it's fine with me.
> > > 
> > > But then how am I supposed to bikeshed the structure of the V2 patchset
> > > by immediately asking you to recombine the patches and spit out a V3?
> > > 
> > > </sarcasm>
> > > 
> > > But, seriously, can you update the manpage too?
> > 
> > yeah, where's that at?
> > 
> 
>     https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
> 
> 
> > > Is stx_subvol a u64
> > > cookie where userspace mustn't try to read anything into its contents?
> > > Just like st_ino and st_dev are (supposed) to be?
> > 
> > Actually, that's up for debate. I'm considering having the readdir()
> > equivalent for walking subvolumes return subvolume IDs, and then there'd
> > be a separate call to open by ID.
> > 
> > Al's idea was to return open fds to child subvolumes, then userspace can
> > get the path from /proc; that's also a possibility.
> > 
> > The key thing is that with subvolumes it's actually possible to do an
> > open_by_id() call with correct security checks on pathwalking - because
> > we don't have hardlinks so there's no ambiguity.
> > 
> > Or we might do it getdents() style and return the path directly.
> > 
> > But I think userspace is going to want to work with the volume
> > identifiers directly, which is partly why I'm considering why other
> > options might be cleaner.
> > 
> > Another thing to consider: where we're going with this is giving
> > userspace a good efficient interrface for recursive tree traversal of
> > subvolumes, but it might not be a bad idea to do that for mountpoints as
> > well - similar problems, similar scalability issues that we might want
> > to solve eventually.
> > 
> 
> All of that's fine, but Darrick's question is about whether we should
> ensure that these IDs are considered _opaque_. I think they should be.
> 
> We don't want to anyone to fall into the trap of trying to convey extra
> info to userland about the volumes via this value. It should only be
> good for uniquely identifying the volume.
> 
> We'll also need to document the scope of uniqueness. I assume we'll want
> to define this as only being unique within a single filesystem? IOW, if
> I have 2 bcachefs filesystems that are on independent devices, these
> values may collide? Someone wanting to uniquely identify a subvolume on
> a system will need to check both the st_dev and the st_vol, correct?

they're small integers, not UUIDs, so yes
Dave Chinner March 11, 2024, 2:17 a.m. UTC | #9
On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> Should the XFS data and rt volumes be reported with different stx_vol
> values?

No, because all the inodes are on the data volume and the same inode
can have data on the data volume or the rt volume. i.e. "data on rt,
truncate, clear rt, copy data back into data dev".  It's still the
same inode, and may have exactly the same data, so why should change
stx_vol and make it appear to userspace as being a different inode?

-Dave.
Miklos Szeredi March 11, 2024, 5:30 a.m. UTC | #10
On Mon, 11 Mar 2024 at 03:17, Dave Chinner <david@fromorbit.com> wrote:
>
> On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> > Should the XFS data and rt volumes be reported with different stx_vol
> > values?
>
> No, because all the inodes are on the data volume and the same inode
> can have data on the data volume or the rt volume. i.e. "data on rt,
> truncate, clear rt, copy data back into data dev".  It's still the
> same inode, and may have exactly the same data, so why should change
> stx_vol and make it appear to userspace as being a different inode?

Because stx_vol must not be used by userspace to distinguish between
unique inodes.  To determine if two inodes are distinct within a
filesystem (which may have many volumes) it should query the file
handle and compare that.

If we'll have a filesystem that has a different stx_vol but the same
fh, all the better.

Thanks,
Miklos
Kent Overstreet March 11, 2024, 5:49 a.m. UTC | #11
On Mon, Mar 11, 2024 at 06:30:21AM +0100, Miklos Szeredi wrote:
> On Mon, 11 Mar 2024 at 03:17, Dave Chinner <david@fromorbit.com> wrote:
> >
> > On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> > > Should the XFS data and rt volumes be reported with different stx_vol
> > > values?
> >
> > No, because all the inodes are on the data volume and the same inode
> > can have data on the data volume or the rt volume. i.e. "data on rt,
> > truncate, clear rt, copy data back into data dev".  It's still the
> > same inode, and may have exactly the same data, so why should change
> > stx_vol and make it appear to userspace as being a different inode?
> 
> Because stx_vol must not be used by userspace to distinguish between
> unique inodes.  To determine if two inodes are distinct within a
> filesystem (which may have many volumes) it should query the file
> handle and compare that.
> 
> If we'll have a filesystem that has a different stx_vol but the same
> fh, all the better.

I agree that stx_vol should not be used for uniqueness testing, but
that's a non sequitar here; Dave's talking about the fact that volume
isn't a constatn for a given inode on XFS. And that's a good point;
volumes on XFS don't map to the filesystem path heirarchy in a nice
clean way like on btrfs and bcachefs (and presumably ZFS).

Subvolumes on btrfs and bcachefs form a tree, and that's something we
should document about stx_subvol - recursively enumerable things are
quite nice to work with.
Johannes Thumshirn March 11, 2024, 8:12 a.m. UTC | #12
On 08.03.24 03:29, Kent Overstreet wrote:
> Add a new statx field for (sub)volume identifiers, as implemented by
> btrfs and bcachefs.
> 
> This includes bcachefs support; we'll definitely want btrfs support as
> well.

For btrfs you can add the following:


 From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Mon, 11 Mar 2024 09:09:36 +0100
Subject: [PATCH] btrfs: provide subvolume id for statx

Add the inode's subvolume id to the newly proposed statx subvol field.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
  fs/btrfs/inode.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 37701531eeb1..8cf692c708d7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8779,6 +8779,9 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
  	generic_fillattr(idmap, request_mask, inode, stat);
  	stat->dev = BTRFS_I(inode)->root->anon_dev;

+	stat->subvol = BTRFS_I(inode)->root->root_key.objectid;
+	stat->result_mask |= STATX_SUBVOL;
+
  	spin_lock(&BTRFS_I(inode)->lock);
  	delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
  	inode_bytes = inode_get_bytes(inode);
Christian Brauner March 11, 2024, 1:42 p.m. UTC | #13
On Fri, Mar 08, 2024 at 08:56:33AM -0800, Darrick J. Wong wrote:
> On Fri, Mar 08, 2024 at 11:48:31AM -0500, Kent Overstreet wrote:
> > On Fri, Mar 08, 2024 at 11:44:48AM -0500, Neal Gompa wrote:
> > > On Fri, Mar 8, 2024 at 11:34 AM Kent Overstreet
> > > <kent.overstreet@linux.dev> wrote:
> > > >
> > > > On Fri, Mar 08, 2024 at 06:42:27AM -0500, Neal Gompa wrote:
> > > > > On Thu, Mar 7, 2024 at 9:29 PM Kent Overstreet
> > > > > <kent.overstreet@linux.dev> wrote:
> > > > > >
> > > > > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > > > > btrfs and bcachefs.
> > > > > >
> > > > > > This includes bcachefs support; we'll definitely want btrfs support as
> > > > > > well.
> > > > > >
> > > > > > Link: https://lore.kernel.org/linux-fsdevel/2uvhm6gweyl7iyyp2xpfryvcu2g3padagaeqcbiavjyiis6prl@yjm725bizncq/
> > > > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > > > > Cc: Josef Bacik <josef@toxicpanda.com>
> > > > > > Cc: Miklos Szeredi <mszeredi@redhat.com>
> > > > > > Cc: Christian Brauner <brauner@kernel.org>
> > > > > > Cc: David Howells <dhowells@redhat.com>
> > > > > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > > > > > ---
> > > > > >  fs/bcachefs/fs.c          | 3 +++
> > > > > >  fs/stat.c                 | 1 +
> > > > > >  include/linux/stat.h      | 1 +
> > > > > >  include/uapi/linux/stat.h | 4 +++-
> > > > > >  4 files changed, 8 insertions(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
> > > > > > index 3f073845bbd7..6a542ed43e2c 100644
> > > > > > --- a/fs/bcachefs/fs.c
> > > > > > +++ b/fs/bcachefs/fs.c
> > > > > > @@ -840,6 +840,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
> > > > > >         stat->blksize   = block_bytes(c);
> > > > > >         stat->blocks    = inode->v.i_blocks;
> > > > > >
> > > > > > +       stat->subvol    = inode->ei_subvol;
> > > > > > +       stat->result_mask |= STATX_SUBVOL;
> > > > > > +
> > > > > >         if (request_mask & STATX_BTIME) {
> > > > > >                 stat->result_mask |= STATX_BTIME;
> > > > > >                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
> > > > > > diff --git a/fs/stat.c b/fs/stat.c
> > > > > > index 77cdc69eb422..70bd3e888cfa 100644
> > > > > > --- a/fs/stat.c
> > > > > > +++ b/fs/stat.c
> > > > > > @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> > > > > >         tmp.stx_mnt_id = stat->mnt_id;
> > > > > >         tmp.stx_dio_mem_align = stat->dio_mem_align;
> > > > > >         tmp.stx_dio_offset_align = stat->dio_offset_align;
> > > > > > +       tmp.stx_subvol = stat->subvol;
> > > > > >
> > > > > >         return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> > > > > >  }
> > > > > > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > > > > > index 52150570d37a..bf92441dbad2 100644
> > > > > > --- a/include/linux/stat.h
> > > > > > +++ b/include/linux/stat.h
> > > > > > @@ -53,6 +53,7 @@ struct kstat {
> > > > > >         u32             dio_mem_align;
> > > > > >         u32             dio_offset_align;
> > > > > >         u64             change_cookie;
> > > > > > +       u64             subvol;
> > > > > >  };
> > > > > >
> > > > > >  /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > > > > > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > > > > > index 2f2ee82d5517..67626d535316 100644
> > > > > > --- a/include/uapi/linux/stat.h
> > > > > > +++ b/include/uapi/linux/stat.h
> > > > > > @@ -126,8 +126,9 @@ struct statx {
> > > > > >         __u64   stx_mnt_id;
> > > > > >         __u32   stx_dio_mem_align;      /* Memory buffer alignment for direct I/O */
> > > > > >         __u32   stx_dio_offset_align;   /* File offset alignment for direct I/O */
> > > > > > +       __u64   stx_subvol;     /* Subvolume identifier */
> > > > > >         /* 0xa0 */
> > > > > > -       __u64   __spare3[12];   /* Spare space for future expansion */
> > > > > > +       __u64   __spare3[11];   /* Spare space for future expansion */
> > > > > >         /* 0x100 */
> > > > > >  };
> > > > > >
> > > > > > @@ -155,6 +156,7 @@ struct statx {
> > > > > >  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
> > > > > >  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
> > > > > >  #define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
> > > > > > +#define STATX_SUBVOL           0x00008000U     /* Want/got stx_subvol */
> > > > > >
> > > > > >  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
> > > > > >
> > > > > > --
> > > > > > 2.43.0
> > > > > >
> > > > > >
> > > > >
> > > > > I think it's generally expected that patches that touch different
> > > > > layers are split up. That is, we should have a patch that adds the
> > > > > capability and a separate patch that enables it in bcachefs. This also
> > > > > helps make it clearer to others how a new feature should be plumbed
> > > > > into a filesystem.
> > > > >
> > > > > I would prefer it to be split up in this manner for this reason.
> > > >
> > > > I'll do it that way if the patch is big enough that it ought to be
> > > > split up. For something this small, seeing how it's used is relevant
> > > > context for both reviewers and people looking at it afterwards.
> > > >
> > > 
> > > It needs to also be split up because fs/ and fs/bcachefs are
> > > maintained differently. And while right now bcachefs is the only
> > > consumer of the API, btrfs will add it right after it's committed, and
> > > for people who are cherry-picking/backporting accordingly, having to
> > > chop out part of a patch would be unpleasant.
> > 
> > It's a new feature, not a bugfix, this should never get backported. And
> > I the bcachefs maintainer wrote the patch, and I'm submitting it to the
> > VFS maintainer, so if it's fine with him it's fine with me.
> 
> But then how am I supposed to bikeshed the structure of the V2 patchset
> by immediately asking you to recombine the patches and spit out a V3?
> 
> </sarcasm>

I see no reason to split this up. Especially given how small the patch
is. If there's a good reason such as meaningful merge conflicts then we
can always move to a stable branch for the infrastructure changes that
everyone else can pull from.

> 
> But, seriously, can you update the manpage too?  Is stx_subvol a u64
> cookie where userspace mustn't try to read anything into its contents?
> Just like st_ino and st_dev are (supposed) to be?

I was honestly hoping for a more elaborate patch description that would
have explained this. But I can just add this and yes, this is how I
conceptualized it and how we've always discussed it before.
Christian Brauner March 11, 2024, 1:43 p.m. UTC | #14
On Mon, Mar 11, 2024 at 08:12:33AM +0000, Johannes Thumshirn wrote:
> On 08.03.24 03:29, Kent Overstreet wrote:
> > Add a new statx field for (sub)volume identifiers, as implemented by
> > btrfs and bcachefs.
> > 
> > This includes bcachefs support; we'll definitely want btrfs support as
> > well.
> 
> For btrfs you can add the following:
> 
> 
>  From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> Date: Mon, 11 Mar 2024 09:09:36 +0100
> Subject: [PATCH] btrfs: provide subvolume id for statx
> 
> Add the inode's subvolume id to the newly proposed statx subvol field.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---

Thanks, will fold, once I hear from Josef.
Kent Overstreet March 11, 2024, 8:15 p.m. UTC | #15
On Mon, Mar 11, 2024 at 02:43:11PM +0100, Christian Brauner wrote:
> On Mon, Mar 11, 2024 at 08:12:33AM +0000, Johannes Thumshirn wrote:
> > On 08.03.24 03:29, Kent Overstreet wrote:
> > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > btrfs and bcachefs.
> > > 
> > > This includes bcachefs support; we'll definitely want btrfs support as
> > > well.
> > 
> > For btrfs you can add the following:
> > 
> > 
> >  From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
> > From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > Date: Mon, 11 Mar 2024 09:09:36 +0100
> > Subject: [PATCH] btrfs: provide subvolume id for statx
> > 
> > Add the inode's subvolume id to the newly proposed statx subvol field.
> > 
> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > ---
> 
> Thanks, will fold, once I hear from Josef.

Can we try to make 6.9? Need to know what to put in the manpage, and
I've got userspace tooling that wants to use it.
David Sterba March 11, 2024, 10:43 p.m. UTC | #16
On Mon, Mar 11, 2024 at 02:43:11PM +0100, Christian Brauner wrote:
> On Mon, Mar 11, 2024 at 08:12:33AM +0000, Johannes Thumshirn wrote:
> > On 08.03.24 03:29, Kent Overstreet wrote:
> > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > btrfs and bcachefs.
> > > 
> > > This includes bcachefs support; we'll definitely want btrfs support as
> > > well.
> > 
> > For btrfs you can add the following:
> > 
> > 
> >  From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
> > From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > Date: Mon, 11 Mar 2024 09:09:36 +0100
> > Subject: [PATCH] btrfs: provide subvolume id for statx
> > 
> > Add the inode's subvolume id to the newly proposed statx subvol field.
> > 
> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > ---
> 
> Thanks, will fold, once I hear from Josef.

We're OK with it.
Eric Biggers March 12, 2024, 2:13 a.m. UTC | #17
On Thu, Mar 07, 2024 at 09:29:12PM -0500, Kent Overstreet wrote:
>  	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
>  	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
> +	__u64	stx_subvol;	/* Subvolume identifier */
>  	/* 0xa0 */
> -	__u64	__spare3[12];	/* Spare space for future expansion */
> +	__u64	__spare3[11];	/* Spare space for future expansion */
>  	/* 0x100 */

The /* 0xa0 */ comment needs to be updated (or deleted).

- Eric
Christian Brauner March 12, 2024, 2:27 p.m. UTC | #18
On Mon, Mar 11, 2024 at 04:15:04PM -0400, Kent Overstreet wrote:
> On Mon, Mar 11, 2024 at 02:43:11PM +0100, Christian Brauner wrote:
> > On Mon, Mar 11, 2024 at 08:12:33AM +0000, Johannes Thumshirn wrote:
> > > On 08.03.24 03:29, Kent Overstreet wrote:
> > > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > > btrfs and bcachefs.
> > > > 
> > > > This includes bcachefs support; we'll definitely want btrfs support as
> > > > well.
> > > 
> > > For btrfs you can add the following:
> > > 
> > > 
> > >  From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
> > > From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > > Date: Mon, 11 Mar 2024 09:09:36 +0100
> > > Subject: [PATCH] btrfs: provide subvolume id for statx
> > > 
> > > Add the inode's subvolume id to the newly proposed statx subvol field.
> > > 
> > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > > ---
> > 
> > Thanks, will fold, once I hear from Josef.
> 
> Can we try to make 6.9? Need to know what to put in the manpage, and
> I've got userspace tooling that wants to use it.

Hm, I understand that you want to see this done but I think it's just
too tight. If this would've been Acked last week then we could've done
it the second week of the merge window. So my reaction is to wait for
v6.10. What do others think?
Christian Brauner March 12, 2024, 2:27 p.m. UTC | #19
On Mon, Mar 11, 2024 at 11:43:00PM +0100, David Sterba wrote:
> On Mon, Mar 11, 2024 at 02:43:11PM +0100, Christian Brauner wrote:
> > On Mon, Mar 11, 2024 at 08:12:33AM +0000, Johannes Thumshirn wrote:
> > > On 08.03.24 03:29, Kent Overstreet wrote:
> > > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > > btrfs and bcachefs.
> > > > 
> > > > This includes bcachefs support; we'll definitely want btrfs support as
> > > > well.
> > > 
> > > For btrfs you can add the following:
> > > 
> > > 
> > >  From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
> > > From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > > Date: Mon, 11 Mar 2024 09:09:36 +0100
> > > Subject: [PATCH] btrfs: provide subvolume id for statx
> > > 
> > > Add the inode's subvolume id to the newly proposed statx subvol field.
> > > 
> > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > > ---
> > 
> > Thanks, will fold, once I hear from Josef.
> 
> We're OK with it.

Thanks!
Neal Gompa March 12, 2024, 5:17 p.m. UTC | #20
On Tue, Mar 12, 2024 at 7:27 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Mon, Mar 11, 2024 at 11:43:00PM +0100, David Sterba wrote:
> > On Mon, Mar 11, 2024 at 02:43:11PM +0100, Christian Brauner wrote:
> > > On Mon, Mar 11, 2024 at 08:12:33AM +0000, Johannes Thumshirn wrote:
> > > > On 08.03.24 03:29, Kent Overstreet wrote:
> > > > > Add a new statx field for (sub)volume identifiers, as implemented by
> > > > > btrfs and bcachefs.
> > > > >
> > > > > This includes bcachefs support; we'll definitely want btrfs support as
> > > > > well.
> > > >
> > > > For btrfs you can add the following:
> > > >
> > > >
> > > >  From 82343b7cb2a947bca43234c443b9c22339367f68 Mon Sep 17 00:00:00 2001
> > > > From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > > > Date: Mon, 11 Mar 2024 09:09:36 +0100
> > > > Subject: [PATCH] btrfs: provide subvolume id for statx
> > > >
> > > > Add the inode's subvolume id to the newly proposed statx subvol field.
> > > >
> > > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > > > ---
> > >
> > > Thanks, will fold, once I hear from Josef.
> >
> > We're OK with it.
>
> Thanks!


Well, I guess I'm fine with the whole thing then if everyone else is.

Reviewed-by: Neal Gompa <neal@gompa.dev>



--
真実はいつも一つ!/ Always, there's only one truth!
John Garry May 28, 2024, 12:46 p.m. UTC | #21
On 12/03/2024 02:13, Eric Biggers wrote:
> On Thu, Mar 07, 2024 at 09:29:12PM -0500, Kent Overstreet wrote:
>>   	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
>>   	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
>> +	__u64	stx_subvol;	/* Subvolume identifier */
>>   	/* 0xa0 */
>> -	__u64	__spare3[12];	/* Spare space for future expansion */
>> +	__u64	__spare3[11];	/* Spare space for future expansion */
>>   	/* 0x100 */
> The /* 0xa0 */ comment needs to be updated (or deleted).

I would tend to agree. Was this intentionally not updated (or deleted)?
Andreas Dilger May 28, 2024, 10:11 p.m. UTC | #22
> On May 28, 2024, at 6:46 AM, John Garry <john.g.garry@oracle.com> wrote:
> 
> On 12/03/2024 02:13, Eric Biggers wrote:
>> On Thu, Mar 07, 2024 at 09:29:12PM -0500, Kent Overstreet wrote:
>>>  	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
>>>  	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
>>> +	__u64	stx_subvol;	/* Subvolume identifier */
>>>  	/* 0xa0 */
>>> -	__u64	__spare3[12];	/* Spare space for future expansion */
>>> +	__u64	__spare3[11];	/* Spare space for future expansion */
>>>  	/* 0x100 */
>> The /* 0xa0 */ comment needs to be updated (or deleted).
> 
> I would tend to agree. Was this intentionally not updated (or deleted)?

More correct would be to add the new stx_subvol field after the "0xa0"
comment so that it is clear at what offset in the struct this field is.

Cheers, Andreas
diff mbox series

Patch

diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3f073845bbd7..6a542ed43e2c 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -840,6 +840,9 @@  static int bch2_getattr(struct mnt_idmap *idmap,
 	stat->blksize	= block_bytes(c);
 	stat->blocks	= inode->v.i_blocks;
 
+	stat->subvol	= inode->ei_subvol;
+	stat->result_mask |= STATX_SUBVOL;
+
 	if (request_mask & STATX_BTIME) {
 		stat->result_mask |= STATX_BTIME;
 		stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
diff --git a/fs/stat.c b/fs/stat.c
index 77cdc69eb422..70bd3e888cfa 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -658,6 +658,7 @@  cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_mnt_id = stat->mnt_id;
 	tmp.stx_dio_mem_align = stat->dio_mem_align;
 	tmp.stx_dio_offset_align = stat->dio_offset_align;
+	tmp.stx_subvol = stat->subvol;
 
 	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 52150570d37a..bf92441dbad2 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -53,6 +53,7 @@  struct kstat {
 	u32		dio_mem_align;
 	u32		dio_offset_align;
 	u64		change_cookie;
+	u64		subvol;
 };
 
 /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 2f2ee82d5517..67626d535316 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -126,8 +126,9 @@  struct statx {
 	__u64	stx_mnt_id;
 	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
 	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
+	__u64	stx_subvol;	/* Subvolume identifier */
 	/* 0xa0 */
-	__u64	__spare3[12];	/* Spare space for future expansion */
+	__u64	__spare3[11];	/* Spare space for future expansion */
 	/* 0x100 */
 };
 
@@ -155,6 +156,7 @@  struct statx {
 #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
 #define STATX_DIOALIGN		0x00002000U	/* Want/got direct I/O alignment info */
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
+#define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */