diff mbox series

[v8,10/19] fanotify: introduce FAN_PRE_ACCESS permission event

Message ID b80986f8d5b860acea2c9a73c0acd93587be5fe4.1731684329.git.josef@toxicpanda.com (mailing list archive)
State New
Headers show
Series fanotify: add pre-content hooks | expand

Commit Message

Josef Bacik Nov. 15, 2024, 3:30 p.m. UTC
From: Amir Goldstein <amir73il@gmail.com>

Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.

Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
in the context of the event handler.

This pre-content event is meant to be used by hierarchical storage
managers that want to fill the content of files on first read access.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fanotify/fanotify.c      |  3 ++-
 fs/notify/fanotify/fanotify_user.c | 22 +++++++++++++++++++---
 include/linux/fanotify.h           | 14 ++++++++++----
 include/uapi/linux/fanotify.h      |  2 ++
 4 files changed, 33 insertions(+), 8 deletions(-)

Comments

Amir Goldstein Nov. 15, 2024, 3:59 p.m. UTC | #1
On Fri, Nov 15, 2024 at 4:31 PM Josef Bacik <josef@toxicpanda.com> wrote:
>
> From: Amir Goldstein <amir73il@gmail.com>
>
> Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
> class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.
>
> Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
> in the context of the event handler.
>
> This pre-content event is meant to be used by hierarchical storage
> managers that want to fill the content of files on first read access.
>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/notify/fanotify/fanotify.c      |  3 ++-
>  fs/notify/fanotify/fanotify_user.c | 22 +++++++++++++++++++---
>  include/linux/fanotify.h           | 14 ++++++++++----
>  include/uapi/linux/fanotify.h      |  2 ++
>  4 files changed, 33 insertions(+), 8 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 2e6ba94ec405..da6c3c1c7edf 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -916,8 +916,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
>         BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
>         BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
>         BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
> +       BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
>
> -       BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
> +       BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
>
>         mask = fanotify_group_event_mask(group, iter_info, &match_mask,
>                                          mask, data, data_type, dir);
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 456cc3e92c88..5ea447e9e5a8 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -1640,11 +1640,23 @@ static int fanotify_events_supported(struct fsnotify_group *group,
>                                      unsigned int flags)
>  {
>         unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
> +       bool is_dir = d_is_dir(path->dentry);
>         /* Strict validation of events in non-dir inode mask with v5.17+ APIs */
>         bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
>                                  (mask & FAN_RENAME) ||
>                                  (flags & FAN_MARK_IGNORE);
>
> +       /*
> +        * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
> +        * and they are only supported on regular files and directories.
> +        */
> +       if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
> +               if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
> +                       return -EINVAL;

You missed my latest push of this change.
no worries, for final version want:

                return -EOPNOTSUPP;

> +               if (!is_dir && !d_is_reg(path->dentry))
> +                       return -EINVAL;
> +       }
> +
>         /*
>          * Some filesystems such as 'proc' acquire unusual locks when opening
>          * files. For them fanotify permission events have high chances of
> @@ -1677,7 +1689,7 @@ static int fanotify_events_supported(struct fsnotify_group *group,
>          * but because we always allowed it, error only when using new APIs.
>          */
>         if (strict_dir_events && mark_type == FAN_MARK_INODE &&
> -           !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
> +           !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
>                 return -ENOTDIR;
>
>         return 0;
> @@ -1778,10 +1790,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>                 return -EPERM;
>
>         /*
> -        * Permission events require minimum priority FAN_CLASS_CONTENT.
> +        * Permission events are not allowed for FAN_CLASS_NOTIF.
> +        * Pre-content permission events are not allowed for FAN_CLASS_CONTENT.
>          */
>         if (mask & FANOTIFY_PERM_EVENTS &&
> -           group->priority < FSNOTIFY_PRIO_CONTENT)
> +           group->priority == FSNOTIFY_PRIO_NORMAL)
> +               return -EINVAL;
> +       else if (mask & FANOTIFY_PRE_CONTENT_EVENTS &&
> +                group->priority == FSNOTIFY_PRIO_CONTENT)
>                 return -EINVAL;
>
>         if (mask & FAN_FS_ERROR &&
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index 89ff45bd6f01..c747af064d2c 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -89,6 +89,16 @@
>  #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \
>                                  FAN_RENAME)
>
> +/* Content events can be used to inspect file content */
> +#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \
> +                                     FAN_ACCESS_PERM)
> +/* Pre-content events can be used to fill file content */
> +#define FANOTIFY_PRE_CONTENT_EVENTS  (FAN_PRE_ACCESS)
> +
> +/* Events that require a permission response from user */
> +#define FANOTIFY_PERM_EVENTS   (FANOTIFY_CONTENT_PERM_EVENTS | \
> +                                FANOTIFY_PRE_CONTENT_EVENTS)
> +
>  /* Events that can be reported with event->fd */
>  #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
>
> @@ -104,10 +114,6 @@
>                                  FANOTIFY_INODE_EVENTS | \
>                                  FANOTIFY_ERROR_EVENTS)
>
> -/* Events that require a permission response from user */
> -#define FANOTIFY_PERM_EVENTS   (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
> -                                FAN_OPEN_EXEC_PERM)
> -
>  /* Extra flags that may be reported with event or control handling of events */
>  #define FANOTIFY_EVENT_FLAGS   (FAN_EVENT_ON_CHILD | FAN_ONDIR)
>
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index 79072b6894f2..7596168c80eb 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -27,6 +27,8 @@
>  #define FAN_OPEN_EXEC_PERM     0x00040000      /* File open/exec in perm check */
>  /* #define FAN_DIR_MODIFY      0x00080000 */   /* Deprecated (reserved) */
>
> +#define FAN_PRE_ACCESS         0x00100000      /* Pre-content access hook */
> +
>  #define FAN_EVENT_ON_CHILD     0x08000000      /* Interested in child events */
>
>  #define FAN_RENAME             0x10000000      /* File was renamed */
> --
> 2.43.0
>
Jan Kara Nov. 21, 2024, 10:44 a.m. UTC | #2
On Fri 15-11-24 10:30:23, Josef Bacik wrote:
> From: Amir Goldstein <amir73il@gmail.com>
> 
> Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
> class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.
> 
> Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
> in the context of the event handler.
> 
> This pre-content event is meant to be used by hierarchical storage
> managers that want to fill the content of files on first read access.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>

Here I was wondering about one thing:

> +	/*
> +	 * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
> +	 * and they are only supported on regular files and directories.
> +	 */
> +	if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
> +		if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
> +			return -EINVAL;
> +		if (!is_dir && !d_is_reg(path->dentry))
> +			return -EINVAL;
> +	}

AFAICS, currently no pre-content events are generated for directories. So
perhaps we should refuse directories here as well for now? I'd like to
avoid the mistake of original fanotify which had some events available on
directories but they did nothing and then you have to ponder hard whether
you're going to break userspace if you actually start emitting them...

								Honza
Amir Goldstein Nov. 21, 2024, 2:18 p.m. UTC | #3
On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 15-11-24 10:30:23, Josef Bacik wrote:
> > From: Amir Goldstein <amir73il@gmail.com>
> >
> > Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
> > class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.
> >
> > Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
> > in the context of the event handler.
> >
> > This pre-content event is meant to be used by hierarchical storage
> > managers that want to fill the content of files on first read access.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>
> Here I was wondering about one thing:
>
> > +     /*
> > +      * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
> > +      * and they are only supported on regular files and directories.
> > +      */
> > +     if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
> > +             if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
> > +                     return -EINVAL;
> > +             if (!is_dir && !d_is_reg(path->dentry))
> > +                     return -EINVAL;
> > +     }
>
> AFAICS, currently no pre-content events are generated for directories. So
> perhaps we should refuse directories here as well for now? I'd like to

readdir() does emit PRE_ACCESS (without a range) and also always
emitted ACCESS_PERM. my POC is using that PRE_ACCESS to populate
directories on-demand, although the functionality is incomplete without the
"populate on lookup" event.

> avoid the mistake of original fanotify which had some events available on
> directories but they did nothing and then you have to ponder hard whether
> you're going to break userspace if you actually start emitting them...

But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.

Thanks,
Amir.
Jan Kara Nov. 21, 2024, 4:36 p.m. UTC | #4
On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Fri 15-11-24 10:30:23, Josef Bacik wrote:
> > > From: Amir Goldstein <amir73il@gmail.com>
> > >
> > > Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
> > > class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.
> > >
> > > Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
> > > in the context of the event handler.
> > >
> > > This pre-content event is meant to be used by hierarchical storage
> > > managers that want to fill the content of files on first read access.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> >
> > Here I was wondering about one thing:
> >
> > > +     /*
> > > +      * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
> > > +      * and they are only supported on regular files and directories.
> > > +      */
> > > +     if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
> > > +             if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
> > > +                     return -EINVAL;
> > > +             if (!is_dir && !d_is_reg(path->dentry))
> > > +                     return -EINVAL;
> > > +     }
> >
> > AFAICS, currently no pre-content events are generated for directories. So
> > perhaps we should refuse directories here as well for now? I'd like to
> 
> readdir() does emit PRE_ACCESS (without a range)

Ah, right.

> and also always emitted ACCESS_PERM.

I know that and it's one of those mostly useless events AFAICT.

> my POC is using that PRE_ACCESS to populate
> directories on-demand, although the functionality is incomplete without the
> "populate on lookup" event.

Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
a demo but not really usable in practice because you can get spurious
ENOENT from a lookup.

> > avoid the mistake of original fanotify which had some events available on
> > directories but they did nothing and then you have to ponder hard whether
> > you're going to break userspace if you actually start emitting them...
> 
> But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.

Well, I'm not so concerned about filtering out uninteresting events. I'm
more concerned about emitting the event now and figuring out later that we
need to emit it in different places or with some other info when actual
production users appear.

But I've realized we must allow pre-content marks to be placed on dirs so
that such marks can be placed on parents watching children. What we'd need
to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?

								Honza
Amir Goldstein Nov. 21, 2024, 6:31 p.m. UTC | #5
On Thu, Nov 21, 2024 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> > On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Fri 15-11-24 10:30:23, Josef Bacik wrote:
> > > > From: Amir Goldstein <amir73il@gmail.com>
> > > >
> > > > Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
> > > > class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.
> > > >
> > > > Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
> > > > in the context of the event handler.
> > > >
> > > > This pre-content event is meant to be used by hierarchical storage
> > > > managers that want to fill the content of files on first read access.
> > > >
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > >
> > > Here I was wondering about one thing:
> > >
> > > > +     /*
> > > > +      * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
> > > > +      * and they are only supported on regular files and directories.
> > > > +      */
> > > > +     if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
> > > > +             if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
> > > > +                     return -EINVAL;
> > > > +             if (!is_dir && !d_is_reg(path->dentry))
> > > > +                     return -EINVAL;
> > > > +     }
> > >
> > > AFAICS, currently no pre-content events are generated for directories. So
> > > perhaps we should refuse directories here as well for now? I'd like to
> >
> > readdir() does emit PRE_ACCESS (without a range)
>
> Ah, right.
>
> > and also always emitted ACCESS_PERM.
>
> I know that and it's one of those mostly useless events AFAICT.
>
> > my POC is using that PRE_ACCESS to populate
> > directories on-demand, although the functionality is incomplete without the
> > "populate on lookup" event.
>
> Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
> a demo but not really usable in practice because you can get spurious
> ENOENT from a lookup.
>
> > > avoid the mistake of original fanotify which had some events available on
> > > directories but they did nothing and then you have to ponder hard whether
> > > you're going to break userspace if you actually start emitting them...
> >
> > But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.
>
> Well, I'm not so concerned about filtering out uninteresting events. I'm
> more concerned about emitting the event now and figuring out later that we
> need to emit it in different places or with some other info when actual
> production users appear.
>
> But I've realized we must allow pre-content marks to be placed on dirs so
> that such marks can be placed on parents watching children. What we'd need
> to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?

Yes, I think that can work well for now.

Thanks,
Amir.
Amir Goldstein Nov. 21, 2024, 6:37 p.m. UTC | #6
On Thu, Nov 21, 2024 at 7:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Thu, Nov 21, 2024 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> > > On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> > > >
> > > > On Fri 15-11-24 10:30:23, Josef Bacik wrote:
> > > > > From: Amir Goldstein <amir73il@gmail.com>
> > > > >
> > > > > Similar to FAN_ACCESS_PERM permission event, but it is only allowed with
> > > > > class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs.
> > > > >
> > > > > Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed
> > > > > in the context of the event handler.
> > > > >
> > > > > This pre-content event is meant to be used by hierarchical storage
> > > > > managers that want to fill the content of files on first read access.
> > > > >
> > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > >
> > > > Here I was wondering about one thing:
> > > >
> > > > > +     /*
> > > > > +      * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
> > > > > +      * and they are only supported on regular files and directories.
> > > > > +      */
> > > > > +     if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
> > > > > +             if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
> > > > > +                     return -EINVAL;
> > > > > +             if (!is_dir && !d_is_reg(path->dentry))
> > > > > +                     return -EINVAL;
> > > > > +     }
> > > >
> > > > AFAICS, currently no pre-content events are generated for directories. So
> > > > perhaps we should refuse directories here as well for now? I'd like to
> > >
> > > readdir() does emit PRE_ACCESS (without a range)
> >
> > Ah, right.
> >
> > > and also always emitted ACCESS_PERM.
> >
> > I know that and it's one of those mostly useless events AFAICT.
> >
> > > my POC is using that PRE_ACCESS to populate
> > > directories on-demand, although the functionality is incomplete without the
> > > "populate on lookup" event.
> >
> > Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
> > a demo but not really usable in practice because you can get spurious
> > ENOENT from a lookup.
> >
> > > > avoid the mistake of original fanotify which had some events available on
> > > > directories but they did nothing and then you have to ponder hard whether
> > > > you're going to break userspace if you actually start emitting them...
> > >
> > > But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.
> >
> > Well, I'm not so concerned about filtering out uninteresting events. I'm
> > more concerned about emitting the event now and figuring out later that we
> > need to emit it in different places or with some other info when actual
> > production users appear.
> >
> > But I've realized we must allow pre-content marks to be placed on dirs so
> > that such marks can be placed on parents watching children. What we'd need
> > to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?
>
> Yes, I think that can work well for now.
>

Only it does not require only check at API time that both flags are not
set, because FAN_ONDIR can be set earlier and then FAN_PRE_ACCESS
can be added later and vice versa, so need to do this in
fanotify_may_update_existing_mark() AFAICT.

Thanks,
Amir.
Jan Kara Nov. 22, 2024, 12:42 p.m. UTC | #7
On Thu 21-11-24 19:37:43, Amir Goldstein wrote:
> On Thu, Nov 21, 2024 at 7:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > On Thu, Nov 21, 2024 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> > > On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> > > > On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> > > > and also always emitted ACCESS_PERM.
> > >
> > > I know that and it's one of those mostly useless events AFAICT.
> > >
> > > > my POC is using that PRE_ACCESS to populate
> > > > directories on-demand, although the functionality is incomplete without the
> > > > "populate on lookup" event.
> > >
> > > Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
> > > a demo but not really usable in practice because you can get spurious
> > > ENOENT from a lookup.
> > >
> > > > > avoid the mistake of original fanotify which had some events available on
> > > > > directories but they did nothing and then you have to ponder hard whether
> > > > > you're going to break userspace if you actually start emitting them...
> > > >
> > > > But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.
> > >
> > > Well, I'm not so concerned about filtering out uninteresting events. I'm
> > > more concerned about emitting the event now and figuring out later that we
> > > need to emit it in different places or with some other info when actual
> > > production users appear.
> > >
> > > But I've realized we must allow pre-content marks to be placed on dirs so
> > > that such marks can be placed on parents watching children. What we'd need
> > > to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?
> >
> > Yes, I think that can work well for now.
> >
> 
> Only it does not require only check at API time that both flags are not
> set, because FAN_ONDIR can be set earlier and then FAN_PRE_ACCESS
> can be added later and vice versa, so need to do this in
> fanotify_may_update_existing_mark() AFAICT.

I have now something like:

@@ -1356,7 +1356,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
 }
 
 static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
-                                             unsigned int fan_flags)
+                                            __u32 mask, unsigned int fan_flags)
 {
        /*
         * Non evictable mark cannot be downgraded to evictable mark.
@@ -1383,6 +1383,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
            fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
                return -EEXIST;
 
+       /* For now pre-content events are not generated for directories */
+       mask |= fsn_mark->mask;
+       if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+               return -EEXIST;
+
        return 0;
 }
 
So far only compile tested...

								Honza
Amir Goldstein Nov. 22, 2024, 1:51 p.m. UTC | #8
On Fri, Nov 22, 2024 at 1:42 PM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 21-11-24 19:37:43, Amir Goldstein wrote:
> > On Thu, Nov 21, 2024 at 7:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > On Thu, Nov 21, 2024 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> > > > On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> > > > > On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> > > > > and also always emitted ACCESS_PERM.
> > > >
> > > > I know that and it's one of those mostly useless events AFAICT.
> > > >
> > > > > my POC is using that PRE_ACCESS to populate
> > > > > directories on-demand, although the functionality is incomplete without the
> > > > > "populate on lookup" event.
> > > >
> > > > Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
> > > > a demo but not really usable in practice because you can get spurious
> > > > ENOENT from a lookup.
> > > >
> > > > > > avoid the mistake of original fanotify which had some events available on
> > > > > > directories but they did nothing and then you have to ponder hard whether
> > > > > > you're going to break userspace if you actually start emitting them...
> > > > >
> > > > > But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.
> > > >
> > > > Well, I'm not so concerned about filtering out uninteresting events. I'm
> > > > more concerned about emitting the event now and figuring out later that we
> > > > need to emit it in different places or with some other info when actual
> > > > production users appear.
> > > >
> > > > But I've realized we must allow pre-content marks to be placed on dirs so
> > > > that such marks can be placed on parents watching children. What we'd need
> > > > to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?
> > >
> > > Yes, I think that can work well for now.
> > >
> >
> > Only it does not require only check at API time that both flags are not
> > set, because FAN_ONDIR can be set earlier and then FAN_PRE_ACCESS
> > can be added later and vice versa, so need to do this in
> > fanotify_may_update_existing_mark() AFAICT.
>
> I have now something like:
>
> @@ -1356,7 +1356,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
>  }
>
>  static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> -                                             unsigned int fan_flags)
> +                                            __u32 mask, unsigned int fan_flags)
>  {
>         /*
>          * Non evictable mark cannot be downgraded to evictable mark.
> @@ -1383,6 +1383,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
>             fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
>                 return -EEXIST;
>
> +       /* For now pre-content events are not generated for directories */
> +       mask |= fsn_mark->mask;
> +       if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
> +               return -EEXIST;
> +

EEXIST is going to be confusing if there was never any mark.
Either return -EINVAL here or also check this condition on the added mask
itself before calling fanotify_add_mark() and return -EINVAL there.

I prefer two distinct errors, but probably one is also good enough.

Thanks,
Amir.
Jan Kara Nov. 27, 2024, 12:18 p.m. UTC | #9
On Fri 22-11-24 14:51:23, Amir Goldstein wrote:
> On Fri, Nov 22, 2024 at 1:42 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 21-11-24 19:37:43, Amir Goldstein wrote:
> > > On Thu, Nov 21, 2024 at 7:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > > On Thu, Nov 21, 2024 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> > > > > On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> > > > > > On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> > > > > > and also always emitted ACCESS_PERM.
> > > > >
> > > > > I know that and it's one of those mostly useless events AFAICT.
> > > > >
> > > > > > my POC is using that PRE_ACCESS to populate
> > > > > > directories on-demand, although the functionality is incomplete without the
> > > > > > "populate on lookup" event.
> > > > >
> > > > > Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
> > > > > a demo but not really usable in practice because you can get spurious
> > > > > ENOENT from a lookup.
> > > > >
> > > > > > > avoid the mistake of original fanotify which had some events available on
> > > > > > > directories but they did nothing and then you have to ponder hard whether
> > > > > > > you're going to break userspace if you actually start emitting them...
> > > > > >
> > > > > > But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.
> > > > >
> > > > > Well, I'm not so concerned about filtering out uninteresting events. I'm
> > > > > more concerned about emitting the event now and figuring out later that we
> > > > > need to emit it in different places or with some other info when actual
> > > > > production users appear.
> > > > >
> > > > > But I've realized we must allow pre-content marks to be placed on dirs so
> > > > > that such marks can be placed on parents watching children. What we'd need
> > > > > to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?
> > > >
> > > > Yes, I think that can work well for now.
> > > >
> > >
> > > Only it does not require only check at API time that both flags are not
> > > set, because FAN_ONDIR can be set earlier and then FAN_PRE_ACCESS
> > > can be added later and vice versa, so need to do this in
> > > fanotify_may_update_existing_mark() AFAICT.
> >
> > I have now something like:
> >
> > @@ -1356,7 +1356,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
> >  }
> >
> >  static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> > -                                             unsigned int fan_flags)
> > +                                            __u32 mask, unsigned int fan_flags)
> >  {
> >         /*
> >          * Non evictable mark cannot be downgraded to evictable mark.
> > @@ -1383,6 +1383,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> >             fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
> >                 return -EEXIST;
> >
> > +       /* For now pre-content events are not generated for directories */
> > +       mask |= fsn_mark->mask;
> > +       if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
> > +               return -EEXIST;
> > +
> 
> EEXIST is going to be confusing if there was never any mark.
> Either return -EINVAL here or also check this condition on the added mask
> itself before calling fanotify_add_mark() and return -EINVAL there.
> 
> I prefer two distinct errors, but probably one is also good enough.

That's actually a good point. My previous change allowed setting
FAN_PRE_ACCESS | FAN_ONDIR on a new mark because that doesn't get to
fanotify_may_update_existing_mark(). So I now have:

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 0919ea735f4a..38a46865408e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1356,7 +1356,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
 }
 
 static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
-					      unsigned int fan_flags)
+					     __u32 mask, unsigned int fan_flags)
 {
 	/*
 	 * Non evictable mark cannot be downgraded to evictable mark.
@@ -1383,6 +1383,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
 	    fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
 		return -EEXIST;
 
+	/* For now pre-content events are not generated for directories */
+	mask |= fsn_mark->mask;
+	if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+		return -EEXIST;
+
 	return 0;
 }
 
@@ -1409,7 +1414,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	/*
 	 * Check if requested mark flags conflict with an existing mark flags.
 	 */
-	ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
+	ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags);
 	if (ret)
 		goto out;
 
@@ -1905,6 +1910,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
 		goto fput_and_out;
 
+	/* Pre-content events are not currently generated for directories. */
+	if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+		goto fput_and_out;
+
 	if (mark_cmd == FAN_MARK_FLUSH) {
 		ret = 0;
 		if (mark_type == FAN_MARK_MOUNT)
Amir Goldstein Nov. 27, 2024, 12:20 p.m. UTC | #10
On Wed, Nov 27, 2024 at 1:18 PM Jan Kara <jack@suse.cz> wrote:
>
> On Fri 22-11-24 14:51:23, Amir Goldstein wrote:
> > On Fri, Nov 22, 2024 at 1:42 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Thu 21-11-24 19:37:43, Amir Goldstein wrote:
> > > > On Thu, Nov 21, 2024 at 7:31 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > > > On Thu, Nov 21, 2024 at 5:36 PM Jan Kara <jack@suse.cz> wrote:
> > > > > > On Thu 21-11-24 15:18:36, Amir Goldstein wrote:
> > > > > > > On Thu, Nov 21, 2024 at 11:44 AM Jan Kara <jack@suse.cz> wrote:
> > > > > > > and also always emitted ACCESS_PERM.
> > > > > >
> > > > > > I know that and it's one of those mostly useless events AFAICT.
> > > > > >
> > > > > > > my POC is using that PRE_ACCESS to populate
> > > > > > > directories on-demand, although the functionality is incomplete without the
> > > > > > > "populate on lookup" event.
> > > > > >
> > > > > > Exactly. Without "populate on lookup" doing "populate on readdir" is ok for
> > > > > > a demo but not really usable in practice because you can get spurious
> > > > > > ENOENT from a lookup.
> > > > > >
> > > > > > > > avoid the mistake of original fanotify which had some events available on
> > > > > > > > directories but they did nothing and then you have to ponder hard whether
> > > > > > > > you're going to break userspace if you actually start emitting them...
> > > > > > >
> > > > > > > But in any case, the FAN_ONDIR built-in filter is applicable to PRE_ACCESS.
> > > > > >
> > > > > > Well, I'm not so concerned about filtering out uninteresting events. I'm
> > > > > > more concerned about emitting the event now and figuring out later that we
> > > > > > need to emit it in different places or with some other info when actual
> > > > > > production users appear.
> > > > > >
> > > > > > But I've realized we must allow pre-content marks to be placed on dirs so
> > > > > > that such marks can be placed on parents watching children. What we'd need
> > > > > > to forbid is a combination of FAN_ONDIR and FAN_PRE_ACCESS, wouldn't we?
> > > > >
> > > > > Yes, I think that can work well for now.
> > > > >
> > > >
> > > > Only it does not require only check at API time that both flags are not
> > > > set, because FAN_ONDIR can be set earlier and then FAN_PRE_ACCESS
> > > > can be added later and vice versa, so need to do this in
> > > > fanotify_may_update_existing_mark() AFAICT.
> > >
> > > I have now something like:
> > >
> > > @@ -1356,7 +1356,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
> > >  }
> > >
> > >  static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> > > -                                             unsigned int fan_flags)
> > > +                                            __u32 mask, unsigned int fan_flags)
> > >  {
> > >         /*
> > >          * Non evictable mark cannot be downgraded to evictable mark.
> > > @@ -1383,6 +1383,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> > >             fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
> > >                 return -EEXIST;
> > >
> > > +       /* For now pre-content events are not generated for directories */
> > > +       mask |= fsn_mark->mask;
> > > +       if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
> > > +               return -EEXIST;
> > > +
> >
> > EEXIST is going to be confusing if there was never any mark.
> > Either return -EINVAL here or also check this condition on the added mask
> > itself before calling fanotify_add_mark() and return -EINVAL there.
> >
> > I prefer two distinct errors, but probably one is also good enough.
>
> That's actually a good point. My previous change allowed setting
> FAN_PRE_ACCESS | FAN_ONDIR on a new mark because that doesn't get to
> fanotify_may_update_existing_mark(). So I now have:
>
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 0919ea735f4a..38a46865408e 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -1356,7 +1356,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
>  }
>
>  static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> -                                             unsigned int fan_flags)
> +                                            __u32 mask, unsigned int fan_flags)
>  {
>         /*
>          * Non evictable mark cannot be downgraded to evictable mark.
> @@ -1383,6 +1383,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
>             fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
>                 return -EEXIST;
>
> +       /* For now pre-content events are not generated for directories */
> +       mask |= fsn_mark->mask;
> +       if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
> +               return -EEXIST;
> +
>         return 0;
>  }
>
> @@ -1409,7 +1414,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
>         /*
>          * Check if requested mark flags conflict with an existing mark flags.
>          */
> -       ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
> +       ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags);
>         if (ret)
>                 goto out;
>
> @@ -1905,6 +1910,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>         if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
>                 goto fput_and_out;
>
> +       /* Pre-content events are not currently generated for directories. */
> +       if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
> +               goto fput_and_out;
> +
>         if (mark_cmd == FAN_MARK_FLUSH) {
>                 ret = 0;
>                 if (mark_type == FAN_MARK_MOUNT)
> --
> 2.35.3
>

Looks good.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 2e6ba94ec405..da6c3c1c7edf 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -916,8 +916,9 @@  static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
 	BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
 	BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
+	BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
 
 	mask = fanotify_group_event_mask(group, iter_info, &match_mask,
 					 mask, data, data_type, dir);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 456cc3e92c88..5ea447e9e5a8 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1640,11 +1640,23 @@  static int fanotify_events_supported(struct fsnotify_group *group,
 				     unsigned int flags)
 {
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+	bool is_dir = d_is_dir(path->dentry);
 	/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
 	bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
 				 (mask & FAN_RENAME) ||
 				 (flags & FAN_MARK_IGNORE);
 
+	/*
+	 * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
+	 * and they are only supported on regular files and directories.
+	 */
+	if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
+		if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
+			return -EINVAL;
+		if (!is_dir && !d_is_reg(path->dentry))
+			return -EINVAL;
+	}
+
 	/*
 	 * Some filesystems such as 'proc' acquire unusual locks when opening
 	 * files. For them fanotify permission events have high chances of
@@ -1677,7 +1689,7 @@  static int fanotify_events_supported(struct fsnotify_group *group,
 	 * but because we always allowed it, error only when using new APIs.
 	 */
 	if (strict_dir_events && mark_type == FAN_MARK_INODE &&
-	    !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
+	    !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
 		return -ENOTDIR;
 
 	return 0;
@@ -1778,10 +1790,14 @@  static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		return -EPERM;
 
 	/*
-	 * Permission events require minimum priority FAN_CLASS_CONTENT.
+	 * Permission events are not allowed for FAN_CLASS_NOTIF.
+	 * Pre-content permission events are not allowed for FAN_CLASS_CONTENT.
 	 */
 	if (mask & FANOTIFY_PERM_EVENTS &&
-	    group->priority < FSNOTIFY_PRIO_CONTENT)
+	    group->priority == FSNOTIFY_PRIO_NORMAL)
+		return -EINVAL;
+	else if (mask & FANOTIFY_PRE_CONTENT_EVENTS &&
+		 group->priority == FSNOTIFY_PRIO_CONTENT)
 		return -EINVAL;
 
 	if (mask & FAN_FS_ERROR &&
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 89ff45bd6f01..c747af064d2c 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -89,6 +89,16 @@ 
 #define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE | \
 				 FAN_RENAME)
 
+/* Content events can be used to inspect file content */
+#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \
+				      FAN_ACCESS_PERM)
+/* Pre-content events can be used to fill file content */
+#define FANOTIFY_PRE_CONTENT_EVENTS  (FAN_PRE_ACCESS)
+
+/* Events that require a permission response from user */
+#define FANOTIFY_PERM_EVENTS	(FANOTIFY_CONTENT_PERM_EVENTS | \
+				 FANOTIFY_PRE_CONTENT_EVENTS)
+
 /* Events that can be reported with event->fd */
 #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
 
@@ -104,10 +114,6 @@ 
 				 FANOTIFY_INODE_EVENTS | \
 				 FANOTIFY_ERROR_EVENTS)
 
-/* Events that require a permission response from user */
-#define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM | \
-				 FAN_OPEN_EXEC_PERM)
-
 /* Extra flags that may be reported with event or control handling of events */
 #define FANOTIFY_EVENT_FLAGS	(FAN_EVENT_ON_CHILD | FAN_ONDIR)
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 79072b6894f2..7596168c80eb 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -27,6 +27,8 @@ 
 #define FAN_OPEN_EXEC_PERM	0x00040000	/* File open/exec in perm check */
 /* #define FAN_DIR_MODIFY	0x00080000 */	/* Deprecated (reserved) */
 
+#define FAN_PRE_ACCESS		0x00100000	/* Pre-content access hook */
+
 #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
 
 #define FAN_RENAME		0x10000000	/* File was renamed */