
[v4,tip/perf/core,2/4] mm: Introduce mmap_lock_speculation_{begin|end}

Message ID 20241028010818.2487581-3-andrii@kernel.org
State Not Applicable
Delegated to: BPF
Series uprobes,mm: speculative lockless VMA-to-uprobe lookup

Commit Message

Andrii Nakryiko Oct. 28, 2024, 1:08 a.m. UTC
From: Suren Baghdasaryan <surenb@google.com>

Add helper functions to speculatively perform operations without
read-locking mmap_lock, expecting that mmap_lock will not be
write-locked and that the mm is not modified out from under us.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/mmap_lock.h | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)
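
For illustration, a minimal sketch of how a caller would pair these
helpers (hypothetical caller code, not part of the patch; mm->some_field
stands in for whatever mm state is read speculatively):

	/* Hypothetical helper: returns a consistent snapshot of mm->some_field. */
	static unsigned long read_some_field(struct mm_struct *mm)
	{
		unsigned int seq;
		unsigned long val;

		if (mmap_lock_speculation_begin(mm, &seq)) {
			/* Speculative, lock-free read; mm_lock_seq was even. */
			val = READ_ONCE(mm->some_field);
			if (mmap_lock_speculation_end(mm, seq))
				return val;	/* no writer ran; the read is consistent */
		}
		/* Speculation refused or failed: take the lock instead. */
		mmap_read_lock(mm);
		val = mm->some_field;
		mmap_read_unlock(mm);
		return val;
	}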

Comments

Vlastimil Babka Oct. 29, 2024, 4:48 p.m. UTC | #1
On 10/28/24 02:08, Andrii Nakryiko wrote:
> From: Suren Baghdasaryan <surenb@google.com>
> 
> Add helper functions to speculatively perform operations without
> read-locking mmap_lock, expecting that mmap_lock will not be
> write-locked and that the mm is not modified out from under us.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

Acked-by: Vlastimil Babka <vbabka@suse.cz>

> ---
>  include/linux/mmap_lock.h | 29 +++++++++++++++++++++++++++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index 6b3272686860..58dde2e35f7e 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
>  }
>  
>  #ifdef CONFIG_PER_VMA_LOCK
> +
>  static inline void mm_lock_seqcount_init(struct mm_struct *mm)
>  {
>  	seqcount_init(&mm->mm_lock_seq);
> @@ -86,11 +87,35 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
>  	do_raw_write_seqcount_end(&mm->mm_lock_seq);
>  }
>  
> -#else
> +static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> +	*seq = raw_read_seqcount(&mm->mm_lock_seq);
> +	/* Allow speculation if mmap_lock is not write-locked */
> +	return (*seq & 1) == 0;
> +}
> +
> +static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
> +{
> +	return !do_read_seqcount_retry(&mm->mm_lock_seq, seq);
> +}
> +
> +#else /* CONFIG_PER_VMA_LOCK */
> +
>  static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
>  static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
>  static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
> -#endif
> +
> +static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> +	return false;
> +}
> +
> +static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
> +{
> +	return false;
> +}
> +
> +#endif /* CONFIG_PER_VMA_LOCK */
>  
>  static inline void mmap_init_lock(struct mm_struct *mm)
>  {
Peter Zijlstra Nov. 21, 2024, 2:44 p.m. UTC | #2
On Sun, Oct 27, 2024 at 06:08:16PM -0700, Andrii Nakryiko wrote:
> From: Suren Baghdasaryan <surenb@google.com>
> 
> Add helper functions to speculatively perform operations without
> read-locking mmap_lock, expecting that mmap_lock will not be
> write-locked and that the mm is not modified out from under us.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  include/linux/mmap_lock.h | 29 +++++++++++++++++++++++++++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
> 

> @@ -86,11 +87,35 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
>  	do_raw_write_seqcount_end(&mm->mm_lock_seq);
>  }
>  
> -#else
> +static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> +	*seq = raw_read_seqcount(&mm->mm_lock_seq);
> +	/* Allow speculation if mmap_lock is not write-locked */
> +	return (*seq & 1) == 0;
> +}

At the very least this should have more comment; I don't think it
adequately explains the reason for being weird. Perhaps:

	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to
	 * become unlocked is more or less equivalent with taking it
	 * ourselves, don't bother with the speculative path and take
	 * the slow path, which takes the lock.
	 */
	*seq = raw_read_seqcount(&mm->mm_lock_seq);
	return !(*seq & 1);

But perhaps it makes even more sense to add this functionality to
seqcount itself. The same argument can be made for seqcount_mutex and
seqcount_rwlock users.

> +static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
> +{
> +	return !do_read_seqcount_retry(&mm->mm_lock_seq, seq);
> +}

This naming is somewhat weird; begin/end do not typically imply boolean
return values.

Perhaps something like can_speculate or speculate_try_begin, paired
with speculated_success or speculate_retry?
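
A sketch of how a call site might read with that naming (hypothetical
signatures built from these suggestions; note that a retry-style return
value inverts the boolean sense compared to _end above):

	unsigned int seq;

	if (mmap_lock_speculate_try_begin(mm, &seq)) {
		/* ... speculative work ... */
		if (!mmap_lock_speculate_retry(mm, seq))
			return;	/* speculation succeeded */
	}
	/* slow path: mmap_read_lock(mm) and redo the work */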
Peter Zijlstra Nov. 21, 2024, 3:22 p.m. UTC | #3
On Thu, Nov 21, 2024 at 03:44:42PM +0100, Peter Zijlstra wrote:

> But perhaps it makes even more sense to add this functionality to
> seqcount itself. The same argument can be made for seqcount_mutex and
> seqcount_rwlock users.

Something like so I suppose.

---
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5298765d6ca4..102afdf8c7db 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
 	__seq;								\
 })
 
+/**
+ * raw_seqcount_try_begin() - begin a seqcount_t read critical section
+ *                            w/o lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Very like raw_seqcount_begin(), except it enables eliding the critical
+ * section entirely if odd, instead of doing the speculation knowing it will
+ * fail.
+ *
+ * Useful when counter stabilization is more or less equivalent to taking
+ * the lock and there is a slowpath that does that.
+ *
+ * If true, start will be set to the (even) sequence count read.
+ *
+ * Return: true when a read critical section is started.
+ */
+#define raw_seqcount_try_begin(s, start)				\
+({									\
+	start = raw_read_seqcount(s);					\
+	!(start & 1);							\
+})
+
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization
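
Assuming the macro lands as proposed, the existing
mmap_lock_speculation_begin() could then collapse to a one-liner; a
sketch, not code from this thread:

	static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
	{
		/* Fills *seq and succeeds only when no writer holds mmap_lock. */
		return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
	}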
Suren Baghdasaryan Nov. 21, 2024, 3:36 p.m. UTC | #4
On Thu, Nov 21, 2024 at 7:23 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Nov 21, 2024 at 03:44:42PM +0100, Peter Zijlstra wrote:
>
> > But perhaps it makes even more sense to add this functionality to
> > seqcount itself. The same argument can be made for seqcount_mutex and
> > seqcount_rwlock users.
>
> Something like so I suppose.

Ok, let me put this all together. Thanks!

Suren Baghdasaryan Nov. 21, 2024, 4:32 p.m. UTC | #5
On Thu, Nov 21, 2024 at 7:36 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Thu, Nov 21, 2024 at 7:23 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Thu, Nov 21, 2024 at 03:44:42PM +0100, Peter Zijlstra wrote:
> >
> > > But perhaps it makes even more sense to add this functionality to
> > > seqcount itself. The same argument can be made for seqcount_mutex and
> > > seqcount_rwlock users.
> >
> > Something like so I suppose.
>
> Ok, let me put this all together. Thanks!

I posted the new version at
https://lore.kernel.org/all/20241121162826.987947-1-surenb@google.com/
The changes are minimal, only those requested by Peter, so hopefully
they can be accepted quickly.


Patch

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 6b3272686860..58dde2e35f7e 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
 }
 
 #ifdef CONFIG_PER_VMA_LOCK
+
 static inline void mm_lock_seqcount_init(struct mm_struct *mm)
 {
 	seqcount_init(&mm->mm_lock_seq);
@@ -86,11 +87,35 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
 	do_raw_write_seqcount_end(&mm->mm_lock_seq);
 }
 
-#else
+static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
+{
+	*seq = raw_read_seqcount(&mm->mm_lock_seq);
+	/* Allow speculation if mmap_lock is not write-locked */
+	return (*seq & 1) == 0;
+}
+
+static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
+{
+	return !do_read_seqcount_retry(&mm->mm_lock_seq, seq);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
 static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
 static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
 static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
-#endif
+
+static inline bool mmap_lock_speculation_begin(struct mm_struct *mm, unsigned int *seq)
+{
+	return false;
+}
+
+static inline bool mmap_lock_speculation_end(struct mm_struct *mm, unsigned int seq)
+{
+	return false;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
 
 static inline void mmap_init_lock(struct mm_struct *mm)
 {