diff mbox series

[v5,5/8] virtio: Treat alloc_dax() -EOPNOTSUPP failure as non-fatal

Message ID 20240212163101.19614-6-mathieu.desnoyers@efficios.com (mailing list archive)
State New
Headers show
Series Introduce cpu_dcache_is_aliasing() to fix DAX regression | expand

Commit Message

Mathieu Desnoyers Feb. 12, 2024, 4:30 p.m. UTC
In preparation for checking whether the architecture has data cache
aliasing within alloc_dax(), modify the error handling of virtio
virtio_fs_setup_dax() to treat alloc_dax() -EOPNOTSUPP failure as
non-fatal.

Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Fixes: d92576f1167c ("dax: does not work correctly with virtual aliasing caches")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mike Snitzer <snitzer@kernel.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arch@vger.kernel.org
Cc: linux-cxl@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: linux-xfs@vger.kernel.org
Cc: dm-devel@lists.linux.dev
Cc: nvdimm@lists.linux.dev
---
 fs/fuse/virtio_fs.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Dan Williams Feb. 12, 2024, 10:04 p.m. UTC | #1
Mathieu Desnoyers wrote:
> In preparation for checking whether the architecture has data cache
> aliasing within alloc_dax(), modify the error handling of virtio
> virtio_fs_setup_dax() to treat alloc_dax() -EOPNOTSUPP failure as
> non-fatal.
> 
> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> Fixes: d92576f1167c ("dax: does not work correctly with virtual aliasing caches")
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Alasdair Kergon <agk@redhat.com>
> Cc: Mike Snitzer <snitzer@kernel.org>
> Cc: Mikulas Patocka <mpatocka@redhat.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: linux-arch@vger.kernel.org
> Cc: linux-cxl@vger.kernel.org
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-mm@kvack.org
> Cc: linux-xfs@vger.kernel.org
> Cc: dm-devel@lists.linux.dev
> Cc: nvdimm@lists.linux.dev
> ---
>  fs/fuse/virtio_fs.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 5f1be1da92ce..f9acd9972af2 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -16,6 +16,7 @@
>  #include <linux/fs_context.h>
>  #include <linux/fs_parser.h>
>  #include <linux/highmem.h>
> +#include <linux/cleanup.h>
>  #include <linux/uio.h>
>  #include "fuse_i.h"
>  
> @@ -795,8 +796,11 @@ static void virtio_fs_cleanup_dax(void *data)
>  	put_dax(dax_dev);
>  }
>  
> +DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR(_T)) virtio_fs_cleanup_dax(_T))
> +
>  static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
>  {
> +	struct dax_device *dax_dev __free(cleanup_dax) = ERR_PTR(-EOPNOTSUPP);
>  	struct virtio_shm_region cache_reg;
>  	struct dev_pagemap *pgmap;
>  	bool have_cache;
> @@ -804,6 +808,12 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
>  	if (!IS_ENABLED(CONFIG_FUSE_DAX))
>  		return 0;
>  
> +	dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
> +	if (IS_ERR(dax_dev)) {
> +		int rc = PTR_ERR(dax_dev);
> +		return rc == -EOPNOTSUPP ? 0 : rc;
> +	}
> +
>  	/* Get cache region */
>  	have_cache = virtio_get_shm_region(vdev, &cache_reg,
>  					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
> @@ -849,10 +859,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
>  	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
>  		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
>  
> -	fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
> -	if (IS_ERR(fs->dax_dev))
> -		return PTR_ERR(fs->dax_dev);
> -
> +	fs->dax_dev = no_free_ptr(dax_dev);

This works because the internals of virtio_fs_cleanup_dax(), "kill_dax()
and put_dax()", know how to handle a NULL @dax_dev. It is still early
days with the "cleanup" helpers, but I wonder if anyone else cares that
the DEFINE_FREE() above does not check for NULL?

I think it is ok, but wanted to point that out for the virtiofs folks
and wonder what the style should be for things like this going forward.

Other growing pains with "cleanup.h" and ERR_PTR is happening over here:

http://lore.kernel.org/r/65ca861e14779_5a7f2949e@dwillia2-xfh.jf.intel.com.notmuch

...and that arrived at a similar style as is being used in this patch.
In both cases the cleanup function is called on exit with a NULL
argument.
Linus Torvalds Feb. 12, 2024, 10:08 p.m. UTC | #2
On Mon, 12 Feb 2024 at 14:04, Dan Williams <dan.j.williams@intel.com> wrote:
>
> This works because the internals of virtio_fs_cleanup_dax(), "kill_dax()
> and put_dax()", know how to handle a NULL @dax_dev. It is still early
> days with the "cleanup" helpers, but I wonder if anyone else cares that
> the DEFINE_FREE() above does not check for NULL?

Well, the main reason for DEFINE_FREE() to check for NULL is not
correctness, but code generation. See the comment about kfree() in
<linux/cleanup.h>:

 * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though
 * kfree() is fine to be called with a NULL value. This is on purpose. This way
 * the compiler sees the end of our alloc_obj() function as [...]

with the full explanation there.

Now, whether the code wants to actually use the cleanup() helpers for
a single use-case is debatable.

But yes, if it does, I suspect it should use !IS_ERR_OR_NULL(ptr).

            Linus
Lukas Wunner Feb. 13, 2024, 6:18 a.m. UTC | #3
On Mon, Feb 12, 2024 at 03:02:46PM -0800, Dan Williams wrote:
> However, Lukas, I think Linus is right, your DEFINE_FREE() should use
> IS_ERR_OR_NULL().

Uh... that's a negative, sir. ;)

IS_ERR_OR_NULL() results in...
* a superfluous NULL pointer check in x509_key_preparse() and
* a superfluous IS_ERR check in x509_cert_parse().

IS_ERR() results *only* in...
* a superfluous IS_ERR check in x509_cert_parse().

I can get rid of the IS_ERR() check by using assume().

I can *not* get rid of the NULL pointer check because the compiler
is compiled with -fno-delete-null-pointer-checks.  (The compiler
seems to ignore __attribute__((returns_nonnull)) due to that.)


> I.e. the problem is trying to use
> __free(x509_free_certificate) in x509_cert_parse().
> 
> > --- a/crypto/asymmetric_keys/x509_cert_parser.c
> > +++ b/crypto/asymmetric_keys/x509_cert_parser.c
> > @@ -60,24 +60,24 @@ void x509_free_certificate(struct x509_certificate *cert)
> >   */
> >  struct x509_certificate *x509_cert_parse(const void *data, size_t datalen)
> >  {
> > -       struct x509_certificate *cert;
> > -       struct x509_parse_context *ctx;
> > +       struct x509_certificate *cert __free(x509_free_certificate);
> 
> ...make this:
> 
>     struct x509_certificate *cert __free(kfree);

That doesn't work I'm afraid.  x509_cert_parse() needs
x509_free_certificate() to be called in the error path,
not kfree().  See the existing code in current mainline:

x509_cert_parse() populates three sub-allocations in
struct x509_certificate (pub, sig, id) and two
sub-sub-allocations (pub->key, pub->params).

So I'd have to add five additional local variables which
get freed by __cleanup().  One of them (pub->key) requires
kfree_sensitive() instead of kfree(), so I'd need an extra
DEFINE_FREE() for that.

I haven't tried it but I suspect the result would look
terrible and David Howells wouldn't like it.


> ...and Mathieu, this should be IS_ERR_OR_NULL() to skip an unnecessary
> call to virtio_fs_cleanup_dax() at function exit that the compiler
> should elide.

My recommendation is to check for !IS_ERR() in the DEFINE_FREE() clause
and amend virtio_fs_cleanup_dax() with a "if (!dax_dev) return;" for
defensiveness in case someone calls it with a NULL pointer.

That's the best solution I could come up with for the x509_certificate
conversion.

Note that even with superfluous checks avoided, __cleanup() causes
gcc-12 to always generate two return paths.  It's very visible in
the generated code that all the stack unwinding code gets duplicated
in every function using __cleanup().  The existing Assembler code
of x509_key_preparse() and x509_cert_parse(), without __cleanup()
invocation, has only a single return path.

So __cleanup() bloats the code regardless of superfluous checks,
but future gcc versions might avoid that.  clang-15 generates much
more compact code (vmlinux is a couple hundred kBytes smaller),
but does weird things such as inlining x509_free_certificate()
in x509_cert_parse().

As you may have guessed, I've spent an inordinate amount of time
down that rabbit hole. ;(

Thanks,

Lukas
Lukas Wunner Feb. 13, 2024, 6:25 a.m. UTC | #4
On Mon, Feb 12, 2024 at 11:30:58AM -0500, Mathieu Desnoyers wrote:
> In preparation for checking whether the architecture has data cache
> aliasing within alloc_dax(), modify the error handling of virtio
> virtio_fs_setup_dax() to treat alloc_dax() -EOPNOTSUPP failure as
> non-fatal.
> 
> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> Fixes: d92576f1167c ("dax: does not work correctly with virtual aliasing caches")

That's a v4.0 commit, yet this patch uses DEFINE_FREE() which is
only available in v6.6 but not any earlier stable kernels.

So the Fixes tag feels a bit weird.

Apart from that,
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Dan Williams Feb. 13, 2024, 6:56 p.m. UTC | #5
Lukas Wunner wrote:
> On Mon, Feb 12, 2024 at 03:02:46PM -0800, Dan Williams wrote:
> > However, Lukas, I think Linus is right, your DEFINE_FREE() should use
> > IS_ERR_OR_NULL().
> 
> Uh... that's a negative, sir. ;)
> 
> IS_ERR_OR_NULL() results in...
> * a superfluous NULL pointer check in x509_key_preparse() and
> * a superfluous IS_ERR check in x509_cert_parse().
> 
> IS_ERR() results *only* in...
> * a superfluous IS_ERR check in x509_cert_parse().
> 
> I can get rid of the IS_ERR() check by using assume().
> 
> I can *not* get rid of the NULL pointer check because the compiler
> is compiled with -fno-delete-null-pointer-checks.  (The compiler
> seems to ignore __attribute__((returns_nonnull)) due to that.)
> 
> 
> > I.e. the problem is trying to use
> > __free(x509_free_certificate) in x509_cert_parse().
> > 
> > > --- a/crypto/asymmetric_keys/x509_cert_parser.c
> > > +++ b/crypto/asymmetric_keys/x509_cert_parser.c
> > > @@ -60,24 +60,24 @@ void x509_free_certificate(struct x509_certificate *cert)
> > >   */
> > >  struct x509_certificate *x509_cert_parse(const void *data, size_t datalen)
> > >  {
> > > -       struct x509_certificate *cert;
> > > -       struct x509_parse_context *ctx;
> > > +       struct x509_certificate *cert __free(x509_free_certificate);
> > 
> > ...make this:
> > 
> >     struct x509_certificate *cert __free(kfree);
> 
> That doesn't work I'm afraid.  x509_cert_parse() needs
> x509_free_certificate() to be called in the error path,
> not kfree().  See the existing code in current mainline:
> 
> x509_cert_parse() populates three sub-allocations in
> struct x509_certificate (pub, sig, id) and two
> sub-sub-allocations (pub->key, pub->params).
> 
> So I'd have to add five additional local variables which
> get freed by __cleanup().  One of them (pub->key) requires
> kfree_sensitive() instead of kfree(), so I'd need an extra
> DEFINE_FREE() for that.
> 
> I haven't tried it but I suspect the result would look
> terrible and David Howells wouldn't like it.

Ugh, that's what I was afraid of, so these cases are different.

> > ...and Mathieu, this should be IS_ERR_OR_NULL() to skip an unnecessary
> > call to virtio_fs_cleanup_dax() at function exit that the compiler
> > should elide.
> 
> My recommendation is to check for !IS_ERR() in the DEFINE_FREE() clause
> and amend virtio_fs_cleanup_dax() with a "if (!dax_dev) return;" for
> defensiveness in case someone calls it with a NULL pointer.

The internal calls (kill_dax(), put_dax()) check for NULL already, so I
don't think that's needed.

> That's the best solution I could come up with for the x509_certificate
> conversion.
> 
> Note that even with superfluous checks avoided, __cleanup() causes
> gcc-12 to always generate two return paths.  It's very visible in
> the generated code that all the stack unwinding code gets duplicated
> in every function using __cleanup().  The existing Assembler code
> of x509_key_preparse() and x509_cert_parse(), without __cleanup()
> invocation, has only a single return path.

I saw that too, some NULL checks can indeed be elided with a NULL check
in the DEFINE_FREE(), but the multiple exit paths still someimtes result
in __cleanup() using functions being larger than the goto equivalent.

> So __cleanup() bloats the code regardless of superfluous checks,
> but future gcc versions might avoid that.  clang-15 generates much
> more compact code (vmlinux is a couple hundred kBytes smaller),
> but does weird things such as inlining x509_free_certificate()
> in x509_cert_parse().
> 
> As you may have guessed, I've spent an inordinate amount of time
> down that rabbit hole. ;(

Hey, this is new and interesting stuff, glad we are grappling with it at
this level.
Mathieu Desnoyers Feb. 13, 2024, 7:46 p.m. UTC | #6
On 2024-02-13 01:25, Lukas Wunner wrote:
> On Mon, Feb 12, 2024 at 11:30:58AM -0500, Mathieu Desnoyers wrote:
>> In preparation for checking whether the architecture has data cache
>> aliasing within alloc_dax(), modify the error handling of virtio
>> virtio_fs_setup_dax() to treat alloc_dax() -EOPNOTSUPP failure as
>> non-fatal.
>>
>> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> Fixes: d92576f1167c ("dax: does not work correctly with virtual aliasing caches")
> 
> That's a v4.0 commit, yet this patch uses DEFINE_FREE() which is
> only available in v6.6 but not any earlier stable kernels.

I asked this question to Greg KH before creating this patch, and his
answer was to implement my fix for master, and stable kernels would take
care of backporting all the required dependencies.

Now if I look at latest 6.1, 5.15, 5.10, 5.4, 4.19 stable kernels,
none seem to have include/linux/cleanup.h today. But I suspect that
sooner or later relevant master branch fixes will require stable
kernels to backport cleanup.h, so why not do it now ?

Thanks,

Mathieu


> 
> So the Fixes tag feels a bit weird.
> 
> Apart from that,
> Reviewed-by: Lukas Wunner <lukas@wunner.de>
Mathieu Desnoyers Feb. 13, 2024, 8:25 p.m. UTC | #7
On 2024-02-12 18:02, Dan Williams wrote:
[...]
> ...and Mathieu, this should be IS_ERR_OR_NULL() to skip an unnecessary
> call to virtio_fs_cleanup_dax() at function exit that the compiler
> should elide.

OK, so I'll go back to the previous approach for v6:

DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))

and define the variable as:

struct dax_device *dax_dev __free(cleanup_dax) = NULL;

Thanks,

Mathieu
Greg Kroah-Hartman Feb. 14, 2024, 6:59 a.m. UTC | #8
On Tue, Feb 13, 2024 at 02:46:05PM -0500, Mathieu Desnoyers wrote:
> On 2024-02-13 01:25, Lukas Wunner wrote:
> > On Mon, Feb 12, 2024 at 11:30:58AM -0500, Mathieu Desnoyers wrote:
> > > In preparation for checking whether the architecture has data cache
> > > aliasing within alloc_dax(), modify the error handling of virtio
> > > virtio_fs_setup_dax() to treat alloc_dax() -EOPNOTSUPP failure as
> > > non-fatal.
> > > 
> > > Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > > Fixes: d92576f1167c ("dax: does not work correctly with virtual aliasing caches")
> > 
> > That's a v4.0 commit, yet this patch uses DEFINE_FREE() which is
> > only available in v6.6 but not any earlier stable kernels.
> 
> I asked this question to Greg KH before creating this patch, and his
> answer was to implement my fix for master, and stable kernels would take
> care of backporting all the required dependencies.

That is correct.

> Now if I look at latest 6.1, 5.15, 5.10, 5.4, 4.19 stable kernels,
> none seem to have include/linux/cleanup.h today. But I suspect that
> sooner or later relevant master branch fixes will require stable
> kernels to backport cleanup.h, so why not do it now ?

Yes, eventually we will need to backport cleanup.h to the older kernel
trees, I know of many patches "in flight" that are using it, so it's not
unique to this one at all, so this is fine to have.

Remember, make changes for Linus's tree, don't go through any gyrations
to make things special for stable releases, that's something to only
consider later on, if you want to.  stable kernels should have no affect
on developers OTHER than a simple cc: stable tag on stuff that they know
should be backported, unless they really want to do more than that,
that's up to them.

thanks,

greg k-h
diff mbox series

Patch

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 5f1be1da92ce..f9acd9972af2 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -16,6 +16,7 @@ 
 #include <linux/fs_context.h>
 #include <linux/fs_parser.h>
 #include <linux/highmem.h>
+#include <linux/cleanup.h>
 #include <linux/uio.h>
 #include "fuse_i.h"
 
@@ -795,8 +796,11 @@  static void virtio_fs_cleanup_dax(void *data)
 	put_dax(dax_dev);
 }
 
+DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR(_T)) virtio_fs_cleanup_dax(_T))
+
 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 {
+	struct dax_device *dax_dev __free(cleanup_dax) = ERR_PTR(-EOPNOTSUPP);
 	struct virtio_shm_region cache_reg;
 	struct dev_pagemap *pgmap;
 	bool have_cache;
@@ -804,6 +808,12 @@  static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 	if (!IS_ENABLED(CONFIG_FUSE_DAX))
 		return 0;
 
+	dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
+	if (IS_ERR(dax_dev)) {
+		int rc = PTR_ERR(dax_dev);
+		return rc == -EOPNOTSUPP ? 0 : rc;
+	}
+
 	/* Get cache region */
 	have_cache = virtio_get_shm_region(vdev, &cache_reg,
 					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
@@ -849,10 +859,7 @@  static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
 		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
 
-	fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
-	if (IS_ERR(fs->dax_dev))
-		return PTR_ERR(fs->dax_dev);
-
+	fs->dax_dev = no_free_ptr(dax_dev);
 	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
 					fs->dax_dev);
 }