
xfs: assure zeroed memory buffers for certain kmem allocations

Message ID 20190916153504.30809-1-billodo@redhat.com (mailing list archive)
State Superseded, archived

Commit Message

Bill O'Donnell Sept. 16, 2019, 3:35 p.m. UTC
Guarantee zeroed memory buffers for cases where a potential memory
leak to disk can occur. In these cases, kmem_alloc is used and
doesn't zero the buffer, opening the possibility of information
leakage to disk.

Introduce an xfs_buf_flag, _XBF_KMZ, to indicate a request for a zeroed
buffer, and use existing infrastructure (xfs_buf_allocate_memory) to
obtain the already zeroed buffer from kernel memory.

This solution avoids the performance hit that a wholesale replacement
of kmem_alloc with kmem_zalloc would incur.

Signed-off-by: Bill O'Donnell <billodo@redhat.com>
---
 fs/xfs/xfs_buf.c | 8 ++++++--
 fs/xfs/xfs_buf.h | 4 +++-
 2 files changed, 9 insertions(+), 3 deletions(-)

Comments

Eric Sandeen Sept. 16, 2019, 9:24 p.m. UTC | #1
On 9/16/19 10:35 AM, Bill O'Donnell wrote:
> Guarantee zeroed memory buffers for cases where a potential memory
> leak to disk can occur. In these cases, kmem_alloc is used and
> doesn't zero the buffer, opening the possibility of information
> leakage to disk.
> 
> Introduce an xfs_buf_flag, _XBF_KMZ, to indicate a request for a zeroed
> buffer, and use existing infrastructure (xfs_buf_allocate_memory) to
> obtain the already zeroed buffer from kernel memory.
> 
> This solution avoids the performance hit that a wholesale replacement
> of kmem_alloc with kmem_zalloc would incur.
> 
> Signed-off-by: Bill O'Donnell <billodo@redhat.com>

I think this can probably be further optimized by not obtaining zeroed
memory when we're about to fill the buffer from disk as the very
next step.

(In this case, xfs_buf_read_map calls xfs_buf_get_map and then immediately
reads the buffer from disk with _xfs_buf_read.) xfs_buf_read_map adds
XBF_READ to the flags during this process.

So I wonder if this can be simplified/optimized by just checking for XBF_READ
in xfs_buf_allocate_memory's flags, and if it's not set, then request
zeroed memory, because that indicates a buffer we'll be filling in from
memory and subsequently writing to disk.
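
A minimal sketch of that alternative, assuming the kmflag_mask plumbing
from the patch above stays in place (illustrative only, untested):

	/* inside xfs_buf_allocate_memory(), replacing the _XBF_KMZ check */
	uint	kmflag_mask = 0;

	/*
	 * A buffer without XBF_READ will be filled in by the caller and
	 * later written to disk, so its memory must start zeroed to avoid
	 * leaking stale kernel data.  A buffer with XBF_READ is overwritten
	 * by the disk read immediately after allocation, so zeroing it
	 * first would be wasted work.
	 */
	if (!(flags & XBF_READ))
		kmflag_mask |= KM_ZERO;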

-Eric

Darrick J. Wong Sept. 16, 2019, 9:30 p.m. UTC | #2
On Mon, Sep 16, 2019 at 04:24:40PM -0500, Eric Sandeen wrote:
> On 9/16/19 10:35 AM, Bill O'Donnell wrote:
> > Guarantee zeroed memory buffers for cases where a potential memory
> > leak to disk can occur. In these cases, kmem_alloc is used and
> > doesn't zero the buffer, opening the possibility of information
> > leakage to disk.
> > 
> > Introduce an xfs_buf_flag, _XBF_KMZ, to indicate a request for a zeroed
> > buffer, and use existing infrastructure (xfs_buf_allocate_memory) to
> > obtain the already zeroed buffer from kernel memory.
> > 
> > This solution avoids the performance hit that a wholesale replacement
> > of kmem_alloc with kmem_zalloc would incur.
> > 
> > Signed-off-by: Bill O'Donnell <billodo@redhat.com>
> 
> I think this can probably be further optimized by not obtaining zeroed
> memory when we're about to fill the buffer from disk as the very
> next step.
> 
> (In this case, xfs_buf_read_map calls xfs_buf_get_map and then immediately
> reads the buffer from disk with _xfs_buf_read.) xfs_buf_read_map adds
> XBF_READ to the flags during this process.
> 
> So I wonder if this can be simplified/optimized by just checking for XBF_READ
> in xfs_buf_allocate_memory's flags, and if it's not set, then request
> zeroed memory, because that indicates a buffer we'll be filling in from
> memory and subsequently writing to disk.

I was wondering that ("Why can't we allocate a zeroed buffer only for
the get_buf case so that we don't have to do that for the read_buf
case?") too.  Once you do that, you can remove all the explicit
memset calls too.
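
For instance, a caller following this pattern (a hypothetical call
site, with the xfs_buf_get() signature sketched from this era of the
tree) could drop its memset once the get path guarantees zeroed memory:

	bp = xfs_buf_get(target, blkno, numblks, 0);
	if (!bp)
		return -ENOMEM;
	/* redundant once the get path hands back zeroed memory */
	memset(bp->b_addr, 0, BBTOB(bp->b_length));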

> -Eric
> 
> > ---
> >  fs/xfs/xfs_buf.c | 8 ++++++--
> >  fs/xfs/xfs_buf.h | 4 +++-
> >  2 files changed, 9 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> > index 120ef99d09e8..916a3f782950 100644
> > --- a/fs/xfs/xfs_buf.c
> > +++ b/fs/xfs/xfs_buf.c
> > @@ -345,16 +345,19 @@ xfs_buf_allocate_memory(
> >  	unsigned short		page_count, i;
> >  	xfs_off_t		start, end;
> >  	int			error;
> > +	uint			kmflag_mask = 0;
> >  
> >  	/*
> >  	 * for buffers that are contained within a single page, just allocate
> >  	 * the memory from the heap - there's no need for the complexity of
> >  	 * page arrays to keep allocation down to order 0.
> >  	 */
> > +	if (flags & _XBF_KMZ)
> > +		kmflag_mask |= KM_ZERO;
> >  	size = BBTOB(bp->b_length);
> >  	if (size < PAGE_SIZE) {
> >  		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
> > -		bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS);
> > +		bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS | kmflag_mask);

Does this overflow 80 columns?

--D

Bill O'Donnell Sept. 16, 2019, 9:32 p.m. UTC | #3
On Mon, Sep 16, 2019 at 04:24:40PM -0500, Eric Sandeen wrote:
> On 9/16/19 10:35 AM, Bill O'Donnell wrote:
> > Guarantee zeroed memory buffers for cases where a potential memory
> > leak to disk can occur. In these cases, kmem_alloc is used and
> > doesn't zero the buffer, opening the possibility of information
> > leakage to disk.
> > 
> > Introduce an xfs_buf_flag, _XBF_KMZ, to indicate a request for a zeroed
> > buffer, and use existing infrastructure (xfs_buf_allocate_memory) to
> > obtain the already zeroed buffer from kernel memory.
> > 
> > This solution avoids the performance hit that a wholesale replacement
> > of kmem_alloc with kmem_zalloc would incur.
> > 
> > Signed-off-by: Bill O'Donnell <billodo@redhat.com>
> 
> I think this can probably be further optimized by not obtaining zeroed
> memory when we're about to fill the buffer from disk as the very
> next step.

Yep. I missed that redundancy.

> 
> (In this case, xfs_buf_read_map calls xfs_buf_get_map and then immediately
> reads the buffer from disk with _xfs_buf_read.) xfs_buf_read_map adds
> XBF_READ to the flags during this process.
> 
> So I wonder if this can be simplified/optimized by just checking for XBF_READ
> in xfs_buf_allocate_memory's flags, and if it's not set, then request
> zeroed memory, because that indicates a buffer we'll be filling in from
> memory and subsequently writing to disk.

nod.

> 
> -Eric
Dave Chinner Sept. 16, 2019, 9:54 p.m. UTC | #4
On Mon, Sep 16, 2019 at 10:35:04AM -0500, Bill O'Donnell wrote:
> Guarantee zeroed memory buffers for cases where a potential memory
> leak to disk can occur. In these cases, kmem_alloc is used and
> doesn't zero the buffer, opening the possibility of information
> leakage to disk.
> 
> Introduce an xfs_buf_flag, _XBF_KMZ, to indicate a request for a zeroed
> buffer, and use existing infrastructure (xfs_buf_allocate_memory) to
> obtain the already zeroed buffer from kernel memory.
> 
> This solution avoids the performance hit that a wholesale replacement
> of kmem_alloc with kmem_zalloc would incur.
> 
> Signed-off-by: Bill O'Donnell <billodo@redhat.com>
> ---
>  fs/xfs/xfs_buf.c | 8 ++++++--
>  fs/xfs/xfs_buf.h | 4 +++-
>  2 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 120ef99d09e8..916a3f782950 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -345,16 +345,19 @@ xfs_buf_allocate_memory(
>  	unsigned short		page_count, i;
>  	xfs_off_t		start, end;
>  	int			error;
> +	uint			kmflag_mask = 0;
>  
>  	/*
>  	 * for buffers that are contained within a single page, just allocate
>  	 * the memory from the heap - there's no need for the complexity of
>  	 * page arrays to keep allocation down to order 0.
>  	 */
> +	if (flags & _XBF_KMZ)
> +		kmflag_mask |= KM_ZERO;
>  	size = BBTOB(bp->b_length);
>  	if (size < PAGE_SIZE) {
>  		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
> -		bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS);
> +		bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS | kmflag_mask);
>  		if (!bp->b_addr) {
>  			/* low memory - use alloc_page loop instead */
>  			goto use_alloc_page;
> @@ -391,7 +394,7 @@ xfs_buf_allocate_memory(
>  		struct page	*page;
>  		uint		retries = 0;
>  retry:
> -		page = alloc_page(gfp_mask);
> +		page = alloc_page(gfp_mask | kmflag_mask);
>  		if (unlikely(page == NULL)) {
>  			if (flags & XBF_READ_AHEAD) {
>  				bp->b_page_count = i;
> @@ -683,6 +686,7 @@ xfs_buf_get_map(
>  	struct xfs_buf		*new_bp;
>  	int			error = 0;
>  
> +	flags |= _XBF_KMZ;
>  	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);

IIRC, this flag was supposed to go into xfs_trans_get_buf_map()
and direct callers of xfs_buf_get*() that weren't in the read path.
That avoids the need for zeroing pages that we are going to DMA
actual data into before it gets to users...
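
Roughly, that placement might look like this (prototype sketched from
the thread's context; the function body is elided, and the flag name
anticipates the renaming suggested further down):

	struct xfs_buf *
	xfs_trans_get_buf_map(
		struct xfs_trans	*tp,
		struct xfs_buftarg	*target,
		struct xfs_buf_map	*map,
		int			nmaps,
		xfs_buf_flags_t		flags)
	{
		/*
		 * get-path buffers are initialised by the caller rather
		 * than read from disk, so request zeroed memory up front;
		 * read-path callers never pass through here and skip the
		 * zeroing entirely.
		 */
		flags |= XBF_ZERO;
		...
	}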

>  	switch (error) {
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index f6ce17d8d848..416ff588240a 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -38,6 +38,7 @@
>  #define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
>  #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
>  #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
> +#define _XBF_KMZ	 (1 << 23)/* zeroed buffer required */

"KMZ" isn't very descriptive, and it shouldn't have a "_" prefix as
it's not internal to the buffer cache - it's a caller controlled
flag like XBF_TRYLOCK.

I'd suggest something like XBF_INIT_PAGES or XBF_ZERO to make it
clear we are asking for ithe buffer to be explicitly initialised
to zero.
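
Rendered that way, the definition and its flag-name table entry might
read (bit position and comment illustrative only):

	#define XBF_ZERO	 (1 << 7)/* buffer memory must be zeroed */

	{ XBF_ZERO,		"ZERO" }, \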

Cheers,

Dave.

Patch

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 120ef99d09e8..916a3f782950 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -345,16 +345,19 @@ xfs_buf_allocate_memory(
 	unsigned short		page_count, i;
 	xfs_off_t		start, end;
 	int			error;
+	uint			kmflag_mask = 0;
 
 	/*
 	 * for buffers that are contained within a single page, just allocate
 	 * the memory from the heap - there's no need for the complexity of
 	 * page arrays to keep allocation down to order 0.
 	 */
+	if (flags & _XBF_KMZ)
+		kmflag_mask |= KM_ZERO;
 	size = BBTOB(bp->b_length);
 	if (size < PAGE_SIZE) {
 		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
-		bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS);
+		bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS | kmflag_mask);
 		if (!bp->b_addr) {
 			/* low memory - use alloc_page loop instead */
 			goto use_alloc_page;
@@ -391,7 +394,7 @@ xfs_buf_allocate_memory(
 		struct page	*page;
 		uint		retries = 0;
 retry:
-		page = alloc_page(gfp_mask);
+		page = alloc_page(gfp_mask | kmflag_mask);
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
@@ -683,6 +686,7 @@ xfs_buf_get_map(
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
+	flags |= _XBF_KMZ;
 	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
 
 	switch (error) {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index f6ce17d8d848..416ff588240a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -38,6 +38,7 @@
 #define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
 #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
+#define _XBF_KMZ	 (1 << 23)/* zeroed buffer required */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -54,7 +55,8 @@ typedef unsigned int xfs_buf_flags_t;
 	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
-	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
+	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
+	{ _XBF_KMZ,             "KMEM_Z" }
 
 
 /*