diff mbox series

[RFC,5/8] mm: Take placement mappings gap into account

Message ID 20240215231332.1556787-6-rick.p.edgecombe@intel.com (mailing list archive)
State New
Headers show
Series Cover a guard gap corner case | expand

Commit Message

Edgecombe, Rick P Feb. 15, 2024, 11:13 p.m. UTC
When memory is being placed, mmap() will take care to respect the guard
gaps of certain types of memory (VM_SHADOWSTACK, VM_GROWSUP and
VM_GROWSDOWN). In order to ensure guard gaps between mappings, mmap()
needs to consider two things:
 1. That the new mapping isn’t placed in any existing mapping’s guard
    gaps.
 2. That the new mapping isn’t placed such that any existing mappings
    are in *its* guard gaps.

The long-standing behavior of mmap() is to ensure 1, but not take any care
around 2. So for example, if there is a PAGE_SIZE free area, and an
mmap() of PAGE_SIZE size with a type that has a guard gap (e.g. a shadow
stack) is being placed, mmap() may place the new mapping in the PAGE_SIZE
free area. Then the mapping that is supposed to have a guard gap will not
have a gap to the adjacent VMA.

For MAP_GROWSDOWN/VM_GROWSDOWN and MAP_GROWSUP/VM_GROWSUP this has not
been a problem in practice because applications place these kinds of
mappings very early, when there are not many mappings to find a space
between. But for shadow stacks, they may be placed throughout the lifetime
of the application.

So define a VM_UNMAPPED_START_GAP_SET flag to specify that a start_gap
field has been set, as most vm_unmapped_area_info structs are not zeroed,
so the added field will often contain garbage. Use
VM_UNMAPPED_START_GAP_SET in unmapped_area/_topdown() to find a space that
includes the guard gap for the new mapping. Take care to not interfere
with the alignment.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 include/linux/mm.h |  2 ++
 mm/mmap.c          | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

Comments

Kirill A . Shutemov Feb. 16, 2024, 1:12 p.m. UTC | #1
On Thu, Feb 15, 2024 at 03:13:29PM -0800, Rick Edgecombe wrote:
> When memory is being placed, mmap() will take care to respect the guard
> gaps of certain types of memory (VM_SHADOWSTACK, VM_GROWSUP and
> VM_GROWSDOWN). In order to ensure guard gaps between mappings, mmap()
> needs to consider two things:
>  1. That the new mapping isn’t placed in an any existing mappings guard
>     gaps.
>  2. That the new mapping isn’t placed such that any existing mappings
>     are not in *its* guard gaps.
> 
> The long standing behavior of mmap() is to ensure 1, but not take any care
> around 2. So for example, if there is a PAGE_SIZE free area, and a
> mmap() with a PAGE_SIZE size, and a type that has a guard gap is being
> placed, mmap() may place the shadow stack in the PAGE_SIZE free area. Then
> the mapping that is supposed to have a guard gap will not have a gap to
> the adjacent VMA.
> 
> For MAP_GROWSDOWN/VM_GROWSDOWN and MAP_GROWSUP/VM_GROWSUP this has not
> been a problem in practice because applications place these kinds of
> mappings very early, when there is not many mappings to find a space
> between. But for shadow stacks, they may be placed throughout the lifetime
> of the application.
> 
> So define a VM_UNMAPPED_START_GAP_SET flag to specify that a start_gap
> field has been set, as most vm_unmapped_area_info structs are not zeroed,
> so the added field will often contain garbage. Use
> VM_UNMAPPED_START_GAP_SET in unmapped_area/_topdown() to find a space that
> includes the guard gap for the new mapping. Take care to not interfere
> with the alignment.
> 
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> ---
>  include/linux/mm.h |  2 ++
>  mm/mmap.c          | 21 ++++++++++++++-------
>  2 files changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 9addf16dbf18..160bb6db7a16 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3393,12 +3393,14 @@ extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
>  
>  struct vm_unmapped_area_info {
>  #define VM_UNMAPPED_AREA_TOPDOWN 1
> +#define VM_UNMAPPED_START_GAP_SET 2

The flag seems to be a workaround to avoid clearing the structure. I think
users need to be updated to clear the structure. In most cases reworking the
code to use a C99 struct initializer would do the trick.

>  	unsigned long flags;
>  	unsigned long length;
>  	unsigned long low_limit;
>  	unsigned long high_limit;
>  	unsigned long align_mask;
>  	unsigned long align_offset;
> +	unsigned long start_gap;
>  };
>  
>  extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 936d728ba1ca..1b6c333656f9 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1567,14 +1567,17 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
>   */
>  static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
>  {
> -	unsigned long length, gap;
> +	unsigned long length, gap, start_gap = 0;
>  	unsigned long low_limit, high_limit;
>  	struct vm_area_struct *tmp;
>  
>  	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
>  
> +	if (info->flags & VM_UNMAPPED_START_GAP_SET)
> +		start_gap = info->start_gap;
> +
>  	/* Adjust search length to account for worst case alignment overhead */
> -	length = info->length + info->align_mask;
> +	length = info->length + info->align_mask + start_gap;
>  	if (length < info->length)
>  		return -ENOMEM;
>  
> @@ -1586,7 +1589,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
>  	if (mas_empty_area(&mas, low_limit, high_limit - 1, length))
>  		return -ENOMEM;
>  
> -	gap = mas.index;
> +	gap = mas.index + start_gap;
>  	gap += (info->align_offset - gap) & info->align_mask;

Do we care to check if alignment itself would satisfy start_gap
requirement?

>  	tmp = mas_next(&mas, ULONG_MAX);
>  	if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
> @@ -1619,13 +1622,17 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
>   */
>  static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
>  {
> -	unsigned long length, gap, gap_end;
> +	unsigned long length, gap, gap_end, start_gap = 0;
>  	unsigned long low_limit, high_limit;
>  	struct vm_area_struct *tmp;
>  
>  	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
> +
> +	if (info->flags & VM_UNMAPPED_START_GAP_SET)
> +		start_gap = info->start_gap;
> +
>  	/* Adjust search length to account for worst case alignment overhead */
> -	length = info->length + info->align_mask;
> +	length = info->length + info->align_mask + start_gap;
>  	if (length < info->length)
>  		return -ENOMEM;
>  
> @@ -1832,7 +1839,7 @@ unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *fi
>  
>  unsigned long
>  __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
> -		unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
> +		    unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)

Unrelated space change.

>  {
>  	unsigned long (*get_area)(struct file *, unsigned long,
>  				  unsigned long, unsigned long, unsigned long)
> @@ -1883,7 +1890,7 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
>  
>  unsigned long
>  get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
> -		unsigned long pgoff, unsigned long flags)
> +		  unsigned long pgoff, unsigned long flags)

Ditto.

>  {
>  	return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
>  }
> -- 
> 2.34.1
>
Edgecombe, Rick P Feb. 17, 2024, 1:11 a.m. UTC | #2
On Fri, 2024-02-16 at 15:12 +0200, Kirill A. Shutemov wrote:
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 9addf16dbf18..160bb6db7a16 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3393,12 +3393,14 @@ extern unsigned long __must_check
> > vm_mmap(struct file *, unsigned long,
> >   
> >   struct vm_unmapped_area_info {
> >   #define VM_UNMAPPED_AREA_TOPDOWN 1
> > +#define VM_UNMAPPED_START_GAP_SET 2
> 
> The flag seems to be an workaround not to clear the structure. I
> think
> users need to be updated to clear the structure. In most cases rework
> code
> to use C99 struct initializer would do the trick.

Yea, it's just a treewide change to initialize them all. It should be
easy to review at least. I'll add a patch to do this.

> > @@ -1586,7 +1589,7 @@ static unsigned long unmapped_area(struct
> > vm_unmapped_area_info *info)
> >         if (mas_empty_area(&mas, low_limit, high_limit - 1,
> > length))
> >                 return -ENOMEM;
> >   
> > -       gap = mas.index;
> > +       gap = mas.index + start_gap;
> >         gap += (info->align_offset - gap) & info->align_mask;
> 
> Do we care to check if alignment itself would satisfy start_gap
> requirement?

Ugh, I think actually the alignment stuff clobbers the guard gap in the
search up scenario. I'm also seeing some weird results as I throw test
values into the existing logic, but very likely I just need to look at
this not late on a Friday. Thanks for pointing it out.


> >   unsigned long
> >   __get_unmapped_area(struct file *file, unsigned long addr,
> > unsigned long len,
> > -               unsigned long pgoff, unsigned long flags,
> > vm_flags_t vm_flags)
> > +                   unsigned long pgoff, unsigned long flags,
> > vm_flags_t vm_flags)
> 
> Unrelated space change.

Sure.
Edgecombe, Rick P Feb. 20, 2024, 4:48 p.m. UTC | #3
On Fri, 2024-02-16 at 17:11 -0800, Rick Edgecombe wrote:
> > Do we care to check if alignment itself would satisfy start_gap
> > requirement?
> 
> Ugh, I think actually the alignment stuff clobbers the guard gap in
> the
> search up scenario. I'm also seeing some weird results as I throw
> test
> values into the existing logic, but very likely I just need to look
> at
> this not late on a Friday. Thanks for pointing it out.

Ok, playing around with the address adjustment math in a separate test
program, I think it is all ok functionally. But there are two gotchas:

1. The existing math for search up assumes that the requested length is
bigger than the alignment mask. If the length is smaller, non-canonical
addresses can result (more than ->high_limit). I don't think
any callers can call with this combination so it's fine functionally.

2. The newly added code can only hit the scenario you highlight if the
start gap is more than the alignment size. If alignment mask is more
than the start gap, the alignment will only shift the address more than
the adjustment made for the start gap.

So skipping the start gap adjustment when the alignment already adds the
necessary gap would not change the result; it would just add a branch.
Similarly, if the start gap fulfills the alignment, there is no
adjustment during the alignment step.


I think maybe I'll add a comment covering both gotchas and leave the
logic as is, unless there are any objections. Or maybe a VM_WARN_ON,
hmm.
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9addf16dbf18..160bb6db7a16 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3393,12 +3393,14 @@  extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
 
 struct vm_unmapped_area_info {
 #define VM_UNMAPPED_AREA_TOPDOWN 1
+#define VM_UNMAPPED_START_GAP_SET 2
 	unsigned long flags;
 	unsigned long length;
 	unsigned long low_limit;
 	unsigned long high_limit;
 	unsigned long align_mask;
 	unsigned long align_offset;
+	unsigned long start_gap;
 };
 
 extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
diff --git a/mm/mmap.c b/mm/mmap.c
index 936d728ba1ca..1b6c333656f9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1567,14 +1567,17 @@  static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
  */
 static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
-	unsigned long length, gap;
+	unsigned long length, gap, start_gap = 0;
 	unsigned long low_limit, high_limit;
 	struct vm_area_struct *tmp;
 
 	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
 
+	if (info->flags & VM_UNMAPPED_START_GAP_SET)
+		start_gap = info->start_gap;
+
 	/* Adjust search length to account for worst case alignment overhead */
-	length = info->length + info->align_mask;
+	length = info->length + info->align_mask + start_gap;
 	if (length < info->length)
 		return -ENOMEM;
 
@@ -1586,7 +1589,7 @@  static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	if (mas_empty_area(&mas, low_limit, high_limit - 1, length))
 		return -ENOMEM;
 
-	gap = mas.index;
+	gap = mas.index + start_gap;
 	gap += (info->align_offset - gap) & info->align_mask;
 	tmp = mas_next(&mas, ULONG_MAX);
 	if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
@@ -1619,13 +1622,17 @@  static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  */
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
-	unsigned long length, gap, gap_end;
+	unsigned long length, gap, gap_end, start_gap = 0;
 	unsigned long low_limit, high_limit;
 	struct vm_area_struct *tmp;
 
 	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+
+	if (info->flags & VM_UNMAPPED_START_GAP_SET)
+		start_gap = info->start_gap;
+
 	/* Adjust search length to account for worst case alignment overhead */
-	length = info->length + info->align_mask;
+	length = info->length + info->align_mask + start_gap;
 	if (length < info->length)
 		return -ENOMEM;
 
@@ -1832,7 +1839,7 @@  unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *fi
 
 unsigned long
 __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
-		unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
+		    unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
 {
 	unsigned long (*get_area)(struct file *, unsigned long,
 				  unsigned long, unsigned long, unsigned long)
@@ -1883,7 +1890,7 @@  __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
 unsigned long
 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
-		unsigned long pgoff, unsigned long flags)
+		  unsigned long pgoff, unsigned long flags)
 {
 	return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
 }