diff mbox series

[v6,2/4] x86/modules: Increase randomization for modules

Message ID 1536874298-23492-3-git-send-email-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Headers show
Series KASLR feature to randomize each loadable module | expand

Commit Message

Edgecombe, Rick P Sept. 13, 2018, 9:31 p.m. UTC
This changes the behavior of the KASLR logic for allocating memory for the text
sections of loadable modules. It randomizes the location of each module text
section with about 17 bits of entropy in typical use. This is enabled on X86_64
only. For 32 bit, the behavior is unchanged.

It refactors existing code around module randomization somewhat. There are now
three different behaviors for x86 module_alloc depending on config:
RANDOMIZE_BASE=n; RANDOMIZE_BASE=y with ARCH=x86_64; and RANDOMIZE_BASE=y with
ARCH=i386. The refactor of the existing code is to try to clearly show what
those behaviors are without having three separate versions or threading the
behaviors in a bunch of little spots. The reason it is not enabled on 32 bit
yet is because the module space is much smaller and simulations haven't been
run to see how it performs.

The new algorithm breaks the module space in two, a random area and a backup
area. It first tries to allocate at a number of randomly located starting pages
inside the random section without purging any lazy free vmap areas and
triggering the associated TLB flush. If this fails, it will try again a number
of times allowing for purges if needed. It also saves any position that could
have succeeded if it was allowed to purge, which doubles the chances of finding
a spot that would fit. Finally if those both fail to find a position it will
allocate in the backup area. The backup area base will be offset in the same
way as the current algorithm does for the base area, 1024 possible locations.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 arch/x86/include/asm/pgtable_64_types.h |   7 ++
 arch/x86/kernel/module.c                | 165 +++++++++++++++++++++++++++-----
 2 files changed, 149 insertions(+), 23 deletions(-)

Comments

Kees Cook Sept. 21, 2018, 7:05 p.m. UTC | #1
On Thu, Sep 13, 2018 at 2:31 PM, Rick Edgecombe
<rick.p.edgecombe@intel.com> wrote:
> This changes the behavior of the KASLR logic for allocating memory for the text
> sections of loadable modules. It randomizes the location of each module text
> section with about 17 bits of entropy in typical use. This is enabled on X86_64
> only. For 32 bit, the behavior is unchanged.
>
> It refactors existing code around module randomization somewhat. There are now
> three different behaviors for x86 module_alloc depending on config.
> RANDOMIZE_BASE=n, and RANDOMIZE_BASE=y ARCH=x86_64, and RANDOMIZE_BASE=y
> ARCH=i386. The refactor of the existing code is to try to clearly show what
> those behaviors are without having three separate versions or threading the
> behaviors in a bunch of little spots. The reason it is not enabled on 32 bit
> yet is because the module space is much smaller and simulations haven't been
> run to see how it performs.
>
> The new algorithm breaks the module space in two, a random area and a backup
> area. It first tries to allocate at a number of randomly located starting pages
> inside the random section without purging any lazy free vmap areas and
> triggering the associated TLB flush. If this fails, it will try again a number
> of times allowing for purges if needed. It also saves any position that could
> have succeeded if it was allowed to purge, which doubles the chances of finding
> a spot that would fit. Finally if those both fail to find a position it will
> allocate in the backup area. The backup area base will be offset in the same
> way as the current algorithm does for the base area, 1024 possible locations.
>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>

I'm excited to get fine-grained module randomization. I think it's a
good first step to getting other fine-grained KASLR in other places.
Thanks for working on this!

> ---
>  arch/x86/include/asm/pgtable_64_types.h |   7 ++
>  arch/x86/kernel/module.c                | 165 +++++++++++++++++++++++++++-----
>  2 files changed, 149 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> index 04edd2d..5e26369 100644
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -143,6 +143,13 @@ extern unsigned int ptrs_per_p4d;
>  #define MODULES_END            _AC(0xffffffffff000000, UL)
>  #define MODULES_LEN            (MODULES_END - MODULES_VADDR)
>
> +/*
> + * Dedicate the first part of the module space to a randomized area when KASLR
> + * is in use.  Leave the remaining part for a fallback if we are unable to
> + * allocate in the random area.
> + */
> +#define MODULES_RAND_LEN       PAGE_ALIGN((MODULES_LEN/3)*2)
> +
>  #define ESPFIX_PGD_ENTRY       _AC(-2, UL)
>  #define ESPFIX_BASE_ADDR       (ESPFIX_PGD_ENTRY << P4D_SHIFT)
>
> diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
> index f58336a..d50a0a0 100644
> --- a/arch/x86/kernel/module.c
> +++ b/arch/x86/kernel/module.c
> @@ -48,34 +48,151 @@ do {                                                       \
>  } while (0)
>  #endif
>
> -#ifdef CONFIG_RANDOMIZE_BASE
> +#if defined(CONFIG_X86_64) && defined(CONFIG_RANDOMIZE_BASE)
> +static inline unsigned long get_modules_rand_len(void)
> +{
> +       return MODULES_RAND_LEN;
> +}
> +#else
> +static inline unsigned long get_modules_rand_len(void)
> +{
> +       BUILD_BUG();
> +       return 0;
> +}
> +
> +inline bool kaslr_enabled(void);
> +#endif
> +
> +static inline int kaslr_randomize_each_module(void)
> +{
> +       return IS_ENABLED(CONFIG_RANDOMIZE_BASE)
> +               && IS_ENABLED(CONFIG_X86_64)
> +               && kaslr_enabled();
> +}
> +
> +static inline int kaslr_randomize_base(void)
> +{
> +       return IS_ENABLED(CONFIG_RANDOMIZE_BASE)
> +               && !IS_ENABLED(CONFIG_X86_64)
> +               && kaslr_enabled();
> +}
> +
>  static unsigned long module_load_offset;
> +static const unsigned long NR_NO_PURGE = 5000;
> +static const unsigned long NR_TRY_PURGE = 5000;
>
>  /* Mutex protects the module_load_offset. */
>  static DEFINE_MUTEX(module_kaslr_mutex);
>
>  static unsigned long int get_module_load_offset(void)
>  {
> -       if (kaslr_enabled()) {
> -               mutex_lock(&module_kaslr_mutex);
> -               /*
> -                * Calculate the module_load_offset the first time this
> -                * code is called. Once calculated it stays the same until
> -                * reboot.
> -                */
> -               if (module_load_offset == 0)
> -                       module_load_offset =
> -                               (get_random_int() % 1024 + 1) * PAGE_SIZE;
> -               mutex_unlock(&module_kaslr_mutex);
> -       }
> +       mutex_lock(&module_kaslr_mutex);
> +       /*
> +        * Calculate the module_load_offset the first time this
> +        * code is called. Once calculated it stays the same until
> +        * reboot.
> +        */
> +       if (module_load_offset == 0)
> +               module_load_offset = (get_random_int() % 1024 + 1) * PAGE_SIZE;
> +       mutex_unlock(&module_kaslr_mutex);
> +
>         return module_load_offset;
>  }
> -#else
> -static unsigned long int get_module_load_offset(void)
> +
> +static unsigned long get_module_vmalloc_start(void)
>  {
> -       return 0;
> +       if (kaslr_randomize_each_module())
> +               return MODULES_VADDR + get_modules_rand_len()
> +                                       + get_module_load_offset();
> +       else if (kaslr_randomize_base())
> +               return MODULES_VADDR + get_module_load_offset();
> +
> +       return MODULES_VADDR;
> +}

I would find this much more readable as:

static unsigned long get_module_vmalloc_start(void)
{
       unsigned long addr = MODULES_VADDR;

       if (kaslr_randomize_base())
              addr += get_module_load_offset();

       if (kaslr_randomize_each_module())
               addr += get_modules_rand_len();

       return addr;
}



> +
> +static void *try_module_alloc(unsigned long addr, unsigned long size,
> +                                       int try_purge)
> +{
> +       const unsigned long vm_flags = 0;
> +
> +       return __vmalloc_node_try_addr(addr, size, GFP_KERNEL, PAGE_KERNEL_EXEC,
> +                                       vm_flags, NUMA_NO_NODE, try_purge,
> +                                       __builtin_return_address(0));
> +}
> +
> +/*
> + * Find a random address to try that won't obviously not fit. Random areas are
> + * allowed to overflow into the backup area
> + */
> +static unsigned long get_rand_module_addr(unsigned long size)
> +{
> +       unsigned long nr_max_pos = (MODULES_LEN - size) / MODULE_ALIGN + 1;
> +       unsigned long nr_rnd_pos = get_modules_rand_len() / MODULE_ALIGN;
> +       unsigned long nr_pos = min(nr_max_pos, nr_rnd_pos);
> +
> +       unsigned long module_position_nr = get_random_long() % nr_pos;
> +       unsigned long offset = module_position_nr * MODULE_ALIGN;
> +
> +       return MODULES_VADDR + offset;
> +}
> +
> +/*
> + * Try to allocate in the random area. First 5000 times without purging, then
> + * 5000 times with purging. If these fail, return NULL.
> + */
> +static void *try_module_randomize_each(unsigned long size)
> +{
> +       void *p = NULL;
> +       unsigned int i;
> +       unsigned long last_lazy_free_blocked = 0;
> +
> +       /* This will have a guard page */
> +       unsigned long va_size = PAGE_ALIGN(size) + PAGE_SIZE;
> +
> +       if (!kaslr_randomize_each_module())
> +               return NULL;
> +
> +       /* Make sure there is at least one address that might fit. */
> +       if (va_size < PAGE_ALIGN(size) || va_size > MODULES_LEN)
> +               return NULL;
> +
> +       /* Try to find a spot that doesn't need a lazy purge */
> +       for (i = 0; i < NR_NO_PURGE; i++) {
> +               unsigned long addr = get_rand_module_addr(va_size);
> +
> +               /* First try to avoid having to purge */
> +               p = try_module_alloc(addr, size, 0);
> +
> +               /*
> +                * Save the last value that was blocked by a
> +                * lazy purge area.
> +                */
> +               if (IS_ERR(p) && PTR_ERR(p) == -EUCLEAN)
> +                       last_lazy_free_blocked = addr;
> +               else if (!IS_ERR(p))
> +                       return p;
> +       }
> +
> +       /* Try the most recent spot that could be used after a lazy purge */
> +       if (last_lazy_free_blocked) {
> +               p = try_module_alloc(last_lazy_free_blocked, size, 1);
> +
> +               if (!IS_ERR(p))
> +                       return p;
> +       }
> +
> +       /* Look for more spots and allow lazy purges */
> +       for (i = 0; i < NR_TRY_PURGE; i++) {
> +               unsigned long addr = get_rand_module_addr(va_size);
> +
> +               /* Give up and allow for purges */
> +               p = try_module_alloc(addr, size, 1);
> +
> +               if (!IS_ERR(p))
> +                       return p;
> +       }
> +       return NULL;
>  }
> -#endif
>
>  void *module_alloc(unsigned long size)
>  {
> @@ -84,16 +201,18 @@ void *module_alloc(unsigned long size)
>         if (PAGE_ALIGN(size) > MODULES_LEN)
>                 return NULL;
>
> -       p = __vmalloc_node_range(size, MODULE_ALIGN,
> -                                   MODULES_VADDR + get_module_load_offset(),
> -                                   MODULES_END, GFP_KERNEL,
> -                                   PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
> -                                   __builtin_return_address(0));
> +       p = try_module_randomize_each(size);
> +
> +       if (!p)
> +               p = __vmalloc_node_range(size, MODULE_ALIGN,
> +                               get_module_vmalloc_start(), MODULES_END,
> +                               GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
> +                               NUMA_NO_NODE, __builtin_return_address(0));

Instead of having two open-coded __vmalloc_node_range() calls left in
this after the change, can this be done in terms of a call to
try_module_alloc() instead? I see they're slightly different, but it
might be nice for making the two paths share more code.

> +
>         if (p && (kasan_module_alloc(p, size) < 0)) {
>                 vfree(p);
>                 return NULL;
>         }
> -
>         return p;
>  }
>
> --
> 2.7.4
>

Looks promising!

-Kees
Edgecombe, Rick P Sept. 24, 2018, 6:57 p.m. UTC | #2
On Fri, 2018-09-21 at 12:05 -0700, Kees Cook wrote:
> On Thu, Sep 13, 2018 at 2:31 PM, Rick Edgecombe
> <rick.p.edgecombe@intel.com> wrote:
> I would find this much more readable as:
> static unsigned long get_module_vmalloc_start(void)
> {
>        unsigned long addr = MODULES_VADDR;
> 
>        if (kaslr_randomize_base())
>               addr += get_module_load_offset();
> 
>        if (kaslr_randomize_each_module())
>                addr += get_modules_rand_len();
> 
>        return addr;
> }
Thanks, that looks better.

> 
> >  void *module_alloc(unsigned long size)
> >  {
> > @@ -84,16 +201,18 @@ void *module_alloc(unsigned long size)
> >         if (PAGE_ALIGN(size) > MODULES_LEN)
> >                 return NULL;
> > 
> > -       p = __vmalloc_node_range(size, MODULE_ALIGN,
> > -                                   MODULES_VADDR +
> > get_module_load_offset(),
> > -                                   MODULES_END, GFP_KERNEL,
> > -                                   PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
> > -                                   __builtin_return_address(0));
> > +       p = try_module_randomize_each(size);
> > +
> > +       if (!p)
> > +               p = __vmalloc_node_range(size, MODULE_ALIGN,
> > +                               get_module_vmalloc_start(), MODULES_END,
> > +                               GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
> > +                               NUMA_NO_NODE, __builtin_return_address(0));
> Instead of having two open-coded __vmalloc_node_range() calls left in
> this after the change, can this be done in terms of a call to
> try_module_alloc() instead? I see they're slightly different, but it
> might be nice for making the two paths share more code.
Not sure what you mean. Across the whole change, there is one call
to __vmalloc_node_range, and one to __vmalloc_node_try_addr.
Kees Cook Sept. 24, 2018, 7:58 p.m. UTC | #3
On Mon, Sep 24, 2018 at 11:57 AM, Edgecombe, Rick P
<rick.p.edgecombe@intel.com> wrote:
> On Fri, 2018-09-21 at 12:05 -0700, Kees Cook wrote:
>> On Thu, Sep 13, 2018 at 2:31 PM, Rick Edgecombe
>> <rick.p.edgecombe@intel.com> wrote:
>> I would find this much more readable as:
>> static unsigned long get_module_vmalloc_start(void)
>> {
>>        unsigned long addr = MODULES_VADDR;
>>
>>        if (kaslr_randomize_base())
>>               addr += get_module_load_offset();
>>
>>        if (kaslr_randomize_each_module())
>>                addr += get_modules_rand_len();
>>
>>        return addr;
>> }
> Thanks, that looks better.
>
>>
>> >  void *module_alloc(unsigned long size)
>> >  {
>> > @@ -84,16 +201,18 @@ void *module_alloc(unsigned long size)
>> >         if (PAGE_ALIGN(size) > MODULES_LEN)
>> >                 return NULL;
>> >
>> > -       p = __vmalloc_node_range(size, MODULE_ALIGN,
>> > -                                   MODULES_VADDR +
>> > get_module_load_offset(),
>> > -                                   MODULES_END, GFP_KERNEL,
>> > -                                   PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
>> > -                                   __builtin_return_address(0));
>> > +       p = try_module_randomize_each(size);
>> > +
>> > +       if (!p)
>> > +               p = __vmalloc_node_range(size, MODULE_ALIGN,
>> > +                               get_module_vmalloc_start(), MODULES_END,
>> > +                               GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
>> > +                               NUMA_NO_NODE, __builtin_return_address(0));
>> Instead of having two open-coded __vmalloc_node_range() calls left in
>> this after the change, can this be done in terms of a call to
>> try_module_alloc() instead? I see they're slightly different, but it
>> might be nice for making the two paths share more code.
> Not sure what you mean. Across the whole change, there is one call
> to __vmalloc_node_range, and one to __vmalloc_node_try_addr.

I guess I meant the vmalloc calls -- one for node_range and one for
node_try_addr. I was wondering if the logic could be combined in some
way so that the __vmalloc_node_range() could be made in terms of the
helper that try_module_randomize_each() uses. But this could just
be me hoping for nice-to-read changes. ;)

-Kees
Edgecombe, Rick P Sept. 24, 2018, 9:27 p.m. UTC | #4
On Mon, 2018-09-24 at 12:58 -0700, Kees Cook wrote:
> On Mon, Sep 24, 2018 at 11:57 AM, Edgecombe, Rick P
> <rick.p.edgecombe@intel.com> wrote:
> > > Instead of having two open-coded __vmalloc_node_range() calls left in
> > > this after the change, can this be done in terms of a call to
> > > try_module_alloc() instead? I see they're slightly different, but it
> > > might be nice for making the two paths share more code.
> > Not sure what you mean. Across the whole change, there is one call
> > to __vmalloc_node_range, and one to __vmalloc_node_try_addr.
> I guess I meant the vmalloc calls -- one for node_range and one for
> node_try_addr. I was wondering if the logic could be combined in some
> way so that the __vmalloc_node_range() could be made in terms of the
> helper that try_module_randomize_each() uses. But this could just
> be me hoping for nice-to-read changes. ;)
> 
> -Kees
One thing I had been considering was to move the whole "try random locations,
then use backup" logic to vmalloc.c, and just have parameters for random area
size, number of tries, etc. This way it could possibly be re-used for other
architectures for modules. Also on our list is to look at randomizing vmalloc
space (especially stacks), which may or may not involve using a similar method.

So maybe a bit of premature refactoring, but it would also clean up the code in
module.c. Do you think it would be worth it?

Thanks,

Rick
Kees Cook Sept. 24, 2018, 9:29 p.m. UTC | #5
On Mon, Sep 24, 2018 at 2:27 PM, Edgecombe, Rick P
<rick.p.edgecombe@intel.com> wrote:
> On Mon, 2018-09-24 at 12:58 -0700, Kees Cook wrote:
>> On Mon, Sep 24, 2018 at 11:57 AM, Edgecombe, Rick P
>> <rick.p.edgecombe@intel.com> wrote:
>> > > Instead of having two open-coded __vmalloc_node_range() calls left in
>> > > this after the change, can this be done in terms of a call to
>> > > try_module_alloc() instead? I see they're slightly different, but it
>> > > might be nice for making the two paths share more code.
>> > Not sure what you mean. Across the whole change, there is one call
>> > to __vmalloc_node_range, and one to __vmalloc_node_try_addr.
>> I guess I meant the vmalloc calls -- one for node_range and one for
>> node_try_addr. I was wondering if the logic could be combined in some
>> way so that the __vmalloc_node_range() could be made in terms of the
>> helper that try_module_randomize_each() uses. But this could just
>> be me hoping for nice-to-read changes. ;)
>>
>> -Kees
> One thing I had been considering was to move the whole "try random locations,
> then use backup" logic to vmalloc.c, and just have parameters for random area
> size, number of tries, etc. This way it could possibly be re-used for other
> architectures for modules. Also on our list is to look at randomizing vmalloc
> space (especially stacks), which may or may not involve using a similar method.
>
> So maybe a bit of premature refactoring, but it would also clean up the code in
> module.c. Do you think it would be worth it?

I'd love to hear thoughts from -mm folks. Andrew, Matthew?

-Kees
diff mbox series

Patch

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 04edd2d..5e26369 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -143,6 +143,13 @@  extern unsigned int ptrs_per_p4d;
 #define MODULES_END		_AC(0xffffffffff000000, UL)
 #define MODULES_LEN		(MODULES_END - MODULES_VADDR)
 
+/*
+ * Dedicate the first part of the module space to a randomized area when KASLR
+ * is in use.  Leave the remaining part for a fallback if we are unable to
+ * allocate in the random area.
+ */
+#define MODULES_RAND_LEN	PAGE_ALIGN((MODULES_LEN/3)*2)
+
 #define ESPFIX_PGD_ENTRY	_AC(-2, UL)
 #define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f58336a..d50a0a0 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -48,34 +48,151 @@  do {							\
 } while (0)
 #endif
 
-#ifdef CONFIG_RANDOMIZE_BASE
+#if defined(CONFIG_X86_64) && defined(CONFIG_RANDOMIZE_BASE)
+static inline unsigned long get_modules_rand_len(void)
+{
+	return MODULES_RAND_LEN;
+}
+#else
+static inline unsigned long get_modules_rand_len(void)
+{
+	BUILD_BUG();
+	return 0;
+}
+
+inline bool kaslr_enabled(void);
+#endif
+
+static inline int kaslr_randomize_each_module(void)
+{
+	return IS_ENABLED(CONFIG_RANDOMIZE_BASE)
+		&& IS_ENABLED(CONFIG_X86_64)
+		&& kaslr_enabled();
+}
+
+static inline int kaslr_randomize_base(void)
+{
+	return IS_ENABLED(CONFIG_RANDOMIZE_BASE)
+		&& !IS_ENABLED(CONFIG_X86_64)
+		&& kaslr_enabled();
+}
+
 static unsigned long module_load_offset;
+static const unsigned long NR_NO_PURGE = 5000;
+static const unsigned long NR_TRY_PURGE = 5000;
 
 /* Mutex protects the module_load_offset. */
 static DEFINE_MUTEX(module_kaslr_mutex);
 
 static unsigned long int get_module_load_offset(void)
 {
-	if (kaslr_enabled()) {
-		mutex_lock(&module_kaslr_mutex);
-		/*
-		 * Calculate the module_load_offset the first time this
-		 * code is called. Once calculated it stays the same until
-		 * reboot.
-		 */
-		if (module_load_offset == 0)
-			module_load_offset =
-				(get_random_int() % 1024 + 1) * PAGE_SIZE;
-		mutex_unlock(&module_kaslr_mutex);
-	}
+	mutex_lock(&module_kaslr_mutex);
+	/*
+	 * Calculate the module_load_offset the first time this
+	 * code is called. Once calculated it stays the same until
+	 * reboot.
+	 */
+	if (module_load_offset == 0)
+		module_load_offset = (get_random_int() % 1024 + 1) * PAGE_SIZE;
+	mutex_unlock(&module_kaslr_mutex);
+
 	return module_load_offset;
 }
-#else
-static unsigned long int get_module_load_offset(void)
+
+static unsigned long get_module_vmalloc_start(void)
 {
-	return 0;
+	if (kaslr_randomize_each_module())
+		return MODULES_VADDR + get_modules_rand_len()
+					+ get_module_load_offset();
+	else if (kaslr_randomize_base())
+		return MODULES_VADDR + get_module_load_offset();
+
+	return MODULES_VADDR;
+}
+
+static void *try_module_alloc(unsigned long addr, unsigned long size,
+					int try_purge)
+{
+	const unsigned long vm_flags = 0;
+
+	return __vmalloc_node_try_addr(addr, size, GFP_KERNEL, PAGE_KERNEL_EXEC,
+					vm_flags, NUMA_NO_NODE, try_purge,
+					__builtin_return_address(0));
+}
+
+/*
+ * Find a random address to try that won't obviously not fit. Random areas are
+ * allowed to overflow into the backup area
+ */
+static unsigned long get_rand_module_addr(unsigned long size)
+{
+	unsigned long nr_max_pos = (MODULES_LEN - size) / MODULE_ALIGN + 1;
+	unsigned long nr_rnd_pos = get_modules_rand_len() / MODULE_ALIGN;
+	unsigned long nr_pos = min(nr_max_pos, nr_rnd_pos);
+
+	unsigned long module_position_nr = get_random_long() % nr_pos;
+	unsigned long offset = module_position_nr * MODULE_ALIGN;
+
+	return MODULES_VADDR + offset;
+}
+
+/*
+ * Try to allocate in the random area. First 5000 times without purging, then
+ * 5000 times with purging. If these fail, return NULL.
+ */
+static void *try_module_randomize_each(unsigned long size)
+{
+	void *p = NULL;
+	unsigned int i;
+	unsigned long last_lazy_free_blocked = 0;
+
+	/* This will have a guard page */
+	unsigned long va_size = PAGE_ALIGN(size) + PAGE_SIZE;
+
+	if (!kaslr_randomize_each_module())
+		return NULL;
+
+	/* Make sure there is at least one address that might fit. */
+	if (va_size < PAGE_ALIGN(size) || va_size > MODULES_LEN)
+		return NULL;
+
+	/* Try to find a spot that doesn't need a lazy purge */
+	for (i = 0; i < NR_NO_PURGE; i++) {
+		unsigned long addr = get_rand_module_addr(va_size);
+
+		/* First try to avoid having to purge */
+		p = try_module_alloc(addr, size, 0);
+
+		/*
+		 * Save the last value that was blocked by a
+		 * lazy purge area.
+		 */
+		if (IS_ERR(p) && PTR_ERR(p) == -EUCLEAN)
+			last_lazy_free_blocked = addr;
+		else if (!IS_ERR(p))
+			return p;
+	}
+
+	/* Try the most recent spot that could be used after a lazy purge */
+	if (last_lazy_free_blocked) {
+		p = try_module_alloc(last_lazy_free_blocked, size, 1);
+
+		if (!IS_ERR(p))
+			return p;
+	}
+
+	/* Look for more spots and allow lazy purges */
+	for (i = 0; i < NR_TRY_PURGE; i++) {
+		unsigned long addr = get_rand_module_addr(va_size);
+
+		/* Give up and allow for purges */
+		p = try_module_alloc(addr, size, 1);
+
+		if (!IS_ERR(p))
+			return p;
+	}
+	return NULL;
 }
-#endif
 
 void *module_alloc(unsigned long size)
 {
@@ -84,16 +201,18 @@  void *module_alloc(unsigned long size)
 	if (PAGE_ALIGN(size) > MODULES_LEN)
 		return NULL;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN,
-				    MODULES_VADDR + get_module_load_offset(),
-				    MODULES_END, GFP_KERNEL,
-				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
-				    __builtin_return_address(0));
+	p = try_module_randomize_each(size);
+
+	if (!p)
+		p = __vmalloc_node_range(size, MODULE_ALIGN,
+				get_module_vmalloc_start(), MODULES_END,
+				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				NUMA_NO_NODE, __builtin_return_address(0));
+
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
 		return NULL;
 	}
-
 	return p;
 }