diff mbox series

[RFC,v2,1/2] uaccess: Check no rescheduling function is called in unsafe region

Message ID 1543845318-24543-2-git-send-email-julien.thierry@arm.com (mailing list archive)
State RFC
Headers show
Series uaccess: Add unsafe accessors for arm64 | expand

Commit Message

Julien Thierry Dec. 3, 2018, 1:55 p.m. UTC
While running a user_access region, it is not supported to reschedule.
Add an overridable primitive to indicate whether a user_access region is
active and check that this is not the case when calling rescheduling
functions.

Also, add a comment clarifying the behaviour of user_access regions.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
---
 include/linux/kernel.h  |  6 ++++--
 include/linux/uaccess.h | 11 +++++++++++
 kernel/sched/core.c     | 19 +++++++++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

I'm not sure these are the best locations to check this but I was hoping
this patch could start the discussion.

Should I move the check? Should I add a config option to conditionally
build those checks?

--
1.9.1

Comments

Valentin Schneider Jan. 14, 2019, 12:03 p.m. UTC | #1
Hi,

On 03/12/2018 13:55, Julien Thierry wrote:
> While running a user_access region, it is not supported to reschedule.
> Add an overridable primitive to indicate whether a user_access region is
> active and check that this is not the case when calling rescheduling
> functions.
> 
> Also, add a comment clarifying the behaviour of user_access regions.
> 
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> ---
>  include/linux/kernel.h  |  6 ++++--
>  include/linux/uaccess.h | 11 +++++++++++
>  kernel/sched/core.c     | 19 +++++++++++++++++++
>  3 files changed, 34 insertions(+), 2 deletions(-)
> 
> I'm not sure these are the best locations to check this but I was hoping
> this patch could start the discussion.
> 
> Should I move the check? Should I add a config option to conditionally
> build those checks?
> 

I was going to say it's already under DEBUG_ATOMIC_SLEEP, but that's only
true for the __might_sleep() bit actually.

I think it'd make sense to blanket that under a config, but using
DEBUG_ATOMIC_SLEEP for that is a bit too much. What about a
DEBUG_UACCESS_SLEEP?

> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
> index d6aac75..fe0e984 100644
> --- a/include/linux/kernel.h
> +++ b/include/linux/kernel.h
> @@ -237,11 +237,13 @@
>  struct pt_regs;
>  struct user;
> 
> +extern void __might_resched(const char *file, int line);
>  #ifdef CONFIG_PREEMPT_VOLUNTARY
>  extern int _cond_resched(void);
> -# define might_resched() _cond_resched()
> +# define might_resched() \
> +	do { __might_resched(__FILE__, __LINE__); _cond_resched(); } while (0)
>  #else
> -# define might_resched() do { } while (0)
> +# define might_resched() __might_resched(__FILE__, __LINE__)
>  #endif
> 
>  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index efe79c1..50adb84 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -266,6 +266,13 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
>  #define probe_kernel_address(addr, retval)		\
>  	probe_kernel_read(&retval, addr, sizeof(retval))
> 
> +/*
> + * user_access_begin() and user_access_end() define a region where
> + * unsafe user accessors can be used.
> + * During execution of this region, no sleeping functions should be called.
> + * Exceptions and interrupt shall exit the user_access region and re-enter it
> + * when returning to the interrupted context.
> + */

I would first have the bit about exceptions, then mention sleeping and add
something along the lines of

"[...] no sleeping functions should be called - we rely on exception
handling to take care of the user_access status for us, but that doesn't
happen when directly calling schedule()."

My wording's not the best but I just want something to point out *why*
sleeping ain't okay.

>  #ifndef user_access_begin
>  #define user_access_begin() do { } while (0)
>  #define user_access_end() do { } while (0)
> @@ -273,6 +280,10 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
>  #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
>  #endif
> 
> +#ifndef unsafe_user_region_active
> +#define unsafe_user_region_active()	false
> +#endif
> +
>  #ifdef CONFIG_HARDENED_USERCOPY
>  void usercopy_warn(const char *name, const char *detail, bool to_user,
>  		   unsigned long offset, unsigned long len);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 6fedf3a..03f53c8 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3289,6 +3289,13 @@ static inline void schedule_debug(struct task_struct *prev)
>  		__schedule_bug(prev);
>  		preempt_count_set(PREEMPT_DISABLED);
>  	}
> +
> +	if (unlikely(unsafe_user_region_active())) {
> +		printk(KERN_ERR "BUG: scheduling while user_access enabled: %s/%d/0x%08x\n",
> +		       prev->comm, prev->pid, preempt_count());
> +		dump_stack();
> +	}
> +
>  	rcu_sleep_check();
> 
>  	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
> @@ -6151,6 +6158,18 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
>  EXPORT_SYMBOL(___might_sleep);
>  #endif
> 
> +void __might_resched(const char *file, int line)
> +{
> +	if (!unsafe_user_region_active())
> +		return;
> +
> +	printk(KERN_ERR
> +		"BUG: rescheduling function called from user access context at %s:%d\n",
> +			file, line);
> +	dump_stack();
> +}

So this check is "careful, things might go bad" and the schedule_debug()
one is "things went bad". IIUC we'll always get this warning when we hit
the schedule_debug() one. I was going to suggest only keeping one of them,
but I think both hold value.

> +EXPORT_SYMBOL(__might_resched);
> +
>  #ifdef CONFIG_MAGIC_SYSRQ
>  void normalize_rt_tasks(void)
>  {
> --
> 1.9.1
>
Julien Thierry Jan. 15, 2019, 11:48 a.m. UTC | #2
Hi,

On 14/01/2019 12:03, Valentin Schneider wrote:
> Hi,
> 
> On 03/12/2018 13:55, Julien Thierry wrote:
>> While running a user_access region, it is not supported to reschedule.
>> Add an overridable primitive to indicate whether a user_access region is
>> active and check that this is not the case when calling rescheduling
>> functions.
>>
>> Also, add a comment clarifying the behaviour of user_access regions.
>>
>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>> ---
>>  include/linux/kernel.h  |  6 ++++--
>>  include/linux/uaccess.h | 11 +++++++++++
>>  kernel/sched/core.c     | 19 +++++++++++++++++++
>>  3 files changed, 34 insertions(+), 2 deletions(-)
>>
>> I'm not sure these are the best locations to check this but I was hoping
>> this patch could start the discussion.
>>
>> Should I move the check? Should I add a config option to conditionally
>> build those checks?
>>
> 
> I was going to say it's already under DEBUG_ATOMIC_SLEEP, but that's only
> true for the __might_sleep() bit actually.
> 
> I think it'd make sense to blanket that under a config, but using
> DEBUG_ATOMIC_SLEEP for that is a bit too much. What about a
> DEBUG_UACCESS_SLEEP?
> 

Yes, I was wondering whether to add something like that, I'll add a
DEBUG_UACCESS_SLEEP to my next version.

>> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
>> index d6aac75..fe0e984 100644
>> --- a/include/linux/kernel.h
>> +++ b/include/linux/kernel.h
>> @@ -237,11 +237,13 @@
>>  struct pt_regs;
>>  struct user;
>>
>> +extern void __might_resched(const char *file, int line);
>>  #ifdef CONFIG_PREEMPT_VOLUNTARY
>>  extern int _cond_resched(void);
>> -# define might_resched() _cond_resched()
>> +# define might_resched() \
>> +	do { __might_resched(__FILE__, __LINE__); _cond_resched(); } while (0)
>>  #else
>> -# define might_resched() do { } while (0)
>> +# define might_resched() __might_resched(__FILE__, __LINE__)
>>  #endif
>>
>>  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
>> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
>> index efe79c1..50adb84 100644
>> --- a/include/linux/uaccess.h
>> +++ b/include/linux/uaccess.h
>> @@ -266,6 +266,13 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
>>  #define probe_kernel_address(addr, retval)		\
>>  	probe_kernel_read(&retval, addr, sizeof(retval))
>>
>> +/*
>> + * user_access_begin() and user_access_end() define a region where
>> + * unsafe user accessors can be used.
>> + * During execution of this region, no sleeping functions should be called.
>> + * Exceptions and interrupt shall exit the user_access region and re-enter it
>> + * when returning to the interrupted context.
>> + */
> 
> I would first have the bit about exceptions, then mention sleeping and add
> something along the lines of
> 
> "[...] no sleeping functions should be called - we rely on exception
> handling to take care of the user_access status for us, but that doesn't
> happen when directly calling schedule()."
> 
> My wording's not the best but I just want something to point out *why*
> sleeping ain't okay.
> 

I think the wording is alright, I'll include your suggestion for the
next version.

>>  #ifndef user_access_begin
>>  #define user_access_begin() do { } while (0)
>>  #define user_access_end() do { } while (0)
>> @@ -273,6 +280,10 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
>>  #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
>>  #endif
>>
>> +#ifndef unsafe_user_region_active
>> +#define unsafe_user_region_active()	false
>> +#endif
>> +
>>  #ifdef CONFIG_HARDENED_USERCOPY
>>  void usercopy_warn(const char *name, const char *detail, bool to_user,
>>  		   unsigned long offset, unsigned long len);
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 6fedf3a..03f53c8 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -3289,6 +3289,13 @@ static inline void schedule_debug(struct task_struct *prev)
>>  		__schedule_bug(prev);
>>  		preempt_count_set(PREEMPT_DISABLED);
>>  	}
>> +
>> +	if (unlikely(unsafe_user_region_active())) {
>> +		printk(KERN_ERR "BUG: scheduling while user_access enabled: %s/%d/0x%08x\n",
>> +		       prev->comm, prev->pid, preempt_count());
>> +		dump_stack();
>> +	}
>> +
>>  	rcu_sleep_check();
>>
>>  	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
>> @@ -6151,6 +6158,18 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
>>  EXPORT_SYMBOL(___might_sleep);
>>  #endif
>>
>> +void __might_resched(const char *file, int line)
>> +{
>> +	if (!unsafe_user_region_active())
>> +		return;
>> +
>> +	printk(KERN_ERR
>> +		"BUG: rescheduling function called from user access context at %s:%d\n",
>> +			file, line);
>> +	dump_stack();
>> +}
> 
> So this check is "careful, things might go bad" and the schedule_debug()
> one is "things went bad". IIUC we'll always get this warning when we hit
> the schedule_debug() one. I was going to suggest only keeping one of them,
> but I think both hold value.
> 

Yes, I can't really convince myself to remove either, unless there is a
magic place that covers both cases.

Thanks for the suggestions.

Cheers,
diff mbox series

Patch

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d6aac75..fe0e984 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -237,11 +237,13 @@ 
 struct pt_regs;
 struct user;

+extern void __might_resched(const char *file, int line);
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+# define might_resched() \
+	do { __might_resched(__FILE__, __LINE__); _cond_resched(); } while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() __might_resched(__FILE__, __LINE__)
 #endif

 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index efe79c1..50adb84 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -266,6 +266,13 @@  static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
 #define probe_kernel_address(addr, retval)		\
 	probe_kernel_read(&retval, addr, sizeof(retval))

+/*
+ * user_access_begin() and user_access_end() define a region where
+ * unsafe user accessors can be used.
+ * During execution of this region, no sleeping functions should be called.
+ * Exceptions and interrupt shall exit the user_access region and re-enter it
+ * when returning to the interrupted context.
+ */
 #ifndef user_access_begin
 #define user_access_begin() do { } while (0)
 #define user_access_end() do { } while (0)
@@ -273,6 +280,10 @@  static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
 #endif

+#ifndef unsafe_user_region_active
+#define unsafe_user_region_active()	false
+#endif
+
 #ifdef CONFIG_HARDENED_USERCOPY
 void usercopy_warn(const char *name, const char *detail, bool to_user,
 		   unsigned long offset, unsigned long len);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6fedf3a..03f53c8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3289,6 +3289,13 @@  static inline void schedule_debug(struct task_struct *prev)
 		__schedule_bug(prev);
 		preempt_count_set(PREEMPT_DISABLED);
 	}
+
+	if (unlikely(unsafe_user_region_active())) {
+		printk(KERN_ERR "BUG: scheduling while user_access enabled: %s/%d/0x%08x\n",
+		       prev->comm, prev->pid, preempt_count());
+		dump_stack();
+	}
+
 	rcu_sleep_check();

 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
@@ -6151,6 +6158,18 @@  void ___might_sleep(const char *file, int line, int preempt_offset)
 EXPORT_SYMBOL(___might_sleep);
 #endif

+void __might_resched(const char *file, int line)
+{
+	if (!unsafe_user_region_active())
+		return;
+
+	printk(KERN_ERR
+		"BUG: rescheduling function called from user access context at %s:%d\n",
+			file, line);
+	dump_stack();
+}
+EXPORT_SYMBOL(__might_resched);
+
 #ifdef CONFIG_MAGIC_SYSRQ
 void normalize_rt_tasks(void)
 {