diff mbox series

drivers/core: Replace lockdep_set_novalidate_class() with unique class keys

Message ID 1ad499bb-0c53-7529-ff00-e4328823f6fa@I-love.SAKURA.ne.jp (mailing list archive)
State New, archived
Headers show
Series drivers/core: Replace lockdep_set_novalidate_class() with unique class keys | expand

Commit Message

Tetsuo Handa Feb. 8, 2023, 10:30 a.m. UTC
Commit 1704f47b50b5 ("lockdep: Add novalidate class for dev->mutex
conversion") made it impossible to find real deadlocks unless timing
dependent testings manage to trigger hung task like [1] and [2]. And
lockdep_set_novalidate_class() remained for more than one decade due to
a fear of false positives [3]. But not sharing mutex_init() could make
it possible to find real deadlocks without triggering hung task [4].
Thus, let's assign a unique class key on each "struct device"->mutex.

Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1]
Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2]
Link: https://lkml.kernel.org/r/Y98FLlr7jkiFlV0k@rowland.harvard.edu [3]
Link: https://lkml.kernel.org/r/827177aa-bb64-87a9-e1af-dfe070744045@I-love.SAKURA.ne.jp [4]
Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Co-developed-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Co-developed-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Hello, syzkaller users.

We made a patch that keeps lockdep validation enabled on "struct dev->mutex".
Will you try this patch and see if this patch causes boot failures and/or
too frequent crashes to continue testing.

 drivers/base/core.c      | 7 ++++++-
 include/linux/device.h   | 1 +
 include/linux/lockdep.h  | 6 ++++++
 kernel/locking/lockdep.c | 7 +++++++
 4 files changed, 20 insertions(+), 1 deletion(-)

Comments

Alan Stern Feb. 8, 2023, 3:07 p.m. UTC | #1
On Wed, Feb 08, 2023 at 07:30:25PM +0900, Tetsuo Handa wrote:
> Commit 1704f47b50b5 ("lockdep: Add novalidate class for dev->mutex
> conversion") made it impossible to find real deadlocks unless timing
> dependent testings manage to trigger hung task like [1] and [2]. And
> lockdep_set_novalidate_class() remained for more than one decade due to
> a fear of false positives [3]. But not sharing mutex_init() could make
> it possible to find real deadlocks without triggering hung task [4].
> Thus, let's assign a unique class key on each "struct device"->mutex.
> 
> Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1]
> Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2]
> Link: https://lkml.kernel.org/r/Y98FLlr7jkiFlV0k@rowland.harvard.edu [3]
> Link: https://lkml.kernel.org/r/827177aa-bb64-87a9-e1af-dfe070744045@I-love.SAKURA.ne.jp [4]
> Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Co-developed-by: Alan Stern <stern@rowland.harvard.edu>
> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

You must never do this!

I did not put my Signed-off-by: on the patch I sent to you.  I do not 
want it added to that patch or to this one.  You should never put 
somebody else's Signed-off-by: on a patch unless they tell you it's okay 
to do so.

I'm happy to have people test this patch, but I do not want anybody 
think that it is ready to be merged into the kernel.

> Co-developed-by: Hillf Danton <hdanton@sina.com>
> Signed-off-by: Hillf Danton <hdanton@sina.com>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
> Hello, syzkaller users.
> 
> We made a patch that keeps lockdep validation enabled on "struct dev->mutex".
> Will you try this patch and see if this patch causes boot failures and/or
> too frequent crashes to continue testing.
> 
>  drivers/base/core.c      | 7 ++++++-
>  include/linux/device.h   | 1 +
>  include/linux/lockdep.h  | 6 ++++++
>  kernel/locking/lockdep.c | 7 +++++++
>  4 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index a3e14143ec0c..c30ecbc4d60e 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -2322,6 +2322,9 @@ static void device_release(struct kobject *kobj)
>  	devres_release_all(dev);
>  
>  	kfree(dev->dma_range_map);
> +	mutex_destroy(&dev->mutex);
> +	if (!lockdep_static_obj(dev))
> +		lockdep_unregister_key(&dev->mutex_key);
>  
>  	if (dev->release)
>  		dev->release(dev);
> @@ -2941,7 +2944,9 @@ void device_initialize(struct device *dev)
>  	kobject_init(&dev->kobj, &device_ktype);
>  	INIT_LIST_HEAD(&dev->dma_pools);
>  	mutex_init(&dev->mutex);
> -	lockdep_set_novalidate_class(&dev->mutex);
> +	if (!lockdep_static_obj(dev))
> +		lockdep_register_key(&dev->mutex_key);
> +	lockdep_set_class(&dev->mutex, &dev->mutex_key);
>  	spin_lock_init(&dev->devres_lock);
>  	INIT_LIST_HEAD(&dev->devres_head);
>  	device_pm_init(dev);
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 44e3acae7b36..bdaca9f54dc2 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -570,6 +570,7 @@ struct device {
>  	struct mutex		mutex;	/* mutex to synchronize calls to
>  					 * its driver.
>  					 */
> +	struct lock_class_key	mutex_key;	/* Unique key for each device */
>  
>  	struct dev_links_info	links;
>  	struct dev_pm_info	power;
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index 1f1099dac3f0..5afc999a7e56 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -172,6 +172,7 @@ do {							\
>  	current->lockdep_recursion -= LOCKDEP_OFF;	\
>  } while (0)
>  
> +extern int lockdep_static_obj(const void *obj);
>  extern void lockdep_register_key(struct lock_class_key *key);
>  extern void lockdep_unregister_key(struct lock_class_key *key);
>  
> @@ -391,6 +392,11 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
>  # define lockdep_free_key_range(start, size)	do { } while (0)
>  # define lockdep_sys_exit() 			do { } while (0)
>  
> +static inline int lockdep_static_obj(const void *obj)
> +{
> +	return 0;
> +}
> +
>  static inline void lockdep_register_key(struct lock_class_key *key)
>  {
>  }
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index e3375bc40dad..74c0113646f1 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -857,6 +857,13 @@ static int static_obj(const void *obj)
>  	 */
>  	return is_module_address(addr) || is_module_percpu_address(addr);
>  }
> +
> +int lockdep_static_obj(const void *obj)
> +{
> +	return static_obj(obj);
> +}
> +EXPORT_SYMBOL_GPL(lockdep_static_obj);

What's the point of adding a new function that just calls the old 
function?  Why not simply rename the old function?

Alan Stern

> +
>  #endif
>  
>  /*
> -- 
> 2.34.1
>
Tetsuo Handa Feb. 9, 2023, 12:22 a.m. UTC | #2
On 2023/02/09 0:07, Alan Stern wrote:
> On Wed, Feb 08, 2023 at 07:30:25PM +0900, Tetsuo Handa wrote:
>> Commit 1704f47b50b5 ("lockdep: Add novalidate class for dev->mutex
>> conversion") made it impossible to find real deadlocks unless timing
>> dependent testings manage to trigger hung task like [1] and [2]. And
>> lockdep_set_novalidate_class() remained for more than one decade due to
>> a fear of false positives [3]. But not sharing mutex_init() could make
>> it possible to find real deadlocks without triggering hung task [4].
>> Thus, let's assign a unique class key on each "struct device"->mutex.
>>
>> Link: https://syzkaller.appspot.com/bug?extid=2d6ac90723742279e101 [1]
>> Link: https://syzkaller.appspot.com/bug?extid=2e39bc6569d281acbcfb [2]
>> Link: https://lkml.kernel.org/r/Y98FLlr7jkiFlV0k@rowland.harvard.edu [3]
>> Link: https://lkml.kernel.org/r/827177aa-bb64-87a9-e1af-dfe070744045@I-love.SAKURA.ne.jp [4]
>> Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>> Co-developed-by: Alan Stern <stern@rowland.harvard.edu>
>> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
> 
> You must never do this!
> 
> I did not put my Signed-off-by: on the patch I sent to you.  I do not 
> want it added to that patch or to this one.  You should never put 
> somebody else's Signed-off-by: on a patch unless they tell you it's okay 
> to do so.

Did I misuse the Co-developed-by: tag? I added your Signed-off-by: tag because
https://docs.kernel.org/process/submitting-patches.html#when-to-use-acked-by-cc-and-co-developed-by
states that "every Co-developed-by: must be immediately followed by a Signed-off-by:
of the associated co-author."

I don't know whether the Co-developed-by: tag is used only when somebody else takes over
a previously proposed formal patch. I use the Co-developed-by: tag in order to state
developer's contribution when he/she suggested some plain diff but does not propose
that diff as a formal patch with description. Unless changes are proposed as a formal
patch (by somebody), bugs won't be fixed.

> 
> I'm happy to have people test this patch, but I do not want anybody 
> think that it is ready to be merged into the kernel.

People (and build/test bots) won't test changes that are not proposed as
a formal patch with Signed-off-by: tag. As far as I am aware, bot is not
testing plain diff.

I expected you to post a formal patch with your Signed-off-by: tag, but you didn't.
Therefore, I took over. Namely, define a dummy function for CONFIG_LOCKDEP=n case,
apply Hillf's suggestion, and reduce lines changed in kernel/locking/lockdep.c
in order to make the patch smaller and easier to apply the change.

> 
>> Co-developed-by: Hillf Danton <hdanton@sina.com>
>> Signed-off-by: Hillf Danton <hdanton@sina.com>
>> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>> ---

>> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
>> index e3375bc40dad..74c0113646f1 100644
>> --- a/kernel/locking/lockdep.c
>> +++ b/kernel/locking/lockdep.c
>> @@ -857,6 +857,13 @@ static int static_obj(const void *obj)
>>  	 */
>>  	return is_module_address(addr) || is_module_percpu_address(addr);
>>  }
>> +
>> +int lockdep_static_obj(const void *obj)
>> +{
>> +	return static_obj(obj);
>> +}
>> +EXPORT_SYMBOL_GPL(lockdep_static_obj);
> 
> What's the point of adding a new function that just calls the old 
> function?  Why not simply rename the old function?

This makes the patch smaller and easier to apply the change. Of course,
I can update the patch if lockdep developers prefer rename over add.
What I worry is that lockdep developers do not permit static_obj() being
used by non-lockdep code.
Linus Torvalds Feb. 9, 2023, 12:46 a.m. UTC | #3
On Wed, Feb 8, 2023 at 4:23 PM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> Did I misuse the Co-developed-by: tag? I added your Signed-off-by: tag because
> https://docs.kernel.org/process/submitting-patches.html#when-to-use-acked-by-cc-and-co-developed-by
> states that "every Co-developed-by: must be immediately followed by a Signed-off-by:
> of the associated co-author."

That doesn't mean that *You* can add a Signed-off-by:

Nobody can certify sign-off for anybody else. Read the sign-off rules:
you can add your *own* sign-off if the rules hold, but you can't sign
off for somebody else.

The "Co-developed-by: must be immediately followed by a
Signed-off-by:" thing only means that if there are multiple
developers, then ALL DEVELOPERS MUST SIGN OFF.

It absolutely does *NOT* mean that you adding a Co-developed-by means
that you then add a Signed-off-by.

That's like faking somebody else's signature on some paperwork. Never
do that either, and it's hopefully obvious why.

                  Linus
Tetsuo Handa Feb. 9, 2023, 1:50 a.m. UTC | #4
On 2023/02/09 9:46, Linus Torvalds wrote:
> On Wed, Feb 8, 2023 at 4:23 PM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>>
>> Did I misuse the Co-developed-by: tag? I added your Signed-off-by: tag because
>> https://docs.kernel.org/process/submitting-patches.html#when-to-use-acked-by-cc-and-co-developed-by
>> states that "every Co-developed-by: must be immediately followed by a Signed-off-by:
>> of the associated co-author."
> 
> That doesn't mean that *You* can add a Signed-off-by:
> 
> Nobody can certify sign-off for anybody else. Read the sign-off rules:
> you can add your *own* sign-off if the rules hold, but you can't sign
> off for somebody else.
> 
> The "Co-developed-by: must be immediately followed by a
> Signed-off-by:" thing only means that if there are multiple
> developers, then ALL DEVELOPERS MUST SIGN OFF.
> 
> It absolutely does *NOT* mean that you adding a Co-developed-by means
> that you then add a Signed-off-by.
> 
> That's like faking somebody else's signature on some paperwork. Never
> do that either, and it's hopefully obvious why.

OK. Then, how to handle a case where a developer suggested a diff but
he/she does not propose that diff as a formal patch?

Hillf is suggesting diffs for many bugs (an example is
https://syzkaller.appspot.com/bug?id=ee93abc9a483645fc0914811af9c12da355a2e3e ),
and some of diffs look reasonable/correct, but Hillf never tries to propose as
a formal patch, and that diff is left forgotten and that bug remains unfixed.

I don't want to steal Hillf's effort. But given that I can't add Co-developed-by:
and Signed-off-by: on behalf of Hillf, how can I propose a formal patch in a way
that preserves Hillf's effort? Is Suggested-by: suitable for this case?
Alan Stern Feb. 9, 2023, 2:26 a.m. UTC | #5
On Thu, Feb 09, 2023 at 09:22:39AM +0900, Tetsuo Handa wrote:
> On 2023/02/09 0:07, Alan Stern wrote:
> > I'm happy to have people test this patch, but I do not want anybody 
> > think that it is ready to be merged into the kernel.
> 
> People (and build/test bots) won't test changes that are not proposed as
> a formal patch with Signed-off-by: tag. As far as I am aware, bot is not
> testing plain diff.

People _do_ test changes without a Signed-off-by: tag.  This happens 
with my patches all the time; I don't put Signed-off-by: on a patch 
until I think it is ready to be merged.  If you search through the email 
archives, you'll find examples where people deliberately put a 
"Not-yet-signed-off-by:" tag on a suggested patch.

Syzbot also tests patches without a Signed-off-by: tag.  Here's a recent 
example:

https://lore.kernel.org/linux-usb/Y9wh8dGK6oHSjJQl@rowland.harvard.edu/

> > What's the point of adding a new function that just calls the old 
> > function?  Why not simply rename the old function?
> 
> This makes the patch smaller and easier to apply the change. Of course,

How does it make the patch easier to apply?  With either the original 
version or yours, you apply the patch by doing

	patch -p1 <patchfile

(or a similar git command).  Same command, same amount of difficulty for 
both patches.

> I can update the patch if lockdep developers prefer rename over add.
> What I worry is that lockdep developers do not permit static_obj() being
> used by non-lockdep code.

I worry about that too, and I hoped that Peter Z. would comment on it. 
But if they don't want the function to be exported, they ought to be 
able to suggest an alternative.

Alan Stern
Tetsuo Handa Feb. 11, 2023, 2:04 a.m. UTC | #6
On 2023/02/09 11:26, Alan Stern wrote:
> On Thu, Feb 09, 2023 at 09:22:39AM +0900, Tetsuo Handa wrote:
>> On 2023/02/09 0:07, Alan Stern wrote:
>>> I'm happy to have people test this patch, but I do not want anybody 
>>> think that it is ready to be merged into the kernel.
>>
>> People (and build/test bots) won't test changes that are not proposed as
>> a formal patch with Signed-off-by: tag. As far as I am aware, bot is not
>> testing plain diff.
> 
> People _do_ test changes without a Signed-off-by: tag.  This happens 
> with my patches all the time; I don't put Signed-off-by: on a patch 
> until I think it is ready to be merged.  If you search through the email 
> archives, you'll find examples where people deliberately put a 
> "Not-yet-signed-off-by:" tag on a suggested patch.

That's a cultural difference. I know there are developers who use
"Not-yet-signed-off-by:" tag. But I'm not subscribed to mailing lists
which you are subscribed to. I'm subscribed to linux-security-module, but
I feel that it is quite rare that developers post changes as plain diff
without Signed-off-by: tag, for people prefer to see formal patches than
plain diff. I can see that only David Howells was using Not-yet-signed-off-by:
tag (like https://marc.info/?l=linux-security-module&m=130455572927447 ).

But even with Not-yet-signed-off-by: tag, his patches are formal patches
with description rather than plain diff. Unlike networking subsystem where
patches sometimes get merged before people have time to review/test,
developers in my subscribed mailing lists tend to propose v2, v3, v4...
patches with "Signed-off-by:" tag instead of posting plain diff.

> Syzbot also tests patches without a Signed-off-by: tag.  Here's a recent 
> example:
> 
> https://lore.kernel.org/linux-usb/Y9wh8dGK6oHSjJQl@rowland.harvard.edu/

That's completely different. syzbot is designed to test plain diff via
explict "#syz test:" directive. If "#syz test:" directive is not included,
syzbot does not test your diff.

Do you know any bot which automatically does testing plain diff? I don't know
when bots (or automated systems) test changes, but my guess is that a formal
patch with "Signed-off-by:" tag serves as the directive for bots to test
changes. Maybe we want a directive (e.g. "Test-requested-by:" tag) which
encourages/asks bots (or automated systems) to test patches but does not
want patches to get merged into permanent git trees.

>> I can update the patch if lockdep developers prefer rename over add.
>> What I worry is that lockdep developers do not permit static_obj() being
>> used by non-lockdep code.
> 
> I worry about that too, and I hoped that Peter Z. would comment on it. 
> But if they don't want the function to be exported, they ought to be 
> able to suggest an alternative.

Then, at least we can start without "EXPORT_SYMBOL_GPL(lockdep_static_obj);"
line, for drivers/base/core.c cannot be built as a module.

Since there are already several locations which directly use e.g. _stext symbol,
we would simply duplicate static_obj() into drivers/base/core.c if Peter does
not want to make static_obj() visible to built-in code.



Anyway, despite being posted as a formal patch, it seems that nobody was
interested in manual testing. As far as I tried "#syz test" this patch against
https://syzkaller.appspot.com/bug?extid=9ef743bba3a17c756174 , syzbot kernel
was able to boot. Therefore, I think it is OK to stay for a week whether
this patch causes too frequent crashes to continue using linux-next.git .

Please propose a formal patch to Peter with your "Signed-off-by:" tag...
diff mbox series

Patch

diff --git a/drivers/base/core.c b/drivers/base/core.c
index a3e14143ec0c..c30ecbc4d60e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2322,6 +2322,9 @@  static void device_release(struct kobject *kobj)
 	devres_release_all(dev);
 
 	kfree(dev->dma_range_map);
+	mutex_destroy(&dev->mutex);
+	if (!lockdep_static_obj(dev))
+		lockdep_unregister_key(&dev->mutex_key);
 
 	if (dev->release)
 		dev->release(dev);
@@ -2941,7 +2944,9 @@  void device_initialize(struct device *dev)
 	kobject_init(&dev->kobj, &device_ktype);
 	INIT_LIST_HEAD(&dev->dma_pools);
 	mutex_init(&dev->mutex);
-	lockdep_set_novalidate_class(&dev->mutex);
+	if (!lockdep_static_obj(dev))
+		lockdep_register_key(&dev->mutex_key);
+	lockdep_set_class(&dev->mutex, &dev->mutex_key);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
 	device_pm_init(dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index 44e3acae7b36..bdaca9f54dc2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -570,6 +570,7 @@  struct device {
 	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
 					 */
+	struct lock_class_key	mutex_key;	/* Unique key for each device */
 
 	struct dev_links_info	links;
 	struct dev_pm_info	power;
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 1f1099dac3f0..5afc999a7e56 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -172,6 +172,7 @@  do {							\
 	current->lockdep_recursion -= LOCKDEP_OFF;	\
 } while (0)
 
+extern int lockdep_static_obj(const void *obj);
 extern void lockdep_register_key(struct lock_class_key *key);
 extern void lockdep_unregister_key(struct lock_class_key *key);
 
@@ -391,6 +392,11 @@  static inline void lockdep_set_selftest_task(struct task_struct *task)
 # define lockdep_free_key_range(start, size)	do { } while (0)
 # define lockdep_sys_exit() 			do { } while (0)
 
+static inline int lockdep_static_obj(const void *obj)
+{
+	return 0;
+}
+
 static inline void lockdep_register_key(struct lock_class_key *key)
 {
 }
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index e3375bc40dad..74c0113646f1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -857,6 +857,13 @@  static int static_obj(const void *obj)
 	 */
 	return is_module_address(addr) || is_module_percpu_address(addr);
 }
+
+int lockdep_static_obj(const void *obj)
+{
+	return static_obj(obj);
+}
+EXPORT_SYMBOL_GPL(lockdep_static_obj);
+
 #endif
 
 /*