Message ID | 0bc6443a-dbac-70ab-bf99-9a439e35f3ef@I-love.SAKURA.ne.jp (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | wwan_hwsim: Avoid flush_scheduled_work() usage | expand |
Hi Tetsuo, Sergey, On Wed, 20 Apr 2022 at 04:22, Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> wrote: > > Flushing system-wide workqueues is dangerous and will be forbidden. > Replace system_wq with local wwan_wq. > > Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Could you add a 'Fixes' tag? > --- > Note: This patch is only compile tested. By the way, don't you want to call > debugfs_remove(wwan_hwsim_debugfs_devcreate) at err_clean_devs label in > wwan_hwsim_init() like wwan_hwsim_exit() does, for debugfs_create_file("devcreate") > is called before "goto err_clean_devs" happens? > > drivers/net/wwan/wwan_hwsim.c | 16 ++++++++++++---- > 1 file changed, 12 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c > index 5b62cf3b3c42..2136319f588f 100644 > --- a/drivers/net/wwan/wwan_hwsim.c > +++ b/drivers/net/wwan/wwan_hwsim.c > @@ -33,6 +33,7 @@ static struct dentry *wwan_hwsim_debugfs_devcreate; > static DEFINE_SPINLOCK(wwan_hwsim_devs_lock); > static LIST_HEAD(wwan_hwsim_devs); > static unsigned int wwan_hwsim_dev_idx; > +static struct workqueue_struct *wwan_wq; > > struct wwan_hwsim_dev { > struct list_head list; > @@ -371,7 +372,7 @@ static ssize_t wwan_hwsim_debugfs_portdestroy_write(struct file *file, > * waiting this callback to finish in the debugfs_remove() call. So, > * use workqueue. > */ > - schedule_work(&port->del_work); > + queue_work(wwan_wq, &port->del_work); > > return count; > } > @@ -416,7 +417,7 @@ static ssize_t wwan_hwsim_debugfs_devdestroy_write(struct file *file, > * waiting this callback to finish in the debugfs_remove() call. So, > * use workqueue. 
> */ > - schedule_work(&dev->del_work); > + queue_work(wwan_wq, &dev->del_work); > > return count; > } > @@ -506,9 +507,15 @@ static int __init wwan_hwsim_init(void) > if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128) > return -EINVAL; > > + wwan_wq = alloc_workqueue("wwan_wq", 0, 0); > + if (!wwan_wq) > + return -ENOMEM; > + > wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim"); > - if (IS_ERR(wwan_hwsim_class)) > + if (IS_ERR(wwan_hwsim_class)) { > + destroy_workqueue(wwan_wq); > return PTR_ERR(wwan_hwsim_class); > + } > > wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL); > wwan_hwsim_debugfs_devcreate = > @@ -524,6 +531,7 @@ static int __init wwan_hwsim_init(void) > > err_clean_devs: > wwan_hwsim_free_devs(); > + destroy_workqueue(wwan_wq); > debugfs_remove(wwan_hwsim_debugfs_topdir); > class_destroy(wwan_hwsim_class); > > @@ -534,7 +542,7 @@ static void __exit wwan_hwsim_exit(void) > { > debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ > wwan_hwsim_free_devs(); > - flush_scheduled_work(); /* Wait deletion works completion */ > + destroy_workqueue(wwan_wq); /* Wait deletion works completion */ Wouldn't it be simpler to just remove the flush call. It Looks like all ports have been removed at that point, and all works cancelled, right? > debugfs_remove(wwan_hwsim_debugfs_topdir); > class_destroy(wwan_hwsim_class); > } > -- > 2.32.0 Regards, Loic
On 2022/04/20 18:53, Loic Poulain wrote: >> @@ -506,9 +507,15 @@ static int __init wwan_hwsim_init(void) >> if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128) >> return -EINVAL; >> >> + wwan_wq = alloc_workqueue("wwan_wq", 0, 0); >> + if (!wwan_wq) >> + return -ENOMEM; >> + >> wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim"); >> - if (IS_ERR(wwan_hwsim_class)) >> + if (IS_ERR(wwan_hwsim_class)) { >> + destroy_workqueue(wwan_wq); >> return PTR_ERR(wwan_hwsim_class); >> + } >> >> wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL); >> wwan_hwsim_debugfs_devcreate = >> @@ -524,6 +531,7 @@ static int __init wwan_hwsim_init(void) >> >> err_clean_devs: Do you want debugfs_remove(wwan_hwsim_debugfs_devcreate); here (as a separate patch)? >> wwan_hwsim_free_devs(); >> + destroy_workqueue(wwan_wq); >> debugfs_remove(wwan_hwsim_debugfs_topdir); >> class_destroy(wwan_hwsim_class); >> >> @@ -534,7 +542,7 @@ static void __exit wwan_hwsim_exit(void) >> { >> debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ >> wwan_hwsim_free_devs(); >> - flush_scheduled_work(); /* Wait deletion works completion */ >> + destroy_workqueue(wwan_wq); /* Wait deletion works completion */ > > Wouldn't it be simpler to just remove the flush call. It Looks like > all ports have been removed at that point, and all works cancelled, > right? I guess that this flush_scheduled_work() is there to wait for the work queued by schedule_work(&dev->del_work) in wwan_hwsim_debugfs_devdestroy_write(). That is, if wwan_hwsim_debugfs_devdestroy_write() has already scheduled this work, the wwan_hwsim_dev_del() call made from wwan_hwsim_dev_del_work() might still be in progress even after the wwan_hwsim_dev_del() call made via wwan_hwsim_free_devs() from wwan_hwsim_exit() has returned.
Hello Tetsuo, On Wed, Apr 20, 2022 at 1:17 PM Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> wrote: > On 2022/04/20 18:53, Loic Poulain wrote: >>> @@ -506,9 +507,15 @@ static int __init wwan_hwsim_init(void) >>> if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128) >>> return -EINVAL; >>> >>> + wwan_wq = alloc_workqueue("wwan_wq", 0, 0); >>> + if (!wwan_wq) >>> + return -ENOMEM; >>> + >>> wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim"); >>> - if (IS_ERR(wwan_hwsim_class)) >>> + if (IS_ERR(wwan_hwsim_class)) { >>> + destroy_workqueue(wwan_wq); >>> return PTR_ERR(wwan_hwsim_class); >>> + } >>> >>> wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL); >>> wwan_hwsim_debugfs_devcreate = >>> @@ -524,6 +531,7 @@ static int __init wwan_hwsim_init(void) >>> >>> err_clean_devs: > > Do you want > > debugfs_remove(wwan_hwsim_debugfs_devcreate); > > here (as a separate patch)? Nope. But I will not be against such a patch. I remove the "devcreate" file in wwan_hwsim_exit() to prevent new emulated devices from being created while the workqueue is being flushed, which can take a significant amount of time. Here we clean up the leftovers of the attempt to automatically create emulated devices. There is no workqueue flushing here, so the race window is very tight. In other words, the preparatory debugfs file removal is practically not required here, but it will not hurt anyone. And it will possibly make the code less questionable. >>> wwan_hwsim_free_devs(); >>> + destroy_workqueue(wwan_wq); >>> debugfs_remove(wwan_hwsim_debugfs_topdir); >>> class_destroy(wwan_hwsim_class); >>> >>> @@ -534,7 +542,7 @@ static void __exit wwan_hwsim_exit(void) >>> { >>> debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ >>> wwan_hwsim_free_devs(); >>> - flush_scheduled_work(); /* Wait deletion works completion */ >>> + destroy_workqueue(wwan_wq); /* Wait deletion works completion */ >> >> Wouldn't it be simpler to just remove the flush call. 
It Looks like >> all ports have been removed at that point, and all works cancelled, >> right? > > I guess that this flush_scheduled_work() is for waiting for schedule_work(&dev->del_work) from > wwan_hwsim_debugfs_devdestroy_write(). That is, if wwan_hwsim_debugfs_devdestroy_write() already > scheduled this work, wwan_hwsim_dev_del() from wwan_hwsim_dev_del_work() might be still in progress > even after wwan_hwsim_dev_del() from wwan_hwsim_free_devs() from wwan_hwsim_exit() returned. Exactly. This code will wait for the completion of the work that was scheduled somewhere else.
On Wed, Apr 20, 2022 at 5:22 AM Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> wrote: > Flushing system-wide workqueues is dangerous and will be forbidden. > Replace system_wq with local wwan_wq. > > Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Looks good! Just a couple minor questions below. Reviewed-by: Sergey Ryazanov <ryazanov.s.a@gmail.com> > --- > Note: This patch is only compile tested. By the way, don't you want to call > debugfs_remove(wwan_hwsim_debugfs_devcreate) at err_clean_devs label in > wwan_hwsim_init() like wwan_hwsim_exit() does, for debugfs_create_file("devcreate") > is called before "goto err_clean_devs" happens? As I replied in another mail. This is not strictly required, but will not hurt anyone. > drivers/net/wwan/wwan_hwsim.c | 16 ++++++++++++---- > 1 file changed, 12 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c > index 5b62cf3b3c42..2136319f588f 100644 > --- a/drivers/net/wwan/wwan_hwsim.c > +++ b/drivers/net/wwan/wwan_hwsim.c > @@ -33,6 +33,7 @@ static struct dentry *wwan_hwsim_debugfs_devcreate; > static DEFINE_SPINLOCK(wwan_hwsim_devs_lock); > static LIST_HEAD(wwan_hwsim_devs); > static unsigned int wwan_hwsim_dev_idx; > +static struct workqueue_struct *wwan_wq; > > struct wwan_hwsim_dev { > struct list_head list; > @@ -371,7 +372,7 @@ static ssize_t wwan_hwsim_debugfs_portdestroy_write(struct file *file, > * waiting this callback to finish in the debugfs_remove() call. So, > * use workqueue. > */ > - schedule_work(&port->del_work); > + queue_work(wwan_wq, &port->del_work); > > return count; > } > @@ -416,7 +417,7 @@ static ssize_t wwan_hwsim_debugfs_devdestroy_write(struct file *file, > * waiting this callback to finish in the debugfs_remove() call. So, > * use workqueue. 
> */ > - schedule_work(&dev->del_work); > + queue_work(wwan_wq, &dev->del_work); > > return count; > } > @@ -506,9 +507,15 @@ static int __init wwan_hwsim_init(void) > if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128) > return -EINVAL; > > + wwan_wq = alloc_workqueue("wwan_wq", 0, 0); > + if (!wwan_wq) > + return -ENOMEM; > + > wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim"); > - if (IS_ERR(wwan_hwsim_class)) > + if (IS_ERR(wwan_hwsim_class)) { > + destroy_workqueue(wwan_wq); How about jumping to a label from here and destroying the workqueue there? E.g. err = PTR_ERR(wwan_hwsim_class); goto err_wq_destroy; This will keep the code symmetric. > return PTR_ERR(wwan_hwsim_class); > + } > > wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL); > wwan_hwsim_debugfs_devcreate = > @@ -524,6 +531,7 @@ static int __init wwan_hwsim_init(void) > > err_clean_devs: > wwan_hwsim_free_devs(); > + destroy_workqueue(wwan_wq); > debugfs_remove(wwan_hwsim_debugfs_topdir); > class_destroy(wwan_hwsim_class); As you can see, there is no need to wait for the workqueue to be flushed here, since the workqueue was not used. So the workqueue destruction call can be moved below the class destruction to keep the cleanup symmetrical to the init sequence. E.g. err_clean_devs: wwan_hwsim_free_devs(); debugfs_remove(wwan_hwsim_debugfs_topdir); class_destroy(wwan_hwsim_class); +err_wq_destroy: + destroy_workqueue(wwan_wq); + return err; } > @@ -534,7 +542,7 @@ static void __exit wwan_hwsim_exit(void) > { > debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ > wwan_hwsim_free_devs(); > - flush_scheduled_work(); /* Wait deletion works completion */ > + destroy_workqueue(wwan_wq); /* Wait deletion works completion */ > debugfs_remove(wwan_hwsim_debugfs_topdir); > class_destroy(wwan_hwsim_class); > } I do not care too much, but can we explicitly flush the queue to make the exit handler as clear as possible? 
{ debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ wwan_hwsim_free_devs(); - flush_scheduled_work(); /* Wait deletion works completion */ + flush_workqueue(wwan_wq); /* Wait deletion works completion */ debugfs_remove(wwan_hwsim_debugfs_topdir); class_destroy(wwan_hwsim_class); + destroy_workqueue(wwan_wq); }
Hello Loic, On Wed, Apr 20, 2022 at 12:53 PM Loic Poulain <loic.poulain@linaro.org> wrote: > On Wed, 20 Apr 2022 at 04:22, Tetsuo Handa > <penguin-kernel@i-love.sakura.ne.jp> wrote: >> >> Flushing system-wide workqueues is dangerous and will be forbidden. >> Replace system_wq with local wwan_wq. >> >> Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp > > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > > Could you add a 'Fixes' tag? From what I understand, improper flushing of the system workqueue can potentially cause a system freeze. That is why flush_scheduled_work() is planned to be removed. The hwsim module is just one of many users of that function and has no known issues. So, a 'Fixes' tag is not required here, and there is no need to bother the stable team with backporting the change. Anyway, Tetsuo, you omitted the target tree from the subject. If this is not a fix, then you should probably target your changes to the 'net-next' tree.
On 2022/04/22 1:14, Sergey Ryazanov wrote: >> Do you want >> >> debugfs_remove(wwan_hwsim_debugfs_devcreate); >> >> here (as a separate patch)? > > Nope. But I will not be against such a patch. I remove the "devcreate" > file in wwwan_hwsim_exit() to prevent new emulated device creation > while the workqueue flushing, which can take a sufficient time. Here > we cleanup the leftovers of the attempt to automatically create > emulated devices. Here is no workqueue flushing, so the race window is > very tight. > > In other words, the preparatory debugfs file removal is practically > not required here, but it will not hurt anyone. And possibly will make > the code less questionable. OK. Since manual creation of emulated device via debugfs followed by manual device deletion of emulated device via debugfs is possible before automatic creation of emulated device via wwan_hwsim_init_devs() fails, "/* Avoid new devs */" comment is applicable to this error path; I will include debugfs_remove(wwan_hwsim_debugfs_devcreate) call. On 2022/04/22 1:35, Sergey Ryazanov wrote: >> @@ -506,9 +507,15 @@ static int __init wwan_hwsim_init(void) >> if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128) >> return -EINVAL; >> >> + wwan_wq = alloc_workqueue("wwan_wq", 0, 0); >> + if (!wwan_wq) >> + return -ENOMEM; >> + >> wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim"); >> - if (IS_ERR(wwan_hwsim_class)) >> + if (IS_ERR(wwan_hwsim_class)) { >> + destroy_workqueue(wwan_wq); > > How about jumping to some label from here and do the workqueue > destroying there? E.g. OK. >> @@ -524,6 +531,7 @@ static int __init wwan_hwsim_init(void) >> >> err_clean_devs: >> wwan_hwsim_free_devs(); >> + destroy_workqueue(wwan_wq); >> debugfs_remove(wwan_hwsim_debugfs_topdir); >> class_destroy(wwan_hwsim_class); > > As you can see there are no need to wait the workqueue flushing, since > it was not used. 
So the queue destroying call can be moved below the > class destroying to keep cleanup symmetrical to the init sequence. I will add debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ here, for "it was not used" part is theoretically not always true. >> @@ -534,7 +542,7 @@ static void __exit wwan_hwsim_exit(void) >> { >> debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ >> wwan_hwsim_free_devs(); >> - flush_scheduled_work(); /* Wait deletion works completion */ >> + destroy_workqueue(wwan_wq); /* Wait deletion works completion */ >> debugfs_remove(wwan_hwsim_debugfs_topdir); >> class_destroy(wwan_hwsim_class); >> } > > I do not care too much, but can we explicitly call the queue flushing > to make the exit handler as clear as possible? OK. On 2022/04/22 1:54, Sergey Ryazanov wrote: > From what I understand, an inaccurate flushing of the system work > queue can potentially cause a system freeze. That is why > flush_scheduled_work() is planned to be removed. The hwsim module is > just a random function user without any known issues. So, a 'fixes' > tag is not required here, and there is no need to bother the stable > team with a change backport. Right, 'Fixes:' tag is not needed for this patch. Flushing the system-wide workqueue is problematic under e.g. GFP_NOFS/GFP_NOIO context. Removing flush_scheduled_work() is for proactively avoiding new problems like https://lkml.kernel.org/r/385ce718-f965-4005-56b6-34922c4533b8@I-love.SAKURA.ne.jp and https://lkml.kernel.org/r/20220225112405.355599-10-Jerome.Pouiller@silabs.com . > > Anyway, Tetsuo, you missed a target tree in the subject. If this is > not a fix, then you probably should target your changes to the > 'net-next' tree. > OK. I posted v2 patch at https://lkml.kernel.org/r/7390d51f-60e2-3cee-5277-b819a55ceabe@I-love.SAKURA.ne.jp . Thank you for responding.
diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c index 5b62cf3b3c42..2136319f588f 100644 --- a/drivers/net/wwan/wwan_hwsim.c +++ b/drivers/net/wwan/wwan_hwsim.c @@ -33,6 +33,7 @@ static struct dentry *wwan_hwsim_debugfs_devcreate; static DEFINE_SPINLOCK(wwan_hwsim_devs_lock); static LIST_HEAD(wwan_hwsim_devs); static unsigned int wwan_hwsim_dev_idx; +static struct workqueue_struct *wwan_wq; struct wwan_hwsim_dev { struct list_head list; @@ -371,7 +372,7 @@ static ssize_t wwan_hwsim_debugfs_portdestroy_write(struct file *file, * waiting this callback to finish in the debugfs_remove() call. So, * use workqueue. */ - schedule_work(&port->del_work); + queue_work(wwan_wq, &port->del_work); return count; } @@ -416,7 +417,7 @@ static ssize_t wwan_hwsim_debugfs_devdestroy_write(struct file *file, * waiting this callback to finish in the debugfs_remove() call. So, * use workqueue. */ - schedule_work(&dev->del_work); + queue_work(wwan_wq, &dev->del_work); return count; } @@ -506,9 +507,15 @@ static int __init wwan_hwsim_init(void) if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128) return -EINVAL; + wwan_wq = alloc_workqueue("wwan_wq", 0, 0); + if (!wwan_wq) + return -ENOMEM; + wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim"); - if (IS_ERR(wwan_hwsim_class)) + if (IS_ERR(wwan_hwsim_class)) { + destroy_workqueue(wwan_wq); return PTR_ERR(wwan_hwsim_class); + } wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL); wwan_hwsim_debugfs_devcreate = @@ -524,6 +531,7 @@ static int __init wwan_hwsim_init(void) err_clean_devs: wwan_hwsim_free_devs(); + destroy_workqueue(wwan_wq); debugfs_remove(wwan_hwsim_debugfs_topdir); class_destroy(wwan_hwsim_class); @@ -534,7 +542,7 @@ static void __exit wwan_hwsim_exit(void) { debugfs_remove(wwan_hwsim_debugfs_devcreate); /* Avoid new devs */ wwan_hwsim_free_devs(); - flush_scheduled_work(); /* Wait deletion works completion */ + destroy_workqueue(wwan_wq); /* Wait deletion works completion */ 
debugfs_remove(wwan_hwsim_debugfs_topdir); class_destroy(wwan_hwsim_class); }
Flushing system-wide workqueues is dangerous and will be forbidden. Replace system_wq with local wwan_wq. Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> --- Note: This patch is only compile tested. By the way, don't you want to call debugfs_remove(wwan_hwsim_debugfs_devcreate) at err_clean_devs label in wwan_hwsim_init() like wwan_hwsim_exit() does, for debugfs_create_file("devcreate") is called before "goto err_clean_devs" happens? drivers/net/wwan/wwan_hwsim.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-)