Message ID | Zk2Eso--FVsZ5AF3@gondor.apana.org.au (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v3] hwrng: core - Remove add_early_randomness | expand |
On Wed May 22, 2024 at 8:37 AM EEST, Herbert Xu wrote: > On Tue, May 21, 2024 at 03:37:16PM -0400, Nícolas F. R. A. Prado wrote: > > > > FWIW this patch fixes the warning. So feel free to add > > > > Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com> > > Could you please test this patch instead? > > ---8<--- > A potential deadlock was reported with the config file at > > https://web.archive.org/web/20240522052129/https://0x0.st/XPN_.txt > > In this particular configuration, the deadlock doesn't exist because > the warning triggered at a point before modules were even available. > However, the deadlock can be real because any module loaded would > invoke async_synchronize_full. > > The issue is spurious for software crypto algorithms which aren't > themselves involved in async probing. However, it would be hard to > avoid for a PCI crypto driver using async probing. > > In this particular call trace, the problem is easily avoided because > the only reason the module is being requested during probing is the > add_early_randomness call in the hwrng core. This feature is > vestigial since there is now a kernel thread dedicated to doing > exactly this. > > So remove add_early_randomness as it is no longer needed. "vestigial" did not know that word before ;-) Something learned. What is the kthread doing this currently? > > Reported-by: Nícolas F. R. A. Prado <nfraprado@collabora.com> > Reported-by: Eric Biggers <ebiggers@kernel.org> > Fixes: 1b6d7f9eb150 ("tpm: add session encryption protection to tpm2_get_random()") > Link: https://lore.kernel.org/r/119dc5ed-f159-41be-9dda-1a056f29888d@notapiano/ > Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> > > diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c > index f5c71a617a99..4084df65c9fa 100644 > --- a/drivers/char/hw_random/core.c > +++ b/drivers/char/hw_random/core.c > @@ -64,19 +64,6 @@ static size_t rng_buffer_size(void) > return RNG_BUFFER_SIZE; > } > > -static void add_early_randomness(struct hwrng *rng) > -{ > - int bytes_read; > - > - mutex_lock(&reading_mutex); > - bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0); > - mutex_unlock(&reading_mutex); > - if (bytes_read > 0) { > - size_t entropy = bytes_read * 8 * rng->quality / 1024; > - add_hwgenerator_randomness(rng_fillbuf, bytes_read, entropy, false); > - } > -} > - > static inline void cleanup_rng(struct kref *kref) > { > struct hwrng *rng = container_of(kref, struct hwrng, ref); > @@ -340,13 +327,12 @@ static ssize_t rng_current_store(struct device *dev, > const char *buf, size_t len) > { > int err; > - struct hwrng *rng, *old_rng, *new_rng; > + struct hwrng *rng, *new_rng; > > err = mutex_lock_interruptible(&rng_mutex); > if (err) > return -ERESTARTSYS; > > - old_rng = current_rng; > if (sysfs_streq(buf, "")) { > err = enable_best_rng(); > } else { > @@ -362,11 +348,8 @@ static ssize_t rng_current_store(struct device *dev, > new_rng = get_current_rng_nolock(); > mutex_unlock(&rng_mutex); > > - if (new_rng) { > - if (new_rng != old_rng) > - add_early_randomness(new_rng); > + if (new_rng) > put_rng(new_rng); > - } > > return err ? : len; > } > @@ -544,7 +527,6 @@ int hwrng_register(struct hwrng *rng) > { > int err = -EINVAL; > struct hwrng *tmp; > - bool is_new_current = false; > > if (!rng->name || (!rng->data_read && !rng->read)) > goto out; > @@ -573,25 +555,8 @@ int hwrng_register(struct hwrng *rng) > err = set_current_rng(rng); > if (err) > goto out_unlock; > - /* to use current_rng in add_early_randomness() we need > - * to take a ref > - */ > - is_new_current = true; > - kref_get(&rng->ref); > } > mutex_unlock(&rng_mutex); > - if (is_new_current || !rng->init) { > - /* > - * Use a new device's input to add some randomness to > - * the system. If this rng device isn't going to be > - * used right away, its init function hasn't been > - * called yet by set_current_rng(); so only use the > - * randomness from devices that don't need an init callback > - */ > - add_early_randomness(rng); > - } > - if (is_new_current) > - put_rng(rng); > return 0; > out_unlock: > mutex_unlock(&rng_mutex); > @@ -602,12 +567,11 @@ EXPORT_SYMBOL_GPL(hwrng_register); > > void hwrng_unregister(struct hwrng *rng) > { > - struct hwrng *old_rng, *new_rng; > + struct hwrng *new_rng; > int err; > > mutex_lock(&rng_mutex); > > - old_rng = current_rng; > list_del(&rng->list); > complete_all(&rng->dying); > if (current_rng == rng) { > @@ -626,11 +590,8 @@ void hwrng_unregister(struct hwrng *rng) > } else > mutex_unlock(&rng_mutex); > > - if (new_rng) { > - if (old_rng != new_rng) > - add_early_randomness(new_rng); > + if (new_rng) > put_rng(new_rng); > - } > > wait_for_completion(&rng->cleanup_done); > } I have no doubts that such thread would not exist, so: Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org> BR, Jarkko
On Wed, May 22, 2024 at 01:37:54PM +0800, Herbert Xu wrote: > On Tue, May 21, 2024 at 03:37:16PM -0400, Nícolas F. R. A. Prado wrote: > > > > FWIW this patch fixes the warning. So feel free to add > > > > Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com> > > Could you please test this patch instead? > > ---8<--- > A potential deadlock was reported with the config file at > > https://web.archive.org/web/20240522052129/https://0x0.st/XPN_.txt > > In this particular configuration, the deadlock doesn't exist because > the warning triggered at a point before modules were even available. > However, the deadlock can be real because any module loaded would > invoke async_synchronize_full. > > The issue is spurious for software crypto algorithms which aren't > themselves involved in async probing. However, it would be hard to > avoid for a PCI crypto driver using async probing. > > In this particular call trace, the problem is easily avoided because > the only reason the module is being requested during probing is the > add_early_randomness call in the hwrng core. This feature is > vestigial since there is now a kernel thread dedicated to doing > exactly this. > > So remove add_early_randomness as it is no longer needed. > > Reported-by: Nícolas F. R. A. Prado <nfraprado@collabora.com> > Reported-by: Eric Biggers <ebiggers@kernel.org> > Fixes: 1b6d7f9eb150 ("tpm: add session encryption protection to tpm2_get_random()") > Link: https://lore.kernel.org/r/119dc5ed-f159-41be-9dda-1a056f29888d@notapiano/ > Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> This patch also fixes the warning. Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com> Thanks, Nícolas
On Tue, 21 May 2024 at 22:38, Herbert Xu <herbert@gondor.apana.org.au> wrote: > > In this particular configuration, the deadlock doesn't exist because > the warning triggered at a point before modules were even available. > However, the deadlock can be real because any module loaded would > invoke async_synchronize_full. I think this crapectomy is good regardless of any deadlock - the "register this driver" should not just blindly call back into the driver. That said, looking at the code in question, there are other oddities going on. Even the "we found a favorite new rng" case looks rather strange. The thread we use - nice and asynchronous - seems to sleep only if the randomness source is emptied. What if you have a really good source of hw randomness? That looks like a busy loop to me, but hopefully I'm missing something obvious. So I think this hw_random code has other serious issues, and I get the feeling there might be more code that needs looking at.. Linus
On Wed, May 22, 2024 at 03:53:23PM -0700, Linus Torvalds wrote: > > That said, looking at the code in question, there are other oddities > going on. Even the "we found a favorite new rng" case looks rather > strange. The thread we use - nice and asynchronous - seems to sleep > only if the randomness source is emptied. > > What if you have a really good source of hw randomness? That looks > like a busy loop to me, but hopefully I'm missing something obvious. Yes that does look strange. So I dug up the original patch at https://lore.kernel.org/all/20140317165012.GC1763@lst.de/ and therein lies the answer. It's relying on random.c to push back when the amount of new entropy exceeds what it needs. IOW we will sleep via add_hwgenerator_randomness when random.c decides that enough is enough. In fact the rate is much less now compared to when the patch was first applied. Cheers,
On Wed, May 22, 2024 at 02:51:36PM +0300, Jarkko Sakkinen wrote: > > What is the kthread doing this currently? It's hwrng_fillfn. Cheers,
On Thu May 23, 2024 at 7:49 AM EEST, Herbert Xu wrote: > On Wed, May 22, 2024 at 03:53:23PM -0700, Linus Torvalds wrote: > > > > That said, looking at the code in question, there are other oddities > > going on. Even the "we found a favorite new rng" case looks rather > > strange. The thread we use - nice and asynchronous - seems to sleep > > only if the randomness source is emptied. > > > > What if you have a really good source of hw randomness? That looks > > like a busy loop to me, but hopefully I'm missing something obvious. > > Yes that does look strange. So I dug up the original patch at > > https://lore.kernel.org/all/20140317165012.GC1763@lst.de/ > > and therein lies the answer. It's relying on random.c to push back > when the amount of new entropy exceeds what it needs. IOW we will > sleep via add_hwgenerator_randomness when random.c decides that > enough is enough. In fact the rate is much less now compared to > when the patch was first applied. Just throwing something because came to mind, not a serious suggestion. In crypto_larval_lookup I see statements like this: request_module("crypto-%s", name); You could potentially bake up a section/table to vmlinux which would have entries like: "module name", 1/0 '1' would mean built-in. Then for early randomness use only stuff that is built-in. Came to mind from arch/x86/realmode for which I baked in a table for relocation (this was a collaborative work with H. Peter Anvin in 2012 to make trampoline code relocatable but is still a legit example to do such shenanigans in a subystem). BR, Jarkko
On Thu, May 23, 2024 at 12:53:04PM +0300, Jarkko Sakkinen wrote: > > Just throwing something because came to mind, not a serious suggestion. > > In crypto_larval_lookup I see statements like this: > > request_module("crypto-%s", name); > > You could potentially bake up a section/table to vmlinux which would > have entries like: > > "module name", 1/0 > > '1' would mean built-in. Then for early randomness use only stuff > that is built-in. This early random stuff is obsolete not just because we have a kernel thread doing the same thing, but moreover random.c itself has been modified so that it is no longer starved of entropy on startup. There is no reason to feed any early randomness. Cheers,
On Thu May 23, 2024 at 12:53 PM EEST, Jarkko Sakkinen wrote: > On Thu May 23, 2024 at 7:49 AM EEST, Herbert Xu wrote: > > On Wed, May 22, 2024 at 03:53:23PM -0700, Linus Torvalds wrote: > > > > > > That said, looking at the code in question, there are other oddities > > > going on. Even the "we found a favorite new rng" case looks rather > > > strange. The thread we use - nice and asynchronous - seems to sleep > > > only if the randomness source is emptied. > > > > > > What if you have a really good source of hw randomness? That looks > > > like a busy loop to me, but hopefully I'm missing something obvious. > > > > Yes that does look strange. So I dug up the original patch at > > > > https://lore.kernel.org/all/20140317165012.GC1763@lst.de/ > > > > and therein lies the answer. It's relying on random.c to push back > > when the amount of new entropy exceeds what it needs. IOW we will > > sleep via add_hwgenerator_randomness when random.c decides that > > enough is enough. In fact the rate is much less now compared to > > when the patch was first applied. > > Just throwing something because came to mind, not a serious suggestion. > > In crypto_larval_lookup I see statements like this: > > request_module("crypto-%s", name); > > You could potentially bake up a section/table to vmlinux which would > have entries like: > > "module name", 1/0 > > '1' would mean built-in. Then for early randomness use only stuff > that is built-in. > > Came to mind from arch/x86/realmode for which I baked in a table > for relocation (this was a collaborative work with H. Peter Anvin > in 2012 to make trampoline code relocatable but is still a legit > example to do such shenanigans in a subystem). This could be even parameter in __request_module() itself e.g. int __request_module(bool wait, bool builtin_only, const char *fmt, ...); And would not matter which initcall layer we are running at. BR, Jarkko
On Thu May 23, 2024 at 12:58 PM EEST, Herbert Xu wrote: > On Thu, May 23, 2024 at 12:53:04PM +0300, Jarkko Sakkinen wrote: > > > > Just throwing something because came to mind, not a serious suggestion. > > > > In crypto_larval_lookup I see statements like this: > > > > request_module("crypto-%s", name); > > > > You could potentially bake up a section/table to vmlinux which would > > have entries like: > > > > "module name", 1/0 > > > > '1' would mean built-in. Then for early randomness use only stuff > > that is built-in. > > This early random stuff is obsolete not just because we have a > kernel thread doing the same thing, but moreover random.c itself > has been modified so that it is no longer starved of entropy on > startup. There is no reason to feed any early randomness. As a feature that would still sometimes nice to have. I've sometimes wished there was "lsmod -b" or similar to display built-in stuff ;-) So overally I think it was good to have at least documented here... BR, Jarkko
On Thu, 23 May 2024 12:49:50 +0800 Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Wed, May 22, 2024 at 03:53:23PM -0700, Linus Torvalds wrote: > > > > That said, looking at the code in question, there are other oddities > > going on. Even the "we found a favorite new rng" case looks rather > > strange. The thread we use - nice and asynchronous - seems to sleep > > only if the randomness source is emptied. > > > > What if you have a really good source of hw randomness? That looks > > like a busy loop to me, but hopefully I'm missing something obvious. > > Yes that does look strange. So I dug up the original patch at > > https://lore.kernel.org/all/20140317165012.GC1763@lst.de/ > > and therein lies the answer. It's relying on random.c to push back > when the amount of new entropy exceeds what it needs. IOW we will > sleep via add_hwgenerator_randomness when random.c decides that > enough is enough. Yes, I thought that this was the obvious choice, the lesser evil. If it just discarded the excess and returned immediately I had expected some kernel threads to spin constantly. > In fact the rate is much less now compared to > when the patch was first applied. You mean the rate of required entropy? Doesn't that make things worse? Maybe redesign the API to use a pull scheme? RNGs register at the randomness facility with a callback that can be used when there is a need for fresh entropy? Torsten
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index f5c71a617a99..4084df65c9fa 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -64,19 +64,6 @@ static size_t rng_buffer_size(void) return RNG_BUFFER_SIZE; } -static void add_early_randomness(struct hwrng *rng) -{ - int bytes_read; - - mutex_lock(&reading_mutex); - bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0); - mutex_unlock(&reading_mutex); - if (bytes_read > 0) { - size_t entropy = bytes_read * 8 * rng->quality / 1024; - add_hwgenerator_randomness(rng_fillbuf, bytes_read, entropy, false); - } -} - static inline void cleanup_rng(struct kref *kref) { struct hwrng *rng = container_of(kref, struct hwrng, ref); @@ -340,13 +327,12 @@ static ssize_t rng_current_store(struct device *dev, const char *buf, size_t len) { int err; - struct hwrng *rng, *old_rng, *new_rng; + struct hwrng *rng, *new_rng; err = mutex_lock_interruptible(&rng_mutex); if (err) return -ERESTARTSYS; - old_rng = current_rng; if (sysfs_streq(buf, "")) { err = enable_best_rng(); } else { @@ -362,11 +348,8 @@ static ssize_t rng_current_store(struct device *dev, new_rng = get_current_rng_nolock(); mutex_unlock(&rng_mutex); - if (new_rng) { - if (new_rng != old_rng) - add_early_randomness(new_rng); + if (new_rng) put_rng(new_rng); - } return err ? : len; } @@ -544,7 +527,6 @@ int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *tmp; - bool is_new_current = false; if (!rng->name || (!rng->data_read && !rng->read)) goto out; @@ -573,25 +555,8 @@ int hwrng_register(struct hwrng *rng) err = set_current_rng(rng); if (err) goto out_unlock; - /* to use current_rng in add_early_randomness() we need - * to take a ref - */ - is_new_current = true; - kref_get(&rng->ref); } mutex_unlock(&rng_mutex); - if (is_new_current || !rng->init) { - /* - * Use a new device's input to add some randomness to - * the system. If this rng device isn't going to be - * used right away, its init function hasn't been - * called yet by set_current_rng(); so only use the - * randomness from devices that don't need an init callback - */ - add_early_randomness(rng); - } - if (is_new_current) - put_rng(rng); return 0; out_unlock: mutex_unlock(&rng_mutex); @@ -602,12 +567,11 @@ EXPORT_SYMBOL_GPL(hwrng_register); void hwrng_unregister(struct hwrng *rng) { - struct hwrng *old_rng, *new_rng; + struct hwrng *new_rng; int err; mutex_lock(&rng_mutex); - old_rng = current_rng; list_del(&rng->list); complete_all(&rng->dying); if (current_rng == rng) { @@ -626,11 +590,8 @@ void hwrng_unregister(struct hwrng *rng) } else mutex_unlock(&rng_mutex); - if (new_rng) { - if (old_rng != new_rng) - add_early_randomness(new_rng); + if (new_rng) put_rng(new_rng); - } wait_for_completion(&rng->cleanup_done); }