diff mbox

[2/3] hwrng: stm32 - add support for STM32 HW RNG

Message ID 1443904519-24012-3-git-send-email-daniel.thompson@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Thompson Oct. 3, 2015, 8:35 p.m. UTC
Add support for STMicroelectronics STM32 random number generator.

The config value defaults to N, reflecting the fact that STM32 is a
very low resource microcontroller platform and unlikely to be targeted
by any "grown up" defconfigs.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 drivers/char/hw_random/Kconfig     |  12 +++
 drivers/char/hw_random/Makefile    |   1 +
 drivers/char/hw_random/stm32-rng.c | 192 +++++++++++++++++++++++++++++++++++++
 3 files changed, 205 insertions(+)
 create mode 100644 drivers/char/hw_random/stm32-rng.c

Comments

kernel test robot Oct. 3, 2015, 9:02 p.m. UTC | #1
Hi Daniel,

[auto build test results on v4.3-rc3 -- if it's inappropriate base, please ignore]


coccinelle warnings: (new ones prefixed by >>)

>> drivers/char/hw_random/stm32-rng.c:164:1-4: WARNING: end returns can be simpified

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Maxime Coquelin Oct. 4, 2015, 8:52 a.m. UTC | #2
Hi Daniel,

On 10/03/2015 10:35 PM, Daniel Thompson wrote:
> Add support for STMicroelectronics STM32 random number generator.
>
> The config value defaults to N, reflecting the fact that STM32 is a
> very low resource microcontroller platform and unlikely to be targeted
> by any "grown up" defconfigs.
>
> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
> ---
>   drivers/char/hw_random/Kconfig     |  12 +++
>   drivers/char/hw_random/Makefile    |   1 +
>   drivers/char/hw_random/stm32-rng.c | 192 +++++++++++++++++++++++++++++++++++++
>   3 files changed, 205 insertions(+)
>   create mode 100644 drivers/char/hw_random/stm32-rng.c
<snip>
> diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
> new file mode 100644
> index 000000000000..37dfa5fca105
> --- /dev/null
> +++ b/drivers/char/hw_random/stm32-rng.c
> @@ -0,0 +1,192 @@
<snip>
> +#include <linux/clk.h>
> +#include <linux/delay.h>
> +#include <linux/hw_random.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of_platform.h>
> +#include <linux/slab.h>
> +
> +#define RNG_CR 0x00
> +#define RNG_CR_RNGEN BIT(2)
> +
> +#define RNG_SR 0x04
> +#define RNG_SR_SEIS BIT(6)
> +#define RNG_SR_CEIS BIT(5)
> +#define RNG_SR_DRDY BIT(0)
> +
> +#define RNG_DR 0x08
> +
> +/*
> + * It takes 40 cycles @ 48MHz to generate each random number (e.g. <1us).
> + * At the time of writing STM32 parts max out at ~200MHz meaning a timeout
> + * of 500 leaves us a very comfortable margin for error. The loop to which
> + * the timeout applies takes at least 4 instructions per cycle so the
> + * timeout is enough to take us up to multi-GHz parts!
> + */
> +#define RNG_TIMEOUT 500
> +
> +struct stm32_rng_private {
> +	struct hwrng rng;
> +	void __iomem *base;
> +	struct clk *clk;
> +};
> +
> +static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
> +{
> +	struct stm32_rng_private *priv =
> +	    container_of(rng, struct stm32_rng_private, rng);
> +	u32 cr, sr;
> +	int retval = 0;
> +
> +	/* enable random number generation */
> +	clk_enable(priv->clk);
> +	cr = readl(priv->base + RNG_CR);
> +	writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
> +
> +	while (max > sizeof(u32)) {
> +		sr = readl(priv->base + RNG_SR);
> +		if (!sr && wait) {
> +			unsigned int timeout = RNG_TIMEOUT;
> +
> +			do {
> +				cpu_relax();
> +				sr = readl(priv->base + RNG_SR);
> +			} while (!sr && --timeout);
> +		}
> +
> +		/* Has h/ware error dection been triggered? */
> +		if (WARN_ON(sr & (RNG_SR_SEIS | RNG_SR_CEIS)))
Maybe you should instead use WARN_ONCE?
Because from what I understand from the datasheet, the CEIS bit indicates an
error with the clock configuration.
If that happens, it is likely the same error will occur each time this
function is called.

> +			break;
> +
> +		/* No data ready... */
> +		if (!sr)
> +			break;
Maybe you could perform this check before the error detection, as if 
!sr, the HW error condition will be always false.
> +
> +		*(u32 *)data = readl(priv->base + RNG_DR);
> +
> +		retval += sizeof(u32);
> +		data += sizeof(u32);
> +		max -= sizeof(u32);
> +	}
> +
> +	/* disable the generator */
> +	writel(cr, priv->base + RNG_CR);
> +	clk_disable(priv->clk);
> +
> +	return retval || !wait ? retval : -EIO;
> +}

Couldn't we use "_relaxed" versions of readl/writel?
It might save some unneeded barriers.

> +static int stm32_rng_probe(struct platform_device *ofdev)
> +{
> +	struct device *dev = &ofdev->dev;
> +	struct device_node *np = ofdev->dev.of_node;
> +	struct stm32_rng_private *priv;
> +	struct resource res;
> +	int err;
> +
> +	priv = devm_kzalloc(dev, sizeof(struct stm32_rng_private), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	err = of_address_to_resource(np, 0, &res);
> +	if (err)
> +		return err;
> +
> +	priv->base = devm_ioremap_resource(dev, &res);
> +	if (IS_ERR(priv->base))
> +		return PTR_ERR(priv->base);
> +
> +	priv->clk = devm_clk_get(&ofdev->dev, NULL);
> +	if (IS_ERR(priv->clk))
> +		return PTR_ERR(priv->clk);
> +
> +	dev_set_drvdata(dev, priv);
> +
> +	priv->rng.name = dev_driver_string(dev),
> +	priv->rng.init = stm32_rng_init,
> +	priv->rng.cleanup = stm32_rng_cleanup,
> +	priv->rng.read = stm32_rng_read,
> +	priv->rng.priv = (unsigned long) dev;
> +
> +	err = hwrng_register(&priv->rng);
> +	if (err)
> +		return err;
> +
> +	return 0;
As detected with Coccinelle by Fengguang build system, you could simplify:
return hwrng_register(&priv->rng);

Regards,
Maxime
Linus Walleij Oct. 4, 2015, 10:32 a.m. UTC | #3
On Sat, Oct 3, 2015 at 10:35 PM, Daniel Thompson
<daniel.thompson@linaro.org> wrote:

> Add support for STMicroelectronics STM32 random number generator.
>
> The config value defaults to N, reflecting the fact that STM32 is a
> very low resource microcontroller platform and unlikely to be targeted
> by any "grown up" defconfigs.
>
> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>

Incidentally both you and Lee Jones submitted similar RNG drivers from
the same company (ST) in recent weeks:

Lee's ST thing:
http://marc.info/?l=linux-crypto-vger&m=144249760524971&w=2

Daniel's ST thing:
http://marc.info/?l=linux-crypto-vger&m=144390463810134&w=2

And we also have from old ST:
drivers/char/hw_random/nomadik-rng.c

1. Are these similar IPs that could be unified in a driver, just different
offsets in memory?

2. Could you at least cross-review each others drivers because both
tight loops to read bytes seem to contain similar semantics.

3. I took out the datasheet for Nomadik STn8820 and it seems that
the hardware is very similar to what this driver is trying to drive.
CR, SR and DR are in the same place, can you check if you also even
have the PrimeCell magic in the words at offset 0xfe0 thru 0xffc?

In any case it seems likely that this driver should supersede and replace
drivers/char/hw_random/nomadik-rng.c
still using the PrimeCell bus, and if it doesn't have the PrimeCell
IDs in hardware, this can be specified in the device tree using
arm,primecell-periphid = <0x000805e1>; in the node if need be.
The in-tree driver is dangerously simple and assumes too much IMO.

Now the rest:

> +       /* enable random number generation */
> +       clk_enable(priv->clk);
> +       cr = readl(priv->base + RNG_CR);
> +       writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);

The Nomadik datasheet says this works the inverse way: setting
bit 2 to 1 *disables* the RNG. Can you double-check this?

The Nomadik version also has a TSTCLK bit 3, that should
always be set and loop-back checked in SR bit 1 to see that the block
is properly clocked. (Read data will hang on an AHB stall if it's
unclocked) Is this missing from STM32?

> + sr = readl(priv->base + RNG_SR);

I strongly recommend that you use readl_relaxed() for this and
all other reads on the hotpath, the Nomadik uses __raw_readl()
but that is just for the same reasons that readl_relaxed() should
be used: we just wanna read, not clean buffers etc.

> + /* Has h/ware error dection been triggered? */
> + if (WARN_ON(sr & (RNG_SR_SEIS | RNG_SR_CEIS)))
> +     break;
> +
> + /* No data ready... */
> + if (!sr)
> +     break;

This assumes that only bits 6, 5 and 0 are ever used in this hardware.
Are you sure? On Nomadik DRDY bit 0 is the same, bit 1
is the clock test bit mentioned above, bit 2 is FAULT set if an invalid
bit sequence is detected and should definitely be checked if
the HW supports it. Please mask explicitly for DRDY at least.

The bit layout gives at hand that this is indeed a derived IP block,
I wonder what bits 3 & 4 may be used for in some or all
revisions?

Then this construct:

> +static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
> +{
(...)
> + /* enable random number generation */
> + clk_enable(priv->clk);
> + cr = readl(priv->base + RNG_CR);
> + writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
(...)
> + /* disable the generator */
> + writel(cr, priv->base + RNG_CR);
> + clk_disable(priv->clk);

This is in the hotpath where megabytes of data may be read out
by consecutive reads. I think it's wise to move the clock enable/disable
and also the write to cr to enable/disable the block to runtime PM
hooks, so that you can e.g. set some hysteresis around the disablement
with pm_runtime_set_autosuspend_delay(&dev->dev, 50);
pm_runtime_use_autosuspend(&dev->dev);pm_runtime_put_autosuspend().
For a primecell check the usage in drivers/mmc/host/mmci.c
for a complex usecase or e.g. drivers/hwtracing/coresight/coresight-tpiu.c
for a simpler usecase.

For the performance hints I guess you can even test whether
what I'm saying makes sense or not by reading some megabytes
of random and timing it.

Yours,
Linus Walleij
Daniel Thompson Oct. 5, 2015, 9:22 a.m. UTC | #4
On 4 October 2015 at 11:32, Linus Walleij <linus.walleij@linaro.org> wrote:
> On Sat, Oct 3, 2015 at 10:35 PM, Daniel Thompson
> <daniel.thompson@linaro.org> wrote:
>
>> Add support for STMicroelectronics STM32 random number generator.
>>
>> The config value defaults to N, reflecting the fact that STM32 is a
>> very low resource microcontroller platform and unlikely to be targeted
>> by any "grown up" defconfigs.
>>
>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>
> Incidentally both you and Lee Jones submitted similar RNG drivers from
> the same company (ST) recent weeks:
>
> Lee's ST thing:
> http://marc.info/?l=linux-crypto-vger&m=144249760524971&w=2
>
> Daniel's ST thing:
> http://marc.info/?l=linux-crypto-vger&m=144390463810134&w=2
>
> And we also have from old ST:
> drivers/char/hw_random/nomadik-rng.c
>
> 1. Are these similar IPs that could be unified in a driver, just different
> offsets in memory?

I don't think so. I can't see any commonality of register structure or
data transfer approach (neither PIO nor DMA).

There have been exceptions but, in general, STM32 microcontrollers
seem to have very little in common with ST SoC/ASIC devices except for
the manufacturers id.


> 2. Could you at least cross-review each others drivers because both
> tight loops to read bytes seem to contain similar semantics.

I'll take a look although, as above, I suspect any similarities in
structure will be based on the problem domain rather than shared
heritage in the hardware design.


> 3. I took out the datasheet for Nomadik STn8820 and it seems that
> the hardware is very similar to what this driver is trying to drive.
> CR, SR and DR are in the same place, can you check if you also even
> have the PrimeCell magic in the words at offset 0xfe0 thru 0xffc?

The register window for the STM32 RNG is only 0x400 (and I'll fix the
DT for v2 ;-) ) so the STM32 version isn't primecell-like.


> In any case it seems likely that this driver should supercede and replace
> drivers/char/hw_random/nomadik-rng.c
> still using the PrimeCell bus, and if it doesn't have the PrimeCell
> IDs in hardware, this can be specified in the device tree using
> arm,primecell-periphid = <0x000805e1>; in the node if need be.
> The in-tree driver is dangerously simple and assume too much IMO.

Not sure about this, but I'll take a closer look. There is a certain
family resemblance in the register set but there are significant
differences in data transfer between the Nomadik and STM32 hardware.


>
> Now the rest:
>
>> +       /* enable random number generation */
>> +       clk_enable(priv->clk);
>> +       cr = readl(priv->base + RNG_CR);
>> +       writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
>
> The Nomadik datasheet says this works the inverse way: setting
> bit 2 to 1 *disables* the RNG. Can you double-check this?

Nomadik driver doesn't seem to have to set anything to enable the RNG,
so assuming the CR resets to zero on Nomadik I guess the bit
meaning is inverted on the two platforms.


> The Nomadik version also has a TSTCLK bit 3, that should
> always be set and loop-back checked in SR bit 1 to see that the block
> is properly clocked. (Read data will hang on an AHB stall if it's
> unclocked) Is this missing from STM32?

Bit 3 is reserved on STM32. I don't think the STM32 has logic to hang
the bus if the cell doesn't have data ready (it's not mentioned in the
datasheet but I've not verified what happens if we read when things
are not ready).


>> + sr = readl(priv->base + RNG_SR);
>
> I strongly recommend that you use readl_relaxed() for this and
> all other reads on the hotpath, the Nomadik uses __raw_readl()
> but that is just for the same reasons that readl_relaxed() should
> be used: we just wanna read, not clean buffers etc.

I'll fix this. Choosing writel/readl was deliberate (STM32 is a niche
platform so I *really* wanted COMPILE_TEST to work) but this reason
is no longer relevant.


>> + /* Has h/ware error dection been triggered? */
>> + if (WARN_ON(sr & (RNG_SR_SEIS | RNG_SR_CEIS)))
>> +     break;
>> +
>> + /* No data ready... */
>> + if (!sr)
>> +     break;
>
> This assumes that only bits 6,5 and 0 are ever used in this hardware.
> Are you sure? On Nomadik DRDY bit 0 is the same, bit 1
> is the clock test bit mentioned above, bit 2 is FAULT set if an invalid
> bit sequence is detected and should definately be checked if
> the HW supports it. Please mask explicitly for DRDY at least.

There are also bits 1 and 2 but these are non-sticky versions of bits
5 and 6 so it should work... but I agree it's not very conservative.

I plan to combine both branches into a simple "if (sr != RNG_SR_DRDY)"
since any other value indicates an error condition that we should warn
about anyway.

> The bit layout gives at hand that this is indeed a derived IP block,
> I wonder what bits 3 & 4 may be used for in some or all
> revisions?
>
> Then this construct:
>
>> +static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
>> +{
> (...)
>> + /* enable random number generation */
>> + clk_enable(priv->clk);
>> + cr = readl(priv->base + RNG_CR);
>> + writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
> (...)
>> + /* disable the generator */
>> + writel(cr, priv->base + RNG_CR);
>> + clk_disable(priv->clk);
>
> This is in the hotpath where megabytes of data may be read out
> by consecutive reads. I think it's wise to move the clock enable/disable
> and also the write to cr to enable/disable the block to runtime PM
> hooks, so that you can e.g. set some hysteresis around the disablement
> with pm_runtime_set_autosuspend_delay(&dev->dev, 50);
> pm_runtime_use_autosuspend(&dev->dev);pm_runtime_put_autosuspend().
> For a primecell check the usage in drivers/mmc/host/mmci.c
> for a complex usecase or e.g. drivers/hwtracing/coresight/coresight-tpiu.c
> for a simpler usecase.
>
> For the performance hints I guess you can even test whether
> what I'm saying makes sense or not by reading some megabytes
> of random and timing it.

I'll have to benchmark this. clk_enable/disable have pretty simple
implementations on STM32 so there might not be much in it.

Insisting on minimal power (which I think is important in
microcontroller platforms) does hurt bandwidth more than you might
expect because the framework does not pass big userspace reads to the
underlying driver. This means the costs of clock control are spread
over 32 bytes rather than over the whole of a big read.


Daniel.
Daniel Thompson Oct. 5, 2015, 9:26 a.m. UTC | #5
On 4 October 2015 at 09:52, Maxime Coquelin <mcoquelin.stm32@gmail.com> wrote:
> Hi Daniel,
>
> On 10/03/2015 10:35 PM, Daniel Thompson wrote:
>>
>> Add support for STMicroelectronics STM32 random number generator.
>>
>> The config value defaults to N, reflecting the fact that STM32 is a
>> very low resource microcontroller platform and unlikely to be targeted
>> by any "grown up" defconfigs.
>>
>> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
>> ---
>>   drivers/char/hw_random/Kconfig     |  12 +++
>>   drivers/char/hw_random/Makefile    |   1 +
>>   drivers/char/hw_random/stm32-rng.c | 192
>> +++++++++++++++++++++++++++++++++++++
>>   3 files changed, 205 insertions(+)
>>   create mode 100644 drivers/char/hw_random/stm32-rng.c
>
> <snip>
>>
>> diff --git a/drivers/char/hw_random/stm32-rng.c
>> b/drivers/char/hw_random/stm32-rng.c
>> new file mode 100644
>> index 000000000000..37dfa5fca105
>> --- /dev/null
>> +++ b/drivers/char/hw_random/stm32-rng.c
>> @@ -0,0 +1,192 @@
>
> <snip>
>
>> +#include <linux/clk.h>
>> +#include <linux/delay.h>
>> +#include <linux/hw_random.h>
>> +#include <linux/io.h>
>> +#include <linux/kernel.h>
>> +#include <linux/module.h>
>> +#include <linux/of_address.h>
>> +#include <linux/of_platform.h>
>> +#include <linux/slab.h>
>> +
>> +#define RNG_CR 0x00
>> +#define RNG_CR_RNGEN BIT(2)
>> +
>> +#define RNG_SR 0x04
>> +#define RNG_SR_SEIS BIT(6)
>> +#define RNG_SR_CEIS BIT(5)
>> +#define RNG_SR_DRDY BIT(0)
>> +
>> +#define RNG_DR 0x08
>> +
>> +/*
>> + * It takes 40 cycles @ 48MHz to generate each random number (e.g. <1us).
>> + * At the time of writing STM32 parts max out at ~200MHz meaning a
>> timeout
>> + * of 500 leaves us a very comfortable margin for error. The loop to
>> which
>> + * the timeout applies takes at least 4 instructions per cycle so the
>> + * timeout is enough to take us up to multi-GHz parts!
>> + */
>> +#define RNG_TIMEOUT 500
>> +
>> +struct stm32_rng_private {
>> +       struct hwrng rng;
>> +       void __iomem *base;
>> +       struct clk *clk;
>> +};
>> +
>> +static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool
>> wait)
>> +{
>> +       struct stm32_rng_private *priv =
>> +           container_of(rng, struct stm32_rng_private, rng);
>> +       u32 cr, sr;
>> +       int retval = 0;
>> +
>> +       /* enable random number generation */
>> +       clk_enable(priv->clk);
>> +       cr = readl(priv->base + RNG_CR);
>> +       writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
>> +
>> +       while (max > sizeof(u32)) {
>> +               sr = readl(priv->base + RNG_SR);
>> +               if (!sr && wait) {
>> +                       unsigned int timeout = RNG_TIMEOUT;
>> +
>> +                       do {
>> +                               cpu_relax();
>> +                               sr = readl(priv->base + RNG_SR);
>> +                       } while (!sr && --timeout);
>> +               }
>> +
>> +               /* Has h/ware error dection been triggered? */
>> +               if (WARN_ON(sr & (RNG_SR_SEIS | RNG_SR_CEIS)))
>
> Maybe you should instead use WARN_ONCE?
> Because from what I understand in the datasheet, CEIS bit indicates and
> error with clock configuration.
> If that happens, it is likely the same error will occur each time this
> function will be called.

Ok.


>> +                       break;
>> +
>> +               /* No data ready... */
>> +               if (!sr)
>> +                       break;
>
> Maybe you could perform this check before the error detection, as if !sr,
> the HW error condition will be always false.

I plan to combine both checks since timing out is still a serious
error even if the h/ware didn't detect it.

>>
>> +
>> +               *(u32 *)data = readl(priv->base + RNG_DR);
>> +
>> +               retval += sizeof(u32);
>> +               data += sizeof(u32);
>> +               max -= sizeof(u32);
>> +       }
>> +
>> +       /* disable the generator */
>> +       writel(cr, priv->base + RNG_CR);
>> +       clk_disable(priv->clk);
>> +
>> +       return retval || !wait ? retval : -EIO;
>> +}
>
>
> Couldn't we use "_relaxed" versions of readl/writel?
> I might save some not needed barriers.

Will fix. I had a "historic" reason to avoid _relaxed but this is no
longer relevant.


>> +static int stm32_rng_probe(struct platform_device *ofdev)
>> +{
>> +       struct device *dev = &ofdev->dev;
>> +       struct device_node *np = ofdev->dev.of_node;
>> +       struct stm32_rng_private *priv;
>> +       struct resource res;
>> +       int err;
>> +
>> +       priv = devm_kzalloc(dev, sizeof(struct stm32_rng_private),
>> GFP_KERNEL);
>> +       if (!priv)
>> +               return -ENOMEM;
>> +
>> +       err = of_address_to_resource(np, 0, &res);
>> +       if (err)
>> +               return err;
>> +
>> +       priv->base = devm_ioremap_resource(dev, &res);
>> +       if (IS_ERR(priv->base))
>> +               return PTR_ERR(priv->base);
>> +
>> +       priv->clk = devm_clk_get(&ofdev->dev, NULL);
>> +       if (IS_ERR(priv->clk))
>> +               return PTR_ERR(priv->clk);
>> +
>> +       dev_set_drvdata(dev, priv);
>> +
>> +       priv->rng.name = dev_driver_string(dev),
>> +       priv->rng.init = stm32_rng_init,
>> +       priv->rng.cleanup = stm32_rng_cleanup,
>> +       priv->rng.read = stm32_rng_read,
>> +       priv->rng.priv = (unsigned long) dev;
>> +
>> +       err = hwrng_register(&priv->rng);
>> +       if (err)
>> +               return err;
>> +
>> +       return 0;
>
> As detected with Coccinelle by Fengguang build system, you could simplify:
> return hwrng_register(&priv->rng);

Will do.


Thanks

Daniel.
Daniel Thompson Oct. 11, 2015, 7:15 p.m. UTC | #6
On 5 October 2015 at 10:22, Daniel Thompson <daniel.thompson@linaro.org> wrote:
> On 4 October 2015 at 11:32, Linus Walleij <linus.walleij@linaro.org> wrote:
>> On Sat, Oct 3, 2015 at 10:35 PM, Daniel Thompson
>> 3. I took out the datasheet for Nomadik STn8820 and it seems that
>> the hardware is very similar to what this driver is trying to drive.
>> CR, SR and DR are in the same place, can you check if you also even
>> have the PrimeCell magic in the words at offset 0xfe0 thru 0xffc?
>
> The register window for the STM32 RNG is only 0x400 (and I'll fix the
> DT for v2 ;-) ) so the STM32 version isn't primecell-like.
>
>
>> In any case it seems likely that this driver should supercede and replace
>> drivers/char/hw_random/nomadik-rng.c
>> still using the PrimeCell bus, and if it doesn't have the PrimeCell
>> IDs in hardware, this can be specified in the device tree using
>> arm,primecell-periphid = <0x000805e1>; in the node if need be.
>> The in-tree driver is dangerously simple and assume too much IMO.
>
> Not sure about this, but I'll take a closer look. There is a certain
> family resemblance in the register set but there are significant
> differences in data transfer between the Nomadik and STM32 hardware.

Having looked at this I don't really think we would gain very much
from combining these drivers. The cells have different (albeit
slightly similar) register layouts, different ways to transfer data
(16- versus 32-bit) and different ways to test/report incorrect
clocking. In the case of the RNG the differences therefore span the
entirety of its feature set.

I think combining them just results in two drivers that happen to
share a file and I dislike having to add "fake" information to the
devicetree to make it hang together.

Is that convincing for you?


Daniel.
Daniel Thompson Oct. 11, 2015, 7:24 p.m. UTC | #7
On 5 October 2015 at 10:22, Daniel Thompson <daniel.thompson@linaro.org> wrote:
> On 4 October 2015 at 11:32, Linus Walleij <linus.walleij@linaro.org> wrote:
>> On Sat, Oct 3, 2015 at 10:35 PM, Daniel Thompson
>> <daniel.thompson@linaro.org> wrote:
>> Then this construct:
>>
>>> +static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
>>> +{
>> (...)
>>> + /* enable random number generation */
>>> + clk_enable(priv->clk);
>>> + cr = readl(priv->base + RNG_CR);
>>> + writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
>> (...)
>>> + /* disable the generator */
>>> + writel(cr, priv->base + RNG_CR);
>>> + clk_disable(priv->clk);
>>
>> This is in the hotpath where megabytes of data may be read out
>> by consecutive reads. I think it's wise to move the clock enable/disable
>> and also the write to cr to enable/disable the block to runtime PM
>> hooks, so that you can e.g. set some hysteresis around the disablement
>> with pm_runtime_set_autosuspend_delay(&dev->dev, 50);
>> pm_runtime_use_autosuspend(&dev->dev);pm_runtime_put_autosuspend().
>> For a primecell check the usage in drivers/mmc/host/mmci.c
>> for a complex usecase or e.g. drivers/hwtracing/coresight/coresight-tpiu.c
>> for a simpler usecase.
>>
>> For the performance hints I guess you can even test whether
>> what I'm saying makes sense or not by reading some megabytes
>> of random and timing it.
>
> I'll have to benchmark this. clk_enable/disable have pretty simple
> implementations on STM32 so there might not be much in it.

Well... I was extremely wrong about that!

Switching the driver over to autosuspend yielded a very significant
performance boost: ~1.1MiB/s to ~1.5MiB/s .

Very pleased with that!


Daniel.
diff mbox

Patch

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index f48cf11c655e..7930cc9b719c 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -359,6 +359,18 @@  config HW_RANDOM_XGENE
 
 	  If unsure, say Y.
 
+config HW_RANDOM_STM32
+	tristate "STMicroelectronics STM32 random number generator"
+	depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+	help
+	  This driver provides kernel-side support for the Random Number
+	  Generator hardware found on STM32 microcontrollers.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called stm32-rng.
+
+	  If unsure, say N.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 055bb01510ad..8b49c0f7abb1 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -31,3 +31,4 @@  obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
 obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o
 obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o
 obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
+obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
new file mode 100644
index 000000000000..37dfa5fca105
--- /dev/null
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -0,0 +1,192 @@ 
+/*
+ * Copyright (c) 2015, Daniel Thompson
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+#define RNG_CR 0x00	/* control register, offset from priv->base */
+#define RNG_CR_RNGEN BIT(2)	/* set in RNG_CR to enable random number generation */
+
+#define RNG_SR 0x04	/* status register, offset from priv->base */
+#define RNG_SR_SEIS BIT(6)	/* h/ware error indicator, checked in stm32_rng_read() */
+#define RNG_SR_CEIS BIT(5)	/* h/ware error indicator, checked in stm32_rng_read() */
+#define RNG_SR_DRDY BIT(0)	/* data-ready flag */
+
+#define RNG_DR 0x08	/* data register; each read yields one 32-bit word */
+
+/*
+ * It takes 40 cycles @ 48MHz to generate each random number (i.e. <1us).
+ * At the time of writing STM32 parts max out at ~200MHz meaning a timeout
+ * of 500 leaves us a very comfortable margin for error. The loop to which
+ * the timeout applies takes at least 4 instructions per iteration so the
+ * timeout is enough to take us up to multi-GHz parts!
+ */
+#define RNG_TIMEOUT 500	/* max polling iterations per word in stm32_rng_read() */
+
+struct stm32_rng_private {	/* per-device driver state, allocated in probe */
+	struct hwrng rng;	/* hwrng handle; callbacks recover priv via container_of() */
+	void __iomem *base;	/* ioremapped register window (RNG_CR/RNG_SR/RNG_DR) */
+	struct clk *clk;	/* clock obtained with devm_clk_get() in probe */
+};
+
+/*
+ * stm32_rng_read() - hwrng read callback: fill @data with random words.
+ * @rng:  hwrng handle embedded in struct stm32_rng_private
+ * @data: destination buffer, written one 32-bit word at a time
+ * @max:  capacity of @data in bytes
+ * @wait: if true, poll (at most RNG_TIMEOUT iterations per word) until the
+ *        status register becomes non-zero; if false, never poll
+ *
+ * The clock and the generator are switched on only for the duration of the
+ * call; the control register is restored to its previous value on exit.
+ *
+ * Return: the number of bytes stored in @data (a multiple of 4, possibly 0
+ * when @wait is false), a negative errno if the clock could not be enabled,
+ * or -EIO if @wait was set and not a single word could be read.
+ */
+static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct stm32_rng_private *priv =
+	    container_of(rng, struct stm32_rng_private, rng);
+	u32 cr, sr;
+	int retval = 0;
+	int err;
+
+	/* enable random number generation */
+	err = clk_enable(priv->clk);
+	if (err)
+		return err;
+	cr = readl(priv->base + RNG_CR);
+	writel(cr | RNG_CR_RNGEN, priv->base + RNG_CR);
+
+	/* ">=" so that the final (or only) word of the buffer is filled too */
+	while (max >= sizeof(u32)) {
+		/*
+		 * Relaxed accessors are enough on this hot path: only the
+		 * order of accesses to this one device matters here.
+		 */
+		sr = readl_relaxed(priv->base + RNG_SR);
+		if (!sr && wait) {
+			unsigned int timeout = RNG_TIMEOUT;
+
+			do {
+				cpu_relax();
+				sr = readl_relaxed(priv->base + RNG_SR);
+			} while (!sr && --timeout);
+		}
+
+		/*
+		 * Has h/ware error detection been triggered? Warn only once
+		 * since the same error is likely to recur on every call.
+		 */
+		if (WARN_ONCE(sr & (RNG_SR_SEIS | RNG_SR_CEIS),
+			      "bad RNG status - %x\n", sr))
+			break;
+
+		/* No data ready (timed out, or caller asked us not to wait) */
+		if (!(sr & RNG_SR_DRDY))
+			break;
+
+		*(u32 *)data = readl_relaxed(priv->base + RNG_DR);
+
+		retval += sizeof(u32);
+		data += sizeof(u32);
+		max -= sizeof(u32);
+	}
+
+	/* disable the generator */
+	writel(cr, priv->base + RNG_CR);
+	clk_disable(priv->clk);
+
+	return retval || !wait ? retval : -EIO;
+}
+
+/*
+ * stm32_rng_init() - hwrng init callback.
+ * @rng: hwrng handle embedded in struct stm32_rng_private
+ *
+ * Prepares the clock (it stays prepared until stm32_rng_cleanup()) and
+ * clears the SEIS/CEIS error indicators in the status register.
+ *
+ * Return: 0 on success, or the negative errno reported by the clk API.
+ */
+static int stm32_rng_init(struct hwrng *rng)
+{
+	struct stm32_rng_private *priv =
+	    container_of(rng, struct stm32_rng_private, rng);
+	int err;
+	u32 sr;
+
+	err = clk_prepare(priv->clk);
+	if (err)
+		return err;
+
+	/*
+	 * Enable the clock around the register access, exactly as
+	 * stm32_rng_read() does, so the write is not issued to a gated
+	 * peripheral. Drop the prepare count again if that fails.
+	 */
+	err = clk_enable(priv->clk);
+	if (err) {
+		clk_unprepare(priv->clk);
+		return err;
+	}
+
+	/* clear error indicators */
+	sr = readl(priv->base + RNG_SR);
+	sr &= ~(RNG_SR_SEIS | RNG_SR_CEIS);
+	writel(sr, priv->base + RNG_SR);
+
+	clk_disable(priv->clk);
+
+	return 0;
+}
+
+/* hwrng cleanup callback: drop the clock prepare count taken by stm32_rng_init(). */
+static void stm32_rng_cleanup(struct hwrng *rng)
+{
+	struct stm32_rng_private *state;
+
+	state = container_of(rng, struct stm32_rng_private, rng);
+	clk_unprepare(state->clk);
+}
+
+/*
+ * Platform remove callback: unregister the hwrng that probe registered.
+ * Always returns 0.
+ */
+static int stm32_rng_remove(struct platform_device *ofdev)
+{
+	struct stm32_rng_private *state = dev_get_drvdata(&ofdev->dev);
+
+	hwrng_unregister(&state->rng);
+	return 0;
+}
+
+/*
+ * stm32_rng_probe() - platform probe callback.
+ * @ofdev: platform device created from the matching device-tree node
+ *
+ * Allocates the private state, maps the first register resource of the
+ * node, looks up the clock and registers the hwrng. Every resource is
+ * devm-managed, so the error paths need no explicit unwinding.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+static int stm32_rng_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	struct device_node *np = dev->of_node;
+	struct stm32_rng_private *priv;
+	struct resource res;
+	int err;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	err = of_address_to_resource(np, 0, &res);
+	if (err)
+		return err;
+
+	priv->base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	/* stm32_rng_remove() fetches priv back through the drvdata */
+	dev_set_drvdata(dev, priv);
+
+	priv->rng.name = dev_driver_string(dev);
+	priv->rng.init = stm32_rng_init;
+	priv->rng.cleanup = stm32_rng_cleanup;
+	priv->rng.read = stm32_rng_read;
+	priv->rng.priv = (unsigned long) dev;
+
+	return hwrng_register(&priv->rng);
+}
+
+/* Device-tree compatible strings this driver binds to. */
+static const struct of_device_id stm32_rng_match[] = {
+	{ .compatible = "st,stm32-rng" },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, stm32_rng_match);
+
+/* Platform driver description, registered through module_platform_driver(). */
+static struct platform_driver stm32_rng_driver = {
+	.probe = stm32_rng_probe,
+	.remove = stm32_rng_remove,
+	.driver = {
+		.of_match_table = stm32_rng_match,
+		.name = "stm32_rng",
+	},
+};
+
+module_platform_driver(stm32_rng_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Thompson <daniel.thompson@linaro.org>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 RNG device driver");