Message ID | 20230214201955.7461-2-mario.limonciello@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Avoid triggering an fTPM bug from kernel | expand |
On 14.02.23 21:19, Mario Limonciello wrote: > AMD has issued an advisory indicating that having fTPM enabled in > BIOS can cause "stuttering" in the OS. This issue has been fixed > in newer versions of the fTPM firmware, but it's up to system > designers to decide whether to distribute it. > > This issue has existed for a while, but is more prevalent starting > with kernel 6.1 because commit b006c439d58db ("hwrng: core - start > hwrng kthread also for untrusted sources") started to use the fTPM > for hwrng by default. However, all uses of /dev/hwrng result in > unacceptable stuttering. > > So, simply disable registration of the defective hwrng when detecting > these faulty fTPM versions. Hmm, no reply since Mario posted this. Jarkko, James, what's your stance on this? Does the patch look fine from your point of view? And does the situation justify merging this on the last minute for 6.2? Or should we merge it early for 6.3 and then backport to stable? Ciao, Thorsten > Link: https://www.amd.com/en/support/kb/faq/pa-410 > Link: https://bugzilla.kernel.org/show_bug.cgi?id=216989 > Link: https://lore.kernel.org/all/20230209153120.261904-1-Jason@zx2c4.com/ > Fixes: b006c439d58d ("hwrng: core - start hwrng kthread also for untrusted sources") > Cc: stable@vger.kernel.org > Cc: Jarkko Sakkinen <jarkko@kernel.org> > Cc: Thorsten Leemhuis <regressions@leemhuis.info> > Cc: James Bottomley <James.Bottomley@hansenpartnership.com> > Co-developed-by: Jason A. Donenfeld <Jason@zx2c4.com> > Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> > Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> > --- > drivers/char/tpm/tpm-chip.c | 62 ++++++++++++++++++++++++++++++- > drivers/char/tpm/tpm.h | 73 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 134 insertions(+), 1 deletion(-) > > diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c > index 741d8f3e8fb3a..348dd5705fbb6 100644 > --- a/drivers/char/tpm/tpm-chip.c > +++ b/drivers/char/tpm/tpm-chip.c > @@ -512,6 +512,65 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip) > return 0; > } > > +static bool tpm_is_rng_defective(struct tpm_chip *chip) > +{ > + int ret; > + u64 version; > + u32 val1, val2; > + > + /* No known-broken TPM1 chips. */ > + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) > + return false; > + > + ret = tpm_request_locality(chip); > + if (ret) > + return false; > + > + /* Some AMD fTPM versions may cause stutter */ > + ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL); > + if (ret) > + goto release; > + if (val1 != 0x414D4400U /* AMD */) { > + ret = -ENODEV; > + goto release; > + } > + ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL); > + if (ret) > + goto release; > + ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL); > + if (ret) > + goto release; > + > +release: > + tpm_relinquish_locality(chip); > + > + if (ret) > + return false; > + > + version = ((u64)val1 << 32) | val2; > + /* > + * Fixes for stutter as described in > + * https://www.amd.com/en/support/kb/faq/pa-410 > + * are available in two series of fTPM firmware: > + * 6.x.y.z series: 6.0.18.6 + > + * 3.x.y.z series: 3.57.x.5 + > + */ > + if ((version >> 48) == 6) { > + if (version >= 0x0006000000180006ULL) > + return false; > + } else if ((version >> 48) == 3) { > + if (version >= 0x0003005700000005ULL) > + return false; > + } else { > + return false; > + } > + dev_warn(&chip->dev, > + "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n", > + 
version); > + > + return true; > +} > + > static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) > { > struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng); > @@ -521,7 +580,8 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) > > static int tpm_add_hwrng(struct tpm_chip *chip) > { > - if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip)) > + if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) || > + tpm_is_rng_defective(chip)) > return 0; > > snprintf(chip->hwrng_name, sizeof(chip->hwrng_name), > diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h > index 24ee4e1cc452a..830014a266090 100644 > --- a/drivers/char/tpm/tpm.h > +++ b/drivers/char/tpm/tpm.h > @@ -150,6 +150,79 @@ enum tpm_sub_capabilities { > TPM_CAP_PROP_TIS_DURATION = 0x120, > }; > > +enum tpm2_pt_props { > + TPM2_PT_NONE = 0x00000000, > + TPM2_PT_GROUP = 0x00000100, > + TPM2_PT_FIXED = TPM2_PT_GROUP * 1, > + TPM2_PT_FAMILY_INDICATOR = TPM2_PT_FIXED + 0, > + TPM2_PT_LEVEL = TPM2_PT_FIXED + 1, > + TPM2_PT_REVISION = TPM2_PT_FIXED + 2, > + TPM2_PT_DAY_OF_YEAR = TPM2_PT_FIXED + 3, > + TPM2_PT_YEAR = TPM2_PT_FIXED + 4, > + TPM2_PT_MANUFACTURER = TPM2_PT_FIXED + 5, > + TPM2_PT_VENDOR_STRING_1 = TPM2_PT_FIXED + 6, > + TPM2_PT_VENDOR_STRING_2 = TPM2_PT_FIXED + 7, > + TPM2_PT_VENDOR_STRING_3 = TPM2_PT_FIXED + 8, > + TPM2_PT_VENDOR_STRING_4 = TPM2_PT_FIXED + 9, > + TPM2_PT_VENDOR_TPM_TYPE = TPM2_PT_FIXED + 10, > + TPM2_PT_FIRMWARE_VERSION_1 = TPM2_PT_FIXED + 11, > + TPM2_PT_FIRMWARE_VERSION_2 = TPM2_PT_FIXED + 12, > + TPM2_PT_INPUT_BUFFER = TPM2_PT_FIXED + 13, > + TPM2_PT_HR_TRANSIENT_MIN = TPM2_PT_FIXED + 14, > + TPM2_PT_HR_PERSISTENT_MIN = TPM2_PT_FIXED + 15, > + TPM2_PT_HR_LOADED_MIN = TPM2_PT_FIXED + 16, > + TPM2_PT_ACTIVE_SESSIONS_MAX = TPM2_PT_FIXED + 17, > + TPM2_PT_PCR_COUNT = TPM2_PT_FIXED + 18, > + TPM2_PT_PCR_SELECT_MIN = TPM2_PT_FIXED + 19, > + TPM2_PT_CONTEXT_GAP_MAX = TPM2_PT_FIXED 
+ 20, > + TPM2_PT_NV_COUNTERS_MAX = TPM2_PT_FIXED + 22, > + TPM2_PT_NV_INDEX_MAX = TPM2_PT_FIXED + 23, > + TPM2_PT_MEMORY = TPM2_PT_FIXED + 24, > + TPM2_PT_CLOCK_UPDATE = TPM2_PT_FIXED + 25, > + TPM2_PT_CONTEXT_HASH = TPM2_PT_FIXED + 26, > + TPM2_PT_CONTEXT_SYM = TPM2_PT_FIXED + 27, > + TPM2_PT_CONTEXT_SYM_SIZE = TPM2_PT_FIXED + 28, > + TPM2_PT_ORDERLY_COUNT = TPM2_PT_FIXED + 29, > + TPM2_PT_MAX_COMMAND_SIZE = TPM2_PT_FIXED + 30, > + TPM2_PT_MAX_RESPONSE_SIZE = TPM2_PT_FIXED + 31, > + TPM2_PT_MAX_DIGEST = TPM2_PT_FIXED + 32, > + TPM2_PT_MAX_OBJECT_CONTEXT = TPM2_PT_FIXED + 33, > + TPM2_PT_MAX_SESSION_CONTEXT = TPM2_PT_FIXED + 34, > + TPM2_PT_PS_FAMILY_INDICATOR = TPM2_PT_FIXED + 35, > + TPM2_PT_PS_LEVEL = TPM2_PT_FIXED + 36, > + TPM2_PT_PS_REVISION = TPM2_PT_FIXED + 37, > + TPM2_PT_PS_DAY_OF_YEAR = TPM2_PT_FIXED + 38, > + TPM2_PT_PS_YEAR = TPM2_PT_FIXED + 39, > + TPM2_PT_SPLIT_MAX = TPM2_PT_FIXED + 40, > + TPM2_PT_TOTAL_COMMANDS = TPM2_PT_FIXED + 41, > + TPM2_PT_LIBRARY_COMMANDS = TPM2_PT_FIXED + 42, > + TPM2_PT_VENDOR_COMMANDS = TPM2_PT_FIXED + 43, > + TPM2_PT_NV_BUFFER_MAX = TPM2_PT_FIXED + 44, > + TPM2_PT_MODES = TPM2_PT_FIXED + 45, > + TPM2_PT_MAX_CAP_BUFFER = TPM2_PT_FIXED + 46, > + TPM2_PT_VAR = TPM2_PT_GROUP * 2, > + TPM2_PT_PERMANENT = TPM2_PT_VAR + 0, > + TPM2_PT_STARTUP_CLEAR = TPM2_PT_VAR + 1, > + TPM2_PT_HR_NV_INDEX = TPM2_PT_VAR + 2, > + TPM2_PT_HR_LOADED = TPM2_PT_VAR + 3, > + TPM2_PT_HR_LOADED_AVAIL = TPM2_PT_VAR + 4, > + TPM2_PT_HR_ACTIVE = TPM2_PT_VAR + 5, > + TPM2_PT_HR_ACTIVE_AVAIL = TPM2_PT_VAR + 6, > + TPM2_PT_HR_TRANSIENT_AVAIL = TPM2_PT_VAR + 7, > + TPM2_PT_HR_PERSISTENT = TPM2_PT_VAR + 8, > + TPM2_PT_HR_PERSISTENT_AVAIL = TPM2_PT_VAR + 9, > + TPM2_PT_NV_COUNTERS = TPM2_PT_VAR + 10, > + TPM2_PT_NV_COUNTERS_AVAIL = TPM2_PT_VAR + 11, > + TPM2_PT_ALGORITHM_SET = TPM2_PT_VAR + 12, > + TPM2_PT_LOADED_CURVES = TPM2_PT_VAR + 13, > + TPM2_PT_LOCKOUT_COUNTER = TPM2_PT_VAR + 14, > + TPM2_PT_MAX_AUTH_FAIL = TPM2_PT_VAR + 15, > + TPM2_PT_LOCKOUT_INTERVAL 
= TPM2_PT_VAR + 16, > + TPM2_PT_LOCKOUT_RECOVERY = TPM2_PT_VAR + 17, > + TPM2_PT_NV_WRITE_RECOVERY = TPM2_PT_VAR + 18, > + TPM2_PT_AUDIT_COUNTER_0 = TPM2_PT_VAR + 19, > + TPM2_PT_AUDIT_COUNTER_1 = TPM2_PT_VAR + 20, > +}; > > /* 128 bytes is an arbitrary cap. This could be as large as TPM_BUFSIZE - 18 > * bytes, but 128 is still a relatively large number of random bytes and
On Tue, Feb 14, 2023 at 02:19:55PM -0600, Mario Limonciello wrote: > AMD has issued an advisory indicating that having fTPM enabled in > BIOS can cause "stuttering" in the OS. This issue has been fixed > in newer versions of the fTPM firmware, but it's up to system > designers to decide whether to distribute it. > > This issue has existed for a while, but is more prevalent starting > with kernel 6.1 because commit b006c439d58db ("hwrng: core - start > hwrng kthread also for untrusted sources") started to use the fTPM > for hwrng by default. However, all uses of /dev/hwrng result in > unacceptable stuttering. > > So, simply disable registration of the defective hwrng when detecting > these faulty fTPM versions. > > Link: https://www.amd.com/en/support/kb/faq/pa-410 > Link: https://bugzilla.kernel.org/show_bug.cgi?id=216989 > Link: https://lore.kernel.org/all/20230209153120.261904-1-Jason@zx2c4.com/ > Fixes: b006c439d58d ("hwrng: core - start hwrng kthread also for untrusted sources") > Cc: stable@vger.kernel.org > Cc: Jarkko Sakkinen <jarkko@kernel.org> > Cc: Thorsten Leemhuis <regressions@leemhuis.info> > Cc: James Bottomley <James.Bottomley@hansenpartnership.com> > Co-developed-by: Jason A. Donenfeld <Jason@zx2c4.com> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> > Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> > --- > drivers/char/tpm/tpm-chip.c | 62 ++++++++++++++++++++++++++++++- > drivers/char/tpm/tpm.h | 73 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 134 insertions(+), 1 deletion(-) > > diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c > index 741d8f3e8fb3a..348dd5705fbb6 100644 > --- a/drivers/char/tpm/tpm-chip.c > +++ b/drivers/char/tpm/tpm-chip.c > @@ -512,6 +512,65 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip) > return 0; > } > > +static bool tpm_is_rng_defective(struct tpm_chip *chip) Perhaps tpm_amd_* ? Also, just a question: is there any legit use for fTPM's, which are not updated? 
I.e. why would want tpm_crb to initialize with a dysfunctional firmware? I.e. the existential question is: is it better to workaround the issue and let pass through, or make the user aware that the firmware would really need an update. > +{ > + int ret; > + u64 version; > + u32 val1, val2; I'd use reverse christmas tree order here. > + > + /* No known-broken TPM1 chips. */ > + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) > + return false; > + > + ret = tpm_request_locality(chip); > + if (ret) > + return false; > + > + /* Some AMD fTPM versions may cause stutter */ > + ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL); > + if (ret) > + goto release; > + if (val1 != 0x414D4400U /* AMD */) { > + ret = -ENODEV; > + goto release; > + } > + ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL); > + if (ret) > + goto release; > + ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL); > + if (ret) > + goto release; > + > +release: > + tpm_relinquish_locality(chip); > + > + if (ret) > + return false; > + > + version = ((u64)val1 << 32) | val2; > + /* > + * Fixes for stutter as described in > + * https://www.amd.com/en/support/kb/faq/pa-410 > + * are available in two series of fTPM firmware: > + * 6.x.y.z series: 6.0.18.6 + > + * 3.x.y.z series: 3.57.x.5 + > + */ > + if ((version >> 48) == 6) { > + if (version >= 0x0006000000180006ULL) > + return false; > + } else if ((version >> 48) == 3) { > + if (version >= 0x0003005700000005ULL) > + return false; > + } else { > + return false; > + } You can drop the curly braces here. 
> + dev_warn(&chip->dev, > + "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n", > + version); > + > + return true; > +} > + > static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) > { > struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng); > @@ -521,7 +580,8 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) > > static int tpm_add_hwrng(struct tpm_chip *chip) > { > - if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip)) > + if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) || > + tpm_is_rng_defective(chip)) > return 0; > > snprintf(chip->hwrng_name, sizeof(chip->hwrng_name), > diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h > index 24ee4e1cc452a..830014a266090 100644 > --- a/drivers/char/tpm/tpm.h > +++ b/drivers/char/tpm/tpm.h > @@ -150,6 +150,79 @@ enum tpm_sub_capabilities { > TPM_CAP_PROP_TIS_DURATION = 0x120, > }; > > +enum tpm2_pt_props { > + TPM2_PT_NONE = 0x00000000, > + TPM2_PT_GROUP = 0x00000100, > + TPM2_PT_FIXED = TPM2_PT_GROUP * 1, > + TPM2_PT_FAMILY_INDICATOR = TPM2_PT_FIXED + 0, > + TPM2_PT_LEVEL = TPM2_PT_FIXED + 1, > + TPM2_PT_REVISION = TPM2_PT_FIXED + 2, > + TPM2_PT_DAY_OF_YEAR = TPM2_PT_FIXED + 3, > + TPM2_PT_YEAR = TPM2_PT_FIXED + 4, > + TPM2_PT_MANUFACTURER = TPM2_PT_FIXED + 5, > + TPM2_PT_VENDOR_STRING_1 = TPM2_PT_FIXED + 6, > + TPM2_PT_VENDOR_STRING_2 = TPM2_PT_FIXED + 7, > + TPM2_PT_VENDOR_STRING_3 = TPM2_PT_FIXED + 8, > + TPM2_PT_VENDOR_STRING_4 = TPM2_PT_FIXED + 9, > + TPM2_PT_VENDOR_TPM_TYPE = TPM2_PT_FIXED + 10, > + TPM2_PT_FIRMWARE_VERSION_1 = TPM2_PT_FIXED + 11, > + TPM2_PT_FIRMWARE_VERSION_2 = TPM2_PT_FIXED + 12, > + TPM2_PT_INPUT_BUFFER = TPM2_PT_FIXED + 13, > + TPM2_PT_HR_TRANSIENT_MIN = TPM2_PT_FIXED + 14, > + TPM2_PT_HR_PERSISTENT_MIN = TPM2_PT_FIXED + 15, > + TPM2_PT_HR_LOADED_MIN = TPM2_PT_FIXED + 16, > + TPM2_PT_ACTIVE_SESSIONS_MAX = TPM2_PT_FIXED + 17, > + TPM2_PT_PCR_COUNT = TPM2_PT_FIXED 
+ 18, > + TPM2_PT_PCR_SELECT_MIN = TPM2_PT_FIXED + 19, > + TPM2_PT_CONTEXT_GAP_MAX = TPM2_PT_FIXED + 20, > + TPM2_PT_NV_COUNTERS_MAX = TPM2_PT_FIXED + 22, > + TPM2_PT_NV_INDEX_MAX = TPM2_PT_FIXED + 23, > + TPM2_PT_MEMORY = TPM2_PT_FIXED + 24, > + TPM2_PT_CLOCK_UPDATE = TPM2_PT_FIXED + 25, > + TPM2_PT_CONTEXT_HASH = TPM2_PT_FIXED + 26, > + TPM2_PT_CONTEXT_SYM = TPM2_PT_FIXED + 27, > + TPM2_PT_CONTEXT_SYM_SIZE = TPM2_PT_FIXED + 28, > + TPM2_PT_ORDERLY_COUNT = TPM2_PT_FIXED + 29, > + TPM2_PT_MAX_COMMAND_SIZE = TPM2_PT_FIXED + 30, > + TPM2_PT_MAX_RESPONSE_SIZE = TPM2_PT_FIXED + 31, > + TPM2_PT_MAX_DIGEST = TPM2_PT_FIXED + 32, > + TPM2_PT_MAX_OBJECT_CONTEXT = TPM2_PT_FIXED + 33, > + TPM2_PT_MAX_SESSION_CONTEXT = TPM2_PT_FIXED + 34, > + TPM2_PT_PS_FAMILY_INDICATOR = TPM2_PT_FIXED + 35, > + TPM2_PT_PS_LEVEL = TPM2_PT_FIXED + 36, > + TPM2_PT_PS_REVISION = TPM2_PT_FIXED + 37, > + TPM2_PT_PS_DAY_OF_YEAR = TPM2_PT_FIXED + 38, > + TPM2_PT_PS_YEAR = TPM2_PT_FIXED + 39, > + TPM2_PT_SPLIT_MAX = TPM2_PT_FIXED + 40, > + TPM2_PT_TOTAL_COMMANDS = TPM2_PT_FIXED + 41, > + TPM2_PT_LIBRARY_COMMANDS = TPM2_PT_FIXED + 42, > + TPM2_PT_VENDOR_COMMANDS = TPM2_PT_FIXED + 43, > + TPM2_PT_NV_BUFFER_MAX = TPM2_PT_FIXED + 44, > + TPM2_PT_MODES = TPM2_PT_FIXED + 45, > + TPM2_PT_MAX_CAP_BUFFER = TPM2_PT_FIXED + 46, > + TPM2_PT_VAR = TPM2_PT_GROUP * 2, > + TPM2_PT_PERMANENT = TPM2_PT_VAR + 0, > + TPM2_PT_STARTUP_CLEAR = TPM2_PT_VAR + 1, > + TPM2_PT_HR_NV_INDEX = TPM2_PT_VAR + 2, > + TPM2_PT_HR_LOADED = TPM2_PT_VAR + 3, > + TPM2_PT_HR_LOADED_AVAIL = TPM2_PT_VAR + 4, > + TPM2_PT_HR_ACTIVE = TPM2_PT_VAR + 5, > + TPM2_PT_HR_ACTIVE_AVAIL = TPM2_PT_VAR + 6, > + TPM2_PT_HR_TRANSIENT_AVAIL = TPM2_PT_VAR + 7, > + TPM2_PT_HR_PERSISTENT = TPM2_PT_VAR + 8, > + TPM2_PT_HR_PERSISTENT_AVAIL = TPM2_PT_VAR + 9, > + TPM2_PT_NV_COUNTERS = TPM2_PT_VAR + 10, > + TPM2_PT_NV_COUNTERS_AVAIL = TPM2_PT_VAR + 11, > + TPM2_PT_ALGORITHM_SET = TPM2_PT_VAR + 12, > + TPM2_PT_LOADED_CURVES = TPM2_PT_VAR + 13, > + 
TPM2_PT_LOCKOUT_COUNTER = TPM2_PT_VAR + 14, > + TPM2_PT_MAX_AUTH_FAIL = TPM2_PT_VAR + 15, > + TPM2_PT_LOCKOUT_INTERVAL = TPM2_PT_VAR + 16, > + TPM2_PT_LOCKOUT_RECOVERY = TPM2_PT_VAR + 17, > + TPM2_PT_NV_WRITE_RECOVERY = TPM2_PT_VAR + 18, > + TPM2_PT_AUDIT_COUNTER_0 = TPM2_PT_VAR + 19, > + TPM2_PT_AUDIT_COUNTER_1 = TPM2_PT_VAR + 20, > +}; > > /* 128 bytes is an arbitrary cap. This could be as large as TPM_BUFSIZE - 18 > * bytes, but 128 is still a relatively large number of random bytes and > -- > 2.25.1 > BR, Jarkko
On Fri, Feb 17, 2023 at 04:18:39PM +0100, Thorsten Leemhuis wrote: > On 14.02.23 21:19, Mario Limonciello wrote: > > AMD has issued an advisory indicating that having fTPM enabled in > > BIOS can cause "stuttering" in the OS. This issue has been fixed > > in newer versions of the fTPM firmware, but it's up to system > > designers to decide whether to distribute it. > > > > This issue has existed for a while, but is more prevalent starting > > with kernel 6.1 because commit b006c439d58db ("hwrng: core - start > > hwrng kthread also for untrusted sources") started to use the fTPM > > for hwrng by default. However, all uses of /dev/hwrng result in > > unacceptable stuttering. > > > > So, simply disable registration of the defective hwrng when detecting > > these faulty fTPM versions. > > Hmm, no reply since Mario posted this. > > Jarkko, James, what's your stance on this? Does the patch look fine from > your point of view? And does the situation justify merging this on the > last minute for 6.2? Or should we merge it early for 6.3 and then > backport to stable? > > Ciao, Thorsten As I stated in earlier response: do we want to forbid tpm_crb in this case or do we want to pass-through with a faulty firmware? Not weighting either choice here I just don't see any motivating points in the commit message to pick either, that's all. BR, Jarkko
On 2/17/2023 16:05, Jarkko Sakkinen wrote: > Perhaps tpm_amd_* ? When Jason first proposed this patch I feel the intent was it could cover multiple deficiencies. But as this is the only one for now, sure re-naming it is fine. > > Also, just a question: is there any legit use for fTPM's, which are not > updated? I.e. why would want tpm_crb to initialize with a dysfunctional > firmware?> > I.e. the existential question is: is it better to workaround the issue and > let pass through, or make the user aware that the firmware would really > need an update. > On 2/17/2023 16:35, Jarkko Sakkinen wrote: >> >> Hmm, no reply since Mario posted this. >> >> Jarkko, James, what's your stance on this? Does the patch look fine from >> your point of view? And does the situation justify merging this on the >> last minute for 6.2? Or should we merge it early for 6.3 and then >> backport to stable? >> >> Ciao, Thorsten > > As I stated in earlier response: do we want to forbid tpm_crb in this case > or do we want to pass-through with a faulty firmware? > > Not weighting either choice here I just don't see any motivating points > in the commit message to pick either, that's all. > > BR, Jarkko Even if you're not using RNG functionality you can still do plenty of other things with the TPM. The RNG functionality is what tripped up this issue though. All of these issues were only raised because the kernel started using it by default for RNG and userspace wants random numbers all the time. If the firmware was easily updatable from all the OEMs I would lean on trying to encourage people to update. But alas this has been available for over a year and a sizable number of OEMs haven't distributed a fix. The major issue I see with forbidding tpm_crb is that users may have been using the fTPM for something and taking it away in an update could lead to a no-boot scenario if they're (for example) tying a policy to PCR values and can no longer access those. 
If the consensus were to go that direction instead I would want to see a module parameter that lets users turn on the fTPM even knowing this problem exists so they could recover. That all seems pretty expensive to me for this problem.
On Fri, Feb 17, 2023 at 08:25:56PM -0600, Limonciello, Mario wrote: > On 2/17/2023 16:05, Jarkko Sakkinen wrote: > > > Perhaps tpm_amd_* ? > > When Jason first proposed this patch I feel the intent was it could cover > multiple deficiencies. > But as this is the only one for now, sure re-naming it is fine. > > > > > Also, just a question: is there any legit use for fTPM's, which are not > > updated? I.e. why would want tpm_crb to initialize with a dysfunctional > > firmware?> > > I.e. the existential question is: is it better to workaround the issue and > > let pass through, or make the user aware that the firmware would really > > need an update. > > > > On 2/17/2023 16:35, Jarkko Sakkinen wrote: > > > > > > Hmm, no reply since Mario posted this. > > > > > > Jarkko, James, what's your stance on this? Does the patch look fine from > > > your point of view? And does the situation justify merging this on the > > > last minute for 6.2? Or should we merge it early for 6.3 and then > > > backport to stable? > > > > > > Ciao, Thorsten > > > > As I stated in earlier response: do we want to forbid tpm_crb in this case > > or do we want to pass-through with a faulty firmware? > > > > Not weighting either choice here I just don't see any motivating points > > in the commit message to pick either, that's all. > > > > BR, Jarkko > > Even if you're not using RNG functionality you can still do plenty of other > things with the TPM. The RNG functionality is what tripped up this issue > though. All of these issues were only raised because the kernel started > using it by default for RNG and userspace wants random numbers all the time. > > If the firmware was easily updatable from all the OEMs I would lean on > trying to encourage people to update. But alas this has been available for > over a year and a sizable number of OEMs haven't distributed a fix. 
> > The major issue I see with forbidding tpm_crb is that users may have been > using the fTPM for something and taking it away in an update could lead to a > no-boot scenario if they're (for example) tying a policy to PCR values and > can no longer access those. > > If the consensus were to go that direction instead I would want to see a > module parameter that lets users turn on the fTPM even knowing this problem > exists so they could recover. That all seems pretty expensive to me for > this problem. I agree with the last argument. I re-read the commit message and https://www.amd.com/en/support/kb/faq/pa-410. Why does this scope down to only rng? Should TPM2_CC_GET_RANDOM also be blocked from /dev/tpm0? BR, Jarkko
[Public] > -----Original Message----- > From: Jarkko Sakkinen <jarkko@kernel.org> > Sent: Tuesday, February 21, 2023 16:53 > To: Limonciello, Mario <Mario.Limonciello@amd.com> > Cc: Thorsten Leemhuis <regressions@leemhuis.info>; James Bottomley > <James.Bottomley@hansenpartnership.com>; Jason@zx2c4.com; linux- > integrity@vger.kernel.org; linux-kernel@vger.kernel.org; > stable@vger.kernel.org; Linus Torvalds <torvalds@linux-foundation.org>; > Linux kernel regressions list <regressions@lists.linux.dev> > Subject: Re: [PATCH 1/1] tpm: disable hwrng for fTPM on some AMD designs > > On Fri, Feb 17, 2023 at 08:25:56PM -0600, Limonciello, Mario wrote: > > On 2/17/2023 16:05, Jarkko Sakkinen wrote: > > > > > Perhaps tpm_amd_* ? > > > > When Jason first proposed this patch I feel the intent was it could cover > > multiple deficiencies. > > But as this is the only one for now, sure re-naming it is fine. > > > > > > > > Also, just a question: is there any legit use for fTPM's, which are not > > > updated? I.e. why would want tpm_crb to initialize with a dysfunctional > > > firmware?> > > > I.e. the existential question is: is it better to workaround the issue and > > > let pass through, or make the user aware that the firmware would really > > > need an update. > > > > > > > On 2/17/2023 16:35, Jarkko Sakkinen wrote: > > > > > > > > Hmm, no reply since Mario posted this. > > > > > > > > Jarkko, James, what's your stance on this? Does the patch look fine > from > > > > your point of view? And does the situation justify merging this on the > > > > last minute for 6.2? Or should we merge it early for 6.3 and then > > > > backport to stable? > > > > > > > > Ciao, Thorsten > > > > > > As I stated in earlier response: do we want to forbid tpm_crb in this case > > > or do we want to pass-through with a faulty firmware? > > > > > > Not weighting either choice here I just don't see any motivating points > > > in the commit message to pick either, that's all. 
> > > > > > BR, Jarkko > > > > Even if you're not using RNG functionality you can still do plenty of other > > things with the TPM. The RNG functionality is what tripped up this issue > > though. All of these issues were only raised because the kernel started > > using it by default for RNG and userspace wants random numbers all the > time. > > > > If the firmware was easily updatable from all the OEMs I would lean on > > trying to encourage people to update. But alas this has been available for > > over a year and a sizable number of OEMs haven't distributed a fix. > > > > The major issue I see with forbidding tpm_crb is that users may have been > > using the fTPM for something and taking it away in an update could lead to > a > > no-boot scenario if they're (for example) tying a policy to PCR values and > > can no longer access those. > > > > If the consensus were to go that direction instead I would want to see a > > module parameter that lets users turn on the fTPM even knowing this > problem > > exists so they could recover. That all seems pretty expensive to me for > > this problem. > > I agree with the last argument. FYI, I did send out a v2 and folded in this argument to the commit message and adjusted for your feedback. You might not have found it in your inbox yet. > > I re-read the commit message and > https://www.amd.com/en/support/kb/faq/pa-410. > > Why this scopes down to only rng? Should TPM2_CC_GET_RANDOM also > blocked > from /dev/tpm0? > The only reason that this commit was created is because the kernel utilized the fTPM for hwrng which triggered the problem. If that never happened this probably wouldn't have been exposed either. Yes; I would agree that if someone was to do other fTPM operations over an extended period of time it's plausible they can cause the problem too. But picking and choosing functionality to block seems quite arbitrary to me.
Hi, this is your Linux kernel regression tracker. Top-posting for once, to make this easily accessible to everyone. Jarkko (or James), what is needed to get this regression resolved? More people showed up that are apparently affected by this. Sure, 6.2 is out, but it's a regression in 6.1 it thus would be good to fix rather sooner than later. Ideally this week, if you ask me. FWIW, latest version of this patch is here, but it didn't get any replies since it was posted last Tuesday (and the mail quoted below is just one day younger): https://lore.kernel.org/all/20230220180729.23862-1-mario.limonciello@amd.com/ Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat) -- Everything you wanna know about Linux kernel regression tracking: https://linux-regtracking.leemhuis.info/about/#tldr If I did something stupid, please tell me, as explained on that page. #regzbot poke On 22.02.23 00:10, Limonciello, Mario wrote: > [Public] > >> -----Original Message----- >> From: Jarkko Sakkinen <jarkko@kernel.org> >> Sent: Tuesday, February 21, 2023 16:53 >> To: Limonciello, Mario <Mario.Limonciello@amd.com> >> Cc: Thorsten Leemhuis <regressions@leemhuis.info>; James Bottomley >> <James.Bottomley@hansenpartnership.com>; Jason@zx2c4.com; linux- >> integrity@vger.kernel.org; linux-kernel@vger.kernel.org; >> stable@vger.kernel.org; Linus Torvalds <torvalds@linux-foundation.org>; >> Linux kernel regressions list <regressions@lists.linux.dev> >> Subject: Re: [PATCH 1/1] tpm: disable hwrng for fTPM on some AMD designs >> >> On Fri, Feb 17, 2023 at 08:25:56PM -0600, Limonciello, Mario wrote: >>> On 2/17/2023 16:05, Jarkko Sakkinen wrote: >>> >>>> Perhaps tpm_amd_* ? >>> >>> When Jason first proposed this patch I feel the intent was it could cover >>> multiple deficiencies. >>> But as this is the only one for now, sure re-naming it is fine. >>> >>>> >>>> Also, just a question: is there any legit use for fTPM's, which are not >>>> updated? I.e. 
why would want tpm_crb to initialize with a dysfunctional >>>> firmware?> >>>> I.e. the existential question is: is it better to workaround the issue and >>>> let pass through, or make the user aware that the firmware would really >>>> need an update. >>>> >>> >>> On 2/17/2023 16:35, Jarkko Sakkinen wrote: >>>>> >>>>> Hmm, no reply since Mario posted this. >>>>> >>>>> Jarkko, James, what's your stance on this? Does the patch look fine >> from >>>>> your point of view? And does the situation justify merging this on the >>>>> last minute for 6.2? Or should we merge it early for 6.3 and then >>>>> backport to stable? >>>>> >>>>> Ciao, Thorsten >>>> >>>> As I stated in earlier response: do we want to forbid tpm_crb in this case >>>> or do we want to pass-through with a faulty firmware? >>>> >>>> Not weighting either choice here I just don't see any motivating points >>>> in the commit message to pick either, that's all. >>>> >>>> BR, Jarkko >>> >>> Even if you're not using RNG functionality you can still do plenty of other >>> things with the TPM. The RNG functionality is what tripped up this issue >>> though. All of these issues were only raised because the kernel started >>> using it by default for RNG and userspace wants random numbers all the >> time. >>> >>> If the firmware was easily updatable from all the OEMs I would lean on >>> trying to encourage people to update. But alas this has been available for >>> over a year and a sizable number of OEMs haven't distributed a fix. >>> >>> The major issue I see with forbidding tpm_crb is that users may have been >>> using the fTPM for something and taking it away in an update could lead to >> a >>> no-boot scenario if they're (for example) tying a policy to PCR values and >>> can no longer access those. >>> >>> If the consensus were to go that direction instead I would want to see a >>> module parameter that lets users turn on the fTPM even knowing this >> problem >>> exists so they could recover. 
That all seems pretty expensive to me for >>> this problem. >> >> I agree with the last argument. > > FYI, I did send out a v2 and folded in this argument to the commit message > and adjusted for your feedback. You might not have found it in your inbox > yet. > >> >> I re-read the commit message and >> https://www.amd.com/en/support/kb/faq/pa-410. >> >> Why this scopes down to only rng? Should TPM2_CC_GET_RANDOM also >> blocked >> from /dev/tpm0? >> > > The only reason that this commit was created is because the kernel utilized > the fTPM for hwrng which triggered the problem. If that never happened > this probably wouldn't have been exposed either. > > Yes; I would agree that if someone was to do other fTPM operations over > an extended period of time it's plausible they can cause the problem too. > > But picking and choosing functionality to block seems quite arbitrary to me. >
On Mon, Feb 27, 2023 at 11:57:15AM +0100, Thorsten Leemhuis wrote: > Hi, this is your Linux kernel regression tracker. Top-posting for once, > to make this easily accessible to everyone. > > Jarkko (or James), what is needed to get this regression resolved? More > people showed up that are apparently affected by this. Sure, 6.2 is out, > but it's a regression in 6.1 it thus would be good to fix rather sooner > than later. Ideally this week, if you ask me. I do not see any tested-by's in response to the v2 patch. I.e. we have an unverified solution, which cannot be applied. BR, Jarkko
On Mon, Feb 27, 2023 at 01:14:46PM +0200, Jarkko Sakkinen wrote: > On Mon, Feb 27, 2023 at 11:57:15AM +0100, Thorsten Leemhuis wrote: > > Hi, this is your Linux kernel regression tracker. Top-posting for once, > > to make this easily accessible to everyone. > > > > Jarkko (or James), what is needed to get this regression resolved? More > > people showed up that are apparently affected by this. Sure, 6.2 is out, > > but it's a regression in 6.1 it thus would be good to fix rather sooner > > than later. Ideally this week, if you ask me. > > I do not see any tested-by's responded to v2 patch. I.e. we have > an unverified solution, which cannot be applied. v2 is good enough as far as I'm concerned as long as we know it is good to go. Please do not respond tested-by to this thread. Test it and respond to the corresponding thread so that all tags can be picked up by b4. BR, Jarkko
Hi, I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ laptop. Compiling kernel without TPM support makes the stutters go away. The fTPM firmware version is 0x3005700020005 on my machine.
On 7/27/2023 10:38, Daniil Stas wrote: > Hi, > I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ > laptop. > Compiling kernel without TPM support makes the stutters go away. > The fTPM firmware version is 0x3005700020005 on my machine. Can you please open up a kernel bugzilla and attach your dmesg to it both with TPM enabled and disabled? You can CC me on it directly.
On Thu, 27 Jul 2023 10:42:33 -0500 Mario Limonciello <mario.limonciello@amd.com> wrote: > On 7/27/2023 10:38, Daniil Stas wrote: > > Hi, > > I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ > > laptop. > > Compiling kernel without TPM support makes the stutters go away. > > The fTPM firmware version is 0x3005700020005 on my machine. > > Can you please open up a kernel bugzilla and attach your dmesg to it > both with TPM enabled and disabled? > > You can CC me on it directly. There are several bug categories to choose in the bugzilla. Which one should I use? I never used bugzilla before...
On 7/27/2023 11:39, Daniil Stas wrote: > On Thu, 27 Jul 2023 10:42:33 -0500 > Mario Limonciello <mario.limonciello@amd.com> wrote: > >> On 7/27/2023 10:38, Daniil Stas wrote: >>> Hi, >>> I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ >>> laptop. >>> Compiling kernel without TPM support makes the stutters go away. >>> The fTPM firmware version is 0x3005700020005 on my machine. >> >> Can you please open up a kernel bugzilla and attach your dmesg to it >> both with TPM enabled and disabled? >> >> You can CC me on it directly. > > There are several bug categories to choose in the bugzilla. Which one > should I use? > I never used bugzilla before... drivers/other is fine. If there is a better category we can move it later.
On Thu, 27 Jul 2023 11:41:55 -0500 Mario Limonciello <mario.limonciello@amd.com> wrote: > On 7/27/2023 11:39, Daniil Stas wrote: > > On Thu, 27 Jul 2023 10:42:33 -0500 > > Mario Limonciello <mario.limonciello@amd.com> wrote: > > > >> On 7/27/2023 10:38, Daniil Stas wrote: > [...] > >> > >> Can you please open up a kernel bugzilla and attach your dmesg to > >> it both with TPM enabled and disabled? > >> > >> You can CC me on it directly. > > > > There are several bug categories to choose in the bugzilla. Which > > one should I use? > > I never used bugzilla before... > > drivers/other is fine. If there is a better category we can move it > later. I see there are already several bug reports similar to mine. This one for example: https://bugzilla.kernel.org/show_bug.cgi?id=217212 Should I still make a new one?
On 7/27/2023 11:50, Daniil Stas wrote: > On Thu, 27 Jul 2023 11:41:55 -0500 > Mario Limonciello <mario.limonciello@amd.com> wrote: > >> On 7/27/2023 11:39, Daniil Stas wrote: >>> On Thu, 27 Jul 2023 10:42:33 -0500 >>> Mario Limonciello <mario.limonciello@amd.com> wrote: >>> >>>> On 7/27/2023 10:38, Daniil Stas wrote: >> [...] >>>> >>>> Can you please open up a kernel bugzilla and attach your dmesg to >>>> it both with TPM enabled and disabled? >>>> >>>> You can CC me on it directly. >>> >>> There are several bug categories to choose in the bugzilla. Which >>> one should I use? >>> I never used bugzilla before... >> >> drivers/other is fine. If there is a better category we can move it >> later. > > I see there are already several bug reports similar to mine. This one > for example: https://bugzilla.kernel.org/show_bug.cgi?id=217212 > Should I still make a new one? Yes; please make a separate one. If we decide it's the same we can always mark as a duplicate.
On Thu, 27 Jul 2023 11:51:21 -0500 Mario Limonciello <mario.limonciello@amd.com> wrote: > On 7/27/2023 11:50, Daniil Stas wrote: > > On Thu, 27 Jul 2023 11:41:55 -0500 > > Mario Limonciello <mario.limonciello@amd.com> wrote: > > > >> On 7/27/2023 11:39, Daniil Stas wrote: > [...] > [...] > >> [...] > [...] > [...] > >> > >> drivers/other is fine. If there is a better category we can move > >> it later. > > > > I see there are already several bug reports similar to mine. This > > one for example: https://bugzilla.kernel.org/show_bug.cgi?id=217212 > > Should I still make a new one? > > Yes; please make a separate one. If we decide it's the same we can > always mark as a duplicate. Here is the bug report I created: https://bugzilla.kernel.org/show_bug.cgi?id=217719
On Thu Jul 27, 2023 at 3:38 PM UTC, Daniil Stas wrote: > Hi, > I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ > laptop. > Compiling kernel without TPM support makes the stutters go away. > The fTPM firmware version is 0x3005700020005 on my machine. This needs a bit more elaboration in order to be comprehended. Do you mean by "stutter" unexpected delays and when do they happen? BR, Jarkko
On Fri, 28 Jul 2023 19:30:18 +0000 "Jarkko Sakkinen" <jarkko@kernel.org> wrote: > On Thu Jul 27, 2023 at 3:38 PM UTC, Daniil Stas wrote: > > Hi, > > I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ > > laptop. > > Compiling kernel without TPM support makes the stutters go away. > > The fTPM firmware version is 0x3005700020005 on my machine. > > This is needs a bit more elaboration in order to be comprehended. > > Do you mean by "stutter" unexpected delays and when do they happen? > > BR, Jarkko Yes, unexpected delays. They just happen randomly. You can google "AMD fTPM stuttering", there are a lot of examples. Here is one: https://www.youtube.com/watch?v=TYnRL-x6DVI
On Thu, 27 Jul 2023 at 10:05, Daniil Stas <daniil.stas@posteo.net> wrote: > > Here is the bug report I created: > https://bugzilla.kernel.org/show_bug.cgi?id=217719 Let's just disable the stupid fTPM hwrng thing. Maybe use it for the boot-time "gather entropy from different sources", but clearly it should *not* be used at runtime. Why would anybody use that crud when any machine that has it supposedly fixed (which apparently didn't turn out to be true after all) would also have the CPU rdrand instruction that doesn't have the problem? If you don't trust the CPU rdrand implementation (and that has had bugs too - see clear_rdrand_cpuid_bit() and x86_init_rdrand()), why would you trust the fTPM version that has caused even *more* problems? So I don't see any downside to just saying "that fTPM thing is not working". Even if it ends up working in the future, there are alternatives that aren't any worse. Linus
On 7/28/2023 3:41 PM, Linus Torvalds wrote: > On Thu, 27 Jul 2023 at 10:05, Daniil Stas <daniil.stas@posteo.net> wrote: >> >> Here is the bug report I created: >> https://bugzilla.kernel.org/show_bug.cgi?id=217719 > > Let's just disable the stupid fTPM hwrnd thing. > > Maybe use it for the boot-time "gather entropy from different > sources", but clearly it should *not* be used at runtime. > > Why would anybody use that crud when any machine that has it > supposedly fixed (which apparently didn't turn out to be true after > all) would also have the CPU rdrand instruction that doesn't have the > problem? It /seems/ to be a separate problem, but yes I agree with your point. > > If you don't trust the CPU rdrand implementation (and that has had > bugs too - see clear_rdrand_cpuid_bit() and x86_init_rdrand()), why > would you trust the fTPM version that has caused even *more* problems? That's exactly why I was asking in the kernel bugzilla if something similar gets tripped up by RDRAND. I've got a patch that tears it out entirely for AMD fTPMs in the bugzilla, but I would prefer to discuss this with BIOS people before going that direction. > > So I don't see any downside to just saying "that fTPM thing is not > working". Even if it ends up working in the future, there are > alternatives that aren't any worse. > > Linus
On Fri, 28 Jul 2023 at 14:01, Limonciello, Mario <mario.limonciello@amd.com> wrote: > > That's exactly why I was asking in the kernel bugzilla if something > similar gets tripped up by RDRAND. So that would sound very unlikely, but who knows... Microcode can obviously do pretty much anything at all, but at least the original fTPM issues _seemed_ to be about BIOS doing truly crazy things like SPI flash accesses. I can easily imagine a BIOS fTPM code using some absolutely horrid global "EFI synchronization" lock or whatever, which could then cause random problems just based on some entirely unrelated activity. I would not be surprised, for example, if wasn't the fTPM hwrnd code itself that decided to read some random number from SPI, but that it simply got serialized with something else that the BIOS was involved with. It's not like BIOS people are famous for their scalable code that is entirely parallel... And I'd be _very_ surprised if CPU microcode does anything even remotely like that. Not impossible - HP famously screwed with the time stamp counter with SMIs, and I could imagine them - or others - doing the same with rdrand. But it does sound pretty damn unlikely, compared to "EFI BIOS uses a one big lock approach". So rdrand (and rdseed in particular) can be rather slow, but I think we're talking hundreds of CPU cycles (maybe low thousands). Nothing like the stuttering reports we've seen from fTPM. Linus
On 7/28/2023 4:38 PM, Linus Torvalds wrote: > On Fri, 28 Jul 2023 at 14:01, Limonciello, Mario > <mario.limonciello@amd.com> wrote: >> >> That's exactly why I was asking in the kernel bugzilla if something >> similar gets tripped up by RDRAND. > > So that would sound very unlikely, but who knows... Microcode can > obviously do pretty much anything at all, but at least the original > fTPM issues _seemed_ to be about BIOS doing truly crazy things like > SPI flash accesses. > > I can easily imagine a BIOS fTPM code using some absolutely horrid > global "EFI synchronization" lock or whatever, which could then cause > random problems just based on some entirely unrelated activity. > > I would not be surprised, for example, if wasn't the fTPM hwrnd code > itself that decided to read some random number from SPI, but that it > simply got serialized with something else that the BIOS was involved > with. It's not like BIOS people are famous for their scalable code > that is entirely parallel... > > And I'd be _very_ surprised if CPU microcode does anything even > remotely like that. Not impossible - HP famously screwed with the time > stamp counter with SMIs, and I could imagine them - or others - doing > the same with rdrand. > > But it does sound pretty damn unlikely, compared to "EFI BIOS uses a > one big lock approach". > > So rdrand (and rdseed in particular) can be rather slow, but I think > we're talking hundreds of CPU cycles (maybe low thousands). Nothing > like the stuttering reports we've seen from fTPM. > > Linus Your theory sounds totally plausible and it would explain why even though this system has the fixes from the original issue it's tripping a similar behavior. Based on the argument of RDRAND being on the same SOC I think it's a pretty good argument to drop contributing to the hwrng entropy *anything* that's not a dTPM.
On Fri Jul 28, 2023 at 8:18 PM UTC, Daniil Stas wrote: > On Fri, 28 Jul 2023 19:30:18 +0000 > "Jarkko Sakkinen" <jarkko@kernel.org> wrote: > > > On Thu Jul 27, 2023 at 3:38 PM UTC, Daniil Stas wrote: > > > Hi, > > > I am still getting fTPM stutters with 6.4.3 kernel on Asus GA402RJ > > > laptop. > > > Compiling kernel without TPM support makes the stutters go away. > > > The fTPM firmware version is 0x3005700020005 on my machine. > > > > This is needs a bit more elaboration in order to be comprehended. > > > > Do you mean by "stutter" unexpected delays and when do they happen? > > > > BR, Jarkko > > Yes, unexpected delays. They just happen randomly. > You can google "AMD fTPM stuttering", there are a lot of examples. > Here is one: https://www.youtube.com/watch?v=TYnRL-x6DVI What if you make tpm_amd_is_rng_defective() unconditionally return true? Does this make the problem disappear, or not? BR, Jarkko
On Mon, 31 Jul 2023 10:14:05 +0000 "Jarkko Sakkinen" <jarkko@kernel.org> wrote: > On Fri Jul 28, 2023 at 8:18 PM UTC, Daniil Stas wrote: > > On Fri, 28 Jul 2023 19:30:18 +0000 > > "Jarkko Sakkinen" <jarkko@kernel.org> wrote: > > > > > On Thu Jul 27, 2023 at 3:38 PM UTC, Daniil Stas wrote: > [...] > > > > > > This is needs a bit more elaboration in order to be comprehended. > > > > > > Do you mean by "stutter" unexpected delays and when do they > > > happen? > > > > > > BR, Jarkko > > > > Yes, unexpected delays. They just happen randomly. > > You can google "AMD fTPM stuttering", there are a lot of examples. > > Here is one: https://www.youtube.com/watch?v=TYnRL-x6DVI > > What if you make tpm_amd_is_rng_defective() to unconditonally return > true? Does this make the problem dissappear, or not? > > BR, Jarkko I already tried compiling kernel without CONFIG_HW_RANDOM_TPM enabled, which does the same. Yes, it removes the issue.
On Mon Jul 31, 2023 at 10:28 AM UTC, Daniil Stas wrote: > On Mon, 31 Jul 2023 10:14:05 +0000 > "Jarkko Sakkinen" <jarkko@kernel.org> wrote: > > > On Fri Jul 28, 2023 at 8:18 PM UTC, Daniil Stas wrote: > > > On Fri, 28 Jul 2023 19:30:18 +0000 > > > "Jarkko Sakkinen" <jarkko@kernel.org> wrote: > > > > > > > On Thu Jul 27, 2023 at 3:38 PM UTC, Daniil Stas wrote: > > [...] > > > > > > > > This is needs a bit more elaboration in order to be comprehended. > > > > > > > > Do you mean by "stutter" unexpected delays and when do they > > > > happen? > > > > > > > > BR, Jarkko > > > > > > Yes, unexpected delays. They just happen randomly. > > > You can google "AMD fTPM stuttering", there are a lot of examples. > > > Here is one: https://www.youtube.com/watch?v=TYnRL-x6DVI > > > > What if you make tpm_amd_is_rng_defective() to unconditonally return > > true? Does this make the problem dissappear, or not? > > > > BR, Jarkko > > I already tried compiling kernel without CONFIG_HW_RANDOM_TPM enabled, > which does the same. > Yes, it removes the issue. Thank you, just wanted sanity check that I exactly know what is going on. BR, Jarkko
On Mon, 31 Jul 2023 at 03:53, Jarkko Sakkinen <jarkko@kernel.org> wrote: > > I quickly carved up a patch (attached), which is only compile tested > because I do not have any AMD hardware at hand. Is there some way to just see "this is a fTPM"? Because honestly, even if AMD is the one that has had stuttering issues, the bigger argument is that there is simply no _point_ in supporting randomness from a firmware source. There is no way anybody should believe that a firmware TPM generates better randomness than we do natively. And there are many reasons to _not_ believe it. The AMD problem is just the most user-visible one. Now, I'm not saying that a fTPM needs to be disabled in general - but I really feel like we should just do static int tpm_add_hwrng(struct tpm_chip *chip) { if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM)) return 0; // If it's not hardware, don't treat it as such if (tpm_is_fTPM(chip)) return 0; [...] and be done with it. But hey, if we have no way to see that whole "this is firmware emulation", then just blocking AMD might be the only way. Linus
On 7/31/2023 2:05 PM, Linus Torvalds wrote: > On Mon, 31 Jul 2023 at 03:53, Jarkko Sakkinen <jarkko@kernel.org> wrote: >> >> I quickly carved up a patch (attached), which is only compile tested >> because I do not have any AMD hardware at hand. > > Is there some way to just see "this is a fTPM"? > How many fTPM implementations are there? We're talking like less than 5 right? Maybe just check against a static list when calling tpm_add_hwrng(). > Because honestly, even if AMD is the one that has had stuttering > issues, the bigger argument is that there is simply no _point_ in > supporting randomness from a firmware source. > I've had some discussions today with a variety of people on this problem and there is no advantage to get RNG through the fTPM over RDRAND. They both source the exact same hardware IP, but RDRAND is a *lot* more direct. > There is no way anybody should believe that a firmware TPM generates > better randomness than we do natively. > > And there are many reasons to _not_ believe it. The AMD problem is > just the most user-visible one. > > Now, I'm not saying that a fTPM needs to be disabled in general - but > I really feel like we should just do > > static int tpm_add_hwrng(struct tpm_chip *chip) > { > if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM)) > return 0; > // If it's not hardware, don't treat it as such > if (tpm_is_fTPM(chip)) > return 0; > [...] > > and be done with it. > > But hey, if we have no way to see that whole "this is firmware > emulation", then just blocking AMD might be the only way. > > Linus
On Mon, 31 Jul 2023 at 12:18, Limonciello, Mario <mario.limonciello@amd.com> wrote: > > > Is there some way to just see "this is a fTPM"? > > How many fTPM implementations are there? We're talking like less than 5 > right? Maybe just check against a static list when > calling tpm_add_hwrng(). Sounds sane. But I was hoping for some direct way to just query "are you a firmware SMI hook, or real hardware". It would be lovely to avoid the list, because maybe AMD does - or in the past has done - discrete TPM hardware? So it might not be as easy as just checking against the manufacturer.. That said, maybe it really doesn't matter. I'm perfectly fine with just the "check for AMD as a manufacturer" too. In fact, I'd be perfectly happy with not using the TPM for run-time randomness at all, and purely doing it for the bootup entropy, which is where I feel it matters a lot more. > I've had some discussions today with a variety of people on this problem > and there is no advantage to get RNG through the fTPM over RDRAND. Ack. And that's true even if you _trust_ the fTPM. That said, I see no real downside to using the TPM (whether firmware or discrete) to just add to the boot-time "we'll gather entropy for our random number generator from any source". So it's purely the runtime randomness where I feel that the upside just isn't there, and the downsides are real. Linus
On 7/31/2023 5:53 AM, Jarkko Sakkinen wrote: > On Fri Jul 28, 2023 at 8:41 PM UTC, Linus Torvalds wrote: >> On Thu, 27 Jul 2023 at 10:05, Daniil Stas <daniil.stas@posteo.net> wrote: >>> >>> Here is the bug report I created: >>> https://bugzilla.kernel.org/show_bug.cgi?id=217719 >> >> Let's just disable the stupid fTPM hwrnd thing. >> >> Maybe use it for the boot-time "gather entropy from different >> sources", but clearly it should *not* be used at runtime. >> >> Why would anybody use that crud when any machine that has it >> supposedly fixed (which apparently didn't turn out to be true after >> all) would also have the CPU rdrand instruction that doesn't have the >> problem? >> >> If you don't trust the CPU rdrand implementation (and that has had >> bugs too - see clear_rdrand_cpuid_bit() and x86_init_rdrand()), why >> would you trust the fTPM version that has caused even *more* problems? >> >> So I don't see any downside to just saying "that fTPM thing is not >> working". Even if it ends up working in the future, there are >> alternatives that aren't any worse. >> >> Linus > > I quickly carved up a patch (attached), which is only compile tested > because I do not have any AMD hardware at hand. > > BR, Jarkko > I'll get some testing done on this patch with some AMD reference hardware, but off the cuff comments: 1) It needs to target older stable than 6.3 because 6.1-rc1 is when b006c439d58d was introduced and 6.1.19 backported f1324bbc4 as dc64dc4c8. 2) Add a Link tag for [1] 3) The fix for [2] is lost. The one that landed upstream was ecff6813d2bc ("tpm: return false from tpm_amd_is_rng_defective on non-x86 platforms"). I sent another one that checked for chip->ops [3], but this wasn't picked up. [1] https://bugzilla.kernel.org/show_bug.cgi?id=217719 [2] https://lore.kernel.org/lkml/99B81401-DB46-49B9-B321-CF832B50CAC3@linux.ibm.com/ [3] https://lore.kernel.org/lkml/20230623030427.908-1-mario.limonciello@amd.com/
On 7/31/2023 2:30 PM, Linus Torvalds wrote: > On Mon, 31 Jul 2023 at 12:18, Limonciello, Mario > <mario.limonciello@amd.com> wrote: >> >>> Is there some way to just see "this is a fTPM"? >> >> How many fTPM implementations are there? We're talking like less than 5 >> right? Maybe just check against a static list when >> calling tpm_add_hwrng(). > > Sounds sane. But I was hoping for some direct way to just query "are > you a firmware SMI hook, or real hardware". > > It would be lovely to avoid the list, because maybe AMD does - or in > the past have done - discrete TPM hardware? So it might not be as > easy as just checking against the manufacturer.. > > That said, maybe it really doesn't matter. I'm perfectly fine with > just the "check for AMD as a manufacturer" too. Jarkko's patch seems conceptually fine for now for the fire of the day if that's the consensus on the direction for this. > > In fact, I'd be perfectly happy with not using the TPM for run-time > randomness at all, and purely doing it for the bootup entropy, which > is where I feel it matters a lot m ore. > >> I've had some discussions today with a variety of people on this problem >> and there is no advantage to get RNG through the fTPM over RDRAND. > > Ack. > > And that's true even if you _trust_ the fTPM. > > That said, I see no real downside to using the TPM (whether firmware > or discrete) to just add to the boot-time "we'll gather entropy for > our random number generator from any source". > > So it's purely the runtime randomness where I feel that the upside > just isn't there, and the downsides are real. > > Linus Are you thinking then to unregister the tpm hwrng "sometime" after boot? What would be the right timing/event for this? Maybe rootfs_initcall?
On Mon, 31 Jul 2023 at 14:57, Limonciello, Mario <mario.limonciello@amd.com> wrote: > > Are you thinking then to unregister the tpm hwrng "sometime" after boot? No, I was more thinking that instead of registering it as a randomness source, you'd just do a one-time tpm_get_random(..); add_hwgenerator_randomness(..); and leave it at that. Even if there is some stutter due to some crazy firmware implementation for reading the random data, at boot time nobody will notice it. Linus
Hi all, I've been tracking this issue with Mario on various threads and bugzilla for a while now. My suggestion over at bugzilla was to just disable all current AMD fTPMs by bumping the check for a major version number, so that the hardware people can reenable it if it's ever fixed, but only if this is something that the hardware people would actually respect. As I understand it, Mario was going to check into it and see. Failing that, yea, just disabling hwrng on fTPM seems like a fine enough thing to do. The reason I'm not too concerned about that is twofold: - Systems with fTPM all have RDRAND anyway, so there's no entropy problem. - fTPM *probably* uses the same random source as RDRAND -- the TRNG_OUT MMIO register -- so it's not really doing much more than what we already have available. So this all seems fine. And Jarkko's patch seems more or less the straightforward way of disabling it. But with that said, in order of priority, maybe we should first try these: 1) Adjust the version check to a major-place fTPM version that AMD's hardware team pinky swears will have this bug fixed. (Though, I can already imagine somebody on the list shouting, "we don't trust hardware teams to do anything with unreleased stuff!", which could be valid.) 2) Remove the version check, but add some other query to detect AMD fTPM vs realTPM, and ban fTPM. - Remove the version check, and just check for AMD; this is Jarkko's patch. Mario will know best the feasibility of (1) and (2). Jason
On 7/31/23 18:40, Jason A. Donenfeld wrote: > Hi all, > > I've been tracking this issue with Mario on various threads and > bugzilla for a while now. My suggestion over at bugzilla was to just > disable all current AMD fTPMs by bumping the check for a major version > number, so that the hardware people can reenable it i it's ever fixed, > but only if this is something that the hardware people would actually > respect. As I understand it, Mario was going to check into it and see. > Failing that, yea, just disabling hwrng on fTPM seems like a fine > enough thing to do. > > The reason I'm not too concerned about that is twofold: > - Systems with fTPM all have RDRAND anyway, so there's no entropy problem. > - fTPM *probably* uses the same random source as RDRAND -- the > TRNG_OUT MMIO register -- so it's not really doing much more than what > we already have available. Yeah I have conversations ongoing about this topic, but also I concluded your suspicion is correct. They both get their values from the integrated CCP HW IP. > > So this all seems fine. And Jarkko's patch seems more or less the > straight forward way of disabling it. But with that said, in order of > priority, maybe we should first try these: > > 1) Adjust the version check to a major-place fTPM version that AMD's > hardware team pinky swears will have this bug fixed. (Though, I can > already imagine somebody on the list shouting, "we don't trust > hardware teams to do anything with unreleased stuff!", which could be > valid.) I find it very likely the actual root cause is similar to what Linus suggested. If that's the case I don't think the bug can be fixed by just an fTPM fix but would rather require a BIOS fix. This to me strengthens the argument to either not register fTPM as RNG in the first place or just use TPM for boot time entropy. > 2) Remove the version check, but add some other query to detect AMD > fTPM vs realTPM, and ban fTPM. AMD doesn't make dTPMs, only fTPMs. 
It's tempting to try to use TPM2_PT_VENDOR_TPM_TYPE, but this actually is a vendor specific value. I don't see a reliable way in the spec to do this. > - Remove the version check, and just check for AMD; this is Jarrko's patch. I have a counter-proposal to Jarkko's patch attached. This has two notable changes: 1) It only disables RNG generation in the case of having RDRAND or RDSEED. 2) It also matches Intel PTT. I still do also think Linus' idea of TPMs only providing boot time entropy is worth weighing out.
I was following the issue under or discord channel ROG for Linux and helping out some other users with it by shipping a kernel for Arch with disabled CONFIG_HW_RANDOM_TPM as the default recommend kernel for Arch for ROG laptops (as my device isn't affect by it because it is Ryzen 4800HS). I know it was discussed here https://bugzilla.kernel.org/show_bug.cgi?id=217212#c16 against allowing the user to disable fTPM to be used as a random source via a boot time parameter but I still I disagree with it. Linux does have a parameter `random.trust_cpu` to control the random source from CPU, why they can not be a parameter like `random.trust_ftpm` (or `random.trust_tpm`)? It might be my limited knowledge of this topic but to me it feels like if they is a trust_cpu then Linux should have trust_ftpm too. Mateusz
On Mon Jul 31, 2023 at 10:05 PM EEST, Linus Torvalds wrote: > On Mon, 31 Jul 2023 at 03:53, Jarkko Sakkinen <jarkko@kernel.org> wrote: > > > > I quickly carved up a patch (attached), which is only compile tested > > because I do not have any AMD hardware at hand. > > Is there some way to just see "this is a fTPM"? > > Because honestly, even if AMD is the one that has had stuttering > issues, the bigger argument is that there is simply no _point_ in > supporting randomness from a firmware source. > > There is no way anybody should believe that a firmware TPM generates > better randomness than we do natively. > > And there are many reasons to _not_ believe it. The AMD problem is > just the most user-visible one. > > Now, I'm not saying that a fTPM needs to be disabled in general - but > I really feel like we should just do > > static int tpm_add_hwrng(struct tpm_chip *chip) > { > if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM)) > return 0; > // If it's not hardware, don't treat it as such > if (tpm_is_fTPM(chip)) > return 0; > [...] > > and be done with it. > > But hey, if we have no way to see that whole "this is firmware > emulation", then just blocking AMD might be the only way. > > Linus I would disable it inside tpm_crb driver, which is the driver used for fTPM's: they are identified by MSFT0101 ACPI identifier. I think the right scope is still AMD because we don't have such regressions with Intel fTPM. I.e. I would move the helper I created inside tpm_crb driver, and a new flag, let's say "TPM_CHIP_FLAG_HWRNG_DISABLED", which tpm_crb sets before calling tpm_chip_register(). Finally, tpm_add_hwrng() needs the following invariant: if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED) return 0; How does this sound? I can refine this quickly from my first trial. BR, Jarkko
On Tue, 1 Aug 2023 at 11:28, Jarkko Sakkinen <jarkko@kernel.org> wrote: > > I would disable it inside tpm_crb driver, which is the driver used > for fTPM's: they are identified by MSFT0101 ACPI identifier. > > I think the right scope is still AMD because we don't have such > regressions with Intel fTPM. I'm ok with that. > I.e. I would move the helper I created inside tpm_crb driver, and > a new flag, let's say "TPM_CHIP_FLAG_HWRNG_DISABLED", which tpm_crb > sets before calling tpm_chip_register(). > > Finally, tpm_add_hwrng() needs the following invariant: > > if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED) > return 0; > > How does this sound? I can refine this quickly from my first trial. Sounds fine. My only worry comes from my ignorance: do these fTPM devices *always* end up being enumerated through CRB, or do they potentially look "normal enough" that you can actually end up using them even without having that CRB driver loaded? Put another way: is the CRB driver the _only_ way they are visible, or could some people hit on this through the TPM TIS interface if they have CRB disabled? I see, for example, that qemu ends up emulating the TIS layer, and it might end up forwarding the TPM requests to something that is natively CRB? But again: I don't know enough about CRB vs TIS, so the above may be a stupid question. Linus
On 8/1/2023 13:42, Linus Torvalds wrote: > On Tue, 1 Aug 2023 at 11:28, Jarkko Sakkinen <jarkko@kernel.org> wrote: >> >> I would disable it inside tpm_crb driver, which is the driver used >> for fTPM's: they are identified by MSFT0101 ACPI identifier. >> >> I think the right scope is still AMD because we don't have such >> regressions with Intel fTPM. > > I'm ok with that. > >> I.e. I would move the helper I created inside tpm_crb driver, and >> a new flag, let's say "TPM_CHIP_FLAG_HWRNG_DISABLED", which tpm_crb >> sets before calling tpm_chip_register(). >> >> Finally, tpm_add_hwrng() needs the following invariant: >> >> if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED) >> return 0; >> >> How does this sound? I can refine this quickly from my first trial. > > Sounds fine. This sounds fine by me too, thanks. > > My only worry comes from my ignorance: do these fTPM devices *always* > end up being enumerated through CRB, or do they potentially look > "normal enough" that you can actually end up using them even without > having that CRB driver loaded? > > Put another way: is the CRB driver the _only_ way they are visible, or > could some people hit on this through the TPM TIS interface if they > have CRB disabled? > > I see, for example, that qemu ends up emulating the TIS layer, and it > might end up forwarding the TPM requests to something that is natively > CRB? > > But again: I don't know enough about CRB vs TIS, so the above may be a > stupid question. > > Linus
On Tue Aug 1, 2023 at 6:04 AM EEST, Mario Limonciello wrote: > On 7/31/23 18:40, Jason A. Donenfeld wrote: > > Hi all, > > > > I've been tracking this issue with Mario on various threads and > > bugzilla for a while now. My suggestion over at bugzilla was to just > > disable all current AMD fTPMs by bumping the check for a major version > > number, so that the hardware people can reenable it i it's ever fixed, > > but only if this is something that the hardware people would actually > > respect. As I understand it, Mario was going to check into it and see. > > Failing that, yea, just disabling hwrng on fTPM seems like a fine > > enough thing to do. > > > > The reason I'm not too concerned about that is twofold: > > - Systems with fTPM all have RDRAND anyway, so there's no entropy problem. > > - fTPM *probably* uses the same random source as RDRAND -- the > > TRNG_OUT MMIO register -- so it's not really doing much more than what > > we already have available. > > Yeah I have conversations ongoing about this topic, but also I concluded > your suspicion is correct. They both get their values from the > integrated CCP HW IP. > > > > > So this all seems fine. And Jarkko's patch seems more or less the > > straight forward way of disabling it. But with that said, in order of > > priority, maybe we should first try these: > > > > 1) Adjust the version check to a major-place fTPM version that AMD's > > hardware team pinky swears will have this bug fixed. (Though, I can > > already imagine somebody on the list shouting, "we don't trust > > hardware teams to do anything with unreleased stuff!", which could be > > valid.) > > I find it very likely the actual root cause is similar to what Linus > suggested. If that's the case I don't think the bug can be fixed > by just an fTPM fix but would rather require a BIOS fix. > > This to me strengthens the argument to either not register fTPM as RNG > in the first place or just use TPM for boot time entropy. 
> > > 2) Remove the version check, but add some other query to detect AMD > > fTPM vs realTPM, and ban fTPM. > > AMD doesn't make dTPMs, only fTPMs. It's tempting to try to use > TPM2_PT_VENDOR_TPM_TYPE, but this actually is a vendor specific value. > > I don't see a reliable way in the spec to do this. > > > - Remove the version check, and just check for AMD; this is Jarkko's patch. > > I have a counter-proposal to Jarkko's patch attached. This has two > notable changes: > > 1) It only disables RNG generation in the case of having RDRAND or RDSEED. > 2) It also matches Intel PTT. > > I still do also think Linus' idea of TPMs only providing boot time > entropy is worth weighing out. You should add something like TPM_CHIP_HWRNG_DISABLED instead and set this in tpm_crb before calling tpm_chip_register(). Nothing else concerning AMD hardware should be done in tpm-chip.c. It should only check TPM_CHIP_HWRNG_DISABLED in the beginning of tpm_add_hwrng(). BR, Jarkko
On Tue Aug 1, 2023 at 9:52 PM EEST, Jarkko Sakkinen wrote: > On Tue Aug 1, 2023 at 6:04 AM EEST, Mario Limonciello wrote: > > On 7/31/23 18:40, Jason A. Donenfeld wrote: > > > Hi all, > > > > > > I've been tracking this issue with Mario on various threads and > > > bugzilla for a while now. My suggestion over at bugzilla was to just > > > disable all current AMD fTPMs by bumping the check for a major version > > > number, so that the hardware people can reenable it i it's ever fixed, > > > but only if this is something that the hardware people would actually > > > respect. As I understand it, Mario was going to check into it and see. > > > Failing that, yea, just disabling hwrng on fTPM seems like a fine > > > enough thing to do. > > > > > > The reason I'm not too concerned about that is twofold: > > > - Systems with fTPM all have RDRAND anyway, so there's no entropy problem. > > > - fTPM *probably* uses the same random source as RDRAND -- the > > > TRNG_OUT MMIO register -- so it's not really doing much more than what > > > we already have available. > > > > Yeah I have conversations ongoing about this topic, but also I concluded > > your suspicion is correct. They both get their values from the > > integrated CCP HW IP. > > > > > > > > So this all seems fine. And Jarkko's patch seems more or less the > > > straight forward way of disabling it. But with that said, in order of > > > priority, maybe we should first try these: > > > > > > 1) Adjust the version check to a major-place fTPM version that AMD's > > > hardware team pinky swears will have this bug fixed. (Though, I can > > > already imagine somebody on the list shouting, "we don't trust > > > hardware teams to do anything with unreleased stuff!", which could be > > > valid.) > > > > I find it very likely the actual root cause is similar to what Linus > > suggested. If that's the case I don't think the bug can be fixed > > by just an fTPM fix but would rather require a BIOS fix. 
> > > > This to me strengthens the argument to either not register fTPM as RNG > > in the first place or just use TPM for boot time entropy. > > > > > 2) Remove the version check, but add some other query to detect AMD > > > fTPM vs realTPM, and ban fTPM. > > > > AMD doesn't make dTPMs, only fTPMs. It's tempting to try to use > > TPM2_PT_VENDOR_TPM_TYPE, but this actually is a vendor specific value. > > > > I don't see a reliable way in the spec to do this. > > > > > - Remove the version check, and just check for AMD; this is Jarkko's patch. > > > > I have a counter-proposal to Jarkko's patch attached. This has two > > notable changes: > > > > 1) It only disables RNG generation in the case of having RDRAND or RDSEED. > > 2) It also matches Intel PTT. > > > > I still do also think Linus' idea of TPMs only providing boot time > > entropy is worth weighing out. > > You should add something like TPM_CHIP_HWRNG_DISABLED instead and > set this in tpm_crb before calling tpm_chip_register(). > > Nothing else concerning AMD hardware should be done in tpm-chip.c. > It should only check TPM_CHIP_HWRNG_DISABLED in the beginning of > tpm_add_hwrng(). In English: I think adding the function to tpm-chip.c was a really bad idea in the first place, so let's revert that decision and do this correctly in tpm_crb.c. BR, Jarkko
On Tue Aug 1, 2023 at 9:42 PM EEST, Linus Torvalds wrote: > On Tue, 1 Aug 2023 at 11:28, Jarkko Sakkinen <jarkko@kernel.org> wrote: > > > > I would disable it inside tpm_crb driver, which is the driver used > > for fTPM's: they are identified by MSFT0101 ACPI identifier. > > > > I think the right scope is still AMD because we don't have such > > regressions with Intel fTPM. > > I'm ok with that. > > > I.e. I would move the helper I created inside tpm_crb driver, and > > a new flag, let's say "TPM_CHIP_FLAG_HWRNG_DISABLED", which tpm_crb > > sets before calling tpm_chip_register(). > > > > Finally, tpm_add_hwrng() needs the following invariant: > > > > if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED) > > return 0; > > > > How does this sound? I can refine this quickly from my first trial. > > Sounds fine. Mario, it would be good if you could send a fix candidate but take my suggestion for a new TPM chip flag into account, while doing it. Please send it as a separate patch, not attachment to this thread. I can test and ack it, if it looks reasonable. > My only worry comes from my ignorance: do these fTPM devices *always* > end up being enumerated through CRB, or do they potentially look > "normal enough" that you can actually end up using them even without > having that CRB driver loaded? I know that QEMU has TPM passthrough but I don't know how it behaves exactly. > Put another way: is the CRB driver the _only_ way they are visible, or > could some people hit on this through the TPM TIS interface if they > have CRB disabled? I'm not aware of such implementations. > I see, for example, that qemu ends up emulating the TIS layer, and it > might end up forwarding the TPM requests to something that is natively > CRB? > > But again: I don't know enough about CRB vs TIS, so the above may be a > stupid question. > > Linus I would focus exactly what is known not to work and disable exactly that. 
If someone still wants to enable TPM on such hardware, we can later on add a kernel command-line flag to enforce hwrng. This is, of course, based on user feedback, not something I would add right now. BR, Jarkko
On Tue, Aug 01, 2023 at 10:09:58PM +0300, Jarkko Sakkinen wrote: > On Tue Aug 1, 2023 at 9:42 PM EEST, Linus Torvalds wrote: > > On Tue, 1 Aug 2023 at 11:28, Jarkko Sakkinen <jarkko@kernel.org> wrote: > > > > > > I would disable it inside tpm_crb driver, which is the driver used > > > for fTPM's: they are identified by MSFT0101 ACPI identifier. > > > > > > I think the right scope is still AMD because we don't have such > > > regressions with Intel fTPM. > > > > I'm ok with that. > > > > > I.e. I would move the helper I created inside tpm_crb driver, and > > > a new flag, let's say "TPM_CHIP_FLAG_HWRNG_DISABLED", which tpm_crb > > > sets before calling tpm_chip_register(). > > > > > > Finally, tpm_add_hwrng() needs the following invariant: > > > > > > if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED) > > > return 0; > > > > > > How does this sound? I can refine this quickly from my first trial. > > > > Sounds fine. > > Mario, it would be good if you could send a fix candidate but take my > suggestion for a new TPM chip flag into account, while doing it. Please > send it as a separate patch, not attachment to this thread. > > I can test and ack it, if it looks reasonable. > > > My only worry comes from my ignorance: do these fTPM devices *always* > > end up being enumerated through CRB, or do they potentially look > > "normal enough" that you can actually end up using them even without > > having that CRB driver loaded? > > I know that QEMU has TPM passthrough but I don't know how it behaves > exactly. > I just created a passthrough tpm device with a guest which it is using the tis driver, while the host is using crb (and apparently one of the amd devices that has an impacted fTPM). 
It looks like there is a complete separation between the frontend and backends, with the front end providing either a tis or crb interface to the guest, and then the backend sending commands by writing to the passthrough device that was given, such as /dev/tpm0, or an emulator such as swtpm. Stefan can probably explain it much better than I. Regards, Jerry > > Put another way: is the CRB driver the _only_ way they are visible, or > > could some people hit on this through the TPM TIS interface if they > > have CRB disabled? > > I'm not aware of such implementations. > > > I see, for example, that qemu ends up emulating the TIS layer, and it > > might end up forwarding the TPM requests to something that is natively > > CRB? > > > > But again: I don't know enough about CRB vs TIS, so the above may be a > > stupid question. > > > > Linus > > I would focus exactly what is known not to work and disable exactly > that. > > If someone still wants to enable TPM on such hardware, we can later > on add a kernel command-line flag to enforce hwrng. This ofc based > on user feedback, not something I would add right now. > > BR, Jarkko
On 8/2/23 19:13, Jerry Snitselaar wrote: > On Tue, Aug 01, 2023 at 10:09:58PM +0300, Jarkko Sakkinen wrote: >> On Tue Aug 1, 2023 at 9:42 PM EEST, Linus Torvalds wrote: >>> On Tue, 1 Aug 2023 at 11:28, Jarkko Sakkinen <jarkko@kernel.org> wrote: >>>> >>>> I would disable it inside tpm_crb driver, which is the driver used >>>> for fTPM's: they are identified by MSFT0101 ACPI identifier. >>>> >>>> I think the right scope is still AMD because we don't have such >>>> regressions with Intel fTPM. >>> >>> I'm ok with that. >>> >>>> I.e. I would move the helper I created inside tpm_crb driver, and >>>> a new flag, let's say "TPM_CHIP_FLAG_HWRNG_DISABLED", which tpm_crb >>>> sets before calling tpm_chip_register(). >>>> >>>> Finally, tpm_add_hwrng() needs the following invariant: >>>> >>>> if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED) >>>> return 0; >>>> >>>> How does this sound? I can refine this quickly from my first trial. >>> >>> Sounds fine. >> >> Mario, it would be good if you could send a fix candidate but take my >> suggestion for a new TPM chip flag into account, while doing it. Please >> send it as a separate patch, not attachment to this thread. >> >> I can test and ack it, if it looks reasonable. >> >>> My only worry comes from my ignorance: do these fTPM devices *always* >>> end up being enumerated through CRB, or do they potentially look >>> "normal enough" that you can actually end up using them even without >>> having that CRB driver loaded? >> >> I know that QEMU has TPM passthrough but I don't know how it behaves >> exactly. >> > > I just created a passthrough tpm device with a guest which it is using > the tis driver, while the host is using crb (and apparently one of the > amd devices that has an impacted fTPM). 
It looks like there is a > complete separation between the frontend and backends, with the front > end providing either a tis or crb interface to the guest, and then the > backend sending commands by writing to the passthrough device that was > given, such as /dev/tpm0, or an emulator such as swtpm. Stefan can > probably explain it much better than I. You explained it well... The passthrough TPM is only good for one VM (if at all), and all other VMs on the same machine should use a vTPM. Even one VM sharing the TPM with the host creates a potential mess with the shared resources of the TPM, such as the state of the PCRs. When that guest VM using the passthrough device now identifies the underlying hardware TPM's firmware version it will also take the same action to disable the TPM as a source for randomness. But then a VM with a passthrough TPM device should be rather rare... > >>> Put another way: is the CRB driver the _only_ way they are visible, or >>> could some people hit on this through the TPM TIS interface if they >>> have CRB disabled? >> >> I'm not aware of such implementations. CRB and TIS are two distinct MMIO type of interfaces with different registers etc. AMD could theoretically build a fTPM with a CRB interface and then another one with the same firmware and the TIS, but why would they? Stefan >> >>> I see, for example, that qemu ends up emulating the TIS layer, and it >>> might end up forwarding the TPM requests to something that is natively >>> CRB? >>> >>> But again: I don't know enough about CRB vs TIS, so the above may be a >>> stupid question. >>> >>> Linus >> >> I would focus exactly what is known not to work and disable exactly >> that. >> >> If someone still wants to enable TPM on such hardware, we can later >> on add a kernel command-line flag to enforce hwrng. This ofc based >> on user feedback, not something I would add right now. >> >> BR, Jarkko >
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 741d8f3e8fb3a..348dd5705fbb6 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -512,6 +512,65 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip) return 0; } +static bool tpm_is_rng_defective(struct tpm_chip *chip) +{ + int ret; + u64 version; + u32 val1, val2; + + /* No known-broken TPM1 chips. */ + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) + return false; + + ret = tpm_request_locality(chip); + if (ret) + return false; + + /* Some AMD fTPM versions may cause stutter */ + ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL); + if (ret) + goto release; + if (val1 != 0x414D4400U /* AMD */) { + ret = -ENODEV; + goto release; + } + ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL); + if (ret) + goto release; + ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL); + if (ret) + goto release; + +release: + tpm_relinquish_locality(chip); + + if (ret) + return false; + + version = ((u64)val1 << 32) | val2; + /* + * Fixes for stutter as described in + * https://www.amd.com/en/support/kb/faq/pa-410 + * are available in two series of fTPM firmware: + * 6.x.y.z series: 6.0.18.6 + + * 3.x.y.z series: 3.57.x.5 + + */ + if ((version >> 48) == 6) { + if (version >= 0x0006000000180006ULL) + return false; + } else if ((version >> 48) == 3) { + if (version >= 0x0003005700000005ULL) + return false; + } else { + return false; + } + dev_warn(&chip->dev, + "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n", + version); + + return true; +} + static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) { struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng); @@ -521,7 +580,8 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) static int tpm_add_hwrng(struct tpm_chip *chip) { - if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip)) + if 
(!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) || + tpm_is_rng_defective(chip)) return 0; snprintf(chip->hwrng_name, sizeof(chip->hwrng_name), diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 24ee4e1cc452a..830014a266090 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -150,6 +150,79 @@ enum tpm_sub_capabilities { TPM_CAP_PROP_TIS_DURATION = 0x120, }; +enum tpm2_pt_props { + TPM2_PT_NONE = 0x00000000, + TPM2_PT_GROUP = 0x00000100, + TPM2_PT_FIXED = TPM2_PT_GROUP * 1, + TPM2_PT_FAMILY_INDICATOR = TPM2_PT_FIXED + 0, + TPM2_PT_LEVEL = TPM2_PT_FIXED + 1, + TPM2_PT_REVISION = TPM2_PT_FIXED + 2, + TPM2_PT_DAY_OF_YEAR = TPM2_PT_FIXED + 3, + TPM2_PT_YEAR = TPM2_PT_FIXED + 4, + TPM2_PT_MANUFACTURER = TPM2_PT_FIXED + 5, + TPM2_PT_VENDOR_STRING_1 = TPM2_PT_FIXED + 6, + TPM2_PT_VENDOR_STRING_2 = TPM2_PT_FIXED + 7, + TPM2_PT_VENDOR_STRING_3 = TPM2_PT_FIXED + 8, + TPM2_PT_VENDOR_STRING_4 = TPM2_PT_FIXED + 9, + TPM2_PT_VENDOR_TPM_TYPE = TPM2_PT_FIXED + 10, + TPM2_PT_FIRMWARE_VERSION_1 = TPM2_PT_FIXED + 11, + TPM2_PT_FIRMWARE_VERSION_2 = TPM2_PT_FIXED + 12, + TPM2_PT_INPUT_BUFFER = TPM2_PT_FIXED + 13, + TPM2_PT_HR_TRANSIENT_MIN = TPM2_PT_FIXED + 14, + TPM2_PT_HR_PERSISTENT_MIN = TPM2_PT_FIXED + 15, + TPM2_PT_HR_LOADED_MIN = TPM2_PT_FIXED + 16, + TPM2_PT_ACTIVE_SESSIONS_MAX = TPM2_PT_FIXED + 17, + TPM2_PT_PCR_COUNT = TPM2_PT_FIXED + 18, + TPM2_PT_PCR_SELECT_MIN = TPM2_PT_FIXED + 19, + TPM2_PT_CONTEXT_GAP_MAX = TPM2_PT_FIXED + 20, + TPM2_PT_NV_COUNTERS_MAX = TPM2_PT_FIXED + 22, + TPM2_PT_NV_INDEX_MAX = TPM2_PT_FIXED + 23, + TPM2_PT_MEMORY = TPM2_PT_FIXED + 24, + TPM2_PT_CLOCK_UPDATE = TPM2_PT_FIXED + 25, + TPM2_PT_CONTEXT_HASH = TPM2_PT_FIXED + 26, + TPM2_PT_CONTEXT_SYM = TPM2_PT_FIXED + 27, + TPM2_PT_CONTEXT_SYM_SIZE = TPM2_PT_FIXED + 28, + TPM2_PT_ORDERLY_COUNT = TPM2_PT_FIXED + 29, + TPM2_PT_MAX_COMMAND_SIZE = TPM2_PT_FIXED + 30, + TPM2_PT_MAX_RESPONSE_SIZE = TPM2_PT_FIXED + 31, + TPM2_PT_MAX_DIGEST = TPM2_PT_FIXED + 32, + 
TPM2_PT_MAX_OBJECT_CONTEXT = TPM2_PT_FIXED + 33, + TPM2_PT_MAX_SESSION_CONTEXT = TPM2_PT_FIXED + 34, + TPM2_PT_PS_FAMILY_INDICATOR = TPM2_PT_FIXED + 35, + TPM2_PT_PS_LEVEL = TPM2_PT_FIXED + 36, + TPM2_PT_PS_REVISION = TPM2_PT_FIXED + 37, + TPM2_PT_PS_DAY_OF_YEAR = TPM2_PT_FIXED + 38, + TPM2_PT_PS_YEAR = TPM2_PT_FIXED + 39, + TPM2_PT_SPLIT_MAX = TPM2_PT_FIXED + 40, + TPM2_PT_TOTAL_COMMANDS = TPM2_PT_FIXED + 41, + TPM2_PT_LIBRARY_COMMANDS = TPM2_PT_FIXED + 42, + TPM2_PT_VENDOR_COMMANDS = TPM2_PT_FIXED + 43, + TPM2_PT_NV_BUFFER_MAX = TPM2_PT_FIXED + 44, + TPM2_PT_MODES = TPM2_PT_FIXED + 45, + TPM2_PT_MAX_CAP_BUFFER = TPM2_PT_FIXED + 46, + TPM2_PT_VAR = TPM2_PT_GROUP * 2, + TPM2_PT_PERMANENT = TPM2_PT_VAR + 0, + TPM2_PT_STARTUP_CLEAR = TPM2_PT_VAR + 1, + TPM2_PT_HR_NV_INDEX = TPM2_PT_VAR + 2, + TPM2_PT_HR_LOADED = TPM2_PT_VAR + 3, + TPM2_PT_HR_LOADED_AVAIL = TPM2_PT_VAR + 4, + TPM2_PT_HR_ACTIVE = TPM2_PT_VAR + 5, + TPM2_PT_HR_ACTIVE_AVAIL = TPM2_PT_VAR + 6, + TPM2_PT_HR_TRANSIENT_AVAIL = TPM2_PT_VAR + 7, + TPM2_PT_HR_PERSISTENT = TPM2_PT_VAR + 8, + TPM2_PT_HR_PERSISTENT_AVAIL = TPM2_PT_VAR + 9, + TPM2_PT_NV_COUNTERS = TPM2_PT_VAR + 10, + TPM2_PT_NV_COUNTERS_AVAIL = TPM2_PT_VAR + 11, + TPM2_PT_ALGORITHM_SET = TPM2_PT_VAR + 12, + TPM2_PT_LOADED_CURVES = TPM2_PT_VAR + 13, + TPM2_PT_LOCKOUT_COUNTER = TPM2_PT_VAR + 14, + TPM2_PT_MAX_AUTH_FAIL = TPM2_PT_VAR + 15, + TPM2_PT_LOCKOUT_INTERVAL = TPM2_PT_VAR + 16, + TPM2_PT_LOCKOUT_RECOVERY = TPM2_PT_VAR + 17, + TPM2_PT_NV_WRITE_RECOVERY = TPM2_PT_VAR + 18, + TPM2_PT_AUDIT_COUNTER_0 = TPM2_PT_VAR + 19, + TPM2_PT_AUDIT_COUNTER_1 = TPM2_PT_VAR + 20, +}; /* 128 bytes is an arbitrary cap. This could be as large as TPM_BUFSIZE - 18 * bytes, but 128 is still a relatively large number of random bytes and