Message ID: 20220321152820.246700-3-alexandru.elisei@arm.com
State: New, archived
Series: arm64: Add MTE support
Hi Alexandru,

On 3/21/22 3:28 PM, Alexandru Elisei wrote:
> MTE has been supported in Linux since commit 673638f434ee ("KVM: arm64:
> Expose KVM_ARM_CAP_MTE"), add support for it in kvmtool.
>
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
[...]
> diff --git a/arm/kvm.c b/arm/kvm.c
> index 80d233f13d0b..f2db93953778 100644
> --- a/arm/kvm.c
> +++ b/arm/kvm.c
> @@ -86,6 +86,9 @@ void kvm__arch_init(struct kvm *kvm, const char *hugetlbfs_path, u64 ram_size)
>  	/* Create the virtual GIC. */
>  	if (gic__create(kvm, kvm->cfg.arch.irqchip))
>  		die("Failed to create virtual GIC");
> +
> +	if (kvm->cfg.arch.has_mte)
> +		kvm__arch_enable_mte(kvm);
>  }

Can we enable it unconditionally if KVM_CAP_ARM_MTE is supported like we do
for PAC and SVE?

Cheers
Vladimir
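For context, a minimal sketch of what the unconditional variant being asked
about could look like, reusing kvm__supports_extension() and the
KVM_ENABLE_CAP call from the patch below; this is not part of the posted
series, and silently skipping when the capability is absent is an assumption:

	/* Hypothetical: enable MTE whenever the host advertises it. */
	void kvm__arch_enable_mte(struct kvm *kvm)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_ARM_MTE,
		};

		/* No command line option: quietly skip on hosts without MTE. */
		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_MTE))
			return;

		if (ioctl(kvm->vm_fd, KVM_ENABLE_CAP, &cap))
			die_perror("KVM_ENABLE_CAP(KVM_CAP_ARM_MTE)");
	}

kvm__arch_init() would then call this unconditionally instead of checking
kvm->cfg.arch.has_mte.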
Hi,

On Mon, Mar 21, 2022 at 03:40:18PM +0000, Vladimir Murzin wrote:
> Hi Alexandru,
>
> On 3/21/22 3:28 PM, Alexandru Elisei wrote:
> > MTE has been supported in Linux since commit 673638f434ee ("KVM: arm64:
> > Expose KVM_ARM_CAP_MTE"), add support for it in kvmtool.
[...]
> Can we enable it unconditionally if KVM_CAP_ARM_MTE is supported like we do
> for PAC and SVE?

I thought about that, the reason I chose to enable it based on a kvmtool
command line option, instead of always being enabled if available, is
because of the overhead of sanitising the MTE tags on each stage 2 data
abort. Steven, am I overreacting and that overhead is negligible?

Also, as far as I know, PAC and SVE incur basically no overhead in KVM
until the guest starts to use those features.

Do you have a specific reason for wanting MTE to always be enabled if
available? I'm happy to be convinced to make MTE enabled by default, I
don't have a preference either way.

Thanks,
Alex
On 21/03/2022 17:08, Alexandru Elisei wrote:
> On Mon, Mar 21, 2022 at 03:40:18PM +0000, Vladimir Murzin wrote:
>> Can we enable it unconditionally if KVM_CAP_ARM_MTE is supported like we do
>> for PAC and SVE?
>
> I thought about that, the reason I chose to enable it based on a kvmtool
> command line option, instead of always being enabled if available, is
> because of the overhead of sanitising the MTE tags on each stage 2 data
> abort. Steven, am I overreacting and that overhead is negligible?

I don't have any figures from hardware I'm afraid, so I can't say what the
actual time overhead is in reality (but I suspect it's measurable).

However there is also a memory overhead when it comes to swapping MTE
tagged memory out. The tags have to be stored somewhere (currently they
remain in memory) so there's >3% overhead[1] in terms of memory when
swapped out.

So I think probably the opt-in approach makes sense, but I've no strong
feelings and I can see the benefits of the default being "enabled if
available".

Steve

[1] For each 4k page a 128 byte buffer is allocated for the tags (3.125%
overhead), but on top of that there's the kmalloc overhead and an XArray
to store the pointer in. I haven't done the maths but it probably comes
out closer to 4%.

> Also, as far as I know, PAC and SVE incur basically no overhead in KVM
> until the guest starts to use those features.
>
> Do you have a specific reason for wanting MTE to always be enabled if
> available? I'm happy to be convinced to make MTE enabled by default, I
> don't have a preference either way.
>
> Thanks,
> Alex
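(For reference, the 3.125% figure follows directly from the MTE tag format,
which stores 4 bits of tag for every 16-byte granule; for a 4 KiB page:

	4096 bytes / 16 bytes per tag granule = 256 granules
	256 granules * 4 bits per granule     = 128 bytes of tag storage
	128 / 4096                            = 3.125%

The kmalloc and XArray bookkeeping mentioned above comes on top of that.)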
On 3/21/22 5:08 PM, Alexandru Elisei wrote:
> On Mon, Mar 21, 2022 at 03:40:18PM +0000, Vladimir Murzin wrote:
>> Can we enable it unconditionally if KVM_CAP_ARM_MTE is supported like we do
>> for PAC and SVE?
>
> I thought about that, the reason I chose to enable it based on a kvmtool
> command line option, instead of always being enabled if available, is
> because of the overhead of sanitising the MTE tags on each stage 2 data
> abort. Steven, am I overreacting and that overhead is negligible?
>
> Also, as far as I know, PAC and SVE incur basically no overhead in KVM
> until the guest starts to use those features.
>
> Do you have a specific reason for wanting MTE to always be enabled if
> available? I'm happy to be convinced to make MTE enabled by default, I
> don't have a preference either way.

Well, automatically enabling if available would align with what we do for
other features in kvmtool and Linux itself - we tend to default y for new
features, even MTE, thus improving the chances of getting reports back early
if something (even performance) goes wrong. Just my 2p.

Cheers
Vladimir

> Thanks,
> Alex
Hi,

On Wed, Mar 23, 2022 at 10:31:15AM +0000, Vladimir Murzin wrote:
> On 3/21/22 5:08 PM, Alexandru Elisei wrote:
>> Do you have a specific reason for wanting MTE to always be enabled if
>> available? I'm happy to be convinced to make MTE enabled by default, I
>> don't have a preference either way.
>
> Well, automatically enabling if available would align with what we do for
> other features in kvmtool and Linux itself - we tend to default y for new
> features, even MTE, thus improving the chances of getting reports back early
> if something (even performance) goes wrong. Just my 2p.

According to Steven, for each 4k page the kernel uses a 128 byte buffer to
store the tags, and then some extra memory is used to keep track of the
buffers. Let's take the case of a VM with 1GB of memory, and be
conservative and only account for the tag buffers. In this case, the tag
buffers alone will be 32MB.

For a VM with 1GB of memory created with kvmtool built from current master
(commit faae833a746f), pmap shows a total memory usage of 1268388K.
Subtracting the memory of the VM, we are left with 214MB of memory consumed
by kvmtool. Having MTE enabled would increase the memory overhead of
kvmtool by 32/214*100 = 15%.

Of course, this memory overhead scales with the amount of memory the VM
has. The buffer size that KVM uses might change in the future, but since we
cannot predict the future (it might become larger or smaller), I'm working
with what is implemented today.

The kernel documentation for MTE suggests that in order to take advantage
of it, software must be modified and recompiled. That means that users that
don't want to use MTE won't get to exercise MTE because they won't be using
MTE enabled software, but they will pay the overhead regardless. This of
course assumes that going forward software won't be using MTE by default.

kvmtool is supposed to be simple, fast and less resource intensive than
other hypervisors, that's why I think having MTE disabled by default is the
best way to implement the capability.

Thanks,
Alex
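(The 32MB figure above is the per-page tag buffer scaled up, assuming 4 KiB
pages:

	1 GiB / 4 KiB per page                  = 262,144 pages
	262,144 pages * 128 bytes of tag buffer = 32 MiB

so the overhead grows linearly with the guest memory size, as noted.)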
On Wed, Mar 23, 2022 at 12:03:33PM +0000, Alexandru Elisei wrote:
> On Wed, Mar 23, 2022 at 10:31:15AM +0000, Vladimir Murzin wrote:
> > On 3/21/22 5:08 PM, Alexandru Elisei wrote:
> > > On Mon, Mar 21, 2022 at 03:40:18PM +0000, Vladimir Murzin wrote:
> > > > Can we enable it unconditionally if KVM_CAP_ARM_MTE is supported like
> > > > we do for PAC and SVE?
> > >
> > > I thought about that, the reason I chose to enable it based on a kvmtool
> > > command line option, instead of always being enabled if available, is
> > > because of the overhead of sanitising the MTE tags on each stage 2 data
> > > abort. Steven, am I overreacting and that overhead is negligible?
> > >
> > > Also, as far as I know, PAC and SVE incur basically no overhead in KVM
> > > until the guest starts to use those features.
> > >
> > > Do you have a specific reason for wanting MTE to always be enabled if
> > > available? I'm happy to be convinced to make MTE enabled by default, I
> > > don't have a preference either way.
> >
> > Well, automatically enabling if available would align with what we do for
> > other features in kvmtool and Linux itself - we tend to default y for new
> > features, even MTE, thus improving the chances of getting reports back
> > early if something (even performance) goes wrong. Just my 2p.
>
> According to Steven, for each 4k page the kernel uses a 128 byte buffer to
> store the tags, and then some extra memory is used to keep track of the
> buffers. Let's take the case of a VM with 1GB of memory, and be
> conservative and only account for the tag buffers. In this case, the tag
> buffers alone will be 32MB.

The 128 byte buffer is only allocated *if* the page is swapped out. So if
the host swaps out the entire 1GB guest memory, it will incur the 32MB
cost. But a fully swapped out VM is pretty useless, so this doesn't happen
very often.

We do have the cost of zeroing the tags when the page is mapped into
guests. I don't remember the KVM implementation but maybe we can optimise
this to zero the tags at the same time as zeroing the data, as we do for
anonymous pages in the host (unless it does this already).

We could use the fine-grained traps to detect when MAIR_EL1 was configured
by the guest for Tagged memory but, at least with Linux, we do this at boot
irrespective of whether any application will use MTE or not. Future
architecture versions may address this problem.

> The kernel documentation for MTE suggests that in order to take advantage
> of it, software must be modified and recompiled. That means that users that
> don't want to use MTE won't get to exercise MTE because they won't be using
> MTE enabled software, but they will pay the overhead regardless. This of
> course assumes that going forward software won't be using MTE by default.

Not all software needs to be recompiled. It's sufficient most of the time
to replace the glibc with an MTE-aware one.

I have a slight preference for MTE default in kvmtool, maybe with an option
to disable it if one doesn't care about the feature or we find the overhead
to be significant (I don't think it would be).
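To illustrate the "MTE-aware software" point: only memory explicitly mapped
with PROT_MTE is tag-checked (and, per the later discussion, only such pages
carry the tag-storage cost for a normal process). A minimal userspace sketch
following the interface described in the kernel's memory tagging
documentation; the PROT_MTE fallback define is an assumption for older
userspace headers:

	#include <stdlib.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	#ifndef PROT_MTE
	#define PROT_MTE 0x20	/* arm64 asm/mman.h value */
	#endif

	int main(void)
	{
		char *p;

		/* Opt this thread into synchronous tag check faults. */
		if (prctl(PR_SET_TAGGED_ADDR_CTRL,
			  PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
			  (0xfffe << PR_MTE_TAG_SHIFT), 0, 0, 0))
			return EXIT_FAILURE;

		/* Only this PROT_MTE mapping gets allocation tags. */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_MTE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return EXIT_FAILURE;

		/* Tag 0 in the pointer matches the freshly zeroed tags. */
		memset(p, 0, 4096);
		return EXIT_SUCCESS;
	}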
Hi,

On 3/23/22 12:03 PM, Alexandru Elisei wrote:
> On Wed, Mar 23, 2022 at 10:31:15AM +0000, Vladimir Murzin wrote:
>> Well, automatically enabling if available would align with what we do for
>> other features in kvmtool and Linux itself - we tend to default y for new
>> features, even MTE, thus improving the chances of getting reports back
>> early if something (even performance) goes wrong. Just my 2p.
>
> According to Steven, for each 4k page the kernel uses a 128 byte buffer to
> store the tags, and then some extra memory is used to keep track of the
> buffers. Let's take the case of a VM with 1GB of memory, and be
> conservative and only account for the tag buffers. In this case, the tag
> buffers alone will be 32MB.

Right, IIUC, that memory is allocated on demand when we are about to swap
the page out or perform hibernation, so there is no reservation done
upfront, or am I missing something?

> For a VM with 1GB of memory created with kvmtool built from current master
> (commit faae833a746f), pmap shows a total memory usage of 1268388K.
> Subtracting the memory of the VM, we are left with 214MB of memory consumed
> by kvmtool. Having MTE enabled would increase the memory overhead of
> kvmtool by 32/214*100 = 15%.

I admit I might be missing something, but the extra tag storage allocated
for swap/hibernation can apply to any process which uses MTE, no?

> Of course, this memory overhead scales with the amount of memory the VM
> has. The buffer size that KVM uses might change in the future, but since we
> cannot predict the future (it might become larger or smaller), I'm working
> with what is implemented today.

Fair enough. IMO, we will see more MTE capable hardware, OTOH, the memory
overhead won't magically disappear, so should we start thinking about how
to reduce it? I noticed that the code saves tags one to one, so for a guest
which doesn't actively use MTE the whole page would be tagged with zero;
cannot that case be optimized? Or maybe go further and compress the tags?

> kvmtool is supposed to be simple, fast and less resource intensive than
> other hypervisors, that's why I think having MTE disabled by default is the
> best way to implement the capability.

I see kvmtool as a bleeding edge hacking tool, so the closer it is to the
edge the better :)

Cheers
Vladimir

> Thanks,
> Alex
On 23/03/2022 13:57, Vladimir Murzin wrote:
> On 3/23/22 12:03 PM, Alexandru Elisei wrote:
>> According to Steven, for each 4k page the kernel uses a 128 byte buffer to
>> store the tags, and then some extra memory is used to keep track of the
>> buffers. Let's take the case of a VM with 1GB of memory, and be
>> conservative and only account for the tag buffers. In this case, the tag
>> buffers alone will be 32MB.
>
> Right, IIUC, that memory is allocated on demand when we are about to swap
> the page out or perform hibernation, so there is no reservation done
> upfront, or am I missing something?

That's correct, sorry if I didn't make that clear earlier. The host memory
is only allocated when the MTE-enabled guest's pages are swapped out. So if
they are never swapped out there's no memory overhead.

> I admit I might be missing something, but the extra tag storage allocated
> for swap/hibernation can apply to any process which uses MTE, no?

One difference is that the entire VM is considered MTE memory, whereas in a
normal application only those pages mapped with PROT_MTE take the overhead.
So the memory overhead of applications running in the VM is higher than for
the same application outside of the VM (assuming the memory is swapped out
by the host).

> Fair enough. IMO, we will see more MTE capable hardware, OTOH, the memory
> overhead won't magically disappear, so should we start thinking about how
> to reduce it? I noticed that the code saves tags one to one, so for a guest
> which doesn't actively use MTE the whole page would be tagged with zero;
> cannot that case be optimized? Or maybe go further and compress the tags?

Compressing tags is certainly something that could be considered (an early
revision of the MTE swap support did optimise the zero case).

However Linux is lazy with the tags - if an application needs memory but
doesn't use the tags then only the data portion will be zeroed and the tags
will be left with whatever value they had previously (as they are not
accessible by the application). This could mean that an MTE-enabled guest
ends up with remarkably few pages with zeroed tags after it has been
running for a while.

From the host it's not possible to determine whether the tags are useful or
not, so they have to be saved regardless.

That said, I don't have a strong opinion on the default; as long as there's
an option to disable MTE then having the default with it turned on is fine
by me.

> I see kvmtool as a bleeding edge hacking tool, so the closer it is to the
> edge the better :)

I have to admit I like the "just works" approach that the defaults give you
(e.g. p9 filesystem from the host, no need to configure root filesystems
etc), so there's definitely something to be said for MTE "just working" in
kvmtool too.

Steve
Hi,

Catalin, Steven, Vladimir, thank you all for the feedback. I'll make MTE
enabled by default in the next iteration of this series.

Thanks,
Alex

On Wed, Mar 23, 2022 at 02:15:55PM +0000, Steven Price wrote:
> That said, I don't have a strong opinion on the default; as long as there's
> an option to disable MTE then having the default with it turned on is fine
> by me.
[...]
> I have to admit I like the "just works" approach that the defaults give you
> (e.g. p9 filesystem from the host, no need to configure root filesystems
> etc), so there's definitely something to be said for MTE "just working" in
> kvmtool too.
>
> Steve
diff --git a/arm/aarch32/include/kvm/kvm-arch.h b/arm/aarch32/include/kvm/kvm-arch.h
index bee2fc255a82..5616b27e257e 100644
--- a/arm/aarch32/include/kvm/kvm-arch.h
+++ b/arm/aarch32/include/kvm/kvm-arch.h
@@ -5,6 +5,9 @@
 
 #define kvm__arch_get_kern_offset(...)	0x8000
 
+struct kvm;
+static inline void kvm__arch_enable_mte(struct kvm *kvm) {}
+
 #define ARM_MAX_MEMORY(...)	ARM_LOMAP_MAX_MEMORY
 
 #define MAX_PAGE_SIZE	SZ_4K
diff --git a/arm/aarch64/include/kvm/kvm-arch.h b/arm/aarch64/include/kvm/kvm-arch.h
index 5e5ee41211ed..9124f6919d0f 100644
--- a/arm/aarch64/include/kvm/kvm-arch.h
+++ b/arm/aarch64/include/kvm/kvm-arch.h
@@ -6,6 +6,7 @@
 struct kvm;
 unsigned long long kvm__arch_get_kern_offset(struct kvm *kvm, int fd);
 int kvm__arch_get_ipa_limit(struct kvm *kvm);
+void kvm__arch_enable_mte(struct kvm *kvm);
 
 #define ARM_MAX_MEMORY(kvm)	({ \
 	u64 max_ram; \
diff --git a/arm/aarch64/include/kvm/kvm-config-arch.h b/arm/aarch64/include/kvm/kvm-config-arch.h
index 04be43dfa9b2..11250365d8d5 100644
--- a/arm/aarch64/include/kvm/kvm-config-arch.h
+++ b/arm/aarch64/include/kvm/kvm-config-arch.h
@@ -6,6 +6,8 @@
 			"Run AArch32 guest"),				\
 	OPT_BOOLEAN('\0', "pmu", &(cfg)->has_pmuv3,			\
 			"Create PMUv3 device"),				\
+	OPT_BOOLEAN('\0', "mte", &(cfg)->has_mte,			\
+			"Enable memory tagging extension"),		\
 	OPT_U64('\0', "kaslr-seed", &(cfg)->kaslr_seed,			\
 			"Specify random seed for Kernel Address Space "	\
 			"Layout Randomization (KASLR)"),
diff --git a/arm/aarch64/kvm.c b/arm/aarch64/kvm.c
index 56a0aedc263d..46548f8ee96e 100644
--- a/arm/aarch64/kvm.c
+++ b/arm/aarch64/kvm.c
@@ -81,3 +81,16 @@ int kvm__get_vm_type(struct kvm *kvm)
 
 	return KVM_VM_TYPE_ARM_IPA_SIZE(ipa_bits);
 }
+
+void kvm__arch_enable_mte(struct kvm *kvm)
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_ARM_MTE,
+	};
+
+	if (!kvm__supports_extension(kvm, KVM_CAP_ARM_MTE))
+		die("MTE capability is not supported");
+
+	if (ioctl(kvm->vm_fd, KVM_ENABLE_CAP, &cap))
+		die_perror("KVM_ENABLE_CAP(KVM_CAP_ARM_MTE)");
+}
diff --git a/arm/include/arm-common/kvm-config-arch.h b/arm/include/arm-common/kvm-config-arch.h
index 5734c46ab9e6..16e8d500a71b 100644
--- a/arm/include/arm-common/kvm-config-arch.h
+++ b/arm/include/arm-common/kvm-config-arch.h
@@ -9,6 +9,7 @@ struct kvm_config_arch {
 	bool		virtio_trans_pci;
 	bool		aarch32_guest;
 	bool		has_pmuv3;
+	bool		has_mte;
 	u64		kaslr_seed;
 	enum irqchip_type irqchip;
 	u64		fw_addr;
diff --git a/arm/kvm.c b/arm/kvm.c
index 80d233f13d0b..f2db93953778 100644
--- a/arm/kvm.c
+++ b/arm/kvm.c
@@ -86,6 +86,9 @@ void kvm__arch_init(struct kvm *kvm, const char *hugetlbfs_path, u64 ram_size)
 	/* Create the virtual GIC. */
 	if (gic__create(kvm, kvm->cfg.arch.irqchip))
 		die("Failed to create virtual GIC");
+
+	if (kvm->cfg.arch.has_mte)
+		kvm__arch_enable_mte(kvm);
 }
 
 #define FDT_ALIGN	SZ_2M
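With the patch as posted, the feature is strictly opt-in: nothing changes
unless the new flag is passed on the command line, e.g. (assuming an
otherwise normal lkvm invocation) something like "lkvm run --mte -k Image
-m 1024 -c 2" on a host whose KVM advertises KVM_CAP_ARM_MTE; without --mte
the enable-cap path is never reached.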
MTE has been supported in Linux since commit 673638f434ee ("KVM: arm64:
Expose KVM_ARM_CAP_MTE"), add support for it in kvmtool.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 arm/aarch32/include/kvm/kvm-arch.h        |  3 +++
 arm/aarch64/include/kvm/kvm-arch.h        |  1 +
 arm/aarch64/include/kvm/kvm-config-arch.h |  2 ++
 arm/aarch64/kvm.c                         | 13 +++++++++++++
 arm/include/arm-common/kvm-config-arch.h  |  1 +
 arm/kvm.c                                 |  3 +++
 6 files changed, 23 insertions(+)