
[v4,3/6] KVM: arm64: Implement kvm_arch_flush_remote_tlbs_range()

Message ID 20230519005231.3027912-4-rananta@google.com (mailing list archive)
State New, archived
Series KVM: arm64: Add support for FEAT_TLBIRANGE

Commit Message

Raghavendra Rao Ananta May 19, 2023, 12:52 a.m. UTC
Implement kvm_arch_flush_remote_tlbs_range() for arm64
to invalidate the given range in the TLB.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
---
 arch/arm64/include/asm/kvm_host.h |  3 +++
 arch/arm64/kvm/hyp/nvhe/tlb.c     |  4 +---
 arch/arm64/kvm/mmu.c              | 11 +++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

Comments

Marc Zyngier May 29, 2023, 2 p.m. UTC | #1
On Fri, 19 May 2023 01:52:28 +0100,
Raghavendra Rao Ananta <rananta@google.com> wrote:
> 
> Implement kvm_arch_flush_remote_tlbs_range() for arm64
> to invalidate the given range in the TLB.
> 
> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> ---
>  arch/arm64/include/asm/kvm_host.h |  3 +++
>  arch/arm64/kvm/hyp/nvhe/tlb.c     |  4 +---
>  arch/arm64/kvm/mmu.c              | 11 +++++++++++
>  3 files changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 81ab41b84f436..343fb530eea9c 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -1081,6 +1081,9 @@ struct kvm *kvm_arch_alloc_vm(void);
>  #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
>  int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
>  
> +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
> +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages);
> +
>  static inline bool kvm_vm_is_protected(struct kvm *kvm)
>  {
>  	return false;
> diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
> index d4ea549c4b5c4..d2c7c1bc6d441 100644
> --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> @@ -150,10 +150,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
>  		return;
>  	}
>  
> -	dsb(ishst);
> -
>  	/* Switch to requested VMID */
> -	__tlb_switch_to_guest(mmu, &cxt);
> +	__tlb_switch_to_guest(mmu, &cxt, false);

This hunk is in the wrong patch, isn't it?

>  
>  	__flush_tlb_range_op(ipas2e1is, start, pages, stride, 0, 0, false);
>  
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index d0a0d3dca9316..e3673b4c10292 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -92,6 +92,17 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
>  	return 0;
>  }
>  
> +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
> +{
> +	phys_addr_t start, end;
> +
> +	start = start_gfn << PAGE_SHIFT;
> +	end = (start_gfn + pages) << PAGE_SHIFT;
> +
> +	kvm_call_hyp(__kvm_tlb_flush_vmid_range, &kvm->arch.mmu, start, end);

So that's the point that I think is not right. It is the MMU code that
should drive the invalidation method, and not the HYP code. The HYP
code should be as dumb as possible, and the logic should be kept in
the MMU code.

So when a range invalidation is forwarded to HYP, it's a *valid* range
invalidation, not something that can fall back to VMID-wide invalidation.

Thanks,

	M.
Raghavendra Rao Ananta May 30, 2023, 9:22 p.m. UTC | #2
On Mon, May 29, 2023 at 7:00 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Fri, 19 May 2023 01:52:28 +0100,
> Raghavendra Rao Ananta <rananta@google.com> wrote:
> >
> > Implement kvm_arch_flush_remote_tlbs_range() for arm64
> > to invalidate the given range in the TLB.
> >
> > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > ---
> >  arch/arm64/include/asm/kvm_host.h |  3 +++
> >  arch/arm64/kvm/hyp/nvhe/tlb.c     |  4 +---
> >  arch/arm64/kvm/mmu.c              | 11 +++++++++++
> >  3 files changed, 15 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 81ab41b84f436..343fb530eea9c 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -1081,6 +1081,9 @@ struct kvm *kvm_arch_alloc_vm(void);
> >  #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
> >  int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
> >
> > +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
> > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages);
> > +
> >  static inline bool kvm_vm_is_protected(struct kvm *kvm)
> >  {
> >       return false;
> > diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > index d4ea549c4b5c4..d2c7c1bc6d441 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > @@ -150,10 +150,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
> >               return;
> >       }
> >
> > -     dsb(ishst);
> > -
> >       /* Switch to requested VMID */
> > -     __tlb_switch_to_guest(mmu, &cxt);
> > +     __tlb_switch_to_guest(mmu, &cxt, false);
>
> This hunk is in the wrong patch, isn't it?
>
Ah, you are right. It should be part of the previous patch. I think I
introduced it accidentally when I rebased the series. I'll remove it
in the next spin.


> >
> >       __flush_tlb_range_op(ipas2e1is, start, pages, stride, 0, 0, false);
> >
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index d0a0d3dca9316..e3673b4c10292 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -92,6 +92,17 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
> >       return 0;
> >  }
> >
> > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
> > +{
> > +     phys_addr_t start, end;
> > +
> > +     start = start_gfn << PAGE_SHIFT;
> > +     end = (start_gfn + pages) << PAGE_SHIFT;
> > +
> > +     kvm_call_hyp(__kvm_tlb_flush_vmid_range, &kvm->arch.mmu, start, end);
>
> So that's the point that I think is not right. It is the MMU code that
> should drive the invalidation method, and not the HYP code. The HYP
> code should be as dumb as possible, and the logic should be kept in
> the MMU code.
>
> So when a range invalidation is forwarded to HYP, it's a *valid* range
> invalidation, not something that can fall back to VMID-wide invalidation.
>
I'm guessing that you are referring to patch-2. Do you recommend
moving the 'pages >= MAX_TLBI_RANGE_PAGES' logic here and simply
return an error? How about for the other check:
system_supports_tlb_range()?
The idea was for __kvm_tlb_flush_vmid_range() to also implement a
fallback mechanism in case the system doesn't support the range-based
instructions. But if we end up calling __kvm_tlb_flush_vmid_range()
from multiple cases, we'd end up duplicating the checks. WDYT?
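
Concretely, the checks in question look roughly like this in patch-2 of
this series (a paraphrased sketch, not a verbatim quote of that patch):

        /* In __kvm_tlb_flush_vmid_range(), before issuing the range TLBIs: */
        if (!system_supports_tlb_range() || pages >= MAX_TLBI_RANGE_PAGES) {
                /* Fall back to a VMID-wide invalidation. */
                __kvm_tlb_flush_vmid(mmu);
                return;
        }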


> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.
Marc Zyngier May 31, 2023, 8:46 a.m. UTC | #3
On Tue, 30 May 2023 22:22:23 +0100,
Raghavendra Rao Ananta <rananta@google.com> wrote:
> 
> On Mon, May 29, 2023 at 7:00 AM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Fri, 19 May 2023 01:52:28 +0100,
> > Raghavendra Rao Ananta <rananta@google.com> wrote:
> > >
> > > Implement kvm_arch_flush_remote_tlbs_range() for arm64
> > > to invalidate the given range in the TLB.
> > >
> > > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > > ---
> > >  arch/arm64/include/asm/kvm_host.h |  3 +++
> > >  arch/arm64/kvm/hyp/nvhe/tlb.c     |  4 +---
> > >  arch/arm64/kvm/mmu.c              | 11 +++++++++++
> > >  3 files changed, 15 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > > index 81ab41b84f436..343fb530eea9c 100644
> > > --- a/arch/arm64/include/asm/kvm_host.h
> > > +++ b/arch/arm64/include/asm/kvm_host.h
> > > @@ -1081,6 +1081,9 @@ struct kvm *kvm_arch_alloc_vm(void);
> > >  #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
> > >  int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
> > >
> > > +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
> > > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages);
> > > +
> > >  static inline bool kvm_vm_is_protected(struct kvm *kvm)
> > >  {
> > >       return false;
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > index d4ea549c4b5c4..d2c7c1bc6d441 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > @@ -150,10 +150,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
> > >               return;
> > >       }
> > >
> > > -     dsb(ishst);
> > > -
> > >       /* Switch to requested VMID */
> > > -     __tlb_switch_to_guest(mmu, &cxt);
> > > +     __tlb_switch_to_guest(mmu, &cxt, false);
> >
> > This hunk is in the wrong patch, isn't it?
> >
> Ah, you are right. It should be part of the previous patch. I think I
> introduced it accidentally when I rebased the series. I'll remove it
> in the next spin.
> 
> 
> > >
> > >       __flush_tlb_range_op(ipas2e1is, start, pages, stride, 0, 0, false);
> > >
> > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > > index d0a0d3dca9316..e3673b4c10292 100644
> > > --- a/arch/arm64/kvm/mmu.c
> > > +++ b/arch/arm64/kvm/mmu.c
> > > @@ -92,6 +92,17 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
> > >       return 0;
> > >  }
> > >
> > > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
> > > +{
> > > +     phys_addr_t start, end;
> > > +
> > > +     start = start_gfn << PAGE_SHIFT;
> > > +     end = (start_gfn + pages) << PAGE_SHIFT;
> > > +
> > > +     kvm_call_hyp(__kvm_tlb_flush_vmid_range, &kvm->arch.mmu, start, end);
> >
> > So that's the point that I think is not right. It is the MMU code that
> > should drive the invalidation method, and not the HYP code. The HYP
> > code should be as dumb as possible, and the logic should be kept in
> > the MMU code.
> >
> > So when a range invalidation is forwarded to HYP, it's a *valid* range
> > invalidation, not something that can fall back to VMID-wide invalidation.
> >
> I'm guessing that you are referring to patch-2. Do you recommend
> moving the 'pages >= MAX_TLBI_RANGE_PAGES' logic here and simply
> return an error? How about for the other check:
> system_supports_tlb_range()?
> The idea was for __kvm_tlb_flush_vmid_range() to also implement a
> fallback mechanism in case the system doesn't support the range-based
> instructions. But if we end up calling __kvm_tlb_flush_vmid_range()
> from multiple cases, we'd end up duplicating the checks. WDYT?

My take is that there should be a single helper deciding to issue
either a number of range-based TLBIs depending on start/end, or a
single VMID-based TLBI. Having multiple calling sites is not a
problem, and even if that code gets duplicated, big deal.

But a hypercall that falls back to global invalidation based on a
range evaluation error (more than MAX_TLBI_RANGE_PAGES) is papering
over a latent bug.

There should be no logic whatsoever in any of the two tlb.c files.
Only a switch to the correct context, and the requested invalidation,
which *must* be architecturally correct.
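
A minimal sketch of such an MMU-side helper might look like the
following (the name kvm_stage2_flush_range() is hypothetical and not
part of this series; the exact fallback policy is a separate question,
the point is only that the decision is made in mmu.c and HYP is handed
nothing but a valid invalidation request):

        static void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu,
                                           phys_addr_t start, phys_addr_t end)
        {
                u64 pages = (end - start) >> PAGE_SHIFT;

                /*
                 * Decide here, not in hyp/nvhe/tlb.c: no usable range
                 * instructions (or too large a range) means a single
                 * VMID-wide invalidation.
                 */
                if (!system_supports_tlb_range() ||
                    pages >= MAX_TLBI_RANGE_PAGES) {
                        kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
                        return;
                }

                /* Otherwise HYP only ever sees a valid range request. */
                kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, start, end);
        }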

Thanks,

	M.
Raghavendra Rao Ananta June 2, 2023, 1:37 a.m. UTC | #4
On Wed, May 31, 2023 at 1:46 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 30 May 2023 22:22:23 +0100,
> Raghavendra Rao Ananta <rananta@google.com> wrote:
> >
> > On Mon, May 29, 2023 at 7:00 AM Marc Zyngier <maz@kernel.org> wrote:
> > >
> > > On Fri, 19 May 2023 01:52:28 +0100,
> > > Raghavendra Rao Ananta <rananta@google.com> wrote:
> > > >
> > > > Implement kvm_arch_flush_remote_tlbs_range() for arm64
> > > > to invalidate the given range in the TLB.
> > > >
> > > > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > > > ---
> > > >  arch/arm64/include/asm/kvm_host.h |  3 +++
> > > >  arch/arm64/kvm/hyp/nvhe/tlb.c     |  4 +---
> > > >  arch/arm64/kvm/mmu.c              | 11 +++++++++++
> > > >  3 files changed, 15 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > > > index 81ab41b84f436..343fb530eea9c 100644
> > > > --- a/arch/arm64/include/asm/kvm_host.h
> > > > +++ b/arch/arm64/include/asm/kvm_host.h
> > > > @@ -1081,6 +1081,9 @@ struct kvm *kvm_arch_alloc_vm(void);
> > > >  #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
> > > >  int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
> > > >
> > > > +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
> > > > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages);
> > > > +
> > > >  static inline bool kvm_vm_is_protected(struct kvm *kvm)
> > > >  {
> > > >       return false;
> > > > diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > > index d4ea549c4b5c4..d2c7c1bc6d441 100644
> > > > --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > > +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > > @@ -150,10 +150,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
> > > >               return;
> > > >       }
> > > >
> > > > -     dsb(ishst);
> > > > -
> > > >       /* Switch to requested VMID */
> > > > -     __tlb_switch_to_guest(mmu, &cxt);
> > > > +     __tlb_switch_to_guest(mmu, &cxt, false);
> > >
> > > This hunk is in the wrong patch, isn't it?
> > >
> > Ah, you are right. It should be part of the previous patch. I think I
> > introduced it accidentally when I rebased the series. I'll remove it
> > in the next spin.
> >
> >
> > > >
> > > >       __flush_tlb_range_op(ipas2e1is, start, pages, stride, 0, 0, false);
> > > >
> > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > > > index d0a0d3dca9316..e3673b4c10292 100644
> > > > --- a/arch/arm64/kvm/mmu.c
> > > > +++ b/arch/arm64/kvm/mmu.c
> > > > @@ -92,6 +92,17 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
> > > >       return 0;
> > > >  }
> > > >
> > > > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
> > > > +{
> > > > +     phys_addr_t start, end;
> > > > +
> > > > +     start = start_gfn << PAGE_SHIFT;
> > > > +     end = (start_gfn + pages) << PAGE_SHIFT;
> > > > +
> > > > +     kvm_call_hyp(__kvm_tlb_flush_vmid_range, &kvm->arch.mmu, start, end);
> > >
> > > So that's the point that I think is not right. It is the MMU code that
> > > should drive the invalidation method, and not the HYP code. The HYP
> > > code should be as dumb as possible, and the logic should be kept in
> > > the MMU code.
> > >
> > > So when a range invalidation is forwarded to HYP, it's a *valid* range
> > > invalidation, not something that can fall back to VMID-wide invalidation.
> > >
> > I'm guessing that you are referring to patch-2. Do you recommend
> > moving the 'pages >= MAX_TLBI_RANGE_PAGES' logic here and simply
> > return an error? How about for the other check:
> > system_supports_tlb_range()?
> > The idea was for __kvm_tlb_flush_vmid_range() to also implement a
> > fallback mechanism in case the system doesn't support the range-based
> > instructions. But if we end up calling __kvm_tlb_flush_vmid_range()
> > from multiple cases, we'd end up duplicating the checks. WDYT?
>
> My take is that there should be a single helper deciding to issue
> either a number of range-based TLBIs depending on start/end, or a
> single VMID-based TLBI. Having multiple calling sites is not a
> problem, and even if that code gets duplicated, big deal.
>
Hypothetically, if I move the check to this patch and return an error
if this situation occurs, since I'm depending on David's common MMU
code [1], kvm_main.c would end up calling kvm_flush_remote_tlbs() and
we'd be doing a VMID-based TLBI.
One idea would be to issue a WARN_ON() and return 0 so that we don't
issue any TLBIs. Thoughts?
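
For clarity, the fallback being referred to is roughly this shape in
the common code (a sketch assuming the semantics described for the
series in [1], not a verbatim quote of it):

        void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
        {
                /*
                 * If the arch range flush reports an error, fall back
                 * to a full, VMID-wide kvm_flush_remote_tlbs().
                 */
                if (kvm_arch_flush_remote_tlbs_range(kvm, start_gfn, pages))
                        kvm_flush_remote_tlbs(kvm);
        }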

> But a hypercall that falls back to global invalidation based on a
> range evaluation error (more than MAX_TLBI_RANGE_PAGES) is papering
> over a latent bug.
>
If I understand this correctly, MAX_TLBI_RANGE_PAGES is specifically
the capacity of the range-based instructions itself, isn't it? Is it
incorrect for the caller to request a higher range be invalidated even
on systems that do not support these instructions? Probably that's why
__flush_tlb_range() falls back to a global flush when the range
request is exceeded?

Thank you.
Raghavendra

[1]:  https://lore.kernel.org/linux-arm-kernel/20230126184025.2294823-7-dmatlack@google.com/


> There should be no logic whatsoever in any of the two tlb.c files.
> Only a switch to the correct context, and the requested invalidation,
> which *must* be architecturally correct.
>
> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.
Marc Zyngier June 2, 2023, 8:25 a.m. UTC | #5
On Fri, 02 Jun 2023 02:37:28 +0100,
Raghavendra Rao Ananta <rananta@google.com> wrote:
> 
> On Wed, May 31, 2023 at 1:46 AM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Tue, 30 May 2023 22:22:23 +0100,
> > Raghavendra Rao Ananta <rananta@google.com> wrote:
> > >
> > > On Mon, May 29, 2023 at 7:00 AM Marc Zyngier <maz@kernel.org> wrote:
> > > >
> > > > On Fri, 19 May 2023 01:52:28 +0100,
> > > > Raghavendra Rao Ananta <rananta@google.com> wrote:
> > > > >
> > > > > Implement kvm_arch_flush_remote_tlbs_range() for arm64
> > > > > to invalidate the given range in the TLB.
> > > > >
> > > > > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > > > > ---
> > > > >  arch/arm64/include/asm/kvm_host.h |  3 +++
> > > > >  arch/arm64/kvm/hyp/nvhe/tlb.c     |  4 +---
> > > > >  arch/arm64/kvm/mmu.c              | 11 +++++++++++
> > > > >  3 files changed, 15 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > > > > index 81ab41b84f436..343fb530eea9c 100644
> > > > > --- a/arch/arm64/include/asm/kvm_host.h
> > > > > +++ b/arch/arm64/include/asm/kvm_host.h
> > > > > @@ -1081,6 +1081,9 @@ struct kvm *kvm_arch_alloc_vm(void);
> > > > >  #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
> > > > >  int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
> > > > >
> > > > > +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
> > > > > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages);
> > > > > +
> > > > >  static inline bool kvm_vm_is_protected(struct kvm *kvm)
> > > > >  {
> > > > >       return false;
> > > > > diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > > > index d4ea549c4b5c4..d2c7c1bc6d441 100644
> > > > > --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > > > +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > > > @@ -150,10 +150,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
> > > > >               return;
> > > > >       }
> > > > >
> > > > > -     dsb(ishst);
> > > > > -
> > > > >       /* Switch to requested VMID */
> > > > > -     __tlb_switch_to_guest(mmu, &cxt);
> > > > > +     __tlb_switch_to_guest(mmu, &cxt, false);
> > > >
> > > > This hunk is in the wrong patch, isn't it?
> > > >
> > > Ah, you are right. It should be part of the previous patch. I think I
> > > introduced it accidentally when I rebased the series. I'll remove it
> > > in the next spin.
> > >
> > >
> > > > >
> > > > >       __flush_tlb_range_op(ipas2e1is, start, pages, stride, 0, 0, false);
> > > > >
> > > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > > > > index d0a0d3dca9316..e3673b4c10292 100644
> > > > > --- a/arch/arm64/kvm/mmu.c
> > > > > +++ b/arch/arm64/kvm/mmu.c
> > > > > @@ -92,6 +92,17 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
> > > > >       return 0;
> > > > >  }
> > > > >
> > > > > +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
> > > > > +{
> > > > > +     phys_addr_t start, end;
> > > > > +
> > > > > +     start = start_gfn << PAGE_SHIFT;
> > > > > +     end = (start_gfn + pages) << PAGE_SHIFT;
> > > > > +
> > > > > +     kvm_call_hyp(__kvm_tlb_flush_vmid_range, &kvm->arch.mmu, start, end);
> > > >
> > > > So that's the point that I think is not right. It is the MMU code that
> > > > should drive the invalidation method, and not the HYP code. The HYP
> > > > code should be as dumb as possible, and the logic should be kept in
> > > > the MMU code.
> > > >
> > > > So when a range invalidation is forwarded to HYP, it's a *valid* range
> > > > invalidation, not something that can fall back to VMID-wide invalidation.
> > > >
> > > I'm guessing that you are referring to patch-2. Do you recommend
> > > moving the 'pages >= MAX_TLBI_RANGE_PAGES' logic here and simply
> > > return an error? How about for the other check:
> > > system_supports_tlb_range()?
> > > The idea was for __kvm_tlb_flush_vmid_range() to also implement a
> > > fallback mechanism in case the system doesn't support the range-based
> > > instructions. But if we end up calling __kvm_tlb_flush_vmid_range()
> > > from multiple cases, we'd end up duplicating the checks. WDYT?
> >
> > My take is that there should be a single helper deciding to issue
> > either a number of range-based TLBIs depending on start/end, or a
> > single VMID-based TLBI. Having multiple calling sites is not a
> > problem, and even if that code gets duplicated, big deal.
> >
> Hypothetically, if I move the check to this patch and return an error
> if this situation occurs, since I'm depending on David's common MMU
> code [1], kvm_main.c would end up calling kvm_flush_remote_tlbs() and
> we'd be doing a VMID-based TLBI.
> One idea would be to issue a WARN_ON() and return 0 so that we don't
> issue any TLBIs. Thoughts?

Then we should fix the infrastructure. And no, not issuing TLBIs is
not an acceptable outcome. Might as well panic the kernel, because
silent memory corruption is not something I'm willing to entertain.

My take is as follows:

- either the core code doesn't specify a range, and we blow the whole
  thing as we do today

- or it specifies a range, and we apply range invalidation as permitted
  by the architecture and the implementation (either a *series* of
  range invalidations, or a full VMID invalidation).

As for the "common MMU" stuff, something such as "flush_remote_tlbs"
really shows that this code isn't common at all. It is just x86 creep,
and I have zero problem ignoring it.

> 
> > But a hypercall that falls back to global invalidation based on a
> > range evaluation error (more than MAX_TLBI_RANGE_PAGES) is papering
> > over a latent bug.
> >
> If I understand this correctly, MAX_TLBI_RANGE_PAGES is specifically
> the capacity of the range-based instructions itself, isn't it? Is it
> incorrect for the caller to request a higher range be invalidated even
> on systems that do not support these instructions? Probably that's why
> __flush_tlb_range() falls back to a global flush when the range
> request is exceeded?

This is indeed the architectural limit. But invalidating the whole
VMID isn't necessarily the right thing either, if you can preserve
some of the TLB entries by issuing only a couple of range invalidations
that span *in total* more than MAX_TLBI_RANGE_PAGES.

Again, what I'm objecting to is the silent upgrading to VMID-wide
invalidation being hidden away in the HYP code. That's just wrong. The
HYP code is not the place for abstraction.
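
As a sketch of that idea (hypothetical helper, not part of this
series): the MMU code could chop an oversized request into several
valid range invalidations, each within MAX_TLBI_RANGE_PAGES, rather
than silently upgrading to a VMID-wide flush:

        static void kvm_stage2_flush_range_split(struct kvm_s2_mmu *mmu,
                                                 phys_addr_t start, phys_addr_t end)
        {
                /* Capacity of the range-based instructions, in bytes. */
                const phys_addr_t chunk =
                        (phys_addr_t)MAX_TLBI_RANGE_PAGES << PAGE_SHIFT;

                while (start < end) {
                        phys_addr_t next = start + chunk;

                        if (next > end)
                                next = end;

                        /* Each hypercall covers at most MAX_TLBI_RANGE_PAGES pages. */
                        kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, start, next);
                        start = next;
                }
        }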

Thanks,

	M.

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 81ab41b84f436..343fb530eea9c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1081,6 +1081,9 @@ struct kvm *kvm_arch_alloc_vm(void);
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages);
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
 	return false;
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index d4ea549c4b5c4..d2c7c1bc6d441 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -150,10 +150,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 		return;
 	}
 
-	dsb(ishst);
-
 	/* Switch to requested VMID */
-	__tlb_switch_to_guest(mmu, &cxt);
+	__tlb_switch_to_guest(mmu, &cxt, false);
 
 	__flush_tlb_range_op(ipas2e1is, start, pages, stride, 0, 0, false);
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d0a0d3dca9316..e3673b4c10292 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -92,6 +92,17 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 	return 0;
 }
 
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, u64 pages)
+{
+	phys_addr_t start, end;
+
+	start = start_gfn << PAGE_SHIFT;
+	end = (start_gfn + pages) << PAGE_SHIFT;
+
+	kvm_call_hyp(__kvm_tlb_flush_vmid_range, &kvm->arch.mmu, start, end);
+	return 0;
+}
+
 static bool kvm_is_device_pfn(unsigned long pfn)
 {
 	return !pfn_is_map_memory(pfn);