diff mbox series

[v3,11/15] KVM: arm64: Implement do_share() helper for sharing memory

Message ID 20211201170411.1561936-12-qperret@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Introduce kvm_share_hyp() | expand

Commit Message

Quentin Perret Dec. 1, 2021, 5:04 p.m. UTC
From: Will Deacon <will@kernel.org>

By default, protected KVM isolates memory pages so that they are
accessible only to their owner: be it the host kernel, the hypervisor
at EL2 or (in future) the guest. Establishing shared-memory regions
between these components therefore involves a transition for each page
so that the owner can share memory with a borrower under a certain set
of permissions.

Introduce a do_share() helper for safely sharing a memory region between
two components. Currently, only host-to-hyp sharing is implemented, but
the code is easily extended to handle other combinations and the
permission checks for each component are reusable.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 237 ++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)

Comments

Andrew Walbran Dec. 10, 2021, 3:18 p.m. UTC | #1
Reviewed-by: Andrew Walbran <qwandor@google.com>

On Wed, 1 Dec 2021 at 17:04, 'Quentin Perret' via kernel-team
<kernel-team@android.com> wrote:
>
> From: Will Deacon <will@kernel.org>
>
> By default, protected KVM isolates memory pages so that they are
> accessible only to their owner: be it the host kernel, the hypervisor
> at EL2 or (in future) the guest. Establishing shared-memory regions
> between these components therefore involves a transition for each page
> so that the owner can share memory with a borrower under a certain set
> of permissions.
>
> Introduce a do_share() helper for safely sharing a memory region between
> two components. Currently, only host-to-hyp sharing is implemented, but
> the code is easily extended to handle other combinations and the
> permission checks for each component are reusable.
>
> Signed-off-by: Will Deacon <will@kernel.org>
> Signed-off-by: Quentin Perret <qperret@google.com>
> ---
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 237 ++++++++++++++++++++++++++
>  1 file changed, 237 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 757dfefe3aeb..74ca4043b08a 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -471,3 +471,240 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>         ret = host_stage2_idmap(addr);
>         BUG_ON(ret && ret != -EAGAIN);
>  }
> +
> +/* This corresponds to locking order */
> +enum pkvm_component_id {
> +       PKVM_ID_HOST,
> +       PKVM_ID_HYP,
> +};
> +
> +struct pkvm_mem_transition {
> +       u64                             nr_pages;
> +
> +       struct {
> +               enum pkvm_component_id  id;
> +               /* Address in the initiator's address space */
> +               u64                     addr;
> +
> +               union {
> +                       struct {
> +                               /* Address in the completer's address space */
> +                               u64     completer_addr;
> +                       } host;
> +               };
> +       } initiator;
> +
> +       struct {
> +               enum pkvm_component_id  id;
> +       } completer;
> +};
> +
> +struct pkvm_mem_share {
> +       const struct pkvm_mem_transition        tx;
> +       const enum kvm_pgtable_prot             prot;
It would be helpful to add a comment documenting what this is used for
(i.e. whether it is for the initiator or completer). Or even rename it
to something like completer_prot to make that clear.

> +};
> +
> +struct check_walk_data {
> +       enum pkvm_page_state    desired;
> +       enum pkvm_page_state    (*get_page_state)(kvm_pte_t pte);
> +};
> +
> +static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
> +                                     kvm_pte_t *ptep,
> +                                     enum kvm_pgtable_walk_flags flag,
> +                                     void * const arg)
> +{
> +       struct check_walk_data *d = arg;
> +       kvm_pte_t pte = *ptep;
> +
> +       if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
> +               return -EINVAL;
> +
> +       return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
> +}
> +
> +static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
> +                                 struct check_walk_data *data)
> +{
> +       struct kvm_pgtable_walker walker = {
> +               .cb     = __check_page_state_visitor,
> +               .arg    = data,
> +               .flags  = KVM_PGTABLE_WALK_LEAF,
> +       };
> +
> +       return kvm_pgtable_walk(pgt, addr, size, &walker);
> +}
> +
> +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
> +{
> +       if (!kvm_pte_valid(pte) && pte)
> +               return PKVM_NOPAGE;
> +
> +       return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
> +}
> +
> +static int __host_check_page_state_range(u64 addr, u64 size,
> +                                        enum pkvm_page_state state)
> +{
> +       struct check_walk_data d = {
> +               .desired        = state,
> +               .get_page_state = host_get_page_state,
> +       };
> +
> +       hyp_assert_lock_held(&host_kvm.lock);
> +       return check_page_state_range(&host_kvm.pgt, addr, size, &d);
> +}
> +
> +static int __host_set_page_state_range(u64 addr, u64 size,
> +                                      enum pkvm_page_state state)
> +{
> +       enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
> +
> +       return host_stage2_idmap_locked(addr, size, prot);
> +}
> +
> +static int host_request_owned_transition(u64 *completer_addr,
> +                                        const struct pkvm_mem_transition *tx)
> +{
> +       u64 size = tx->nr_pages * PAGE_SIZE;
> +       u64 addr = tx->initiator.addr;
> +
> +       *completer_addr = tx->initiator.host.completer_addr;
> +       return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
> +}
> +
> +static int host_initiate_share(u64 *completer_addr,
> +                              const struct pkvm_mem_transition *tx)
> +{
> +       u64 size = tx->nr_pages * PAGE_SIZE;
> +       u64 addr = tx->initiator.addr;
> +
> +       *completer_addr = tx->initiator.host.completer_addr;
> +       return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
> +}
> +
> +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
> +{
> +       if (!kvm_pte_valid(pte))
> +               return PKVM_NOPAGE;
> +
> +       return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
> +}
> +
> +static int __hyp_check_page_state_range(u64 addr, u64 size,
> +                                       enum pkvm_page_state state)
> +{
> +       struct check_walk_data d = {
> +               .desired        = state,
> +               .get_page_state = hyp_get_page_state,
> +       };
> +
> +       hyp_assert_lock_held(&pkvm_pgd_lock);
> +       return check_page_state_range(&pkvm_pgtable, addr, size, &d);
> +}
> +
> +static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
> +{
> +       return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
> +                tx->initiator.id != PKVM_ID_HOST);
> +}
> +
> +static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
> +                        enum kvm_pgtable_prot perms)
> +{
> +       u64 size = tx->nr_pages * PAGE_SIZE;
> +
> +       if (perms != PAGE_HYP)
> +               return -EPERM;
> +
> +       if (__hyp_ack_skip_pgtable_check(tx))
> +               return 0;
> +
> +       return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
> +}
> +
> +static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
> +                             enum kvm_pgtable_prot perms)
> +{
> +       void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
> +       enum kvm_pgtable_prot prot;
> +
> +       prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
> +       return pkvm_create_mappings_locked(start, end, prot);
> +}
> +
> +static int check_share(struct pkvm_mem_share *share)
> +{
> +       const struct pkvm_mem_transition *tx = &share->tx;
> +       u64 completer_addr;
> +       int ret;
> +
> +       switch (tx->initiator.id) {
> +       case PKVM_ID_HOST:
> +               ret = host_request_owned_transition(&completer_addr, tx);
> +               break;
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +       if (ret)
> +               return ret;
> +
> +       switch (tx->completer.id) {
> +       case PKVM_ID_HYP:
> +               ret = hyp_ack_share(completer_addr, tx, share->prot);
> +               break;
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +       return ret;
> +}
> +
> +static int __do_share(struct pkvm_mem_share *share)
> +{
> +       const struct pkvm_mem_transition *tx = &share->tx;
> +       u64 completer_addr;
> +       int ret;
> +
> +       switch (tx->initiator.id) {
> +       case PKVM_ID_HOST:
> +               ret = host_initiate_share(&completer_addr, tx);
> +               break;
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +       if (ret)
> +               return ret;
> +
> +       switch (tx->completer.id) {
> +       case PKVM_ID_HYP:
> +               ret = hyp_complete_share(completer_addr, tx, share->prot);
> +               break;
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +       return ret;
> +}
> +
> +/*
> + * do_share():
> + *
> + * The page owner grants access to another component with a given set
> + * of permissions.
> + *
> + * Initiator: OWNED    => SHARED_OWNED
> + * Completer: NOPAGE   => SHARED_BORROWED
> + */
> +static int do_share(struct pkvm_mem_share *share)
> +{
> +       int ret;
> +
> +       ret = check_share(share);
> +       if (ret)
> +               return ret;
> +
> +       return WARN_ON(__do_share(share));
> +}
> --
> 2.34.0.rc2.393.gf8c9666880-goog
>
> --
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 757dfefe3aeb..74ca4043b08a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -471,3 +471,240 @@  void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
 	ret = host_stage2_idmap(addr);
 	BUG_ON(ret && ret != -EAGAIN);
 }
+
+/* This corresponds to locking order */
+enum pkvm_component_id {
+	PKVM_ID_HOST,
+	PKVM_ID_HYP,
+};
+
+struct pkvm_mem_transition {
+	u64				nr_pages;
+
+	struct {
+		enum pkvm_component_id	id;
+		/* Address in the initiator's address space */
+		u64			addr;
+
+		union {
+			struct {
+				/* Address in the completer's address space */
+				u64	completer_addr;
+			} host;
+		};
+	} initiator;
+
+	struct {
+		enum pkvm_component_id	id;
+	} completer;
+};
+
+struct pkvm_mem_share {
+	const struct pkvm_mem_transition	tx;
+	const enum kvm_pgtable_prot		prot;	/* Completer's permissions */
+};
+
+struct check_walk_data {
+	enum pkvm_page_state	desired;
+	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
+};
+
+static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
+				      kvm_pte_t *ptep,
+				      enum kvm_pgtable_walk_flags flag,
+				      void * const arg)
+{
+	struct check_walk_data *d = arg;
+	kvm_pte_t pte = *ptep;
+
+	if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
+		return -EINVAL;
+
+	return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+}
+
+static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
+				  struct check_walk_data *data)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= __check_page_state_visitor,
+		.arg	= data,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+	};
+
+	return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+{
+	if (!kvm_pte_valid(pte) && pte)
+		return PKVM_NOPAGE;
+
+	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __host_check_page_state_range(u64 addr, u64 size,
+					 enum pkvm_page_state state)
+{
+	struct check_walk_data d = {
+		.desired	= state,
+		.get_page_state	= host_get_page_state,
+	};
+
+	hyp_assert_lock_held(&host_kvm.lock);
+	return check_page_state_range(&host_kvm.pgt, addr, size, &d);
+}
+
+static int __host_set_page_state_range(u64 addr, u64 size,
+				       enum pkvm_page_state state)
+{
+	enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+
+	return host_stage2_idmap_locked(addr, size, prot);
+}
+
+static int host_request_owned_transition(u64 *completer_addr,
+					 const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u64 addr = tx->initiator.addr;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+	return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int host_initiate_share(u64 *completer_addr,
+			       const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u64 addr = tx->initiator.addr;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+	return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+{
+	if (!kvm_pte_valid(pte))
+		return PKVM_NOPAGE;
+	/* PTEs from pkvm_pgtable are hyp stage-1: use the hyp decoder */
+	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+}
+
+static int __hyp_check_page_state_range(u64 addr, u64 size,
+					enum pkvm_page_state state)
+{
+	struct check_walk_data d = {
+		.desired	= state,
+		.get_page_state	= hyp_get_page_state,
+	};
+
+	hyp_assert_lock_held(&pkvm_pgd_lock);
+	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+}
+
+static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+		 tx->initiator.id != PKVM_ID_HOST);
+}
+
+static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
+			 enum kvm_pgtable_prot perms)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	if (perms != PAGE_HYP)
+		return -EPERM;
+
+	if (__hyp_ack_skip_pgtable_check(tx))
+		return 0;
+
+	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
+static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
+			      enum kvm_pgtable_prot perms)
+{
+	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+	enum kvm_pgtable_prot prot;
+
+	prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
+	return pkvm_create_mappings_locked(start, end, prot);
+}
+
+static int check_share(struct pkvm_mem_share *share)
+{
+	const struct pkvm_mem_transition *tx = &share->tx;
+	u64 completer_addr;
+	int ret;
+
+	switch (tx->initiator.id) {
+	case PKVM_ID_HOST:
+		ret = host_request_owned_transition(&completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	switch (tx->completer.id) {
+	case PKVM_ID_HYP:
+		ret = hyp_ack_share(completer_addr, tx, share->prot);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int __do_share(struct pkvm_mem_share *share)
+{
+	const struct pkvm_mem_transition *tx = &share->tx;
+	u64 completer_addr;
+	int ret;
+
+	switch (tx->initiator.id) {
+	case PKVM_ID_HOST:
+		ret = host_initiate_share(&completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	switch (tx->completer.id) {
+	case PKVM_ID_HYP:
+		ret = hyp_complete_share(completer_addr, tx, share->prot);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * do_share():
+ *
+ * The page owner grants access to another component with a given set
+ * of permissions.
+ *
+ * Initiator: OWNED	=> SHARED_OWNED
+ * Completer: NOPAGE	=> SHARED_BORROWED
+ */
+static int do_share(struct pkvm_mem_share *share)
+{
+	int ret;
+
+	ret = check_share(share);
+	if (ret)
+		return ret;
+
+	return WARN_ON(__do_share(share));
+}