Message ID | 20210922182123.200105-2-tony.luck@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Basic recovery for machine checks inside SGX | expand |
On Wed, 2021-09-22 at 11:21 -0700, Tony Luck wrote: > SGX EPC pages go through the following life cycle: > > DIRTY ---> FREE ---> IN-USE --\ > ^ | > \-----------------/ > > Recovery action for poison for a DIRTY or FREE page is simple. Just > make sure never to allocate the page. IN-USE pages need some extra > handling. > > It would be good to use the sgx_epc_page->owner field as an indicator > of where an EPC page is currently in that cycle (owner != NULL means > the EPC page is IN-USE). But there is one caller, sgx_alloc_va_page(), > that calls with NULL. > > Since there are multiple uses of the "owner" field with different types > change the type of sgx_epc_page.owner to "void *. > > Start epc_pages out with a non-NULL owner while they are in DIRTY state. > > Fix up the one holdout to provide a non-NULL owner. > > Refactor the allocation sequence so that changes to/from NULL > value happen together with adding/removing the epc_page from > a free list while the node->lock is held. > > Signed-off-by: Tony Luck <tony.luck@intel.com> > --- > arch/x86/kernel/cpu/sgx/encl.c | 5 +++-- > arch/x86/kernel/cpu/sgx/encl.h | 2 +- > arch/x86/kernel/cpu/sgx/ioctl.c | 2 +- > arch/x86/kernel/cpu/sgx/main.c | 21 +++++++++++---------- > arch/x86/kernel/cpu/sgx/sgx.h | 4 ++-- > 5 files changed, 18 insertions(+), 16 deletions(-) > > diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c > index 001808e3901c..62cf20d5fbf6 100644 > --- a/arch/x86/kernel/cpu/sgx/encl.c > +++ b/arch/x86/kernel/cpu/sgx/encl.c > @@ -667,6 +667,7 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, > > /** > * sgx_alloc_va_page() - Allocate a Version Array (VA) page > + * @owner: struct sgx_va_page connected to this VA page > * > * Allocate a free EPC page and convert it to a Version Array (VA) page. > * > @@ -674,12 +675,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, > * a VA page, > * -errno otherwise > */ > -struct sgx_epc_page *sgx_alloc_va_page(void) > +struct sgx_epc_page *sgx_alloc_va_page(void *owner) > { > struct sgx_epc_page *epc_page; > int ret; > > - epc_page = sgx_alloc_epc_page(NULL, true); > + epc_page = sgx_alloc_epc_page(owner, true); > if (IS_ERR(epc_page)) > return ERR_CAST(epc_page); > > diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h > index fec43ca65065..2a972bc9b2d1 100644 > --- a/arch/x86/kernel/cpu/sgx/encl.h > +++ b/arch/x86/kernel/cpu/sgx/encl.h > @@ -111,7 +111,7 @@ void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); > int sgx_encl_test_and_clear_young(struct mm_struct *mm, > struct sgx_encl_page *page); > > -struct sgx_epc_page *sgx_alloc_va_page(void); > +struct sgx_epc_page *sgx_alloc_va_page(void *owner); > unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page); > void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset); > bool sgx_va_page_full(struct sgx_va_page *va_page); > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > index 83df20e3e633..655ce0bb069d 100644 > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > @@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl) > if (!va_page) > return ERR_PTR(-ENOMEM); > > - va_page->epc_page = sgx_alloc_va_page(); > + va_page->epc_page = sgx_alloc_va_page(va_page); > if (IS_ERR(va_page->epc_page)) { > err = ERR_CAST(va_page->epc_page); > kfree(va_page); > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c > index 63d3de02bbcc..69743709ec90 100644 > --- a/arch/x86/kernel/cpu/sgx/main.c > +++ b/arch/x86/kernel/cpu/sgx/main.c > @@ -457,7 +457,7 @@ static bool __init sgx_page_reclaimer_init(void) > return true; > } > > -static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(void *owner, int nid) > { > struct sgx_numa_node *node = &sgx_numa_nodes[nid]; > struct sgx_epc_page *page = NULL; > @@ -471,6 +471,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > > page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); > list_del_init(&page->list); > + page->owner = owner; > sgx_nr_free_pages--; > > spin_unlock(&node->lock); > @@ -480,6 +481,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > > /** > * __sgx_alloc_epc_page() - Allocate an EPC page > + * @owner: the owner of the EPC page > * > * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start > * from the NUMA node, where the caller is executing. > @@ -488,14 +490,14 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > * - an EPC page: A borrowed EPC pages were available. > * - NULL: Out of EPC pages. > */ > -struct sgx_epc_page *__sgx_alloc_epc_page(void) > +struct sgx_epc_page *__sgx_alloc_epc_page(void *owner) > { > struct sgx_epc_page *page; > int nid_of_current = numa_node_id(); > int nid = nid_of_current; > > if (node_isset(nid_of_current, sgx_numa_mask)) { > - page = __sgx_alloc_epc_page_from_node(nid_of_current); > + page = __sgx_alloc_epc_page_from_node(owner, nid_of_current); > if (page) > return page; > } > @@ -506,7 +508,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void) > if (nid == nid_of_current) > break; > > - page = __sgx_alloc_epc_page_from_node(nid); > + page = __sgx_alloc_epc_page_from_node(owner, nid); > if (page) > return page; > } > @@ -559,7 +561,7 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) > > /** > * sgx_alloc_epc_page() - Allocate an EPC page > - * @owner: the owner of the EPC page > + * @owner: per-caller page owner > * @reclaim: reclaim pages if necessary > * > * Iterate through EPC sections and borrow a free EPC page to the caller. When a > @@ -579,11 +581,9 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim) > struct sgx_epc_page *page; > > for ( ; ; ) { > - page = __sgx_alloc_epc_page(); > - if (!IS_ERR(page)) { > - page->owner = owner; > + page = __sgx_alloc_epc_page(owner); > + if (!IS_ERR(page)) > break; > - } > > if (list_empty(&sgx_active_page_list)) > return ERR_PTR(-ENOMEM); > @@ -624,6 +624,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page) > > spin_lock(&node->lock); > > + page->owner = NULL; > list_add_tail(&page->list, &node->free_page_list); > sgx_nr_free_pages++; > > @@ -652,7 +653,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, > for (i = 0; i < nr_pages; i++) { > section->pages[i].section = index; > section->pages[i].flags = 0; > - section->pages[i].owner = NULL; > + section->pages[i].owner = (void *)-1; Probably should have a named constant. Anyway, I wonder why we want to do tricks with 'owner', when the struct has a flags field? Right now its use is so nice and straight forward, and most importantly intuitive. So what I would do instead of this, would be to add something like /* Pages, which are being tracked by the page reclaimer. */ #define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0) /* Pages, which are allocated for use. */ #define SGX_EPC_PAGE_ALLOCATED BIT(1) This would be set by sgx_alloc_epc_page() and reset by sgx_free_epc_page(). In the subsequent patch you could then instead of /* * If there is no owner, then the page is on a free list. * Move it to the poison page list. */ if (!page->owner) { list_del(&page->list); list_add(&page->list, &sgx_poison_page_list); goto out; } you would /* * If there is no owner, then the page is on a free list. * Move it to the poison page list. */ if (!page->flags) { list_del(&page->list); list_add(&page->list, &sgx_poison_page_list); goto out; } You don't actually need to compare to that flag because the invariant would be that it is set, as long as the page is not explicitly freed. I think this is a better solution than in the patch set because it does not introduce any unorthodox use of anything. /Jarkko
On Thu, 2021-09-23 at 23:21 +0300, Jarkko Sakkinen wrote: > On Wed, 2021-09-22 at 11:21 -0700, Tony Luck wrote: > > SGX EPC pages go through the following life cycle: > > > > DIRTY ---> FREE ---> IN-USE --\ > > ^ | > > \-----------------/ > > > > Recovery action for poison for a DIRTY or FREE page is simple. Just > > make sure never to allocate the page. IN-USE pages need some extra > > handling. > > > > It would be good to use the sgx_epc_page->owner field as an indicator > > of where an EPC page is currently in that cycle (owner != NULL means > > the EPC page is IN-USE). But there is one caller, sgx_alloc_va_page(), > > that calls with NULL. > > > > Since there are multiple uses of the "owner" field with different types > > change the type of sgx_epc_page.owner to "void *. > > > > Start epc_pages out with a non-NULL owner while they are in DIRTY state. > > > > Fix up the one holdout to provide a non-NULL owner. > > > > Refactor the allocation sequence so that changes to/from NULL > > value happen together with adding/removing the epc_page from > > a free list while the node->lock is held. > > > > Signed-off-by: Tony Luck <tony.luck@intel.com> > > --- > > arch/x86/kernel/cpu/sgx/encl.c | 5 +++-- > > arch/x86/kernel/cpu/sgx/encl.h | 2 +- > > arch/x86/kernel/cpu/sgx/ioctl.c | 2 +- > > arch/x86/kernel/cpu/sgx/main.c | 21 +++++++++++---------- > > arch/x86/kernel/cpu/sgx/sgx.h | 4 ++-- > > 5 files changed, 18 insertions(+), 16 deletions(-) > > > > diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c > > index 001808e3901c..62cf20d5fbf6 100644 > > --- a/arch/x86/kernel/cpu/sgx/encl.c > > +++ b/arch/x86/kernel/cpu/sgx/encl.c > > @@ -667,6 +667,7 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, > > > > /** > > * sgx_alloc_va_page() - Allocate a Version Array (VA) page > > + * @owner: struct sgx_va_page connected to this VA page > > * > > * Allocate a free EPC page and convert it to a Version Array (VA) page. > > * > > @@ -674,12 +675,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, > > * a VA page, > > * -errno otherwise > > */ > > -struct sgx_epc_page *sgx_alloc_va_page(void) > > +struct sgx_epc_page *sgx_alloc_va_page(void *owner) > > { > > struct sgx_epc_page *epc_page; > > int ret; > > > > - epc_page = sgx_alloc_epc_page(NULL, true); > > + epc_page = sgx_alloc_epc_page(owner, true); > > if (IS_ERR(epc_page)) > > return ERR_CAST(epc_page); > > > > diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h > > index fec43ca65065..2a972bc9b2d1 100644 > > --- a/arch/x86/kernel/cpu/sgx/encl.h > > +++ b/arch/x86/kernel/cpu/sgx/encl.h > > @@ -111,7 +111,7 @@ void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); > > int sgx_encl_test_and_clear_young(struct mm_struct *mm, > > struct sgx_encl_page *page); > > > > -struct sgx_epc_page *sgx_alloc_va_page(void); > > +struct sgx_epc_page *sgx_alloc_va_page(void *owner); > > unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page); > > void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset); > > bool sgx_va_page_full(struct sgx_va_page *va_page); > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > > index 83df20e3e633..655ce0bb069d 100644 > > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > > @@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl) > > if (!va_page) > > return ERR_PTR(-ENOMEM); > > > > - va_page->epc_page = sgx_alloc_va_page(); > > + va_page->epc_page = sgx_alloc_va_page(va_page); > > if (IS_ERR(va_page->epc_page)) { > > err = ERR_CAST(va_page->epc_page); > > kfree(va_page); > > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c > > index 63d3de02bbcc..69743709ec90 100644 > > --- a/arch/x86/kernel/cpu/sgx/main.c > > +++ b/arch/x86/kernel/cpu/sgx/main.c > > @@ -457,7 +457,7 @@ static bool __init sgx_page_reclaimer_init(void) > > return true; > > } > > > > -static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > > +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(void *owner, int nid) > > { > > struct sgx_numa_node *node = &sgx_numa_nodes[nid]; > > struct sgx_epc_page *page = NULL; > > @@ -471,6 +471,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > > > > page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); > > list_del_init(&page->list); > > + page->owner = owner; > > sgx_nr_free_pages--; > > > > spin_unlock(&node->lock); > > @@ -480,6 +481,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > > > > /** > > * __sgx_alloc_epc_page() - Allocate an EPC page > > + * @owner: the owner of the EPC page > > * > > * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start > > * from the NUMA node, where the caller is executing. > > @@ -488,14 +490,14 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > > * - an EPC page: A borrowed EPC pages were available. > > * - NULL: Out of EPC pages. > > */ > > -struct sgx_epc_page *__sgx_alloc_epc_page(void) > > +struct sgx_epc_page *__sgx_alloc_epc_page(void *owner) > > { > > struct sgx_epc_page *page; > > int nid_of_current = numa_node_id(); > > int nid = nid_of_current; > > > > if (node_isset(nid_of_current, sgx_numa_mask)) { > > - page = __sgx_alloc_epc_page_from_node(nid_of_current); > > + page = __sgx_alloc_epc_page_from_node(owner, nid_of_current); > > if (page) > > return page; > > } > > @@ -506,7 +508,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void) > > if (nid == nid_of_current) > > break; > > > > - page = __sgx_alloc_epc_page_from_node(nid); > > + page = __sgx_alloc_epc_page_from_node(owner, nid); > > if (page) > > return page; > > } > > @@ -559,7 +561,7 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) > > > > /** > > * sgx_alloc_epc_page() - Allocate an EPC page > > - * @owner: the owner of the EPC page > > + * @owner: per-caller page owner > > * @reclaim: reclaim pages if necessary > > * > > * Iterate through EPC sections and borrow a free EPC page to the caller. When a > > @@ -579,11 +581,9 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim) > > struct sgx_epc_page *page; > > > > for ( ; ; ) { > > - page = __sgx_alloc_epc_page(); > > - if (!IS_ERR(page)) { > > - page->owner = owner; > > + page = __sgx_alloc_epc_page(owner); > > + if (!IS_ERR(page)) > > break; > > - } > > > > if (list_empty(&sgx_active_page_list)) > > return ERR_PTR(-ENOMEM); > > @@ -624,6 +624,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page) > > > > spin_lock(&node->lock); > > > > + page->owner = NULL; > > list_add_tail(&page->list, &node->free_page_list); > > sgx_nr_free_pages++; > > > > @@ -652,7 +653,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, > > for (i = 0; i < nr_pages; i++) { > > section->pages[i].section = index; > > section->pages[i].flags = 0; > > - section->pages[i].owner = NULL; > > + section->pages[i].owner = (void *)-1; > > Probably should have a named constant. > > Anyway, I wonder why we want to do tricks with 'owner', when the > struct has a flags field? > > Right now its use is so nice and straight forward, and most > importantly intuitive. > > So what I would do instead of this, would be to add something > like > > /* Pages, which are being tracked by the page reclaimer. */ > #define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0) > > /* Pages, which are allocated for use. */ > #define SGX_EPC_PAGE_ALLOCATED BIT(1) > > This would be set by sgx_alloc_epc_page() and reset by > sgx_free_epc_page(). > > In the subsequent patch you could then instead of > > /* > * If there is no owner, then the page is on a free list. > * Move it to the poison page list. > */ > if (!page->owner) { > list_del(&page->list); > list_add(&page->list, &sgx_poison_page_list); > goto out; > } > > you would > > /* > * If there is no owner, then the page is on a free list. > * Move it to the poison page list. > */ > if (!page->flags) { > list_del(&page->list); > list_add(&page->list, &sgx_poison_page_list); > goto out; > } > > You don't actually need to compare to that flag because the > invariant would be that it is set, as long as the page is > not explicitly freed. > > I think this is a better solution than in the patch set > because it does not introduce any unorthodox use of anything. And does not contain any special cases, e.g. when you debug something you can always assume that a valid owner pointer is always a legit sgx_encl_page instance. /Jarkko
On Thu, Sep 23, 2021 at 11:24:35PM +0300, Jarkko Sakkinen wrote: > On Thu, 2021-09-23 at 23:21 +0300, Jarkko Sakkinen wrote: > > On Wed, 2021-09-22 at 11:21 -0700, Tony Luck wrote: > > > - section->pages[i].owner = NULL; > > > + section->pages[i].owner = (void *)-1; > > > > Probably should have a named constant. > > > > Anyway, I wonder why we want to do tricks with 'owner', when the > > struct has a flags field? > > > > Right now its use is so nice and straight forward, and most > > importantly intuitive. > > > > So what I would do instead of this, would be to add something > > like > > > > /* Pages, which are being tracked by the page reclaimer. */ > > #define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0) > > > > /* Pages, which are allocated for use. */ > > #define SGX_EPC_PAGE_ALLOCATED BIT(1) > > > > This would be set by sgx_alloc_epc_page() and reset by > > sgx_free_epc_page(). > > > > In the subsequent patch you could then instead of > > > > /* > > * If there is no owner, then the page is on a free list. > > * Move it to the poison page list. > > */ > > if (!page->owner) { > > list_del(&page->list); > > list_add(&page->list, &sgx_poison_page_list); > > goto out; > > } > > > > you would > > > > /* > > * If there is no owner, then the page is on a free list. > > * Move it to the poison page list. > > */ > > if (!page->flags) { > > list_del(&page->list); > > list_add(&page->list, &sgx_poison_page_list); > > goto out; > > } > > > > You don't actually need to compare to that flag because the > > invariant would be that it is set, as long as the page is > > not explicitly freed. > > > > I think this is a better solution than in the patch set > > because it does not introduce any unorthodox use of anything. > > And does not contain any special cases, e.g. when you debug > something you can always assume that a valid owner pointer is > always a legit sgx_encl_page instance. > Jarkko, That's nice. It avoids having to create a fictitious owner for the dirty pages, and for the sgx_alloc_va_page() case. Which in turn means that the owner field in struct sgx_epc_page can remain as "struct sgx_encl_page *owner;" (neatly avoiding DaveH's request that it be an anonymous union of all the possible types, because it is back to just being one type). Thanks! Will include in next version. -Tony
> That's nice. It avoids having to create a fictitious owner for > the dirty pages, and for the sgx_alloc_va_page() case. Which > in turn means that the owner field in struct sgx_epc_page can > remain as "struct sgx_encl_page *owner;" (neatly avoiding DaveH's > request that it be an anonymous union of all the possible types, > because it is back to just being one type). > > Thanks! Will include in next version. Also avoids a bunch of refactoring to make sure to set the owner field while holding zone->lock. I roughly coded it up and the old part 0001 was: arch/x86/kernel/cpu/sgx/encl.c | 5 +++-- arch/x86/kernel/cpu/sgx/encl.h | 2 +- arch/x86/kernel/cpu/sgx/ioctl.c | 2 +- arch/x86/kernel/cpu/sgx/main.c | 21 +++++++++++---------- arch/x86/kernel/cpu/sgx/sgx.h | 4 ++-- 5 files changed, 18 insertions(+), 16 deletions(-) which is by no means huge, but the new part 0001 is arch/x86/kernel/cpu/sgx/main.c | 4 +++- arch/x86/kernel/cpu/sgx/sgx.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) -Tony
On Thu, 2021-09-23 at 22:11 +0000, Luck, Tony wrote: > > That's nice. It avoids having to create a fictitious owner for > > the dirty pages, and for the sgx_alloc_va_page() case. Which > > in turn means that the owner field in struct sgx_epc_page can > > remain as "struct sgx_encl_page *owner;" (neatly avoiding DaveH's > > request that it be an anonymous union of all the possible types, > > because it is back to just being one type). > > > > Thanks! Will include in next version. > > Also avoids a bunch of refactoring to make sure to set the owner field > while holding zone->lock. > > I roughly coded it up and the old part 0001 was: > > arch/x86/kernel/cpu/sgx/encl.c | 5 +++-- > arch/x86/kernel/cpu/sgx/encl.h | 2 +- > arch/x86/kernel/cpu/sgx/ioctl.c | 2 +- > arch/x86/kernel/cpu/sgx/main.c | 21 +++++++++++---------- > arch/x86/kernel/cpu/sgx/sgx.h | 4 ++-- > 5 files changed, 18 insertions(+), 16 deletions(-) > > which is by no means huge, but the new part 0001 is > > arch/x86/kernel/cpu/sgx/main.c | 4 +++- > arch/x86/kernel/cpu/sgx/sgx.h | 3 +++ > 2 files changed, 6 insertions(+), 1 deletion(-) > > -Tony This is good to hear. I guess it is then a no brainer to move into this direction then. /Jarkko
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 001808e3901c..62cf20d5fbf6 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -667,6 +667,7 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, /** * sgx_alloc_va_page() - Allocate a Version Array (VA) page + * @owner: struct sgx_va_page connected to this VA page * * Allocate a free EPC page and convert it to a Version Array (VA) page. * @@ -674,12 +675,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm, * a VA page, * -errno otherwise */ -struct sgx_epc_page *sgx_alloc_va_page(void) +struct sgx_epc_page *sgx_alloc_va_page(void *owner) { struct sgx_epc_page *epc_page; int ret; - epc_page = sgx_alloc_epc_page(NULL, true); + epc_page = sgx_alloc_epc_page(owner, true); if (IS_ERR(epc_page)) return ERR_CAST(epc_page); diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index fec43ca65065..2a972bc9b2d1 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -111,7 +111,7 @@ void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); -struct sgx_epc_page *sgx_alloc_va_page(void); +struct sgx_epc_page *sgx_alloc_va_page(void *owner); unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page); void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset); bool sgx_va_page_full(struct sgx_va_page *va_page); diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 83df20e3e633..655ce0bb069d 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl) if (!va_page) return ERR_PTR(-ENOMEM); - va_page->epc_page = sgx_alloc_va_page(); + va_page->epc_page = sgx_alloc_va_page(va_page); if (IS_ERR(va_page->epc_page)) { err = ERR_CAST(va_page->epc_page); kfree(va_page); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 63d3de02bbcc..69743709ec90 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -457,7 +457,7 @@ static bool __init sgx_page_reclaimer_init(void) return true; } -static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(void *owner, int nid) { struct sgx_numa_node *node = &sgx_numa_nodes[nid]; struct sgx_epc_page *page = NULL; @@ -471,6 +471,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); list_del_init(&page->list); + page->owner = owner; sgx_nr_free_pages--; spin_unlock(&node->lock); @@ -480,6 +481,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) /** * __sgx_alloc_epc_page() - Allocate an EPC page + * @owner: the owner of the EPC page * * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start * from the NUMA node, where the caller is executing. @@ -488,14 +490,14 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) * - an EPC page: A borrowed EPC pages were available. * - NULL: Out of EPC pages. */ -struct sgx_epc_page *__sgx_alloc_epc_page(void) +struct sgx_epc_page *__sgx_alloc_epc_page(void *owner) { struct sgx_epc_page *page; int nid_of_current = numa_node_id(); int nid = nid_of_current; if (node_isset(nid_of_current, sgx_numa_mask)) { - page = __sgx_alloc_epc_page_from_node(nid_of_current); + page = __sgx_alloc_epc_page_from_node(owner, nid_of_current); if (page) return page; } @@ -506,7 +508,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void) if (nid == nid_of_current) break; - page = __sgx_alloc_epc_page_from_node(nid); + page = __sgx_alloc_epc_page_from_node(owner, nid); if (page) return page; } @@ -559,7 +561,7 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) /** * sgx_alloc_epc_page() - Allocate an EPC page - * @owner: the owner of the EPC page + * @owner: per-caller page owner * @reclaim: reclaim pages if necessary * * Iterate through EPC sections and borrow a free EPC page to the caller. When a @@ -579,11 +581,9 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim) struct sgx_epc_page *page; for ( ; ; ) { - page = __sgx_alloc_epc_page(); - if (!IS_ERR(page)) { - page->owner = owner; + page = __sgx_alloc_epc_page(owner); + if (!IS_ERR(page)) break; - } if (list_empty(&sgx_active_page_list)) return ERR_PTR(-ENOMEM); @@ -624,6 +624,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page) spin_lock(&node->lock); + page->owner = NULL; list_add_tail(&page->list, &node->free_page_list); sgx_nr_free_pages++; @@ -652,7 +653,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, for (i = 0; i < nr_pages; i++) { section->pages[i].section = index; section->pages[i].flags = 0; - section->pages[i].owner = NULL; + section->pages[i].owner = (void *)-1; list_add_tail(§ion->pages[i].list, &sgx_dirty_page_list); } diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index 4628acec0009..cc624778645f 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -29,7 +29,7 @@ struct sgx_epc_page { unsigned int section; unsigned int flags; - struct sgx_encl_page *owner; + void *owner; struct list_head list; }; @@ -77,7 +77,7 @@ static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page) return section->virt_addr + index * PAGE_SIZE; } -struct sgx_epc_page *__sgx_alloc_epc_page(void); +struct sgx_epc_page *__sgx_alloc_epc_page(void *owner); void sgx_free_epc_page(struct sgx_epc_page *page); void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
SGX EPC pages go through the following life cycle: DIRTY ---> FREE ---> IN-USE --\ ^ | \-----------------/ Recovery action for poison for a DIRTY or FREE page is simple. Just make sure never to allocate the page. IN-USE pages need some extra handling. It would be good to use the sgx_epc_page->owner field as an indicator of where an EPC page is currently in that cycle (owner != NULL means the EPC page is IN-USE). But there is one caller, sgx_alloc_va_page(), that calls with NULL. Since there are multiple uses of the "owner" field with different types change the type of sgx_epc_page.owner to "void *. Start epc_pages out with a non-NULL owner while they are in DIRTY state. Fix up the one holdout to provide a non-NULL owner. Refactor the allocation sequence so that changes to/from NULL value happen together with adding/removing the epc_page from a free list while the node->lock is held. Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/x86/kernel/cpu/sgx/encl.c | 5 +++-- arch/x86/kernel/cpu/sgx/encl.h | 2 +- arch/x86/kernel/cpu/sgx/ioctl.c | 2 +- arch/x86/kernel/cpu/sgx/main.c | 21 +++++++++++---------- arch/x86/kernel/cpu/sgx/sgx.h | 4 ++-- 5 files changed, 18 insertions(+), 16 deletions(-)