Message ID | 20161208123828.21834-2-jarkko.sakkinen@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote: > Fallback from EWB failure by killing the enclave by zeroing TCS PTEs > and kicking out threads instead of crashing the driver with BUG_ON(). > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > --- > drivers/platform/x86/intel_sgx_page_cache.c | 57 ++++++++++++++++++----------- > 1 file changed, 36 insertions(+), 21 deletions(-) > > diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c > index 8b1cc82..0f63060 100644 > --- a/drivers/platform/x86/intel_sgx_page_cache.c > +++ b/drivers/platform/x86/intel_sgx_page_cache.c > @@ -195,9 +195,9 @@ static void sgx_etrack(struct sgx_epc_page *epc_page) > sgx_put_epc_page(epc); > } > > -static int sgx_ewb(struct sgx_encl *encl, > - struct sgx_encl_page *encl_page, > - struct page *backing) > +static int __sgx_ewb(struct sgx_encl *encl, > + struct sgx_encl_page *encl_page, > + struct page *backing) > { > struct sgx_page_info pginfo; > void *epc; > @@ -218,12 +218,31 @@ static int sgx_ewb(struct sgx_encl *encl, > sgx_put_epc_page(epc); > kunmap_atomic((void *)(unsigned long)pginfo.srcpge); > > - if (ret != 0 && ret != SGX_NOT_TRACKED) > - sgx_err(encl, "EWB returned %d\n", ret); > - > return ret; > } > > +static bool sgx_ewb(struct sgx_encl *encl, > + struct sgx_encl_page *entry, > + struct page *backing) > +{ > + int ret = __sgx_ewb(encl, entry, backing); > + > + /* Only kick out threads with an IPI if needed. */ > + if (ret == SGX_NOT_TRACKED) { > + smp_call_function(sgx_ipi_cb, NULL, 1); > + ret = __sgx_ewb(encl, entry, backing); > + } > + > + if (ret) { > + /* Make enclave inaccessible. */ > + sgx_invalidate(encl); > + smp_call_function(sgx_ipi_cb, NULL, 1); > + return false; > + } > + > + return true; > +} > + If __sgx_ewb() fails after sending an IPI, shouldn't we display a kernel warning, unload the driver and prevent reloading the driver until the system is rebooted? 
A failing EWB after a system-wide IPI would indicate a hardware or kernel bug; in either case, the user/admin should be alerted and SGX should be effectively disabled. Such an abort flow could also be used to replace the other BUG/BUG_ON calls.
On Tue, Dec 13, 2016 at 11:24:08AM -0800, Sean Christopherson wrote: > On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote: > > Fallback from EWB failure by killing the enclave by zeroing TCS PTEs > > and kicking out threads instead of crashing the driver with BUG_ON(). > > > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > > --- > > drivers/platform/x86/intel_sgx_page_cache.c | 57 ++++++++++++++++++----------- > > 1 file changed, 36 insertions(+), 21 deletions(-) > > > > diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c > > index 8b1cc82..0f63060 100644 > > --- a/drivers/platform/x86/intel_sgx_page_cache.c > > +++ b/drivers/platform/x86/intel_sgx_page_cache.c > > @@ -195,9 +195,9 @@ static void sgx_etrack(struct sgx_epc_page *epc_page) > > sgx_put_epc_page(epc); > > } > > > > -static int sgx_ewb(struct sgx_encl *encl, > > - struct sgx_encl_page *encl_page, > > - struct page *backing) > > +static int __sgx_ewb(struct sgx_encl *encl, > > + struct sgx_encl_page *encl_page, > > + struct page *backing) > > { > > struct sgx_page_info pginfo; > > void *epc; > > @@ -218,12 +218,31 @@ static int sgx_ewb(struct sgx_encl *encl, > > sgx_put_epc_page(epc); > > kunmap_atomic((void *)(unsigned long)pginfo.srcpge); > > > > - if (ret != 0 && ret != SGX_NOT_TRACKED) > > - sgx_err(encl, "EWB returned %d\n", ret); > > - > > return ret; > > } > > > > +static bool sgx_ewb(struct sgx_encl *encl, > > + struct sgx_encl_page *entry, > > + struct page *backing) > > +{ > > + int ret = __sgx_ewb(encl, entry, backing); > > + > > + /* Only kick out threads with an IPI if needed. */ > > + if (ret == SGX_NOT_TRACKED) { > > + smp_call_function(sgx_ipi_cb, NULL, 1); > > + ret = __sgx_ewb(encl, entry, backing); > > + } > > + > > + if (ret) { > > + /* Make enclave inaccessible. 
*/ > > + sgx_invalidate(encl); > > + smp_call_function(sgx_ipi_cb, NULL, 1); > > + return false; > > + } > > + > > + return true; > > +} > > + > > If __sgx_ewb() fails after sending an IPI, shouldn't we display a > kernel warning, unload the driver and prevent reloading the driver > until the system is rebooted? A failing EWB after a system wide IPI > would indicate a hardware or kernel bug; in either case, the Showing an error message makes sense. Other actions are up to the sysadmin. > user/admin should be alerted and SGX should be effectively disabled. > Such an abort flow could also be used to replace the other BUG/BUG_ON > calls. The situations are very different in terms of context. Aborting is not a feature. /Jarkko
diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c index 8b1cc82..0f63060 100644 --- a/drivers/platform/x86/intel_sgx_page_cache.c +++ b/drivers/platform/x86/intel_sgx_page_cache.c @@ -195,9 +195,9 @@ static void sgx_etrack(struct sgx_epc_page *epc_page) sgx_put_epc_page(epc); } -static int sgx_ewb(struct sgx_encl *encl, - struct sgx_encl_page *encl_page, - struct page *backing) +static int __sgx_ewb(struct sgx_encl *encl, + struct sgx_encl_page *encl_page, + struct page *backing) { struct sgx_page_info pginfo; void *epc; @@ -218,12 +218,31 @@ static int sgx_ewb(struct sgx_encl *encl, sgx_put_epc_page(epc); kunmap_atomic((void *)(unsigned long)pginfo.srcpge); - if (ret != 0 && ret != SGX_NOT_TRACKED) - sgx_err(encl, "EWB returned %d\n", ret); - return ret; } +static bool sgx_ewb(struct sgx_encl *encl, + struct sgx_encl_page *entry, + struct page *backing) +{ + int ret = __sgx_ewb(encl, entry, backing); + + /* Only kick out threads with an IPI if needed. */ + if (ret == SGX_NOT_TRACKED) { + smp_call_function(sgx_ipi_cb, NULL, 1); + ret = __sgx_ewb(encl, entry, backing); + } + + if (ret) { + /* Make enclave inaccessible. 
*/ + sgx_invalidate(encl); + smp_call_function(sgx_ipi_cb, NULL, 1); + return false; + } + + return true; +} + void sgx_free_encl_page(struct sgx_encl_page *entry, struct sgx_encl *encl, unsigned int flags) @@ -239,9 +258,9 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) struct sgx_encl_page *tmp; struct page *pages[SGX_NR_SWAP_CLUSTER_MAX + 1]; struct vm_area_struct *evma; + unsigned int free_flags; int cnt = 0; int i = 0; - int ret; if (list_empty(src)) return; @@ -304,20 +323,16 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) load_list); list_del(&entry->load_list); + free_flags = 0; + evma = sgx_find_vma(encl, entry->addr); if (evma) { - ret = sgx_ewb(encl, entry, pages[i]); - BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED); - /* Only kick out threads with an IPI if needed. */ - if (ret) { - smp_call_function(sgx_ipi_cb, NULL, 1); - BUG_ON(sgx_ewb(encl, entry, pages[i])); - } + if (sgx_ewb(encl, entry, pages[i])) + free_flags = SGX_FREE_SKIP_EREMOVE; encl->secs_child_cnt--; } - sgx_free_encl_page(entry, encl, - evma ? SGX_FREE_SKIP_EREMOVE : 0); + sgx_free_encl_page(entry, encl, free_flags); sgx_put_backing(pages[i++], evma); } @@ -326,13 +341,13 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) (encl->flags & SGX_ENCL_INITIALIZED)) { pages[cnt] = sgx_get_backing(encl, &encl->secs_page); if (!IS_ERR(pages[cnt])) { - ret = sgx_ewb(encl, &encl->secs_page, - pages[cnt]); - BUG_ON(ret); + free_flags = 0; + if (sgx_ewb(encl, &encl->secs_page, pages[cnt])) + free_flags = SGX_FREE_SKIP_EREMOVE; + encl->flags |= SGX_ENCL_SECS_EVICTED; - sgx_free_encl_page(&encl->secs_page, encl, - SGX_FREE_SKIP_EREMOVE); + sgx_free_encl_page(&encl->secs_page, encl, free_flags); sgx_put_backing(pages[cnt], true); } }
Fall back from EWB failure by killing the enclave, i.e. zeroing TCS PTEs and kicking out threads, instead of crashing the driver with BUG_ON(). Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> --- drivers/platform/x86/intel_sgx_page_cache.c | 57 ++++++++++++++++++----------- 1 file changed, 36 insertions(+), 21 deletions(-)