Add SGX selftest `augment_via_eaccept_long`

Message ID	20220804201456.33418-1-vijay.dhanraj@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-sgx-owner@kernel.org> From: vijay.dhanraj@intel.com To: linux-sgx@vger.kernel.org, jarkko@kernel.org, reinette.chatre@intel.com, dave.hansen@linux.intel.com Cc: haitao.huang@intel.com Subject: [PATCH] Add SGX selftest `augment_via_eaccept_long` Date: Thu, 4 Aug 2022 13:14:56 -0700 Message-Id: <20220804201456.33418-1-vijay.dhanraj@intel.com> Precedence: bulk
Series	Add SGX selftest `augment_via_eaccept_long` \| expand Add SGX selftest `augment_via_eaccept_long`

Dhanraj, Vijay Aug. 4, 2022, 8:14 p.m. UTC

From: Vijay Dhanraj <vijay.dhanraj@intel.com>

This commit adds a new test case which is same as `augment_via_eaccept`
but adds more number of EPC pages to stress test `EAUG` via `EACCEPT`.

Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
---
 tools/testing/selftests/sgx/load.c      |   5 +-
 tools/testing/selftests/sgx/main.c      | 120 +++++++++++++++++++++++-
 tools/testing/selftests/sgx/main.h      |   3 +-
 tools/testing/selftests/sgx/sigstruct.c |   2 +-
 4 files changed, 125 insertions(+), 5 deletions(-)

Jarkko Sakkinen Aug. 6, 2022, 6:18 p.m. UTC | #1

On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com wrote:
> From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> 
> This commit adds a new test case which is same as `augment_via_eaccept`
> but adds more number of EPC pages to stress test `EAUG` via `EACCEPT`.
> 
> Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>

Thank you. I'll run this with Icelake system.

> ---
>  tools/testing/selftests/sgx/load.c      |   5 +-
>  tools/testing/selftests/sgx/main.c      | 120 +++++++++++++++++++++++-
>  tools/testing/selftests/sgx/main.h      |   3 +-
>  tools/testing/selftests/sgx/sigstruct.c |   2 +-
>  4 files changed, 125 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> index 94bdeac1cf04..7de1b15c90b1 100644
> --- a/tools/testing/selftests/sgx/load.c
> +++ b/tools/testing/selftests/sgx/load.c
> @@ -171,7 +171,8 @@ uint64_t encl_get_entry(struct encl *encl, const char *symbol)
>  	return 0;
>  }
>  
> -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
> +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> +			   unsigned long edmm_size)
>  {
>  	const char device_path[] = "/dev/sgx_enclave";
>  	struct encl_segment *seg;
> @@ -300,7 +301,7 @@ bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
>  
>  	encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
>  
> -	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
> +	for (encl->encl_size = 4096; encl->encl_size < encl->src_size + edmm_size;)
>  		encl->encl_size <<= 1;
>  
>  	return true;
> diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
> index 9820b3809c69..65e79682f75e 100644
> --- a/tools/testing/selftests/sgx/main.c
> +++ b/tools/testing/selftests/sgx/main.c
> @@ -25,6 +25,8 @@ static const uint64_t MAGIC = 0x1122334455667788ULL;
>  static const uint64_t MAGIC2 = 0x8877665544332211ULL;
>  vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
>  
> +static const unsigned long edmm_size = 8589934592; //8G
> +
>  /*
>   * Security Information (SECINFO) data structure needed by a few SGX
>   * instructions (eg. ENCLU[EACCEPT] and ENCLU[EMODPE]) holds meta-data
> @@ -183,7 +185,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
>  	unsigned int i;
>  	void *addr;
>  
> -	if (!encl_load("test_encl.elf", encl, heap_size)) {
> +	if (!encl_load("test_encl.elf", encl, heap_size, edmm_size)) {
>  		encl_delete(encl);
>  		TH_LOG("Failed to load the test enclave.");
>  		return false;
> @@ -1210,6 +1212,122 @@ TEST_F(enclave, augment_via_eaccept)
>  	munmap(addr, PAGE_SIZE);
>  }
>  
> +/*
> + * Test for the addition of large number of pages to an initialized enclave via
> + * a pre-emptive run of EACCEPT on page to be added.
> + */
> +#define TIMEOUT_LONG 900 /* seconds */
> +TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_LONG)
> +{
> +	struct encl_op_get_from_addr get_addr_op;
> +	struct encl_op_put_to_addr put_addr_op;
> +	struct encl_op_eaccept eaccept_op;
> +	size_t total_size = 0;
> +	void *addr;
> +	unsigned long i;
> +
> +	if (!sgx2_supported())
> +		SKIP(return, "SGX2 not supported");
> +
> +	ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
> +
> +	memset(&self->run, 0, sizeof(self->run));
> +	self->run.tcs = self->encl.encl_base;
> +
> +	for (i = 0; i < self->encl.nr_segments; i++) {
> +		struct encl_segment *seg = &self->encl.segment_tbl[i];
> +
> +		total_size += seg->size;
> +		TH_LOG("test enclave: total_size = %ld, seg->size = %ld", total_size, seg->size);
> +	}
> +
> +	/*
> +	 * Actual enclave size is expected to be larger than the loaded
> +	 * test enclave since enclave size must be a power of 2 in bytes while
> +	 * test_encl does not consume it all.
> +	 */
> +	EXPECT_LT(total_size + edmm_size, self->encl.encl_size);
> +
> +	/*
> +	 * mmap() a page at end of existing enclave to be used for dynamic
> +	 * EPC page.
> +	 *
> +	 * Kernel will allow new mapping using any permissions if it
> +	 * falls into the enclave's address range but not backed
> +	 * by existing enclave pages.
> +	 */
> +	TH_LOG("mmaping pages at end of enclave...");
> +	addr = mmap((void *)self->encl.encl_base + total_size, edmm_size,
> +			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED,
> +			self->encl.fd, 0);
> +	EXPECT_NE(addr, MAP_FAILED);
> +
> +	self->run.exception_vector = 0;
> +	self->run.exception_error_code = 0;
> +	self->run.exception_addr = 0;
> +
> +	/*
> +	 * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again
> +	 * without a #PF). All should be transparent to userspace.
> +	 */
> +	TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...",
> +			edmm_size);
> +	eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
> +	eaccept_op.ret = 0;
> +	eaccept_op.header.type = ENCL_OP_EACCEPT;
> +
> +	for (i = 0; i < edmm_size; i += 4096) {
> +		eaccept_op.epc_addr = (uint64_t)(addr + i);
> +
> +		EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
> +		if (self->run.exception_vector == 14 &&
> +			self->run.exception_error_code == 4 &&
> +			self->run.exception_addr == self->encl.encl_base) {
> +			munmap(addr, edmm_size);
> +			SKIP(return, "Kernel does not support adding pages to initialized enclave");
> +		}
> +
> +		EXPECT_EQ(self->run.exception_vector, 0);
> +		EXPECT_EQ(self->run.exception_error_code, 0);
> +		EXPECT_EQ(self->run.exception_addr, 0);
> +		ASSERT_EQ(eaccept_op.ret, 0);
> +		ASSERT_EQ(self->run.function, EEXIT);
> +	}
> +
> +	/*
> +	 * New page should be accessible from within enclave - attempt to
> +	 * write to it.
> +	 */
> +	put_addr_op.value = MAGIC;
> +	put_addr_op.addr = (unsigned long)addr;
> +	put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
> +
> +	EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
> +
> +	EXPECT_EEXIT(&self->run);
> +	EXPECT_EQ(self->run.exception_vector, 0);
> +	EXPECT_EQ(self->run.exception_error_code, 0);
> +	EXPECT_EQ(self->run.exception_addr, 0);
> +
> +	/*
> +	 * Read memory from newly added page that was just written to,
> +	 * confirming that data previously written (MAGIC) is present.
> +	 */
> +	get_addr_op.value = 0;
> +	get_addr_op.addr = (unsigned long)addr;
> +	get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
> +
> +	EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
> +
> +	EXPECT_EQ(get_addr_op.value, MAGIC);
> +	EXPECT_EEXIT(&self->run);
> +	EXPECT_EQ(self->run.exception_vector, 0);
> +	EXPECT_EQ(self->run.exception_error_code, 0);
> +	EXPECT_EQ(self->run.exception_addr, 0);
> +
> +	munmap(addr, edmm_size);
> +}
> +
>  /*
>   * SGX2 page type modification test in two phases:
>   * Phase 1:
> diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
> index fc585be97e2f..fe5d39ac0e1e 100644
> --- a/tools/testing/selftests/sgx/main.h
> +++ b/tools/testing/selftests/sgx/main.h
> @@ -35,7 +35,8 @@ extern unsigned char sign_key[];
>  extern unsigned char sign_key_end[];
>  
>  void encl_delete(struct encl *ctx);
> -bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
> +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size,
> +			   unsigned long edmm_size);
>  bool encl_measure(struct encl *encl);
>  bool encl_build(struct encl *encl);
>  uint64_t encl_get_entry(struct encl *encl, const char *symbol);
> diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
> index 50c5ab1aa6fa..6000cf0e4975 100644
> --- a/tools/testing/selftests/sgx/sigstruct.c
> +++ b/tools/testing/selftests/sgx/sigstruct.c
> @@ -343,7 +343,7 @@ bool encl_measure(struct encl *encl)
>  	if (!ctx)
>  		goto err;
>  
> -	if (!mrenclave_ecreate(ctx, encl->src_size))
> +	if (!mrenclave_ecreate(ctx, encl->encl_size))
>  		goto err;
>  
>  	for (i = 0; i < encl->nr_segments; i++) {
> -- 
> 2.17.1
> 

BR, Jarkko

Jarkko Sakkinen Aug. 8, 2022, 12:18 p.m. UTC | #2

On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com wrote:
> From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> 
> This commit adds a new test case which is same as `augment_via_eaccept`
> but adds more number of EPC pages to stress test `EAUG` via `EACCEPT`.
> 
> Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>

Hey, to reproduce the original issue: does it reproduce on VM or
should I run baremetal kernel?

BR, Jarkko

Dhanraj, Vijay Aug. 8, 2022, 1 p.m. UTC | #3

> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Monday, August 8, 2022 5:18 AM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> <haitao.huang@intel.com>
> Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> 
> On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com wrote:
> > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> >
> > This commit adds a new test case which is same as
> > `augment_via_eaccept` but adds more number of EPC pages to stress test
> `EAUG` via `EACCEPT`.
> >
> > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> 
> Hey, to reproduce the original issue: does it reproduce on VM or should I run
> baremetal kernel?
> 
> BR, Jarkko

Hi Jarkko, The issue should be reproducible on baremetal kernel.
-Vijay

Jarkko Sakkinen Aug. 8, 2022, 3:29 p.m. UTC | #4

On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> 
> > -----Original Message-----
> > From: Jarkko Sakkinen <jarkko@kernel.org>
> > Sent: Monday, August 8, 2022 5:18 AM
> > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > <haitao.huang@intel.com>
> > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > 
> > On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com wrote:
> > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > >
> > > This commit adds a new test case which is same as
> > > `augment_via_eaccept` but adds more number of EPC pages to stress test
> > `EAUG` via `EACCEPT`.
> > >
> > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > 
> > Hey, to reproduce the original issue: does it reproduce on VM or should I run
> > baremetal kernel?
> > 
> > BR, Jarkko
> 
> Hi Jarkko, The issue should be reproducible on baremetal kernel.

Thanks.

BR, Jarkko

Jarkko Sakkinen Aug. 9, 2022, 10:45 a.m. UTC | #5

On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > 
> > > -----Original Message-----
> > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > Sent: Monday, August 8, 2022 5:18 AM
> > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > > <haitao.huang@intel.com>
> > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > 
> > > On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com wrote:
> > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > >
> > > > This commit adds a new test case which is same as
> > > > `augment_via_eaccept` but adds more number of EPC pages to stress test
> > > `EAUG` via `EACCEPT`.
> > > >
> > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > 
> > > Hey, to reproduce the original issue: does it reproduce on VM or should I run
> > > baremetal kernel?
> > > 
> > > BR, Jarkko
> > 
> > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> 
> Thanks.

I need comment out other tests in order to make sane out of this :-)

Mentioning this because came into realization that stress tests should
be IMHO moved each to a separate binary (so that they can be run
separately). Just a note (TODO) to myself.

I'll work on this today again and *possibly* split your test to its own
application to get a starting point for forementioned.

BR, Jarkko

Jarkko Sakkinen Aug. 9, 2022, 4:09 p.m. UTC | #6

On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > 
> > > > -----Original Message-----
> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > > > <haitao.huang@intel.com>
> > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > 
> > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com wrote:
> > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > >
> > > > > This commit adds a new test case which is same as
> > > > > `augment_via_eaccept` but adds more number of EPC pages to stress test
> > > > `EAUG` via `EACCEPT`.
> > > > >
> > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > 
> > > > Hey, to reproduce the original issue: does it reproduce on VM or should I run
> > > > baremetal kernel?
> > > > 
> > > > BR, Jarkko
> > > 
> > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > 
> > Thanks.
> 
> I need comment out other tests in order to make sane out of this :-)
> 
> Mentioning this because came into realization that stress tests should
> be IMHO moved each to a separate binary (so that they can be run
> separately). Just a note (TODO) to myself.
> 
> I'll work on this today again and *possibly* split your test to its own
> application to get a starting point for forementioned.

I got

#  RUN           enclave.augment_via_eaccept_long ...
# main.c:1241:augment_via_eaccept_long:test enclave: total_size = 8192, seg->size = 8192
# main.c:1241:augment_via_eaccept_long:test enclave: total_size = 12288, seg->size = 4096
# main.c:1241:augment_via_eaccept_long:test enclave: total_size = 36864, seg->size = 24576
# main.c:1241:augment_via_eaccept_long:test enclave: total_size = 40960, seg->size = 4096
# main.c:1259:augment_via_eaccept_long:mmaping pages at end of enclave...
# main.c:1273:augment_via_eaccept_long:Entering enclave to run EACCEPT for each page of 8589934592 bytes may take a while ...
#            OK  enclave.augment_via_eaccept_long

The CPU used for testing was according to /proc/cpuinfo:

model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz

I have couple of queries:

1. Is it possible to get dmesg output?
2. Do I have to repeat the test multiple times, or does it
   occur unconditionaly?

BR, Jarkko

Dhanraj, Vijay Aug. 9, 2022, 5:08 p.m. UTC | #7

> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Tuesday, August 9, 2022 9:10 AM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> <haitao.huang@intel.com>
> Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> 
> On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > > Haitao <haitao.huang@intel.com>
> > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > >
> > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com
> wrote:
> > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > >
> > > > > > This commit adds a new test case which is same as
> > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > stress test
> > > > > `EAUG` via `EACCEPT`.
> > > > > >
> > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > >
> > > > > Hey, to reproduce the original issue: does it reproduce on VM or
> > > > > should I run baremetal kernel?
> > > > >
> > > > > BR, Jarkko
> > > >
> > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > >
> > > Thanks.
> >
> > I need comment out other tests in order to make sane out of this :-)
> >
> > Mentioning this because came into realization that stress tests should
> > be IMHO moved each to a separate binary (so that they can be run
> > separately). Just a note (TODO) to myself.
> >
> > I'll work on this today again and *possibly* split your test to its
> > own application to get a starting point for forementioned.
> 
> I got
> 
> #  RUN           enclave.augment_via_eaccept_long ...
> # main.c:1241:augment_via_eaccept_long:test enclave: total_size = 8192,
> seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> total_size = 12288, seg->size = 4096 #
> main.c:1241:augment_via_eaccept_long:test enclave: total_size = 36864,
> seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> total_size = 40960, seg->size = 4096 #
> main.c:1259:augment_via_eaccept_long:mmaping pages at end of enclave...
> # main.c:1273:augment_via_eaccept_long:Entering enclave to run EACCEPT
> for each page of 8589934592 bytes may take a while ...
> #            OK  enclave.augment_via_eaccept_long
> 
> The CPU used for testing was according to /proc/cpuinfo:
> 
> model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> 
> I have couple of queries:
> 
> 1. Is it possible to get dmesg output?
I did check the dmesg output but couldn't find anything related to the failure. Just the general log messages.

> 2. Do I have to repeat the test multiple times, or does it
>    occur unconditionaly?
> 
I was able to repro every time but it was a bit sporadic for Haitao.

> BR, Jarkko

Also, did you set the PRMRR size to 2GB per socket in the BIOS? The issue is only reproduced for oversubscribed scenario. When I set my PRMRR to 64GB per socket, I wasn't able to repro the issue.

Regards, Vijay

Jarkko Sakkinen Aug. 9, 2022, 6:53 p.m. UTC | #8

On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> 
> 
> > -----Original Message-----
> > From: Jarkko Sakkinen <jarkko@kernel.org>
> > Sent: Tuesday, August 9, 2022 9:10 AM
> > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > <haitao.huang@intel.com>
> > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > 
> > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > > > Haitao <haitao.huang@intel.com>
> > > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > > >
> > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com
> > wrote:
> > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > >
> > > > > > > This commit adds a new test case which is same as
> > > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > > stress test
> > > > > > `EAUG` via `EACCEPT`.
> > > > > > >
> > > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > > >
> > > > > > Hey, to reproduce the original issue: does it reproduce on VM or
> > > > > > should I run baremetal kernel?
> > > > > >
> > > > > > BR, Jarkko
> > > > >
> > > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > > >
> > > > Thanks.
> > >
> > > I need comment out other tests in order to make sane out of this :-)
> > >
> > > Mentioning this because came into realization that stress tests should
> > > be IMHO moved each to a separate binary (so that they can be run
> > > separately). Just a note (TODO) to myself.
> > >
> > > I'll work on this today again and *possibly* split your test to its
> > > own application to get a starting point for forementioned.
> > 
> > I got
> > 
> > #  RUN           enclave.augment_via_eaccept_long ...
> > # main.c:1241:augment_via_eaccept_long:test enclave: total_size = 8192,
> > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> > total_size = 12288, seg->size = 4096 #
> > main.c:1241:augment_via_eaccept_long:test enclave: total_size = 36864,
> > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> > total_size = 40960, seg->size = 4096 #
> > main.c:1259:augment_via_eaccept_long:mmaping pages at end of enclave...
> > # main.c:1273:augment_via_eaccept_long:Entering enclave to run EACCEPT
> > for each page of 8589934592 bytes may take a while ...
> > #            OK  enclave.augment_via_eaccept_long
> > 
> > The CPU used for testing was according to /proc/cpuinfo:
> > 
> > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > 
> > I have couple of queries:
> > 
> > 1. Is it possible to get dmesg output?
> I did check the dmesg output but couldn't find anything related to the failure. Just the general log messages.
> 
> > 2. Do I have to repeat the test multiple times, or does it
> >    occur unconditionaly?
> > 
> I was able to repro every time but it was a bit sporadic for Haitao.
> 
> > BR, Jarkko
> 
> Also, did you set the PRMRR size to 2GB per socket in the BIOS? The issue
> is only reproduced for oversubscribed scenario. When I set my PRMRR to
> 64GB per socket, I wasn't able to repro the issue.

I need to revisit this.

Can you simply run test_sgx with gdb and see where it hits?
HOST_CFLAGS has apparently "-g" already.

> Regards, Vijay

BR, Jarkko

Jarkko Sakkinen Aug. 9, 2022, 6:57 p.m. UTC | #9

On Tue, Aug 09, 2022 at 09:53:11PM +0300, Jarkko Sakkinen wrote:
> On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > > <haitao.huang@intel.com>
> > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > 
> > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > > > > Haitao <haitao.huang@intel.com>
> > > > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > > > >
> > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700, vijay.dhanraj@intel.com
> > > wrote:
> > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > >
> > > > > > > > This commit adds a new test case which is same as
> > > > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > > > stress test
> > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > >
> > > > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > > > >
> > > > > > > Hey, to reproduce the original issue: does it reproduce on VM or
> > > > > > > should I run baremetal kernel?
> > > > > > >
> > > > > > > BR, Jarkko
> > > > > >
> > > > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > > > >
> > > > > Thanks.
> > > >
> > > > I need comment out other tests in order to make sane out of this :-)
> > > >
> > > > Mentioning this because came into realization that stress tests should
> > > > be IMHO moved each to a separate binary (so that they can be run
> > > > separately). Just a note (TODO) to myself.
> > > >
> > > > I'll work on this today again and *possibly* split your test to its
> > > > own application to get a starting point for forementioned.
> > > 
> > > I got
> > > 
> > > #  RUN           enclave.augment_via_eaccept_long ...
> > > # main.c:1241:augment_via_eaccept_long:test enclave: total_size = 8192,
> > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > total_size = 12288, seg->size = 4096 #
> > > main.c:1241:augment_via_eaccept_long:test enclave: total_size = 36864,
> > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > total_size = 40960, seg->size = 4096 #
> > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of enclave...
> > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run EACCEPT
> > > for each page of 8589934592 bytes may take a while ...
> > > #            OK  enclave.augment_via_eaccept_long
> > > 
> > > The CPU used for testing was according to /proc/cpuinfo:
> > > 
> > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > 
> > > I have couple of queries:
> > > 
> > > 1. Is it possible to get dmesg output?
> > I did check the dmesg output but couldn't find anything related to the failure. Just the general log messages.
> > 
> > > 2. Do I have to repeat the test multiple times, or does it
> > >    occur unconditionaly?
> > > 
> > I was able to repro every time but it was a bit sporadic for Haitao.
> > 
> > > BR, Jarkko
> > 
> > Also, did you set the PRMRR size to 2GB per socket in the BIOS? The issue
> > is only reproduced for oversubscribed scenario. When I set my PRMRR to
> > 64GB per socket, I wasn't able to repro the issue.
> 
> I need to revisit this.
> 
> Can you simply run test_sgx with gdb and see where it hits?
> HOST_CFLAGS has apparently "-g" already.

I'll check the socket configuration but since you can steadily
reproduce the possible bug, this would be useful information
to dig out.

BR, Jarkko

Dhanraj, Vijay Aug. 10, 2022, 12:09 a.m. UTC | #10

> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Tuesday, August 9, 2022 11:53 AM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> <haitao.huang@intel.com>
> Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> 
> On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> >
> >
> > > -----Original Message-----
> > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > Haitao <haitao.huang@intel.com>
> > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > >
> > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > `augment_via_eaccept_long`
> > > > > > >
> > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > vijay.dhanraj@intel.com
> > > wrote:
> > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > >
> > > > > > > > This commit adds a new test case which is same as
> > > > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > > > stress test
> > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > >
> > > > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > > > >
> > > > > > > Hey, to reproduce the original issue: does it reproduce on
> > > > > > > VM or should I run baremetal kernel?
> > > > > > >
> > > > > > > BR, Jarkko
> > > > > >
> > > > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > > > >
> > > > > Thanks.
> > > >
> > > > I need comment out other tests in order to make sane out of this
> > > > :-)
> > > >
> > > > Mentioning this because came into realization that stress tests
> > > > should be IMHO moved each to a separate binary (so that they can
> > > > be run separately). Just a note (TODO) to myself.
> > > >
> > > > I'll work on this today again and *possibly* split your test to
> > > > its own application to get a starting point for forementioned.
> > >
> > > I got
> > >
> > > #  RUN           enclave.augment_via_eaccept_long ...
> > > # main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > 8192,
> > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > total_size = 12288, seg->size = 4096 #
> > > main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > 36864,
> > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > total_size = 40960, seg->size = 4096 #
> > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> enclave...
> > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run
> > > EACCEPT for each page of 8589934592 bytes may take a while ...
> > > #            OK  enclave.augment_via_eaccept_long
> > >
> > > The CPU used for testing was according to /proc/cpuinfo:
> > >
> > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > >
> > > I have couple of queries:
> > >
> > > 1. Is it possible to get dmesg output?
> > I did check the dmesg output but couldn't find anything related to the
> failure. Just the general log messages.
> >
> > > 2. Do I have to repeat the test multiple times, or does it
> > >    occur unconditionaly?
> > >
> > I was able to repro every time but it was a bit sporadic for Haitao.
> >
> > > BR, Jarkko
> >
> > Also, did you set the PRMRR size to 2GB per socket in the BIOS? The
> > issue is only reproduced for oversubscribed scenario. When I set my
> > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> 
> I need to revisit this.
> 
> Can you simply run test_sgx with gdb and see where it hits?
> HOST_CFLAGS has apparently "-g" already.
> 
> > Regards, Vijay
> 
> BR, Jarkko

I am able to repro the issue when I reduce the PRMRR to 2B/socket but not but not able to break on the assertion failure with gdb. I also enabled debug attribute in the secs but still no avail. Anything I am missing here?

diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 7de1b15c90b1..c4bccd3f5f17 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
 
        memset(secs, 0, sizeof(*secs));
        secs->ssa_frame_size = 1;
-       secs->attributes = SGX_ATTR_MODE64BIT;
+       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
        secs->xfrm = 3;
        secs->base = encl->encl_base;
        secs->size = encl->encl_size;

Regards, Vijay

Jarkko Sakkinen Aug. 11, 2022, 1:01 a.m. UTC | #11

On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> 
> 
> > -----Original Message-----
> > From: Jarkko Sakkinen <jarkko@kernel.org>
> > Sent: Tuesday, August 9, 2022 11:53 AM
> > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > <haitao.huang@intel.com>
> > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > 
> > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > Haitao <haitao.huang@intel.com>
> > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > >
> > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > > `augment_via_eaccept_long`
> > > > > > > >
> > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > > vijay.dhanraj@intel.com
> > > > wrote:
> > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > >
> > > > > > > > > This commit adds a new test case which is same as
> > > > > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > > > > stress test
> > > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > > > > >
> > > > > > > > Hey, to reproduce the original issue: does it reproduce on
> > > > > > > > VM or should I run baremetal kernel?
> > > > > > > >
> > > > > > > > BR, Jarkko
> > > > > > >
> > > > > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > > > > >
> > > > > > Thanks.
> > > > >
> > > > > I need comment out other tests in order to make sane out of this
> > > > > :-)
> > > > >
> > > > > Mentioning this because came into realization that stress tests
> > > > > should be IMHO moved each to a separate binary (so that they can
> > > > > be run separately). Just a note (TODO) to myself.
> > > > >
> > > > > I'll work on this today again and *possibly* split your test to
> > > > > its own application to get a starting point for forementioned.
> > > >
> > > > I got
> > > >
> > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > > # main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > > 8192,
> > > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > total_size = 12288, seg->size = 4096 #
> > > > main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > > 36864,
> > > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > total_size = 40960, seg->size = 4096 #
> > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> > enclave...
> > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run
> > > > EACCEPT for each page of 8589934592 bytes may take a while ...
> > > > #            OK  enclave.augment_via_eaccept_long
> > > >
> > > > The CPU used for testing was according to /proc/cpuinfo:
> > > >
> > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > >
> > > > I have couple of queries:
> > > >
> > > > 1. Is it possible to get dmesg output?
> > > I did check the dmesg output but couldn't find anything related to the
> > failure. Just the general log messages.
> > >
> > > > 2. Do I have to repeat the test multiple times, or does it
> > > >    occur unconditionaly?
> > > >
> > > I was able to repro every time but it was a bit sporadic for Haitao.
> > >
> > > > BR, Jarkko
> > >
> > > Also, did you set the PRMRR size to 2GB per socket in the BIOS? The
> > > issue is only reproduced for oversubscribed scenario. When I set my
> > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > 
> > I need to revisit this.
> > 
> > Can you simply run test_sgx with gdb and see where it hits?
> > HOST_CFLAGS has apparently "-g" already.
> > 
> > > Regards, Vijay
> > 
> > BR, Jarkko
> 
> I am able to repro the issue when I reduce the PRMRR to 2B/socket but not but not able to break on the assertion failure with gdb. I also enabled debug attribute in the secs but still no avail. Anything I am missing here?
> 
> diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> index 7de1b15c90b1..c4bccd3f5f17 100644
> --- a/tools/testing/selftests/sgx/load.c
> +++ b/tools/testing/selftests/sgx/load.c
> @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
>  
>         memset(secs, 0, sizeof(*secs));
>         secs->ssa_frame_size = 1;
> -       secs->attributes = SGX_ATTR_MODE64BIT;
> +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
>         secs->xfrm = 3;
>         secs->base = encl->encl_base;
>         secs->size = encl->encl_size;
> 
> Regards, Vijay

I get also full pass with 2GB configuration (and also observed that 
kselftest runs much faster with this configuration).

But I looked at sgx_alloc_epc_page() and saw this:

               if (list_empty(&sgx_active_page_list))
                        return ERR_PTR(-ENOMEM);

                if (!reclaim) {
                        page = ERR_PTR(-EBUSY);
                        break;
                }

In sgx_vma_fault(), when running completely out of reclaimable pages, this
causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:

	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
	if (IS_ERR(entry)) {
		mutex_unlock(&encl->lock);

		if (PTR_ERR(entry) == -EBUSY)
			return VM_FAULT_NOPAGE;

		return VM_FAULT_SIGBUS;
	}

Not sure if those should be re-ordered that would keep the process stuck up
until there is something to reclaim. Now we use NOPAGE to address situation
when there is actually something to reclaim but because of locking side of
things we pass reclaim=false to sgx_alloc_epc_page().

So this is kind of OOM behaviour how it works now instead of stalling
processes.

BR, Jarkko

Jarkko Sakkinen Aug. 11, 2022, 1:36 a.m. UTC | #12

On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > Sent: Tuesday, August 9, 2022 11:53 AM
> > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > > <haitao.huang@intel.com>
> > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > 
> > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > > Haitao <haitao.huang@intel.com>
> > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > >
> > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > > > `augment_via_eaccept_long`
> > > > > > > > >
> > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > > > vijay.dhanraj@intel.com
> > > > > wrote:
> > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > >
> > > > > > > > > > This commit adds a new test case which is same as
> > > > > > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > > > > > stress test
> > > > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > > > > > >
> > > > > > > > > Hey, to reproduce the original issue: does it reproduce on
> > > > > > > > > VM or should I run baremetal kernel?
> > > > > > > > >
> > > > > > > > > BR, Jarkko
> > > > > > > >
> > > > > > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > > > > > >
> > > > > > > Thanks.
> > > > > >
> > > > > > I need comment out other tests in order to make sane out of this
> > > > > > :-)
> > > > > >
> > > > > > Mentioning this because came into realization that stress tests
> > > > > > should be IMHO moved each to a separate binary (so that they can
> > > > > > be run separately). Just a note (TODO) to myself.
> > > > > >
> > > > > > I'll work on this today again and *possibly* split your test to
> > > > > > its own application to get a starting point for forementioned.
> > > > >
> > > > > I got
> > > > >
> > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > > > # main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > > > 8192,
> > > > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > total_size = 12288, seg->size = 4096 #
> > > > > main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > > > 36864,
> > > > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > total_size = 40960, seg->size = 4096 #
> > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> > > enclave...
> > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run
> > > > > EACCEPT for each page of 8589934592 bytes may take a while ...
> > > > > #            OK  enclave.augment_via_eaccept_long
> > > > >
> > > > > The CPU used for testing was according to /proc/cpuinfo:
> > > > >
> > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > > >
> > > > > I have couple of queries:
> > > > >
> > > > > 1. Is it possible to get dmesg output?
> > > > I did check the dmesg output but couldn't find anything related to the
> > > failure. Just the general log messages.
> > > >
> > > > > 2. Do I have to repeat the test multiple times, or does it
> > > > >    occur unconditionaly?
> > > > >
> > > > I was able to repro every time but it was a bit sporadic for Haitao.
> > > >
> > > > > BR, Jarkko
> > > >
> > > > Also, did you set the PRMRR size to 2GB per socket in the BIOS? The
> > > > issue is only reproduced for oversubscribed scenario. When I set my
> > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > > 
> > > I need to revisit this.
> > > 
> > > Can you simply run test_sgx with gdb and see where it hits?
> > > HOST_CFLAGS has apparently "-g" already.
> > > 
> > > > Regards, Vijay
> > > 
> > > BR, Jarkko
> > 
> > I am able to repro the issue when I reduce the PRMRR to 2B/socket but not but not able to break on the assertion failure with gdb. I also enabled debug attribute in the secs but still no avail. Anything I am missing here?
> > 
> > diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> > index 7de1b15c90b1..c4bccd3f5f17 100644
> > --- a/tools/testing/selftests/sgx/load.c
> > +++ b/tools/testing/selftests/sgx/load.c
> > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> >  
> >         memset(secs, 0, sizeof(*secs));
> >         secs->ssa_frame_size = 1;
> > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> >         secs->xfrm = 3;
> >         secs->base = encl->encl_base;
> >         secs->size = encl->encl_size;
> > 
> > Regards, Vijay
> 
> I get also full pass with 2GB configuration (and also observed that 
> kselftest runs much faster with this configuration).
> 
> But I looked at sgx_alloc_epc_page() and saw this:
> 
>                if (list_empty(&sgx_active_page_list))
>                         return ERR_PTR(-ENOMEM);
> 
>                 if (!reclaim) {
>                         page = ERR_PTR(-EBUSY);
>                         break;
>                 }
> 
> In sgx_vma_fault(), when running completely out of reclaimable pages, this
> causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
> 
> 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> 	if (IS_ERR(entry)) {
> 		mutex_unlock(&encl->lock);
> 
> 		if (PTR_ERR(entry) == -EBUSY)
> 			return VM_FAULT_NOPAGE;
> 
> 		return VM_FAULT_SIGBUS;
> 	}
> 
> Not sure if those should be re-ordered that would keep the process stuck up
> until there is something to reclaim. Now we use NOPAGE to address situation
> when there is actually something to reclaim but because of locking side of
> things we pass reclaim=false to sgx_alloc_epc_page().
> 
> So this is kind of OOM behaviour how it works now instead of stalling
> processes.

Right, I looked at the original email at was really a page fault
that was catched. The above theory cannot possibly hold, as the
process does not exit with a bus error.

I looked next to sgx_encl_eaug_page(), and found this:

        encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
        if (IS_ERR(encl_page))
                return VM_FAULT_OOM;

This is AFAIK the only code path in sgx_vma_fault() flow that
allocates non-EPC memory, and the code paths where EPC allocation
fails the result would be SIGBUS.

So perhaps allocation is failing here. You could pretty easily
trace allocations with bpftrace and kretprobe to see if this is
what is happening, e.g. in one terminal:

sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ { printf("%d\n", retval); }'

And then run the kselftest in another.

BR, Jarkko

Jarkko Sakkinen Aug. 11, 2022, 1:50 a.m. UTC | #13

On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > > 
> > > 
> > > > -----Original Message-----
> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > Sent: Tuesday, August 9, 2022 11:53 AM
> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > > > <haitao.huang@intel.com>
> > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > 
> > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > > > Haitao <haitao.huang@intel.com>
> > > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > > >
> > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen wrote:
> > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay wrote:
> > > > > > > > >
> > > > > > > > > > -----Original Message-----
> > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > > > > `augment_via_eaccept_long`
> > > > > > > > > >
> > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > > > > vijay.dhanraj@intel.com
> > > > > > wrote:
> > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > > >
> > > > > > > > > > > This commit adds a new test case which is same as
> > > > > > > > > > > `augment_via_eaccept` but adds more number of EPC pages to
> > > > > > > > > > > stress test
> > > > > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> > > > > > > > > >
> > > > > > > > > > Hey, to reproduce the original issue: does it reproduce on
> > > > > > > > > > VM or should I run baremetal kernel?
> > > > > > > > > >
> > > > > > > > > > BR, Jarkko
> > > > > > > > >
> > > > > > > > > Hi Jarkko, The issue should be reproducible on baremetal kernel.
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > >
> > > > > > > I need comment out other tests in order to make sane out of this
> > > > > > > :-)
> > > > > > >
> > > > > > > Mentioning this because came into realization that stress tests
> > > > > > > should be IMHO moved each to a separate binary (so that they can
> > > > > > > be run separately). Just a note (TODO) to myself.
> > > > > > >
> > > > > > > I'll work on this today again and *possibly* split your test to
> > > > > > > its own application to get a starting point for forementioned.
> > > > > >
> > > > > > I got
> > > > > >
> > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > > > > # main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > > > > 8192,
> > > > > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > total_size = 12288, seg->size = 4096 #
> > > > > > main.c:1241:augment_via_eaccept_long:test enclave: total_size =
> > > > > > 36864,
> > > > > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > total_size = 40960, seg->size = 4096 #
> > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> > > > enclave...
> > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run
> > > > > > EACCEPT for each page of 8589934592 bytes may take a while ...
> > > > > > #            OK  enclave.augment_via_eaccept_long
> > > > > >
> > > > > > The CPU used for testing was according to /proc/cpuinfo:
> > > > > >
> > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > > > >
> > > > > > I have couple of queries:
> > > > > >
> > > > > > 1. Is it possible to get dmesg output?
> > > > > I did check the dmesg output but couldn't find anything related to the
> > > > failure. Just the general log messages.
> > > > >
> > > > > > 2. Do I have to repeat the test multiple times, or does it
> > > > > >    occur unconditionaly?
> > > > > >
> > > > > I was able to repro every time but it was a bit sporadic for Haitao.
> > > > >
> > > > > > BR, Jarkko
> > > > >
> > > > > Also, did you set the PRMRR size to 2GB per socket in the BIOS? The
> > > > > issue is only reproduced for oversubscribed scenario. When I set my
> > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > > > 
> > > > I need to revisit this.
> > > > 
> > > > Can you simply run test_sgx with gdb and see where it hits?
> > > > HOST_CFLAGS has apparently "-g" already.
> > > > 
> > > > > Regards, Vijay
> > > > 
> > > > BR, Jarkko
> > > 
> > > I am able to repro the issue when I reduce the PRMRR to 2B/socket but not but not able to break on the assertion failure with gdb. I also enabled debug attribute in the secs but still no avail. Anything I am missing here?
> > > 
> > > diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
> > > index 7de1b15c90b1..c4bccd3f5f17 100644
> > > --- a/tools/testing/selftests/sgx/load.c
> > > +++ b/tools/testing/selftests/sgx/load.c
> > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> > >  
> > >         memset(secs, 0, sizeof(*secs));
> > >         secs->ssa_frame_size = 1;
> > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> > >         secs->xfrm = 3;
> > >         secs->base = encl->encl_base;
> > >         secs->size = encl->encl_size;
> > > 
> > > Regards, Vijay
> > 
> > I get also full pass with 2GB configuration (and also observed that 
> > kselftest runs much faster with this configuration).
> > 
> > But I looked at sgx_alloc_epc_page() and saw this:
> > 
> >                if (list_empty(&sgx_active_page_list))
> >                         return ERR_PTR(-ENOMEM);
> > 
> >                 if (!reclaim) {
> >                         page = ERR_PTR(-EBUSY);
> >                         break;
> >                 }
> > 
> > In sgx_vma_fault(), when running completely out of reclaimable pages, this
> > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
> > 
> > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> > 	if (IS_ERR(entry)) {
> > 		mutex_unlock(&encl->lock);
> > 
> > 		if (PTR_ERR(entry) == -EBUSY)
> > 			return VM_FAULT_NOPAGE;
> > 
> > 		return VM_FAULT_SIGBUS;
> > 	}
> > 
> > Not sure if those should be re-ordered that would keep the process stuck up
> > until there is something to reclaim. Now we use NOPAGE to address situation
> > when there is actually something to reclaim but because of locking side of
> > things we pass reclaim=false to sgx_alloc_epc_page().
> > 
> > So this is kind of OOM behaviour how it works now instead of stalling
> > processes.
> 
> Right, I looked at the original email at was really a page fault
> that was catched. The above theory cannot possibly hold, as the
> process does not exit with a bus error.
> 
> I looked next to sgx_encl_eaug_page(), and found this:
> 
>         encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
>         if (IS_ERR(encl_page))
>                 return VM_FAULT_OOM;
> 
> This is AFAIK the only code path in sgx_vma_fault() flow that
> allocates non-EPC memory, and the code paths where EPC allocation
> fails the result would be SIGBUS.
> 
> So perhaps allocation is failing here. You could pretty easily
> trace allocations with bpftrace and kretprobe to see if this is
> what is happening, e.g. in one terminal:
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ { printf("%d\n", retval); }'

Should be

sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ { printf("%d\n", retval); }'

BR, Jarkko

Dhanraj, Vijay Aug. 11, 2022, 2:01 a.m. UTC | #14

> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Wednesday, August 10, 2022 6:51 PM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> <haitao.huang@intel.com>
> Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> 
> On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> > On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> > > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > Sent: Tuesday, August 9, 2022 11:53 AM
> > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > > > Haitao <haitao.huang@intel.com>
> > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > >
> > > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > > > > >
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > `augment_via_eaccept_long`
> > > > > > >
> > > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen wrote:
> > > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen
> wrote:
> > > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay
> wrote:
> > > > > > > > > >
> > > > > > > > > > > -----Original Message-----
> > > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > > > > <reinette.chatre@intel.com>;
> > > > > > > > > > > dave.hansen@linux.intel.com; Huang, Haitao
> > > > > > > > > > > <haitao.huang@intel.com>
> > > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > > > > > `augment_via_eaccept_long`
> > > > > > > > > > >
> > > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > > > > > vijay.dhanraj@intel.com
> > > > > > > wrote:
> > > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > > > >
> > > > > > > > > > > > This commit adds a new test case which is same as
> > > > > > > > > > > > `augment_via_eaccept` but adds more number of EPC
> > > > > > > > > > > > pages to stress test
> > > > > > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Vijay Dhanraj
> > > > > > > > > > > > <vijay.dhanraj@intel.com>
> > > > > > > > > > > > Signed-off-by: Haitao Huang
> > > > > > > > > > > > <haitao.huang@linux.intel.com>
> > > > > > > > > > >
> > > > > > > > > > > Hey, to reproduce the original issue: does it
> > > > > > > > > > > reproduce on VM or should I run baremetal kernel?
> > > > > > > > > > >
> > > > > > > > > > > BR, Jarkko
> > > > > > > > > >
> > > > > > > > > > Hi Jarkko, The issue should be reproducible on baremetal
> kernel.
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > >
> > > > > > > > I need comment out other tests in order to make sane out
> > > > > > > > of this
> > > > > > > > :-)
> > > > > > > >
> > > > > > > > Mentioning this because came into realization that stress
> > > > > > > > tests should be IMHO moved each to a separate binary (so
> > > > > > > > that they can be run separately). Just a note (TODO) to myself.
> > > > > > > >
> > > > > > > > I'll work on this today again and *possibly* split your
> > > > > > > > test to its own application to get a starting point for
> forementioned.
> > > > > > >
> > > > > > > I got
> > > > > > >
> > > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > > total_size = 8192,
> > > > > > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test
> enclave:
> > > > > > > total_size = 12288, seg->size = 4096 #
> > > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > > total_size = 36864,
> > > > > > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test
> enclave:
> > > > > > > total_size = 40960, seg->size = 4096 #
> > > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> > > > > enclave...
> > > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to
> > > > > > > run EACCEPT for each page of 8589934592 bytes may take a while
> ...
> > > > > > > #            OK  enclave.augment_via_eaccept_long
> > > > > > >
> > > > > > > The CPU used for testing was according to /proc/cpuinfo:
> > > > > > >
> > > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > > > > >
> > > > > > > I have couple of queries:
> > > > > > >
> > > > > > > 1. Is it possible to get dmesg output?
> > > > > > I did check the dmesg output but couldn't find anything
> > > > > > related to the
> > > > > failure. Just the general log messages.
> > > > > >
> > > > > > > 2. Do I have to repeat the test multiple times, or does it
> > > > > > >    occur unconditionaly?
> > > > > > >
> > > > > > I was able to repro every time but it was a bit sporadic for Haitao.
> > > > > >
> > > > > > > BR, Jarkko
> > > > > >
> > > > > > Also, did you set the PRMRR size to 2GB per socket in the
> > > > > > BIOS? The issue is only reproduced for oversubscribed
> > > > > > scenario. When I set my PRMRR to 64GB per socket, I wasn't able to
> repro the issue.
> > > > >
> > > > > I need to revisit this.
> > > > >
> > > > > Can you simply run test_sgx with gdb and see where it hits?
> > > > > HOST_CFLAGS has apparently "-g" already.
> > > > >
> > > > > > Regards, Vijay
> > > > >
> > > > > BR, Jarkko
> > > >
> > > > I am able to repro the issue when I reduce the PRMRR to 2B/socket but
> not but not able to break on the assertion failure with gdb. I also enabled
> debug attribute in the secs but still no avail. Anything I am missing here?
> > > >
> > > > diff --git a/tools/testing/selftests/sgx/load.c
> > > > b/tools/testing/selftests/sgx/load.c
> > > > index 7de1b15c90b1..c4bccd3f5f17 100644
> > > > --- a/tools/testing/selftests/sgx/load.c
> > > > +++ b/tools/testing/selftests/sgx/load.c
> > > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> > > >
> > > >         memset(secs, 0, sizeof(*secs));
> > > >         secs->ssa_frame_size = 1;
> > > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> > > >         secs->xfrm = 3;
> > > >         secs->base = encl->encl_base;
> > > >         secs->size = encl->encl_size;
> > > >
> > > > Regards, Vijay
> > >
> > > I get also full pass with 2GB configuration (and also observed that
> > > kselftest runs much faster with this configuration).
> > >
> > > But I looked at sgx_alloc_epc_page() and saw this:
> > >
> > >                if (list_empty(&sgx_active_page_list))
> > >                         return ERR_PTR(-ENOMEM);
> > >
> > >                 if (!reclaim) {
> > >                         page = ERR_PTR(-EBUSY);
> > >                         break;
> > >                 }
> > >
> > > In sgx_vma_fault(), when running completely out of reclaimable
> > > pages, this causes VM_FAULT_SIGBUS returned instead of
> VM_FAULT_NOPAGE:
> > >
> > > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> > > 	if (IS_ERR(entry)) {
> > > 		mutex_unlock(&encl->lock);
> > >
> > > 		if (PTR_ERR(entry) == -EBUSY)
> > > 			return VM_FAULT_NOPAGE;
> > >
> > > 		return VM_FAULT_SIGBUS;
> > > 	}
> > >
> > > Not sure if those should be re-ordered that would keep the process
> > > stuck up until there is something to reclaim. Now we use NOPAGE to
> > > address situation when there is actually something to reclaim but
> > > because of locking side of things we pass reclaim=false to
> sgx_alloc_epc_page().
> > >
> > > So this is kind of OOM behaviour how it works now instead of
> > > stalling processes.
> >
> > Right, I looked at the original email at was really a page fault that
> > was catched. The above theory cannot possibly hold, as the process
> > does not exit with a bus error.
> >
> > I looked next to sgx_encl_eaug_page(), and found this:
> >
> >         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
> secinfo_flags);
> >         if (IS_ERR(encl_page))
> >                 return VM_FAULT_OOM;
> >
> > This is AFAIK the only code path in sgx_vma_fault() flow that
> > allocates non-EPC memory, and the code paths where EPC allocation
> > fails the result would be SIGBUS.
> >
> > So perhaps allocation is failing here. You could pretty easily trace
> > allocations with bpftrace and kretprobe to see if this is what is
> > happening, e.g. in one terminal:
> >
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ { printf("%d\n",
> retval); }'
> 
> Should be
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ { printf("%d\n",
> retval); }'
> 
> BR, Jarkko

Thanks let me check and get back to you.

Regards, Vijay

Haitao Huang Aug. 12, 2022, 2:29 a.m. UTC | #15

Hi Jarkko

On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>  
wrote:

> On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
>> On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
>> > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
>> > >
>> > >
>> > > > -----Original Message-----
>> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>> > > > Sent: Tuesday, August 9, 2022 11:53 AM
>> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,  
>> Haitao
>> > > > <haitao.huang@intel.com>
>> > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
>> > > >
>> > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
>> > > > >
>> > > > >
>> > > > > > -----Original Message-----
>> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>> > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
>> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;  
>> Huang,
>> > > > > > Haitao <haitao.huang@intel.com>
>> > > > > > Subject: Re: [PATCH] Add SGX selftest  
>> `augment_via_eaccept_long`
>> > > > > >
>> > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen  
>> wrote:
>> > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen  
>> wrote:
>> > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay  
>> wrote:
>> > > > > > > > >
>> > > > > > > > > > -----Original Message-----
>> > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>> > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
>> > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>> > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>> > > > > > > > > > <reinette.chatre@intel.com>;  
>> dave.hansen@linux.intel.com;
>> > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
>> > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
>> > > > > > > > > > `augment_via_eaccept_long`
>> > > > > > > > > >
>> > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
>> > > > > > > > > > vijay.dhanraj@intel.com
>> > > > > > wrote:
>> > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
>> > > > > > > > > > >
>> > > > > > > > > > > This commit adds a new test case which is same as
>> > > > > > > > > > > `augment_via_eaccept` but adds more number of EPC  
>> pages to
>> > > > > > > > > > > stress test
>> > > > > > > > > > `EAUG` via `EACCEPT`.
>> > > > > > > > > > >
>> > > > > > > > > > > Signed-off-by: Vijay Dhanraj  
>> <vijay.dhanraj@intel.com>
>> > > > > > > > > > > Signed-off-by: Haitao Huang  
>> <haitao.huang@linux.intel.com>
>> > > > > > > > > >
>> > > > > > > > > > Hey, to reproduce the original issue: does it  
>> reproduce on
>> > > > > > > > > > VM or should I run baremetal kernel?
>> > > > > > > > > >
>> > > > > > > > > > BR, Jarkko
>> > > > > > > > >
>> > > > > > > > > Hi Jarkko, The issue should be reproducible on  
>> baremetal kernel.
>> > > > > > > >
>> > > > > > > > Thanks.
>> > > > > > >
>> > > > > > > I need comment out other tests in order to make sane out of  
>> this
>> > > > > > > :-)
>> > > > > > >
>> > > > > > > Mentioning this because came into realization that stress  
>> tests
>> > > > > > > should be IMHO moved each to a separate binary (so that  
>> they can
>> > > > > > > be run separately). Just a note (TODO) to myself.
>> > > > > > >
>> > > > > > > I'll work on this today again and *possibly* split your  
>> test to
>> > > > > > > its own application to get a starting point for  
>> forementioned.
>> > > > > >
>> > > > > > I got
>> > > > > >
>> > > > > > #  RUN           enclave.augment_via_eaccept_long ...
>> > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:  
>> total_size =
>> > > > > > 8192,
>> > > > > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test  
>> enclave:
>> > > > > > total_size = 12288, seg->size = 4096 #
>> > > > > > main.c:1241:augment_via_eaccept_long:test enclave: total_size  
>> =
>> > > > > > 36864,
>> > > > > > seg->size = 24576 # main.c:1241:augment_via_eaccept_long:test  
>> enclave:
>> > > > > > total_size = 40960, seg->size = 4096 #
>> > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
>> > > > enclave...
>> > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run
>> > > > > > EACCEPT for each page of 8589934592 bytes may take a while ...
>> > > > > > #            OK  enclave.augment_via_eaccept_long
>> > > > > >
>> > > > > > The CPU used for testing was according to /proc/cpuinfo:
>> > > > > >
>> > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
>> > > > > >
>> > > > > > I have couple of queries:
>> > > > > >
>> > > > > > 1. Is it possible to get dmesg output?
>> > > > > I did check the dmesg output but couldn't find anything related  
>> to the
>> > > > failure. Just the general log messages.
>> > > > >
>> > > > > > 2. Do I have to repeat the test multiple times, or does it
>> > > > > >    occur unconditionaly?
>> > > > > >
>> > > > > I was able to repro every time but it was a bit sporadic for  
>> Haitao.
>> > > > >
>> > > > > > BR, Jarkko
>> > > > >
>> > > > > Also, did you set the PRMRR size to 2GB per socket in the BIOS?  
>> The
>> > > > > issue is only reproduced for oversubscribed scenario. When I  
>> set my
>> > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
>> > > >
>> > > > I need to revisit this.
>> > > >
>> > > > Can you simply run test_sgx with gdb and see where it hits?
>> > > > HOST_CFLAGS has apparently "-g" already.
>> > > >
>> > > > > Regards, Vijay
>> > > >
>> > > > BR, Jarkko
>> > >
>> > > I am able to repro the issue when I reduce the PRMRR to 2B/socket  
>> but not but not able to break on the assertion failure with gdb. I also  
>> enabled debug attribute in the secs but still no avail. Anything I am  
>> missing here?
>> > >
>> > > diff --git a/tools/testing/selftests/sgx/load.c  
>> b/tools/testing/selftests/sgx/load.c
>> > > index 7de1b15c90b1..c4bccd3f5f17 100644
>> > > --- a/tools/testing/selftests/sgx/load.c
>> > > +++ b/tools/testing/selftests/sgx/load.c
>> > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
>> > >
>> > >         memset(secs, 0, sizeof(*secs));
>> > >         secs->ssa_frame_size = 1;
>> > > -       secs->attributes = SGX_ATTR_MODE64BIT;
>> > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
>> > >         secs->xfrm = 3;
>> > >         secs->base = encl->encl_base;
>> > >         secs->size = encl->encl_size;
>> > >
>> > > Regards, Vijay
>> >
>> > I get also full pass with 2GB configuration (and also observed that
>> > kselftest runs much faster with this configuration).
>> >
>> > But I looked at sgx_alloc_epc_page() and saw this:
>> >
>> >                if (list_empty(&sgx_active_page_list))
>> >                         return ERR_PTR(-ENOMEM);
>> >
>> >                 if (!reclaim) {
>> >                         page = ERR_PTR(-EBUSY);
>> >                         break;
>> >                 }
>> >
>> > In sgx_vma_fault(), when running completely out of reclaimable pages,  
>> this
>> > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
>> >
>> > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
>> > 	if (IS_ERR(entry)) {
>> > 		mutex_unlock(&encl->lock);
>> >
>> > 		if (PTR_ERR(entry) == -EBUSY)
>> > 			return VM_FAULT_NOPAGE;
>> >
>> > 		return VM_FAULT_SIGBUS;
>> > 	}
>> >
>> > Not sure if those should be re-ordered that would keep the process  
>> stuck up
>> > until there is something to reclaim. Now we use NOPAGE to address  
>> situation
>> > when there is actually something to reclaim but because of locking  
>> side of
>> > things we pass reclaim=false to sgx_alloc_epc_page().
>> >
>> > So this is kind of OOM behaviour how it works now instead of stalling
>> > processes.
>>
>> Right, I looked at the original email at was really a page fault
>> that was catched. The above theory cannot possibly hold, as the
>> process does not exit with a bus error.
>>
>> I looked next to sgx_encl_eaug_page(), and found this:
>>
>>         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,  
>> secinfo_flags);
>>         if (IS_ERR(encl_page))
>>                 return VM_FAULT_OOM;
>>
>> This is AFAIK the only code path in sgx_vma_fault() flow that
>> allocates non-EPC memory, and the code paths where EPC allocation
>> fails the result would be SIGBUS.
>>
>> So perhaps allocation is failing here. You could pretty easily
>> trace allocations with bpftrace and kretprobe to see if this is
>> what is happening, e.g. in one terminal:
>>
>> sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ { printf("%d\n",  
>> retval); }'
>
> Should be
>
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {  
> printf("%d\n", retval); }'
>
> BR, Jarkko

I tried these probs and got following results when failure happens (not  
always happen on my device).

sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / {  
printf("%X\n", retval); }'

--> lots of negative values, I believe they are valid addresses in  
unsigned long type. So I looked up IS_ERR_VALUE macro and translated it in  
following probes.

sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >=  
(uint64)(-4095)/ { printf("%X\n", retval); }'

--> none triggered

sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >=  
(uint64)(-4095)/ { printf("%X\n", retval); }'

--> FFFFFFF0 printed, which I believe is -EBUSY.

BR
Haitao

Dhanraj, Vijay Aug. 12, 2022, 3:23 a.m. UTC | #16

> -----Original Message-----
> From: Haitao Huang <haitao.huang@linux.intel.com>
> Sent: Thursday, August 11, 2022 7:30 PM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>; Jarkko Sakkinen
> <jarkko@kernel.org>
> Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> <haitao.huang@intel.com>
> Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> 
> Hi Jarkko
> 
> On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>
> wrote:
> 
> > On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> >> On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> >> > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> >> > >
> >> > >
> >> > > > -----Original Message-----
> >> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> >> > > > Sent: Tuesday, August 9, 2022 11:53 AM
> >> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> >> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> >> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> >> > > > Huang,
> >> Haitao
> >> > > > <haitao.huang@intel.com>
> >> > > > Subject: Re: [PATCH] Add SGX selftest
> >> > > > `augment_via_eaccept_long`
> >> > > >
> >> > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> >> > > > >
> >> > > > >
> >> > > > > > -----Original Message-----
> >> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> >> > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> >> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> >> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> >> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> >> Huang,
> >> > > > > > Haitao <haitao.huang@intel.com>
> >> > > > > > Subject: Re: [PATCH] Add SGX selftest
> >> `augment_via_eaccept_long`
> >> > > > > >
> >> > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen
> >> wrote:
> >> > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen
> >> wrote:
> >> > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj,
> >> > > > > > > > Vijay
> >> wrote:
> >> > > > > > > > >
> >> > > > > > > > > > -----Original Message-----
> >> > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> >> > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> >> > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> >> > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> >> > > > > > > > > > <reinette.chatre@intel.com>;
> >> dave.hansen@linux.intel.com;
> >> > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> >> > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> >> > > > > > > > > > `augment_via_eaccept_long`
> >> > > > > > > > > >
> >> > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> >> > > > > > > > > > vijay.dhanraj@intel.com
> >> > > > > > wrote:
> >> > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> >> > > > > > > > > > >
> >> > > > > > > > > > > This commit adds a new test case which is same as
> >> > > > > > > > > > > `augment_via_eaccept` but adds more number of EPC
> >> pages to
> >> > > > > > > > > > > stress test
> >> > > > > > > > > > `EAUG` via `EACCEPT`.
> >> > > > > > > > > > >
> >> > > > > > > > > > > Signed-off-by: Vijay Dhanraj
> >> <vijay.dhanraj@intel.com>
> >> > > > > > > > > > > Signed-off-by: Haitao Huang
> >> <haitao.huang@linux.intel.com>
> >> > > > > > > > > >
> >> > > > > > > > > > Hey, to reproduce the original issue: does it
> >> reproduce on
> >> > > > > > > > > > VM or should I run baremetal kernel?
> >> > > > > > > > > >
> >> > > > > > > > > > BR, Jarkko
> >> > > > > > > > >
> >> > > > > > > > > Hi Jarkko, The issue should be reproducible on
> >> baremetal kernel.
> >> > > > > > > >
> >> > > > > > > > Thanks.
> >> > > > > > >
> >> > > > > > > I need comment out other tests in order to make sane out
> >> > > > > > > of
> >> this
> >> > > > > > > :-)
> >> > > > > > >
> >> > > > > > > Mentioning this because came into realization that stress
> >> tests
> >> > > > > > > should be IMHO moved each to a separate binary (so that
> >> they can
> >> > > > > > > be run separately). Just a note (TODO) to myself.
> >> > > > > > >
> >> > > > > > > I'll work on this today again and *possibly* split your
> >> test to
> >> > > > > > > its own application to get a starting point for
> >> forementioned.
> >> > > > > >
> >> > > > > > I got
> >> > > > > >
> >> > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> >> > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
> >> total_size =
> >> > > > > > 8192,
> >> > > > > > seg->size = 8192 #
> >> > > > > > seg->main.c:1241:augment_via_eaccept_long:test
> >> enclave:
> >> > > > > > total_size = 12288, seg->size = 4096 #
> >> > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
> >> > > > > > total_size
> >> =
> >> > > > > > 36864,
> >> > > > > > seg->size = 24576 #
> >> > > > > > seg->main.c:1241:augment_via_eaccept_long:test
> >> enclave:
> >> > > > > > total_size = 40960, seg->size = 4096 #
> >> > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end
> >> > > > > > of
> >> > > > enclave...
> >> > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to
> >> > > > > > run EACCEPT for each page of 8589934592 bytes may take a while
> ...
> >> > > > > > #            OK  enclave.augment_via_eaccept_long
> >> > > > > >
> >> > > > > > The CPU used for testing was according to /proc/cpuinfo:
> >> > > > > >
> >> > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> >> > > > > >
> >> > > > > > I have couple of queries:
> >> > > > > >
> >> > > > > > 1. Is it possible to get dmesg output?
> >> > > > > I did check the dmesg output but couldn't find anything
> >> > > > > related
> >> to the
> >> > > > failure. Just the general log messages.
> >> > > > >
> >> > > > > > 2. Do I have to repeat the test multiple times, or does it
> >> > > > > >    occur unconditionaly?
> >> > > > > >
> >> > > > > I was able to repro every time but it was a bit sporadic for
> >> Haitao.
> >> > > > >
> >> > > > > > BR, Jarkko
> >> > > > >
> >> > > > > Also, did you set the PRMRR size to 2GB per socket in the BIOS?
> >> The
> >> > > > > issue is only reproduced for oversubscribed scenario. When I
> >> set my
> >> > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> >> > > >
> >> > > > I need to revisit this.
> >> > > >
> >> > > > Can you simply run test_sgx with gdb and see where it hits?
> >> > > > HOST_CFLAGS has apparently "-g" already.
> >> > > >
> >> > > > > Regards, Vijay
> >> > > >
> >> > > > BR, Jarkko
> >> > >
> >> > > I am able to repro the issue when I reduce the PRMRR to 2B/socket
> >> but not but not able to break on the assertion failure with gdb. I
> >> also enabled debug attribute in the secs but still no avail. Anything
> >> I am missing here?
> >> > >
> >> > > diff --git a/tools/testing/selftests/sgx/load.c
> >> b/tools/testing/selftests/sgx/load.c
> >> > > index 7de1b15c90b1..c4bccd3f5f17 100644
> >> > > --- a/tools/testing/selftests/sgx/load.c
> >> > > +++ b/tools/testing/selftests/sgx/load.c
> >> > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> >> > >
> >> > >         memset(secs, 0, sizeof(*secs));
> >> > >         secs->ssa_frame_size = 1;
> >> > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> >> > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> >> > >         secs->xfrm = 3;
> >> > >         secs->base = encl->encl_base;
> >> > >         secs->size = encl->encl_size;
> >> > >
> >> > > Regards, Vijay
> >> >
> >> > I get also full pass with 2GB configuration (and also observed that
> >> > kselftest runs much faster with this configuration).
> >> >
> >> > But I looked at sgx_alloc_epc_page() and saw this:
> >> >
> >> >                if (list_empty(&sgx_active_page_list))
> >> >                         return ERR_PTR(-ENOMEM);
> >> >
> >> >                 if (!reclaim) {
> >> >                         page = ERR_PTR(-EBUSY);
> >> >                         break;
> >> >                 }
> >> >
> >> > In sgx_vma_fault(), when running completely out of reclaimable
> >> > pages,
> >> this
> >> > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
> >> >
> >> > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> >> > 	if (IS_ERR(entry)) {
> >> > 		mutex_unlock(&encl->lock);
> >> >
> >> > 		if (PTR_ERR(entry) == -EBUSY)
> >> > 			return VM_FAULT_NOPAGE;
> >> >
> >> > 		return VM_FAULT_SIGBUS;
> >> > 	}
> >> >
> >> > Not sure if those should be re-ordered that would keep the process
> >> stuck up
> >> > until there is something to reclaim. Now we use NOPAGE to address
> >> situation
> >> > when there is actually something to reclaim but because of locking
> >> side of
> >> > things we pass reclaim=false to sgx_alloc_epc_page().
> >> >
> >> > So this is kind of OOM behaviour how it works now instead of
> >> > stalling processes.
> >>
> >> Right, I looked at the original email at was really a page fault that
> >> was catched. The above theory cannot possibly hold, as the process
> >> does not exit with a bus error.
> >>
> >> I looked next to sgx_encl_eaug_page(), and found this:
> >>
> >>         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
> >> secinfo_flags);
> >>         if (IS_ERR(encl_page))
> >>                 return VM_FAULT_OOM;
> >>
> >> This is AFAIK the only code path in sgx_vma_fault() flow that
> >> allocates non-EPC memory, and the code paths where EPC allocation
> >> fails the result would be SIGBUS.
> >>
> >> So perhaps allocation is failing here. You could pretty easily trace
> >> allocations with bpftrace and kretprobe to see if this is what is
> >> happening, e.g. in one terminal:
> >>
> >> sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {
> >> printf("%d\n", retval); }'
> >
> > Should be
> >
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {
> > printf("%d\n", retval); }'
> >
> > BR, Jarkko
> 
> I tried these probs and got following results when failure happens (not
> always happen on my device).
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / { printf("%X\n",
> retval); }'
> 
> --> lots of negative values, I believe they are valid addresses in
> unsigned long type. So I looked up IS_ERR_VALUE macro and translated it in
> following probes.
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >= (uint64)(-4095)/ {
> printf("%X\n", retval); }'
> 
> --> none triggered
> 
> sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >= (uint64)(-4095)/ {
> printf("%X\n", retval); }'
> 
> --> FFFFFFF0 printed, which I believe is -EBUSY.
> 
> BR
> Haitao

I see the same behavior as Haitao. 
sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ { printf("%d\n", retval); } -> This one gave an error stdin:1:24-31: ERROR: Unknown struct/union: 'long'  

So switched to sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / { printf("%X\n", retval); }' as suggested by Haitao and do see lot of positive and negative values.

sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >= (uint64)(-4095)/ { printf("%X\n", retval); }' -> No output.

sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >= (uint64)(-4095)/ { printf("%X\n", retval); } -> FFFFFFF0 is printed.

Thanks, Vijay

Haitao Huang Aug. 12, 2022, 5:47 a.m. UTC | #17

On Thu, 11 Aug 2022 21:29:42 -0500, Haitao Huang  
<haitao.huang@linux.intel.com> wrote:

> Hi Jarkko
>
> On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>  
> wrote:
>
>> On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
>>> On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
>>> > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
>>> > >
>>> > >
>>> > > > -----Original Message-----
>>> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>>> > > > Sent: Tuesday, August 9, 2022 11:53 AM
>>> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>>> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>>> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,  
>>> Haitao
>>> > > > <haitao.huang@intel.com>
>>> > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
>>> > > >
>>> > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
>>> > > > >
>>> > > > >
>>> > > > > > -----Original Message-----
>>> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>>> > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
>>> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>>> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>>> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;  
>>> Huang,
>>> > > > > > Haitao <haitao.huang@intel.com>
>>> > > > > > Subject: Re: [PATCH] Add SGX selftest  
>>> `augment_via_eaccept_long`
>>> > > > > >
>>> > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen  
>>> wrote:
>>> > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen  
>>> wrote:
>>> > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj, Vijay  
>>> wrote:
>>> > > > > > > > >
>>> > > > > > > > > > -----Original Message-----
>>> > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>>> > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
>>> > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>>> > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>>> > > > > > > > > > <reinette.chatre@intel.com>;  
>>> dave.hansen@linux.intel.com;
>>> > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
>>> > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
>>> > > > > > > > > > `augment_via_eaccept_long`
>>> > > > > > > > > >
>>> > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
>>> > > > > > > > > > vijay.dhanraj@intel.com
>>> > > > > > wrote:
>>> > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
>>> > > > > > > > > > >
>>> > > > > > > > > > > This commit adds a new test case which is same as
>>> > > > > > > > > > > `augment_via_eaccept` but adds more number of EPC  
>>> pages to
>>> > > > > > > > > > > stress test
>>> > > > > > > > > > `EAUG` via `EACCEPT`.
>>> > > > > > > > > > >
>>> > > > > > > > > > > Signed-off-by: Vijay Dhanraj  
>>> <vijay.dhanraj@intel.com>
>>> > > > > > > > > > > Signed-off-by: Haitao Huang  
>>> <haitao.huang@linux.intel.com>
>>> > > > > > > > > >
>>> > > > > > > > > > Hey, to reproduce the original issue: does it  
>>> reproduce on
>>> > > > > > > > > > VM or should I run baremetal kernel?
>>> > > > > > > > > >
>>> > > > > > > > > > BR, Jarkko
>>> > > > > > > > >
>>> > > > > > > > > Hi Jarkko, The issue should be reproducible on  
>>> baremetal kernel.
>>> > > > > > > >
>>> > > > > > > > Thanks.
>>> > > > > > >
>>> > > > > > > I need comment out other tests in order to make sane out  
>>> of this
>>> > > > > > > :-)
>>> > > > > > >
>>> > > > > > > Mentioning this because came into realization that stress  
>>> tests
>>> > > > > > > should be IMHO moved each to a separate binary (so that  
>>> they can
>>> > > > > > > be run separately). Just a note (TODO) to myself.
>>> > > > > > >
>>> > > > > > > I'll work on this today again and *possibly* split your  
>>> test to
>>> > > > > > > its own application to get a starting point for  
>>> forementioned.
>>> > > > > >
>>> > > > > > I got
>>> > > > > >
>>> > > > > > #  RUN           enclave.augment_via_eaccept_long ...
>>> > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:  
>>> total_size =
>>> > > > > > 8192,
>>> > > > > > seg->size = 8192 # main.c:1241:augment_via_eaccept_long:test  
>>> enclave:
>>> > > > > > total_size = 12288, seg->size = 4096 #
>>> > > > > > main.c:1241:augment_via_eaccept_long:test enclave:  
>>> total_size =
>>> > > > > > 36864,
>>> > > > > > seg->size = 24576 #  
>>> main.c:1241:augment_via_eaccept_long:test enclave:
>>> > > > > > total_size = 40960, seg->size = 4096 #
>>> > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
>>> > > > enclave...
>>> > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to  
>>> run
>>> > > > > > EACCEPT for each page of 8589934592 bytes may take a while  
>>> ...
>>> > > > > > #            OK  enclave.augment_via_eaccept_long
>>> > > > > >
>>> > > > > > The CPU used for testing was according to /proc/cpuinfo:
>>> > > > > >
>>> > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
>>> > > > > >
>>> > > > > > I have couple of queries:
>>> > > > > >
>>> > > > > > 1. Is it possible to get dmesg output?
>>> > > > > I did check the dmesg output but couldn't find anything  
>>> related to the
>>> > > > failure. Just the general log messages.
>>> > > > >
>>> > > > > > 2. Do I have to repeat the test multiple times, or does it
>>> > > > > >    occur unconditionaly?
>>> > > > > >
>>> > > > > I was able to repro every time but it was a bit sporadic for  
>>> Haitao.
>>> > > > >
>>> > > > > > BR, Jarkko
>>> > > > >
>>> > > > > Also, did you set the PRMRR size to 2GB per socket in the  
>>> BIOS? The
>>> > > > > issue is only reproduced for oversubscribed scenario. When I  
>>> set my
>>> > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
>>> > > >
>>> > > > I need to revisit this.
>>> > > >
>>> > > > Can you simply run test_sgx with gdb and see where it hits?
>>> > > > HOST_CFLAGS has apparently "-g" already.
>>> > > >
>>> > > > > Regards, Vijay
>>> > > >
>>> > > > BR, Jarkko
>>> > >
>>> > > I am able to repro the issue when I reduce the PRMRR to 2B/socket  
>>> but not but not able to break on the assertion failure with gdb. I  
>>> also enabled debug attribute in the secs but still no avail. Anything  
>>> I am missing here?
>>> > >
>>> > > diff --git a/tools/testing/selftests/sgx/load.c  
>>> b/tools/testing/selftests/sgx/load.c
>>> > > index 7de1b15c90b1..c4bccd3f5f17 100644
>>> > > --- a/tools/testing/selftests/sgx/load.c
>>> > > +++ b/tools/testing/selftests/sgx/load.c
>>> > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
>>> > >
>>> > >         memset(secs, 0, sizeof(*secs));
>>> > >         secs->ssa_frame_size = 1;
>>> > > -       secs->attributes = SGX_ATTR_MODE64BIT;
>>> > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
>>> > >         secs->xfrm = 3;
>>> > >         secs->base = encl->encl_base;
>>> > >         secs->size = encl->encl_size;
>>> > >
>>> > > Regards, Vijay
>>> >
>>> > I get also full pass with 2GB configuration (and also observed that
>>> > kselftest runs much faster with this configuration).
>>> >
>>> > But I looked at sgx_alloc_epc_page() and saw this:
>>> >
>>> >                if (list_empty(&sgx_active_page_list))
>>> >                         return ERR_PTR(-ENOMEM);
>>> >
>>> >                 if (!reclaim) {
>>> >                         page = ERR_PTR(-EBUSY);
>>> >                         break;
>>> >                 }
>>> >
>>> > In sgx_vma_fault(), when running completely out of reclaimable  
>>> pages, this
>>> > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
>>> >
>>> > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
>>> > 	if (IS_ERR(entry)) {
>>> > 		mutex_unlock(&encl->lock);
>>> >
>>> > 		if (PTR_ERR(entry) == -EBUSY)
>>> > 			return VM_FAULT_NOPAGE;
>>> >
>>> > 		return VM_FAULT_SIGBUS;
>>> > 	}
>>> >
>>> > Not sure if those should be re-ordered that would keep the process  
>>> stuck up
>>> > until there is something to reclaim. Now we use NOPAGE to address  
>>> situation
>>> > when there is actually something to reclaim but because of locking  
>>> side of
>>> > things we pass reclaim=false to sgx_alloc_epc_page().
>>> >
>>> > So this is kind of OOM behaviour how it works now instead of stalling
>>> > processes.
>>>
>>> Right, I looked at the original email at was really a page fault
>>> that was catched. The above theory cannot possibly hold, as the
>>> process does not exit with a bus error.
>>>
>>> I looked next to sgx_encl_eaug_page(), and found this:
>>>
>>>         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,  
>>> secinfo_flags);
>>>         if (IS_ERR(encl_page))
>>>                 return VM_FAULT_OOM;
>>>
>>> This is AFAIK the only code path in sgx_vma_fault() flow that
>>> allocates non-EPC memory, and the code paths where EPC allocation
>>> fails the result would be SIGBUS.
>>>
>>> So perhaps allocation is failing here. You could pretty easily
>>> trace allocations with bpftrace and kretprobe to see if this is
>>> what is happening, e.g. in one terminal:
>>>
>>> sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {  
>>> printf("%d\n", retval); }'
>>
>> Should be
>>
>> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {  
>> printf("%d\n", retval); }'
>>
>> BR, Jarkko
>
> I tried these probs and got following results when failure happens (not  
> always happen on my device).
>
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / {  
> printf("%X\n", retval); }'
>
> --> lots of negative values, I believe they are valid addresses in  
> unsigned long type. So I looked up IS_ERR_VALUE macro and translated it  
> in following probes.
>
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >=  
> (uint64)(-4095)/ { printf("%X\n", retval); }'
>
> --> none triggered
>
> sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >=  
> (uint64)(-4095)/ { printf("%X\n", retval); }'
>
> --> FFFFFFF0 printed, which I believe is -EBUSY.
>

Using this probe:
  sudo bpftrace -e 'kr:sgx_encl_grow /(uint64)retval >= (uint64)(-4095)/ {  
printf("%lX\n", retval); }'

I found sgx_encl_grow could also return -EBUSY when allocating the VA page  
for the page to be EAUG'd.
But we did not handle the case in sgx_encl_eaug_page, and follow changes  
seem to fix the problem with limited tests:

diff --git a/arch/x86/kernel/cpu/sgx/encl.c  
b/arch/x86/kernel/cpu/sgx/encl.c
index 24c1bb8eb196..527de07c0e0b 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -345,7 +345,11 @@ static vm_fault_t sgx_encl_eaug_page(struct  
vm_area_struct *vma,

         va_page = sgx_encl_grow(encl, false);
         if (IS_ERR(va_page))
+    {
+               if (PTR_ERR(va_page) == -EBUSY)
+                       vmret =  VM_FAULT_NOPAGE;
                 goto err_out_epc;
+    }

         if (va_page)
                 list_add(&va_page->list, &encl->va_pages);

Will do more testing to verify. Let me know if the changes make sense.

Thanks
Haitao

Jarkko Sakkinen Aug. 14, 2022, 6:05 p.m. UTC | #18

On Thu, Aug 11, 2022 at 09:29:42PM -0500, Haitao Huang wrote:
> Hi Jarkko
> 
> On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>
> wrote:
> 
> > On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> > > On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> > > > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > Sent: Tuesday, August 9, 2022 11:53 AM
> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > Huang, Haitao
> > > > > > <haitao.huang@intel.com>
> > > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > > >
> > > > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > > > > > >
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > Huang,
> > > > > > > > Haitao <haitao.huang@intel.com>
> > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > `augment_via_eaccept_long`
> > > > > > > >
> > > > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen
> > > wrote:
> > > > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko
> > > Sakkinen wrote:
> > > > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj,
> > > Vijay wrote:
> > > > > > > > > > >
> > > > > > > > > > > > -----Original Message-----
> > > > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > > > > > <reinette.chatre@intel.com>;
> > > dave.hansen@linux.intel.com;
> > > > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > > > > > > `augment_via_eaccept_long`
> > > > > > > > > > > >
> > > > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > > > > > > vijay.dhanraj@intel.com
> > > > > > > > wrote:
> > > > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > > > > >
> > > > > > > > > > > > > This commit adds a new test case which is same as
> > > > > > > > > > > > > `augment_via_eaccept` but adds more number of
> > > EPC pages to
> > > > > > > > > > > > > stress test
> > > > > > > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Vijay Dhanraj
> > > <vijay.dhanraj@intel.com>
> > > > > > > > > > > > > Signed-off-by: Haitao Huang
> > > <haitao.huang@linux.intel.com>
> > > > > > > > > > > >
> > > > > > > > > > > > Hey, to reproduce the original issue: does it
> > > reproduce on
> > > > > > > > > > > > VM or should I run baremetal kernel?
> > > > > > > > > > > >
> > > > > > > > > > > > BR, Jarkko
> > > > > > > > > > >
> > > > > > > > > > > Hi Jarkko, The issue should be reproducible on
> > > baremetal kernel.
> > > > > > > > > >
> > > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > > > I need comment out other tests in order to make sane out
> > > of this
> > > > > > > > > :-)
> > > > > > > > >
> > > > > > > > > Mentioning this because came into realization that
> > > stress tests
> > > > > > > > > should be IMHO moved each to a separate binary (so that
> > > they can
> > > > > > > > > be run separately). Just a note (TODO) to myself.
> > > > > > > > >
> > > > > > > > > I'll work on this today again and *possibly* split your
> > > test to
> > > > > > > > > its own application to get a starting point for
> > > forementioned.
> > > > > > > >
> > > > > > > > I got
> > > > > > > >
> > > > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
> > > total_size =
> > > > > > > > 8192,
> > > > > > > > seg->size = 8192 #
> > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > > > total_size = 12288, seg->size = 4096 #
> > > > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > total_size =
> > > > > > > > 36864,
> > > > > > > > seg->size = 24576 #
> > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > > > total_size = 40960, seg->size = 4096 #
> > > > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> > > > > > enclave...
> > > > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to run
> > > > > > > > EACCEPT for each page of 8589934592 bytes may take a while ...
> > > > > > > > #            OK  enclave.augment_via_eaccept_long
> > > > > > > >
> > > > > > > > The CPU used for testing was according to /proc/cpuinfo:
> > > > > > > >
> > > > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > > > > > >
> > > > > > > > I have couple of queries:
> > > > > > > >
> > > > > > > > 1. Is it possible to get dmesg output?
> > > > > > > I did check the dmesg output but couldn't find anything
> > > related to the
> > > > > > failure. Just the general log messages.
> > > > > > >
> > > > > > > > 2. Do I have to repeat the test multiple times, or does it
> > > > > > > >    occur unconditionaly?
> > > > > > > >
> > > > > > > I was able to repro every time but it was a bit sporadic for
> > > Haitao.
> > > > > > >
> > > > > > > > BR, Jarkko
> > > > > > >
> > > > > > > Also, did you set the PRMRR size to 2GB per socket in the
> > > BIOS? The
> > > > > > > issue is only reproduced for oversubscribed scenario. When I
> > > set my
> > > > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > > > > >
> > > > > > I need to revisit this.
> > > > > >
> > > > > > Can you simply run test_sgx with gdb and see where it hits?
> > > > > > HOST_CFLAGS has apparently "-g" already.
> > > > > >
> > > > > > > Regards, Vijay
> > > > > >
> > > > > > BR, Jarkko
> > > > >
> > > > > I am able to repro the issue when I reduce the PRMRR to
> > > 2B/socket but not but not able to break on the assertion failure
> > > with gdb. I also enabled debug attribute in the secs but still no
> > > avail. Anything I am missing here?
> > > > >
> > > > > diff --git a/tools/testing/selftests/sgx/load.c
> > > b/tools/testing/selftests/sgx/load.c
> > > > > index 7de1b15c90b1..c4bccd3f5f17 100644
> > > > > --- a/tools/testing/selftests/sgx/load.c
> > > > > +++ b/tools/testing/selftests/sgx/load.c
> > > > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> > > > >
> > > > >         memset(secs, 0, sizeof(*secs));
> > > > >         secs->ssa_frame_size = 1;
> > > > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > > > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> > > > >         secs->xfrm = 3;
> > > > >         secs->base = encl->encl_base;
> > > > >         secs->size = encl->encl_size;
> > > > >
> > > > > Regards, Vijay
> > > >
> > > > I get also full pass with 2GB configuration (and also observed that
> > > > kselftest runs much faster with this configuration).
> > > >
> > > > But I looked at sgx_alloc_epc_page() and saw this:
> > > >
> > > >                if (list_empty(&sgx_active_page_list))
> > > >                         return ERR_PTR(-ENOMEM);
> > > >
> > > >                 if (!reclaim) {
> > > >                         page = ERR_PTR(-EBUSY);
> > > >                         break;
> > > >                 }
> > > >
> > > > In sgx_vma_fault(), when running completely out of reclaimable
> > > pages, this
> > > > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
> > > >
> > > > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> > > > 	if (IS_ERR(entry)) {
> > > > 		mutex_unlock(&encl->lock);
> > > >
> > > > 		if (PTR_ERR(entry) == -EBUSY)
> > > > 			return VM_FAULT_NOPAGE;
> > > >
> > > > 		return VM_FAULT_SIGBUS;
> > > > 	}
> > > >
> > > > Not sure if those should be re-ordered that would keep the process
> > > stuck up
> > > > until there is something to reclaim. Now we use NOPAGE to address
> > > situation
> > > > when there is actually something to reclaim but because of locking
> > > side of
> > > > things we pass reclaim=false to sgx_alloc_epc_page().
> > > >
> > > > So this is kind of OOM behaviour how it works now instead of stalling
> > > > processes.
> > > 
> > > Right, I looked at the original email at was really a page fault
> > > that was catched. The above theory cannot possibly hold, as the
> > > process does not exit with a bus error.
> > > 
> > > I looked next to sgx_encl_eaug_page(), and found this:
> > > 
> > >         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
> > > secinfo_flags);
> > >         if (IS_ERR(encl_page))
> > >                 return VM_FAULT_OOM;
> > > 
> > > This is AFAIK the only code path in sgx_vma_fault() flow that
> > > allocates non-EPC memory, and the code paths where EPC allocation
> > > fails the result would be SIGBUS.
> > > 
> > > So perhaps allocation is failing here. You could pretty easily
> > > trace allocations with bpftrace and kretprobe to see if this is
> > > what is happening, e.g. in one terminal:
> > > 
> > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {
> > > printf("%d\n", retval); }'
> > 
> > Should be
> > 
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {
> > printf("%d\n", retval); }'
> > 
> > BR, Jarkko
> 
> I tried these probs and got following results when failure happens (not
> always happen on my device).
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / {
> printf("%X\n", retval); }'
> 
> --> lots of negative values, I believe they are valid addresses in unsigned
> long type. So I looked up IS_ERR_VALUE macro and translated it in following
> probes.
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >= (uint64)(-4095)/
> { printf("%X\n", retval); }'

Thank you for refining the probe.

I'll add it to my collection:

https://github.com/jarkkojs/bpftrace-sgx

Despite the repository name it contains probes both for
SGX and AMD-SNP, and also TDX and ARMv9 in future once
I have ways to test them.

> 
> --> none triggered
> 
> sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >= (uint64)(-4095)/
> { printf("%X\n", retval); }'
> 
> --> FFFFFFF0 printed, which I believe is -EBUSY.

EBUSY should be fine, it will be retrigger the #PF handler
through round-trip.

Did you still encounter segfaults?

BR, Jarkko

Jarkko Sakkinen Aug. 14, 2022, 6:08 p.m. UTC | #19

On Fri, Aug 12, 2022 at 03:23:26AM +0000, Dhanraj, Vijay wrote:
> 
> 
> > -----Original Message-----
> > From: Haitao Huang <haitao.huang@linux.intel.com>
> > Sent: Thursday, August 11, 2022 7:30 PM
> > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>; Jarkko Sakkinen
> > <jarkko@kernel.org>
> > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang, Haitao
> > <haitao.huang@intel.com>
> > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > 
> > Hi Jarkko
> > 
> > On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>
> > wrote:
> > 
> > > On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> > >> On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> > >> > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > >> > >
> > >> > >
> > >> > > > -----Original Message-----
> > >> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > >> > > > Sent: Tuesday, August 9, 2022 11:53 AM
> > >> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > >> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > >> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > >> > > > Huang,
> > >> Haitao
> > >> > > > <haitao.huang@intel.com>
> > >> > > > Subject: Re: [PATCH] Add SGX selftest
> > >> > > > `augment_via_eaccept_long`
> > >> > > >
> > >> > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > >> > > > >
> > >> > > > >
> > >> > > > > > -----Original Message-----
> > >> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > >> > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > >> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > >> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > >> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > >> Huang,
> > >> > > > > > Haitao <haitao.huang@intel.com>
> > >> > > > > > Subject: Re: [PATCH] Add SGX selftest
> > >> `augment_via_eaccept_long`
> > >> > > > > >
> > >> > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen
> > >> wrote:
> > >> > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko Sakkinen
> > >> wrote:
> > >> > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj,
> > >> > > > > > > > Vijay
> > >> wrote:
> > >> > > > > > > > >
> > >> > > > > > > > > > -----Original Message-----
> > >> > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > >> > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > >> > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > >> > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > >> > > > > > > > > > <reinette.chatre@intel.com>;
> > >> dave.hansen@linux.intel.com;
> > >> > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > >> > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > >> > > > > > > > > > `augment_via_eaccept_long`
> > >> > > > > > > > > >
> > >> > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > >> > > > > > > > > > vijay.dhanraj@intel.com
> > >> > > > > > wrote:
> > >> > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > >> > > > > > > > > > >
> > >> > > > > > > > > > > This commit adds a new test case which is same as
> > >> > > > > > > > > > > `augment_via_eaccept` but adds more number of EPC
> > >> pages to
> > >> > > > > > > > > > > stress test
> > >> > > > > > > > > > `EAUG` via `EACCEPT`.
> > >> > > > > > > > > > >
> > >> > > > > > > > > > > Signed-off-by: Vijay Dhanraj
> > >> <vijay.dhanraj@intel.com>
> > >> > > > > > > > > > > Signed-off-by: Haitao Huang
> > >> <haitao.huang@linux.intel.com>
> > >> > > > > > > > > >
> > >> > > > > > > > > > Hey, to reproduce the original issue: does it
> > >> reproduce on
> > >> > > > > > > > > > VM or should I run baremetal kernel?
> > >> > > > > > > > > >
> > >> > > > > > > > > > BR, Jarkko
> > >> > > > > > > > >
> > >> > > > > > > > > Hi Jarkko, The issue should be reproducible on
> > >> baremetal kernel.
> > >> > > > > > > >
> > >> > > > > > > > Thanks.
> > >> > > > > > >
> > >> > > > > > > I need comment out other tests in order to make sane out
> > >> > > > > > > of
> > >> this
> > >> > > > > > > :-)
> > >> > > > > > >
> > >> > > > > > > Mentioning this because came into realization that stress
> > >> tests
> > >> > > > > > > should be IMHO moved each to a separate binary (so that
> > >> they can
> > >> > > > > > > be run separately). Just a note (TODO) to myself.
> > >> > > > > > >
> > >> > > > > > > I'll work on this today again and *possibly* split your
> > >> test to
> > >> > > > > > > its own application to get a starting point for
> > >> forementioned.
> > >> > > > > >
> > >> > > > > > I got
> > >> > > > > >
> > >> > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > >> > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
> > >> total_size =
> > >> > > > > > 8192,
> > >> > > > > > seg->size = 8192 #
> > >> > > > > > seg->main.c:1241:augment_via_eaccept_long:test
> > >> enclave:
> > >> > > > > > total_size = 12288, seg->size = 4096 #
> > >> > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > >> > > > > > total_size
> > >> =
> > >> > > > > > 36864,
> > >> > > > > > seg->size = 24576 #
> > >> > > > > > seg->main.c:1241:augment_via_eaccept_long:test
> > >> enclave:
> > >> > > > > > total_size = 40960, seg->size = 4096 #
> > >> > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end
> > >> > > > > > of
> > >> > > > enclave...
> > >> > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave to
> > >> > > > > > run EACCEPT for each page of 8589934592 bytes may take a while
> > ...
> > >> > > > > > #            OK  enclave.augment_via_eaccept_long
> > >> > > > > >
> > >> > > > > > The CPU used for testing was according to /proc/cpuinfo:
> > >> > > > > >
> > >> > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > >> > > > > >
> > >> > > > > > I have couple of queries:
> > >> > > > > >
> > >> > > > > > 1. Is it possible to get dmesg output?
> > >> > > > > I did check the dmesg output but couldn't find anything
> > >> > > > > related
> > >> to the
> > >> > > > failure. Just the general log messages.
> > >> > > > >
> > >> > > > > > 2. Do I have to repeat the test multiple times, or does it
> > >> > > > > >    occur unconditionaly?
> > >> > > > > >
> > >> > > > > I was able to repro every time but it was a bit sporadic for
> > >> Haitao.
> > >> > > > >
> > >> > > > > > BR, Jarkko
> > >> > > > >
> > >> > > > > Also, did you set the PRMRR size to 2GB per socket in the BIOS?
> > >> The
> > >> > > > > issue is only reproduced for oversubscribed scenario. When I
> > >> set my
> > >> > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > >> > > >
> > >> > > > I need to revisit this.
> > >> > > >
> > >> > > > Can you simply run test_sgx with gdb and see where it hits?
> > >> > > > HOST_CFLAGS has apparently "-g" already.
> > >> > > >
> > >> > > > > Regards, Vijay
> > >> > > >
> > >> > > > BR, Jarkko
> > >> > >
> > >> > > I am able to repro the issue when I reduce the PRMRR to 2B/socket
> > >> but not but not able to break on the assertion failure with gdb. I
> > >> also enabled debug attribute in the secs but still no avail. Anything
> > >> I am missing here?
> > >> > >
> > >> > > diff --git a/tools/testing/selftests/sgx/load.c
> > >> b/tools/testing/selftests/sgx/load.c
> > >> > > index 7de1b15c90b1..c4bccd3f5f17 100644
> > >> > > --- a/tools/testing/selftests/sgx/load.c
> > >> > > +++ b/tools/testing/selftests/sgx/load.c
> > >> > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> > >> > >
> > >> > >         memset(secs, 0, sizeof(*secs));
> > >> > >         secs->ssa_frame_size = 1;
> > >> > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > >> > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> > >> > >         secs->xfrm = 3;
> > >> > >         secs->base = encl->encl_base;
> > >> > >         secs->size = encl->encl_size;
> > >> > >
> > >> > > Regards, Vijay
> > >> >
> > >> > I get also full pass with 2GB configuration (and also observed that
> > >> > kselftest runs much faster with this configuration).
> > >> >
> > >> > But I looked at sgx_alloc_epc_page() and saw this:
> > >> >
> > >> >                if (list_empty(&sgx_active_page_list))
> > >> >                         return ERR_PTR(-ENOMEM);
> > >> >
> > >> >                 if (!reclaim) {
> > >> >                         page = ERR_PTR(-EBUSY);
> > >> >                         break;
> > >> >                 }
> > >> >
> > >> > In sgx_vma_fault(), when running completely out of reclaimable
> > >> > pages,
> > >> this
> > >> > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
> > >> >
> > >> > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> > >> > 	if (IS_ERR(entry)) {
> > >> > 		mutex_unlock(&encl->lock);
> > >> >
> > >> > 		if (PTR_ERR(entry) == -EBUSY)
> > >> > 			return VM_FAULT_NOPAGE;
> > >> >
> > >> > 		return VM_FAULT_SIGBUS;
> > >> > 	}
> > >> >
> > >> > Not sure if those should be re-ordered that would keep the process
> > >> stuck up
> > >> > until there is something to reclaim. Now we use NOPAGE to address
> > >> situation
> > >> > when there is actually something to reclaim but because of locking
> > >> side of
> > >> > things we pass reclaim=false to sgx_alloc_epc_page().
> > >> >
> > >> > So this is kind of OOM behaviour how it works now instead of
> > >> > stalling processes.
> > >>
> > >> Right, I looked at the original email at was really a page fault that
> > >> was catched. The above theory cannot possibly hold, as the process
> > >> does not exit with a bus error.
> > >>
> > >> I looked next to sgx_encl_eaug_page(), and found this:
> > >>
> > >>         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
> > >> secinfo_flags);
> > >>         if (IS_ERR(encl_page))
> > >>                 return VM_FAULT_OOM;
> > >>
> > >> This is AFAIK the only code path in sgx_vma_fault() flow that
> > >> allocates non-EPC memory, and the code paths where EPC allocation
> > >> fails the result would be SIGBUS.
> > >>
> > >> So perhaps allocation is failing here. You could pretty easily trace
> > >> allocations with bpftrace and kretprobe to see if this is what is
> > >> happening, e.g. in one terminal:
> > >>
> > >> sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {
> > >> printf("%d\n", retval); }'
> > >
> > > Should be
> > >
> > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {
> > > printf("%d\n", retval); }'
> > >
> > > BR, Jarkko
> > 
> > I tried these probs and got following results when failure happens (not
> > always happen on my device).
> > 
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / { printf("%X\n",
> > retval); }'
> > 
> > --> lots of negative values, I believe they are valid addresses in
> > unsigned long type. So I looked up IS_ERR_VALUE macro and translated it in
> > following probes.
> > 
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >= (uint64)(-4095)/ {
> > printf("%X\n", retval); }'
> > 
> > --> none triggered
> > 
> > sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >= (uint64)(-4095)/ {
> > printf("%X\n", retval); }'
> > 
> > --> FFFFFFF0 printed, which I believe is -EBUSY.
> > 
> > BR
> > Haitao
> 
> I see the same behavior as Haitao. 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ { printf("%d\n", retval); } -> This one gave an error stdin:1:24-31: ERROR: Unknown struct/union: 'long'  
> 
> So switched to sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / { printf("%X\n", retval); }' as suggested by Haitao and do see lot of positive and negative values.
> 
> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >= (uint64)(-4095)/ { printf("%X\n", retval); }' -> No output.
> 
> sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >= (uint64)(-4095)/ { printf("%X\n", retval); } -> FFFFFFF0 is printed.

And you are still encountering segfaults? And does it
happen when e.g. EBUSY is encountered, which should be
fully legit. E.g. if there was segfault simultaneously
perhaps that code path has something wrong.

BR, Jarkko

Jarkko Sakkinen Aug. 14, 2022, 6:11 p.m. UTC | #20

On Fri, Aug 12, 2022 at 12:47:07AM -0500, Haitao Huang wrote:
> On Thu, 11 Aug 2022 21:29:42 -0500, Haitao Huang
> <haitao.huang@linux.intel.com> wrote:
> 
> > Hi Jarkko
> > 
> > On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>
> > wrote:
> > 
> > > On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> > > > On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> > > > > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > > > > >
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > Sent: Tuesday, August 9, 2022 11:53 AM
> > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > > Huang, Haitao
> > > > > > > <haitao.huang@intel.com>
> > > > > > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > > > > > >
> > > > > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > > <reinette.chatre@intel.com>;
> > > > dave.hansen@linux.intel.com; Huang,
> > > > > > > > > Haitao <haitao.huang@intel.com>
> > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > `augment_via_eaccept_long`
> > > > > > > > >
> > > > > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko
> > > > Sakkinen wrote:
> > > > > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko
> > > > Sakkinen wrote:
> > > > > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj,
> > > > Vijay wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > > -----Original Message-----
> > > > > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > > > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > > > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > > > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > > > > > > > > > > > <reinette.chatre@intel.com>;
> > > > dave.hansen@linux.intel.com;
> > > > > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > > > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > > > > > > > > > > > `augment_via_eaccept_long`
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > > > > > > > > > > > vijay.dhanraj@intel.com
> > > > > > > > > wrote:
> > > > > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > This commit adds a new test case which is same as
> > > > > > > > > > > > > > `augment_via_eaccept` but adds more number
> > > > of EPC pages to
> > > > > > > > > > > > > > stress test
> > > > > > > > > > > > > `EAUG` via `EACCEPT`.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Signed-off-by: Vijay Dhanraj
> > > > <vijay.dhanraj@intel.com>
> > > > > > > > > > > > > > Signed-off-by: Haitao Huang
> > > > <haitao.huang@linux.intel.com>
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hey, to reproduce the original issue: does it
> > > > reproduce on
> > > > > > > > > > > > > VM or should I run baremetal kernel?
> > > > > > > > > > > > >
> > > > > > > > > > > > > BR, Jarkko
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Jarkko, The issue should be reproducible on
> > > > baremetal kernel.
> > > > > > > > > > >
> > > > > > > > > > > Thanks.
> > > > > > > > > >
> > > > > > > > > > I need comment out other tests in order to make sane
> > > > out of this
> > > > > > > > > > :-)
> > > > > > > > > >
> > > > > > > > > > Mentioning this because came into realization that
> > > > stress tests
> > > > > > > > > > should be IMHO moved each to a separate binary (so
> > > > that they can
> > > > > > > > > > be run separately). Just a note (TODO) to myself.
> > > > > > > > > >
> > > > > > > > > > I'll work on this today again and *possibly* split
> > > > your test to
> > > > > > > > > > its own application to get a starting point for
> > > > forementioned.
> > > > > > > > >
> > > > > > > > > I got
> > > > > > > > >
> > > > > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > > > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
> > > > total_size =
> > > > > > > > > 8192,
> > > > > > > > > seg->size = 8192 #
> > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > > > > total_size = 12288, seg->size = 4096 #
> > > > > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > > total_size =
> > > > > > > > > 36864,
> > > > > > > > > seg->size = 24576 #
> > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > > > > > > > total_size = 40960, seg->size = 4096 #
> > > > > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end of
> > > > > > > enclave...
> > > > > > > > > # main.c:1273:augment_via_eaccept_long:Entering
> > > > enclave to run
> > > > > > > > > EACCEPT for each page of 8589934592 bytes may take a
> > > > while ...
> > > > > > > > > #            OK  enclave.augment_via_eaccept_long
> > > > > > > > >
> > > > > > > > > The CPU used for testing was according to /proc/cpuinfo:
> > > > > > > > >
> > > > > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > > > > > > >
> > > > > > > > > I have couple of queries:
> > > > > > > > >
> > > > > > > > > 1. Is it possible to get dmesg output?
> > > > > > > > I did check the dmesg output but couldn't find anything
> > > > related to the
> > > > > > > failure. Just the general log messages.
> > > > > > > >
> > > > > > > > > 2. Do I have to repeat the test multiple times, or does it
> > > > > > > > >    occur unconditionaly?
> > > > > > > > >
> > > > > > > > I was able to repro every time but it was a bit sporadic
> > > > for Haitao.
> > > > > > > >
> > > > > > > > > BR, Jarkko
> > > > > > > >
> > > > > > > > Also, did you set the PRMRR size to 2GB per socket in
> > > > the BIOS? The
> > > > > > > > issue is only reproduced for oversubscribed scenario.
> > > > When I set my
> > > > > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > > > > > >
> > > > > > > I need to revisit this.
> > > > > > >
> > > > > > > Can you simply run test_sgx with gdb and see where it hits?
> > > > > > > HOST_CFLAGS has apparently "-g" already.
> > > > > > >
> > > > > > > > Regards, Vijay
> > > > > > >
> > > > > > > BR, Jarkko
> > > > > >
> > > > > > I am able to repro the issue when I reduce the PRMRR to
> > > > 2B/socket but not but not able to break on the assertion failure
> > > > with gdb. I also enabled debug attribute in the secs but still
> > > > no avail. Anything I am missing here?
> > > > > >
> > > > > > diff --git a/tools/testing/selftests/sgx/load.c
> > > > b/tools/testing/selftests/sgx/load.c
> > > > > > index 7de1b15c90b1..c4bccd3f5f17 100644
> > > > > > --- a/tools/testing/selftests/sgx/load.c
> > > > > > +++ b/tools/testing/selftests/sgx/load.c
> > > > > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
> > > > > >
> > > > > >         memset(secs, 0, sizeof(*secs));
> > > > > >         secs->ssa_frame_size = 1;
> > > > > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > > > > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
> > > > > >         secs->xfrm = 3;
> > > > > >         secs->base = encl->encl_base;
> > > > > >         secs->size = encl->encl_size;
> > > > > >
> > > > > > Regards, Vijay
> > > > >
> > > > > I get also full pass with 2GB configuration (and also observed that
> > > > > kselftest runs much faster with this configuration).
> > > > >
> > > > > But I looked at sgx_alloc_epc_page() and saw this:
> > > > >
> > > > >                if (list_empty(&sgx_active_page_list))
> > > > >                         return ERR_PTR(-ENOMEM);
> > > > >
> > > > >                 if (!reclaim) {
> > > > >                         page = ERR_PTR(-EBUSY);
> > > > >                         break;
> > > > >                 }
> > > > >
> > > > > In sgx_vma_fault(), when running completely out of reclaimable
> > > > pages, this
> > > > > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
> > > > >
> > > > > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> > > > > 	if (IS_ERR(entry)) {
> > > > > 		mutex_unlock(&encl->lock);
> > > > >
> > > > > 		if (PTR_ERR(entry) == -EBUSY)
> > > > > 			return VM_FAULT_NOPAGE;
> > > > >
> > > > > 		return VM_FAULT_SIGBUS;
> > > > > 	}
> > > > >
> > > > > Not sure if those should be re-ordered that would keep the
> > > > process stuck up
> > > > > until there is something to reclaim. Now we use NOPAGE to
> > > > address situation
> > > > > when there is actually something to reclaim but because of
> > > > locking side of
> > > > > things we pass reclaim=false to sgx_alloc_epc_page().
> > > > >
> > > > > So this is kind of OOM behaviour how it works now instead of stalling
> > > > > processes.
> > > > 
> > > > Right, I looked at the original email at was really a page fault
> > > > that was catched. The above theory cannot possibly hold, as the
> > > > process does not exit with a bus error.
> > > > 
> > > > I looked next to sgx_encl_eaug_page(), and found this:
> > > > 
> > > >         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
> > > > secinfo_flags);
> > > >         if (IS_ERR(encl_page))
> > > >                 return VM_FAULT_OOM;
> > > > 
> > > > This is AFAIK the only code path in sgx_vma_fault() flow that
> > > > allocates non-EPC memory, and the code paths where EPC allocation
> > > > fails the result would be SIGBUS.
> > > > 
> > > > So perhaps allocation is failing here. You could pretty easily
> > > > trace allocations with bpftrace and kretprobe to see if this is
> > > > what is happening, e.g. in one terminal:
> > > > 
> > > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {
> > > > printf("%d\n", retval); }'
> > > 
> > > Should be
> > > 
> > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {
> > > printf("%d\n", retval); }'
> > > 
> > > BR, Jarkko
> > 
> > I tried these probs and got following results when failure happens (not
> > always happen on my device).
> > 
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / {
> > printf("%X\n", retval); }'
> > 
> > --> lots of negative values, I believe they are valid addresses in
> > unsigned long type. So I looked up IS_ERR_VALUE macro and translated it
> > in following probes.
> > 
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >=
> > (uint64)(-4095)/ { printf("%X\n", retval); }'
> > 
> > --> none triggered
> > 
> > sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >=
> > (uint64)(-4095)/ { printf("%X\n", retval); }'
> > 
> > --> FFFFFFF0 printed, which I believe is -EBUSY.
> > 
> 
> Using this probe:
>  sudo bpftrace -e 'kr:sgx_encl_grow /(uint64)retval >= (uint64)(-4095)/ {
> printf("%lX\n", retval); }'
> 
> I found sgx_encl_grow could also return -EBUSY when allocating the VA page
> for the page to be EAUG'd.
> But we did not handle the case in sgx_encl_eaug_page, and follow changes
> seem to fix the problem with limited tests:
> 
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 24c1bb8eb196..527de07c0e0b 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -345,7 +345,11 @@ static vm_fault_t sgx_encl_eaug_page(struct
> vm_area_struct *vma,
> 
>         va_page = sgx_encl_grow(encl, false);
>         if (IS_ERR(va_page))
> +    {
> +               if (PTR_ERR(va_page) == -EBUSY)
> +                       vmret =  VM_FAULT_NOPAGE;
>                 goto err_out_epc;
> +    }
> 
>         if (va_page)
>                 list_add(&va_page->list, &encl->va_pages);
> 
> Will do more testing to verify. Let me know if the changes make sense.

It does make sense to me. EBUSY should be always result
NOPAGE in any situation.

The reason for this is locking. We need to round-trip
through a #PF trigger when out-of-EPC when running out
of EPC because otherwise the code ends up into ABBA
deadlock. I.e. allocator just starts the reclaimer thread
and returns, which gives chance to reclaim some pages.

BR, Jarkko

Haitao Huang Aug. 15, 2022, 4:58 a.m. UTC | #21

On Sun, 14 Aug 2022 13:05:58 -0500, Jarkko Sakkinen <jarkko@kernel.org>  
wrote:

> On Thu, Aug 11, 2022 at 09:29:42PM -0500, Haitao Huang wrote:
>> Hi Jarkko
>>
>> On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen <jarkko@kernel.org>
>> wrote:
>>
>> > On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
>> > > On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
>> > > > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
>> > > > >
>> > > > >
>> > > > > > -----Original Message-----
>> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>> > > > > > Sent: Tuesday, August 9, 2022 11:53 AM
>> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>> > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
>> > > Huang, Haitao
>> > > > > > <haitao.huang@intel.com>
>> > > > > > Subject: Re: [PATCH] Add SGX selftest  
>> `augment_via_eaccept_long`
>> > > > > >
>> > > > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay  
>> wrote:
>> > > > > > >
>> > > > > > >
>> > > > > > > > -----Original Message-----
>> > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>> > > > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
>> > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>> > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>> > > > > > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
>> > > Huang,
>> > > > > > > > Haitao <haitao.huang@intel.com>
>> > > > > > > > Subject: Re: [PATCH] Add SGX selftest
>> > > `augment_via_eaccept_long`
>> > > > > > > >
>> > > > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko Sakkinen
>> > > wrote:
>> > > > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko
>> > > Sakkinen wrote:
>> > > > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj,
>> > > Vijay wrote:
>> > > > > > > > > > >
>> > > > > > > > > > > > -----Original Message-----
>> > > > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
>> > > > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
>> > > > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
>> > > > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
>> > > > > > > > > > > > <reinette.chatre@intel.com>;
>> > > dave.hansen@linux.intel.com;
>> > > > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
>> > > > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
>> > > > > > > > > > > > `augment_via_eaccept_long`
>> > > > > > > > > > > >
>> > > > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
>> > > > > > > > > > > > vijay.dhanraj@intel.com
>> > > > > > > > wrote:
>> > > > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
>> > > > > > > > > > > > >
>> > > > > > > > > > > > > This commit adds a new test case which is same  
>> as
>> > > > > > > > > > > > > `augment_via_eaccept` but adds more number of
>> > > EPC pages to
>> > > > > > > > > > > > > stress test
>> > > > > > > > > > > > `EAUG` via `EACCEPT`.
>> > > > > > > > > > > > >
>> > > > > > > > > > > > > Signed-off-by: Vijay Dhanraj
>> > > <vijay.dhanraj@intel.com>
>> > > > > > > > > > > > > Signed-off-by: Haitao Huang
>> > > <haitao.huang@linux.intel.com>
>> > > > > > > > > > > >
>> > > > > > > > > > > > Hey, to reproduce the original issue: does it
>> > > reproduce on
>> > > > > > > > > > > > VM or should I run baremetal kernel?
>> > > > > > > > > > > >
>> > > > > > > > > > > > BR, Jarkko
>> > > > > > > > > > >
>> > > > > > > > > > > Hi Jarkko, The issue should be reproducible on
>> > > baremetal kernel.
>> > > > > > > > > >
>> > > > > > > > > > Thanks.
>> > > > > > > > >
>> > > > > > > > > I need comment out other tests in order to make sane out
>> > > of this
>> > > > > > > > > :-)
>> > > > > > > > >
>> > > > > > > > > Mentioning this because came into realization that
>> > > stress tests
>> > > > > > > > > should be IMHO moved each to a separate binary (so that
>> > > they can
>> > > > > > > > > be run separately). Just a note (TODO) to myself.
>> > > > > > > > >
>> > > > > > > > > I'll work on this today again and *possibly* split your
>> > > test to
>> > > > > > > > > its own application to get a starting point for
>> > > forementioned.
>> > > > > > > >
>> > > > > > > > I got
>> > > > > > > >
>> > > > > > > > #  RUN           enclave.augment_via_eaccept_long ...
>> > > > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
>> > > total_size =
>> > > > > > > > 8192,
>> > > > > > > > seg->size = 8192 #
>> > > main.c:1241:augment_via_eaccept_long:test enclave:
>> > > > > > > > total_size = 12288, seg->size = 4096 #
>> > > > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
>> > > total_size =
>> > > > > > > > 36864,
>> > > > > > > > seg->size = 24576 #
>> > > main.c:1241:augment_via_eaccept_long:test enclave:
>> > > > > > > > total_size = 40960, seg->size = 4096 #
>> > > > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at end  
>> of
>> > > > > > enclave...
>> > > > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave  
>> to run
>> > > > > > > > EACCEPT for each page of 8589934592 bytes may take a  
>> while ...
>> > > > > > > > #            OK  enclave.augment_via_eaccept_long
>> > > > > > > >
>> > > > > > > > The CPU used for testing was according to /proc/cpuinfo:
>> > > > > > > >
>> > > > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
>> > > > > > > >
>> > > > > > > > I have couple of queries:
>> > > > > > > >
>> > > > > > > > 1. Is it possible to get dmesg output?
>> > > > > > > I did check the dmesg output but couldn't find anything
>> > > related to the
>> > > > > > failure. Just the general log messages.
>> > > > > > >
>> > > > > > > > 2. Do I have to repeat the test multiple times, or does it
>> > > > > > > >    occur unconditionaly?
>> > > > > > > >
>> > > > > > > I was able to repro every time but it was a bit sporadic for
>> > > Haitao.
>> > > > > > >
>> > > > > > > > BR, Jarkko
>> > > > > > >
>> > > > > > > Also, did you set the PRMRR size to 2GB per socket in the
>> > > BIOS? The
>> > > > > > > issue is only reproduced for oversubscribed scenario. When I
>> > > set my
>> > > > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
>> > > > > >
>> > > > > > I need to revisit this.
>> > > > > >
>> > > > > > Can you simply run test_sgx with gdb and see where it hits?
>> > > > > > HOST_CFLAGS has apparently "-g" already.
>> > > > > >
>> > > > > > > Regards, Vijay
>> > > > > >
>> > > > > > BR, Jarkko
>> > > > >
>> > > > > I am able to repro the issue when I reduce the PRMRR to
>> > > 2B/socket but not but not able to break on the assertion failure
>> > > with gdb. I also enabled debug attribute in the secs but still no
>> > > avail. Anything I am missing here?
>> > > > >
>> > > > > diff --git a/tools/testing/selftests/sgx/load.c
>> > > b/tools/testing/selftests/sgx/load.c
>> > > > > index 7de1b15c90b1..c4bccd3f5f17 100644
>> > > > > --- a/tools/testing/selftests/sgx/load.c
>> > > > > +++ b/tools/testing/selftests/sgx/load.c
>> > > > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl *encl)
>> > > > >
>> > > > >         memset(secs, 0, sizeof(*secs));
>> > > > >         secs->ssa_frame_size = 1;
>> > > > > -       secs->attributes = SGX_ATTR_MODE64BIT;
>> > > > > +       secs->attributes = SGX_ATTR_MODE64BIT | SGX_ATTR_DEBUG;
>> > > > >         secs->xfrm = 3;
>> > > > >         secs->base = encl->encl_base;
>> > > > >         secs->size = encl->encl_size;
>> > > > >
>> > > > > Regards, Vijay
>> > > >
>> > > > I get also full pass with 2GB configuration (and also observed  
>> that
>> > > > kselftest runs much faster with this configuration).
>> > > >
>> > > > But I looked at sgx_alloc_epc_page() and saw this:
>> > > >
>> > > >                if (list_empty(&sgx_active_page_list))
>> > > >                         return ERR_PTR(-ENOMEM);
>> > > >
>> > > >                 if (!reclaim) {
>> > > >                         page = ERR_PTR(-EBUSY);
>> > > >                         break;
>> > > >                 }
>> > > >
>> > > > In sgx_vma_fault(), when running completely out of reclaimable
>> > > pages, this
>> > > > causes VM_FAULT_SIGBUS returned instead of VM_FAULT_NOPAGE:
>> > > >
>> > > > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
>> > > > 	if (IS_ERR(entry)) {
>> > > > 		mutex_unlock(&encl->lock);
>> > > >
>> > > > 		if (PTR_ERR(entry) == -EBUSY)
>> > > > 			return VM_FAULT_NOPAGE;
>> > > >
>> > > > 		return VM_FAULT_SIGBUS;
>> > > > 	}
>> > > >
>> > > > Not sure if those should be re-ordered that would keep the process
>> > > stuck up
>> > > > until there is something to reclaim. Now we use NOPAGE to address
>> > > situation
>> > > > when there is actually something to reclaim but because of locking
>> > > side of
>> > > > things we pass reclaim=false to sgx_alloc_epc_page().
>> > > >
>> > > > So this is kind of OOM behaviour how it works now instead of  
>> stalling
>> > > > processes.
>> > >
>> > > Right, I looked at the original email at was really a page fault
>> > > that was catched. The above theory cannot possibly hold, as the
>> > > process does not exit with a bus error.
>> > >
>> > > I looked next to sgx_encl_eaug_page(), and found this:
>> > >
>> > >         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
>> > > secinfo_flags);
>> > >         if (IS_ERR(encl_page))
>> > >                 return VM_FAULT_OOM;
>> > >
>> > > This is AFAIK the only code path in sgx_vma_fault() flow that
>> > > allocates non-EPC memory, and the code paths where EPC allocation
>> > > fails the result would be SIGBUS.
>> > >
>> > > So perhaps allocation is failing here. You could pretty easily
>> > > trace allocations with bpftrace and kretprobe to see if this is
>> > > what is happening, e.g. in one terminal:
>> > >
>> > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {
>> > > printf("%d\n", retval); }'
>> >
>> > Should be
>> >
>> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {
>> > printf("%d\n", retval); }'
>> >
>> > BR, Jarkko
>>
>> I tried these probs and got following results when failure happens (not
>> always happen on my device).
>>
>> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / {
>> printf("%X\n", retval); }'
>>
>> --> lots of negative values, I believe they are valid addresses in  
>> unsigned
>> long type. So I looked up IS_ERR_VALUE macro and translated it in  
>> following
>> probes.
>>
>> sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >=  
>> (uint64)(-4095)/
>> { printf("%X\n", retval); }'
>
> Thank you for refining the probe.
>
> I'll add it to my collection:
>
> https://github.com/jarkkojs/bpftrace-sgx
>
> Despite the repository name it contains probes both for
> SGX and AMD-SNP, and also TDX and ARMv9 in future once
> I have ways to test them.
>
>>
>> --> none triggered
>>
>> sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >=  
>> (uint64)(-4095)/
>> { printf("%X\n", retval); }'
>>
>> --> FFFFFFF0 printed, which I believe is -EBUSY.
>
> EBUSY should be fine, it will be retrigger the #PF handler
> through round-trip.
>
> Did you still encounter segfaults?
>
Yes, but I think it was -EBUSY returned from sgx_encl_grow that was not  
handled and caused the segfault. See the patch.

Thanks
Haitao

Dhanraj, Vijay Aug. 15, 2022, 4:16 p.m. UTC | #22

> -----Original Message-----
> From: Jarkko Sakkinen <jarkko@kernel.org>
> Sent: Sunday, August 14, 2022 11:09 AM
> To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> Cc: Haitao Huang <haitao.huang@linux.intel.com>; linux-
> sgx@vger.kernel.org; Chatre, Reinette <reinette.chatre@intel.com>;
> dave.hansen@linux.intel.com; Huang, Haitao <haitao.huang@intel.com>
> Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> 
> On Fri, Aug 12, 2022 at 03:23:26AM +0000, Dhanraj, Vijay wrote:
> >
> >
> > > -----Original Message-----
> > > From: Haitao Huang <haitao.huang@linux.intel.com>
> > > Sent: Thursday, August 11, 2022 7:30 PM
> > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>; Jarkko Sakkinen
> > > <jarkko@kernel.org>
> > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com; Huang,
> > > Haitao <haitao.huang@intel.com>
> > > Subject: Re: [PATCH] Add SGX selftest `augment_via_eaccept_long`
> > >
> > > Hi Jarkko
> > >
> > > On Wed, 10 Aug 2022 20:50:54 -0500, Jarkko Sakkinen
> > > <jarkko@kernel.org>
> > > wrote:
> > >
> > > > On Thu, Aug 11, 2022 at 04:36:57AM +0300, Jarkko Sakkinen wrote:
> > > >> On Thu, Aug 11, 2022 at 04:01:15AM +0300, Jarkko Sakkinen wrote:
> > > >> > On Wed, Aug 10, 2022 at 12:09:56AM +0000, Dhanraj, Vijay wrote:
> > > >> > >
> > > >> > >
> > > >> > > > -----Original Message-----
> > > >> > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > >> > > > Sent: Tuesday, August 9, 2022 11:53 AM
> > > >> > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > >> > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > >> > > > <reinette.chatre@intel.com>; dave.hansen@linux.intel.com;
> > > >> > > > Huang,
> > > >> Haitao
> > > >> > > > <haitao.huang@intel.com>
> > > >> > > > Subject: Re: [PATCH] Add SGX selftest
> > > >> > > > `augment_via_eaccept_long`
> > > >> > > >
> > > >> > > > On Tue, Aug 09, 2022 at 05:08:21PM +0000, Dhanraj, Vijay wrote:
> > > >> > > > >
> > > >> > > > >
> > > >> > > > > > -----Original Message-----
> > > >> > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > >> > > > > > Sent: Tuesday, August 9, 2022 9:10 AM
> > > >> > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > >> > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > >> > > > > > <reinette.chatre@intel.com>;
> > > >> > > > > > dave.hansen@linux.intel.com;
> > > >> Huang,
> > > >> > > > > > Haitao <haitao.huang@intel.com>
> > > >> > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > >> `augment_via_eaccept_long`
> > > >> > > > > >
> > > >> > > > > > On Tue, Aug 09, 2022 at 01:45:35PM +0300, Jarkko
> > > >> > > > > > Sakkinen
> > > >> wrote:
> > > >> > > > > > > On Mon, Aug 08, 2022 at 06:29:13PM +0300, Jarkko
> > > >> > > > > > > Sakkinen
> > > >> wrote:
> > > >> > > > > > > > On Mon, Aug 08, 2022 at 01:00:54PM +0000, Dhanraj,
> > > >> > > > > > > > Vijay
> > > >> wrote:
> > > >> > > > > > > > >
> > > >> > > > > > > > > > -----Original Message-----
> > > >> > > > > > > > > > From: Jarkko Sakkinen <jarkko@kernel.org>
> > > >> > > > > > > > > > Sent: Monday, August 8, 2022 5:18 AM
> > > >> > > > > > > > > > To: Dhanraj, Vijay <vijay.dhanraj@intel.com>
> > > >> > > > > > > > > > Cc: linux-sgx@vger.kernel.org; Chatre, Reinette
> > > >> > > > > > > > > > <reinette.chatre@intel.com>;
> > > >> dave.hansen@linux.intel.com;
> > > >> > > > > > > > > > Huang, Haitao <haitao.huang@intel.com>
> > > >> > > > > > > > > > Subject: Re: [PATCH] Add SGX selftest
> > > >> > > > > > > > > > `augment_via_eaccept_long`
> > > >> > > > > > > > > >
> > > >> > > > > > > > > > On Thu, Aug 04, 2022 at 01:14:56PM -0700,
> > > >> > > > > > > > > > vijay.dhanraj@intel.com
> > > >> > > > > > wrote:
> > > >> > > > > > > > > > > From: Vijay Dhanraj <vijay.dhanraj@intel.com>
> > > >> > > > > > > > > > >
> > > >> > > > > > > > > > > This commit adds a new test case which is
> > > >> > > > > > > > > > > same as `augment_via_eaccept` but adds more
> > > >> > > > > > > > > > > number of EPC
> > > >> pages to
> > > >> > > > > > > > > > > stress test
> > > >> > > > > > > > > > `EAUG` via `EACCEPT`.
> > > >> > > > > > > > > > >
> > > >> > > > > > > > > > > Signed-off-by: Vijay Dhanraj
> > > >> <vijay.dhanraj@intel.com>
> > > >> > > > > > > > > > > Signed-off-by: Haitao Huang
> > > >> <haitao.huang@linux.intel.com>
> > > >> > > > > > > > > >
> > > >> > > > > > > > > > Hey, to reproduce the original issue: does it
> > > >> reproduce on
> > > >> > > > > > > > > > VM or should I run baremetal kernel?
> > > >> > > > > > > > > >
> > > >> > > > > > > > > > BR, Jarkko
> > > >> > > > > > > > >
> > > >> > > > > > > > > Hi Jarkko, The issue should be reproducible on
> > > >> baremetal kernel.
> > > >> > > > > > > >
> > > >> > > > > > > > Thanks.
> > > >> > > > > > >
> > > >> > > > > > > I need comment out other tests in order to make sane
> > > >> > > > > > > out of
> > > >> this
> > > >> > > > > > > :-)
> > > >> > > > > > >
> > > >> > > > > > > Mentioning this because came into realization that
> > > >> > > > > > > stress
> > > >> tests
> > > >> > > > > > > should be IMHO moved each to a separate binary (so
> > > >> > > > > > > that
> > > >> they can
> > > >> > > > > > > be run separately). Just a note (TODO) to myself.
> > > >> > > > > > >
> > > >> > > > > > > I'll work on this today again and *possibly* split
> > > >> > > > > > > your
> > > >> test to
> > > >> > > > > > > its own application to get a starting point for
> > > >> forementioned.
> > > >> > > > > >
> > > >> > > > > > I got
> > > >> > > > > >
> > > >> > > > > > #  RUN           enclave.augment_via_eaccept_long ...
> > > >> > > > > > # main.c:1241:augment_via_eaccept_long:test enclave:
> > > >> total_size =
> > > >> > > > > > 8192,
> > > >> > > > > > seg->size = 8192 #
> > > >> > > > > > seg->main.c:1241:augment_via_eaccept_long:test
> > > >> enclave:
> > > >> > > > > > total_size = 12288, seg->size = 4096 #
> > > >> > > > > > main.c:1241:augment_via_eaccept_long:test enclave:
> > > >> > > > > > total_size
> > > >> =
> > > >> > > > > > 36864,
> > > >> > > > > > seg->size = 24576 #
> > > >> > > > > > seg->main.c:1241:augment_via_eaccept_long:test
> > > >> enclave:
> > > >> > > > > > total_size = 40960, seg->size = 4096 #
> > > >> > > > > > main.c:1259:augment_via_eaccept_long:mmaping pages at
> > > >> > > > > > end of
> > > >> > > > enclave...
> > > >> > > > > > # main.c:1273:augment_via_eaccept_long:Entering enclave
> > > >> > > > > > to run EACCEPT for each page of 8589934592 bytes may
> > > >> > > > > > take a while
> > > ...
> > > >> > > > > > #            OK  enclave.augment_via_eaccept_long
> > > >> > > > > >
> > > >> > > > > > The CPU used for testing was according to /proc/cpuinfo:
> > > >> > > > > >
> > > >> > > > > > model name      : Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz
> > > >> > > > > >
> > > >> > > > > > I have couple of queries:
> > > >> > > > > >
> > > >> > > > > > 1. Is it possible to get dmesg output?
> > > >> > > > > I did check the dmesg output but couldn't find anything
> > > >> > > > > related
> > > >> to the
> > > >> > > > failure. Just the general log messages.
> > > >> > > > >
> > > >> > > > > > 2. Do I have to repeat the test multiple times, or does it
> > > >> > > > > >    occur unconditionaly?
> > > >> > > > > >
> > > >> > > > > I was able to repro every time but it was a bit sporadic
> > > >> > > > > for
> > > >> Haitao.
> > > >> > > > >
> > > >> > > > > > BR, Jarkko
> > > >> > > > >
> > > >> > > > > Also, did you set the PRMRR size to 2GB per socket in the
> BIOS?
> > > >> The
> > > >> > > > > issue is only reproduced for oversubscribed scenario.
> > > >> > > > > When I
> > > >> set my
> > > >> > > > > PRMRR to 64GB per socket, I wasn't able to repro the issue.
> > > >> > > >
> > > >> > > > I need to revisit this.
> > > >> > > >
> > > >> > > > Can you simply run test_sgx with gdb and see where it hits?
> > > >> > > > HOST_CFLAGS has apparently "-g" already.
> > > >> > > >
> > > >> > > > > Regards, Vijay
> > > >> > > >
> > > >> > > > BR, Jarkko
> > > >> > >
> > > >> > > I am able to repro the issue when I reduce the PRMRR to
> > > >> > > 2B/socket
> > > >> but not but not able to break on the assertion failure with gdb.
> > > >> I also enabled debug attribute in the secs but still no avail.
> > > >> Anything I am missing here?
> > > >> > >
> > > >> > > diff --git a/tools/testing/selftests/sgx/load.c
> > > >> b/tools/testing/selftests/sgx/load.c
> > > >> > > index 7de1b15c90b1..c4bccd3f5f17 100644
> > > >> > > --- a/tools/testing/selftests/sgx/load.c
> > > >> > > +++ b/tools/testing/selftests/sgx/load.c
> > > >> > > @@ -87,7 +87,7 @@ static bool encl_ioc_create(struct encl
> > > >> > > *encl)
> > > >> > >
> > > >> > >         memset(secs, 0, sizeof(*secs));
> > > >> > >         secs->ssa_frame_size = 1;
> > > >> > > -       secs->attributes = SGX_ATTR_MODE64BIT;
> > > >> > > +       secs->attributes = SGX_ATTR_MODE64BIT |
> > > >> > > + SGX_ATTR_DEBUG;
> > > >> > >         secs->xfrm = 3;
> > > >> > >         secs->base = encl->encl_base;
> > > >> > >         secs->size = encl->encl_size;
> > > >> > >
> > > >> > > Regards, Vijay
> > > >> >
> > > >> > I get also full pass with 2GB configuration (and also observed
> > > >> > that kselftest runs much faster with this configuration).
> > > >> >
> > > >> > But I looked at sgx_alloc_epc_page() and saw this:
> > > >> >
> > > >> >                if (list_empty(&sgx_active_page_list))
> > > >> >                         return ERR_PTR(-ENOMEM);
> > > >> >
> > > >> >                 if (!reclaim) {
> > > >> >                         page = ERR_PTR(-EBUSY);
> > > >> >                         break;
> > > >> >                 }
> > > >> >
> > > >> > In sgx_vma_fault(), when running completely out of reclaimable
> > > >> > pages,
> > > >> this
> > > >> > causes VM_FAULT_SIGBUS returned instead of
> VM_FAULT_NOPAGE:
> > > >> >
> > > >> > 	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
> > > >> > 	if (IS_ERR(entry)) {
> > > >> > 		mutex_unlock(&encl->lock);
> > > >> >
> > > >> > 		if (PTR_ERR(entry) == -EBUSY)
> > > >> > 			return VM_FAULT_NOPAGE;
> > > >> >
> > > >> > 		return VM_FAULT_SIGBUS;
> > > >> > 	}
> > > >> >
> > > >> > Not sure if those should be re-ordered that would keep the
> > > >> > process
> > > >> stuck up
> > > >> > until there is something to reclaim. Now we use NOPAGE to
> > > >> > address
> > > >> situation
> > > >> > when there is actually something to reclaim but because of
> > > >> > locking
> > > >> side of
> > > >> > things we pass reclaim=false to sgx_alloc_epc_page().
> > > >> >
> > > >> > So this is kind of OOM behaviour how it works now instead of
> > > >> > stalling processes.
> > > >>
> > > >> Right, I looked at the original email at was really a page fault
> > > >> that was catched. The above theory cannot possibly hold, as the
> > > >> process does not exit with a bus error.
> > > >>
> > > >> I looked next to sgx_encl_eaug_page(), and found this:
> > > >>
> > > >>         encl_page = sgx_encl_page_alloc(encl, addr - encl->base,
> > > >> secinfo_flags);
> > > >>         if (IS_ERR(encl_page))
> > > >>                 return VM_FAULT_OOM;
> > > >>
> > > >> This is AFAIK the only code path in sgx_vma_fault() flow that
> > > >> allocates non-EPC memory, and the code paths where EPC allocation
> > > >> fails the result would be SIGBUS.
> > > >>
> > > >> So perhaps allocation is failing here. You could pretty easily
> > > >> trace allocations with bpftrace and kretprobe to see if this is
> > > >> what is happening, e.g. in one terminal:
> > > >>
> > > >> sudo bpftrace -e 'kr:sgx_encl_page_alloc /retval != 0/ {
> > > >> printf("%d\n", retval); }'
> > > >
> > > > Should be
> > > >
> > > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ {
> > > > printf("%d\n", retval); }'
> > > >
> > > > BR, Jarkko
> > >
> > > I tried these probs and got following results when failure happens
> > > (not always happen on my device).
> > >
> > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 / {
> > > printf("%X\n", retval); }'
> > >
> > > --> lots of negative values, I believe they are valid addresses in
> > > unsigned long type. So I looked up IS_ERR_VALUE macro and translated
> > > it in following probes.
> > >
> > > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >=
> > > (uint64)(-4095)/ { printf("%X\n", retval); }'
> > >
> > > --> none triggered
> > >
> > > sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >=
> > > (uint64)(-4095)/ { printf("%X\n", retval); }'
> > >
> > > --> FFFFFFF0 printed, which I believe is -EBUSY.
> > >
> > > BR
> > > Haitao
> >
> > I see the same behavior as Haitao.
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(long)retval < 0/ { printf("%d\n",
> retval); } -> This one gave an error stdin:1:24-31: ERROR: Unknown
> struct/union: 'long'
> >
> > So switched to sudo bpftrace -e 'kr:sgx_encl_page_alloc /(int64)retval <0 /
> { printf("%X\n", retval); }' as suggested by Haitao and do see lot of positive
> and negative values.
> >
> > sudo bpftrace -e 'kr:sgx_encl_page_alloc /(uint64)retval >= (uint64)(-
> 4095)/ { printf("%X\n", retval); }' -> No output.
> >
> > sudo bpftrace -e 'kr:sgx_alloc_epc_page /(uint64)retval >= (uint64)(-4095)/
> { printf("%X\n", retval); } -> FFFFFFF0 is printed.
> 
> And you are still encountering segfaults? And does it happen when e.g.
> EBUSY is encountered, which should be fully legit. E.g. if there was segfault
> simultaneously perhaps that code path has something wrong.
> 
> BR, Jarkko

No, with the proposed VA page allocation fix from Haitao I no longer see the segfaults with Gramine tests.

Thanks, Vijay

Add SGX selftest `augment_via_eaccept_long`

Commit Message

Comments

Patch