Message ID | 20230128045529.15749-5-haitao.huang@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | x86/sgx: implement support for MADV_WILLNEED | expand |
On Fri, Jan 27, 2023 at 08:55:29PM -0800, Haitao Huang wrote: > Measure and compare run time for EAUG'ing different number > of EPC pages with/without madvise(..., WILLNEED) call. > > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com> > --- > tools/testing/selftests/sgx/main.c | 167 +++++++++++++++++++++++++++++ > 1 file changed, 167 insertions(+) > > diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c > index e457f2d35461..e3432e73af69 100644 > --- a/tools/testing/selftests/sgx/main.c > +++ b/tools/testing/selftests/sgx/main.c > @@ -10,6 +10,7 @@ > #include <stdint.h> > #include <stdlib.h> > #include <string.h> > +#include <time.h> > #include <unistd.h> > #include <sys/ioctl.h> > #include <sys/mman.h> > @@ -1360,6 +1361,172 @@ TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_DEFAULT) > munmap(addr, DYNAMIC_HEAP_SIZE); > } > > +static int eaccept_range(struct _test_data_enclave *self, void *addr, > + unsigned long size, uint64_t flags, > + struct __test_metadata *_metadata) > +{ > + struct encl_op_eaccept eaccept_op; > + > + self->run.exception_vector = 0; > + self->run.exception_error_code = 0; > + self->run.exception_addr = 0; > + > + /* > + * Run EACCEPT on every page to trigger the #PF->EAUG->EACCEPT(again > + * without a #PF). All should be transparent to userspace. > + */ > + eaccept_op.flags = flags; > + eaccept_op.ret = 0; > + eaccept_op.header.type = ENCL_OP_EACCEPT; > + eaccept_op.len = size; > + eaccept_op.epc_addr = (uint64_t)(addr); > + > + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); > + > + EXPECT_EQ(self->run.exception_vector, 0); > + EXPECT_EQ(self->run.exception_error_code, 0); > + EXPECT_EQ(self->run.exception_addr, 0); > + ASSERT_EQ(eaccept_op.ret, 0); > + ASSERT_EQ(self->run.function, EEXIT); > + > + return 0; > +} > + > +static int trim_remove_range(struct _test_data_enclave *self, void *addr, > + unsigned long size, struct __test_metadata *_metadata) > +{ > + int ret, errno_save; > + struct sgx_enclave_remove_pages remove_ioc; > + struct sgx_enclave_modify_types modt_ioc; > + unsigned long offset; > + unsigned long count; > + > + if ((uint64_t)addr <= self->encl.encl_base) > + return -1; > + offset = (uint64_t)addr - self->encl.encl_base; > + > + memset(&modt_ioc, 0, sizeof(modt_ioc)); > + modt_ioc.offset = offset; > + modt_ioc.length = size; > + modt_ioc.page_type = SGX_PAGE_TYPE_TRIM; > + count = 0; > + do { > + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); > + > + errno_save = ret == -1 ? errno : 0; > + if (errno_save != EAGAIN) > + break; > + EXPECT_EQ(modt_ioc.result, 0); > + > + count += modt_ioc.count; > + modt_ioc.offset += modt_ioc.count; > + modt_ioc.length -= modt_ioc.count; > + modt_ioc.result = 0; > + modt_ioc.count = 0; > + } while (modt_ioc.length != 0); > + > + EXPECT_EQ(ret, 0); > + EXPECT_EQ(errno_save, 0); > + EXPECT_EQ(modt_ioc.result, 0); > + count += modt_ioc.count; > + EXPECT_EQ(count, size); > + > + EXPECT_EQ(eaccept_range(self, addr, size, > + SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED, > + _metadata), 0); > + > + /* Complete page removal. */ > + memset(&remove_ioc, 0, sizeof(remove_ioc)); > + remove_ioc.offset = offset; > + remove_ioc.length = size; > + count = 0; > + do { > + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc); > + > + errno_save = ret == -1 ? errno : 0; > + if (errno_save != EAGAIN) > + break; > + > + count += remove_ioc.count; > + remove_ioc.offset += remove_ioc.count; > + remove_ioc.length -= remove_ioc.count; > + remove_ioc.count = 0; > + } while (remove_ioc.length != 0); > + > + EXPECT_EQ(ret, 0); > + EXPECT_EQ(errno_save, 0); > + count += remove_ioc.count; > + EXPECT_EQ(count, size); > + > + return 0; > +} > + > +/* > + * Compare performance with and without madvise call before EACCEPT'ing > + * different size of regions. > + */ > +TEST_F_TIMEOUT(enclave, augment_via_madvise, TIMEOUT_DEFAULT) > +{ > + unsigned long advise_size = PAGE_SIZE; > + unsigned long max_advise_size = get_total_epc_mem() * 3UL; > + int speed_up_percent; > + clock_t start; > + double time_used1, time_used2; > + size_t total_size = 0; > + unsigned long i; > + void *addr; > + > + if (!sgx2_supported()) > + SKIP(return, "SGX2 not supported"); > + > + ASSERT_TRUE(setup_test_encl_dynamic(ENCL_HEAP_SIZE_DEFAULT, > + max_advise_size, &self->encl, _metadata)); > + > + memset(&self->run, 0, sizeof(self->run)); > + self->run.tcs = self->encl.encl_base; > + > + for (i = 0; i < self->encl.nr_segments; i++) { > + struct encl_segment *seg = &self->encl.segment_tbl[i]; > + > + total_size += seg->size; > + } > + > + for (i = 1; i < 52 && advise_size < max_advise_size; i++) { > + addr = mmap((void *)self->encl.encl_base + total_size, advise_size, > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, > + self->encl.fd, 0); > + EXPECT_NE(addr, MAP_FAILED); > + > + start = clock(); > + EXPECT_EQ(eaccept_range(self, addr, advise_size, > + SGX_SECINFO_R | SGX_SECINFO_W > + | SGX_SECINFO_REG > + | SGX_SECINFO_PENDING, > + _metadata), 0); > + time_used1 = (double)clock() - start; > + > + EXPECT_EQ(trim_remove_range(self, addr, advise_size, _metadata), 0); > + > + start = clock(); > + EXPECT_EQ(madvise(addr, advise_size, MADV_WILLNEED), 0); > + EXPECT_EQ(eaccept_range(self, addr, advise_size, > + SGX_SECINFO_R | SGX_SECINFO_W > + | SGX_SECINFO_REG > + | SGX_SECINFO_PENDING, > + _metadata), 0); > + time_used2 = (double)clock() - start; > + > + speed_up_percent = (int)((time_used1 - time_used2) / time_used1 * 100); > + TH_LOG("madvise speed up for eaug'ing %10ld pages: %d%%", > + advise_size / PAGE_SIZE, speed_up_percent); > + EXPECT_GE(speed_up_percent, 0); > + EXPECT_EQ(trim_remove_range(self, addr, advise_size, _metadata), 0); > + munmap(addr, advise_size); > + advise_size = (advise_size << 1UL); > + } > + encl_delete(&self->encl); > +} > + > /* > * SGX2 page type modification test in two phases: > * Phase 1: > -- > 2.25.1 > Tested-by: Jarkko Sakkinen <jarkko@kernel.org> Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org> BR, Jarkko
On Fri, 2023-01-27 at 20:55 -0800, Haitao Huang wrote: > +/* > + * Compare performance with and without madvise call before EACCEPT'ing > + * different size of regions. > + */ > +TEST_F_TIMEOUT(enclave, augment_via_madvise, TIMEOUT_DEFAULT) > +{ > [...] > + > + for (i = 0; i < self->encl.nr_segments; i++) { > + struct encl_segment *seg = &self->encl.segment_tbl[i]; > + > + total_size += seg->size; > + } > + > + for (i = 1; i < 52 && advise_size < max_advise_size; i++) { > + addr = mmap((void *)self->encl.encl_base + total_size, advise_size, > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, > + self->encl.fd, 0); I see the problem now. Here 'pgoff' is always 0. I think this is wrong. Shouldn't you use the actual offset relative to the file as pgoff, which is total_size >> PAGE_SHIFT ? [...] > + munmap(addr, advise_size); Incorrect indent here? Perhaps you should use checkpatch.pl? > + advise_size = (advise_size << 1UL); > + } > + encl_delete(&self->encl); > +} > +
On Tue, 14 Feb 2023 20:38:29 -0600, Huang, Kai <kai.huang@intel.com> wrote: > On Fri, 2023-01-27 at 20:55 -0800, Haitao Huang wrote: >> +/* >> + * Compare performance with and without madvise call before EACCEPT'ing >> + * different size of regions. >> + */ >> +TEST_F_TIMEOUT(enclave, augment_via_madvise, TIMEOUT_DEFAULT) >> +{ >> > [...] > >> + >> + for (i = 0; i < self->encl.nr_segments; i++) { >> + struct encl_segment *seg = &self->encl.segment_tbl[i]; >> + >> + total_size += seg->size; >> + } >> + >> + for (i = 1; i < 52 && advise_size < max_advise_size; i++) { >> + addr = mmap((void *)self->encl.encl_base + total_size, advise_size, >> + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, >> + self->encl.fd, 0); > > I see the problem now. Here 'pgoff' is always 0. I think this is wrong. > > Shouldn't you use the actual offset relative to the file as pgoff, which > is > > total_size >> PAGE_SHIFT > > ? > But that will be inconsistent with current usage. We have been using zero offset always including these self tests. The offset is also redundant in this case because it is totally defined by the address for a given enclave fd. > > [...] > >> + munmap(addr, advise_size); > > Incorrect indent here? > > Perhaps you should use checkpatch.pl? > hmm, I did run it: scripts/checkpatch.pl --strict advise_patch_RFC_v4/v4-0004-selftests-sgx-Add-test-for-madvise-.-WILLNEED.patch total: 0 errors, 0 warnings, 0 checks, 179 lines checked Will fix. Thanks Haitao
On Tue, 2023-02-14 at 22:42 -0600, Haitao Huang wrote: > On Tue, 14 Feb 2023 20:38:29 -0600, Huang, Kai <kai.huang@intel.com> wrote: > > > On Fri, 2023-01-27 at 20:55 -0800, Haitao Huang wrote: > > > +/* > > > + * Compare performance with and without madvise call before EACCEPT'ing > > > + * different size of regions. > > > + */ > > > +TEST_F_TIMEOUT(enclave, augment_via_madvise, TIMEOUT_DEFAULT) > > > +{ > > > > > [...] > > > > > + > > > + for (i = 0; i < self->encl.nr_segments; i++) { > > > + struct encl_segment *seg = &self->encl.segment_tbl[i]; > > > + > > > + total_size += seg->size; > > > + } > > > + > > > + for (i = 1; i < 52 && advise_size < max_advise_size; i++) { > > > + addr = mmap((void *)self->encl.encl_base + total_size, advise_size, > > > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, > > > + self->encl.fd, 0); > > > > I see the problem now. Here 'pgoff' is always 0. I think this is wrong. > > > > Shouldn't you use the actual offset relative to the file as pgoff, which > > is > > > > total_size >> PAGE_SHIFT > > > > ? > > > But that will be inconsistent with current usage. We have been using zero > offset always including these self tests. The offset is also redundant in > this case because it is totally defined by the address for a given enclave > fd. > > From mmap() manpage: void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); The contents of a file mapping (as opposed to an anonymous mapping; see MAP_ANONYMOUS below), are initialized using length bytes starting at offset offset in the file (or other object) referred to by the file descriptor fd. offset must be a multiple of the page size as returned by sysconf(_SC_PAGE_SIZE). I think, conceptually, "always using 0 as offset to mmap() different offset of enclave file" is wrong. You never encountered any issue is because SGX driver doesn't use vma->vm_pgoff as you mentioned. I am not entire clear about SGX driver's history, so I am not sure SGX drvier, by design, has "relaxed semantics" of the offset parameter of mmap(), for instance, allow it to be any value. But to me I see no reason SGX should have such relaxed semantics. If you follow mmap() semantics, you won't need to manually set vma->vm_pgoff in sgx_mmap() (which is hacky IMHO) and everything just works. Jarkko/Dave, Do you have any comments?
On Wed, Feb 15, 2023 at 08:46:05AM +0000, Huang, Kai wrote: > On Tue, 2023-02-14 at 22:42 -0600, Haitao Huang wrote: > > On Tue, 14 Feb 2023 20:38:29 -0600, Huang, Kai <kai.huang@intel.com> wrote: > > > > > On Fri, 2023-01-27 at 20:55 -0800, Haitao Huang wrote: > > > > +/* > > > > + * Compare performance with and without madvise call before EACCEPT'ing > > > > + * different size of regions. > > > > + */ > > > > +TEST_F_TIMEOUT(enclave, augment_via_madvise, TIMEOUT_DEFAULT) > > > > +{ > > > > > > > [...] > > > > > > > + > > > > + for (i = 0; i < self->encl.nr_segments; i++) { > > > > + struct encl_segment *seg = &self->encl.segment_tbl[i]; > > > > + > > > > + total_size += seg->size; > > > > + } > > > > + > > > > + for (i = 1; i < 52 && advise_size < max_advise_size; i++) { > > > > + addr = mmap((void *)self->encl.encl_base + total_size, advise_size, > > > > + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, > > > > + self->encl.fd, 0); > > > > > > I see the problem now. Here 'pgoff' is always 0. I think this is wrong. > > > > > > Shouldn't you use the actual offset relative to the file as pgoff, which > > > is > > > > > > total_size >> PAGE_SHIFT > > > > > > ? > > > > > But that will be inconsistent with current usage. We have been using zero > > offset always including these self tests. The offset is also redundant in > > this case because it is totally defined by the address for a given enclave > > fd. > > > > > > From mmap() manpage: > > void *mmap(void *addr, size_t length, int prot, int flags, > int fd, off_t offset); > > The contents of a file mapping (as opposed to an anonymous > mapping; see MAP_ANONYMOUS below), are initialized using length > bytes starting at offset offset in the file (or other object) > referred to by the file descriptor fd. offset must be a multiple > of the page size as returned by sysconf(_SC_PAGE_SIZE). > > I think, conceptually, "always using 0 as offset to mmap() different offset of > enclave file" is wrong. You never encountered any issue is because SGX driver > doesn't use vma->vm_pgoff as you mentioned. > > I am not entire clear about SGX driver's history, so I am not sure SGX drvier, > by design, has "relaxed semantics" of the offset parameter of mmap(), for > instance, allow it to be any value. But to me I see no reason SGX should have > such relaxed semantics. > > If you follow mmap() semantics, you won't need to manually set vma->vm_pgoff in > sgx_mmap() (which is hacky IMHO) and everything just works. > > Jarkko/Dave, > > Do you have any comments? I think I gave my feedback in my earlier comments so yeah I agree with aligning anonymous semantics for sure. BR, Jarkko
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index e457f2d35461..e3432e73af69 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -10,6 +10,7 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <time.h> #include <unistd.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -1360,6 +1361,172 @@ TEST_F_TIMEOUT(enclave, augment_via_eaccept_long, TIMEOUT_DEFAULT) munmap(addr, DYNAMIC_HEAP_SIZE); } +static int eaccept_range(struct _test_data_enclave *self, void *addr, + unsigned long size, uint64_t flags, + struct __test_metadata *_metadata) +{ + struct encl_op_eaccept eaccept_op; + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* + * Run EACCEPT on every page to trigger the #PF->EAUG->EACCEPT(again + * without a #PF). All should be transparent to userspace. + */ + eaccept_op.flags = flags; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + eaccept_op.len = size; + eaccept_op.epc_addr = (uint64_t)(addr); + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + ASSERT_EQ(eaccept_op.ret, 0); + ASSERT_EQ(self->run.function, EEXIT); + + return 0; +} + +static int trim_remove_range(struct _test_data_enclave *self, void *addr, + unsigned long size, struct __test_metadata *_metadata) +{ + int ret, errno_save; + struct sgx_enclave_remove_pages remove_ioc; + struct sgx_enclave_modify_types modt_ioc; + unsigned long offset; + unsigned long count; + + if ((uint64_t)addr <= self->encl.encl_base) + return -1; + offset = (uint64_t)addr - self->encl.encl_base; + + memset(&modt_ioc, 0, sizeof(modt_ioc)); + modt_ioc.offset = offset; + modt_ioc.length = size; + modt_ioc.page_type = SGX_PAGE_TYPE_TRIM; + count = 0; + do { + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + + errno_save = ret == -1 ? errno : 0; + if (errno_save != EAGAIN) + break; + EXPECT_EQ(modt_ioc.result, 0); + + count += modt_ioc.count; + modt_ioc.offset += modt_ioc.count; + modt_ioc.length -= modt_ioc.count; + modt_ioc.result = 0; + modt_ioc.count = 0; + } while (modt_ioc.length != 0); + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(modt_ioc.result, 0); + count += modt_ioc.count; + EXPECT_EQ(count, size); + + EXPECT_EQ(eaccept_range(self, addr, size, + SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED, + _metadata), 0); + + /* Complete page removal. */ + memset(&remove_ioc, 0, sizeof(remove_ioc)); + remove_ioc.offset = offset; + remove_ioc.length = size; + count = 0; + do { + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc); + + errno_save = ret == -1 ? errno : 0; + if (errno_save != EAGAIN) + break; + + count += remove_ioc.count; + remove_ioc.offset += remove_ioc.count; + remove_ioc.length -= remove_ioc.count; + remove_ioc.count = 0; + } while (remove_ioc.length != 0); + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + count += remove_ioc.count; + EXPECT_EQ(count, size); + + return 0; +} + +/* + * Compare performance with and without madvise call before EACCEPT'ing + * different size of regions. + */ +TEST_F_TIMEOUT(enclave, augment_via_madvise, TIMEOUT_DEFAULT) +{ + unsigned long advise_size = PAGE_SIZE; + unsigned long max_advise_size = get_total_epc_mem() * 3UL; + int speed_up_percent; + clock_t start; + double time_used1, time_used2; + size_t total_size = 0; + unsigned long i; + void *addr; + + if (!sgx2_supported()) + SKIP(return, "SGX2 not supported"); + + ASSERT_TRUE(setup_test_encl_dynamic(ENCL_HEAP_SIZE_DEFAULT, + max_advise_size, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + for (i = 0; i < self->encl.nr_segments; i++) { + struct encl_segment *seg = &self->encl.segment_tbl[i]; + + total_size += seg->size; + } + + for (i = 1; i < 52 && advise_size < max_advise_size; i++) { + addr = mmap((void *)self->encl.encl_base + total_size, advise_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, + self->encl.fd, 0); + EXPECT_NE(addr, MAP_FAILED); + + start = clock(); + EXPECT_EQ(eaccept_range(self, addr, advise_size, + SGX_SECINFO_R | SGX_SECINFO_W + | SGX_SECINFO_REG + | SGX_SECINFO_PENDING, + _metadata), 0); + time_used1 = (double)clock() - start; + + EXPECT_EQ(trim_remove_range(self, addr, advise_size, _metadata), 0); + + start = clock(); + EXPECT_EQ(madvise(addr, advise_size, MADV_WILLNEED), 0); + EXPECT_EQ(eaccept_range(self, addr, advise_size, + SGX_SECINFO_R | SGX_SECINFO_W + | SGX_SECINFO_REG + | SGX_SECINFO_PENDING, + _metadata), 0); + time_used2 = (double)clock() - start; + + speed_up_percent = (int)((time_used1 - time_used2) / time_used1 * 100); + TH_LOG("madvise speed up for eaug'ing %10ld pages: %d%%", + advise_size / PAGE_SIZE, speed_up_percent); + EXPECT_GE(speed_up_percent, 0); + EXPECT_EQ(trim_remove_range(self, addr, advise_size, _metadata), 0); + munmap(addr, advise_size); + advise_size = (advise_size << 1UL); + } + encl_delete(&self->encl); +} + /* * SGX2 page type modification test in two phases: * Phase 1:
Measure and compare run time for EAUG'ing different number of EPC pages with/without madvise(..., WILLNEED) call. Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com> --- tools/testing/selftests/sgx/main.c | 167 +++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+)