Message ID | 1510238453-7713-1-git-send-email-lukasz.plewa@intel.com (mailing list archive) |
---|---|
State | New, archived |
On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: > From: Łukasz Plewa <lukasz.plewa@intel.com> > > The next version of libpmem will depend on libndctl - as daxctl > depends on libpmem, it will make building packages inconvenient. > This patch replaces libpmem calls with intrinsic functions what > allows to remove a libpmem dependency. > > Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> > --- > configure.ac | 15 +++++---------- > daxctl/Makefile.am | 4 ---- > daxctl/io.c | 23 +++++++++++++++++++---- > ndctl.spec.in | 7 ------- > 4 files changed, 24 insertions(+), 25 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 5b10381..33a3e6e 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) > > AC_PROG_SED > AC_PROG_MKDIR_P > +AC_CANONICAL_HOST > > AC_ARG_ENABLE([docs], > AS_HELP_STRING([--disable-docs], > @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) > PKG_CHECK_MODULES([UUID], [uuid]) > PKG_CHECK_MODULES([JSON], [json-c]) > > -AC_ARG_WITH([libpmem], > - AS_HELP_STRING([--with-libpmem], > - [Install with libpmem support. @<:@default=no@:>@]), > - [], > - [with_libpmem=no]) > -if test "x$with_libpmem" = "xyes"; then > - PKG_CHECK_MODULES([PMEM], [libpmem]) > - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) > -fi > -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) > +AS_IF([test "x$host_cpu" == "xx86_64"], > + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) > +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) > + > > AC_ARG_WITH([bash-completion-dir], > AS_HELP_STRING([--with-bash-completion-dir[=PATH]], > diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am > index 81d405e..8e3fd7e 100644 > --- a/daxctl/Makefile.am > +++ b/daxctl/Makefile.am > @@ -17,7 +17,3 @@ daxctl_LDADD =\ > ../libutil.a \ > $(UUID_LIBS) \ > $(JSON_LIBS) > - > -if ENABLE_DAXIO > -daxctl_LDADD += $(PMEM_LIBS) > -endif > diff --git a/daxctl/io.c b/daxctl/io.c > index 27e7463..a0f9613 100644 > --- a/daxctl/io.c > +++ b/daxctl/io.c > @@ -20,9 +20,9 @@ > #include <sys/mman.h> > #include <fcntl.h> > #include <unistd.h> > +#include <emmintrin.h> > #include <limits.h> > #include <libgen.h> > -#include <libpmem.h> > #include <util/json.h> > #include <util/filter.h> > #include <util/size.h> > @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) > return 0; > } > > +#define FLUSH_ALIGN 64 > + > +static void clflush(const void *addr, size_t len) > +{ > + /* > + * Loop through cache-line-size (typically 64B) aligned chunks > + * covering the given range. > + */ > + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); > + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) > + _mm_clflush((char *)uptr); > + _mm_sfence(); > +} > + x86 specific stuff should probably be wrapped in an #ifdef > static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > uint64_t len, bool zero) > { > @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > if (zero && dst_dev->is_dax) { > dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > memset(dst, 0, len); > - pmem_persist(dst, len); > + clflush(dst, len); > rc = len; > } else if (dst_dev->is_dax && src_dev->is_dax) { > src = (uint8_t *)src_dev->mmap + src_dev->offset; > dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > - pmem_memcpy_persist(dst, src, len); > + memcpy(dst, src, len); > + clflush(dst, len); I think you would be better off moving this into a seperate memcpy_to_pmem() function. 
That would give you a nice place for an arch-independent fallback too. > rc = len; > } else if (src_dev->is_dax) { > src = (uint8_t *)src_dev->mmap + src_dev->offset; > @@ -422,7 +437,7 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > break; > count += rc; > } while (count != (ssize_t)len); > - pmem_persist(dst, count); > + clflush(dst, count); > rc = count; > if (rc != (ssize_t)len) > printf("Requested size %lu larger than destination.\n", len); > diff --git a/ndctl.spec.in b/ndctl.spec.in > index 4aa133d..b481762 100644 > --- a/ndctl.spec.in > +++ b/ndctl.spec.in > @@ -20,9 +20,6 @@ BuildRequires: pkgconfig(libudev) > BuildRequires: pkgconfig(uuid) > BuildRequires: pkgconfig(json-c) > BuildRequires: pkgconfig(bash-completion) > -%ifarch x86_64 > -BuildRequires: pkgconfig(libpmem) > -%endif > > %description > Utility library for managing the "libnvdimm" subsystem. The "libnvdimm" > @@ -93,11 +90,7 @@ control API for these devices. > %build > echo "VERSION" > version > ./autogen.sh > -%ifarch x86_64 > -%configure --disable-static --enable-local --disable-silent-rules --with-libpmem > -%else > %configure --disable-static --enable-local --disable-silent-rules > -%endif > make %{?_smp_mflags} > > %install > -- > 2.7.4 > > _______________________________________________ > Linux-nvdimm mailing list > Linux-nvdimm@lists.01.org > https://lists.01.org/mailman/listinfo/linux-nvdimm
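A rough sketch of the refactoring suggested above: an #ifdef-guarded x86 flush path behind a separate copy helper, leaving room for an arch-independent fallback. The names memcpy_to_pmem() and flush_to_pmem() and the empty non-x86 stub are illustrative assumptions only, not code from the patch or from libpmem:

```c
#include <stdint.h>
#include <string.h>

#define FLUSH_ALIGN 64	/* x86 cache-line size assumed by this sketch */

#if defined(__x86_64__)
#include <emmintrin.h>

static void flush_to_pmem(const void *addr, size_t len)
{
	uintptr_t uptr;

	/* flush every cache line covering [addr, addr + len) */
	for (uptr = (uintptr_t)addr & ~((uintptr_t)FLUSH_ALIGN - 1);
			uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN)
		_mm_clflush((char *)uptr);
	_mm_sfence();
}
#else
static void flush_to_pmem(const void *addr, size_t len)
{
	/*
	 * Arch-independent fallback left as a stub: a real port would use
	 * the architecture's cache-maintenance instructions (e.g. dc cvac
	 * on arm64) or fail the build until such support exists.
	 */
	(void)addr;
	(void)len;
}
#endif

/* hypothetical helper standing in for pmem_memcpy_persist() in __do_io() */
static void *memcpy_to_pmem(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	flush_to_pmem(dst, len);
	return dst;
}
```

With a helper like this, the memset()+clflush() and memcpy()+clflush() call sites in __do_io() collapse into single calls, and only flush_to_pmem() stays architecture-specific.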
On Thu, Nov 9, 2017 at 6:40 AM, <lukasz.plewa@intel.com> wrote: > > From: Łukasz Plewa <lukasz.plewa@intel.com> > > The next version of libpmem will depend on libndctl - as daxctl > depends on libpmem, it will make building packages inconvenient. > This patch replaces libpmem calls with intrinsic functions what > allows to remove a libpmem dependency. > > Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> [..] > @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) > return 0; > } > > +#define FLUSH_ALIGN 64 > + > +static void clflush(const void *addr, size_t len) > +{ > + /* > + * Loop through cache-line-size (typically 64B) aligned chunks > + * covering the given range. > + */ > + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); > + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) > + _mm_clflush((char *)uptr); > + _mm_sfence(); The clflush instruction is serializing so we don't need the sfence. Can we optionally use clwb here to avoid the cache invalidation? Also does the clwb intrinsic handle falling back to clflushopt and clflush when the cpu does not have clwb support?
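The intrinsics themselves do not fall back automatically; clwb/clflushopt availability has to be detected at runtime, typically from CPUID leaf 7, with clflush as the lowest common denominator. Below is a hedged sketch of that selection. It uses byte-prefix asm encodings (the same trick libpmem and the kernel use) so no -mclwb/-mclflushopt compile flags are needed, and __get_cpuid_count() assumes a reasonably recent gcc/clang. Function names are illustrative:

```c
#include <cpuid.h>
#include <emmintrin.h>
#include <stddef.h>
#include <stdint.h>

/* CPUID.(EAX=7,ECX=0):EBX feature bits */
#define FEAT_CLFLUSHOPT	(1u << 23)
#define FEAT_CLWB	(1u << 24)

static void line_clflush(const void *p)
{
	_mm_clflush(p);
}

static void line_clflushopt(const void *p)
{
	/* clflushopt is clflush with a 0x66 prefix */
	asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)p));
}

static void line_clwb(const void *p)
{
	/* clwb is xsaveopt with a 0x66 prefix */
	asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)p));
}

static void (*flush_line)(const void *) = line_clflush;

static void flush_init(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return;				/* keep the clflush fallback */
	if (ebx & FEAT_CLWB)
		flush_line = line_clwb;		/* write back, keep the line cached */
	else if (ebx & FEAT_CLFLUSHOPT)
		flush_line = line_clflushopt;	/* unordered flush + invalidate */
}

static void flush(const void *addr, size_t len)
{
	uintptr_t uptr;

	for (uptr = (uintptr_t)addr & ~(uintptr_t)63;
			uptr < (uintptr_t)addr + len; uptr += 64)
		flush_line((const void *)uptr);
	_mm_sfence();	/* clwb/clflushopt are weakly ordered; always fence */
}
```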
On Thu, Nov 9, 2017 at 8:05 PM, Oliver <oohall@gmail.com> wrote: > On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: >> From: Łukasz Plewa <lukasz.plewa@intel.com> >> >> The next version of libpmem will depend on libndctl - as daxctl >> depends on libpmem, it will make building packages inconvenient. >> This patch replaces libpmem calls with intrinsic functions what >> allows to remove a libpmem dependency. >> >> Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> >> --- >> configure.ac | 15 +++++---------- >> daxctl/Makefile.am | 4 ---- >> daxctl/io.c | 23 +++++++++++++++++++---- >> ndctl.spec.in | 7 ------- >> 4 files changed, 24 insertions(+), 25 deletions(-) >> >> diff --git a/configure.ac b/configure.ac >> index 5b10381..33a3e6e 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) >> >> AC_PROG_SED >> AC_PROG_MKDIR_P >> +AC_CANONICAL_HOST >> >> AC_ARG_ENABLE([docs], >> AS_HELP_STRING([--disable-docs], >> @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) >> PKG_CHECK_MODULES([UUID], [uuid]) >> PKG_CHECK_MODULES([JSON], [json-c]) >> >> -AC_ARG_WITH([libpmem], >> - AS_HELP_STRING([--with-libpmem], >> - [Install with libpmem support. @<:@default=no@:>@]), >> - [], >> - [with_libpmem=no]) >> -if test "x$with_libpmem" = "xyes"; then >> - PKG_CHECK_MODULES([PMEM], [libpmem]) >> - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) >> -fi >> -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) >> +AS_IF([test "x$host_cpu" == "xx86_64"], >> + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) >> +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) >> + >> >> AC_ARG_WITH([bash-completion-dir], >> AS_HELP_STRING([--with-bash-completion-dir[=PATH]], >> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am >> index 81d405e..8e3fd7e 100644 >> --- a/daxctl/Makefile.am >> +++ b/daxctl/Makefile.am >> @@ -17,7 +17,3 @@ daxctl_LDADD =\ >> ../libutil.a \ >> $(UUID_LIBS) \ >> $(JSON_LIBS) >> - >> -if ENABLE_DAXIO >> -daxctl_LDADD += $(PMEM_LIBS) >> -endif >> diff --git a/daxctl/io.c b/daxctl/io.c >> index 27e7463..a0f9613 100644 >> --- a/daxctl/io.c >> +++ b/daxctl/io.c >> @@ -20,9 +20,9 @@ >> #include <sys/mman.h> >> #include <fcntl.h> >> #include <unistd.h> >> +#include <emmintrin.h> >> #include <limits.h> >> #include <libgen.h> >> -#include <libpmem.h> >> #include <util/json.h> >> #include <util/filter.h> >> #include <util/size.h> >> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) >> return 0; >> } >> > >> +#define FLUSH_ALIGN 64 >> + >> +static void clflush(const void *addr, size_t len) >> +{ >> + /* >> + * Loop through cache-line-size (typically 64B) aligned chunks >> + * covering the given range. 
>> + */ >> + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); >> + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) >> + _mm_clflush((char *)uptr); >> + _mm_sfence(); >> +} >> + > > x86 specific stuff should probably be wrapped in an #ifdef > >> static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> uint64_t len, bool zero) >> { >> @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> if (zero && dst_dev->is_dax) { >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> memset(dst, 0, len); >> - pmem_persist(dst, len); >> + clflush(dst, len); >> rc = len; >> } else if (dst_dev->is_dax && src_dev->is_dax) { >> src = (uint8_t *)src_dev->mmap + src_dev->offset; >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> - pmem_memcpy_persist(dst, src, len); > >> + memcpy(dst, src, len); >> + clflush(dst, len); > > I think you would be better off moving this into a seperate > memcpy_to_pmem() function. That would give you a nice place for > an arch-independent fallback too. I agree, but we might save that refactoring for when another architecture shows up to implement that support.
Oliver <oohall@gmail.com> writes: > On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: >> From: Łukasz Plewa <lukasz.plewa@intel.com> >> >> The next version of libpmem will depend on libndctl - as daxctl >> depends on libpmem, it will make building packages inconvenient. >> This patch replaces libpmem calls with intrinsic functions what >> allows to remove a libpmem dependency. >> >> Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> >> --- >> configure.ac | 15 +++++---------- >> daxctl/Makefile.am | 4 ---- >> daxctl/io.c | 23 +++++++++++++++++++---- >> ndctl.spec.in | 7 ------- >> 4 files changed, 24 insertions(+), 25 deletions(-) >> >> diff --git a/configure.ac b/configure.ac >> index 5b10381..33a3e6e 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) >> >> AC_PROG_SED >> AC_PROG_MKDIR_P >> +AC_CANONICAL_HOST >> >> AC_ARG_ENABLE([docs], >> AS_HELP_STRING([--disable-docs], >> @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) >> PKG_CHECK_MODULES([UUID], [uuid]) >> PKG_CHECK_MODULES([JSON], [json-c]) >> >> -AC_ARG_WITH([libpmem], >> - AS_HELP_STRING([--with-libpmem], >> - [Install with libpmem support. @<:@default=no@:>@]), >> - [], >> - [with_libpmem=no]) >> -if test "x$with_libpmem" = "xyes"; then >> - PKG_CHECK_MODULES([PMEM], [libpmem]) >> - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) >> -fi >> -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) >> +AS_IF([test "x$host_cpu" == "xx86_64"], >> + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) >> +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) >> + >> >> AC_ARG_WITH([bash-completion-dir], >> AS_HELP_STRING([--with-bash-completion-dir[=PATH]], >> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am >> index 81d405e..8e3fd7e 100644 >> --- a/daxctl/Makefile.am >> +++ b/daxctl/Makefile.am >> @@ -17,7 +17,3 @@ daxctl_LDADD =\ >> ../libutil.a \ >> $(UUID_LIBS) \ >> $(JSON_LIBS) >> - >> -if ENABLE_DAXIO >> -daxctl_LDADD += $(PMEM_LIBS) >> -endif >> diff --git a/daxctl/io.c b/daxctl/io.c >> index 27e7463..a0f9613 100644 >> --- a/daxctl/io.c >> +++ b/daxctl/io.c >> @@ -20,9 +20,9 @@ >> #include <sys/mman.h> >> #include <fcntl.h> >> #include <unistd.h> >> +#include <emmintrin.h> >> #include <limits.h> >> #include <libgen.h> >> -#include <libpmem.h> >> #include <util/json.h> >> #include <util/filter.h> >> #include <util/size.h> >> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) >> return 0; >> } >> > >> +#define FLUSH_ALIGN 64 >> + >> +static void clflush(const void *addr, size_t len) >> +{ >> + /* >> + * Loop through cache-line-size (typically 64B) aligned chunks >> + * covering the given range. >> + */ >> + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); >> + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) >> + _mm_clflush((char *)uptr); >> + _mm_sfence(); >> +} >> + > > x86 specific stuff should probably be wrapped in an #ifdef FLUSH_ALIGN is x86 specific. There's actually a sysctl to tell you the L1 dcache size, so I'd suggest using that. Calling it clflush doesn't bug me. I don't see anything else x86 specific there. If you start using the sysctl, you can get rid of the newly imposed x86 restriction, too. 
>> static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> uint64_t len, bool zero) >> { >> @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> if (zero && dst_dev->is_dax) { >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> memset(dst, 0, len); >> - pmem_persist(dst, len); >> + clflush(dst, len); >> rc = len; >> } else if (dst_dev->is_dax && src_dev->is_dax) { >> src = (uint8_t *)src_dev->mmap + src_dev->offset; >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> - pmem_memcpy_persist(dst, src, len); > >> + memcpy(dst, src, len); >> + clflush(dst, len); > > I think you would be better off moving this into a seperate > memcpy_to_pmem() function. That would give you a nice place for > an arch-independent fallback too. What's more, as I see this is basically lifting what's in libpmem, I'd like to point out that pmem_memcpy_persist will use MOVNT where available. This lifted version doesn't have that optimization. I don't know, is it impossible to make this work w/o yanking out -lpmem? Jeff
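Two illustrative fragments for the points above, assuming the "sysctl" suggestion refers to the coherency (cache-line) size rather than the total L1 size: a runtime lookup that could replace the hard-coded FLUSH_ALIGN (and with it, the new x86-only restriction), and a minimal MOVNT-style copy of the kind pmem_memcpy_persist() uses internally. _SC_LEVEL1_DCACHE_LINESIZE is a glibc extension and the sysfs path is Linux-specific; neither fragment comes from the patch or from libpmem:

```c
#include <emmintrin.h>
#include <stdio.h>
#include <unistd.h>

/* Query the cache-line size at runtime instead of hard-coding FLUSH_ALIGN. */
static size_t cache_line_size(void)
{
	long size = 0;

#ifdef _SC_LEVEL1_DCACHE_LINESIZE
	size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);	/* glibc extension */
#endif
	if (size <= 0) {
		FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/"
				"index0/coherency_line_size", "r");

		if (f) {
			if (fscanf(f, "%ld", &size) != 1)
				size = 0;
			fclose(f);
		}
	}
	return size > 0 ? (size_t)size : 64;	/* last-resort default */
}

/*
 * Minimal non-temporal copy: movnti stores bypass the CPU cache, so the
 * destination does not need a clflush pass afterwards, only an sfence.
 * Assumes len is a multiple of 8 to keep the sketch short; x86-64 only.
 */
static void memcpy_movnt(void *dst, const void *src, size_t len)
{
	long long *d = dst;
	const long long *s = src;
	size_t i;

	for (i = 0; i < len / sizeof(long long); i++)
		_mm_stream_si64(&d[i], s[i]);
	_mm_sfence();
}
```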
On Fri, Nov 10, 2017 at 7:31 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > Oliver <oohall@gmail.com> writes: > >> On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: >>> From: Łukasz Plewa <lukasz.plewa@intel.com> >>> >>> The next version of libpmem will depend on libndctl - as daxctl >>> depends on libpmem, it will make building packages inconvenient. >>> This patch replaces libpmem calls with intrinsic functions what >>> allows to remove a libpmem dependency. >>> >>> Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> >>> --- >>> configure.ac | 15 +++++---------- >>> daxctl/Makefile.am | 4 ---- >>> daxctl/io.c | 23 +++++++++++++++++++---- >>> ndctl.spec.in | 7 ------- >>> 4 files changed, 24 insertions(+), 25 deletions(-) >>> >>> diff --git a/configure.ac b/configure.ac >>> index 5b10381..33a3e6e 100644 >>> --- a/configure.ac >>> +++ b/configure.ac >>> @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) >>> >>> AC_PROG_SED >>> AC_PROG_MKDIR_P >>> +AC_CANONICAL_HOST >>> >>> AC_ARG_ENABLE([docs], >>> AS_HELP_STRING([--disable-docs], >>> @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) >>> PKG_CHECK_MODULES([UUID], [uuid]) >>> PKG_CHECK_MODULES([JSON], [json-c]) >>> >>> -AC_ARG_WITH([libpmem], >>> - AS_HELP_STRING([--with-libpmem], >>> - [Install with libpmem support. @<:@default=no@:>@]), >>> - [], >>> - [with_libpmem=no]) >>> -if test "x$with_libpmem" = "xyes"; then >>> - PKG_CHECK_MODULES([PMEM], [libpmem]) >>> - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) >>> -fi >>> -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) >>> +AS_IF([test "x$host_cpu" == "xx86_64"], >>> + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) >>> +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) >>> + >>> >>> AC_ARG_WITH([bash-completion-dir], >>> AS_HELP_STRING([--with-bash-completion-dir[=PATH]], >>> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am >>> index 81d405e..8e3fd7e 100644 >>> --- a/daxctl/Makefile.am >>> +++ b/daxctl/Makefile.am >>> @@ -17,7 +17,3 @@ daxctl_LDADD =\ >>> ../libutil.a \ >>> $(UUID_LIBS) \ >>> $(JSON_LIBS) >>> - >>> -if ENABLE_DAXIO >>> -daxctl_LDADD += $(PMEM_LIBS) >>> -endif >>> diff --git a/daxctl/io.c b/daxctl/io.c >>> index 27e7463..a0f9613 100644 >>> --- a/daxctl/io.c >>> +++ b/daxctl/io.c >>> @@ -20,9 +20,9 @@ >>> #include <sys/mman.h> >>> #include <fcntl.h> >>> #include <unistd.h> >>> +#include <emmintrin.h> >>> #include <limits.h> >>> #include <libgen.h> >>> -#include <libpmem.h> >>> #include <util/json.h> >>> #include <util/filter.h> >>> #include <util/size.h> >>> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) >>> return 0; >>> } >>> >> >>> +#define FLUSH_ALIGN 64 >>> + >>> +static void clflush(const void *addr, size_t len) >>> +{ >>> + /* >>> + * Loop through cache-line-size (typically 64B) aligned chunks >>> + * covering the given range. >>> + */ >>> + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); >>> + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) >>> + _mm_clflush((char *)uptr); >>> + _mm_sfence(); >>> +} >>> + >> >> x86 specific stuff should probably be wrapped in an #ifdef > > FLUSH_ALIGN is x86 specific. There's actually a sysctl to tell you the > L1 dcache size, so I'd suggest using that. Calling it clflush doesn't > bug me. I don't see anything else x86 specific there. > > If you start using the sysctl, you can get rid of the newly imposed x86 > restriction, too. 
> >>> static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >>> uint64_t len, bool zero) >>> { >>> @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >>> if (zero && dst_dev->is_dax) { >>> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >>> memset(dst, 0, len); >>> - pmem_persist(dst, len); >>> + clflush(dst, len); >>> rc = len; >>> } else if (dst_dev->is_dax && src_dev->is_dax) { >>> src = (uint8_t *)src_dev->mmap + src_dev->offset; >>> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >>> - pmem_memcpy_persist(dst, src, len); >> >>> + memcpy(dst, src, len); >>> + clflush(dst, len); >> >> I think you would be better off moving this into a seperate >> memcpy_to_pmem() function. That would give you a nice place for >> an arch-independent fallback too. > > What's more, as I see this is basically lifting what's in libpmem, I'd > like to point out that pmem_memcpy_persist will use MOVNT where > available. This lifted version doesn't have that optimization. > > I don't know, is it impossible to make this work w/o yanking out -lpmem? My preference would be that all the pmem_memcpy* routines move to libndctl since it is the lower level library. Arrange for nvml to depend on libndctl for this in addition to the other libndctl dependencies that seem to be in the works.
Dan Williams <dan.j.williams@intel.com> writes: >> I don't know, is it impossible to make this work w/o yanking out -lpmem? > > My preference would be that allthe pmem_memcpy* routines move to > libndctl since it is the lower level library. Arrange for nvml to > depend on libdnctl for this in addition to the other libndctl > dependencies that seem to be in the works. That's fine by me. Can you convince the nvml developers to do that? Cheers, Jeff
On Fri, Nov 10, 2017 at 8:42 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > Dan Williams <dan.j.williams@intel.com> writes: > >>> I don't know, is it impossible to make this work w/o yanking out -lpmem? >> >> My preference would be that allthe pmem_memcpy* routines move to >> libndctl since it is the lower level library. Arrange for nvml to >> depend on libdnctl for this in addition to the other libndctl >> dependencies that seem to be in the works. > > That's fine by me. Can you convince the nvml developers to do that? > The other concern is that libpmem supports other OSes, but libndctl is just Linux. That seems to suggest we need the memcpy routines to be their own standalone library that both ndctl and libpmem can import.
> On Fri, Nov 10, 2017 at 8:42 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > > Dan Williams <dan.j.williams@intel.com> writes: > > > >>> I don't know, is it impossible to make this work w/o yanking out - > lpmem? > >> > >> My preference would be that allthe pmem_memcpy* routines move to > >> libndctl since it is the lower level library. Arrange for nvml to > >> depend on libdnctl for this in addition to the other libndctl > >> dependencies that seem to be in the works. > > > > That's fine by me. Can you convince the nvml developers to do that? > > > > The other concern is that libpmem supports other OSes, but libndctl is > just Linux. That seems to suggest we need the memcpy routines to be > their own standalone library that both ndctl and libpmem can import. Folks, I understand your concerns, but to be honest, I don't like the idea of moving pmem_memcpy* routines to libndctl or yet another library. The libpmem *is* the library. In practice, if we move pmem_memcpy*/pmem_memset* to ndctl, it means that almost the _entire_ libpmem code would need to be moved to ndctl (maybe without the APIs like pmem_map_file/pmem_unmap). So, looks like it's all about moving libpmem from nvml repo to ndctl repo, isn't it? It leads us to another question - perhaps we should simply merge those two projects into one? It would solve all the problems with cross-dependencies between ndctl and nvml, once and for all. However, besides some technical issues, one of the problems is the license - GPL vs. BSD - and I'm not sure how to deal with it. For me, there are a few options: 1) Integrate the patch proposed by Lukasz (maybe slightly modified). This looks like the easiest and simplest, thus preferred solution. Of course, we can detect the cache line size and the availability of CLFLUSHOPT/CLWB, if necessary. Correct me if I'm wrong, but I don't expect "daxctl io" to be used frequently, so even if it's not optimal (does not use MOVNT) and we need to flush the cache, the performance shouldn't be a big issue. BTW, this code is only compiled for x86, so maybe we don't even need to care about other architectures? 2) Clone the pmem_memcpy/pmem_memset/pmem_persist routines into ndctl (for internal use only). The libpmem would still provide those interfaces. It duplicates the code (is it a big deal?), but eliminates dependency on libpmem. 3) Remove "daxctl io" command from ndctl and integrate it with NVML's "pmempool" utility (or a dedicated tool/subpkg). There's not much we need to do, as in practice we already have a similar tool in the nvml repo (src/test/tools/ddmap). So far, it's only used for testing, but could become a standalone "ddmap" or "daxio" utility. Keep in mind that most of the problems with library dependencies are because of Device DAX support. Device DAX was intended to be a temporary workaround until the Linux kernel/filesystems provide full support for PM programming model, but in practice it cluttered NVML with DevDAX-specific code, triggered some deep modifications of NVML internals, and now you're proposing yet another significant change. Once the MAP_SYNC flag is available, Device DAX becomes less important, I believe, and might get eliminated eventually. Shall we change the architecture of NVML or how the code is organized just because of it? I opt for the simplest solution, as proposed by Lukasz. Regards, Krzysztof
On Thu, Nov 23, 2017 at 9:23 AM, Czurylo, Krzysztof <krzysztof.czurylo@intel.com> wrote: >> On Fri, Nov 10, 2017 at 8:42 AM, Jeff Moyer <jmoyer@redhat.com> wrote: >> > Dan Williams <dan.j.williams@intel.com> writes: >> > >> >>> I don't know, is it impossible to make this work w/o yanking out - >> lpmem? >> >> >> >> My preference would be that allthe pmem_memcpy* routines move to >> >> libndctl since it is the lower level library. Arrange for nvml to >> >> depend on libdnctl for this in addition to the other libndctl >> >> dependencies that seem to be in the works. >> > >> > That's fine by me. Can you convince the nvml developers to do that? >> > >> >> The other concern is that libpmem supports other OSes, but libndctl is >> just Linux. That seems to suggest we need the memcpy routines to be >> their own standalone library that both ndctl and libpmem can import. > > Folks, > > I understand your concerns, but to be honest, I don't like the idea of > moving pmem_memcpy* routines to libndctl or yet another library. > The libpmem *is* the library. > In practice, if we move pmem_memcpy*/pmem_memset* to ndctl, it means > that almost the _entire_ libpmem code would need to be moved to ndctl > (maybe without the APIs like pmem_map_file/pmem_unmap). So, looks like > it's all about moving libpmem from nvml repo to ndctl repo, isn't it? > It leads us to another question - perhaps we should simply merge those > two projects into one? It would solve all the problems with > cross-dependencies between ndctl and nvml, once and for all. > However, besides some technical issues, one of the problems is > the license - GPL vs. BSD - and I'm not sure how to deal with it. > > For me, there are a few options: > 1) Integrate the patch proposed by Lukasz (maybe slightly modified). > This looks like the easiest and simplest, thus preferred solution. > Of course, we can detect the cache line size and the availability > of CLFLUSHOPT/CLWB, if necessary. > Correct me if I'm wrong, but I don't expect "daxctl io" to be used > frequently, so even if it's not optimal (does not use MOVNT) and we > need to flush the cache, the performance shouldn't be a big issue. > BTW, this code is only compiled for x86, so maybe we don't even need > to care about other architectures? > > 2) Clone the pmem_memcpy/pmem_memset/pmem_persist routines into ndctl > (for internal use only). The libpmem would still provide those > interfaces. > It duplicates the code (is it a big deal?), but eliminates dependency > on libpmem. > > 3) Remove "daxctl io" command from ndctl and integrate it with > NVML's "pmempool" utility (or a dedicated tool/subpkg). > There's not much we need to do, as in practice we already have a similar > tool in the nvml repo (src/test/tools/ddmap). So far, it's only used > for testing, but could become a standalone "ddmap" or "daxio" utility. I'd like to investigate this option. We need a utility to manage (provision, re-provision, backup) device-dax capacity, and I don't much care if it comes from the daxctl package, or elsewhere. > Keep in mind that most of the problems with library dependencies are > because of Device DAX support. Device DAX was intended to be a temporary > workaround until the Linux kernel/filesystems provide full support for > PM programming model, but in practice it cluttered NVML with > DevDAX-specific code, triggered some deep modifications of NVML internals, > and now you're proposing yet another significant change. 
> Once the MAP_SYNC flag is available, Device DAX becomes less important, > I believe, and might get eliminated eventually. Shall we change the > architecture of NVML or how the code is organized just because of it? I disagree with this assessment. We continue to find uses for device-dax as a hugetlbfs alternative, and it remains the only interface that is safe for RDMA and virtualization with dax. MAP_SYNC is not a device-dax replacement. > I opt for the simplest solution, as proposed by Lukasz. Let's take a look at shipping 'daxctl io' alongside pmempool. I do not want multiple pmem_memcpy() implementations to start proliferating around various userspace packages.
> -----Original Message----- > From: Dan Williams [mailto:dan.j.williams@intel.com] > Sent: Thursday, November 23, 2017 19:09 > To: Czurylo, Krzysztof > Cc: Jeff Moyer; Plewa, Lukasz; linux-nvdimm@lists.01.org > Subject: Re: [PATCH] daxctl: remove libpmem dependency > (...) > > 3) Remove "daxctl io" command from ndctl and integrate it with NVML's > > "pmempool" utility (or a dedicated tool/subpkg). > > There's not much we need to do, as in practice we already have a > > similar tool in the nvml repo (src/test/tools/ddmap). So far, it's > > only used for testing, but could become a standalone "ddmap" or > "daxio" utility. > > I'd like to investigate this option. We need a utility to manage > (provision, re-provision, backup) device-dax capacity, and I don't much > care if it comes from the daxctl package, or elsewhere. > (...) > Let's take a look at shipping 'daxctl io' alongside pmempool. I do not > want multiple pmem_memcpy() implementations to start proliferating > around various userspace packages. If we choose this option, do we need to implement "daxctl list" command? Since "list" is also supported by ndctl (though the output is slightly different), I'd prefer to only implement "daxctl io" in nvml. Anything else this tool should be able to do besides the "io"? Are there any plans for other features/commands? -- K.
On Mon, Nov 27, 2017 at 9:33 AM, Czurylo, Krzysztof <krzysztof.czurylo@intel.com> wrote: > > >> -----Original Message----- >> From: Dan Williams [mailto:dan.j.williams@intel.com] >> Sent: Thursday, November 23, 2017 19:09 >> To: Czurylo, Krzysztof >> Cc: Jeff Moyer; Plewa, Lukasz; linux-nvdimm@lists.01.org >> Subject: Re: [PATCH] daxctl: remove libpmem dependency >> (...) >> > 3) Remove "daxctl io" command from ndctl and integrate it with NVML's >> > "pmempool" utility (or a dedicated tool/subpkg). >> > There's not much we need to do, as in practice we already have a >> > similar tool in the nvml repo (src/test/tools/ddmap). So far, it's >> > only used for testing, but could become a standalone "ddmap" or >> "daxio" utility. >> >> I'd like to investigate this option. We need a utility to manage >> (provision, re-provision, backup) device-dax capacity, and I don't much >> care if it comes from the daxctl package, or elsewhere. >> (...) >> Let's take a look at shipping 'daxctl io' alongside pmempool. I do not >> want multiple pmem_memcpy() implementations to start proliferating >> around various userspace packages. > > If we choose this option, do we need to implement "daxctl list" command? > Since "list" is also supported by ndctl (though the output is slightly > different), I'd prefer to only implement "daxctl io" in nvml. I'm fine with just implementing the "io" command as a singular command when this moves over, maybe just call it 'daxio'? > Anything else this tool should be able to do besides the "io"? > Are there any plans for other features/commands? Yes, daxctl will continue to be enhanced for the non-nvdimm/pmem use cases for device-dax.
Dan Williams <dan.j.williams@intel.com> writes: > On Mon, Nov 27, 2017 at 9:33 AM, Czurylo, Krzysztof > <krzysztof.czurylo@intel.com> wrote: >> If we choose this option, do we need to implement "daxctl list" command? >> Since "list" is also supported by ndctl (though the output is slightly >> different), I'd prefer to only implement "daxctl io" in nvml. > > I'm fine with just implementing the "io" the command as a singular > command when this moves over, maybe just call it 'daxio'? > >> Anything else this tool should be able to do besides the "io"? >> Are there any plans for other features/commands? > > Yes, daxctl will continue to be enhanced for the non-nvdimm/pmem use > cases for device-dax. I think Krzysztof was asking whether dax_io was going to grow any new features. -Jeff
On Mon, Nov 27, 2017 at 10:14 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > Dan Williams <dan.j.williams@intel.com> writes: > >> On Mon, Nov 27, 2017 at 9:33 AM, Czurylo, Krzysztof >> <krzysztof.czurylo@intel.com> wrote: > >>> If we choose this option, do we need to implement "daxctl list" command? >>> Since "list" is also supported by ndctl (though the output is slightly >>> different), I'd prefer to only implement "daxctl io" in nvml. >> >> I'm fine with just implementing the "io" the command as a singular >> command when this moves over, maybe just call it 'daxio'? >> >>> Anything else this tool should be able to do besides the "io"? >>> Are there any plans for other features/commands? >> >> Yes, daxctl will continue to be enhanced for the non-nvdimm/pmem use >> cases for device-dax. > > I think Krzysztof was asking whether dax_io was going to grow any new > features. Nothing more on the horizon that I can see.
diff --git a/configure.ac b/configure.ac index 5b10381..33a3e6e 100644 --- a/configure.ac +++ b/configure.ac @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) AC_PROG_SED AC_PROG_MKDIR_P +AC_CANONICAL_HOST AC_ARG_ENABLE([docs], AS_HELP_STRING([--disable-docs], @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) PKG_CHECK_MODULES([UUID], [uuid]) PKG_CHECK_MODULES([JSON], [json-c]) -AC_ARG_WITH([libpmem], - AS_HELP_STRING([--with-libpmem], - [Install with libpmem support. @<:@default=no@:>@]), - [], - [with_libpmem=no]) -if test "x$with_libpmem" = "xyes"; then - PKG_CHECK_MODULES([PMEM], [libpmem]) - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) -fi -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) +AS_IF([test "x$host_cpu" == "xx86_64"], + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) + AC_ARG_WITH([bash-completion-dir], AS_HELP_STRING([--with-bash-completion-dir[=PATH]], diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am index 81d405e..8e3fd7e 100644 --- a/daxctl/Makefile.am +++ b/daxctl/Makefile.am @@ -17,7 +17,3 @@ daxctl_LDADD =\ ../libutil.a \ $(UUID_LIBS) \ $(JSON_LIBS) - -if ENABLE_DAXIO -daxctl_LDADD += $(PMEM_LIBS) -endif diff --git a/daxctl/io.c b/daxctl/io.c index 27e7463..a0f9613 100644 --- a/daxctl/io.c +++ b/daxctl/io.c @@ -20,9 +20,9 @@ #include <sys/mman.h> #include <fcntl.h> #include <unistd.h> +#include <emmintrin.h> #include <limits.h> #include <libgen.h> -#include <libpmem.h> #include <util/json.h> #include <util/filter.h> #include <util/size.h> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) return 0; } +#define FLUSH_ALIGN 64 + +static void clflush(const void *addr, size_t len) +{ + /* + * Loop through cache-line-size (typically 64B) aligned chunks + * covering the given range. + */ + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) + _mm_clflush((char *)uptr); + _mm_sfence(); +} + static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, uint64_t len, bool zero) { @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, if (zero && dst_dev->is_dax) { dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; memset(dst, 0, len); - pmem_persist(dst, len); + clflush(dst, len); rc = len; } else if (dst_dev->is_dax && src_dev->is_dax) { src = (uint8_t *)src_dev->mmap + src_dev->offset; dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; - pmem_memcpy_persist(dst, src, len); + memcpy(dst, src, len); + clflush(dst, len); rc = len; } else if (src_dev->is_dax) { src = (uint8_t *)src_dev->mmap + src_dev->offset; @@ -422,7 +437,7 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, break; count += rc; } while (count != (ssize_t)len); - pmem_persist(dst, count); + clflush(dst, count); rc = count; if (rc != (ssize_t)len) printf("Requested size %lu larger than destination.\n", len); diff --git a/ndctl.spec.in b/ndctl.spec.in index 4aa133d..b481762 100644 --- a/ndctl.spec.in +++ b/ndctl.spec.in @@ -20,9 +20,6 @@ BuildRequires: pkgconfig(libudev) BuildRequires: pkgconfig(uuid) BuildRequires: pkgconfig(json-c) BuildRequires: pkgconfig(bash-completion) -%ifarch x86_64 -BuildRequires: pkgconfig(libpmem) -%endif %description Utility library for managing the "libnvdimm" subsystem. The "libnvdimm" @@ -93,11 +90,7 @@ control API for these devices. 
%build echo "VERSION" > version ./autogen.sh -%ifarch x86_64 -%configure --disable-static --enable-local --disable-silent-rules --with-libpmem -%else %configure --disable-static --enable-local --disable-silent-rules -%endif make %{?_smp_mflags} %install