Message ID | 1510238453-7713-1-git-send-email-lukasz.plewa@intel.com (mailing list archive) |
---|---|
State | New, archived |
On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: > From: Łukasz Plewa <lukasz.plewa@intel.com> > > The next version of libpmem will depend on libndctl - as daxctl > depends on libpmem, it will make building packages inconvenient. > This patch replaces libpmem calls with intrinsic functions what > allows to remove a libpmem dependency. > > Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> > --- > configure.ac | 15 +++++---------- > daxctl/Makefile.am | 4 ---- > daxctl/io.c | 23 +++++++++++++++++++---- > ndctl.spec.in | 7 ------- > 4 files changed, 24 insertions(+), 25 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 5b10381..33a3e6e 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) > > AC_PROG_SED > AC_PROG_MKDIR_P > +AC_CANONICAL_HOST > > AC_ARG_ENABLE([docs], > AS_HELP_STRING([--disable-docs], > @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) > PKG_CHECK_MODULES([UUID], [uuid]) > PKG_CHECK_MODULES([JSON], [json-c]) > > -AC_ARG_WITH([libpmem], > - AS_HELP_STRING([--with-libpmem], > - [Install with libpmem support. @<:@default=no@:>@]), > - [], > - [with_libpmem=no]) > -if test "x$with_libpmem" = "xyes"; then > - PKG_CHECK_MODULES([PMEM], [libpmem]) > - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) > -fi > -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) > +AS_IF([test "x$host_cpu" == "xx86_64"], > + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) > +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) > + > > AC_ARG_WITH([bash-completion-dir], > AS_HELP_STRING([--with-bash-completion-dir[=PATH]], > diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am > index 81d405e..8e3fd7e 100644 > --- a/daxctl/Makefile.am > +++ b/daxctl/Makefile.am > @@ -17,7 +17,3 @@ daxctl_LDADD =\ > ../libutil.a \ > $(UUID_LIBS) \ > $(JSON_LIBS) > - > -if ENABLE_DAXIO > -daxctl_LDADD += $(PMEM_LIBS) > -endif > diff --git a/daxctl/io.c b/daxctl/io.c > index 27e7463..a0f9613 100644 > --- a/daxctl/io.c > +++ b/daxctl/io.c > @@ -20,9 +20,9 @@ > #include <sys/mman.h> > #include <fcntl.h> > #include <unistd.h> > +#include <emmintrin.h> > #include <limits.h> > #include <libgen.h> > -#include <libpmem.h> > #include <util/json.h> > #include <util/filter.h> > #include <util/size.h> > @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) > return 0; > } > > +#define FLUSH_ALIGN 64 > + > +static void clflush(const void *addr, size_t len) > +{ > + /* > + * Loop through cache-line-size (typically 64B) aligned chunks > + * covering the given range. > + */ > + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); > + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) > + _mm_clflush((char *)uptr); > + _mm_sfence(); > +} > + x86 specific stuff should probably be wrapped in an #ifdef > static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > uint64_t len, bool zero) > { > @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > if (zero && dst_dev->is_dax) { > dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > memset(dst, 0, len); > - pmem_persist(dst, len); > + clflush(dst, len); > rc = len; > } else if (dst_dev->is_dax && src_dev->is_dax) { > src = (uint8_t *)src_dev->mmap + src_dev->offset; > dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > - pmem_memcpy_persist(dst, src, len); > + memcpy(dst, src, len); > + clflush(dst, len); I think you would be better off moving this into a seperate memcpy_to_pmem() function. 
That would give you a nice place for an arch-independent fallback too. > rc = len; > } else if (src_dev->is_dax) { > src = (uint8_t *)src_dev->mmap + src_dev->offset; > @@ -422,7 +437,7 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > break; > count += rc; > } while (count != (ssize_t)len); > - pmem_persist(dst, count); > + clflush(dst, count); > rc = count; > if (rc != (ssize_t)len) > printf("Requested size %lu larger than destination.\n", len); > diff --git a/ndctl.spec.in b/ndctl.spec.in > index 4aa133d..b481762 100644 > --- a/ndctl.spec.in > +++ b/ndctl.spec.in > @@ -20,9 +20,6 @@ BuildRequires: pkgconfig(libudev) > BuildRequires: pkgconfig(uuid) > BuildRequires: pkgconfig(json-c) > BuildRequires: pkgconfig(bash-completion) > -%ifarch x86_64 > -BuildRequires: pkgconfig(libpmem) > -%endif > > %description > Utility library for managing the "libnvdimm" subsystem. The "libnvdimm" > @@ -93,11 +90,7 @@ control API for these devices. > %build > echo "VERSION" > version > ./autogen.sh > -%ifarch x86_64 > -%configure --disable-static --enable-local --disable-silent-rules --with-libpmem > -%else > %configure --disable-static --enable-local --disable-silent-rules > -%endif > make %{?_smp_mflags} > > %install > -- > 2.7.4 > > _______________________________________________ > Linux-nvdimm mailing list > Linux-nvdimm@lists.01.org > https://lists.01.org/mailman/listinfo/linux-nvdimm
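A rough sketch of the refactoring suggested above: an #ifdef-guarded x86 flush path behind a separate copy helper, leaving room for an arch-independent fallback. The names memcpy_to_pmem() and flush_to_pmem() and the empty non-x86 stub are illustrative assumptions only, not code from the patch or from libpmem:

```c
#include <stdint.h>
#include <string.h>

#define FLUSH_ALIGN 64	/* x86 cache-line size assumed by this sketch */

#if defined(__x86_64__)
#include <emmintrin.h>

static void flush_to_pmem(const void *addr, size_t len)
{
	uintptr_t uptr;

	/* flush every cache line covering [addr, addr + len) */
	for (uptr = (uintptr_t)addr & ~((uintptr_t)FLUSH_ALIGN - 1);
			uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN)
		_mm_clflush((char *)uptr);
	_mm_sfence();
}
#else
static void flush_to_pmem(const void *addr, size_t len)
{
	/*
	 * Arch-independent fallback left as a stub: a real port would use
	 * the architecture's cache-maintenance instructions (e.g. dc cvac
	 * on arm64) or fail the build until such support exists.
	 */
	(void)addr;
	(void)len;
}
#endif

/* hypothetical helper standing in for pmem_memcpy_persist() in __do_io() */
static void *memcpy_to_pmem(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	flush_to_pmem(dst, len);
	return dst;
}
```

With a helper like this, the memset()+clflush() and memcpy()+clflush() call sites in __do_io() collapse into single calls, and only flush_to_pmem() stays architecture-specific.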
On Thu, Nov 9, 2017 at 6:40 AM, <lukasz.plewa@intel.com> wrote: > > From: Łukasz Plewa <lukasz.plewa@intel.com> > > The next version of libpmem will depend on libndctl - as daxctl > depends on libpmem, it will make building packages inconvenient. > This patch replaces libpmem calls with intrinsic functions what > allows to remove a libpmem dependency. > > Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> [..] > @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) > return 0; > } > > +#define FLUSH_ALIGN 64 > + > +static void clflush(const void *addr, size_t len) > +{ > + /* > + * Loop through cache-line-size (typically 64B) aligned chunks > + * covering the given range. > + */ > + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); > + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) > + _mm_clflush((char *)uptr); > + _mm_sfence(); The clflush instruction is serializing so we don't need the sfence. Can we optionally use clwb here to avoid the cache invalidation? Also does the clwb intrinsic handle falling back to clflushopt and clflush when the cpu does not have clwb support?
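The intrinsics themselves do not fall back automatically; clwb/clflushopt availability has to be detected at runtime, typically from CPUID leaf 7, with clflush as the lowest common denominator. Below is a hedged sketch of that selection. It uses byte-prefix asm encodings (the same trick libpmem and the kernel use) so no -mclwb/-mclflushopt compile flags are needed, and __get_cpuid_count() assumes a reasonably recent gcc/clang. Function names are illustrative:

```c
#include <cpuid.h>
#include <emmintrin.h>
#include <stddef.h>
#include <stdint.h>

/* CPUID.(EAX=7,ECX=0):EBX feature bits */
#define FEAT_CLFLUSHOPT	(1u << 23)
#define FEAT_CLWB	(1u << 24)

static void line_clflush(const void *p)
{
	_mm_clflush(p);
}

static void line_clflushopt(const void *p)
{
	/* clflushopt is clflush with a 0x66 prefix */
	asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)p));
}

static void line_clwb(const void *p)
{
	/* clwb is xsaveopt with a 0x66 prefix */
	asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)p));
}

static void (*flush_line)(const void *) = line_clflush;

static void flush_init(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return;				/* keep the clflush fallback */
	if (ebx & FEAT_CLWB)
		flush_line = line_clwb;		/* write back, keep the line cached */
	else if (ebx & FEAT_CLFLUSHOPT)
		flush_line = line_clflushopt;	/* unordered flush + invalidate */
}

static void flush(const void *addr, size_t len)
{
	uintptr_t uptr;

	for (uptr = (uintptr_t)addr & ~(uintptr_t)63;
			uptr < (uintptr_t)addr + len; uptr += 64)
		flush_line((const void *)uptr);
	_mm_sfence();	/* clwb/clflushopt are weakly ordered; always fence */
}
```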
On Thu, Nov 9, 2017 at 8:05 PM, Oliver <oohall@gmail.com> wrote: > On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: >> From: Łukasz Plewa <lukasz.plewa@intel.com> >> >> The next version of libpmem will depend on libndctl - as daxctl >> depends on libpmem, it will make building packages inconvenient. >> This patch replaces libpmem calls with intrinsic functions what >> allows to remove a libpmem dependency. >> >> Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> >> --- >> configure.ac | 15 +++++---------- >> daxctl/Makefile.am | 4 ---- >> daxctl/io.c | 23 +++++++++++++++++++---- >> ndctl.spec.in | 7 ------- >> 4 files changed, 24 insertions(+), 25 deletions(-) >> >> diff --git a/configure.ac b/configure.ac >> index 5b10381..33a3e6e 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) >> >> AC_PROG_SED >> AC_PROG_MKDIR_P >> +AC_CANONICAL_HOST >> >> AC_ARG_ENABLE([docs], >> AS_HELP_STRING([--disable-docs], >> @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) >> PKG_CHECK_MODULES([UUID], [uuid]) >> PKG_CHECK_MODULES([JSON], [json-c]) >> >> -AC_ARG_WITH([libpmem], >> - AS_HELP_STRING([--with-libpmem], >> - [Install with libpmem support. @<:@default=no@:>@]), >> - [], >> - [with_libpmem=no]) >> -if test "x$with_libpmem" = "xyes"; then >> - PKG_CHECK_MODULES([PMEM], [libpmem]) >> - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) >> -fi >> -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) >> +AS_IF([test "x$host_cpu" == "xx86_64"], >> + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) >> +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) >> + >> >> AC_ARG_WITH([bash-completion-dir], >> AS_HELP_STRING([--with-bash-completion-dir[=PATH]], >> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am >> index 81d405e..8e3fd7e 100644 >> --- a/daxctl/Makefile.am >> +++ b/daxctl/Makefile.am >> @@ -17,7 +17,3 @@ daxctl_LDADD =\ >> ../libutil.a \ >> $(UUID_LIBS) \ >> $(JSON_LIBS) >> - >> -if ENABLE_DAXIO >> -daxctl_LDADD += $(PMEM_LIBS) >> -endif >> diff --git a/daxctl/io.c b/daxctl/io.c >> index 27e7463..a0f9613 100644 >> --- a/daxctl/io.c >> +++ b/daxctl/io.c >> @@ -20,9 +20,9 @@ >> #include <sys/mman.h> >> #include <fcntl.h> >> #include <unistd.h> >> +#include <emmintrin.h> >> #include <limits.h> >> #include <libgen.h> >> -#include <libpmem.h> >> #include <util/json.h> >> #include <util/filter.h> >> #include <util/size.h> >> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) >> return 0; >> } >> > >> +#define FLUSH_ALIGN 64 >> + >> +static void clflush(const void *addr, size_t len) >> +{ >> + /* >> + * Loop through cache-line-size (typically 64B) aligned chunks >> + * covering the given range. 
>> + */ >> + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); >> + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) >> + _mm_clflush((char *)uptr); >> + _mm_sfence(); >> +} >> + > > x86 specific stuff should probably be wrapped in an #ifdef > >> static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> uint64_t len, bool zero) >> { >> @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> if (zero && dst_dev->is_dax) { >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> memset(dst, 0, len); >> - pmem_persist(dst, len); >> + clflush(dst, len); >> rc = len; >> } else if (dst_dev->is_dax && src_dev->is_dax) { >> src = (uint8_t *)src_dev->mmap + src_dev->offset; >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> - pmem_memcpy_persist(dst, src, len); > >> + memcpy(dst, src, len); >> + clflush(dst, len); > > I think you would be better off moving this into a seperate > memcpy_to_pmem() function. That would give you a nice place for > an arch-independent fallback too. I agree, but we might save that refactoring for when another architecture shows up to implement that support.
Oliver <oohall@gmail.com> writes: > On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: >> From: Łukasz Plewa <lukasz.plewa@intel.com> >> >> The next version of libpmem will depend on libndctl - as daxctl >> depends on libpmem, it will make building packages inconvenient. >> This patch replaces libpmem calls with intrinsic functions what >> allows to remove a libpmem dependency. >> >> Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> >> --- >> configure.ac | 15 +++++---------- >> daxctl/Makefile.am | 4 ---- >> daxctl/io.c | 23 +++++++++++++++++++---- >> ndctl.spec.in | 7 ------- >> 4 files changed, 24 insertions(+), 25 deletions(-) >> >> diff --git a/configure.ac b/configure.ac >> index 5b10381..33a3e6e 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) >> >> AC_PROG_SED >> AC_PROG_MKDIR_P >> +AC_CANONICAL_HOST >> >> AC_ARG_ENABLE([docs], >> AS_HELP_STRING([--disable-docs], >> @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) >> PKG_CHECK_MODULES([UUID], [uuid]) >> PKG_CHECK_MODULES([JSON], [json-c]) >> >> -AC_ARG_WITH([libpmem], >> - AS_HELP_STRING([--with-libpmem], >> - [Install with libpmem support. @<:@default=no@:>@]), >> - [], >> - [with_libpmem=no]) >> -if test "x$with_libpmem" = "xyes"; then >> - PKG_CHECK_MODULES([PMEM], [libpmem]) >> - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) >> -fi >> -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) >> +AS_IF([test "x$host_cpu" == "xx86_64"], >> + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) >> +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) >> + >> >> AC_ARG_WITH([bash-completion-dir], >> AS_HELP_STRING([--with-bash-completion-dir[=PATH]], >> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am >> index 81d405e..8e3fd7e 100644 >> --- a/daxctl/Makefile.am >> +++ b/daxctl/Makefile.am >> @@ -17,7 +17,3 @@ daxctl_LDADD =\ >> ../libutil.a \ >> $(UUID_LIBS) \ >> $(JSON_LIBS) >> - >> -if ENABLE_DAXIO >> -daxctl_LDADD += $(PMEM_LIBS) >> -endif >> diff --git a/daxctl/io.c b/daxctl/io.c >> index 27e7463..a0f9613 100644 >> --- a/daxctl/io.c >> +++ b/daxctl/io.c >> @@ -20,9 +20,9 @@ >> #include <sys/mman.h> >> #include <fcntl.h> >> #include <unistd.h> >> +#include <emmintrin.h> >> #include <limits.h> >> #include <libgen.h> >> -#include <libpmem.h> >> #include <util/json.h> >> #include <util/filter.h> >> #include <util/size.h> >> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) >> return 0; >> } >> > >> +#define FLUSH_ALIGN 64 >> + >> +static void clflush(const void *addr, size_t len) >> +{ >> + /* >> + * Loop through cache-line-size (typically 64B) aligned chunks >> + * covering the given range. >> + */ >> + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); >> + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) >> + _mm_clflush((char *)uptr); >> + _mm_sfence(); >> +} >> + > > x86 specific stuff should probably be wrapped in an #ifdef FLUSH_ALIGN is x86 specific. There's actually a sysctl to tell you the L1 dcache size, so I'd suggest using that. Calling it clflush doesn't bug me. I don't see anything else x86 specific there. If you start using the sysctl, you can get rid of the newly imposed x86 restriction, too. 
>> static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> uint64_t len, bool zero) >> { >> @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >> if (zero && dst_dev->is_dax) { >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> memset(dst, 0, len); >> - pmem_persist(dst, len); >> + clflush(dst, len); >> rc = len; >> } else if (dst_dev->is_dax && src_dev->is_dax) { >> src = (uint8_t *)src_dev->mmap + src_dev->offset; >> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >> - pmem_memcpy_persist(dst, src, len); > >> + memcpy(dst, src, len); >> + clflush(dst, len); > > I think you would be better off moving this into a seperate > memcpy_to_pmem() function. That would give you a nice place for > an arch-independent fallback too. What's more, as I see this is basically lifting what's in libpmem, I'd like to point out that pmem_memcpy_persist will use MOVNT where available. This lifted version doesn't have that optimization. I don't know, is it impossible to make this work w/o yanking out -lpmem? Jeff
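Two illustrative fragments for the points above, assuming the "sysctl" suggestion refers to the coherency (cache-line) size rather than the total L1 size: a runtime lookup that could replace the hard-coded FLUSH_ALIGN (and with it, the new x86-only restriction), and a minimal MOVNT-style copy of the kind pmem_memcpy_persist() uses internally. _SC_LEVEL1_DCACHE_LINESIZE is a glibc extension and the sysfs path is Linux-specific; neither fragment comes from the patch or from libpmem:

```c
#include <emmintrin.h>
#include <stdio.h>
#include <unistd.h>

/* Query the cache-line size at runtime instead of hard-coding FLUSH_ALIGN. */
static size_t cache_line_size(void)
{
	long size = 0;

#ifdef _SC_LEVEL1_DCACHE_LINESIZE
	size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);	/* glibc extension */
#endif
	if (size <= 0) {
		FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/"
				"index0/coherency_line_size", "r");

		if (f) {
			if (fscanf(f, "%ld", &size) != 1)
				size = 0;
			fclose(f);
		}
	}
	return size > 0 ? (size_t)size : 64;	/* last-resort default */
}

/*
 * Minimal non-temporal copy: movnti stores bypass the CPU cache, so the
 * destination does not need a clflush pass afterwards, only an sfence.
 * Assumes len is a multiple of 8 to keep the sketch short; x86-64 only.
 */
static void memcpy_movnt(void *dst, const void *src, size_t len)
{
	long long *d = dst;
	const long long *s = src;
	size_t i;

	for (i = 0; i < len / sizeof(long long); i++)
		_mm_stream_si64(&d[i], s[i]);
	_mm_sfence();
}
```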
On Fri, Nov 10, 2017 at 7:31 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > Oliver <oohall@gmail.com> writes: > >> On Fri, Nov 10, 2017 at 1:40 AM, <lukasz.plewa@intel.com> wrote: >>> From: Łukasz Plewa <lukasz.plewa@intel.com> >>> >>> The next version of libpmem will depend on libndctl - as daxctl >>> depends on libpmem, it will make building packages inconvenient. >>> This patch replaces libpmem calls with intrinsic functions what >>> allows to remove a libpmem dependency. >>> >>> Signed-off-by: Łukasz Plewa <lukasz.plewa@intel.com> >>> --- >>> configure.ac | 15 +++++---------- >>> daxctl/Makefile.am | 4 ---- >>> daxctl/io.c | 23 +++++++++++++++++++---- >>> ndctl.spec.in | 7 ------- >>> 4 files changed, 24 insertions(+), 25 deletions(-) >>> >>> diff --git a/configure.ac b/configure.ac >>> index 5b10381..33a3e6e 100644 >>> --- a/configure.ac >>> +++ b/configure.ac >>> @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) >>> >>> AC_PROG_SED >>> AC_PROG_MKDIR_P >>> +AC_CANONICAL_HOST >>> >>> AC_ARG_ENABLE([docs], >>> AS_HELP_STRING([--disable-docs], >>> @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) >>> PKG_CHECK_MODULES([UUID], [uuid]) >>> PKG_CHECK_MODULES([JSON], [json-c]) >>> >>> -AC_ARG_WITH([libpmem], >>> - AS_HELP_STRING([--with-libpmem], >>> - [Install with libpmem support. @<:@default=no@:>@]), >>> - [], >>> - [with_libpmem=no]) >>> -if test "x$with_libpmem" = "xyes"; then >>> - PKG_CHECK_MODULES([PMEM], [libpmem]) >>> - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) >>> -fi >>> -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) >>> +AS_IF([test "x$host_cpu" == "xx86_64"], >>> + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) >>> +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) >>> + >>> >>> AC_ARG_WITH([bash-completion-dir], >>> AS_HELP_STRING([--with-bash-completion-dir[=PATH]], >>> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am >>> index 81d405e..8e3fd7e 100644 >>> --- a/daxctl/Makefile.am >>> +++ b/daxctl/Makefile.am >>> @@ -17,7 +17,3 @@ daxctl_LDADD =\ >>> ../libutil.a \ >>> $(UUID_LIBS) \ >>> $(JSON_LIBS) >>> - >>> -if ENABLE_DAXIO >>> -daxctl_LDADD += $(PMEM_LIBS) >>> -endif >>> diff --git a/daxctl/io.c b/daxctl/io.c >>> index 27e7463..a0f9613 100644 >>> --- a/daxctl/io.c >>> +++ b/daxctl/io.c >>> @@ -20,9 +20,9 @@ >>> #include <sys/mman.h> >>> #include <fcntl.h> >>> #include <unistd.h> >>> +#include <emmintrin.h> >>> #include <limits.h> >>> #include <libgen.h> >>> -#include <libpmem.h> >>> #include <util/json.h> >>> #include <util/filter.h> >>> #include <util/size.h> >>> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) >>> return 0; >>> } >>> >> >>> +#define FLUSH_ALIGN 64 >>> + >>> +static void clflush(const void *addr, size_t len) >>> +{ >>> + /* >>> + * Loop through cache-line-size (typically 64B) aligned chunks >>> + * covering the given range. >>> + */ >>> + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); >>> + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) >>> + _mm_clflush((char *)uptr); >>> + _mm_sfence(); >>> +} >>> + >> >> x86 specific stuff should probably be wrapped in an #ifdef > > FLUSH_ALIGN is x86 specific. There's actually a sysctl to tell you the > L1 dcache size, so I'd suggest using that. Calling it clflush doesn't > bug me. I don't see anything else x86 specific there. > > If you start using the sysctl, you can get rid of the newly imposed x86 > restriction, too. 
> >>> static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >>> uint64_t len, bool zero) >>> { >>> @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, >>> if (zero && dst_dev->is_dax) { >>> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >>> memset(dst, 0, len); >>> - pmem_persist(dst, len); >>> + clflush(dst, len); >>> rc = len; >>> } else if (dst_dev->is_dax && src_dev->is_dax) { >>> src = (uint8_t *)src_dev->mmap + src_dev->offset; >>> dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; >>> - pmem_memcpy_persist(dst, src, len); >> >>> + memcpy(dst, src, len); >>> + clflush(dst, len); >> >> I think you would be better off moving this into a seperate >> memcpy_to_pmem() function. That would give you a nice place for >> an arch-independent fallback too. > > What's more, as I see this is basically lifting what's in libpmem, I'd > like to point out that pmem_memcpy_persist will use MOVNT where > available. This lifted version doesn't have that optimization. > > I don't know, is it impossible to make this work w/o yanking out -lpmem? My preference would be that all the pmem_memcpy* routines move to libndctl since it is the lower level library. Arrange for nvml to depend on libndctl for this in addition to the other libndctl dependencies that seem to be in the works.
Dan Williams <dan.j.williams@intel.com> writes: >> I don't know, is it impossible to make this work w/o yanking out -lpmem? > > My preference would be that allthe pmem_memcpy* routines move to > libndctl since it is the lower level library. Arrange for nvml to > depend on libdnctl for this in addition to the other libndctl > dependencies that seem to be in the works. That's fine by me. Can you convince the nvml developers to do that? Cheers, Jeff
On Fri, Nov 10, 2017 at 8:42 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > Dan Williams <dan.j.williams@intel.com> writes: > >>> I don't know, is it impossible to make this work w/o yanking out -lpmem? >> >> My preference would be that allthe pmem_memcpy* routines move to >> libndctl since it is the lower level library. Arrange for nvml to >> depend on libdnctl for this in addition to the other libndctl >> dependencies that seem to be in the works. > > That's fine by me. Can you convince the nvml developers to do that? > The other concern is that libpmem supports other OSes, but libndctl is just Linux. That seems to suggest we need the memcpy routines to be their own standalone library that both ndctl and libpmem can import.
> On Fri, Nov 10, 2017 at 8:42 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > > Dan Williams <dan.j.williams@intel.com> writes: > > > >>> I don't know, is it impossible to make this work w/o yanking out - > lpmem? > >> > >> My preference would be that allthe pmem_memcpy* routines move to > >> libndctl since it is the lower level library. Arrange for nvml to > >> depend on libdnctl for this in addition to the other libndctl > >> dependencies that seem to be in the works. > > > > That's fine by me. Can you convince the nvml developers to do that? > > > > The other concern is that libpmem supports other OSes, but libndctl is > just Linux. That seems to suggest we need the memcpy routines to be > their own standalone library that both ndctl and libpmem can import. Folks, I understand your concerns, but to be honest, I don't like the idea of moving pmem_memcpy* routines to libndctl or yet another library. The libpmem *is* the library. In practice, if we move pmem_memcpy*/pmem_memset* to ndctl, it means that almost the _entire_ libpmem code would need to be moved to ndctl (maybe without the APIs like pmem_map_file/pmem_unmap). So, looks like it's all about moving libpmem from nvml repo to ndctl repo, isn't it? It leads us to another question - perhaps we should simply merge those two projects into one? It would solve all the problems with cross-dependencies between ndctl and nvml, once and for all. However, besides some technical issues, one of the problems is the license - GPL vs. BSD - and I'm not sure how to deal with it. For me, there are a few options: 1) Integrate the patch proposed by Lukasz (maybe slightly modified). This looks like the easiest and simplest, thus preferred solution. Of course, we can detect the cache line size and the availability of CLFLUSHOPT/CLWB, if necessary. Correct me if I'm wrong, but I don't expect "daxctl io" to be used frequently, so even if it's not optimal (does not use MOVNT) and we need to flush the cache, the performance shouldn't be a big issue. BTW, this code is only compiled for x86, so maybe we don't even need to care about other architectures? 2) Clone the pmem_memcpy/pmem_memset/pmem_persist routines into ndctl (for internal use only). The libpmem would still provide those interfaces. It duplicates the code (is it a big deal?), but eliminates dependency on libpmem. 3) Remove "daxctl io" command from ndctl and integrate it with NVML's "pmempool" utility (or a dedicated tool/subpkg). There's not much we need to do, as in practice we already have a similar tool in the nvml repo (src/test/tools/ddmap). So far, it's only used for testing, but could become a standalone "ddmap" or "daxio" utility. Keep in mind that most of the problems with library dependencies are because of Device DAX support. Device DAX was intended to be a temporary workaround until the Linux kernel/filesystems provide full support for PM programming model, but in practice it cluttered NVML with DevDAX-specific code, triggered some deep modifications of NVML internals, and now you're proposing yet another significant change. Once the MAP_SYNC flag is available, Device DAX becomes less important, I believe, and might get eliminated eventually. Shall we change the architecture of NVML or how the code is organized just because of it? I opt for the simplest solution, as proposed by Lukasz. Regards, Krzysztof
On Thu, Nov 23, 2017 at 9:23 AM, Czurylo, Krzysztof <krzysztof.czurylo@intel.com> wrote: >> On Fri, Nov 10, 2017 at 8:42 AM, Jeff Moyer <jmoyer@redhat.com> wrote: >> > Dan Williams <dan.j.williams@intel.com> writes: >> > >> >>> I don't know, is it impossible to make this work w/o yanking out - >> lpmem? >> >> >> >> My preference would be that allthe pmem_memcpy* routines move to >> >> libndctl since it is the lower level library. Arrange for nvml to >> >> depend on libdnctl for this in addition to the other libndctl >> >> dependencies that seem to be in the works. >> > >> > That's fine by me. Can you convince the nvml developers to do that? >> > >> >> The other concern is that libpmem supports other OSes, but libndctl is >> just Linux. That seems to suggest we need the memcpy routines to be >> their own standalone library that both ndctl and libpmem can import. > > Folks, > > I understand your concerns, but to be honest, I don't like the idea of > moving pmem_memcpy* routines to libndctl or yet another library. > The libpmem *is* the library. > In practice, if we move pmem_memcpy*/pmem_memset* to ndctl, it means > that almost the _entire_ libpmem code would need to be moved to ndctl > (maybe without the APIs like pmem_map_file/pmem_unmap). So, looks like > it's all about moving libpmem from nvml repo to ndctl repo, isn't it? > It leads us to another question - perhaps we should simply merge those > two projects into one? It would solve all the problems with > cross-dependencies between ndctl and nvml, once and for all. > However, besides some technical issues, one of the problems is > the license - GPL vs. BSD - and I'm not sure how to deal with it. > > For me, there are a few options: > 1) Integrate the patch proposed by Lukasz (maybe slightly modified). > This looks like the easiest and simplest, thus preferred solution. > Of course, we can detect the cache line size and the availability > of CLFLUSHOPT/CLWB, if necessary. > Correct me if I'm wrong, but I don't expect "daxctl io" to be used > frequently, so even if it's not optimal (does not use MOVNT) and we > need to flush the cache, the performance shouldn't be a big issue. > BTW, this code is only compiled for x86, so maybe we don't even need > to care about other architectures? > > 2) Clone the pmem_memcpy/pmem_memset/pmem_persist routines into ndctl > (for internal use only). The libpmem would still provide those > interfaces. > It duplicates the code (is it a big deal?), but eliminates dependency > on libpmem. > > 3) Remove "daxctl io" command from ndctl and integrate it with > NVML's "pmempool" utility (or a dedicated tool/subpkg). > There's not much we need to do, as in practice we already have a similar > tool in the nvml repo (src/test/tools/ddmap). So far, it's only used > for testing, but could become a standalone "ddmap" or "daxio" utility. I'd like to investigate this option. We need a utility to manage (provision, re-provision, backup) device-dax capacity, and I don't much care if it comes from the daxctl package, or elsewhere. > Keep in mind that most of the problems with library dependencies are > because of Device DAX support. Device DAX was intended to be a temporary > workaround until the Linux kernel/filesystems provide full support for > PM programming model, but in practice it cluttered NVML with > DevDAX-specific code, triggered some deep modifications of NVML internals, > and now you're proposing yet another significant change. 
> Once the MAP_SYNC flag is available, Device DAX becomes less important, > I believe, and might get eliminated eventually. Shall we change the > architecture of NVML or how the code is organized just because of it? I disagree with this assessment. We continue to find uses for device-dax as a hugetlbfs alternative, and it remains the only interface that is safe for RDMA and virtualization with dax. MAP_SYNC is not a device-dax replacement. > I opt for the simplest solution, as proposed by Lukasz. Let's take a look at shipping 'daxctl io' alongside pmempool. I do not want multiple pmem_memcpy() implementations to start proliferating around various userspace packages.
> -----Original Message----- > From: Dan Williams [mailto:dan.j.williams@intel.com] > Sent: Thursday, November 23, 2017 19:09 > To: Czurylo, Krzysztof > Cc: Jeff Moyer; Plewa, Lukasz; linux-nvdimm@lists.01.org > Subject: Re: [PATCH] daxctl: remove libpmem dependency > (...) > > 3) Remove "daxctl io" command from ndctl and integrate it with NVML's > > "pmempool" utility (or a dedicated tool/subpkg). > > There's not much we need to do, as in practice we already have a > > similar tool in the nvml repo (src/test/tools/ddmap). So far, it's > > only used for testing, but could become a standalone "ddmap" or > "daxio" utility. > > I'd like to investigate this option. We need a utility to manage > (provision, re-provision, backup) device-dax capacity, and I don't much > care if it comes from the daxctl package, or elsewhere. > (...) > Let's take a look at shipping 'daxctl io' alongside pmempool. I do not > want multiple pmem_memcpy() implementations to start proliferating > around various userspace packages. If we choose this option, do we need to implement "daxctl list" command? Since "list" is also supported by ndctl (though the output is slightly different), I'd prefer to only implement "daxctl io" in nvml. Anything else this tool should be able to do besides the "io"? Are there any plans for other features/commands? -- K.
On Mon, Nov 27, 2017 at 9:33 AM, Czurylo, Krzysztof <krzysztof.czurylo@intel.com> wrote: > > >> -----Original Message----- >> From: Dan Williams [mailto:dan.j.williams@intel.com] >> Sent: Thursday, November 23, 2017 19:09 >> To: Czurylo, Krzysztof >> Cc: Jeff Moyer; Plewa, Lukasz; linux-nvdimm@lists.01.org >> Subject: Re: [PATCH] daxctl: remove libpmem dependency >> (...) >> > 3) Remove "daxctl io" command from ndctl and integrate it with NVML's >> > "pmempool" utility (or a dedicated tool/subpkg). >> > There's not much we need to do, as in practice we already have a >> > similar tool in the nvml repo (src/test/tools/ddmap). So far, it's >> > only used for testing, but could become a standalone "ddmap" or >> "daxio" utility. >> >> I'd like to investigate this option. We need a utility to manage >> (provision, re-provision, backup) device-dax capacity, and I don't much >> care if it comes from the daxctl package, or elsewhere. >> (...) >> Let's take a look at shipping 'daxctl io' alongside pmempool. I do not >> want multiple pmem_memcpy() implementations to start proliferating >> around various userspace packages. > > If we choose this option, do we need to implement "daxctl list" command? > Since "list" is also supported by ndctl (though the output is slightly > different), I'd prefer to only implement "daxctl io" in nvml. I'm fine with just implementing the "io" command as a singular command when this moves over, maybe just call it 'daxio'? > Anything else this tool should be able to do besides the "io"? > Are there any plans for other features/commands? Yes, daxctl will continue to be enhanced for the non-nvdimm/pmem use cases for device-dax.
Dan Williams <dan.j.williams@intel.com> writes: > On Mon, Nov 27, 2017 at 9:33 AM, Czurylo, Krzysztof > <krzysztof.czurylo@intel.com> wrote: >> If we choose this option, do we need to implement "daxctl list" command? >> Since "list" is also supported by ndctl (though the output is slightly >> different), I'd prefer to only implement "daxctl io" in nvml. > > I'm fine with just implementing the "io" the command as a singular > command when this moves over, maybe just call it 'daxio'? > >> Anything else this tool should be able to do besides the "io"? >> Are there any plans for other features/commands? > > Yes, daxctl will continue to be enhanced for the non-nvdimm/pmem use > cases for device-dax. I think Krzysztof was asking whether dax_io was going to grow any new features. -Jeff
On Mon, Nov 27, 2017 at 10:14 AM, Jeff Moyer <jmoyer@redhat.com> wrote: > Dan Williams <dan.j.williams@intel.com> writes: > >> On Mon, Nov 27, 2017 at 9:33 AM, Czurylo, Krzysztof >> <krzysztof.czurylo@intel.com> wrote: > >>> If we choose this option, do we need to implement "daxctl list" command? >>> Since "list" is also supported by ndctl (though the output is slightly >>> different), I'd prefer to only implement "daxctl io" in nvml. >> >> I'm fine with just implementing the "io" the command as a singular >> command when this moves over, maybe just call it 'daxio'? >> >>> Anything else this tool should be able to do besides the "io"? >>> Are there any plans for other features/commands? >> >> Yes, daxctl will continue to be enhanced for the non-nvdimm/pmem use >> cases for device-dax. > > I think Krzysztof was asking whether dax_io was going to grow any new > features. Nothing more on the horizon that I can see.
diff --git a/configure.ac b/configure.ac index 5b10381..33a3e6e 100644 --- a/configure.ac +++ b/configure.ac @@ -32,6 +32,7 @@ AC_PREFIX_DEFAULT([/usr]) AC_PROG_SED AC_PROG_MKDIR_P +AC_CANONICAL_HOST AC_ARG_ENABLE([docs], AS_HELP_STRING([--disable-docs], @@ -94,16 +95,10 @@ PKG_CHECK_MODULES([UDEV], [libudev]) PKG_CHECK_MODULES([UUID], [uuid]) PKG_CHECK_MODULES([JSON], [json-c]) -AC_ARG_WITH([libpmem], - AS_HELP_STRING([--with-libpmem], - [Install with libpmem support. @<:@default=no@:>@]), - [], - [with_libpmem=no]) -if test "x$with_libpmem" = "xyes"; then - PKG_CHECK_MODULES([PMEM], [libpmem]) - AC_DEFINE(ENABLE_DAXIO, 1, [Enable daxctl io]) -fi -AM_CONDITIONAL([ENABLE_DAXIO], [test "x$with_libpmem" = "xyes"]) +AS_IF([test "x$host_cpu" == "xx86_64"], + [AC_DEFINE([ENABLE_DAXIO], [1], enable daxctl io command)]) +AM_CONDITIONAL([ENABLE_DAXIO], [test "x$host_cpu" == "xx86_64"]) + AC_ARG_WITH([bash-completion-dir], AS_HELP_STRING([--with-bash-completion-dir[=PATH]], diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am index 81d405e..8e3fd7e 100644 --- a/daxctl/Makefile.am +++ b/daxctl/Makefile.am @@ -17,7 +17,3 @@ daxctl_LDADD =\ ../libutil.a \ $(UUID_LIBS) \ $(JSON_LIBS) - -if ENABLE_DAXIO -daxctl_LDADD += $(PMEM_LIBS) -endif diff --git a/daxctl/io.c b/daxctl/io.c index 27e7463..a0f9613 100644 --- a/daxctl/io.c +++ b/daxctl/io.c @@ -20,9 +20,9 @@ #include <sys/mman.h> #include <fcntl.h> #include <unistd.h> +#include <emmintrin.h> #include <limits.h> #include <libgen.h> -#include <libpmem.h> #include <util/json.h> #include <util/filter.h> #include <util/size.h> @@ -357,6 +357,20 @@ static int clear_badblocks(struct io_dev *dev, uint64_t len) return 0; } +#define FLUSH_ALIGN 64 + +static void clflush(const void *addr, size_t len) +{ + /* + * Loop through cache-line-size (typically 64B) aligned chunks + * covering the given range. + */ + for (uintptr_t uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) + _mm_clflush((char *)uptr); + _mm_sfence(); +} + static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, uint64_t len, bool zero) { @@ -366,12 +380,13 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, if (zero && dst_dev->is_dax) { dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; memset(dst, 0, len); - pmem_persist(dst, len); + clflush(dst, len); rc = len; } else if (dst_dev->is_dax && src_dev->is_dax) { src = (uint8_t *)src_dev->mmap + src_dev->offset; dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; - pmem_memcpy_persist(dst, src, len); + memcpy(dst, src, len); + clflush(dst, len); rc = len; } else if (src_dev->is_dax) { src = (uint8_t *)src_dev->mmap + src_dev->offset; @@ -422,7 +437,7 @@ static int64_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, break; count += rc; } while (count != (ssize_t)len); - pmem_persist(dst, count); + clflush(dst, count); rc = count; if (rc != (ssize_t)len) printf("Requested size %lu larger than destination.\n", len); diff --git a/ndctl.spec.in b/ndctl.spec.in index 4aa133d..b481762 100644 --- a/ndctl.spec.in +++ b/ndctl.spec.in @@ -20,9 +20,6 @@ BuildRequires: pkgconfig(libudev) BuildRequires: pkgconfig(uuid) BuildRequires: pkgconfig(json-c) BuildRequires: pkgconfig(bash-completion) -%ifarch x86_64 -BuildRequires: pkgconfig(libpmem) -%endif %description Utility library for managing the "libnvdimm" subsystem. The "libnvdimm" @@ -93,11 +90,7 @@ control API for these devices. 
%build echo "VERSION" > version ./autogen.sh -%ifarch x86_64 -%configure --disable-static --enable-local --disable-silent-rules --with-libpmem -%else %configure --disable-static --enable-local --disable-silent-rules -%endif make %{?_smp_mflags} %install