diff mbox series

[v2,1/3] core.fsyncmethod: add writeout-only mode

Message ID e79522cbdd4feb45b062b75225475f34039d1866.1638845211.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series A design for future-proofing fsync() configuration | expand

Commit Message

Neeraj Singh (WINDOWS-SFS) Dec. 7, 2021, 2:46 a.m. UTC
From: Neeraj Singh <neerajsi@microsoft.com>

This commit introduces the `core.fsyncmethod` configuration
knob, which can currently be set to `fsync` or `writeout-only`.

The new writeout-only mode attempts to tell the operating system to
flush its in-memory page cache to the storage hardware without issuing a
CACHE_FLUSH command to the storage controller.

Writeout-only fsync is significantly faster than a vanilla fsync on
common hardware, since data is written to a disk-side cache rather than
all the way to a durable medium. Later changes in this patch series will
take advantage of this primitive to implement batching of hardware
flushes.

When git_fsync is called with FSYNC_WRITEOUT_ONLY, it may fail and the
caller is expected to do an ordinary fsync as needed.

On Apple platforms, the fsync system call does not issue a CACHE_FLUSH
directive to the storage controller. This change updates fsync to do
fcntl(F_FULLFSYNC) to make fsync actually durable. We maintain parity
with existing behavior on Apple platforms by setting the default value
of the new core.fsyncmethod option.

Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
---
 Documentation/config/core.txt       |  9 +++++
 Makefile                            |  6 ++++
 cache.h                             |  7 ++++
 compat/mingw.h                      |  3 ++
 compat/win32/flush.c                | 28 +++++++++++++++
 config.c                            | 12 +++++++
 config.mak.uname                    |  3 ++
 configure.ac                        |  8 +++++
 contrib/buildsystems/CMakeLists.txt |  3 +-
 environment.c                       |  2 +-
 git-compat-util.h                   | 24 +++++++++++++
 wrapper.c                           | 56 +++++++++++++++++++++++++++++
 write-or-die.c                      | 10 +++---
 13 files changed, 165 insertions(+), 6 deletions(-)
 create mode 100644 compat/win32/flush.c

Comments

Patrick Steinhardt Dec. 7, 2021, 11:44 a.m. UTC | #1
On Tue, Dec 07, 2021 at 02:46:49AM +0000, Neeraj Singh via GitGitGadget wrote:
> From: Neeraj Singh <neerajsi@microsoft.com>
[snip]
> --- a/compat/mingw.h
> +++ b/compat/mingw.h
> @@ -329,6 +329,9 @@ int mingw_getpagesize(void);
>  #define getpagesize mingw_getpagesize
>  #endif
>  
> +int win32_fsync_no_flush(int fd);
> +#define fsync_no_flush win32_fsync_no_flush
> +
>  struct rlimit {
>  	unsigned int rlim_cur;
>  };
> diff --git a/compat/win32/flush.c b/compat/win32/flush.c
> new file mode 100644
> index 00000000000..75324c24ee7
> --- /dev/null
> +++ b/compat/win32/flush.c
> @@ -0,0 +1,28 @@
> +#include "../../git-compat-util.h"
> +#include <winternl.h>
> +#include "lazyload.h"
> +
> +int win32_fsync_no_flush(int fd)
> +{
> +       IO_STATUS_BLOCK io_status;
> +
> +#define FLUSH_FLAGS_FILE_DATA_ONLY 1
> +
> +       DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NtFlushBuffersFileEx,
> +			 HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
> +			 PIO_STATUS_BLOCK IoStatusBlock);
> +
> +       if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
> +		errno = ENOSYS;
> +		return -1;
> +       }

I'm wondering whether it would make sense to fall back to fsync(3P) in
case we cannot use writeout-only, but I see that were doing essentially
that in `fsync_or_die()`. There is no indicator to the user though that
writeout-only doesn't work -- do we want to print a one-time warning?

> +       memset(&io_status, 0, sizeof(io_status));
> +       if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
> +				NULL, 0, &io_status)) {
> +		errno = EINVAL;
> +		return -1;
> +       }
> +
> +       return 0;
> +}

[snip]
> diff --git a/wrapper.c b/wrapper.c
> index 36e12119d76..1c5f2c87791 100644
> --- a/wrapper.c
> +++ b/wrapper.c
> @@ -546,6 +546,62 @@ int xmkstemp_mode(char *filename_template, int mode)
>  	return fd;
>  }
>  
> +int git_fsync(int fd, enum fsync_action action)
> +{
> +	switch (action) {
> +	case FSYNC_WRITEOUT_ONLY:
> +
> +#ifdef __APPLE__
> +		/*
> +		 * on macOS, fsync just causes filesystem cache writeback but does not
> +		 * flush hardware caches.
> +		 */
> +		return fsync(fd);

Below we're looping around `EINTR` -- are Apple systems never returning
it?

Patrick

> +#endif
> +
> +#ifdef HAVE_SYNC_FILE_RANGE
> +		/*
> +		 * On linux 2.6.17 and above, sync_file_range is the way to issue
> +		 * a writeback without a hardware flush. An offset of 0 and size of 0
> +		 * indicates writeout of the entire file and the wait flags ensure that all
> +		 * dirty data is written to the disk (potentially in a disk-side cache)
> +		 * before we continue.
> +		 */
> +
> +		return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
> +						 SYNC_FILE_RANGE_WRITE |
> +						 SYNC_FILE_RANGE_WAIT_AFTER);
> +#endif
> +
> +#ifdef fsync_no_flush
> +		return fsync_no_flush(fd);
> +#endif
> +
> +		errno = ENOSYS;
> +		return -1;
> +
> +	case FSYNC_HARDWARE_FLUSH:
> +		/*
> +		 * On some platforms fsync may return EINTR. Try again in this
> +		 * case, since callers asking for a hardware flush may die if
> +		 * this function returns an error.
> +		 */
> +		for (;;) {
> +			int err;
> +#ifdef __APPLE__
> +			err = fcntl(fd, F_FULLFSYNC);
> +#else
> +			err = fsync(fd);
> +#endif
> +			if (err >= 0 || errno != EINTR)
> +				return err;
> +		}
> +
> +	default:
> +		BUG("unexpected git_fsync(%d) call", action);
> +	}
> +}
> +
>  static int warn_if_unremovable(const char *op, const char *file, int rc)
>  {
>  	int err;
> diff --git a/write-or-die.c b/write-or-die.c
> index 0b1ec8190b6..0702acdd5e8 100644
> --- a/write-or-die.c
> +++ b/write-or-die.c
> @@ -57,10 +57,12 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
>  
>  void fsync_or_die(int fd, const char *msg)
>  {
> -	while (fsync(fd) < 0) {
> -		if (errno != EINTR)
> -			die_errno("fsync error on '%s'", msg);
> -	}
> +	if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
> +	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
> +		return;
> +
> +	if (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0)
> +		die_errno("fsync error on '%s'", msg);
>  }
>  
>  void write_or_die(int fd, const void *buf, size_t count)
> -- 
> gitgitgadget
>
Ævar Arnfjörð Bjarmason Dec. 7, 2021, 12:14 p.m. UTC | #2
On Tue, Dec 07 2021, Patrick Steinhardt wrote:

> [[PGP Signed Part:Undecided]]
> On Tue, Dec 07, 2021 at 02:46:49AM +0000, Neeraj Singh via GitGitGadget wrote:
>> From: Neeraj Singh <neerajsi@microsoft.com>
> [...]
> [snip]
>> diff --git a/wrapper.c b/wrapper.c
>> index 36e12119d76..1c5f2c87791 100644
>> --- a/wrapper.c
>> +++ b/wrapper.c
>> @@ -546,6 +546,62 @@ int xmkstemp_mode(char *filename_template, int mode)
>>  	return fd;
>>  }
>>  
>> +int git_fsync(int fd, enum fsync_action action)
>> +{
>> +	switch (action) {
>> +	case FSYNC_WRITEOUT_ONLY:
>> +
>> +#ifdef __APPLE__
>> +		/*
>> +		 * on macOS, fsync just causes filesystem cache writeback but does not
>> +		 * flush hardware caches.
>> +		 */
>> +		return fsync(fd);
>
> Below we're looping around `EINTR` -- are Apple systems never returning
> it?

I think so per cccdfd22436 (fsync(): be prepared to see EINTR,
2021-06-04), but I'm not sure, but in any case it would make sense for
this to just call the same loop we've been calling elsewhere. It doesn't
seem to hurt, so we can just do that part in the portable portion of the
code.
Ævar Arnfjörð Bjarmason Dec. 7, 2021, 12:18 p.m. UTC | #3
On Tue, Dec 07 2021, Neeraj Singh via GitGitGadget wrote:

> From: Neeraj Singh <neerajsi@microsoft.com>
>
> This commit introduces the `core.fsyncmethod` configuration

Just a commit msg nit: core.fsyncMethod (I see the docs etc. are using
it camelCased, good..

> diff --git a/compat/win32/flush.c b/compat/win32/flush.c
> new file mode 100644
> index 00000000000..75324c24ee7
> --- /dev/null
> +++ b/compat/win32/flush.c
> @@ -0,0 +1,28 @@
> +#include "../../git-compat-util.h"

nit: Just FWIW I think the better thing is '#include
"git-compat-util.h"', i.e. we're compiling at the top-level and have
added it to -I.

(I know a lot of compat/ and contrib/ and even main-tree stuff does
that, but just FWIW it's not needed).

> +	if (!strcmp(var, "core.fsyncmethod")) {
> +		if (!value)
> +			return config_error_nonbool(var);
> +		if (!strcmp(value, "fsync"))
> +			fsync_method = FSYNC_METHOD_FSYNC;
> +		else if (!strcmp(value, "writeout-only"))
> +			fsync_method = FSYNC_METHOD_WRITEOUT_ONLY;
> +		else

As a non-nit comment I think this config schema looks great so far.

> +			warning(_("unknown %s value '%s'"), var, value);

Just a suggestion maybe something slightly scarier like:

    "unknown core.fsyncMethod value '%s'; config from future git version? ignoring requested fsync strategy"

Also using the nicer camelCased version instead of "var" (also helps
translators with context...)

> +int git_fsync(int fd, enum fsync_action action)
> +{
> +	switch (action) {
> +	case FSYNC_WRITEOUT_ONLY:
> +
> +#ifdef __APPLE__
> +		/*
> +		 * on macOS, fsync just causes filesystem cache writeback but does not
> +		 * flush hardware caches.
> +		 */
> +		return fsync(fd);
> +#endif
> +
> +#ifdef HAVE_SYNC_FILE_RANGE
> +		/*
> +		 * On linux 2.6.17 and above, sync_file_range is the way to issue
> +		 * a writeback without a hardware flush. An offset of 0 and size of 0
> +		 * indicates writeout of the entire file and the wait flags ensure that all
> +		 * dirty data is written to the disk (potentially in a disk-side cache)
> +		 * before we continue.
> +		 */
> +
> +		return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
> +						 SYNC_FILE_RANGE_WRITE |
> +						 SYNC_FILE_RANGE_WAIT_AFTER);
> +#endif
> +
> +#ifdef fsync_no_flush
> +		return fsync_no_flush(fd);
> +#endif
> +
> +		errno = ENOSYS;
> +		return -1;
> +
> +	case FSYNC_HARDWARE_FLUSH:
> +		/*
> +		 * On some platforms fsync may return EINTR. Try again in this
> +		 * case, since callers asking for a hardware flush may die if
> +		 * this function returns an error.
> +		 */
> +		for (;;) {
> +			int err;
> +#ifdef __APPLE__
> +			err = fcntl(fd, F_FULLFSYNC);
> +#else
> +			err = fsync(fd);
> +#endif
> +			if (err >= 0 || errno != EINTR)
> +				return err;
> +		}
> +
> +	default:
> +		BUG("unexpected git_fsync(%d) call", action);

Don't include such "default" cases, you have an exhaustive "enum", if
you skip it the compiler will check this for you.

> +	}
> +}
> +
>  static int warn_if_unremovable(const char *op, const char *file, int rc)

Just a code nit: I think it's very much preferred if possible to have as
much of code like this compile on all platforms. See the series at
4002e87cb25 (grep: remove #ifdef NO_PTHREADS, 2018-11-03) is part of for
a good example.

Maybe not worth it in this case since they're not nested ifdef's.

I'm basically thinking of something (also re Patrick's comment on the
2nd patch) where we have a platform_fsync() whose return
value/arguments/whatever capture this "I want to return now" or "you
should be looping" and takes the enum_fsync_action" strategy.

Then the git_fsync() would be the platform-independent looping etc., and
another funciton would do the "one fsync at a time, maybe call me
again".

Maybe it would suck more, just food for thought... :)
Neeraj Singh Dec. 7, 2021, 11:29 p.m. UTC | #4
On Tue, Dec 7, 2021 at 3:45 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> On Tue, Dec 07, 2021 at 02:46:49AM +0000, Neeraj Singh via GitGitGadget wrote:
> > From: Neeraj Singh <neerajsi@microsoft.com>
> [snip]
> > --- a/compat/mingw.h
> > +++ b/compat/mingw.h
> > @@ -329,6 +329,9 @@ int mingw_getpagesize(void);
> >  #define getpagesize mingw_getpagesize
> >  #endif
> >
> > +int win32_fsync_no_flush(int fd);
> > +#define fsync_no_flush win32_fsync_no_flush
> > +
> >  struct rlimit {
> >       unsigned int rlim_cur;
> >  };
> > diff --git a/compat/win32/flush.c b/compat/win32/flush.c
> > new file mode 100644
> > index 00000000000..75324c24ee7
> > --- /dev/null
> > +++ b/compat/win32/flush.c
> > @@ -0,0 +1,28 @@
> > +#include "../../git-compat-util.h"
> > +#include <winternl.h>
> > +#include "lazyload.h"
> > +
> > +int win32_fsync_no_flush(int fd)
> > +{
> > +       IO_STATUS_BLOCK io_status;
> > +
> > +#define FLUSH_FLAGS_FILE_DATA_ONLY 1
> > +
> > +       DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NtFlushBuffersFileEx,
> > +                      HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
> > +                      PIO_STATUS_BLOCK IoStatusBlock);
> > +
> > +       if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
> > +             errno = ENOSYS;
> > +             return -1;
> > +       }
>
> I'm wondering whether it would make sense to fall back to fsync(3P) in
> case we cannot use writeout-only, but I see that were doing essentially
> that in `fsync_or_die()`. There is no indicator to the user though that
> writeout-only doesn't work -- do we want to print a one-time warning?
>

I wanted to leave the fallback to the caller so that the algorithm can
be adjusted in some way based on whether writeout-only succeeded.  For
batched fsync object files, we refrain from doing the last fsync if we
were doing real fsyncs all along.

I didn't want to issue a warning, since this writeout-only codepath
might be invoked from multiple subprocesses, which would each
potentially issue their one warning.  The consequence of failing
writeout only is worse performance, but should not be compromised
safety, so I'm not sure the user gets enough from the warning to
justify something that's potentially spammy.  In practice, when batch
mode is adopted on Windows (by default), some older pre-Win8 systems
will experience fsyncs and equivalent performance to what they're
seeing today. I don't want these users to have a warning too.

> > +       memset(&io_status, 0, sizeof(io_status));
> > +       if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
> > +                             NULL, 0, &io_status)) {
> > +             errno = EINVAL;
> > +             return -1;
> > +       }
> > +
> > +       return 0;
> > +}
>
> [snip]
> > diff --git a/wrapper.c b/wrapper.c
> > index 36e12119d76..1c5f2c87791 100644
> > --- a/wrapper.c
> > +++ b/wrapper.c
> > @@ -546,6 +546,62 @@ int xmkstemp_mode(char *filename_template, int mode)
> >       return fd;
> >  }
> >
> > +int git_fsync(int fd, enum fsync_action action)
> > +{
> > +     switch (action) {
> > +     case FSYNC_WRITEOUT_ONLY:
> > +
> > +#ifdef __APPLE__
> > +             /*
> > +              * on macOS, fsync just causes filesystem cache writeback but does not
> > +              * flush hardware caches.
> > +              */
> > +             return fsync(fd);
>
> Below we're looping around `EINTR` -- are Apple systems never returning
> it?
>

The EINTR check was added due to a test failure on HP NonStop.  I
don't believe any other platform has actually complained about that.

Thanks again for the code review!
-Neeraj
Neeraj Singh Dec. 7, 2021, 11:58 p.m. UTC | #5
On Tue, Dec 7, 2021 at 4:27 AM Ævar Arnfjörð Bjarmason <avarab@gmail.com> wrote:
>
>
> On Tue, Dec 07 2021, Neeraj Singh via GitGitGadget wrote:
>
> > From: Neeraj Singh <neerajsi@microsoft.com>
> >
> > This commit introduces the `core.fsyncmethod` configuration
>
> Just a commit msg nit: core.fsyncMethod (I see the docs etc. are using
> it camelCased, good..

Will fix.

> > diff --git a/compat/win32/flush.c b/compat/win32/flush.c
> > new file mode 100644
> > index 00000000000..75324c24ee7
> > --- /dev/null
> > +++ b/compat/win32/flush.c
> > @@ -0,0 +1,28 @@
> > +#include "../../git-compat-util.h"
>
> nit: Just FWIW I think the better thing is '#include
> "git-compat-util.h"', i.e. we're compiling at the top-level and have
> added it to -I.
>
> (I know a lot of compat/ and contrib/ and even main-tree stuff does
> that, but just FWIW it's not needed).
>

Will fix.

> > +     if (!strcmp(var, "core.fsyncmethod")) {
> > +             if (!value)
> > +                     return config_error_nonbool(var);
> > +             if (!strcmp(value, "fsync"))
> > +                     fsync_method = FSYNC_METHOD_FSYNC;
> > +             else if (!strcmp(value, "writeout-only"))
> > +                     fsync_method = FSYNC_METHOD_WRITEOUT_ONLY;
> > +             else
>
> As a non-nit comment I think this config schema looks great so far.
>
> > +                     warning(_("unknown %s value '%s'"), var, value);
>
> Just a suggestion maybe something slightly scarier like:
>
>     "unknown core.fsyncMethod value '%s'; config from future git version? ignoring requested fsync strategy"
>
> Also using the nicer camelCased version instead of "var" (also helps
> translators with context...)
>

Will fix.  The motivation for this scheme was to 'factor' the messages
so there would be less to translate. But I see now that the message
doesn't have enough context to translate reasonably.

> > +int git_fsync(int fd, enum fsync_action action)
> > +{
> > +     switch (action) {
> > +     case FSYNC_WRITEOUT_ONLY:
> > +
> > +#ifdef __APPLE__
> > +             /*
> > +              * on macOS, fsync just causes filesystem cache writeback but does not
> > +              * flush hardware caches.
> > +              */
> > +             return fsync(fd);
> > +#endif
> > +
> > +#ifdef HAVE_SYNC_FILE_RANGE
> > +             /*
> > +              * On linux 2.6.17 and above, sync_file_range is the way to issue
> > +              * a writeback without a hardware flush. An offset of 0 and size of 0
> > +              * indicates writeout of the entire file and the wait flags ensure that all
> > +              * dirty data is written to the disk (potentially in a disk-side cache)
> > +              * before we continue.
> > +              */
> > +
> > +             return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
> > +                                              SYNC_FILE_RANGE_WRITE |
> > +                                              SYNC_FILE_RANGE_WAIT_AFTER);
> > +#endif
> > +
> > +#ifdef fsync_no_flush
> > +             return fsync_no_flush(fd);
> > +#endif
> > +
> > +             errno = ENOSYS;
> > +             return -1;
> > +
> > +     case FSYNC_HARDWARE_FLUSH:
> > +             /*
> > +              * On some platforms fsync may return EINTR. Try again in this
> > +              * case, since callers asking for a hardware flush may die if
> > +              * this function returns an error.
> > +              */
> > +             for (;;) {
> > +                     int err;
> > +#ifdef __APPLE__
> > +                     err = fcntl(fd, F_FULLFSYNC);
> > +#else
> > +                     err = fsync(fd);
> > +#endif
> > +                     if (err >= 0 || errno != EINTR)
> > +                             return err;
> > +             }
> > +
> > +     default:
> > +             BUG("unexpected git_fsync(%d) call", action);
>
> Don't include such "default" cases, you have an exhaustive "enum", if
> you skip it the compiler will check this for you.
>

Junio gave the feedback to include this "default:" case in the switch
[1].  Removing the default leads to the "error: control reaches end of
non-void function" on gcc. Fixing that error and adding a trial option
does give the exhaustiveness error that you're talking about.  I'd
rather just leave this as-is since the BUG() obviates the need for an
in-practice-unreachable return statement.

[1] https://lore.kernel.org/git/xmqqfsu70x58.fsf@gitster.g/

> > +     }
> > +}
> > +
> >  static int warn_if_unremovable(const char *op, const char *file, int rc)
>
> Just a code nit: I think it's very much preferred if possible to have as
> much of code like this compile on all platforms. See the series at
> 4002e87cb25 (grep: remove #ifdef NO_PTHREADS, 2018-11-03) is part of for
> a good example.
>
> Maybe not worth it in this case since they're not nested ifdef's.
>
> I'm basically thinking of something (also re Patrick's comment on the
> 2nd patch) where we have a platform_fsync() whose return
> value/arguments/whatever capture this "I want to return now" or "you
> should be looping" and takes the enum_fsync_action" strategy.
>
> Then the git_fsync() would be the platform-independent looping etc., and
> another funciton would do the "one fsync at a time, maybe call me
> again".
>
> Maybe it would suck more, just food for thought... :)

I'm going to introduce a new static function called fsync_loop which
does the looping and I'll call it from git_fsync.  That appears to be
the cleanest to me to address your and Patrick's feedback.

Thanks,
Neeraj
diff mbox series

Patch

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index c04f62a54a1..dbb134f7136 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -547,6 +547,15 @@  core.whitespace::
   is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent`
   errors. The default tab width is 8. Allowed values are 1 to 63.
 
+core.fsyncMethod::
+	A value indicating the strategy Git will use to harden repository data
+	using fsync and related primitives.
++
+* `fsync` uses the fsync() system call or platform equivalents.
+* `writeout-only` issues pagecache writeback requests, but depending on the
+  filesystem and storage hardware, data added to the repository may not be
+  durable in the event of a system crash. This is the default mode on macOS.
+
 core.fsyncObjectFiles::
 	This boolean will enable 'fsync()' when writing object files.
 +
diff --git a/Makefile b/Makefile
index d56c0e4aadc..cba024615c9 100644
--- a/Makefile
+++ b/Makefile
@@ -403,6 +403,8 @@  all::
 #
 # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
 #
+# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
+#
 # Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
 # before 2.17) for clock_gettime and CLOCK_MONOTONIC.
 #
@@ -1881,6 +1883,10 @@  ifdef HAVE_CLOCK_MONOTONIC
 	BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
 endif
 
+ifdef HAVE_SYNC_FILE_RANGE
+	BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
+endif
+
 ifdef NEEDS_LIBRT
 	EXTLIBS += -lrt
 endif
diff --git a/cache.h b/cache.h
index eba12487b99..9cd60d94952 100644
--- a/cache.h
+++ b/cache.h
@@ -986,6 +986,13 @@  extern int read_replace_refs;
 extern char *git_replace_ref_base;
 
 extern int fsync_object_files;
+
+enum fsync_method {
+	FSYNC_METHOD_FSYNC,
+	FSYNC_METHOD_WRITEOUT_ONLY
+};
+
+extern enum fsync_method fsync_method;
 extern int core_preload_index;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/compat/mingw.h b/compat/mingw.h
index c9a52ad64a6..6074a3d3ced 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -329,6 +329,9 @@  int mingw_getpagesize(void);
 #define getpagesize mingw_getpagesize
 #endif
 
+int win32_fsync_no_flush(int fd);
+#define fsync_no_flush win32_fsync_no_flush
+
 struct rlimit {
 	unsigned int rlim_cur;
 };
diff --git a/compat/win32/flush.c b/compat/win32/flush.c
new file mode 100644
index 00000000000..75324c24ee7
--- /dev/null
+++ b/compat/win32/flush.c
@@ -0,0 +1,28 @@ 
+#include "../../git-compat-util.h"
+#include <winternl.h>
+#include "lazyload.h"
+
+int win32_fsync_no_flush(int fd)
+{
+       IO_STATUS_BLOCK io_status;
+
+#define FLUSH_FLAGS_FILE_DATA_ONLY 1
+
+       DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NtFlushBuffersFileEx,
+			 HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
+			 PIO_STATUS_BLOCK IoStatusBlock);
+
+       if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
+		errno = ENOSYS;
+		return -1;
+       }
+
+       memset(&io_status, 0, sizeof(io_status));
+       if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
+				NULL, 0, &io_status)) {
+		errno = EINVAL;
+		return -1;
+       }
+
+       return 0;
+}
diff --git a/config.c b/config.c
index c5873f3a706..c3410b8a868 100644
--- a/config.c
+++ b/config.c
@@ -1490,6 +1490,18 @@  static int git_default_core_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp(var, "core.fsyncmethod")) {
+		if (!value)
+			return config_error_nonbool(var);
+		if (!strcmp(value, "fsync"))
+			fsync_method = FSYNC_METHOD_FSYNC;
+		else if (!strcmp(value, "writeout-only"))
+			fsync_method = FSYNC_METHOD_WRITEOUT_ONLY;
+		else
+			warning(_("unknown %s value '%s'"), var, value);
+
+	}
+
 	if (!strcmp(var, "core.fsyncobjectfiles")) {
 		fsync_object_files = git_config_bool(var, value);
 		return 0;
diff --git a/config.mak.uname b/config.mak.uname
index d0701f9beb0..774a09622d2 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -57,6 +57,7 @@  ifeq ($(uname_S),Linux)
 	HAVE_CLOCK_MONOTONIC = YesPlease
 	# -lrt is needed for clock_gettime on glibc <= 2.16
 	NEEDS_LIBRT = YesPlease
+	HAVE_SYNC_FILE_RANGE = YesPlease
 	HAVE_GETDELIM = YesPlease
 	FREAD_READS_DIRECTORIES = UnfortunatelyYes
 	BASIC_CFLAGS += -DHAVE_SYSINFO
@@ -453,6 +454,7 @@  endif
 	CFLAGS =
 	BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
 	COMPAT_OBJS = compat/msvc.o compat/winansi.o \
+		compat/win32/flush.o \
 		compat/win32/path-utils.o \
 		compat/win32/pthread.o compat/win32/syslog.o \
 		compat/win32/trace2_win32_process_info.o \
@@ -628,6 +630,7 @@  ifeq ($(uname_S),MINGW)
 	COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
 	COMPAT_OBJS += compat/mingw.o compat/winansi.o \
 		compat/win32/trace2_win32_process_info.o \
+		compat/win32/flush.o \
 		compat/win32/path-utils.o \
 		compat/win32/pthread.o compat/win32/syslog.o \
 		compat/win32/dirent.o
diff --git a/configure.ac b/configure.ac
index 5ee25ec95c8..6bd6bef1c44 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1082,6 +1082,14 @@  AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
 	[AC_MSG_RESULT([no])
 	HAVE_CLOCK_MONOTONIC=])
 GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
+
+#
+# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
+GIT_CHECK_FUNC(sync_file_range,
+	[HAVE_SYNC_FILE_RANGE=YesPlease],
+	[HAVE_SYNC_FILE_RANGE])
+GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
+
 #
 # Define NO_SETITIMER if you don't have setitimer.
 GIT_CHECK_FUNC(setitimer,
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 86b46114464..6d7bc16d054 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -261,7 +261,8 @@  if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 				NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0
 				USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP
 				UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET)
-	list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c compat/win32/path-utils.c
+	list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c
+		compat/win32/flush.c compat/win32/path-utils.c
 		compat/win32/pthread.c compat/win32mmap.c compat/win32/syslog.c
 		compat/win32/trace2_win32_process_info.c compat/win32/dirent.c
 		compat/nedmalloc/nedmalloc.c compat/strdup.c)
diff --git a/environment.c b/environment.c
index 9da7f3c1a19..f9140e842cf 100644
--- a/environment.c
+++ b/environment.c
@@ -41,7 +41,7 @@  const char *git_attributes_file;
 const char *git_hooks_path;
 int zlib_compression_level = Z_BEST_SPEED;
 int pack_compression_level = Z_DEFAULT_COMPRESSION;
-int fsync_object_files;
+enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
 size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
 size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
 size_t delta_base_cache_limit = 96 * 1024 * 1024;
diff --git a/git-compat-util.h b/git-compat-util.h
index c6bd2a84e55..cb9abd7a08c 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1239,6 +1239,30 @@  __attribute__((format (printf, 1, 2))) NORETURN
 void BUG(const char *fmt, ...);
 #endif
 
+#ifdef __APPLE__
+#define FSYNC_METHOD_DEFAULT FSYNC_METHOD_WRITEOUT_ONLY
+#else
+#define FSYNC_METHOD_DEFAULT FSYNC_METHOD_FSYNC
+#endif
+
+enum fsync_action {
+    FSYNC_WRITEOUT_ONLY,
+    FSYNC_HARDWARE_FLUSH
+};
+
+/*
+ * Issues an fsync against the specified file according to the specified mode.
+ *
+ * FSYNC_WRITEOUT_ONLY attempts to use interfaces available on some operating
+ * systems to flush the OS cache without issuing a flush command to the storage
+ * controller. If those interfaces are unavailable, the function fails with
+ * ENOSYS.
+ *
+ * FSYNC_HARDWARE_FLUSH does an OS writeout and hardware flush to ensure that
+ * changes are durable. It is not expected to fail.
+ */
+int git_fsync(int fd, enum fsync_action action);
+
 /*
  * Preserves errno, prints a message, but gives no warning for ENOENT.
  * Returns 0 on success, which includes trying to unlink an object that does
diff --git a/wrapper.c b/wrapper.c
index 36e12119d76..1c5f2c87791 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -546,6 +546,62 @@  int xmkstemp_mode(char *filename_template, int mode)
 	return fd;
 }
 
+int git_fsync(int fd, enum fsync_action action)
+{
+	switch (action) {
+	case FSYNC_WRITEOUT_ONLY:
+
+#ifdef __APPLE__
+		/*
+		 * on macOS, fsync just causes filesystem cache writeback but does not
+		 * flush hardware caches.
+		 */
+		return fsync(fd);
+#endif
+
+#ifdef HAVE_SYNC_FILE_RANGE
+		/*
+		 * On linux 2.6.17 and above, sync_file_range is the way to issue
+		 * a writeback without a hardware flush. An offset of 0 and size of 0
+		 * indicates writeout of the entire file and the wait flags ensure that all
+		 * dirty data is written to the disk (potentially in a disk-side cache)
+		 * before we continue.
+		 */
+
+		return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
+						 SYNC_FILE_RANGE_WRITE |
+						 SYNC_FILE_RANGE_WAIT_AFTER);
+#endif
+
+#ifdef fsync_no_flush
+		return fsync_no_flush(fd);
+#endif
+
+		errno = ENOSYS;
+		return -1;
+
+	case FSYNC_HARDWARE_FLUSH:
+		/*
+		 * On some platforms fsync may return EINTR. Try again in this
+		 * case, since callers asking for a hardware flush may die if
+		 * this function returns an error.
+		 */
+		for (;;) {
+			int err;
+#ifdef __APPLE__
+			err = fcntl(fd, F_FULLFSYNC);
+#else
+			err = fsync(fd);
+#endif
+			if (err >= 0 || errno != EINTR)
+				return err;
+		}
+
+	default:
+		BUG("unexpected git_fsync(%d) call", action);
+	}
+}
+
 static int warn_if_unremovable(const char *op, const char *file, int rc)
 {
 	int err;
diff --git a/write-or-die.c b/write-or-die.c
index 0b1ec8190b6..0702acdd5e8 100644
--- a/write-or-die.c
+++ b/write-or-die.c
@@ -57,10 +57,12 @@  void fprintf_or_die(FILE *f, const char *fmt, ...)
 
 void fsync_or_die(int fd, const char *msg)
 {
-	while (fsync(fd) < 0) {
-		if (errno != EINTR)
-			die_errno("fsync error on '%s'", msg);
-	}
+	if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
+	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
+		return;
+
+	if (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0)
+		die_errno("fsync error on '%s'", msg);
 }
 
 void write_or_die(int fd, const void *buf, size_t count)