@@ -548,12 +548,26 @@ core.whitespace::
errors. The default tab width is 8. Allowed values are 1 to 63.
core.fsyncObjectFiles::
- This boolean will enable 'fsync()' when writing object files.
-+
-This is a total waste of time and effort on a filesystem that orders
-data writes properly, but can be useful for filesystems that do not use
-journalling (traditional UNIX filesystems) or that only journal metadata
-and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
+ A value indicating the level of effort Git will expend in
+ trying to make objects added to the repo durable in the event
+ of an unclean system shutdown. This setting currently only
+ controls the object store, so updates to any refs or the
+ index may not be equally durable.
++
+* `false` allows data to remain in file system caches according to
+ operating system policy, from which it may be lost if the system loses
+ power or crashes.
+* `true` triggers a data integrity flush for each object added to the
+ object store. This is the safest setting that is likely to ensure durability
+ across all operating systems and file systems that honor the 'fsync' system
+ call. However, this setting comes with a significant performance cost on
+ common hardware.
+* `batch` enables an experimental mode that uses interfaces available in some
+ operating systems to write object data with a minimal set of FLUSH CACHE
+ (or equivalent) commands sent to the storage controller. If the operating
+ system interfaces are not available, this mode behaves the same as `true`.
+ This mode is expected to be safe on macOS for repos stored on HFS+ or APFS
+ filesystems and on Windows for repos stored on NTFS or ReFS.
core.preloadIndex::
Enable parallel index preload for operations like 'git diff'
@@ -406,6 +406,8 @@ all::
#
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
#
+# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
+#
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
#
@@ -1896,6 +1898,10 @@ ifdef HAVE_CLOCK_MONOTONIC
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
endif
+ifdef HAVE_SYNC_FILE_RANGE
+ BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
+endif
+
ifdef NEEDS_LIBRT
EXTLIBS += -lrt
endif
@@ -678,7 +678,8 @@ int cmd_add(int argc, const char **argv, const char *prefix)
if (chmod_arg && pathspec.nr)
exit_status |= chmod_pathspec(&pathspec, chmod_arg[0], show_only);
- unplug_bulk_checkin();
+
+ unplug_bulk_checkin(&lock_file);
finish:
if (write_locked_index(&the_index, &lock_file,
@@ -3,15 +3,19 @@
*/
#include "cache.h"
#include "bulk-checkin.h"
+#include "lockfile.h"
#include "repository.h"
#include "csum-file.h"
#include "pack.h"
#include "strbuf.h"
+#include "string-list.h"
#include "packfile.h"
#include "object-store.h"
static int bulk_checkin_plugged;
+static struct string_list bulk_fsync_state = STRING_LIST_INIT_DUP;
+
static struct bulk_checkin_state {
char *pack_tmp_name;
struct hashfile *f;
@@ -62,6 +66,32 @@ clear_exit:
reprepare_packed_git(the_repository);
}
+/*
+ * Make the loose objects queued in `fsync_state` durable, then move each
+ * one from its temporary name to its final name.
+ *
+ * Every list item carries the temporary path in `string` and the final
+ * path in `util`. When the list is empty nothing is done and `lock_file`
+ * is not touched. A failed rename is fatal. On success the list is
+ * cleared; the second argument to string_list_clear() asks it to release
+ * the `util` pointers as well.
+ */
+static void do_sync_and_rename(struct string_list *fsync_state, struct lock_file *lock_file)
+{
+	struct string_list_item *item;
+
+	if (!fsync_state->nr)
+		return;
+
+	/*
+	 * fsync_and_close_loose_object_bulk_checkin already made sure that
+	 * writeout of each queued object happened, but it did not flush
+	 * any writeback cache in the storage hardware. Issue one full
+	 * hardware flush against the lock file so that all objects are
+	 * durable before any rename occurs.
+	 */
+	fsync_or_die(get_lock_file_fd(lock_file), get_lock_file_path(lock_file));
+
+	for_each_string_list_item(item, fsync_state) {
+		const char *tmp_path = item->string;
+		const char *final_path = item->util;
+
+		if (finalize_object_file(tmp_path, final_path))
+			die_errno(_("could not rename '%s' to '%s'"), tmp_path, final_path);
+	}
+
+	string_list_clear(fsync_state, 1);
+}
+
static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
{
int i;
@@ -256,6 +286,53 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
return 0;
}
+/*
+ * Queue a deferred rename of `src` (the temporary object file) to `dst`
+ * (its final path) on `fsync_state`. `dst` is copied and stored in the
+ * item's `util` pointer.
+ *
+ * Entries are appended instead of being inserted in sorted order. The
+ * code that consumes this list only iterates over it, so keeping it
+ * sorted would cost a search and a memmove per queued object for no
+ * benefit. Appending also avoids the sorted-insert behaviour of handing
+ * back an already existing item, which would overwrite and leak its
+ * previous `util` string.
+ */
+static void add_rename_bulk_checkin(struct string_list *fsync_state,
+				    const char *src, const char *dst)
+{
+	string_list_append(fsync_state, src)->util = xstrdup(dst);
+}
+
+/*
+ * Sync (according to core.fsyncObjectFiles), close, and finalize a freshly
+ * written loose object.
+ *
+ * fd       - open descriptor of the temporary object file; always closed
+ *            here, and a close failure is fatal.
+ * tmpfile  - path of the temporary object file.
+ * filename - final path of the object.
+ * mtime    - if non-zero, applied to `tmpfile` as both access and
+ *            modification time; a utime() failure only warns.
+ *
+ * Returns the result of finalize_object_file(), or 0 when the rename was
+ * queued for do_sync_and_rename instead of being performed here.
+ */
+int fsync_and_close_loose_object_bulk_checkin(int fd, const char *tmpfile,
+					      const char *filename, time_t mtime)
+{
+	int deferred = 0;
+
+	if (fsync_object_files != FSYNC_OBJECT_FILES_OFF) {
+		int batching = bulk_checkin_plugged &&
+			       fsync_object_files == FSYNC_OBJECT_FILES_BATCH;
+
+		/*
+		 * While a bulk checkin is plugged in batch mode, only ask
+		 * for the page cache to be written out and skip the
+		 * hardware flush; a single hardware flush is issued later,
+		 * before the queued renames, in do_sync_and_rename. If the
+		 * writeout-only request fails, fall back to a full fsync
+		 * and finalize immediately.
+		 */
+		if (batching && git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0) {
+			add_rename_bulk_checkin(&bulk_fsync_state, tmpfile, filename);
+			deferred = 1;
+		} else {
+			fsync_or_die(fd, "loose object file");
+		}
+	}
+
+	if (close(fd) != 0)
+		die_errno(_("error when closing loose object file"));
+
+	if (mtime) {
+		struct utimbuf times;
+
+		times.actime = mtime;
+		times.modtime = mtime;
+		if (utime(tmpfile, &times) < 0)
+			warning_errno(_("failed utime() on %s"), tmpfile);
+	}
+
+	if (deferred)
+		return 0;
+
+	return finalize_object_file(tmpfile, filename);
+}
+
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags)
@@ -273,10 +350,12 @@ void plug_bulk_checkin(void)
bulk_checkin_plugged = 1;
}
-void unplug_bulk_checkin(void)
+/*
+ * Leave the plugged state: clear the plugged flag, finish the bulk
+ * checkin pack if one is in progress, then flush and rename any loose
+ * objects whose finalization was deferred while plugged.
+ *
+ * `lock_file` is handed to do_sync_and_rename, which fsyncs its file
+ * descriptor when deferred objects exist.
+ * NOTE(review): presumably callers must hold this lock file open at
+ * that point -- confirm against every caller.
+ */
+void unplug_bulk_checkin(struct lock_file *lock_file)
{
assert(bulk_checkin_plugged);
bulk_checkin_plugged = 0;
if (bulk_checkin_state.f)
finish_bulk_checkin(&bulk_checkin_state);
+
+ do_sync_and_rename(&bulk_fsync_state, lock_file);
}
@@ -6,11 +6,14 @@
#include "cache.h"
+int fsync_and_close_loose_object_bulk_checkin(int fd, const char *tmpfile,
+ const char *filename, time_t mtime);
+
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags);
void plug_bulk_checkin(void);
-void unplug_bulk_checkin(void);
+void unplug_bulk_checkin(struct lock_file *);
#endif
@@ -985,7 +985,13 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;
-extern int fsync_object_files;
+/*
+ * Values of the core.fsyncObjectFiles setting. The numbering is spelled
+ * out because the `fsync_object_files` global is defined without an
+ * initializer, so "off" has to be the zero value.
+ */
+enum FSYNC_OBJECT_FILES_MODE {
+	/* no explicit sync of object files */
+	FSYNC_OBJECT_FILES_OFF = 0,
+	/* full fsync for each object file */
+	FSYNC_OBJECT_FILES_ON = 1,
+	/* writeout per object, one hardware flush per plugged batch */
+	FSYNC_OBJECT_FILES_BATCH = 2
+};
+
+extern enum FSYNC_OBJECT_FILES_MODE fsync_object_files;
extern int core_preload_index;
extern int precomposed_unicode;
extern int protect_hfs;
@@ -1509,7 +1509,13 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
}
if (!strcmp(var, "core.fsyncobjectfiles")) {
- fsync_object_files = git_config_bool(var, value);
+		/*
+		 * "batch" is the only non-boolean spelling. Everything
+		 * else -- including a key written with no value at all,
+		 * which git_config_bool() was already handed before this
+		 * setting grew a third state -- is still parsed as a
+		 * boolean, so existing configurations keep working.
+		 */
+		if (value && !strcasecmp(value, "batch"))
+			fsync_object_files = FSYNC_OBJECT_FILES_BATCH;
+		else
+			fsync_object_files = git_config_bool(var, value)
+				? FSYNC_OBJECT_FILES_ON : FSYNC_OBJECT_FILES_OFF;
return 0;
}
@@ -53,6 +53,7 @@ ifeq ($(uname_S),Linux)
HAVE_CLOCK_MONOTONIC = YesPlease
# -lrt is needed for clock_gettime on glibc <= 2.16
NEEDS_LIBRT = YesPlease
+ HAVE_SYNC_FILE_RANGE = YesPlease
HAVE_GETDELIM = YesPlease
SANE_TEXT_GREP=-a
FREAD_READS_DIRECTORIES = UnfortunatelyYes
@@ -1090,6 +1090,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
[AC_MSG_RESULT([no])
HAVE_CLOCK_MONOTONIC=])
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
+
+#
+# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
+# The not-found action must be an assignment: a bare word here would be
+# run as a shell command by the generated configure script.
+GIT_CHECK_FUNC(sync_file_range,
+	[HAVE_SYNC_FILE_RANGE=YesPlease],
+	[HAVE_SYNC_FILE_RANGE=])
+GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
+
#
# Define NO_SETITIMER if you don't have setitimer.
GIT_CHECK_FUNC(setitimer,
@@ -43,7 +43,7 @@ const char *git_hooks_path;
int zlib_compression_level = Z_BEST_SPEED;
int core_compression_level;
int pack_compression_level = Z_DEFAULT_COMPRESSION;
-int fsync_object_files;
+enum FSYNC_OBJECT_FILES_MODE fsync_object_files;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
@@ -1210,6 +1210,13 @@ __attribute__((format (printf, 1, 2))) NORETURN
void BUG(const char *fmt, ...);
#endif
+/*
+ * Strength of the flush requested from git_fsync().
+ */
+enum fsync_action {
+	/* write dirty data out without asking for a hardware cache flush */
+	FSYNC_WRITEOUT_ONLY = 0,
+	/* full flush: fsync(), or fcntl(F_FULLFSYNC) when built for Apple */
+	FSYNC_HARDWARE_FLUSH = 1
+};
+
+int git_fsync(int fd, enum fsync_action action);
+
/*
* Preserves errno, prints a message, but gives no warning for ENOENT.
* Returns 0 on success, which includes trying to unlink an object that does
@@ -1859,15 +1859,6 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
return 0;
}
-/* Finalize a file on disk, and close it. */
-static void close_loose_object(int fd)
-{
- if (fsync_object_files)
- fsync_or_die(fd, "loose object file");
- if (close(fd) != 0)
- die_errno(_("error when closing loose object file"));
-}
-
/* Size of directory component, including the ending '/' */
static inline int directory_size(const char *filename)
{
@@ -1973,17 +1964,8 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
die(_("confused by unstable object source data for %s"),
oid_to_hex(oid));
- close_loose_object(fd);
-
- if (mtime) {
- struct utimbuf utb;
- utb.actime = mtime;
- utb.modtime = mtime;
- if (utime(tmp_file.buf, &utb) < 0)
- warning_errno(_("failed utime() on %s"), tmp_file.buf);
- }
-
- return finalize_object_file(tmp_file.buf, filename.buf);
+ return fsync_and_close_loose_object_bulk_checkin(fd, tmp_file.buf,
+ filename.buf, mtime);
}
static int freshen_loose_object(const struct object_id *oid)
@@ -540,6 +540,42 @@ int xmkstemp_mode(char *filename_template, int mode)
return fd;
}
+/*
+ * Flush `fd` with the strength requested by `action`.
+ *
+ * FSYNC_WRITEOUT_ONLY asks for dirty data to be written out without a
+ * hardware cache flush. When the build has no interface for that, the
+ * call fails with -1 and errno set to ENOSYS so the caller can fall back
+ * to a full flush. Any other `action` performs the full flush.
+ *
+ * Returns whatever the underlying system call returns. EINTR is not
+ * retried here.
+ */
+int git_fsync(int fd, enum fsync_action action)
+{
+	if (action != FSYNC_WRITEOUT_ONLY) {
+#ifdef __APPLE__
+		return fcntl(fd, F_FULLFSYNC);
+#else
+		return fsync(fd);
+#endif
+	}
+
+#if defined(__APPLE__)
+	/*
+	 * On Mac OS X, fsync just causes filesystem cache writeback but
+	 * does not flush hardware caches.
+	 */
+	return fsync(fd);
+#elif defined(HAVE_SYNC_FILE_RANGE)
+	/*
+	 * On Linux 2.6.17 and above, sync_file_range is the way to issue
+	 * a writeback without a hardware flush. An offset of 0 and size
+	 * of 0 indicates writeout of the entire file, and the wait flags
+	 * ensure that all dirty data is written to the disk (potentially
+	 * in a disk-side cache) before we continue.
+	 */
+	return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
+					 SYNC_FILE_RANGE_WRITE |
+					 SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+
static int warn_if_unremovable(const char *op, const char *file, int rc)
{
int err;
@@ -57,7 +57,7 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
void fsync_or_die(int fd, const char *msg)
{
- while (fsync(fd) < 0) {
+ while (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0) {
if (errno != EINTR)
die_errno("fsync error on '%s'", msg);
}