@@ -548,12 +548,29 @@ core.whitespace::
errors. The default tab width is 8. Allowed values are 1 to 63.
core.fsyncObjectFiles::
- This boolean will enable 'fsync()' when writing object files.
-+
-This is a total waste of time and effort on a filesystem that orders
-data writes properly, but can be useful for filesystems that do not use
-journalling (traditional UNIX filesystems) or that only journal metadata
-and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
+ A value indicating the level of effort Git will expend in
+ trying to make objects added to the repo durable in the event
+ of an unclean system shutdown. This setting currently only
+ controls loose objects in the object store, so updates to any
+ refs or the index may not be equally durable.
++
+* `false` allows data to remain in file system caches according to
+ operating system policy, whence it may be lost if the system loses power
+ or crashes.
+* `true` triggers a data integrity flush for each loose object added to the
+ object store. This is the safest setting that is likely to ensure durability
+ across all operating systems and file systems that honor the 'fsync' system
+ call. However, this setting comes with a significant performance cost on
+ common hardware. Git does not currently fsync parent directories for
+ newly-added files, so some filesystems may still allow data to be lost on
+ system crash.
+* `batch` enables an experimental mode that uses interfaces available in some
+ operating systems to write loose object data with a minimal set of FLUSH
+ CACHE (or equivalent) commands sent to the storage controller. If the
+ operating system interfaces are not available, this mode behaves the same as
+ `true`. This mode is expected to be as safe as `true` on macOS for repos
+ stored on HFS+ or APFS filesystems and on Windows for repos stored on NTFS or
+ ReFS.
core.preloadIndex::
Enable parallel index preload for operations like 'git diff'
@@ -406,6 +406,8 @@ all::
#
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
#
+# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
+#
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
#
@@ -1896,6 +1898,10 @@ ifdef HAVE_CLOCK_MONOTONIC
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
endif
+ifdef HAVE_SYNC_FILE_RANGE
+ BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
+endif
+
ifdef NEEDS_LIBRT
EXTLIBS += -lrt
endif
@@ -678,6 +678,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
if (chmod_arg && pathspec.nr)
exit_status |= chmod_pathspec(&pathspec, chmod_arg[0], show_only);
+
unplug_bulk_checkin();
finish:
@@ -3,14 +3,20 @@
*/
#include "cache.h"
#include "bulk-checkin.h"
+#include "lockfile.h"
#include "repository.h"
#include "csum-file.h"
#include "pack.h"
#include "strbuf.h"
+#include "string-list.h"
+#include "tmp-objdir.h"
#include "packfile.h"
#include "object-store.h"
static int bulk_checkin_plugged;
+static int needs_batch_fsync;
+
+static struct tmp_objdir *bulk_fsync_objdir;
static struct bulk_checkin_state {
char *pack_tmp_name;
@@ -62,6 +68,34 @@ clear_exit:
reprepare_packed_git(the_repository);
}
+/*
+ * Finish a batch-mode fsync: issue one hardware flush for all pending loose
+ * objects, migrate them out of the temporary objdir, and reset batch state.
+ */
+static void do_batch_fsync(void)
+{
+	/*
+	 * fsync_loose_object_bulk_checkin has only requested writeout; a
+	 * full flush of a scratch file makes the objects durable in the
+	 * storage hardware before any of them is renamed into place.
+	 */
+	if (needs_batch_fsync) {
+		struct strbuf temp_path = STRBUF_INIT;
+		struct tempfile *temp;
+
+		strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory());
+		temp = xmks_tempfile(temp_path.buf);
+		fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
+		delete_tempfile(&temp);
+		strbuf_release(&temp_path);
+		needs_batch_fsync = 0;
+	}
+
+	if (bulk_fsync_objdir && tmp_objdir_migrate(bulk_fsync_objdir) < 0)
+		die(_("unable to migrate objects to permanent storage"));
+	bulk_fsync_objdir = NULL;
+}
+
static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
{
int i;
@@ -256,6 +290,26 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
return 0;
}
+/* Sync one loose object file descriptor under core.fsyncObjectFiles=batch. */
+void fsync_loose_object_bulk_checkin(int fd)
+{
+	assert(fsync_object_files == FSYNC_OBJECT_FILES_BATCH);
+
+	/* Not plugged, or the writeout-only sync failed: flush this file fully. */
+	if (!bulk_checkin_plugged ||
+	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
+		fsync_or_die(fd, "loose object file");
+		return;
+	}
+
+	/*
+	 * Only a writeout was requested here, with no hardware flush;
+	 * do_batch_fsync issues a single hardware flush later on.
+	 */
+	assert(the_repository->objects->odb->is_temp);
+	needs_batch_fsync = 1;
+}
+
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags)
@@ -270,6 +324,20 @@ int index_bulk_checkin(struct object_id *oid,
void plug_bulk_checkin(void)
{
assert(!bulk_checkin_plugged);
+
+ /*
+ * In batch fsync mode, make a temporary object directory the main
+ * object store unless it already is one; do_batch_fsync migrates it.
+ */
+ if (fsync_object_files == FSYNC_OBJECT_FILES_BATCH &&
+ !the_repository->objects->odb->is_temp) {
+ bulk_fsync_objdir = tmp_objdir_create();
+ if (!bulk_fsync_objdir)
+ die(_("Could not create temporary object directory for core.fsyncobjectfiles=batch"));
+
+ tmp_objdir_replace_main_odb(bulk_fsync_objdir);
+ }
+
bulk_checkin_plugged = 1;
}
@@ -279,4 +347,6 @@ void unplug_bulk_checkin(void)
bulk_checkin_plugged = 0;
if (bulk_checkin_state.f)
finish_bulk_checkin(&bulk_checkin_state);
+
+ do_batch_fsync();
}
@@ -6,6 +6,8 @@
#include "cache.h"
+void fsync_loose_object_bulk_checkin(int fd);
+
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags);
@@ -985,7 +985,13 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;
-extern int fsync_object_files;
+enum fsync_object_files_mode {
+ FSYNC_OBJECT_FILES_OFF, /* core.fsyncObjectFiles=false: no explicit fsync */
+ FSYNC_OBJECT_FILES_ON, /* core.fsyncObjectFiles=true: fsync each loose object */
+ FSYNC_OBJECT_FILES_BATCH /* core.fsyncObjectFiles=batch: see fsync_loose_object_bulk_checkin() */
+};
+
+extern enum fsync_object_files_mode fsync_object_files;
extern int core_preload_index;
extern int precomposed_unicode;
extern int protect_hfs;
@@ -1509,7 +1509,12 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
}
if (!strcmp(var, "core.fsyncobjectfiles")) {
- fsync_object_files = git_config_bool(var, value);
+ if (value && !strcmp(value, "batch"))
+ fsync_object_files = FSYNC_OBJECT_FILES_BATCH;
+ else if (git_config_bool(var, value))
+ fsync_object_files = FSYNC_OBJECT_FILES_ON;
+ else
+ fsync_object_files = FSYNC_OBJECT_FILES_OFF;
return 0;
}
@@ -53,6 +53,7 @@ ifeq ($(uname_S),Linux)
HAVE_CLOCK_MONOTONIC = YesPlease
# -lrt is needed for clock_gettime on glibc <= 2.16
NEEDS_LIBRT = YesPlease
+ HAVE_SYNC_FILE_RANGE = YesPlease
HAVE_GETDELIM = YesPlease
SANE_TEXT_GREP=-a
FREAD_READS_DIRECTORIES = UnfortunatelyYes
@@ -1090,6 +1090,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
[AC_MSG_RESULT([no])
HAVE_CLOCK_MONOTONIC=])
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
+
+#
+# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
+GIT_CHECK_FUNC(sync_file_range,
+	[HAVE_SYNC_FILE_RANGE=YesPlease],
+	[HAVE_SYNC_FILE_RANGE=])
+GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
+
#
# Define NO_SETITIMER if you don't have setitimer.
GIT_CHECK_FUNC(setitimer,
@@ -43,7 +43,7 @@ const char *git_hooks_path;
int zlib_compression_level = Z_BEST_SPEED;
int core_compression_level;
int pack_compression_level = Z_DEFAULT_COMPRESSION;
-int fsync_object_files;
+enum fsync_object_files_mode fsync_object_files;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
@@ -1210,6 +1210,13 @@ __attribute__((format (printf, 1, 2))) NORETURN
void BUG(const char *fmt, ...);
#endif
+enum fsync_action {
+ FSYNC_WRITEOUT_ONLY, /* write back dirty data without a hardware cache flush; may fail with ENOSYS */
+ FSYNC_HARDWARE_FLUSH /* full flush: fsync(), or fcntl(F_FULLFSYNC) on macOS */
+};
+
+int git_fsync(int fd, enum fsync_action action);
+
/*
* Preserves errno, prints a message, but gives no warning for ENOENT.
* Returns 0 on success, which includes trying to unlink an object that does
@@ -750,6 +750,60 @@ void add_to_alternates_memory(const char *reference)
'\n', NULL, 0);
}
+struct object_directory *set_temporary_main_odb(const char *dir)
+{
+ struct object_directory *main_odb, *new_odb, *old_next;
+
+ /*
+ * Make sure alternates are initialized, or else our entry may be
+ * overwritten when they are.
+ */
+ prepare_alt_odb(the_repository);
+
+ /* Copy the existing object directory and append it as the last alternate. */
+ main_odb = the_repository->objects->odb;
+ new_odb = xmalloc(sizeof(*new_odb));
+ *new_odb = *main_odb;
+ *the_repository->objects->odb_tail = new_odb;
+ the_repository->objects->odb_tail = &(new_odb->next);
+ new_odb->next = NULL; /* the struct copy inherited main_odb's next */
+
+ /*
+ * Reinitialize the main odb in place with the specified path, being
+ * careful to keep the next pointer value; the old contents live on in the copy.
+ */
+ old_next = main_odb->next;
+ memset(main_odb, 0, sizeof(*main_odb));
+ main_odb->next = old_next;
+ main_odb->is_temp = 1; /* checked by plug_bulk_checkin and the batch fsync path */
+ main_odb->path = xstrdup(dir);
+ return new_odb; /* caller hands this back to restore_main_odb() */
+}
+
+void restore_main_odb(struct object_directory *odb)
+{
+	struct object_directory **prev, *main_odb;
+
+	/* Unlink the saved previous main ODB from the alternates list. */
+	prev = &the_repository->objects->odb->next;
+	while (*prev && *prev != odb)
+		prev = &(*prev)->next;
+	if (!*prev)
+		BUG("saved main object directory is not in the alternates list");
+	*prev = odb->next;
+	if (!*prev)
+		the_repository->objects->odb_tail = prev;
+
+	/*
+	 * Swap the saved data back into the main slot (keeping the list's
+	 * next pointer), then free the temporary odb's data left in 'odb'.
+	 */
+	main_odb = the_repository->objects->odb;
+	SWAP(*main_odb, *odb);
+	main_odb->next = odb->next;
+	free_object_directory(odb);
+}
+
/*
* Compute the exact path an alternate is at and returns it. In case of
* error NULL is returned and the human readable error is added to `err`
@@ -1867,8 +1921,19 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
/* Finalize a file on disk, and close it. */
static void close_loose_object(int fd)
{
- if (fsync_object_files)
+ switch (fsync_object_files) {
+ case FSYNC_OBJECT_FILES_OFF: /* no explicit sync requested */
+ break;
+ case FSYNC_OBJECT_FILES_ON: /* full fsync of every loose object */
fsync_or_die(fd, "loose object file");
+ break;
+ case FSYNC_OBJECT_FILES_BATCH: /* batched while plugged, otherwise a full fsync */
+ fsync_loose_object_bulk_checkin(fd);
+ break;
+ default:
+ BUG("Invalid fsync_object_files mode.");
+ }
+
if (close(fd) != 0)
die_errno(_("error when closing loose object file"));
}
@@ -62,6 +62,19 @@ void add_to_alternates_file(const char *dir);
*/
void add_to_alternates_memory(const char *dir);
+/*
+ * Replace the current main object directory with the specified temporary
+ * object directory. We make a copy of the former main object directory,
+ * add it as an in-memory alternate, and return the copy so that it can
+ * be restored via restore_main_odb.
+ */
+struct object_directory *set_temporary_main_odb(const char *dir);
+
+/*
+ * Restore a previous ODB replaced by set_temporary_main_odb.
+ */
+void restore_main_odb(struct object_directory *odb);
+
/*
* Populate and return the loose object cache array corresponding to the
* given object ID.
@@ -72,6 +85,9 @@ struct oidtree *odb_loose_cache(struct object_directory *odb,
/* Empty the loose object cache for the specified object directory. */
void odb_clear_loose_cache(struct object_directory *odb);
+/* Clear and free the specified object directory */
+void free_object_directory(struct object_directory *odb);
+
struct packed_git {
struct hashmap_entry packmap_ent;
struct packed_git *next;
@@ -513,7 +513,7 @@ struct raw_object_store *raw_object_store_new(void)
return o;
}
-static void free_object_directory(struct object_directory *odb)
+void free_object_directory(struct object_directory *odb)
{
free(odb->path);
odb_clear_loose_cache(odb);
@@ -11,6 +11,7 @@
struct tmp_objdir {
struct strbuf path;
struct strvec env;
+ struct object_directory *prev_main_odb; /* main odb saved by tmp_objdir_replace_main_odb(), else NULL */
};
/*
@@ -50,8 +51,12 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
* freeing memory; it may cause a deadlock if the signal
* arrived while libc's allocator lock is held.
*/
- if (!on_signal)
+ if (!on_signal) {
+ if (t->prev_main_odb)
+ restore_main_odb(t->prev_main_odb);
tmp_objdir_free(t);
+ }
+
return err;
}
@@ -132,6 +137,7 @@ struct tmp_objdir *tmp_objdir_create(void)
t = xmalloc(sizeof(*t));
strbuf_init(&t->path, 0);
strvec_init(&t->env);
+ t->prev_main_odb = NULL;
strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory());
@@ -269,6 +275,11 @@ int tmp_objdir_migrate(struct tmp_objdir *t)
if (!t)
return 0;
+ if (t->prev_main_odb) {
+ restore_main_odb(t->prev_main_odb);
+ t->prev_main_odb = NULL;
+ }
+
strbuf_addbuf(&src, &t->path);
strbuf_addstr(&dst, get_object_directory());
@@ -292,3 +303,10 @@ void tmp_objdir_add_as_alternate(const struct tmp_objdir *t)
{
add_to_alternates_memory(t->path.buf);
}
+
+void tmp_objdir_replace_main_odb(struct tmp_objdir *t)
+{
+ if (t->prev_main_odb) /* only one saved main odb can be tracked at a time */
+ BUG("the main object database is already replaced");
+ t->prev_main_odb = set_temporary_main_odb(t->path.buf); /* kept for restore_main_odb() */
+}
@@ -51,4 +51,10 @@ int tmp_objdir_destroy(struct tmp_objdir *);
*/
void tmp_objdir_add_as_alternate(const struct tmp_objdir *);
+/*
+ * Replaces the main object store in the current process with the temporary
+ * object directory and makes the former main object store an alternate.
+ */
+void tmp_objdir_replace_main_odb(struct tmp_objdir *);
+
#endif /* TMP_OBJDIR_H */
@@ -540,6 +540,50 @@ int xmkstemp_mode(char *filename_template, int mode)
return fd;
}
+int git_fsync(int fd, enum fsync_action action)
+{
+	switch (action) {
+	case FSYNC_WRITEOUT_ONLY:
+		/*
+		 * Push dirty pages to the device without asking it to flush
+		 * its own write cache. Exactly one branch below is compiled.
+		 */
+#if defined(__APPLE__)
+		/*
+		 * On macOS, plain fsync only causes filesystem cache
+		 * writeback; it does not flush hardware caches.
+		 */
+		return fsync(fd);
+#elif defined(HAVE_SYNC_FILE_RANGE)
+		/*
+		 * On Linux 2.6.17 and above, sync_file_range issues a
+		 * writeback without a hardware flush. Offset 0 with size 0
+		 * covers the entire file, and the two wait flags make the
+		 * call block until all dirty data has been written to the
+		 * disk (potentially into a disk-side cache).
+		 */
+		return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
+				       SYNC_FILE_RANGE_WRITE |
+				       SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+		/* No writeout-only primitive here; the caller must fall back. */
+		errno = ENOSYS;
+		return -1;
+#endif
+
+	case FSYNC_HARDWARE_FLUSH:
+#if defined(__APPLE__)
+		/* fsync alone does not flush hardware caches on macOS. */
+		return fcntl(fd, F_FULLFSYNC);
+#else
+		return fsync(fd);
+#endif
+
+	default:
+		BUG("unexpected git_fsync(%d) call", action);
+	}
+}
+
static int warn_if_unremovable(const char *op, const char *file, int rc)
{
int err;
@@ -57,7 +57,7 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
void fsync_or_die(int fd, const char *msg)
{
- while (fsync(fd) < 0) {
+ while (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0) {
if (errno != EINTR)
die_errno("fsync error on '%s'", msg);
}