@@ -452,17 +452,8 @@ static int check_updates(struct unpack_trees_options *o,
if (should_update_submodules())
load_gitmodules_file(index, NULL);
- for (i = 0; i < index->cache_nr; i++) {
- const struct cache_entry *ce = index->cache[i];
-
- if (ce->ce_flags & CE_WT_REMOVE) {
- display_progress(progress, ++cnt);
- unlink_entry(ce, o->super_prefix);
- }
- }
-
- remove_marked_cache_entries(index, 0);
- remove_scheduled_dirs();
+ get_parallel_checkout_configs(&pc_workers, &pc_threshold);
+ cnt = run_parallel_unlink(index, progress, o->super_prefix, pc_workers, pc_threshold * 100, cnt);
if (should_update_submodules())
load_gitmodules_file(index, &state);
@@ -474,8 +465,6 @@ static int check_updates(struct unpack_trees_options *o,
*/
prefetch_cache_entries(index, must_checkout);
- get_parallel_checkout_configs(&pc_workers, &pc_threshold);
-
enable_delayed_checkout(&state);
if (pc_workers > 1)
init_parallel_checkout();
From: Han Young <hanyang.tony@bytedance.com>

We have had the parallel_checkout option since 04155bdad, but unlinking
is still executed single threaded. On very large repositories, checking
out across a directory rename or a restructuring commit can produce a
large number of entries to unlink. In some instances, the unlink
operation can be slower than the parallel checkout itself. This commit
adds parallel unlink support, which removes the scheduled entries with
multiple worker threads.
---
The unlink operation by itself is much faster than checkout, so the
default threshold should be much higher than the parallel_checkout one.
I hardcoded the threshold to be 100 times higher; we probably need to
introduce a new config option with a sensible default.

Discovering how many entries need to be removed requires iterating over
index->cache, but this is fast even for a large number of entries
compared to the filesystem operations. I think we can reuse
checkout.workers as the main switch for parallel unlink, since it is
also part of the checkout process.

 unpack-trees.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)
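
For reference, here is a minimal standalone sketch of the multithreaded
removal described above. This is not the series' run_parallel_unlink();
the struct, the worker function, the shared-counter work distribution
and all names below are illustrative assumptions, and error handling is
reduced to a warning.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct unlink_batch {
	const char **paths;	/* paths already scheduled for removal */
	size_t nr;		/* number of paths in the batch */
	size_t next;		/* next unclaimed index */
	pthread_mutex_t lock;	/* protects 'next' */
};

static void *unlink_worker(void *arg)
{
	struct unlink_batch *b = arg;

	for (;;) {
		size_t i;

		/* claim the next path under the lock, unlink outside it */
		pthread_mutex_lock(&b->lock);
		i = b->next++;
		pthread_mutex_unlock(&b->lock);
		if (i >= b->nr)
			break;
		if (unlink(b->paths[i]) && errno != ENOENT)
			fprintf(stderr, "warning: unable to unlink '%s'\n",
				b->paths[i]);
	}
	return NULL;
}

static void parallel_unlink(const char **paths, size_t nr, int workers)
{
	struct unlink_batch b = { paths, nr, 0 };
	pthread_t *threads;
	int i;

	threads = calloc(workers, sizeof(*threads));
	if (!threads)
		return;
	pthread_mutex_init(&b.lock, NULL);
	for (i = 0; i < workers; i++)
		pthread_create(&threads[i], NULL, unlink_worker, &b);
	for (i = 0; i < workers; i++)
		pthread_join(threads[i], NULL);
	pthread_mutex_destroy(&b.lock);
	free(threads);
}

In the patch itself the candidate paths would instead come from the
index->cache entries flagged CE_WT_REMOVE, and the hardcoded
pc_threshold * 100 cutoff decides whether spinning up workers is worth
it at all.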