diff mbox series

[WIP,v3,6/7] mv: add check_dir_in_index() and solve general dir check issue

Message ID 20220619032549.156335-7-shaoxuan.yuan02@gmail.com (mailing list archive)
State Superseded
Headers show
Series mv: fix out-of-cone file/directory move logic | expand

Commit Message

Shaoxuan Yuan June 19, 2022, 3:25 a.m. UTC
Originally, when moving a <source> directory which is not on-disk because
it exists outside of the sparse-checkout cone, the "git mv" command
errors out with "bad source".

Add a helper check_dir_in_index() function to see if a directory
name exists in the index. Also add a SKIP_WORKTREE_DIR bit to mark
such directories.

Change the checking logic, so that such a <source> directory makes the
"git mv" command warn with "advise_on_updating_sparse_paths()"
instead of "bad source"; the user can now also supply a "--sparse" flag so
that this operation can be carried out successfully.

Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
---
 builtin/mv.c                  | 49 +++++++++++++++++++++++++++++++----
 t/t7002-mv-sparse-checkout.sh |  4 +--
 2 files changed, 46 insertions(+), 7 deletions(-)

Comments

Victoria Dye June 21, 2022, 10:55 p.m. UTC | #1
Shaoxuan Yuan wrote:
> Originally, when moving a <source> directory which is not on-disk because
> it exists outside of the sparse-checkout cone, the "git mv" command
> errors out with "bad source".
> 
> Add a helper check_dir_in_index() function to see if a directory
> name exists in the index. Also add a SKIP_WORKTREE_DIR bit to mark
> such directories.
> 
> Change the checking logic, so that such a <source> directory makes the
> "git mv" command warn with "advise_on_updating_sparse_paths()"
> instead of "bad source"; the user can now also supply a "--sparse" flag so
> that this operation can be carried out successfully.
> 
> Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
> ---
>  builtin/mv.c                  | 49 +++++++++++++++++++++++++++++++----
>  t/t7002-mv-sparse-checkout.sh |  4 +--
>  2 files changed, 46 insertions(+), 7 deletions(-)
> 
> diff --git a/builtin/mv.c b/builtin/mv.c
> index 7ce7992d6c..cb3441c7cb 100644
> --- a/builtin/mv.c
> +++ b/builtin/mv.c
> @@ -123,6 +123,37 @@ static int index_range_of_same_dir(const char *src, int length,
>  	return last - first;
>  }
>  
> +/*
> + * Check if an out-of-cone directory should be in the index. Imagine this case
> + * that all the files under a directory are marked with 'CE_SKIP_WORKTREE' bit
> + * and thus the directory is sparsified.
> + *
> + * Return 0 if such directory exist (i.e. with any of its contained files not
> + * marked with CE_SKIP_WORKTREE, the directory would be present in working tree).
> + * Return 1 otherwise.
> + */
This explanation is helpful in clarifying that you don't mean *sparse
directories* (that is, directory entries in a sparse index), you mean
directories whose contents are all sparse. It's a tricky distinction, but
you handled it nicely here.

> +static int check_dir_in_index(const char *name, int namelen)
> +{
> +	int ret = 1;
> +	const char *with_slash = add_slash(name);
> +	int length = namelen + 1;
> +
> +	int pos = cache_name_pos(with_slash, length);
> +	const struct cache_entry *ce;
> +
> +	if (pos < 0) {
> +		pos = -pos - 1;
> +		if (pos >= the_index.cache_nr)
> +			return ret;
> +		ce = active_cache[pos];
> +		if (strncmp(with_slash, ce->name, length))
> +			return ret;
> +		if (ce_skip_worktree(ce))
> +			return ret = 0;
> +	}
> +	return ret;

The way 'ret' is handled here is a bit difficult to follow. Would you be
opposed to returning hardcoded '0' or '1', rather than changing the value of
'ret' throughout? Something like:

/*
 * Look up "<name>/" in the index and report whether the entry found at
 * that spot is a skip-worktree entry living under that directory prefix.
 *
 * name:    directory path; it is passed through add_slash() before lookup
 *          (presumably yielding the path with a trailing '/' -- confirm
 *          against add_slash()).
 * namelen: length of name; the lookup and prefix comparison use namelen + 1
 *          bytes, i.e. this assumes add_slash() added exactly one character.
 *
 * Returns 0 when cache_name_pos() reports no exact match, the slot derived
 * from its negative result is within the index, that entry's name starts
 * with the slash-terminated prefix, and the entry has skip-worktree set.
 * Returns 1 in every other case.
 */
static int check_dir_in_index(const char *name, int namelen)
{
	const char *dir_prefix = add_slash(name);
	int prefix_len = namelen + 1;
	int idx = cache_name_pos(dir_prefix, prefix_len);
	const struct cache_entry *entry;

	/* An exact hit on "<name>/" is not the case we are looking for. */
	if (idx >= 0)
		return 1;

	/* Turn the negative "not found" result into a slot number. */
	idx = -idx - 1;
	if (idx >= the_index.cache_nr)
		return 1;

	/* The entry at that slot must live underneath "<name>/". */
	entry = active_cache[idx];
	if (strncmp(dir_prefix, entry->name, prefix_len))
		return 1;

	if (ce_skip_worktree(entry))
		return 0;
	return 1;
}

> +}
> +
>  int cmd_mv(int argc, const char **argv, const char *prefix)
>  {
>  	int i, flags, gitmodules_modified = 0;
> @@ -184,7 +215,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  	/* Checking */
>  	for (i = 0; i < argc; i++) {
>  		const char *src = source[i], *dst = destination[i];
> -		int length, src_is_dir;
> +		int length;
>  		const char *bad = NULL;
>  		int skip_sparse = 0;
>  
> @@ -198,12 +229,17 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  
>  			pos = cache_name_pos(src, length);
>  			if (pos < 0) {
> +				const char *src_w_slash = add_slash(src);
> +				if (!check_dir_in_index(src, length) &&
> +					!path_in_sparse_checkout(src_w_slash, &the_index)) {

In checks like these, the less "expensive" one should come first (so that if
it returns 'false', we completely skip the more expensive one). Since
'check_dir_in_index()' requires binary searching the index, it's likely to
be more expensive than 'path_in_sparse_checkout()', so the condition order
should be flipped:

				if (!path_in_sparse_checkout(src_w_slash, &the_index) &&
				    !check_dir_in_index(src, length)) {

Also nit: alignment (more details on why/how in my last message [1]).

[1] https://lore.kernel.org/git/01b39c63-5652-4293-0424-ff99b6f9f7d2@github.com/

> +					modes[i] |= SKIP_WORKTREE_DIR;
> +					goto dir_check;
> +				}
>  				/* only error if existence is expected. */
>  				if (!(modes[i] & SPARSE))
>  					bad = _("bad source");
>  				goto act_on_entry;
>  			}
> -
>  			ce = active_cache[pos];
>  			if (!ce_skip_worktree(ce)) {
>  				bad = _("bad source");
> @@ -230,14 +266,17 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  			bad = _("can not move directory into itself");
>  			goto act_on_entry;
>  		}
> -		if ((src_is_dir = S_ISDIR(st.st_mode))
> +		if (S_ISDIR(st.st_mode)
>  		    && lstat(dst, &st) == 0) {
>  			bad = _("cannot move directory over file");
>  			goto act_on_entry;
>  		}
> -		if (src_is_dir) {
> +
> +dir_check:
> +		if (S_ISDIR(st.st_mode)) {
>  			int j, dst_len, n;
> -			int first = cache_name_pos(src, length), last;
> +			int first, last;
> +			first = cache_name_pos(src, length);

Super-nit: why did this line change? It looks like it just rearranges the
lines for no functional purpose.

>  
>  			if (first >= 0) {
>  				prepare_move_submodule(src, first,
> diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
> index 5b61fbad5f..30e13b9979 100755
> --- a/t/t7002-mv-sparse-checkout.sh
> +++ b/t/t7002-mv-sparse-checkout.sh
> @@ -219,7 +219,7 @@ test_expect_success 'refuse to move file to non-skip-worktree sparse path' '
>  	test_cmp expect stderr
>  '
>  
> -test_expect_failure 'refuse to move out-of-cone directory without --sparse' '
> +test_expect_success 'refuse to move out-of-cone directory without --sparse' '
>  	test_when_finished "cleanup_sparse_checkout" &&
>  	setup_sparse_checkout &&
>  
> @@ -230,7 +230,7 @@ test_expect_failure 'refuse to move out-of-cone directory without --sparse' '
>  	test_cmp expect stderr
>  '
>  
> -test_expect_failure 'can move out-of-cone directory with --sparse' '
> +test_expect_success 'can move out-of-cone directory with --sparse' '
>  	test_when_finished "cleanup_sparse_checkout" &&
>  	setup_sparse_checkout &&
>
diff mbox series

Patch

diff --git a/builtin/mv.c b/builtin/mv.c
index 7ce7992d6c..cb3441c7cb 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -123,6 +123,37 @@  static int index_range_of_same_dir(const char *src, int length,
 	return last - first;
 }
 
+/*
+ * Check if an out-of-cone directory should be in the index. Imagine this case
+ * that all the files under a directory are marked with 'CE_SKIP_WORKTREE' bit
+ * and thus the directory is sparsified.
+ *
+ * Return 0 if such directory exist (i.e. with any of its contained files not
+ * marked with CE_SKIP_WORKTREE, the directory would be present in working tree).
+ * Return 1 otherwise.
+ */
+static int check_dir_in_index(const char *name, int namelen)
+{
+	int ret = 1;
+	const char *with_slash = add_slash(name);
+	int length = namelen + 1;
+
+	int pos = cache_name_pos(with_slash, length);
+	const struct cache_entry *ce;
+
+	if (pos < 0) {
+		pos = -pos - 1;
+		if (pos >= the_index.cache_nr)
+			return ret;
+		ce = active_cache[pos];
+		if (strncmp(with_slash, ce->name, length))
+			return ret;
+		if (ce_skip_worktree(ce))
+			return ret = 0;
+	}
+	return ret;
+}
+
 int cmd_mv(int argc, const char **argv, const char *prefix)
 {
 	int i, flags, gitmodules_modified = 0;
@@ -184,7 +215,7 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 	/* Checking */
 	for (i = 0; i < argc; i++) {
 		const char *src = source[i], *dst = destination[i];
-		int length, src_is_dir;
+		int length;
 		const char *bad = NULL;
 		int skip_sparse = 0;
 
@@ -198,12 +229,17 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 
 			pos = cache_name_pos(src, length);
 			if (pos < 0) {
+				const char *src_w_slash = add_slash(src);
+				if (!check_dir_in_index(src, length) &&
+					!path_in_sparse_checkout(src_w_slash, &the_index)) {
+					modes[i] |= SKIP_WORKTREE_DIR;
+					goto dir_check;
+				}
 				/* only error if existence is expected. */
 				if (!(modes[i] & SPARSE))
 					bad = _("bad source");
 				goto act_on_entry;
 			}
-
 			ce = active_cache[pos];
 			if (!ce_skip_worktree(ce)) {
 				bad = _("bad source");
@@ -230,14 +266,17 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 			bad = _("can not move directory into itself");
 			goto act_on_entry;
 		}
-		if ((src_is_dir = S_ISDIR(st.st_mode))
+		if (S_ISDIR(st.st_mode)
 		    && lstat(dst, &st) == 0) {
 			bad = _("cannot move directory over file");
 			goto act_on_entry;
 		}
-		if (src_is_dir) {
+
+dir_check:
+		if (S_ISDIR(st.st_mode)) {
 			int j, dst_len, n;
-			int first = cache_name_pos(src, length), last;
+			int first, last;
+			first = cache_name_pos(src, length);
 
 			if (first >= 0) {
 				prepare_move_submodule(src, first,
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index 5b61fbad5f..30e13b9979 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -219,7 +219,7 @@  test_expect_success 'refuse to move file to non-skip-worktree sparse path' '
 	test_cmp expect stderr
 '
 
-test_expect_failure 'refuse to move out-of-cone directory without --sparse' '
+test_expect_success 'refuse to move out-of-cone directory without --sparse' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 
@@ -230,7 +230,7 @@  test_expect_failure 'refuse to move out-of-cone directory without --sparse' '
 	test_cmp expect stderr
 '
 
-test_expect_failure 'can move out-of-cone directory with --sparse' '
+test_expect_success 'can move out-of-cone directory with --sparse' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&