diff mbox series

[v2,4/9] mv: check if <destination> is a SKIP_WORKTREE_DIR

Message ID 20220805030528.1535376-5-shaoxuan.yuan02@gmail.com (mailing list archive)
State Superseded
Headers show
Series mv: from in-cone to out-of-cone | expand

Commit Message

Shaoxuan Yuan Aug. 5, 2022, 3:05 a.m. UTC
Originally, <destination> is assumed to be in the working tree. If it is
not found as a directory, then it is either treated as a regular file
path, or Git errors out if it is used under the second form (move into a
directory) of 'git-mv'. Such behavior is not ideal, mainly because Git does
not look into the index for <destination>, which could potentially be a
SKIP_WORKTREE_DIR; we need to detect that case for the later "moving from
in-cone to out-of-cone" patch.

Change the logic so that Git first checks if <destination> is a directory
with all its contents sparsified (a SKIP_WORKTREE_DIR).

If <destination> is such a sparse directory, then we should modify the
index the same way as we would if this were a non-sparse directory. We
must be careful to ensure that the <destination> is marked with
SKIP_WORKTREE_DIR.

Also add a `dst_w_slash` variable to hold the result of `add_slash()` so
that it can be reused, which simplifies the places that previously had to
call `add_slash()` themselves.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
---
 builtin/mv.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

Comments

Victoria Dye Aug. 8, 2022, 11:41 p.m. UTC | #1
Shaoxuan Yuan wrote:
> Originally, <destination> is assumed to be in the working tree. If it is
> not found as a directory, then it is determined to be either a regular file
> path, or error out if used under the second form (move into a directory)
> of 'git-mv'. Such behavior is not ideal, mainly because Git does not
> look into the index for <destination>, which could potentially be a
> SKIP_WORKTREE_DIR, which we need to determine for the later "moving from
> in-cone to out-of-cone" patch.
> 
> Change the logic so that Git first check if <destination> is a directory
> with all its contents sparsified (a SKIP_WORKTREE_DIR).
> 
> If <destination> is such a sparse directory, then we should modify the
> index the same way as we would if this were a non-sparse directory. We
> must be careful to ensure that the <destination> is marked with
> SKIP_WORKTREE_DIR.
> 
> Also add a `dst_w_slash` to reuse the result from `add_slash()`, which
> was everywhere and can be simplified.

This all makes sense. Stepping through the code...

> 
> Helped-by: Derrick Stolee <derrickstolee@github.com>
> Helped-by: Victoria Dye <vdye@github.com>
> Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
> ---
>  builtin/mv.c | 18 ++++++++++++++----
>  1 file changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/builtin/mv.c b/builtin/mv.c
> index 0a999640c9..f213a92bf6 100644
> --- a/builtin/mv.c
> +++ b/builtin/mv.c
> @@ -171,6 +171,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  		OPT_END(),
>  	};
>  	const char **source, **destination, **dest_path, **submodule_gitfile;
> +	const char *dst_w_slash;
>  	enum update_mode *modes;
>  	struct stat st;
>  	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
> @@ -201,6 +202,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  	if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
>  		flags = 0;
>  	dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
> +	dst_w_slash = add_slash(dest_path[0]);

...you pre-compute a reusable 'dst_w_slash' here...

>  	submodule_gitfile = xcalloc(argc, sizeof(char *));
>  
>  	if (dest_path[0][0] == '\0')
> @@ -208,12 +210,20 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
>  	else if (!lstat(dest_path[0], &st) &&
>  			S_ISDIR(st.st_mode)) {
> -		dest_path[0] = add_slash(dest_path[0]);
> -		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
> +		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);

...then remove the in-place 'add_slash()' of 'dest_path[0]' and use
'dst_w_slash' in 'internal_prefix_pathspec()'. Makes sense.

>  	} else {

Then, this block is reached if 'dest_path' is not '.' and it is not a
directory that exists on disk.

Previously, reaching this point meant that 'dest_path' *must* refer to a
file, not a directory. However, you want to add handling for the case where
'dst_w_slash' doesn't exist on disk because all of its contents are sparse:

> -		if (argc != 1)
> +		if (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
> +		    empty_dir_has_sparse_contents(dst_w_slash)) {
> +			destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);

so the above condition identifies whether 'dest_path[0]' is non-empty in the
index, and sets 'destination' accordingly. 

It took me some time to understand what all of these (nested) conditions are
doing; one suggestion I have (feel free to ignore it, since it's really just
a matter of stylistic preference) is to reduce some duplicated code and
simplify the change a bit by moving the sparse directory check into the main
"if-else" block:

------------->8------------->8------------->8------------->8------------->8-------------
diff --git a/builtin/mv.c b/builtin/mv.c
index 4729bb1a1a..1c1b9559f6 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -203,10 +203,11 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 	if (dest_path[0][0] == '\0')
 		/* special case: "." was normalized to "" */
 		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
-	else if (!lstat(dest_path[0], &st) &&
-			S_ISDIR(st.st_mode)) {
-		dest_path[0] = add_slash(dest_path[0]);
-		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
+	else if ((!lstat(dest_path[0], &st) && S_ISDIR(st.st_mode)) ||
+		 (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
+		  empty_dir_has_sparse_contents(dst_w_slash))) {
+		/* directory dest_path[0] exists on-disk or in the index */
+		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
 	} else {
 		if (argc != 1)
 			die(_("destination '%s' is not a directory"), dest_path[0]);

-------------8<-------------8<-------------8<-------------8<-------------8<-------------

It doesn't make for the prettiest condition (so your current approach might
be better in terms of readability) but, to me, it creates a clearer
distinction between the "if" and "else if" blocks (which handle the case
where 'dest_path[0]' is a directory), and the "else" block (which handles
the case where 'dest_path[0]' is a file).

> +		} else if (argc != 1) {
>  			die(_("destination '%s' is not a directory"), dest_path[0]);
> -		destination = dest_path;
> +		} else {
> +			destination = dest_path;
> +		}
> +	}
> +	if (dst_w_slash != dest_path[0]) {
> +		free((char *)dst_w_slash);
> +		dst_w_slash = NULL;

Looks good.

>  	}
>  
>  	/* Checking */
Victoria Dye Aug. 9, 2022, 12:23 a.m. UTC | #2
Victoria Dye wrote:
> Shaoxuan Yuan wrote:
>> Originally, <destination> is assumed to be in the working tree. If it is
>> not found as a directory, then it is determined to be either a regular file
>> path, or error out if used under the second form (move into a directory)
>> of 'git-mv'. Such behavior is not ideal, mainly because Git does not
>> look into the index for <destination>, which could potentially be a
>> SKIP_WORKTREE_DIR, which we need to determine for the later "moving from
>> in-cone to out-of-cone" patch.
>>
>> Change the logic so that Git first check if <destination> is a directory
>> with all its contents sparsified (a SKIP_WORKTREE_DIR).
>>
>> If <destination> is such a sparse directory, then we should modify the
>> index the same way as we would if this were a non-sparse directory. We
>> must be careful to ensure that the <destination> is marked with
>> SKIP_WORKTREE_DIR.
>>
>> Also add a `dst_w_slash` to reuse the result from `add_slash()`, which
>> was everywhere and can be simplified.
> 
> This all makes sense. Stepping through the code...
> 
>>
>> Helped-by: Derrick Stolee <derrickstolee@github.com>
>> Helped-by: Victoria Dye <vdye@github.com>
>> Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
>> ---
>>  builtin/mv.c | 18 ++++++++++++++----
>>  1 file changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/builtin/mv.c b/builtin/mv.c
>> index 0a999640c9..f213a92bf6 100644
>> --- a/builtin/mv.c
>> +++ b/builtin/mv.c
>> @@ -171,6 +171,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>>  		OPT_END(),
>>  	};
>>  	const char **source, **destination, **dest_path, **submodule_gitfile;
>> +	const char *dst_w_slash;
>>  	enum update_mode *modes;
>>  	struct stat st;
>>  	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
>> @@ -201,6 +202,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>>  	if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
>>  		flags = 0;
>>  	dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
>> +	dst_w_slash = add_slash(dest_path[0]);
> 
> ...you pre-compute a reusable 'dst_w_slash' here...
> 
>>  	submodule_gitfile = xcalloc(argc, sizeof(char *));
>>  
>>  	if (dest_path[0][0] == '\0')
>> @@ -208,12 +210,20 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>>  		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
>>  	else if (!lstat(dest_path[0], &st) &&
>>  			S_ISDIR(st.st_mode)) {
>> -		dest_path[0] = add_slash(dest_path[0]);
>> -		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
>> +		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
> 
> ...then remove the in-place 'add_slash()' of 'dest_path[0]' and use
> 'dst_w_slash' in 'internal_prefix_pathspec()'. Makes sense.
> 
>>  	} else {
> 
> Then, this block is reached if 'dest_path' is not '.' and it is not a
> directory that exists on disk.
> 
> Previously, reaching this point meant that 'dest_path' *must* refer to a
> file, not a directory. However, you want to add handling for the case where
> 'dst_w_slash' doesn't exist on disk because all of its contents are sparse:
> 
>> -		if (argc != 1)
>> +		if (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
>> +		    empty_dir_has_sparse_contents(dst_w_slash)) {
>> +			destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
> 
> so the above condition identifies whether 'dest_path[0]' is non-empty in the
> index, and sets 'destination' accordingly. 
> 
> It took me some time to understand what all of these (nested) conditions are
> doing; one suggestion I have (feel free to ignore it, since it's really just
> a matter of stylistic preference) is reduce some duplicate code/simplify the
> change a bit by moving the sparse directory check into the main "if-else"
> block:
> 
> ------------->8------------->8------------->8------------->8------------->8-------------
> diff --git a/builtin/mv.c b/builtin/mv.c
> index 4729bb1a1a..1c1b9559f6 100644
> --- a/builtin/mv.c
> +++ b/builtin/mv.c
> @@ -203,10 +203,11 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  	if (dest_path[0][0] == '\0')
>  		/* special case: "." was normalized to "" */
>  		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
> -	else if (!lstat(dest_path[0], &st) &&
> -			S_ISDIR(st.st_mode)) {
> -		dest_path[0] = add_slash(dest_path[0]);
> -		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
> +	else if ((!lstat(dest_path[0], &st) && S_ISDIR(st.st_mode)) ||
> +		 (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
> +		  empty_dir_has_sparse_contents(dst_w_slash))) {
> +		/* directory dest_path[0] exists on-disk or in the index */
> +		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
>  	} else {
>  		if (argc != 1)
>  			die(_("destination '%s' is not a directory"), dest_path[0]);
> 
> -------------8<-------------8<-------------8<-------------8<-------------8<-------------
> 
> It doesn't make for the prettiest condition (so your current approach might
> be better in terms of readability) but, to me, it creates a clearer
> distinction between the "if" and "else if" blocks (which handle the case
> where 'dest_path[0]' is a directory), and the "else" block (which handles
> the case where 'dest_path[0]' is a file).

Now that I've read patch 6 [1], I can see that you need the "sparse
directory" condition block to stand alone. I think it might still help to
put that block in the top-level condition:

------------->8------------->8------------->8------------->8------------->8-------------
diff --git a/builtin/mv.c b/builtin/mv.c
index 4729bb1a1a..4a16a5e602 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -205,8 +205,10 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
 	else if (!lstat(dest_path[0], &st) &&
 			S_ISDIR(st.st_mode)) {
-		dest_path[0] = add_slash(dest_path[0]);
-		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
+		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
+	} else if (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
+		 empty_dir_has_sparse_contents(dst_w_slash)) {
+		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
 	} else {
 		if (argc != 1)
 			die(_("destination '%s' is not a directory"), dest_path[0]);
-------------8<-------------8<-------------8<-------------8<-------------8<---------

...but, as before, I'm happy with whichever approach you decide to take.

[1] https://lore.kernel.org/git/20220805030528.1535376-7-shaoxuan.yuan02@gmail.com/

> 
>> +		} else if (argc != 1) {
>>  			die(_("destination '%s' is not a directory"), dest_path[0]);
>> -		destination = dest_path;
>> +		} else {
>> +			destination = dest_path;
>> +		}
>> +	}
>> +	if (dst_w_slash != dest_path[0]) {
>> +		free((char *)dst_w_slash);
>> +		dst_w_slash = NULL;
> 
> Looks good.
> 
>>  	}
>>  
>>  	/* Checking */
>
Shaoxuan Yuan Aug. 9, 2022, 2:31 a.m. UTC | #3
On 8/9/2022 7:41 AM, Victoria Dye wrote:
...truncated...
> It took me some time to understand what all of these (nested) conditions are
> doing; one suggestion I have (feel free to ignore it, since it's really just
> a matter of stylistic preference) is reduce some duplicate code/simplify the
> change a bit by moving the sparse directory check into the main "if-else"
> block:
Yes, I acknowledge this part is cluttered slightly ;)
> ------------->8------------->8------------->8------------->8------------->8-------------
> diff --git a/builtin/mv.c b/builtin/mv.c
> index 4729bb1a1a..1c1b9559f6 100644
> --- a/builtin/mv.c
> +++ b/builtin/mv.c
> @@ -203,10 +203,11 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>   	if (dest_path[0][0] == '\0')
>   		/* special case: "." was normalized to "" */
>   		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
> -	else if (!lstat(dest_path[0], &st) &&
> -			S_ISDIR(st.st_mode)) {
> -		dest_path[0] = add_slash(dest_path[0]);
> -		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
> +	else if ((!lstat(dest_path[0], &st) && S_ISDIR(st.st_mode)) ||
> +		 (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
> +		  empty_dir_has_sparse_contents(dst_w_slash))) {
> +		/* directory dest_path[0] exists on-disk or in the index */
> +		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
>   	} else {
>   		if (argc != 1)
>   			die(_("destination '%s' is not a directory"), dest_path[0]);
>
> -------------8<-------------8<-------------8<-------------8<-------------8<-------------
>
> It doesn't make for the prettiest condition (so your current approach might
> be better in terms of readability) but, to me, it creates a clearer
> distinction between the "if" and "else if" blocks (which handle the case
> where 'dest_path[0]' is a directory), and the "else" block (which handles
> the case where 'dest_path[0]' is a file).

I also find this way clearer! Thanks for the suggestion!
diff mbox series

Patch

diff --git a/builtin/mv.c b/builtin/mv.c
index 0a999640c9..f213a92bf6 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -171,6 +171,7 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 		OPT_END(),
 	};
 	const char **source, **destination, **dest_path, **submodule_gitfile;
+	const char *dst_w_slash;
 	enum update_mode *modes;
 	struct stat st;
 	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
@@ -201,6 +202,7 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 	if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
 		flags = 0;
 	dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
+	dst_w_slash = add_slash(dest_path[0]);
 	submodule_gitfile = xcalloc(argc, sizeof(char *));
 
 	if (dest_path[0][0] == '\0')
@@ -208,12 +210,20 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
 	else if (!lstat(dest_path[0], &st) &&
 			S_ISDIR(st.st_mode)) {
-		dest_path[0] = add_slash(dest_path[0]);
-		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
+		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
 	} else {
-		if (argc != 1)
+		if (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
+		    empty_dir_has_sparse_contents(dst_w_slash)) {
+			destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
+		} else if (argc != 1) {
 			die(_("destination '%s' is not a directory"), dest_path[0]);
-		destination = dest_path;
+		} else {
+			destination = dest_path;
+		}
+	}
+	if (dst_w_slash != dest_path[0]) {
+		free((char *)dst_w_slash);
+		dst_w_slash = NULL;
 	}
 
 	/* Checking */