diff mbox series

[v3] http: do not ignore proxy path

Message ID pull.1767.v3.git.1722441675945.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v3] http: do not ignore proxy path | expand

Commit Message

Ryan Hendrickson July 31, 2024, 4:01 p.m. UTC
From: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>

The documentation for `http.proxy` describes that option, and the
environment variables it overrides, as supporting "the syntax understood
by curl". curl allows SOCKS proxies to use a path to a Unix domain
socket, like `socks5h://localhost/path/to/socket.sock`. Git should
therefore include, if present, the path part of the proxy URL in what it
passes to libcurl.

Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
---
    http: do not ignore proxy path
    
    Re earlier discussion about regressions, it turns out that curl has only
    supported this syntax since 2022 [https://curl.se/ch/7.84.0.html].
    Earlier versions of curl, if provided an http_proxy that contained a
    path, would also ignore it. curl didn't seem to consider this a breaking
    change when the feature was introduced
    [https://github.com/curl/curl/pull/8668], though; is that a valid
    argument for Git not to lose sleep over it either?
    
    A test has been added, but unfortunately it is not being run on any of
    GitHub's CI workflows, because the runner images in use all have
    versions of libcurl that are too old. It works on my machine, but
    unknown troubles may await someone else.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1767%2Frhendric%2Frhendric%2Fhttp-proxy-path-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1767/rhendric/rhendric/http-proxy-path-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1767

Range-diff vs v2:

 1:  b4715eba5b1 ! 1:  7a58da7102e http: do not ignore proxy path
     @@ Commit message
          therefore include, if present, the path part of the proxy URL in what it
          passes to libcurl.
      
     +    Co-authored-by: Jeff King <peff@peff.net>
          Signed-off-by: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
      
       ## Documentation/config/http.txt ##
     @@ http.c: static CURL *get_curl_handle(void)
      -		curl_easy_setopt(result, CURLOPT_PROXY, proxy_auth.host);
      +		strbuf_addstr(&proxy, proxy_auth.host);
      +		if (proxy_auth.path) {
     -+			int path_is_supported = 0;
     -+			/* curl_version_info was added in curl 7.10 */
     -+#if LIBCURL_VERSION_NUM >= 0x070a00
      +			curl_version_info_data *ver = curl_version_info(CURLVERSION_NOW);
     -+			path_is_supported = ver->version_num >= 0x075400;
     -+#endif
     -+			if (path_is_supported) {
     -+				strbuf_addch(&proxy, '/');
     -+				strbuf_add_percentencode(&proxy, proxy_auth.path, 0);
     -+			} else {
     ++			if (ver->version_num < 0x075400)
      +				die("libcurl 7.84 or later is required to support paths in proxy URLs");
     -+			}
     ++
     ++			if (!starts_with(proxy_auth.protocol, "socks"))
     ++				die("Invalid proxy URL '%s': only SOCKS proxies support paths",
     ++				    curl_http_proxy);
     ++
     ++			if (strcasecmp(proxy_auth.host, "localhost"))
     ++				die("Invalid proxy URL '%s': host must be localhost if a path is present",
     ++				    curl_http_proxy);
     ++
     ++			strbuf_addch(&proxy, '/');
     ++			strbuf_add_percentencode(&proxy, proxy_auth.path, 0);
      +		}
      +		curl_easy_setopt(result, CURLOPT_PROXY, proxy.buf);
      +		strbuf_release(&proxy);
     @@ http.c: static CURL *get_curl_handle(void)
       		var_override(&curl_no_proxy, getenv("no_proxy"));
       		curl_easy_setopt(result, CURLOPT_NOPROXY, curl_no_proxy);
      
     - ## t/socks5-proxy-server/LICENSE (new) ##
     -@@
     -+MIT License
     -+
     -+Copyright (c) 2014 kaimi.io
     -+
     -+Permission is hereby granted, free of charge, to any person obtaining a copy
     -+of this software and associated documentation files (the "Software"), to deal
     -+in the Software without restriction, including without limitation the rights
     -+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     -+copies of the Software, and to permit persons to whom the Software is
     -+furnished to do so, subject to the following conditions:
     -+
     -+The above copyright notice and this permission notice shall be included in all
     -+copies or substantial portions of the Software.
     -+
     -+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     -+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     -+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     -+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     -+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     -+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     -+SOFTWARE.
     -
     - ## t/socks5-proxy-server/README.md (new) ##
     -@@
     -+socks5-proxy-server
     -+===================
     -+
     -+This Perl code implements a SOCKS5 proxy server that listens for incoming connections and processes them in separate threads. The server takes three input parameters: `path`, `login`, and `password`.
     -+
     -+When a client attempts to connect to the server, the server checks if the client supports any of the available authentication methods (no authentication or login/password authentication). If a suitable method is found, the server establishes a connection with the target server and begins forwarding data between the client and the target server.
     -+
     -+The code uses the `IO::Select` module for working with sockets and the `threads` module for creating and managing threads. It includes several functions, including:
     -+
     -+- `main`: the main function that creates threads for processing incoming connections.
     -+- `replenish`: a function that creates additional threads if the number of free threads is less than the specified value.
     -+- `new_client`: a function that handles incoming client connections and checks if the available authentication methods are supported by the client.
     -+- `socks_auth`: a function that performs login and password authentication.
     -+- `handle_client`: a function that establishes a connection with the target server and forwards data between the client and the target server.
     -+
     -+This code includes the use of the following Perl modules: `IO::Select`, `Socket`, `threads`, `threads::shared`.
     -+
     -+To run this code, enter the following command:
     -+`perl socks5.pl path login password`
     -+
     -+Note that this code is designed for educational purposes and should not be used in production environments without proper modifications and security measures. 

Comments

Junio C Hamano July 31, 2024, 10:24 p.m. UTC | #1
"Ryan Hendrickson via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -5,8 +5,8 @@ http.proxy::
>  	proxy string with a user name but no password, in which case git will
>  	attempt to acquire one in the same way it does for other credentials. See
>  	linkgit:gitcredentials[7] for more information. The syntax thus is
> -	'[protocol://][user[:password]@]proxyhost[:port]'. This can be overridden
> -	on a per-remote basis; see remote.<name>.proxy
> +	'[protocol://][user[:password]@]proxyhost[:port][/path]'. This can be
> +	overridden on a per-remote basis; see remote.<name>.proxy

OK.

> diff --git a/http.c b/http.c
> index 623ed234891..a50ba095889 100644
> --- a/http.c
> +++ b/http.c
> @@ -1227,6 +1227,7 @@ static CURL *get_curl_handle(void)
>  		 */
>  		curl_easy_setopt(result, CURLOPT_PROXY, "");
>  	} else if (curl_http_proxy) {
> +		struct strbuf proxy = STRBUF_INIT;
>  		if (starts_with(curl_http_proxy, "socks5h"))
>  			curl_easy_setopt(result,
>  				CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);

In a block with local variable decl, be more friendly to readers by
having a blank line between the end of declarations and the first
statement.

> +		strbuf_addstr(&proxy, proxy_auth.host);
> +		if (proxy_auth.path) {
> +			curl_version_info_data *ver = curl_version_info(CURLVERSION_NOW);
> +			if (ver->version_num < 0x075400)
> +				die("libcurl 7.84 or later is required to support paths in proxy URLs");
> +
> +			if (!starts_with(proxy_auth.protocol, "socks"))
> +				die("Invalid proxy URL '%s': only SOCKS proxies support paths",
> +				    curl_http_proxy);
> +
> +			if (strcasecmp(proxy_auth.host, "localhost"))
> +				die("Invalid proxy URL '%s': host must be localhost if a path is present",
> +				    curl_http_proxy);

We insist that it must be "localhost", so let's not do strcasecmp()
but just do strcmp().

> diff --git a/t/t5564-http-proxy.sh b/t/t5564-http-proxy.sh
> index bb35b87071d..7fcffba67a2 100755
> --- a/t/t5564-http-proxy.sh
> +++ b/t/t5564-http-proxy.sh
> @@ -39,4 +39,50 @@ test_expect_success 'clone can prompt for proxy password' '
>  	expect_askpass pass proxuser
>  '
>  
> +start_socks() {
> +	mkfifo socks_output &&
> +	{
> +		"$PERL_PATH" "$TEST_DIRECTORY/socks4-proxy.pl" "$1" >socks_output &
> +		socks_pid=$!
> +	} &&
> +	read line <socks_output &&
> +	test "$line" = ready
> +}
> +
> +test_expect_success PERL 'try to start SOCKS proxy' '
> +	# The %30 tests that the correct amount of percent-encoding is applied
> +	# to the proxy string passed to curl.
> +	if start_socks %30.sock
> +	then
> +		test_set_prereq SOCKS_PROXY
> +	fi
> +'

Making it a regular test_expect_success would mean GIT_SKIP_TEST
mechansim can be used to skip it, which is probably not what you
want.  Can't this be a more common test_lazy_prereq, perhaps like

	test_lazy_prereq SOCKS_PROXY '
		# useful comment about 30% here ...
		test_have_prereq PERL &&
		start_socks %30.sock
	'

or something?

Thanks.
Ryan Hendrickson Aug. 1, 2024, 3:44 a.m. UTC | #2
At 2024-07-31 15:24-0700, Junio C Hamano <gitster@pobox.com> sent:

> In a block with local variable decl, be more friendly to readers by
> having a blank line between the end of declarations and the first
> statement.

Will do.

> We insist that it must be "localhost", so let's not do strcasecmp()
> but just do strcmp().

I don't see the wisdom of being more restrictive than curl is in this 
respect. The documentation makes the claim that curl's syntax is what this 
option supports, and while that is not literally true due to how protocols 
are handled, I don't see a reason to intentionally deviate further.

I've tested that curl does the right thing with any casing of localhost, 
both on its own and via Git. Host names are generally case insensitive; 
the error message should be understood in that context. And if it is 
misunderstood, there's no negative impact on the user who writes 
"localhost" (while there is a negative impact on the user who quite 
reasonably expects "LOCALHOST" to work if we don't follow curl's lead).

> Making it a regular test_expect_success would mean GIT_SKIP_TEST
> mechansim can be used to skip it, which is probably not what you
> want.  Can't this be a more common test_lazy_prereq, perhaps like
>
> 	test_lazy_prereq SOCKS_PROXY '
> 		# useful comment about 30% here ...
> 		test_have_prereq PERL &&
> 		start_socks %30.sock
> 	'
>
> or something?

This existing test file is definitely not written with running individual 
tests in mind; the first test is a prerequisite for all that comes after, 
and the second test seems to be required for tests 3–5 as well.

But sure, I'll use that pattern if you like.

R
Junio C Hamano Aug. 1, 2024, 5:21 a.m. UTC | #3
Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:

>> We insist that it must be "localhost", so let's not do strcasecmp()
>> but just do strcmp().
>
> I don't see the wisdom of being more restrictive than curl is in this
> respect.

Ah, if curl is doing case insensitivity, then matching its behaviour
is perfectly fine.  I was just reacting to this message ...

> +	die("Invalid proxy URL '%s': host must be localhost if a path is present",

... that results when strcasecmp() does not like it.

Thanks.
Jeff King Aug. 1, 2024, 5:45 a.m. UTC | #4
On Wed, Jul 31, 2024 at 03:24:01PM -0700, Junio C Hamano wrote:

> > +start_socks() {
> > +	mkfifo socks_output &&
> > +	{
> > +		"$PERL_PATH" "$TEST_DIRECTORY/socks4-proxy.pl" "$1" >socks_output &
> > +		socks_pid=$!
> > +	} &&
> > +	read line <socks_output &&
> > +	test "$line" = ready
> > +}
> > +
> > +test_expect_success PERL 'try to start SOCKS proxy' '
> > +	# The %30 tests that the correct amount of percent-encoding is applied
> > +	# to the proxy string passed to curl.
> > +	if start_socks %30.sock
> > +	then
> > +		test_set_prereq SOCKS_PROXY
> > +	fi
> > +'
> 
> Making it a regular test_expect_success would mean GIT_SKIP_TEST
> mechansim can be used to skip it, which is probably not what you
> want.  Can't this be a more common test_lazy_prereq, perhaps like
> 
> 	test_lazy_prereq SOCKS_PROXY '
> 		# useful comment about 30% here ...
> 		test_have_prereq PERL &&
> 		start_socks %30.sock
> 	'
> 
> or something?

I think Ryan picked up this approach from my earlier mail. And the
reason I suggested doing it this way is that the prereq test has an
important side effect: it creates the socket and starts the proxy
server!

I think lazy prereqs only make sense when their only output is the
yes/no of whether we match the prereq. And then we don't care when or
how often they are evaluated. I do think things would work, assuming the
proxy server then can serve multiple tests. It just feels weird, and
doing it more like the git-daemon/http tests made more sense to me
(though those ones do not even do their work inside an expect block).

If we did it in a lazy prereq I think you'd need to munge the path, as
the lazy prereq operates in a throwaway directory (so the %30.sock would
get removed before we can use it in the next test).

-Peff
Junio C Hamano Aug. 1, 2024, 2:40 p.m. UTC | #5
Jeff King <peff@peff.net> writes:

> I think Ryan picked up this approach from my earlier mail. And the
> reason I suggested doing it this way is that the prereq test has an
> important side effect: it creates the socket and starts the proxy
> server!

Ah, OK.

> I think lazy prereqs only make sense when their only output is the
> yes/no of whether we match the prereq. And then we don't care when or
> how often they are evaluated.

Once the prereq test is attempted the result is cached (that's the
whole point of lazy prereq mechanism) so we won't see multiple sock
proxies spawned.

> I do think things would work, assuming the
> proxy server then can serve multiple tests. It just feels weird, and
> doing it more like the git-daemon/http tests made more sense to me
> (though those ones do not even do their work inside an expect block).

OK.  That's fine.  It is probably not a good fit for the pattern.

> If we did it in a lazy prereq I think you'd need to munge the path, as
> the lazy prereq operates in a throwaway directory (so the %30.sock would
> get removed before we can use it in the next test).

True.
diff mbox series

Patch

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 162b33fc52f..a14371b5c96 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -5,8 +5,8 @@  http.proxy::
 	proxy string with a user name but no password, in which case git will
 	attempt to acquire one in the same way it does for other credentials. See
 	linkgit:gitcredentials[7] for more information. The syntax thus is
-	'[protocol://][user[:password]@]proxyhost[:port]'. This can be overridden
-	on a per-remote basis; see remote.<name>.proxy
+	'[protocol://][user[:password]@]proxyhost[:port][/path]'. This can be
+	overridden on a per-remote basis; see remote.<name>.proxy
 +
 Any proxy, however configured, must be completely transparent and must not
 modify, transform, or buffer the request or response in any way.  Proxies which
diff --git a/http.c b/http.c
index 623ed234891..a50ba095889 100644
--- a/http.c
+++ b/http.c
@@ -1227,6 +1227,7 @@  static CURL *get_curl_handle(void)
 		 */
 		curl_easy_setopt(result, CURLOPT_PROXY, "");
 	} else if (curl_http_proxy) {
+		struct strbuf proxy = STRBUF_INIT;
 		if (starts_with(curl_http_proxy, "socks5h"))
 			curl_easy_setopt(result,
 				CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
@@ -1265,7 +1266,26 @@  static CURL *get_curl_handle(void)
 		if (!proxy_auth.host)
 			die("Invalid proxy URL '%s'", curl_http_proxy);
 
-		curl_easy_setopt(result, CURLOPT_PROXY, proxy_auth.host);
+		strbuf_addstr(&proxy, proxy_auth.host);
+		if (proxy_auth.path) {
+			curl_version_info_data *ver = curl_version_info(CURLVERSION_NOW);
+			if (ver->version_num < 0x075400)
+				die("libcurl 7.84 or later is required to support paths in proxy URLs");
+
+			if (!starts_with(proxy_auth.protocol, "socks"))
+				die("Invalid proxy URL '%s': only SOCKS proxies support paths",
+				    curl_http_proxy);
+
+			if (strcasecmp(proxy_auth.host, "localhost"))
+				die("Invalid proxy URL '%s': host must be localhost if a path is present",
+				    curl_http_proxy);
+
+			strbuf_addch(&proxy, '/');
+			strbuf_add_percentencode(&proxy, proxy_auth.path, 0);
+		}
+		curl_easy_setopt(result, CURLOPT_PROXY, proxy.buf);
+		strbuf_release(&proxy);
+
 		var_override(&curl_no_proxy, getenv("NO_PROXY"));
 		var_override(&curl_no_proxy, getenv("no_proxy"));
 		curl_easy_setopt(result, CURLOPT_NOPROXY, curl_no_proxy);
diff --git a/t/socks4-proxy.pl b/t/socks4-proxy.pl
new file mode 100644
index 00000000000..4c3a35c0083
--- /dev/null
+++ b/t/socks4-proxy.pl
@@ -0,0 +1,48 @@ 
+use strict;
+use IO::Select;
+use IO::Socket::UNIX;
+use IO::Socket::INET;
+
+my $path = shift;
+
+unlink($path);
+my $server = IO::Socket::UNIX->new(Listen => 1, Local => $path)
+	or die "unable to listen on $path: $!";
+
+$| = 1;
+print "ready\n";
+
+while (my $client = $server->accept()) {
+	sysread $client, my $buf, 8;
+	my ($version, $cmd, $port, $ip) = unpack 'CCnN', $buf;
+	next unless $version == 4; # socks4
+	next unless $cmd == 1; # TCP stream connection
+
+	# skip NUL-terminated id
+	while (sysread $client, my $char, 1) {
+		last unless ord($char);
+	}
+
+	# version(0), reply(5a == granted), port (ignored), ip (ignored)
+	syswrite $client, "\x00\x5a\x00\x00\x00\x00\x00\x00";
+
+	my $remote = IO::Socket::INET->new(PeerHost => $ip, PeerPort => $port)
+		or die "unable to connect to $ip/$port: $!";
+
+	my $io = IO::Select->new($client, $remote);
+	while ($io->count) {
+		for my $fh ($io->can_read(0)) {
+			for my $pair ([$client, $remote], [$remote, $client]) {
+				my ($from, $to) = @$pair;
+				next unless $fh == $from;
+
+				my $r = sysread $from, my $buf, 1024;
+				if (!defined $r || $r <= 0) {
+					$io->remove($from);
+					next;
+				}
+				syswrite $to, $buf;
+			}
+		}
+	}
+}
diff --git a/t/t5564-http-proxy.sh b/t/t5564-http-proxy.sh
index bb35b87071d..7fcffba67a2 100755
--- a/t/t5564-http-proxy.sh
+++ b/t/t5564-http-proxy.sh
@@ -39,4 +39,50 @@  test_expect_success 'clone can prompt for proxy password' '
 	expect_askpass pass proxuser
 '
 
+start_socks() {
+	mkfifo socks_output &&
+	{
+		"$PERL_PATH" "$TEST_DIRECTORY/socks4-proxy.pl" "$1" >socks_output &
+		socks_pid=$!
+	} &&
+	read line <socks_output &&
+	test "$line" = ready
+}
+
+test_expect_success PERL 'try to start SOCKS proxy' '
+	# The %30 tests that the correct amount of percent-encoding is applied
+	# to the proxy string passed to curl.
+	if start_socks %30.sock
+	then
+		test_set_prereq SOCKS_PROXY
+	fi
+'
+
+test_expect_success SOCKS_PROXY 'clone via Unix socket' '
+	test_when_finished "rm -rf clone" &&
+	test_config_global http.proxy "socks4://localhost$PWD/%2530.sock" && {
+		{
+			GIT_TRACE_CURL=$PWD/trace git clone "$HTTPD_URL/smart/repo.git" clone 2>err &&
+			grep -i "SOCKS4 request granted." trace
+		} ||
+		grep "^fatal: libcurl 7\.84 or later" err
+	}
+'
+
+test_expect_success SOCKS_PROXY 'stop SOCKS proxy' 'kill "$socks_pid"'
+
+test_expect_success 'Unix socket requires socks*:' '
+	! git clone -c http.proxy=localhost/path https://example.com/repo.git 2>err && {
+		grep "^fatal: Invalid proxy URL '\''localhost/path'\'': only SOCKS proxies support paths" err ||
+		grep "^fatal: libcurl 7\.84 or later" err
+	}
+'
+
+test_expect_success 'Unix socket requires localhost' '
+	! git clone -c http.proxy=socks4://127.0.0.1/path https://example.com/repo.git 2>err && {
+		grep "^fatal: Invalid proxy URL '\''socks4://127\.0\.0\.1/path'\'': host must be localhost if a path is present" err ||
+		grep "^fatal: libcurl 7\.84 or later" err
+	}
+'
+
 test_done