diff mbox series

[v1.1,5/6] tools/libxs: Use writev()/sendmsg() instead of write()

Message ID 20240722162547.4060813-1-andrew.cooper3@citrix.com (mailing list archive)
State New
Headers show
Series None | expand

Commit Message

Andrew Cooper July 22, 2024, 4:25 p.m. UTC
With the input data now conveniently arranged, use writev()/sendmsg() instead
of decomposing it into write() calls.

This causes all requests to be submitted with a single system call, rather
than at least two.  While in principle short writes can occur, the chances of
it happening are slim given that most xenbus comms are only a handful of
bytes.

Nevertheless, provide {writev,sendmsg}_exact() wrappers which take care of
resubmitting on EINTR or short write.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Anthony PERARD <anthony.perard@vates.tech>
CC: Juergen Gross <jgross@suse.com>
CC: Frediano Ziglio <frediano.ziglio@cloud.com>

v1.1:
 * Fix iov overread, spotted by Frediano.  Factor the common updating logic
   out into update_iov().
---
 tools/libs/store/xs.c | 94 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 3 deletions(-)

Comments

Jürgen Groß July 23, 2024, 9:37 a.m. UTC | #1
On 22.07.24 18:25, Andrew Cooper wrote:
> With the input data now conveniently arranged, use writev()/sendmsg() instead
> of decomposing it into write() calls.
> 
> This causes all requests to be submitted with a single system call, rather
> than at least two.  While in principle short writes can occur, the chances of
> it happening are slim given that most xenbus comms are only a handful of
> bytes.
> 
> Nevertheless, provide {writev,sendmsg}_exact() wrappers which take care of
> resubmitting on EINTR or short write.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Anthony PERARD <anthony.perard@vates.tech>
> CC: Juergen Gross <jgross@suse.com>
> CC: Frediano Ziglio <frediano.ziglio@cloud.com>
> 
> v1.1:
>   * Fix iov overread, spotted by Frediano.  Factor the common updating logic
>     out into update_iov().
> ---
>   tools/libs/store/xs.c | 94 +++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
> index e820cccc2314..f80ac7558cbe 100644
> --- a/tools/libs/store/xs.c
> +++ b/tools/libs/store/xs.c
> @@ -563,6 +563,95 @@ static void *read_reply(
>   	return body;
>   }
>   
> +/*
> + * Update an iov/nr pair after an incomplete writev()/sendmsg().
> + *
> + * Awkwardly, nr has different widths and signs between writev() and
> + * sendmsg(), so we take it and return it by value, rather than by pointer.
> + */
> +static size_t update_iov(struct iovec **p_iov, size_t nr, size_t res)
> +{
> +	struct iovec *iov = *p_iov;
> +
> +        /* Skip fully complete elements, including empty elements. */
> +        while (nr && res >= iov->iov_len) {
> +                res -= iov->iov_len;
> +                nr--;
> +                iov++;
> +        }
> +
> +        /* Partial element, adjust base/len. */
> +        if (res) {
> +                iov->iov_len  -= res;
> +                iov->iov_base += res;
> +        }
> +
> +        *p_iov = iov;
> +
> +	return nr;
> +}
> +
> +/*
> + * Wrapper around sendmsg() to resubmit on EINTR or short write.  Returns
> + * @true if all data was transmitted, or @false with errno for an error.
> + * Note: May alter @iov in place on resubmit.
> + */
> +static bool sendmsg_exact(int fd, struct iovec *iov, unsigned int nr)
> +{
> +	struct msghdr hdr = {
> +		.msg_iov = iov,
> +		.msg_iovlen = nr,
> +	};
> +
> +	/* Sanity check first element isn't empty */
> +	assert(iov->iov_len == sizeof(struct xsd_sockmsg));

Can you please move this assert() into write_request(), avoiding to have
2 copies of it?

With that:

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen
Andrew Cooper July 23, 2024, 12:25 p.m. UTC | #2
On 23/07/2024 10:37 am, Jürgen Groß wrote:
> On 22.07.24 18:25, Andrew Cooper wrote:
>> With the input data now conveniently arranged, use writev()/sendmsg()
>> instead
>> of decomposing it into write() calls.
>>
>> This causes all requests to be submitted with a single system call,
>> rather
>> than at least two.  While in principle short writes can occur, the
>> chances of
>> it happening are slim given that most xenbus comms are only a handful of
>> bytes.
>>
>> Nevertheless, provide {writev,sendmsg}_exact() wrappers which take
>> care of
>> resubmitting on EINTR or short write.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> ---
>> CC: Anthony PERARD <anthony.perard@vates.tech>
>> CC: Juergen Gross <jgross@suse.com>
>> CC: Frediano Ziglio <frediano.ziglio@cloud.com>
>>
>> v1.1:
>>   * Fix iov overread, spotted by Frediano.  Factor the common
>> updating logic
>>     out into update_iov().
>> ---
>>   tools/libs/store/xs.c | 94 +++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 91 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
>> index e820cccc2314..f80ac7558cbe 100644
>> --- a/tools/libs/store/xs.c
>> +++ b/tools/libs/store/xs.c
>> @@ -563,6 +563,95 @@ static void *read_reply(
>>       return body;
>>   }
>>   +/*
>> + * Update an iov/nr pair after an incomplete writev()/sendmsg().
>> + *
>> + * Awkwardly, nr has different widths and signs between writev() and
>> + * sendmsg(), so we take it and return it by value, rather than by
>> pointer.
>> + */
>> +static size_t update_iov(struct iovec **p_iov, size_t nr, size_t res)
>> +{
>> +    struct iovec *iov = *p_iov;
>> +
>> +        /* Skip fully complete elements, including empty elements. */
>> +        while (nr && res >= iov->iov_len) {
>> +                res -= iov->iov_len;
>> +                nr--;
>> +                iov++;
>> +        }
>> +
>> +        /* Partial element, adjust base/len. */
>> +        if (res) {
>> +                iov->iov_len  -= res;
>> +                iov->iov_base += res;
>> +        }
>> +
>> +        *p_iov = iov;
>> +
>> +    return nr;
>> +}
>> +
>> +/*
>> + * Wrapper around sendmsg() to resubmit on EINTR or short write. 
>> Returns
>> + * @true if all data was transmitted, or @false with errno for an
>> error.
>> + * Note: May alter @iov in place on resubmit.
>> + */
>> +static bool sendmsg_exact(int fd, struct iovec *iov, unsigned int nr)
>> +{
>> +    struct msghdr hdr = {
>> +        .msg_iov = iov,
>> +        .msg_iovlen = nr,
>> +    };
>> +
>> +    /* Sanity check first element isn't empty */
>> +    assert(iov->iov_len == sizeof(struct xsd_sockmsg));
>
> Can you please move this assert() into write_request(), avoiding to have
> 2 copies of it?

It was more relevant before update_iov() was split out.

But, there's exactly the same assertion in the write_request()'s caller,
so I'd prefer to simply drop it if that's ok?

The writev()/sendmsg() won't malfunction if the first element is 0, and
update_iov() will now cope too, so I don't think it's necessary.

~Andrew
Jürgen Groß July 23, 2024, 12:30 p.m. UTC | #3
On 23.07.24 14:25, Andrew Cooper wrote:
> On 23/07/2024 10:37 am, Jürgen Groß wrote:
>> On 22.07.24 18:25, Andrew Cooper wrote:
>>> With the input data now conveniently arranged, use writev()/sendmsg()
>>> instead
>>> of decomposing it into write() calls.
>>>
>>> This causes all requests to be submitted with a single system call,
>>> rather
>>> than at least two.  While in principle short writes can occur, the
>>> chances of
>>> it happening are slim given that most xenbus comms are only a handful of
>>> bytes.
>>>
>>> Nevertheless, provide {writev,sendmsg}_exact() wrappers which take
>>> care of
>>> resubmitting on EINTR or short write.
>>>
>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> ---
>>> CC: Anthony PERARD <anthony.perard@vates.tech>
>>> CC: Juergen Gross <jgross@suse.com>
>>> CC: Frediano Ziglio <frediano.ziglio@cloud.com>
>>>
>>> v1.1:
>>>    * Fix iov overread, spotted by Frediano.  Factor the common
>>> updating logic
>>>      out into update_iov().
>>> ---
>>>    tools/libs/store/xs.c | 94 +++++++++++++++++++++++++++++++++++++++++--
>>>    1 file changed, 91 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
>>> index e820cccc2314..f80ac7558cbe 100644
>>> --- a/tools/libs/store/xs.c
>>> +++ b/tools/libs/store/xs.c
>>> @@ -563,6 +563,95 @@ static void *read_reply(
>>>        return body;
>>>    }
>>>    +/*
>>> + * Update an iov/nr pair after an incomplete writev()/sendmsg().
>>> + *
>>> + * Awkwardly, nr has different widths and signs between writev() and
>>> + * sendmsg(), so we take it and return it by value, rather than by
>>> pointer.
>>> + */
>>> +static size_t update_iov(struct iovec **p_iov, size_t nr, size_t res)
>>> +{
>>> +    struct iovec *iov = *p_iov;
>>> +
>>> +        /* Skip fully complete elements, including empty elements. */
>>> +        while (nr && res >= iov->iov_len) {
>>> +                res -= iov->iov_len;
>>> +                nr--;
>>> +                iov++;
>>> +        }
>>> +
>>> +        /* Partial element, adjust base/len. */
>>> +        if (res) {
>>> +                iov->iov_len  -= res;
>>> +                iov->iov_base += res;
>>> +        }
>>> +
>>> +        *p_iov = iov;
>>> +
>>> +    return nr;
>>> +}
>>> +
>>> +/*
>>> + * Wrapper around sendmsg() to resubmit on EINTR or short write.
>>> Returns
>>> + * @true if all data was transmitted, or @false with errno for an
>>> error.
>>> + * Note: May alter @iov in place on resubmit.
>>> + */
>>> +static bool sendmsg_exact(int fd, struct iovec *iov, unsigned int nr)
>>> +{
>>> +    struct msghdr hdr = {
>>> +        .msg_iov = iov,
>>> +        .msg_iovlen = nr,
>>> +    };
>>> +
>>> +    /* Sanity check first element isn't empty */
>>> +    assert(iov->iov_len == sizeof(struct xsd_sockmsg));
>>
>> Can you please move this assert() into write_request(), avoiding to have
>> 2 copies of it?
> 
> It was more relevant before update_iov() was split out.
> 
> But, there's exactly the same assertion in the write_request()'s caller,
> so I'd prefer to simply drop it if that's ok?
> 
> The writev()/sendmsg() won't malfunction if the first element is 0, and
> update_iov() will now cope too, so I don't think it's necessary.

Fine with me.


Juergen
Jason Andryuk July 23, 2024, 1:45 p.m. UTC | #4
On 2024-07-23 08:30, Juergen Gross wrote:
> On 23.07.24 14:25, Andrew Cooper wrote:
>> On 23/07/2024 10:37 am, Jürgen Groß wrote:
>>> On 22.07.24 18:25, Andrew Cooper wrote:
>>>> With the input data now conveniently arranged, use writev()/sendmsg()
>>>> instead
>>>> of decomposing it into write() calls.
>>>>
>>>> This causes all requests to be submitted with a single system call,
>>>> rather
>>>> than at least two.  While in principle short writes can occur, the
>>>> chances of
>>>> it happening are slim given that most xenbus comms are only a 
>>>> handful of
>>>> bytes.
>>>>
>>>> Nevertheless, provide {writev,sendmsg}_exact() wrappers which take
>>>> care of
>>>> resubmitting on EINTR or short write.
>>>>
>>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>> ---
>>>> CC: Anthony PERARD <anthony.perard@vates.tech>
>>>> CC: Juergen Gross <jgross@suse.com>
>>>> CC: Frediano Ziglio <frediano.ziglio@cloud.com>
>>>>
>>>> v1.1:
>>>>    * Fix iov overread, spotted by Frediano.  Factor the common
>>>> updating logic
>>>>      out into update_iov().
>>>> ---
>>>>    tools/libs/store/xs.c | 94 
>>>> +++++++++++++++++++++++++++++++++++++++++--
>>>>    1 file changed, 91 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
>>>> index e820cccc2314..f80ac7558cbe 100644
>>>> --- a/tools/libs/store/xs.c
>>>> +++ b/tools/libs/store/xs.c
>>>> @@ -563,6 +563,95 @@ static void *read_reply(
>>>>        return body;
>>>>    }
>>>>    +/*
>>>> + * Update an iov/nr pair after an incomplete writev()/sendmsg().
>>>> + *
>>>> + * Awkwardly, nr has different widths and signs between writev() and
>>>> + * sendmsg(), so we take it and return it by value, rather than by
>>>> pointer.
>>>> + */
>>>> +static size_t update_iov(struct iovec **p_iov, size_t nr, size_t res)
>>>> +{
>>>> +    struct iovec *iov = *p_iov;
>>>> +
>>>> +        /* Skip fully complete elements, including empty elements. */
>>>> +        while (nr && res >= iov->iov_len) {
>>>> +                res -= iov->iov_len;
>>>> +                nr--;
>>>> +                iov++;
>>>> +        }
>>>> +
>>>> +        /* Partial element, adjust base/len. */
>>>> +        if (res) {
>>>> +                iov->iov_len  -= res;
>>>> +                iov->iov_base += res;
>>>> +        }
>>>> +
>>>> +        *p_iov = iov;
>>>> +
>>>> +    return nr;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Wrapper around sendmsg() to resubmit on EINTR or short write.
>>>> Returns
>>>> + * @true if all data was transmitted, or @false with errno for an
>>>> error.
>>>> + * Note: May alter @iov in place on resubmit.
>>>> + */
>>>> +static bool sendmsg_exact(int fd, struct iovec *iov, unsigned int nr)
>>>> +{
>>>> +    struct msghdr hdr = {
>>>> +        .msg_iov = iov,
>>>> +        .msg_iovlen = nr,
>>>> +    };
>>>> +
>>>> +    /* Sanity check first element isn't empty */
>>>> +    assert(iov->iov_len == sizeof(struct xsd_sockmsg));
>>>
>>> Can you please move this assert() into write_request(), avoiding to have
>>> 2 copies of it?
>>
>> It was more relevant before update_iov() was split out.
>>
>> But, there's exactly the same assertion in the write_request()'s caller,
>> so I'd prefer to simply drop it if that's ok?
>>
>> The writev()/sendmsg() won't malfunction if the first element is 0, and
>> update_iov() will now cope too, so I don't think it's necessary.
> 
> Fine with me.

Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>

Looks like xs_write_all() is now unused internally, but it's an exposed 
library function.  I guess it can just be kept instead of bumping the 
library version.

Regards,
Jason
Andrew Cooper July 23, 2024, 2:09 p.m. UTC | #5
On 23/07/2024 2:45 pm, Jason Andryuk wrote:
> On 2024-07-23 08:30, Juergen Gross wrote:
>> On 23.07.24 14:25, Andrew Cooper wrote:
>>> On 23/07/2024 10:37 am, Jürgen Groß wrote:
>>>> On 22.07.24 18:25, Andrew Cooper wrote:
>>>>> With the input data now conveniently arranged, use writev()/sendmsg()
>>>>> instead
>>>>> of decomposing it into write() calls.
>>>>>
>>>>> This causes all requests to be submitted with a single system call,
>>>>> rather
>>>>> than at least two.  While in principle short writes can occur, the
>>>>> chances of
>>>>> it happening are slim given that most xenbus comms are only a
>>>>> handful of
>>>>> bytes.
>>>>>
>>>>> Nevertheless, provide {writev,sendmsg}_exact() wrappers which take
>>>>> care of
>>>>> resubmitting on EINTR or short write.
>>>>>
>>>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>>> ---
>>>>> CC: Anthony PERARD <anthony.perard@vates.tech>
>>>>> CC: Juergen Gross <jgross@suse.com>
>>>>> CC: Frediano Ziglio <frediano.ziglio@cloud.com>
>>>>>
>>>>> v1.1:
>>>>>    * Fix iov overread, spotted by Frediano.  Factor the common
>>>>> updating logic
>>>>>      out into update_iov().
>>>>> ---
>>>>>    tools/libs/store/xs.c | 94
>>>>> +++++++++++++++++++++++++++++++++++++++++--
>>>>>    1 file changed, 91 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
>>>>> index e820cccc2314..f80ac7558cbe 100644
>>>>> --- a/tools/libs/store/xs.c
>>>>> +++ b/tools/libs/store/xs.c
>>>>> @@ -563,6 +563,95 @@ static void *read_reply(
>>>>>        return body;
>>>>>    }
>>>>>    +/*
>>>>> + * Update an iov/nr pair after an incomplete writev()/sendmsg().
>>>>> + *
>>>>> + * Awkwardly, nr has different widths and signs between writev() and
>>>>> + * sendmsg(), so we take it and return it by value, rather than by
>>>>> pointer.
>>>>> + */
>>>>> +static size_t update_iov(struct iovec **p_iov, size_t nr, size_t
>>>>> res)
>>>>> +{
>>>>> +    struct iovec *iov = *p_iov;
>>>>> +
>>>>> +        /* Skip fully complete elements, including empty
>>>>> elements. */
>>>>> +        while (nr && res >= iov->iov_len) {
>>>>> +                res -= iov->iov_len;
>>>>> +                nr--;
>>>>> +                iov++;
>>>>> +        }
>>>>> +
>>>>> +        /* Partial element, adjust base/len. */
>>>>> +        if (res) {
>>>>> +                iov->iov_len  -= res;
>>>>> +                iov->iov_base += res;
>>>>> +        }
>>>>> +
>>>>> +        *p_iov = iov;
>>>>> +
>>>>> +    return nr;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * Wrapper around sendmsg() to resubmit on EINTR or short write.
>>>>> Returns
>>>>> + * @true if all data was transmitted, or @false with errno for an
>>>>> error.
>>>>> + * Note: May alter @iov in place on resubmit.
>>>>> + */
>>>>> +static bool sendmsg_exact(int fd, struct iovec *iov, unsigned int
>>>>> nr)
>>>>> +{
>>>>> +    struct msghdr hdr = {
>>>>> +        .msg_iov = iov,
>>>>> +        .msg_iovlen = nr,
>>>>> +    };
>>>>> +
>>>>> +    /* Sanity check first element isn't empty */
>>>>> +    assert(iov->iov_len == sizeof(struct xsd_sockmsg));
>>>>
>>>> Can you please move this assert() into write_request(), avoiding to
>>>> have
>>>> 2 copies of it?
>>>
>>> It was more relevant before update_iov() was split out.
>>>
>>> But, there's exactly the same assertion in the write_request()'s
>>> caller,
>>> so I'd prefer to simply drop it if that's ok?
>>>
>>> The writev()/sendmsg() won't malfunction if the first element is 0, and
>>> update_iov() will now cope too, so I don't think it's necessary.
>>
>> Fine with me.
>
> Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>

Thanks.

>
> Looks like xs_write_all() is now unused internally, but it's an
> exposed library function.  I guess it can just be kept instead of
> bumping the library version.

I have a /dev/null shaped hole I'm itching to file it in, but that is
going to need a soname bump.  It's just one of many dubious functions
exposed...

One mess at a time.

~Andrew
diff mbox series

Patch

diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c
index e820cccc2314..f80ac7558cbe 100644
--- a/tools/libs/store/xs.c
+++ b/tools/libs/store/xs.c
@@ -563,6 +563,95 @@  static void *read_reply(
 	return body;
 }
 
+/*
+ * Update an iov/nr pair after an incomplete writev()/sendmsg().
+ *
+ * Awkwardly, nr has different widths and signs between writev() and
+ * sendmsg(), so we take it and return it by value, rather than by pointer.
+ */
+static size_t update_iov(struct iovec **p_iov, size_t nr, size_t res)
+{
+	struct iovec *iov = *p_iov;
+
+        /* Skip fully complete elements, including empty elements. */
+        while (nr && res >= iov->iov_len) {
+                res -= iov->iov_len;
+                nr--;
+                iov++;
+        }
+
+        /* Partial element, adjust base/len. */
+        if (res) {
+                iov->iov_len  -= res;
+                iov->iov_base += res;
+        }
+
+        *p_iov = iov;
+
+	return nr;
+}
+
+/*
+ * Wrapper around sendmsg() to resubmit on EINTR or short write.  Returns
+ * @true if all data was transmitted, or @false with errno for an error.
+ * Note: May alter @iov in place on resubmit.
+ */
+static bool sendmsg_exact(int fd, struct iovec *iov, unsigned int nr)
+{
+	struct msghdr hdr = {
+		.msg_iov = iov,
+		.msg_iovlen = nr,
+	};
+
+	/* Sanity check first element isn't empty */
+	assert(iov->iov_len == sizeof(struct xsd_sockmsg));
+
+	while (hdr.msg_iovlen) {
+		ssize_t res = sendmsg(fd, &hdr, 0);
+
+		if (res < 0 && errno == EINTR)
+			continue;
+		if (res <= 0)
+			return false;
+
+		hdr.msg_iovlen = update_iov(&hdr.msg_iov, hdr.msg_iovlen, res);
+	}
+
+	return true;
+}
+
+/*
+ * Wrapper around sendmsg() to resubmit on EINTR or short write.  Returns
+ * @true if all data was transmitted, or @false with errno for an error.
+ * Note: May alter @iov in place on resubmit.
+ */
+static bool writev_exact(int fd, struct iovec *iov, unsigned int nr)
+{
+	/* Sanity check first element isn't empty */
+	assert(iov->iov_len == sizeof(struct xsd_sockmsg));
+
+	while (nr) {
+		ssize_t res = writev(fd, iov, nr);
+
+		if (res < 0 && errno == EINTR)
+			continue;
+		if (res <= 0)
+			return false;
+
+		nr = update_iov(&iov, nr, res);
+	}
+
+	return true;
+}
+
+static bool write_request(struct xs_handle *h, struct iovec *iov, unsigned int nr)
+{
+	if (h->is_socket)
+		return sendmsg_exact(h->fd, iov, nr);
+	else
+		return writev_exact(h->fd, iov, nr);
+}
+
 /*
  * Send message to xenstore, get malloc'ed reply.  NULL and set errno on error.
  *
@@ -605,9 +694,8 @@  static void *xs_talkv(struct xs_handle *h,
 
 	mutex_lock(&h->request_mutex);
 
-	for (i = 0; i < num_vecs; i++)
-		if (!xs_write_all(h->fd, iovec[i].iov_base, iovec[i].iov_len))
-			goto fail;
+	if (!write_request(h, iovec, num_vecs))
+		goto fail;
 
 	ret = read_reply(h, &reply_type, len);
 	if (!ret)