diff mbox series

[v3,3/6] mailinfo: warn if CR found in decoded base64/QP email

Message ID 5aac2ba38e5442019d2816c63d7b65bf2bcf062a.1620311544.git.congdanhqx@gmail.com (mailing list archive)
State Superseded
Headers show
Series Teach am/mailinfo to process quoted CR | expand

Commit Message

Đoàn Trần Công Danh May 6, 2021, 3:02 p.m. UTC
When SMTP servers receive 8-bit email messages, possibly with only
LF as line ending, some of them decide to change said LF to CRLF.

Some mailing list software, upon receiving 8-bit email messages,
decides to encode those messages in base64 or quoted-printable.

If an email is transferred through the above mail servers, then distributed
by such mailing list software, the recipients will receive an email that
contains a patch mangled with CRLF encoded inside another encoding.

Thus, such CR couldn't be dropped by "mailsplit".
Hence, the mailed patch couldn't be applied cleanly.
Such accidents have been observed in the wild [1].

Instead of silently rejecting those messages, let's give our users
some warnings if such CR is found.

[1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 mailinfo.c              | 14 ++++++++++++++
 mailinfo.h              |  1 +
 t/t5100-mailinfo.sh     | 15 +++++++++++++++
 t/t5100/quoted-cr-info  |  5 +++++
 t/t5100/quoted-cr-msg   |  2 ++
 t/t5100/quoted-cr-patch | 22 ++++++++++++++++++++++
 t/t5100/quoted-cr.mbox  | 22 ++++++++++++++++++++++
 7 files changed, 81 insertions(+)
 create mode 100644 t/t5100/quoted-cr-info
 create mode 100644 t/t5100/quoted-cr-msg
 create mode 100644 t/t5100/quoted-cr-patch
 create mode 100644 t/t5100/quoted-cr.mbox

Comments

Junio C Hamano May 8, 2021, 10:52 a.m. UTC | #1
Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> +test_expect_success 'mailinfo warn CR in base64 encoded email' '
> +	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
> +	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&

As we are not interested in a lone CR in the middle of the line, I
wonder if we want to anchor the double per-cent to the end of the
line, i.e. "s/%%$/$(printf \\015)/".

On the other hand, we may want to make sure that the new option does
not disturb CR in the middle of the line, so we may be better off
leaving double per-cent unanchored, but allow replacing more than
once on a line, i.e. "s/%%/$(printf \\015)/g".

If we were to go the latter route, we'd want to try a file with a
CR in the middle of the line (without a CR at the end of any line)
and make sure we won't warn or strip.
diff mbox series

Patch

diff --git a/mailinfo.c b/mailinfo.c
index 5681d9130d..dcf579700d 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -994,6 +994,11 @@  static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	const char *rest;
 
 	if (!mi->format_flowed) {
+		if (len >= 2 &&
+		    line->buf[len - 2] == '\r' &&
+		    line->buf[len - 1] == '\n') {
+			mi->have_quoted_cr = 1;
+		}
 		handle_filter(mi, line);
 		return;
 	}
@@ -1033,6 +1038,12 @@  static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
 	handle_filter(mi, line);
 }
 
+static void summarize_quoted_cr(struct mailinfo *mi)
+{
+	if (mi->have_quoted_cr)
+		warning("quoted CR detected");
+}
+
 static void handle_body(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf prev = STRBUF_INIT;
@@ -1051,6 +1062,8 @@  static void handle_body(struct mailinfo *mi, struct strbuf *line)
 				handle_filter(mi, &prev);
 				strbuf_reset(&prev);
 			}
+			summarize_quoted_cr(mi);
+			mi->have_quoted_cr = 0;
 			if (!handle_boundary(mi, line))
 				goto handle_body_out;
 		}
@@ -1100,6 +1113,7 @@  static void handle_body(struct mailinfo *mi, struct strbuf *line)
 
 	if (prev.len)
 		handle_filter(mi, &prev);
+	summarize_quoted_cr(mi);
 
 	flush_inbody_header_accum(mi);
 
diff --git a/mailinfo.h b/mailinfo.h
index 79b1d6774e..b394ef9bce 100644
--- a/mailinfo.h
+++ b/mailinfo.h
@@ -24,6 +24,7 @@  struct mailinfo {
 	struct strbuf charset;
 	unsigned int format_flowed:1;
 	unsigned int delsp:1;
+	unsigned int have_quoted_cr:1;
 	char *message_id;
 	enum  {
 		TE_DONTCARE, TE_QP, TE_BASE64
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index 147e616533..c7ea1b30df 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -228,4 +228,19 @@  test_expect_success 'mailinfo handles unusual header whitespace' '
 	test_cmp expect actual
 '
 
+check_quoted_cr_mail () {
+	git mailinfo -u "$@" quoted-cr-msg quoted-cr-patch \
+		<"$DATA/quoted-cr.mbox" >quoted-cr-info 2>quoted-cr-err &&
+	test_cmp "expect-cr-msg" quoted-cr-msg &&
+	test_cmp "expect-cr-patch" quoted-cr-patch &&
+	test_cmp "$DATA/quoted-cr-info" quoted-cr-info
+}
+
+test_expect_success 'mailinfo warn CR in base64 encoded email' '
+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-msg" >expect-cr-msg &&
+	sed "s/%%/$(printf \\015)/" "$DATA/quoted-cr-patch" >expect-cr-patch &&
+	check_quoted_cr_mail &&
+	grep "quoted CR detected" quoted-cr-err
+'
+
 test_done
diff --git a/t/t5100/quoted-cr-info b/t/t5100/quoted-cr-info
new file mode 100644
index 0000000000..dab2228b70
--- /dev/null
+++ b/t/t5100/quoted-cr-info
@@ -0,0 +1,5 @@ 
+Author: A U Thor
+Email: mail@example.com
+Subject: sample
+Date: Mon, 3 Aug 2020 22:40:55 +0700
+
diff --git a/t/t5100/quoted-cr-msg b/t/t5100/quoted-cr-msg
new file mode 100644
index 0000000000..a148bc7e26
--- /dev/null
+++ b/t/t5100/quoted-cr-msg
@@ -0,0 +1,2 @@ 
+On different distro, pytest is suffixed with different patterns.%%
+%%
diff --git a/t/t5100/quoted-cr-patch b/t/t5100/quoted-cr-patch
new file mode 100644
index 0000000000..580e2bddb8
--- /dev/null
+++ b/t/t5100/quoted-cr-patch
@@ -0,0 +1,22 @@ 
+---%%
+ configure | 2 +-%%
+ 1 file changed, 1 insertion(+), 1 deletion(-)%%
+%%
+diff --git a/configure b/configure%%
+index db3538b3..f7c1c095 100755%%
+--- a/configure%%
++++ b/configure%%
+@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%%
+     printf "Checking for python3 pytest (>= 3.0)... "%%
+     conf=$(mktemp)%%
+     printf "[pytest]\nminversion=3.0\n" > $conf%%
+-    if pytest-3 -c $conf --version >/dev/null 2>&1; then%%
++    if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then%%
+         printf "Yes.\n"%%
+         have_python3_pytest=1%%
+     else%%
+-- %%
+2.28.0%%
+_______________________________________________
+example mailing list -- list@example.org
+To unsubscribe send an email to list-leave@example.org
diff --git a/t/t5100/quoted-cr.mbox b/t/t5100/quoted-cr.mbox
new file mode 100644
index 0000000000..6ea9806a6b
--- /dev/null
+++ b/t/t5100/quoted-cr.mbox
@@ -0,0 +1,22 @@ 
+From: A U Thor <mail@example.com>
+To: list@example.org
+Subject: [PATCH v2] sample
+Date: Mon,  3 Aug 2020 22:40:55 +0700
+Message-Id: <msg-id@example.com>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+T24gZGlmZmVyZW50IGRpc3RybywgcHl0ZXN0IGlzIHN1ZmZpeGVkIHdpdGggZGlmZmVyZW50IHBh
+dHRlcm5zLg0KDQotLS0NCiBjb25maWd1cmUgfCAyICstDQogMSBmaWxlIGNoYW5nZWQsIDEgaW5z
+ZXJ0aW9uKCspLCAxIGRlbGV0aW9uKC0pDQoNCmRpZmYgLS1naXQgYS9jb25maWd1cmUgYi9jb25m
+aWd1cmUNCmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUNCi0tLSBhL2NvbmZpZ3VyZQ0K
+KysrIGIvY29uZmlndXJlDQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAt
+ZXEgMSBdOyB0aGVuDQogICAgIHByaW50ZiAiQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+
+PSAzLjApLi4uICINCiAgICAgY29uZj0kKG1rdGVtcCkNCiAgICAgcHJpbnRmICJbcHl0ZXN0XVxu
+bWludmVyc2lvbj0zLjBcbiIgPiAkY29uZg0KLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZl
+cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQorICAgIGlmICIkcHl0aG9uIiAtbSBweXRlc3Qg
+LWMgJGNvbmYgLS12ZXJzaW9uID4vZGV2L251bGwgMj4mMTsgdGhlbg0KICAgICAgICAgcHJpbnRm
+ICJZZXMuXG4iDQogICAgICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTENCiAgICAgZWxzZQ0KLS0g
+DQoyLjI4LjANCl9fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f
+CmV4YW1wbGUgbWFpbGluZyBsaXN0IC0tIGxpc3RAZXhhbXBsZS5vcmcKVG8gdW5zdWJzY3JpYmUg
+c2VuZCBhbiBlbWFpbCB0byBsaXN0LWxlYXZlQGV4YW1wbGUub3JnCg==