mailx/mailx-12.5-encsplit.patch

diff --git a/mime.c b/mime.c
index 45de80a..ecac85d 100644
--- a/mime.c
+++ b/mime.c
@@ -1109,16 +1109,34 @@ fromhdr_end:
 }
 
 /*
+ * Return the byte length of the UTF-8 sequence that starts with tc:
+ * the count of leading 1 bits, or 1 for ASCII and continuation bytes.
+ */
+static size_t
+codepointsize(char tc)
+{
+	/* plain char may be signed; left-shifting a negative value is UB */
+	unsigned char uc = (unsigned char)tc;
+	size_t rv = 0;
+
+	for (; uc & 0x80; uc <<= 1)
+		rv++;
+	/* rv is 0 for ASCII; such a byte still occupies one byte */
+	return rv ? rv : 1;
+}
+
+/*
  * Convert header fields to RFC 1522 format and write to the file fo.
  */
 static size_t
 mime_write_tohdr(struct str *in, FILE *fo)
 {
 	char *upper, *wbeg, *wend, *charset, *lastwordend = NULL, *lastspc, b,
-		*charset7;
+		*charset7, *cp;
 	struct str cin, cout;
-	size_t sz = 0, col = 0, wr, charsetlen, charset7len;
+	size_t sz = 0, col = 0, wr, charsetlen, charset7len, cpsz;
 	int quoteany, mustquote, broken,
+		maxin, maxout, curin, cps,
 		maxcol = 65 /* there is the header field's name, too */;
 
 	upper = in->s + in->l;
@@ -1134,41 +1152,75 @@ mime_write_tohdr(struct str *in, FILE *fo)
 		if (mustquote_hdr(wbeg, wbeg == in->s, wbeg == &upper[-1]))
 			quoteany++;
 	}
+
+	/*
+	 * rfc2047 says we cannot split multi-byte characters over
+	 * encoded words, so we need to know if we're a multi-byte
+	 * source stream (UTF-8 specifically) or just an 8 bit
+	 * stream like ISO-8859-15
+	 * so test beginning of charset since it is valid to include
+	 * language in charset "UTF-8*DE" etc as per rfc 2184/2231
+	 */
+	char *thisset = b&0200 ? charset : charset7;
+	int is_utf8 = ( strncasecmp( thisset, "utf-8", 5 ) == 0 );
+
 	if (2 * quoteany > in->l) {
 		/*
 		 * Print the entire field in base64.
 		 */
-		for (wbeg = in->s; wbeg < upper; wbeg = wend) {
+		for (wbeg = in->s; wbeg < upper; ) {
 			wend = upper;
 			cin.s = wbeg;
-			for (;;) {
-				cin.l = wend - wbeg;
-				if (cin.l * 4/3 + 7 + charsetlen
-						< maxcol - col) {
-					fprintf(fo, "=?%s?B?",
-						b&0200 ? charset : charset7);
-					wr = mime_write_tob64(&cin, fo, 1);
-					fwrite("?=", sizeof (char), 2, fo);
-					wr += 7 + charsetlen;
-					sz += wr, col += wr;
-					if (wend < upper) {
-						fwrite("\n ", sizeof (char),
-								2, fo);
-						sz += 2;
-						col = 0;
-						maxcol = 76;
+			/*
+			 * we calculate the maximum number of bytes
+			 * we can use on this output line, and then what
+			 * this equates to as base64 encoded source bytes
+			 */
+			maxout = maxcol - col - 7 - charsetlen;
+			maxin = (maxout - (maxout & 0x03)) * 3/4;
+
+			/* short enough to finish ? */
+			if (maxin > upper - wbeg )
+			{
+				curin = upper - wbeg;
+				wbeg += curin;
+			}else
+			{
+				if (is_utf8)
+				{
+					/*
+					 * now scan the input from the beginning
+					 * to see how many codepoints will fit
+					 */
+					curin = 0;
+					while (curin < maxin
+						&& (cpsz = codepointsize(*wbeg)) <= (maxin - curin))
+					{
+						curin += cpsz;
+						wbeg += cpsz;
 					}
-					break;
-				} else {
-					if (col) {
-						fprintf(fo, "\n ");
-						sz += 2;
-						col = 0;
-						maxcol = 76;
-					} else
-						wend -= 4;
+				}else
+				{
+					curin = maxin;
+					wbeg += maxin;
 				}
 			}
+			cin.l = curin;
+			fprintf(fo, "%s=?%s?B?", (cin.s != in->s) ? " " : "", thisset );
+			wr = mime_write_tob64(&cin, fo, 1);
+
+			if (wbeg < upper)
+			{
+				wr += fwrite("?=\n ", sizeof (char), 4, fo) * sizeof (char);
+			}else
+			{
+				wr += fwrite("?=", sizeof (char), 2, fo) * sizeof (char);
+			}
+
+			/* and shuffle pointers and counts */
+			col = 1;
+			maxcol = 76;
+			sz += wr + 7 + charsetlen + ((cin.s != in->s) ? 1 : 0 );
 		}
 	} else {
 		/*
@@ -1243,7 +1295,29 @@ mime_write_tohdr(struct str *in, FILE *fo)
 								maxcol -= wbeg -
 									lastspc;
 						} else {
-							wend -= 4;
+							if (is_utf8)
+							{
+								/*
+								 * make sure wend is not pointing to
+								 * the middle of a codepoint
+								 */
+								cp = wend;
+								while (--cp > wbeg)
+								{
+									cps = codepointsize(*cp);
+									if (cps > 1)
+									{
+										if (wend - cp - cps > 4)
+											wend -= 4;
+										else
+											wend = cp;
+										break;
+									}
+								}
+								if (cp == wbeg)
+									wend -= 4;
+							} else
+								wend -= 4;
 						}
 						free(cout.s);
 					}
rebuilt in bindir applied patch set from Fedora [release 12.5-3mamba;Mon Apr 17 2023] 2024-01-06 06:53:04 +01:00			`diff --git a/mime.c b/mime.c`
			`index 45de80a..ecac85d 100644`
			`--- a/mime.c`
			`+++ b/mime.c`
			`@@ -1109,16 +1109,34 @@ fromhdr_end:`
			`}`

			`/*`
			`+ * return length of this UTF-8 codepoint in bytes`
			`+ */`
			`+static size_t`
			`+codepointsize(char tc)`
			`+{`
			`+ int rv = 0;`
			`+ if ( ! ( tc & 0x80 ) )`
			`+ return 1;`
			`+ while ( tc & 0x80 )`
			`+ {`
			`+ rv++;`
			`+ tc = tc<<1;`
			`+ }`
			`+ return rv;`
			`+}`
			`+`
			`+/*`
			`* Convert header fields to RFC 1522 format and write to the file fo.`
			`*/`
			`static size_t`
			`mime_write_tohdr(struct str *in, FILE *fo)`
			`{`
			`char *upper, *wbeg, *wend, *charset, *lastwordend = NULL, *lastspc, b,`
			`- *charset7;`
			`+ *charset7, *cp;`
			`struct str cin, cout;`
			`- size_t sz = 0, col = 0, wr, charsetlen, charset7len;`
			`+ size_t sz = 0, col = 0, wr, charsetlen, charset7len, cpsz;`
			`int quoteany, mustquote, broken,`
			`+ maxin, maxout, curin, cps,`
			`maxcol = 65 /* there is the header field's name, too */;`

			`upper = in->s + in->l;`
			`@@ -1134,41 +1152,75 @@ mime_write_tohdr(struct str *in, FILE *fo)`
			`if (mustquote_hdr(wbeg, wbeg == in->s, wbeg == &upper[-1]))`
			`quoteany++;`
			`}`
			`+`
			`+ /*`
			`+ * rfc2047 says we cannot split multi-byte characters over`
			`+ * encoded words, so we need to know if we're a multi-byte`
			`+ * source stream (UTF-8 specifically) or just an 8 bit`
			`+ * stream like ISO-8859-15`
			`+ * so test beginning of charset since it is valid to include`
			`+ * language in charset "UTF-8*DE" etc as per rfc 2184/2231`
			`+ */`
			`+ char *thisset = b&0200 ? charset : charset7;`
			`+ int is_utf8 = ( strncasecmp( thisset, "utf-8", 5 ) == 0 );`
			`+`
			`if (2 * quoteany > in->l) {`
			`/*`
			`* Print the entire field in base64.`
			`*/`
			`- for (wbeg = in->s; wbeg < upper; wbeg = wend) {`
			`+ for (wbeg = in->s; wbeg < upper; ) {`
			`wend = upper;`
			`cin.s = wbeg;`
			`- for (;;) {`
			`- cin.l = wend - wbeg;`
			`- if (cin.l * 4/3 + 7 + charsetlen`
			`- < maxcol - col) {`
			`- fprintf(fo, "=?%s?B?",`
			`- b&0200 ? charset : charset7);`
			`- wr = mime_write_tob64(&cin, fo, 1);`
			`- fwrite("?=", sizeof (char), 2, fo);`
			`- wr += 7 + charsetlen;`
			`- sz += wr, col += wr;`
			`- if (wend < upper) {`
			`- fwrite("\n ", sizeof (char),`
			`- 2, fo);`
			`- sz += 2;`
			`- col = 0;`
			`- maxcol = 76;`
			`+ /*`
			`+ * we calculate the maximum number of bytes`
			`+ * we can use on this output line, and then what`
			`+ * this equates to as base64 encoded source bytes`
			`+ */`
			`+ maxout = maxcol - col - 7 - charsetlen;`
			`+ maxin = (maxout - (maxout & 0x03)) * 3/4;`
			`+`
			`+ /* short enough to finish ? */`
			`+ if (maxin > upper - wbeg )`
			`+ {`
			`+ curin = upper - wbeg;`
			`+ wbeg += curin;`
			`+ }else`
			`+ {`
			`+ if (is_utf8)`
			`+ {`
			`+ /*`
			`+ * now scan the input from the beginning`
			`+ * to see how many codepoints will fit`
			`+ */`
			`+ curin = 0;`
			`+ while (curin < maxin`
			`+ && (cpsz = codepointsize(*wbeg)) <= (maxin - curin))`
			`+ {`
			`+ curin += cpsz;`
			`+ wbeg += cpsz;`
			`}`
			`- break;`
			`- } else {`
			`- if (col) {`
			`- fprintf(fo, "\n ");`
			`- sz += 2;`
			`- col = 0;`
			`- maxcol = 76;`
			`- } else`
			`- wend -= 4;`
			`+ }else`
			`+ {`
			`+ curin = maxin;`
			`+ wbeg += maxin;`
			`}`
			`}`
			`+ cin.l = curin;`
			`+ fprintf(fo, "%s=?%s?B?", (cin.s != in->s) ? " " : "", thisset );`
			`+ wr = mime_write_tob64(&cin, fo, 1);`
			`+`
			`+ if (wbeg < upper)`
			`+ {`
			`+ wr += fwrite("?=\n ", sizeof (char), 4, fo) * sizeof (char);`
			`+ }else`
			`+ {`
			`+ wr += fwrite("?=", sizeof (char), 2, fo) * sizeof (char);`
			`+ }`
			`+`
			`+ /* and shuffle pointers and counts */`
			`+ col = 1;`
			`+ maxcol = 76;`
			`+ sz += wr + 7 + charsetlen + ((cin.s != in->s) ? 1 : 0 );`
			`}`
			`} else {`
			`/*`
			`@@ -1243,7 +1295,29 @@ mime_write_tohdr(struct str *in, FILE *fo)`
			`maxcol -= wbeg -`
			`lastspc;`
			`} else {`
			`- wend -= 4;`
			`+ if (is_utf8)`
			`+ {`
			`+ /*`
			`+ * make sure wend is not pointing to`
			`+ * the middle of a codepoint`
			`+ */`
			`+ cp = wend;`
			`+ while (--cp > wbeg)`
			`+ {`
			`+ cps = codepointsize(*cp);`
			`+ if (cps > 1)`
			`+ {`
			`+ if (wend - cp - cps > 4)`
			`+ wend -= 4;`
			`+ else`
			`+ wend = cp;`
			`+ break;`
			`+ }`
			`+ }`
			`+ if (cp == wbeg)`
			`+ wend -= 4;`
			`+ } else`
			`+ wend -= 4;`
			`}`
			`free(cout.s);`
			`}`