summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Harris <jgh146exb@wizmail.org>2016-11-26 18:35:48 +0000
committerJeremy Harris <jgh146exb@wizmail.org>2016-12-26 18:35:55 +0000
commit9427e87923288dfe6fdf80011f77bf4a135898ac (patch)
treeff756166542bdc11f542ca13f13aa688ac9c60cd
parentfd047340363431f15d2b0ac686b9dba4fa125781 (diff)
I18N: support IDNA2008. Bug 1911
-rw-r--r--doc/doc-txt/ChangeLog4
-rw-r--r--src/src/EDITME6
-rw-r--r--src/src/config.h.defaults1
-rw-r--r--src/src/transports/smtp.c9
-rw-r--r--src/src/utf8.c124
-rw-r--r--src/src/verify.c25
-rw-r--r--test/scripts/4200-International/420014
-rw-r--r--test/stdout/420011
8 files changed, 133 insertions, 61 deletions
diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog
index 73afe1bb3..46ec11e34 100644
--- a/doc/doc-txt/ChangeLog
+++ b/doc/doc-txt/ChangeLog
@@ -5,7 +5,9 @@ affect Exim's operation, with an unchanged configuration file. For new
options, and new features, see the NewStuff file next to this ChangeLog.
Exim version 4.89
------------------
+-------------------
+JH/01 Bug 1922: Support IDNA2008. This has slightly different conversion rules
+ than -2003 did; needs libidn2 in addition to libidn.
Exim version 4.88
diff --git a/src/src/EDITME b/src/src/EDITME
index 69293467e..1bff9dab2 100644
--- a/src/src/EDITME
+++ b/src/src/EDITME
@@ -937,11 +937,15 @@ ZCAT_COMMAND=/usr/bin/zcat
#
# Uncomment the following to include Internationalisation features. This is the
# SMTPUTF8 ESMTP extension, and associated facilities for handling UTF8 domain
-# and localparts, per RFCs 5890, 6530 and 6533.
+# and localparts, per RFC 3490 (IDNA2003).
# You need to have the IDN library installed.
+# If you want IDNA2008 mappings per RFCs 5890, 6530 and 6533, you additionally
+# need libidn2 and SUPPORT_I18N_2008.
# SUPPORT_I18N=yes
# LDFLAGS += -lidn
+# SUPPORT_I18N_2008=yes
+# LDFLAGS += -lidn -lidn2
#------------------------------------------------------------------------------
diff --git a/src/src/config.h.defaults b/src/src/config.h.defaults
index bafdc1ba4..58e181309 100644
--- a/src/src/config.h.defaults
+++ b/src/src/config.h.defaults
@@ -135,6 +135,7 @@ it's a default value. */
#define SUPPORT_CRYPTEQ
#define SUPPORT_I18N
+#define SUPPORT_I18N_2008
#define SUPPORT_MAILDIR
#define SUPPORT_MAILSTORE
#define SUPPORT_MBX
diff --git a/src/src/transports/smtp.c b/src/src/transports/smtp.c
index a19e85ffb..527142967 100644
--- a/src/src/transports/smtp.c
+++ b/src/src/transports/smtp.c
@@ -2429,17 +2429,14 @@ for (addr = first_addr;
rcpt_addr = transport_rcpt_address(addr, tblock->rcpt_include_affixes);
#ifdef SUPPORT_I18N
- {
- uschar * dummy_errstr;
if ( testflag(addrlist, af_utf8_downcvt)
- && (rcpt_addr = string_address_utf8_to_alabel(rcpt_addr, &dummy_errstr),
- dummy_errstr
- ) )
+ && !(rcpt_addr = string_address_utf8_to_alabel(rcpt_addr, NULL))
+ )
{
+ /*XXX could we use a per-address errstr here? Not fail the whole send? */
errno = ERRNO_EXPANDFAIL;
goto SEND_FAILED;
}
- }
#endif
count = smtp_write_command(&outblock, no_flush, "RCPT TO:<%s>%s%s\r\n",
diff --git a/src/src/utf8.c b/src/src/utf8.c
index e394db0a8..be5bcb078 100644
--- a/src/src/utf8.c
+++ b/src/src/utf8.c
@@ -2,7 +2,7 @@
* Exim - an Internet mail transport agent *
*************************************************/
-/* Copyright (c) Jeremy Harris 2015 */
+/* Copyright (c) Jeremy Harris 2015, 2016 */
/* See the file NOTICE for conditions of use and distribution. */
@@ -10,10 +10,20 @@
#ifdef SUPPORT_I18N
-#include <idna.h>
+#ifdef SUPPORT_I18N_2008
+# include <idn2.h>
+#else
+# include <idna.h>
+#endif
+
#include <punycode.h>
#include <stringprep.h>
+static uschar *
+string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err);
+
+/**************************************************/
+
BOOL
string_is_utf8(const uschar * s)
{
@@ -22,17 +32,44 @@ if (s) while ((c = *s++)) if (c & 0x80) return TRUE;
return FALSE;
}
+static BOOL
+string_is_alabel(const uschar * s)
+{
+return s[0] == 'x' && s[1] == 'n' && s[2] == '-' && s[3] == '-';
+}
+
/**************************************************/
-/* Domain conversions */
-/* the *err string pointer should be null before the call */
+/* Domain conversions.
+The *err string pointer should be null before the call
+
+Return NULL for error, with optional errstr pointer filled in
+*/
uschar *
string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
{
-uschar * s1;
-uschar * s;
+uschar * s1, * s;
int rc;
+#ifdef SUPPORT_I18N_2008
+/* Only lowercase is accepted by the library call. A pity since we lose
+any mixed-case annotation. This does not really matter for a domain. */
+ {
+ uschar c;
+ for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
+ {
+ s = string_copy(utf8);
+ for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
+ *s1 = tolower(c);
+ break;
+ }
+ }
+if ((rc = idn2_lookup_u8(CCS s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
+ {
+ if (err) *err = US idn2_strerror(rc);
+ return NULL;
+ }
+#else
s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
!= IDNA_SUCCESS)
@@ -42,6 +79,7 @@ if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
return NULL;
}
free(s);
+#endif
s = string_copy(s1);
free(s1);
return s;
@@ -52,8 +90,23 @@ return s;
uschar *
string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err)
{
-uschar * s1;
-uschar * s;
+#ifdef SUPPORT_I18N_2008
+const uschar * label;
+int sep = '.';
+uschar * s = NULL;
+
+while (label = string_nextinlist(&alabel, &sep, NULL, 0))
+ if ( string_is_alabel(label)
+ && !(label = string_localpart_alabel_to_utf8_(label, err))
+ )
+ return NULL;
+ else
+ s = string_append_listele(s, '.', label);
+return s;
+
+#else
+
+uschar * s1, * s;
int rc;
if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES))
@@ -65,6 +118,7 @@ if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES)
s = string_copy(s1);
free(s1);
return s;
+#endif
}
/**************************************************/
@@ -103,25 +157,20 @@ return res;
}
-uschar *
-string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
+static uschar *
+string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
{
-size_t p_len = Ustrlen(alabel);
+size_t p_len;
punycode_uint * p;
-uschar * s;
-uschar * res;
int rc;
+uschar * s, * res;
-if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
- {
- if (err) *err = US"bad alabel prefix";
- return NULL;
- }
-
-p_len -= 4;
+DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
+alabel += 4;
+p_len = Ustrlen(alabel);
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));
-if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
+if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
{
if (err) *err = US punycode_strerror(rc);
return NULL;
@@ -134,9 +183,23 @@ return res;
}
+uschar *
+string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
+{
+if (string_is_alabel(alabel))
+ return string_localpart_alabel_to_utf8_(alabel, err);
+
+if (err) *err = US"bad alabel prefix";
+return NULL;
+}
+
+
/**************************************************/
-/* whole address conversion */
-/* the *err string pointer should be null before the call */
+/* Whole address conversion.
+The *err string pointer should be null before the call.
+
+Return NULL on error, with (optional) errstring pointer filled in
+*/
uschar *
string_address_utf8_to_alabel(const uschar * utf8, uschar ** err)
@@ -153,8 +216,8 @@ for (s = utf8; *s; s++)
if (*s == '@')
{
l = string_copyn(utf8, s - utf8);
- if ( (l = string_localpart_utf8_to_alabel(l, err), err && *err)
- || (d = string_domain_utf8_to_alabel(++s, err), err && *err)
+ if ( !(l = string_localpart_utf8_to_alabel(l, err))
+ || !(d = string_domain_utf8_to_alabel(++s, err))
)
return NULL;
l = string_sprintf("%s@%s", l, d);
@@ -182,10 +245,21 @@ Returns: nothing
void
utf8_version_report(FILE *f)
{
+#ifdef SUPPORT_I18N_2008
+fprintf(f, "Library version: IDN2: Compile: %s\n"
+ " Runtime: %s\n",
+ IDN2_VERSION,
+ idn2_check_version(NULL));
+fprintf(f, "Library version: Stringprep: Compile: %s\n"
+ " Runtime: %s\n",
+ STRINGPREP_VERSION,
+ stringprep_check_version(NULL));
+#else
fprintf(f, "Library version: IDN: Compile: %s\n"
" Runtime: %s\n",
STRINGPREP_VERSION,
stringprep_check_version(NULL));
+#endif
}
#endif /* whole file */
diff --git a/src/src/verify.c b/src/src/verify.c
index 9652a395f..0959b0051 100644
--- a/src/src/verify.c
+++ b/src/src/verify.c
@@ -940,11 +940,10 @@ can do it there for the non-rcpt-verify case. For this we keep an addresscount.
}
else if ( addr->prop.utf8_msg
&& (addr->prop.utf8_downcvt || !(peer_offered & PEER_OFFERED_UTF8))
- && (setflag(addr, af_utf8_downcvt),
- from_address = string_address_utf8_to_alabel(from_address,
- &addr->message),
- addr->message
- ) )
+ && !(setflag(addr, af_utf8_downcvt),
+ from_address = string_address_utf8_to_alabel(from_address,
+ &addr->message)
+ ) )
{
errno = ERRNO_EXPANDFAIL;
setflag(addr, af_verify_nsfail);
@@ -1121,16 +1120,14 @@ can do it there for the non-rcpt-verify case. For this we keep an addresscount.
#ifdef SUPPORT_I18N
/*XXX should the conversion be moved into transport_rcpt_address() ? */
- uschar * dummy_errstr = NULL;
if ( testflag(addr, af_utf8_downcvt)
- && (rcpt = string_address_utf8_to_alabel(rcpt, &dummy_errstr),
- dummy_errstr
- ) )
- {
- errno = ERRNO_EXPANDFAIL;
- *failure_ptr = US"recipient";
- done = FALSE;
- }
+ && !(rcpt = string_address_utf8_to_alabel(rcpt, NULL))
+ )
+ {
+ errno = ERRNO_EXPANDFAIL;
+ *failure_ptr = US"recipient";
+ done = FALSE;
+ }
else
#endif
diff --git a/test/scripts/4200-International/4200 b/test/scripts/4200-International/4200
index d15b67d19..dd5348ea3 100644
--- a/test/scripts/4200-International/4200
+++ b/test/scripts/4200-International/4200
@@ -33,10 +33,10 @@ original: bogus.\xD9\x84.com
conversion: ${utf8_domain_to_alabel:bogus.\xD9\x84.com}
golden: bogus.xn--ghb.com
-original: arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com
+original: arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A.com
conversion: ${utf8_domain_to_alabel:arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\
-\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com}
-golden: arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A.com}
+golden: arabic.xn--mgbcah9ar9a4efegftvvn.com
original simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\xE4\xB9\x88\xE4\xB8\x8D\xE8\xAF\xB4\xE4\xB8\xAD\xE6\x96\x87.com
conversion: ${utf8_domain_to_alabel:simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\
@@ -49,9 +49,7 @@ conversion: ${utf8_domain_to_alabel:trad.chinese.\xE4\xBB\x96\xE5\x80\x91\xE7\x8
golden: trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
original czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com
-conversion: ${utf8_domain_to_alabel:czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\
-\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com}
-golden: czech.xn--Proprostnemluvesky-uyb24dma41a.com
+conversion: ${utf8_domain_to_alabel:czech.Pro\xC4\x8Dprost\xC4\x9Bnemluv\xC3\xAD\xC4\x8Desky.com}
original hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\xD7\x95\xD7\x98\xD7\x9C\xD7\x90\xD7\x9E\xD7\x93\xD7\x91\xD7\xA8\xD7\x99\xD7\x9D\xD7\xA2\xD7\x91\xD7\xA8\xD7\x99\xD7\xAA.com
conversion: ${utf8_domain_to_alabel:hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\
@@ -107,10 +105,10 @@ golden: xn--strae-oqa.de
a-label domain to utf-8:
conversion: ${utf8_domain_from_alabel:arab.xn--ghb.com}
-conversion: ${utf8_domain_from_alabel:arab.xn--egbpdaj6bu4bxfgehfvwxn.com}
+conversion: ${utf8_domain_from_alabel:arab.xn--mgbcah9ar9a4efegftvvn.com}
conversion: ${utf8_domain_from_alabel:simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com}
conversion: ${utf8_domain_from_alabel:trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com}
-conversion: ${utf8_domain_from_alabel:czech.xn--Proprostnemluvesky-uyb24dma41a.com}
+conversion: ${utf8_domain_from_alabel:czech.xn--proprostnemluvesky-uyb24dma41a.com}
conversion: ${utf8_domain_from_alabel:hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com}
conversion: ${utf8_domain_from_alabel:hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com}
conversion: ${utf8_domain_from_alabel:japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com}
diff --git a/test/stdout/4200 b/test/stdout/4200
index 02bc22dce..91e8ea144 100644
--- a/test/stdout/4200
+++ b/test/stdout/4200
@@ -27,9 +27,9 @@
> conversion: bogus.xn--ghb.com
> golden: bogus.xn--ghb.com
>
-> original: arabic.ليهمابتكلموشعربي؟.com
-> conversion: arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
-> golden: arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+> original: arabic.ليهمابتكلموشعربي.com
+> conversion: arabic.xn--mgbcah9ar9a4efegftvvn.com
+> golden: arabic.xn--mgbcah9ar9a4efegftvvn.com
>
> original simpl.chinese.他们为什么不说中文.com
> conversion: simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
@@ -41,7 +41,6 @@
>
> original czech.Pročprostěnemluvíčesky.com
> conversion: czech.xn--proprostnemluvesky-uyb24dma41a.com
-> golden: czech.xn--Proprostnemluvesky-uyb24dma41a.com
>
> original hebrew.למההםפשוטלאמדבריםעברית.com
> conversion: hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
@@ -80,10 +79,10 @@
> a-label domain to utf-8:
>
> conversion: arab.ل.com
-> conversion: arab.ليهمابتكلموشعربي؟.com
+> conversion: arab.ليهمابتكلموشعربي.com
> conversion: simpl.chinese.他们为什么不说中文.com
> conversion: trad.chinese.他們爲什麽不說中文.com
-> conversion: czech.Pročprostěnemluvíčesky.com
+> conversion: czech.pročprostěnemluvíčesky.com
> conversion: hebrew.למההםפשוטלאמדבריםעברית.com
> conversion: hindi.यहलोगहिन्दीक्योंनहींबोलसकतेहैं.com
> conversion: japanese.なぜみんな日本語を話してくれないのか.com