From f846c8f531d5615c24a6d4dc0afb9815c4f766f7 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Sun, 26 Apr 2015 00:05:08 +0100 Subject: MIME: Support RFC2231 for filenames. Bug 466 Patch originally from Alexander Shikoff, heavily reworked by JH. --- doc/doc-docbook/spec.xfpt | 3 +- doc/doc-txt/ChangeLog | 3 + src/src/macros.h | 9 +- src/src/mime.c | 371 ++++++++++++++++++++++++++-------------- src/src/mime.h | 18 +- test/log/4000 | 3 + test/mail/4000.userx | 65 +++++++ test/scripts/4000-scanning/4000 | 28 +++ test/stdout/4000 | 11 ++ 9 files changed, 368 insertions(+), 143 deletions(-) diff --git a/doc/doc-docbook/spec.xfpt b/doc/doc-docbook/spec.xfpt index bd1c8bfdd..9b7ada823 100644 --- a/doc/doc-docbook/spec.xfpt +++ b/doc/doc-docbook/spec.xfpt @@ -31157,7 +31157,8 @@ containing the decoded data. This is perhaps the most important of the MIME variables. It contains a proposed filename for an attachment, if one was found in either the &'Content-Type:'& or &'Content-Disposition:'& headers. The filename will be -RFC2047 decoded, but no additional sanity checks are done. If no filename was +RFC2047 or RFC2231 decoded, but no additional sanity checks are done. + If no filename was found, this variable contains the empty string. .vitem &$mime_is_coverletter$& diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog index 2421bab45..08fb50f3e 100644 --- a/doc/doc-txt/ChangeLog +++ b/doc/doc-txt/ChangeLog @@ -86,6 +86,9 @@ JH/24 Verification callouts now attempt to use TLS by default. HS/01 DNSSEC options (dnssec_require_domains, dnssec_request_domains) are generic router options now. The defaults didn't change. +JH/25 Bug 466: Add RFC2231 support for MIME attachment filenames. + Original patch from Alexander Shikoff, worked over by JH. + Exim version 4.85 ----------------- diff --git a/src/src/macros.h b/src/src/macros.h index a8ab4f7ae..12843b27d 100644 --- a/src/src/macros.h +++ b/src/src/macros.h @@ -12,6 +12,9 @@ a string as a text string. 
This is sometimes useful for debugging output. */ #define mac_string(s) # s #define mac_expanded_string(s) mac_string(s) +/* Number of elements of an array */ +#define nelem(arr) (sizeof(arr) / sizeof(*arr)) + /* When running in the test harness, the load average is fudged. */ @@ -156,11 +159,11 @@ into big_buffer_size and in some circumstances increased. It should be at least as long as the maximum path length. */ #if defined PATH_MAX && PATH_MAX > 16384 -#define BIG_BUFFER_SIZE PATH_MAX +# define BIG_BUFFER_SIZE PATH_MAX #elif defined MAXPATHLEN && MAXPATHLEN > 16384 -#define BIG_BUFFER_SIZE MAXPATHLEN +# define BIG_BUFFER_SIZE MAXPATHLEN #else -#define BIG_BUFFER_SIZE 16384 +# define BIG_BUFFER_SIZE 16384 #endif /* header size of pipe content diff --git a/src/src/mime.c b/src/src/mime.c index 51f00d368..aeab33d9c 100644 --- a/src/src/mime.c +++ b/src/src/mime.c @@ -21,7 +21,7 @@ uschar *mime_current_boundary = NULL; give info on detected "problems" in MIME encodings. Those are defined in mime.h. 
*/ -void +static void mime_set_anomaly(int level, const char *text) { mime_anomaly_level = level; @@ -41,7 +41,7 @@ mime_set_anomaly(int level, const char *text) 0-255 - char to write */ -uschar * +static uschar * mime_decode_qp_char(uschar *qp_p, int *c) { uschar *initial_pos = qp_p; @@ -240,7 +240,7 @@ return size; } -FILE * +static FILE * mime_get_decode_file(uschar *pname, uschar *fname) { FILE *f = NULL; @@ -369,7 +369,8 @@ mime_content_size = (size_counter + 1023) / 1024; return OK; } -int + +static int mime_get_header(FILE *f, uschar *header) { int c = EOF; @@ -474,21 +475,84 @@ else } +static void +mime_vars_reset(void) +{ +mime_anomaly_level = 0; +mime_anomaly_text = NULL; +mime_boundary = NULL; +mime_charset = NULL; +mime_decoded_filename = NULL; +mime_filename = NULL; +mime_content_description = NULL; +mime_content_disposition = NULL; +mime_content_id = NULL; +mime_content_transfer_encoding = NULL; +mime_content_type = NULL; +mime_is_multipart = 0; +mime_content_size = 0; +} + + +/* Grab a parameter value, dealing with quoting. + +Arguments: + str Input string. 
Updated on return to point to terminating ; or NUL + +Return: + Allocated string with parameter value +*/ +static uschar * +mime_param_val(uschar ** sp) +{ +uschar * s = *sp; +uschar * val = NULL; +int size = 0, ptr = 0; + +/* debug_printf(" considering paramval '%s'\n", s); */ + +while (*s && *s != ';') /* ; terminates */ + if (*s == '"') + { + s++; /* skip opening " */ + while (*s && *s != '"') /* " protects ; */ + val = string_cat(val, &size, &ptr, s++, 1); + if (*s) s++; /* skip closing " */ + } + else + val = string_cat(val, &size, &ptr, s++, 1); +if (val) val[ptr] = '\0'; +*sp = s; +return val; +} + +static uschar * +mime_next_semicolon(uschar * s) +{ +while (*s && *s != ';') /* ; terminates */ + if (*s == '"') + { + s++; /* skip opening " */ + while (*s && *s != '"') /* " protects ; */ + s++; + if (*s) s++; /* skip closing " */ + } + else + s++; +return s; +} + + int mime_acl_check(uschar *acl, FILE *f, struct mime_boundary_context *context, - uschar **user_msgptr, uschar **log_msgptr) + uschar **user_msgptr, uschar **log_msgptr) { int rc = OK; -uschar *header = NULL; +uschar * header = NULL; struct mime_boundary_context nested_context; /* reserve a line buffer to work in */ -if (!(header = (uschar *)malloc(MIME_MAX_HEADER_SIZE+1))) - { - log_write(0, LOG_PANIC, - "MIME ACL: can't allocate %d bytes of memory.", MIME_MAX_HEADER_SIZE+1); - return DEFER; - } +header = store_get(MIME_MAX_HEADER_SIZE+1); /* Not actually used at the moment, but will be vital to fixing * some RFC 2046 nonconformance later... 
*/ @@ -498,26 +562,12 @@ nested_context.parent = context; while(1) { /* reset all per-part mime variables */ - mime_anomaly_level = 0; - mime_anomaly_text = NULL; - mime_boundary = NULL; - mime_charset = NULL; - mime_decoded_filename = NULL; - mime_filename = NULL; - mime_content_description = NULL; - mime_content_disposition = NULL; - mime_content_id = NULL; - mime_content_transfer_encoding = NULL; - mime_content_type = NULL; - mime_is_multipart = 0; - mime_content_size = 0; - - /* - If boundary is null, we assume that *f is positioned on the start of headers (for example, - at the very beginning of a message. - If a boundary is given, we must first advance to it to reach the start of the next header - block. - */ + mime_vars_reset(); + + /* If boundary is null, we assume that *f is positioned on the start of + headers (for example, at the very beginning of a message. If a boundary is + given, we must first advance to it to reach the start of the next header + block. */ /* NOTE -- there's an error here -- RFC2046 specifically says to * check for outer boundaries. This code doesn't do that, and @@ -526,130 +576,189 @@ while(1) * (I have moved partway towards adding support, however, by adding * a "parent" field to my new boundary-context structure.) */ - if (context != NULL) + if (context) for (;;) { - while(fgets(CS header, MIME_MAX_HEADER_SIZE, f)) + if (!fgets(CS header, MIME_MAX_HEADER_SIZE, f)) { - /* boundary line must start with 2 dashes */ - if ( Ustrncmp(header, "--", 2) == 0 - && Ustrncmp(header+2, context->boundary, Ustrlen(context->boundary)) == 0) - { - /* found boundary */ - if (Ustrncmp((header+2+Ustrlen(context->boundary)), "--", 2) == 0) - { - /* END boundary found */ - debug_printf("End boundary found %s\n", context->boundary); - return rc; - } - else - debug_printf("Next part with boundary %s\n", context->boundary); + /* Hit EOF or read error. Ugh. 
*/ + DEBUG(D_acl) debug_printf("Hit EOF ...\n"); + return rc; + } - /* can't use break here */ - goto DECODE_HEADERS; + /* boundary line must start with 2 dashes */ + if ( Ustrncmp(header, "--", 2) == 0 + && Ustrncmp(header+2, context->boundary, Ustrlen(context->boundary)) == 0 + ) + { /* found boundary */ + if (Ustrncmp((header+2+Ustrlen(context->boundary)), "--", 2) == 0) + { + /* END boundary found */ + DEBUG(D_acl) debug_printf("End boundary found %s\n", + context->boundary); + return rc; } + + DEBUG(D_acl) debug_printf("Next part with boundary %s\n", + context->boundary); + break; } - /* Hit EOF or read error. Ugh. */ - debug_printf("Hit EOF ...\n"); - return rc; } -DECODE_HEADERS: /* parse headers, set up expansion variables */ while (mime_get_header(f, header)) { - int i; - /* loop through header list */ - for (i = 0; i < mime_header_list_size; i++) - if (strncmpic(mime_header_list[i].name, - header, mime_header_list[i].namelen) == 0) - { /* found an interesting header */ - uschar * header_value; - int header_value_len; - uschar * p = header + mime_header_list[i].namelen; - - /* grab the value (normalize to lower case) - and copy to its corresponding expansion variable */ - while(*p != ';') - { - *p = tolower(*p); - p++; - } - header_value_len = p - (header + mime_header_list[i].namelen); - p = header + mime_header_list[i].namelen; - header_value = string_copyn(p, header_value_len); - debug_printf("Found %s MIME header, value is '%s'\n", - mime_header_list[i].name, header_value); - *((uschar **)(mime_header_list[i].value)) = header_value; + struct mime_header * mh; + + /* look for interesting headers */ + for (mh = mime_header_list; + mh < mime_header_list + mime_header_list_size; + mh++) if (strncmpic(mh->name, header, mh->namelen) == 0) + { + uschar * header_value = NULL; + int header_value_len = 0; + uschar * p = header + mh->namelen; + uschar * q; - /* make p point to the next character after the closing ';' */ - p += header_value_len+1; + /* grab the 
value (normalize to lower case) + and copy to its corresponding expansion variable */ + + for (q = p; *q != ';' && *q; q++) ; + *mh->value = string_copynlc(p, q-p); + DEBUG(D_acl) debug_printf("found %s MIME header, value is '%s'\n", + mh->name, *mh->value); + + if (*(p = q)) p++; /* jump past the ; */ + + { + uschar * mime_fname = NULL; + uschar * mime_fname_rfc2231 = NULL; + uschar * mime_filename_charset = NULL; + BOOL decoding_failed = FALSE; /* grab all param=value tags on the remaining line, check if they are interesting */ -NEXT_PARAM_SEARCH: + while (*p) { - /* debug_printf(" considering paramlist '%s'\n", p); */ mime_parameter * mp; - for (mp = mime_parameter_list; - mp < &mime_parameter_list[mime_parameter_list_size]; - mp++) - { - uschar * param_value = NULL; - - /* found an interesting parameter? */ - if (strncmpic(mp->name, p, mp->namelen) == 0) + + DEBUG(D_acl) debug_printf(" considering paramlist '%s'\n", p); + + if ( !mime_filename + && strncmpic("content-disposition:", header, 20) == 0 + && strncmpic("filename*", p, 9) == 0 + ) + { /* RFC 2231 filename */ + uschar * q; + + /* find value of the filename */ + p += 9; + while(*p != '=' && *p) p++; + if (*p) p++; /* p is filename or NUL */ + q = mime_param_val(&p); /* p now trailing ; or NUL */ + + if (q && *q) { - int size = 0; - int ptr = 0; + uschar * temp_string, * err_msg; + int slen; - /* yes, grab the value and copy to its corresponding expansion variable */ - p += mp->namelen; - while(*p && *p != ';') /* ; terminates */ - if (*p == '"') + /* build up an un-decoded filename over successive + filename*= parameters (for use when 2047 decode fails) */ + + mime_fname_rfc2231 = string_sprintf("%#s%s", + mime_fname_rfc2231, q); + + if (!decoding_failed) + { + int size; + if (!mime_filename_charset) { - p++; /* skip leading " */ - while(*p && *p != '"') /* " protects ; */ - param_value = string_cat(param_value, &size, &ptr, p++, 1); - if (*p) p++; /* skip trailing " */ + uschar * s = q; + + /* look for 
a ' in the "filename" */ + while(*s != '\'' && *s) s++; /* s is ' or NUL */ + + if ((size = s-q) > 0) + { + mime_filename_charset = string_copyn(q, size); + p = s; + + while(*p == '\'' && *p) p++; /* p is after ' */ + } } else - param_value = string_cat(param_value, &size, &ptr, p++, 1); - if (*p) p++; /* skip trailing ; */ + p = q; - if (param_value) - { - uschar * dummy; - param_value[ptr++] = '\0'; - - param_value = rfc2047_decode(param_value, - check_rfc2047_length, NULL, 32, NULL, &dummy); - debug_printf(" Found %s MIME parameter in %s header, " - "value is '%s'\n", mp->name, mime_header_list[i].name, - param_value); + temp_string = expand_string(string_sprintf( + "=?%s?Q?${sg{%s}{\\N%%([\\dA-Fa-f]{2})\\N}{=\\$1}}?=", + mime_filename_charset, p)); + slen = Ustrlen(temp_string); + + temp_string = rfc2047_decode(temp_string, FALSE, NULL, 32, + NULL, &err_msg); + size = Ustrlen(temp_string); + + if (size == slen) + decoding_failed = TRUE; + else + /* build up a decoded filename over successive + filename*= parameters */ + + mime_filename = mime_fname = mime_fname + ? string_sprintf("%s%s", mime_fname, temp_string) + : temp_string; } - *mp->value = param_value; - goto NEXT_PARAM_SEARCH; - } - } - /* There is something, but not one of our interesting parameters. - Advance to the next unquoted semicolon */ - while(*p && *p != ';') - if (*p == '"') - { - while(*++p && *p != '"') ; - if (*p) p++; + } } + else - p++; - if (*p) p++; + /* look for interesting parameters */ + for (mp = mime_parameter_list; + mp < mime_parameter_list + nelem(mime_parameter_list); + mp++ + ) if (strncmpic(mp->name, p, mp->namelen) == 0) + { + uschar * q; + uschar * dummy_errstr; + + /* grab the value and copy to its expansion variable */ + p += mp->namelen; + q = mime_param_val(&p); /* p now trailing ; or NUL */ + + *mp->value = q && *q + ? 
rfc2047_decode(q, check_rfc2047_length, NULL, 32, NULL, + &dummy_errstr) + : NULL; + DEBUG(D_acl) debug_printf( + " found %s MIME parameter in %s header, value '%s'\n", + mp->name, mh->name, *mp->value); + + break; /* done matching param names */ + } + + + /* There is something, but not one of our interesting parameters. + Advance past the next semicolon */ + p = mime_next_semicolon(p); + if (*p) p++; + } /* param scan on line */ + + if (strncmpic("content-disposition:", header, 20) == 0) + { + if (decoding_failed) mime_filename = mime_fname_rfc2231; + + DEBUG(D_acl) debug_printf( + " found %s MIME parameter in %s header, value is '%s'\n", + "filename", mh->name, mime_filename); + } } } - } + } /* set additional flag variables (easier access) */ - if ( (mime_content_type != NULL) && - (Ustrncmp(mime_content_type,"multipart",9) == 0) ) + if ( mime_content_type + && Ustrncmp(mime_content_type,"multipart",9) == 0 + ) mime_is_multipart = 1; /* Make a copy of the boundary pointer. @@ -680,7 +789,8 @@ NEXT_PARAM_SEARCH: (nested_context.boundary != NULL) && (Ustrncmp(mime_content_type,"multipart",9) == 0) ) { - debug_printf("Entering multipart recursion, boundary '%s'\n", nested_context.boundary); + DEBUG(D_acl) debug_printf("Entering multipart recursion, boundary '%s'\n", + nested_context.boundary); nested_context.context = context && context->context == MBC_ATTACHMENT @@ -726,19 +836,22 @@ NEXT_PARAM_SEARCH: { log_write(0, LOG_MAIN, "mime_regex acl condition warning - could not decode RFC822 MIME part to file."); - return DEFER; + rc = DEFER; + goto out; } mime_decoded_filename = NULL; } NO_RFC822: /* If the boundary of this instance is NULL, we are finished here */ - if (context == NULL) break; + if (!context) break; if (context->context == MBC_COVERLETTER_ONESHOT) context->context = MBC_ATTACHMENT; } +out: +mime_vars_reset(); return rc; } diff --git a/src/src/mime.h b/src/src/mime.h index af09f677d..c9acb246c 100644 --- a/src/src/mime.h +++ b/src/src/mime.h @@ -22,17 
+22,17 @@ struct mime_boundary_context }; typedef struct mime_header { - uschar *name; - int namelen; - void *value; + uschar * name; + int namelen; + uschar ** value; } mime_header; static mime_header mime_header_list[] = { - { US"content-type:", 13, &mime_content_type }, - { US"content-disposition:", 20, &mime_content_disposition }, + { US"content-type:", 13, &mime_content_type }, + { US"content-disposition:", 20, &mime_content_disposition }, { US"content-transfer-encoding:", 26, &mime_content_transfer_encoding }, - { US"content-id:", 11, &mime_content_id }, - { US"content-description:", 20 , &mime_content_description } + { US"content-id:", 11, &mime_content_id }, + { US"content-description:", 20, &mime_content_description } }; static int mime_header_list_size = sizeof(mime_header_list)/sizeof(mime_header); @@ -48,12 +48,10 @@ typedef struct mime_parameter { static mime_parameter mime_parameter_list[] = { { US"name=", 5, &mime_filename }, { US"filename=", 9, &mime_filename }, - { US"charset=", 8, &mime_charset }, + { US"charset=", 8, &mime_charset }, { US"boundary=", 9, &mime_boundary } }; -static int mime_parameter_list_size = sizeof(mime_parameter_list)/sizeof(mime_parameter); - /* MIME Anomaly list */ #define MIME_ANOMALY_BROKEN_BASE64 2, "Broken BASE64 encoding detected" diff --git a/test/log/4000 b/test/log/4000 index e2c364fca..bb1a04fe9 100644 --- a/test/log/4000 +++ b/test/log/4000 @@ -13,3 +13,6 @@ 1999-03-02 09:44:33 10HmbB-0005vi-00 <= CALLER@myhost.test.ex U=CALLER P=local-esmtp S=sss id=20041217133501.GA3059@test.ex T="Nasty4" 1999-03-02 09:44:33 10HmbB-0005vi-00 => userx R=r1 T=t1 1999-03-02 09:44:33 10HmbB-0005vi-00 Completed +1999-03-02 09:44:33 10HmbC-0005vi-00 <= CALLER@myhost.test.ex U=CALLER P=local-esmtp S=sss id=20041217133501.GA3058@test.ex +1999-03-02 09:44:33 10HmbC-0005vi-00 => userx R=r1 T=t1 +1999-03-02 09:44:33 10HmbC-0005vi-00 Completed diff --git a/test/mail/4000.userx b/test/mail/4000.userx index dbbac1aac..f59b904f3 100644 --- 
a/test/mail/4000.userx +++ b/test/mail/4000.userx @@ -295,3 +295,68 @@ foobar --T4sUOijqQbZv57TR-- +From CALLER@myhost.test.ex Tue Mar 02 09:44:33 1999 +Received: from CALLER (helo=test.ex) + by myhost.test.ex with local-esmtp (Exim x.yz) + (envelope-from ) + id 10HmbC-0005vi-00 + for userx@test.ex; Tue, 2 Mar 1999 09:44:33 +0000 +Date: Tue, 2 Mar 1999 09:44:33 +0000 +Message-ID: <20041217133501.GA3058@test.ex> +Mime-Version: 1.0 +Content-Type: multipart/mixed; boundary="T4sUOijqQbZv57TR" +From: CALLER_NAME +X-0-content-type: multipart/mixed +X-0-filename: +X-0-charset: +X-0-boundary: T4sUOijqQbZv57TR +X-0-content-disposition: +X-0-content-transfer-encoding: +X-0-content-id: +X-0-content-description: +X-0-is-multipart: 1 +X-0-is-coverletter: 1 +X-0-is-rfc822: 0 +X-0-decode-filename: TESTSUITE/spool/scan/10HmbC-0005vi-00/10HmbC-0005vi-00-00000 +X-0-content-size: 1 +X-1-content-type: text/plain +X-1-filename: test ä test1 +X-1-charset: us-ascii +X-1-boundary: +X-1-content-disposition: attachment +X-1-content-transfer-encoding: +X-1-content-id: +X-1-content-description: +X-1-is-multipart: 0 +X-1-is-coverletter: 1 +X-1-is-rfc822: 0 +X-1-decode-filename: TESTSUITE/spool/scan/10HmbC-0005vi-00/10HmbC-0005vi-00-00001 +X-1-content-size: 1 +X-2-content-type: text/plain +X-2-filename: test ä test2 +X-2-charset: us-ascii +X-2-boundary: +X-2-content-disposition: attachment +X-2-content-transfer-encoding: +X-2-content-id: +X-2-content-description: +X-2-is-multipart: 0 +X-2-is-coverletter: 0 +X-2-is-rfc822: 0 +X-2-decode-filename: TESTSUITE/spool/scan/10HmbC-0005vi-00/10HmbC-0005vi-00-00002 +X-2-content-size: 1 + +--T4sUOijqQbZv57TR +Content-Type: text/plain; charset=us-ascii +Content-Disposition: attachment; filename="=?iso-8859-1?Q?test_=E4_test1?=" + +(content 1: filename is rfc2047 encoded) + +--T4sUOijqQbZv57TR +Content-Type: text/plain; charset=us-ascii +Content-Disposition: attachment; filename*=ISO-8859-1''%74%65%73%74%20%E4%20%74%65%73%74%32 + +(content 2: filename is 
rfc2231 encoded) + +--T4sUOijqQbZv57TR-- + diff --git a/test/scripts/4000-scanning/4000 b/test/scripts/4000-scanning/4000 index cd53007b2..eda235b10 100644 --- a/test/scripts/4000-scanning/4000 +++ b/test/scripts/4000-scanning/4000 @@ -185,3 +185,31 @@ foobar . quit **** +# +# This one has two attachments, using different encodings +exim -odi -bs +ehlo test.ex +mail from:<> +rcpt to: +data +Date: Fri, 17 Dec 2004 14:35:01 +0100 +Message-ID: <20041217133501.GA3058@test.ex> +Mime-Version: 1.0 +Content-Type: multipart/mixed; boundary="T4sUOijqQbZv57TR" + +--T4sUOijqQbZv57TR +Content-Type: text/plain; charset=us-ascii +Content-Disposition: attachment; filename="=?iso-8859-1?Q?test_=E4_test1?=" + +(content 1: filename is rfc2047 encoded) + +--T4sUOijqQbZv57TR +Content-Type: text/plain; charset=us-ascii +Content-Disposition: attachment; filename*=ISO-8859-1''%74%65%73%74%20%E4%20%74%65%73%74%32 + +(content 2: filename is rfc2231 encoded) + +--T4sUOijqQbZv57TR-- +. +quit +**** diff --git a/test/stdout/4000 b/test/stdout/4000 index 24b8e2868..95511480e 100644 --- a/test/stdout/4000 +++ b/test/stdout/4000 @@ -53,3 +53,14 @@ 354 Enter message, ending with "." on a line by itself 250 OK id=10HmbB-0005vi-00 221 myhost.test.ex closing connection +220 myhost.test.ex ESMTP Exim x.yz Tue, 2 Mar 1999 09:44:33 +0000 +250-myhost.test.ex Hello CALLER at test.ex +250-SIZE 52428800 +250-8BITMIME +250-PIPELINING +250 HELP +250 OK +250 Accepted +354 Enter message, ending with "." on a line by itself +250 OK id=10HmbC-0005vi-00 +221 myhost.test.ex closing connection -- cgit v1.2.3