From 7d5055276a22a91de71104775ade236051cebefc Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Sun, 19 Jun 2022 17:15:25 +0100 Subject: Regex compile cacheing --- src/OS/Makefile-Base | 3 +- src/scripts/MakeLinks | 2 +- src/src/acl.c | 12 +- src/src/daemon.c | 28 +++- src/src/deliver.c | 2 +- src/src/dns.c | 2 +- src/src/drtables.c | 4 +- src/src/exim.c | 51 +------ src/src/expand.c | 229 +++++++++++++++--------------- src/src/filter.c | 298 +++++++++++++++++++--------------------- src/src/functions.h | 16 ++- src/src/globals.c | 1 + src/src/globals.h | 1 + src/src/header.c | 6 +- src/src/macros.h | 12 +- src/src/malware.c | 100 +++++++------- src/src/match.c | 129 ++++++++--------- src/src/queue.c | 4 +- src/src/readconf.c | 2 +- src/src/regex.c | 42 +++--- src/src/regex_cache.c | 245 +++++++++++++++++++++++++++++++++ src/src/rewrite.c | 6 +- src/src/routers/iplookup.c | 14 +- src/src/structs.h | 2 +- src/src/transports/appendfile.c | 28 +--- src/src/transports/smtp.c | 6 +- src/src/transports/tf_maildir.c | 13 +- src/src/verify.c | 24 ++-- 28 files changed, 749 insertions(+), 533 deletions(-) create mode 100644 src/src/regex_cache.c (limited to 'src') diff --git a/src/OS/Makefile-Base b/src/OS/Makefile-Base index 99a9f7e75..0c64d45d4 100644 --- a/src/OS/Makefile-Base +++ b/src/OS/Makefile-Base @@ -488,7 +488,7 @@ OBJ_EXIM = acl.o base64.o child.o crypt16.o daemon.o dbfn.o debug.o deliver.o \ filtertest.o globals.o dkim.o dkim_transport.o dnsbl.o hash.o \ header.o host.o host_address.o ip.o log.o lss.o match.o md5.o moan.o \ os.o parse.o priv.o queue.o \ - rda.o readconf.o receive.o retry.o rewrite.o rfc2047.o \ + rda.o readconf.o receive.o retry.o rewrite.o rfc2047.o regex_cache.o \ route.o search.o sieve.o smtp_in.o smtp_out.o spool_in.o spool_out.o \ std-crypto.o store.o string.o tls.o tod.o transport.o tree.o verify.o \ environment.o macro.o \ @@ -809,6 +809,7 @@ readconf.o: $(HDRS) readconf.c receive.o: $(HDRS) receive.c retry.o: $(HDRS) retry.c rewrite.o: 
$(HDRS) rewrite.c +regex_cache.o: $(HDRS) regex_cache.c rfc2047.o: $(HDRS) rfc2047.c route.o: $(HDRS) route.c search.o: $(HDRS) search.c diff --git a/src/scripts/MakeLinks b/src/scripts/MakeLinks index afc2fab32..471b3a369 100755 --- a/src/scripts/MakeLinks +++ b/src/scripts/MakeLinks @@ -104,7 +104,7 @@ for f in blob.h dbfunctions.h exim.h functions.h globals.h \ exim_dbmbuild.c exim_dbutil.c exim_lock.c expand.c filter.c filtertest.c \ globals.c hash.c header.c host.c host_address.c ip.c log.c lss.c match.c md5.c moan.c \ parse.c perl.c priv.c queue.c rda.c readconf.c receive.c retry.c rewrite.c \ - rfc2047.c route.c search.c setenv.c environment.c \ + regex_cache.c rfc2047.c route.c search.c setenv.c environment.c \ sieve.c smtp_in.c smtp_out.c spool_in.c spool_out.c std-crypto.c store.c \ string.c tls.c tlscert-gnu.c tlscert-openssl.c tls-cipher-stdname.c \ tls-gnu.c tls-openssl.c \ diff --git a/src/src/acl.c b/src/src/acl.c index a1694fcdd..0078aca7d 100644 --- a/src/src/acl.c +++ b/src/src/acl.c @@ -3125,8 +3125,9 @@ int sep = -'/'; for (; cb; cb = cb->next) { - const uschar *arg; + const uschar * arg; int control_type; + BOOL textonly = FALSE; /* The message and log_message items set up messages to be used in case of rejection. They are expanded later. 
*/ @@ -3160,7 +3161,8 @@ for (; cb; cb = cb->next) if (!conditions[cb->type].expand_at_top) arg = cb->arg; - else if (!(arg = expand_string(cb->arg))) + + else if (!(arg = expand_string_2(cb->arg, &textonly))) { if (f.expand_string_forcedfail) continue; *log_msgptr = string_sprintf("failed to expand ACL string \"%s\": %s", @@ -3875,14 +3877,14 @@ for (; cb; cb = cb->next) return ERROR; } - rc = malware(ss, timeout); + rc = malware(ss, textonly, timeout); if (rc == DEFER && defer_ok) rc = FAIL; /* FAIL so that the message is passed to the next ACL */ break; } case ACLC_MIME_REGEX: - rc = mime_regex(&arg); + rc = mime_regex(&arg, textonly); break; #endif @@ -3913,7 +3915,7 @@ for (; cb; cb = cb->next) #ifdef WITH_CONTENT_SCAN case ACLC_REGEX: - rc = regex(&arg); + rc = regex(&arg, textonly); break; #endif diff --git a/src/src/daemon.c b/src/src/daemon.c index a5eb707d0..54725e07d 100644 --- a/src/src/daemon.c +++ b/src/src/daemon.c @@ -1132,6 +1132,20 @@ exim_exit(EXIT_SUCCESS); * Listener socket for local work prompts * *************************************************/ +ssize_t +daemon_client_sockname(struct sockaddr_un * sup, uschar ** sname) +{ +#ifdef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS +sup->sun_path[0] = 0; /* Abstract local socket addr - Linux-specific? */ +return offsetof(struct sockaddr_un, sun_path) + 1 + + snprintf(sup->sun_path+1, sizeof(sup->sun_path)-1, "exim_%d", getpid()); +#else +*sname = string_sprintf("%s/p_%d", spool_directory, getpid()); +return offsetof(struct sockaddr_un, sun_path) + + snprintf(sup->sun_path, sizeof(sup->sun_path), "%s", sname); +#endif +} + ssize_t daemon_notifier_sockname(struct sockaddr_un * sup) { @@ -1216,7 +1230,11 @@ bad: static uschar queuerun_msgid[MESSAGE_ID_LENGTH+1]; -/* Return TRUE if a sigalrm should be emulated */ +/* The notifier socket has something to read. Pull the message from it, decode +and do the action. 
+ +Return TRUE if a sigalrm should be emulated */ + static BOOL daemon_notification(void) { @@ -1266,7 +1284,6 @@ for (struct cmsghdr * cp = CMSG_FIRSTHDR(&msg); { DEBUG(D_queue_run) debug_printf("%s: sender creds pid %d uid %d gid %d\n", __FUNCTION__, (int)cr->pid, (int)cr->uid, (int)cr->gid); - return FALSE; } # elif defined(LOCAL_CREDS) /* BSD-ish */ struct sockcred * cr = (struct sockcred *) CMSG_DATA(cp); @@ -1274,7 +1291,6 @@ for (struct cmsghdr * cp = CMSG_FIRSTHDR(&msg); { DEBUG(D_queue_run) debug_printf("%s: sender creds pid ??? uid %d gid %d\n", __FUNCTION__, (int)cr->sc_uid, (int)cr->sc_gid); - return FALSE; } # endif break; @@ -1305,8 +1321,12 @@ switch (buf[0]) (const struct sockaddr *)&sa_un, msg.msg_namelen) < 0) log_write(0, LOG_MAIN|LOG_PANIC, "%s: sendto: %s\n", __FUNCTION__, strerror(errno)); - return FALSE; + break; } + + case NOTIFY_REGEX: + regex_at_daemon(buf); + break; } return FALSE; } diff --git a/src/src/deliver.c b/src/src/deliver.c index 8a9a174e3..725d0c872 100644 --- a/src/src/deliver.c +++ b/src/src/deliver.c @@ -7203,7 +7203,7 @@ local and remote LMTP deliveries. 
*/ if (!regex_IGNOREQUOTA) regex_IGNOREQUOTA = - regex_must_compile(US"\\n250[\\s\\-]IGNOREQUOTA(\\s|\\n|$)", FALSE, TRUE); + regex_must_compile(US"\\n250[\\s\\-]IGNOREQUOTA(\\s|\\n|$)", MCS_NOFLAGS, TRUE); /* Handle local deliveries */ diff --git a/src/src/dns.c b/src/src/dns.c index 7d7ee0c04..4071c5822 100644 --- a/src/src/dns.c +++ b/src/src/dns.c @@ -1324,7 +1324,7 @@ dns_pattern_init(void) { if (check_dns_names_pattern[0] != 0 && !regex_check_dns_names) regex_check_dns_names = - regex_must_compile(check_dns_names_pattern, FALSE, TRUE); + regex_must_compile(check_dns_names_pattern, MCS_NOFLAGS, TRUE); } /* vi: aw ai sw=2 diff --git a/src/src/drtables.c b/src/src/drtables.c index 513ef6c4a..b2f2a4b33 100644 --- a/src/src/drtables.c +++ b/src/src/drtables.c @@ -728,8 +728,8 @@ if (!(dd = exim_opendir(LOOKUP_MODULE_DIR))) } else { - const pcre2_code *regex_islookupmod = regex_must_compile( - US"\\." DYNLIB_FN_EXT "$", FALSE, TRUE); + const pcre2_code * regex_islookupmod = regex_must_compile( + US"\\." DYNLIB_FN_EXT "$", MCS_NOFLAGS, TRUE); DEBUG(D_lookup) debug_printf("Loading lookup modules from %s\n", LOOKUP_MODULE_DIR); while ((ent = readdir(dd))) diff --git a/src/src/exim.c b/src/src/exim.c index 052c6bf5c..99a4faa8c 100644 --- a/src/src/exim.c +++ b/src/src/exim.c @@ -83,45 +83,6 @@ enum commandline_info { CMDINFO_NONE=0, -/************************************************* -* Compile regular expression and panic on fail * -*************************************************/ - -/* This function is called when failure to compile a regular expression leads -to a panic exit. In other cases, pcre_compile() is called directly. In many -cases where this function is used, the results of the compilation are to be -placed in long-lived store, so we temporarily reset the store management -functions that PCRE uses if the use_malloc flag is set. 
- -Argument: - pattern the pattern to compile - caseless TRUE if caseless matching is required - use_malloc TRUE if compile into malloc store - -Returns: pointer to the compiled pattern -*/ - -const pcre2_code * -regex_must_compile(const uschar * pattern, BOOL caseless, BOOL use_malloc) -{ -size_t offset; -int options = caseless ? PCRE_COPT|PCRE2_CASELESS : PCRE_COPT; -const pcre2_code * yield; -int err; - -if (!(yield = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, options, - &err, &offset, use_malloc ? pcre_mlc_cmp_ctx : pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - log_write(0, LOG_MAIN|LOG_PANIC_DIE, "regular expression error: " - "%s at offset %ld while compiling %s", errbuf, (long)offset, pattern); - } - -return yield; -} - - static void pcre_init(void) { @@ -2019,7 +1980,7 @@ this here, because the -M options check their arguments for syntactic validity using mac_ismsgid, which uses this. */ regex_ismsgid = - regex_must_compile(US"^(?:[^\\W_]{6}-){2}[^\\W_]{2}$", FALSE, TRUE); + regex_must_compile(US"^(?:[^\\W_]{6}-){2}[^\\W_]{2}$", MCS_NOFLAGS, TRUE); /* Precompile the regular expression that is used for matching an SMTP error code, possibly extended, at the start of an error message. Note that the @@ -2027,14 +1988,14 @@ terminating whitespace character is included. */ regex_smtp_code = regex_must_compile(US"^\\d\\d\\d\\s(?:\\d\\.\\d\\d?\\d?\\.\\d\\d?\\d?\\s)?", - FALSE, TRUE); + MCS_NOFLAGS, TRUE); #ifdef WHITELIST_D_MACROS /* Precompile the regular expression used to filter the content of macros given to -D for permissibility. */ regex_whitelisted_macro = - regex_must_compile(US"^[A-Za-z0-9_/.-]*$", FALSE, TRUE); + regex_must_compile(US"^[A-Za-z0-9_/.-]*$", MCS_NOFLAGS, TRUE); #endif for (i = 0; i < REGEX_VARS; i++) regex_vars[i] = NULL; @@ -2252,7 +2213,7 @@ on the second character (the one after '-'), to save some effort. */ -bdf: Ditto, but in the foreground. 
*/ case 'd': - f.daemon_listen = TRUE; + f.daemon_listen = f.daemon_scion = TRUE; if (*argrest == 'f') f.background_daemon = FALSE; else if (*argrest) badarg = TRUE; break; @@ -2512,7 +2473,7 @@ on the second character (the one after '-'), to save some effort. */ case 'w': f.inetd_wait_mode = TRUE; f.background_daemon = FALSE; - f.daemon_listen = TRUE; + f.daemon_listen = f.daemon_scion = TRUE; if (*argrest) if ((inetd_wait_timeout = readconf_readtime(argrest, 0, FALSE)) <= 0) exim_fail("exim: bad time value %s: abandoned\n", argv[i]); @@ -5039,7 +5000,7 @@ for (i = 0;;) if (gecos_pattern && gecos_name) { const pcre2_code *re; - re = regex_must_compile(gecos_pattern, FALSE, TRUE); /* Use malloc */ + re = regex_must_compile(gecos_pattern, MCS_NOFLAGS, TRUE); /* Use malloc */ if (regex_match_and_setup(re, name, 0, -1)) { diff --git a/src/src/expand.c b/src/src/expand.c index 9b54ccad1..4d7dc7219 100644 --- a/src/src/expand.c +++ b/src/src/expand.c @@ -14,7 +14,7 @@ /* Recursively called function */ -static uschar *expand_string_internal(const uschar *, BOOL, const uschar **, BOOL, BOOL, BOOL *); +static uschar *expand_string_internal(const uschar *, BOOL, const uschar **, BOOL, BOOL, BOOL *, BOOL *); static int_eximarith_t expanded_string_integer(const uschar *, BOOL); #ifdef STAND_ALONE @@ -1748,9 +1748,7 @@ uschar buf[16]; int fd; ssize_t len; const uschar * where; -#ifndef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS uschar * sname; -#endif if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) { @@ -1758,17 +1756,9 @@ if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) return NULL; } -#ifdef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS -sa_un.sun_path[0] = 0; /* Abstract local socket addr - Linux-specific? 
*/ -len = offsetof(struct sockaddr_un, sun_path) + 1 - + snprintf(sa_un.sun_path+1, sizeof(sa_un.sun_path)-1, "exim_%d", getpid()); -#else -sname = string_sprintf("%s/p_%d", spool_directory, getpid()); -len = offsetof(struct sockaddr_un, sun_path) - + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path), "%s", sname); -#endif +len = daemon_client_sockname(&sa_un, &sname); -if (bind(fd, (const struct sockaddr *)&sa_un, len) < 0) +if (bind(fd, (const struct sockaddr *)&sa_un, (socklen_t)len) < 0) { where = US"bind"; goto bad; } #ifdef notdef @@ -2108,7 +2098,9 @@ Arguments: check_end if TRUE, check for final '}' name name of item, for error message resetok if not NULL, pointer to flag - write FALSE if unsafe to reset - the store. + the store + textonly_p if not NULL, pointer to bitmask of which subs were text-only + (did not change when expanded) Returns: 0 OK; string pointer updated 1 curly bracketing error (too few arguments) @@ -2118,13 +2110,15 @@ Returns: 0 OK; string pointer updated static int read_subs(uschar **sub, int n, int m, const uschar **sptr, BOOL skipping, - BOOL check_end, uschar *name, BOOL *resetok) + BOOL check_end, uschar *name, BOOL *resetok, unsigned * textonly_p) { -const uschar *s = *sptr; +const uschar * s = *sptr; +unsigned textonly_l = 0; Uskip_whitespace(&s); for (int i = 0; i < n; i++) { + BOOL textonly; if (*s != '{') { if (i < m) @@ -2136,9 +2130,11 @@ for (int i = 0; i < n; i++) sub[i] = NULL; break; } - if (!(sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, resetok))) + if (!(sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, resetok, + textonly_p ? 
&textonly : NULL))) return 3; if (*s++ != '}') return 1; + if (textonly_p && textonly) textonly_l |= BIT(i); Uskip_whitespace(&s); } if (check_end && *s++ != '}') @@ -2153,6 +2149,7 @@ if (check_end && *s++ != '}') return 1; } +if (textonly_p) *textonly_p = textonly_l; *sptr = s; return 0; } @@ -2513,11 +2510,11 @@ Returns: a pointer to the first character after the condition, or */ static const uschar * -eval_condition(const uschar *s, BOOL *resetok, BOOL *yield) +eval_condition(const uschar * s, BOOL * resetok, BOOL * yield) { BOOL testfor = TRUE; BOOL tempcond, combined_cond; -BOOL *subcondptr; +BOOL * subcondptr; BOOL sub2_honour_dollar = TRUE; BOOL is_forany, is_json, is_jsons; int rc, cond_type; @@ -2525,7 +2522,8 @@ int_eximarith_t num[2]; struct stat statbuf; uschar * opname; uschar name[256]; -const uschar *sub[10]; +const uschar * sub[10]; +unsigned sub_textonly = 0; for (;;) if (Uskip_whitespace(&s) == '!') { testfor = !testfor; s++; } else break; @@ -2619,8 +2617,12 @@ switch(cond_type = identify_operator(&s, &opname)) if (Uskip_whitespace(&s) != '{') goto COND_FAILED_CURLY_START; /* }-for-text-editors */ - sub[0] = expand_string_internal(s+1, TRUE, &s, yield == NULL, TRUE, resetok); - if (!sub[0]) return NULL; + { + BOOL textonly; + sub[0] = expand_string_internal(s+1, TRUE, &s, yield == NULL, TRUE, resetok, &textonly); + if (!sub[0]) return NULL; + if (textonly) sub_textonly |= BIT(0); + } /* {-for-text-editors */ if (*s++ != '}') goto COND_FAILED_CURLY_END; @@ -2718,7 +2720,7 @@ switch(cond_type = identify_operator(&s, &opname)) if (*s++ != '{') goto COND_FAILED_CURLY_START; /*}*/ switch(read_subs(sub, nelem(sub), 1, - &s, yield == NULL, TRUE, name, resetok)) + &s, yield == NULL, TRUE, name, resetok, NULL)) { case 1: expand_string_message = US"too few arguments or bracketing " "error for acl"; @@ -2770,7 +2772,7 @@ switch(cond_type = identify_operator(&s, &opname)) Uskip_whitespace(&s); if (*s++ != '{') goto COND_FAILED_CURLY_START; /* 
}-for-text-editors */ switch(read_subs(sub, nelem(sub), 2, &s, yield == NULL, TRUE, name, - resetok)) + resetok, NULL)) { case 1: expand_string_message = US"too few arguments or bracketing " "error for saslauthd"; @@ -2838,9 +2840,11 @@ switch(cond_type = identify_operator(&s, &opname)) for (int i = 0; i < 2; i++) { + BOOL textonly; /* Sometimes, we don't expand substrings; too many insecure configurations created using match_address{}{} and friends, where the second param includes information from untrustworthy sources. */ + /*XXX is this moot given taint-tracking? */ BOOL honour_dollar = TRUE; if ((i > 0) && !sub2_honour_dollar) honour_dollar = FALSE; @@ -2853,8 +2857,9 @@ switch(cond_type = identify_operator(&s, &opname)) return NULL; } if (!(sub[i] = expand_string_internal(s+1, TRUE, &s, yield == NULL, - honour_dollar, resetok))) + honour_dollar, resetok, &textonly))) return NULL; + if (textonly) sub_textonly |= BIT(i); DEBUG(D_expand) if (i == 1 && !sub2_honour_dollar && Ustrchr(sub[1], '$')) debug_printf_indent("WARNING: the second arg is NOT expanded," " for security reasons\n"); @@ -2934,19 +2939,11 @@ switch(cond_type = identify_operator(&s, &opname)) case ECOND_MATCH: /* Regular expression match */ { - const pcre2_code * re; - PCRE2_SIZE offset; - int err; - - if (!(re = pcre2_compile((PCRE2_SPTR)sub[1], PCRE2_ZERO_TERMINATED, - PCRE_COPT, &err, &offset, pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - expand_string_message = string_sprintf("regular expression error in " - "\"%s\": %s at offset %ld", sub[1], errbuf, (long)offset); + const pcre2_code * re = regex_compile(sub[1], + sub_textonly & BIT(1) ? 
MCS_CACHEABLE : MCS_NOFLAGS, + &expand_string_message, pcre_gen_cmp_ctx); + if (!re) return NULL; - } tempcond = regex_match_and_setup(re, sub[0], 0, -1); break; @@ -3264,7 +3261,7 @@ switch(cond_type = identify_operator(&s, &opname)) Uskip_whitespace(&s); if (*s++ != '{') goto COND_FAILED_CURLY_START; /* }-for-text-editors */ - if (!(sub[0] = expand_string_internal(s, TRUE, &s, yield == NULL, TRUE, resetok))) + if (!(sub[0] = expand_string_internal(s, TRUE, &s, yield == NULL, TRUE, resetok, NULL))) return NULL; /* {-for-text-editors */ if (*s++ != '}') goto COND_FAILED_CURLY_END; @@ -3352,7 +3349,7 @@ switch(cond_type = identify_operator(&s, &opname)) if (Uskip_whitespace(&s) != '{') goto COND_FAILED_CURLY_START; /* }-for-text-editors */ ourname = cond_type == ECOND_BOOL_LAX ? US"bool_lax" : US"bool"; - switch(read_subs(sub_arg, 1, 1, &s, yield == NULL, FALSE, ourname, resetok)) + switch(read_subs(sub_arg, 1, 1, &s, yield == NULL, FALSE, ourname, resetok, NULL)) { case 1: expand_string_message = string_sprintf( "too few arguments or bracketing error for %s", @@ -3420,7 +3417,7 @@ switch(cond_type = identify_operator(&s, &opname)) uschar cksum[4]; BOOL boolvalue = FALSE; - switch(read_subs(sub, 2, 2, CUSS &s, yield == NULL, FALSE, name, resetok)) + switch(read_subs(sub, 2, 2, CUSS &s, yield == NULL, FALSE, name, resetok, NULL)) { case 1: expand_string_message = US"too few arguments or bracketing " "error for inbound_srs"; @@ -3431,7 +3428,7 @@ switch(cond_type = identify_operator(&s, &opname)) /* Match the given local_part against the SRS-encoded pattern */ re = regex_must_compile(US"^(?i)SRS0=([^=]+)=([A-Z2-7]+)=([^=]*)=(.*)$", - TRUE, FALSE); + MCS_CASELESS | MCS_CACHEABLE, FALSE); md = pcre2_match_data_create(4+1, pcre_gen_ctx); if (pcre2_match(re, sub[0], PCRE2_ZERO_TERMINATED, 0, PCRE_EOPT, md, pcre_gen_mtc_ctx) < 0) @@ -3677,7 +3674,7 @@ if (*s++ != '{') want this string. 
Set skipping in the call in the fail case (this will always be the case if we were already skipping). */ -sub1 = expand_string_internal(s, TRUE, &s, !yes, TRUE, resetok); +sub1 = expand_string_internal(s, TRUE, &s, !yes, TRUE, resetok, NULL); if (sub1 == NULL && (yes || !f.expand_string_forcedfail)) goto FAILED; f.expand_string_forcedfail = FALSE; if (*s++ != '}') @@ -3706,7 +3703,7 @@ already skipping. */ if (skip_whitespace(&s) == '{') { - sub2 = expand_string_internal(s+1, TRUE, &s, yes || skipping, TRUE, resetok); + sub2 = expand_string_internal(s+1, TRUE, &s, yes || skipping, TRUE, resetok, NULL); if (sub2 == NULL && (!yes || !f.expand_string_forcedfail)) goto FAILED; f.expand_string_forcedfail = FALSE; if (*s++ != '}') @@ -4445,6 +4442,7 @@ Arguments: FALSE if it's just another character resetok_p if not NULL, pointer to flag - write FALSE if unsafe to reset the store. + textonly_p if not NULL, pointer to flag - write bool for only-met-text Returns: NULL if expansion fails: expand_string_forcedfail is set TRUE if failure was forced @@ -4454,7 +4452,7 @@ Returns: NULL if expansion fails: static uschar * expand_string_internal(const uschar *string, BOOL ket_ends, const uschar **left, - BOOL skipping, BOOL honour_dollar, BOOL *resetok_p) + BOOL skipping, BOOL honour_dollar, BOOL *resetok_p, BOOL * textonly_p) { rmark reset_point = store_mark(); gstring * yield = string_get(Ustrlen(string) + 64); @@ -4462,7 +4460,7 @@ int item_type; const uschar * s = string; const uschar * save_expand_nstring[EXPAND_MAXN+1]; int save_expand_nlength[EXPAND_MAXN+1]; -BOOL resetok = TRUE, first = TRUE; +BOOL resetok = TRUE, first = TRUE, textonly = TRUE; expand_level++; f.expand_string_forcedfail = FALSE; @@ -4552,6 +4550,7 @@ while (*s) s += i; continue; } + textonly = FALSE; /* No { after the $ - must be a plain name or a number for string match variable. 
There has to be a fudge for variables that are the @@ -4714,7 +4713,7 @@ while (*s) int rc; switch(read_subs(sub, nelem(sub), 1, &s, skipping, TRUE, name, - &resetok)) + &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -4750,7 +4749,7 @@ while (*s) uschar * sub_arg[1]; switch(read_subs(sub_arg, nelem(sub_arg), 1, &s, skipping, TRUE, name, - &resetok)) + &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -4837,7 +4836,7 @@ while (*s) uschar *encoded; switch(read_subs(sub_arg, nelem(sub_arg), 1, &s, skipping, TRUE, name, - &resetok)) + &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -4897,7 +4896,7 @@ while (*s) if (Uskip_whitespace(&s) == '{') /*}*/ { - key = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok); + key = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!key) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -4967,7 +4966,7 @@ while (*s) expand_string_message = US"missing '{' for lookup file-or-query arg"; goto EXPAND_FAILED_CURLY; /*}}*/ } - if (!(filename = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok))) + if (!(filename = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL))) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') @@ -5071,7 +5070,7 @@ while (*s) } switch(read_subs(sub_arg, EXIM_PERL_MAX_ARGS + 1, 1, &s, skipping, TRUE, - name, &resetok)) + name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5141,7 +5140,7 @@ while (*s) { uschar * sub_arg[3], * p, * domain; - switch(read_subs(sub_arg, 3, 2, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub_arg, 3, 2, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5214,15 +5213,16 @@ while (*s) prvscheck_address = NULL; prvscheck_keynum = NULL; - switch(read_subs(sub_arg, 1, 1, &s, skipping, FALSE, name, &resetok)) + switch(read_subs(sub_arg, 1, 1, &s, skipping, FALSE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; 
case 2: case 3: goto EXPAND_FAILED; } - re = regex_must_compile(US"^prvs\\=([0-9])([0-9]{3})([A-F0-9]{6})\\=(.+)\\@(.+)$", - TRUE,FALSE); + re = regex_must_compile( + US"^prvs\\=([0-9])([0-9]{3})([A-F0-9]{6})\\=(.+)\\@(.+)$", + MCS_CASELESS | MCS_CACHEABLE, FALSE); if (regex_match_and_setup(re,sub_arg[0],0,-1)) { @@ -5232,11 +5232,14 @@ while (*s) uschar * hash = string_copyn(expand_nstring[3],expand_nlength[3]); uschar * domain = string_copyn(expand_nstring[5],expand_nlength[5]); - DEBUG(D_expand) debug_printf_indent("prvscheck localpart: %s\n", local_part); - DEBUG(D_expand) debug_printf_indent("prvscheck key number: %s\n", key_num); - DEBUG(D_expand) debug_printf_indent("prvscheck daystamp: %s\n", daystamp); - DEBUG(D_expand) debug_printf_indent("prvscheck hash: %s\n", hash); - DEBUG(D_expand) debug_printf_indent("prvscheck domain: %s\n", domain); + DEBUG(D_expand) + { + debug_printf_indent("prvscheck localpart: %s\n", local_part); + debug_printf_indent("prvscheck key number: %s\n", key_num); + debug_printf_indent("prvscheck daystamp: %s\n", daystamp); + debug_printf_indent("prvscheck hash: %s\n", hash); + debug_printf_indent("prvscheck domain: %s\n", domain); + } /* Set up expansion variables */ g = string_cat (NULL, local_part); @@ -5246,7 +5249,7 @@ while (*s) prvscheck_keynum = string_copy(key_num); /* Now expand the second argument */ - switch(read_subs(sub_arg, 1, 1, &s, skipping, FALSE, name, &resetok)) + switch(read_subs(sub_arg, 1, 1, &s, skipping, FALSE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5257,7 +5260,6 @@ while (*s) p = prvs_hmac_sha1(prvscheck_address, sub_arg[0], prvscheck_keynum, daystamp); - if (!p) { expand_string_message = US"hmac-sha1 conversion failed"; @@ -5300,7 +5302,7 @@ while (*s) /* Now expand the final argument. We leave this till now so that it can include $prvscheck_result. 
*/ - switch(read_subs(sub_arg, 1, 0, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub_arg, 1, 0, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5321,7 +5323,7 @@ while (*s) We need to make sure all subs are expanded first, so as to skip over the entire item. */ - switch(read_subs(sub_arg, 2, 1, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub_arg, 2, 1, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5345,7 +5347,7 @@ while (*s) goto EXPAND_FAILED; } - switch(read_subs(sub_arg, 2, 1, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub_arg, 2, 1, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5386,7 +5388,7 @@ while (*s) /* Read up to 4 arguments, but don't do the end of item check afterwards, because there may be a string for expansion on failure. */ - switch(read_subs(sub_arg, 4, 2, &s, skipping, FALSE, name, &resetok)) + switch(read_subs(sub_arg, 4, 2, &s, skipping, FALSE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: /* Won't occur: no end check */ @@ -5474,7 +5476,7 @@ while (*s) if (*s == '{') /*}*/ { - if (!expand_string_internal(s+1, TRUE, &s, TRUE, TRUE, &resetok)) + if (!expand_string_internal(s+1, TRUE, &s, TRUE, TRUE, &resetok, NULL)) goto EXPAND_FAILED; /*{*/ if (*s++ != '}') { /*{*/ @@ -5500,7 +5502,7 @@ while (*s) SOCK_FAIL: if (*s != '{') goto EXPAND_FAILED; /*}*/ DEBUG(D_any) debug_printf("%s\n", expand_string_message); - if (!(arg = expand_string_internal(s+1, TRUE, &s, FALSE, TRUE, &resetok))) + if (!(arg = expand_string_internal(s+1, TRUE, &s, FALSE, TRUE, &resetok, NULL))) goto EXPAND_FAILED; yield = string_cat(yield, arg); /*{*/ if (*s++ != '}') @@ -5551,14 +5553,14 @@ while (*s) s++; if (late_expand) /* this is the default case */ - { + { /*{*/ int n = Ustrcspn(s, "}"); arg = skipping ? 
NULL : string_copyn(s, n); s += n; } else { - if (!(arg = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok))) + if (!(arg = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok, NULL))) goto EXPAND_FAILED; Uskip_whitespace(&s); } @@ -5667,7 +5669,7 @@ while (*s) int o2m; uschar * sub[3]; - switch(read_subs(sub, 3, 3, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub, 3, 3, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5709,7 +5711,7 @@ while (*s) sub[2] = NULL; switch(read_subs(sub, (item_type == EITEM_LENGTH)? 2:3, 2, &s, skipping, - TRUE, name, &resetok)) + TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5784,7 +5786,7 @@ while (*s) uschar innerkey[MAX_HASHBLOCKLEN]; uschar outerkey[MAX_HASHBLOCKLEN]; - switch (read_subs(sub, 3, 3, &s, skipping, TRUE, name, &resetok)) + switch (read_subs(sub, 3, 3, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5872,14 +5874,14 @@ while (*s) { const pcre2_code * re; int moffset, moffsetextra, slen; - PCRE2_SIZE roffset; pcre2_match_data * md; - int err, emptyopt; + int emptyopt; uschar * subject, * sub[3]; int save_expand_nmax = save_expand_strings(save_expand_nstring, save_expand_nlength); + unsigned sub_textonly = 0; - switch(read_subs(sub, 3, 3, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub, 3, 3, &s, skipping, TRUE, name, &resetok, &sub_textonly)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -5889,15 +5891,12 @@ while (*s) /* Compile the regular expression */ - if (!(re = pcre2_compile((PCRE2_SPTR)sub[1], PCRE2_ZERO_TERMINATED, - PCRE_COPT, &err, &roffset, pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - expand_string_message = string_sprintf("regular expression error in " - "\"%s\": %s at offset %ld", sub[1], errbuf, (long)roffset); + re = regex_compile(sub[1], + sub_textonly & BIT(1) ? 
MCS_CACHEABLE : MCS_NOFLAGS, + &expand_string_message, pcre_gen_cmp_ctx); + if (!re) goto EXPAND_FAILED; - } + md = pcre2_match_data_create(EXPAND_MAXN + 1, pcre_gen_ctx); /* Now run a loop to do the substitutions as often as necessary. It ends @@ -6016,7 +6015,7 @@ while (*s) { for (int j = 5; j > 0 && *s == '{'; j--) /*'}'*/ { - if (!expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok)) + if (!expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL)) goto EXPAND_FAILED; /*'{'*/ if (*s++ != '}') { @@ -6043,7 +6042,7 @@ while (*s) { if (Uskip_whitespace(&s) == '{') /*'}'*/ { - if (!(sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok))) + if (!(sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL))) goto EXPAND_FAILED; /*'{'*/ if (*s++ != '}') { @@ -6237,7 +6236,7 @@ while (*s) goto EXPAND_FAILED_CURLY; } - sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok); + sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!sub[i]) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -6318,7 +6317,7 @@ while (*s) case EITEM_LISTQUOTE: { uschar * sub[2]; - switch(read_subs(sub, 2, 2, &s, skipping, TRUE, name, &resetok)) + switch(read_subs(sub, 2, 2, &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -6347,7 +6346,7 @@ while (*s) expand_string_message = US"missing '{' for field arg of certextract"; goto EXPAND_FAILED_CURLY; /*}*/ } - sub[0] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok); + sub[0] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!sub[0]) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -6379,7 +6378,7 @@ while (*s) "be a certificate variable"; goto EXPAND_FAILED; } - sub[1] = expand_string_internal(s+1, TRUE, &s, skipping, FALSE, &resetok); + sub[1] = expand_string_internal(s+1, TRUE, &s, skipping, FALSE, &resetok, NULL); if (!sub[1]) goto EXPAND_FAILED; /*{{*/ 
if (*s++ != '}') { @@ -6434,7 +6433,7 @@ while (*s) goto EXPAND_FAILED_CURLY; /*}*/ } - if (!(list = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok))) + if (!(list = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok, NULL))) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -6452,7 +6451,7 @@ while (*s) expand_string_message = US"missing '{' for second arg of reduce"; goto EXPAND_FAILED_CURLY; /*}*/ } - t = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok); + t = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!t) goto EXPAND_FAILED; lookup_value = t; /*{{*/ if (*s++ != '}') @@ -6479,7 +6478,7 @@ while (*s) the normal internal expansion function. */ if (item_type != EITEM_FILTER) - temp = expand_string_internal(s, TRUE, &s, TRUE, TRUE, &resetok); + temp = expand_string_internal(s, TRUE, &s, TRUE, TRUE, &resetok, NULL); else if ((temp = eval_condition(expr, &resetok, NULL))) s = temp; @@ -6541,7 +6540,7 @@ while (*s) else { - uschar * t = expand_string_internal(expr, TRUE, NULL, skipping, TRUE, &resetok); + uschar * t = expand_string_internal(expr, TRUE, NULL, skipping, TRUE, &resetok, NULL); temp = t; if (!temp) { @@ -6632,7 +6631,7 @@ while (*s) goto EXPAND_FAILED_CURLY; /*}*/ } - srclist = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok); + srclist = expand_string_internal(s, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!srclist) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -6647,7 +6646,7 @@ while (*s) goto EXPAND_FAILED_CURLY; /*}*/ } - cmp = expand_string_internal(s, TRUE, &s, skipping, FALSE, &resetok); + cmp = expand_string_internal(s, TRUE, &s, skipping, FALSE, &resetok, NULL); if (!cmp) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -6682,7 +6681,7 @@ while (*s) } xtract = s; - if (!(tmp = expand_string_internal(s, TRUE, &s, TRUE, TRUE, &resetok))) + if (!(tmp = expand_string_internal(s, TRUE, &s, TRUE, TRUE, &resetok, NULL))) goto EXPAND_FAILED; xtract = string_copyn(xtract, 
s - xtract); /*{{*/ @@ -6710,7 +6709,7 @@ while (*s) /* extract field for comparisons */ iterate_item = srcitem; if ( !(srcfield = expand_string_internal(xtract, FALSE, NULL, FALSE, - TRUE, &resetok)) + TRUE, &resetok, NULL)) || !*srcfield) { expand_string_message = string_sprintf( @@ -6816,7 +6815,7 @@ while (*s) } switch(read_subs(argv, EXPAND_DLFUNC_MAX_ARGS + 2, 2, &s, skipping, - TRUE, name, &resetok)) + TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -6892,7 +6891,7 @@ while (*s) if (Uskip_whitespace(&s) != '{') /*}*/ goto EXPAND_FAILED; - key = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok); + key = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!key) goto EXPAND_FAILED; /*{{*/ if (*s++ != '}') { @@ -6927,7 +6926,7 @@ while (*s) gstring * g = NULL; BOOL quoted = FALSE; - switch (read_subs(sub, 3, 3, CUSS &s, skipping, TRUE, name, &resetok)) + switch (read_subs(sub, 3, 3, CUSS &s, skipping, TRUE, name, &resetok, NULL)) { case 1: goto EXPAND_FAILED_CURLY; case 2: @@ -7057,7 +7056,7 @@ NOT_ITEM: ; { const uschar * s1 = s; sub = expand_string_internal(s+2, TRUE, &s1, skipping, - FALSE, &resetok); + FALSE, &resetok, NULL); if (!sub) goto EXPAND_FAILED; /*{*/ if (*s1 != '}') { /*{*/ @@ -7075,7 +7074,7 @@ NOT_ITEM: ; /*FALLTHROUGH*/ #endif default: - sub = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok); + sub = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok, NULL); if (!sub) goto EXPAND_FAILED; s++; break; @@ -7172,7 +7171,7 @@ NOT_ITEM: ; case EOP_EXPAND: { - uschar *expanded = expand_string_internal(sub, FALSE, NULL, skipping, TRUE, &resetok); + uschar *expanded = expand_string_internal(sub, FALSE, NULL, skipping, TRUE, &resetok, NULL); if (!expanded) { expand_string_message = @@ -8241,7 +8240,7 @@ NOT_ITEM: ; terminating brace. */ if (ket_ends && !*s) - { + { /*{{*/ expand_string_message = malformed_header ? 
US"missing } at end of string - could be header name not terminated by colon" : US"missing } at end of string"; @@ -8301,6 +8300,7 @@ DEBUG(D_expand) "skipping: result is not used\n"); } } +if (textonly_p) *textonly_p = textonly; expand_level--; return yield->s; @@ -8349,16 +8349,20 @@ return NULL; } + /* This is the external function call. Do a quick check for any expansion metacharacters, and if there are none, just return the input string. -Argument: the string to be expanded +Arguments + the string to be expanded + optional pointer for return boolean indicating no-dynamic-expansions + Returns: the expanded string, or NULL if expansion failed; if failure was due to a lookup deferring, search_find_defer will be TRUE */ const uschar * -expand_cstring(const uschar * string) +expand_string_2(const uschar * string, BOOL * textonly_p) { if (Ustrpbrk(string, "$\\") != NULL) { @@ -8368,19 +8372,22 @@ if (Ustrpbrk(string, "$\\") != NULL) f.search_find_defer = FALSE; malformed_header = FALSE; store_pool = POOL_MAIN; - s = expand_string_internal(string, FALSE, NULL, FALSE, TRUE, NULL); + s = expand_string_internal(string, FALSE, NULL, FALSE, TRUE, NULL, textonly_p); store_pool = old_pool; return s; } +if (textonly_p) *textonly_p = TRUE; return string; } +const uschar * +expand_cstring(const uschar * string) +{ return expand_string_2(string, NULL); } uschar * expand_string(uschar * string) -{ -return US expand_cstring(CUS string); -} +{ return US expand_string_2(CUS string, NULL); } + diff --git a/src/src/filter.c b/src/src/filter.c index a3e31a3b5..cc4af230e 100644 --- a/src/src/filter.c +++ b/src/src/filter.c @@ -1424,213 +1424,203 @@ Returns: TRUE if the condition is met */ static BOOL -test_condition(condition_block *c, BOOL toplevel) +test_condition(condition_block * c, BOOL toplevel) { -BOOL yield = FALSE; -const uschar *exp[2], * p, * pp; +BOOL yield = FALSE, textonly_re; +const uschar * exp[2], * p, * pp; int val[2]; -int i; -if (c == NULL) return TRUE; /* does this 
ever occur? */ +if (!c) return TRUE; /* does this ever occur? */ switch (c->type) { case cond_and: - yield = test_condition(c->left.c, FALSE) && - *error_pointer == NULL && - test_condition(c->right.c, FALSE); - break; + yield = test_condition(c->left.c, FALSE) && + *error_pointer == NULL && + test_condition(c->right.c, FALSE); + break; case cond_or: - yield = test_condition(c->left.c, FALSE) || - (*error_pointer == NULL && - test_condition(c->right.c, FALSE)); - break; + yield = test_condition(c->left.c, FALSE) || + (*error_pointer == NULL && + test_condition(c->right.c, FALSE)); + break; - /* The personal test is meaningless in a system filter. The tests are now in - a separate function (so Sieve can use them). However, an Exim filter does not - scan Cc: (hence the FALSE argument). */ + /* The personal test is meaningless in a system filter. The tests are now in + a separate function (so Sieve can use them). However, an Exim filter does not + scan Cc: (hence the FALSE argument). */ case cond_personal: - yield = f.system_filtering? FALSE : filter_personal(c->left.a, FALSE); - break; + yield = f.system_filtering? FALSE : filter_personal(c->left.a, FALSE); + break; case cond_delivered: - yield = filter_delivered; - break; + yield = filter_delivered; + break; - /* Only TRUE if a message is actually being processed; FALSE for address - testing and verification. */ + /* Only TRUE if a message is actually being processed; FALSE for address + testing and verification. */ case cond_errormsg: - yield = message_id[0] != 0 && - (sender_address == NULL || sender_address[0] == 0); - break; + yield = message_id[0] != 0 && + (sender_address == NULL || sender_address[0] == 0); + break; - /* Only FALSE if a message is actually being processed; TRUE for address - and filter testing and verification. */ + /* Only FALSE if a message is actually being processed; TRUE for address + and filter testing and verification. 
*/ case cond_firsttime: - yield = filter_test != FTEST_NONE || message_id[0] == 0 || f.deliver_firsttime; - break; + yield = filter_test != FTEST_NONE || message_id[0] == 0 || f.deliver_firsttime; + break; - /* Only TRUE if a message is actually being processed; FALSE for address - testing and verification. */ + /* Only TRUE if a message is actually being processed; FALSE for address + testing and verification. */ case cond_manualthaw: - yield = message_id[0] != 0 && f.deliver_manual_thaw; - break; + yield = message_id[0] != 0 && f.deliver_manual_thaw; + break; - /* The foranyaddress condition loops through a list of addresses */ + /* The foranyaddress condition loops through a list of addresses */ case cond_foranyaddress: - p = c->left.u; - if (!(pp = expand_cstring(p))) - { - *error_pointer = string_sprintf("failed to expand \"%s\" in " - "filter file: %s", p, expand_string_message); - return FALSE; - } + p = c->left.u; + if (!(pp = expand_cstring(p))) + { + *error_pointer = string_sprintf("failed to expand \"%s\" in " + "filter file: %s", p, expand_string_message); + return FALSE; + } - yield = FALSE; - f.parse_allow_group = TRUE; /* Allow group syntax */ + yield = FALSE; + f.parse_allow_group = TRUE; /* Allow group syntax */ - while (*pp) - { - uschar *error; - int start, end, domain; - uschar * s; + while (*pp) + { + uschar *error; + int start, end, domain; + uschar * s; - p = parse_find_address_end(pp, FALSE); - s = string_copyn(pp, p - pp); + p = parse_find_address_end(pp, FALSE); + s = string_copyn(pp, p - pp); - filter_thisaddress = - parse_extract_address(s, &error, &start, &end, &domain, FALSE); + filter_thisaddress = + parse_extract_address(s, &error, &start, &end, &domain, FALSE); - if (filter_thisaddress) - { - if ((filter_test != FTEST_NONE && debug_selector != 0) || - (debug_selector & D_filter) != 0) - { - indent(); - debug_printf_indent("Extracted address %s\n", filter_thisaddress); - } - yield = test_condition(c->right.c, FALSE); - } + if 
(filter_thisaddress) + { + if ((filter_test != FTEST_NONE && debug_selector != 0) || + (debug_selector & D_filter) != 0) + { + indent(); + debug_printf_indent("Extracted address %s\n", filter_thisaddress); + } + yield = test_condition(c->right.c, FALSE); + } - if (yield) break; - if (!*p) break; - pp = p + 1; - } + if (yield) break; + if (!*p) break; + pp = p + 1; + } - f.parse_allow_group = FALSE; /* Reset group syntax flags */ - f.parse_found_group = FALSE; - break; + f.parse_allow_group = FALSE; /* Reset group syntax flags */ + f.parse_found_group = FALSE; + break; - /* All other conditions have left and right values that need expanding; - on error, it doesn't matter what value is returned. */ + /* All other conditions have left and right values that need expanding; + on error, it doesn't matter what value is returned. */ - default: - p = c->left.u; - for (i = 0; i < 2; i++) - { - if (!(exp[i] = expand_cstring(p))) + default: + p = c->left.u; + for (int i = 0; i < 2; i++) { - *error_pointer = string_sprintf("failed to expand \"%s\" in " - "filter file: %s", p, expand_string_message); - return FALSE; + if (!(exp[i] = expand_string_2(p, &textonly_re))) + { + *error_pointer = string_sprintf("failed to expand \"%s\" in " + "filter file: %s", p, expand_string_message); + return FALSE; + } + p = c->right.u; } - p = c->right.u; - } - /* Inner switch for the different cases */ - - switch(c->type) - { - case cond_is: - yield = strcmpic(exp[0], exp[1]) == 0; - break; + /* Inner switch for the different cases */ - case cond_IS: - yield = Ustrcmp(exp[0], exp[1]) == 0; - break; - - case cond_contains: - yield = strstric_c(exp[0], exp[1], FALSE) != NULL; - break; + switch(c->type) + { + case cond_is: + yield = strcmpic(exp[0], exp[1]) == 0; + break; - case cond_CONTAINS: - yield = Ustrstr(exp[0], exp[1]) != NULL; - break; + case cond_IS: + yield = Ustrcmp(exp[0], exp[1]) == 0; + break; - case cond_begins: - yield = strncmpic(exp[0], exp[1], Ustrlen(exp[1])) == 0; - break; + 
case cond_contains: + yield = strstric_c(exp[0], exp[1], FALSE) != NULL; + break; - case cond_BEGINS: - yield = Ustrncmp(exp[0], exp[1], Ustrlen(exp[1])) == 0; - break; + case cond_CONTAINS: + yield = Ustrstr(exp[0], exp[1]) != NULL; + break; - case cond_ends: - case cond_ENDS: - { - int len = Ustrlen(exp[1]); - const uschar *s = exp[0] + Ustrlen(exp[0]) - len; - yield = s < exp[0] - ? FALSE - : (c->type == cond_ends ? strcmpic(s, exp[1]) : Ustrcmp(s, exp[1])) == 0; - } - break; + case cond_begins: + yield = strncmpic(exp[0], exp[1], Ustrlen(exp[1])) == 0; + break; - case cond_matches: - case cond_MATCHES: - { - const pcre2_code *re; - int err; - PCRE2_SIZE offset; + case cond_BEGINS: + yield = Ustrncmp(exp[0], exp[1], Ustrlen(exp[1])) == 0; + break; - if ((filter_test != FTEST_NONE && debug_selector != 0) || - (debug_selector & D_filter) != 0) + case cond_ends: + case cond_ENDS: { - debug_printf_indent("Match expanded arguments:\n"); - debug_printf_indent(" Subject = %s\n", exp[0]); - debug_printf_indent(" Pattern = %s\n", exp[1]); + int len = Ustrlen(exp[1]); + const uschar *s = exp[0] + Ustrlen(exp[0]) - len; + yield = s < exp[0] + ? FALSE + : (c->type == cond_ends ? strcmpic(s, exp[1]) : Ustrcmp(s, exp[1])) == 0; + break; } - if (!(re = pcre2_compile((PCRE2_SPTR)exp[1], PCRE2_ZERO_TERMINATED, - PCRE_COPT | (c->type == cond_matches ? PCRE2_CASELESS : 0), - &err, &offset, pcre_gen_cmp_ctx))) + case cond_matches: + case cond_MATCHES: { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - *error_pointer = string_sprintf("error while compiling " - "regular expression \"%s\": %s at offset %ld", - exp[1], errbuf, (long)offset); - return FALSE; - } + const pcre2_code * re; + mcs_flags flags = textonly_re ? 
MCS_CACHEABLE : MCS_NOFLAGS; - yield = regex_match_and_setup(re, exp[0], PCRE_EOPT, -1); - break; - } + if ((filter_test != FTEST_NONE && debug_selector != 0) || + (debug_selector & D_filter) != 0) + { + debug_printf_indent("Match expanded arguments:\n"); + debug_printf_indent(" Subject = %s\n", exp[0]); + debug_printf_indent(" Pattern = %s\n", exp[1]); + } - /* For above and below, convert the strings to numbers */ + if (c->type == cond_matches) flags |= MCS_CASELESS; + if (!(re = regex_compile(exp[1], flags, error_pointer, pcre_gen_cmp_ctx))) + return FALSE; - case cond_above: - case cond_below: - for (i = 0; i < 2; i++) - { - val[i] = get_number(exp[i], &yield); - if (!yield) - { - *error_pointer = string_sprintf("malformed numerical string \"%s\"", - exp[i]); - return FALSE; - } + yield = regex_match_and_setup(re, exp[0], PCRE_EOPT, -1); + break; + } + + /* For above and below, convert the strings to numbers */ + + case cond_above: + case cond_below: + for (int i = 0; i < 2; i++) + { + val[i] = get_number(exp[i], &yield); + if (!yield) + { + *error_pointer = string_sprintf("malformed numerical string \"%s\"", + exp[i]); + return FALSE; + } + } + yield = c->type == cond_above ? (val[0] > val[1]) : (val[0] < val[1]); + break; } - yield = (c->type == cond_above)? (val[0] > val[1]) : (val[0] < val[1]); break; - } - break; } if ((filter_test != FTEST_NONE && debug_selector != 0) || @@ -2356,7 +2346,7 @@ while (commands) commands = commands->next; } -return filter_delivered? FF_DELIVERED : FF_NOTDELIVERED; +return filter_delivered ? 
FF_DELIVERED : FF_NOTDELIVERED; } diff --git a/src/src/functions.h b/src/src/functions.h index 9c5e379d4..4caae346d 100644 --- a/src/src/functions.h +++ b/src/src/functions.h @@ -183,6 +183,7 @@ extern void release_cutthrough_connection(const uschar *); extern void daemon_go(void); #ifndef COMPILE_UTILITY +extern ssize_t daemon_client_sockname(struct sockaddr_un *, uschar **); extern ssize_t daemon_notifier_sockname(struct sockaddr_un *); #endif @@ -263,6 +264,7 @@ extern int exp_bool(address_item *addr, extern BOOL expand_check_condition(uschar *, uschar *, uschar *); extern uschar *expand_file_big_buffer(const uschar *); extern uschar *expand_string(uschar *); /* public, cannot make const */ +extern const uschar *expand_string_2(const uschar *, BOOL *); extern const uschar *expand_cstring(const uschar *); /* ... so use this one */ extern uschar *expand_getkeyed(const uschar *, const uschar *); @@ -335,7 +337,7 @@ extern BOOL macro_read_assignment(uschar *); extern uschar *macros_expand(int, int *, BOOL *); extern void mainlog_close(void); #ifdef WITH_CONTENT_SCAN -extern int malware(const uschar *, int); +extern int malware(const uschar *, BOOL, int); extern int malware_in_file(uschar *); extern void malware_init(void); extern gstring * malware_show_supported(gstring *); @@ -348,7 +350,7 @@ extern int match_check_list(const uschar **, int, tree_node **, unsigned int const uschar *, const uschar **); extern int match_isinlist(const uschar *, const uschar **, int, tree_node **, unsigned int *, int, BOOL, const uschar **); -extern int match_check_string(const uschar *, const uschar *, int, BOOL, BOOL, BOOL, +extern int match_check_string(const uschar *, const uschar *, int, mcs_flags, const uschar **); extern void message_start(void); @@ -363,7 +365,7 @@ extern int mime_acl_check(uschar *acl, FILE *f, struct mime_boundary_context *, uschar **, uschar **); extern int mime_decode(const uschar **); extern ssize_t mime_decode_base64(FILE *, FILE *, uschar *); -extern 
int mime_regex(const uschar **); +extern int mime_regex(const uschar **, BOOL); extern void mime_set_anomaly(int); #endif extern uschar *moan_check_errorcopy(uschar *); @@ -436,11 +438,14 @@ extern BOOL receive_msg(BOOL); extern int_eximarith_t receive_statvfs(BOOL, int *); extern void receive_swallow_smtp(void); #ifdef WITH_CONTENT_SCAN -extern int regex(const uschar **); +extern int regex(const uschar **, BOOL); #endif +extern void regex_at_daemon(const uschar *); extern BOOL regex_match(const pcre2_code *, const uschar *, int, uschar **); extern BOOL regex_match_and_setup(const pcre2_code *, const uschar *, int, int); -extern const pcre2_code *regex_must_compile(const uschar *, BOOL, BOOL); +extern const pcre2_code *regex_compile(const uschar *, mcs_flags, uschar **, + pcre2_compile_context *); +extern const pcre2_code *regex_must_compile(const uschar *, mcs_flags, BOOL); extern void retry_add_item(address_item *, uschar *, int); extern BOOL retry_check_address(const uschar *, host_item *, uschar *, BOOL, uschar **, uschar **); @@ -1224,6 +1229,7 @@ pid_t pid; DEBUG(D_any) debug_printf("%s forking for %s\n", process_purpose, purpose); if ((pid = fork()) == 0) { + f.daemon_listen = FALSE; process_purpose = purpose; DEBUG(D_any) debug_printf("postfork: %s\n", purpose); } diff --git a/src/src/globals.c b/src/src/globals.c index 62c9b2659..49988a8cc 100644 --- a/src/src/globals.c +++ b/src/src/globals.c @@ -234,6 +234,7 @@ struct global_flags f = .continue_more = FALSE, .daemon_listen = FALSE, + .daemon_scion = FALSE, .debug_daemon = FALSE, .deliver_firsttime = FALSE, .deliver_force = FALSE, diff --git a/src/src/globals.h b/src/src/globals.h index 0f0471101..3d5584555 100644 --- a/src/src/globals.h +++ b/src/src/globals.h @@ -199,6 +199,7 @@ extern struct global_flags { BOOL continue_more :1; /* Flag more addresses waiting */ BOOL daemon_listen :1; /* True if listening required */ + BOOL daemon_scion :1; /* Ancestor proc is daemon, and not re-exec'd */ BOOL 
debug_daemon :1; /* Debug the daemon process only */ BOOL deliver_firsttime :1; /* True for first delivery attempt */ BOOL deliver_force :1; /* TRUE if delivery was forced */ diff --git a/src/src/header.c b/src/src/header.c index 898d8d5c4..7ef59ff53 100644 --- a/src/src/header.c +++ b/src/src/header.c @@ -368,7 +368,7 @@ Returns: cond if the header exists and contains one of the strings; /* First we have a local subroutine to handle a single pattern */ static BOOL -one_pattern_match(uschar *name, int slen, BOOL has_addresses, uschar *pattern) +one_pattern_match(uschar * name, int slen, BOOL has_addresses, uschar * pattern) { BOOL yield = FALSE; const pcre2_code *re = NULL; @@ -376,7 +376,7 @@ const pcre2_code *re = NULL; /* If the pattern is a regex, compile it. Bomb out if compiling fails; these patterns are all constructed internally and should be valid. */ -if (*pattern == '^') re = regex_must_compile(pattern, TRUE, FALSE); +if (*pattern == '^') re = regex_must_compile(pattern, MCS_CASELESS, FALSE); /* Scan for the required header(s) and scan each one */ @@ -443,7 +443,7 @@ return yield; /* The externally visible interface */ BOOL -header_match(uschar *name, BOOL has_addresses, BOOL cond, string_item *strings, +header_match(uschar * name, BOOL has_addresses, BOOL cond, string_item * strings, int count, ...) { va_list ap; diff --git a/src/src/macros.h b/src/src/macros.h index fa89de12d..adbe6a267 100644 --- a/src/src/macros.h +++ b/src/src/macros.h @@ -1111,7 +1111,17 @@ should not be one active. 
*/ #define NOTIFIER_SOCKET_NAME "exim_daemon_notify" -#define NOTIFY_MSG_QRUN 1 /* Notify message types */ +/* Notify message types */ +#define NOTIFY_MSG_QRUN 1 #define NOTIFY_QUEUE_SIZE_REQ 2 +#define NOTIFY_REGEX 3 + +/* Flags for match_check_string() */ +typedef unsigned mcs_flags; +#define MCS_NOFLAGS 0 +#define MCS_PARTIAL BIT(0) /* permit partial- search types */ +#define MCS_CASELESS BIT(1) /* caseless matching where possible */ +#define MCS_AT_SPECIAL BIT(2) /* recognize @, @[], etc. */ +#define MCS_CACHEABLE BIT(3) /* no dynamic expansions used for pattern */ /* End of macros.h */ diff --git a/src/src/malware.c b/src/src/malware.c index 976438511..8b5ec27c4 100644 --- a/src/src/malware.c +++ b/src/src/malware.c @@ -299,21 +299,10 @@ return sock; } static const pcre2_code * -m_pcre_compile(const uschar * re, uschar ** errstr) +m_pcre_compile(const uschar * re, BOOL cacheable, uschar ** errstr) { -int err; -PCRE2_SIZE roffset; -const pcre2_code * cre; - -if (!(cre = pcre2_compile((PCRE2_SPTR)re, PCRE2_ZERO_TERMINATED, - PCRE_COPT, &err, &roffset, pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - *errstr= string_sprintf("regular expression error in '%s': %s at offset %ld", - re, errbuf, (long)roffset); - } -return cre; +return regex_compile(re, cacheable ? MCS_CACHEABLE : MCS_NOFLAGS, errstr, + pcre_gen_cmp_ctx); } uschar * @@ -332,7 +321,7 @@ return US substr; static const pcre2_code * m_pcre_nextinlist(const uschar ** list, int * sep, - char * listerr, uschar ** errstr) + BOOL cacheable, char * listerr, uschar ** errstr) { const uschar * list_ele; const pcre2_code * cre = NULL; @@ -343,7 +332,7 @@ else { DEBUG(D_acl) debug_printf_indent("%15s%10s'%s'\n", "", "RE: ", string_printing(list_ele)); - cre = m_pcre_compile(CUS list_ele, errstr); + cre = m_pcre_compile(CUS list_ele, cacheable, errstr); } return cre; } @@ -569,6 +558,7 @@ is via malware(), or there's malware_in_file() used for testing/debugging. 
Arguments: malware_re match condition for "malware=" + cacheable the RE did not use any dynamic elements during expansion scan_filename the file holding the email to be scanned, if we're faking this up for the -bmalware test, else NULL timeout if nonzero, non-default timeoutl @@ -577,11 +567,12 @@ Returns: Exim message processing code (OK, FAIL, DEFER, ...) where true means malware was found (condition applies) */ static int -malware_internal(const uschar * malware_re, const uschar * scan_filename, - int timeout) +malware_internal(const uschar * malware_re, BOOL cacheable, + const uschar * scan_filename, int timeout) { int sep = 0; const uschar *av_scanner_work = av_scanner; +BOOL av_scanner_textonly; uschar *scanner_name; unsigned long mbox_size; FILE *mbox_file; @@ -608,30 +599,30 @@ the name), so we can close it right away. Get the directory too. */ eml_dir = string_copyn(eml_filename, Ustrrchr(eml_filename, '/') - eml_filename); /* parse 1st option */ -if (strcmpic(malware_re, US"false") == 0 || Ustrcmp(malware_re,"0") == 0) +if (strcmpic(malware_re, US"false") == 0 || Ustrcmp(malware_re, "0") == 0) return FAIL; /* explicitly no matching */ /* special cases (match anything except empty) */ -if ( strcmpic(malware_re,US"true") == 0 - || Ustrcmp(malware_re,"*") == 0 - || Ustrcmp(malware_re,"1") == 0 +if ( strcmpic(malware_re, US"true") == 0 + || Ustrcmp(malware_re, "*") == 0 + || Ustrcmp(malware_re, "1") == 0 ) { if ( !malware_default_re - && !(malware_default_re = m_pcre_compile(malware_regex_default, &errstr))) + && !(malware_default_re = m_pcre_compile(malware_regex_default, FALSE, &errstr))) return malware_panic_defer(errstr); malware_re = malware_regex_default; re = malware_default_re; } /* compile the regex, see if it works */ -else if (!(re = m_pcre_compile(malware_re, &errstr))) +else if (!(re = m_pcre_compile(malware_re, cacheable, &errstr))) return malware_panic_defer(errstr); /* if av_scanner starts with a dollar, expand it first */ if (*av_scanner == 
'$') { - if (!(av_scanner_work = expand_string(av_scanner))) + if (!(av_scanner_work = expand_string_2(av_scanner, &av_scanner_textonly))) return malware_panic_defer( string_sprintf("av_scanner starts with $, but expansion failed: %s", expand_string_message)); @@ -642,6 +633,8 @@ if (*av_scanner == '$') malware_name = NULL; malware_ok = FALSE; } +else + av_scanner_textonly = TRUE; /* Do not scan twice (unless av_scanner is dynamic). */ if (!malware_ok) @@ -746,13 +739,11 @@ if (!malware_ok) case M_FPROT6D: /* "f-prot6d" scanner type ----------------------------------- */ { int bread; - uschar * e; - uschar * linebuffer; - uschar * scanrequest; + uschar * e, * linebuffer, * scanrequest; uschar av_buffer[1024]; - if ((!fprot6d_re_virus && !(fprot6d_re_virus = m_pcre_compile(fprot6d_re_virus_str, &errstr))) - || (!fprot6d_re_error && !(fprot6d_re_error = m_pcre_compile(fprot6d_re_error_str, &errstr)))) + if ((!fprot6d_re_virus && !(fprot6d_re_virus = m_pcre_compile(fprot6d_re_virus_str, FALSE, &errstr))) + || (!fprot6d_re_error && !(fprot6d_re_error = m_pcre_compile(fprot6d_re_error_str, FALSE, &errstr)))) return malware_panic_defer(errstr); scanrequest = string_sprintf("SCAN FILE %s\n", eml_filename); @@ -922,7 +913,7 @@ badseek: err = errno; /* set up match regex */ if (!drweb_re) - drweb_re = m_pcre_compile(drweb_re_str, &errstr); + drweb_re = m_pcre_compile(drweb_re_str, FALSE, &errstr); /* read and concatenate virus names into one string */ for (int i = 0; i < drweb_vnum; i++) @@ -1101,7 +1092,7 @@ badseek: err = errno; /* set up match */ /* todo also SUSPICION\t */ if (!fsec_re) - fsec_re = m_pcre_compile(fsec_re_str, &errstr); + fsec_re = m_pcre_compile(fsec_re_str, FALSE, &errstr); /* read report, linewise. 
Apply a timeout as the Fsecure daemon sometimes wants an answer to "PING" but they won't tell us what */ @@ -1225,12 +1216,12 @@ badseek: err = errno; /* set up match regex, depends on retcode */ if (kav_rc == 3) { - if (!kav_re_sus) kav_re_sus = m_pcre_compile(kav_re_sus_str, &errstr); + if (!kav_re_sus) kav_re_sus = m_pcre_compile(kav_re_sus_str, FALSE, &errstr); kav_re = kav_re_sus; } else { - if (!kav_re_inf) kav_re_inf = m_pcre_compile(kav_re_inf_str, &errstr); + if (!kav_re_inf) kav_re_inf = m_pcre_compile(kav_re_inf_str, FALSE, &errstr); kav_re = kav_re_inf; } @@ -1279,13 +1270,13 @@ badseek: err = errno; return m_panic_defer(scanent, NULL, errstr); /* find scanner output trigger */ - cmdline_trigger_re = m_pcre_nextinlist(&av_scanner_work, &sep, + cmdline_trigger_re = m_pcre_nextinlist(&av_scanner_work, &sep, av_scanner_textonly, "missing trigger specification", &errstr); if (!cmdline_trigger_re) return m_panic_defer(scanent, NULL, errstr); /* find scanner name regex */ - cmdline_regex_re = m_pcre_nextinlist(&av_scanner_work, &sep, + cmdline_regex_re = m_pcre_nextinlist(&av_scanner_work, &sep, av_scanner_textonly, "missing virus name regex specification", &errstr); if (!cmdline_regex_re) return m_panic_defer(scanent, NULL, errstr); @@ -1908,13 +1899,13 @@ badseek: err = errno; string_printing(sockline_scanner)); /* find scanner output trigger */ - sockline_trig_re = m_pcre_nextinlist(&av_scanner_work, &sep, + sockline_trig_re = m_pcre_nextinlist(&av_scanner_work, &sep, av_scanner_textonly, "missing trigger specification", &errstr); if (!sockline_trig_re) return m_panic_defer_3(scanent, NULL, errstr, malware_daemon_ctx.sock); /* find virus name regex */ - sockline_name_re = m_pcre_nextinlist(&av_scanner_work, &sep, + sockline_name_re = m_pcre_nextinlist(&av_scanner_work, &sep, av_scanner_textonly, "missing virus name regex specification", &errstr); if (!sockline_name_re) return m_panic_defer_3(scanent, NULL, errstr, malware_daemon_ctx.sock); @@ -2045,11 
+2036,11 @@ badseek: err = errno; */ if ( ( !ava_re_clean - && !(ava_re_clean = m_pcre_compile(ava_re_clean_str, &errstr))) + && !(ava_re_clean = m_pcre_compile(ava_re_clean_str, FALSE, &errstr))) || ( !ava_re_virus - && !(ava_re_virus = m_pcre_compile(ava_re_virus_str, &errstr))) + && !(ava_re_virus = m_pcre_compile(ava_re_virus_str, FALSE, &errstr))) || ( !ava_re_error - && !(ava_re_error = m_pcre_compile(ava_re_error_str, &errstr))) + && !(ava_re_error = m_pcre_compile(ava_re_error_str, FALSE, &errstr))) ) return malware_panic_defer(errstr); @@ -2211,15 +2202,16 @@ filename; it's a wrapper around the malware_file function. Arguments: malware_re match condition for "malware=" + cacheable the RE did not use any dynamic elements during expansion timeout if nonzero, timeout in seconds Returns: Exim message processing code (OK, FAIL, DEFER, ...) where true means malware was found (condition applies) */ int -malware(const uschar * malware_re, int timeout) +malware(const uschar * malware_re, BOOL cacheable, int timeout) { -int ret = malware_internal(malware_re, NULL, timeout); +int ret = malware_internal(malware_re, cacheable, NULL, timeout); if (ret == DEFER) av_failed = TRUE; return ret; @@ -2259,7 +2251,7 @@ recipients_list = NULL; receive_add_recipient(US"malware-victim@example.net", -1); f.enable_dollar_recipients = TRUE; -ret = malware_internal(US"*", eml_filename, 0); +ret = malware_internal(US"*", TRUE, eml_filename, 0); Ustrncpy(spooled_message_id, message_id, sizeof(spooled_message_id)); spool_mbox_ok = 1; @@ -2280,35 +2272,35 @@ void malware_init(void) { if (!malware_default_re) - malware_default_re = regex_must_compile(malware_regex_default, FALSE, TRUE); + malware_default_re = regex_must_compile(malware_regex_default, MCS_NOFLAGS, TRUE); #ifndef DISABLE_MAL_DRWEB if (!drweb_re) - drweb_re = regex_must_compile(drweb_re_str, FALSE, TRUE); + drweb_re = regex_must_compile(drweb_re_str, MCS_NOFLAGS, TRUE); #endif #ifndef DISABLE_MAL_FSECURE if (!fsec_re) - 
fsec_re = regex_must_compile(fsec_re_str, FALSE, TRUE); + fsec_re = regex_must_compile(fsec_re_str, MCS_NOFLAGS, TRUE); #endif #ifndef DISABLE_MAL_KAV if (!kav_re_sus) - kav_re_sus = regex_must_compile(kav_re_sus_str, FALSE, TRUE); + kav_re_sus = regex_must_compile(kav_re_sus_str, MCS_NOFLAGS, TRUE); if (!kav_re_inf) - kav_re_inf = regex_must_compile(kav_re_inf_str, FALSE, TRUE); + kav_re_inf = regex_must_compile(kav_re_inf_str, MCS_NOFLAGS, TRUE); #endif #ifndef DISABLE_MAL_AVAST if (!ava_re_clean) - ava_re_clean = regex_must_compile(ava_re_clean_str, FALSE, TRUE); + ava_re_clean = regex_must_compile(ava_re_clean_str, MCS_NOFLAGS, TRUE); if (!ava_re_virus) - ava_re_virus = regex_must_compile(ava_re_virus_str, FALSE, TRUE); + ava_re_virus = regex_must_compile(ava_re_virus_str, MCS_NOFLAGS, TRUE); if (!ava_re_error) - ava_re_error = regex_must_compile(ava_re_error_str, FALSE, TRUE); + ava_re_error = regex_must_compile(ava_re_error_str, MCS_NOFLAGS, TRUE); #endif #ifndef DISABLE_MAL_FFROT6D if (!fprot6d_re_error) - fprot6d_re_error = regex_must_compile(fprot6d_re_error_str, FALSE, TRUE); + fprot6d_re_error = regex_must_compile(fprot6d_re_error_str, MCS_NOFLAGS, TRUE); if (!fprot6d_re_virus) - fprot6d_re_virus = regex_must_compile(fprot6d_re_virus_str, FALSE, TRUE); + fprot6d_re_virus = regex_must_compile(fprot6d_re_virus_str, MCS_NOFLAGS, TRUE); #endif } diff --git a/src/src/match.c b/src/src/match.c index 2e4bff078..b4a0352ee 100644 --- a/src/src/match.c +++ b/src/src/match.c @@ -19,9 +19,7 @@ typedef struct check_string_block { const uschar *origsubject; /* caseful; keep these two first, in */ const uschar *subject; /* step with the block below */ int expand_setup; - BOOL use_partial; - BOOL caseless; - BOOL at_is_special; + mcs_flags flags; /* MCS_* defs in macros.h */ } check_string_block; @@ -32,7 +30,7 @@ typedef struct check_address_block { const uschar *origaddress; /* caseful; keep these two first, in */ uschar *address; /* step with the block above */ int 
expand_setup; - BOOL caseless; + mcs_flags flags; /* MCS_CASELESS, MCS_CACHEABLE */ } check_address_block; @@ -93,9 +91,10 @@ Returns: OK if matched */ static int -check_string(void *arg, const uschar *pattern, const uschar **valueptr, uschar **error) +check_string(void * arg, const uschar * pattern, const uschar ** valueptr, + uschar ** error) { -const check_string_block *cb = arg; +const check_string_block * cb = arg; int search_type, partial, affixlen, starflags; int expand_setup = cb->expand_setup; const uschar * affix, * opts; @@ -128,7 +127,8 @@ required. */ if (pattern[0] == '^') { - const pcre2_code * re = regex_must_compile(pattern, cb->caseless, FALSE); + const pcre2_code * re = regex_must_compile(pattern, + cb->flags & (MCS_CACHEABLE | MCS_CASELESS), FALSE); if (expand_setup < 0 ? !regex_match(re, s, -1, NULL) : !regex_match_and_setup(re, s, 0, expand_setup) @@ -147,7 +147,7 @@ if (pattern[0] == '*') patlen = Ustrlen(++pattern); if (patlen > slen) return FAIL; - if (cb->caseless + if (cb->flags & MCS_CASELESS ? strncmpic(s + slen - patlen, pattern, patlen) != 0 : Ustrncmp(s + slen - patlen, pattern, patlen) != 0) return FAIL; @@ -166,7 +166,7 @@ the primary host name - implement this by changing the pattern. For the other cases we have to do some more work. If we don't recognize a special pattern, just fall through - the match will fail. */ -if (cb->at_is_special && pattern[0] == '@') +if (cb->flags & MCS_AT_SPECIAL && pattern[0] == '@') { if (pattern[1] == 0) { @@ -260,10 +260,10 @@ NOT_AT_SPECIAL: if ((semicolon = Ustrchr(pattern, ';')) == NULL) { - if (cb->caseless ? strcmpic(s, pattern) != 0 : Ustrcmp(s, pattern) != 0) + if (cb->flags & MCS_CASELESS ? strcmpic(s, pattern) != 0 : Ustrcmp(s, pattern) != 0) return FAIL; - if (expand_setup >= 0) expand_nmax = expand_setup; /* Original code! 
$0 gets the matched subject */ - if (valueptr) *valueptr = pattern; /* "value" gets the pattern */ + if (expand_setup >= 0) expand_nmax = expand_setup; /* $0 gets the matched subject */ + if (valueptr) *valueptr = pattern; /* "value" gets the pattern */ return OK; } @@ -280,7 +280,7 @@ if (search_type < 0) log_write(0, LOG_MAIN|LOG_PANIC_DIE, "%s", /* Partial matching is not appropriate for certain lookups (e.g. when looking up user@domain for sender rejection). There's a flag to disable it. */ -if (!cb->use_partial) partial = -1; +if (!(cb->flags & MCS_PARTIAL)) partial = -1; /* Set the parameters for the three different kinds of lookup. */ @@ -316,9 +316,10 @@ Arguments: s the subject string to be checked pattern the pattern to check it against expand_setup expansion setup option (see check_string()) - use_partial if FALSE, override any partial- search types - caseless TRUE for caseless matching where possible - at_is_special TRUE to recognize @, @[], etc. + flags + use_partial if FALSE, override any partial- search types + caseless TRUE for caseless matching where possible + at_is_special TRUE to recognize @, @[], etc. valueptr if not NULL, and a file lookup was done, return the result here instead of discarding it; else set it to point to NULL @@ -328,16 +329,14 @@ Returns: OK if matched */ int -match_check_string(const uschar *s, const uschar *pattern, int expand_setup, - BOOL use_partial, BOOL caseless, BOOL at_is_special, const uschar **valueptr) +match_check_string(const uschar * s, const uschar * pattern, int expand_setup, + mcs_flags flags, const uschar ** valueptr) { check_string_block cb; cb.origsubject = s; -cb.subject = caseless ? string_copylc(s) : string_copy(s); +cb.subject = flags & MCS_CASELESS ? 
string_copylc(s) : string_copy(s); cb.expand_setup = expand_setup; -cb.use_partial = use_partial; -cb.caseless = caseless; -cb.at_is_special = at_is_special; +cb.flags = flags; return check_string(&cb, pattern, valueptr, NULL); } @@ -364,14 +363,9 @@ switch(type) { case MCL_STRING: case MCL_DOMAIN: - case MCL_LOCALPART: - return ((check_string_block *)arg)->subject; - - case MCL_HOST: - return ((check_host_block *)arg)->host_address; - - case MCL_ADDRESS: - return ((check_address_block *)arg)->address; + case MCL_LOCALPART: return ((check_string_block *)arg)->subject; + case MCL_HOST: return ((check_host_block *)arg)->host_address; + case MCL_ADDRESS: return ((check_address_block *)arg)->address; } return US""; /* In practice, should never happen */ } @@ -438,6 +432,7 @@ BOOL include_unknown = FALSE, ignore_unknown = FALSE, const uschar *list; uschar *sss; uschar *ot = NULL; +BOOL textonly_re; /* Save time by not scanning for the option name when we don't need it. */ @@ -465,6 +460,7 @@ if (type >= MCL_NOEXPAND) { list = *listptr; type -= MCL_NOEXPAND; /* Remove the "no expand" flag */ + textonly_re = TRUE; } else { @@ -475,11 +471,11 @@ else { check_string_block *cb = (check_string_block *)arg; deliver_domain = string_copy(cb->subject); - list = expand_cstring(*listptr); + list = expand_string_2(*listptr, &textonly_re); deliver_domain = NULL; } else - list = expand_cstring(*listptr); + list = expand_string_2(*listptr, &textonly_re); if (!list) { @@ -495,6 +491,15 @@ else } } +if (textonly_re) switch (type) + { + case MCL_STRING: + case MCL_DOMAIN: + case MCL_LOCALPART: ((check_string_block *)arg)->flags |= MCS_CACHEABLE; break; + case MCL_HOST: ((check_host_block *)arg)->flags |= MCS_CACHEABLE; break; + case MCL_ADDRESS: ((check_address_block *)arg)->flags |= MCS_CACHEABLE; break; + } + /* For an unnamed list, use the expanded version in comments */ #define LIST_LIMIT_PR 2048 @@ -530,7 +535,7 @@ while ((sss = string_nextinlist(&list, &sep, NULL, 0))) if (at) 
Ustrncpy(cb->address, cb->origaddress, at - cb->origaddress); - cb->caseless = FALSE; + cb->flags &= ~MCS_CASELESS; continue; } } @@ -543,7 +548,7 @@ while ((sss = string_nextinlist(&list, &sep, NULL, 0))) { check_string_block *cb = (check_string_block *)arg; Ustrcpy(US cb->subject, cb->origsubject); - cb->caseless = FALSE; + cb->flags &= ~MCS_CASELESS; continue; } } @@ -958,15 +963,13 @@ unsigned int *local_cache_bits = cache_bits; check_string_block cb; cb.origsubject = s; cb.subject = caseless ? string_copylc(s) : string_copy(s); -cb.at_is_special = FALSE; +cb.flags = caseless ? MCS_PARTIAL+MCS_CASELESS : MCS_PARTIAL; switch (type & ~MCL_NOEXPAND) { - case MCL_DOMAIN: cb.at_is_special = TRUE; /*FALLTHROUGH*/ + case MCL_DOMAIN: cb.flags |= MCS_AT_SPECIAL; /*FALLTHROUGH*/ case MCL_LOCALPART: cb.expand_setup = 0; break; default: cb.expand_setup = sep > UCHAR_MAX ? 0 : -1; break; } -cb.use_partial = TRUE; -cb.caseless = caseless; if (valueptr) *valueptr = NULL; return match_check_list(listptr, sep, anchorptr, &local_cache_bits, check_string, &cb, type, s, valueptr); @@ -1003,7 +1006,8 @@ Returns: OK for a match */ static int -check_address(void *arg, const uschar *pattern, const uschar **valueptr, uschar **error) +check_address(void * arg, const uschar * pattern, const uschar ** valueptr, + uschar ** error) { check_address_block * cb = (check_address_block *)arg; check_string_block csb; @@ -1026,7 +1030,7 @@ sdomain = Ustrrchr(subject, '@'); /* The only case where a subject may not have a domain is if the subject is empty. Otherwise, a subject with no domain is a serious configuration error. */ -if (sdomain == NULL && *subject != 0) +if (!sdomain && *subject) { log_write(0, LOG_MAIN|LOG_PANIC, "no @ found in the subject of an " "address list match: subject=\"%s\" pattern=\"%s\"", subject, pattern); @@ -1037,14 +1041,14 @@ if (sdomain == NULL && *subject != 0) This may be the empty address. 
*/ if (*pattern == '^') - return match_check_string(subject, pattern, cb->expand_setup, TRUE, - cb->caseless, FALSE, NULL); + return match_check_string(subject, pattern, cb->expand_setup, + cb->flags | MCS_PARTIAL, NULL); /* Handle a pattern that is just a lookup. Skip over possible lookup names (letters, digits, hyphens). Skip over a possible * or *@ at the end. Then we must have a semicolon for it to be a lookup. */ -for (s = pattern; isalnum(*s) || *s == '-'; s++); +for (s = pattern; isalnum(*s) || *s == '-'; s++) ; if (*s == '*') s++; if (*s == '@') s++; @@ -1057,8 +1061,7 @@ if (*s == ';') if (Ustrncmp(pattern, "partial-", 8) == 0) log_write(0, LOG_MAIN|LOG_PANIC, "partial matching is not applicable to " "whole-address lookups: ignored \"partial-\" in \"%s\"", pattern); - return match_check_string(subject, pattern, -1, FALSE, cb->caseless, FALSE, - valueptr); + return match_check_string(subject, pattern, -1, cb->flags, valueptr); } /* For the remaining cases, an empty subject matches only an empty pattern, @@ -1085,19 +1088,20 @@ if (pattern[0] == '@' && pattern[1] == '@') { int sep = 0; - if ((rc = match_check_string(key, pattern + 2, -1, TRUE, FALSE, FALSE, - CUSS &list)) != OK) return rc; + if ((rc = match_check_string(key, pattern + 2, -1, MCS_PARTIAL, CUSS &list)) + != OK) + return rc; /* Check for chaining from the last item; set up the next key if one is found. 
*/ ss = Ustrrchr(list, ':'); - if (ss == NULL) ss = list; else ss++; - while (isspace(*ss)) ss++; + if (!ss) ss = list; else ss++; + Uskip_whitespace(&ss); if (*ss == '>') { *ss++ = 0; - while (isspace(*ss)) ss++; + Uskip_whitespace(&ss); key = string_copy(ss); } else key = NULL; @@ -1117,8 +1121,7 @@ if (pattern[0] == '@' && pattern[1] == '@') else local_yield = OK; *sdomain = 0; - rc = match_check_string(subject, ss, -1, TRUE, cb->caseless, FALSE, - valueptr); + rc = match_check_string(subject, ss, -1, cb->flags + MCS_PARTIAL, valueptr); *sdomain = '@'; switch(rc) @@ -1148,8 +1151,7 @@ if (pattern[0] == '@' && pattern[1] == '@') /* We get here if the pattern is not a lookup or a regular expression. If it contains an @ there is both a local part and a domain. */ -pdomain = Ustrrchr(pattern, '@'); -if (pdomain != NULL) +if ((pdomain = Ustrrchr(pattern, '@'))) { int pllen, sllen; @@ -1177,7 +1179,7 @@ if (pdomain != NULL) { int cllen = pllen - 1; if (sllen < cllen) return FAIL; - if (cb->caseless + if (cb->flags & MCS_CASELESS ? strncmpic(subject+sllen-cllen, pattern + 1, cllen) != 0 : Ustrncmp(subject+sllen-cllen, pattern + 1, cllen) != 0) return FAIL; @@ -1192,7 +1194,7 @@ if (pdomain != NULL) else { if (sllen != pllen) return FAIL; - if (cb->caseless + if (cb->flags & MCS_CASELESS ? strncmpic(subject, pattern, sllen) != 0 : Ustrncmp(subject, pattern, sllen) != 0) return FAIL; } @@ -1205,18 +1207,17 @@ original code read as follows: return match_check_string(sdomain + 1, pdomain ? pdomain + 1 : pattern, - cb->expand_setup + expand_inc, TRUE, cb->caseless, TRUE, NULL); + cb->expand_setup + expand_inc, cb->flags, NULL); This supported only literal domains and *.x.y patterns. In order to allow for -named domain lists (so that you can right, for example, "senders=+xxxx"), it +named domain lists (so that you can write, for example, "senders=+xxxx"), it was changed to use the list scanning function. */ csb.origsubject = sdomain + 1; -csb.subject = cb->caseless ? 
string_copylc(sdomain+1) : string_copy(sdomain+1); +csb.subject = cb->flags & MCS_CASELESS + ? string_copylc(sdomain+1) : string_copy(sdomain+1); csb.expand_setup = cb->expand_setup + expand_inc; -csb.use_partial = TRUE; -csb.caseless = cb->caseless; -csb.at_is_special = TRUE; +csb.flags = MCS_PARTIAL | MCS_AT_SPECIAL | cb->flags & MCS_CASELESS; listptr = pdomain ? pdomain + 1 : pattern; if (valueptr) *valueptr = NULL; @@ -1321,10 +1322,10 @@ if (expand_setup == 0) ab.origaddress = address; /* ab.address is above */ ab.expand_setup = expand_setup; -ab.caseless = caseless; +ab.flags = caseless ? MCS_CASELESS : 0; return match_check_list(listptr, sep, &addresslist_anchor, &local_cache_bits, - check_address, &ab, MCL_ADDRESS + (expand? 0:MCL_NOEXPAND), address, + check_address, &ab, MCL_ADDRESS + (expand ? 0 : MCL_NOEXPAND), address, valueptr); } diff --git a/src/src/queue.c b/src/src/queue.c index c0a1cd182..6e47d2c8a 100644 --- a/src/src/queue.c +++ b/src/src/queue.c @@ -423,11 +423,11 @@ if (!recurse) /* If deliver_selectstring is a regex, compile it. */ if (deliver_selectstring && f.deliver_selectstring_regex) - selectstring_regex = regex_must_compile(deliver_selectstring, TRUE, FALSE); + selectstring_regex = regex_must_compile(deliver_selectstring, MCS_CASELESS, FALSE); if (deliver_selectstring_sender && f.deliver_selectstring_sender_regex) selectstring_regex_sender = - regex_must_compile(deliver_selectstring_sender, TRUE, FALSE); + regex_must_compile(deliver_selectstring_sender, MCS_CASELESS, FALSE); /* If the spool is split into subdirectories, we want to process it one directory at a time, so as to spread out the directory scanning and the diff --git a/src/src/readconf.c b/src/src/readconf.c index c74b70b55..5068dc60e 100644 --- a/src/src/readconf.c +++ b/src/src/readconf.c @@ -3498,7 +3498,7 @@ if (!process_log_path || !*process_log_path) /* Compile the regex for matching a UUCP-style "From_" line in an incoming message. 
*/ -regex_From = regex_must_compile(uucp_from_pattern, FALSE, TRUE); +regex_From = regex_must_compile(uucp_from_pattern, MCS_NOFLAGS, TRUE); /* Unpick the SMTP rate limiting options, if set */ diff --git a/src/src/regex.c b/src/src/regex.c index 9b7b07405..5de1c1704 100644 --- a/src/src/regex.c +++ b/src/src/regex.c @@ -18,9 +18,9 @@ /* Structure to hold a list of Regular expressions */ typedef struct pcre_list { - pcre2_code *re; - uschar *pcre_text; - struct pcre_list *next; + const pcre2_code * re; + uschar * pcre_text; + struct pcre_list * next; } pcre_list; uschar regex_match_string_buffer[1024]; @@ -28,31 +28,27 @@ uschar regex_match_string_buffer[1024]; extern FILE *mime_stream; extern uschar *mime_current_boundary; + static pcre_list * -compile(const uschar * list) +compile(const uschar * list, BOOL cacheable) { int sep = 0; -uschar *regex_string; -pcre_list *re_list_head = NULL; -pcre_list *ri; +uschar * regex_string; +pcre_list * re_list_head = NULL; +pcre_list * ri; /* precompile our regexes */ while ((regex_string = string_nextinlist(&list, &sep, NULL, 0))) if (strcmpic(regex_string, US"false") != 0 && Ustrcmp(regex_string, "0") != 0) { - pcre2_code * re; - int err; - PCRE2_SIZE pcre_erroffset; - /* compile our regular expression */ - if (!(re = pcre2_compile( (PCRE2_SPTR) regex_string, PCRE2_ZERO_TERMINATED, - 0, &err, &pcre_erroffset, pcre_gen_cmp_ctx))) + uschar * errstr; + const pcre2_code * re = regex_compile(regex_string, + cacheable ? 
MCS_CACHEABLE : MCS_NOFLAGS, &errstr, pcre_gen_cmp_ctx); + + if (!re) { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - log_write(0, LOG_MAIN, - "regex acl condition warning - error in regex '%s': %s at offset %ld, skipped.", - regex_string, errbuf, (long)pcre_erroffset); + log_write(0, LOG_MAIN, "regex acl condition warning - %s, skipped", errstr); continue; } @@ -96,8 +92,10 @@ for (pcre_list * ri = re_list_head; ri; ri = ri->next) return FAIL; } + + int -regex(const uschar **listptr) +regex(const uschar **listptr, BOOL cacheable) { unsigned long mbox_size; FILE *mbox_file; @@ -130,7 +128,7 @@ else } /* precompile our regexes */ -if (!(re_list_head = compile(*listptr))) +if (!(re_list_head = compile(*listptr, cacheable))) return FAIL; /* no regexes -> nothing to do */ /* match each line against all regexes */ @@ -167,7 +165,7 @@ return ret; int -mime_regex(const uschar **listptr) +mime_regex(const uschar **listptr, BOOL cacheable) { pcre_list *re_list_head = NULL; FILE *f; @@ -179,7 +177,7 @@ int ret; regex_match_string = NULL; /* precompile our regexes */ -if (!(re_list_head = compile(*listptr))) +if (!(re_list_head = compile(*listptr, cacheable))) return FAIL; /* no regexes -> nothing to do */ /* check if the file is already decoded */ diff --git a/src/src/regex_cache.c b/src/src/regex_cache.c new file mode 100644 index 000000000..6ac134cd8 --- /dev/null +++ b/src/src/regex_cache.c @@ -0,0 +1,245 @@ +/************************************************* +* Exim - an Internet mail transport agent * +*************************************************/ + +/* + * Copyright (c) The Exim Maintainers 2022 + * License: GPL + */ + +/* Caching layers for compiled REs. There is a local layer in the process, +implemented as a tree for inserts and lookup. This cache is inherited from +the daemon, for the process tree deriving from there - but not by re-exec'd +processes or commandline submission processes.
+ +If the process has to compile, and is not the daemon or a re-exec'd exim, +it notifies the use of the RE to the daemon via a unix-domain socket. +This is a fire-and-forget send with no response, hence cheap from the point-of +view of the sender. I have not measured the overall comms costs. The +daemon also compiles the RE, and caches the result. + +A second layer would be possible by asking the daemon via the notifier socket +(for a result from its cache, or a compile if it must). The comms overhead +is significant, not only for the channel but also for de/serialisation of +the compiled object. This makes it untenable for the primary use-case, the +transport process which has been re-exec'd to gain privs - and therefore does not +have the daemon-maintained cache. Using shared-memory might reduce that cost +(the attach time for the memory segment will matter); the implementation +would require suitable R/W locks. +*/ + +#include "exim.h" + +typedef struct re_req { + uschar notifier_reqtype; + BOOL caseless; + uschar re[1]; /* extensible */ +} re_req; + +static tree_node * regex_cache = NULL; +static tree_node * regex_caseless_cache = NULL; + +/******************************************************************************/ + +static void +regex_to_daemon(const uschar * key, BOOL caseless) +{ +int klen = Ustrlen(key) + 1; +int rlen = sizeof(re_req) + klen; +re_req * req; +int fd, old_pool = store_pool; + +DEBUG(D_expand|D_lists) + debug_printf_indent("sending RE '%s' to daemon\n", key); + +store_pool = POOL_MAIN; + req = store_get(rlen, key); /* maybe need a size limit */ +store_pool = old_pool;; +req->notifier_reqtype = NOTIFY_REGEX; +req->caseless = caseless; +memcpy(req->re, key, klen); + +if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) >= 0) + { + struct sockaddr_un sa_un = {.sun_family = AF_UNIX}; + ssize_t len = daemon_notifier_sockname(&sa_un); + + if (sendto(fd, req, rlen, 0, (struct sockaddr *)&sa_un, (socklen_t)len) < 0) + DEBUG(D_queue_run) + debug_printf("%s:
sendto %s\n", __FUNCTION__, strerror(errno)); + close(fd); + } +else DEBUG(D_queue_run) debug_printf(" socket: %s\n", strerror(errno)); +} + + +static const pcre2_code * +regex_from_cache(const uschar * key, BOOL caseless) +{ +tree_node * node = + tree_search(caseless ? regex_caseless_cache : regex_cache, key); +DEBUG(D_expand|D_lists) + debug_printf_indent("compiled %sRE '%s' %sfound in local cache\n", + caseless ? "caseless " : "", key, node ? "" : "not "); + +return node ? node->data.ptr : NULL; +} + + +static void +regex_to_cache(const uschar * key, BOOL caseless, const pcre2_code * cre) +{ +PCRE2_SIZE srelen; +uschar * sre; +tree_node * node; + +node = store_get(sizeof(tree_node) + Ustrlen(key) + 1, key); /* we are called with STORE_PERM */ +Ustrcpy(node->name, key); +node->data.ptr = (void *)cre; + +if (!tree_insertnode(caseless ? &regex_caseless_cache : &regex_cache, node)) + { DEBUG(D_expand|D_lists) debug_printf_indent("duplicate key!\n"); } +else DEBUG(D_expand|D_lists) + debug_printf_indent("compiled RE '%s' saved in local cache\n", key); + +/* Additionally, if not re-execed and not the daemon, tell the daemon of the RE +so it can add to the cache */ + +if (f.daemon_scion && !f.daemon_listen) + regex_to_daemon(key, caseless); + +return; +} + +/******************************************************************************/ + +/************************************************* +* Compile regular expression and panic on fail * +*************************************************/ + +/* This function is called when failure to compile a regular expression leads +to a panic exit. In other cases, pcre_compile() is called directly. In many +cases where this function is used, the results of the compilation are to be +placed in long-lived store, so we temporarily reset the store management +functions that PCRE uses if the use_malloc flag is set.
+ +Argument: + pattern the pattern to compile + flags + caseless caseless matching is required + cacheable use (writeback) cache + use_malloc TRUE if compile into malloc store + +Returns: pointer to the compiled pattern +*/ + +const pcre2_code * +regex_must_compile(const uschar * pattern, mcs_flags flags, BOOL use_malloc) +{ +BOOL caseless = !!(flags & MCS_CASELESS); +size_t offset; +const pcre2_code * yield; +int old_pool = store_pool, err; + +/* Optionally, check the cache and return if found */ + +if ( flags & MCS_CACHEABLE + && (yield = regex_from_cache(pattern, caseless))) + return yield; + +store_pool = POOL_PERM; + +if (!(yield = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, + caseless ? PCRE_COPT|PCRE2_CASELESS : PCRE_COPT, + &err, &offset, use_malloc ? pcre_mlc_cmp_ctx : pcre_gen_cmp_ctx))) + { + uschar errbuf[128]; + pcre2_get_error_message(err, errbuf, sizeof(errbuf)); + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "regular expression error: " + "%s at offset %ld while compiling %s", errbuf, (long)offset, pattern); + } + +if (use_malloc) + { + /*pcre2_general_context_free(gctx);*/ + } + +if (flags & MCS_CACHEABLE) + regex_to_cache(pattern, caseless, yield); + +store_pool = old_pool; +return yield; +} + + + + +/* Wrapper for pcre2_compile() and error-message handling. + +Arguments: pattern regex to compile + flags + caseless flag for match variant + cacheable use (writeback) cache + errstr on error, filled in with error message + cctx compile-context for pcre2 + +Return: NULL on error, with errstr set.
Otherwise, the compiled RE object +*/ + +const pcre2_code * +regex_compile(const uschar * pattern, mcs_flags flags, uschar ** errstr, + pcre2_compile_context * cctx) +{ +const uschar * key = pattern; +BOOL caseless = !!(flags & MCS_CASELESS); +int err; +PCRE2_SIZE offset; +const pcre2_code * yield; +int old_pool = store_pool; + +/* Optionally, check the cache and return if found */ + +if ( flags & MCS_CACHEABLE + && (yield = regex_from_cache(key, caseless))) + return yield; + +DEBUG(D_expand|D_lists) debug_printf_indent("compiling %sRE '%s'\n", + caseless ? "caseless " : "", pattern); + +store_pool = POOL_PERM; +if (!(yield = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, + caseless ? PCRE_COPT|PCRE2_CASELESS : PCRE_COPT, + &err, &offset, cctx))) + { + uschar errbuf[128]; + pcre2_get_error_message(err, errbuf, sizeof(errbuf)); + store_pool = old_pool; + *errstr = string_sprintf("regular expression error in " + "\"%s\": %s at offset %ld", pattern, errbuf, (long)offset); + } +else if (flags & MCS_CACHEABLE) + regex_to_cache(key, caseless, yield); +store_pool = old_pool; + +return yield; +} + + + +/* Handle a regex notify arriving at the daemon. We get sent the original RE; +compile it (again) and write to the cache. Later forked procs will be able to +read from the cache, unless they re-execed. Therefore, those latter never bother +sending us a notification. */ + +void +regex_at_daemon(const uschar * reqbuf) +{ +const re_req * req = (const re_req *)reqbuf; +uschar * errstr; +const pcre2_code * cre = regex_compile(req->re, + req->caseless ? 
MCS_CASELESS | MCS_CACHEABLE : MCS_CACHEABLE, + &errstr, pcre_gen_cmp_ctx); + +DEBUG(D_any) if (!cre) debug_printf("%s\n", errstr); +return; +} diff --git a/src/src/rewrite.c b/src/src/rewrite.c index 005dc51fe..bfd78b5f1 100644 --- a/src/src/rewrite.c +++ b/src/src/rewrite.c @@ -136,7 +136,8 @@ for (rewrite_rule * rule = rewrite_rules; if (flag & rewrite_smtp) { - uschar *key = expand_string(rule->key); + BOOL textonly_re; + const uschar * key = expand_string_2(rule->key, &textonly_re); if (!key) { if (!f.expand_string_forcedfail) @@ -144,7 +145,8 @@ for (rewrite_rule * rule = rewrite_rules; "checking for SMTP rewriting: %s", rule->key, expand_string_message); continue; } - if (match_check_string(subject, key, 0, TRUE, FALSE, FALSE, NULL) != OK) + if (match_check_string(subject, key, 0, + textonly_re ? MCS_CACHEABLE | MCS_PARTIAL : MCS_PARTIAL, NULL) != OK) continue; new = expand_string(rule->replacement); } diff --git a/src/src/routers/iplookup.c b/src/src/routers/iplookup.c index 94cde4e04..8b67f3116 100644 --- a/src/src/routers/iplookup.c +++ b/src/src/routers/iplookup.c @@ -84,10 +84,10 @@ iplookup_router_options_block iplookup_router_option_defaults = { consistency checks to be done, or anything else that needs to be set up. 
*/ void -iplookup_router_init(router_instance *rblock) +iplookup_router_init(router_instance * rblock) { -iplookup_router_options_block *ob = - (iplookup_router_options_block *)(rblock->options_block); +iplookup_router_options_block * ob = + (iplookup_router_options_block *) rblock->options_block; /* A port and a host list must be given */ @@ -95,13 +95,13 @@ if (ob->port < 0) log_write(0, LOG_PANIC_DIE|LOG_CONFIG_FOR, "%s router:\n " "a port must be specified", rblock->name); -if (ob->hosts == NULL) +if (!ob->hosts) log_write(0, LOG_PANIC_DIE|LOG_CONFIG_FOR, "%s router:\n " "a host list must be specified", rblock->name); /* Translate protocol name into value */ -if (ob->protocol_name != NULL) +if (ob->protocol_name) { if (Ustrcmp(ob->protocol_name, "udp") == 0) ob->protocol = ip_udp; else if (Ustrcmp(ob->protocol_name, "tcp") == 0) ob->protocol = ip_tcp; @@ -111,9 +111,9 @@ if (ob->protocol_name != NULL) /* If a response pattern is given, compile it now to get the error early. */ -if (ob->response_pattern != NULL) +if (ob->response_pattern) ob->re_response_pattern = - regex_must_compile(ob->response_pattern, FALSE, TRUE); + regex_must_compile(ob->response_pattern, MCS_NOFLAGS, TRUE); } diff --git a/src/src/structs.h b/src/src/structs.h index b38aa6a9d..06cd06084 100644 --- a/src/src/structs.h +++ b/src/src/structs.h @@ -895,7 +895,7 @@ typedef struct check_host_block { const uschar *host_name; const uschar *host_address; const uschar *host_ipv4; - BOOL negative; + mcs_flags flags; } check_host_block; /* Structure for remembering lookup data when caching the result of diff --git a/src/src/transports/appendfile.c b/src/src/transports/appendfile.c index 600fb6125..7e29dd3bc 100644 --- a/src/src/transports/appendfile.c +++ b/src/src/transports/appendfile.c @@ -2213,23 +2213,14 @@ else if (ob->quota_value > 0 || THRESHOLD_CHECK || ob->maildir_use_size_file) { - PCRE2_SIZE offset; - int err; - /* Compile the regex if there is one. 
*/ if (ob->quota_size_regex) { - if (!(re = pcre2_compile((PCRE2_SPTR)ob->quota_size_regex, - PCRE2_ZERO_TERMINATED, PCRE_COPT, &err, &offset, pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - addr->message = string_sprintf("appendfile: regular expression " - "error: %s at offset %ld while compiling %s", errbuf, (long)offset, - ob->quota_size_regex); + if (!(re = regex_compile(ob->quota_size_regex, + MCS_NOFLAGS, &addr->message, pcre_gen_cmp_ctx))) return FALSE; - } + DEBUG(D_transport) debug_printf("using regex for file sizes: %s\n", ob->quota_size_regex); } @@ -2302,23 +2293,14 @@ else if (ob->maildir_use_size_file) { const pcre2_code * dir_regex = NULL; - PCRE2_SIZE offset; - int err; if (ob->maildir_dir_regex) { int check_path_len = Ustrlen(check_path); - if (!(dir_regex = pcre2_compile((PCRE2_SPTR)ob->maildir_dir_regex, - PCRE2_ZERO_TERMINATED, PCRE_COPT, &err, &offset, pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - addr->message = string_sprintf("appendfile: regular expression " - "error: %s at offset %ld while compiling %s", errbuf, (long)offset, - ob->maildir_dir_regex); + if (!(dir_regex = regex_compile(ob->maildir_dir_regex, + MCS_NOFLAGS, &addr->message, pcre_gen_cmp_ctx))) return FALSE; - } DEBUG(D_transport) debug_printf("using regex for maildir directory selection: %s\n", diff --git a/src/src/transports/smtp.c b/src/src/transports/smtp.c index 4450d948d..6eee04d03 100644 --- a/src/src/transports/smtp.c +++ b/src/src/transports/smtp.c @@ -275,7 +275,7 @@ struct list for (struct list * l = list; l < list + nelem(list); l++) if (!*l->re) - *l->re = regex_must_compile(l->string, FALSE, TRUE); + *l->re = regex_must_compile(l->string, MCS_NOFLAGS, TRUE); } @@ -1000,7 +1000,7 @@ uschar authnum; unsigned short authbits = 0; if (!sx->esmtp) return 0; -if (!regex_AUTH) regex_AUTH = regex_must_compile(AUTHS_REGEX, FALSE, TRUE); +if (!regex_AUTH) 
regex_AUTH = regex_must_compile(AUTHS_REGEX, MCS_NOFLAGS, TRUE); if (!regex_match_and_setup(regex_AUTH, sx->buffer, 0, -1)) return 0; expand_nmax = -1; /* reset */ names = string_copyn(expand_nstring[1], expand_nlength[1]); @@ -1563,7 +1563,7 @@ f.smtp_authenticated = FALSE; client_authenticator = client_authenticated_id = client_authenticated_sender = NULL; if (!regex_AUTH) - regex_AUTH = regex_must_compile(AUTHS_REGEX, FALSE, TRUE); + regex_AUTH = regex_must_compile(AUTHS_REGEX, MCS_NOFLAGS, TRUE); /* Is the server offering AUTH? */ diff --git a/src/src/transports/tf_maildir.c b/src/src/transports/tf_maildir.c index 6bff1eb69..205ee41cb 100644 --- a/src/src/transports/tf_maildir.c +++ b/src/src/transports/tf_maildir.c @@ -142,22 +142,13 @@ a subfolder, and should ensure that a maildirfolder file exists. */ if (maildirfolder_create_regex) { - int err; - PCRE2_SIZE offset; const pcre2_code * re; DEBUG(D_transport) debug_printf("checking for maildirfolder requirement\n"); - if (!(re = pcre2_compile((PCRE2_SPTR)maildirfolder_create_regex, - PCRE2_ZERO_TERMINATED, PCRE_COPT, &err, &offset, pcre_gen_cmp_ctx))) - { - uschar errbuf[128]; - pcre2_get_error_message(err, errbuf, sizeof(errbuf)); - addr->message = string_sprintf("appendfile: regular expression " - "error: %s at offset %ld while compiling %s", errbuf, (long)offset, - maildirfolder_create_regex); + if (!(re = regex_compile(maildirfolder_create_regex, + MCS_NOFLAGS, &addr->message, pcre_gen_cmp_ctx))) return FALSE; - } if (regex_match(re, path, -1, NULL)) { diff --git a/src/src/verify.c b/src/src/verify.c index b4c2b9a8f..afc18d553 100644 --- a/src/src/verify.c +++ b/src/src/verify.c @@ -3074,7 +3074,7 @@ digits, full stops, and hyphens (the constituents of domain names). Allow underscores, as they are all too commonly found. Sigh. Also, if allow_utf8_domains is set, allow top-bit characters. */ -for (t = ss; *t != 0; t++) +for (t = ss; *t; t++) if (!isalnum(*t) && *t != '.' 
&& *t != '-' && *t != '_' && (!allow_utf8_domains || *t < 128)) break; @@ -3082,7 +3082,7 @@ for (t = ss; *t != 0; t++) its IP address and match against that. Note that a multi-homed host will add items to the chain. */ -if (*t == 0) +if (!*t) { int rc; host_item h; @@ -3113,8 +3113,8 @@ outgoing hosts, the name is always given explicitly. If it is NULL, it means we must use sender_host_name and its aliases, looking them up if necessary. */ if (cb->host_name) /* Explicit host name given */ - return match_check_string(cb->host_name, ss, -1, TRUE, TRUE, TRUE, - valueptr); + return match_check_string(cb->host_name, ss, -1, + MCS_PARTIAL | MCS_CASELESS | MCS_AT_SPECIAL | cb->flags, valueptr); /* Host name not given; in principle we need the sender host name and its aliases. However, for query-style lookups, we do not need the name if the @@ -3143,7 +3143,9 @@ if ((semicolon = Ustrchr(ss, ';'))) if (isquery) { - switch(match_check_string(US"", ss, -1, TRUE, TRUE, TRUE, valueptr)) + switch(match_check_string(US"", ss, -1, + MCS_PARTIAL| MCS_CASELESS| MCS_AT_SPECIAL | (cb->flags & MCS_CACHEABLE), + valueptr)) { case OK: return OK; case DEFER: return DEFER; @@ -3169,7 +3171,9 @@ if (!sender_host_name) /* Match on the sender host name, using the general matching function */ -switch(match_check_string(sender_host_name, ss, -1, TRUE, TRUE, TRUE, valueptr)) +switch(match_check_string(sender_host_name, ss, -1, + MCS_PARTIAL| MCS_CASELESS| MCS_AT_SPECIAL | (cb->flags & MCS_CACHEABLE), + valueptr)) { case OK: return OK; case DEFER: return DEFER; @@ -3179,7 +3183,9 @@ switch(match_check_string(sender_host_name, ss, -1, TRUE, TRUE, TRUE, valueptr)) aliases = sender_host_aliases; while (*aliases) - switch(match_check_string(*aliases++, ss, -1, TRUE, TRUE, TRUE, valueptr)) + switch(match_check_string(*aliases++, ss, -1, + MCS_PARTIAL| MCS_CASELESS| MCS_AT_SPECIAL | (cb->flags & MCS_CACHEABLE), + valueptr)) { case OK: return OK; case DEFER: return DEFER; @@ -3255,8 +3261,8 @@ rc = 
match_check_list( check_host, /* function for testing */ &cb, /* argument for function */ MCL_HOST, /* type of check */ - (host_address == sender_host_address)? - US"host" : host_address, /* text for debugging */ + host_address == sender_host_address + ? US"host" : host_address, /* text for debugging */ valueptr); /* where to pass back data */ deliver_host_address = save_host_address; return rc; -- cgit v1.2.3