summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Tony Finch <dot@dotat.at> 2011-06-13 21:48:24 +0100
committer: Tony Finch <dot@dotat.at> 2011-06-17 16:53:05 +0100
commit: c99ce5c9a3ff397497892a741079be2edf385de2 (patch)
tree: ff83bc7b9fc75a4555e5ae7560e5af5d08032eba /src
parent: 921b12ca0c361b9c543368edf057712afa02ca14 (diff)
Improved ratelimit ACL condition.
Replace /noupdate with simpler /readonly option. (/noupdate is supported for backwards compatibility but no longer documented.) Better checking of the compatibility between per_* options and the ACL in which the ratelimit condition appears. Better handling of the start of a burst of email and of very low-rate clients. The new /count= option generalizes the per_byte and per_rcpt options. The new /unique= option is a rather groovy use for a Bloom filter.
Diffstat (limited to 'src')
-rw-r--r-- src/OS/Makefile-Base 4
-rw-r--r-- src/src/acl.c 458
-rw-r--r-- src/src/dbstuff.h 9
-rw-r--r-- src/src/exim_dbutil.c 155
4 files changed, 479 insertions, 147 deletions
diff --git a/src/OS/Makefile-Base b/src/OS/Makefile-Base
index 29a6ad371..474ab8a85 100644
--- a/src/OS/Makefile-Base
+++ b/src/OS/Makefile-Base
@@ -361,10 +361,10 @@ exim_dumpdb: $(OBJ_DUMPDB)
OBJ_FIXDB = exim_fixdb.o util-os.o util-store.o
-exim_fixdb: $(OBJ_FIXDB)
+exim_fixdb: $(OBJ_FIXDB) auths/auths.a
@echo "$(LNCC) -o exim_fixdb"
$(FE)$(LNCC) $(CFLAGS) $(INCLUDE) -o exim_fixdb $(LFLAGS) $(OBJ_FIXDB) \
- $(LIBS) $(EXTRALIBS) $(DBMLIB)
+ auths/auths.a $(LIBS) $(EXTRALIBS) $(DBMLIB)
@if [ x"$(STRIP_COMMAND)" != x"" ]; then \
echo $(STRIP_COMMAND) exim_fixdb; \
$(STRIP_COMMAND) exim_fixdb; \
diff --git a/src/src/acl.c b/src/src/acl.c
index 4ad2b01b9..68b45d8c7 100644
--- a/src/src/acl.c
+++ b/src/src/acl.c
@@ -664,6 +664,25 @@ static uschar *csa_reason_string[] = {
US"failed (client address mismatch)"
};
+/* Options for the ratelimit condition. Note that there are two variants of
+the per_rcpt option, depending on the ACL that is used to measure the rate.
+However any ACL must be able to look up per_rcpt rates in /noupdate mode,
+so the two variants must have the same internal representation as well as
+the same configuration string. */
+
+enum {
+ RATE_PER_WHAT, RATE_PER_CLASH, RATE_PER_ADDR, RATE_PER_BYTE, RATE_PER_CMD,
+ RATE_PER_CONN, RATE_PER_MAIL, RATE_PER_RCPT, RATE_PER_ALLRCPTS
+};
+
+#define RATE_SET(var,new) \
+ (((var) == RATE_PER_WHAT) ? ((var) = RATE_##new) : ((var) = RATE_PER_CLASH))
+
+static uschar *ratelimit_option_string[] = {
+ US"?", US"!", US"per_addr", US"per_byte", US"per_cmd",
+ US"per_conn", US"per_mail", US"per_rcpt", US"per_rcpt"
+};
+
/* Enable recursion between acl_check_internal() and acl_check_condition() */
static int acl_check_internal(int, address_item *, uschar *, int, uschar **,
@@ -2078,6 +2097,41 @@ return d->value;
+
+/*************************************************
+* Return a ratelimit error *
+*************************************************/
+
+/* Called from acl_ratelimit() below. Formats the supplied message and stores
+it via log_msgptr, prefixed to identify the "ratelimit" condition.
+
+Arguments:
+ log_msgptr for error messages
+ format format string
+ ... supplementary arguments
+
+Returns: ERROR
+*/
+
+static int
+ratelimit_error(uschar **log_msgptr, const char *format, ...)
+{
+va_list ap;
+uschar buffer[STRING_SPRINTF_BUFFER_SIZE];
+va_start(ap, format);
+if (!string_vformat(buffer, sizeof(buffer), format, ap))
+ log_write(0, LOG_MAIN|LOG_PANIC_DIE,
+ "string_sprintf expansion was longer than %d", sizeof(buffer));
+va_end(ap);
+*log_msgptr = string_sprintf(
+ "error in arguments to \"ratelimit\" condition: %s", buffer);
+return ERROR;
+}
+
+
+
+
/*************************************************
* Handle rate limiting *
*************************************************/
@@ -2104,23 +2158,27 @@ Returns: OK - Sender's rate is above limit
static int
acl_ratelimit(uschar *arg, int where, uschar **log_msgptr)
{
-double limit, period;
+double limit, period, count;
uschar *ss;
uschar *key = NULL;
+uschar *unique = NULL;
int sep = '/';
-BOOL leaky = FALSE, strict = FALSE, noupdate = FALSE;
-BOOL per_byte = FALSE, per_cmd = FALSE, per_conn = FALSE, per_mail = FALSE;
+BOOL leaky = FALSE, strict = FALSE, readonly = FALSE;
+BOOL noupdate = FALSE, badacl = FALSE;
+int mode = RATE_PER_WHAT;
int old_pool, rc;
tree_node **anchor, *t;
open_db dbblock, *dbm;
+int dbdb_size;
dbdata_ratelimit *dbd;
+dbdata_ratelimit_unique *dbdb;
struct timeval tv;
/* Parse the first two options and record their values in expansion
variables. These variables allow the configuration to have informative
error messages based on rate limits obtained from a table lookup. */
-/* First is the maximum number of messages per period and maximum burst
+/* First is the maximum number of messages per period / maximum burst
size, which must be greater than or equal to zero. Zero is useful for
rate measurement as opposed to rate limiting. */
@@ -2134,15 +2192,11 @@ else
else if (tolower(*ss) == 'm') { limit *= 1024.0*1024.0; ss++; }
else if (tolower(*ss) == 'g') { limit *= 1024.0*1024.0*1024.0; ss++; }
}
-if (limit < 0.0 || *ss != 0)
- {
- *log_msgptr = string_sprintf("syntax error in argument for "
- "\"ratelimit\" condition: \"%s\" is not a positive number",
- sender_rate_limit);
- return ERROR;
- }
+if (limit < 0.0 || *ss != '\0')
+ return ratelimit_error(log_msgptr,
+ "\"%s\" is not a positive number", sender_rate_limit);
-/* Second is the rate measurement period and exponential smoothing time
+/* Second is the rate measurement period / exponential smoothing time
constant. This must be strictly greater than zero, because zero leads to
run-time division errors. */
@@ -2150,15 +2204,15 @@ sender_rate_period = string_nextinlist(&arg, &sep, NULL, 0);
if (sender_rate_period == NULL) period = -1.0;
else period = readconf_readtime(sender_rate_period, 0, FALSE);
if (period <= 0.0)
- {
- *log_msgptr = string_sprintf("syntax error in argument for "
- "\"ratelimit\" condition: \"%s\" is not a time value",
- sender_rate_period);
- return ERROR;
- }
+ return ratelimit_error(log_msgptr,
+ "\"%s\" is not a time value", sender_rate_period);
+
+/* By default we are counting one of something, but the per_rcpt,
+per_byte, and count options can change this. */
+
+count = 1.0;
-/* Parse the other options. Should we check if the per_* options are being
-used in ACLs where they don't make sense, e.g. per_mail in the connect ACL? */
+/* Parse the other options. */
while ((ss = string_nextinlist(&arg, &sep, big_buffer, big_buffer_size))
!= NULL)
@@ -2166,24 +2220,84 @@ while ((ss = string_nextinlist(&arg, &sep, big_buffer, big_buffer_size))
if (strcmpic(ss, US"leaky") == 0) leaky = TRUE;
else if (strcmpic(ss, US"strict") == 0) strict = TRUE;
else if (strcmpic(ss, US"noupdate") == 0) noupdate = TRUE;
- else if (strcmpic(ss, US"per_byte") == 0) per_byte = TRUE;
- else if (strcmpic(ss, US"per_cmd") == 0) per_cmd = TRUE;
- else if (strcmpic(ss, US"per_rcpt") == 0) per_cmd = TRUE; /* alias */
- else if (strcmpic(ss, US"per_conn") == 0) per_conn = TRUE;
- else if (strcmpic(ss, US"per_mail") == 0) per_mail = TRUE;
- else key = string_sprintf("%s", ss);
- }
-
-if (leaky + strict > 1 || per_byte + per_cmd + per_conn + per_mail > 1)
- {
- *log_msgptr = US"conflicting options for \"ratelimit\" condition";
- return ERROR;
+ else if (strcmpic(ss, US"readonly") == 0) readonly = TRUE;
+ else if (strcmpic(ss, US"per_cmd") == 0) RATE_SET(mode, PER_CMD);
+ else if (strcmpic(ss, US"per_conn") == 0)
+ {
+ RATE_SET(mode, PER_CONN);
+ if (where == ACL_WHERE_NOTSMTP || where == ACL_WHERE_NOTSMTP_START)
+ badacl = TRUE;
+ }
+ else if (strcmpic(ss, US"per_mail") == 0)
+ {
+ RATE_SET(mode, PER_MAIL);
+ if (where > ACL_WHERE_NOTSMTP) badacl = TRUE;
+ }
+ else if (strcmpic(ss, US"per_rcpt") == 0)
+ {
+ /* If we are running in the RCPT ACL, then we'll count the recipients
+ one by one, but if we are running when we have accumulated the whole
+ list then we'll add them all in one batch. */
+ if (where == ACL_WHERE_RCPT)
+ RATE_SET(mode, PER_RCPT);
+ else if (where >= ACL_WHERE_PREDATA && where <= ACL_WHERE_NOTSMTP)
+ RATE_SET(mode, PER_ALLRCPTS), count = (double)recipients_count;
+ else if (where == ACL_WHERE_MAIL || where > ACL_WHERE_NOTSMTP)
+ RATE_SET(mode, PER_RCPT), badacl = TRUE;
+ }
+ else if (strcmpic(ss, US"per_byte") == 0)
+ {
+ /* If we have not yet received the message data and there was no SIZE
+ declaration on the MAIL command, then it's safe to just use a value of
+ zero and let the recorded rate decay as if nothing happened. */
+ RATE_SET(mode, PER_MAIL);
+ if (where > ACL_WHERE_NOTSMTP) badacl = TRUE;
+ else count = message_size < 0 ? 0.0 : (double)message_size;
+ }
+ else if (strcmpic(ss, US"per_addr") == 0)
+ {
+ RATE_SET(mode, PER_RCPT);
+ if (where != ACL_WHERE_RCPT) badacl = TRUE, unique = "*";
+ else unique = string_sprintf("%s@%s", deliver_localpart, deliver_domain);
+ }
+ else if (strncmpic(ss, US"count=", 6) == 0)
+ {
+ uschar *e;
+ count = Ustrtod(ss+6, &e);
+ if (count < 0.0 || *e != '\0')
+ return ratelimit_error(log_msgptr,
+ "\"%s\" is not a positive number", ss);
+ }
+ else if (strncmpic(ss, US"unique=", 7) == 0)
+ unique = string_copy(ss + 7);
+ else if (key == NULL)
+ key = string_copy(ss);
+ else
+ key = string_sprintf("%s/%s", key, ss);
}
-/* Default option values */
-
-if (!strict) leaky = TRUE;
-if (!per_byte && !per_cmd && !per_conn) per_mail = TRUE;
+/* Sanity check. When the badacl flag is set the update mode must either
+be readonly (which is the default if it is omitted) or, for backwards
+compatibility, a combination of noupdate and strict or leaky. */
+
+if (mode == RATE_PER_CLASH)
+ return ratelimit_error(log_msgptr, "conflicting per_* options");
+if (leaky + strict + readonly > 1)
+ return ratelimit_error(log_msgptr, "conflicting update modes");
+if (badacl && (leaky || strict) && !noupdate)
+ return ratelimit_error(log_msgptr,
+ "\"%s\" must not have /leaky or /strict option in %s ACL",
+ ratelimit_option_string[mode], acl_wherenames[where]);
+
+/* Set the default values of any unset options. In readonly mode we
+perform the rate computation without any increment so that its value
+decays to eventually allow over-limit senders through. */
+
+if (noupdate) readonly = TRUE, leaky = strict = FALSE;
+if (badacl) readonly = TRUE;
+if (readonly) count = 0.0;
+if (!strict && !readonly) leaky = TRUE;
+if (mode == RATE_PER_WHAT) mode = RATE_PER_MAIL;
/* Create the lookup key. If there is no explicit key, use sender_host_address.
If there is no sender_host_address (e.g. -bs or acl_not_smtp) then we simply
@@ -2193,35 +2307,48 @@ are added to the key because they alter the meaning of the stored data. */
if (key == NULL)
key = (sender_host_address == NULL)? US"" : sender_host_address;
-key = string_sprintf("%s/%s/%s/%s",
+key = string_sprintf("%s/%s/%s%s",
sender_rate_period,
- per_byte? US"per_byte" :
- per_cmd? US"per_cmd" :
- per_mail? US"per_mail" : US"per_conn",
- strict? US"strict" : US"leaky",
+ ratelimit_option_string[mode],
+ unique == NULL ? "" : "unique/",
key);
-HDEBUG(D_acl) debug_printf("ratelimit condition limit=%.0f period=%.0f key=%s\n",
- limit, period, key);
+HDEBUG(D_acl)
+ debug_printf("ratelimit condition count=%.0f %.1f/%s\n", count, limit, key);
/* See if we have already computed the rate by looking in the relevant tree.
For per-connection rate limiting, store tree nodes and dbdata in the permanent
-pool so that they survive across resets. */
+pool so that they survive across resets. In readonly mode we only remember the
+result for the rest of this command in case a later command changes it. After
+this bit of logic the code is independent of the per_* mode. */
-anchor = NULL;
old_pool = store_pool;
-if (per_conn)
- {
+if (readonly)
+ anchor = &ratelimiters_cmd;
+else switch(mode) {
+case RATE_PER_CONN:
anchor = &ratelimiters_conn;
store_pool = POOL_PERM;
- }
-else if (per_mail || per_byte)
+ break;
+case RATE_PER_BYTE:
+case RATE_PER_MAIL:
+case RATE_PER_ALLRCPTS:
anchor = &ratelimiters_mail;
-else if (per_cmd)
+ break;
+case RATE_PER_ADDR:
+case RATE_PER_CMD:
+case RATE_PER_RCPT:
anchor = &ratelimiters_cmd;
+ break;
+default:
+ log_write(0, LOG_MAIN|LOG_PANIC_DIE,
+ "internal ACL error: unknown ratelimit mode %d", mode);
+ break;
+}
-if (anchor != NULL && (t = tree_search(*anchor, key)) != NULL)
+t = tree_search(*anchor, key);
+if (t != NULL)
{
dbd = t->data.ptr;
/* The following few lines duplicate some of the code below. */
@@ -2233,9 +2360,8 @@ if (anchor != NULL && (t = tree_search(*anchor, key)) != NULL)
return rc;
}
-/* We aren't using a pre-computed rate, so get a previously recorded
-rate from the database, update it, and write it back when required. If there's
-no previous rate for this key, create one. */
+/* We aren't using a pre-computed rate, so get a previously recorded rate
+from the database, which will be updated and written back if required. */
dbm = dbfn_open(US"ratelimit", O_RDWR, &dbblock, TRUE);
if (dbm == NULL)
@@ -2246,17 +2372,172 @@ if (dbm == NULL)
*log_msgptr = US"ratelimit database not available";
return DEFER;
}
-dbd = dbfn_read(dbm, key);
+dbdb = dbfn_read_with_length(dbm, key, &dbdb_size);
+dbd = NULL;
gettimeofday(&tv, NULL);
+if (dbdb != NULL)
+ {
+ /* Locate the basic ratelimit block inside the DB data. */
+ HDEBUG(D_acl) debug_printf("ratelimit found key in database\n");
+ dbd = &dbdb->dbd;
+
+ /* Forget the old Bloom filter if it is too old, so that we count each
+ repeating event once per period. We don't simply clear and re-use the old
+ filter because we want its size to change if the limit changes. Note that
+ we keep the dbd pointer for copying the rate into the new data block. */
+
+ if(unique != NULL && tv.tv_sec > dbdb->bloom_epoch + period)
+ {
+ HDEBUG(D_acl) debug_printf("ratelimit discarding old Bloom filter\n");
+ dbdb = NULL;
+ }
+
+ /* Sanity check. */
+
+ if(unique != NULL && dbdb_size < sizeof(*dbdb))
+ {
+ HDEBUG(D_acl) debug_printf("ratelimit discarding undersize Bloom filter\n");
+ dbdb = NULL;
+ }
+ }
+
+/* Allocate a new data block if the database lookup failed
+or the Bloom filter passed its age limit. */
+
+if (dbdb == NULL)
+ {
+ if (unique == NULL)
+ {
+ /* No Bloom filter. This basic ratelimit block is initialized below. */
+ HDEBUG(D_acl) debug_printf("ratelimit creating new rate data block\n");
+ dbdb_size = sizeof(*dbd);
+ dbdb = store_get(dbdb_size);
+ }
+ else
+ {
+ int extra;
+ HDEBUG(D_acl) debug_printf("ratelimit creating new Bloom filter\n");
+
+ /* See the long comment below for an explanation of the magic number 2.
+ The filter has a minimum size in case the rate limit is very small;
+ this is determined by the definition of dbdata_ratelimit_unique. */
+
+ extra = (int)limit * 2 - sizeof(dbdb->bloom);
+ if (extra < 0) extra = 0;
+ dbdb_size = sizeof(*dbdb) + extra;
+ dbdb = store_get(dbdb_size);
+ dbdb->bloom_epoch = tv.tv_sec;
+ dbdb->bloom_size = sizeof(dbdb->bloom) + extra;
+ memset(dbdb->bloom, 0, dbdb->bloom_size);
+
+ /* Preserve any basic ratelimit data (which is our longer-term memory)
+ by copying it from the discarded block. */
+
+ if (dbd != NULL)
+ {
+ dbdb->dbd = *dbd;
+ dbd = &dbdb->dbd;
+ }
+ }
+ }
+
+/* If we are counting unique events, find out if this event is new or not.
+If the client repeats the event during the current period then it should not be
+counted again. We skip this code in readonly mode for efficiency, because any
+changes to the filter will be discarded and because count is already set to
+zero. */
+
+if (unique != NULL && !readonly)
+ {
+ /* We identify unique events using a Bloom filter. (You can find my
+ notes on Bloom filters at http://fanf.livejournal.com/81696.html)
+ With the per_addr option, an "event" is a recipient address, though the
+ user can use the unique option to define their own events. We only count
+ an event if we have not seen it before.
+
+ We size the filter according to the rate limit, which (in leaky mode)
+ is the limit on the population of the filter. We allow 16 bits of space
+ per entry (see the construction code above) and we set (up to) 8 of them
+ when inserting an element (see the loop below). The probability of a false
+ positive (an event we have not seen before but which we fail to count) is
+
+ size = limit * 16
+ numhash = 8
+ allzero = exp(-numhash * pop / size)
+ = exp(-0.5 * pop / limit)
+ fpr = pow(1 - allzero, numhash)
+
+ For senders at the limit the fpr is 0.06% or 1 in 1700
+ and for senders at half the limit it is 0.0006% or 1 in 170000
+
+ In strict mode the Bloom filter can fill up beyond the normal limit, in
+ which case the false positive rate will rise. This means that the
+ measured rate for very fast senders can bogusly drop off after a while.
+
+ At twice the limit, the fpr is 2.5% or 1 in 40
+ At four times the limit, it is 31% or 1 in 3.2
+
+ It takes ln(pop/limit) periods for an over-limit burst of pop events to
+ decay below the limit, and if this is more than one then the Bloom filter
+ will be discarded before the decay gets that far. The false positive rate
+ at this threshold is 9.3% or 1 in 10.7. */
+
+ BOOL seen;
+ unsigned n, hash, hinc;
+ uschar md5sum[16];
+ md5 md5info;
+
+ /* Instead of using eight independent hash values, we combine two values
+ using the formula h1 + n * h2. This does not harm the Bloom filter's
+ performance, and means the amount of hash we need is independent of the
+ number of bits we set in the filter. */
+
+ md5_start(&md5info);
+ md5_end(&md5info, unique, Ustrlen(unique), md5sum);
+ hash = md5sum[0] | md5sum[1] << 8 | md5sum[2] << 16 | md5sum[3] << 24;
+ hinc = md5sum[4] | md5sum[5] << 8 | md5sum[6] << 16 | md5sum[7] << 24;
+
+ /* Scan the bits corresponding to this event. A zero bit means we have
+ not seen it before. Ensure all bits are set to record this event. */
+
+ HDEBUG(D_acl) debug_printf("ratelimit checking uniqueness of %s\n", unique);
+
+ seen = TRUE;
+ for (n = 0; n < 8; n++, hash += hinc)
+ {
+ int bit = 1 << (hash % 8);
+ int byte = (hash / 8) % dbdb->bloom_size;
+ if ((dbdb->bloom[byte] & bit) == 0)
+ {
+ dbdb->bloom[byte] |= bit;
+ seen = FALSE;
+ }
+ }
+
+ /* If this event has occurred before, do not count it. */
+
+ if (seen)
+ {
+ HDEBUG(D_acl) debug_printf("ratelimit event found in Bloom filter\n");
+ count = 0.0;
+ }
+ else
+ HDEBUG(D_acl) debug_printf("ratelimit event added to Bloom filter\n");
+ }
+
+/* If there was no previous ratelimit data block for this key, initialize
+the new one, otherwise update the block from the database. The initial rate
+is what would be computed by the code below for an infinite interval. */
+
if (dbd == NULL)
{
- HDEBUG(D_acl) debug_printf("ratelimit initializing new key's data\n");
- dbd = store_get(sizeof(dbdata_ratelimit));
+ HDEBUG(D_acl) debug_printf("ratelimit initializing new key's rate data\n");
+ dbd = &dbdb->dbd;
dbd->time_stamp = tv.tv_sec;
dbd->time_usec = tv.tv_usec;
- dbd->rate = 0.0;
+ dbd->rate = count;
}
else
{
@@ -2317,59 +2598,58 @@ else
double i_over_p = interval / period;
double a = exp(-i_over_p);
+ /* Combine the instantaneous rate (period / interval) with the previous rate
+ using the smoothing factor a. In order to measure sized events, multiply the
+ instantaneous rate by the count of bytes or recipients etc. */
+
dbd->time_stamp = tv.tv_sec;
dbd->time_usec = tv.tv_usec;
-
- /* If we are measuring the rate in bytes per period, multiply the
- measured rate by the message size. If we don't know the message size
- then it's safe to just use a value of zero and let the recorded rate
- decay as if nothing happened. */
-
- if (per_byte)
- dbd->rate = (message_size < 0 ? 0.0 : (double)message_size)
- * (1 - a) / i_over_p + a * dbd->rate;
- else if (per_cmd && where == ACL_WHERE_NOTSMTP)
- dbd->rate = (double)recipients_count
- * (1 - a) / i_over_p + a * dbd->rate;
- else
- dbd->rate = (1 - a) / i_over_p + a * dbd->rate;
+ dbd->rate = (1 - a) * count / i_over_p + a * dbd->rate;
+
+ /* When events are very widely spaced the computed rate tends towards zero.
+ Although this is accurate it turns out not to be useful for our purposes,
+ especially when the first event after a long silence is the start of a spam
+ run. A more useful model is that the rate for an isolated event should be the
+ size of the event per the period size, ignoring the lack of events outside
+ the current period and regardless of where the event falls in the period. So,
+ if the interval was so long that the calculated rate is unhelpfully small, we
+ re-initialize the rate. In the absence of higher-rate bursts, the condition
+ below is true if the interval is greater than the period. */
+
+ if (dbd->rate < count) dbd->rate = count;
}
-/* Clients sending at the limit are considered to be over the limit. This
-matters for edge cases such the first message sent by a client (which gets
-the initial rate of 0.0) when the rate limit is zero (i.e. the client should
-be completely blocked). */
+/* Clients sending at the limit are considered to be over the limit.
+This matters for edge cases such as a limit of zero, when the client
+should be completely blocked. */
rc = (dbd->rate < limit)? FAIL : OK;
/* Update the state if the rate is low or if we are being strict. If we
are in leaky mode and the sender's rate is too high, we do not update
the recorded rate in order to avoid an over-aggressive sender's retry
-rate preventing them from getting any email through. If noupdate is set,
-do not do any updates. */
+rate preventing them from getting any email through. If readonly is set,
+neither leaky nor strict are set, so we do not do any updates. */
-if ((rc == FAIL || !leaky) && !noupdate)
+if ((rc == FAIL && leaky) || strict)
{
- dbfn_write(dbm, key, dbd, sizeof(dbdata_ratelimit));
+ dbfn_write(dbm, key, dbdb, dbdb_size);
HDEBUG(D_acl) debug_printf("ratelimit db updated\n");
}
else
{
HDEBUG(D_acl) debug_printf("ratelimit db not updated: %s\n",
- noupdate? "noupdate set" : "over the limit, but leaky");
+ readonly? "readonly mode" : "over the limit, but leaky");
}
dbfn_close(dbm);
-/* Store the result in the tree for future reference, if necessary. */
+/* Store the result in the tree for future reference. */
-if (anchor != NULL && !noupdate)
- {
- t = store_get(sizeof(tree_node) + Ustrlen(key));
- t->data.ptr = dbd;
- Ustrcpy(t->name, key);
- (void)tree_insertnode(anchor, t);
- }
+t = store_get(sizeof(tree_node) + Ustrlen(key));
+t->data.ptr = dbd;
+Ustrcpy(t->name, key);
+(void)tree_insertnode(anchor, t);
/* We create the formatted version of the sender's rate very late in
order to ensure that it is done using the correct storage pool. */
diff --git a/src/src/dbstuff.h b/src/src/dbstuff.h
index 94c9b97ad..819e46609 100644
--- a/src/src/dbstuff.h
+++ b/src/src/dbstuff.h
@@ -654,5 +654,14 @@ typedef struct {
double rate; /* Smoothed sending rate at that time */
} dbdata_ratelimit;
+/* Same as above, plus a Bloom filter for uniquifying events. */
+
+typedef struct {
+ dbdata_ratelimit dbd;
+ time_t bloom_epoch; /* When the Bloom filter was last reset */
+ unsigned bloom_size; /* Number of bytes in the Bloom filter */
+ uschar bloom[40]; /* Bloom filter which may be larger than this */
+} dbdata_ratelimit_unique;
+
/* End of dbstuff.h */
diff --git a/src/src/exim_dbutil.c b/src/src/exim_dbutil.c
index 1e94f33e6..cf66717c1 100644
--- a/src/src/exim_dbutil.c
+++ b/src/src/exim_dbutil.c
@@ -27,38 +27,7 @@ There are a number of common subroutines, followed by three main programs,
whose inclusion is controlled by -D on the compilation command. */
-/* Standard C headers and Unix headers */
-
-#include <ctype.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include <errno.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-
-
-/* These are two values from macros.h which should perhaps be accessible in
-some better way than just repeating them here. */
-
-#define WAIT_NAME_MAX 50
-#define MESSAGE_ID_LENGTH 16
-
-
-/* This selection of Exim headers contains exactly what we need, and hopefully
-not too much extra baggage. */
-
-#include "config.h" /* Needed to get the DB type */
-#include "mytypes.h"
-#include "macros.h"
-#include "dbstuff.h"
-#include "osfunctions.h"
-#include "store.h"
+#include "exim.h"
/* Identifiers for the different database types. */
@@ -70,6 +39,10 @@ not too much extra baggage. */
#define type_ratelimit 5
+/* This is used by our cut-down dbfn_open(). */
+
+uschar *spool_directory;
+
/*************************************************
@@ -101,7 +74,7 @@ printf("Berkeley DB error: %s\n", msg);
* SIGALRM handler *
*************************************************/
-static int sigalrm_seen;
+SIGNAL_BOOL sigalrm_seen;
void
sigalrm_handler(int sig)
@@ -169,7 +142,7 @@ Returns: nothing
*/
void
-log_write(unsigned int selector, int flags, char *format, ...)
+log_write(unsigned int selector, int flags, const char *format, ...)
{
va_list ap;
va_start(ap, format);
@@ -266,18 +239,18 @@ uses. We assume the database exists, and therefore give up if we cannot open
the lock file.
Arguments:
- spool The spool directory
name The single-component name of one of Exim's database files.
flags O_RDONLY or O_RDWR
dbblock Points to an open_db block to be filled in.
+ lof Unused.
Returns: NULL if the open failed, or the locking failed.
On success, dbblock is returned. This contains the dbm pointer and
the fd of the locked lock file.
*/
-static open_db *
-dbfn_open(uschar *spool, uschar *name, int flags, open_db *dbblock)
+open_db *
+dbfn_open(uschar *name, int flags, open_db *dbblock, BOOL lof)
{
int rc;
struct flock lock_data;
@@ -288,7 +261,7 @@ uschar buffer[256];
ensures that Exim has exclusive use of the database before it even tries to
open it. If there is a database, there should be a lock file in existence. */
-sprintf(CS buffer, "%s/db/%s.lockfile", spool, name);
+sprintf(CS buffer, "%s/db/%s.lockfile", spool_directory, name);
dbblock->lockfd = Uopen(buffer, flags, 0);
if (dbblock->lockfd < 0)
@@ -323,7 +296,7 @@ if (rc < 0)
/* At this point we have an opened and locked separate lock file, that is,
exclusive access to the database, so we can go ahead and open it. */
-sprintf(CS buffer, "%s/db/%s", spool, name);
+sprintf(CS buffer, "%s/db/%s", spool_directory, name);
EXIM_DBOPEN(buffer, flags, 0, &(dbblock->dbptr));
if (dbblock->dbptr == NULL)
@@ -357,7 +330,7 @@ Argument: a pointer to an open database block
Returns: nothing
*/
-static void
+void
dbfn_close(open_db *dbblock)
{
EXIM_DBCLOSE(dbblock->dbptr);
@@ -384,7 +357,7 @@ Returns: a pointer to the retrieved record, or
NULL if the record is not found
*/
-static void *
+void *
dbfn_read_with_length(open_db *dbblock, uschar *key, int *length)
{
void *yield;
@@ -424,7 +397,7 @@ Returns: the yield of the underlying dbm or db "write" function. If this
is dbm, the value is zero for OK.
*/
-static int
+int
dbfn_write(open_db *dbblock, uschar *key, void *ptr, int length)
{
EXIM_DATUM key_datum, value_datum;
@@ -454,7 +427,7 @@ Arguments:
Returns: the yield of the underlying dbm or db "delete" function.
*/
-static int
+int
dbfn_delete(open_db *dbblock, uschar *key)
{
EXIM_DATUM key_datum;
@@ -485,7 +458,7 @@ Returns: the next record from the file, or
NULL if there are no more
*/
-static uschar *
+uschar *
dbfn_scan(open_db *dbblock, BOOL start, EXIM_CURSOR **cursor)
{
EXIM_DATUM key_datum, value_datum;
@@ -531,7 +504,8 @@ uschar keybuffer[1024];
/* Check the arguments, and open the database */
dbdata_type = check_args(argc, argv, US"dumpdb", US"");
-dbm = dbfn_open(argv[1], argv[2], O_RDONLY, &dbblock);
+spool_directory = argv[1];
+dbm = dbfn_open(argv[2], O_RDONLY, &dbblock, FALSE);
if (dbm == NULL) exit(1);
/* Scan the file, formatting the information for each entry. Note
@@ -545,6 +519,7 @@ while (key != NULL)
dbdata_wait *wait;
dbdata_callout_cache *callout;
dbdata_ratelimit *ratelimit;
+ dbdata_ratelimit_unique *rate_unique;
int count_bad = 0;
int i, length;
uschar *t;
@@ -673,12 +648,24 @@ while (key != NULL)
break;
case type_ratelimit:
- ratelimit = (dbdata_ratelimit *)value;
-
- printf("%s.%06d rate: %10.3f key: %s\n",
- print_time(ratelimit->time_stamp), ratelimit->time_usec,
- ratelimit->rate, keybuffer);
-
+ if (Ustrstr(key, "/unique/") != NULL && length >= sizeof(*rate_unique))
+ {
+ ratelimit = (dbdata_ratelimit *)value;
+ rate_unique = (dbdata_ratelimit_unique *)value;
+ printf("%s.%06d rate: %10.3f epoch: %s size: %u key: %s\n",
+ print_time(ratelimit->time_stamp),
+ ratelimit->time_usec, ratelimit->rate,
+ print_time(rate_unique->bloom_epoch), rate_unique->bloom_size,
+ keybuffer);
+ }
+ else
+ {
+ ratelimit = (dbdata_ratelimit *)value;
+ printf("%s.%06d rate: %10.3f key: %s\n",
+ print_time(ratelimit->time_stamp),
+ ratelimit->time_usec, ratelimit->rate,
+ keybuffer);
+ }
break;
}
store_reset(value);
@@ -752,6 +739,7 @@ for(;;)
dbdata_wait *wait;
dbdata_callout_cache *callout;
dbdata_ratelimit *ratelimit;
+ dbdata_ratelimit_unique *rate_unique;
int i, oldlength;
uschar *t;
uschar field[256], value[256];
@@ -788,7 +776,8 @@ for(;;)
if (field[0] != 0)
{
int verify = 1;
- dbm = dbfn_open(argv[1], argv[2], O_RDWR, &dbblock);
+ spool_directory = argv[1];
+ dbm = dbfn_open(argv[2], O_RDWR, &dbblock, FALSE);
if (dbm == NULL) continue;
if (Ustrcmp(field, "d") == 0)
@@ -895,7 +884,6 @@ for(;;)
case type_ratelimit:
ratelimit = (dbdata_ratelimit *)record;
- length = sizeof(dbdata_ratelimit);
switch(fieldno)
{
case 0:
@@ -911,6 +899,51 @@ for(;;)
ratelimit->rate = Ustrtod(value, NULL);
break;
+ case 3:
+ if (Ustrstr(name, "/unique/") != NULL
+ && oldlength >= sizeof(dbdata_ratelimit_unique))
+ {
+ rate_unique = (dbdata_ratelimit_unique *)record;
+ if ((tt = read_time(value)) > 0) rate_unique->bloom_epoch = tt;
+ else printf("bad time value\n");
+ break;
+ }
+ /* else fall through */
+
+ case 4:
+ case 5:
+ if (Ustrstr(name, "/unique/") != NULL
+ && oldlength >= sizeof(dbdata_ratelimit_unique))
+ {
+ /* see acl.c */
+ BOOL seen;
+ unsigned n, hash, hinc;
+ uschar md5sum[16];
+ md5 md5info;
+ md5_start(&md5info);
+ md5_end(&md5info, value, Ustrlen(value), md5sum);
+ hash = md5sum[0] << 0 | md5sum[1] << 8
+ | md5sum[2] << 16 | md5sum[3] << 24;
+ hinc = md5sum[4] << 0 | md5sum[5] << 8
+ | md5sum[6] << 16 | md5sum[7] << 24;
+ rate_unique = (dbdata_ratelimit_unique *)record;
+ seen = TRUE;
+ for (n = 0; n < 8; n++, hash += hinc)
+ {
+ int bit = 1 << (hash % 8);
+ int byte = (hash / 8) % rate_unique->bloom_size;
+ if ((rate_unique->bloom[byte] & bit) == 0)
+ {
+ seen = FALSE;
+ if (fieldno == 5) rate_unique->bloom[byte] |= bit;
+ }
+ }
+ printf("%s %s\n",
+ seen ? "seen" : fieldno == 5 ? "added" : "unseen", value);
+ break;
+ }
+ /* else fall through */
+
default:
printf("unknown field number\n");
verify = 0;
@@ -919,7 +952,7 @@ for(;;)
break;
}
- dbfn_write(dbm, name, record, length);
+ dbfn_write(dbm, name, record, oldlength);
}
}
}
@@ -940,7 +973,8 @@ for(;;)
/* Handle a read request, or verify after an update. */
- dbm = dbfn_open(argv[1], argv[2], O_RDONLY, &dbblock);
+ spool_directory = argv[1];
+ dbm = dbfn_open(argv[2], O_RDONLY, &dbblock, FALSE);
if (dbm == NULL) continue;
record = dbfn_read_with_length(dbm, name, &oldlength);
@@ -1020,6 +1054,14 @@ for(;;)
printf("0 time stamp: %s\n", print_time(ratelimit->time_stamp));
printf("1 fract. time: .%06d\n", ratelimit->time_usec);
printf("2 sender rate: % .3f\n", ratelimit->rate);
+ if (Ustrstr(name, "/unique/") != NULL
+ && oldlength >= sizeof(dbdata_ratelimit_unique))
+ {
+ rate_unique = (dbdata_ratelimit_unique *)record;
+ printf("3 filter epoch: %s\n", print_time(rate_unique->bloom_epoch));
+ printf("4 test filter membership\n");
+ printf("5 add element to filter\n");
+ }
break;
}
}
@@ -1118,7 +1160,8 @@ database */
oldest = time(NULL) - maxkeep;
printf("Tidying Exim hints database %s/db/%s\n", argv[1], argv[2]);
-dbm = dbfn_open(argv[1], argv[2], O_RDWR, &dbblock);
+spool_directory = argv[1];
+dbm = dbfn_open(argv[2], O_RDWR, &dbblock, FALSE);
if (dbm == NULL) exit(1);
/* Prepare for building file names */