summaryrefslogtreecommitdiff
path: root/src/util/ratelimit.pl
blob: 749125c4f80f5a3b13d423eaae70bfd9a8ba5394 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/perl -wT

use strict;

sub usage () {
  print <<END;
usage: ratelimit.pl <period> <regex> logfile

The aim of this script is to compute clients' peak sending rates
from an Exim log file, using the same formula as Exim's ratelimit
ACL condition. This is so that you can get an idea of a reasonable
limit setting before you deploy the restrictions.

This script isn't perfectly accurate, because the time stamps in
Exim's log files are only accurate to a second whereas internally
Exim computes sender rates to the accuracy of your computer's clock
(typically 10ms).

The log files to be processed can be specified on the command line
after the other arguments; if no filenames are specified the script
will read from stdin.

The first command line argument is the smoothing period, as defined by
the documentation for the ratelimit ACL condition. The second argumetn
is a regular expression.

Each line is matched against the regular expression. Lines that do not
match are ignored. The regex may contain 0, 1, or 2 () capturing
sub-expressions.

If there are no () sub-expressions, then every line that matches is
used to compute a single rate. Its maximum value is reported when the
script finishes.

If there is one () sub-expression, then the text matched by the
sub-expression is used to identify a rate lookup key, similar to the
lookup key used by the ratelimit ACL condition. For example, you might
write a regex to match the client IP address, or the authenticated
username. Separate rates are computed for each different client and
the maximum rate for each client is reported when the script finishes.

If there are two () sub-expressions, then the text matched by the
first sub-expression is used to identify a rate lookup key as above,
and the second is used to match the message size recorded in the log
line, e.g. " S=(\\d+) ". In this case the byte rate is computed instead
of the message rate, similar to the per_byte option of the ratelimit
ACL condition.
END
  exit 1;
}

sub iso2unix (@) {
  my ($y,$m,$d,$H,$M,$S,$zs,$zh,$zm) = @_;
  use integer;
  $y -= $m < 3;
  $m += $m < 3 ? 10 : -2;
  my $z = defined $zs ? "${zs}1" * ($zh * 60 + $zm) : 0;
  my $t = $y/400 - $y/100 + $y/4 + $y*365
        + $m*367/12 + $d - 719499;
  return $t * 86400
       + $H * 3600
       + $M * 60
       + $S
       - $z;
}

my $debug = 0;
my $progress = 0;
while (@ARGV && $ARGV[0] =~ /^-\w+$/) {
  $debug = 1    if $ARGV[0] =~ s/(-\w*)d(\w*)/$1$2/;
  $progress = 1 if $ARGV[0] =~ s/(-\w*)p(\w*)/$1$2/;
  shift if $ARGV[0] eq "-";
}

usage if @ARGV < 2;

my $progtime = "";

my $period = shift;

my $re_txt = shift;
my $re = qr{$re_txt}o;

my %time;
my %rate;
my %max;

sub debug ($) {
  my $key = shift;
  printf "%s\t%12d %8s %5.2f %5.2f\n",
    $_, $time{$key}, $key, $max{$key}, $rate{$key};
}

while (<>) {
  next unless $_ =~ $re;
  my $key = $1 || "";
  my $size = $2 || 1.0;
  my $time = iso2unix
    ($_ =~ m{^(\d{4})-(\d\d)-(\d\d)[ ]
              (\d\d):(\d\d):(\d\d)[ ]
              (?:([+-])(\d\d)(\d\d)[ ])?
            }x);
  if ($progress) {
    my $prog_now = substr $_, 0, 14;
    if ($progtime ne $prog_now) {
      $progtime = $prog_now;
      print "$progtime\n";
    }
  }
  if (not defined $time{$key}) {
    $time{$key} = $time;
    $rate{$key} = 0.0;
    $max{$key} = 0.0;
    debug $key if $debug;
    next;
  }
  # see acl_ratelimit() for details of the following
  my $interval = $time - $time{$key};
  $interval = 1e-9 if $interval <= 0.0;
  my $i_over_p = $interval / $period;
  my $a = exp(-$i_over_p);
  $time{$key} = $time;
  $rate{$key} = $size * (1.0 - $a) / $i_over_p + $a * $rate{$key};
  $max{$key} = $rate{$key} if $rate{$key} > $max{$key};
  debug $key if $debug;
}

print map {
  " " x (20 - length) .
  "$_ : $max{$_}\n"
} sort {
  $max{$a} <=> $max{$b}
} keys %max;

# eof