#!/bin/sh -eu
# File: src/util/renew-opendmarc-tlds.sh
# Copyright (c) The Exim Maintainers 2022
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Short version of this script:
#   curl -f -o /var/cache/exim/opendmarc.tlds https://publicsuffix.org/list/public_suffix_list.dat
# but run as Exim runtime user, writing to a place it can write to, and with
# sanity checks and atomic replacement.
#
# For now, we deliberately leave the invalid file around for analysis
# with .<pid> suffix.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Create a cron-job as the Exim run-time user to invoke this daily, with a
# single parameter, 'cron'.  Eg:
#
#    3 4 * * *	  /usr/local/sbin/renew-opendmarc-tlds.sh cron
#
# That will, at 3 minutes past the 4th hour (in whatever timezone cron is
# running it) invoke this script with 'cron'; we will then sleep between 10 and
# 49 seconds, before continuing.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# This should be "pretty portable"; the only things it depends upon are:
#  * a POSIX shell which additionally implements 'local' (dash works)
#  * the 'curl' command; change the fetch_candidate() function to replace that
#  * the 'stat' command, to get the size of a file; else Perl
#    + change size_of() if need be; it's defined per-OS
#  * the 'hexdump' command and /dev/urandom existing
#    + used when invoked with 'cron', to avoid retrieving on a minute boundary
#      and contending with many other automated systems.
#    + with bash/zsh, can replace with: $(( 10 + ( RANDOM % 40 ) ))
#    + on Debian/Ubuntu systems, hexdump is in the 'bsdmainutils' package.

# User-Agent string sent by curl when fetching the list.
# Consider putting an email address inside the parentheses, something like
# noc@example.org or other reachable address, so that if something goes wrong
# and the server operators need to step in, they can see from logs who to
# contact instead of just blocking your IP:
readonly CurlUserAgent='renew-opendmarc-tlds/0.1 (distributed with Exim)'

# The script refuses to run unless `id -un` reports exactly this user.
# change this to your Exim run-time user (exim -n -bP exim_user) :
readonly RuntimeUser='_exim'

# Directory holding both the live file and the temporary download.
# Do not make this a directory which untrusted users can write to:
readonly StateDir='/var/cache/exim'

# Where the public suffix list is downloaded from.
readonly URL='https://publicsuffix.org/list/public_suffix_list.dat'

# File name, inside StateDir, of the live list; the download is written next
# to it with a ".<pid>" suffix and then renamed over it.
readonly TargetShortFile='opendmarc.tlds'

# When replacing, new file must be at least this percentage the size of
# the old one or it's an error.
# (The spelling "Ration" is historical; the size check refers to the variable
# by this exact name, so do not rename it here alone.)
readonly MinNewSizeRation=90

# Each of these regexps must be matched by the file, or it's an error.
# The value is a whitespace-separated list: it is expanded unquoted and each
# resulting word is handed to grep as one pattern (grep's default, basic,
# regular expression syntax).  Because the expansion is unquoted, the words
# are also subject to filename expansion, so avoid the characters * ? [ here.
readonly MustExistRegexps='
  ^ac\.uk$
  ^org$
  ^tech$
  '

# =======================8< end of configuration >8=======================

# Abort on any unhandled command failure and on use of an unset variable.
set -o errexit -o nounset

# Live file, and the per-process scratch file the download is written to
# before being renamed into place.
FullTargetPath="${StateDir}/${TargetShortFile}"
WorkingFile="${FullTargetPath}.$$"
readonly FullTargetPath WorkingFile

# Name under which the script was invoked; prefixes every diagnostic.
progname="$(basename "$0")"

# note WORDS...: write "<progname>: <words joined by spaces>" to stderr.
note() {
	printf '%s: %s\n' "$progname" "$*" 1>&2
}

# die WORDS...: report the message via note, then exit with status 1.
die() {
	note "$@"
	exit 1
}

# Refuse to run as any account other than the configured run-time user;
# this guards against stomping on file-permissions.
if [ ".$(id -un)" != ".${RuntimeUser:?}" ]; then
	die "must be invoked as ${RuntimeUser}"
fi

# Download $URL into $WorkingFile with curl, identifying ourselves with
# $CurlUserAgent.  curl is told to fail on HTTP errors and to stay silent
# apart from error messages.  Returns curl's exit status.
# Replace the body of this function to use a different fetcher.
fetch_candidate() {
	curl \
	  --fail --silent --show-error \
	  --user-agent "$CurlUserAgent" \
	  --output "${WorkingFile}" \
	  "${URL}"
}

# Define size_of FILE: print FILE's size in bytes on stdout and return
# non-zero when FILE cannot be stat'ed (callers rely on that status to detect
# a missing file).  The implementation is picked once, from the kernel name
# reported by uname, because stat(1) has no portable option syntax.
case $(uname -s) in
*BSD|Darwin)
	size_of() { stat -f %z "$1"; }
	;;
Linux)
	size_of() { stat -c %s "$1"; }
	;;
*)
	# why do we live in a world where Perl is the safe portable solution
	# to getting the size of a file?
	#
	# Perl's stat returns an empty list on failure; simply printing element 7
	# of that would emit an empty line and exit 0.  Fail explicitly instead,
	# so this variant reports a missing file the same way stat(1) does.
	size_of() {
		perl -le 'my @st = stat($ARGV[0]) or die "stat $ARGV[0]: $!\n"; print $st[7]' -- "$1"
	}
	;;
esac

# Decide whether the freshly fetched $WorkingFile may replace $FullTargetPath.
#
# Checks, in order:
#   1. every regexp listed in $MustExistRegexps matches the candidate;
#   2. if a usable previous file exists, the candidate is at least
#      $MinNewSizeRation percent of its size.
#
# Returns 0 when the candidate is acceptable; otherwise reports the reason
# through die, which exits the script with status 1.
sanity_check_candidate() {
	local new_size prev_size ratio re
	new_size="$(size_of "$WorkingFile")"

	# Deliberately unquoted: the list is split into one regexp per word.
	for re in $MustExistRegexps; do
		grep -qs "$re" -- "$WorkingFile" || \
		  die "regexp $re not found in $WorkingFile"
	done

	if ! prev_size="$(size_of "$FullTargetPath")"; then
		note "missing previous file, can't size-compare: $FullTargetPath"
		# We're sane by definition, probably initial fetch, and the
		# stat failure and this note will be printed.  That's fine; if
		# a cron invocation is missing the file then something has gone
		# badly wrong.
		return 0
	fi

	# An empty (or unsizable) previous file gives nothing to compare against
	# and would otherwise mean dividing by zero on every subsequent run.
	if ! [ "$prev_size" -gt 0 ] 2>/dev/null; then
		note "previous file has no usable size, can't size-compare: $FullTargetPath"
		return 0
	fi

	# Shell arithmetic rather than expr(1): expr exits non-zero whenever its
	# result is 0, which under 'set -e' would end the script silently for a
	# candidate smaller than 1% of the old file, before die could explain why.
	ratio=$(( ${new_size} * 100 / ${prev_size} ))
	if [ "$ratio" -lt "$MinNewSizeRation" ]; then
		die "New $TargetShortFile candidate only ${ratio}% size of old; $new_size vs $prev_size"
	fi
}

# When the first argument is 'cron', consume it and pause before fetching so
# that the request does not land exactly on a minute boundary.
case "${1:-.}" in
cron)
	shift
	# One byte from /dev/urandom, printed in decimal by hexdump, is reduced
	# modulo 40 and offset by 10 to give the number of seconds to sleep.
	sleep $(( ($(dd if=/dev/urandom bs=1 count=1 2>/dev/null | hexdump -e '1/1 "%u"') % 40) + 10))
	;;
esac

# Fetch a candidate, vet it, and only then rename it over the live file.
# The rename stays within one directory, so readers see either the old list
# or the new one.  The umask makes the new file group/world readable but
# writable only by its owner.
refresh_list() {
	umask 022
	fetch_candidate
	sanity_check_candidate
	mv -- "$WorkingFile" "$FullTargetPath"
}
refresh_list