diff options
author | Heiko Schlittermann (HS12) <hs@schlittermann.de> | 2015-05-10 16:01:44 +0200 |
---|---|---|
committer | Heiko Schlittermann (HS12) <hs@schlittermann.de> | 2015-05-10 16:29:03 +0200 |
commit | 263c04a6b6ad8a18c9cdb7da847b695f0a8d6787 (patch) | |
tree | 47bc72014cf1fec21e0a96eb54411f5a0d4fe2a3 /doc | |
parent | d2a2c69b7b97d080d63dfb434584d98eb3228332 (diff) |
Docs: Make build Unicode-resistant
Force LC_ALL=C for spec.txt. Add an additional build target:
spec.utf8.
Diffstat (limited to 'doc')
-rw-r--r-- | doc/doc-docbook/.gitignore | 1 | ||||
-rwxr-xr-x | doc/doc-docbook/GenLocalParams | 8 | ||||
-rw-r--r-- | doc/doc-docbook/Makefile | 40 | ||||
-rwxr-xr-x | doc/doc-docbook/Tidytxt | 67 |
4 files changed, 81 insertions, 35 deletions
diff --git a/doc/doc-docbook/.gitignore b/doc/doc-docbook/.gitignore index ae93d1875..62828bf6f 100644 --- a/doc/doc-docbook/.gitignore +++ b/doc/doc-docbook/.gitignore @@ -3,6 +3,7 @@ spec*.xml spec.ps spec.pdf spec.txt +spec.utf8 filter*.xml filter.ps filter.pdf diff --git a/doc/doc-docbook/GenLocalParams b/doc/doc-docbook/GenLocalParams index 140890880..fc8e7fc58 100755 --- a/doc/doc-docbook/GenLocalParams +++ b/doc/doc-docbook/GenLocalParams @@ -5,8 +5,16 @@ output="${1:-local_params}" nicedate="$(date +"%d %b %Y")" +if which locale >/dev/null; then + charset="$(locale | grep ^LC_CTYPE=)" +else + charset=unknown +fi + exec > "$output" cat <<EOTEMPLATE +. # if this file changes, the build process will rebuild everything +. # locale's charset $charset .macro version ${EXIM_VER} .endmacro diff --git a/doc/doc-docbook/Makefile b/doc/doc-docbook/Makefile index 14bea8e6b..f456f80c9 100644 --- a/doc/doc-docbook/Makefile +++ b/doc/doc-docbook/Makefile @@ -2,7 +2,7 @@ notarget:; @echo "** You must specify a target, in the form x.y, where x is 'filter', 'spec'," @echo "** or 'test', and y is 'xml', 'fo', 'ps', 'pdf', 'html', 'txt', or 'info'." - @echo "** One other possible target is 'exim.8'". + @echo "** One other possible targets 'exim.8', 'spec.utf8'". exit 1 @@ -23,9 +23,17 @@ exim.8: spec.xml x2man ######################################################################## -.PHONY: local_params -local_params: - ./GenLocalParams $@ +# .PHONY doesn't work here, because it forces a rebuild of all dependend +# targets, always. It sets the internal timestamp of its target to +# now(). 
+# But it may happen that local_params does not change +FORCE: +local_params: FORCE GenLocalParams + @set -e; \ + trap 'rm -f $$tmp' EXIT; \ + tmp=`mktemp`; \ + ./GenLocalParams $$tmp; \ + cmp -s $@ $$tmp || mv $$tmp $@ ############################### FILTER ################################# @@ -88,7 +96,7 @@ filter.txt: filter-txt.xml Tidytxt MyStyle-txt-html.xsl MyStyle-html.xsl \ MyStyle.xsl /bin/rm -rf filter-txt.html xmlto -x MyStyle-txt-html.xsl html-nochunks filter-txt.xml - w3m -dump filter-txt.html | ./Tidytxt >filter.txt + LC_ALL=C w3m -dump filter-txt.html | ./Tidytxt >filter.txt ./SanityTestText filter.txt # I have not found a way of making docbook2texi write its output anywhere @@ -107,8 +115,8 @@ filter.info: filter-info.xml ################################ SPEC ################################## -spec.xml: local_params spec.xfpt - xfpt spec.xfpt +spec.xml: spec.xfpt local_params + xfpt $< spec-pr.xml: spec.xml Pre-xml ./Pre-xml -optbreak <spec.xml >spec-pr.xml @@ -165,13 +173,21 @@ spec.pdf: sdop-spec.pdf ### ### -spec.txt: spec-txt.xml Tidytxt MyStyle-txt-html.xsl MyStyle-html.xsl \ - MyStyle.xsl - /bin/rm -rf spec-txt.html - xmlto -x MyStyle-txt-html.xsl html-nochunks spec-txt.xml - w3m -dump spec-txt.html | ./Tidytxt >spec.txt +spec-txt.html: spec-txt.xml \ + MyStyle-txt-html.xsl MyStyle-html.xsl MyStyle.xsl + xmlto -x MyStyle-txt-html.xsl html-nochunks $< + +spec.utf8: spec-txt.html Tidytxt + @grep -iq 'LC_CTYPE=.*utf-\?8' local_params || { \ + echo 'your current locale does not support UTF-8' >&2; \ + false; } + w3m -dump $< | ./Tidytxt -utf8 >$@ + +spec.txt: spec-txt.html Tidytxt + LC_ALL=C w3m -dump $< | ./Tidytxt >$@ ./SanityTestText spec.txt + # I have not found a way of making docbook2texi write its output anywhere # other than the file name that it makes up. The --to-stdout option does not # work. 
diff --git a/doc/doc-docbook/Tidytxt b/doc/doc-docbook/Tidytxt index 9eb63dbcb..cfa692272 100755 --- a/doc/doc-docbook/Tidytxt +++ b/doc/doc-docbook/Tidytxt @@ -1,5 +1,20 @@ #! /usr/bin/perl +use strict; +use warnings; +use Getopt::Long; + + +# For now we can't rely on a perl >= 5.14 on +# the build sites, thus we throw away all unicode +# awarness and do the matching byte by byte +binmode STDIN; +binmode STDOUT; + +GetOptions( + 'u|utf8!' => \my $want_utf8, # do not replace unicode characters +) or die "Usage: $0 [-u|--utf8]\n"; + # Script to tidy up the output of w3m when it makes a text file. First we # convert sequences of blank lines into a single blank line, to get everything # uniform. Then we go through and insert blank lines before chapter and @@ -11,32 +26,36 @@ # (2) It uses U+25CF as its bullet character. # (3) It inserts a whole slew of "box drawing" characters round the heading. -@lines = <>; +my @lines = <>; +my $lastwasblank = 0; -$lastwasblank = 0; -foreach $line (@lines) +foreach my $line (@lines) { # (1) non-break space -> normal space $line =~ s/\x{c2}\x{a0}/ /g; - # (2) bullet -> asterisk - $line =~ s/\x{e2}\x{97}\x{8f}/*/g; - $line =~ s/\x{e2}\x{80}\x{a2}/*/g; # OpenSUSE - $line =~ s/\x{e2}\x{96}\x{a1}/*/g; # OpenSUSE - # (3a) horizontal box drawing -> hyphen - $line =~ s/\x{e2}\x{94}[\x{80}\x{81}\x{84}\x{85}\x{88}\x{89}]/-/g; - $line =~ s/\x{e2}\x{95}[\x{8c}\x{8d}\x{90}]/-/g; - $line =~ s/\x{e2}\x{95}[\x{b4}\x{b6}\x{b8}\x{ba}\x{bc}\x{be}]/-/g; - # (3b) vertical box drawing -> bar - $line =~ s/\x{e2}\x{94}[\x{82}\x{83}\x{86}\x{87}\x{8a}\x{8b}]/|/g; - $line =~ s/\x{e2}\x{95}[\x{8e}\x{8f}\x{91}]/|/g; - $line =~ s/\x{e2}\x{95}[\x{b5}\x{b7}\x{b9}\x{bb}\x{bd}\x{bf}]/|/g; - # (3c) corner box drawing -> plus - $line =~ s/\x{e2}\x{94}[\x{8c}-\x{bf}]/+/g; - $line =~ s/\x{e2}\x{95}[\x{80}-\x{8b}\x{92}-\x{b0}]/+/g; - # other - $line =~ s/\x{e2}\x{95}\x{b1}/\//g; - $line =~ s/\x{e2}\x{95}\x{b2}/\\/g; - $line =~ s/\x{e2}\x{95}\x{b3}/X/g; + + unless 
($want_utf8) + { + # (2) bullet -> asterisk + $line =~ s/\x{e2}\x{97}\x{8f}/*/g; + $line =~ s/\x{e2}\x{80}\x{a2}/*/g; # OpenSUSE + $line =~ s/\x{e2}\x{96}\x{a1}/*/g; # OpenSUSE + # (3a) horizontal box drawing -> hyphen + $line =~ s/\x{e2}\x{94}[\x{80}\x{81}\x{84}\x{85}\x{88}\x{89}]/-/g; + $line =~ s/\x{e2}\x{95}[\x{8c}\x{8d}\x{90}]/-/g; + $line =~ s/\x{e2}\x{95}[\x{b4}\x{b6}\x{b8}\x{ba}\x{bc}\x{be}]/-/g; + # (3b) vertical box drawing -> bar + $line =~ s/\x{e2}\x{94}[\x{82}\x{83}\x{86}\x{87}\x{8a}\x{8b}]/|/g; + $line =~ s/\x{e2}\x{95}[\x{8e}\x{8f}\x{91}]/|/g; + $line =~ s/\x{e2}\x{95}[\x{b5}\x{b7}\x{b9}\x{bb}\x{bd}\x{bf}]/|/g; + # (3c) corner box drawing -> plus + $line =~ s/\x{e2}\x{94}[\x{8c}-\x{bf}]/+/g; + $line =~ s/\x{e2}\x{95}[\x{80}-\x{8b}\x{92}-\x{b0}]/+/g; + # other + $line =~ s/\x{e2}\x{95}\x{b1}/\//g; + $line =~ s/\x{e2}\x{95}\x{b2}/\\/g; + $line =~ s/\x{e2}\x{95}\x{b3}/X/g; + } # w3m rendering issue apparently only seen by pdp # affects section numbers after the ToC, some info on spool-file -lines, etc @@ -54,6 +73,7 @@ foreach $line (@lines) # Find start of TOC, uppercasing its title +my $i = 0; for ($i = 0; $i < scalar @lines; $i++) { $lines[$i] = "TABLE OF CONTENTS\n" if $lines[$i] =~ /^Table of Contents/; @@ -69,7 +89,8 @@ for ($i++; $i < scalar @lines; $i++) # looking for preceding and following blank lines, and then matching against # the numbers. -$chapter = 0; +my $chapter = 0; +my $section; for (; $i < scalar @lines; $i++) { next if $lines[$i-1] !~ /^$/ || $lines[$i+1] !~ /^$/; |