diff options
Diffstat (limited to 'doc/doc-scripts/DoIndex')
-rwxr-xr-x | doc/doc-scripts/DoIndex | 430 |
1 files changed, 430 insertions, 0 deletions
diff --git a/doc/doc-scripts/DoIndex b/doc/doc-scripts/DoIndex new file mode 100755 index 000000000..1caddbd6f --- /dev/null +++ b/doc/doc-scripts/DoIndex @@ -0,0 +1,430 @@ +#! /usr/bin/perl -w +# $Cambridge: exim/doc/doc-scripts/DoIndex,v 1.1 2004/10/07 15:04:35 ph10 Exp $ + +# Script for producing the Index for the Exim manual from the output of the +# SGCAL run. This is copied from the script for the Exim book. + + +############################################################################## +# Patterns for matching things to be removed from the sort keys + +# This was copied from the Exim book processor, but we have now found a +# better way of doing this. Leave the code until I am quite sure... + +# $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/; +# $pat[1] = qr/(?<!@)\/\//; # // +# $pat[2] = qr/(?<!@)\/\\/; # /\ +# $pat[3] = qr/(?<!@)\\\//; # \/ +# $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of +# (?: +# [\.\/] | # dot or slash +# !- | # !- +# !\+ | # !+ +# !\. | # !. +# "\+ | # "+ +# \([.\/]? | # ( and optional . or slash +# [[\$\\%?!-"] | # [ $ \ % ! " or - +# \*{1,2} | # * or ** +# \^{1,2}\/? # ^ or ^^ and optional slash +# )/x; +# $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! ) +# \*{1,2} | # * or ** ) optional +# \^{1,2})? # ^ or ^^ ) +# \\/x; # then \ +# $pat[6] = qr/(?<!@)::/; +# $pat[7] = qr/\sR[FS]\b/; +# $pat[8] = qr/``/; +# $pat[9] = qr/''/; +# $pat[10] = qr/`/; +# $pat[11] = qr/'/; +# $pat[12] = qr/,/; +# $pat[13] = qr/\(e?s\)/; + + +# Other patterns + +# $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/; + +$keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/; + + +# The sort function + +sub cf { +my($x,$y) = ($a,$b); + +############old############# +#foreach $pattern (@pat) # Remove strings by pattern +# { +# $x =~ s/$pattern//g; +# $y =~ s/$pattern//g; +# } +########################## + + +# Turn || into @|@| + +$x =~ s/\|\|/@|@|/g; +$y =~ s/\|\|/@|@|/g; + +# Remove all special characters, except those preceded by @ + +$x =~ s/(?<!\@)[^\w\@\s]//g; +$y =~ s/(?<!\@)[^\w\@\s]//g; + +# Remove the escaping @s + +#$x =~ s/\@(.)/$1/g; +#$y =~ s/\@(.)/$1/g; + + + +################old ######################## +#$x =~ s/:(\w+):/$1/g; # :fail: etc => fail +#$y =~ s/:(\w+):/$1/g; + +#$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @ +#$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter +##############################################3 + + +$x =~ s/\@_/\x7f/g; # Make underscore sort late (option names) +$y =~ s/\@_/\x7f/g; + +# Split up to sort on individual parts + +my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/; +my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/; + +$xr = "" if !defined $xr; +$yr = "" if !defined $yr; + +$xs = "" if !defined $xs; +$ys = "" if !defined $ys; + +if ($show_keys) + { + print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n"; + print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n"; + } + +my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only +$c = $xp cmp $yp if $c == 0; # Caseful, primary text only +$c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only +$c = $xs cmp $ys if $c == 0; # Caseful, secondary text only +$c = $xn <=> $yn if $c == 0; # Compare the numbers +$c = $xr cmp $yr if $c == 0; # Sort RA before RZ +return $c; +} + + + +############################################################################## +# Function for getting the next line from the @lines vector, using the global +# index $1. If the next pair of lines specifies a range of pages, combine them. +# That's why $linenumber has to be global - so we can increment it. If there's +# a range error, return "". + +sub getnextentry { +my($line) = $lines[$linenumber]; +my($aa,$zz,$tline,$nextline,$tnextline); + +if ($line =~ / RA (\d+)/) + { + $aa = $1; + $nextline = $lines[++$linenumber]; + if ($nextline =~ / RZ (\d+)/) + { + $zz = $1; + } + else + { + print STDERR "** Bad range data (1)\n"; + print STDERR " $line\n"; + print STDERR " $nextline\n"; + return ""; + } + + $tline = $line; + $tnextline = $nextline; + + $tline =~ s/ RA \d+//; + $tnextline =~ s/ RZ \d+//; + + if ($tline ne $tnextline) + { + print STDERR "** Bad range data (2)\n"; + print STDERR " $line\n"; + print STDERR " $nextline\n"; + return ""; + } + + $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz"; + } + +elsif ($line =~ / RZ (\d+)/) + { + print STDERR "** Bad range data (RZ without RA)\n"; + print STDERR " $line\n"; + return ""; + } + +return $line +} + + + + +############################################################################## +# Function for outputting a line, checking for the current primary +# and indenting a bit for secondaries. We also need a newpar +# before each item, because the main indent is set to a largish indent +# for long reference lists, but the parindent is set to counter this. +# This is where we handle the break between letters. We know that any non- +# alphamerics at the start of lines are markup, except for @. A reference +# value of 99999 is for the "see also" lines. Suppress it. + +sub outline { +my($text,$ref) = ($_[0],$_[1]); +my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/; + +return if $text =~ /^\s*$/; + +if ($ref eq "99999") # dummy for see also + { + $ref = "" + } +else + { + $ref = "#$ref"; # prepend space + } + +if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; } + +print OUT ".newpar\n"; + +if ($letter ne $currentletter && $letter ge "A") + { + print OUT ".newletter\n"; + $currentletter = $letter; + } + +$text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote + +if ($text =~ /^(.+)\|\|(.*)$/) + { + my($primary,$secondary) = ($1,$2); + + if ($primary ne $lastprimary) + { + print OUT ".primary $primary\n"; + $lastprimary = $primary; + } + + $primary =~ s/"/""/g; + $secondary =~ s/"/""/g; + + my($contprim) = $primary; + $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//; + + print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n"; + } + +# Not a two-part item; insert @ if the first char is a dot + +else + { + print OUT "@" if $text =~ /^\./; + print OUT "$text$ref\n"; + $lastprimary = $text; + } +} + + + + + +############################################################################## +# The main script + +$save_sorted = 0; +$test_index = 0; +$show_keys = 0; + +while (@ARGV > 0) + { + my($arg) = shift @ARGV; + if ($arg eq "-k") { $show_keys = 1; } + elsif ($arg eq "-s") { $save_sorted = 1; } + elsif ($arg eq "-t") { $test_index = $save_sorted = 1; } + else { die "Unknown option $arg\n"; } + } + +if ($test_index) + { + open(IN, "z-testindex") || die "Can't open z-testindex\n"; + } +else + { + open(IN, "z-rawindex") || die "Can't open z-rawindex\n"; + } + +open(OUT, ">z-index") || die "Can't open z-index\n"; + +# Extract index lines ($e lines are contents). Until we hit the first +# $e line, we are dealing with "see also" index lines, for which we want +# to turn the line number into 99999. + +$#lines = -1; +$prestuff = 1; + +while (<IN>) + { + s/\n$//; + if (/\$e/) + { + $prestuff = 0; + } + else + { + s/(\D)$/$1 99999/ if $prestuff; # No number in "see also" + push(@lines, $_); + } + $index_pagenumber = $1 if /^Index\$e(\d+)/; + } +close(IN); + +# Sort, ignoring markup + +print STDERR "Sorting ...\n"; +@lines = sort cf @lines; + +# Keep a copy of the sorted data, for reference + +if ($save_sorted) + { + open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n"; + foreach $line (@lines) + { + print X "$line\n"; + } + close(X); + } + +# Heading for the index file + +print OUT <<"EOF"; +.library "a4ps" +.linelength ~~sys.linelength + 16.0 + +.include "markup.sg" + +.indent 3em +.parspace 0 +.parindent -3em +.justify left +. +.foot +\$c [~~sys.pagenumber] +.endfoot +. +.cancelflag # +.flag # "\$S*1" +.set INDEX true +. +.macro primary "text" +.if ~~sys.leftonpage < 2ld +.newcolumn +.fi +~~1 +.newpar +.endm +. +.macro secondary "prim" "sec" "contprim" +.if ~~sys.leftonpage < 1ld +.newcolumn +.newpar +~~3 \$it\{(continued)\} +.newpar +.fi +##~~2 +.endm +. +.macro newletter +.if ~~sys.leftonpage < 4ld +.newcolumn +.else +.space 1ld +.fi +.newpar +.endm +. +.set chapter -1 +.page $index_pagenumber +.chapter Index +.columns 2 +.newpar +. +EOF + +# Process the lines and output the result. +# Note that $linenumber is global, and is changed by getnextentry() for +# pairs of lines that represent ranges. + +$lastprimary = ""; +$lastref = ""; +$currenttext = $currentref = ""; +$currentletter = ""; +$badrange = 0; + +print STDERR "Processing ...\n"; + +for ($linenumber = 0; $linenumber < @lines; $linenumber++) + { + $line = &getnextentry(); + + if ($line eq "") # Bad range data - but carry on to get all of it + { + $badrange = 1; + next; + } + + # Split off the text and reference + + ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/; + + # If same as current text, just add the new reference, unless its a duplicate + + if ($text eq $currenttext) + { + if ($ref ne $lastref) + { + $currentref .= ", $ref"; + $lastref = $ref; + } + next; + } + + # Not the same as the current text. Output the current text, then + # set up a new current. + + &outline($currenttext, $currentref); + + $currenttext = $text; + $currentref = $lastref = $ref; + } + +# Output the final line and close the file + +&outline($currenttext, $currentref); +close(OUT); + +die "** Aborted\n" if $badrange; + +# Format the index + +system("sgcal z-index -to zi-gcode -index /dev/null"); +system("sgtops zi-gcode -to zi-ps"); +print "PostScript in zi-ps\n"; + +# End |