#! /usr/bin/perl -w # $Cambridge: exim/doc/doc-scripts/DoIndex,v 1.1 2004/10/07 15:04:35 ph10 Exp $ # Script for producing the Index for the Exim manual from the output of the # SGCAL run. This is copied from the script for the Exim book. ############################################################################## # Patterns for matching things to be removed from the sort keys # This was copied from the Exim book processor, but we have now found a # better way of doing this. Leave the code until I am quite sure... # $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/; # $pat[1] = qr/(?<!@)\/\//; # // # $pat[2] = qr/(?<!@)\/\\/; # /\ # $pat[3] = qr/(?<!@)\\\//; # \/ # $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of # (?: # [\.\/] | # dot or slash # !- | # !- # !\+ | # !+ # !\. | # !. # "\+ | # "+ # \([.\/]? | # ( and optional . or slash # [[\$\\%?!-"] | # [ $ \ % ! " or - # \*{1,2} | # * or ** # \^{1,2}\/? # ^ or ^^ and optional slash # )/x; # $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! ) # \*{1,2} | # * or ** ) optional # \^{1,2})? # ^ or ^^ ) # \\/x; # then \ # $pat[6] = qr/(?<!@)::/; # $pat[7] = qr/\sR[FS]\b/; # $pat[8] = qr/``/; # $pat[9] = qr/''/; # $pat[10] = qr/`/; # $pat[11] = qr/'/; # $pat[12] = qr/,/; # $pat[13] = qr/\(e?s\)/; # Other patterns # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/; $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/; # The sort function sub cf { my($x,$y) = ($a,$b); ############old############# #foreach $pattern (@pat) # Remove strings by pattern # { # $x =~ s/$pattern//g; # $y =~ s/$pattern//g; # } ########################## # Turn || into @|@| $x =~ s/\|\|/@|@|/g; $y =~ s/\|\|/@|@|/g; # Remove all special characters, except those preceded by @ $x =~ s/(?<!\@)[^\w\@\s]//g; $y =~ s/(?<!\@)[^\w\@\s]//g; # Remove the escaping @s #$x =~ s/\@(.)/$1/g; #$y =~ s/\@(.)/$1/g; ################old ######################## #$x =~ s/:(\w+):/$1/g; # :fail: etc => fail #$y =~ s/:(\w+):/$1/g; #$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @ #$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter ##############################################3 $x =~ s/\@_/\x7f/g; # Make underscore sort late (option names) $y =~ s/\@_/\x7f/g; # Split up to sort on individual parts my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/; my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/; $xr = "" if !defined $xr; $yr = "" if !defined $yr; $xs = "" if !defined $xs; $ys = "" if !defined $ys; if ($show_keys) { print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n"; print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n"; } my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only $c = $xp cmp $yp if $c == 0; # Caseful, primary text only $c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only $c = $xs cmp $ys if $c == 0; # Caseful, secondary text only $c = $xn <=> $yn if $c == 0; # Compare the numbers $c = $xr cmp $yr if $c == 0; # Sort RA before RZ return $c; } ############################################################################## # Function for getting the next line from the @lines vector, using the global # index $1. If the next pair of lines specifies a range of pages, combine them. # That's why $linenumber has to be global - so we can increment it. If there's # a range error, return "". sub getnextentry { my($line) = $lines[$linenumber]; my($aa,$zz,$tline,$nextline,$tnextline); if ($line =~ / RA (\d+)/) { $aa = $1; $nextline = $lines[++$linenumber]; if ($nextline =~ / RZ (\d+)/) { $zz = $1; } else { print STDERR "** Bad range data (1)\n"; print STDERR " $line\n"; print STDERR " $nextline\n"; return ""; } $tline = $line; $tnextline = $nextline; $tline =~ s/ RA \d+//; $tnextline =~ s/ RZ \d+//; if ($tline ne $tnextline) { print STDERR "** Bad range data (2)\n"; print STDERR " $line\n"; print STDERR " $nextline\n"; return ""; } $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz"; } elsif ($line =~ / RZ (\d+)/) { print STDERR "** Bad range data (RZ without RA)\n"; print STDERR " $line\n"; return ""; } return $line } ############################################################################## # Function for outputting a line, checking for the current primary # and indenting a bit for secondaries. We also need a newpar # before each item, because the main indent is set to a largish indent # for long reference lists, but the parindent is set to counter this. # This is where we handle the break between letters. We know that any non- # alphamerics at the start of lines are markup, except for @. A reference # value of 99999 is for the "see also" lines. Suppress it. sub outline { my($text,$ref) = ($_[0],$_[1]); my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/; return if $text =~ /^\s*$/; if ($ref eq "99999") # dummy for see also { $ref = "" } else { $ref = "#$ref"; # prepend space } if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; } print OUT ".newpar\n"; if ($letter ne $currentletter && $letter ge "A") { print OUT ".newletter\n"; $currentletter = $letter; } $text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote if ($text =~ /^(.+)\|\|(.*)$/) { my($primary,$secondary) = ($1,$2); if ($primary ne $lastprimary) { print OUT ".primary $primary\n"; $lastprimary = $primary; } $primary =~ s/"/""/g; $secondary =~ s/"/""/g; my($contprim) = $primary; $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//; print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n"; } # Not a two-part item; insert @ if the first char is a dot else { print OUT "@" if $text =~ /^\./; print OUT "$text$ref\n"; $lastprimary = $text; } } ############################################################################## # The main script $save_sorted = 0; $test_index = 0; $show_keys = 0; while (@ARGV > 0) { my($arg) = shift @ARGV; if ($arg eq "-k") { $show_keys = 1; } elsif ($arg eq "-s") { $save_sorted = 1; } elsif ($arg eq "-t") { $test_index = $save_sorted = 1; } else { die "Unknown option $arg\n"; } } if ($test_index) { open(IN, "z-testindex") || die "Can't open z-testindex\n"; } else { open(IN, "z-rawindex") || die "Can't open z-rawindex\n"; } open(OUT, ">z-index") || die "Can't open z-index\n"; # Extract index lines ($e lines are contents). Until we hit the first # $e line, we are dealing with "see also" index lines, for which we want # to turn the line number into 99999. $#lines = -1; $prestuff = 1; while (<IN>) { s/\n$//; if (/\$e/) { $prestuff = 0; } else { s/(\D)$/$1 99999/ if $prestuff; # No number in "see also" push(@lines, $_); } $index_pagenumber = $1 if /^Index\$e(\d+)/; } close(IN); # Sort, ignoring markup print STDERR "Sorting ...\n"; @lines = sort cf @lines; # Keep a copy of the sorted data, for reference if ($save_sorted) { open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n"; foreach $line (@lines) { print X "$line\n"; } close(X); } # Heading for the index file print OUT <<"EOF"; .library "a4ps" .linelength ~~sys.linelength + 16.0 .include "markup.sg" .indent 3em .parspace 0 .parindent -3em .justify left . .foot \$c [~~sys.pagenumber] .endfoot . .cancelflag # .flag # "\$S*1" .set INDEX true . .macro primary "text" .if ~~sys.leftonpage < 2ld .newcolumn .fi ~~1 .newpar .endm . .macro secondary "prim" "sec" "contprim" .if ~~sys.leftonpage < 1ld .newcolumn .newpar ~~3 \$it\{(continued)\} .newpar .fi ##~~2 .endm . .macro newletter .if ~~sys.leftonpage < 4ld .newcolumn .else .space 1ld .fi .newpar .endm . .set chapter -1 .page $index_pagenumber .chapter Index .columns 2 .newpar . EOF # Process the lines and output the result. # Note that $linenumber is global, and is changed by getnextentry() for # pairs of lines that represent ranges. $lastprimary = ""; $lastref = ""; $currenttext = $currentref = ""; $currentletter = ""; $badrange = 0; print STDERR "Processing ...\n"; for ($linenumber = 0; $linenumber < @lines; $linenumber++) { $line = &getnextentry(); if ($line eq "") # Bad range data - but carry on to get all of it { $badrange = 1; next; } # Split off the text and reference ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/; # If same as current text, just add the new reference, unless its a duplicate if ($text eq $currenttext) { if ($ref ne $lastref) { $currentref .= ", $ref"; $lastref = $ref; } next; } # Not the same as the current text. Output the current text, then # set up a new current. &outline($currenttext, $currentref); $currenttext = $text; $currentref = $lastref = $ref; } # Output the final line and close the file &outline($currenttext, $currentref); close(OUT); die "** Aborted\n" if $badrange; # Format the index system("sgcal z-index -to zi-gcode -index /dev/null"); system("sgtops zi-gcode -to zi-ps"); print "PostScript in zi-ps\n"; # End