#! /usr/bin/perl -w
# $Cambridge: exim/doc/doc-scripts/g2h,v 1.3 2005/02/17 12:17:09 ph10 Exp $
# This is a script that turns the SGCAL source of Exim's documentation into
# HTML. It can be used for both the filter document and the main Exim
# specification. The syntax is
#
# g2h [-split no|section|chapter]
\n";
}
# Forward/backward links when section splitting
elsif ($sectsplit)
{
print OUT "\n";
printf OUT ("Previous \n",
$thissection - 1) if $thissection > 1;
printf OUT ("Next \n",
$thissection + 1) if $thissection < $maxsection;
print OUT "Contents\n";
print OUT " " x 6, "($doctitle)\n
\n";
}
# Save the final component of the current file name (for TOC creation)
$_[0] =~ /^(?:.*)\/([^\/]+)$/;
$current_file = $1;
}
##################################################
# Close an output file #
##################################################
# The first argument is one of:
#
# "CHAP" a chapter is ending
# "SECT" a section is ending
# "" the whole thing is ending
#
# In the first two cases $thischapter and $thissection contain the new chapter
# and section numbers, respectively. In the third case, we can deduce what is
# ending from the flags. The variables contain the current values.
sub closeout {
# Finish off the file currently open on OUT and close it.
#
# Argument: "CHAP" if a chapter is ending, "SECT" if a section is ending,
#           or "" if the whole document is ending.
#
# Writes a separator unless the last thing output was a rule, closes any
# open paragraph via setpar(0), writes the navigation footer that applies,
# writes the trailer, and closes OUT. Returns the result of close().
my($ending) = @_;

print OUT "
\n" unless $lastwasrule;
&setpar(0);

# Describe the footer as (counter, distance back to the previous file,
# whether a "Next" link is wanted). When a chapter or section is ending the
# counter already holds the NEW number, hence a distance of 2; at the very end
# it still holds the current number, hence 1, and there is no "Next". An empty
# list means no footer at all (the document is not being split).
my(@nav) =
    $ending eq "CHAP" ? ($thischapter, 2, 1)
  : $ending eq "SECT" ? ($thissection, 2, 1)
  : $chapsplit        ? ($thischapter, 1, 0)
  : $sectsplit        ? ($thissection, 1, 0)
  :                     ();

if (@nav)
  {
  my($counter, $back, $has_next) = @nav;

  print OUT "\n";
  if ($counter > $back)
    {
    printf OUT ("Previous ",
      $counter - $back);
    }
  print OUT "Next " if $has_next;
  print OUT "Contents\n";
  print OUT " " x 6, "($doctitle)\n\n";
  }

print OUT "\n\n";
close(OUT);
}
##################################################
# Handle an index line #
##################################################
# This function returns an empty string so that it can be called as part
# of an s operator when handling index items within paragraphs. The two
# arguments are:
#
# the text to index, already converted to HTML
# 1 for the concept index, 0 for the options index
sub handle_index {
my($text) = $_[0];
my($hash) = $_[1]? \%cindex : \%oindex;
my ($key,$ref);
# Up the index count, and compute the reference to the file and the
# label within it.
$index_count++;
$ref = $chapsplit?
"${file_base}_$thischapter.html#IX$index_count"
: $sectsplit?
"${file_base}_$thissection.html#IX$index_count"
:
"#IX$index_count";
# Create the index key, which consists of the text with all the HTML
# coding and any leading quotation marks removed. Turn the primary/secondary
# splitting string "||" into ":".
$text =~ s/\|\|/:/g;
$key = "$text";
$key =~ s/<[^>]+>//g;
$key =~ s/(\d+);/chr($1)/eg;
$key =~ s/^`+//;
$key =~ s/^"//;
# Turn all spaces in the text into non-breaking spaces so that they don't ever
# split. However, there may be spaces in the HTML that already exists in the
# text, so we have to avoid changing spaces inside <>.
$text =~ s/ (?=[^<>]*(?:<|$))/ /g;
# If this is the first encounter with this index key, we create a
# straightforward reference.
if (!defined $$hash{$key})
{
$$hash{$key} = "$text";
}
# For the second and subsequent encounters, add "[2]" etc. to the
# index text. We find out the number by counting occurrences of "[$number]";
}
# Place the name in the current output
print OUT "\n";
return "";
}
##################################################
# Handle emphasis bars #
##################################################
# Set colour green for text marked with "emphasis bars", keeping
# track in case the matching isn't perfect.
sub setinem {
# Switch the "inside emphasis" state ($inem) on or off.
#
# Argument: true to enter emphasis, false to leave it.
# Returns:  "" when the state is already as requested (so unbalanced
#           requests are harmless); otherwise updates $inem to 1 or 0 and
#           returns the text to be emitted for the change.
my($turn_on) = $_[0] ? 1 : 0;
my($is_on)   = $inem ? 1 : 0;

# Nothing to emit if we are already in the requested state.
return "" if $turn_on == $is_on;

$inem = $turn_on;
return "\n";
}
##################################################
# Convert marked-up text #
##################################################
# This function converts text from SGCAL markup to HTML markup, with a couple
# of exceptions:
#
# 1. We don't touch $t because that is handled by the .display code.
#
# 2. The text may contain embedded .index, .em, and .nem directives. We
# handle .em and .nem, but leave .index because it must be done during
# paragraph outputting.
#
# In a non-"rm" display, we turn $rm{ into cancelling of <tt>. Otherwise
# it is ignored - in practice it is only used in that special case.
#
# The order in which things are done in this function is highly sensitive!
sub handle_text {
my($s) = $_[0];
my($rmspecial) = $_[1];
# Escape all & characters (they aren't involved in markup) but for the moment
# use &+ instead of &# so that we can handle # characters in the text.
$s =~ s/&/&+038;/g;
# Turn SGCAL literals into HTML literals that don't look like SGCAL
# markup, so won't be touched by what follows. Again, use + instead of #.
$s =~ s/@@/&+064;/g;
$s =~ s/@([^@])/"&+".sprintf("%0.3d",ord($1)).";"/eg;
# Now turn any #s that are markup into spaces, and convert the previously
# created literals to the correct form.
$s =~ s/#/ /g;
$s =~ s/&\+(\d+);/$1;/g;
# Some simple markup that doesn't involve argument text.
$s =~ s/\$~//g; # turn $~ into nothing
$s =~ s/__/_/g; # turn __ into _
$s =~ s/--(?=$|\s|\d)//mg; # turn -- into endash in text or number range
$s =~ s/\(c\)/©/g; # turn (c) into copyright symbol
# Use double quotes
# $s =~ s/`([^']+)'/``$1''/g;
$s =~ s/`([^']+)'/$1/g;
# This is a fudge for some specific usages of $<; can't just do a global
# is it occurs in things like "$
\n";
$inpar = 1;
}
}
##################################################
# Handle a "paragraph" #
##################################################
# Read a paragraph of text, which may contain many lines and may contain
# .index, .em, and .nem directives within it. We may also encounter
# ".if ~~html" within paragraphs. Process those directives,
# convert the markup, and output the rest as an HTML paragraph.
sub handle_paragraph{
my($par) = $_;
my($htmlcond) = 0;
while( \n";
foreach $key (sort keys %indexindex)
{
print INDEX " $key\n";
}
print INDEX "
\n"; }
elsif (/^\.blank\b/ || /^\.space\b/) { print OUT "
\n"; }
elsif (/^\.rule\b/) { &setpar(0); print OUT "
\n"; $lastwasrule = 1; }
elsif (/^\.index\s+(.*)/) { &handle_index(&handle_text($1), 1); }
# Emphasis is handled by colour
elsif (/^\.em\b/)
{
&setpar(0);
print OUT "" if ! $inem;
$inem = 1;
}
elsif (/^\.nem\b/)
{
&setpar(0);
print OUT "" if $inem;
$inem = 0;
}
# Ignore tab setting stuff - we use tables instead.
elsif (/^\.tabs(?:et)?\b/) {}
# .tempindent is used only to align some of the expansion stuff nicely;
# just ignore it. It is used in conjunction with .push/.pop.
elsif (/^\.(tempindent|push|pop)\b/) {}
# There are some instances of .if ~~sys.fancy in the source. Some of those
# that are not inside displays are two-part things, in which case we just keep
# the non-fancy part. For diagrams, however, they are in three parts:
#
# .if ~~sys.fancy
# \n
\n\n\n";
close(TOC);
close(IN);
}
##################################################
# Adjust index points #
##################################################
# Because of the way the source is written, there are often index entries
# that immediately follow the start of chapters and sections and the definition
# of "items" like "helo = verify". This gets the correct page numbers for the
# PostScript and PDF formats. However, for HTML we want the index anchor to be
# before the section heading, because browsers tend to put the index point at
# the top of the screen. So we re-read all the files we've just created, and
# move some of the index points about. This is necessary only if indexes exist.
# The files are small enough to be handled entirely in memory.
sub adjust_index_points {
print "Adjusting index points to precede headings\n";
$" = "";
opendir(DIR, "$html") || die "Failed to opendir $html\n";
while ($file = readdir(DIR))
{
my($i);
next unless $file =~ /^${file_base}_\d+\.html$/;
open(IN, "<$html/$file") ||
die "Failed to open $html/$file (read)\n";
my(@lines) = \n
which uses a typewriter font.
# Otherwise, we have to do our own line breaking. Turn tabbed lines
# into an HTML table. There will always be a .tabs line first.
elsif (/^\.display\b/)
{
my($intable) = 0;
my($asis) = /asis/;
my($rm) = /rm/;
my($eol,$indent);
# For non asis displays, start a paragraph, and set up to put an
# explicit break after every line.
if (!$asis)
{
&setpar(1);
$eol = "
";
$indent = " ";
}
# For asis displays, use <pre> and no explicit breaks
else
{
print OUT "
\n";
$eol = "";
$indent = " ";
}
# Now read through until we hit .endd (or EOF, but that shouldn't happen)
# and process the lines in the display.
while (
\n" if $asis;
}
# Handle configuration option definitions
elsif (/^\.startconf\s+(.*)/)
{
$confuse = &handle_text($1);
}
elsif (/^\.conf\b/)
{
my($option, $type, $default) =
/^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;
$option =~ s/\@_/_/g; # Underscore will be quoted in option name
# If $type ends with $**$, add ",expanded" as there doesn't seem to be
# a dagger character generally available.
$type =~ s/^"([^"]+)"/$1/;
$type =~ s/\$\*\*\$/, expanded/;
# Default may be quoted, and it may also have quotes that are required,
# if it is a string.
$default =~ s/^"(.*)"$/$1/;
$default =~ s/""/"/g;
$default = &handle_text($default, 0);
print OUT "\n";
}
# Some displays have an indent setting - ignore
elsif (/^\.indent\b/) {}
# Some displays have .blank inside them
elsif (/^\.blank\b/)
{
print OUT "
\n" if $intable;
print OUT "
\n";
}
# Some displays have emphasis inside them
elsif (/^\.em\b/)
{
print OUT "" if ! $inem;
$inem = 1;
}
elsif (/^\.nem\b/)
{
print OUT "" if $inem;
$inem = 0;
}
# There are occasional instances of .if [!]~~sys.fancy inside displays.
# In both cases we want the non-fancy alternative. (The only thing that
# matters in practice is noticing .tabs[et] actually.) Assume the syntax
# is valid.
elsif (/^\.if\s+~~sys.fancy/ || /^\.else\b/)
{
while (/g;
}
else
{
s/\s*\$t\s*/ <\/tt><\/td> /g;
}
s/<\/tt>//g;
print OUT " \n";
}
# Otherwise, output straight: $_ followed by $eol, which is an explicit line
# break for non asis displays
else
{
s/<\/tt>//g;
print OUT "$indent$_$eol\n";
}
}
} # Loop for display contents
# Finish off the table and the <pre> - leave a paragraph open
print OUT "
";
&setpar(0);
&handle_index($option, 0);
print OUT "$option
\n" .
"Use: $confuse
" .
"Type: $type
Default: $default
\n";
}
elsif (/^\.endconf\b/)
{
print OUT "
\n";
}
# Handle "items" - used for expansion items and the like. We force the
# item text into bold, and put a rule between items.
elsif (/^\.startitems\b/) {}
elsif (/^\.item\s+(.*)/)
{
my($arg) = $1;
chomp($arg);
$arg =~ s/^"(.*)"$/$1/;
$arg = &handle_text($arg, 0);
# If there are two .items in a row, we don't want to put in the
# separator line or start a new paragraph.
if ($lastwasitem)
{
print OUT "
";
}
else
{
print OUT "
";
&setpar(1);
}
print OUT "$arg\n";
$new_lastwasitem = 1;
}
elsif (/^\.enditems\b/)
{
print OUT "
\n";
}
# Handle command line option items
elsif (/^\.startoptions\b/) {}
elsif (/^\.option\s+(.*)/)
{
my($arg) = $1;
$arg =~ s/"([^"]*)"/$1/g;
print OUT "
";
&setpar(0);
# For indexing, we want to take up to the first # or < in the line,
# before processing.
my($name) = $arg =~ /^([^#<]+)/;
$name = &handle_text($name, 0);
&handle_index("-$name", 0);
# Output as heading, after the index
$arg = &handle_text($arg, 0);
print OUT "-$arg
\n";
}
elsif (/^\.endoptions\b/)
{
print OUT "
\n";
}
# Found an SGCAL directive that isn't dealt with. Oh dear.
else
{
print "*** Unexpected SGCAL directive: line $. ignored:\n";
print "$_\n";
}
# Remember if last was a .item, and read the next line
$lastwasitem = $new_lastwasitem;
$_ = $doctitle
\n\n";
# Open the data file if we are not splitting at chapters
&openout("$html/${file_base}.html") if !$chapsplit;
# Loop for handling chapters. At the start of this loop, $_ is either EOF,
# or contains a .chapter line.
$firstchapter = $thischapter + 1;
while ($_)
{
print TOC "
\n" if $insection;
$insection = 0;
$thischapter++;
$thissection = 0;
$lastwasrule = 0;
# Start a new file if required
if ($chapsplit)
{
&closeout("CHAP") if $thischapter != $firstchapter;
&openout("$html/${file_base}_$thischapter.html");
}
# Set up the chapter title. Save it for the TOC. Set up the anchor and
# link back to the TOC and show the title.
$_ =~ /^\.chapter\s+(.*)/;
my($title) = (($thischapter > 0)? "$thischapter. " : "") . &handle_text($1, 0);
$tocn++;
print TOC "\n";
print OUT "\n";
print OUT "$title\n
\n";
# Scan the contents of the chapter
$_ = \n" if !$insection;
$insection = 1;
my($title) = (($thischapter > 0)? "$thischapter.$thissection " :
"$thissection. ") . &handle_text($1, 0);
if ($sectsplit)
{
&closeout("SECT");
&openout("$html/${file_base}_$thissection.html");
}
$tocn++;
printf TOC ("
\n" if $insection;
if (defined %cindex)
{
$cindex_tocn = ++$tocn;
print TOC "\n";
printf OUT (" 0)? "$thischapter." : "");
print OUT "href=\"${file_base}_toc.html#TOC$tocn\">\n";
print OUT "$title\n
\n";
$_ = or whatever. This preserves the order of
# multiple index lines, not that that matters.
if ($lines[$i-1] =~ /^<\/a><\/h(\d)>/)
{
my($j);
my($found) = 0;
for ($j = $i-2; $j > 0 && $j > $i - 10; $j--)
{
if ($lines[$j] =~ /
/)
{
splice(@lines, $i-2, 0, splice(@lines, $i, 1));
}
# Handle an index line that follows an "option" definition
elsif ($lines[$i-1] =~ /^
/)
{
splice(@lines, $i-1, 0, splice(@lines, $i, 1));
}
}
}
open(OUT, ">$html/$file") ||
die "Failed to open $html/$file (write)\n";
print OUT "@lines";
close OUT;
undef @lines;
}
}
##################################################
# Create Index #
##################################################
sub create_index{
my($hash) = $_[0];
my($ifname) = $_[1];
my($ititle) = $_[2];
my(%indexindex);
open(INDEX, ">$html/${file_base}_$_[1].html") ||
die "Failed to open $html/${file_base}_$ifname\n";
print INDEX "\n";
print INDEX "\n\n
$ititle
\n";
# We have to scan the keys in the hash twice; first to build the list
# of initial letters, and then to do the business. The first time we
# do not need to sort them.
foreach $key (keys %$hash)
{
my($initial) = substr($key,0,1);
$initial = "\U$initial";
$indexindex{$initial} = 1 if $initial ge "A" && $initial le "Z";
}
print INDEX "
\n";
foreach $key (sort
{
my($aa) = $a;
my($bb) = $b;
$aa =~ s/^\x93//; # Seems like the actual char values are
$bb =~ s/^\x93//; # set by this time, not ""
return ("\L$aa" eq "\L$bb")? ("$aa" cmp "$bb") : ("\L$aa" cmp "\L$bb");
}
keys %$hash)
{
my($initial) = substr($key,0,1);
$initial = "\U$initial";
if ($initial ne $letter && $initial ge "A" && $initial le "Z")
{
print INDEX "
\n";
print INDEX "\n";
print INDEX "\U$initial\E
\n";
$letter = $initial;
}
print INDEX "$$hash{$key}
\n";
}
print INDEX "