#! /usr/bin/perl -w
# $Cambridge: exim/doc/doc-scripts/g2h,v 1.3 2005/02/17 12:17:09 ph10 Exp $
# This is a script that turns the SGCAL source of Exim's documentation into
# HTML. It can be used for both the filter document and the main Exim
# specification. The syntax is
#
# g2h [-split no|section|chapter]
\n";
$inpar = 0;
}
if ($_[0])
{
print OUT "
\n";
$inpar = 1;
}
}
##################################################
# Handle a "paragraph" #
##################################################
# Read a paragraph of text, which may contain many lines and may contain
# .index, .em, and .nem directives within it. We may also encounter
# ".if ~~html" within paragraphs. Process those directives,
# convert the markup, and output the rest as an HTML paragraph.
# Collect one paragraph and write it to OUT.
#
# On entry $_ holds the first line of the paragraph. Further lines are read
# from IN until a blank line, or a directive other than .index/.em/.nem, is
# met; that terminating line is left in $_ for the caller (undef at EOF).
# An embedded ".if ~~html ... [.else ...] .fi" keeps the first part and
# discards the .else part. The collected text is passed through handle_text()
# and, unless it is entirely whitespace, printed after any ".index" lines in
# it have been replaced by the result of handle_index().
sub handle_paragraph{
  my($par) = $_;
  my($htmlcond) = 0;

  while (<IN>)
    {
    if (/^\.if\s+~~html\b/)
      {
      $htmlcond = 1;
      $par =~ s/\s+$//;      # lose unwanted whitespace and newlines
      next;
      }
    elsif ($htmlcond && /^\.else\b/)
      {
      # Throw away the non-HTML alternative, up to and including .fi
      while (<IN>) { last if /^\.fi\b/; }
      $htmlcond = 0;
      next;
      }
    elsif ($htmlcond && /^\.fi\b/)
      {
      $htmlcond = 0;
      next;
      }

    # A blank line or any directive that cannot live inside a paragraph
    # terminates it.
    last if /^\s*$/ || (/^\./ && !/^\.index\b/ && !/^\.em\b/ && !/^\.nem\b/);
    $par .= $_;
    }

  $par = &handle_text($par, 0);

  # We can't handle .index until this point, when we do it just before
  # outputting the paragraph.
  if ($par !~ /^\s*$/)
    {
    &setpar(1);
    $par =~ s/\.index\s+([^\n]+)\n/&handle_index($1, 1)/eg;
    print OUT "$par";
    }
}
##################################################
# Handle a non-paragraph directive #
##################################################
# The directives .index, .em, and .nem can also appear within paragraphs,
# and are then handled within the handle_paragraph() code.
# Process the directive line currently in $_ (one that is not part of a
# paragraph), writing any resulting output to OUT, and then read the next
# input line from IN into $_ for the caller.
#
# Besides $_ this uses the file-level variables $inem, $lastwasrule,
# $lastwasitem, $confuse, $rest, @endlist and @listtype, and calls setpar(),
# handle_text() and handle_index().
#
# NOTE(review): many strings printed below are empty or hold only spaces and
# newlines, and some substitutions look incomplete. It appears that HTML
# markup was lost from this copy of the script. Those strings are kept with
# their current runtime values here; confirm them against a pristine copy.
sub handle_directive{
  my($new_lastwasitem) = 0;

  $lastwasrule = 0;

  if (/^\.r?set\b/ || /^\.(?:\s|$)/) {}     # ignore .(r)set and comments
  elsif (/^\.justify\b/) {}                 # and .justify
  elsif (/^\.newline\b/) { print OUT " \n"; }
  elsif (/^\.blank\b/ || /^\.space\b/) { print OUT " \n"; }
  elsif (/^\.rule\b/) { &setpar(0); print OUT "\n"; $lastwasrule = 1; }
  elsif (/^\.index\s+(.*)/) { &handle_index(&handle_text($1), 1); }

  # Emphasis is handled by colour
  elsif (/^\.em\b/)
    {
    &setpar(0);
    print OUT "" if ! $inem;
    $inem = 1;
    }
  elsif (/^\.nem\b/)
    {
    &setpar(0);
    print OUT "" if $inem;
    $inem = 0;
    }

  # Ignore tab setting stuff - we use tables instead.
  elsif (/^\.tabs(?:et)?\b/) {}

  # .tempindent is used only to align some of the expansion stuff nicely;
  # just ignore it. It is used in conjunction with .push/.pop.
  elsif (/^\.(tempindent|push|pop)\b/) {}

  # There are some instances of .if ~~sys.fancy in the source. Some of those
  # that are not inside displays are two-part things, in which case we just
  # keep the non-fancy part. For diagrams, however, they are in three parts:
  #
  #   .if ~~sys.fancy
  #
  #   .elif !~~html
  #
  #   .else
  #
  #   .fi
  #
  # In this case, we skip to the third part.
  elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
    {
    while (<IN>)
      { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
    if (/^\.elif\b/)
      {
      while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
      }
    }

  # Similarly, for .if !~~sys.fancy, take the non-fancy part.
  elsif (/^\.if\s+!\s*~~sys.fancy/) {}

  # There are some explicit tests for ~~html for direct HTML inclusions
  elsif (/^\.if\s+~~html\b/) {}

  # There are occasional requirements to do things differently for
  # Texinfo/HTML and PS/txt versions. The latter are produced by SGCAL, so
  # that's what the flag is called.
  elsif (/\.if\s+~~sgcal/)
    {
    while (<IN>) { last if /\.else\b/ || /\.fi\b/; }
    }

  # Also there is a texinfo flag
  elsif (/^\.if\s+~~texinfo\b/)
    {
    while (<IN>)
      { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
    }

  # Ignore any other .if, .else, or .fi directives
  elsif (/^\.if\b/ || /^\.fi\b/ || /^\.else\b/) {}

  # Ignore .indent
  elsif (/^\.indent\b/) {}

  # Various flavours of numberpars map to corresponding list types.
  # NOTE(review): only the flavour matched below pushes onto @endlist and
  # @listtype, yet .endp always shifts both; a branch for the other flavours
  # may be missing here - confirm.
  elsif (/^\.numberpars\b/)
    {
    $rest = $';
    &setpar(0);
    if ($rest =~ /(?:\$\.|\" \")/)
      {
      unshift @endlist, "ul";
      unshift @listtype, "";
      print OUT "\n";
      }
    }

  elsif (/^\.nextp\b/)
    {
    &setpar(0);
    print OUT "\n\n\n";
    }

  elsif (/^\.endp\b/)
    {
    &setpar(0);
    print OUT "\n\n$endlist[0]>\n";
    shift @listtype;
    shift @endlist;
    }

  # .display asis can use a preformatted block, which uses a typewriter
  # font. Otherwise, we have to do our own line breaking. Turn tabbed lines
  # into an HTML table. There will always be a .tabs line first.
  elsif (/^\.display\b/)
    {
    my($intable) = 0;
    my($asis) = /asis/;
    my($rm) = /rm/;
    my($eol,$indent);

    # For non asis displays, start a paragraph, and set up to put an
    # explicit break after every line.
    if (!$asis)
      {
      &setpar(1);
      $eol = " ";
      $indent = "";
      }

    # For asis displays, use a preformatted block and no explicit breaks
    else
      {
      print OUT "\n\n";
      $eol = "";
      $indent = " ";
      }

    # Now read through until we hit .endd (or EOF, but that shouldn't
    # happen) and process the lines in the display.
    while (<IN>)
      {
      last if /^\.endd\b/;

      # The presence of .tabs[et] starts a table
      if (/^\.tabs/)
        {
        $intable = 1;
        print OUT "\n\n";
        }

      # Some displays have an indent setting - ignore
      elsif (/^\.indent\b/) {}

      # Some displays have .blank inside them
      elsif (/^\.blank\b/)
        {
        print OUT " \n";
        }

      # Some displays have emphasis inside them
      elsif (/^\.em\b/)
        {
        print OUT "" if ! $inem;
        $inem = 1;
        }
      elsif (/^\.nem\b/)
        {
        print OUT "" if $inem;
        $inem = 0;
        }

      # There are occasional instances of .if [!]~~sys.fancy inside
      # displays. In both cases we want the non-fancy alternative. (The only
      # thing that matters in practice is noticing .tabs[et] actually.)
      # Assume the syntax is valid.
      elsif (/^\.if\s+~~sys.fancy/ || /^\.else\b/)
        {
        while (<IN>)
          {
          last if /^\.fi\b/ || /^\.else/;
          }
        }
      elsif (/^\.if\s+!\s*~~sys.fancy/) {}
      elsif (/^\.fi\b/) {}

      # Ignore .newline and .linelength
      elsif (/^\.newline\b/ || /^\.linelength\b/) {}

      # Ignore comments
      elsif (/^\.(\s|$)/) {}

      # There shouldn't be any other directives inside displays
      elsif (/^\./)
        {
        print "*** Ignored directive inside .display: $_";
        }

      # Handle a data line within a display. If it's an asis display, the
      # only conversion is to escape the HTML characters. Otherwise, process
      # the SGCAL markup.
      else
        {
        chomp;
        if ($asis)
          {
          # "&" must be escaped first so that the entities introduced for
          # "<" and ">" are not themselves re-escaped.
          s/&/&amp;/g;
          s/</&lt;/g;
          s/>/&gt;/g;
          }
        else
          {
          $_ = &handle_text($_, !$rm);
          $_ = "$_" if !$rm && $_ ne "";
          }

        # In a table, break fields at $t. For non-rm we must break the
        # group as well.
        if ($intable)
          {
          if ($rm)
            {
            s/\s*\$t\s*/ <\/td>\n/g;
            }
          else
            {
            s/\s*\$t\s*/ <\/tt><\/td>\n/g;
            }
          s/<\/tt>//g;
          print OUT "\n$_\n\n";
          }

        # Otherwise, output straight, with an explicit break for non asis
        # displays
        else
          {
          s/<\/tt>//g;
          print OUT "$indent$_$eol\n";
          }
        }
      }     # Loop for display contents

    # Finish off the table and the preformatted block - leave a paragraph
    # open
    print OUT "\n\n" if $intable;
    print OUT "\n\n" if $asis;
    }

  # Handle configuration option definitions
  elsif (/^\.startconf\s+(.*)/)
    {
    $confuse = &handle_text($1);
    }

  elsif (/^\.conf\b/)
    {
    my($option, $type, $default) =
      /^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;

    $option =~ s/\@_/_/g;   # Underscore will be quoted in option name

    # If $type ends with $**$, add ",expanded" as there doesn't seem to be
    # a dagger character generally available.
    $type =~ s/^"([^"]+)"/$1/;
    $type =~ s/\$\*\*\$/, expanded/;

    # Default may be quoted, and it may also have quotes that are required,
    # if it is a string.
    $default =~ s/^"(.*)"$/$1/;
    $default =~ s/""/"/g;
    $default = &handle_text($default, 0);

    print OUT "";
    &setpar(0);
    &handle_index($option, 0);
    print OUT "\n$option\n\n" .
      "Use: $confuse " .
      "Type: $type Default: $default \n";
    }

  elsif (/^\.endconf\b/)
    {
    print OUT " \n";
    }

  # Handle "items" - used for expansion items and the like. We force the
  # item text into bold, and put a rule between items.
  elsif (/^\.startitems\b/) {}

  elsif (/^\.item\s+(.*)/)
    {
    my($arg) = $1;
    chomp($arg);
    $arg =~ s/^"(.*)"$/$1/;
    $arg = &handle_text($arg, 0);

    # If there are two .items in a row, we don't want to put in the
    # separator line or start a new paragraph.
    if ($lastwasitem)
      {
      print OUT " ";
      }
    else
      {
      print OUT "";
      &setpar(1);
      }
    print OUT "$arg\n";
    $new_lastwasitem = 1;
    }

  elsif (/^\.enditems\b/)
    {
    print OUT " \n";
    }

  # Handle command line option items
  elsif (/^\.startoptions\b/) {}

  elsif (/^\.option\s+(.*)/)
    {
    my($arg) = $1;
    $arg =~ s/"([^"]*)"/$1/g;

    print OUT "";
    &setpar(0);

    # For indexing, we want to take up to the first # or < in the line,
    # before processing.
    my($name) = $arg =~ /^([^#<]+)/;
    $name = &handle_text($name, 0);
    &handle_index("-$name", 0);

    # Output as heading, after the index
    $arg = &handle_text($arg, 0);
    print OUT "\n-$arg\n\n";
    }

  elsif (/^\.endoptions\b/)
    {
    print OUT " \n";
    }

  # Found an SGCAL directive that isn't dealt with. Oh dear.
  else
    {
    print "*** Unexpected SGCAL directive: line $. ignored:\n";
    print "$_\n";
    }

  # Remember if last was a .item, and read the next line
  $lastwasitem = $new_lastwasitem;
  $_ = <IN>;
}
##################################################
# First Pass - collect references #
##################################################
# First pass over $source_file: record every .set/.rset value in %var_value
# so that references can be resolved during the second pass.
#
# Values defined before the first .chapter are stored verbatim. Inside
# chapters, "~~chapter" and "~~section" in a value are replaced by the
# current chapter and section numbers before the value is stored; the
# variable "runningfoot" is skipped. Uses the IN filehandle and the
# file-level variables $thischapter, $thissection, $chapsplit and $sectsplit.
sub pass_one{
  $thischapter = 0;

  open (IN, $source_file) || die "Can't open $source_file (first pass)\n";
  $_ = <IN>;

  # At the start of the specification text, there are some textual
  # replacement definitions. They set values, but not cross-references. They
  # may be preceded by comments.
  $_ = <IN> while (/^\.(\s|$)/);

  while (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/)
    {
    $var_value{$1} = $2;
    $_ = <IN>;
    }

  # Now skip on till we hit the start of the first chapter. It will be
  # numbered 0 if we hit ".set chapter -1". There is only ever one unnumbered
  # chapter. Stop at EOF too, so a source without chapters cannot loop
  # forever.
  while (defined($_) && !/^\.chapter/)
    {
    $thischapter = -1 if /^\.set\s+chapter\s+-1/;
    $_ = <IN>;
    }

  # Loop for handling chapters. $_ holds a .chapter line here, or is undef
  # once the input is exhausted.
  while ($_)
    {
    $thischapter++;
    $thissection = 0;

    # Scan through chapter, setting up cross-references to the chapter
    # and to the sections within it.
    while (<IN>)
      {
      last if /^\.chapter/;
      chomp;

      if (/^\.section/)
        {
        $thissection++;
        next;
        }

      # Handle .(r)set directives.
      if (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/ && $1 ne "runningfoot")
        {
        my($key,$value) = ($1,$2);
        $value =~ s/~~chapter/$thischapter/e;
        $value =~ s/~~section/$thissection/e;

        # Only one of $chapsplit or $sectsplit can be set.
        # NOTE(review): every alternative below currently yields the bare
        # value. The split-dependent forms (presumably link markup) appear
        # to be missing from this copy - confirm against a pristine source.
        if ($key =~ /^CHAP/)
          {
          $value = $chapsplit?
            "$value"
            :
            "$value";
          }
        elsif ($key =~ /^SECT/)
          {
          $value = $chapsplit?
            "$value"
            :
            $sectsplit? "$value"
            :
            "$value";
          }

        $var_value{$key} = $value;
        }
      }
    }

  close(IN);
}
##################################################
# Second Pass - generate HTML #
##################################################
# Second pass over $source_file: write the table-of-contents file to TOC and
# the document body to OUT (via openout()/closeout()), dispatching each
# source line to handle_directive() or handle_paragraph().
#
# Before the first chapter, ".index" lines found outside macro definitions
# are recorded in %cindex. Uses the IN filehandle and the file-level
# variables $thischapter, $thissection, $firstchapter, $inem, $lastwasrule,
# $lastwasitem, $chapsplit, $html, $file_base, $doctitle and $cindex_tocn.
#
# NOTE(review): the strings written to TOC contain only newlines and the
# document title, and $title and the section title capture are never used.
# It appears that HTML markup was lost from this copy of the script; those
# strings are kept with their current runtime values - confirm against a
# pristine copy.
sub pass_two{
  my($tocn) = 0;
  my($inmacro) = 0;
  my($insection) = 0;

  $inem = 0;
  $thischapter = 0;
  $thissection = 0;

  # Open the source file and get the first line
  open (IN, $source_file) || die "Can't open $source_file (2nd pass)\n";
  $_ = <IN>;

  # Skip on till we hit the start of the first chapter, but note if we
  # pass ".set chapter -1", which is used to indicate no chapter numbering
  # for the first chapter (we number it 0). Keep track of whether we are in
  # macro definitions or not, and when not, notice occurrences of .index,
  # because these are the "x see y" type entries. Stop at EOF too, so a
  # source without chapters cannot loop forever.
  while (defined($_) && !/^\.chapter/)
    {
    $thischapter = -1 if /^\.set\s+chapter\s+-1/;
    $inmacro = 1 if /^\.macro/;
    $inmacro = 0 if /^\.endm/;

    if (!$inmacro && /^\.index\s+(.*)/)
      {
      my($key);
      my($s) = $1;
      $s = &handle_text($s, 0);
      $s =~ s/ /&nbsp;/g;            # All spaces unsplittable
      $key = "\L$s";
      $key =~ s/<[^>]+>//g;
      # Turn numeric character references into the characters themselves.
      # NOTE(review): the "&#" prefix was absent in the copy reviewed and
      # has been restored by inference - confirm.
      $key =~ s/&#(\d+);/chr($1)/eg;
      $cindex{$key} = $s;
      }

    $_ = <IN>;
    }

  # Open the TOC file
  open (TOC, ">$html/${file_base}_toc.html") ||
    die "Can't open $html/${file_base}_toc.html\n";

  print TOC "\n";
  print TOC "\n\n$doctitle Contents\n\n" .
    "\n";
  print TOC "\n$doctitle\n\n\n\n";

  # Open the data file if we are not splitting at chapters
  &openout("$html/${file_base}.html") if !$chapsplit;

  # Loop for handling chapters. At the start of this loop, $_ is either EOF,
  # or contains a .chapter line.
  $firstchapter = $thischapter + 1;

  while ($_)
    {
    print TOC "\n\n" if $insection;
    $insection = 0;

    $thischapter++;
    $thissection = 0;
    $lastwasrule = 0;

    # Start a new file if required
    if ($chapsplit)
      {
      &closeout("CHAP") if $thischapter != $firstchapter;
      &openout("$html/${file_base}_$thischapter.html");
      }

    # Set up the chapter title. Save it for the TOC. Set up the anchor and
    # link back to the TOC and show the title.
    $_ =~ /^\.chapter\s+(.*)/;
    my($title) = (($thischapter > 0)? "$thischapter. " : "") . &handle_text($1, 0);

    $tocn++;
    print TOC "\n\n";

    # Scan the contents of the chapter
    $_ = <IN>;
    while ($_)
      {
      last if /^\.chapter/;

      # Handle the start of a new section, starting a new file if required
      if (/^\.section\s+(.*)/)
        {
        $thissection++;
        print TOC "\n\n";
        $_ = <IN>;
        $lastwasrule = 0;
        }

      # Blank lines at this level are ignored
      elsif (/^\s*$/)
        {
        $_ = <IN>;
        }

      # Directive and non-directive lines are handled independently, though
      # in each case further lines may be read. Afterwards, the next line is
      # in $_. If .em is at the start of a paragraph, treat it with the
      # paragraph, because the matching .nem will be too. Messy!
      elsif (/^\./)
        {
        if (/^\.em\b/)
          {
          $_ = <IN>;
          if (/^\./)
            {
            print OUT "" if ! $inem;
            $inem = 1;
            # Used to handle it here - but that fails if it is .section.
            # Just let the next iteration of the loop handle it.
            # &handle_directive();
            }
          else
            {
            $_ = ".em\n" . $_;
            &handle_paragraph();
            $lastwasrule = 0;
            $lastwasitem = 0;
            }
          }

        # Not .em
        else
          {
          &handle_directive();
          }
        }

      # Not a directive
      else
        {
        &handle_paragraph();
        $lastwasrule = 0;
        $lastwasitem = 0;
        }
      }   # Loop for each line in a chapter
    }     # Loop for each chapter

  # Close the last file, end off the TOC, and we are done.
  &closeout("");
  print TOC "\n\n" if $insection;

  # Reserve a TOC number for the concept index when there are entries.
  # A hash in boolean context is true when non-empty; "defined %hash" is a
  # fatal error in current Perl.
  if (%cindex)
    {
    $cindex_tocn = ++$tocn;
    }

  # NOTE(review): in the copy reviewed, this print and the two close()
  # calls sat inside the block above, which was never closed. They are
  # treated here as the unconditional end of the TOC - confirm.
  print TOC "\n\n\n\n";

  close(TOC);
  close(IN);
}
##################################################
# Adjust index points #
##################################################
# Because of the way the source is written, there are often index entries
# that immediately follow the start of chapters and sections and the definition
# of "items" like "helo = verify". This gets the correct page numbers for the
# PostScript and PDF formats. However, for HTML we want the index anchor to be
# before the section heading, because browsers tend to put the index point at
# the top of the screen. So we re-read all the files we've just created, and
# move some of the index points about. This is necessary only if indexes exist.
# The files are small enough to be handled entirely in memory.
sub adjust_index_points {
print "Adjusting index points to precede headings\n";
$" = "";
opendir(DIR, "$html") || die "Failed to opendir $html\n";
while ($file = readdir(DIR))
{
my($i);
next unless $file =~ /^${file_base}_\d+\.html$/;
open(IN, "<$html/$file") ||
die "Failed to open $html/$file (read)\n";
my(@lines) = ;
close(IN);
for ($i = 0; $i < @lines; $i++)
{
if ($lines[$i] =~ /^<\/a>$/)
{
# Handle an index line that follows a heading definition. Move it back
# to just before the
\n";
foreach $key (sort
{
my($aa) = $a;
my($bb) = $b;
$aa =~ s/^\x93//; # Seems like the actual char values are
$bb =~ s/^\x93//; # set by this time, not ""
return ("\L$aa" eq "\L$bb")? ("$aa" cmp "$bb") : ("\L$aa" cmp "\L$bb");
}
keys %$hash)
{
my($initial) = substr($key,0,1);
$initial = "\U$initial";
if ($initial ne $letter && $initial ge "A" && $initial le "Z")
{
print INDEX " \n";
print INDEX "\n";
print INDEX "\U$initial\E \n";
$letter = $initial;
}
print INDEX "$$hash{$key} \n";
}
print INDEX "
\n";
print INDEX "\n\n";
close(INDEX);
}
##################################################
# Show usage and die #
##################################################
sub usage {
die "Usage: g2h [-split no|section|chapter]