#!/usr/bin/perl

# Script to turn the Exim FAQ into HTML.

use integer;

# Escape the angle brackets in a piece of FAQ text so that they survive as
# literal characters in HTML. This is the one conversion that applies both to
# displays and to ordinary paragraphs.
#
# Argument: the text to convert (the caller's copy is left untouched)
# Returns:  the text with "<" turned into "&#60;" and ">" into "&#62;"

sub process_both {
my($text) = @_;
my(%entity_for) = ('<' => '&#60;', '>' => '&#62;');
$text =~ s/([<>])/$entity_for{$1}/g;               # One pass handles both
return $text;
}


# Convert a display paragraph, that is, one introduced by "==>", whose layout
# has to be kept as it is.
#
# Argument: the raw paragraph text
# Returns:  the text with the "==>" flag replaced by three spaces, the common
#           indentation cut back so that the first line is indented by three
#           spaces, and < and > escaped by process_both()

sub process_display {
my($text) = @_;

$text =~ s/^==>/   /;                     # The flag becomes plain indentation

# Measure the white space at the start of the paragraph; everything beyond
# three characters of it is taken off the front of every line.

my($lead) = ($text =~ /^(\s+)/);
my($surplus) = length($lead) - 3;
my($strip) = " " x $surplus;
$text =~ s/^$strip//mg;

$text = &process_both($text);
return $text;
}


# Function to do text conversions to paragraphs not in displays.
#
# Argument: the raw paragraph text (the caller's copy is not modified)
# Returns:  the paragraph as an HTML fragment
#
# After < and > have been escaped by process_both(), the FAQ's
# backslash-delimited font markup is turned into HTML tags, Qnnnn cross
# references and configuration sample names become links, and leading white
# space is removed from every line.
#
# The substitutions are order-dependent: the generic \.....\ rule matches any
# backslash-delimited text, so all the more specific \x.....x\ forms have to
# be handled before it.
#
# Reads the global %xref (four-digit question number => URL), which the first
# pass of the main program fills in.

sub process_non_display {
my($s) = &process_both($_[0]);

$s =~ s/@\\/@@backslash@@/g;                       # @\ temporarily hidden

$s =~ s/\\#/&nbsp;/g;                              # \# is a hard space

$s =~ s/\\\*\*([^*]*)\*\*\\/<b>$1<\/b>/g;          # \**...**\   => bold
$s =~ s/\\\*([^*]*)\*\\/<i>$1<\/i>/g;              # \*.....*\   => italic
$s =~ s/\\"([^"]*)"\\/<tt>$1<\/tt>/g;              # \"....."\   => fixed pitch
$s =~ s/\\\$([^\$]*)\$\\/<i>\$$1<\/i>/g;           # \$.....$\   => $italic
$s =~ s/\\\\([^\\]*)\\\\/<small>$1<\/small>/g;     # \\.....\\   => small
$s =~ s/\\\(([^)]*)\)\\/<i>$1<\/i>/g;              # \(.....)\   => italic
$s =~ s/\\-([^\\]*)-\\/<b>-$1<\/b>/g;              # \-.....-\   => -bold
$s =~ s/\\\[([^]]*)\]\\/&\#60;<i>$1<\/i>&\#62;/gx; # \[.....]\   => <italic>
$s =~ s/\\\?(.*?)\?\\/<a href="$1">$1<\/a>/g;      # \?.....?\   => URL
$s =~ s/\\\^\^([^^]*)\^\^\\/<i>$1<\/i>/g;          # \^^...^^\   => italic
$s =~ s/\\\^([^^]*)\^\\/<i>$1<\/i>/g;              # \^.....^\   => italic
$s =~ s/\\%([^%]*)%\\/<b>$1<\/b>/g;                # \%.....%\   => bold
$s =~ s/\\\/([^\/]*)\/\\/<i>$1<\/i>/g;             # \/...../\   => italic
$s =~ s/\\([^\\]+)\\/<tt>$1<\/tt>/g;               # \.......\   => fixed pitch

$s =~ s"//([^/\"]*)//"<i>$1</i>"g;                 # //.....//   => italic
$s =~ s/::([^:]*)::/<i>$1:<\/i>/g;                 # ::.....::   => italic:

$s =~ s/``(.*?)''/&#147;$1&#148;/g;                # ``.....''   => quoted text

$s =~ s/\s*\[\[br\]\]\s*/<br>/g;                   # [[br]]      => <br>

$s =~ s/@@backslash@@/\\/g;                        # Put back single backslash

$s =~ s/^(\s*\(\d\)\s)/$1&nbsp;/;                  # Extra space after (1), etc.

# Cross references within paragraphs. Qnnnn followed by a colon is a question
# label, not a reference, and is left alone. A reference to a question that
# has no entry in %xref stays as plain text instead of being wrapped in a
# link with an empty href.

$s =~ s{Q(\d{4})(?!:)}
       {defined $xref{$1} ? "<a href=\"$xref{$1}\">$&</a>" : $&}eg;

# References to configuration samples

$s =~ s/\b([CFLS]\d\d\d)\b/<a href="$1.txt">$1<\/a>/g;

# Remove white space at the start of every line (which also swallows any
# empty lines), to keep the file smaller (and for human reading when
# debugging).

$s =~ s/^\s+//mg;

return $s;
}


# Main program

# We want to read the file paragraph by paragraph; Perl only does this if the
# separating lines are truly blank. Having been caught by lines containing
# whitespace before, do a detrailing pass first: copy the FAQ to a temporary
# file with trailing spaces and tabs removed from every line, then rename the
# copy over the original.
#
# The three-argument form of open() is used so that a file name containing
# characters such as "|", "<" or ">" is never taken as a mode or a command.
# The copy is only renamed into place after every line has been written and
# the file has been closed successfully, so that a write error (a full disk,
# for instance) cannot replace the FAQ with a truncated copy.

die "usage: $0 <FAQ file> <HTML directory>\n" if @ARGV < 2;

open(IN, "<", $ARGV[0]) || die "can't open $ARGV[0] (preliminary): $!\n";
open(OUT, ">", "$ARGV[0]-$$") || die "can't open $ARGV[0]-$$: $!\n";
while (<IN>)
  {
  s/[ \t]+$//;
  (print OUT $_) || die "can't write to $ARGV[0]-$$: $!\n";
  }
close(IN);
close(OUT) || die "can't close $ARGV[0]-$$: $!\n";
rename("$ARGV[0]-$$", "$ARGV[0]") ||
  die "can't rename $ARGV[0]-$$ as $ARGV[0]: $!\n";

# The second argument is the name of a directory into which to put multiple
# HTML files. We start off with FAQ.html. $hdir and the OUT handle stay
# global because the later passes use them. The three-argument open() keeps
# any special characters in the directory name from being interpreted, and
# the reason for a failure is reported.

$hdir = $ARGV[1];
open(OUT, ">", "$hdir/FAQ.html") || die "can't open $hdir/FAQ.html: $!\n";

# Initial output

print OUT <<End ;
<html>
<head>
<title>The Exim FAQ</title>
</head>
<body bgcolor="#F8F8F8" text="#00005A" link="#0066FF" alink="#0066FF" vlink="#000099">
<h1>The Exim FAQ</h1>
End

# Read in paragraph mode from here on. This is deliberately a global setting:
# the main processing pass relies on it as well.

$/ = "";

# First pass to read the titles and questions and create the table of
# contents. We save it up in a vector so that it can be written after the
# introductory paragraphs.
#
# Globals left behind for the main pass:
#   @toc      the HTML for the table of contents, in output order
#   @seclist  one link per section, for the two-column contents table
#   %xref     four-digit question number => "FAQ_<sec>.html#TOC<n>"
#   $sec      the number of the last section seen
#
# The text of a question may refer to a question that comes later in the
# file. Such a reference can only be turned into a link once %xref is
# complete, so inside the loop each question just reserves its slot in @toc;
# the conversion to HTML is done after the whole file has been read.

open(IN, "<", $ARGV[0]) || die "can't open $ARGV[0] (first time): $!\n";

$toc = 0;
$sec = -1;
$inul = 0;

my(@pending);                      # [slot in @toc, Qnnnn, text, $sec, $toc]

while ($_ = <IN>)
  {
  $count = s/\n/\n/g - 1;          # Number of lines in paragraph

  if ($count == 1 && /^\d+\./)     # Look for headings
    {
    chomp;
    push @toc, "</ul>" if $inul;
    $inul = 0;
    push @toc, "<br>\n\n" if $sec++ >= 0;
    push @toc, "<a name=\"TOC$toc\" href=\"FAQ_$sec.html\">$_</a>\n";
    $toc++;

    # For the section list the title is reduced to an initial capital
    # followed by lower case, except for a few names that are left alone or
    # put back into upper case afterwards.

    ($number,$title) = /^(\d+)\.\s+(.*)$/;
    if ($title ne "UUCP" && $title ne "IRIX" && $title ne "BSDI" &&
        $title ne "HP-UX")
      {
      ($initial,$rest) = $title =~ /^(.)(.*)$/;
      $title = "$initial\L$rest";
      $title =~ s/isdn/ISDN/;
      $title =~ s/\btls\b/TLS/;
      $title =~ s/\bssl\b/SSL/;
      $title =~ s/ os x/ OS X/;
      }
    push @seclist, "<a href=\"FAQ_$sec.html\">$number. $title</a>";

    next;
    }

  if (/^(Q\d{4})/)                 # Q initial paragraph
    {
    if (!$inul)
      {
      push @toc, "<ul>\n";
      $inul = 1;
      }
    $num = $1;
    $rest = $';
    $xref{substr($num,1)} = "FAQ_$sec.html#TOC$toc";
    $rest =~ s/^: /:&nbsp;&nbsp;/;

    push @toc, "";                 # Placeholder, filled in below
    push @pending, [ $#toc, $num, $rest, $sec, $toc ];

    $toc++;
    next;
    }
  }

push @toc, "</ul>\n" if $inul;
close(IN);

# Every question is now in %xref, so the question texts can be converted
# with all their cross references resolved.

foreach my $entry (@pending)
  {
  my($slot, $qnum, $qtext, $qsec, $qtoc) = @$entry;
  $qtext = &process_non_display($qtext);
  $toc[$slot] = "<li><a name=\"TOC$qtoc\" href=\"FAQ_$qsec.html#TOC$qtoc\">$qnum</a>$qtext<br><br></li>\n";
  }


# This is the main processing pass. We have to detect the different kinds of
# "paragraph" and do appropriate things. The file is reopened with the
# three-argument form of open() so that special characters in its name are
# not interpreted, and the reason for a failure is reported.

open(IN, "<", $ARGV[0]) || die "can't open $ARGV[0] (second time): $!\n";

# Skip the title (paragraph mode is in effect, so this reads the whole of the
# first paragraph)

$_ = <IN>;

# Handle the rest of the file. $toc is counted again from zero so that the
# anchors match those allocated in the first pass; $maxsec remembers the
# number of the last section so that it gets no "Next" link at its head.

$toc = 0;
$maxsec = $sec;
$sec = -1;

# Main loop: read the FAQ one paragraph at a time ($/ was set to "" earlier),
# classify each paragraph by how it starts, and write the corresponding HTML
# to OUT. OUT is FAQ.html to begin with and is switched to FAQ_<n>.html each
# time a section heading is met.

while ($_ = <IN>)
  {
  $count = s/\n/\n/g - 1;          # Number of lines in paragraph
  chomp;                           # Trailing newlines

  # The paragraph that introduces the list of sections is replaced by a
  # pointer to the keyword index and a two-column table built from @seclist.
  # The first column holds the first $cols entries and the second column the
  # remainder; "use integer" at the top of the file makes the division
  # truncate, so $cols is half the number of sections, rounded up.

  if (/^The FAQ is divided into/)
    {
    my($count) = scalar(@seclist);
    my($cols) = ($count + 1)/2;

    print OUT "<hr><a name=\"TOC\"><h1>Index</h1></a>\n";
    print OUT "<p>A <i>Keyword-in-context</i> <a href=\"FAQ-KWIC_A.html\">index</a> " .
              "to the questions is available. This is usually the " .
              "quickest way to find information in the FAQ.</p>\n";

    print OUT "<h1>Contents</h1>\n";
    print OUT "<p>The FAQ is divided into the following sections:<br><br></p>\n";

    print OUT "<table>\n";

    for ($i = 0; $i < $cols; $i++)
      {
      print OUT "<tr>\n";
      print OUT "  <td>", "&nbsp;" x 4, "</td>\n";
      print OUT "  <td>&nbsp;$seclist[$i]</td>\n";
      print OUT "  <td>", "&nbsp;" x8, "$seclist[$cols+$i]</td>\n"
        if $cols+$i < $count;
      print OUT "</tr>\n";
      }
    print OUT "</table><br><p>\n<hr><br>\n";
    print OUT "<h1>List of questions</h1>\n";

    $_ = <IN>;                     # Skip section list
    next;
    }

  # A one-line paragraph that starts with a number and a dot is a section
  # heading. It finishes the current page and starts a new one.

  if ($count == 1 && /^\d+\./)     # Look for headings
    {
    # The table of contents saved by the first pass is written to the page
    # that is open when the first heading is reached. This empties @toc, so
    # later headings skip this step.

    if (@toc != 0)                 # TOC when hit first heading
      {
      while (@toc != 0) { print OUT shift @toc; }
      }

    # Output links at the bottom of this page. $sec is still the number of
    # the page being finished (-1 while FAQ.html is open), so $sec+1 is the
    # page that is about to be created.

    print OUT "<hr><br>\n";
    print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
    if ($sec > 0)
      {
      printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n", $sec-1);
      }
    printf OUT ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);

    # New section goes in new file

    print OUT "</body>\n</html>\n";
    close OUT;

    $sec++;
    open(OUT, ">$hdir/FAQ_$sec.html") ||
      die "Can't open $hdir/FAQ_$sec.html\n";

    print OUT "<html>\n<head>\n" .
      "<title>The Exim FAQ Section $sec</title>\n" .
      "</head>\n" .
      "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
      "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

    printf OUT "<h1>The Exim FAQ</h1>\n";

    # Navigation links at the top of the new page; the last section
    # ($maxsec) gets no "Next" link.

    print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
    if ($sec > 0)
      {
      printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n", $sec-1);
      }
    if ($sec < $maxsec)
      {
      printf OUT ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);
      }

    print OUT "<hr><br>\n";

    # The heading itself links back to its entry in the table of contents.
    # $toc is advanced for headings and questions alike, in the same order
    # as in the first pass, so the anchor numbers agree.

    print OUT "<h2><a href=\"FAQ.html#TOC$toc\">$_</a></h2>\n";
    $toc++;
    next;
    }

  # Put two hard spaces after a question, answer or configuration sample
  # label at the start of the paragraph.

  s/^([QA]\d{4}|[CFLS]\d{3}): /$1:&nbsp;&nbsp;/;

  # A question: the label becomes both an anchor and a link back to the
  # table of contents; only the text after the label is converted.

  if (/^(Q\d{4}:)/)               # Q initial paragraph
    {
    print OUT "<p>\n<a name=\"TOC$toc\" href=\"FAQ.html#TOC$toc\">$1</a>";
    $_ = &process_non_display($');
    print OUT "$_\n</p>\n";
    $toc++;
    next;
    }

  # An answer: the whole paragraph is converted and the label is then
  # wrapped in a coloured <font> element.

  if (/^A\d{4}:/)                 # A initial paragraph
    {
    $_ = &process_non_display($_);
    s/^(A\d{4}:)/<font color="#00BB00">$1<\/font>/;
    print OUT "<p>\n$_\n</p>\n";
    next;
    }

  # If a paragraph begins ==> it is a display which must remain verbatim
  # and not be reformatted. The flag gets turned into spaces.

  if ($_ =~ /^==>/)
    {
    $_ = &process_display($_);
    chomp;
    print OUT "<pre>\n$_</pre>\n";
    }

  # Non-display paragraph; massage the final line & my sig. The line that
  # marks the end of the FAQ produces no output at all.

  elsif (/^\*\*\* End of Exim FAQ \*\*\*/)
    {
    }

  # Anything else is an ordinary paragraph. In the paragraph that starts
  # with the author's name every newline is preceded by a <br>.

  else
    {
    $_ = &process_non_display($_);
    if (/^Philip Hazel/)
      {
      s/\n/<br>\n/g;
      s/<br>$/<hr><br>/;
      }
    print OUT "<p>\n$_\n</p>\n";
    }
  }

close(IN);

# Finish off the last page. As with the links written at the foot of the
# earlier pages, "Previous" is only output when there is an earlier section
# page to go back to; otherwise it would point at a file that is never
# written (FAQ_-1.html when the FAQ has a single section).

print OUT "<hr><br>\n";
print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
if ($sec > 0)
  {
  printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>\n", $sec-1);
  }

print OUT "</body>\n</html>\n";

# Errors in buffered output only show up when the file is closed, so check.

close(OUT) || die "can't close the last HTML file: $!\n";

# End of script