#!/usr/local/bin/perl5
#
# news2html - convert a set of MUSH news files to a set of html files
#             with serious cross-referencing.
#
# Usage: news2html [options] file(s)
#
# By Alan Schwartz
#
# For each file, produces file.html from that file.
#
# Options:
#  -v		Verbose (accepted; currently has no effect)
#  -f type	What type of file (news, help, etc.)
#  -s		Add links only when we see a "<type> <topic>" string.
#		Otherwise, we link all occurrences of a term! (ex: -s news)
#  -a		Include admin topics (which begin with &)
#  -d dir	Directory to place the output files in
#  -t title	Base title name for the pages
#
# Algorithm:
# 1. Scan over all the files and extract a set of topic names and their
#    file
# 2. Scan over all the files, adding links.
#
# NOTE(review): the copy of this script that was reviewed had been run
# through an HTML stripper: every "<...>" span (the <IN> reads, the heredoc
# openers and all HTML tags) was missing.  The markup emitted below is a
# reconstruction -- confirm it against a known-good copy before relying on
# the exact tags.

use strict;
use warnings;
use Getopt::Std;    # replaces getopts.pl, which left the core in Perl 5.16

# Set by getopts(); see the option list above.
our ($opt_a, $opt_d, $opt_f, $opt_s, $opt_t, $opt_v);

# Parse the switches first so that "news2html -v" with no files is caught.
getopts('af:svd:t:') or die usage();
die usage() unless @ARGV;

# -s requires -f
die "You must use the -f type switch with -s\n" if $opt_s && !$opt_f;

my $type  = defined $opt_f ? ucfirst $opt_f : '';
my $title = $opt_t ? $opt_t : "MUSH $type";
my $dir   = $opt_d ? "$opt_d/" : '';

# Pass 1: prescan to build the topics list (topic name => html file).
my %xrefs;
my @files;
for my $file (@ARGV) {
    my $in;
    unless (open($in, '<', $file)) {
        warn "Unable to open $file, skipping.\n";
        next;
    }
    push @files, $file;
    while (my $line = <$in>) {
        # Topics are lines which begin with an "&"
        next unless $line =~ /^& *(.*)/;
        my $name = _topic_key($1);
        # An empty name would become an empty regex alternative and
        # match at every position, so never register it.
        next if $name eq '';
        # Skip admin topics if appropriate
        next if !$opt_a && $name =~ /^&/;
        $xrefs{$name} = "$file.html";
    }
    close($in);
}

# Ditch the topic "help"
delete $xrefs{'help'};

# Build ONE case-insensitive pattern for all topics, longest first so that
# "foo bar" wins over "foo".  A single pass over each line means text that
# has already been turned into a link is never rescanned, and quotemeta
# keeps names such as "@dig" or "+who" from being read as regex syntax.
my %target_of;    # lowercased topic => "file.html#anchor"
for my $topic (keys %xrefs) {
    $target_of{ lc $topic } = $xrefs{$topic} . '#' . lc $topic;
}
my $link_re;
if (%target_of) {
    my $alternatives = join '|',
        map { quotemeta($_) } sort bylength keys %target_of;
    $link_re = $opt_s
        ? qr/(\Q$type\E )($alternatives)/i
        : qr/(^|\s)($alternatives)\b/i;
}

# Pass 2: now edit the files, adding anchors and links.
for my $file (@files) {
    my ($in, $out);
    unless (open($in, '<', $file)) {
        warn "Unable to open $file, skipping.\n";
        next;
    }
    my $outfile = "$dir$file.html";
    unless (open($out, '>', $outfile)) {
        warn "Unable to open $outfile, skipping.\n";
        close($in);
        next;
    }

    my $header = $file;
    $header =~ s/1head/Table of Contents/i;
    $header =~ s/^(.*)\..*/\u$1/;
    print $out <<EOS;
<html>
<head><title>$title: $header</title></head>
<body>
<h1>$header</h1>
<pre>
EOS

    # Per-file state, so an unterminated heading in one file cannot leak
    # into the next.
    my $intopic      = 0;    # inside a run of "& name" heading lines
    my $inadmintopic = 0;    # current topic is an admin ("& &name") topic

    while (my $line = <$in>) {
        if ($line =~ /^& *(.*)/) {
            my $raw = $1;
            $inadmintopic = ($line =~ /^& *&/) ? 1 : 0;
            next if $inadmintopic && !$opt_a;

            # extract and lowercase the topic
            (my $shown = lc $raw) =~ s/\s+$//;
            my $anchor = '<a name="'
                . _escape_attr(lc _topic_key($raw)) . '">'
                . _escape_html(ucfirst $shown) . '</a>';
            if ($intopic) {
                # This is the second or later of a multiply named topic
                print $out " aka $anchor\n";
            }
            else {
                print $out "</pre>\n<hr>\n<h2>\n$anchor\n";
            }
            $intopic = 1;
        }
        else {
            print $out "</h2>\n<pre>\n" if $intopic;
            $intopic = 0;
            next if $inadmintopic && !$opt_a;
            print $out _link_topics($line, $link_re, \%target_of);
        }
    }
    # A file that ends on a heading line still needs its markup balanced.
    print $out "</h2>\n<pre>\n" if $intopic;
    close($in);

    print $out <<EOS;
</pre>
</body>
</html>
EOS
    close($out) or warn "Error writing $outfile: $!\n";
    chmod 0644, $outfile;
}

exit 0;

# Normalise a raw topic name exactly as pass 1 stores it in %xrefs:
# trailing whitespace removed and "1head" shown as "Table of Contents".
sub _topic_key {
    my ($raw) = @_;
    $raw =~ s/\s+$//;
    $raw =~ s/1head/Table of Contents/i;
    return $raw;
}

# Escape the characters that are special in HTML text.
sub _escape_html {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Escape a string for use inside a double-quoted HTML attribute.
sub _escape_attr {
    my ($text) = @_;
    $text = _escape_html($text);
    $text =~ s/"/&quot;/g;
    return $text;
}

# Return one body line as HTML: every topic occurrence matched by $link_re
# is wrapped in a link to its entry in %$target_of, and all other text is
# HTML-escaped.  Matching runs on the raw line so that topics containing
# "&", "<" or ">" still match.
sub _link_topics {
    my ($line, $link_re, $target_of) = @_;
    return _escape_html($line) unless defined $link_re;

    my $html = '';
    my $pos  = 0;
    while ($line =~ /$link_re/g) {
        # Save the offsets of capture 2 (the topic) before any helper
        # runs another regex and resets them.
        my ($from, $to) = ($-[2], $+[2]);
        my $target = $target_of->{ lc $2 };
        next unless defined $target;
        $html .= _escape_html(substr($line, $pos, $from - $pos))
            . '<a href="' . _escape_attr($target) . '">'
            . _escape_html(substr($line, $from, $to - $from))
            . '</a>';
        $pos = $to;
    }
    return $html . _escape_html(substr($line, $pos));
}


# Sort comparator: longest string first.
# Strings of equal length are ordered by plain string comparison, so the
# result does not depend on the (randomized) order in which hash keys are
# handed to sort.  Reads the sort globals $a and $b; returns -1, 0 or 1.
sub bylength {
  return length($b) <=> length($a) || $a cmp $b;
}

# Return usage info
sub usage {
<	Specify type of file (news, help, etc.)
  -s		Add links only when we see " "
  -a		Include admin topics
  -d 	Place output files in 
  -t 	Base title for the web pages
EOS
}