#!/usr/bin/perl
#---------------------------------------------------------
#                      info2html
#---------------------------------------------------------
#
# PURPOSE
#  This perl script converts info nodes to HTML format.
#
# DESCRIPTION
#  The node is specified on the command line using the
#  syntax:
#    (<infofile>)<tag>
#
#  If <infofile> and/or <tag> are missing, (dir)Top is assumed.
#
# AUTHORS
#  2015.12.25  +A.M.Danischewski
#  2006.08.16  Jon Howell
#  1995.08.04  Tim Witham
#
# ORIGINAL AUTHOR
#  11.10.93    Karl Guggisberg
#
# HISTORY
#  11.10.93    V 1.0
#  14.10.93    V 1.0a  some comments added
#  15.10.93    V 1.0b  file for configuration settings
#  16.10.93    V 1.0c  multiple info path possible
#                      some bugs in escaping references removed
#  28.6.94     V 1.0d  some minor changes
#  8.4.95      V 1.1   bug fixes by Tim Witham
#
#  1998.05.05  V 1.2   bug fixes, added expires headers, added infocat,
#                      taken over web site maintenance.
#                      Jon Howell
#  2006-08-16  V 2.0   Output HTML is tidier now. CSS added.
#                      Lots of new config vars added, so old config files
#                      may not work happily unless you tweak them a bit.
#                      Minor bugfixes to the (de)escaping logic.
#                      Minor typos in comments fixed.
#  2015.12.25  V 2.1   Hacked into quasi PHP-CGI, removed CGI headers
#                      and warnings. Changed configuration to auto-locate
#                      from the home directory of this script.
#                      +A.M.Danischewski
#  2015.12.27  V 2.1   Removed '.' from the first capture group of
#                      ParsHeaderToken, because some info pages have
#                      multiple .'s in their node name (e.g. Nasm).
#-------------------------------------------------------

require 5;  # even though most of the code is in Perl 4 style.

$VERSION = "2.1";

# Getting the full path of the info2html.conf
$0 =~ m!(.*/)[^/]+$!;   # legacy match; its capture is not used below
use File::Basename;
$CURRENT_PATH  = dirname(__FILE__);
$INFO2HTMLCONF = $CURRENT_PATH."/info2html.conf";
require($INFO2HTMLCONF);  #-- configuration settings
use URI::Escape;

#-- patterns
$NODEBORDER = '\037\014?';     #-- delimiter of an info node
$REDIRSEP   = '\177';          #-- delimiter in tag tables
$WS         = '[ \t]+';        #-- white space +
$WSS        = '[ \t]*';        #-- white space *
$TE         = '[\t\,\.]';      #-- end of a tag
$TAG        = '[^\t\,\.]+';    #-- pattern for a tag
$FTAG       = '[^\)]+';        #-- pattern for a file name in
                               #-- a cross reference

#---------------------------------------------------------
#                    DieFileNotFound
#---------------------------------------------------------
#  Replies with an error message if the file '$FileName'
#  is not accessible, then dies with an empty message.
#  The file name is deliberately not echoed:
#  Don't reveal where we're looking... --jonh 5/20/97
#  (and reapplied 5/4/1998)
#
#  NOTE(review): the text below appears to have lost its
#  HTML markup; restore it from the upstream script.
#---------------------------------------------------------
sub DieFileNotFound{
  my ($FileName) = @_;
  #-- TEXT : error message if a file could not be opened
  print <<"EOF";
Info Files - Error Message
$BOTS_STAY_AWAY
$HTML_HEAD_STUFF

File IO Error

The Info file could not be opened for reading.

EOF
  die "\n";
}

#---------------------------------------------------------
#                      Escape
#---------------------------------------------------------
#  Returns '$Tag' URI-escaped (thin wrapper around
#  URI::Escape::uri_escape).
#---------------------------------------------------------
sub Escape{
  my ($Tag) = @_;
  return uri_escape($Tag);
}

#----------------------------------------------------------
#                    DeEscape
#----------------------------------------------------------
#  Returns '$Tag' with URI escapes decoded (thin wrapper
#  around URI::Escape::uri_unescape).
#----------------------------------------------------------
sub DeEscape{
  my ($Tag) = @_;
  return uri_unescape($Tag);
}

#----------------------------------------------------------
#                    ParsHeaderToken
#----------------------------------------------------------
#  Parses the header line of an info node for a specific
#  link directive (e.g. Up, Prev).
#
#  Arguments: $HL    - the header line
#             $Token - the directive name, without the colon
#  Returns:   the list (InfoFile, Tag).
#             ("","") if the directive is absent; InfoFile
#             is "" when no "(file)" part is given; Tag
#             defaults to "Top" when no node name is given.
#----------------------------------------------------------
sub ParsHeaderToken{
  my ($HL,$Token) = @_;
  return ("","") if $HL !~ /$Token:/;   #-- token not available

  #-- optional "(file)" part directly after the token
  my $InfoFile = "";
  my $FilePat  = "";
  if ($HL =~ m!$Token:$WS\(($FTAG)\)!) {
    $InfoFile = $1;
    #-- the file name is data, not a pattern: quote it so names
    #-- such as "libstdc++" still match literally below
    $FilePat = '\(' . quotemeta($InfoFile) . '\)';
  }

  ## +A.M.Danischewski 20151227 -- Removed the period from the first
  ## capture group, some info pages have multiple periods in the node
  ## name (e.g. Nasm).
  #-- Only trust the capture when the match succeeded; a header line
  #-- without a trailing terminator ends the tag at end of string.
  my $Tag = "Top";
  if ($HL =~ m!$Token:$WS$FilePat$WSS([^\t\,\n]+)?(?:[\t\,\.\n]|\z)!) {
    $Tag = $1 if defined $1 && $1 ne "";
  }
  return ($InfoFile,$Tag);
}

#---------------------------------------------------------
#                    ParsHeaderLine
#--------------------------------------------------------
#  Parses the header line on an info node for all link
#  directives allowed in a header line.
#  Sometimes the keyword 'Previous' is found in stead of
#  'Prev'. That's why the redirection line is checked
#  against both of these keywords.
#-------------------------------------------------------
#  Returns a flat list of eight elements: the
#  (InfoFile, Tag) pairs for Node, Next, Up and Prev,
#  in that order, as produced by ParsHeaderToken.
#-------------------------------------------------------
sub ParsHeaderLine{
  my ($HL) = @_;
  my @LinkList;

  #-- Node, Next, Up
  for my $Token ("Node", "Next", "Up") {
    push(@LinkList, &ParsHeaderToken($HL,$Token));
  }

  #-- Prev or Previous: keep the result of the fallback lookup
  #-- (it used to be computed and thrown away)
  my @LinkInfo = &ParsHeaderToken($HL,"Prev");
  @LinkInfo = &ParsHeaderToken($HL,"Previous")
    if $LinkInfo[0] eq "" && $LinkInfo[1] eq "";
  push(@LinkList,@LinkInfo);

  return @LinkList;
}

############################################################
# turn tabs into correct number of spaces
#
# Returns a copy of $line in which every tab is expanded to
# the next multiple-of-8 column.
sub Tab2Space {
  my ($line) = @_;

  $line =~ s/^\t/        /;   # 8 leading spaces if initial tab
  while ($line =~ s/^([^\t]+)(\t)/$1 . ' ' x (8 - length($1) % 8)/e) {
  }                           # replace each tab with right num of spaces
  return $line;
}

#--------------------------------------------------------
#                    MenuItem2HTML
#--------------------------------------------------------
#  Transform an info menu item in HTML with references.
#
#  Arguments: $Line         - one line of an info menu
#             $BaseInfoFile - info file the menu belongs to
#  Returns:   the HTML line, or the tab-expanded line
#             unchanged when no link can be determined.
#
#  NOTE(review): the returned string appears to have lost
#  its anchor markup; $MenuLinkFile and $MenuLinkTag are
#  still computed for it but are not visible in the text.
#-------------------------------------------------------
sub MenuItem2HTML{
  my ($Line,$BaseInfoFile) = @_;
  my ($MenuLinkTag,$MenuLinkFile,$MenuLinkRef,$MenuLinkText);

  $Line = &Tab2Space($Line);   # make sure columns line up well

  if ($Line =~ /\* ([^:]+)::/) {   # -- is a simple entry ending with :: ?
    $MenuLinkTag  = $1;
    $MenuLinkRef  = $1;
    $MenuLinkText = $';
    $MenuLinkFile = &Escape($BaseInfoFile);
  }
  elsif ($Line =~ /\* ([^:]+):(\s*\(($FTAG)\)\.?)?(.*)$/) {
    #-- copy the captures before any other match can clobber them
    my ($Ref,$FilePart,$File,$Rest) = ($1,$2,$3,$4);
    $MenuLinkFile = $BaseInfoFile;
    $MenuLinkRef  = $Ref;
    $MenuLinkText = $Rest;
    if ($FilePart) {
      #-- entry points into another file: link to its Top node and
      #-- pad the text by the width of the "(file)" part
      $MenuLinkFile = $File;
      $MenuLinkTag  = 'Top';
      $MenuLinkText = ' ' x (length($FilePart)+1) . "$Rest\n";
    }
    else {
      my $Tail = "$Rest\n";
      if ($Tail =~ /( *($TAG)?$TE(.*))$/) {
        $MenuLinkTag  = $2;
        $MenuLinkText = $Tail;
      }
    }
  }
  else {   # can't determine link, just show it
    return $Line;
  }

  $MenuLinkTag = &Escape($MenuLinkTag);   # -- escape special chars
  # Yes, we routinely double-escape.  Does anyone remember why?

  #-- produce a HTML line
  return "$MENU_DOT$MenuLinkRef$MenuLinkText";
}

#-------------------------------------------------------------
#                   _OpenInfoFile
#-------------------------------------------------------------
#  Opens '$FileName' for reading and returns the handle.
#  Names ending in .gz / .bz2 are read through gunzip / bzcat.
#  Calls DieFileNotFound if the open fails.
#  The name is single-quoted for the shell and plain files
#  use three-argument open, so characters in the file name
#  can neither run commands nor change the open mode.
#-------------------------------------------------------------
sub _OpenInfoFile{
  my ($FileName) = @_;
  my ($Handle,$Opened);

  (my $Quoted = $FileName) =~ s/'/'\\''/g;
  if ( $FileName =~ /^(.+)\.gz$/ ) {
    $Opened = open($Handle, '-|', "gunzip < '$Quoted' 2>/dev/null");
  }
  elsif ( $FileName =~ /^(.+)\.bz2$/ ) {
    $Opened = open($Handle, '-|', "bzcat '$Quoted' 2>/dev/null");
  }
  else {
    $Opened = open($Handle, '<', $FileName);
  }
  &DieFileNotFound($FileName) unless $Opened;
  return $Handle;
}

#-------------------------------------------------------------
#                   ReadIndirectTable
#------------------------------------------------------------
#  Scans an info file for the occurrence of an 'Indirect:'
#  table. Scans the entries and appends the file names to
#  @InfoFiles and the global offsets to @Offsets (both are
#  passed in as typeglobs).
#---------------------------------------------------------
sub ReadIndirectTable{
  local($FileName,*InfoFiles,*Offsets) = @_;
  my $Handle = &_OpenInfoFile($FileName);
  my $Next   = "";

  #-- scan for start of Indirect: Table
  while (defined(my $Row = <$Handle>)) {
    if ($Row =~ /$NODEBORDER/) {
      $Next = <$Handle>;
      last unless defined $Next;   #-- border was the last line
    }
    last if $Next =~ /^Indirect:/i;
  }

  #-- scan the entries and setup the arrays
  while (defined(my $Row = <$Handle>)) {
    last if $Row =~ /$NODEBORDER/;
    if ($Row =~ /([^:]+):[ \t]+(\d+)/) {
      push(@InfoFiles,$1);
      push(@Offsets,$2);
    }
  }
  close($Handle);
}

#---------------------------------------------------------
#                   ReadTagTable
#--------------------------------------------------------
#  Reads in a tag table from an info file.
#  Fills the associative array %TagList with the tags found.
#  Tags are transformed to lower case (info is not
#  case sensitive for tags).
#  The entries in the associative array are of the
#  form
#      <file>#<offset>
#  <file> may be empty if an indirect table is
#  present or if the node is located in the
#  main file.
#  'Exists' indicates if a tag table has been found.
#  'IsIndirect' indicates if the tag table is based
#  on a indirect table.
#  (TagList, Exists and IsIndirect are passed as typeglobs.)
#--------------------------------------------------------
sub ReadTagTable{
  local($FileName,*TagList,*Exists,*IsIndirect) = @_;
  my $Handle = &_OpenInfoFile($FileName);

  $Exists     = 0;
  $IsIndirect = 0;

  #-- scan for start of tag table
  while (defined(my $Row = <$Handle>)) {
    next unless $Row =~ /$NODEBORDER/;
    my $Title = <$Handle>;
    last unless defined $Title;
    if ($Title =~ /^Tag table:/i) {
      $Exists = 1;
      last;
    }
  }

  #-- scan the entries
  while (defined(my $Row = <$Handle>)) {
    $IsIndirect = 1 if $Row =~ /^\(Indirect\)/i;
    last if $Row =~ /$NODEBORDER/;
    #-- skip lines that are not tag entries instead of recording
    #-- whatever an earlier match left in the capture variables
    next unless $Row =~ /Node:[ \t]+([^$REDIRSEP]+)$REDIRSEP(\d+)/;
    my ($Tag,$Offset) = ($1,$2);
    $Tag =~ y/A-Z/a-z/;   #-- to lowercase
    my $File = $Row =~ /File:[ \t]+([^\t,]+)/ ? $1 : "";
    $TagList{$Tag} = $File."#".$Offset;
  }
  close($Handle);
}

#----------------------------------------------------------
#                    ParsCrossRefs
#----------------------------------------------------------
#  scans a line for the existence of cross references and
#  transforms them to HTML using a little icon
#
#  Arguments: $prev         - the previous, still unprinted
#                             line (or empty)
#             $Line         - the line to scan
#             $BaseInfoFile - info file the line belongs to
#  Returns:   the transformed line; it is prefixed with
#             "DONTPRINTYET " when it ends in an unfinished
#             *Note reference that may continue on the
#             next line.
#
#  NOTE(review): the appended link text appears to have
#  lost its anchor markup; the escaped tag and file are
#  still computed for it but are not visible in the text.
#----------------------------------------------------------
sub ParsCrossRefs{
  my ($prev,$Line,$BaseInfoFile) = @_;
  my $NewLine = '';

  $Line = " ".$Line;
  #-- join with the previous line if it ended inside a reference
  if ($prev =~ /\*Note([^\t\,\.]*)$/i && $Line =~ /^$TAG$TE/) {
    $Line = "$prev-NEWLINE-$Line";
  }

  my @Tokens = split(/(\*Note)\s*/i,$Line);   # -- split the line
  #-- loop on the list, not on the token's truth value, so a text
  #-- part consisting of "0" does not end the scan early
  while (@Tokens) {
    my $Token = shift @Tokens;
    if ($Token !~ /^\*Note/i) {   #-- this part is pure text
      $NewLine .= $Token;
      next;                       #-- ... take the next part
    }

    my $CrossRef = shift(@Tokens);
    $CrossRef = '' unless defined $CrossRef;   #-- *Note at end of line
    if ($CrossRef !~ /:/) {   #-- seems not to be a valid cross ref.
      $NewLine .= $Token.$CrossRef;
      next;                   # -- ... take the next one
    }

    if ($CrossRef =~ /^([^:]+)::/) {   # -- a simple cross ref..
      my $CrossRefRef  = $1;
      my $CrossRefText = $';
      my $CrossRefTag  = $CrossRefRef;
      $CrossRefTag =~ s/-NEWLINE-/ /g;
      $CrossRefTag =~ s/^\s+//;
      $CrossRefTag =~ s/\s+/ /g;
      $CrossRefRef =~ s/-NEWLINE-/\n/g;
      $CrossRefTag = &Escape($CrossRefTag);   # -- escape specials
      #-- escape into a per-reference copy; overwriting
      #-- $BaseInfoFile re-escaped it for every later reference
      my $CrossRefFile = &Escape($BaseInfoFile);
      $NewLine .= "$CR_URL$CrossRefRef$CrossRefText";
      next;   # -- .. take the next one
    }

    if ($CrossRef !~ /$TE/) {   # never mind if tag doesn't end on this line
      $NewLine .= $Token.$CrossRef;
      next;
    }

    #-- A more complicated one ..
    my ($CrossRefRef,$Rest) = $CrossRef =~ /([^:]+):(.*)\z/s;
    unless (defined $CrossRefRef) {   #-- nothing in front of the colon
      $NewLine .= $Token.$CrossRef;
      next;
    }
    $CrossRef = $Rest;
    my $CrossRefText = $CrossRef;

    my $CrossRefFile = '';
    if ($CrossRef =~ /^(?:\s|\n|-NEWLINE-)*\(($FTAG)\)/) {   #-- .. with another file ?
      $CrossRefFile = $1;
      $CrossRef     = $';
    }

    #-- ... and a tag ?  (captures are only used when this matched)
    my ($CrossRefTag,$TagEnd) = ('','');
    if ($CrossRef =~ /^(?:\s|\n|-NEWLINE-)*($TAG)?($TE)/) {
      $CrossRefTag = $1 if defined $1;
      $TagEnd      = $2;
    }
    if ($CrossRefTag eq "" && $CrossRefFile eq "") {
      $NewLine .= "*Note : $CrossRefText$TagEnd";
      next;
    }

    $CrossRefTag  =~ s/-NEWLINE-/ /g;
    $CrossRefTag  =~ s/^\s+//;
    $CrossRefTag  =~ s/\s+/ /g;
    $CrossRefRef  =~ s/-NEWLINE-/\n/g;
    $CrossRefText =~ s/-NEWLINE-/\n/g;
    $CrossRefFile = $BaseInfoFile if $CrossRefFile eq "";
    $CrossRefTag  = "Top" if $CrossRefTag eq "";
    $CrossRefRef  = "($CrossRefFile)$CrossRefTag" if $CrossRefRef eq '';
    $CrossRefTag  = &Escape($CrossRefTag);   #-- escape specials
    $CrossRefFile = &Escape($CrossRefFile);
    #-- append the HTML text
    $NewLine .= "$CR_URL$CrossRefRef$CrossRefText";
  }

  #-- hold the line back if a reference is still open at its end
  return "DONTPRINTYET $NewLine" if $NewLine =~ /\*Note([^\t\,\.]*)$/i;
  return $NewLine;   #-- return the new line
}

#-------------------------------------------------------------
#                   PrintLinkInfo
#-------------------------------------------------------------
#  prints the HTML text for a link information in the
#  header of an info node. Uses some URLs of icons
#  are specified in 'info2html.conf'.
#
#  Arguments: $LinkType     - matched against Prev, Up, Next
#             $LinkFile     - target info file ("" = current)
#             $LinkTag      - target node
#             $BaseInfoFile - current info file
#             $NoKeys       - true: no accesskey attributes
#  Prints nothing when both $LinkFile and $LinkTag are empty.
#
#  NOTE(review): the printed text appears to have lost its
#  anchor markup; $LinkAtts, $LinkFileEsc and the escaped
#  $LinkTag are still computed for it.
#------------------------------------------------------------
sub PrintLinkInfo{
  my ($LinkType,$LinkFile,$LinkTag,$BaseInfoFile,$NoKeys) = @_;

  return if $LinkFile eq "" && $LinkTag eq "";

  #-- start empty so an unknown link type cannot print the text
  #-- left over from a previous call
  my $LinkTypeText = '';
  my $LinkAtts     = '';
  #-- Link Type 'Prev'
  if ($LinkType =~ /Prev/) {
    $LinkTypeText = $PREV_URL;
    $LinkAtts = $NoKeys ? " title='$LinkType' "
                        : " accesskey='p' title='alt-p: previous' ";
  }
  #-- Link Type 'Up'
  elsif ($LinkType =~ /Up/) {
    $LinkTypeText = $UP_URL;
    $LinkAtts = $NoKeys ? " title='$LinkType' "
                        : " accesskey='u' title='alt-u: up' ";
  }
  #-- Link Type 'Next'
  elsif ($LinkType =~ /Next/) {
    $LinkTypeText = $NEXT_URL;
    $LinkAtts = $NoKeys ? " title='$LinkType' "
                        : " accesskey='n' title='alt-n: next' ";
  }

  #-- If no auxiliary file specified, use the current info file
  my $LinkFilePre = $LinkFile eq "" ? "" : "$LinkFile: ";
  $LinkFile = $BaseInfoFile if $LinkFile eq "";
  my $LinkRef = $LinkTag;
  $LinkTag = &Escape($LinkTag);
  my $LinkFileEsc = &Escape($LinkFile);

  #-- print the HTML Text
  print <<"EOF";
$LinkTypeText $LinkFilePre$LinkRef
EOF
}

#-------------------------------------------------------------
#                   PrintHeader
#-------------------------------------------------------------
#  Prints the header for an info node in HTML format.
#
#  Arguments: *LinkList     - typeglob of the list returned by
#                             ParsHeaderLine ($LinkList[1] is
#                             the node name)
#             $BaseInfoFile - current info file
#
#  NOTE(review): the printed text appears to have lost its
#  HTML markup; restore it from the upstream script.
#------------------------------------------------------------
sub PrintHeader{
  local(*LinkList,$BaseInfoFile) = @_;

  #-- TEXT for the header of an info node
  #-- the Top node is headed by the file name alone
  my $heading = $LinkList[1] eq 'Top'
    ? "\n\n$BaseInfoFile\n\n"
    : "\n\n$BaseInfoFile: $LinkList[1]\n\n";

  print <<"EOF";
Info: ($BaseInfoFile) $LinkList[1]
$HTML_HEAD_STUFF
EOF
  print "\n\n\n$heading";
  print "\n\n";
  return;
}


#---------------------------------------------------------
#                       PrintFooter
#---------------------------------------------------------
#  prints the footer for an info node in HTML format
#---------------------------------------------------------
sub PrintFooter{
  local(*LinkList,$BaseInfoFile) =@_;
  #-- TEXT for the footer of an info node
  print "
\n\n"; print "\n"; print "\n"; print "\n"; return; } #---------------------------------------------------------- # ReplyNotFoundMessage #---------------------------------------------------------- sub ReplyNotFoundMessage{ local($FileName,$Tag) = @_; print <<"EOF"; Info Files - Error Message $BOTS_STAY_AWAY $HTML_HEAD_STUFF

Error: Page not found

The Info node $Tag in Info file $FileName does not exist.

EOF } #----------------------------------------------------------- # InfoNode2HTML #----------------------------------------------------------- # scans an info file for the node with the name '$Tag' # starting at the postion '$Offset'. # If found, the node is translated to HTML and printed. #------------------------------------------------------------ sub InfoNode2HTML{ local($FileName,$Offset,$Tag,$BaseInfoFile) = @_; local($Found); if ( $FileName =~ /^(.+)\.gz$/ ) { open(FH2,"gunzip < " . $FileName . " 2>/dev/null |") || &DieFileNotFound($FileName); } elsif ( $FileName =~ /^(.+)\.bz2$/ ) { open(FH2,"bzcat " . $FileName . " 2>/dev/null |") || &DieFileNotFound($FileName); } else { open(FH2,$FileName) || &DieFileNotFound($FileName); } seek(FH2,$Offset,0); $Tag =~ y/A-Z/a-z/; # -- to lowercase #-- scan for the node start $Found = 0; while(){ if (/$NODEBORDER/){ $Line = ; @LinkList = &ParsHeaderLine($Line); $CompareTag = $Tag; $CompareTag =~ s/([^0-9A-Za-z])/\\$1/g; #-- escape special chars $Temp = $LinkList[1]; $Temp =~ y/A-Z/a-z/; #-- to lower case if($Temp =~ /^\s*$CompareTag\s*$/){ #-- node start found ? $Found = 1; last; } } } if($Found == 0){ # -- break if not found &ReplyNotFoundMessage($FileName,$Tag); return; } &PrintHeader(*LinkList,$BaseInfoFile); $InMenu = 0; while(){ last if /$NODEBORDER/; #-- replace metacharacters s/&/&/g; s/>/>/g; s/\n

Menu

\n