#! /usr/bin/perl # this is the perl variant of the mksite script. It based directly on a # copy of mksite.sh which is derived from snippets that I was using to # finish doc pages for website publishing. Using only sh/sed along with # files has a great disadvantage: it is a very slow process atleast. The # perl language in contrast has highly optimized string, replace, search # functions as well as data structures to store intermediate values. As # an advantage large parts of the syntax are similar to the sh/sed variant. # # http://zziplib.sf.net/mksite/ # THE MKSITE.SH (ZLIB/LIBPNG) LICENSE # Copyright (c) 2004 Guido U. Draheim # This software is provided 'as-is', without any express or implied warranty # In no event will the authors be held liable for any damages arising # from the use of this software. # Permission is granted to anyone to use this software for any purpose, # including commercial applications, and to alter it and redistribute it # freely, subject to the following restrictions: # 1. The origin of this software must not be misrepresented; you must not # claim that you wrote the original software. If you use this software # in a product, an acknowledgment in the product documentation would be # appreciated but is not required. # 2. Altered source versions must be plainly marked as such, and must not # be misrepresented as being the original software. # 3. This notice may not be removed or altered from any source distribution. # $Id: mksite.pl,v 1.2 2006-09-22 00:33:22 guidod Exp $ use strict; use warnings; no warnings "uninitialized"; use File::Basename qw(basename); use POSIX qw(strftime); # initialize some defaults my $SITEFILE=""; $SITEFILE="site.htm" if not $SITEFILE and -f "site.htm"; $SITEFILE="site.html" if not $SITEFILE and -f "site.html"; $SITEFILE="site.htm" if not $SITEFILE; # my $MK="-mksite"; # note the "-" at the start my $SED="sed"; my $DATA="~~"; # extension for meta data files my $HEAD="~head~"; # extension for head sed script my $BODY="~body~"; # extension for body sed script my $FOOT="~foot~"; # append to body text (non sed) my $SED_LONGSCRIPT="$SED -f"; my $az="a-z"; # for perl my $AZ="A-Z"; # we may assume there are my $NN="0-9"; # char-ranges available my $AA="_$NN$AZ$az"; # that makes the resulting my $AX="$AA.+-"; # script more readable my $n = "\n"; my $Q = "q class"; my $QX = "/q"; # LANG="C" ; LANGUAGE="C" ; LC_COLLATE="C" # these are needed for proper # export LANG LANGUAGE LC_COLLATE # lowercasing as some collate # treat A-Z to include a-z my @HTMLTAGS = qw/a p h1 h2 h3 h4 h5 h6 dl dd dt ul ol li pre code table tr td th b u i s q em strong strike cite big small sup sub tt thead tbody center hr br nobr wbr span div img adress blockquote/; my @HTMLTAGS2 = qw/html head body title meta http-equiv style link/; # ========================================================================== my $hint=""; sub echo { print join(" ",@_),$n; } sub error { print STDERR "ERROR: ", join(" ",@_),$n; } sub warns { print STDERR "WARN: ", join(" ",@_), $n; } sub hint { print STDERR "NOTE: ", join(" ", @_), $n if $hint; } sub init { $hint="1" if -d "DEBUG"; } &init ("NOW!!!"); sub ls_s { my $x=`ls -s @_`; chomp($x); return $x; } # ========================================================================== # reading options from the command line GETOPT my %o = (); # to store option variables $o{variables}="files"; $o{fileseparator}="?"; $o{files}=""; $o{main_file}=""; $o{formatter}="$0"; my $opt=""; for my $arg (@ARGV) { # this variant should allow to embed spaces in $arg if ($opt) { $o{$opt}=$arg; $opt=""; } else { $_=$arg; if (/^-.*=.*$/) { $opt=$arg; $opt =~ s/-*([$AA][$AA-]*).*/$1/; $opt =~ y/-/_/; if (not $opt) { error "invalid option $arg"; } else { $arg =~ s/^[^=]*=//; $o{$opt} = $arg; $o{variables} .= " ".$opt; } $opt="";; } elsif (/^-.*.-.*$/) { $opt=$arg; $opt =~ s/-*([$AA][$AA-]*).*/$1/; $opt =~ y/-/_/; if (not $opt) { error "invalid option $arg"; $opt=""; } else { # keep the option for next round } ;; } elsif (/^-.*/) { $opt=$arg; $opt =~ s/^-*([$AA][$AA-]*).*/$1/; $opt =~ y/-/_/; if (not $opt) { error "invalid option $arg"; } else { $arg =~ s/^[^=]*=//; $o{$opt} = ' '; } $opt="" ;; } else { hint "<$arg>"; if (not $o{main_file}) { $o{main_file} = $arg; } else { $o{files} .= $o{fileseparator} if $o{files}; $o{files} .= $arg; }; $opt="" ;; }; } } ; if ($opt) { $o{$opt}=" "; $opt=""; } ### env | grep ^opt $SITEFILE=$o{main_file} if $o{main_file} and -f $o{main_file}; $SITEFILE=$o{site_file} if $o{site_file} and -f $o{site_file}; $hint="1" if $o{debug}; if ($o{help}) { $_=$SITEFILE; echo "$0 [sitefile]"; echo " default sitefile = $_ ($o{main_file}) ($o{files})"; echo "options:"; echo " --filelist : show list of target files as ectracted from $_"; echo " --src-dir xx : if source files are not where mksite is executed"; echo " --tmp-dir xx : use temp instead of local directory"; echo " --tmp : use automatic temp directory in \$TEMP/mksite.*"; exit; echo " internal:"; echo "--fileseparator=x : for building the internal filelist (def. '?')"; echo "--files xx : for list of additional files to be processed"; echo "--main-file xx : for the main sitefile to take file list from"; } if (not $SITEFILE) { error "no SITEFILE found (default would be 'site.htm')$n"; exit 1; } else { hint "sitefile: ", ls_s($SITEFILE); } # we use internal hashes to store mappings - kind of relational tables my @MK_TAGS= (); # "./$MK.tags.tmp" my @MK_VARS= (); # "./$MK.vars.tmp" my @MK_SPAN= (); # "./$MK.span.tmp" my @MK_META= (); # "./$MK.meta.tmp" my @MK_METT= (); # "./$MK.mett.tmp" my @MK_TEST= (); # "./$MK.test.tmp" my @MK_FAST= (); # "./$MK.fast.tmp" my @MK_GETS= (); # "./$MK.gets.tmp" my @MK_PUTS= (); # "./$MK.puts.tmp" my @MK_OLDS= (); # "./$MK.olds.tmp" my @MK_SITE= (); # "./$MK.site.tmp" my @MK_SECT1= (); # "./$MK.sect1.tmp" my @MK_SECT2= (); # "./$MK.sect2.tmp" my @MK_SECT3= (); # "./$MK.sect3.tmp" my @MK_DATA= (); # "./$MK~~" my %DATA= (); # used for $F.$PARTs # ======================================================================== # ======================================================================== # ======================================================================== # MAGIC VARS # IN $SITEFILE my $printerfriendly=""; my $sectionlayout="list"; my $sitemaplayout="list"; my $attribvars=" "; # my $updatevars=" "; # default my $expandvars=" "; # my $commentvars=" "; # $updatevars && $expandsvars my $sectiontab=" "; # highlight ^...href="$section" my $currenttab=" "; # highlight ^
.. my $headsection="no"; my $tailsection="no"; my $sectioninfo="no"; # using

title

= info text my $emailfooter="no"; for (source($SITEFILE)) { if (//) { warns("do not use ," ." change to $SITEFILE" ."warning: or" ." " ." "); $sectionlayout="multi"; $sitemaplayout="multi"; } if (//) { $sectionlayout="multi"; $sitemaplayout="multi"; } if (//) { $sectionlayout="multi"; $sitemaplayout="multi"; } } sub mksite_magic_option { # $1 is word/option to check for my ($U,$INP,$Z) = @_; $INP=$SITEFILE if not $INP; for (source($INP)) { s/(/$1$2: -->/g; s/(/$1$3:$2-->/g; /.*/$1/; s/.*.*/$1/; /||g;"; push @MK_TAGS, "s|||g;"; push @MK_TAGS, "s|||g;"; push @MK_TAGS, "s|(<[^<>]*)\\\${[$AX]*:[?=]([^<{}>]*)}([^<>]*>)|\$1\$2\$3|g;"; my $TRIMM=" -e 's:^ *::' -e 's: *\$::'"; # trimm away leading/trailing spaces sub trimm { my ($T,$Z) = @_; $T =~ s:\A\s*::s; $T =~ s:\s*\Z::s; return $T; } sub trimmm { my ($T,$Z) = @_; $T =~ s:\A\s*::s; $T =~ s:\s*\Z::s; $T =~ s:\s+: :g; return $T; } sub timezone { # +%z is an extension while +%Z is supposed to be posix my $tz; eval { $tz = strftime("%z", localtime()) }; return $tz if $tz =~ /[+]/; return $tz if $tz =~ /[-]/; return strftime("%Z", localtime()); } sub timetoday { return strftime("%Y-%m-%d", localtime()); } sub timetodays { return strftime("%Y-%m%d", localtime()); } sub esc { my ($TXT,$XXX) = @_; $TXT =~ s|&|\\\\&|g; return $TXT; } my %SOURCE; sub source # $file : @lines { my ($FILE,$Z) = @_; if (exists $SOURCE{$FILE}) { return @{$SOURCE{$FILE}}; } my @TEXT = (); open FILE, "<$FILE" or die "could not open $FILE: $!"; for my $line () { push @TEXT, $line; } close FILE; @{$SOURCE{$FILE}} = @TEXT; return @{$SOURCE{$FILE}}; } sub savesource # $file \@lines { my ($FILE,$LINES,$Z) = @_; @{$SOURCE{$FILE}} = @{$LINES}; } my $F; # current file during loop <<<<<<<<< my $i = 100; sub savelist { if (-d "DEBUG") { my ($script,$ext,$Z) = @_; if (not $ext) { $ext = "_".$i; $i++; } my $X = "$F.$ext.tmp.PL"; $X =~ s|/|:|g; open X, ">DEBUG/$X" or die "could not open $X: $!"; print X "#! /usr/bin/env perl",$n; print X "# ",$#_," $ext files ",localtime(),$n; my $TEXT = join("$n", @{$script}); $TEXT =~ s|source\([^()]*\)|<>|; print X $TEXT,$n; close X; } } sub eval_MK_LIST # $str @list { my $FILETYPE = $_[0]; shift @_; my $result = $_[0]; shift @_; my $extra = ""; my $script = "\$_ = \$result; my \$Z;"; $script .= join(";$n ", @_); $script .= "$n;\$result = \$_;$n"; savelist([$script],$FILETYPE); eval $script; return $result.$extra; } sub eval_MK_FILE { my $FILETYPE = $_[0]; shift @_; my $FILENAME = $_[0]; shift @_; my $result = ""; my $script = "my \$FILE; my \$extra = ''; my \$Z; $n"; $script.= "for (source('$FILENAME')) { $n"; $script.= join(";$n ", @_); $script.= "$n; \$result .= \$_; "; $script.= "$n if(\$extra){\$result.=\$extra;\$extra='';\$result.=\"\\n\"}"; $script.= "$n} if(\$extra){\$result.=\$extra;}$n"; savelist([$script],$FILETYPE); eval $script; return $result; } my $sed_add = "\$extra .= "; # "/r "; sub foo { print " '$F'$n"; } # ====================================================================== # FUNCS my $SOURCEFILE; # current file <<<<<<<< my @FILELIST; # <<<<<<< sub sed_slash_key # helper to escape chars special in /anchor/ regex { # currently escaping "/" "[" "]" "." my $R = $_[0]; $R =~ s|[\"./[-]|\\$&|g; $R =~ s|\]|\\\\$&|g; return $R; } sub sed_piped_key # helper to escape chars special in s|anchor|| regex { # currently escaping "|" "[" "]" "." my $R = $_[0]; $R =~ s/[\".|[-]/\\$&/g; $R =~ s/\]/\\\\$&/g; return $R; } sub back_path # helper to get the series of "../" for a given path { my ($R,$Z) = @_; if ($R !~ /\//) { return ""; } $R =~ s|/[^/]*$|/|; $R =~ s|[^/]*/|../|g; return $R; } sub dir_name { my $R = $_[0]; $R =~ s:/[^/][^/]*\$::; return $R; } sub info2vars_sed # generate substition sed addon script { my ($INP,$Z) = @_; $INP = \@{$DATA{$F}} if not $INP; my @OUT = (); my $V8=" *([^ ][^ ]*) +(.*)<$QX>"; my $V9=" *DC[.]([^ ][^ ]*) +(.*)<$QX>"; my $N8=" *([^ ][^ ]*) ([$NN].*)<$QX>"; my $N9=" *DC[.]([^ ][^ ]*) ([$NN].*)<$QX>"; my $V0="([<]*)\\\$"; my $V1="([^<>]*)\\\$"; my $V2="([^{<>}]*)"; my $V3="([^<>]*)"; my $SS="<"."<>".">"; # spacer so value="2004" dont make for s|\(...\)|\12004| $Z="\$Z="; $updatevars = "no" if $commentvars eq "no"; # duplicated from $expandvars = "no" if $commentvars eq "no"; # option handling my @_INP = (); for (@{$INP}) { my $x=$_; $x =~ s/(>[^<>]*)'([^<>]*<)/$1\\'$2/; push @_INP, $x; # OOOOPS } if ($expandvars ne "no") { for (@_INP) { if (/^=....=formatter /) { next; } elsif (/^<$Q='name'>$V9/){push @OUT, "\$Z='$2';s||- \$Z|;"} elsif (/^<$Q='Name'>$V9/){push @OUT, "\$Z='$2';s||(\$Z)|;"} elsif (/^<$Q='name'>$V8/){push @OUT, "\$Z='$2';s||- \$Z|;"} elsif (/^<$Q='Name'>$V8/){push @OUT, "\$Z='$2';s||(\$Z)|;"} } } if ($expandvars ne "no") { for (@_INP) { if (/^=....=formatter /) { next; } elsif (/^<$Q='text'>$V9/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='Text'>$V9/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='name'>$V9/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='Name'>$V9/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='text'>$V8/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='Text'>$V8/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='name'>$V8/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} elsif (/^<$Q='Name'>$V8/){push @OUT, "\$Z='$2';s||\$1$SS\$Z|;"} } } if ($updatevars ne "no") { for (@_INP) { my $H = "[^<>]*"; if (/^=....=formatter /) { next; } elsif (/^<$Q='name'>$V9/){push @OUT, "\$Z='$2';s|$H|- \$Z|;"} elsif (/^<$Q='Name'>$V9/){push @OUT, "\$Z='$2';s|$H|(\$Z)|;"} elsif (/^<$Q='name'>$V8/){push @OUT, "\$Z='$2';s|$H|- \$Z|;"} elsif (/^<$Q='Name'>$V8/){push @OUT, "\$Z='$2';s|$H|(\$Z)|;"} } } if ($updatevars ne "no") { for (@_INP) { my $H = "[^<>]*"; if (/^=....=formatter /) { next; } elsif (/^<$Q='text'>$V9/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='Text'>$V9/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='name'>$V9/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='Name'>$V9/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='text'>$V8/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='Text'>$V8/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='name'>$V8/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} elsif (/^<$Q='Name'>$V8/){push @OUT,"\$Z='$2';s|$H|\$1$SS\$Z|;"} } } if ($attribvars ne "no") { for (@_INP) { my $H = "[^<>]*"; if (/^=....=formatter /) { next; } elsif (/^<$Q='text'>$V9/){push @OUT,"\$Z='$2';s|<$V1\{$1:[=]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='Text'>$V9/){push @OUT,"\$Z='$2';s|<$V1\{$1:[=]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='name'>$V9/){push @OUT,"\$Z='$2';s|<$V1\{$1:[?]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='Name'>$V9/){push @OUT,"\$Z='$2';s|<$V1\{$1:[?]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='text'>$V8/){push @OUT,"\$Z='$2';s|<$V1\{$1:[=]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='Text'>$V8/){push @OUT,"\$Z='$2';s|<$V1\{$1:[=]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='name'>$V8/){push @OUT,"\$Z='$2';s|<$V1\{$1:[?]$V2}$V3>|<\$1$SS\$Z\$3>|;"} elsif (/^<$Q='Name'>$V8/){push @OUT,"\$Z='$2';s|<$V1\{$1:[?]$V2}$V3>|<\$1$SS\$Z\$3>|;"} } for (split / /, $o{variables}) { {push @OUT,"\$Z='$o{$_}';s|<$V1\{$_:[?]$V2}$V3>|<\$1$SS\$Z\$3>|;"} } } # if value="2004" then generated sed might be "\\12004" which is bad # instead we generate an edited value of "\\1$SS$value" and cut out # the spacer now after expanding the variable values: push @OUT, "s|$SS||g;"; return @OUT; } sub info2meta_sed # generate text portion { my ($INP,$XXX) = @_; $INP = \@{$DATA{$F}} if not $INP; my @OUT = (); # http://www.metatab.de/meta_tags/DC_type.htm my $V6=" *HTTP[.]([^ ]+) (.*)<$QX>"; my $V7=" *DC[.]([^ ]+) (.*)<$QX>"; my $V8=" *([^ ]+) (.*)<$QX>" ; sub __TYPE_SCHEME { "name=\"DC.type\" content=\"$2\" scheme=\"$1\"" }; sub __DCMI { "name=\"$1\" content=\"$2\" scheme=\"DCMIType\"" }; sub __NAME { "name=\"$1\" content=\"$2\"" }; sub __NAME_TZ { "name=\"$1\" content=\"$2 ".&timezone()."\"" }; sub __HTTP { "http-equiv=\"$1\" content=\"$2\"" }; for (@$INP) { if (/=....=today /) { next; } if (/<$Q='meta'>HTTP[.]/ && /<$Q='meta'>$V6/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]DCMIType / && /<$Q='meta'>$V7/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Collection$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Dataset$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Event$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Image$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Service$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Software$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Sound$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]type Text$/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]date[.].*[+]/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>DC[.]date[.].*[:]/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } if (/<$Q='meta'>/ && /<$Q='meta'>$V8/) { push @OUT, " " if $2; next; } } return @OUT; } sub info_get_entry # get the first value known so far { my ($TXT,$INP,$XXX) = @_; $TXT = "sect" if not $TXT; $INP = \@{$DATA{$F}} if not $INP; for (grep {/<$Q='text'>$TXT /} @$INP) { my $info = $_; $info =~ s|<$Q='text'>$TXT ||; $info =~ s|<$QX>||; chomp($info); chomp($info); return $info; } } sub info1grep # test for a substition to be already present { my ($TXT,$INP,$XXX) = @_; $TXT = "sect" if not $TXT; $INP = \@{$DATA{$F}} if not $INP; return scalar(grep {/^<$Q='text'>$TXT /} @$INP); # returning the count } sub dx_init { @{$DATA{$F}} = (); &dx_meta ("formatter", basename($o{formatter})); for (split / /, $o{variables}) { # commandline --def=value if (/_/) { my $u=$_; $u =~ y/_/-/; # makes for override &dx_meta ($u, $o{$_}); } else { &dx_text ($_, $o{$_}); } } } sub dx_line { my ($U,$V,$W,$Z) = @_; chomp($U); chomp($V); push @{$DATA{$F}}, "<$Q=$U>".$V." ".trimmm($W)."<$QX>"; } sub DX_line { my ($U,$V,$W,$Z) = @_; $W =~ s/<[^<>]*>//g; &dx_line ($U,$V,$W); } sub dx_text { my ($U,$V,$Z) = @_; &dx_line ("'text'",$U,$V); } sub DX_text # add a substition includings format variants { my ($N, $T,$XXX) = @_; $N = trimm($N); $T = trimm($T); if ($N) { if ($T) { my $text=lc("$T"); $text =~ s/<[^<>]*>//g; &dx_line ("'text'",$N,$T); &dx_line ("'name'",$N,$text); my $varname=$N; $varname =~ s/.*[.]//; # cut out front part if ($N ne $varname and $varname) { $text=lc("$varname $T"); $text =~ s/<[^<>]*>//g; &dx_line ("'Text'",$varname,$T); &dx_line ("'Name'",$varname,$text); } } } } sub dx_meta { my ($U,$V,$Z) = @_; &DX_line ("'meta'",$U,$V); } sub DX_meta # add simple meta entry and its subsitution { my ($U,$V,$Z) = @_; &DX_line ("'meta'",$U,$V); &DX_text ("$U", $V); } sub DC_meta # add new DC.meta entry plus two substitutions { my ($U,$V,$Z) = @_; &DX_line ("'meta'","DC.$U",$V); &DX_text ("DC.$U", $V); &DX_text ("$U", $V); } sub HTTP_meta # add new HTTP.meta entry plus two substitutions { my ($U,$V,$Z) = @_; &DX_line ("'meta'","HTTP.$U",$V); &DX_text ("HTTP.$U", $V); &DX_text ("$U", $V); } sub DC_VARS_Of # check DC vars as listed in $DC_VARS global/generate DC_meta { # the results will be added to .meta.tmp and .vars.tmp later my ($FILENAME,$Z)= @_; $FILENAME=$SOURCEFILE if not $FILENAME; for my $M (@DC_VARS, "title") { # scan for a of this name FIXME my ($part,$text); for (source($FILENAME)) { /<$M>/ or next; s|.*<$M>||; s|.*||; $part = trimm($_); last; } $text=$part; $text =~ s|^\w*:||; $text = trimm($text); next if not $text; # will be if ($text ne $part) { my $N=$part; $N =~ s/:.*//; &DC_meta ("$M.$N", $text); } elsif ($M eq "date") { &DC_meta ("$M.issued", $text); # "" -> "issued:" } else { &DC_meta ("$M", $text); } } } sub HTTP_VARS_Of # check HTTP-EQUIVs as listed in $_EQUIV global then { # generate meta tags that are http-equiv= instead of name= my ($FILENAME,$Z)= @_; $FILENAME=$SOURCEFILE if not $FILENAME; for my $M (@_EQUIVS) { # scan for a of this name FIXME my ($part,$text); for (source($FILENAME)) { /<$M>/ or next; s|.*<$M>||; s|.*||; $part = trimm($_); last; } $text=$part; $text =~ s|^\w*:||; $text = trimm($text); next if not $text; if ($M eq "redirect") { &HTTP_meta ("refresh", "5; url=$text"); &DX_text ("$M", $text); } elsif ($M eq "charset") { &HTTP_meta ("content-type", "text/html; charset=$text"); } else { &HTTP_meta ("$M", $text); } } } sub DC_isFormatOf # make sure there is this DC.relation.isFormatOf tag { # choose argument for a fallback (usually $SOURCEFILE) my ($NAME,$Z) = @_; $NAME=$SOURCEFILE if not $NAME; if (not &info1grep ("DC.relation.isFormatOf")) { &DC_meta ("relation.isFormatOf", "$NAME"); } } sub DC_publisher # make sure there is this DC.publisher meta tag { # choose argument for a fallback (often $USER) my ($NAME,$Z) = @_; $NAME=$ENV{"USER"} if not $NAME; if (not &info1grep ("DC.publisher")) { &DC_meta ("publisher", "$NAME"); } } sub DC_modified # make sure there is a DC.date.modified meta tag { # maybe choose from filesystem dates if possible my ($ZZ,$Z) = @_; # target file if (not &info1grep ("DC.date.modified")) { my @stats = stat($ZZ); my $text = strftime("%Y-%m-%d", localtime($stats[9])); &DC_meta ("date.modified", $text); } } sub DC_date # make sure there is this DC.date meta tag { # choose from one of the available DC.date.* specials my ($ZZ,$Z) = @_; # source file if (&info1grep ("DC.date")) { &DX_text ("issue", "dated ".&info_get_entry("DC.date")); &DX_text ("updated", &info_get_entry("DC.date")); } else { my $text=""; my $kind; for $kind (qw/available issued modified created/) { $text=&info_get_entry("DC.date.$kind"); # test ".$text" != "." && echo "$kind = date = $text ($ZZ)" last if $text; } if (not $text) { my $part; my $M="date"; for (source($ZZ)) { /<$M>/ or next; s|.*<$M>||; s|.*||; $part=trimm($_); last; } $text=$part; $text =~ s|^[$AA]*:||; $text = &trimm ($text); } if (not $text) { my $part; my $M="!--date:*=*--"; # takeover updateable variable... for (source($ZZ)) { /<$M>/ or next; s|.*<$M>||; s|/ or next; s|.*<$M>||; s|.*||; $text = trimm($_); last; } last if $text; for (source($ZZ)) { /<$M [^<>]*>/ or next; s|.*<$M [^<>]*>||; s|.*||; $text = trimm($_); last; } last if $text; } if (not $text) { $text=basename($ZZ,".html"); $text=basename($text,".htm"); $text =~ y/_/ /; $text =~ s/$/ info/; $text=~ s/\n/ /g; } $term=$text; $term =~ s/.*[\(]//; $term =~ s/[\)].*//; $text =~ s/[\(][^\(\)]*[\)]//; if (not $term or $term eq $text) { &DC_meta ("title", "$text"); } else { &DC_meta ("title", "$term - $text"); } } } sub site_get_section # return parent section page of given page { my $_F_ = &sed_slash_key(@_); for my $x (grep {/<$Q='sect'>$_F_ /} @MK_DATA) { my $info = $x; $info =~ s|<$Q='sect'>[^ ]* ||; $info =~ s|<$QX>||; return $info; } } sub DC_section # not really a DC relation (shall we use isPartOf ?) { # each document should know its section father my $sectn = &site_get_section($F); if ($sectn) { &DC_meta ("relation.section", $sectn); } } sub info_get_entry_section { return &info_get_entry("DC.relation.section"); } sub site_get_selected # return section of given page { my $_F_ = &sed_slash_key(@_); for my $x (grep {/<$Q='[u]se.'>$_F_ /} @MK_DATA) { my $info = $x; $info =~ s/<$Q='[u]se.'>[^ ]* //; $info =~ s|<$QX>||; return $info; } } sub DC_selected # not really a DC title (shall we use alternative ?) { # each document might want to highlight the currently selected item my $short=&site_get_selected($F); if ($short) { &DC_meta ("title.selected", $short); } } sub info_get_entry_selected { return &info_get_entry("DC.title.selected"); } sub site_get_rootsections # return all sections from root of nav tree { my @OUT; for (grep {/<$Q='[u]se1'>/} @MK_DATA) { my $x = $_; $x =~ s/<$Q='[u]se.'>([^ ]*) .*/$1/; push @OUT, $x; } return @OUT; } sub site_get_sectionpages # return all children pages in the given section { my $_F_=&sed_slash_key(@_); my @OUT = (); for (grep {/^<$Q='sect'>[^ ]* $_F_$/} @MK_DATA) { my $x = $_; $x =~ s/^<$Q='sect'>//; $x =~ s/ .*//; $x =~ s|<$QX>||; push @OUT, $x; } return @OUT; } sub site_get_subpages # return all page children of given page { my $_F_=&sed_slash_key(@_); my @OUT = (); for (grep {/^<$Q='node'>[^ ]* $_F_<[^<>]*>$/} @MK_DATA) { my $x = $_; $x =~ s/^<$Q='node'>//; $x =~ s/ .*//; $x =~ s|<$QX>||; push @OUT, $x; } return @OUT; } sub site_get_parentpage # ret parent page for given page (".." for sections) { my $_F_=&sed_slash_key(@_); for (grep {/^<$Q='node'>$_F_ /} @MK_DATA) { my $x = $_; $x =~ s/^<$Q='node'>[^ ]* //; $x =~ s|<$QX>||; return $x; } } sub DX_alternative # detect wether page asks for alternative style { # which is generally a shortpage variant my ($U,$Z) = @_; my $x=&mksite_magic_option("alternative",$U); $x =~ s/^ *//; $x =~s/ .*//; if ($x) { &DX_text ("alternative", $x); } } sub info2head_sed # append alternative handling script to $HEAD { my @OUT = (); my $have=&info_get_entry("alternative"); if ($have) { push @OUT, "// && do {"; push @OUT, "s//\$1/"; push @OUT, "$sed_add \$_; last; };"; } return @OUT; } sub info2body_sed # append alternative handling script to $BODY { my @OUT = (); my $have=&info_get_entry("alternative"); if ($have) { push @OUT, "s//\$1/"; } return @OUT; } sub bodymaker_for_sectioninfo { if ($sectioninfo eq "no") { return ""; } my $_x_=""; my $_q_="([^<>]*[$AX][^<>]*)"; $_q_="[ ][ ]*$sectioninfo([ ])" if $sectioninfo ne " "; my @OUT = (); push @OUT, "s|(^<[hH][$NN][ >].*)$_q_|\$1$_x_\$2|"; push @OUT, "/$_x_/ and s|^|
|"; push @OUT, "/$_x_/ and s||&|"; push @OUT, "/$_x_/ and s|\$|
|"; push @OUT, "s|$_x_||"; return @OUT; } sub fast_href # args "$FILETOREFERENCE" "$FROMCURRENTFILE:$F" { # prints path to $FILETOREFERENCE href-clickable in $FROMCURRENTFILE # if no subdirectoy then output is the same as input $FILETOREFERENCE my ($T,$R,$Z) = @_; my $S=&back_path ($R); if (not $S) { return $T; } else { my $t=$T; $t =~ s/^ *$//; $t =~ s/^\/.*//; $t =~ s/^[.][.].*//; $t =~ s/^\w*:.*//; if (not $t) { # don't move any in the pattern above return $T; } else { return "$S$T"; # prefixed with backpath } } } sub make_back_path # "$FILE" { my ($R,$Z) = @_; my $S=&back_path ($R); my @OUT = (); return @OUT if $S !~ /^\.\./; push @OUT, "s|(<[^<>]*\\shref=\\\")(\\w[^<>:]*\\\"[^<>]*>)|\$1$S\$2|g;"; push @OUT, "s|(<[^<>]*\\ssrc=\\\")(\\w[^<>:]*\\\"[^<>]*>)|\$1$S\$2|g;"; return @OUT; } # ============================================================== SITE MAP DATA # each entry needs atleast a list-title, a long-title, and a list-date # these are the basic information to be printed in the sitemap file # where it is bound the hierarchy of sect/subsect of the entries. sub site_map_list_title # $file $text { my ($U,$V,$Z) = @_; chomp($U); push @MK_DATA, "<$Q='list'>$U ".trimm($V)."<$QX>"; } sub info_map_list_title # $file $text { my ($U,$V,$Z) = @_; chomp($U); push @{$DATA{$U}}, "<$Q='list'>".trimm($V)."<$QX>"; } sub site_map_long_title # $file $text { my ($U,$V,$Z) = @_; chomp($U); push @MK_DATA, "<$Q='long'>$U ".trimm($V)."<$QX>"; } sub info_map_long_title # $file $text { my ($U,$V,$Z) = @_; chomp($U); push @{$DATA{$U}}, "<$Q='long'>".trimm($V)."<$QX>"; } sub site_map_list_date # $file $text { my ($U,$V,$Z) = @_; chomp($U); push @MK_DATA, "<$Q='date'>$U ".trimm($V)."<$QX>"; } sub info_map_list_date # $file $text { my ($U,$V,$Z) = @_; chomp($U); push @{$DATA{$U}}, "<$Q='date'>".trimm($V)."<$QX>"; } sub site_get_list_title { my ($U,$V,$Z) = @_; for (@MK_DATA) { if (m|^<$Q='list'>$U (.*)<$QX>|) { return $1; } } return ""; } sub site_get_long_title { my ($U,$V,$Z) = @_; for (@MK_DATA) { if (m|^<$Q='long'>$U (.*)<$QX>|) { return $1; } } return ""; } sub site_get_list_date { my ($U,$V,$Z) = @_; for (@MK_DATA) { if (m|^<$Q='date'>$U (.*)<$QX>|) { return $1; } } return ""; } sub siteinfo2sitemap# generate addon sed scriptlet { # the resulting script will act on each item/line # containing and expand any following # reference of or or my ($INP,$Z) = @_ ; $INP= \@MK_DATA if not $INP; my @OUT = (); my $_list_= sub{"s|(.*)]*>.*|\$1$2|"}; my $_date_= sub{"s|(.*).*|\$1$2|"}; my $_long_= sub{"s|(.*).*|\$1$2|"}; for (@$INP) { my $info = $_; $info =~ s:<$Q='list'>([^ ]*) (.*)<$QX>:&$_list_:e; $info =~ s:<$Q='date'>([^ ]*) (.*)<$QX>:&$_date_:e; $info =~ s:<$Q='long'>([^ ]*) (.*)<$QX>:&$_long_:e; $info =~ /^s\|/ || next; push @OUT, $info; } return @OUT; } sub make_multisitemap { # each category gets its own column along with the usual entries my ($INPUTS,$Z)= @_ ; $INPUTS=\@MK_DATA if not $INPUTS; @MK_SITE = &siteinfo2sitemap(); # have addon-sed my @OUT = (); my $_form_= sub{"$3" ."
$3......" }; my $_tiny_="small> ".$n; for (grep {/<$Q='[Uu]se.'>/} @$INPUTS) { my $x = $_; $x =~ />\w\w\w\w*:/ and next; # name: http: ftp: mailto: ... $x =~ s|<$Q='[Uu]se(.)'>([^ ]*) (.*)<$QX>|&$_form_|e; $x = &eval_MK_LIST("multisitemap", $x, @MK_SITE); $x =~ /||; $x =~ s|||; $x =~ s||
|; $x =~ s||
|; $x =~ s// /g; $x =~ s|
||; $x =~ s|||; $x =~ s||
|; $x =~ s|||; $x =~ s|||; chomp $x; push @OUT, $x.$n; } push @OUT, "".$n; return @OUT; } sub make_listsitemap { # traditional - the body contains a list with date and title extras my ($INPUTS,$Z)= @_ ; $INPUTS=\@MK_DATA if not $INPUTS; @MK_SITE = &siteinfo2sitemap(); # have addon-sed my @OUT = (); my $_form_=sub{ "$3......$3"}; my $_tabb_="\ \;"; push @OUT, "".$n; my $xx; for $xx (grep {/<$Q='[Uu]se.'>/} @$INPUTS) { my $x = "".$xx; $x =~ />\w\w\w\w*:/ and next; $x =~ s|<$Q='[Uu]se(.)'>([^ ]*) (.*)<$QX>|&$_form_|e; $x = &eval_MK_LIST("listsitemap", $x, @MK_SITE); $x =~ /||; $x =~ s|||; $x =~ s|||; $x =~ // and $x =~ s|(]*>)|$1- |; $x =~ s|| |g; $x =~ s||$_tabb_|; $x =~ s||$_tabb_|; $x =~ s|||; push @OUT, $x.$n; } for $xx (grep {/<$Q='[u]se.'>/} @$INPUTS) { my $x = $xx; $x =~ s/<$Q='[u]se.'>name:sitemap://; $x =~ s|<$QX>||; $x =~ s:\s*::gs; if (-f $x) { for (grep {//} source($x)) { push @OUT, $_; } } } push @OUT, "
*
-
|; $x =~ s|||; $x =~ s||
".$n; return @OUT; } my $_xi_include_= " addon-sed my @OUT = (); my $_form_=sub{"$3"}; my $xx; for $xx (grep {/<$Q='[Uu]se.'>/} @$INPUTS) { my $x = "".$xx; $x =~ />\w\w\w\w*:/ and next; $x =~ s|<$Q='[Uu]se(.)'>([^ ]*) (.*)<$QX>|&$_form_|e; $x = &eval_MK_LIST("listsitemap", $x, @MK_SITE); $x =~ /.*| />|; push @OUT, $x.$n; } return @OUT; } sub print_extension { my ($ARG,$Z)= @_ ; $ARG=$o{print} if not $ARG; if ($ARG =~ /^([.-])/) { return $ARG; } else { return ".print"; } } sub from_sourcefile { my ($U,$Z) = @_; if (-f $U) { return $U; } elsif (-f "$o{src_dir}/$U") { return "$o{src_dir}/$U"; } else { return $U; } } sub html_sourcefile # generally just cut away the trailing "l" (ell) { # making "page.html" argument into "page.htm" return my ($U,$Z) = @_; my $_SRCFILE_=$U; $_SRCFILE_ =~ s/l$//; my $_XMLFILE_=$U; $_XMLFILE_ =~ s/\.html$/.dbk/; if (-f $_SRCFILE_) { return $_SRCFILE_; } elsif (-f $_XMLFILE_) { return $_XMLFILE_; } elsif (-f "$o{src_dir}/$_SRCFILE_") { return "$o{src_dir}/$_SRCFILE_"; } elsif (-f "$o{src_dir}/$_XMLFILE_") { return "$o{src_dir}/$_XMLFILE_"; } else { return ".//$_SRCFILE_"; } } sub html_printerfile_sourcefile { my ($U,$Z) = @_; if (not $printerfriendly) { $U =~ s/l\$//; return $U; } else { my $_ext_=&sed_slash_key(&print_extension($printerfriendly)); $U =~ s/l\$//; $U =~ s/$_ext_([.][\w]*)$/$1/; return $U; } } sub fast_html_printerfile { my ($U,$V,$Z) = @_; my $x=&html_printerfile($U) ; return basename($x); # my $x=&html_printerfile($U) ; return &fast_href($x,$V); } sub html_printerfile # generate the printerfile for a given normal output { my ($U,$Z) = @_; my $_ext_=&esc(&print_extension($printerfriendly)); $U =~ s/([.][\w]*)$/$_ext_$1/; return $U; # index.html -> index.print.html } sub make_printerfile_fast # generate s/file.html/file.print.html/ for hrefs { # we do that only for the $FILELIST my ($U,$Z) = @_; my $ALLPAGES=$U; my @OUT = (); for my $p (@$ALLPAGES) { my $a=&sed_slash_key($p); my $b=&html_printerfile($p); if ($b ne $p) { $b =~ s:/:\\/:g; push @OUT, "s/])*>//;"; } } return @OUT; } sub echo_printsitefile_style { my $_bold_="text-decoration : none ; font-weight : bold ; "; return " " ."$n"; } sub make_printsitefile_head # $sitefile { my $MK_STYLE = &echo_printsitefile_style(); my @OUT = (); for (source($SITEFILE)) { if (//) { push @OUT, $_; push @OUT, $MK_STYLE; next; } if (//) { push @OUT, $_; next; } if (/<\/head>/) { push @OUT, $_; next; } if (/<body>/) { push @OUT, $_; next; } if (/<link [^<>]*rel=\"shortcut icon\"[^<>]*>/) { push @OUT, $_; next; } } return @OUT; } # ------------------------------------------------------------------------ # The printsitefile is a long text containing html href markups where # each of the href lines in the file is being prefixed with the section # relation. During a secondary call the printsitefile can grepp'ed for # those lines that match a given output fast-file. The result is a # navigation header with 1...3 lines matching the nesting level # these alt-texts will be only visible in with a text-mode browser: my $printsitefile_square="width=\"8\" height=\"8\" border=\"0\""; my $printsitefile_img_1="<img alt=\"|go text:\" $printsitefile_square />"; my $printsitefile_img_2="<img alt=\"||topics:\" $printsitefile_square />"; my $printsitefile_img_3="<img alt=\"|||pages:\" $printsitefile_square />"; my $_SECT="mksite:sect:"; sub echo_current_line # $sect $extra { # add the prefix which is used by select_in_printsitefile to cut out things my ($N,$M,$Z) = @_; return "<!--$_SECT\"$N\"-->$M"; } sub make_current_entry # $sect $file ## requires $MK_SITE { my ($S,$R,$Z) = @_; my $RR=&sed_slash_key($R); my $sep=" - " ; my $_left_=" [ " ; my $_right_=" ] "; my $name = site_get_list_title($R); $_ = &echo_current_line ("$S", "<a href=\"$R\">$name</a>$sep"); if ($R eq $S) { s/<a href/$_left_$&/; s/<\/a>/$&$_right_/; } return $_; } sub echo_subpage_line # $sect $extra { my ($N,$M,$Z) = @_; return "<!--$_SECT*:\"$N\"-->$M"; } sub make_subpage_entry { my ($S,$R,$Z) = @_; my $RR=&sed_slash_key($R); my $sep=" - " ; my $name = site_get_list_title($R); $_ = &echo_subpage_line ("$S", "<a href=\"$R\">$name</a>$sep"); return $_; } sub make_printsitefile { # building the printsitefile looks big but its really a loop over sects my ($INPUTS,$Z) = @_; $INPUTS=\@MK_DATA if not $INPUTS; @MK_SITE = &siteinfo2sitemap(); # have <name><long><date> addon-sed savelist(\@MK_SITE,"SITE"); my @OUT = &make_printsitefile_head ($SITEFILE); my $sep=" - " ; my $_sect1= "<a href=\"#.\" title=\"section\">$printsitefile_img_1</a> ||$sep"; my $_sect2= "<a href=\"#.\" title=\"topics\">$printsitefile_img_2</a> ||$sep"; my $_sect3= "<a href=\"#.\" title=\"pages\">$printsitefile_img_3</a> ||$sep"; my $_SECT1="mksite:sect1"; my $_SECT2="mksite:sect2"; my $_SECT3="mksite:sect3"; @MK_SECT1 = &site_get_rootsections(); # round one - for each root section print a current menu for my $r (@MK_SECT1) { push @OUT, &echo_current_line ("$r", "<!--$_SECT1:A--><br>$_sect1"); for my $s (@MK_SECT1) { push @OUT, &make_current_entry ("$r", "$s"); } push @OUT, &echo_current_line ("$r", "<!--$_SECT1:Z-->"); } # round two - for each subsection print a current and subpage menu for my $r (@MK_SECT1) { @MK_SECT2 = &site_get_subpages ("$r"); for my $s (@MK_SECT2) { push @OUT, &echo_current_line ("$s", "<!--$_SECT2:A--><br>$_sect2"); for my $t (@MK_SECT2) { push @OUT, &make_current_entry ("$s", "$t"); } # "$t" push @OUT, &echo_current_line ("$s", "<!--$_SECT2:Z-->"); } # "$s" my $_have_children_=""; for my $t (@MK_SECT2) { if (not $_have_children_) { push @OUT, &echo_subpage_line ("$r", "<!--$_SECT2:A--><br>$_sect2"); } $_have_children_ .= "1"; push @OUT, &make_subpage_entry ("$r", "$t"); } if ($_have_children_) { push @OUT, &echo_subpage_line ("$r", "<!--$_SECT2:Z-->"); } } # "$r" # round three - for each subsubsection print a current and subpage menu for my $r (@MK_SECT1) { @MK_SECT2 = &site_get_subpages ("$r"); for my $s (@MK_SECT2) { @MK_SECT3 = &site_get_subpages ("$s"); for my $t (@MK_SECT3) { push @OUT, &echo_current_line ("$t", "<!--$_SECT3:A--><br>$_sect3"); for my $u (@MK_SECT3) { push @OUT, &make_current_entry ("$t", "$u"); } # "$t" push @OUT, &echo_current_line ("$t", "<!--$_SECT3:Z-->"); } # "$t" my $_have_children_=""; for my $u (@MK_SECT3) { if (not $_have_children_) { push @OUT, &echo_subpage_line ("$s", "<!--$_SECT3:A--><br>$_sect3"); } $_have_children_ .= "1"; push @OUT, &make_subpage_entry ("$s", "$u"); } if ($_have_children_) { push @OUT, &echo_subpage_line ("$s", "<!--$_SECT3:Z-->"); } } # "$s" } # "$r" push @OUT, "<a name=\".\"></a>"; push @OUT, "</body></html>"; savelist(\@OUT,"FORM"); return @OUT; } # create a selector that can grep a printsitefile for the matching entries sub select_in_printsitefile # arg = "page" : return to stdout >> $P.$HEAD { my ($N,$Z) = @_; my $_selected_="$N" ; $_selected_="$F" if not $_selected_; my $_section_=&sed_slash_key($_selected_); my @OUT = (); push @OUT, "s/^<!--$_SECT\\\"$_section_\\\"-->//;"; # sect3 push @OUT, "s/^<!--$_SECT\[*\]:\\\"$_section_\\\"-->//;"; # children $_selected_=&site_get_parentpage($_selected_); $_section_=&sed_slash_key($_selected_); push @OUT, "s/^<!--$_SECT\\\"$_section_\\\"-->//;"; # sect2 $_selected_=&site_get_parentpage($_selected_); $_section_=&sed_slash_key($_selected_); push @OUT, "s/^<!--$_SECT\\\"$_section_\\\"-->//;"; # sect1 push @OUT, "/^<!--$_SECT\\\"[^\\\"]*\\\"-->/ and next;"; push @OUT, "/^<!--$_SECT\[*\]:\\\"[^\\\"]*\\\"-->/ and next;"; push @OUT, "s/^<!--mksite:sect[$NN]:[$AZ]-->//;"; return @OUT; } sub body_for_emailfooter { return "" if $emailfooter eq "no"; my $_email_=$emailfooter; $_email_ =~ s|[?].*||; my $_dated_=&info_get_entry("updated"); return "<hr><table border=\"0\" width=\"100%\"><tr><td>" ."$n"."<a href=\"mailto:$emailfooter\">$_email_</a>" ."$n"."</td><td align=\"right\">" ."$n"."$_dated_</td></tr></table>" ."$n"; } # =================================================================== CSS # There was another project to support sitemap build from xml files. # The source format was using .dbk+xml with embedded references to .css # files for visual preview in a browser. An docbook xml file with semantic # outlines is far better suited for quality documentation than any html # source. It happens that the xml/css support in browsers is still not # very portable - especially embedded css style blocks are a nightmare. # Instead we (a) grab all non-html xml markup tags (b) grab all referenced # css stylesheets (c) cut out css defs from [b] that are known by [a] and # (d) append those to the <style> tag in the output html file as well as # (e) reformatting the defs as well as markups from tags to tag classes. # Input dbk/htm # <?xml-stylesheet type="text/css" href="html.css" ?> <!-- dbk/xml --> # <link rel="stylesheet" type="text/css" href="sdocbook.css" /> <!-- xhtml --> # <article><para> # Using some <command>exe</command> # </para></article> # Input css: # article { .. ; display : block } # para { .. ; display : block } # command { .. ; display : inline } # Output html: # <html><style type="text/css"> # div .article { .. } # div .para { .. } # span .command { .. } # </style> # <div class="article"><div class="para> # Using some <span class="command">exe</span> # </div></div> sub css_sourcefile { my ($X,$XXX) = @_; return $X if -f $X; return "$o{src_dir}/$X" if -f "$o{src_dir}/$X"; return "$X" if "$X" =~ m:^/:; return "./$X"; } my %XMLTAGS = (); sub css_xmltags # $SOURCEFILE { my $X=$SOURCEFILE; my %R = (); my $line; foreach $line (source($SOURCEFILE)) { $line =~ s|>[^<>]*<|><|g; $line =~ s|^[^<>]*<|<|; $line =~ s|>[^<>]*\$|>|; my $item; foreach $item (split /</, $line) { $item =~ m:^/: and next; $item =~ m:^\s*$: and next; $item !~ m|>| and next; $item =~ s|>.*||; chomp $item; $R{$item} = ""; } } @{$XMLTAGS{$X}} = keys %R; } my %XMLSTYLESHEETS = (); sub css_xmlstyles # $SOURCEFILE { my $X=$SOURCEFILE; my %R = (); my $text = ""; my $line = ""; foreach $line (source($SOURCEFILE)) { $text .= $line; $text =~ s|<link *rel=[\'\"]*stylesheet|<?xml-stylesheet |; if ($text !~ m/<.xml-stylesheet/) { $text = ""; next; } if ($text !~ m/href=/) { next; } $text =~ s|^.*<.xml-stylesheet||; $text =~ s|^.*href=[\"\']||; $text =~ s|[\"\'].*||s; chomp $text; $R{$text} = ""; } foreach $line (source($SITEFILE)) { $text .= $line; $text =~ s|<link *rel=[\'\"]*stylesheet|<?xml-stylesheet |; if ($text !~ m/<.xml-stylesheet/) { $text = ""; next; } if ($text !~ m/href=/) { next; } $text =~ s|^.*<.xml-stylesheet||; $text =~ s|^.*href=[\"\']||; $text =~ s|[\"\'].*||s; chomp $text; $R{$text} = ""; } @{$XMLSTYLESHEETS{$X}} = keys %R; } my %XMLTAGSCSS = (); sub css_xmltags_css # $SOURCEFILE { my $X=$SOURCEFILE; my @S = $XMLTAGS{$X}; my @R = (); my $xmlstylesheet; foreach $xmlstylesheet (@{$XMLSTYLESHEETS{$X}}) { my $stylesheet = css_sourcefile($xmlstylesheet); if (-f $stylesheet) { push @R, "/* $xmlstylesheet */"; my $text = ""; my $line = ""; my $STYLESHEET = $stylesheet; open STYLESHEET, "<$STYLESHEET" or next; foreach $line (<STYLESHEET>) { $text .= $line; if ($text =~ /^[^\{]*\}/s) { $text = ""; next; } if ($text !~ /^[^\{]*\{.*\}/s) { next; } $text =~ s|\r||g; my $xmltag; my $found = 0; foreach $xmltag (grep /^\w/, @{$XMLTAGS{$X}}) { $xmltag =~ s| .*||; if (grep {$_ eq $xmltag} qw/title section/) { next if $xmltag eq "section"; $found++ if $text =~ /\b$xmltag\s*(?:,[^{},]*)*\s*\{/s; my $xmlparent; foreach $xmlparent (@{$XMLTAGS{$X}}) { $xmlparent =~ s| .*||; /^\w/ or next; $found++ if $text =~ /\b$xmlparent\s+$xmltag\s*(?:,[^{},]*)*\s*\{/s; } } else { $found++ if $text =~ /\b$xmltag\s*(?:,[^\{\},]*)*\{/s; } last if $found; } if (not $found) { $text = ""; next; } foreach $xmltag (grep /^\w/, @{$XMLTAGS{$X}}) { $xmltag =~ s| .*||; if (grep {$_ eq $xmltag} @HTMLTAGS) { next; } if (grep {$_ eq $xmltag} @HTMLTAGS2) { next; } $text =~ s/(\b$xmltag\s*(?:,[^{},]*)*\s*\{)/.$1/gs; } chomp $text; push @R, $text; $text = ""; next; } } else { warn "$xmlstylesheet : ERROR, no such stylesheet $xmlstylesheet"; } } @{$XMLTAGSCSS{$X}} = @R; } my %XMLMAPPING = (); sub css_xmlmapping # $SOURCEFILE { my $X=$SOURCEFILE; my %R = (); foreach (@{$XMLTAGSCSS{$X}}) { my $span = ""; $span="li" if /\bdisplay\s*:\s*list-item\b/; $span="caption" if /\bdisplay\s*:\s*table-caption\b/; $span="td" if /\bdisplay\s*:\s*table-cell\b/; $span="tr" if /\bdisplay\s*:\s*table-row\b/; $span="table" if /\bdisplay\s*:\s*table\b/; $span="div" if /\bdisplay\s*:\s*block\b/; $span="span" if /\bdisplay\s*:\s*inline\b/; $span="small" if /\bdisplay\s*:\s*none\b/; $span="ul" if /\blist-style-type\s*:\s*disc\b/ and $span eq "div"; $span="ol" if /\blist-style-type\s*:\s*decimal\b/ and $span eq "div"; $span="tt" if /\bfont-family\s*:\s*monospace\b/ and $span eq "span"; $span="em" if /\bfont-style\s*:\s*italic\b/ and $span eq "span"; $span="b" if /\bfont-weight\s*:\s*bold\b/ and $span eq "span"; $span="pre" if /\bwhite-space\s*:\s*pre\b/ and $span eq "div"; my $xmltag; for $xmltag (grep /^\w/, @{$XMLTAGS{$X}}) { $xmltag =~ s| .*||; if (/\.$xmltag\b/s) { $R{$xmltag} = $span; $R{$xmltag} = "p" if $xmltag eq "para" and $span eq "div"; $R{$xmltag} = "a" if $xmltag eq "ulink" and $span eq "span"; } } } %{$XMLMAPPING{$X}} = %R; } sub css_scan # $SOURCEFILE { css_xmltags (); css_xmlstyles (); css_xmltags_css (); css_xmlmapping (); } sub tags2span_sed # $SOURCEFILE > $++ { my $X=$SOURCEFILE; my $xmltag; my @R = (); push @R, "s|<[?]xml-stylesheet[^<>]*[?]>||"; push @R, "s|<link *rel=['\"]*stylesheet[^<>]*>||"; push @R, "s|<section[^<>]*>||g;"; push @R, "s|</section[^<>]*>||g;"; for $xmltag (grep /^\w/, @{$XMLTAGS{$X}}) { $xmltag =~ s| .*||; if (grep {$_ eq $xmltag} @HTMLTAGS) { next; } if (grep {$_ eq $xmltag} @HTMLTAGS2) { next; } my $span = $XMLMAPPING{$X}{$xmltag}; $span = "span" if $span eq ""; push @R, "s|<$xmltag([\\n\\t ][^<>]*)url=|<$span class=\"$xmltag\"\$1href=|g;"; push @R, "s|<$xmltag([\\n\\t >])|<$span class=\"$xmltag\"\$1|g;"; push @R, "s|</$xmltag([\\n\\t >])|</$span\$1|g;"; } my $xmlstylesheet; foreach $xmlstylesheet (@{$XMLSTYLESHEETS{$X}}) { my $H="[^<>]*href=[\'\"]${xmlstylesheet}[\'\"][^<>]*"; push @R, "s|<[?]xml-stylesheet$H>||;"; push @R, "s|<link[^<>]* rel=['\"]*stylesheet['\"]$H>||;"; } return @R; } sub tags2meta_sed # $SOURCEFILE > $++ { my @R = (); push @R, " <style type=\"text/css\"><!--"; push @R, map {s/(^|\n)/$1 /g;$_} @{$XMLTAGSCSS{$SOURCEFILE}}; push @R, " --></style>"; @R = () if $#R < 3; return @R; } # ========================================================================== # xml/docbook support is taking an dbk input file converting any html DBK # syntax into pure docbook tagging. Each file is being given a docbook # doctype so that an xml/docbook viewer can render it correctly - that # is needed atleast since docbook files do not embed stylesheet infos. # Most of the processing is related to remap html markup and some other # shortcut markup into correct docbook markup. The result is NOT checked # for being well-formed or even matching the docbook schema DTD at all. sub scan_xml_rootnode { my ($INF,$XXX) = @_; $INF = \@{$DATA{$F}} if not $INF; for my $entry (source($SOURCEFILE)) { my $line = $entry; next if $line !~ /<\w/; $line =~ s/<(\w*).*/$1/s; # print ":",$line,$n; push @{$INF}, "<!root $F>$line"; return; } } sub get_xml_rootnode { my ($INF,$XXX) = @_; $INF = \@{$DATA{$F}} if not $INF; my $_file_ = sed_slash_key($F); foreach my $entry (grep /^<!root $_file_>/, @{$INF}) { my $line=$entry; $line =~ s|.*>||; return $line; } } sub xml_sourcefile { my ($X,$XXX) = @_; my $XMLFILE=$X; $XMLFILE =~ s/\.xml$/.dbk/; my $SRCFILE=$X; $SRCFILE =~ s/\.xml$/.htm/; $XMLFILE="///" if $X eq $XMLFILE; $SRCFILE="///" if $X eq $SRCFILE; return $XMLFILE if -f $XMLFILE; return $SRCFILE if -f $SRCFILE; return "$o{src_dir}/$XMLFILE" if -f "$o{src_dir}/$XMLFILE"; return "$o{src_dir}/$SRCFILE" if -f "$o{src_dir}/$SRCFILE"; return ".//$XMLFILE"; # $++ (not found?) } sub scan_xmlfile { $SOURCEFILE= &xml_sourcefile($F); hint "'$SOURCEFILE': scanning xml -> '$F'"; scan_xml_rootnode(); my $rootnode=&get_xml_rootnode(); $rootnode =~ s|^(h\d.*$)|$1 <?section?>|; hint "'$SOURCEFILE': rootnode ('$rootnode')"; } sub make_xmlfile { $SOURCEFILE= &xml_sourcefile($F); my $X=$SOURCEFILE; my $article= &get_xml_rootnode(); $article="article" if $article eq ""; my $text = ""; $text .= '<!DOCTYPE '.$article. ' PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"'.$n; $text .= ' "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">' .$n; for my $stylesheet (@{$XMLSTYLESHEETS{$X}}) { $text .= "<?xml-stylesheet type=\"text/css\" href=\"$stylesheet\" ?>" .$n; } for (source($SOURCEFILE)) { s!<>!\ \;!g; s!(&)(&)!${1}amp;${2}amp;!g; s!(<[^<>]*)(width)(=)(\d+\%*)!$1$2$3\"$4\"!g; s!(<[^<>]*)(cellpadding)(=)(\d+\%*)!$1$2$3\"$4\"!g; s!(<[^<>]*)(border)(=)(\d+\%*)!$1$2$3\"$4\"!g; s!<[?]xml-stylesheet[^<>]*>!!; s!<link[^<>]* rel=[\'\"]*stylesheet[^<>]*>!!; s!<[hH]\d!<title!g; s!</[hH]\d!</title!g; s!( *)([^<>]*\w[^<>\r\n]*)$!$1$2!; s!(.*)!$1!g; s!(.*)!$1!g; s!(
[^<>]*)(.*[^<>\n]*)$!$1$2!gx; s!!!g; s!!!g; s!!!g; s!!!g; s!!!g; s!!!g; s!!!g; s!!!g; s!!!g; s!!!g; s!<(s|strike)>!!g; s!!!g; s!
!
!g; s!
!!g; s!

!!g; s!<[pP]>!!g; s!!!g; s!<(pre|PRE)>!!g; s!!!g; s!]*)name=([^<>]*)/>!!g; s!]*)name=([^<>]*)>!!g; s!]*)href=!!!g; s! remap=\"url\">[^<>]*! />!g; s!<(/?)span(\s[^<>]*)?>!<${1}phrase${2}>!g; s!]*)?>!!g; s!]*)?>!!g; s!<(/?)(sup)>!<${1}superscript>!g; s!<(/?)(sub)>!<${1}subscript>!g; s!(<)(li)(><)!${1}listitem${3}!g; s!(>)!${1}listitem${3}!g; s!(<)(li)(>)!${1}listitem${3}!g; s!()!${1}listitem${3}!g; s!(!${1}itemizedlist>!g; s!(!${1}orderedlist>!g; s!(!${1}variablelist>!g; s!<(/?)DT>!<${1}dt>!g; s!<(/?)DD>!<${1}dd>!g; s!<(/?)DL>!<${1}dl>!g; s!

!
!g; s!
!
!g; s!<(/?)dl>!<${1}variablelist>!g; s!]*)>!!g; s!]*)>!!g; s!]*)>]*)>!>!g; s!]*)>!!g; s!]*)>!!g; s!]*>(]*>)!$1!; s!()!$1!; s!]*)>!!g; s!]*)>!!g; s!(]*)?>!${1}row${2}>!g; s!(]*)?>!${1}entry${2}>!g; s!(]*\swidth=\"100\%\")!$1 pgwide=\"1\"!g; s!(]*\scols=\"2\">)() !$1$2!gx; s!(]*\s)width=(\"\d*\%*\")!${1}remap=${2}!g; s!([\'\`]*)!$1!g; s!([\'\`]*)!$1!g; s!<(tt|code)>([\`\"\'])!$2!g; s!<(tt|code)>([\`\"\'])!$2!g; s!([\`\"\'])!$1!g; s!([\`\"\'])!$1!g; s!(!${1}constant>!g; s!(!${1}literal>!g; s!
!
!g; s!
!\n!g; $text .= $_; } open F, ">$F" or die "could not write $F: $!"; print F $text; close F; echo "'$SOURCEFILE': ",&ls_s($SOURCEFILE)," >> ",&ls_s($F); } sub make_xmlmaster { $SOURCEFILE= &xml_sourcefile($F); my $X=$SOURCEFILE; my $article="section"; # book? chapter? my $text = ""; $text .= '' .$n; for my $stylesheet (@{$XMLSTYLESHEETS{$X}}) { $text .= "" .$n; } # $text .= "
..."; $text .= "
Documentation$n"; for (make_xmlsitemap()) { $text .= $_; } $text .= "
$n"; open F, ">$F" or die "could not write $F: $!"; print F $text; close F; echo "'$SOURCEFILE': ",&ls_s($SOURCEFILE)," >*> ",&ls_s($F); } # ========================================================================== # # During processing we will create a series of intermediate files that # store relations. They all have the same format being # =relationtype=key value # where key is usually s filename or an anchor. For mere convenience # we assume that the source html text does not have lines that start # off with =xxxx= (btw, ye remember perl section notation...). Of course # any other format would be usuable as well. # # we scan the SITEFILE for href references to be converted # - in the new variant we use a ".gets.tmp" sed script that SECTS # marks all interesting lines so they can be checked later # with an sed anchor of sect="[$NN]" (or sect="[$AZ]") my $S="\\ \\;"; # S="[&]nbsp[;]" # HR and EM style markups must exist in input - BR sometimes left out # these routines in(ter)ject hardspace before, between, after markups # note that "
" is sometimes used with HR - it must exist in input sub echo_HR_EM_PP { my ($U,$V,$W,$X,$Z) = @_; my @list = ( "s%^($U$V$W*$U$V$W*$V$W*$W*$V$W*$W*$U$V*$V*$V*$U*<>$U*$U<>*<>**$U<>$U$U<><>$U<>$U$U<><>", "$h1", "sect=\\\"1\\\""); push @MK_GETS, &echo_HR_EM_PP("
","", "$h1", "sect=\\\"1\\\""); push @MK_GETS, &echo_HR_EM_PP("
","", "$h1", "sect=\\\"1\\\""); push @MK_GETS, &echo_HR_PP ("
", , "$b1$b1", "sect=\\\"1\\\""); push @MK_GETS, &echo_HR_PP ("
", , "$b2$b2", "sect=\\\"2\\\""); push @MK_GETS, &echo_HR_PP ("
", , "$b3$b3", "sect=\\\"3\\\""); push @MK_GETS, &echo_br_PP ("
", , "$b2$b2", "sect=\\\"2\\\""); push @MK_GETS, &echo_br_PP ("
", , "$b3$b3", "sect=\\\"3\\\""); push @MK_GETS, &echo_br_EM_PP("
","" , "$q3" , "sect=\\\"3\\\""); push @MK_GETS, &echo_br_EM_PP("
","" , "$q3" , "sect=\\\"3\\\""); push @MK_GETS, &echo_br_EM_PP("
","" , "$q3" , "sect=\\\"3\\\""); push @MK_GETS, &echo_HR_PP ("
", , "$q3" , "sect=\\\"3\\\""); push @MK_GETS, &echo_br_PP ("", , "$b2" , "sect=\\\"2\\\""); push @MK_GETS, &echo_sp_PP ( "$q3" , "sect=\\\"3\\\""); push @MK_GETS, &echo_sp_SP ( "" , "sect=\\\"2\\\""); push @MK_GETS, &echo_sp_sp ( "$q3" , "sect=\\\"9\\\""); push @MK_GETS, &echo_sp_sp ("
", "sect=\\\"9\\\""); @MK_PUTS = map { my $x=$_; $x =~ s/(>)(\[)/$1 *$2/; $x } @MK_GETS; # the .puts.tmp variant is used to
some hrefs which # shall not be used otherwise for being generated - this is nice for # some quicklinks somewhere. The difference: a whitspace "
" } my $_uses_= sub{"<$Q='use$1'>$2 $3<$QX>" }; my $_name_= sub{"<$Q='use$1'>name:$2 $3<$QX>" }; sub make_sitemap_list { my ($V,$Z) = @_; $V = $SITEFILE if not $V; # scan sitefile for references pages - store as "=use+=href+ anchortext" for (source($V)) { my $x = $_; local $_ = &eval_MK_LIST("sitemap_list", $x, @MK_GETS); /
]*>(.*).*}{&$_uses_}e; s{.*]*>(.*).*}{&$_name_}e; s{.*]*>(.*)}{&$_name_}e; /^<$Q=/ or next; /^$2 $3<$QX>" }; my $_Name_= sub{"<$Q='Use$1'>name:$2 $3<$QX>" }; sub make_subsitemap_list # file-to-scan { my ($V,$W,$Z) = @_; $V = $SITEFILE if not $V; # scan sitefile for references pages - store as "=use+=href+ anchortext" for (source($V)) { my $x = $_; local $_ = &eval_MK_LIST("subsitemap_list", $x, @MK_GETS); /]*>(.*).*}{&$_Uses_}e; s{.*]*>(.*).*}{&$_Name_}e; s{.*]*>(.*)}{&$_Name_}e; /^<$Q=/ or next; /^([^:./][^:./]*[./])|>$W$1|; push @MK_DATA, $_; } } sub make_sitemap_sect { # scan used pages and store prime section group relation =sect= and =node= # (A) each "use1" creates "=sect=href+ href1" for all following non-"use1" # (B) each "use1" creates "=node=href2 href1" for all following "use2" my $sect = ""; for (grep {/<$Q='[u]se.'>/} @MK_DATA) { if (/<$Q='[u]se1'>([^ ]*) .*/) { $sect = $1; } my $x = $_; # chomp $x; $x =~ s|<$Q='[u]se.'>([^ ]*) .*|<$Q='sect'>$1 $sect<$QX>|; push @MK_DATA, $x; } for (grep {/<$Q='[u]se.'>/} @MK_DATA) { if (/<$Q='[u]se1'>([^ ]*) .*/) { $sect = $1; } /<$Q='[u]se[13456789]'>/ and next; my $x = $_; # chomp $x; $x =~ s|<$Q='[u]se.'>([^ ]*) .*|<$Q='node'>$1 $sect<$QX>|; push @MK_DATA, $x; } } sub make_sitemap_page { # scan used pages and store secondary group relation =page= and =node= # the parenting =node= for use3 is usually a use2 (or use1 if none there) my $sect = ""; for (grep {/<$Q='[u]se.'>/} @MK_DATA) { if (/<$Q='[u]se1'>([^ ]*) .*/) { $sect = $1; } if (/<$Q='[u]se2'>([^ ]*) .*/) { $sect = $1; } /<$Q='[u]se[1]'>/ and next; my $x = $_; $x =~ s|<$Q='[u]se.'>([^ ]*) .*|<$Q='page'>$1<$QX>|; chomp $x; push @MK_DATA, "$x $sect"; } for (grep {/<$Q='[u]se.'>/} @MK_DATA) { if (/<$Q='[u]se1'>([^ ]*) .*/) { $sect = $1; } if (/<$Q='[u]se2'>([^ ]*) .*/) { $sect = $1; } /<$Q='[u]se[12456789]'>/ and next; my $x = $_; $x =~ s/<$Q='[u]se.'>([^ ]*) .*/<$Q='node'>$1<$QX>/; chomp $x; push @MK_DATA, "$x $sect"; ## print "(",$_,")","$x $sect", $n; } # and for the root sections we register ".." as the parenting group for (grep {/<$Q='[u]se1'>/} @MK_DATA) { my $x = $_; $x = trimm($x); $x =~ s/<$Q='[u]se.'>([^ ]*) .*/<$Q='node'>$1 ..<$QX>/; chomp $x; push @MK_DATA, $x; } } sub echo_site_filelist { my @OUT = (); for (grep {/<$Q='[u]se.'>/} @MK_DATA) { my $x = $_; $x =~ s/<$Q='[u]se.'>//; $x =~ s/ .*[\n]*//; push @OUT, $x; } return @OUT; } # ========================================================================== # originally this was a one-pass compiler but the more information # we were scanning out the more slower the system ran - since we # were rescanning files for things like section information. Now # we scan the files first for global information. # 1.PASS sub scan_sitefile # $F { $SOURCEFILE=&html_sourcefile($F); hint "'$SOURCEFILE': scanning -> sitefile"; if ($SOURCEFILE ne $F) { dx_init "$F"; dx_text ("today", &timetoday()); my $short=$F; $short =~ s:.*/::; $short =~ s:[.].*::; # basename for all exts $short .=" ~"; DC_meta ("title", "$short"); DC_meta ("date.available", &timetoday()); DC_meta ("subject", "sitemap"); DC_meta ("DCMIType", "Collection"); DC_VARS_Of ($SOURCEFILE) ; HTTP_VARS_Of ($SOURCEFILE) ; DC_modified ($SOURCEFILE) ; DC_date ($SOURCEFILE); DC_section ($F); DX_text ("date.formatted", &timetoday()); if ($printerfriendly) { DX_text ("printerfriendly", fast_html_printerfile($F)); } if ($ENV{USER}) { DC_publisher ($ENV{USER}); } echo "'$SOURCEFILE': $short (sitemap)"; site_map_list_title ($F, "$short"); site_map_long_title ($F, "generated sitemap index"); site_map_list_date ($F, &timetoday()); } } sub scan_htmlfile # "$F" { my ($FF,$Z) = @_; $SOURCEFILE=&html_sourcefile($F); # SCAN : hint "'$SOURCEFILE': scanning -> $F"; # HTML : if ($SOURCEFILE ne $F) { if ( -f $SOURCEFILE) { dx_init "$F"; dx_text ("today", &timetoday()); dx_text ("todays", &timetodays()); DC_VARS_Of ($SOURCEFILE); HTTP_VARS_Of ($SOURCEFILE); DC_title ($SOURCEFILE); DC_isFormatOf ($SOURCEFILE); DC_modified ($SOURCEFILE); DC_date ($SOURCEFILE); DC_date ($SITEFILE); DC_section ($F); DC_selected ($F); DX_alternative ($SOURCEFILE); if ($ENV{USER}) { DC_publisher ($ENV{USER}); } DX_text ("date.formatted", &timetoday()); if ($printerfriendly) { DX_text ("printerfriendly", fast_html_printerfile($F)); } my $sectn=&info_get_entry("DC.relation.section"); my $short=&info_get_entry("DC.title.selected"); &site_map_list_title ($F, "$short"); &info_map_list_title ($F, "$short"); my $title=&info_get_entry("DC.title"); &site_map_long_title ($F, "$title"); &info_map_long_title ($F, "$title"); my $edate=&info_get_entry("DC.date"); my $issue=&info_get_entry("issue"); &site_map_list_date ($F, "$edate"); &info_map_list_date ($F, "$edate"); css_scan(); echo "'$SOURCEFILE': '$title' ('$short') @ '$issue' ('$sectn')"; }else { echo "'$SOURCEFILE': does not exist"; site_map_list_title ($F, "$F"); site_map_long_title ($F, "$F (no source)"); } } else { echo "<$F> - skipped - ($SOURCEFILE)"; } } sub scan_subsitemap_long { my ($V,$W,$ZZZ) = @_; for (source($V)) { my $x = $_; if ($x =~ m|.*([^<>]*)|) { &site_map_list_date($W.$1,$2); } if ($x =~ m|.*([^<>]*)|) { &site_map_long_title($W.$1,$2); } } } sub scan_namespec { # nothing so far # my ($F,$ZZZ) = @_; if ($F =~ /^name:sitemap:/) { my $short=$F; $short =~ s:.*/::; $short =~ s:[.].*::; # basename for all exts $short =~ s/name:sitemap://; $short .=" ~"; site_map_list_title ($F, "$short"); site_map_long_title ($F, "external sitemap index"); site_map_list_date ($F, &timetoday()); echo "'$F' external sitemap index"; } elsif ($F =~ /^name:(.*\.html*)$/) { # assuming it is a subsitefile my $FF=$1; my $FFF=$FF; $FFF =~ s:/[^/]*$:/:; # dirname $FFF="" if $FFF !~ m:/:; make_subsitemap_list($FF, $FFF); scan_subsitemap_long($FF, $FFF); } } sub scan_httpspec { # nothing so far } sub skip_namespec { # nothing so far } sub skip_httpspec { # nothing so far } # ========================================================================== # and now generate the output pages # 2.PASS sub head_sed_sitemap # $filename $section { my ($U,$V,$Z) = @_; my $FF=&sed_piped_key($U); my $SECTION=&sed_slash_key($V); my $SECTS="sect=\"[$NN$AZ]\"" ; my $SECTN="sect=\"[$NN]\""; # lines with hrefs my @OUT = (); push @OUT, "s|(.*)|\$1|;"; push @OUT, "/ href=\\\"$SECTION\\\"/ " ."and s|^.*)|\$1|;"; push @OUT, "/ href=\\\"$SECTION\\\"/ " ."and s|^||" # after that all the (still) numeric SECTNs are deactivated / killed. for my $section ($SECTION, $headsection, $tailsection) { next if $section eq "no"; for (grep {/^<$Q='sect'>[^ ]* $section/} @MK_DATA) { my $x = $_; $x =~ s|<$Q='sect'>||; $x =~ s| .*||; # $filename $x =~ s/(.*)/s|name:[^ ]* $section/} @MK_DATA) { my $x = $_; $x =~ s|<$Q='sect'>name:||; $x =~ s| .*||; # $filename $x =~ s/(.*)/s|]*)>.*||;"; push @OUT, "s|.*]*)>.*||;"; push @OUT, "s|(.*)|\$1|;"; push @OUT, "/ href=\\\"$SECTION\\\"/ " ."and s|^ vars substituted @MK_META = &info2meta_sed(); # add values my @F_HEAD = (); my @F_FOOT = (); push @F_HEAD, @MK_PUTS; push @F_HEAD, &head_sed_sitemap ($F, &info_get_entry_section()); push @F_HEAD, "// and $sed_add join(\"\\n\", \@MK_META);"; push @F_HEAD, @MK_VARS; push @F_HEAD, @MK_TAGS; push @F_HEAD, "/<\\/body>/ and next;"; #cut lastline if ( $sitemaplayout eq "multi") { push @F_FOOT, &make_multisitemap(); # here we use ~foot~ to } else { push @F_FOOT, &make_listsitemap(); # hold the main text } my $html = ""; # $html .= &eval_MK_FILE("SITE", $SITEFILE, @F_HEAD); $html .= join("", @F_FOOT); for (source($SITEFILE)) { /<\/body>/ or next; $html .= &eval_MK_LIST("sitefile", $_, @MK_VARS); } open F, ">$F"; print F $html; close F; echo "'$SOURCEFILE': ",ls_s($SOURCEFILE)," >-> ",ls_s($F); savesource("$F.~head~", \@F_HEAD); savesource("$F.~foot~", \@F_FOOT); } else { echo "'$SOURCEFILE': does not exist"; } } } sub make_htmlfile # "$F" { $SOURCEFILE=&html_sourcefile($F); # 2.PASS if ("$SOURCEFILE" ne "$F") { if (-f "$SOURCEFILE") { if (grep {/ vars substituted @MK_META = &info2meta_sed(); # add values @MK_SPAN = &tags2span_sed(); # extern text/css -> intern css classes push @MK_META, &tags2meta_sed(); # extern text/css -> intern css classes my @F_HEAD = (); my @F_BODY = (); my $F_FOOT = ""; push @F_HEAD, @MK_PUTS; if ( $sectionlayout eq "multi") { push @F_HEAD, &head_sed_multisection ($F, &info_get_entry_section()); } else { push @F_HEAD, &head_sed_listsection ($F, &info_get_entry_section()); } push @F_HEAD, @MK_VARS; push @F_HEAD, @MK_TAGS; push @F_HEAD, @MK_SPAN; push @F_HEAD, "/<\\/body>/ and next;"; #cut lastline push @F_HEAD, "// and $sed_add join(\"\\n\",\@MK_META);"; #add metatags push @F_BODY, "// and next;"; #not that line push @F_BODY, @MK_VARS; push @F_BODY, @MK_TAGS; push @F_BODY, @MK_SPAN; push @F_BODY, &bodymaker_for_sectioninfo(); #if sectioninfo push @F_BODY, &info2body_sed(); #cut early push @F_HEAD, &info2head_sed(); push @F_HEAD, &make_back_path($F); if ($emailfooter ne "no") { $F_FOOT = &body_for_emailfooter(); } my $html = ""; $html .= eval_MK_FILE("head", $SITEFILE, @F_HEAD); $html .= eval_MK_FILE("body", $SOURCEFILE, @F_BODY); $html .= $F_FOOT; for (source($SITEFILE)) { /<\/body>/ or next; $_ = &eval_MK_LIST("htmlfile", $_, @MK_VARS); $html .= $_; } open F, ">$F" or die "could not write $F: $!"; print F $html; close F; echo "'$SOURCEFILE': ",&ls_s($SOURCEFILE)," -> ",&ls_s($F); savesource("$F.~head~", \@F_HEAD); savesource("$F.~body~", \@F_BODY); } else { echo "'$SOURCEFILE': does not exist"; }} else { echo "<$F> - skipped"; } } my $PRINTSITEFILE; sub make_printerfriendly # "$F" { # PRINTER my $printsitefile="0"; # FRIENDLY my $BODY_TXT; my $BODY_SED; my $P=&html_printerfile ($F); my @P_HEAD = (); my @P_BODY = (); if ("$F" =~ /^(${SITEFILE}|${SITEFILE}l)$/) { $printsitefile=">=>" ; $BODY_TXT="$F.~foot~" ; } elsif ("$F" =~ /^(.*[.]html)$/) { $printsitefile="=>" ; $BODY_TXT="$SOURCEFILE"; } if (grep {/<meta name="formatter"/} source($BODY_TXT)) { return; } if ($printsitefile ne "0" and -f $SOURCEFILE) { my $x; @MK_FAST = &make_printerfile_fast (\@FILELIST); push @P_HEAD, @MK_VARS; push @P_HEAD, @MK_TAGS; push @P_HEAD, @MK_FAST; @MK_METT = map { $x = $_; $x =~ /DC.relation.isFormatOf/ and $x =~ s|content=\"[^\"]*\"|content=\"$F\"| ; $x } @MK_META; push @P_HEAD, "/<head>/ and $sed_add join(\"\\n\", \@MK_METT);"; push @P_HEAD, "/<\\/body>/ and next;"; push @P_HEAD, &select_in_printsitefile ("$F"); my $_ext_=&print_extension($printerfriendly); # my $line_=&sed_slash_key($printsitefile_img_2); push @P_HEAD, "/\\|\\|topics:/" ." and s| href=\\\"\\#\\.\\\"| href=\\\"$F\\\"|;"; push @P_HEAD, "/\\|\\|\\|pages:/" ." and s| href=\\\"\\#\\.\\\"| href=\\\"$F\\\"|;"; push @P_HEAD, &make_back_path("$F"); push @P_BODY, @MK_VARS; push @P_BODY, @MK_TAGS; push @P_BODY, @MK_FAST; push @P_BODY, &make_back_path("$F"); my $html = ""; $html .= eval_MK_FILE("p_head", $PRINTSITEFILE, @P_HEAD); $html .= eval_MK_FILE("p_body", $BODY_TXT, @P_BODY); for (source($PRINTSITEFILE)) { /<\/body>/ or next; $_ = &eval_MK_LIST("printerfriendly", $_, @MK_VARS); $html .= $_; } open P, ">$P" or die "could not write $P: $!"; print P $html; close P; echo "'$SOURCEFILE': ",ls_s($SOURCEFILE)," $printsitefile ",ls_s($P); } } # ======================================================================== # ======================================================================== # ======================================================================== # ======================================================================== # #### 0. INIT $F=$SITEFILE; &make_sitemap_init(); &make_sitemap_list($SITEFILE); &make_sitemap_sect(); &make_sitemap_page(); savelist(\@MK_DATA, "DATA"); @FILELIST=&echo_site_filelist(); if ($o{filelist} or $o{list} eq "file" or $o{list} eq "files") { for (@FILELIST) { echo $_; } exit; # --filelist } if ($o{files}) { @FILELIST=split(/ /, $o{files}); } # --files if ($#FILELIST < 0) { warns "nothing to do (no --filelist)"; } if ($#FILELIST == 0 and $FILELIST[0] eq $SITEFILE) { warns "only '$SITEFILE'?!"; } for (@FILELIST) { #### 1. PASS $F = $_; if (/^(name:.*)$/) { &scan_namespec ("$F"); } elsif (/^(http:|https:|ftp:|mailto:|telnet:|news:|gopher:|wais:)/) { &scan_httpspec ("$F"); } elsif (/^(${SITEFILE}|${SITEFILE}l)$/) { &scan_sitefile ("$F") ;; # ........... SCAN SITE } elsif (/^(.*\@.*\.de)$/) { echo "!! -> '$F' (skipping malformed mailto:-link)"; } elsif (/^(\.\.\/.*)$/) { echo "!! -> '$F' (skipping topdir build)"; # */*.html) # make_back_path # try for later subdir build # echo "!! -> '$F' (skipping subdir build)" # ;; # */*/*/|*/*/|*/|*/index.htm|*/index.html) # echo "!! -> '$F' (skipping subdir index.html)" # ;; } elsif (/^(.*\.html)$/) { &scan_htmlfile ("$F"); # ........... SCAN HTML if ($o{xml}) { $F =~ s/\.html$/.xml/; &scan_xmlfile ("$F"); } } elsif (/^(.*\.xml)$/) { &scan_xmlfile ("$F") ;; } elsif (/^(.*\/)$/) { echo "'$F' : directory - skipped"; &site_map_list_title ("$F", &sed_slash_key($F)); &site_map_long_title ("$F", "(directory)"); } else { echo "?? -> '$F'"; } } if ($printerfriendly) { # .......... PRINT VERSION my $_ext_=esc(&print_extension($printerfriendly)); $PRINTSITEFILE=$SITEFILE; $PRINTSITEFILE =~ s/(\.\w*)$/$_ext_$1/; $F=$PRINTSITEFILE; my @TEXT = &make_printsitefile(); echo "NOTE: going to create printer-friendly sitefile '$PRINTSITEFILE'" ." $F._$i"; savelist(\@TEXT, "TEXT"); my @LINES = map { chomp; $_."$n" } @TEXT; savesource($PRINTSITEFILE, \@LINES); if (1) { if (open PRINTSITEFILE, ">$PRINTSITEFILE") { print PRINTSITEFILE join("", @LINES); close PRINTSITEFILE; } } } for (@FILELIST) { #### 2. PASS $F = $_; if (/^(name:.*)$/) { &skip_namespec ("$F") ;; } elsif (/^(http:|https:|ftp:|mailto:|telnet:|news:|gopher:|wais:)/) { &skip_httpspec ("$F") ;; } elsif (/^(${SITEFILE}|${SITEFILE}l)$/) { &make_sitefile ("$F") ;; # ........ SITE FILE &make_printerfriendly ("$F") if ($printerfriendly); if ($o{xml}) { $F =~ s/\.html$/.xml/; &make_xmlmaster ("$F"); } } elsif (/^(.*\@.*\.de)$/) { echo "!! -> '$F' (skipping malformed mailto:-link)"; } elsif (/^(\.\.\/.*)$/) { echo "!! -> '$F' (skipping topdir build)"; # */*.html) # echo "!! -> '$F' (skipping subdir build)" # ;; # */*/*/|*/*/|*/|*/index.htm|*/index.html) # echo "!! -> '$F' (skipping subdir index.html)" # ;; } elsif (/^(.*\.html)$/) { &make_htmlfile ("$F") ; # .................. HTML FILES &make_printerfriendly ("$F") if ($printerfriendly); if ($o{xml}) { $F =~ s/\.html$/.xml/; &make_xmlfile ("$F"); } } elsif (/^(.*\.xml)$/) { &make_xmlfile ("$F") ;; } elsif (/^(.*\/)$/) { echo "'$F' : directory - skipped"; } else { echo "?? -> '$F'"; } # .............. debug .................... if (-d "DEBUG" and -f $F) { my $INP = \@{$DATA{$F}}; my $FFFF = $F; $FFFF =~ s,/,:,g; if (open FFFF, ">DEBUG/$FFFF.data.tmp.ht") { for (@{$INP}) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.tags.tmp.pl") { print FFFF "# /usr/bin/env perl -p",$n; for (@MK_TAGS) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.vars.tmp.pl") { print FFFF "# /usr/bin/env perl -p",$n; for (@MK_VARS) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.span.tmp.pl") { print FFFF "# /usr/bin/env perl -p",$n; for (@MK_SPAN) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.meta.tmp.ht") { for (@MK_META) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.gets.tmp.ht") { for (@MK_GETS) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.puts.tmp.ht") { for (@MK_PUTS) { print FFFF $_,$n; } close FFFF; } if (open FFFF, ">DEBUG/$FFFF.fast.tmp.ht") { for (@MK_FAST) { print FFFF $_,$n; } close FFFF; } } } # done ## rm ./$MK.*.tmp.* if not $o{keeptmpfiles} exit 0