#! /usr/bin/env perl # # Usage: ./urlbst.pl [--eprint] [--doi] # [--nohyperlinks] [--inlinelinks] [--hypertex] [--hyperref] # [input-file [output-file]] # If either input-file or output-file is omitted, they are replaced by # stdin or stdout respectively. # # See http://purl.org/nxg/dist/urlbst for documentation # # $Id: urlbst.in,v 1.8 2007/03/26 17:21:56 norman Exp $ $version = '0.6'; ($progname = $0) =~ s/.*\///; $mymarker = "% $progname"; $mymarkerend = "% ...$progname to here"; $myurl = 'http://purl.org/nxg/dist/urlbst'; $infile = '-'; $outfile = '-'; $addeprints = 0; # if true (nonzero) we add support for eprints $eprintprefix = 'arXiv:'; # make these settable with --eprint? syntax? $eprinturl = 'http://arxiv.org/abs/'; $adddoiresolver = '0'; $doiprefix = 'doi:'; $doiurl = 'http://dx.doi.org/'; $makehref = 0; $availablestring = "Available from: "; $inlinelinks = 0; $Usage = "$progname [--eprint] [--doi]\n [--nohyperlinks] [--inlinelinks] [--hypertex] [--hyperref]\n [--help] [input-file [output-file]]"; while ($#ARGV >= 0) { if ($ARGV[0] eq '--eprint') { $addeprints = 1; } elsif ($ARGV[0] eq '--doi') { $adddoiresolver = 1; } elsif ($ARGV[0] eq '--nohyperlinks') { $makehref = 0; } elsif ($ARGV[0] eq '--hypertex') { $makehref = 1; } elsif ($ARGV[0] eq '--hyperref') { $makehref = 2; } elsif ($ARGV[0] eq '--inlinelinks') { $inlinelinks = 1; } elsif ($ARGV[0] eq '--help') { print <$outfile") || die "Can't open $outfile to write"; # We have to make certain assumptions about the source files, in order # to patch them at the correct places. Specifically, we assume that # # - there's a function init.state.consts # # - ...and an output.nonnull which does the actual outputting, which # has the `usual' interface. # # - we can replace # fin.entry # by # new.block # output.url % the function which formats and displays any URL # fin.entry # # - there is a function which handles the `article' entry type (this # will always be true) # # - there is a function output.bibitem which is called at the # beginning of each entry type # - ...and one called fin.entry which is called at the end # # If the functions format.date, format.title or new.block are not defined (the # former is not in apalike, for example, and the last is not in the # AMS styles), then replacements are included in the output. # # All these assumptions are true of the standard files and, since most # style files derive from them more or less directly, are true of most (?) # other style files, too. # # There's some rather ugly Perl down here. The parsing for # brace-matching could probably do with being rewritten in places, to # make it less ugly, and more robust. print OUT "%%% Modification of BibTeX style file ", ($infile eq '-' ? '' : $infile), "\n"; print OUT "%%% ... by $progname, version $version (marked with \"$mymarker\")\n%%% See <$myurl>\n"; print OUT "%%% Added webpage entry type, and url and lastchecked fields.\n"; print OUT "%%% Added eprint support.\n" if ($addeprints); print OUT "%%% Added DOI support.\n" if ($adddoiresolver); print OUT "%%% Added HyperTeX support.\n" if ($makehref == 1); print OUT "%%% Added hyperref support.\n" if ($makehref == 2); print OUT "%%% Original headers follow...\n\n"; $found{initconsts} = 0; $found{outputnonnull} = 0; $found{article} = 0; $found{outputbibitem} = 0; $found{finentry} = 0; $found{formatdate} = 0; $found{formattitle} = 0; $found{newblock} = 0; while () { /^ *%/ && do { # Pass commented lines unchanged print OUT; next; }; /^ *ENTRY/ && do { # Work through the list of entry types, finding what ones are there. # If we find a URL entry there already, object, since these edits # will mess things up. $line = $_; until ($line =~ /\{\s*(\w*)/) { $line .= ; } $bracematchtotal = 0; # reset bracematcher($line); $line =~ /\{\s*(\w*)/; $found{'entry'.$1} = 1; print OUT $line; $line = ; until (bracematcher($line) == 0) { # XXX deal with multiple entries on one line ($line =~ /^\s*(\w*)/) && ($found{'entry'.$1} = 1); print OUT $line; $line = ; } if (defined($found{entryurl})) { print STDERR "$progname: style file $infile already has URL entry!\n"; # print out the rest of the file, and give up print OUT $line; while () { print OUT; } $exitstatus = 1; last; } else { print OUT " eprint $mymarker\n doi $mymarker\n url $mymarker\n lastchecked $mymarker\n"; } print OUT $line; next; }; /^ *FUNCTION *\{init\.state\.consts\}/ && do { # In the init.state.consts function, add an extra set of # constants at the beginning. Also use this as the marker for # the place to add the init strings function. print OUT <; } $line =~ s/(\{.*?\}.*?\{)/$1 #0 'outside.brackets := $mymarker #1 'open.brackets := #2 'within.brackets := #3 'close.brackets := /s; print OUT $line; $found{initconsts} = 1; next; }; /^ *EXECUTE *\{init\.state\.consts\}/ && do { print OUT "EXECUTE {init.urlbst.variables}\n"; print OUT; next; }; /^ *FUNCTION *\{new.block\}/ && do { $found{newblock} = 1; }; /^ *FUNCTION *{output\.nonnull}/ && do { print OUT "$mymarker\n"; print OUT "FUNCTION {output.nonnull.original}\n"; copy_block(); print_output_functions(); $found{outputnonnull} = 1; next; }; /FUNCTION *\{fin.entry\}/ && do { # Rename fin.entry to fin.entry.original (wrapped below) s/fin.entry/fin.entry.original/; $found{finentry} = 1; print OUT; next; }; /^ *FUNCTION *{format\.date}/ && do { $found{formatdate} = 1; print OUT; next; }; /^ *FUNCTION *{format\.title}/ && do { # record that we found this $found{formattitle} = 1; print OUT; next; }; /^ *format\.b?title/ && do { # interpolate a call to setup.inlinelink print OUT " title empty\$ 'skip\$ 'setup\.inlinelink if\$ $mymarker\n"; print OUT; next; }; /^ *FUNCTION *\{article\}/ && do { print_missing_functions(); print_webpage_def(); print OUT; $found{article} = 1; next; }; /FUNCTION *\{output.bibitem\}/ && do { # Rename output.bibitem to output.bibitem.original (wrapped below) s/{output.bibitem\}/\{output.bibitem.original\}/; $found{outputbibitem} = 1; print OUT; next; }; print OUT; }; if ($exitstatus == 0) { # Skip this if we've already reported an error -- it'll only be confusing foreach $k (keys %found) { if ($found{$k} == 0) { print STDERR "$progname: $infile: failed to find feature $k\n"; } } } close (IN); close (OUT); exit $exitstatus;; sub print_output_functions { print OUT "$mymarker...\n"; print OUT <<'EOD'; % The following three functions are for handling inlinelink. They wrap % a block of text which is potentially output with write$ by multiple % other functions, so we don't know the content a priori. % They communicate between each other using the variables makeinlinelink % (which is true if a link should be made), and closeinlinelink (which holds % the string which should close any current link. They can be called % at any time, but start.inlinelink will be a no-op unless something has % previously set makeinlinelink true, and the two ...end.inlinelink functions % will only do their stuff if start.inlinelink has previously set % closeinlinelink to be non-empty. FUNCTION {setup.inlinelink} { makeinlinelink { hrefform #1 = % hypertex { "\special {html: }{" * 'openinlinelink := "\special {html:}" 'closeinlinelink := } { hrefform #2 = % hyperref { "\href{" url * "}{" * 'openinlinelink := "}" 'closeinlinelink := } 'skip$ if$ % hrefform #2 = } if$ % hrefform #1 = #0 'makeinlinelink := } 'skip$ if$ % makeinlinelink } FUNCTION {add.inlinelink} { openinlinelink empty$ 'skip$ { openinlinelink swap$ * closeinlinelink * "" 'openinlinelink := } if$ } EOD # new.block is defined elsewhere print OUT <<'EOD'; FUNCTION {output.nonnull} { % Save the thing we've been asked to output 's := % If the bracket-state is close.brackets, then add a close-bracket to % what is currently at the top of the stack, and set bracket.state % to outside.brackets bracket.state close.brackets = { "]" * outside.brackets 'bracket.state := } 'skip$ if$ bracket.state outside.brackets = { % We're outside all brackets -- this is the normal situation. % Write out what's currently at the top of the stack, using the % original output.nonnull function. s add.inlinelink output.nonnull.original % invoke the original output.nonnull } { % Still in brackets. Add open-bracket or (continuation) comma, add the % new text (in s) to the top of the stack, and move to the close-brackets % state, ready for next time (unless inbrackets resets it). If we come % into this branch, then output.state is carefully undisturbed. bracket.state open.brackets = { " [" * } { ", " * } % bracket.state will be within.brackets if$ s * close.brackets 'bracket.state := } if$ } % Call this function just before adding something which should be presented in % brackets. bracket.state is handled specially within output.nonnull. FUNCTION {inbrackets} { bracket.state close.brackets = { within.brackets 'bracket.state := } % reset the state: not open nor closed { open.brackets 'bracket.state := } if$ } FUNCTION {format.lastchecked} { lastchecked empty$ { "" } { inbrackets "cited " lastchecked * } if$ } EOD print OUT "$mymarkerend\n"; } sub print_webpage_def { print OUT "$mymarker...\n"; # Some of the functions below call new.block, so we need a dummy # version, in the case where the style being edited doesn't supply # that function. if (! $found{newblock}) { print OUT "FUNCTION {new.block} % dummy new.block function\n{\n % empty\n}\n\n"; $found{newblock} = 1; } print OUT <<'EOD'; % Functions for making hypertext links. % In all cases, the stack has (link-text href-url) % % make 'null' specials FUNCTION {make.href.null} { pop$ } % make hypertex specials FUNCTION {make.href.hypertex} { "\special {html: }" * swap$ * "\special {html:}" * } % make hyperref specials FUNCTION {make.href.hyperref} { "\href {" swap$ * "} {\path{" * swap$ * "}}" * } FUNCTION {make.href} { hrefform #2 = 'make.href.hyperref % hrefform = 2 { hrefform #1 = 'make.href.hypertex % hrefform = 1 'make.href.null % hrefform = 0 (or anything else) if$ } if$ } % If inlinelinks is true, then format.url should be a no-op, since it's % (a) redundant, and (b) could end up as a link-within-a-link. FUNCTION {format.url} { inlinelinks #1 = url empty$ or { "" } { hrefform #1 = { % special case -- add HyperTeX specials urlintro "\url{" url * "}" * url make.href.hypertex * } { urlintro "\url{" * url * "}" * } if$ } if$ } FUNCTION {format.eprint} { eprint empty$ { "" } { eprintprefix eprint * eprinturl eprint * make.href } if$ } FUNCTION {format.doi} { doi empty$ { "" } { doiprefix doi * doiurl doi * make.href } if$ } % Output a URL. We can't use the more normal idiom (something like % `format.url output'), because the `inbrackets' within % format.lastchecked applies to everything between calls to `output', % so that `format.url format.lastchecked * output' ends up with both % the URL and the lastchecked in brackets. FUNCTION {output.url} { url empty$ 'skip$ { new.block format.url output format.lastchecked output } if$ } FUNCTION {output.web.refs} { new.block output.url addeprints eprint empty$ not and { format.eprint output.nonnull } 'skip$ if$ adddoiresolver doi empty$ not and { format.doi output.nonnull } 'skip$ if$ } % Wrapper for output.bibitem.original. % If the URL field is not empty, set makeinlinelink to be true, % so that an inline link will be started at the next opportunity FUNCTION {output.bibitem} { outside.brackets 'bracket.state := output.bibitem.original inlinelinks url empty$ not and { #1 'makeinlinelink := } { #0 'makeinlinelink := } if$ } % Wrapper for fin.entry.original FUNCTION {fin.entry} { output.web.refs % urlbst makeinlinelink % ooops, it appears we didn't have a title for inlinelink { setup.inlinelink % add some artificial link text here, as a fallback "[link]" output.nonnull } 'skip$ if$ bracket.state close.brackets = % urlbst { "]" * } 'skip$ if$ fin.entry.original } % Webpage entry type. % Title and url fields required; % author, note, year, month, and lastchecked fields optional % See references % ISO 690-2 http://www.nlc-bnc.ca/iso/tc46sc9/standard/690-2e.htm % http://www.classroom.net/classroom/CitingNetResources.html % http://neal.ctstateu.edu/history/cite.html % http://www.cas.usf.edu/english/walker/mla.html % for citation formats for web pages. FUNCTION {webpage} { output.bibitem author empty$ { editor empty$ 'skip$ % author and editor both optional { format.editors output.nonnull } if$ } { editor empty$ { format.authors output.nonnull } { "can't use both author and editor fields in " cite$ * warning$ } if$ } if$ new.block title empty$ 'skip$ 'setup.inlinelink if$ format.title "title" output.check inbrackets "online" output new.block year empty$ 'skip$ { format.date "year" output.check } if$ % We don't need to output the URL details ('lastchecked' and 'url'), % because fin.entry does that for us, using output.web.refs. The only % reason we would want to put them here is if we were to decide that % they should go in front of the rather miscellaneous information in 'note'. new.block note output fin.entry } EOD print OUT "$mymarkerend\n\n\n"; } sub print_missing_functions { # We've got to the bit of the file which handles the entry # types, so write out the webpage entry handler. This uses # the format.date function, which which many but not all # bst files have (for example, apalike doesn't). So # check that we either have found this function already, or # add it. if (! $found{formatdate}) { if ($found{entrymonth}) { print OUT <<'EOD'; FUNCTION {format.date} { year empty$ { month empty$ { "" } { "there's a month but no year in " cite$ * warning$ month } if$ } { month empty$ 'year { month " " * year * } if$ } if$ } EOD } else { print OUT <<'EOD'; FUNCTION {format.date} { year empty$ 'skip$ { %write$ "(" year * ")" * } if$ } EOD } $found{formatdate} = 1; } # If the style file didn't supply a format.title function, then supply # one here (the {webpage} function requires it). if (! $found{formattitle}) { print OUT <<'EOD'; FUNCTION {format.title} { title empty$ { "" } { title "t" change.case$ } if$ } EOD $found{formattitle} = 1; } } # Utility function: Keep track of open and close braces in the string argument. # Keep state in $bracematchtotal, return the current value. sub bracematcher { my $s = shift; $s =~ s/[^\{\}]//g; #print "s=$s\n"; foreach my $c (split (//, $s)) { $bracematchtotal += ($c eq '{' ? 1 : -1); } return $bracematchtotal; } # Utility function: use bracematcher to copy the complete block which starts # on or after the current line. sub copy_block { $bracematchtotal = 0; # copy any leading lines which don't have braces (presumably comments) while (defined ($line = ) && ($line !~ /{/)) { print OUT $line; } while (defined ($line) && bracematcher($line) > 0) { print OUT $line; $line = ; } print OUT "$line\n"; # print out terminating \} (assumed # alone on the line) }