#!/usr/bin/perl -w
# Convert BibTeX files to dictionary format, using comments
# Author   : Lyndon Hill
# Last Edit: 2008.02.28
#
# (c) Lyndon Hill, 2008.
# This script is free software under terms of the GNU General Public License version 3.
# Please see http://www.gnu.org/licenses/ for more details.
#
# How it works:
#   1. Every file in $bibtexdir is scanned for @string abbreviations.
#   2. Every file is scanned again for entries.  An entry starts on a line
#      beginning with "@", has one "tag = value" field per line and ends at a
#      blank line, at a line holding only "}", or at the end of the file.
#   3. For each entry a dictionary record is written to $output.  If a file
#      named after the citation key exists in $bibsdir, its text (minus the
#      first $titlelength lines) is appended to the record as a comment.

use strict;
use warnings;

## User definable variables ##

# location of your bibtex files; @string abbreviations may live in any of them
my $bibtexdir = "/home/username/Documents/BibTex";

# location of your comments; plain text files named after each citation key
my $bibsdir = "/home/username/Documents/Bibs";

# My comment files have the title and author on the first few lines.
# I want to skip these lines for the dictionary file. $titlelength is
# the number of lines to skip
my $titlelength = 3;

# the name of the dictionary file to create
my $output = "bibs-src.bedic";

# your email address (written to the "maintainer" header line)
my $email = "My Name ";

## End of user definable variables. ##

# Get individual BibTeX filenames.  The directory is read directly instead of
# through a shell `ls`, so paths containing spaces or shell metacharacters are
# handled correctly.  Hidden files and non-files are skipped; names are sorted
# so the order of the output is stable.
opendir(my $dirhandle, $bibtexdir) or die "Can't read bibtex directory $bibtexdir: $!\n";
my @names = grep { !/^\./ && -f "$bibtexdir/$_" } readdir($dirhandle);
closedir($dirhandle);
my @bibtexfiles = sort @names;

# statistics
my $entries = 0;
my $bibs = 0;

# Pass 1: collect all @string abbreviations up front, so that expanding them
# does not depend on which file happens to be read first.
my %abbrhash = ();
foreach my $dic (@bibtexfiles)
{
  open(my $in, '<', "$bibtexdir/$dic") or die "Can't open bibtex file $bibtexdir/$dic: $!\n";
  while (defined(my $line = <$in>))
  {
    my ($abbr, $full) = parse_abbreviation(strip_eol($line));
    $abbrhash{$abbr} = $full if defined($abbr) && $abbr ne "";
  }
  close($in);
}

open(my $out, '>', $output) or die "Can't open file $output for writing: $!\n";
printheader($out);

# Pass 2: convert the entries.
foreach my $dic (@bibtexfiles)
{
  open(my $in, '<', "$bibtexdir/$dic") or die "Can't open bibtex file $bibtexdir/$dic: $!\n";
  print "Opened $dic\n";

  while (defined(my $line = <$in>))
  {
    $line = strip_eol($line);
    next if $line =~ /^\@string/i;    # abbreviations were handled in pass 1
    next unless $line =~ /^\@/;       # anything else outside an entry is ignored

    $entries++;
    my ($type, $ref) = parse_entry_header($line);

    # read comments
    my $comment = ($ref ne "") ? read_comment("$bibsdir/$ref", $titlelength) : undef;
    if (defined($comment)) { $bibs++; }
    else                   { $comment = ""; }

    # parse the rest of the entry and write the finished record
    my $fields = read_entry_fields($in);
    print {$out} format_entry($type, $ref, $fields, \%abbrhash, $comment);
  }
  close($in);
}

# Buffered write errors only surface when the file is closed.
close($out) or die "Can't finish writing $output: $!\n";

print "$entries entries, $bibs with comments.\n";

exit;

# Return $line without its trailing end-of-line characters (LF or CRLF).
# Unlike chop, this leaves a final line without a newline intact.
sub strip_eol
{
  my ($line) = @_;
  $line =~ s/[\r\n]+\z//;
  return $line;
}

# Return $text with leading and trailing whitespace removed.
sub trim
{
  my ($text) = @_;
  $text =~ s/^\s+//;
  $text =~ s/\s+$//;
  return $text;
}

# Parse a line of the form  @string{abbr = {Full Name}}  (or with "quotes").
# Returns (abbreviation, expansion), or an empty list if the line is not an
# abbreviation definition.  Braces and double quotes are dropped, the same
# way field values are cleaned, so that expansions are not printed quoted.
sub parse_abbreviation
{
  my ($line) = @_;
  return unless $line =~ /^\@string\s*[\{\(]([^=]*)=(.*)$/i;
  my ($abbr, $full) = ($1, $2);
  $abbr =~ tr/{}//d;
  $full =~ tr/{}"//d;
  return (trim($abbr), trim($full));
}

# Parse the first line of an entry, e.g.  @Article{key,
# Returns (lower-cased entry type, citation key); either may be "".
sub parse_entry_header
{
  my ($line) = @_;
  my ($type, $ref) = split /\{/, $line;
  $type = "" unless defined($type);
  $ref  = "" unless defined($ref);

  $type = lc(trim(substr($type, 1)));    # drop the leading "@"
  $ref =~ s/\s*,?\s*$//;                 # drop the comma that follows the key
  $ref =~ s/^\s+//;
  return ($type, $ref);
}

# Split one "tag = value" line of an entry.
# Returns (tag, data): tag is lower-cased with blanks removed, data has
# speech marks, curly braces, backslashes and the final comma removed.
# A line without any tag yields a tag of "".
sub parse_field
{
  my ($line) = @_;

  # Limit of 2: only the first "=" separates tag and value, so values that
  # contain "=" themselves (URLs, for example) are kept whole.
  my ($tag, $data) = split /=/, $line, 2;
  $tag  = "" unless defined($tag);
  $data = "" unless defined($data);

  # remove whitespace from tag
  $tag =~ tr/ \t//d;
  $tag = lc($tag);

  # remove speech marks, curly braces and final comma from data
  $data =~ tr/"{}\\//d;
  $data =~ s/\s*,?\s*$//;

  # remove white space from certain data
  if ($tag eq "year" || $tag eq "volume" || $tag eq "number" || $tag eq "pages")
  {
    $data =~ tr/ \t//d;
  }
  $data =~ s/^\s+//;

  return ($tag, $data);
}

# Read the fields of the current entry from the open BibTeX handle $fh.
# Reading stops at a blank line, at a line holding only the closing "}" or at
# the end of the file, so the last entry of a file is never lost.
# Returns a reference to a hash mapping tag => cleaned value.
sub read_entry_fields
{
  my ($fh) = @_;
  my %entryhash = ();

  while (defined(my $line = <$fh>))
  {
    $line = strip_eol($line);
    last if $line =~ /^\s*\}\s*$/;

    my ($tag, $data) = parse_field($line);
    last if $tag eq "";    # end reading an entry if tag was empty
    $entryhash{$tag} = $data;
  }
  return \%entryhash;
}

# Read the comment file $path and return its text prepared for the
# dictionary, skipping the first $skip lines.  Returns undef when there is no
# readable comment file.
#
# Blank lines and short lines (fewer than 40 characters) end with a {br/}
# break; longer lines are joined with a space so paragraphs reflow.
sub read_comment
{
  my ($path, $skip) = @_;
  return unless -f $path;

  my $fh;
  if (!open($fh, '<', $path))
  {
    warn "Can't open comment file $path: $!\n";
    return;
  }

  my $comment = "";
  my $linecounter = 0;
  my $linebreak = 0;    # Last iteration inserted a linebreak for paragraphisation

  while (defined(my $line = <$fh>))
  {
    $linecounter++;
    next if $linecounter <= $skip;
    $line = strip_eol($line);

    # Deal with html.  "&" is escaped first so that the entities produced for
    # "<" and ">" are not escaped a second time.
    $line =~ s/&/&amp;/g;
    $line =~ s/</&lt;/g;
    $line =~ s/>/&gt;/g;

    # Remove special brackets: curly braces delimit tags in the output format.
    # NOTE(review): the original replacement text was unreadable in the
    # source; parentheses are used as a neutral substitute - confirm.
    $line =~ tr/{}/()/;

    # add lines to comment, if blank line add a line feed
    if ($line eq "")
    {
      $comment .= $linebreak ? "{br/}\n" : "{br/}\n{br/}\n";
      $linebreak = 1;
    }
    elsif (length($line) < 40)
    {
      $comment .= $line . "{br/}\n";
      $linebreak = 1;
    }
    else
    {
      $comment .= $line . " ";
      $linebreak = 0;
    }
  }
  close($fh);

  return $comment;
}

# Turn a BibTeX author field ("A and B and C") into "A, B and C".
# Returns "" when there is no author; a single author is returned unchanged.
sub format_authors
{
  my ($raw) = @_;
  return "" unless defined($raw);

  my @authors = split / and /, $raw;
  return "" unless @authors;
  return $raw if @authors == 1;

  $authors[0] =~ s/^ //;
  my $last = pop @authors;
  return join(", ", @authors) . " and " . $last;
}

# Build the complete dictionary record for one entry and return it as text.
#   $type    - lower-cased entry type ("article", "book", ...)
#   $ref     - citation key, used as the dictionary headword
#   $entry   - hash reference of cleaned fields (see read_entry_fields)
#   $abbrevs - hash reference of @string abbreviations
#   $comment - prepared comment text, "" if there is none
# Problems with the entry (missing journal, booktitle or year) are reported
# on standard output.
sub format_entry
{
  my ($type, $ref, $entry, $abbrevs, $comment) = @_;

  # Which field names the place of publication for each entry type.
  my %venue_field = (
    "inproceedings" => "booktitle",
    "article"       => "journal",
    "book"          => "publisher",
    "techreport"    => "institution",
  );

  my $text = $ref . "\n{s}\n {ct}" . $type . "{/ct}\n";

  # Output entry details
  my $author = format_authors($entry->{"author"});
  my $title = defined($entry->{"title"}) ? $entry->{"title"} : "";
  $text .= " {ss}";
  $text .= $author . ": " if $author ne "";
  $text .= $title . ".";

  # where published
  my $where = "";
  my $field = $venue_field{$type};
  if (defined($field) && defined($entry->{$field}))
  {
    $where = $entry->{$field};
    $where = $abbrevs->{$where} if defined($abbrevs->{$where});
    $text .= " " . $where;
  }

  # Warn when the venue field is MISSING.
  if ($type eq "article" && !defined($entry->{"journal"}))
  {
    print "Citation $ref does not have a journal for it's article!\n";
  }
  if ($type eq "inproceedings" && !defined($entry->{"booktitle"}))
  {
    print "Citation $ref does not have a booktitle for it's proceedings!\n";
  }

  # which issue
  my $issue = "";
  my $volume = $entry->{"volume"};
  my $number = $entry->{"number"};
  if (defined($volume) && defined($number)) { $issue = $volume . "(" . $number . ")"; }
  elsif (defined($volume))                  { $issue = "volume " . $volume; }
  elsif (defined($number))                  { $issue = "number " . $number; }

  if ($issue ne "")
  {
    $text .= ($where ne "") ? ", $issue" : " " . $issue;
  }

  # important details
  my $pages = "";
  if (defined($entry->{"pages"}))
  {
    $pages = $entry->{"pages"};
    $pages =~ s/--/-/;
    # The leading space keeps "Pages" from running into the title's full stop.
    $text .= ($issue ne "" || $where ne "") ? ", pages " . $pages : " Pages " . $pages;
  }

  if (defined($entry->{"year"}))
  {
    my $year = $entry->{"year"};
    $year =~ s/,$//;
    $text .= ($issue ne "" || $pages ne "" || $where ne "") ? ", $year." : " $year.";
  }
  else
  {
    print "Citation $ref has no year!\n";
  }

  $text .= "{/ss}\n{/s}\n";

  if (defined($entry->{"note"}) || defined($entry->{"url"}))
  {
    $text .= "{s}\n";
    if (defined($entry->{"note"})) { $text .= " {ss}" . $entry->{"note"} . "{/ss}\n"; }
    if (defined($entry->{"url"}))  { $text .= " {ss}" . $entry->{"url"} . "{/ss}\n"; }
    $text .= "{/s}\n";
  }

  if ($comment ne "")
  {
    $text .= "{s}\n {ss}" . $comment . "{/ss}\n{/s}\n";
  }

  # entry separating linefeed
  $text .= "\n";

  return $text;
}

# Output the header at the start of the dictionary file
#   $fh - file handle (or glob) the header is written to
sub printheader
{
  my $fh = shift;

  print {$fh} "id=BibTeX References\n";
  print {$fh} "description=My private BibTeX and notes.\n";
  print {$fh} "maintainer=$email\n";
  print {$fh} "char-precedence={ -,!/.()?*+=^&:;_[]<>\{\}%\"\#\'~}{0123456789}{aA}{bB}{cC}{dD}{eE}{fF}{gG}{hH}{iI}{jJ}{kK}{lL}{mM}{nN}{oO}{pP}{qQ}{rR}{sS}{tT}{uU}{vV}{wW}{xX}{yY}{zZ}\n\n";
}