[gutvol-p] Converts catalog.rdf to OpenOffice

Bernard Weiler BernardWeiler at compuserve.de
Mon Dec 27 02:34:26 PST 2004


hi,

In order to read the Gutenberg book list easily, I needed a table that is
sortable by author and language. So catalog.rdf has to be converted to an
OpenOffice spreadsheet.
I don't know where to post it, so please find below this Perl program,
which converts catalog.rdf to an HTML table and an OpenOffice spreadsheet.

#!/usr/bin/perl
#

use XML::LibXML;
#use Spreadsheet::WriteExcel;
use OpenOffice::OOCBuilder;
require Encode;

# catalog.rdf is in utf-8 so stdout should be utf-8 too
#binmode (STDOUT, ':utf8');

# Input catalog: the first command-line argument if one is given, otherwise
# catalog.rdf in the current directory (the previous hard-coded behaviour).
my $catalog_file = @ARGV ? $ARGV[0] : 'catalog.rdf';

my $parser = XML::LibXML->new ();
# Turn off keep_blanks so the parser does not keep whitespace-only text
# (see the XML::LibXML parser options); the XPath queries below select
# text() nodes.
$parser->keep_blanks (0);

# The whole catalog is loaded into one DOM tree.
my $doc = $parser->parse_file ($catalog_file);

# etext number => hash ref of array refs; filled by the parsing code below
# and read by the output code.
my %books;

# parse XML into %books data structure
#
# parse book nodes
#
# Every /rdf:RDF/pgterms:etext node describes one book.  %books maps the
# etext number (the ID attribute with its leading "etext" removed) to a hash
# ref holding array refs under the keys 'titles', 'authors', 'created' and
# 'lang'.

# One row per harvested element: [ element name, destination key, label ].
# The rows are processed in this order, which fixes the order of the entries
# inside each list.  Labels are single-line strings: in the mailed copy of
# this script the literals had been wrapped, which embedded a newline right
# after the label.
my @book_fields = (
    [ 'dc:title',       'titles',  ''              ],
    [ 'dc:creator',     'authors', ''              ],
    [ 'dc:alternative', 'titles',  'aka: '         ],
    [ 'dc:subject',     'titles',  'Subject: '     ],
    [ 'dc:contributor', 'authors', 'Contributor: ' ],
    [ 'dc:created',     'created', ''              ],
    [ 'dc:description', 'titles',  'Description: ' ],
    [ 'dc:language',    'lang',    ''              ],
);

my @booknodes = $doc->findnodes ('/rdf:RDF/pgterms:etext');

foreach my $booknode (@booknodes) {
    # this is a book description node
    my $etext_no = $booknode->getAttribute ('ID');
    $etext_no =~ s/^etext//;
    my $o = {};
    foreach my $field (@book_fields) {
        my ($element, $key, $label) = @$field;
        # collect every text node at any depth below the element
        foreach my $text ($booknode->findnodes ("$element//text()")) {
            # stored as UTF-8 encoded bytes, ready to be written out as-is
            push @{$o->{$key}},
                Encode::encode_utf8($label . $text->textContent);
        }
    }
    $books{$etext_no} = $o;
}
@booknodes = (); # release some memory (assigning undef would leave one element)

# parse file nodes
#
# Every /rdf:RDF/pgterms:file node describes one file.  Each of its
# dcterms:isFormatOf children points to a book through its 'resource'
# attribute ("#etext" followed by the number); the file's 'about' attribute
# is appended to that book's 'files' list.  A file that refers to a number
# not seen among the book nodes creates a new %books entry holding only
# 'files'.

my @filenodes = $doc->findnodes ('/rdf:RDF/pgterms:file');

foreach my $filenode (@filenodes) {
    # the same for every isFormatOf child, so look it up once
    my $location = $filenode->getAttribute ('about');
    foreach my $n ($filenode->findnodes ('dcterms:isFormatOf')) {
        # this is a file description node
        my $etext_no = $n->getAttribute ('resource');
        $etext_no =~ s/^\#etext//;
        push @{$books{$etext_no}->{'files'}}, $location;
    }
}
@filenodes = (); # release some memory (assigning undef would leave one element)
$doc = undef;    # the DOM tree is not needed any more


# output HTML
#
# Writes catalog.html in the current directory: one table row per entry of
# %books, ordered by etext number, with the columns Author, Title, scandate,
# lang, Nr, Location and an empty Fetch? column.  The values in %books are
# UTF-8 encoded bytes, so the page declares that charset and the bytes are
# written unchanged.

my %html_entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');

# Builds the content of one cell: escapes the HTML metacharacters in every
# value and joins the values with <br>.  Takes an array ref; a missing
# (undefined) list gives an empty cell.
my $html_cell = sub {
    my ($values) = @_;
    my @escaped;
    foreach my $value (@{ $values || [] }) {
        (my $text = $value) =~ s/([&<>])/$html_entity{$1}/g;
        push @escaped, $text;
    }
    return join("<br>", @escaped);
};

open(my $html_fh, '>', 'catalog.html')
    or die "Cannot open catalog.html for writing: $!\n";
print $html_fh "<html><head>"
    . "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"
    . "<title>Gutenberg Index</title></head><body><table border=1>\n";
print $html_fh
    "<tr><td>Author<td>Title<td>scandate<td>lang<td>Nr<td>Location<td>Fetch?</tr>\n";
# Numeric order first; the string comparison only breaks ties between keys
# that compare equal as numbers.
foreach my $etext_no (sort { $a <=> $b or $a cmp $b } keys %books) {
    my $o = $books{$etext_no};
    print $html_fh "<tr>\n";
    print $html_fh "<td>" . $html_cell->($o->{'authors'});
    print $html_fh "<td>" . $html_cell->($o->{'titles'});
    print $html_fh "<td>" . $html_cell->($o->{'created'});
    print $html_fh "<td>" . $html_cell->($o->{'lang'});
    print $html_fh "<td>" . $html_cell->([$etext_no]) . "\n";
    print $html_fh "<td><font size=-2>" . $html_cell->($o->{'files'}) . "</font>";
    print $html_fh "<td></tr>\n";
}
print $html_fh "</table></body></html>\n";
# buffered write errors only show up when the handle is closed
close($html_fh)
    or die "Cannot finish writing catalog.html: $!\n";


#output OOo

if (0) {    # no more excel
    # Disabled Excel export, kept for reference.  It never runs; the matching
    # "use Spreadsheet::WriteExcel" line at the top of the script is commented
    # out as well.
    my $workbook = Spreadsheet::WriteExcel->new("catalog.xls");
    my $sheet    = $workbook->add_worksheet();
    $workbook->add_format()->set_text_wrap();

    # header row (row 0), columns 0..6
    my $row    = 0;
    my $column = 0;
    foreach my $heading ("Author", "Title", "Scandate", "lang", "Nr",
                         "Location", "Fetch") {
        $sheet->write($row, $column, $heading);
        $column++;
    }

    # one row per book; the Fetch column is left empty
    while (my ($etext_no, $o) = each (%books)) {
        $row++;
        my $authors  = join("\n", @{$o->{'authors'}});
        my $titles   = join("\n", @{$o->{'titles'}});
        my $created  = join(" ", @{$o->{'created'}});
        my $language = join(" ", @{$o->{'lang'}});
        my $files    = join(" ", @{$o->{'files'}});
        $sheet->write($row, 0, $authors);
        $sheet->write($row, 1, $titles);
        $sheet->write($row, 2, $created);
        $sheet->write($row, 3, $language);
        $sheet->write($row, 4, $etext_no);
        $sheet->write($row, 5, $files);
    }
}

# Write the book list as an OpenOffice spreadsheet through
# OpenOffice::OOCBuilder.  Row 1 holds the column headings; every entry of
# %books then gets one row, ordered by etext number.  set_data_xy is called
# with (column, row, value), both counted from 1.  The Fetch column (7) only
# gets a heading and stays empty for the user to fill in.
my $OOo = OpenOffice::OOCBuilder->new();
my $ii = 1;
my $column = 0;
foreach my $heading ("Author", "Title", "Scandate", "lang", "Nr",
                     "Location", "Fetch") {
    $OOo->set_data_xy(++$column, $ii, $heading);
}
# Numeric order first; the string comparison only breaks ties between keys
# that compare equal as numbers.
foreach my $etext_no (sort { $a <=> $b or $a cmp $b } keys %books) {
    my $o = $books{$etext_no};
    $ii++;#exit if $ii>10000;
    # "|| []" keeps a book without authors, titles, ... from dereferencing
    # an undefined value
    $OOo->set_data_xy(1, $ii, join("\n", @{ $o->{'authors'} || [] }));
    $OOo->set_data_xy(2, $ii, join("\n", @{ $o->{'titles'}  || [] }));
    $OOo->set_data_xy(3, $ii, join(" ",  @{ $o->{'created'} || [] }));
    $OOo->set_data_xy(4, $ii, join(" ",  @{ $o->{'lang'}    || [] }));
    $OOo->set_data_xy(5, $ii, $etext_no);
    $OOo->set_data_xy(6, $ii, join(" ",  @{ $o->{'files'}   || [] }));
}
# 'catalog' is the output name handed to OOCBuilder.
$OOo->generate('catalog');






-- 
 
Mit freundlichen Grüssen

Bernard Weiler
BernardWeiler at compuserve.de





More information about the gutvol-p mailing list