From BernardWeiler at compuserve.de  Mon Dec 27 02:34:26 2004
From: BernardWeiler at compuserve.de (Bernard Weiler)
Date: Mon, 27 Dec 2004 11:34:26 +0100
Subject: [gutvol-p] Converts catalog.rdf to OpenOffice
Message-ID: <41CFE532.1070207@compuserve.de>

hi,

In order to easily read the booklist of gutenberg, I needed a table,
sortable by authors and language.
So, catalog.rdf has to be converted to OpenOffice-Spreadsheet.
I don't know where to deposit it, so please find this perl-program
converting to HTML-table and OpenOffice-Spreadsheet

#!/usr/bin/perl
#
# Reads catalog.rdf from the current directory, collects per-book metadata
# and file locations into %books (keyed by etext number), then writes the
# result to catalog.html.
#
# NOTE(review): `use strict` is deliberately not enabled file-wide. The
# output code further down dereferences per-book lists that may not exist
# (e.g. @{$o->{'created'}}), which would die under strict refs.

use XML::LibXML;
#use Spreadsheet::WriteExcel;
use OpenOffice::OOCBuilder;
require Encode;

# catalog.rdf is in utf-8 so stdout should be utf-8 too
#binmode (STDOUT, ':utf8');

my $parser = XML::LibXML->new ();
$parser->keep_blanks (0);
my $doc = $parser->parse_file ('catalog.rdf');

# etext number => { titles => [...], authors => [...], created => [...],
#                   lang => [...], files => [...] }
# A key is only present when at least one value was found for it.
# All stored strings are UTF-8 encoded byte strings.
my %books;

# Which text nodes of a book node go into which list of the book record.
# Each row: [ XPath relative to the book node, record key, label prefix ].
# Row order matters: it fixes the order of entries within 'titles' and
# 'authors'.
my @field_map = (
    [ 'dc:title//text()',       'titles',  ''              ],
    [ 'dc:creator//text()',     'authors', ''              ],
    [ 'dc:alternative//text()', 'titles',  'aka: '         ],
    [ 'dc:subject//text()',     'titles',  'Subject: '     ],
    [ 'dc:contributor//text()', 'authors', 'Contributor: ' ],
    [ 'dc:created//text()',     'created', ''              ],
    [ 'dc:description//text()', 'titles',  'Description: ' ],
    [ 'dc:language//text()',    'lang',    ''              ],
);

# parse XML into %books data structure
#

# parse book nodes
my @booknodes = $doc->findnodes ('/rdf:RDF/pgterms:etext');
foreach my $booknode (@booknodes) {
    # this is a book description node
    my $etext_no = $booknode->getAttribute ('ID');
    next unless defined $etext_no;      # no ID: nothing to file it under
    $etext_no =~ s/^etext//;

    my $o = {};
    #print map(($_->nodeName."\n"),$booknode->childNodes);
    foreach my $field (@field_map) {
        my ($xpath, $key, $prefix) = @{$field};
        foreach my $text_node ($booknode->findnodes ($xpath)) {
            push @{$o->{$key}},
                Encode::encode_utf8 ($prefix . $text_node->textContent);
        }
    }
    $books{$etext_no} = $o;
}
# release some memory ("@booknodes = undef" would instead leave a
# one-element array holding undef)
undef @booknodes;

# parse file nodes
my @filenodes = $doc->findnodes ('/rdf:RDF/pgterms:file');
foreach my $filenode (@filenodes) {
    foreach my $n ($filenode->findnodes ('dcterms:isFormatOf')) {
        # this is a file description node
        my $etext_no = $n->getAttribute ('resource');
        next unless defined $etext_no;  # no book reference on this node
        $etext_no =~ s/^\#etext//;
        push @{$books{$etext_no}->{'files'}},
            $filenode->getAttribute ('about');
    }
}
undef @filenodes;   # release some memory
undef $doc;

# output HTML
#
# The bareword handle FH is kept because the print statements that follow
# write to it. The string literal opened by the print below continues on
# the following lines of the file.
open (FH, '>', 'catalog.html')
    or die "Cannot open catalog.html for writing: $!";
print FH "
| Author | Title | scandate | lang | Nr | Location | Fetch? |
| ".join(" ",@{$o->{'authors'}}); print FH " | ".join(" ",@{$o->{'titles'}}); print FH " | ".join(" ",@{$o->{'created'}}); print FH " | ".join(" ",@{$o->{'lang'}}); print FH " | $etext_no\n"; print FH " | ".join(" ",@{$o->{'files'}}).""; print FH " |