From BernardWeiler@compuserve.de Mon Dec 27 02:34:49 2004 From: Bernard Weiler To: gutvol-p@lists.pglaf.org Subject: [gutvol-p] Converts catalog.rdf to OpenOffice Date: Mon, 27 Dec 2004 11:34:26 +0100 Message-ID: <41CFE532.1070207@compuserve.de> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============0072920255889586639==" --===============0072920255889586639== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable hi, In oder to easily read the booklist of gutenberg, I needed a table,=20 sortable by authors and language. So, catalog.rdf has to be converted to=20 OpenOffice-Spreadsheet. I don't know where to depose it, so please find this perl-program=20 converting to HTML-table and OpenOffice-Spreadsheet
#!/usr/bin/perl
#
# Convert Project Gutenberg's catalog.rdf (read from the current directory)
# into an HTML table (catalog.html) and an OpenOffice spreadsheet named
# "catalog" (written through OpenOffice::OOCBuilder).
#
# The script is embedded in an archived mail message.  Run it with
#     perl -x <this file>
# so that perl skips the mail text above the #! line; everything after
# __END__ is ignored as well.
#
# NOTE(review): the archived copy was quoted-printable encoded ("=3D",
# "=20"), collapsed onto a few lines, and had the HTML tags stripped from
# its string literals.  The encoding has been undone and the HTML markup in
# write_html() has been reconstructed from the surviving column names --
# confirm the markup against the author's original if it matters.

use strict;
use warnings;

use XML::LibXML;
#use Spreadsheet::WriteExcel;   # Excel export was already disabled ("if(0)")
                                # in the original and has been dropped.
use OpenOffice::OOCBuilder;
require Encode;

# How each child element of a pgterms:etext node is stored:
#   [ element name, destination field, label prepended to the text ]
# The order matters: it fixes the order of entries inside 'titles' and
# 'authors'.
my @FIELD_MAP = (
    [ 'dc:title',       'titles',  ''              ],
    [ 'dc:creator',     'authors', ''              ],
    [ 'dc:alternative', 'titles',  'aka: '         ],
    [ 'dc:subject',     'titles',  'Subject: '     ],
    [ 'dc:contributor', 'authors', 'Contributor: ' ],
    [ 'dc:created',     'created', ''              ],
    [ 'dc:description', 'titles',  'Description: ' ],
    [ 'dc:language',    'lang',    ''              ],
);

# Column headings.  The last column has a heading only; no data is ever
# written to it (same as the original).
my @HTML_COLUMNS  = ('Author', 'Title', 'scandate', 'lang', 'Nr', 'Location', 'Fetch?');
my @SHEET_COLUMNS = ('Author', 'Title', 'Scandate', 'lang', 'Nr', 'Location', 'Fetch');

# Return the list stored under $field of $book, or an empty list when the
# catalog had no such element for this book (avoids dereferencing undef).
sub _values {
    my ($book, $field) = @_;
    return @{ $book->{$field} || [] };
}

# Return the etext numbers of %$books in a stable order: purely numeric
# ids first in ascending numeric order, then any other ids as strings.
sub _sorted_ids {
    my ($books) = @_;
    my @ids     = keys %{$books};
    my @numeric = sort { $a <=> $b } grep { /\A\d+\z/ } @ids;
    my @other   = sort grep { !/\A\d+\z/ } @ids;
    return (@numeric, @other);
}

# Escape the characters that are special in HTML text and attributes.
# Operates on the UTF-8 encoded bytes; only ASCII characters are touched.
sub _html_escape {
    my ($text) = @_;
    $text = '' if !defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Build the content of one table cell: escaped values separated by <br>.
sub _html_cell {
    my (@values) = @_;
    return join '<br>', map { _html_escape($_) } @values;
}

# Parse $rdf_file and return a hash ref keyed by etext number.  Each value
# is a hash ref with the optional keys 'titles', 'authors', 'created',
# 'lang' (array refs of UTF-8 encoded strings) and 'files' (array ref of
# the 'about' attributes of the pgterms:file nodes pointing at the book).
# The DOM is local to this sub, so it is released on return.
sub parse_catalog {
    my ($rdf_file) = @_;

    my $parser = XML::LibXML->new();
    $parser->keep_blanks(0);
    my $doc = $parser->parse_file($rdf_file);

    my %books;

    # Book description nodes.
    for my $booknode ($doc->findnodes('/rdf:RDF/pgterms:etext')) {
        my $etext_no = $booknode->getAttribute('ID');
        $etext_no = '' if !defined $etext_no;
        $etext_no =~ s/^etext//;

        my %book;
        for my $rule (@FIELD_MAP) {
            my ($element, $field, $label) = @{$rule};
            for my $text ($booknode->findnodes("$element//text()")) {
                push @{ $book{$field} },
                    Encode::encode_utf8($label . $text->textContent);
            }
        }
        $books{$etext_no} = \%book;
    }

    # File description nodes: attach each file to the book it is a format of.
    for my $filenode ($doc->findnodes('/rdf:RDF/pgterms:file')) {
        for my $format_of ($filenode->findnodes('dcterms:isFormatOf')) {
            my $etext_no = $format_of->getAttribute('resource');
            $etext_no = '' if !defined $etext_no;
            $etext_no =~ s/^\#etext//;

            my $location = $filenode->getAttribute('about');
            push @{ $books{$etext_no}{files} },
                defined $location ? $location : '';
        }
    }

    return \%books;
}

# Write all books as one HTML table to $html_file.  Dies if the file
# cannot be opened or closed.
sub write_html {
    my ($books, $html_file) = @_;

    open my $fh, '>', $html_file
        or die "Cannot open $html_file for writing: $!\n";

    # The strings are already UTF-8 encoded bytes, so no I/O layer is set.
    print {$fh} "<html><head>",
        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">",
        "<title>Gutenberg Index</title></head>\n";
    print {$fh} "<body><table border=\"1\">\n";
    print {$fh} '<tr>', (map { "<th>$_</th>" } @HTML_COLUMNS), "</tr>\n";

    for my $etext_no (_sorted_ids($books)) {
        my $book  = $books->{$etext_no};
        my @cells = (
            (map { _html_cell(_values($book, $_)) }
                 qw(authors titles created lang)),
            _html_escape($etext_no),
            _html_cell(_values($book, 'files')),
            '',    # "Fetch?" column: intentionally left empty
        );
        print {$fh} '<tr>', (map { "<td>$_</td>" } @cells), "</tr>\n";
    }

    print {$fh} "</table></body></html>\n";
    close $fh or die "Cannot close $html_file: $!\n";
    return;
}

# Write all books to an OpenOffice spreadsheet called $basename.
# Row 1 holds the headings; columns and rows are 1-based.
sub write_spreadsheet {
    my ($books, $basename) = @_;

    my $sheet = OpenOffice::OOCBuilder->new();
    my $row   = 1;

    my $col = 0;
    for my $heading (@SHEET_COLUMNS) {
        $sheet->set_data_xy(++$col, $row, $heading);
    }

    for my $etext_no (_sorted_ids($books)) {
        my $book = $books->{$etext_no};
        $row++;
        $sheet->set_data_xy(1, $row, join("\n", _values($book, 'authors')));
        $sheet->set_data_xy(2, $row, join("\n", _values($book, 'titles')));
        $sheet->set_data_xy(3, $row, join(" ",  _values($book, 'created')));
        $sheet->set_data_xy(4, $row, join(" ",  _values($book, 'lang')));
        $sheet->set_data_xy(5, $row, $etext_no);
        $sheet->set_data_xy(6, $row, join(" ",  _values($book, 'files')));
    }

    $sheet->generate($basename);
    return;
}

my $books = parse_catalog('catalog.rdf');
write_html($books, 'catalog.html');
write_spreadsheet($books, 'catalog');

__END__
--=20 =20 Mit freundlichen Gr=C3=BCssen Bernard Weiler BernardWeiler(a)compuserve.de --===============0072920255889586639==--