#!/yer/perl/here -T

use strict;
use warnings;

=pod

=head1 NAME

rss2blogger

=head1 SUMMARY

 # Stick this in your crontab
 0 0-23 1-31 1-12 * /yer/path/to/rss2blogger

=head1 DESCRIPTION

This is a simple program that fetches a list of rss feeds and posts them
to a Blogger account.

I wrote it as a debugging tool for Blogger.pm and figured it might be of
interest to someone else.

=cut

use lib qw ( /yer/perl/lib );

# CORE modules
use CGI qw (:html);

# CPAN modules
use Carp;
use HTTP::Request;
use LWP::UserAgent;
use Storable;
use XML::RSS;

# OTHER modules
use Blogger;

# Things you want to define
#

use constant VERBOSE => 1;

# The path to a file where Storable.pm
# can save the $store_hash hash. This
# data is used to prevent duplicate
# postings for an RSS feed.
use constant STORE => undef;

# In order to prevent the STORE file
# from getting too big, we periodically
# delete data whose created time is
# older than MAX_DAYS. Leave this undefined
# to disable pruning altogether.
use constant MAX_DAYS => undef;

# Blogger.pm data
# See module docs for details
use constant APPKEY   => "";
use constant USERNAME => "";
use constant PASSWORD => "";
use constant BLOGNAME => "";

# Put yer unquoted URLs here
# URLs should be separated by a space.
use constant FEEDS => qw [];

#
# Okay, stop defining

use constant NOW_IN_SECONDS     => time;
use constant SECONDS_IN_DAY     => 60 * 60 * 24;
use constant MAX_PUBHACK_TRIES  => 5;
use constant PUBHACK_SLEEP_SECS => 15;

# State shared between main() and publish_hack_tool():
#   $store_hash : { feed title => { item title => { created => epoch } } }
#   $publish    : publish flag sent with the most recent successful post
#   $pid        : id returned by the most recent successful newPost call
my $store_hash = {};
my $publish    = 0;
my $pid        = 0;

{
    main();
    exit;
}

# main()
#
# Loads the store of already-posted items, prunes stale entries, fetches
# every feed listed in FEEDS, posts each unseen item to the blog and
# finally writes the store back to disk.
#
# Returns 1 on success. Croaks on configuration errors, when the Blogger,
# LWP or CGI objects cannot be created, and when the store file cannot be
# read or written. Problems with an individual feed or item are reported
# with carp and that feed/item is skipped.
sub main {

    # Copy the constants into lexicals. A list constant evaluated in
    # scalar context is documented as subject to change, and a lexical
    # path keeps the file test below unambiguous.
    my @feeds      = FEEDS;
    my $store_path = STORE;
    my $max_days   = MAX_DAYS;

    # Some basic sanity checking
    croak "No feeds. Nothing to do." if (! @feeds);
    croak "No store file specified." if (! $store_path);

    if (-f $store_path) {
        $store_hash = retrieve($store_path)
            or croak "Unable to retrieve \$store_hash. $!";
    }

    # Prune the store_hash. With MAX_DAYS undefined the cut-off would be
    # "now", which would throw away every remembered item and cause each
    # feed to be re-posted in full, so pruning is skipped in that case.
    if (defined $max_days) {
        my $prune_time = NOW_IN_SECONDS - (SECONDS_IN_DAY * $max_days);

        foreach my $feed_title (keys %$store_hash) {
            foreach my $item_title (keys %{ $store_hash->{$feed_title} }) {
                if ($store_hash->{$feed_title}{$item_title}{"created"} < $prune_time) {
                    delete $store_hash->{$feed_title}{$item_title};
                }
            }
        }
    }

    my $blogger = Blogger->new(appkey => APPKEY, username => USERNAME, password => PASSWORD)
        or croak "Can't create Blogger object. $Blogger::LAST_ERROR\n";

    $blogger->BlogId($blogger->GetBlogId(blogname => BLOGNAME))
        or croak "No blog id ".$blogger->LastError();

    my $ua = LWP::UserAgent->new()
        or croak "Failed to create LWP object. $!";

    my $cgi = CGI->new()
        or croak "Failed to create CGI object. $!";

    # We keep track how many feeds we're reading/have read
    # and do the same for RSS items below. This is done so
    # that we only issue a single "publish" command to the
    # Blogger server rather than generating/publishing files
    # for every single RSS item. That's just bad form.
    my $num_feeds = scalar @feeds;
    my $cur_feed  = 0;

    # Reverse the order to preserve the order.
    FEED:
    foreach my $url (reverse @feeds) {
        $cur_feed++;

        my $req = HTTP::Request->new(GET => $url)
            or croak "Failed to create HTTP object. $!";

        my $rss = XML::RSS->new()
            or croak "Failed to create RSS object. $!";

        my $response = $ua->request($req);

        if ($response->code() != 200) {
            carp "Failed to slurp $url.";
            next FEED;
        }

        eval { $rss->parse($response->content()); };

        if ($@) {
            carp "There was an error parsing $url\n$@";
            next FEED;
        }

        # Reverse the order to preserve the order.
        my @items = reverse @{ $rss->{'items'} || [] };

        if (! @items) {
            carp "$url returned no items.";
            next FEED;
        }

        my $feed = $rss->{'channel'}{'title'};

        if (! $feed) {
            carp "Unable to determine title for $url\n";
            next FEED;
        }

        # See note above re num_feeds
        my $num_items = scalar @items;
        my $cur_item  = 0;

        ITEM:
        foreach my $item (@items) {
            $cur_item++;

            # An item without a title is keyed on the empty string.
            my $title = defined $item->{'title'} ? $item->{'title'} : '';
            my $link  = $item->{'link'};

            # We have already posted this item
            # Move along. These are not the posts
            # you're looking for.
            next ITEM if ($store_hash->{$feed}{$title});

            my $ptitle = "$feed, $title";
            my $post   = ($link) ? $cgi->a({-href => $link}, $ptitle) : $ptitle;

            if (my $desc = $item->{'description'}) {
                $post .= $cgi->div({-class => "description"}, $desc);
            }

            # Is this the last item from the last feed?
            $publish = (($cur_feed == $num_feeds) && ($cur_item == $num_items)) ? 1 : 0;

            my $new_pid = $blogger->newPost(postbody => \$post, publish => $publish);

            if (! $new_pid) {
                carp $blogger->LastError();

                # Nothing was sent, so nothing was published. Clearing
                # the flag lets the publish hack below pick up any
                # earlier posts from this run.
                $publish = 0;
                next ITEM;
            }

            $pid = $new_pid;
            $store_hash->{$feed}{$title}{"created"} = NOW_IN_SECONDS;

            print "($publish) $feed -- $title -- $pid\n" if (VERBOSE);
        }
    }

    # Since the last item of the last feed may have already
    # been blogged, then we may never get around to publishing
    # anything. So, we call the publish_hack_tool which is just
    # that. Please be sure to read the comments for the function.
    if (($pid) && (! $publish)) {
        warn "No publish flag set. Trying edit post hack." if (VERBOSE);
        publish_hack_tool($blogger);
    }

    # Parenthesised call + low-precedence "or" so that a failed store
    # is actually reported.
    store($store_hash, $store_path)
        or croak "Failed to store \$store_hash. $!";

    return 1;
}

# publish_hack_tool($blogger)
#
# Forces a publish by re-saving the most recent post ($pid) with its own
# content and the publish flag set.
#
#   $blogger : the Blogger object created in main()
#
# Returns 1 when the edit succeeded, 0 when the post could not be fetched
# after all attempts or the edit itself failed.
sub publish_hack_tool {
    my $blogger = shift;

    # There is a known lag in the amount of time
    # required to wait between sending a post to
    # the Blogger db and actually performing another
    # action on it.
    #
    # So, we'll sit and cool our heels for a little
    # while before trying to edit the last post with
    # its own content. Until such a time as the
    # Blogger API supports a generic "publishAll" method
    # there's not much else we can do.
    #
    # Unfortunately, I haven't been able to pin down
    # how long the Blogger db needs to get a grip, so
    # we sleep for a few seconds, ask the Blogger server
    # for the last post, and repeat a bounded number of
    # times. If the Blogger server were hosed, for instance,
    # and we had no limit this program would never exit. And
    # then in an hour another copy would be started and bad
    # things would start happening. At the very least, your
    # sysadmin will probably yell at you.
    #
    # The limit is one less than MAX_PUBHACK_TRIES: the final
    # "try" is the one that gives up.
    my $max_attempts = MAX_PUBHACK_TRIES - 1;
    my $post;

    foreach my $attempt (1 .. $max_attempts) {
        sleep(PUBHACK_SLEEP_SECS);

        $post = $blogger->getPost($pid);
        last if ($post);

        warn $blogger->LastError()     if (VERBOSE);
        warn "Trying post hack again." if (VERBOSE);
    }

    if (! $post) {
        warn "I tried to use the edit post hack ($max_attempts) times without success. Giving up.";
        return 0;
    }

    my $ok = $blogger->editPost(
        postbody => \$post->{'content'},
        postid   => $post->{'postid'},
        publish  => 1,
    );

    # We could write even more code to keep
    # trying but...we won't
    if (! $ok) {
        warn "Unable to editPost $pid. Giving up. ".$blogger->LastError();
        return 0;
    }

    warn "Publish successful." if (VERBOSE);
    return 1;
}

=pod

=head1 VERSION

1.1

=head1 DATE

September 07, 2001

=head1 CHANGES

=head2 1.1

=over 4

=item Added I<publish_hack_tool> function.

=item Increased use of named constants.

=back

=head2 1.0

=over 4

=item Initial setup.

=back

=head1 AUTHOR

Aaron Straup Cope

=head1 LICENSE

Copyright 2001, Aaron Straup Cope.

This is free software, you may use it and distribute it under the same
terms as Perl itself.

=cut