#!/yer/perl/here

use strict;
use warnings;

use CGI qw (:html);
use Carp;
use Storable;

use Net::Blogger;
use Net::Google;

# With any luck, I'll get around to putting this
# on the CPAN shortly :
# http://archive.develooper.com/modules@perl.org/msg11209.html

# For the time being, you can get it here:
# http://aaronland.net/src/perl/userland/weblog-update-0.3.1.tar.gz

use Userland::weblogUpdates;

# Things you want to define
#

use constant DEBUG   => 0;
use constant VERBOSE => 0;

# The path to a file where Storable.pm
# can save the $store_hash hash. This
# data is used to prevent duplicate
# postings for a query.

use constant STORE => "";

# In order to prevent the STORE file
# from getting too big, we periodically
# delete data whose created time is
# older than MAX_DAYS

use constant MAX_DAYS => 60;

# Net::Blogger data
# See module docs for details

use constant APPKEY   => undef;
use constant USERNAME => undef;
use constant PASSWORD => undef;
use constant BLOGNAME => undef;
use constant BLOGURL  => undef;

# Net::Google data
# See module docs for details

use constant GOOGLE_KEY => undef;

# Put Google queries here. This is an array
# reference; each element is one query string.

use constant QUERIES => [];

#
# Okay, stop defining

use constant NOW_IN_SECONDS => time;
use constant SECONDS_IN_DAY => 60 * 60 * 24;

# publish_hack_tool makes (MAX_PUBHACK_TRIES - 1) attempts,
# sleeping PUBHACK_SLEEP_SECS before each one.

use constant MAX_PUBHACK_TRIES  => 5;
use constant PUBHACK_SLEEP_SECS => 15;

# File-level state shared between main() and publish_hack_tool():
#   $store_hash : { query => { title => { created => epoch } } }
#   $publish    : publish flag sent with the last successful post
#   $pid        : id returned by the last successful newPost()

my $store_hash = {};
my $publish    = 0;
my $pid        = 0;

my %debug = ();

{
    main();
    exit;
}

# main()
#
# Loads the store of already-posted items, prunes entries older
# than MAX_DAYS, runs every query in QUERIES against Google, posts
# each result that has not been posted before, makes sure the blog
# gets published, pings weblogUpdates and saves the store again.
#
# Takes no arguments. Returns 1 on success; croaks on
# configuration, setup or storage errors.

sub main {

    # QUERIES is an array reference, so it has to be
    # dereferenced before it can be counted or iterated.
    my @queries = @{ QUERIES() };

    # Copy the path into a lexical so that it is never
    # used as a bareword after a filetest operator, where
    # it could be parsed as a filehandle name.
    my $store_path = STORE;

    # Some basic sanity checking

    croak "No queries. Nothing to do."
        if (! @queries);

    # Fail before posting anything rather than after : without
    # a store file every run would post the same items again.

    croak "No STORE file defined. Nowhere to save \$store_hash."
        if ((! defined($store_path)) || ($store_path eq ""));

    if (-f $store_path) {
        $store_hash = retrieve($store_path)
            or croak "Unable to retrieve \$store_hash. $!";
    }

    # Prune the store_hash

    my $prune_time = (NOW_IN_SECONDS - ((SECONDS_IN_DAY) * MAX_DAYS));

    foreach my $f (keys %$store_hash) {
        foreach my $t (keys %{ $store_hash->{$f} }) {

            if ($store_hash->{$f}{$t}{"created"} < $prune_time) {
                print "Deleting from \$store_hash : $t\n" if (VERBOSE);
                delete $store_hash->{$f}{$t};
            }
        }
    }

    my $blogger = Net::Blogger->new(
                                    appkey   => APPKEY,
                                    username => USERNAME,
                                    password => PASSWORD,
                                    debug    => DEBUG,
                                   );

    if (! $blogger) {
        # NOTE(review): $Blogger::LAST_ERROR is kept as written;
        # confirm this is the package variable Net::Blogger sets.
        no warnings qw (once uninitialized);
        croak "Can't create Blogger object. $Blogger::LAST_ERROR\n";
    }

    $blogger->BlogId($blogger->GetBlogId(blogname => BLOGNAME))
        or croak "No blog id ".$blogger->LastError();

    my $google = Net::Google->new(key => GOOGLE_KEY, debug => DEBUG)
        or croak "Can't create Google object\n";

    my $search = $google->search()
        or croak "Can't create Google::Search object\n";

    my $cgi = CGI->new()
        or croak "Failed to create CGI object. $!";

    # We keep track how many queries we're running/have run
    # and do the same for the results below. This is done so
    # that we only issue a single "publish" command to the
    # Blogger server rather than generating/publishing files
    # for every single result. That's just bad form.

    my $num_queries = scalar @queries;
    my $cur_query   = 0;

    # Reverse the order to preserve the order.

    foreach my $query (reverse @queries) {

        $cur_query++;

        $search->query("", $query);

        my $results = $search->results();

        if ((! $results) || (scalar @{$results} < 1)) {
            carp "No results for ".$search->query();
            next;
        }

        # See note above re num_queries

        my $num_res = scalar @{$results};
        my $cur_res = 0;

        foreach my $item (@{$results}) {

            $cur_res++;

            my $title = $item->title();
            my $link  = $item->URL();

            # We have already posted this item
            # Move along. These are not the posts
            # you're looking for.

            next if ($store_hash->{$query}{$title});

            my $ptitle = "$query, $title";
            my $post   = ($link) ? $cgi->a({-href=>$link}, $ptitle) : $ptitle;

            if (my $desc = $item->snippet()) {
                $post .= $cgi->div({-class=>"description"}, $desc,);
            }

            # Is this the last item from the last query?

            my $want_publish =
                (($cur_query == $num_queries) && ($cur_res == $num_res)) ? 1 : 0;

            my $new_pid = $blogger->newPost(
                                            postbody => \$post,
                                            publish  => $want_publish,
                                           );

            if (! $new_pid) {
                carp $blogger->LastError();

                # The post that was supposed to trigger the publish
                # did not go through, so nothing has been published.
                # Clear the flag so the edit post hack below still
                # runs for whatever was posted earlier.
                $publish = 0;
                next;
            }

            $pid     = $new_pid;
            $publish = $want_publish;

            $store_hash->{$query}{$title}{"created"} = NOW_IN_SECONDS;

            print "($publish) $query -- $title -- $pid\n" if (VERBOSE);
        }
    }

    # Since the last item of the last query may have already
    # been blogged, then we may never get around to publishing
    # anything. So, we call the publish_hack_tool which is just
    # that. Please be sure to read the comments for the function.

    if (($pid) && (! $publish)) {
        warn "No publish flag set. Trying edit post hack." if (VERBOSE);
        publish_hack_tool($blogger);
    }

    # publish_hack_tool clears $pid when the edit itself fails,
    # in which case the ping is skipped.

    if ($pid) {
        my $u = Userland::weblogUpdates->new(transport => "SOAP");
        $u->ping(BLOGNAME, BLOGURL);
    }

    # "or", not "||" : the check has to apply to the result
    # of store(), not to the path argument.

    store($store_hash, $store_path)
        or croak "Failed to store \$store_hash. $!";

    return 1;
}

# publish_hack_tool($blogger)
#
# Forces a publish by fetching the last post ($pid) and
# re-saving it with its own content and the publish flag set.
#
#   $blogger : the Net::Blogger object created in main()
#
# Returns 1 if the edit succeeded and 0 otherwise. When the
# edit call itself fails, $pid is reset to undef.

sub publish_hack_tool {
    my $blogger = shift;

    # There is a known lag in the amount of time
    # required to wait between sending a post to
    # the Blogger db and actually performing another
    # action on it.
    #
    # So, we'll sit and cool our heels for a little
    # while before trying to edit the last post with
    # its own content. Until such a time as the
    # Blogger API supports a generic "publishAll" method
    # there's not much else we can do.
    #
    # Unfortunately, I haven't been able to pin down
    # how long the Blogger db needs to get a grip, so
    # we're going to enter the land of "If you change
    # stuff you're not supposed to and find yourself in
    # an infinite loop, you're on your own."
    #
    # We're going to sleep for a few seconds and then
    # check the Blogger server for the last post. If we
    # get it then we're golden. If not, then we try again,
    # up to a fixed number of attempts. If the Blogger
    # server were hosed, for instance, and we had no
    # limit this program would never exit. And then in
    # an hour another copy would be started -- I could add
    # checks to prevent that -- and then bad things would
    # start happening. At the very least, your sysadmin
    # will probably yell at you.
    #
    # The number of attempts is counted explicitly instead
    # of by inspecting the caller stack, so it does not
    # depend on how deeply this function happens to be
    # called. With MAX_PUBHACK_TRIES set to 5 this gives
    # 4 attempts.

    my $max_attempts = MAX_PUBHACK_TRIES - 1;
    my $post;

    for (1 .. $max_attempts) {

        sleep(PUBHACK_SLEEP_SECS);

        $post = $blogger->getPost($pid);
        last if ($post);

        warn $blogger->LastError() if (VERBOSE);
        warn "Trying post hack again." if (VERBOSE);
    }

    if (! $post) {
        warn "I tried to use the edit post hack ($max_attempts) times without success. Giving up.";
        return 0;
    }

    my $ok = $blogger->editPost(
                                postbody => \$post->{'content'},
                                postid   => $post->{'postid'},
                                publish  => 1,
                               );

    # We could write even more code to keep
    # trying but...we won't

    if (! $ok) {
        warn "Unable to editPost $pid. Giving up. ".$blogger->LastError();
        $pid = undef;
        return 0;
    }

    warn "Publish successful." if (VERBOSE);
    return 1;
}

=head1 NAME

google2blogger - Poll Google for a list of query terms and post the results to a Blogger API enabled server.

=head1 SYNOPSIS

 # Stick this in your crontab
 0 0-23 1-31 1-12 * /yer/path/to/google2blogger

=head1 DESCRIPTION

This program will poll Google for a list of query terms and post the results to a Blogger API enabled server.

=head1 VERSION

1.0

=head1 DATE

April 17, 2002

=head1 CHANGES

=head2 1.0

=over 4

=item *

Initial revision.

=back

=head1 AUTHOR

Aaron Straup Cope

=head1 LICENSE

Copyright (c) 2002, Aaron Straup Cope.

This is free software, you may use it and distribute it under the same terms as Perl itself.

=cut