#!/yer/perl/here
use strict;
use CGI qw (:html);
use Carp;
use Storable;
use Net::Blogger;
use Net::Google;
# With any luck, I'll get around to putting this
# on the CPAN shortly :
# http://archive.develooper.com/modules@perl.org/msg11209.html
# For the time being, you can get it here:
# http://aaronland.net/src/perl/userland/weblog-update-0.3.1.tar.gz
use Userland::weblogUpdates;
# Things you want to define
#
# DEBUG is passed as the "debug" argument to the Net::Blogger and
# Net::Google constructors. VERBOSE switches on this script's own
# progress messages.
use constant DEBUG => 0;
use constant VERBOSE => 0;
# The path to a file where Storable.pm
# can save the $store_hash hash. This
# data is used to prevent duplicate
# postings of the same search result.
use constant STORE => "";
# In order to prevent the STORE file
# from getting too big, we delete, on
# every run, data whose created time is
# older than MAX_DAYS days.
use constant MAX_DAYS => 60;
# Net::Blogger data
# See module docs for details
use constant APPKEY => undef;
use constant USERNAME => undef;
use constant PASSWORD => undef;
# BLOGNAME is used to look up the blog id; BLOGNAME and
# BLOGURL are both sent with the weblogUpdates ping.
use constant BLOGNAME => undef;
use constant BLOGURL => undef;
# Net::Google data
# See module docs for details
use constant GOOGLE_KEY => undef;
# Put Google queries here
# (the default is an empty array reference)
use constant QUERIES => [];
#
# Okay, stop defining
# The time is captured once, when this constant is defined. It is
# used both as the "created" stamp for new entries in $store_hash
# and as the reference point when pruning old ones.
use constant NOW_IN_SECONDS => time;
use constant SECONDS_IN_DAY => 60 * 60 * 24;
# Bound on the retries made by publish_hack_tool, and the number of
# seconds it sleeps before each attempt to fetch the last post.
use constant MAX_PUBHACK_TRIES => 5;
use constant PUBHACK_SLEEP_SECS => 15;
# Posting history. Entries are stored as
# $store_hash->{query}{title}{"created"} = time of posting.
my $store_hash = {};
# Publish flag belonging to the last post; main reads it to decide
# whether publish_hack_tool has to be called.
my $publish = 0;
# Post id returned by newPost. publish_hack_tool reads it to fetch the
# post, and clears it when editPost fails.
my $pid = 0;
# Not referenced anywhere else in the visible code.
my %debug = ();
# Run the program: call main, then exit.
{
&main();
exit;
}
# Poll Google for every configured query and post each result that has
# not been posted before to the Blogger weblog.
#
# Uses the configuration constants defined at the top of the file and
# updates the file-level $store_hash, $publish and $pid variables.
#
# Croaks if there are no queries, if STORE is not set, if the store file
# cannot be read or written, or if the Blogger, Google or CGI objects
# cannot be set up. Returns 1 otherwise.
sub main {
    # QUERIES is normally an array reference, so it has to be unpacked
    # before it can be counted or looped over. A plain list constant is
    # accepted as well.
    my @queries = ();
    foreach my $q (QUERIES()) {
        push @queries, (ref($q) eq "ARRAY") ? @{$q} : $q;
    }

    my $store_file = STORE;

    # Some basic sanity checking
    croak "No queries. Nothing to do." if (! @queries);

    # Without a history file every run would post the same results
    # again, so refuse to start rather than fail after posting.
    croak "No STORE file defined. Nowhere to keep \$store_hash."
        if ((! defined($store_file)) || ($store_file eq ""));

    if (-f $store_file) {
        $store_hash = retrieve($store_file)
            || croak "Unable to retrieve \$store_hash. $!";
    }

    # Prune the store_hash
    my $prune_time = (NOW_IN_SECONDS - ((SECONDS_IN_DAY)*MAX_DAYS));

    foreach my $f (keys %$store_hash) {
        foreach my $t (keys %{ $store_hash->{$f} }) {
            if ($store_hash->{$f}{$t}{"created"} < $prune_time) {
                print "Deleting from \$store_hash : $t\n" if (VERBOSE);
                delete $store_hash->{$f}{$t};
            }
        }
    }

    my $blogger = Net::Blogger->new(appkey=>APPKEY,username=>USERNAME,password=>PASSWORD,debug=>DEBUG)
        || croak "Can't create Blogger object. $Blogger::LAST_ERROR\n";

    # The blog id has to be looked up on the Blogger object itself.
    $blogger->BlogId($blogger->GetBlogId(blogname=>BLOGNAME))
        || croak "No blog id ".$blogger->LastError();

    my $google = Net::Google->new(key=>GOOGLE_KEY,debug=>DEBUG)
        || croak "Can't create Google object\n";

    my $search = $google->search()
        || croak "Can't create Google::Search object\n";

    my $cgi = CGI->new()
        || croak "Failed to create CGI object. $!";

    # We keep track of how many queries we're running/have run
    # and do the same for the results of each query below. This
    # is done so that we only issue a single "publish" command to
    # the Blogger server rather than generating/publishing files
    # for every single result. That's just bad form.
    my $num_queries = scalar @queries;
    my $cur_query   = 0;

    # Reverse the order to preserve the order.
    foreach my $query (reverse @queries) {
        $cur_query++;

        $search->query("",$query);
        my $results = $search->results();

        if ((! $results) || (scalar @{$results} < 1)) {
            carp "No results for ".$search->query();
            next;
        }

        # See note above re num_queries
        my $num_res = scalar @{$results};
        my $cur_res = 0;

        foreach my $item (@{$results}) {
            $cur_res++;

            my $title = $item->title();
            my $link  = $item->URL();

            # We have already posted this item
            # Move along. These are not the posts
            # you're looking for.
            next if ($store_hash->{$query}{$title});

            my $ptitle = "$query, $title";
            my $post   = ($link) ? $cgi->a({-href=>$link},$ptitle) : $ptitle;

            if (my $desc = $item->snippet()) {
                $post .= $cgi->div({-class=>"description"},$desc,);
            }

            # Is this the last result of the last query?
            my $is_last = (($cur_query == $num_queries) && ($cur_res == $num_res)) ? 1 : 0;

            my $new_pid = $blogger->newPost(postbody=>\$post,publish=>$is_last);

            if (! $new_pid) {
                carp $blogger->LastError();
                next;
            }

            # Only record the post id and the publish flag once the post
            # has actually been accepted. Otherwise a failure on the very
            # last item would leave $publish set and nothing published.
            $pid     = $new_pid;
            $publish = $is_last;

            $store_hash->{$query}{$title}{"created"} = NOW_IN_SECONDS;
            print "($publish) $query -- $title -- $pid\n" if (VERBOSE);
        }
    }

    # Since the last result of the last query may have already
    # been blogged, we may never get around to publishing
    # anything. So, we call the publish_hack_tool which is just
    # that. Please be sure to read the comments for the function.
    if (($pid) && (! $publish)) {
        warn "No publish flag set. Trying edit post hack." if (VERBOSE);
        publish_hack_tool($blogger);
    }

    if ($pid) {
        my $u = Userland::weblogUpdates->new(transport=>"SOAP");
        $u->ping(BLOGNAME,BLOGURL);
    }

    # The parentheses matter: without them the error check attaches
    # to the file name instead of to the result of store().
    store($store_hash, $store_file)
        or croak "Failed to store \$store_hash. $!";

    return 1;
}
# Force a publish by re-saving the most recent post with its own content
# and the publish flag set.
#
# Takes the Net::Blogger object and reads the file-level $pid to know
# which post to fetch. Returns 1 if editPost succeeded. Returns 0 if the
# post could not be fetched within the retry budget, or if editPost
# failed; in the latter case $pid is also cleared.
sub publish_hack_tool {
    my $blogger = shift;

    # The limit used to be enforced by probing the call stack with
    # caller(MAX_PUBHACK_TRIES) while recursing, which at the script's
    # usual call depth allowed one attempt fewer than the constant.
    # Counting explicitly keeps that budget without depending on where
    # this function happens to be called from.
    my $max_attempts = MAX_PUBHACK_TRIES - 1;
    $max_attempts = 0 if ($max_attempts < 0);

    # There is a known lag in the amount of time
    # required to wait between sending a post to
    # the Blogger db and actually performing another
    # action on it.
    #
    # So, we'll sit and cool our heels for a little
    # while before trying to edit the last post with
    # its own content. Until such a time as the
    # Blogger API supports a generic "publishAll" method
    # there's not much else we can do.
    #
    # Unfortunately, I haven't been able to pin down
    # how long the Blogger db needs to get a grip, so
    # we sleep for a few seconds and then ask the
    # Blogger server for the last post. If we get it
    # then we're golden. If not, we sleep and ask again,
    # up to $max_attempts times in total, and then bail.
    # If the Blogger server were hosed, for instance, and
    # we had no limit this program would never exit. And
    # then in an hour another copy would be started -- I
    # could add checks to prevent that -- and then bad
    # things would start happening. At the very least,
    # your sysadmin will probably yell at you.
    my $post = undef;

    foreach my $attempt (1 .. $max_attempts) {
        sleep(PUBHACK_SLEEP_SECS);

        $post = $blogger->getPost($pid);
        last if ($post);

        warn $blogger->LastError() if (VERBOSE);
        warn "Trying post hack again." if (VERBOSE);
    }

    if (! $post) {
        warn "I tried to use the edit post hack ($max_attempts) times without success. Giving up.";
        return 0;
    }

    my $ok = $blogger->editPost(postbody=>\$post->{'content'},postid=>$post->{'postid'},publish=>1);

    # We could write even more code to keep
    # trying but...we won't
    if (! $ok) {
        warn "Unable to editPost $pid. Giving up. ".$blogger->LastError();
        # Clearing $pid makes the caller skip the weblogUpdates ping.
        $pid = undef;
        return 0;
    }

    warn "Publish successful." if (VERBOSE);
    return 1;
}
=head1 NAME
google2blogger - Poll Google for a list of query terms and post the results to a Blogger API enabled server.
=head1 SYNOPSIS
# Stick this in your crontab
0 0-23 1-31 1-12 * /yer/path/to/google2blogger
=head1 DESCRIPTION
This program will poll Google for a list of query terms and post the results to a Blogger API enabled server.
=head1 VERSION
1.0
=head1 DATE
April 17, 2002
=head1 CHANGES
=head2 1.0
=over 4
=item *
Initial revision.
=back
=head1 AUTHOR
Aaron Straup Cope
=head1 LICENSE
Copyright (c) 2002, Aaron Straup Cope.
This is free software, you may use it and distribute it under the same terms as Perl itself.
=cut