#!/usr/bin/perl
# get-twitter-rss - get a Twitter user timeline feed via RSS
# formats the output with the Perl Template Toolkit
#
# Copyright 2009-2010 by Ian Kluft
# Redistribution permitted by the author under the conditions of the
# GNU General Public License Version 3.
# http://ian.kluft.com/opensource/GPLv3.txt
#
# Usage: get-twitter-rss --user=id,id,... --template=path [--outfile=path] [--include=path]
# where:
# user is a comma-delimited list of Twitter user ID/numbers
# template is a template which will be filled in w/ Perl Template Toolkit
# see Template::Manual, Template::Tutorial and sample template below
# outfile is a path to an output file
# include is an optional include path, if used then template may be a
# file in this path
#
# Sample output templates - put one of these in a file and use it for template
# parameter
#
# Simple sample template: single user's feed
# ------------------------------------------------------------------------
#
#
# Follow me on twitter
#
# Twitter updates as of [% update_timestamp %]
# |
#
# [% FOREACH item = items %][% LAST IF loop.count >= 15 %]
#
#
# [% item.tweet_html %]
#
# [% item.tweet_date %]
# |
#
# [% END %]
#
# ------------------------------------------------------------------------
#
# Advanced sample template: combining multiple users' feeds
# ------------------------------------------------------------------------
#
#
# ------------------------------------------------------------------------
use strict;
use Carp;
use Scalar::Util qw( blessed );
use Getopt::Long;
use Time::ParseDate;
use Time::Timezone;
use Date::Calc qw( Date_to_Text );
use XML::RSS;
use LWP::UserAgent;
use Template;
# define exceptions/errors
#
# Two base classes select how the handler at the bottom of the script reacts:
#   ComplainNowException    - reported on stderr, then exit status 1
#   SilentlyEscapeException - reported only when $debug is set, exit status 0
# Each concrete class carries a description that is printed after the
# thrown message.
# NOTE(review): the "trace" key is read back in the handler through
# $e->Fields; confirm that Exception::Class really keeps this key, since the
# class options it documents are isa, fields, alias and description.
use Exception::Class (
'ComplainNowException',
'SilentlyEscapeException',
# bad or missing command-line options
'UsageException' => {
isa => 'ComplainNowException',
description => "usage: get-twitter-rss --user=id,id,... --template=path [--outfile=path] [--include=path]",
trace => 0,
},
# an HTTP request for a feed was not successful
'RssNetworkException' => {
isa => 'SilentlyEscapeException',
description => "Failed to access RSS feed",
trace => 0,
},
# Template Toolkit reported a failure while processing the template
'TemplateException' => {
isa => 'ComplainNowException',
description => "template processing error",
trace => 1,
}
);
# globals
# when true, SilentlyEscapeException details are printed before exiting
my $debug = 0;
# program version and name; get_feed combines them into the User-Agent string
my $VERSION = "0.6";
my $sw_name = "get-twitter-rss";
# sprintf format for one user's timeline feed URL; %s receives the user ID
# NOTE(review): confirm this endpoint is still served before relying on it
my $twit_url = "http://twitter.com/statuses/user_timeline/%s.rss?count=25";
# get command-line parameters
#
# Parses @ARGV with Getopt::Long and returns a hash ref with these keys:
#   user     - array ref of the non-empty IDs from the comma-delimited
#              --user (alias --username, --usernum) value
#   template - value of --template
#   outfile  - value of --outfile (key present only if the option was given)
#   include  - value of --include (key present only if the option was given)
# Throws UsageException when option parsing fails, when --user or --template
# is missing, or when --user contains no usable ID.
sub get_cmdline
{
    my ( $user, $template, $outfile, $include );
    GetOptions(
        "user|username|usernum=s" => \$user,
        "template=s" => \$template,
        "outfile:s" => \$outfile,
        "include:s" => \$include,
    ) or UsageException->throw( "command-line options parsing error" );

    defined $user
        or UsageException->throw( "missing user parameter" );
    defined $template
        or UsageException->throw( "missing template parameter" );

    # drop blank entries so input such as "a,,b" or ",a" cannot turn into a
    # request for an empty user ID
    my @users = grep { length } split( /,/, $user );
    @users
        or UsageException->throw( "missing user parameter" );

    my %config = (
        user => \@users,
        template => $template,
    );
    $config{outfile} = $outfile if defined $outfile;
    $config{include} = $include if defined $include;
    return \%config;
}
# get RSS feed and return it as text
#
# Takes the config hash ref and issues one HTTP GET per entry of
# $config->{user}, using the $twit_url format to build each URL.
# Returns the list of decoded response bodies in the same order as the users.
# Throws RssNetworkException with the HTTP status line as soon as any request
# is not successful; bodies fetched before that point are discarded.
sub get_feed
{
    my ($config) = @_;

    my $agent = LWP::UserAgent->new;
    # NOTE(review): the trailing space looks deliberate - presumably it lets
    # LWP append its own identifier; confirm against the LWP::UserAgent docs
    $agent->agent("$sw_name/$VERSION ");

    my @bodies;
    for my $id ( @{ $config->{user} } ) {
        my $url = sprintf( $twit_url, $id );
        my $reply = $agent->get( $url );
        $reply->is_success
            or RssNetworkException->throw( $reply->status_line );
        push @bodies, $reply->decoded_content;
    }
    return @bodies;
}
# extract a string value from a scalar/ref if possible
#
# Returns:
#   - for a plain scalar: the value with trailing whitespace removed, or
#     undef when nothing is left
#   - for an object that provides as_string(): the result of that method
#   - undef for everything else (undef input, unblessed references, objects
#     without as_string)
sub extract_value
{
    my ($candidate) = @_;
    return undef unless defined $candidate;

    unless ( ref $candidate ) {
        # plain scalar: trim the tail and treat an empty remainder as no value
        $candidate =~ s/\s+$//s;
        return undef unless length( $candidate ) > 0;
        return $candidate;
    }

    # a HASH/ARRAY/etc that is not an object has no string form here
    return undef unless blessed( $candidate );
    return undef unless $candidate->can( "as_string" );
    return $candidate->as_string;
}
# format a date as text
#
# Takes a time in seconds since the Unix epoch and returns a single string
# made of three space-separated parts: the local date as rendered by
# Date_to_Text, the local time as HH:MM:SS, and the zone name from tz2zone
# for the DST flag that localtime reported.
sub format_date
{
    my ($epoch) = @_;

    my @parts = localtime( $epoch );
    my ( $sec, $min, $hour, $mday ) = @parts[ 0 .. 3 ];
    my $month = $parts[4] + 1;      # localtime months start at 0
    my $year = $parts[5] + 1900;    # localtime years count from 1900
    my $isdst = $parts[8];

    my $day_text = Date_to_Text( $year, $month, $mday );
    my $clock = sprintf( "%02d:%02d:%02d", $hour, $min, $sec );
    my $zone = tz2zone( undef, undef, $isdst );
    return $day_text . " " . $clock . " " . $zone;
}
# filter out news items
#
# Takes one item hash ref (fields extracted from an RSS item) and either
# rejects it or annotates it in place.
#
# Returns 0 when the title looks like "poster: @someone ..." (a message to
# another user, not for public display). Otherwise returns 1 after adding:
#   username   - poster name removed from the front of the description
#                (left unset when the description has no "name: " prefix)
#   tweet_html - the description without the poster prefix
#   pub_epoch  - pubDate converted to epoch seconds by parsedate()
#   tweet_date - pub_epoch rendered in local time by format_date()
sub filter_items
{
    my $f_item = shift;

    # reject messages to other users - not for public display
    my $title = defined $f_item->{title} ? $f_item->{title} : "";
    return 0 if $title =~ /^[a-z0-9_]+: \@[a-z0-9_]/i;

    # get rid of the poster's username; $1 is only valid when the
    # substitution matched, otherwise it would be a stale capture
    my $desc = defined $f_item->{description} ? $f_item->{description} : "";
    if ( $desc =~ s/^([a-z0-9_]+):\s+//i ) {
        $f_item->{username} = $1;
    }

    # Scan the text for @mentions, #hashtags and http:// URLs, copying the
    # plain text between them and handling each token by type.
    # NOTE(review): every branch currently re-emits the token text unchanged,
    # so tweet_html equals the prefix-stripped description; presumably link
    # markup was meant to be added per token type - TODO confirm.
    my $html = "";
    my $pos = 0;
    while ( $desc =~ /([\@\#][a-z0-9_]+|http:\/\/[^\s]+)/ig ) {
        my $token = $1;
        my ( $start, $end ) = ( $-[1], $+[1] );

        # plain text between the previous token and this one
        $html .= substr( $desc, $pos, $start - $pos );

        # classify case-insensitively, like the pattern that found the token,
        # so an upper-case "HTTP://" is still treated as a URL
        if ( $token =~ /^http:/i ) {
            $html .= "$token";
        } elsif ( substr( $token, 0, 1 ) eq "#" ) {
            $html .= "#" . substr( $token, 1 );
        } else {
            # the pattern only admits "@" as the remaining leading character
            $html .= "\@" . substr( $token, 1 );
        }
        $pos = $end;
    }
    $html .= substr( $desc, $pos );
    $f_item->{tweet_html} = $html;

    # add processed date fields; GMT is an option key and needs a true value
    # for parsedate to treat zone-less input as GMT
    my $pub_epoch = parsedate( $f_item->{pubDate}, GMT => 1 );
    $f_item->{pub_epoch} = $pub_epoch;
    $f_item->{tweet_date} = format_date( $pub_epoch );

    # no reasons for rejection found
    return 1;
}
# parse RSS feed into hash structure
#
# Takes a list of RSS documents as text and returns a hash ref containing:
#   - every top-level field of the parsed feed for which extract_value
#     yields a string
#   - one nested hash per top-level hash "bucket" (channel parameters,
#     XML/RSS modules, etc) holding its extractable fields
#   - items: the items accepted by filter_items from all feeds, merged and
#     sorted newest first by pub_epoch
#   - update_timestamp: the current time as formatted by format_date
# When several feeds are given, top-level fields and buckets from a later
# feed replace same-named ones from an earlier feed; only items are merged.
sub parse_feed
{
    my ( %feed, @items );
    foreach my $text ( @_ ) {
        my $rss = XML::RSS->new;
        $rss->parse( $text );

        # parse values from top of structure, remembering which fields are
        # hashes so they can be walked below
        my @buckets;
        foreach my $field ( keys %$rss ) {
            if ( ref $rss->{$field} eq "HASH" ) {
                push @buckets, $field;
            }
            my $value = extract_value( $rss->{$field} );
            ( defined $value ) or next;
            $feed{$field} = $value;
        }

        # parse hashes, i.e. channel parameters, XML/RSS modules, etc
        foreach my $bucket ( @buckets ) {
            $feed{$bucket} = {};
            foreach my $field ( keys %{ $rss->{$bucket} } ) {
                my $value = extract_value( $rss->{$bucket}{$field} );
                ( defined $value ) or next;
                $feed{$bucket}{$field} = $value;
            }
        }

        # parse each item from the news feed, keeping those filter_items accepts
        foreach my $item ( @{ $rss->{items} } ) {
            my $f_item = {};
            foreach my $field ( keys %$item ) {
                my $value = extract_value( $item->{$field} );
                ( defined $value ) or next;
                $f_item->{$field} = $value;
            }
            filter_items( $f_item ) or next;
            push @items, $f_item;
        }
    }

    # reverse sort items by time (in case this is multiple merged feeds)
    $feed{items} = [ sort { $b->{pub_epoch} <=> $a->{pub_epoch} } @items ];

    # add a timestamp that the template can use
    $feed{update_timestamp} = format_date( time );
    return \%feed;
}
# format output of feed
#
# Takes the config hash ref and the feed hash ref, and runs the template
# named by $config->{template} with the feed as its variables.
# Absolute and relative template paths are both enabled, and
# $config->{include} becomes the include path when present.
# With $config->{outfile} set, the output goes to that file using a :utf8
# binmode; otherwise process() is called without an output argument.
# Throws TemplateException with the template's error when processing fails.
sub format_feed
{
    my ( $config, $feed ) = @_;

    # configure and create template object
    my %tt_config = ( ABSOLUTE => 1, RELATIVE => 1 );
    $tt_config{INCLUDE_PATH} = $config->{include}
        if defined $config->{include};
    my $template = Template->new( \%tt_config );

    # extra process() arguments are only passed when writing to a file
    my @target = defined $config->{outfile}
        ? ( $config->{outfile}, { binmode => ':utf8'} )
        : ();
    my $result = $template->process( $config->{template}, $feed, @target );
    $result or TemplateException->throw ( $template->error());
}
# run main processing
#
# Reads the command line, fetches the feed text for every requested user,
# parses it into one feed structure and renders that through the template.
# Exceptions thrown by any step propagate to the caller.
sub main
{
    my $config = get_cmdline();
    my $feed = parse_feed( get_feed( $config ) );
    format_feed( $config, $feed );
}
# print an exception's message and its class description on stderr,
# followed by a stack trace when the "trace" flag is set for it
# NOTE(review): Exception::Class documents Fields as returning declared field
# names, so %fields may never contain a "trace" key - confirm whether the
# trace output is reachable.
sub _report_exception
{
    my $e = shift;
    my %fields = $e->Fields;
    warn $e->error, "\n",
        $e->description, "\n",
        ( $fields{trace} ? ( $e->trace->as_string, "\n" ) : ());
}

# try processing and catch exceptions
#
# Exit status: 1 after reporting a ComplainNowException; 0 on success and for
# a SilentlyEscapeException (reported only when $debug is set). Anything else
# that was thrown is passed on unchanged.
eval { main(); };
my $e;
if ( $e = Exception::Class->caught( 'ComplainNowException' )) {
    _report_exception( $e );
    exit 1;
} elsif ( $e = Exception::Class->caught( 'SilentlyEscapeException' )) {
    _report_exception( $e ) if $debug;
    exit 0;
} else {
    $e = Exception::Class->caught();
    if ( ! $e ) {
        # nothing was thrown: normal completion
        exit 0;
    }
    if ( ref $e ) {
        $e->rethrow
    } else {
        die $e;
    }
}
# should not get here
confess "fell through - should not get here\n";