This version of the script uses cursor-based pagination instead of page-based pagination.
#!perl -w
#
# twfriend.pl - Categorize Twitter contacts according to whether they are
# mutual friends, only friends, or only followers.
#
# Usage: twfriend.pl [-p proxyserver:proxyport] [-P proxyuser:proxypass] username:password
#
#   proxyserver:proxyport - Optional. HTTP proxy server and port.
#   proxyuser:proxypass   - Optional. HTTP proxy login.
#   username:password     - Required. Twitter login.
#
# Login information can be specified in Base64 format. If it is, it will be
# placed directly in the HTTP headers.

use strict;
use warnings;
use XML::Simple;
use LWP::UserAgent;
use Getopt::Std;
use MIME::Base64;
use Data::Dumper;

# Parse one page of XML and store the users it lists in a hash.
#
#   $xmlstr  - XML document text (as returned by twitter_api).
#   $friends - Hash ref to fill in; maps user id => screen name.
#
# Returns the document's next_cursor value, or 0 when that element is
# missing or is not a plain string, so that a caller looping until the
# cursor is 0 terminates instead of requesting a bogus cursor.
sub process_xml {
    my $xmlstr  = shift;
    my $friends = shift;

    my $xml = XMLin($xmlstr, ForceArray => ['user'], KeyAttr => []);
    # print Dumper($xml);

    # Only descend into <users> when it parsed as a hash; a page with no
    # users must not die under "strict refs".
    my $users = ref $xml->{users} eq 'HASH' ? $xml->{users}{user} : undef;
    for my $user (@{ $users || [] }) {
        $friends->{ $user->{id} } = $user->{screen_name};
    }

    # XML::Simple represents an empty element as an empty hash ref by
    # default, which would compare as non-zero and be interpolated into
    # the next request URL.
    my $next_cursor = $xml->{next_cursor};
    return 0 if !defined $next_cursor || ref $next_cursor;
    return $next_cursor;
}

# Display list of friends from a hash.
#
#   $friends      - Hash ref mapping user id => screen name.
#   $friends_type - Label printed after the count, e.g. "mutual friends".
#
# Prints the count, then one numbered line per user in ascending id order,
# followed by a blank line.
sub show_friends {
    my $friends      = shift;
    my $friends_type = shift;

    print scalar(keys %$friends), " $friends_type:\n";

    my $i = 0;
    for my $id (sort { $a <=> $b } keys %$friends) {
        print ++$i, ": $friends->{$id}\n";
    }
    print "\n";
}

# Take a list of friends and a list of followers. See which ones are mutual
# friends, only friends, and only followers, and print each group.
#
#   $friends   - Hash ref mapping user id => screen name.
#   $followers - Hash ref mapping user id => screen name.
sub process_friends {
    my $friends   = shift;
    my $followers = shift;

    my %mutual;
    my %only_friends;
    my %only_followers;

    # Every friend is either mutual or friend-only.
    for my $id (keys %$friends) {
        if (exists $followers->{$id}) {
            $mutual{$id} = $friends->{$id};
        }
        else {
            $only_friends{$id} = $friends->{$id};
        }
    }

    # Followers that are also friends were already filed under %mutual.
    for my $id (keys %$followers) {
        next if exists $friends->{$id};
        $only_followers{$id} = $followers->{$id};
    }

    show_friends(\%mutual,         "mutual friends");
    show_friends(\%only_friends,   "only friends");
    show_friends(\%only_followers, "only followers");
}

# Encode in Base64 if login string is in user:pass format. Otherwise,
# assume it is already in Base64 and return it unchanged.
sub base64login {
    my $login = shift;

    # The empty second argument matters: by default encode_base64 appends
    # "\n" and breaks the output into 76-character lines, neither of which
    # belongs inside an HTTP header value.
    return $login =~ /:/ ? encode_base64($login, '') : $login;
}

# Number of retries allowed after the first failed request.
my $MAXRETRY = 5;

# Download one page of friends/followers through the Twitter API.
#
#   $login      - Twitter login (user:pass or Base64); '' sends no
#                 Authorization header.
#   $proxy      - host:port of an HTTP proxy; '' for a direct connection.
#   $proxylogin - Proxy login (user:pass or Base64); '' sends no
#                 Proxy-Authorization header.
#   $what       - "friends" or "followers".
#   $cursor     - Pagination cursor to request.
#
# Returns the response body on success. A failed request is retried after
# a 3 second pause; once more than $MAXRETRY requests have failed, dies
# with the last response as a string.
sub twitter_api {
    my $login      = shift;
    my $proxy      = shift;
    my $proxylogin = shift;
    my $what       = shift;    # "friends" or "followers"
    my $cursor     = shift;

    # The user agent configuration is the same for every attempt, so build
    # it once rather than on each retry.
    my $ua = LWP::UserAgent->new;
    $proxy ne ''
        and $ua->proxy('http', "http://$proxy");
    $proxylogin ne ''
        and $ua->default_header(
            'Proxy-Authorization' => "Basic " . base64login($proxylogin));
    $login ne ''
        and $ua->default_header(
            'Authorization' => "Basic " . base64login($login));

    my $retrycount = 0;
    while (1) {
        my $response = $ua->get(
            "http://api.twitter.com/1/statuses/$what.xml?cursor=$cursor");
        $response->is_success and return $response->content;

        ++$retrycount;
        $retrycount > $MAXRETRY and die $response->as_string;

        warn "Retrying...\n";
        sleep 3;
    }
}

# Get all pages of friends or followers and add them to a hash.
#
#   $login, $proxy, $proxylogin - Passed through to twitter_api.
#   $what                       - "friends" or "followers".
#   $friends                    - Hash ref to fill in (user id => screen name).
#
# Starts at cursor -1 and follows each page's next cursor until it is 0,
# pausing one second between pages. If a page cannot be downloaded, a
# warning is printed and the pages collected so far are kept.
sub get_pages {
    my $login      = shift;
    my $proxy      = shift;
    my $proxylogin = shift;
    my $what       = shift;    # "friends" or "followers"
    my $friends    = shift;

    my $page   = 0;
    my $cursor = -1;

    do {
        ++$page;
        warn "Getting $what page $page...\n";

        my $xmlstr;
        my $ok = eval {
            $xmlstr = twitter_api($login, $proxy, $proxylogin, $what, $cursor);
            1;
        };
        if (!$ok) {
            # Keep the best-effort behaviour (return what we have), but say
            # so: otherwise an incomplete list looks like a complete one.
            warn "Failed to get $what page $page; results may be incomplete:\n$@";
            return;
        }

        $cursor = process_xml($xmlstr, $friends);
        sleep 1;
    } until $cursor == 0;

    return;
}

# Print the usage message and exit.
sub usage {
    die "Usage: $0 [-p proxyhost:proxyport] [-P proxyuser:proxypass] user:password\n";
}

# Parse the command line, download both lists, and print the comparison.
sub main {
    binmode STDOUT, ":utf8";

    my $proxy      = '';
    my $login      = '';
    my $proxylogin = '';

    my %opts;
    getopts('p:P:', \%opts) or usage();
    defined $opts{p} and $proxy      = $opts{p};
    defined $opts{P} and $proxylogin = $opts{P};

    @ARGV < 1 and usage();
    $login = shift @ARGV;

    my %friends;
    my %followers;
    get_pages($login, $proxy, $proxylogin, "friends",   \%friends);
    get_pages($login, $proxy, $proxylogin, "followers", \%followers);

    process_friends(\%friends, \%followers);
}

main();

__END__