randomfox (randomfox) wrote,
randomfox
randomfox

Twitter contact categorizer (with cursor-based pagination)

This Perl script works like Twitter Karma in that it sorts your Twitter contacts into groups according to whether they are mutual friends, friends only, or followers only.

This version of the script uses cursor-based pagination instead of page-based pagination.


#!perl -w
#
# twfriend.pl - Categorize Twitter contacts according to whether they are
# mutual friends, only friends, or only followers.
#
# Usage: twfriend.pl [-p proxyserver:proxyport] [-P proxyuser:proxypass] username:password
#
# proxyserver:proxyport - Optional. HTTP proxy server and port.
# proxyuser:proxypass   - Optional. HTTP proxy login.
# username:password     - Required. Twitter login. 
#
# A login may also be given already Base64-encoded. Any login string that
# contains no ':' is assumed to be Base64 and is placed in the HTTP header
# as is.

use strict;
use XML::Simple;
use LWP::UserAgent; 
use Getopt::Std;
use MIME::Base64;
use Data::Dumper;

# Parse one page of XML returned by the Twitter API and record its users.
#
# Arguments:
#   $xmlstr  - XML text, handed to XMLin.
#   $friends - hash reference; every <user> on the page is stored in it
#              as id => screen_name.
#
# Returns the next_cursor value of the parsed document.
sub process_xml {
    my ($xmlstr, $friends) = @_;

    # ForceArray keeps 'user' a list even when the page holds one user;
    # the empty KeyAttr turns off XML::Simple's array-to-hash folding.
    my $doc = XMLin($xmlstr, ForceArray => ['user'], KeyAttr => []);

    # Debugging aid: print Dumper($doc);

    for my $entry (@{ $doc->{users}{user} }) {
        my ($id, $name) = @{$entry}{qw(id screen_name)};
        $friends->{$id} = $name;
    }

    return $doc->{next_cursor};
}

# Print one category of contacts to the currently selected output handle.
#
# Arguments:
#   $friends      - hash reference of id => screen_name.
#   $friends_type - label for the heading line (e.g. "mutual friends").
#
# Output is a heading "<count> <label>:", then one numbered line per
# contact in ascending numeric id order, then a blank line.
sub show_friends {
    my ($friends, $friends_type) = @_;

    my @ids_in_order = sort { $a <=> $b } keys %$friends;
    my $total = @ids_in_order;
    print $total, " $friends_type:\n";

    for my $pos (0 .. $#ids_in_order) {
        my $rank = $pos + 1;    # displayed numbering is 1-based
        my $id = $ids_in_order[$pos];
        print $rank, ": $friends->{$id}\n";
    }

    print "\n";
}

# Split contacts into three groups and print each group with show_friends.
#
# Arguments:
#   $friends   - hash reference of id => screen_name for friends.
#   $followers - hash reference of id => screen_name for followers.
#
# An id in both hashes is a mutual friend (the screen name is taken from
# $friends); an id only in $friends is "only friends"; an id only in
# $followers is "only followers".
sub process_friends {
    my ($friends, $followers) = @_;

    my (%mutual, %only_friends, %only_followers);

    # Every friend is either mutual or friend-only.
    for my $id (keys %$friends) {
        my $bucket = exists $followers->{$id} ? \%mutual : \%only_friends;
        $bucket->{$id} = $friends->{$id};
    }

    # Whatever is left among the followers is follower-only.
    for my $id (keys %$followers) {
        next if exists $friends->{$id};
        $only_followers{$id} = $followers->{$id};
    }

    show_friends(\%mutual, "mutual friends");
    show_friends(\%only_friends, "only friends");
    show_friends(\%only_followers, "only followers");
}

# Return a login string in the Base64 form used by HTTP Basic authentication.
#
# Arguments:
#   $login - either "user:pass" or an already Base64-encoded login.
#
# A string containing ':' is treated as user:pass and encoded; anything
# else is assumed to be Base64 already and returned unchanged.
sub base64login {
    my $login = shift;

    return $login if $login !~ /:/;

    # The empty second argument suppresses the line terminator that
    # encode_base64 otherwise appends (and inserts every 76 characters),
    # which must not end up inside an HTTP header value.
    return encode_base64($login, '');
}

# Number of times a failed request is retried before twitter_api gives up
# (so at most $MAXRETRY + 1 requests are made per call).
my $MAXRETRY = 5;

# Download one page of friends/followers through the Twitter API.
#
# Arguments:
#   $login      - Twitter login (user:pass or Base64); '' sends no
#                 Authorization header.
#   $proxy      - "host:port" of an HTTP proxy; '' for a direct connection.
#   $proxylogin - proxy login (user:pass or Base64); '' sends no
#                 Proxy-Authorization header.
#   $what       - "friends" or "followers".
#   $cursor     - pagination cursor placed in the request URL.
#
# Returns the response body of the first successful request.
# Dies with the full text of the last response once more than $MAXRETRY
# attempts have failed.
sub twitter_api {
    my ($login, $proxy, $proxylogin, $what, $cursor) = @_;

    # The user agent, its headers and the URL are identical for every
    # attempt, so they are set up once instead of on each retry.
    my $ua = LWP::UserAgent->new;
    $ua->proxy('http', "http://$proxy") if $proxy ne '';

    if ($proxylogin ne '') {
        $ua->default_header(
            'Proxy-Authorization' => "Basic " . base64login($proxylogin));
    }
    if ($login ne '') {
        $ua->default_header(
            'Authorization' => "Basic " . base64login($login));
    }

    my $url = "http://api.twitter.com/1/statuses/$what.xml?cursor=$cursor";

    my $retrycount = 0;
    while (1) {
        my $response = $ua->get($url);
        return $response->content if $response->is_success;

        ++$retrycount;
        die $response->as_string if $retrycount > $MAXRETRY;

        warn "Retrying...\n";
        sleep 3;    # pause before the next attempt
    }
}

# Get all pages of friends or followers and add them to a hash.
#
# Arguments:
#   $login, $proxy, $proxylogin - passed through to twitter_api.
#   $what    - "friends" or "followers".
#   $friends - hash reference that process_xml fills with id => screen_name.
#
# Pagination starts at cursor -1 and follows the cursor returned by
# process_xml for each page until it is 0. If a page cannot be downloaded,
# a warning is printed and the function returns with whatever was collected
# so far.
sub get_pages {
    my ($login, $proxy, $proxylogin, $what, $friends) = @_;

    my $page = 0;
    my $cursor = -1;
    do {
        ++$page;
        warn "Getting $what page $page...\n";

        my $xmlstr;
        eval {
            $xmlstr = twitter_api($login, $proxy, $proxylogin, $what, $cursor);
        };
        if ($@) {
            # Still best-effort, but tell the user why the list is short
            # instead of dropping the error silently.
            warn "Failed to get $what page $page; the list of $what is incomplete:\n$@";
            return;
        }

        $cursor = process_xml($xmlstr, $friends);
        sleep 1;

        # Besides the normal terminating 0, also stop on a cursor that is
        # undefined or not a plain scalar (XML::Simple represents an empty
        # element as a hash reference by default), so a malformed page
        # cannot produce a bogus follow-up request.
    } until (!defined($cursor) || ref($cursor) || $cursor == 0);

    return;
}

# Abort the script with a one-line summary of the command-line syntax.
# The message ends in a newline, so die does not append file/line details.
sub usage {
    my $message = "Usage: $0 [-p proxyhost:proxyport] [-P proxyuser:proxypass] user:password\n";
    die $message;
}

# Entry point: parse the command line, download both contact lists and
# print the categorized result.
#
# Options:
#   -p proxyhost:proxyport   optional HTTP proxy
#   -P proxyuser:proxypass   optional proxy login
# The first remaining argument is the Twitter login and is required;
# a bad option or a missing login ends the script via usage().
sub main {
    # Screen names may contain non-ASCII characters.
    binmode STDOUT, ":utf8";

    my %opts;
    usage() unless getopts('p:P:', \%opts);

    my $proxy      = defined $opts{p} ? $opts{p} : '';
    my $proxylogin = defined $opts{P} ? $opts{P} : '';

    usage() unless @ARGV;
    my $login = shift @ARGV;

    my (%friends, %followers);

    # Friends are fetched first, then followers.
    for my $job (["friends", \%friends], ["followers", \%followers]) {
        my ($what, $store) = @$job;
        get_pages($login, $proxy, $proxylogin, $what, $store);
    }

    process_friends(\%friends, \%followers);
}

main();

__END__

Tags: perl, twitter
Subscribe
  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    default userpic

    Your reply will be screened

    Your IP address will be recorded 

  • 0 comments