randomfox (randomfox) wrote,
randomfox
randomfox

Flickr Views Group Cleaning Script

#!perl -w
use strict;

# Views group cleaning script.

use Flickr::API;
use XML::Simple;
use LWP::UserAgent;

use Encode;
use Time::HiRes qw(usleep);
use FileHandle;
use Getopt::Long;

# Define these strings in apikey.ph
our ($api_key, $shared_secret, $auth_token);
require "apikey.ph";

my %groups = (
    25 => {
	name => "1-25 Views",
	id => '66969363@N00',
	lbound => 0,
	ubound => 25
    },
    50 => {
	name => "25-50 Views",
	id => '55265535@N00',
	lbound => 25,
	ubound => 50
    },
    75 => {
	name => "50-75 Views",
	id => '38541060@N00',
	lbound => 50,
	ubound => 75
    },
    100 => {
	name => "75-100 Views",
	id => '45499242@N00',
	lbound => 75,
	ubound => 100
    });

# Get a list of photos in the group.
sub getphotos {
    my $groupid = shift;
    my $pagenum = shift;
    my $pagelen = shift;

    my $api = new Flickr::API({'key' => $api_key});

    my $response = $api->execute_method("flickr.groups.pools.getPhotos", 
	{
	    group_id => $groupid,
	    per_page => $pagelen,
	    page => $pagenum
	});
    die "Error: $response->{error_message}\n" unless $response->{success};

    my $xmlp = new XML::Simple;
    my $xm = $xmlp->XMLin($response->{_content}, forcearray=>['photo']);

    my $photos = $xm->{photos};
    print "Page $photos->{page} of $photos->{pages}...\n";

    my $photolist = $photos->{photo};

    my $nphotos = scalar keys %{$photolist};
    my $n = 0;

    my @photoarr;

    for my $id (keys %{$photolist}) {
	my $photo = $photolist->{$id};
	$photo->{id} = $id;
	$photo->{url} = "http://www.flickr.com/photos/$photo->{owner}/$photo->{id}/in/pool-$groupid";
	push @photoarr, $photo;
    }
    ( $photos->{pages}, \@photoarr );
}

# Get view counts of the photos.
sub getviews {
    my $photolist = shift;

    my $agent = new LWP::UserAgent;
    $agent->parse_head(0);

    my $i = 0;
    for my $photo (@$photolist) {
	++$i;
	print "Getting photo $i of @{[scalar(@$photolist)]}...\n";

	my $retry_count = 0;
	my $response;
	do {
	    $response = $agent->get($photo->{url});
	    usleep 250000;
	} while $retry_count++ < 3 and $response->is_error;
	if ($response->is_error) {
	    warn $response->status_line, "\n";
	    next;
	}

	my $resp = $response->content;

	if ($resp =~ /<li class="Stats">\s*Viewed <b>(\d+)<\/b> times\s*<\/li>/) {
	    $photo->{views} = $1;
	}
    }
}

# Open log file.
sub openlog {
    my $logfn = sprintf("p%X.htm", time);
    my $fh = new FileHandle $logfn, "w";
    defined $fh or die "Error opening $logfn for writing: $!\n";
    $fh->autoflush(1);
    $fh;
}

# Remove photos that don't belong from the group.
sub rejectphotos {
    my $logfh = shift;
    my $groupid = shift;
    my $photolist = shift;
    my $lowerbound = shift;
    my $upperbound = shift;
    my $testmode = shift;

    my $api = new Flickr::API({'key' => $api_key, secret => $shared_secret});

    $testmode and print "Test mode. Photos will not be removed.\n";

    my $i = 0;
    for my $photo (@$photolist) {
	defined $photo->{views} or next;
	if ($photo->{views} < $lowerbound or $photo->{views} > $upperbound) {

	    # Need to encode because there could be wide characters.
	    my $title = encode("iso-8859-1", $photo->{title});
	    my $ownername = encode("iso-8859-1", $photo->{ownername});

	    ++$i;
	    print $logfh <<EOM;
$i. <a href="$photo->{url}">$photo->{id}</a>: $title by $ownername, <b>$photo->{views}</b> views<br>
EOM

	    print "$i. Rejecting photo $photo->{id}...\n";

	    next if $testmode;

	    my $res = $api->execute_method('flickr.groups.pools.remove',
		{
		    group_id => $groupid,
		    photo_id => $photo->{id},
		    auth_token => $auth_token
		});
	    $res->{success} or 
		print "Error rejecting photo $photo->{id}: $res->{error_message}\n";
	}
    }
}

# Process one page in the group.
sub processpage {
    my $pagenum = shift;
    my $pagelen = shift;
    my $group = shift;
    my $testmode = shift;

    my ($totalpages, $photolist) = getphotos($group->{id}, $pagenum, $pagelen);
    getviews($photolist);

    my $logfh = openlog();
    print $logfh <<EOM;
<html>
<head>
<title>Rejected photos from $group->{name}: Page $pagenum of $totalpages</title>
</head>
<body>
<h1>Rejected photos from $group->{name}: Page $pagenum of $totalpages</h1>
EOM

    rejectphotos($logfh, $group->{id}, $photolist, 
	$group->{lbound}, $group->{ubound}, $testmode);

    print $logfh <<EOM;
</body>
</html>
EOM

    $logfh->close;
}

sub usage {
    print <<EOM;
pool.pl [-test] [-pagelen=n] [-group=n] pagenum

-test: Test mode. Don't remove photos from pool.

-pagelen=n: 
	Set the page length to n. 
	n must be at least 1. 
	Default n is 100.

-group=n:
	Specify the group to scan. 
	n=25: 1-25 views group.
	n=50: 25-50 views group.
	n=75: 50-75 views group.
	n=100: 75-100 views group.
	Default n is 100.

pagenum:
	Page # to scan. 
	First page is 1.
	Default is the first page.
EOM
    exit 1;
}

my $groupkey = 100;
my $pagelen = 100;
my $test = '';
Getopt::Long::Configure("bundling_override");
GetOptions('test' => \$test, 
    'pagelen=s' => \$pagelen, 
    'group=s' => 
    sub {
	my $optname = shift;
	my $optval = shift;
	defined $groups{$optval} or die "Invalid group $optval\n";
	$groupkey = $optval;
    }) or usage();
die "Page length must be at least 1\n" if $pagelen < 1;

my $group = $groups{$groupkey};

my $pagenum = shift;
defined $pagenum or $pagenum = 1;

processpage($pagenum, $pagelen, $group, $test);

__END__

Subscribe
  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    default userpic

    Your reply will be screened

    Your IP address will be recorded 

  • 0 comments