randomfox (randomfox) wrote,
randomfox
randomfox

Postcrossing Stats by Country

This script parses the "postcards sent" and "postcards received" pages from Postcrossing and produces an HTML table showing the number of postcards sent to, or received from, each country, ranked by postcard count. It tells the sent page apart from the received page on its own, so you don't have to say which one it is processing.


#!perl -w
use strict;

# Parse a Postcrossing sent/received page read from <> (the files named in
# @ARGV, or STDIN when @ARGV is empty) and collect the rows of its data table.
#
# Optional argument: the 1-based ordinal of the <table> element that holds
# the postcards, counted by opening "<table" tags. Defaults to 4.
#
# Parsing is line-oriented: each "<tr", "</tr>" and "<td ...>...</td>" is
# expected on a line of its own. Rows are only collected after the table's
# <tbody> line, which skips the thead section. Reading stops at the first
# </table> line after the wanted table has started; any remaining input is
# left unread.
#
# Returns a reference to an array of rows, each row being a reference to an
# array of cell strings.
sub readinput {
    my $wanted_table = shift;
    $wanted_table = 4 unless defined $wanted_table;

    my $tables_seen = 0;
    my $in_table    = 0;    # true once the wanted table has been opened
    my $in_body     = 0;    # true once its <tbody> line has been seen

    my $row;                # row currently being filled, undef between rows
    my @rows;

    LINE:
    while (defined(my $line = <>)) {
        chomp $line;

        if (!$in_table) {
            # Still looking for the wanted table.
            if ($line =~ /<table/) {
                ++$tables_seen;
                $in_table = 1 if $tables_seen == $wanted_table;
            }
            next LINE;
        }

        # End of the wanted table: nothing after it is of interest.
        last LINE if $line =~ /<\/table>/;

        if ($line =~ /<tbody>/) {
            $in_body = 1;
            next LINE;
        }

        # Skip over thead section.
        next LINE unless $in_body;

        if ($line =~ /<tr/) {
            $row = [];
        }
        elsif ($line =~ /<\/tr>/) {
            # Store each row exactly once. Forgetting it afterwards keeps a
            # stray </tr> from storing a duplicate (or an undef entry) and
            # keeps later cells from modifying an already stored row.
            push @rows, $row if defined $row;
            $row = undef;
        }
        elsif ($line =~ /<td.*\/td>/) {
            # Prefer the text of a <span>, then of a link, then the bare
            # cell content. The greedy matches keep only the text after the
            # last tag opened inside the cell.
            my $cell = "";
            if ($line =~ /<span.*>(.*)<\/span>/ ||
                $line =~ /<a href=.*>(.*)<\/a>/ ||
                $line =~ /<td.*>(.*)<\/td>/) {
                $cell = $1;
            }
            # A cell seen before any "<tr" line starts a row implicitly
            # (autovivification), e.g. when "<tbody><tr>" share one line.
            push @$row, $cell;
        }
    }

    return \@rows;
}

# Tally postcards per country from the rows collected by readinput.
#
# Argument: a reference to an array of rows, each row a reference to an
# array of cell strings. Column 2 is used as the country and column 6 as
# the postcard status.
#
# If any row has a status containing "Registered", only those rows are
# counted. Otherwise the page is taken to be a "postcards received" page
# and every row is counted. Rows that carry no country (missing or empty
# column 2) are skipped so they neither inflate the total nor create an
# empty-named country.
#
# Returns a two-element list: the number of postcards counted, and a
# reference to a hash mapping country name to postcard count.
sub count {
    my $recs = shift;

    # Ignore undefined entries so they are not dereferenced below.
    my @rows = grep { defined } @$recs;

    my @registered =
        grep { defined $_->[6] and $_->[6] =~ /Registered/ } @rows;

    # No "Registered" status anywhere: this must be a "postcards received"
    # page, so count every row instead.
    my $selected = @registered ? \@registered : \@rows;

    my %countries;
    my $total = 0;

    for my $rec (@$selected) {
        my $country = $rec->[2];
        next unless defined $country and length $country;
        $countries{$country}++;
        $total++;
    }

    return ( $total, \%countries );
}

# Print the per-country statistics as an HTML table to the currently
# selected output handle.
#
# Arguments:
#   $total     - number of postcards counted overall
#   $countries - reference to a hash mapping country name to postcard count
#
# Emits a header row, one row per country (rank, country, count and share
# of the total, with alternating background colours) and a closing row
# holding the total.
sub output {
    my $total = shift;
    my $countries = shift;

    print <<EOM;
<table cellpadding="2" cellspacing="1" border="0">
<tr color="#000000" bgcolor="#ffc56d" align="left">
<th>Rank</th>
<th>Country</th>
<th>Postcards</th>
<th>Percentage</th>
</tr>
EOM

    # Highest count first. Ties are broken alphabetically: hash key order
    # is not stable between runs, so without the tie-break equal counts
    # would be ranked differently each time the script is run.
    my @ranked = sort {
        $countries->{$b} <=> $countries->{$a} or $a cmp $b
    } keys %$countries;

    my $rank = 0;

    for my $country (@ranked) {
        ++$rank;
        my $postcards = $countries->{$country};
        # A zero total would otherwise die with a division by zero.
        my $percent = sprintf "%.2f",
            $total ? $postcards / $total * 100 : 0;
        my $bgcolor = $rank % 2 ? "#ffe2b6" : "#fff0db";
        print <<EOM;
<tr color="#000000" bgcolor="$bgcolor" align="left">
<td>$rank</td>
<td>$country</td>
<td>$postcards</td>
<td>$percent%</td>
</tr>
EOM
    }

    print <<EOM;
<tr color="#000000" bgcolor="#ffc56d" align="left">
<th>&nbsp;</th>
<th>Total</th>
<th>$total</th>
<th>&nbsp;</th>
</tr>
</table>
EOM
}

# Run the pipeline: parse the page, tally the postcards per country, then
# print the resulting HTML table.
my ($grand_total, $per_country) = count(readinput());
output($grand_total, $per_country);

__END__

Subscribe
  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    default userpic

    Your reply will be screened

    Your IP address will be recorded 

  • 0 comments