randomfox ([info]randomfox) wrote,

Import Delicious bookmarks into Evernote.

This Perl script takes an export file from Delicious and imports the bookmarks into Evernote, creating one note for each bookmark. This script is slow if you have a large number of bookmarks because it has to create the notes one at a time. I hope the Evernote API will one day support batch note creation.

Customize this script by adding your Evernote API consumer key and secret before you use it.


#!/usr/bin/perl

# Import Delicious bookmarks into Evernote.
# Usage: importdel.pl [-n notebook_name] evernote_user evernote_password bookmarks_file
#
# This script takes as input a bookmarks file exported from Delicious. It
# creates a notebook and posts each bookmark as a note in that notebook.

use Thrift;
use Thrift::BinaryProtocol;
use Thrift::HttpClient;

# Evernote's EDAM Thrift bindings (we `use' everything here)

use EdamErrors::Constants;
use EdamErrors::Types;

use EdamLimits::Constants;
use EdamLimits::Types;

use EdamNoteStore::Constants;
use EdamNoteStore::Types;

use EdamTypes::Constants;
use EdamTypes::Types;

use EdamUserStore::Constants;
use EdamUserStore::Types;

use NoteStore;
use UserStore;

use warnings;
use strict;
use Getopt::Std;

use Data::Dumper;

#-------------------------------------------------------------------------------
# Change these variables to suit your needs.

my $EVERNOTE_SERVER = 'www.evernote.com'; # server name; use sandbox.evernote.com for testing
my $CONSUMER_KEY = '???'; # your 'consumer key' string you received from Evernote
my $CONSUMER_SECRET = '???'; # your 'consumer secret' string you received from Evernote


#-------------------------------------------------------------------------------
# internal variables

my $USERSTORE_URL = "https://$EVERNOTE_SERVER/edam/user";
my $NOTESTORE_URL = "https://$EVERNOTE_SERVER/edam/note";

my $ENML_NOTE_HEADER = q|
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml.dtd">
<en-note>
|;

my $ENML_NOTE_FOOTER = q|
</en-note>|;

#-------------------------------------------------------------------------------
# Translate an EDAM error code into its symbolic name.
#
# The code is compared (as a string) against each EDAMErrors::EDAMErrorCode
# constant in turn, in the order listed below; the first match wins.
# Returns the constant's name, or "UNKNOWN ERROR <code>" when nothing matches.
sub edamErrorCodeAsString {
    my ($code) = @_;

    my @known_names = qw(
        UNKNOWN
        BAD_DATA_FORMAT
        PERMISSION_DENIED
        INTERNAL_ERROR
        DATA_REQUIRED
        LIMIT_REACHED
        QUOTA_REACHED
        INVALID_AUTH
        AUTH_EXPIRED
        DATA_CONFLICT
        ENML_VALIDATION
        SHARD_UNAVAILABLE
    );

    for my $name (@known_names) {
        # The constants are looked up by name at call time, one at a time,
        # so a constant is only evaluated if no earlier one matched.
        no strict 'refs';
        my $value = &{"EDAMErrors::EDAMErrorCode::$name"}();
        return $name if $code eq $value;
    }

    return "UNKNOWN ERROR $code";
}

#-------------------------------------------------------------------------------
# Render an EDAM exception object as "<error name>, <parameter>".
#
# Reads the object's `errorCode` field (translated via edamErrorCodeAsString)
# and its `parameter` field.
sub edamErrorObjectAsString {
    my ($obj) = @_;

    my $error_name = edamErrorCodeAsString($obj->{errorCode});
    my $parameter  = $obj->{parameter};

    return $error_name . ', ' . $parameter;
}


# Build a UserStore client for $USERSTORE_URL.
#
# Creates a Thrift HTTP client for the URL, wraps it in a binary protocol
# object, and returns a UserStoreClient constructed with that same protocol
# object for both of its constructor arguments.
sub edam_connect_userstore {
    print "Connecting to UserStore...\n";

    my $transport = Thrift::HttpClient->new($USERSTORE_URL);
    my $protocol  = Thrift::BinaryProtocol->new($transport);

    return UserStoreClient->new($protocol, $protocol);
}

# Call Evernote API with error handling and retry.
#
# Arguments:
#   $func      - code ref that performs the API call. It is invoked in scalar
#                context and its result is returned on success.
#   $errorfunc - optional code ref consulted first whenever $func dies. It is
#                called with no arguments and the exception still in $@; a
#                true return value means "retry right away".
#   %opt       - optional named settings:
#                  max_retries => how many times an unrecognised error is
#                                 retried before giving up (default 5)
#                  retry_delay => seconds to sleep before such a retry
#                                 (default 1)
#
# Dies with "<error name>, <parameter>" when the exception is a hash-based
# object carrying both `errorCode` and `parameter`, and with
# "Unknown error: ..." once the retry budget for other errors is used up.
sub call_evernote {
    my ($func, $errorfunc, %opt) = @_;

    my $max_retries = defined $opt{max_retries} ? $opt{max_retries} : 5;
    my $retry_delay = defined $opt{retry_delay} ? $opt{retry_delay} : 1;

    # Loaded up front so nothing below disturbs $@ between the failing
    # call and the error callback.
    require Scalar::Util;

    my $retry_count = 0;
    while (1) {
        my $result;
        my $ok = eval {
            $result = $func->();
            1;
        };
        return $result if $ok;

        # Keep our own copy: $@ is global and anything that runs an eval
        # (including the callback) may overwrite it.
        my $err = $@;

        # retry if errorfunc returns true.
        next if defined($errorfunc) and $errorfunc->();

        # Only look inside the exception when it really is hash-based.
        # Under `use strict`, treating a plain string exception as a hash
        # reference would itself die and mask the original error.
        my $is_hash = (Scalar::Util::reftype($err) || '') eq 'HASH';

        if ($is_hash
            and defined($err->{errorCode})
            and defined($err->{parameter})) {
            die edamErrorObjectAsString($err)."\n";
        }

        if ($retry_count < $max_retries) {
            ++$retry_count;
            # Retry all unknown errors. This may have to be changed.
            warn "Unknown error: $err\n";
            sleep $retry_delay if $retry_delay > 0;
            next;
        }

        die "Unknown error: $err\n";
    }
}

# Ask the UserStore to check this client's EDAM protocol version.
#
# Calls checkVersion with the client name 'Delicious Importer' and the
# major/minor version constants from EDAMUserStore::Constants, going through
# call_evernote for error handling. Returns whatever call_evernote returns.
sub edam_check_client_version {
    my ($userStore) = @_;

    print "Checking client version...\n";

    my $check = sub {
        $userStore->checkVersion(
            'Delicious Importer',
            EDAMUserStore::Constants::EDAM_VERSION_MAJOR,
            EDAMUserStore::Constants::EDAM_VERSION_MINOR
        );
    };

    return call_evernote($check);
}


# Authenticate against the UserStore with a username and password.
#
# Uses $CONSUMER_KEY / $CONSUMER_SECRET as the API credentials and routes the
# call through call_evernote. Prints the authenticated user's name and
# returns the list (authentication token, user's shard id).
sub edam_auth_user {
    my ($userStore, $username, $password) = @_;

    print "Authenticating user...\n";

    my $authenticate = sub {
        $userStore->authenticate(
            $username, $password, $CONSUMER_KEY, $CONSUMER_SECRET
        );
    };
    my $session = call_evernote($authenticate);

    print "Authenticated successfully as $session->{user}->{name}.\n";

    return ( $session->{authenticationToken}, $session->{user}->{shardId} );
}

# Exchange the current authentication token for a fresh one.
#
# Calls refreshAuthentication on the UserStore (via call_evernote) and
# returns the `authenticationToken` field of the result.
sub edam_refresh_auth {
    my ($userStore, $auth) = @_;

    print "Refreshing authentication...\n";

    my $renewed = call_evernote(
        sub { $userStore->refreshAuthentication($auth) }
    );

    return $renewed->{authenticationToken};
}

# Build a NoteStore client for the given shard.
#
# The endpoint is $NOTESTORE_URL with "/<shard id>" appended. Returns a
# NoteStoreClient constructed with one binary protocol object for both of
# its constructor arguments.
sub edam_connect_notestore {
    my ($shardID) = @_;

    print "Connecting to NoteStore...\n";

    my $shard_url = $NOTESTORE_URL . '/' . $shardID;
    my $transport = Thrift::HttpClient->new($shard_url);
    my $protocol  = Thrift::BinaryProtocol->new($transport);

    return NoteStoreClient->new($protocol, $protocol);
}

# Look up a notebook by name, creating it when it is missing.
#
# Arguments: authentication token, NoteStore client, notebook name.
# Lists the account's notebooks and returns the guid of the first one whose
# name equals $notebook_name exactly; otherwise creates a notebook with that
# name and returns the new guid. All API calls go through call_evernote.
sub edam_open_notebook {
    my ($auth, $noteStore, $notebook_name) = @_;

    print "Fetching the list of notebooks...\n";

    my $serverNotebooks = call_evernote(
        sub { $noteStore->listNotebooks($auth) }
    );

    # Reuse an existing notebook with the requested name, if any.
    for my $candidate (@$serverNotebooks) {
        next unless $candidate->{name} eq $notebook_name;

        print "'$notebook_name' notebook found.\n";
        return $candidate->{guid};
    }

    # No match on the server: create the notebook.
    print "Creating '$notebook_name' notebook...\n";

    my $template = EDAMTypes::Notebook->new();
    $template->{name} = $notebook_name;

    my $created = call_evernote(
        sub { $noteStore->createNotebook($auth, $template) }
    );

    print "Notebook guid is {$created->{guid}}\n";
    return $created->{guid};
}

# Create one note in the given notebook.
#
# Arguments, in order:
#   $auth          - authentication token
#   $noteStore     - NoteStore client
#   $notebook_guid - guid of the notebook that receives the note
#   $title         - note title
#   $tag_names     - array ref of tag names
#   $content       - note body (markup placed inside <en-note>)
#   $altcontent    - fallback body, used once if the server rejects the
#                    first attempt with an "invalid a href attribute" error
#
# Returns the result of createNote as passed back by call_evernote.
sub edam_create_note {
    my ($auth, $noteStore, $notebook_guid, $title, $tag_names,
        $content, $altcontent) = @_;

    require Scalar::Util;

    # Wrap a body in the ENML header/footer, one piece per line.
    my $wrap_enml = sub {
        my ($body) = @_;
        return "$ENML_NOTE_HEADER\n$body\n$ENML_NOTE_FOOTER\n";
    };

    my $note = EDAMTypes::Note->new();
    $note->{notebookGuid} = $notebook_guid;
    $note->{title}        = $title;
    $note->{tagNames}     = $tag_names;
    $note->{active}       = 1; # this is an active note, not a 'deleted' one
    $note->{content}      = $wrap_enml->($content);

    my $retry_content = 0;

    # Error callback for call_evernote: returns true to request a retry.
    my $on_error = sub {
        my $err = $@;

        # The fallback is tried at most once.
        return 0 if $retry_content;

        # Only hash-based exception objects have a `parameter` field.
        # Dereferencing a plain string exception would die under
        # `use strict` and hide the real error from call_evernote.
        return 0 unless (Scalar::Util::reftype($err) || '') eq 'HASH';
        return 0 unless defined $err->{parameter};
        return 0 unless $err->{parameter} =~ /invalid a href attribute/i;

        # If Evernote complains about our link, retry with the
        # alternate unlinked version of the note content.
        $note->{content} = $wrap_enml->($altcontent);
        $retry_content = 1;
        return 1; # Retry the API call.
    };

    return call_evernote(
        sub { $noteStore->createNote($auth, $note) },
        $on_error
    );
}

# Derive a default notebook name from the bookmarks input.
#
# Reads input lines via <> until one contains "<posts ". If that line has an
# update="..." attribute the name is "Delicious <update value>"; otherwise it
# is "Delicious " followed by the current time in upper-case hex. Dies if
# the input ends without a posts tag. The caller may still override the
# name with a user option. Input consumed here is not re-read later.
sub get_notebook_name {
    while (my $line = <>) {
        next unless $line =~ /<posts /;

        return "Delicious $1" if $line =~ /update="([^"]*)"/;
        return "Delicious " . sprintf("%X", time);
    }

    die "Can't find posts tag in input.\n";
}

# Clean up and format the bookmark description to make a note title that
# Evernote will accept.
#
# Tabs and non-printable characters become spaces, leading whitespace is
# removed, the text is cut to EDAM_NOTE_TITLE_LEN_MAX characters, and
# trailing whitespace is removed. Returns the cleaned title (possibly empty).
sub format_title {
    my ($desc) = @_;

    $desc =~ s/\t/ /g;
    $desc =~ s/[[:^print:]]/ /g;
    $desc =~ s/^\s+//;

    $desc = substr($desc, 0, EDAMLimits::Constants::EDAM_NOTE_TITLE_LEN_MAX());

    # Trim the tail only after truncating: cutting a long description can
    # land right after a space, which would otherwise leave the title
    # ending in whitespace.
    $desc =~ s/\s+$//;

    return $desc;
}

# Wrap a URL in an HTML anchor whose link text is the URL itself.
# The URL is inserted as given, with no escaping.
sub format_link {
    my ($href) = @_;
    return '<a href="' . $href . '">' . $href . '</a>';
}

# Read the remaining bookmarks input and create one note per <post .../>.
#
# Arguments: authentication token, UserStore client (used to refresh the
# token), NoteStore client, guid of the target notebook.
#
# Input lines (read via <>) are accumulated until one ends in "/>", since
# an element may span several lines. If the accumulated text contains a
# post element, its href, extended, tag and description attributes are
# turned into a note: the body is the linked URL followed by the extended
# text, the tags are the whitespace-separated words of `tag`, and the title
# is the cleaned-up description.
sub import_bookmarks {
    my ($auth, $userStore, $noteStore, $notebook_guid) = @_;

    my $lineno  = 0;    # number of posts handled so far
    my $curline = '';   # text accumulated for the current element

    while (my $line = <>) {

        # Merge lines until we see a closing tag. Trailing whitespace is
        # allowed so files with CRLF line endings are recognised too.
        $curline .= $line;
        next unless $line =~ /\/>\s*$/;

        my $element = $curline;
        $curline = '';

        # Anything other than a post element is skipped.
        next unless $element =~ /(<post .*\/>)/s;
        my $post = $1;
        ++$lineno;

        my $href = '';
        my $link = '';
        if ($post =~ /href="([^"]*)"/) {
            $href = $1;
            $link = format_link($href);
        }

        my $extended = $post =~ /extended="([^"]*)"/s ? $1 : '';
        # Turn newlines into HTML linebreaks.
        $extended =~ s/\r?\n/<br \/>/g;

        my $tag = $post =~ /tag="([^"]*)"/ ? $1 : '';

        my $title = $post =~ /description="([^"]*)"/s
            ? format_title($1)
            : '';

        print "$lineno: Creating note '$title'...\n";
        edam_create_note($auth, $noteStore, $notebook_guid,
            $title, [ split(' ', $tag) ],
            "$link<br \/>$extended",
            # This is an alternate version of the note contents
            # with the URL unlinked. Use this if we hit an Evernote
            # href validation error.
            "$href<br \/>$extended");

        # Need to refresh authentication periodically before the token
        # expires. This runs only right after a post has been handled, so
        # non-post elements can no longer trigger extra refreshes while
        # the counter sits at 0 or at a multiple of 500.
        if ($lineno % 500 == 0) {
            $auth = edam_refresh_auth($userStore, $auth);
        }
    }
}

# Print the usage message to STDERR and terminate the script.
#
# This is only called when the command line is invalid, so it exits with a
# non-zero status (2) to let calling shells and scripts detect the failure.
sub usage {
    warn <<EOM;
Usage: $0 [-n notebook-name] username password bookmarks-file

    -n notebook-name
	Specifies the name of the notebook to add the bookmarks to. This
	notebook will be created if necessary. If this option is not
	specified, the default name is "Delicious export-date", where
	export-date is the export date extracted from the bookmarks file.
EOM
    exit 2;
}

# Main program: parse the command line, connect, then import.
our($opt_n);
getopts('n:') or usage();

# Username and password are required; any remaining arguments are left in
# @ARGV and read as the bookmarks input by <>.
@ARGV >= 2 or usage();
my $username = shift @ARGV;
my $password = shift @ARGV;

my $userStore = edam_connect_userstore();

# The export file is read up to its posts tag here, before authenticating,
# to obtain the default notebook name; -n overrides that name.
my $notebook_name = get_notebook_name();
$notebook_name = $opt_n if defined $opt_n;

# Stop when the service reports that this client's protocol version is not
# acceptable, instead of ignoring the result of the check.
edam_check_client_version($userStore)
    or die "Evernote rejected this client's EDAM protocol version.\n";

my ($auth, $shardID) = edam_auth_user($userStore, $username, $password);

my $noteStore = edam_connect_notestore($shardID);

my $notebook_guid = edam_open_notebook($auth, $noteStore, $notebook_name);

import_bookmarks($auth, $userStore, $noteStore, $notebook_guid);


__END__

Tags: bookmarks, delicious, evernote, perl

  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    Your reply will be screened

    Your IP address will be recorded 

  • 6 comments

Anonymous

June 23 2010, 14:55:34 UTC 1 year ago

Looks interesting, but need some newbie help

Hello - I'd love to use this script, but I'm a complete n00b regarding Perl.

I'm on Snow Leopard, do you have some pointers on how to use this script on this platform? I've tried to run it, but Perl complains about not being able to find Thrift, which probably is an external lib I need to install first?

Any help greatly appreciated!

Thanks, Martijn.

[info]randomfox

June 23 2010, 15:28:09 UTC 1 year ago

Re: Looks interesting, but need some newbie help

Thrift is included in the Evernote API.
http://www.evernote.com/about/developer/api/

If you haven't done that already, you'll also need to apply for an API key since I did not provide one with the source.

Anonymous

January 25 2011, 22:12:27 UTC 1 year ago

Import format

what format should the import file be in? I exported from my Delicious account, and when i try to run the script it tells me :
"Connecting to UserStore...
Can't find posts tag in input.
"

Can you post a small sample import file? A couple of lines would be cool, just to see the format.

Much appreciated!

[info]randomfox

February 20 2012, 15:33:47 UTC 2 months ago

I don't think Delicious exports in the same format that it used to since AVOS took over. It's not going to work any more.

[info]miscsecurity

February 16 2012, 00:56:50 UTC 3 months ago

I'm pretty sure this script doesn't work any more.

[info]randomfox

February 20 2012, 15:34:39 UTC 2 months ago

If you'd like to write an updated version that works, post it and I'll link to it.
Create an Account
Forgot your login or password?
Facebook Twitter More login options
English • Español • Deutsch • Русский…