#!/usr/bin/perl -w

use strict;
use HTML::TokeParser::Simple;
use Date::Format;
use Digest::MD5 qw(md5_hex);

# Path of the bookmarks HTML file to convert, taken from the command line.
my $file = shift;
die "Usage: $0 <bookmarks.html>\n" unless defined $file;

# Three-argument open with a lexical handle: a file name that happens to
# start with a mode character ('>', '|', ...) is always treated as a name.
open(my $input, '<', $file) or die "Can't open file: $!";

# One hash reference per bookmark, with the keys 'link', 'time' and 'title'.
my @links;

# The file is tokenised one line at a time, so an anchor whose text starts
# on the following line is recorded with an empty title.
while (my $line = <$input>)
{
	my $stream = HTML::TokeParser::Simple->new(\$line);

	while (my $tag = $stream->get_token)
	{
		next unless $tag->is_start_tag('a');

		# Anchors without an href (e.g. <a name="...">) are not bookmarks.
		my $href = $tag->return_attr('href');
		next unless defined $href;

		# Local files are meaningless outside this machine; skip them.
		next if $href =~ m!file://!i;

		# add_date is expected to be epoch seconds; fall back to the epoch
		# when the attribute is missing or not a plain number.
		my $added = $tag->return_attr('add_date');
		$added = 0 unless defined $added && $added =~ /\A\d+\z/;

		my $link = {
			'link'  => $href,
			# The trailing "Z" claims UTC, so format in GMT rather than
			# in the local time zone.
			'time'  => time2str("%Y-%m-%dT%H:%MZ", $added, 'GMT'),
			'title' => '',
		};

		# The token right after the opening tag is taken as the title when
		# it is text.  It may be undef if the anchor ends the line.
		my $next = $stream->get_token;
		if (defined $next && $next->is_text)
		{
			$link->{'title'} = $next->as_is;
		}
		push @links, $link;
	}
}

close($input);

# Make a string safe for use inside a double-quoted XML attribute value.
#
#   $text             the value to escape; undef is treated as ''.
#   $keep_references  when true, an '&' that already starts a numeric or
#                     predefined XML reference (&amp; &#38; ...) is left
#                     alone, so markup that is already escaped is not
#                     escaped twice.
#
# Returns the escaped copy; the argument itself is not modified.
sub _xml_attr_escape
{
	my ($text, $keep_references) = @_;
	$text = '' unless defined $text;

	if ($keep_references)
	{
		$text =~ s/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)/&amp;/g;
	}
	else
	{
		$text =~ s/&/&amp;/g;
	}
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;

	return $text;
}

# Write the collected bookmarks as a <posts> document on standard output.
print '<?xml version="1.0" standalone="yes"?>', "\n";
print '<posts tag="" user="">', "\n";

foreach my $post (@links)
{
	my $href = defined $post->{'link'} ? $post->{'link'} : '';

	# The hash is computed over the unescaped URL.  The title is presumably
	# raw HTML text that may already contain entity references (confirm
	# against the parser in use), hence the lenient escaping mode for it.
	print '<post href="', _xml_attr_escape($href),
	      '" description="', _xml_attr_escape($post->{'title'}, 1),
	      '" hash="', md5_hex($href),
	      '" tag="" time="', _xml_attr_escape($post->{'time'}),
	      '" />', "\n";
}
print '</posts>', "\n";

