package Lire::Extensions::WWW::SearchSchema;

# vim:syntax=perl

use strict;

use vars qw( $VERSION @ISA );

use Lire::AsciiDlf::ExtendedFieldsCreator;

use Carp;

BEGIN {
    # Inherit from Lire::AsciiDlf::ExtendedFieldsCreator.
    @ISA = qw( Lire::AsciiDlf::ExtendedFieldsCreator );

    # Keep only the dotted number from the revision keyword string.
    my $revision_keyword = '$Revision: 1.2 $';
    ($VERSION) = $revision_keyword =~ m!Revision: ([.\d]+)!;
}

# Known search engines, as [ host fragment => query parameter ] pairs.
# create_extended_fields() walks this list from the top and uses the first
# pair whose fragment occurs in the referer host and whose parameter is
# found in the query string.  The order therefore matters: google.yahoo.com
# must be tried as a Yahoo host before it is tried as a Google one.
# NOTE(review): the 'www.google' pair repeats the ( 'google', 'q' ) check
# with a narrower fragment, so it looks unreachable -- confirm before
# removing it.
my @Engine2Keywords = (
    [ 'yahoo.com'         => 'p' ],
    [ 'altavista.com'     => 'q' ],
    [ 'google'            => 'q' ],
    [ 'google'            => 'query' ],
    [ 'www.google'        => 'q' ],
    [ 'eureka.com'        => 'q' ],
    [ 'lycos.com'         => 'query' ],
    [ 'hotbot.com'        => 'MT' ],
    [ 'msn.com'           => 'MT' ],
    [ 'infoseek.com'      => 'qt' ],
    [ 'webcrawler'        => 'searchText' ],
    [ 'excite'            => 'search' ],
    [ 'netscape.com'      => 'search' ],
    [ 'mamma.com'         => 'query' ],
    [ 'alltheweb.com'     => 'query' ],
    [ 'northernlight.com' => 'qr' ],
);

# Look up the position of the "referer" field in the schema once and
# remember it in $self->{referer_idx}, where create_extended_fields()
# reads it for every record.
sub init_computation {
    my $self = shift;

    my $referer_field = $self->schema->field( "referer" );
    $self->{referer_idx} = $referer_field->pos;
}

# Compute the extended field for one DLF record: the search keywords found
# in the record's referer, when that referer is a search-engine result URL.
#
# Parameters:
#   $self - the creator object; the referer position is read from
#           $self->{referer_idx} (set by init_computation()).
#   $dlf  - array reference holding the fields of the record.
#
# Returns a one-element array reference containing:
#   "LIRE_NOTAVAIL" - when the referer itself is "LIRE_NOTAVAIL";
#   "-"             - when the referer is not an http(s) URL with a query
#                     string, when no engine/parameter pair of
#                     @Engine2Keywords matches, or when the search
#                     parameter is empty;
#   the keywords    - otherwise, with '+' turned into spaces and %XX
#                     escapes decoded.
sub create_extended_fields {
    my ( $self, $dlf ) = @_;

    my $referer = $dlf->[ $self->{referer_idx} ];
    return [ "LIRE_NOTAVAIL" ]
      if ( $referer eq "LIRE_NOTAVAIL" );

    # The host is everything up to the first '/'.  The query string starts
    # at the first '?', so a literal '?' inside a search term does not
    # truncate the keywords.
    return [ "-" ] unless $referer =~ m!^https?://([^/]*)/[^?]*\?(.*)$!;

    # Lowercase the host once; the engine fragments are all lowercase.
    my ( $host, $query ) = ( lc $1, $2 );

    foreach my $spec ( @Engine2Keywords ) {
        my ( $engine, $param ) = @$spec;
        next if index( $host, $engine ) == -1;

        # Match the parameter as a whole name: at the start of the query or
        # right after a separator.  Otherwise 'q' would also match inside
        # 'aq=' or 'as_q=' and yield the wrong value.
        next unless $query =~ /(?:^|[;&])\Q$param\E=([^;&]*)/;

        my $keywords = $1;

        # '+' must become a space before %XX decoding, so that an encoded
        # plus sign (%2B) survives as a literal '+'.
        $keywords =~ tr/+/ /s;
        $keywords =~ s/%([0-9a-fA-F]{2})/chr(hex $1)/eg;

        # The search parameter can be present but empty (e.g. '...&q=&...');
        # report such requests as "-".
        $keywords = '-' if $keywords eq '';
        return [ $keywords ];
    }

    return [ "-" ];
}

# keep perl happy
1;

__END__

=pod

=head1 NAME

Lire::Extensions::WWW::SearchSchema - extract search keywords from the referer field

=head1 SYNOPSIS

=head1 DESCRIPTION

Create 'keywords' DLF field based on analysis of the 'referer' field.

=head1 VERSION

$Id: SearchSchema.pm,v 1.2 2002/01/25 14:08:53 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=cut
