<?php
/* ******************************************************************** */
/* CATALYST PHP Source Code                                             */
/* -------------------------------------------------------------------- */
/* This program is free software; you can redistribute it and/or modify */
/* it under the terms of the GNU General Public License as published by */
/* the Free Software Foundation; either version 2 of the License, or    */
/* (at your option) any later version.                                  */
/*                                                                      */
/* This program is distributed in the hope that it will be useful,      */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
/* GNU General Public License for more details.                         */
/*                                                                      */
/* You should have received a copy of the GNU General Public License    */
/* along with this program; if not, write to:                           */
/*   The Free Software Foundation, Inc., 59 Temple Place, Suite 330,    */
/*   Boston, MA  02111-1307  USA                                        */
/* -------------------------------------------------------------------- */
/*                                                                      */
/* Filename:    search-lucene-defs.php                                  */
/* Author:      Paul Waite                                              */
/* Description: NB: This module is a variant of the original lucene     */
/*              module which processed fields line-by-line. This module */
/*              implements the XML interface to Lucene.                 */
/*                                                                      */
/*              Definitions for interfacing to the LUCENE search        */
/*              engine system. LUCENE is a system which is optimised    */
/*              for indexing and searching in a generic way. It is      */
/*              implemented as a server accessible via a port over TCP. */
/*              This module understands the protocol that this server   */
/*              uses to implement indexing and search queries.          */
/*                                                                      */
/* ******************************************************************** */
/** @package search */

/** Generic search classes */
include_once("search-defs.php");
/** Stopwatch microtimer */
include_once("timer-defs.php");
/** XML classes */
include_once("xml-defs.php");

// ----------------------------------------------------------------------
/** Socket timeout value meaning: do not wait on socket receive, return
 * immediately (the socket is put into non-blocking mode) */
define("SOCK_NO_WAIT", 0);
/** Socket timeout in seconds meaning: wait 'forever'. This is really
 * 86400 seconds (24 hours), which is that, more or less */
define("SOCK_FOREVER", 86400);
/** Maximum number of attempts made to receive a response after a send */
define("SOCK_RETRIES", 3);
/** Used to indicate that a field should be indexed by Lucene */
define("INDEXED", true);
/** Used to indicate that a field should NOT be indexed by Lucene */
define("NOT_INDEXED", false);
/** Used to indicate that a field should be stored by Lucene */
define("STORED", true);
/** Used to indicate that a field should NOT be stored by Lucene */
define("NOT_STORED", false);
/** The name of the field Lucene should assume if none is specified */
define("DEFAULT_FIELD", "Text");
/** Default type of a field. Possible types are: 'Text', 'Date', 'Id' */
define("DEFAULT_FIELDTYPE", "Text");
/** Index ID generation mode: use an incrementing integer */
define("ID_FROM_INC", 0);
/** Index ID generation mode: use the filename stripped of path and extension */
define("ID_FROM_NAME", 1);
/** Index ID generation mode: use the full filename (incl. extension) */
define("ID_FROM_FILENAME", 2);
/** Index ID generation mode: use the full path to the file */
define("ID_FROM_PATH", 3);
/** Indicates that index fields come from meta tag extraction */
define("META_TAG_FIELDS", true);

// ----------------------------------------------------------------------
/**
* The SearchEngine connection class
* This class inherits the functionality of the 'search' class since mostly
* that is what we will be connecting to SearchEngine for. The Indexing and
* Control descendants can just ignore this inherited basic searching
* functionality.
* This class knows how to connect to a SearchEngine server and send and
* receive messages to/from it. Child classes which need to talk to this
* server to do indexing or querying should inherit this class.
* @package search
*/
class searchengine_connection extends search {
  // Public
  /** HOST running the SearchEngine query server */
  var $host = "";
  /** PORT that the server is listening on */
  var $port = "";
  /** Timeout for socket send/receive in seconds */
  var $timeoutsecs = 10;

  // Private
  /** Whether SearchEngine is enabled..
      @access private */
  var $enabled = true;
  /** The message waiting to be sent
      @access private */
  var $message = "";
  /** Raw response content we receive back from the SearchEngine server
      @access private */
  var $responsebuf = "";
  /** Socket file pointer
      @access private */
  var $sockfp = false;
  /** True if we are connected to socket
      @access private */
  var $connected = false;
  /** An execution timer
      @access private */
  var $timer;
  // .....................................................................
  /** Constructor - SearchEngine connection. Normally this will just be
   * called with no host/port, and the object is just initialised ready for
   * the call to send(). If called with both host and port, then the
   * connection is fired up immediately.
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  * @param integer $timeoutsecs Seconds to timeout the connection
  */
  function searchengine_connection($host="", $port="", $timeoutsecs="") {
    $this->timer = new microtimer();
    // Remember a supplied timeout even if we do not connect just yet, so
    // that it is applied when send() later opens the connection..
    if (is_numeric($timeoutsecs)) {
      $this->timeoutsecs = $timeoutsecs;
    }
    if ($host != "" && $port != "") {
      $this->connect($host, $port, $timeoutsecs);
    }
  } // searchengine_connection
  // .....................................................................
  /**
  * Sets the search engine host and port for the connection
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function set_host_and_port($host, $port) {
    $this->host = $host;
    $this->port = $port;
  } // set_host_and_port
  // .....................................................................
  /**
  * Connect to the SearchEngine server. Optionally over-ride various settings
  * which were set in the constructor. Normally this method is only
  * called internally, in response to a request to send a message to
  * the SearchEngine server. Any connection which is already open is
  * closed first.
  * @access private
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  * @param integer $timeoutsecs Seconds to timeout the connection
  * @return boolean True if we are now connected
  */
  function connect($host="", $port="", $timeoutsecs="") {
    // Override host and port if given..
    if ($host != "") $this->host = $host;
    if ($port != "") $this->port = $port;

    // If host and port not defined, try config..
    if ($this->host == "") {
      if (class_exists("configuration")) {
        $config = new configuration("sys_control");
        if ($config->field_exists("Search Engine Host")) {
          $this->host = $config->value("Search Engine Host");
          $this->port = $config->value("Search Engine Port");
        }
        debugbr("acquired Axyl config: host [$this->host] port [$this->port]", DBG_DUMP);
      }
    }

    // Close any socket we already have open, rather than just forgetting
    // about it, otherwise re-connecting would leak the old socket..
    $this->disconnect();
    $this->connected = false;

    // Try to open socket if we have a host..
    if ($this->enabled && $this->host != "") {
      $errno = 0;
      $errstr = "";
      $this->sockfp = fsockopen($this->host, (int) $this->port, $errno, $errstr);
      if (!$this->sockfp) {
        $this->log_error("failed to connect to '$this->host:$this->port' ($errno: $errstr)");
      }
      else {
        // Must be flagged as connected BEFORE setting the timeout, since
        // set_timeout() ignores sockets which are not connected..
        $this->connected = true;
        if (is_numeric($timeoutsecs)) $this->timeoutsecs = $timeoutsecs;
        $this->set_timeout($this->timeoutsecs);
        debugbr("searchengine_connection: connected to '$this->host:$this->port'", DBG_DUMP);
      }
    }
    // Return result..
    return $this->connected;
  } // connect
  // .....................................................................
  /**
  * Disconnect from the SearchEngine server. Normally this is used only by
  * internal SearchEngine server methods. Does nothing if not connected.
  * @access private
  */
  function disconnect() {
    if ($this->connected) {
      fclose($this->sockfp);
      $this->sockfp = false;
      $this->connected = false;
    }
  } // disconnect
  // .....................................................................
  /**
  * Set the socket timeout. Deals with the special case of setting
  * the socket to non-blocking mode (zero timeout, ie. SOCK_NO_WAIT).
  * Non-numeric values (eg. nullstring) are ignored, as are calls made
  * when the socket is not connected.
  * @param integer $timeoutsecs Set the timeout in seconds
  */
  function set_timeout($timeoutsecs) {
    // NB: is_numeric() rather than a comparison with "", so that a zero
    // timeout (SOCK_NO_WAIT) is recognised on every PHP version..
    if ($this->connected && is_numeric($timeoutsecs)) {
      $this->timeoutsecs = $timeoutsecs;
      if ($this->timeoutsecs == SOCK_NO_WAIT) {
        stream_set_blocking($this->sockfp, false);
      }
      else {
        stream_set_blocking($this->sockfp, true);
        stream_set_timeout($this->sockfp, (int) $this->timeoutsecs);
      }
    }
  } // set_timeout
  // .....................................................................
  /**
  * Sends a message to the SearchEngine server, and receives the response. We
  * operate on the understanding that every time we send something to
  * SearchEngine we expect a response. Since this method already calls the
  * receive() method, there is no need to call it from your application.
  * The content to be sent is expected to be already in the class
  * string variable $message. The raw response is put into the class
  * string variable $responsebuf. The receive is attempted up to
  * SOCK_RETRIES times.
  * @param integer $timeoutsecs Override for timeout in seconds
  * @return boolean True if the message was sent, and a response received ok
  */
  function send($timeoutsecs="") {
    $send_ok = true;
    if (!$this->connected) {
      $this->connect();
    }
    if ($this->connected) {
      // Check for timeout over-ride..
      if (is_numeric($timeoutsecs)) $this->timeoutsecs = $timeoutsecs;
      $this->set_timeout($this->timeoutsecs);
      // Send message..
      if ($this->message != "") {
        $this->timer->restart();
        $bytesput = fputs($this->sockfp, $this->message);
        $this->timer->stop();
        if (debugging()) {
          $buf = trim(substr(rawurldecode($this->message),0, 5000));
          debugbr("<pre>" . xmldump($buf) . "</pre>", DBG_DUMP);
          debugbr("searchengine_connection: send transaction took "
                . $this->timer->formatted_millisecs() . "mS",
                DBG_DUMP
                );
        }
        // fputs() signals failure with false (or zero bytes written on
        // older PHP versions), never with -1..
        if ($bytesput !== false && $bytesput > 0) {
          debugbr("searchengine_connection: send ok ($bytesput bytes)", DBG_DUMP);
          for ($i=0; $i< SOCK_RETRIES; $i++) {
            $send_ok = $this->receive();
            if ($send_ok) break;
            debugbr("searchengine_connection: receive retry #" . ($i + 1), DBG_DUMP);
          }
        }
        else {
          $this->log_error("write to server failed");
          $send_ok = false;
        }
      }
      else {
        $this->log_error("trying to send null content");
        $send_ok = false;
      }
    }
    else {
      $this->log_error("send with no open socket to host [$this->host] port [$this->port]");
      $send_ok = false;
    }
    // Return status..
    return $send_ok;
  } // send
  // .....................................................................
  /**
  * Receive a message from the SearchEngine server into $responsebuf. Reads
  * until the server closes the connection, or the read fails. If the
  * current timeout is SOCK_NO_WAIT, it will return immediately with or
  * without a message. This is a low-level routine which deals with
  * receiving the message over TCP sockets.
  * @return boolean True if the message was received loud and clear
  * @access private
  */
  function receive() {
    $received_ok = true;
    if ($this->connected) {
      $this->timer->restart();
      $this->responsebuf = "";
      $nowait = (is_numeric($this->timeoutsecs) && $this->timeoutsecs == SOCK_NO_WAIT);
      while (!feof($this->sockfp)) {
        $buf = fread($this->sockfp, 10000);
        if ($buf === false) {
          $this->log_error("no response from server");
          $received_ok = false;
          break;
        }
        $this->responsebuf .= $buf;
        // A non-blocking socket yields an empty read when nothing is
        // waiting: return what we have instead of spinning in this loop..
        if ($nowait && $buf === "") {
          break;
        }
      }
      $this->timer->stop();
      if (debugging()) {
        debugbr("<pre>" . xmldump($this->responsebuf) . "</pre>", DBG_DUMP);
        debugbr("searchengine_connection: response from server took "
               . $this->timer->formatted_millisecs() . "mS",
               DBG_DUMP
               );
      }
    }
    else {
      $this->log_error("receive with no open socket");
      $received_ok = false;
    }
    // Return status..
    return $received_ok;
  } // receive
  // .....................................................................
  /** Log a message to the PHP error log and print info to debugger. The
  * message is decorated with the class name and the host and port.
  * @param string $err The error message to log
  * @access private
  */
  function log_error($err) {
    $prefix = (defined("APP_NAME") ? APP_NAME . ": " : "");
    $err = "SearchEngine error: " . get_class($this) . ": $this->host:$this->port: $err";
    debugbr($err);
    error_log($prefix . $err, 0);
  } // log_error
} // searchengine_connection class

// ----------------------------------------------------------------------
/** The SearchEngine fieldset class. A container for the fields of a
* SearchEngine message, each field being an XML <Field> tag. These are
* the tags which make up a query message or an index message.
* @access private
* @package search
*/
class searchengine_fieldset {
  /** The <Field> tag objects we hold, keyed on field name */
  var $xmltags = array();
  // .....................................................................
  /** Constructor. Nothing to initialise. */
  function searchengine_fieldset() { }
  // .....................................................................
  /**
  * Fetch the field object held under the given name. If there is no
  * such field, a brand new <Field> tag carrying that name is handed
  * back instead. NB: a newly created field is NOT added to the fieldset;
  * use put_field() to store it. A field object is always returned.
  * @param string $fieldname The name of the field to get
  * @return object An xmltag object for the field
  */
  function get_field($fieldname) {
    if (!isset($this->xmltags[$fieldname])) {
      // Unknown field, so manufacture a fresh one..
      $newtag = new xmltag("Field");
      $newtag->setattribute("name", $fieldname);
      return $newtag;
    }
    return $this->xmltags[$fieldname];
  } // get_field
  // .....................................................................
  /**
  * Store a field object in the fieldset under the given name, replacing
  * any field already held under that name.
  * @param string $fieldname Unique name of the field in the set
  * @param object $field The field object to store
  */
  function put_field($fieldname, $field) {
    $this->xmltags[$fieldname] = $field;
  } // put_field
  // .....................................................................
  /** Set the definition of a field: its type, and whether it is stored
  * and/or indexed. The field is created first if we do not have it yet,
  * otherwise the existing definition is over-written.
  * @param string $fieldname Name of the field
  * @param string $type Type of this field eg. "Date"
  * @param boolean $stored Whether field value should be stored by SearchEngine
  * @param boolean $indexed Whether field value should be indexed by SearchEngine
  */
  function define_field($fieldname, $type, $stored=STORED, $indexed=INDEXED) {
    // Booleans go into the XML as the words 'true' or 'false'..
    $stored_attr = "false";
    if ($stored) {
      $stored_attr = "true";
    }
    $indexed_attr = "false";
    if ($indexed) {
      $indexed_attr = "true";
    }
    $tag = $this->get_field($fieldname);
    $tag->setattribute("type", $type);
    $tag->setattribute("stored", $stored_attr);
    $tag->setattribute("indexed", $indexed_attr);
    $this->put_field($fieldname, $tag);
  } // define_field
  // .....................................................................
  /** Assign a value to a field, creating the field if it is not
  * already in the fieldset.
  * @param string $fieldname Name of the field
  * @param string $fieldvalue Value to associate with this field
  */
  function add_field($fieldname, $fieldvalue="") {
    $tag = $this->get_field($fieldname);
    $tag->value = $fieldvalue;
    $this->put_field($fieldname, $tag);
  } // add_field
  // .....................................................................
  /** Empty the fieldset of all its fields */
  function clear() {
    $this->xmltags = array();
  } // clear
  // .....................................................................
  /** Render every field in the fieldset, in the order they were added.
  * @return string The concatenated renderings of all fields
  */
  function render() {
    $rendered = array();
    foreach ($this->xmltags as $tag) {
      $rendered[] = $tag->render();
    }
    return implode("", $rendered);
  } // render
} // searchengine_fieldset class

// ----------------------------------------------------------------------
/**
* The SearchEngine msg class. This is a raw class which holds the basic
* message fields and data and knows how to build them into a full
* message for sending to the SearchEngine server.
* @package search
*/
class searchengine_msg extends searchengine_connection {
  // Public
  /** Type/name of this message. Used as the name of the root XML tag. */
  var $type = "";

  // Private
  /** Array containing XML tags
      @access private */
  var $xmltags = array();
  /** Object containing SearchEngine fields
      @access private */
  var $fieldset;
  /** True if message has been built
      @access private */
  var $built = false;
  /** Error message if any error occurred
      @access private */
  var $error_msg = "";
  // .....................................................................
  /** Constructor
  * Notes: The application is either specified in the formal parameters or it
  * can be determined for an Axyl application by using the APP_PREFIX which
  * is unique to the application. This is the recommended option. Other
  * developers have, however, also used the config value 'Search Engine
  * Application' for some reason, so this is still supported here. If none
  * of these methods results in a valid identifier, 'default' is used.
  * An application passed in explicitly is always used as given.
  * @param string $type Type of message this is, eg; QUERY, INDEX..
  * @param string $application The application name. Sets default SearchEngine config.
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function searchengine_msg($type="", $application="?", $host="", $port="") {
    $this->searchengine_connection($host, $port);
    $this->type = $type;
    $this->fieldset = new searchengine_fieldset();
    // We must have an application..
    if ($application == "?") {
      // First choice is the Axyl application prefix..
      if (defined("APP_PREFIX")) {
        $application = APP_PREFIX;
      }
      // If still not defined, try config..
      if ($application == "" || $application == "?") {
        if (class_exists("configuration")) {
          $config = new configuration("sys_control");
          if ($config->field_exists("Search Engine Application")) {
            $application = $config->value("Search Engine Application");
          }
        }
      }
      // Last resort, which covers standalone apps, and also the case
      // where the config value is missing or has been left empty..
      if ($application == "" || $application == "?") {
        $application = "default";
      }
    }
    // Set the application..
    $this->set_application($application);
  } // searchengine_msg
  // .....................................................................
  /**
  * Add a new XML tag object to this SearchEngine message. We usually just
  * append the tag to an array of tags, which is produced for the message,
  * however the $mode option allows us to replace an existing tag
  * previously added, by tag name. Eg. this could be used to re-assign the
  * 'application' tag, since there should only ever be one of those. This
  * mode will only ever replace the first occurrence, not multiple. If
  * there is no existing tag of that name, the tag is appended.
  * NB: any other value for $mode is ignored, and the tag is not added.
  * @param object $tag The xmltag object to add to our SearchEngine msg
  * @param string $mode If "replace": replace existing, "append": append tag
  */
  function add_xmltag($tag, $mode="append") {
    switch ($mode) {
      case "append":
        $this->xmltags[] = $tag;
        $this->built = false;
        break;

      case "replace":
        $new_xmltags = array();
        $replaced = false;
        foreach ($this->xmltags as $xmltag) {
          if (!$replaced && $xmltag->tagname == $tag->tagname) {
            $new_xmltags[] = $tag;
            $replaced = true;
          }
          else {
            $new_xmltags[] = $xmltag;
          }
        } // foreach
        if (!$replaced) {
          $new_xmltags[] = $tag;
        }
        // Install new set of tags..
        $this->xmltags = $new_xmltags;
        $this->built = false;
        break;
    } // switch

  } // add_xmltag
  // .....................................................................
  /**
  * Specify the application. The application is the name of a configuration
  * set which has been specified either by a control message, or by using
  * configuration files on the server. A given configuration set identified
  * by an application name can have specific fields already defined, such
  * as Sort: or Domain: etc.
  * Notes: The 'Application' header can only appear once in the message.
  * To this end we call 'add_xmltag' in "replace" mode, so this method can
  * in fact be called multiple times, and only one application tag will
  * be present in the final message.
  * @param string $application The application name to set.
  */
  function set_application($application) {
    $this->add_xmltag( new xmltag("Application", $application), "replace" );
  } // set_application
  // .....................................................................
  /**
  * Specify a domain. A domain is an identifier which groups indexed
  * objects internally to SearchEngine. This allows searches on multiple
  * archives of documents in a single SearchEngine installation.
  * Notes: There may be zero or more domain headers in the message. If it
  * does not appear, then any domain header defined for the application
  * will be applied on its own. Otherwise any definitions added by this
  * method are OR'd with any specified in the application config.
  * NB: If no domains are specified anywhere, any searching will be done
  * across all domains (which would probably yield very confusing return
  * data!).
  * @param string $domain The domain to set.
  */
  function set_domain($domain) {
    $this->add_xmltag( new xmltag("Domain", $domain) );
  } // set_domain
  // .....................................................................
  /** Add a field to the fieldset.
  * @param string $fieldname Name of the field
  * @param string $fieldvalue Value to associate with this field
  */
  function add_field($fieldname, $fieldvalue="") {
    $this->fieldset->add_field($fieldname, $fieldvalue);
    $this->built = false;
  } // add_field
  // .....................................................................
  /** Clear the fieldset and the built message, leaving the message type
  * alone. NB: XML tags added with add_xmltag() are also left in place. */
  function clear() {
    $this->fieldset->clear();
    $this->message = "";
    $this->built = false;
  } // clear
  // .....................................................................
  /**
  * Builds the message according to the message type. The result is an
  * XML document with the message type as the root tag, containing the
  * XML tags added so far, followed by a 'Fields' tag holding the fieldset
  * if there are any fields. Nothing is built if the type is nullstring.
  * This method may be over-ridden in children inheriting this class
  * @return boolean True if the message is built and ready in $message
  * @access private
  */
  function build() {
    if (!$this->built) {
      if ($this->type != "") {
        $xml = new xmltag($this->type);
        // XML TAGS
        foreach ($this->xmltags as $tag) {
          $xml->childtag($tag);
        }
        // FIELDS
        if (count($this->fieldset->xmltags) > 0) {
          $fields = new xmltag("Fields");
          foreach ($this->fieldset->xmltags as $field) {
            $fields->childtag($field);
          }
          $xml->childtag($fields);
        }
        $this->message = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" . $xml->render();
        $this->built = true;
      }
    }
    return $this->built;
  } // build
  // .....................................................................
  /**
  * Sends the current message to SearchEngine, building it first if
  * required. The connection is closed again once the send/receive
  * transaction is over. If the message cannot be built then nothing
  * is sent, and the problem is logged.
  * @param integer $timeoutsecs Override for timeout in seconds
  * @return boolean True if the send operation was a success
  */
  function send($timeoutsecs="") {
    $success = false;
    if ($this->build()) {
      // Low-level socket send-receive transaction..
      $success = parent::send($timeoutsecs);
      $this->disconnect();
    }
    else {
      // Do not fail silently, let them know why nothing went out..
      $this->log_error("message could not be built, nothing sent");
    }
    return $success;
  } // send
} // searchengine_msg class

// ----------------------------------------------------------------------
/**
* The SearchEngine message class. This class extends its parent class
* searchengine_msg and adds some higher level methods for adding groups of
* fields to the message.
* @package search
*/
class searchengine_message extends searchengine_msg {
  /** Response object which will parse XML content
      @access private */
  var $response;
  // .....................................................................
  /** Constructor
  * This is a more complex class which builds on the basic searchengine_msg
  * class to provide some higher level methods for adding fields in
  * specific ways to support CONTROL, QUERY and INDEX message types.
  * @param string $type Type of message this is, eg; QUERY, INDEX..
  * @param string $application The application name. Sets default SearchEngine config.
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function searchengine_message($type="", $application="?", $host="", $port="") {
    $this->searchengine_msg($type, $application, $host, $port);
  } // searchengine_message
  // .....................................................................
  /**
  * Strip field type specifiers out of field strings. A field string with
  * a type specifier in it is of the form: 'Foo:Date', where the field
  * name is 'Foo' and the field type is 'Date'. Possible field types are
  * 'Id', 'Text' (the default), and 'Date'.
  * Note that sort field specification is a special case, where the syntax
  * can be 'Foo:Date:Desc' or 'Foo:Desc' indicating the sort on the given
  * field should be done in descending order. In this case the ':Desc'
  * is retained on the returned fieldname.
  * At present you would only use this facility with a 'Date' field, and
  * everything else would then default to 'Text'. [The 'Id' type being a
  * special one]
  * We return the field stripped of any type, and if a type was present
  * we issue the define_field() directive to define it. A field so-defined
  * will always be both stored by SearchEngine and indexed.
  * Surrounding whitespace is always removed from the field name.
  * @param string $field Field in 'Foo:Date' format, or just 'Foo' for default type
  * @return string The fieldname stripped of any type specifier
  * @access private
  */
  function strip_field_type($field) {
    // Trim here too, so that the elements of a list like 'Foo, Bar' come
    // out the same whether or not they carry a type specifier..
    $fieldname = trim($field);
    $retfieldname = $fieldname;
    if (strstr($fieldname, ":")) {
      // Extract field specifier parts..
      $bits = explode(":", $fieldname);
      $fieldname = trim($bits[0]);
      if (strtolower($fieldname) == "rank") {
        $fieldname = "RANK";
      }
      $retfieldname = $fieldname;
      // The second specifier is optional: do not trim() a missing one..
      $f1 = (isset($bits[1]) ? trim($bits[1]) : "");
      $f2 = (isset($bits[2]) ? trim($bits[2]) : "");
      // Check for a sort field with DESC specifier..
      if ($f1 == "Desc" || $f2 == "Desc") {
        $retfieldname .= ":Desc";
      }
      // Check for valid field type specifier..
      if ($f1 == "Date" || $f1 == "Text" || $f1 == "Id") {
        // Define field by name..
        $this->define_field($fieldname, $f1);
      }
    }
    // Return fieldname plus any sort spec..
    return $retfieldname;
  } // strip_field_type
  // .....................................................................
  /**
  * Define a field. We supply the name of the field, its type (Text, Date
  * or Id), and whether it should be stored by SearchEngine for later retrieval
  * in queries. For example you would not store the raw document/content as
  * this is usually stored elsewhere.
  * We also cater for fields which might not need to be indexed. These would
  * be fields of data you just want to return with the document, if found in
  * a query, but not search on. An example might be a field containing the
  * path to the physical document on disk. For these fields you would then
  * specify NOT_INDEXED for the $indexed parameter. These fields MUST be
  * stored, so we make the rule: if the field is NOT_INDEXED then it must
  * be STORED (this will be forced).
  * In the normal course of events, fields will be defined to be both stored
  * and indexed. The exception is the special "Text" field associated with
  * an item "Body", which is indexed, but never stored.
  * This method sets the definition on the field in our fieldset.
  * @param string $fieldname Name of the field to index
  * @param string $type Type of field data: Text, Date or Id.
  * @param boolean $stored If true then SearchEngine will store the content itself
  * @param boolean $indexed If true then SearchEngine will index the field content
  */
  function define_field($fieldname, $type, $stored=STORED, $indexed=INDEXED) {
    // Force non-indexed fields to be stored..
    if (!$indexed) {
      $stored = STORED;
    }
    $this->fieldset->define_field($fieldname, $type, $stored, $indexed);
  } // define_field
  // .....................................................................
  /**
  * Specify the fields you want returned from SearchEngine.
  * Fields should be in a comma-separated list of field names. Each field
  * name can have the field type included in the form 'Foo:Date', where
  * 'Date' is the type in this instance. In fact, since 'Text' is the
  * default field type, 'Date' is probably the only one you need to use
  * as the current implementation stands.
  * This method adds a 'Return' tag to the message, containing the field
  * names as a space-delimited list. Empty field names are ignored.
  * @param mixed $fields Comma-delimited fieldname list, or array of fields
  */
  function set_returnfields($fields) {
    if (!is_array($fields)) {
      $flds = explode(",", $fields);
    }
    else {
      $flds = $fields;
    }
    $returnfields = array();
    foreach ($flds as $field) {
      $fieldname = $this->strip_field_type($field);
      // Skip empties, eg. as produced by a trailing comma..
      if ($fieldname !== "") {
        $returnfields[] = $fieldname;
      }
    }
    $returnlist = implode(" ", $returnfields);
    $this->add_xmltag( new xmltag("Return", $returnlist) );
  } // set_returnfields
  // .....................................................................
  /**
  * Specify query limit field. This sets the maximum number of results
  * that SearchEngine should return.
  * @param integer $limit Maximum number of results (hits) to return
  */
  function set_limit($limit) {
    $this->add_xmltag( new xmltag("Limit", $limit) );
  } // set_limit
  // .....................................................................
  /**
  * Specify query offset field 'First'. This sets the offset for the
  * returned results. For example, if this was set to 3, and SearchEngine
  * found 20 hits, then results would be sent back from the 3rd hit
  * onwards.
  * @param integer $first Offset in result set to start from
  */
  function set_first($first) {
    $this->add_xmltag( new xmltag("First", $first) );
  } // set_first
  // .....................................................................
  /**
  * Specify the fields you want query results to be ordered by.
  * Fields should be in a comma-separated list of field names. Each field
  * name can have the field type included in the form 'Foo:Date', where
  * 'Date' is the type in this instance. In fact, since 'Text' is the
  * default field type, 'Date' is probably the only one you need to use
  * as the current implementation stands.
  * Note that sort field specification is a special case, where the syntax
  * can be 'Foo:Date:Desc' or 'Foo:Desc' indicating the sort on the given
  * field should be done in descending order.
  * This method adds a 'Sort' tag to the message, containing the sort
  * fields as a space-delimited list. Empty field names are ignored.
  * @param mixed $fields Comma-delimited fieldname list, or array of fields
  */
  function set_sortorder($fields) {
    if (!is_array($fields)) {
      $flds = explode(",", $fields);
    }
    else {
      $flds = $fields;
    }
    $sortfields = array();
    foreach ($flds as $field) {
      $fieldname = $this->strip_field_type($field);
      // Skip empties, eg. as produced by a trailing comma..
      if ($fieldname !== "") {
        $sortfields[] = $fieldname;
      }
    }
    // Create the field..
    $sortlist = implode(" ", $sortfields);
    $this->add_xmltag( new xmltag("Sort", $sortlist) );
  } // set_sortorder
  // .....................................................................
  /**
  * Specify a range on a field for querying. We specify the name of a field
  * which is used to select articles within the given limits, and
  * the limits themselves. Either limit may be passed as nullstring
  * (or false, or null) which indicates no limit on that side. A limit of
  * zero is a valid limit. Any dates must be passed as standard Unix
  * timestamps (seconds since 1970).
  * Notes: This method can be called multiple times to define additional
  * ranges for different field names. Nothing is added if the field name
  * is nullstring.
  * This method adds a 'Range' tag to the message.
  * @param string $range_from Value of lowerbound range
  * @param string $range_to Value of upperbound range
  * @param string $range_fieldname Name of field to use in range query.
  */
  function set_range($range_from="", $range_to="", $range_fieldname="") {
    if ($range_fieldname != "") {
      $range = new xmltag("Range");
      $range->setattribute("field", $this->strip_field_type($range_fieldname));
      // Strict comparisons, so that a bound of 0 (or "0") is not mistaken
      // for 'no limit'..
      if ($range_from !== "" && $range_from !== false && $range_from !== null) {
        $range->childtag( new xmltag("From", $range_from) );
      }
      if ($range_to !== "" && $range_to !== false && $range_to !== null) {
        $range->childtag( new xmltag("To", $range_to) );
      }
      $this->add_xmltag( $range );
    }
  } // set_range
  // .....................................................................
  /**
  * Supply a stopword list to SearchEngine.
  * This method adds a 'Stop-List' tag to the message, containing the
  * stopwords as a space-delimited list.
  * @param mixed $stopwords Space-delimited list, or array of stopwords
  */
  function set_stopwords($stopwords) {
    if (is_array($stopwords)) {
      $mystops = implode(" ", $stopwords);
    }
    else {
      $mystops = $stopwords;
    }
    $this->add_xmltag( new xmltag("Stop-List", $mystops) );
  } // set_stopwords

} // searchengine_message class

// ----------------------------------------------------------------------
/**
* Encapsulation of the result of a generic search query. This is for
* internal use only.
* @package search
* @access private
*/
class queryresult {
  /** Rank recorded for this result */
  var $rank = "";
  /** Field values of this result, keyed on field name */
  var $fields = array();
  // .....................................................................
  /**
  * Make a new result holding the given rank and no fields.
  * @param string $rank Rank to record against this result
  */
  function queryresult($rank="") {
    $this->rank = $rank;
  } // queryresult
  // .....................................................................
  /**
  * Store a field value against this result. A value stored earlier
  * under the same name is overwritten.
  * @param string $fieldname Name of the field
  * @param string $fieldvalue Value of the field
  */
  function addfield($fieldname, $fieldvalue) {
    $this->fields[$fieldname] = $fieldvalue;
  } // addfield
} // queryresult class

// ----------------------------------------------------------------------
/**
* Class comprising the functionality of a SearchEngine response parser. This
* is for internal use only.
* @package search
* @access private
*/
class response_parser extends xmlparser  {
  /** Current/last tag opened */
  var $tag = "";
  /** Attributes array for current/last tag */
  var $attr = array();
  /** Serial transaction ID */
  var $serial = "";
  /** Status message */
  var $status_message = "";
  /** True if response was valid, ie. no errors */
  var $valid = true;
  /** All cdata content for the response, keyed on tag name */
  var $tag_data = array();
  /** True once character data has been received for the tag
      which is currently open
      @access private */
  var $cdata_seen = false;
  // .....................................................................
  /** Construct a new parser. */
  function response_parser() {
    $this->xmlparser();
  } // response_parser
  // .....................................................................
  /**
  * Method invoked when a tag is opened. Records the tag name, merges
  * any attributes into the 'attr' array, and flags the response as
  * invalid as soon as an "Error" tag is seen.
  * @param mixed $parser The parser invoking this handler (unused here)
  * @param string $tag Name of the tag being opened
  * @param array $attributes Attributes of the tag, keyed on name
  */
  function tag_open($parser, $tag, $attributes) {
    $this->tag = $tag;
    // A new tag starts a fresh run of character data..
    $this->cdata_seen = false;
    if (is_array($attributes) && count($attributes) > 0) {
      foreach ($attributes as $key => $value ) {
        $this->attr[$key] = $value;
      }
    }
    switch ($tag) {
      case "Error":
        $this->valid = false;
        break;
    } // switch
  } // tag_open
  // .....................................................................
  /**
   * Method invoked when character data is available. This is essentially
   * a field of data, with the name of the field being the tag-name, and
   * the value being the cdata itself. Here we cherry-pick a few 'special'
   * values and assign to class vars for easier access. The character
   * data is all stashed in the 'tag_data' array under the name of the tag
   * as well, so no fields are lost.
   * The XML parser is allowed to deliver the text of a single element in
   * several pieces (for example either side of an entity), so every piece
   * after the first one for the open tag is appended rather than being
   * allowed to overwrite what was already received.
   * @param mixed $parser The parser invoking this handler (unused here)
   * @param string $cdata The piece of character data being delivered
   */
  function cdata($parser, $cdata) {
    // Is this a continuation of text already seen for the open tag?
    $append = $this->cdata_seen;
    $this->cdata_seen = true;

    switch ($this->tag) {
      case "Error":
        $this->error_message = $append ? $this->error_message . $cdata : $cdata;
        debugbr("SearchEngine error: $this->error_message", DBG_DUMP);
        break;
      case "Status":
        $this->status_message = $append ? $this->status_message . $cdata : $cdata;
        debugbr("SearchEngine status: $this->status_message", DBG_DUMP);
        break;
      case "Serial":
        $this->serial = $append ? $this->serial . $cdata : $cdata;
        debugbr("SearchEngine serial#: $this->serial", DBG_DUMP);
        break;
    } // switch

    // Record all tag data. Note that attributes, if any, are
    // not recorded at this point..
    if ($append && isset($this->tag_data[$this->tag])) {
      $this->tag_data[$this->tag] .= $cdata;
    }
    else {
      $this->tag_data[$this->tag] = $cdata;
    }

  } // cdata
  // .....................................................................
  /**
  * Method invoked when a tag is closed. Forgets the current tag name
  * and its attributes.
  * @param mixed $parser The parser invoking this handler (unused here)
  * @param string $tag Name of the tag being closed (unused here)
  */
  function tag_close($parser, $tag) {
    $this->tag = "";
    $this->attr = array();
    $this->cdata_seen = false;
  } // tag_close
  // .....................................................................
  /**
  * Parse the given XML using the parse() method of the xmlparser parent.
  * Afterwards the response is marked invalid if the 'valid_xml' property
  * is false, and any error message which was collected is passed to
  * log_sys().
  * @param string $xml The XML response text to parse
  */
  function parse($xml) {
    xmlparser::parse($xml);
    if (!$this->valid_xml) {
      $this->valid = false;
    }
    // 'error_message' is not declared by this class, so make sure it
    // exists before reading it..
    if (isset($this->error_message) && $this->error_message != "") {
      log_sys($this->error_message);
    }
  } // parse
} // response_parser class

// ----------------------------------------------------------------------
/**
* Class comprising the functionality of an XML parser for queries. This
* is for internal use only.
* @package search
* @access private
*/
class queryresponse_parser extends response_parser {
  /** Results returned count */
  var $count = 0;
  /** Array of queryresult objects, keyed as given to addresult().
      Remains unset until the first result is added. */
  var $results;
  /** True whilst we are inside the "Results" tag */
  var $results_stream = false;
  /** Name of the "Field" tag currently open, or false if there is
      none we can store data against
      @access private */
  var $field_name = false;
  /** True once this class has handled character data for the tag
      which is currently open
      @access private */
  var $value_started = false;
  // .....................................................................
  /** Construct a new parser. */
  function queryresponse_parser() {
    $this->response_parser();
  } // queryresponse_parser
  // .....................................................................
  /**
  * Method invoked when a tag is opened. On top of the parent processing
  * this notes when the "Results" tag opens, creates a new result for each
  * "Result" tag from its 'counter' and 'rank' attributes, and remembers
  * the 'name' attribute of each "Field" tag for use when its character
  * data arrives.
  * @param mixed $parser The parser invoking this handler
  * @param string $tag Name of the tag being opened
  * @param array $attributes Attributes of the tag, keyed on name
  */
  function tag_open($parser, $tag, $attributes) {
    response_parser::tag_open($parser, $tag, $attributes);
    // A new tag starts a fresh run of character data..
    $this->value_started = false;
    switch ($tag) {
      case "Results":
        $this->results_stream = true;
        break;
      case "Result":
        $this->addresult(
            $this->attr["counter"],
            $this->attr["rank"]
            );
        $this->attr = array();
        break;
      case "Field":
        // Capture the name now, since the attributes are cleared
        // once the first piece of character data has been handled.
        // A Field with no attributes at all is not stored..
        if (count($this->attr) > 0) {
          $this->field_name = isset($this->attr["name"]) ? $this->attr["name"] : "";
        }
        else {
          $this->field_name = false;
        }
        break;
    } // switch
  } // tag_open
  // .....................................................................
  /**
  * Method invoked when character data is available. Picks up the hit
  * "Count", and stores "Field" data against the most recently added
  * result. The XML parser may deliver the text of one element in several
  * pieces (for example either side of an entity), so pieces after the
  * first are appended to the value stored so far.
  * @param mixed $parser The parser invoking this handler
  * @param string $cdata The piece of character data being delivered
  */
  function cdata($parser, $cdata) {
    response_parser::cdata($parser, $cdata);

    // Is this a continuation of text already handled for the open tag?
    $continued = $this->value_started;
    $this->value_started = true;

    switch ($this->tag) {
      case "Count":
        $this->count = $continued ? $this->count . $cdata : $cdata;
        break;
      case "Field":
        if ($this->results_stream) {
          // Only store when we have a field to store against, and
          // a result to store it in..
          if ($this->field_name !== false
          && is_array($this->results) && count($this->results) > 0) {
            $result = array_pop($this->results);
            $fieldval = $cdata;
            if ($continued && isset($result->fields[$this->field_name])) {
              $fieldval = $result->fields[$this->field_name] . $cdata;
            }
            $result->addfield($this->field_name, $fieldval);
            array_push($this->results, $result);
          }
          $this->attr = array();
        }
        break;
    } // switch
  } // cdata
  // .....................................................................
  /**
  * Method invoked when a tag is closed. On top of the parent processing
  * this notes when the "Results" tag closes, and forgets the name of a
  * "Field" tag when that closes.
  * @param mixed $parser The parser invoking this handler
  * @param string $tag Name of the tag being closed
  */
  function tag_close($parser, $tag) {
    response_parser::tag_close($parser, $tag);
    $this->value_started = false;
    switch ($tag) {
      case "Results":
        $this->results_stream = false;
        break;
      case "Field":
        $this->field_name = false;
        break;
    } // switch
  } // tag_close
  // .....................................................................
  /**
  * Add a new, empty result to the response.
  * @param string $id Key to store the result under
  * @param string $rank Rank to record against the result
  */
  function addresult($id, $rank) {
    $this->results[$id] = new queryresult($rank);
  } // addresult
} // queryresponse_parser class

// ----------------------------------------------------------------------
/**
* The SearchEngine query message class. This class inherits all the functionality
* of the searchengine_connection, searchengine_msg and searchengine_message classes. It adds
* query-specific methods for searching.
* @package search
*/
class searchengine_querymsg extends searchengine_message {
  /** Set to true if sort limit was exceeded in query */
  var $sort_limit_exceeded = false;
  /** Set to true if SearchEngine blew its memory trying to sort */
  var $sort_memory_exceeded = false;
  // .....................................................................
  /** Constructor
  * Make a new SearchEngine query message of type "LuceneQueryRequest".
  * You can specify the application to use here. The query itself is
  * supplied afterwards with set_query().
  * @param string $application Optional application specifier.
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function searchengine_querymsg($application="?", $host="", $port="") {
    $this->searchengine_message("LuceneQueryRequest", $application, $host, $port);
  } // searchengine_querymsg
  // .....................................................................
  /**
  * Set the query for this message. There can be only one query defined.
  * This method can be called repeatedly, and each time it is called the
  * new value will replace the old one.
  * @param string $query The query to submit to SearchEngine.
  */
  function set_query($query) {
    $queryxml = new xmltag("Query", $query);
    $queryxml->setattribute("default-field", DEFAULT_FIELD);
    // Ask for "replace" mode, as index_content() and unindex() do for
    // their single-valued Id tag, so that a repeated call supersedes
    // the earlier Query as documented above.
    // NOTE(review): assumes the default mode of add_xmltag() appends
    // rather than replaces - confirm against add_xmltag().
    $this->add_xmltag($queryxml, "replace");
  } // set_query
  // .....................................................................
  /**
  * Send the message to SearchEngine, and then post-process the response for
  * query hits. Each hit is unpacked into an array holding the rank under
  * the key "RANK" followed by the fields of the hit keyed on field name,
  * and the hit is appended to the 'hit' array of this object. If the
  * response was not valid then the error message is checked for the sort
  * limit and out of memory conditions, and the corresponding flags set.
  * @param integer $timeoutsecs Override for timeout in seconds
  * @return boolean True if the response to the query was valid.
  */
  function send($timeoutsecs="") {
    // Initialise flags..
    $this->sort_limit_exceeded = false;
    $this->sort_memory_exceeded = false;

    // Msg-level send-receive transaction..
    searchengine_message::send($timeoutsecs);

    // Process the response to our request..
    $this->response = new queryresponse_parser();
    $this->response->parse($this->responsebuf);

    // Unpack the response if no errors..
    if ($this->response->valid) {
      // Here we will unpack the returned search query hits
      // and store them locally for use by child classes.
      if (isset($this->response->results)) {
        foreach ($this->response->results as $result) {
          $hit = array();
          $hit["RANK"] = $result->rank;
          foreach ($result->fields as $fieldname => $fieldvalue) {
            $hit[$fieldname] = $fieldvalue;
          }
          $this->hit[] = $hit;
        }
      }
    }
    else {
      // Check for sort limit/memory error conditions..
      if (stristr($this->response->error_message, "system sort limit")) {
        $this->sort_limit_exceeded = true;
      }
      if (stristr($this->response->error_message, "out of memory")) {
        $this->sort_memory_exceeded = true;
      }
    }

    // Report the outcome, as the send() methods of the other
    // message classes do..
    return $this->response->valid;
  } // send
} // searchengine_querymsg class

// ----------------------------------------------------------------------
/**
* The SearchEngine index message class. This class inherits all the functionality
* of the searchengine_connection, searchengine_msg and searchengine_message classes. It adds
* indexing-specific methods.
* @package search
*/
class searchengine_indexmsg extends searchengine_message {
  // Public
  /** Indication that the indexing was successful */
  var $indexed = false;

  // Private
  /** A unique handle to identify the index
      response from SearchEngine
      @access private */
  var $serialno = "";
  // .....................................................................
  /** Constructor
  * Make a new SearchEngine index message of type "LuceneIndexRequest".
  * A "Serial" tag carrying the serial number is added to the message, and
  * the default field is defined via define_field(). If no serial number
  * is supplied then one is generated.
  * @param string $application Optional application specifier
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  * @param string $serialno Optional specific serial number to use
  */
  function searchengine_indexmsg($application="?", $host="", $port="", $serialno="") {
    $this->searchengine_message("LuceneIndexRequest", $application, $host, $port);
    if ($serialno != "") {
      $this->serialno = $serialno;
    }
    else {
      $this->serialno = md5(uniqid(""));
    }
    $this->add_xmltag( new xmltag("Serial", $this->serialno) );
    $this->define_field(DEFAULT_FIELD, DEFAULT_FIELDTYPE, NOT_STORED);
  } // searchengine_indexmsg
  // .....................................................................
  /**
  * Supply field content for indexing. This causes SearchEngine to take the given
  * fieldname and index the given value against it. NB: we silently ignore
  * the request for nullstring, since these cause SearchEngine indexing to throw
  * an exception, and indexing will fail. A null value is ignored in the
  * same way, since it would also arrive as empty content.
  * The field name can have the field type included in the form 'Foo:Date',
  * where 'Date' is the type in this instance. In fact, since 'Text' is the
  * default field type, 'Date' is probably the only one you need to use
  * as the current implementation stands.
  * @param string $fieldname Name of the field to index.
  * @param string $fieldvalue Content of the field to index
  */
  function index_field($fieldname, $fieldvalue) {
    if ($fieldvalue !== "" && !is_null($fieldvalue)) {
      $fieldname = $this->strip_field_type($fieldname);
      $this->add_field($fieldname, $fieldvalue);
    }
  } // index_field
  // .....................................................................
  /**
  * Index the given content against the given ID. The ID is added to the
  * message as an "Id" tag in "replace" mode. Newlines, carriage returns
  * and tabs in the content are turned into spaces, runs of spaces are
  * squeezed down to one, and the result is added as the default field
  * via the add_field() method. Nothing at all is added if the content
  * is a nullstring or null.
  * @param string $id The ID to associate with the given indexed data.
  * @param string $content The binary/text content to be indexed.
  */
  function index_content($id, $content) {
    // A null would otherwise get past the nullstring check and be
    // turned into an empty field by the replacements below..
    if ($content !== "" && !is_null($content)) {
      $this->add_xmltag( new xmltag("Id", $id), "replace" );
      $content = preg_replace("/[\n\r\t]/", " ", $content);
      $content = preg_replace("/[ ]{2,}/", " ", $content);
      $this->add_field(DEFAULT_FIELD, $content);
    }
  } // index_content
  // .....................................................................
  /**
  * Send the message to SearchEngine, and then post-process the response for
  * indication of a successful index operation. We expect to receive
  * a response back from SearchEngine which has our serialno in it. This method
  * returns True if the indexing was successful, else False.
  * @param integer $timeoutsecs Override for timeout in seconds
  * @return boolean True if indexing was successful.
  */
  function send($timeoutsecs="") {
    $success = false;
    if (searchengine_message::send($timeoutsecs)) {
      $this->response = new response_parser();
      $this->response->parse($this->responsebuf);
      if ($this->response->valid) {
        // Only a response which echoes our serial number counts..
        $this->indexed = ($this->response->serial == $this->serialno);
        $success = $this->indexed;
      }
    }
    return $success;
  } // send
} // searchengine_indexmsg class

// ----------------------------------------------------------------------
/**
* The SearchEngine unindex message class. This class allows you to remove an
* item from the SearchEngine index. You must know the unique ID that identifies
* the document.
* @package search
*/
class searchengine_unindexmsg extends searchengine_message {
  // .....................................................................
  /** Constructor
  * Make a new SearchEngine unindex message, of type "LuceneUnIndexRequest".
  * Use the 'unindex' method to nominate the ID of the item to delete.
  * @param string $application Optional application specifier
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function searchengine_unindexmsg($application="?", $host="", $port="") {
    $this->searchengine_message("LuceneUnIndexRequest", $application, $host, $port);
  } // searchengine_unindexmsg
  // .....................................................................
  /**
  * Nominate the document to unindex, by its unique ID. The ID is added
  * to the message as an "Id" tag, in "replace" mode.
  * @param string $id The ID to allow SearchEngine to identify the item to unindex
  */
  function unindex($id) {
    $idtag = new xmltag("Id", $id);
    $this->add_xmltag( $idtag, "replace" );
  } // unindex

  // .....................................................................
  /**
  * Send the message to SearchEngine, and then parse the response. If the
  * message-level send fails we return False without parsing anything.
  * @param integer $timeoutsecs Override for timeout in seconds
  * @return boolean True if the send worked and the response was valid.
  */
  function send($timeoutsecs="") {
    // Nothing to parse if the transaction itself failed..
    if (!searchengine_message::send($timeoutsecs)) {
      return false;
    }
    $this->response = new response_parser();
    $this->response->parse($this->responsebuf);
    return $this->response->valid;
  } // send

} // searchengine_unindexmsg class

// ----------------------------------------------------------------------
/**
* The SearchEngine purge message class. This class allows you to remove all
* items from the SearchEngine index. Take care!
* @package search
*/
class searchengine_purgemsg extends searchengine_unindexmsg {
  // .....................................................................
  /** Constructor
  * Make a new SearchEngine purge message. This is an unindex message
  * which has a "Purge" tag added to it at construction time.
  * @param string $application Optional application specifier
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function searchengine_purgemsg($application="?", $host="", $port="") {
    // Set up as a normal unindex message first..
    $this->searchengine_unindexmsg($application, $host, $port);
    // ..then mark it as a purge
    $purgetag = new xmltag("Purge");
    $this->add_xmltag( $purgetag );
  } // searchengine_purgemsg
} // searchengine_purgemsg class

// ----------------------------------------------------------------------
/**
* The SearchEngine utility message class. Used for special SearchEngine operations.
* @package search
*/
class searchengine_utilitymsg extends searchengine_message {
  /** Constructor
  * Make a new SearchEngine utility message, of type "LuceneUtilityRequest".
  * If a command is given it is added to the message as a "Utility" tag.
  * @param string $utilitycmd Command for this utility message.
  * @param string $application Optional application specifier
  * @param string $host Hostname or IP of SearchEngine server
  * @param string $port Port of SearchEngine server
  */
  function searchengine_utilitymsg($utilitycmd="", $application="?", $host="", $port="") {
    $this->searchengine_message("LuceneUtilityRequest", $application, $host, $port);
    // No command, no Utility tag..
    if ($utilitycmd == "") {
      return;
    }
    $this->add_xmltag( new xmltag("Utility", $utilitycmd) );
  } // searchengine_utilitymsg
  // .....................................................................
  /**
  * Send the message to SearchEngine, and then parse whatever response
  * came back. The response is parsed regardless of the outcome of the
  * message-level send.
  * @param integer $timeoutsecs Override for timeout in seconds
  * @return boolean True if the parsed response was valid.
  */
  function send($timeoutsecs="") {
    // Send-receive transaction. The outcome is not checked here..
    searchengine_message::send($timeoutsecs);

    // Parse the response buffer..
    $this->response = new response_parser();
    $this->response->parse($this->responsebuf);

    // Validity of the response is our result..
    $outcome = $this->response->valid;
    return $outcome;
  } // send
} // searchengine_utilitymsg class

// ----------------------------------------------------------------------
?>