/*************************************************************************
 *
 *  $RCSfile: Index.java,v $
 *
 *  $Revision: 1.1 $
 *
 *  last change: $Author: abi $ $Date: 2000/11/30 18:03:34 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
package com.sun.xmlsearch.util;

import java.io.RandomAccessFile;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import com.sun.xmlsearch.db.*;

/**
 * Access to a search index made of a B-tree symbol dictionary
 * ("DICTIONARY"), inverted concept lists ("DOCS" / "DOCS.TAB"), per-document
 * tables ("OFFSETS") and per-document micro-indexes ("POSITIONS").
 *
 * <p>An instance is opened either read-only or for update (see the
 * constructor).  {@link #init()} must be called before any other method and
 * {@link #close()} when done.  Helpers such as <code>createIfNeeded</code>,
 * <code>getInputStream</code>, <code>readByteArray</code> and
 * <code>getRAF</code> are not defined in this class; presumably they are
 * inherited from <code>IndexAccessor</code>.
 */
public class Index extends IndexAccessor {
  /** True when the index was opened for update (writing). */
  protected final boolean      _update;

  protected Schema            _schema;
  private BtreeDictParameters _dictParams;
  private FullBtreeDict       _dict;
  /** Next unused symbol id; handed out by {@link #intern(String)}. */
  private int                 _freeID;

  private DocumentCompressor _documentCompressor;
  /** Concept ids read from "DOCS.TAB"; searched with binarySearch. */
  private IntegerArray _concepts;
  /** For each entry of _concepts, the offset of its list in _allLists. */
  private IntegerArray _offsets;
  private byte[]       _allLists; // contents of "DOCS"
  /** For each document, the offset of its micro-index in "POSITIONS". */
  protected IntegerArray _microIndexOffsets;
  /** For each document, the symbol id of its name. */
  protected IntegerArray _documents;
  /** For each document, the symbol id of its title. */
  protected IntegerArray _titles;

  /** Cache holding a window (or all) of the "POSITIONS" file. */
  protected byte[]       _positions;
  private int _positionsCacheSize = 0;
  /** File offset at which the cached window starts. */
  private int _currentBatchOffset = 0;
  /** Highest document number whose micro-index is in the cache. */
  private int _maxDocNumberInCache = -1;
  private boolean _allInCache = false;

  protected RandomAccessFile _positionsFile = null;
  protected RandomAccessFile _offsetsFile = null;

  /** Symbol name to id cache; only created in update mode. */
  private Hashtable _cache;

  /**
   * Creates an index accessor; no I/O is performed by this constructor
   * itself.
   *
   * @param indexName passed to the <code>IndexAccessor</code> constructor
   * @param update    true to open the index for update
   */
  public Index(String indexName, boolean update) {
    super(indexName);
    _update = update;
  }

  /**
   * Opens the dictionary and, when the dictionary state could be read
   * (i.e. the index already exists), loads the concept lists, the document
   * tables and the positions cache.  Otherwise the per-document tables
   * start out empty.
   *
   * @return always true
   * @throws Exception if any part of the index cannot be read
   */
  public boolean init() throws Exception {
    boolean indexExists = false;
    if (_update) {
      createIfNeeded();
      _cache = new Hashtable(15000);
    }
    _schema = new Schema(this, _update);
    _dictParams = new BtreeDictParameters(_schema, "DICTIONARY");
    if (_dictParams.readState() == false) {
      // no saved state: set up parameters for a brand new dictionary
      _dictParams.setBlockSize(2048);
      _dictParams.setRoot(0);
      _dictParams.setFreeID(1);
    }
    else
      indexExists = true;

    _dict = new FullBtreeDict(_dictParams, _update);
    _freeID = _dictParams.getFreeID();

    _documents = new IntegerArray(4096);
    if (indexExists) {
      // read in index parts
      _allLists = readByteArray("DOCS");
      readDocumentsTable("DOCS.TAB");
      InputStream in = getInputStream("OFFSETS");
      try {
        readOffsetsTables(in);
      }
      finally {
        // release the stream even when decoding fails
        in.close();
      }
      readPositions();
    }
    else {
      _microIndexOffsets = new IntegerArray();
      _titles = new IntegerArray();
    }
    return true;
  }

  /** Returns the lazily created document compressor. */
  private DocumentCompressor getDocumentCompressor() {
    if (_documentCompressor == null)
      _documentCompressor = new DocumentCompressor();
    return _documentCompressor;
  }

  /**
   * Appends the micro-index of one document to the end of the positions
   * file and records the document in the per-document tables.
   *
   * @param docID     symbol id of the document
   * @param titleID   symbol id of the document's title
   * @param locations concept locations; the first <code>count</code> are used
   * @param count     number of valid entries in <code>locations</code>
   * @param extents   extents; the first <code>extCount</code> are used
   * @param extCount  number of valid entries in <code>extents</code>
   * @throws IOException on a write failure, or when the positions file has
   *         grown beyond what an int offset can address
   */
  public void compress(int docID, int titleID,
                       ConceptLocation[] locations, int count,
                       ConceptLocation[] extents, int extCount)
    throws IOException {
      RandomAccessFile positions = getPositionsFile();
      long currentEnd = positions.length();
      // offsets are stored as ints; refuse to record a truncated offset
      if (currentEnd > Integer.MAX_VALUE)
        throw new IOException("POSITIONS file too large to index: "
                              + currentEnd + " bytes");
      positions.seek(currentEnd);

      _documents.add(docID);
      _microIndexOffsets.add((int)currentEnd);
      _titles.add(titleID);

      getDocumentCompressor().writeOutMicroIndex(positions,
                                                 locations, count,
                                                 extents, extCount);
  }

  /**
   * Writes the documents, micro-index offsets and titles tables to the
   * start of the offsets file, each compressed and preceded by one byte
   * holding the value returned by its compressor.
   *
   * @throws IOException if writing fails
   */
  protected void writeOutOffsets() throws IOException {
    Compressor documents = new Compressor();
    int k1 = documents.minimize(_documents, 8);
    Compressor offsets = new Compressor();
    int k2 = offsets.compressAscending(_microIndexOffsets);
    Compressor titles = new Compressor();
    int k3 = titles.minimize(_titles, 8); // 8 is the starting k
    // NOTE(review): the value is unused; the call is retained in case
    // byteCount() has side effects -- confirm and remove.
    int nBytes = documents.byteCount();
    RandomAccessFile out = getOffsetsFile();
    out.seek(0);        // position at beginning
    out.write(k1);
    documents.write(out);
    out.write(k2);
    offsets.write(out);
    out.write(k3);
    titles.write(out);
  }

  /**
   * Closes the dictionary and the positions file; in update mode also
   * writes out the offsets tables, saves the dictionary parameters and the
   * schema, and runs the index inverter.  The offsets file, if open, is
   * closed last, even when the update-mode work fails.
   *
   * @throws Exception if closing or writing any index part fails
   */
  public void close() throws Exception {
    // NOTE: no dictionary compaction (BtreeDictCompactor) is done here.
    _dict.close(_freeID);
    // the positions file is opened lazily and may never have been opened
    if (_positionsFile != null) {
      _positionsFile.close();
      _positionsFile = null;
    }
    try {
      if (_update) {
        writeOutOffsets();
        _dictParams.setFreeID(_freeID);
        _dictParams.updateSchema();
        _schema.save();
        IndexInverter inverter = new IndexInverter(this);
        inverter.invertIndex(_documents.cardinality(), _microIndexOffsets);
      }
    }
    finally {
      if (_offsetsFile != null) {
        _offsetsFile.close();
        _offsetsFile = null;
      }
    }
  }

  /**
   * Tells whether the concept is present in the loaded concepts table.
   * Returns false when no concepts table has been loaded.
   */
  public final boolean occursInText(int concept) {
    return _concepts != null && _concepts.binarySearch(concept) >= 0;
  }

  /** Looks up the id stored in the dictionary for a symbol name. */
  public final int fetch(String conceptName) throws Exception {
    return _dict.fetch(conceptName);
  }

  /** Looks up the symbol name stored in the dictionary for an id. */
  public final String fetch(int conceptID) throws Exception {
    return _dict.fetch(conceptID);
  }

  /** Delegates to the dictionary's <code>withPrefix</code> lookup. */
  public final IntegerArray withPrefix(String prefix) throws Exception {
    return _dict.withPrefix(prefix);
  }

  /** Returns the name of the document with the given document number. */
  public final String documentName(int docNumber) throws Exception {
    return fetch(_documents.at(docNumber));
  }

  /** Returns the "POSITIONS" file, opening it on first use. */
  public final RandomAccessFile getPositionsFile() throws IOException {
    if (_positionsFile == null)
      _positionsFile = getRAF("POSITIONS", _update);
    return _positionsFile;
  }

  /** Returns the "OFFSETS" file, opening it on first use. */
  public RandomAccessFile getOffsetsFile() throws IOException {
    if (_offsetsFile == null)
      _offsetsFile = getRAF("OFFSETS", _update);
    return _offsetsFile;
  }

  /**
   * Returns a generator over the list stored for the given concept.
   *
   * @return the generator, or null when the concept is not in the concepts
   *         table (or no table has been loaded)
   */
  public NonnegativeIntegerGenerator getDocumentIterator(int concept) {
    if (_concepts == null)
      return null;
    int index = _concepts.binarySearch(concept);
    if (index >= 0)
      return new ConceptList(_allLists, _offsets.at(index));
    else
      return null;
  }

  /**
   * Returns the offset of a document's micro-index relative to the start
   * of the currently cached window of the positions file.
   */
  public final int getDocumentIndex(int docNo) {
    return _microIndexOffsets.at(docNo) - _currentBatchOffset;
  }

  /**
   * Loads into the positions cache a window of the positions file that
   * starts at the micro-index of <code>docNo</code> and covers as many
   * following documents as fit.  If the cache cannot hold even the first
   * micro-index it is re-allocated to that size and the load is retried.
   */
  private void readMicroindexes(int docNo) throws IOException {
    //    System.out.println("readMicroindexes " + docNo);
    _currentBatchOffset = _microIndexOffsets.at(docNo);
    final int offsetLimit = _currentBatchOffset + _positionsCacheSize;
    int upTo = 0, nextDoc = docNo;
    int lastOffset = 0;
    // advance until the end of the file or the first offset past the limit
    do {
      if (++nextDoc == _microIndexOffsets.cardinality())
        lastOffset = (int)_positionsFile.length();
      else if (_microIndexOffsets.at(nextDoc) > offsetLimit)
        lastOffset = _microIndexOffsets.at(nextDoc);
    }
    while (lastOffset == 0);

    if (lastOffset > offsetLimit) {
      // the document ending at lastOffset does not fit: stop before it
      upTo = _microIndexOffsets.at(nextDoc - 1);
      _maxDocNumberInCache = nextDoc - 2;
    } else {
      upTo = lastOffset;
      _maxDocNumberInCache = nextDoc - 1;
    }

    if (_maxDocNumberInCache < docNo) { // cache too small
      // for current microindex
      _positionsCacheSize = lastOffset - _currentBatchOffset;
      System.out.println("expanding cache to " + _positionsCacheSize);
      _positions = new byte[_positionsCacheSize];
      readMicroindexes(docNo);
      return;
    }

    _positionsFile.seek(_currentBatchOffset);
    // read() may return fewer bytes than requested; readFully does not
    _positionsFile.readFully(_positions, 0, upTo - _currentBatchOffset);
  }

  /**
   * Resets the cache bookkeeping: every document counts as cached when the
   * whole positions file is in memory, none otherwise.
   */
  public void reset() {
    _maxDocNumberInCache = _allInCache ? _microIndexOffsets.cardinality() - 1 : -1;
  }

  /**
   * Returns the positions cache, first loading the window starting at
   * <code>docNo</code> when that document is beyond the last cached one.
   * Use {@link #getDocumentIndex(int)} to locate the document within the
   * returned array.
   */
  public byte[] getPositions(int docNo) throws Exception {
    if (docNo > _maxDocNumberInCache)
      readMicroindexes(docNo);
    return _positions;
  }

  /**
   * Opens the positions file and sizes the cache to the file's length; when
   * that length is representable as an int the whole file is loaded.
   */
  private void readPositions() throws IOException {
    getPositionsFile();
    //!!! temporary: better than fixed large value, worse than 'intelligent' size mgt
    _positionsCacheSize = (int)_positionsFile.length();
    if (_positionsFile.length() <= _positionsCacheSize) {
      _allInCache = true;
      reset();
      _positions = new byte[(int)_positionsFile.length()];
      _positionsFile.seek(0);
      // read() may stop short; readFully fills the whole array or throws
      _positionsFile.readFully(_positions);
      System.out.println("POS fits in cache");
    }
  }

  /**
   * Reads the concepts table and the matching offsets table from the named
   * file.  The offsets table is seeded with a leading 0 before decoding.
   */
  private void readDocumentsTable(String fileName) throws Exception {
    InputStream in = getInputStream(fileName);
    try {
      int k1 = in.read();
      _concepts = new IntegerArray(4096);
      StreamDecompressor sddocs = new StreamDecompressor(in);
      sddocs.ascDecode(k1, _concepts);
      int k2 = in.read();
      _offsets = new IntegerArray(_concepts.cardinality() + 1);
      _offsets.add(0);
      StreamDecompressor sdoffsets = new StreamDecompressor(in);
      sdoffsets.ascDecode(k2, _offsets);
    }
    finally {
      // release the stream even when decoding fails
      in.close();
    }
  }

  /**
   * Reads the documents, micro-index offsets and titles tables from the
   * stream, in the order written by {@link #writeOutOffsets()}.  The stream
   * is not closed by this method.
   */
  protected void readOffsetsTables(InputStream in) throws Exception {
    int k1 = in.read();
    StreamDecompressor sddocs = new StreamDecompressor(in);
    sddocs.decode(k1, _documents);
    int k2 = in.read();
    _microIndexOffsets = new IntegerArray(_documents.cardinality() + 1);
    StreamDecompressor sdoffsets = new StreamDecompressor(in);
    sdoffsets.ascDecode(k2, _microIndexOffsets);
    // decompress titles' ids table
    int k3 = in.read();
    _titles = new IntegerArray(_documents.cardinality());
    StreamDecompressor sdtitles = new StreamDecompressor(in);
    sdtitles.decode(k3, _titles);
  }

  /** Returns the number of documents recorded in the index. */
  public int nDocuments() {
    return _documents.cardinality();
  }

  /**
   * Returns the symbol cache, which exists only after {@link #init()} on an
   * index opened for update.
   *
   * @throws IllegalStateException when the cache has not been created
   */
  private Hashtable symbolCache() {
    if (_cache == null)
      throw new IllegalStateException(
        "symbol cache unavailable: index not opened for update or not initialized");
    return _cache;
  }

  /**
   * Drops a symbol from the cache and stores id 0 for it in the dictionary.
   *
   * @throws IllegalStateException when the index is not open for update
   */
  public void removeSymbol(String name) throws Exception {
    symbolCache().remove(name);
    _dict.store(name, 0);
  }

  /**
   * Returns the id of a symbol, assigning and storing the next free id
   * when the dictionary reports id 0 for it.  Results are cached.
   *
   * @throws IllegalStateException when the index is not open for update
   */
  public int intern(String name) throws Exception {
    Hashtable cache = symbolCache();
    Integer cached = (Integer)cache.get(name);
    if (cached != null)
      return cached.intValue();
    else {
      int id = _dict.fetch(name);
      if (id == 0)
        _dict.store(name, id = _freeID++);
      cache.put(name, new Integer(id));
      return id;
    }
  }
}
