/**
  \class CAdaptiveHuffman
  \brief An adaptive Huffman encoder in C++

  This probably isn't the fastest implementation around, but at least this
  code is (hopefully) a lot easier to understand than, for example, the JPEG
  Huffman encoder (which is static anyway).
  
  This class has an extra feature for the compressor: it allows you to set
  marks in the compressed bitstream. This is useful if you want to add a
  group of values that belong together, but you don't know a priori whether
  they will fit in the remaining buffer space. By setting a mark after
  each successful group you can be assured you don't end up with half groups
  (and you don't waste space in the buffer either).
*/

#include <stdio.h>
#include <string.h>

#include "AdaptiveHuffman.h"

#undef DEBUG

#define VAL_ZERO  -1
#define VAL_NODE  -4


/* There are two structures at work here... First there's the (obligatory)
   binary tree structure as needed by the Huffman algorithm. Second, the so
   called 'unique node number' from the various descriptions on adaptive
   Huffman is being used, but in a slightly different way. 
   
   All created nodes are placed in an array in the order they were created
   and given a number. This number is set at creation time and doesn't
   change anymore (all texts imply updating the numbers, but that is
   absolutely pointless!). The root is at location 0 and stays there. This
   has one other minor consequence, which is actually an advantage later: the
   number decreases for higher nodes ('higher' in the tree structure, that is).
   
   When updating the tree, this array has to be searched for the 'highest'
   node with equal weight. In fact, by using the array this way, when you
   start at index 0, the _first_ node you find with equal weight is this 
   node!
   
   This took me a while to figure out, but if you draw the tree and walk
   through all the nodes in Array, you descend the tree layer by layer (*),
   from right to left. You will also find that the weights of the nodes you
   visit remain equal or decrease. This is the so-called Sibling Property
   [Gallager: 1978].
   
   (*) A layer being defined as all nodes with the same distance from the
   Root node.
 */

/* A single node of the adaptive Huffman tree. The same structure is used
   for internal nodes, for symbol leaves and for the special 'zero' leaf.
   A node owns its children: the destructor deletes Left and Right.
 */
struct HuffNode
{
   /* Tree links. All three are NULL right after construction. */
   HuffNode *Parent;
   HuffNode *Left;
   HuffNode *Right;

   /* Symbol value for a leaf (>= 0), VAL_ZERO for the zero node,
      VAL_NODE once the node has been given children by SetNodes(). */
   int Value;
   /* Creation-order number handed to the constructor; never modified by
      the node itself. */
   int Number;
   /* Occurrence count; starts at 0. */
   int Weight;
   /* Bit pattern of the path from the root (first branch in bit 0)... */
   int Code;
   /* ...and the number of valid bits in Code. */
   int CodeLength;

   HuffNode(int value, int number);
   ~HuffNode();

   /* Code assignment and tree wiring. */
   void SetCode(int code, int code_len);
   void SetCodes();
   void SetNodes(HuffNode *l, HuffNode *r);

   /* Swap payload (value, weight, children) with another node. */
   void Exchange(HuffNode *other);
   /* NOTE(review): declared here, but no definition is visible in this
      part of the file. */
   int GetCodeLength();

   /* Debugging aids. */
   void Emit();
   void Print();
};

/* Overlays a HuffNode pointer and a pointer to another HuffBlock in the
   same storage.
   NOTE(review): nothing in the visible part of this file references this
   union; the member names suggest a block-leader table with a free list --
   confirm before relying on it or removing it.
 */
union HuffBlock
{
   HuffNode *Leader;
   HuffBlock *FreePtr;
};




/* Create a detached node: no parent, no children, weight 0 and an empty
   (zero-length) code. 'value' and 'number' are stored as given.
 */
HuffNode::HuffNode(int value, int number)
   : Parent(NULL), Left(NULL), Right(NULL),
     Value(value), Number(number), Weight(0),
     Code(0), CodeLength(0)
{
}

/* Destroy this node and, recursively, the whole subtree below it.
   Deleting a NULL child is a no-op, so leaves need no special casing.
 */
HuffNode::~HuffNode()
{
   // Use #ifdef like every other debug section in this file; '#if DEBUG'
   // fails to preprocess when DEBUG is defined without a value.
#ifdef DEBUG
   printf("Deleting node %d\n", Number);
#endif
   delete Left;
   delete Right;
}

// public

/* Give this node the code 'code' of 'code_len' bits and re-derive the codes
   of everything below it.
 */
void HuffNode::SetCode(int code, int code_len)
{
   CodeLength = code_len;
   Code = code;
   SetCodes();
}

/* Derive the codes of both children from our own code and recurse (through
   SetCode) into their subtrees.

   Bit layout: the branch taken at depth d is stored in bit d of Code, so the
   first branch from the root ends up in bit 0. The left child keeps our
   code unchanged (a 0 bit), the right child gets bit 'CodeLength' set (a
   1 bit); both are one bit longer than we are.
 */
void HuffNode::SetCodes()
{
   const int child_len = CodeLength + 1;

   if (Left != NULL)
     Left->SetCode(Code, child_len);
   if (Right != NULL)
     Right->SetCode(Code | (1 << CodeLength), child_len);
}

/* Turn this node into an internal node with children 'l' (left) and 'r'
   (right). Both pointers are dereferenced, so neither may be NULL. The
   children's Parent links and codes are updated; our Value becomes VAL_NODE.
 */
void HuffNode::SetNodes(HuffNode *l, HuffNode *r)
{
   l->Parent = this;
   r->Parent = this;
   Left = l;
   Right = r;

   Value = VAL_NODE;
   SetCodes();
}


/* Swap the payload of this node with that of 'other'.

   Exchanged: Value, Weight and both child pointers (the children's Parent
   links are redirected accordingly). Left alone: Number, Code, CodeLength
   and each node's own Parent pointer.
 */
void HuffNode::Exchange(HuffNode *other)
{
   // Scalar payload
   const int my_value = Value;
   const int my_weight = Weight;
   Value = other->Value;
   Weight = other->Weight;
   other->Value = my_value;
   other->Weight = my_weight;

   // Left subtrees: hand ours to 'other', adopt theirs
   HuffNode *const my_left = Left;
   HuffNode *const their_left = other->Left;
   other->Left = my_left;
   if (my_left != NULL)
     my_left->Parent = other;
   if (their_left != NULL)
     their_left->Parent = this;
   Left = their_left;

   // Right subtrees: same procedure
   HuffNode *const my_right = Right;
   HuffNode *const their_right = other->Right;
   other->Right = my_right;
   if (my_right != NULL)
     my_right->Parent = other;
   if (their_right != NULL)
     their_right->Parent = this;
   Right = their_right;
}


// Debugging functions

// Show emitted code (debug builds only): prints the CodeLength bits of Code,
// highest bit index first, followed by a space. Without DEBUG nothing is
// printed.
void HuffNode::Emit()
{
   // Count down by bit index instead of shifting a mask: the old
   // '1 << (CodeLength - 1)' was undefined for CodeLength == 0 (the initial
   // zero node) and could loop forever.
   for (int bit = CodeLength - 1; bit >= 0; bit--) {
#ifdef DEBUG
     printf("%c", ((Code >> bit) & 1) ? '1' : '0');
#endif
   }
#ifdef DEBUG
   printf(" ");
#endif
}

// Print the subtree rooted at this node, one node per line. The right
// subtree is printed first and the left subtree last; each line is indented
// by two spaces per code bit and shows weight, number, code and code length,
// plus the value for symbol leaves and the zero node.
void HuffNode::Print()
{
   if (Right != NULL)
     Right->Print();

   int indent = CodeLength;
   while (indent-- > 0)
     printf("  ");
   printf("W:%3d   [%3d 0x%04x %2d] ", Weight, Number, Code, CodeLength);

   if (Value >= 0)
     printf("Val: %4d (%c)", Value, Value);
   else if (Value == VAL_ZERO)
     printf("Val:   (zero)");
   printf("\n");

   if (Left != NULL)
     Left->Print();
}


/*********/

/**
  \brief constructor
  \param max_values The number of values being used for compression

  This initializes the encoder/decoder. With \a max_values you specify
  the maximum number of different symbols you want to encode; valid symbols
  are 0 .. max_values - 1. This number cannot be changed later!

  A non-positive \a max_values is treated as 0, in which case every call to
  AddValue() is rejected as out of bounds.

  No tree or buffer is allocated here; call InitCompressor() or
  InitDecompressor() before use.
*/

CAdaptiveHuffman::CAdaptiveHuffman(int max_values)
{
   // Guard against non-positive counts: the loop below used to spin forever
   // on a negative value (arithmetic right shift never reaches 0).
   if (max_values < 0)
     max_values = 0;
   MaxValue = max_values;

   // Determine ceil(log2(max_values)): the number of bits needed to send a
   // raw, not yet seen value.
   LogValue = 0;
   max_values--;
   while (max_values > 0) {
     LogValue++;
     max_values >>= 1;
   }

   Compressor = 0;
   Root = NULL;
   Zero = NULL;
   Values = NULL;
   Array = NULL;
   Buffer = NULL;

   // Give the bookkeeping read by Mark(), GetBits() and PrintAncestors() a
   // defined state even before one of the Init functions has run.
   Count = 0;
   UseMarks = 0;
   BufferSize = 0;
   BitPos = 0;
   MarkedBitPos = 0;
}


/* Release the tree, the lookup tables and the compressor's output buffer. */
CAdaptiveHuffman::~CAdaptiveHuffman()
{
   CleanUp();

   // CleanUp() does not touch Buffer; it is allocated with new[] in
   // InitCompressor() and used to be leaked here.
   delete[] Buffer;
   Buffer = NULL;
}

// private

/* Build the initial state: an empty value lookup table, the node array and
   a tree consisting of a single zero node that is also the root (stored at
   Array[0]). Previously held storage is not released here; callers run
   CleanUp() first.
 */
void CAdaptiveHuffman::Allocate()
{
   Count = 0;

   // Value -> leaf lookup; no value has been seen yet
   Values = new HuffNode *[MaxValue];
   for (int v = 0; v < MaxValue; v++)
     Values[v] = NULL;

   // Room for the initial node plus two new nodes per distinct value
   Array = new HuffNode *[MaxValue * 2 + 1];

   Zero = new HuffNode(VAL_ZERO, Count);
   Root = Zero;
   Array[Count] = Root;
   Count++;
}

void CAdaptiveHuffman::CleanUp()
{
   delete Root;
   delete Values;
   delete Array;
   Root = NULL;
   Values = NULL;
   Array = NULL;
}



/* Append the low 'length' bits of 'code' to the output buffer.

   Bits are OR-ed into the buffer starting at bit position BitPos, filling
   each byte from its least significant bit upwards; bit 0 of 'code' is
   written first. The buffer bytes being written must therefore still be 0.

   Returns false, without writing anything, when 'length' is negative or the
   bits do not fit in the remaining buffer space. On success BitPos is
   advanced and, when marks are not in use, MarkedBitPos follows it.

   Limitation: 'code' is shifted left by up to 7 bits inside an int, so
   'length' must stay small enough for that shift not to overflow.

   TODO: keep 'left over' bits for next run, start at bufferstart
 */
bool CAdaptiveHuffman::Stuff(int code, int length)
{
   if (length < 0 || BitPos + length > BufferSize)
     return false;

   // Align code with existing data
   const int shift = BitPos & 7;
   char *b = Buffer + (BitPos >> 3);
   int r = 8 - shift;        // free bits in the first, partially used byte

   code <<= shift;
   BitPos += length;

   // 'length > 0', not '>= 0': once all bits have been placed no further
   // byte may be touched. The old condition accessed one extra byte when the
   // code ended exactly on a byte boundary (or when length was 0), which is
   // out of bounds at the end of the buffer.
   while (length > 0) {
      *b |= (code & 0xFF);
      code >>= 8;
      length -= r;
      r = 8;
      b++;
   }

   if (!UseMarks)
     MarkedBitPos = BitPos;
   return true;
}


// public
/**
  \fn void CAdaptiveHuffman::InitCompressor(int buffer_size, char use_marks)
  \param buffer_size Size of the output buffer in \b bytes
  \param use_marks flag: use marks in bitstream

  Puts the object in compressor mode: the tree is reset to its initial state
  and a fresh, zero-filled output buffer of \b buffer_size bytes is
  allocated (any previous buffer is released).

  When the \b use_marks parameter is set to non-zero, the Compressor uses
  marks in the stream. Each time you call \ref Mark() the marked bit
  position is advanced to the current bit position. When \b use_marks is
  0, no marking is done and the marked position always follows the end of
  the data written so far.
*/
void CAdaptiveHuffman::InitCompressor(int buffer_size, char use_marks)
{
   Compressor = 1;
   UseMarks = use_marks;
   CleanUp();
   Allocate();

   // Buffer comes from new[], so it needs delete[]. Clear the pointer first
   // so a throwing allocation cannot leave a dangling pointer behind.
   delete[] Buffer;
   Buffer = NULL;
   Buffer = new char[buffer_size];
   // Stuff() only ORs bits into the buffer, so it has to start out all zero.
   memset(Buffer, 0, buffer_size);

   BufferSize = 8 * buffer_size; // This assumes 8 bits for char; this may not be the case on all machines
   BitPos = 0; // absolute bit position in buffer
   MarkedBitPos = 0;
}


/* Add one occurrence of value 'v' to the model and, in compressor mode
   (Compressor == 1), write its code to the output buffer first.

   - A value seen for the first time is sent as the current code of the Zero
     node followed by the raw value in LogValue bits. The Zero node is then
     split into a new Zero node (left) and a leaf for 'v' (right).
   - A known value is sent as the current code of its leaf.

   Afterwards the weights on the path to the root are incremented; nodes are
   swapped along the way and, if any swap happened, all codes are
   regenerated from the root.

   Returns 0 on success, ValueOutOfBounds when 'v' is outside 0..MaxValue-1,
   or BufferFull when a code did not fit; in both error cases the tree is
   left unchanged.

   NOTE(review): for a new value the Zero code and the raw value are written
   by two separate Stuff() calls. If only the second one fails, the Zero code
   stays in the buffer (BitPos has advanced) although BufferFull is returned.
 */
int CAdaptiveHuffman::AddValue(int v)
{
   HuffNode *Node, *Scan, *Parent;
   int regen = 0;
   int nn, sn;
   
   if (v < 0 || v >= MaxValue)
     return ValueOutOfBounds;
   Node = Values[v];

   if (Node == NULL) {
     HuffNode *NewZero;

     // First use
#ifdef DEBUG     
     printf("New value `%c'.\n", v);   
#endif
     // Send 'Zero' code, then the value itself. Note that the 
     // very first character is sent without a prefix, because Zero 
     // does not have a code yet (its CodeLength is still 0)!
     if (Compressor == 1) {
       if (!Stuff(Zero->Code, Zero->CodeLength))
         return BufferFull;
       if (!Stuff(v, LogValue))
         return BufferFull;
     }

     // Split Zero node
     // New external node; store in Array and Values
     Node = new HuffNode(v, Count);
     Array[Count++] = Node;
     Values[v] = Node;

     // A new 'Zero' node     
     NewZero = new HuffNode(VAL_ZERO, Count);
     Array[Count++] = NewZero;
     // The old Zero becomes an internal node: new Zero left, new leaf right
     Zero->SetNodes(NewZero, Node);

     // Update weights
     Node->Weight++;
     Zero->Weight++;

     // Go to 'old' Zero
     Node = Zero;
     // Remember new Zero
     Zero = NewZero;
     
     if (Node == Root)
       return 0; // Bail out now
     // The old Zero has already been counted; continue with its parent
     Node = Node->Parent;
   }
   else {
#ifdef DEBUG   
     printf("Existing value `%c' (%d)\n", v, Node->Weight);
#endif
     if (Compressor == 1)
       if (!Stuff(Node->Code, Node->CodeLength))
         return BufferFull;
   }

   // Update tree (this is the 'adaptive' part :) )
   while (1) {
      // Always remember parent, no matter what happens below
      Parent = Node->Parent;

      // Scan for 'highest' node with the same weight (actually lowest number)
      Scan = NULL;
      nn = Node->Number;
#ifdef DEBUG      
      printf("Walk: node %d\n", nn);
#endif      
      // Only nodes numbered below the current one are considered
      for (sn = 1; sn < nn; sn++) { // Skip root
         if (Array[sn]->Weight == Node->Weight) {
           Scan = Array[sn];
           break; // Found one!
         }
      }
      // We're not allowed to swap with our parent, since this will always be a node, not a leaf!
      if (Scan != NULL && Scan != Parent) {
#ifdef DEBUG
        printf("Swapping nodes %d (`%c', %d) <-> %d (`%c', %d)\n", 
                nn, Node->Value, Node->Weight, 
                sn, Scan->Value, Scan->Weight);
#endif

	// Swap value pointers (not Array), AND ONLY VALUES!
	if (Node->Value >= 0) Values[Node->Value] = Scan;
	if (Scan->Value >= 0) Values[Scan->Value] = Node;
        
        // Swap data (not pointers): this means Value, Weight, 
        // the Left/Right pointers, not this parent, but DO the parents
        // of our children... Argh!
        Node->Exchange(Scan);

        Node = Scan; // Follow our swapped node
        Parent = Node->Parent;

        regen++;
      }
      Node->Weight++;

      if (Parent == NULL)
        break; // We're at the root, done

      Node = Parent;
   }
   // Exchange() does not touch Code/CodeLength, so after any swap the codes
   // of the moved subtrees have to be rebuilt from the root
   if (regen)
     Root->SetCodes();
     
   /* 20000724 041925 MILESTONE Unbelievable; after 3 days of code hacking
      and debugging I managed to get a tree EXACTLY like the example. Pfrt.
      Where was I ?
    */
   return 0;
}     

/**
  \fn void CAdaptiveHuffman::Mark()
  \brief Set bit position marker

  Records the current bit position as the marked position. Does nothing
  when marks are not in use.
*/
void CAdaptiveHuffman::Mark()
{
   if (!UseMarks)
     return;
   MarkedBitPos = BitPos;
}

/**
  \fn int CAdaptiveHuffman::GetBits() const
  \brief Return number of bits in (marked) buffer
  
  This returns the number of \b bits that are used in the buffer. In case
  of marks, returns the number in the buffer at the last call to Mark().
*/  
int CAdaptiveHuffman::GetBits() const
{
   return MarkedBitPos;
}

/* Read-only access to the start of the compressor's output buffer. The
   pointer is NULL until InitCompressor() has allocated a buffer.
 */
const char *CAdaptiveHuffman::GetBuffer() const
{
   const char *start = Buffer;
   return start;
}




/* Put the object in decompressor mode and attach it to a compressed stream.

   bits    Number of valid bits in 'buffer'.
   buffer  The compressed data. Only the pointer is stored, nothing is
           copied, so the data must stay valid while GetValue() is used.

   The tree is reset to its initial state. Decoding starts in 'raw value'
   state because the first value in a stream is sent without a code prefix.
   A NULL buffer or a non-positive bit count yields an empty stream:
   GetValue() then immediately reports end of file.
 */
void CAdaptiveHuffman::InitDecompressor(int bits, const char *buffer)
{
   Compressor = 2;
   UseMarks = 0; // Not used here
   CleanUp();
   Allocate();

   // Treat 'no data' uniformly so nothing is ever read from an empty or
   // missing buffer
   BufferSize = (buffer != NULL && bits > 0) ? bits : 0;
   BitPos = 0; // absolute bit position in buffer

   State = 1; // Start with the first uncompressed value
   BitShift = 1;

   Walk = Root;
   BufP = buffer;
   // Pre-load the first byte only when there is one; the old code
   // dereferenced 'buffer' unconditionally
   if (BufferSize > 0)
     BufC = *BufP;
   else
     BufC = 0;
}

/* Decode and return the next value from the stream attached with
   InitDecompressor().

   Bits are consumed one at a time, least significant bit of each byte
   first. Depending on State a bit is either part of a raw value (State 1,
   assembled lowest bit first until BitShift reaches MaxValue) or a branch in
   the tree (0 = left, 1 = right). Reaching a value leaf yields that value;
   reaching the zero node switches to raw mode for the next bits. Every
   decoded value is passed to AddValue() so the tree evolves exactly like
   the sender's.

   Returns the decoded value, or EndOfFile when the bits run out before a
   complete value has been read (the loop relies on EndOfFile being
   negative).
 */
int CAdaptiveHuffman::GetValue()
{
   int value = 0;
   int ret;

   ret = EndOfFile;
   while (BitPos < BufferSize && ret < 0) {
      if (State == 1) {
        // This can be done a lot quicker, but this shows what's being done
        if (BufC & 1)
          value |= BitShift;
      
        BitShift <<= 1;
        if (BitShift >= MaxValue) { // done!
          State = 0;
//printf("Read value %d (`%c')\n", value, value);
          AddValue(value); // Keep in sync with sender
          ret = value;
        }
      }
      else { // Tree walk
        if (BufC & 1) 
          Walk = Walk->Right;
        else
          Walk = Walk->Left;

        if (Walk->Value >= 0) {
          // Endpoint!
          value = Walk->Value;
//printf("Traced to leaf %d (`%c')\n", value, value);
          AddValue(value);
          Walk = Root;
          ret = value;
        }
        if (Walk->Value == VAL_ZERO) {
//printf("Found 0 node. Switching...\n");
          Walk = Root;
          State = 1;
          BitShift = 1;
        }
      }

      BitPos++;
      BufC >>= 1;
      if ((BitPos & 7) == 0) {
        BufP++;
        // Fetch the next byte only if it still holds valid bits; the old
        // code read one byte past the input after the final bit of the last
        // byte had been consumed.
        if (BitPos < BufferSize)
          BufC = *BufP;
      }
   }
   return ret;
   
   /* 20000724 072320 MILESTONE
      Got the decompressor working too. Umpf.
    */
}



// Debug stuff


/* Debug aid: dump the whole tree to stdout, framed by a header line and a
   trailing blank line. Only the frame is printed when there is no tree.
 */
void CAdaptiveHuffman::PrintTree()
{
   printf("Dumping Huffman tree:\n");

   if (Root != NULL)
     Root->Print();

   printf("\n");
}

/* Debug aid: list all nodes in creation order (the order of Array) with
   their weight and value. Values >= 0 are also shown as a character;
   negative values are the zero node / internal node markers.
 */
void CAdaptiveHuffman::PrintAncestors()
{
   int i, v;
   HuffNode *Node;
   
   printf("Showing Ancestry:\n");

   // No node table yet (no Init call so far): nothing to list. This mirrors
   // the NULL check PrintTree() does on Root.
   if (Array == NULL)
     return;

   for (i = 0; i < Count; i++) {
      Node = Array[i];
      v = Node->Value;
      if (v >= 0)
        printf("W: %d Val: %d (%c)\n", Node->Weight, v, v);
      else
        printf("W: %d Val: %d\n", Node->Weight, v);
   }
}
