\ shannon.4th
\
\ Estimate the Shannon entropy for a data buffer
\
\ Copyright (c) Krishna Myneni, 2001-09-04
\
\ Requires:
\
\	noise.4th
\
\ Notes:
\
\	See the original paper by Claude E. Shannon,
\	"A Mathematical Theory of Communication", Bell System Technical
\	Journal, vol. 27, 1948.
\
\ The above paper may be downloaded from the following link:
\
\	http://cm.bell-labs.com/cm/ms/what/shannonday/paper.html
\


24 constant MAXBITS			\ maximum symbol length
1 MAXBITS lshift constant MAXBINS	\ maximum bins in the frequency table 
create freq_table MAXBINS cells allot	\ symbol frequency table
1024 2048 * 10 * constant MAXBUFSIZE	\ max size in bytes for symbol buffer
create sbuf MAXBUFSIZE allot		\ allocate the symbol buffer

variable nbits		\ number of bits in the symbol buffer

variable sym_size	\ symbol size in bits
variable nbins		\ number of bins in the frequency table
variable nsym		\ number of symbols in the symbol buffer
variable sym_mask	\ mask for sym_size bits

fvariable fnsym		\ floating pt version of nsym ( for calcs)

: set_symbol_size ( n -- | set necessary variables assoc. with symbol size)
	dup MAXBITS > if
	  drop ." Symbol size cannot exceed " MAXBITS . cr
	  abort
	then
	dup dup sym_size !
	1 swap lshift dup nbins !
	1- sym_mask !
	1- nbits @ swap - dup nsym !
	s>f fnsym f! ;


: @symbol ( n -- m | fetch the n^th symbol in the buffer )
	dup 7 and			\ bit offset in the starting byte 
	swap 3 rshift			\ index of starting byte in buffer
	sbuf + @			\ fetch 32 bits
	swap rshift			\ shift by the offset
	sym_mask @ and ;

: clear_hist ( -- | zero the frequency table )
	freq_table MAXBINS cells erase ;	

: make_hist ( n -- | make histogram of length n-bit symbols )
	set_symbol_size  
	clear_hist
	nsym @ 0 ?do	\ loop over each symbol and build up the histogram
	  1
	  i @symbol		\ get the i^th symbol
	  cells freq_table +	\ address of count for symbol
	  +!
	loop ;

: .hist ( -- | print the frequency table )
	nbins @ 0 ?do
	  i 5 .r 2 spaces
	  freq_table i cells + @ 5 .r cr
	loop ;
	

1e 2e flog f/ fconstant 1/LOG2
 
: log_base2 ( f -- f2 | compute log base 2 )
	flog 1/LOG2 f* ;

\ The Shannon entropy is G in the limit as n becomes large

: G ( n -- f | compute the entropy for length n-bit symbols )
	make_hist
	  
	\ Compute G

	0e
	nbins @ 0 ?do
	  freq_table i cells + @
	  dup 0> if
	    s>f fnsym f@ f/	\ compute p_i
	    fdup flog
	    f* f+
	  else
	   drop
	  then
	loop
	1/LOG2 f*
	fnegate
	sym_size @ s>f f/ ;


: se ( -- | print G_n vs n )
	\ Press a key to abort
	MAXBITS 1+ 1 do
	  i 2 .r 2 spaces 
	  i G f. cr
	  key? if 
	    key drop cr ." Aborted by user" 
	    unloop exit 
	  then
	loop ;

\ Some test patterns

include noise

: random_bit ( -- b | generate a random bit )
	ran0 0.5e f> 1 and ;

: random_byte ( -- b | generate a random 8 bit pattern )
	0 8 0 do random_bit or 2* loop 2/ ;

: tp0 ( -- | clear all bits in sbuf )
	sbuf MAXBUFSIZE erase ;

: tp1 ( -- | set alternating bits in sbuf )
	sbuf MAXBUFSIZE 170 fill ;	\ decimal 170 = binary 10101010

: tp2 ( -- | generate random bits in sbuf )
	-3 idum ! 
	nbits @ 8 / 0 ?do
	  random_byte sbuf i + c!
	loop ;

MAXBUFSIZE 2/ 8 * nbits !

\ Files


: load_file ( ^name -- | load the file into the buffer )
	0 open dup 0< if drop ." Error opening file." abort then
	dup sbuf MAXBUFSIZE read
	dup 0 <= if drop close drop ." Error reading file" abort then
	8 * MAXBUFSIZE 8 * 32 - min 
	dup nbits ! 
	." Loaded " . ." bits" cr
	close drop ;

	
