#   include	"config.h"

#   include	"bmintern.h"
#   include	<string.h>

#   define	y0	math_y0
#   define	y1	math_y1
#   include	<math.h>
#   undef	y0
#   undef	y1

#   include	<debugon.h>

/************************************************************************/
/*  Blur a bitmap with a 2-D Fourier transform. This routine also	*/
/*  reduces the bitmap by 'scale' and makes the sizes a power of two.	*/
/************************************************************************/

/*  Single precision complex number: one cell of the array that is	*/
/*  handed to the Fourier transform in this file.			*/
typedef struct it_complex {
    float	Re;	/*  real part		*/
    float	Im;	/*  imaginary part	*/
} it_complex;

/*  Forward declaration: the transform is defined after its caller.	*/
static int i_fourier_transform_2d( int width, int height,
				    int direction, it_complex * data );

/*  Smallest power of two that is >= n. Yields 1 for n <= 1.		*/
static int bmBlurPowerOfTwo(	int	n )
    {
    int		p= 1;

    n--;
    while( n > 0 )
	{ n /= 2; p *= 2; }

    return p;
    }

/*
 *  Reduce a bitmap with 1 bit per sample by a factor 'scale' and pass
 *  the reduced image through a forward and an inverse 2-D Fourier
 *  transform. No filter is applied between the two transforms.
 *
 *  bdOut	Receives the description of the result: 8 bits per
 *		sample, square, with sides that are a power of two.
 *  bdIn	Description of the input bitmap. Only bdBitsPerSample == 1
 *		is implemented.
 *  pBufOut	Receives a malloc()ed buffer of bdOut->bdBufferLength
 *		bytes. The caller owns it after a successful return.
 *  bufIn	The input pixels: bdIn->bdPixelsHigh rows of
 *		bdIn->bdBytesPerRow bytes.
 *  scale	Reduction factor. Only 2, 4 and 8 are implemented.
 *
 *  Returns 0 on success, -1 on failure. On failure nothing is stored
 *  through bdOut or pBufOut and both work buffers are released.
 */
int bmBlur(	BitmapDescription *		bdOut,
		const BitmapDescription *	bdIn,
		unsigned char **		pBufOut,
		const unsigned char *		bufIn,
		int				scale	)
    {
    int				rval= 0;

    unsigned char * 		bufOut= (unsigned char *)0;
    it_complex *		transform= (it_complex *)0;
    BitmapDescription		bd;

    it_complex *		t;
    unsigned char *		to;
    const unsigned char *	from;

    unsigned char		bitCount[256];
    size_t			cellCount;
    size_t			cell;
    int				row, col;
    int				shift;
    int				mask;
    int				written;
    int				j;

    double			mx;

    /*  Reject what is not implemented before anything is allocated.	*/
    /*  The check on scale also keeps the divisions below defined.	*/
    if  ( bdIn->bdBitsPerSample != 1 )
	{ LDEB(bdIn->bdBitsPerPixel); return -1;	}
    if  ( scale != 2 && scale != 4 && scale != 8 )
	{ LLDEB(bdIn->bdBitsPerPixel,scale); return -1;	}

    bmCopyDescription( &bd, bdIn );

    /*  Reduced sizes, rounded up to a power of two for the transform.	*/
    bd.bdPixelsWide= bmBlurPowerOfTwo(
			    ( bdIn->bdPixelsWide+ scale- 1 )/ scale );
    bd.bdPixelsHigh= bmBlurPowerOfTwo(
			    ( bdIn->bdPixelsHigh+ scale- 1 )/ scale );

    /*  Must be Square	*/
    if  ( bd.bdPixelsWide < bd.bdPixelsHigh )
	{ bd.bdPixelsWide=  bd.bdPixelsHigh;	}
    if  ( bd.bdPixelsHigh < bd.bdPixelsWide )
	{ bd.bdPixelsHigh=  bd.bdPixelsWide;	}

    bd.bdBitsPerSample= 8;
    bd.bdBitsPerPixel= bd.bdSamplesPerPixel* bd.bdBitsPerSample;
    bd.bdBytesPerRow= ( bd.bdPixelsWide* bd.bdBitsPerPixel+ 7 )/ 8;
    bd.bdBufferLength= bd.bdPixelsHigh* bd.bdBytesPerRow;

    if  ( bd.bdXResolution > scale )
	{ bd.bdXResolution /= scale;	}
    if  ( bd.bdYResolution > scale )
	{ bd.bdYResolution /= scale;	}

    bufOut= malloc( bd.bdBufferLength );
    if  ( ! bufOut )
	{ LLDEB(bd.bdBufferLength,bufOut); rval= -1; goto ready;	}

    /*  Only bdPixelsWide bytes per row are written below. Rows are	*/
    /*  wider than that when there is more than one sample per pixel.	*/
    memset( bufOut, 0, bd.bdBufferLength );

    /*  Multiply in size_t: the product of two ints can overflow.	*/
    cellCount= (size_t)bd.bdPixelsWide* (size_t)bd.bdPixelsHigh;
    transform= malloc( cellCount* sizeof( it_complex ) );
    if  ( ! transform )
	{ LLDEB(bd.bdPixelsWide,bd.bdPixelsHigh); rval= -1; goto ready; }

    for ( cell= 0; cell < cellCount; cell++ )
	{ transform[cell].Re= transform[cell].Im= 0.0;	}

    /*  bitCount[v]: The number of bits that are set in byte value v.	*/
    bitCount[0]= 0;
    for ( j= 1; j < 256; j++ )
	{ bitCount[j]= bitCount[j >> 1]+ ( j & 1 );	}

    /*  Every input byte holds 8/scale groups of scale bits, the most	*/
    /*  significant group first. The number of set bits in a group is	*/
    /*  added to one cell. The cells of scale successive input rows	*/
    /*  coincide.							*/
    mask= ( 1 << scale )- 1;
    for ( row= 0; row < bdIn->bdPixelsHigh; row++ )
	{
	from= bufIn+ (size_t)row* bdIn->bdBytesPerRow;
	t= transform+ (size_t)( row/ scale )* bd.bdPixelsWide;
	written= 0;

	/*  The input row can hold more groups than the output row has	*/
	/*  cells: Row padding, or a width below 8/scale cells. Stop at	*/
	/*  the end of the output row rather than write beyond it.	*/
	for ( col= 0;
	      col < bdIn->bdBytesPerRow && written < bd.bdPixelsWide;
	      col++ )
	    {
	    for ( shift= 8- scale;
		  shift >= 0 && written < bd.bdPixelsWide;
		  shift -= scale )
		{
		t->Re += bitCount[( from[col] >> shift ) & mask];
		t++; written++;
		}
	    }
	}

    if  ( i_fourier_transform_2d( bd.bdPixelsWide, bd.bdPixelsHigh,
							    1, transform ) )
	{ LLDEB(bd.bdPixelsWide,bd.bdPixelsHigh); rval= -1; goto ready; }
    if  ( i_fourier_transform_2d( bd.bdPixelsWide, bd.bdPixelsHigh,
							    -1, transform ) )
	{ LLDEB(bd.bdPixelsWide,bd.bdPixelsHigh); rval= -1; goto ready; }

    /*  Maximum value without transform */
    j= ( ( 1 << bdIn->bdBitsPerPixel )- 1 )* scale *scale;

    /*  The transform does not normalise: After the forward and the	*/
    /*  inverse pass every cell is multiplied by the number of cells.	*/
    /*  Divide that out while scaling to the 0..255 range.		*/
    mx= 255.0/ ( (double)j* (double)bd.bdPixelsWide* (double)bd.bdPixelsHigh );
    for ( row= 0; row < bd.bdPixelsHigh; row++ )
	{
	to= bufOut+ (size_t)row* bd.bdBytesPerRow;
	t= transform+ (size_t)row* bd.bdPixelsWide;

	for ( col= 0; col < bd.bdPixelsWide; col++ )
	    {
	    /*  Round, and clamp: Converting an out of range floating	*/
	    /*  value to unsigned char is undefined.			*/
	    double	v= mx* t->Re+ 0.5;

	    if  ( ! ( v > 0.0 ) )
		{ v= 0.0;	}
	    if  ( v > 255.0 )
		{ v= 255.0;	}

	    *(to++)= (unsigned char)v; t++;
	    }
	}

    /*  Hand over the result. The caller owns bufOut from here.	*/
    *bdOut= bd; *pBufOut= bufOut; bufOut= (unsigned char *)0;

  ready:

    free( transform );
    free( bufOut );

    return rval;
    }

/************************************************************************/
/*  Fast Fourier transform, taken from Image Star.			*/
/*  Did They take it from Numerical Recipes?				*/
/************************************************************************/
/*  Helpers for the transform below.					*/
/*  pi2:      2*pi.							*/
/*  FFTINDEX: The loops below count positions from 1, in units of	*/
/*	      single floats: two per complex value. FFTINDEX() turns	*/
/*	      such a position into an index in the it_complex array.	*/
/*  swap:     Exchange x and y, using t as scratch space.		*/
#define to_float(n) ((float)(n))
#define to_double(f) ((double)(f))
#define pi2          to_double(6.28318530717959)
#define FFTINDEX(i)     (((i) - 1) >> 1)
#define swap(x,y,t)  ((t) = (x), (x) = (y), (y) = (t))

/*
 *  In place 2-D fast Fourier transform of width*height complex values.
 *
 *  width	Number of values in a row: Values that are adjacent in
 *		data[] are in the same row.
 *  height	Number of rows.
 *  direction	Factor applied to the angle of the complex exponential.
 *		The caller in this file passes 1 for one direction and
 *		-1 for the opposite one.
 *  data	The values, row after row. Overwritten with the result.
 *
 *  Both sizes must be a power of two. The result is not normalised:
 *  no division by the number of values takes place in either
 *  direction.
 *
 *  Returns 0 on success, -1 if an argument is unusable. In that case
 *  data[] is not touched.
 */
static int i_fourier_transform_2d(	int		width,
					int		height,
					int		direction,
					it_complex *	data	)
{
  long z_size;
  int size[2];
  int dim,n,nprev;
  int nrem,ip1,ip2,ip3,i2rev;
  int i1,i2,i3,i3rev,ibit;
  it_complex *z,*z1,*z2,tmp;
  int ifp1,ifp2;
  double wpr,wpi,wr,wi;
  double theta,wtemp;
  float Re,Im;

  /* the bit reversal below is only bounded for powers of two */
  if(!data || width<1 || height<1 ||
     (width&(width-1)) || (height&(height-1)))
    return(-1);

  /* compute total number of complex values */
  z_size=(long) width * (long) height;
  /* The first pass of the loop below (dim==1, nprev==1) transforms */
  /* runs of adjacent values. Those are rows, so size[1] must be the */
  /* row length. The second pass then works with a stride of width. */
  size[0]=height;
  size[1]=width;
  z= data;

  for(nprev=1, dim=1; dim>=0; nprev*=n, dim--)
    {
      /* initialisation */
      n=size[dim];
      nrem=z_size/(n*nprev);
      ip1= nprev<<1;
      ip2= ip1*n;
      ip3=ip2*nrem;
      i2rev=1;
      
      /* bit reversal section */
      for(i2=1;i2<=ip2;i2rev+=ibit,i2+=ip1)
	{
	  if(i2<i2rev)
	    {
	      for(i1=i2;i1<=(i2+ip1-2);i1+=2)
		{
		  for(i3=i1;i3<=ip3;i3+=ip2)
		    {
		      i3rev=i2rev+i3-i2;
		      z1= &z[FFTINDEX(i3)];
		      z2= &z[FFTINDEX(i3rev)];
		      swap(*z1,*z2,tmp);
		    }
		}
	    }
	  /* advance i2rev to the bit reversed successor: the body is empty */
	  for(ibit=(ip2>>1);(ibit>=ip1)&&(i2rev>ibit);i2rev-=ibit,ibit>>=1);
	}

      /* Danielson-Lanczos section */
      for(ifp1=ip1;ifp1<ip2;ifp1=ifp2)
	{
	  /* initialise trig values to avoid repeated calls */
	  ifp2=(ifp1<<1);
	  theta=(direction*pi2)/(ifp2/to_double(ip1));
	  wtemp=sin(theta*0.5);
	  wpr=wtemp*wtemp*(-2.0);
          wpi=sin(theta);
          wr=1.0;
          wi=0.0;
	  
	  for(i3=1;i3<=ifp1;i3+=ip1)
	    {
	      for(i1=i3;i1<=(i3+ip1-2);i1+=2)
		{
		  for(i2=i1;i2<=ip3;i2+=ifp2)
		    {
		      /* Danielson-Lanczos Formula */
		      z1= &z[FFTINDEX(i2)];
		      z2= &z[FFTINDEX(i2+ifp1)];
		      Re = to_float((wr * z2->Re) - (wi * z2->Im));
		      Im = to_float((wr * z2->Im) + (wi * z2->Re));
		      z2->Re = (z1->Re) - Re;
		      z2->Im = (z1->Im) - Im;
		      z1->Re += Re;
		      z1->Im += Im;
		    }
		}
	      /* step (wr,wi) to the next angle by recurrence, without sin/cos */
	      wr=((wtemp=wr)*wpr)-(wi*wpi)+wr;
	      wi=(wi*wpr)+(wtemp*wpi)+wi;
	    }
	}
    }
  return(0);
}
