### FFT

References : Toth Laszlo
(this linked file is included below)
Notes :
A paper (postscript) and some C++ source for 4 different fft algorithms, compiled by Toth Laszlo from the Hungarian Academy of Sciences Research Group on Artificial Intelligence.
Toth says: "I've found that Sorensen's split-radix algorithm was the fastest, so I use this since then (this means that you may as well delete the other routines in my source - if you believe my results)."
from : metabogy[AT]yahoo[DOT]com
comment : Thank you very much, this was useful, and it worked right out of the box, so to speak. It's very efficient, and the algorithm is readable. It also includes some very useful functions.

```//
//	                 FFT library
//
//  (one-dimensional complex and real FFTs for array
//  lengths of 2^n)
//
//	Author: Toth Laszlo (tothl@inf.u-szeged.hu)
//
//	Research Group on Artificial Intelligence
//  H-6720 Szeged, Aradi vertanuk tere 1, Hungary
//
/////////////////////////////////////////////////////////

#include <math.h>
#include <stdlib.h>
#include "pi.h"

/////////////////////////////////////////////////////////
//Gives back "i" bit-reversed where "size" is the array
//length
//currently none of the routines call it

long bitreverse(long i, long size){

result=0;
for(;size>1;size>>=1){
i>>=1;
result<<=1;
}

/*     __asm{       the same in assebly
mov eax,i
mov ecx,sizze
mov ebx,0
l:shr eax,1
rcl ebx,1
shr ecx,1
cmp ecx,1
jnz l
mov result,ebx
}*/
return result;
}

////////////////////////////////////////////////////////
//Bit-reverser for the Bruun FFT
//Parameters as of "bitreverse()"

long bruun_reverse(long i, long sizze){

result=0;

while(1){
if(i!=0) {
}
if(i!=0) {
}
}
}

/*assembly version
long bruun_reverse(long i, long sizze){

long result;

result=0;

__asm{
mov edx,sizze
mov eax,i
mov ebx,0
l: bsf cx,eax
jz kesz1
inc cx
shr edx,cl
shr eax,cl
bsf cx,eax
jz kesz2
inc cx
shr edx,cl
sub ebx,edx
shr eax,cl
jmp l
kesz1:
shl ebx,1

jmp vege
kesz2:
shl ebx,1
sub ebx,edx

vege: mov result,ebx
}
return result;
}*/

/////////////////////////////////////////////////////////
// data: array of doubles:
// re(0),im(0),re(1),im(1),...,re(size-1),im(size-1)
// it means that size=array_length/2 !!
//
// suggested use:
// intput in normal order
// output in bit-reversed order
//
// Source: Rabiner-Gold: Theory and Application of DSP,
// Prentice Hall,1978

void dif_butterfly(double *data,long size){

long angle,astep,dl;
double xr,yr,xi,yi,wr,wi,dr,di,ang;
double *l1, *l2, *end, *ol2;

astep=1;
end=data+size+size;
for(dl=size;dl>1;dl>>=1,astep+=astep){
l1=data;
l2=data+dl;
for(;l2<end;l1=l2,l2=l2+dl){
ol2=l2;
for(angle=0;l1<ol2;l1+=2,l2+=2){
ang=2*pi*angle/size;
wr=cos(ang);
wi=-sin(ang);
xr=*l1+*l2;
xi=*(l1+1)+*(l2+1);
dr=*l1-*l2;
di=*(l1+1)-*(l2+1);
yr=dr*wr-di*wi;
yi=dr*wi+di*wr;
*(l1)=xr;*(l1+1)=xi;
*(l2)=yr;*(l2+1)=yi;
angle+=astep;
}
}
}
}

/////////////////////////////////////////////////////////
// Decimation-in-time radix-2 in-place inverse butterfly
// data: array of doubles:
// re(0),im(0),re(1),im(1),...,re(size-1),im(size-1)
// it means that size=array_length/2 !!
//
// suggested use:
// intput in bit-reversed order
// output in normal order
//
// Source: Rabiner-Gold: Theory and Application of DSP,
// Prentice Hall,1978

void inverse_dit_butterfly(double *data,long size){

long angle,astep,dl;
double xr,yr,xi,yi,wr,wi,dr,di,ang;
double *l1, *l2, *end, *ol2;

astep=size>>1;
end=data+size+size;
for(dl=2;astep>0;dl+=dl,astep>>=1){
l1=data;
l2=data+dl;
for(;l2<end;l1=l2,l2=l2+dl){
ol2=l2;
for(angle=0;l1<ol2;l1+=2,l2+=2){
ang=2*pi*angle/size;
wr=cos(ang);
wi=sin(ang);
xr=*l1;
xi=*(l1+1);
yr=*l2;
yi=*(l2+1);
dr=yr*wr-yi*wi;
di=yr*wi+yi*wr;
*(l1)=xr+dr;*(l1+1)=xi+di;
*(l2)=xr-dr;*(l2+1)=xi-di;
angle+=astep;
}
}
}
}

/////////////////////////////////////////////////////////
// data shuffling into bit-reversed order
// data: array of doubles:
// re(0),im(0),re(1),im(1),...,re(size-1),im(size-1)
// it means that size=array_length/2 !!
//
// Source: Rabiner-Gold: Theory and Application of DSP,
// Prentice Hall,1978

void unshuffle(double *data, long size){

long i,j,k,l,m;
double re,im;

//old version - turned out to be a bit slower
/*for(i=0;i<size-1;i++){
j=bitreverse(i,size);
if (j>i){   //swap
re=data[i+i];im=data[i+i+1];
data[i+i]=data[j+j];data[i+i+1]=data[j+j+1];
data[j+j]=re;data[j+j+1]=im;
}
}*/

l=size-1;
m=size>>1;
for (i=0,j=0; i<l ; i++){
if (i<j){
re=data[j+j]; im=data[j+j+1];
data[j+j]=data[i+i]; data[j+j+1]=data[i+i+1];
data[i+i]=re; data[i+i+1]=im;
}
k=m;
while (k<=j){
j-=k;
k>>=1;
}
j+=k;
}
}

/////////////////////////////////////////////////////////
// used by realfft
// parameters as above
//
// Source: Brigham: The Fast Fourier Transform
// Prentice Hall, ?

void realize(double *data, long size){

double xr,yr,xi,yi,wr,wi,dr,di,ang,astep;
double *l1, *l2;

l1=data;
l2=data+size+size-2;
xr=*l1;
xi=*(l1+1);
*l1=xr+xi;
*(l1+1)=xr-xi;
l1+=2;
astep=pi/size;
for(ang=astep;l1<=l2;l1+=2,l2-=2,ang+=astep){
xr=(*l1+*l2)/2;
yi=(-(*l1)+(*l2))/2;
yr=(*(l1+1)+*(l2+1))/2;
xi=(*(l1+1)-*(l2+1))/2;
wr=cos(ang);
wi=-sin(ang);
dr=yr*wr-yi*wi;
di=yr*wi+yi*wr;
*l1=xr+dr;
*(l1+1)=xi+di;
*l2=xr-dr;
*(l2+1)=-xi+di;
}
}

/////////////////////////////////////////////////////////
// used by inverse realfft
// parameters as above
//
// Source: Brigham: The Fast Fourier Transform
// Prentice Hall, ?

void unrealize(double *data, long size){

double xr,yr,xi,yi,wr,wi,dr,di,ang,astep;
double *l1, *l2;

l1=data;
l2=data+size+size-2;
xr=(*l1)/2;
xi=(*(l1+1))/2;
*l1=xr+xi;
*(l1+1)=xr-xi;
l1+=2;
astep=pi/size;
for(ang=astep;l1<=l2;l1+=2,l2-=2,ang+=astep){
xr=(*l1+*l2)/2;
yi=-(-(*l1)+(*l2))/2;
yr=(*(l1+1)+*(l2+1))/2;
xi=(*(l1+1)-*(l2+1))/2;
wr=cos(ang);
wi=-sin(ang);
dr=yr*wr-yi*wi;
di=yr*wi+yi*wr;
*l2=xr+dr;
*(l1+1)=xi+di;
*l1=xr-dr;
*(l2+1)=-xi+di;
}
}

/////////////////////////////////////////////////////////
// in-place Radix-2 FFT for complex values
// data: array of doubles:
// re(0),im(0),re(1),im(1),...,re(size-1),im(size-1)
// it means that size=array_length/2 !!
//
// output is in similar order, normalized by array length
//
// Source: see the routines it calls ...

void fft(double *data, long size){

double *l, *end;

dif_butterfly(data,size);
unshuffle(data,size);

end=data+size+size;
for(l=data;l<end;l++){*l=*l/size;};
}

/////////////////////////////////////////////////////////
// in-place Radix-2 inverse FFT for complex values
// data: array of doubles:
// re(0),im(0),re(1),im(1),...,re(size-1),im(size-1)
// it means that size=array_length/2 !!
//
// output is in similar order, NOT normalized by
// array length
//
// Source: see the routines it calls ...

void ifft(double* data, long size){

unshuffle(data,size);
inverse_dit_butterfly(data,size);
}

/////////////////////////////////////////////////////////
// in-place Radix-2 FFT for real values
// (by the so-called "packing method")
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
// normalized by array length
//
// Source: see the routines it calls ...

void realfft_packed(double *data, long size){

double *l, *end;
double div;

size>>=1;
dif_butterfly(data,size);
unshuffle(data,size);
realize(data,size);

end=data+size+size;
div=size+size;
for(l=data;l<end;l++){*l=*l/div;};
}

/////////////////////////////////////////////////////////
// in-place Radix-2 inverse FFT for real values
// (by the so-called "packing method")
// data: array of doubles:
// re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
//
// output:
// re(0),re(1),re(2),...,re(size-1)
// NOT normalized by array length
//
//Source: see the routines it calls ...

void irealfft_packed(double *data, long size){

double *l, *end;

size>>=1;
unrealize(data,size);
unshuffle(data,size);
inverse_dit_butterfly(data,size);

end=data+size+size;
for(l=data;l<end;l++){*l=(*l)*2;};
}

/////////////////////////////////////////////////////////
// Bruun FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(size/2),...,re(i),im(i)... pairs in
// "bruun-reversed" order
// normalized by array length
//
// Source:
// Bruun: z-Transform DFT Filters and FFT's
// IEEE Trans. ASSP, ASSP-26, No. 1, February 1978
//
// (this version is implemented in a manner that every
// cosine is calculated only once;
// faster than the other version (see next)

void realfft_bruun(double *data, long size){

double *end, *l0, *l1, *l2, *l3;
long dl, dl2, dl_o, dl2_o, i, j, k, kk;
double d0,d1,d2,d3,c,c2,p4;

end=data+size;
//first filterings, when there're only two taps
size>>=1;
dl=size;
dl2=dl/2;
for(;dl>1;dl>>=1,dl2>>=1){
l0=data;
l3=data+dl;
for(i=0;i<dl;i++){
d0=*l0;
d2=*l3;
*l0=d0+d2;
*l3=d0-d2;
l0++;
l3++;
}
}
l0=data;l1=data+1;
d0=*l0;d1=*l1;
*l0=d0+d1;*l1=d0-d1;
l1+=2;
*l1=-(*l1);

//the remaining filterings

p4=pi/(2*size);
j=1;
kk=1;
dl_o=size/2;
dl2_o=size/4;
while(dl_o>1){
for(k=0;k<kk;k++){
c2=p4*bruun_reverse(j,size);
c=2*cos(c2);
c2=2*sin(c2);
dl=dl_o;
dl2=dl2_o;
for(;dl>1;dl>>=1,dl2>>=1){
l0=data+((dl*j)<<1);
l1=l0+dl2;l2=l0+dl;l3=l1+dl;
for(i=0;i<dl2;i++){
d1=(*l1)*c;
d2=(*l2)*c;
d3=*l3+(*l1);
d0=*l0+(*l2);
*l0=d0+d1;
*l1=d3+d2;
*l2=d0-d1;
*l3=d3-d2;
l0++;
l1++;
l2++;
l3++;
}
}
//real conversion
l3-=4;
*l3=*l3-c*(*l0)/2;
*l0=-c2*(*l0)/2;
*l1=*l1+c*(*l2)/2;
*l2=-c2*(*l2)/2;
j++;
}
dl_o>>=1;
dl2_o>>=1;
kk<<=1;
}

//division with array length
size<<=1;
for(i=0;i<size;i++) data[i]=data[i]/size;
}

/////////////////////////////////////////////////////////
// Bruun FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
// normalized by array length
//
// Source: see the routines it calls ...
void realfft_bruun_unshuffled(double *data, long size){

double *data2;
long i,j,k;

realfft_bruun(data,size);
//unshuffling - cannot be done in-place (?)
data2=(double *)malloc(size*sizeof(double));
for(i=1,k=size>>1;i<k;i++){
j=bruun_reverse(i,k);
data2[j+j]=data[i+i];
data2[j+j+1]=data[i+i+1];
}
for(i=2;i<size;i++) data[i]=data2[i];
free(data2);
}

/////////////////////////////////////////////////////////
// Bruun FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(size/2),...,re(i),im(i)... pairs in
// "bruun-reversed" order
// normalized by array length
//
// Source:
// Bruun: z-Transform DFT Filters and FFT's
// IEEE Trans. ASSP, ASSP-26, No. 1, February 1978
//
// (this version is implemented in a row-by-row manner;
// control structure is simpler, but there are too
// much cosine calls - with a cosine lookup table
// probably this would be slightly faster than bruun_fft

/*void realfft_bruun2(double *data, long size){

double *end, *l0, *l1, *l2, *l3;
long dl, dl2, i, j;
double d0,d1,d2,d3,c,c2,p4;

end=data+size;
p4=pi/(size);
size>>=1;
dl=size;
dl2=dl/2;
//first filtering, when there're only two taps
for(;dl>1;dl>>=1,dl2>>=1){
l0=data;
l3=data+dl;
for(i=0;i<dl;i++){
d0=*l0;
d2=*l3;
*l0=d0+d2;
*l3=d0-d2;
l0++;
l3++;
}
//the remaining filterings
j=1;
while(l3<end){
l0=l3;l1=l0+dl2;l2=l0+dl;l3=l1+dl;
c=2*cos(p4*bruun_reverse(j,size));
for(i=0;i<dl2;i++){
d0=*l0;
d1=*l1;
d2=*l2;
d3=*l3;
*l0=d0+c*d1+d2;
*l2=d0-c*d1+d2;
*l1=d1+c*d2+d3;
*l3=d1-c*d2+d3;
l0++;
l1++;
l2++;
l3++;
}
j++;
}
}

//the last row: transform of real data
//the first two cells
l0=data;l1=data+1;
d0=*l0;d1=*l1;
*l0=d0+d1;*l1=d0-d1;
l1+=2;
*l1=-(*l1);
l0+=4;l1+=2;
//the remaining cells
j=1;
while(l0<end){
c=p4*bruun_reverse(j,size);
c2=sin(c);
c=cos(c);
*l0=*l0-c*(*l1);
*l1=-c2*(*l1);
l0+=2;
l1+=2;
*l0=*l0+c*(*l1);
*l1=-c2*(*l1);
l0+=2;
l1+=2;
j++;
}
//division with array length
for(i=0;i<size;i++) data[i]=data[i]/size;
}
*/

//the same as realfft_bruun_unshuffled,
//but calls realfft_bruun2
/*void realfft_bruun_unshuffled2(double *data, long size){

double *data2;
long i,j,k;

realfft_bruun2(data,size);
//unshuffling - cannot be done in-place (?)
data2=(double *)malloc(size*sizeof(double));
for(i=1,k=size>>1;i<k;i++){
j=bruun_reverse(i,k);
data2[j+j]=data[i+i];
data2[j+j+1]=data[i+i+1];
}
for(i=2;i<size;i++) data[i]=data2[i];
free(data2);
}*/

/////////////////////////////////////////////////////////
// Sorensen in-place split-radix FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(1),re(2),...,re(size/2),im(size/2-1),...,im(1)
// normalized by array length
//
// Source:
// Sorensen et al: Real-Valued Fast Fourier Transform Algorithms,
// IEEE Trans. ASSP, ASSP-35, No. 6, June 1987

void realfft_split(double *data,long n){

long i,j,k,i5,i6,i7,i8,i0,id,i1,i2,i3,i4,n2,n4,n8;
double t1,t2,t3,t4,t5,t6,a3,ss1,ss3,cc1,cc3,a,e,sqrt2;

sqrt2=sqrt(2.0);
n4=n-1;

//data shuffling
for (i=0,j=0,n2=n/2; i<n4 ; i++){
if (i<j){
t1=data[j];
data[j]=data[i];
data[i]=t1;
}
k=n2;
while (k<=j){
j-=k;
k>>=1;
}
j+=k;
}

/*----------------------*/

//length two butterflies
i0=0;
id=4;
do{
for (; i0<n4; i0+=id){
i1=i0+1;
t1=data[i0];
data[i0]=t1+data[i1];
data[i1]=t1-data[i1];
}
id<<=1;
i0=id-2;
id<<=1;
} while ( i0<n4 );

/*----------------------*/
//L shaped butterflies
n2=2;
for(k=n;k>2;k>>=1){
n2<<=1;
n4=n2>>2;
n8=n2>>3;
e = 2*pi/(n2);
i1=0;
id=n2<<1;
do{
for (; i1<n; i1+=id){
i2=i1+n4;
i3=i2+n4;
i4=i3+n4;
t1=data[i4]+data[i3];
data[i4]-=data[i3];
data[i3]=data[i1]-t1;
data[i1]+=t1;
if (n4!=1){
i0=i1+n8;
i2+=n8;
i3+=n8;
i4+=n8;
t1=(data[i3]+data[i4])/sqrt2;
t2=(data[i3]-data[i4])/sqrt2;
data[i4]=data[i2]-t1;
data[i3]=-data[i2]-t1;
data[i2]=data[i0]-t2;
data[i0]+=t2;
}
}
id<<=1;
i1=id-n2;
id<<=1;
} while ( i1<n );
a=e;
for (j=2; j<=n8; j++){
a3=3*a;
cc1=cos(a);
ss1=sin(a);
cc3=cos(a3);
ss3=sin(a3);
a=j*e;
i=0;
id=n2<<1;
do{
for (; i<n; i+=id){
i1=i+j-1;
i2=i1+n4;
i3=i2+n4;
i4=i3+n4;
i5=i+n4-j+1;
i6=i5+n4;
i7=i6+n4;
i8=i7+n4;
t1=data[i3]*cc1+data[i7]*ss1;
t2=data[i7]*cc1-data[i3]*ss1;
t3=data[i4]*cc3+data[i8]*ss3;
t4=data[i8]*cc3-data[i4]*ss3;
t5=t1+t3;
t6=t2+t4;
t3=t1-t3;
t4=t2-t4;
t2=data[i6]+t6;
data[i3]=t6-data[i6];
data[i8]=t2;
t2=data[i2]-t3;
data[i7]=-data[i2]-t3;
data[i4]=t2;
t1=data[i1]+t5;
data[i6]=data[i1]-t5;
data[i1]=t1;
t1=data[i5]+t4;
data[i5]-=t4;
data[i2]=t1;
}
id<<=1;
i=id-n2;
id<<=1;
} while(i<n);
}
}

//division with array length
for(i=0;i<n;i++) data[i]/=n;
}

/////////////////////////////////////////////////////////
// Sorensen in-place split-radix FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
// normalized by array length
//
// Source:
// Source: see the routines it calls ...

void realfft_split_unshuffled(double *data,long n){

double *data2;
long i,j;

realfft_split(data,n);
//unshuffling - not in-place
data2=(double *)malloc(n*sizeof(double));
j=n/2;
data2[0]=data[0];
data2[1]=data[j];
for(i=1;i<j;i++) {data2[i+i]=data[i];data2[i+i+1]=data[n-i];}
for(i=0;i<n;i++) data[i]=data2[i];
free(data2);
}

/////////////////////////////////////////////////////////
// Sorensen in-place inverse split-radix FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size/2),im(size/2-1),...,im(1)
//
// output:
// re(0),re(1),re(2),...,re(size-1)
// NOT normalized by array length
//
// Source:
// Sorensen et al: Real-Valued Fast Fourier Transform Algorithms,
// IEEE Trans. ASSP, ASSP-35, No. 6, June 1987

void irealfft_split(double *data,long n){

long i,j,k,i5,i6,i7,i8,i0,id,i1,i2,i3,i4,n2,n4,n8,n1;
double t1,t2,t3,t4,t5,a3,ss1,ss3,cc1,cc3,a,e,sqrt2;

sqrt2=sqrt(2.0);

n1=n-1;
n2=n<<1;
for(k=n;k>2;k>>=1){
id=n2;
n2>>=1;
n4=n2>>2;
n8=n2>>3;
e = 2*pi/(n2);
i1=0;
do{
for (; i1<n; i1+=id){
i2=i1+n4;
i3=i2+n4;
i4=i3+n4;
t1=data[i1]-data[i3];
data[i1]+=data[i3];
data[i2]*=2;
data[i3]=t1-2*data[i4];
data[i4]=t1+2*data[i4];
if (n4!=1){
i0=i1+n8;
i2+=n8;
i3+=n8;
i4+=n8;
t1=(data[i2]-data[i0])/sqrt2;
t2=(data[i4]+data[i3])/sqrt2;
data[i0]+=data[i2];
data[i2]=data[i4]-data[i3];
data[i3]=2*(-t2-t1);
data[i4]=2*(-t2+t1);
}
}
id<<=1;
i1=id-n2;
id<<=1;
} while ( i1<n1 );
a=e;
for (j=2; j<=n8; j++){
a3=3*a;
cc1=cos(a);
ss1=sin(a);
cc3=cos(a3);
ss3=sin(a3);
a=j*e;
i=0;
id=n2<<1;
do{
for (; i<n; i+=id){
i1=i+j-1;
i2=i1+n4;
i3=i2+n4;
i4=i3+n4;
i5=i+n4-j+1;
i6=i5+n4;
i7=i6+n4;
i8=i7+n4;
t1=data[i1]-data[i6];
data[i1]+=data[i6];
t2=data[i5]-data[i2];
data[i5]+=data[i2];
t3=data[i8]+data[i3];
data[i6]=data[i8]-data[i3];
t4=data[i4]+data[i7];
data[i2]=data[i4]-data[i7];
t5=t1-t4;
t1+=t4;
t4=t2-t3;
t2+=t3;
data[i3]=t5*cc1+t4*ss1;
data[i7]=-t4*cc1+t5*ss1;
data[i4]=t1*cc3-t2*ss3;
data[i8]=t2*cc3+t1*ss3;
}
id<<=1;
i=id-n2;
id<<=1;
} while(i<n1);
}
}

/*----------------------*/
i0=0;
id=4;
do{
for (; i0<n1; i0+=id){
i1=i0+1;
t1=data[i0];
data[i0]=t1+data[i1];
data[i1]=t1-data[i1];
}
id<<=1;
i0=id-2;
id<<=1;
} while ( i0<n1 );

/*----------------------*/

//data shuffling
for (i=0,j=0,n2=n/2; i<n1 ; i++){
if (i<j){
t1=data[j];
data[j]=data[i];
data[i]=t1;
}
k=n2;
while (k<=j){
j-=k;
k>>=1;
}
j+=k;
}
}

/////////////////////////////////////////////////////////
// Sorensen in-place radix-2 FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(1),re(2),...,re(size/2),im(size/2-1),...,im(1)
// normalized by array length
//
// Source:
// Sorensen et al: Real-Valued Fast Fourier Transform Algorithms,
// IEEE Trans. ASSP, ASSP-35, No. 6, June 1987

double  xt,a,e, t1, t2, cc, ss;
long  i, j, k, n1, n2, n3, n4, i1, i2, i3, i4;

n4=n-1;
//data shuffling
for (i=0,j=0,n2=n/2; i<n4 ; i++){
if (i<j){
xt=data[j];
data[j]=data[i];
data[i]=xt;
}
k=n2;
while (k<=j){
j-=k;
k>>=1;
}
j+=k;
}

/* -------------------- */
for (i=0; i<n; i += 2)
{
xt = data[i];
data[i] = xt + data[i+1];
data[i+1] = xt - data[i+1];
}
/* ------------------------ */
n2 = 1;
for (k=n;k>2;k>>=1){
n4 = n2;
n2 = n4 << 1;
n1 = n2 << 1;
e = 2*pi/(n1);
for (i=0; i<n; i+=n1){
xt = data[i];
data[i] = xt + data[i+n2];
data[i+n2] = xt-data[i+n2];
data[i+n4+n2] = -data[i+n4+n2];
a = e;
n3=n4-1;
for (j = 1; j <=n3; j++){
i1 = i+j;
i2 = i - j + n2;
i3 = i1 + n2;
i4 = i - j + n1;
cc = cos(a);
ss = sin(a);
a += e;
t1 = data[i3] * cc + data[i4] * ss;
t2 = data[i3] * ss - data[i4] * cc;
data[i4] = data[i2] - t2;
data[i3] = -data[i2] - t2;
data[i2] = data[i1] - t1;
data[i1] += t1;
}
}
}

//division with array length
for(i=0;i<n;i++) data[i]/=n;
}

/////////////////////////////////////////////////////////
// Sorensen in-place split-radix FFT for real values
// data: array of doubles:
// re(0),re(1),re(2),...,re(size-1)
//
// output:
// re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
// normalized by array length
//
// Source:
// Source: see the routines it calls ...

double *data2;
long i,j;

//unshuffling - not in-place