-
Jan Thorbecke authoredJan Thorbecke authored
rc1fft.c.omp 3.54 KiB
#include "genfft.h"
#include <string.h>
/**
* NAME: rc1fft
*
* DESCRIPTION: real to complex FFT
*
* USAGE:
* void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
*
* INPUT: - *rdata: real 1D input vector
* - n: number of (real) samples in input vector data rdata
* - sign: sign of the Fourier kernel
*
* OUTPUT: - *cdata: complex 1D output vector unscaled
*
* Notice in the preceding formula that there are n real input values,
* and n/2 + 1 complex output values. This property is characteristic of
* real-to-complex FFTs.
*
* NOTES: Optimized system dependent FFT's implemented for:
* - SGI/CRAY ORIGIN 2000 (scsl)
* - AMD ACML 4.4.0
* - inplace FFT from Mayer and SU (see file fft_mayer.c)
*
* AUTHOR:
* Jan Thorbecke (janth@xs4all.nl)
* The Netherlands
*
*----------------------------------------------------------------------
* REVISION HISTORY:
* VERSION AUTHOR DATE COMMENT
* 1.0 Jan Thorbecke Feb '94 Initial version (TU Delft)
* 1.1 Jan Thorbecke June '94 faster in-place FFT
* 2.0 Jan Thorbecke July '97 added Cray SGI calls
*
----------------------------------------------------------------------*/
#if defined(ACML440)
#if defined(DOUBLE)
#define acmlrcfft dzfft
#else
#define acmlrcfft scfft
#endif
#endif
void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
{
#if defined(HAVE_LIBSCS) || defined(ACML440)
static int *nprev;
int ntable, nwork, zero=0, one=1, i;
static int *isys;
static REAL *work, *table, scale=1.0;
REAL scl, *data;
int max_threads, id;
#endif
#if defined(OPENMP)
max_threads = omp_get_max_threads();
id = omp_get_thread_num();
#else
max_threads = 1;
id = 0;
#endif
nprev = (int *)calloc(max_threads, sizeof(int));
isys = (int *)calloc(max_threads, sizeof(int));
#if defined(HAVE_LIBSCS)
if (n != nprev[id]) {
isys[id] = 0;
ntable = n + 15;
nwork = n + 2;
if (work) free(work);
work = (REAL *)malloc(max_threads*nwork*sizeof(REAL));
if (work == NULL) fprintf(stderr,"rc1fft: memory allocation error\n");
if (table) free(table);
table = (float *)malloc(ntable*sizeof(float));
if (table == NULL) fprintf(stderr,"rc1fft: memory allocation error\n");
scfft_(&zero, &n, &scale, rdata, cdata, table, &work[id*nwork], &isys[id]);
nprev[id] = n;
}
scfft_(&sign, &n, &scale, rdata, cdata, table, &work[id*nwork], &isys[id]);
#elif defined(ACML440)
data = (REAL *)malloc(n*sizeof(REAL));
memcpy(data, rdata, n*sizeof(float));
if (n != nprev[id]) {
isys[id] = 0;
nwork = 3*n + 100;
if (work) free(work);
work = (REAL *)malloc(max_threads*nwork*sizeof(REAL));
if (work == NULL) fprintf(stderr,"rc1fft: memory allocation error\n");
acmlrcfft(zero, n, data, &work[id*nwork], &isys[id]);
nprev[id] = n;
}
acmlrcfft(one, n, data, &work[id*nwork], &isys[id]);
scl = sqrt(n);
for (i=0; i<n/2+1;i++) {
cdata[i].r=scl*data[i];
}
cdata[0].i=0.0;
for (i=1; i<((n-1)/2)+1; i++) {
cdata[i].i=-sign*scl*data[n-i];
}
cdata[n/2].i=0.0;
free(data);
#else
rc1_fft(rdata, cdata, n, sign);
#endif
return;
}
/****************** NO COMPLEX DEFINED ******************/
void Rrc1fft(REAL *rdata, REAL *cdata, int n, int sign)
{
rc1fft(rdata, (complex *)cdata, n , sign);
return;
}
/****************** FORTRAN SHELL *****************/
#ifdef DF_CAPFNAMES
#define nrc1fft FNAME(RC1FFTF)
#else
#define nrc1fft FNAME(rc1fftf)
#endif
void nrc1fft(REAL *rdata, complex *cdata, int *n, int *sign)
{
rc1fft(rdata, cdata, *n, *sign);
return;
}