From f8db720538e77db4a8c7660e98463a980fa0fac9 Mon Sep 17 00:00:00 2001 From: Jan at TU-Delft <J.W.Thorbecke@tudelft.nl> Date: Fri, 15 Mar 2019 08:21:28 +0100 Subject: [PATCH] thread-safe MKL FFT's --- .gitignore | 2 +- FFTlib/cc1fft.c | 25 ++++++++++++++++--------- FFTlib/ccmfft.c | 26 ++++++++++++++++---------- FFTlib/cr1fft.c | 29 ++++++++++++++++++----------- FFTlib/crmfft.c | 31 +++++++++++++++++++------------ FFTlib/rc1fft.c | 37 ++++++++++++++++++------------------- FFTlib/rcmfft.c | 31 +++++++++++++++++++------------ marchenko/Makefile | 28 ++++++++++++++++++++++++---- 8 files changed, 131 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index c3ba6fa..4c7346a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ bin/* *.su *.bin */.DS* -*/*/.DS* +**/.DS* .DS* Make_include fdelmodc_cuda.tgz diff --git a/FFTlib/cc1fft.c b/FFTlib/cc1fft.c index 5367191..2fc4e63 100644 --- a/FFTlib/cc1fft.c +++ b/FFTlib/cc1fft.c @@ -61,9 +61,16 @@ void cc1fft(complex *data, int n, int sign) REAL scl; complex *y; #elif defined(MKL) - static DFTI_DESCRIPTOR_HANDLE handle=0; - static int nprev=0; + static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS]; + static int nprev[MAX_NUMTHREADS]; MKL_LONG Status; + int id; +#endif + +#ifdef _OPENMP + id = omp_get_thread_num(); +#else + id = 0; #endif #if defined(HAVE_LIBSCS) @@ -99,26 +106,26 @@ void cc1fft(complex *data, int n, int sign) } acmlcc1fft(sign, scl, inpl, n, data, 1, y, 1, work, &isys); #elif defined(MKL) - if (n != nprev) { - DftiFreeDescriptor(&handle); + if (n != nprev[id]) { + DftiFreeDescriptor(&handle[id]); - Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n); + Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCreateDescriptor FAIL\n"); } - Status = DftiCommitDescriptor(handle); + Status = DftiCommitDescriptor(handle[id]); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCommitDescriptor FAIL\n"); } - nprev = n; + nprev[id] = n; } if (sign < 0) { - Status = DftiComputeBackward(handle, data); + Status = DftiComputeBackward(handle[id], data); } else { - Status = DftiComputeForward(handle, data); + Status = DftiComputeForward(handle[id], data); } #else cc1_fft(data, n, sign); diff --git a/FFTlib/ccmfft.c b/FFTlib/ccmfft.c index cca95cb..fbbfde3 100644 --- a/FFTlib/ccmfft.c +++ b/FFTlib/ccmfft.c @@ -64,10 +64,16 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign) REAL scl; complex *y; #elif defined(MKL) - static DFTI_DESCRIPTOR_HANDLE handle=0; - static int nprev=0; + static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS]; + static int nprev[MAX_NUMTHREADS]; MKL_LONG Status; - int j; + int j, id; +#endif + +#ifdef _OPENMP + id = omp_get_thread_num(); +#else + id = 0; #endif #if defined(HAVE_LIBSCS) @@ -99,29 +105,29 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign) } acmlccmfft(sign, scl, inpl, n2, n1, data, 1, ld1, y, 1, ld1, work, &isys); #elif defined(MKL) - if (n1 != nprev) { - DftiFreeDescriptor(&handle); + if (n1 != nprev[id]) { + DftiFreeDescriptor(&handle[id]); - Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1); + Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCreateDescriptor FAIL\n"); } - Status = DftiCommitDescriptor(handle); + Status = DftiCommitDescriptor(handle[id]); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCommitDescriptor FAIL\n"); } - nprev = n1; + nprev[id] = n1; } if (sign < 0) { for (j=0; j<n2; j++) { - Status = DftiComputeBackward(handle, &data[j*ld1]); + Status = DftiComputeBackward(handle[id], &data[j*ld1]); } } else { for (j=0; j<n2; j++) { - Status = DftiComputeForward(handle, &data[j*ld1]); + Status = DftiComputeForward(handle[id], &data[j*ld1]); } } #else diff --git a/FFTlib/cr1fft.c b/FFTlib/cr1fft.c index 67b8f67..4831beb 100644 --- a/FFTlib/cr1fft.c +++ b/FFTlib/cr1fft.c @@ -57,13 +57,20 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign) static REAL *work, *table, scale=1.0; REAL scl; #elif defined(MKL) - static DFTI_DESCRIPTOR_HANDLE handle=0; - static int nprev=0; + static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS]; + static int nprev[MAX_NUMTHREADS]; REAL *tmp; MKL_LONG Status; - int i; + int i, id; #endif +#ifdef _OPENMP + id = omp_get_thread_num(); +#else + id = 0; +#endif + + #if defined(HAVE_LIBSCS) if (n != nprev) { isys = 0; @@ -98,34 +105,34 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign) } acmlcrfft(one, n, rdata, work, &isys); #elif defined(MKL) - if (n != nprev) { - DftiFreeDescriptor(&handle); + if (n != nprev[id]) { + DftiFreeDescriptor(&handle[id]); - Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n); + Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCreateDescriptor FAIL\n"); } - Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE); + Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); + Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); if (! DftiErrorClass(Status, DFTI_NO_ERROR)) { dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiCommitDescriptor(handle); + Status = DftiCommitDescriptor(handle[id]); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCommitDescriptor FAIL\n"); } - nprev = n; + nprev[id] = n; } tmp = (float *)malloc(n*sizeof(float)); - Status = DftiComputeBackward(handle, (MKL_Complex8 *)cdata, tmp); + Status = DftiComputeBackward(handle[id], (MKL_Complex8 *)cdata, tmp); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiComputeBackward FAIL\n"); diff --git a/FFTlib/crmfft.c b/FFTlib/crmfft.c index a9d4d64..e4dad54 100644 --- a/FFTlib/crmfft.c +++ b/FFTlib/crmfft.c @@ -71,13 +71,20 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s static REAL *work; REAL scl, *data; #elif defined(MKL) - static DFTI_DESCRIPTOR_HANDLE handle=0; - static int nprev=0; + static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS]; + static int nprev[MAX_NUMTHREADS]; REAL *tmp; MKL_LONG Status; - int i, j; + int i, j, id; +#endif + +#ifdef _OPENMP + id = omp_get_thread_num(); +#else + id = 0; #endif + #if defined(HAVE_LIBSCS) nmp = mp_my_threadnum(); if(nmp>=MAX_NUMTHREADS) { @@ -138,37 +145,37 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s memcpy(&rdata[j*ldr],&data[j*n1],n1*sizeof(REAL)); } #elif defined(MKL) - if (n1 != nprev) { - DftiFreeDescriptor(&handle); + if (n1 != nprev[id]) { + DftiFreeDescriptor(&handle[id]); - Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1); + Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCreateDescriptor FAIL\n"); } - Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE); + Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); + Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); //This options is what we would like, but is depreciated in the future - //Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL); + //Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL); if (! DftiErrorClass(Status, DFTI_NO_ERROR)) { dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiCommitDescriptor(handle); + Status = DftiCommitDescriptor(handle[id]); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCommitDescriptor FAIL\n"); } - nprev = n1; + nprev[id] = n1; } tmp = (float *)malloc(n1*sizeof(float)); for (j=0; j<n2; j++) { - Status = DftiComputeBackward(handle, (MKL_Complex8 *)&cdata[j*ldc], tmp); + Status = DftiComputeBackward(handle[id], (MKL_Complex8 *)&cdata[j*ldc], tmp); rdata[j*ldr] = tmp[0]; if (sign < 0) { for (i=1; i<n1; i++) rdata[j*ldr+i] = -sign*tmp[n1-i]; diff --git a/FFTlib/rc1fft.c b/FFTlib/rc1fft.c index fa7386d..9968442 100644 --- a/FFTlib/rc1fft.c +++ b/FFTlib/rc1fft.c @@ -57,10 +57,16 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign) static REAL *work, *table, scale=1.0; REAL scl, *data; #elif defined(MKL) - static DFTI_DESCRIPTOR_HANDLE handle=0; - static int nprev=0; + static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS]; + static int nprev[MAX_NUMTHREADS]; MKL_LONG Status; - int i; + int i, id; +#endif + +#ifdef _OPENMP + id = omp_get_thread_num(); +#else + id = 0; #endif #if defined(HAVE_LIBSCS) @@ -102,40 +108,33 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign) cdata[n/2].i=0.0; free(data); #elif defined(MKL) - if (n != nprev) { - DftiFreeDescriptor(&handle); + if (n != nprev[id]) { + + DftiFreeDescriptor(&handle[id]); - Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n); + Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCreateDescriptor FAIL\n"); } - Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE); + Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } -/* - Status = DftiSetValue(handle, DFTI_FORWARD_DOMAIN, DFTI_REAL); - if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ - dfti_status_print(Status); - printf(" DftiSetValue FAIL\n"); - } -*/ - - Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); + Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); if (! DftiErrorClass(Status, DFTI_NO_ERROR)) { dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiCommitDescriptor(handle); + Status = DftiCommitDescriptor(handle[id]); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCommitDescriptor FAIL\n"); } - nprev = n; + nprev[id] = n; } - Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata); + Status = DftiComputeForward(handle[id], rdata, (MKL_Complex8 *)cdata); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiComputeForward FAIL\n"); diff --git a/FFTlib/rcmfft.c b/FFTlib/rcmfft.c index 5c492fa..574de30 100644 --- a/FFTlib/rcmfft.c +++ b/FFTlib/rcmfft.c @@ -69,12 +69,19 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s static REAL *work; REAL scl, *data; #elif defined(MKL) - static DFTI_DESCRIPTOR_HANDLE handle=0; - static int nprev=0; + static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS]; + static int nprev[MAX_NUMTHREADS]; MKL_LONG Status; - int i,j; + int i, j, id; #endif +#ifdef _OPENMP + id = omp_get_thread_num(); +#else + id = 0; +#endif + + #if defined(HAVE_LIBSCS) nmp = mp_my_threadnum(); if(nmp>=MAX_NUMTHREADS) { @@ -130,39 +137,39 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s } free(data); #elif defined(MKL) - if (n1 != nprev) { - DftiFreeDescriptor(&handle); + if (n1 != nprev[id]) { + DftiFreeDescriptor(&handle[id]); - Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1); + Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCreateDescriptor FAIL\n"); } - Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE); + Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); + Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); if (! DftiErrorClass(Status, DFTI_NO_ERROR)) { dfti_status_print(Status); printf(" DftiSetValue FAIL\n"); } - Status = DftiCommitDescriptor(handle); + Status = DftiCommitDescriptor(handle[id]); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiCommitDescriptor FAIL\n"); } - nprev = n1; + nprev[id] = n1; } - Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata); + Status = DftiComputeForward(handle[id], rdata, (MKL_Complex8 *)cdata); if(! DftiErrorClass(Status, DFTI_NO_ERROR)){ dfti_status_print(Status); printf(" DftiComputeForward FAIL\n"); } for (j=0; j<n2; j++) { - Status = DftiComputeForward(handle, &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]); + Status = DftiComputeForward(handle[id], &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]); for (i=1; i<((n1-1)/2)+1; i++) { cdata[j*ldc+i].i *= -sign; } diff --git a/marchenko/Makefile b/marchenko/Makefile index 260e2c0..20354ed 100644 --- a/marchenko/Makefile +++ b/marchenko/Makefile @@ -7,7 +7,7 @@ include ../Make_include #ALL: fmute marchenko marchenko2 -ALL: fmute marchenko +ALL: fmute marchenko marchenko_primaries SRCJ = fmute.c \ getFileInfo.c \ @@ -35,6 +35,20 @@ SRCH = marchenko.c \ docpkge.c \ getpars.c +SRCP = marchenko_primaries.c \ + getFileInfo.c \ + readData.c \ + readShotData.c \ + readTinvData.c \ + writeData.c \ + writeDataIter.c \ + wallclock_time.c \ + name_ext.c \ + verbosepkg.c \ + atopkge.c \ + docpkge.c \ + getpars.c + OBJJ = $(SRCJ:%.c=%.o) fmute: $(OBJJ) @@ -45,22 +59,28 @@ OBJH = $(SRCH:%.c=%.o) marchenko: $(OBJH) $(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko $(OBJH) $(LIBS) +OBJP = $(SRCP:%.c=%.o) + +marchenko_primaries: $(OBJP) + $(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko_primaries $(OBJP) $(LIBS) + OBJH2 = $(SRCH2:%.c=%.o) marchenko2: $(OBJH2) $(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko2 $(OBJH2) $(LIBS) -install: fmute marchenko +install: fmute marchenko marchenko_primaries cp fmute $B cp marchenko $B + cp marchenko_primaries $B # cp marchenko2 $B clean: - rm -f core fmute $(OBJJ) marchenko $(OBJH) marchenko2 $(OBJH2) + rm -f core fmute $(OBJJ) marchenko $(OBJH) marchenko2 $(OBJH2) marchenko_primaries $(OBJP) realclean: clean - rm -f $B/fmute $B/marchenko $B/marchenko2 + rm -f $B/fmute $B/marchenko $B/marchenko2 $B/marchenko_primaries -- GitLab