From f8db720538e77db4a8c7660e98463a980fa0fac9 Mon Sep 17 00:00:00 2001
From: Jan at TU-Delft <J.W.Thorbecke@tudelft.nl>
Date: Fri, 15 Mar 2019 08:21:28 +0100
Subject: [PATCH] thread-safe MKL FFT's

---
 .gitignore         |  2 +-
 FFTlib/cc1fft.c    | 25 ++++++++++++++++---------
 FFTlib/ccmfft.c    | 26 ++++++++++++++++----------
 FFTlib/cr1fft.c    | 29 ++++++++++++++++++-----------
 FFTlib/crmfft.c    | 31 +++++++++++++++++++------------
 FFTlib/rc1fft.c    | 37 ++++++++++++++++++-------------------
 FFTlib/rcmfft.c    | 31 +++++++++++++++++++------------
 marchenko/Makefile | 28 ++++++++++++++++++++++++----
 8 files changed, 131 insertions(+), 78 deletions(-)

diff --git a/.gitignore b/.gitignore
index c3ba6fa..4c7346a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,7 @@ bin/*
 *.su
 *.bin
 */.DS*
-*/*/.DS*
+**/.DS*
 .DS*
 Make_include
 fdelmodc_cuda.tgz
diff --git a/FFTlib/cc1fft.c b/FFTlib/cc1fft.c
index 5367191..2fc4e63 100644
--- a/FFTlib/cc1fft.c
+++ b/FFTlib/cc1fft.c
@@ -61,9 +61,16 @@ void cc1fft(complex *data, int n, int sign)
     REAL scl;
 	complex *y;
 #elif defined(MKL)
-	static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
+	int id;
+#endif
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -99,26 +106,26 @@ void cc1fft(complex *data, int n, int sign)
     }
 	acmlcc1fft(sign, scl, inpl, n, data, 1, y, 1, work, &isys);
 #elif defined(MKL)
-    if (n != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
 
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n;
+        nprev[id] = n;
     }
 	if (sign < 0) {
-    	Status = DftiComputeBackward(handle, data);
+    	Status = DftiComputeBackward(handle[id], data);
 	}
 	else {
-    	Status = DftiComputeForward(handle, data);
+    	Status = DftiComputeForward(handle[id], data);
 	}
 #else
 	cc1_fft(data, n, sign);
diff --git a/FFTlib/ccmfft.c b/FFTlib/ccmfft.c
index cca95cb..fbbfde3 100644
--- a/FFTlib/ccmfft.c
+++ b/FFTlib/ccmfft.c
@@ -64,10 +64,16 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
 	REAL scl;
 	complex *y;
 #elif defined(MKL)
-    static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
-	int j;
+	int j, id;
+#endif
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -99,29 +105,29 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
 	}
 	acmlccmfft(sign, scl, inpl, n2, n1, data, 1, ld1, y, 1, ld1, work, &isys);
 #elif defined(MKL)
-    if (n1 != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n1 != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
         
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n1;
+        nprev[id] = n1;
     }
     if (sign < 0) {
     	for (j=0; j<n2; j++) {
-        	Status = DftiComputeBackward(handle, &data[j*ld1]);
+        	Status = DftiComputeBackward(handle[id], &data[j*ld1]);
 		}
     }
     else {
     	for (j=0; j<n2; j++) {
-        	Status = DftiComputeForward(handle, &data[j*ld1]);
+        	Status = DftiComputeForward(handle[id], &data[j*ld1]);
 		}
     }
 #else
diff --git a/FFTlib/cr1fft.c b/FFTlib/cr1fft.c
index 67b8f67..4831beb 100644
--- a/FFTlib/cr1fft.c
+++ b/FFTlib/cr1fft.c
@@ -57,13 +57,20 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign)
 	static REAL *work, *table, scale=1.0;
 	REAL scl;
 #elif defined(MKL)
-	static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
 	REAL *tmp;
     MKL_LONG Status;
-	int i;
+	int i, id;
 #endif
 
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
+
+
 #if defined(HAVE_LIBSCS)
 	if (n != nprev) {
 		isys   = 0;
@@ -98,34 +105,34 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign)
 	}
 	acmlcrfft(one, n, rdata, work, &isys);
 #elif defined(MKL)
-    if (n != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
 
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
 
-        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
         if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n;
+        nprev[id] = n;
     }
 	tmp = (float *)malloc(n*sizeof(float));
-    Status = DftiComputeBackward(handle, (MKL_Complex8 *)cdata, tmp);
+    Status = DftiComputeBackward(handle[id], (MKL_Complex8 *)cdata, tmp);
     if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
         dfti_status_print(Status);
         printf(" DftiComputeBackward FAIL\n");
diff --git a/FFTlib/crmfft.c b/FFTlib/crmfft.c
index a9d4d64..e4dad54 100644
--- a/FFTlib/crmfft.c
+++ b/FFTlib/crmfft.c
@@ -71,13 +71,20 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s
     static REAL *work;
     REAL scl, *data;
 #elif defined(MKL)
-    static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     REAL *tmp;
     MKL_LONG Status;
-	int i, j;
+	int i, j, id;
+#endif
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
 #endif
 
+
 #if defined(HAVE_LIBSCS)
 	nmp = mp_my_threadnum();
 	if(nmp>=MAX_NUMTHREADS) {
@@ -138,37 +145,37 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s
         memcpy(&rdata[j*ldr],&data[j*n1],n1*sizeof(REAL));
     }
 #elif defined(MKL)
-    if (n1 != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n1 != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
 
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
 
-        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
         //This options is what we would like, but is depreciated in the future
-        //Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL);
+        //Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL);
         if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n1;
+        nprev[id] = n1;
     }
     tmp = (float *)malloc(n1*sizeof(float));
     for (j=0; j<n2; j++) {
-    	Status = DftiComputeBackward(handle, (MKL_Complex8 *)&cdata[j*ldc], tmp);
+    	Status = DftiComputeBackward(handle[id], (MKL_Complex8 *)&cdata[j*ldc], tmp);
     	rdata[j*ldr] = tmp[0];
 		if (sign < 0) {
     		for (i=1; i<n1; i++) rdata[j*ldr+i] = -sign*tmp[n1-i];
diff --git a/FFTlib/rc1fft.c b/FFTlib/rc1fft.c
index fa7386d..9968442 100644
--- a/FFTlib/rc1fft.c
+++ b/FFTlib/rc1fft.c
@@ -57,10 +57,16 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	static REAL *work, *table, scale=1.0;
 	REAL scl, *data;
 #elif defined(MKL)
-	static DFTI_DESCRIPTOR_HANDLE handle=0;
-	static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
-	int i;
+	int i, id;
+#endif
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -102,40 +108,33 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	cdata[n/2].i=0.0;
 	free(data);
 #elif defined(MKL)
-	if (n != nprev) {
-		DftiFreeDescriptor(&handle);
+	if (n != nprev[id]) {
+
+		DftiFreeDescriptor(&handle[id]);
 
-		Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
+		Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
     	if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
         	dfti_status_print(Status);
         	printf(" DftiCreateDescriptor FAIL\n");
     	}
-		Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+		Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
 		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
 			dfti_status_print(Status);
 			printf(" DftiSetValue FAIL\n");
 		}
-/*
-		Status = DftiSetValue(handle, DFTI_FORWARD_DOMAIN, DFTI_REAL);
-		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
-			dfti_status_print(Status);
-			printf(" DftiSetValue FAIL\n");
-		}
-*/
-
-		Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+		Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
 		if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
 			dfti_status_print(Status);
 			printf(" DftiSetValue FAIL\n");
 		}
-		Status = DftiCommitDescriptor(handle);
+		Status = DftiCommitDescriptor(handle[id]);
 		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
 			dfti_status_print(Status);
 			printf(" DftiCommitDescriptor FAIL\n");
 		}
-		nprev = n;
+		nprev[id] = n;
 	}
-	Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata);
+	Status = DftiComputeForward(handle[id], rdata, (MKL_Complex8 *)cdata);
 	if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
 		dfti_status_print(Status);
 		printf(" DftiComputeForward FAIL\n");
diff --git a/FFTlib/rcmfft.c b/FFTlib/rcmfft.c
index 5c492fa..574de30 100644
--- a/FFTlib/rcmfft.c
+++ b/FFTlib/rcmfft.c
@@ -69,12 +69,19 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s
 	static REAL *work;
 	REAL scl, *data;
 #elif defined(MKL)
-    static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
-	int i,j;
+	int i, j, id;
 #endif
 
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
+
+
 #if defined(HAVE_LIBSCS)
 	nmp = mp_my_threadnum();
 	if(nmp>=MAX_NUMTHREADS) {
@@ -130,39 +137,39 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s
 	}
 	free(data);
 #elif defined(MKL)
-    if (n1 != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n1 != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
         
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
         
-        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
         if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n1;
+        nprev[id] = n1;
     }
-    Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata);
+    Status = DftiComputeForward(handle[id], rdata, (MKL_Complex8 *)cdata);
     if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
         dfti_status_print(Status);
         printf(" DftiComputeForward FAIL\n");
     }
 	for (j=0; j<n2; j++) {
-    	Status = DftiComputeForward(handle, &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]);
+    	Status = DftiComputeForward(handle[id], &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]);
     	for (i=1; i<((n1-1)/2)+1; i++) {
         	cdata[j*ldc+i].i *= -sign;
     	}
diff --git a/marchenko/Makefile b/marchenko/Makefile
index 260e2c0..20354ed 100644
--- a/marchenko/Makefile
+++ b/marchenko/Makefile
@@ -7,7 +7,7 @@ include ../Make_include
 
 #ALL: fmute marchenko marchenko2
 
-ALL: fmute marchenko 
+ALL: fmute marchenko marchenko_primaries
 
 SRCJ	= fmute.c \
 		getFileInfo.c  \
@@ -35,6 +35,20 @@ SRCH	= marchenko.c \
 		docpkge.c \
 		getpars.c
 
+SRCP	= marchenko_primaries.c \
+		getFileInfo.c  \
+		readData.c \
+		readShotData.c \
+		readTinvData.c \
+		writeData.c \
+		writeDataIter.c \
+		wallclock_time.c \
+		name_ext.c  \
+		verbosepkg.c  \
+		atopkge.c \
+		docpkge.c \
+		getpars.c
+
 OBJJ	= $(SRCJ:%.c=%.o)
 
 fmute:	$(OBJJ) 
@@ -45,22 +59,28 @@ OBJH	= $(SRCH:%.c=%.o)
 marchenko:	$(OBJH) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko $(OBJH) $(LIBS)
 
+OBJP	= $(SRCP:%.c=%.o)
+
+marchenko_primaries:	$(OBJP) 
+	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko_primaries $(OBJP) $(LIBS)
+
 OBJH2	= $(SRCH2:%.c=%.o)
 
 marchenko2:	$(OBJH2) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko2 $(OBJH2) $(LIBS)
 
-install: fmute marchenko 
+install: fmute marchenko marchenko_primaries
 	cp fmute $B
 	cp marchenko $B
+	cp marchenko_primaries $B
 
 #	cp marchenko2 $B
 
 clean:
-		rm -f core fmute $(OBJJ) marchenko $(OBJH) marchenko2 $(OBJH2)
+		rm -f core fmute $(OBJJ) marchenko $(OBJH) marchenko2 $(OBJH2) marchenko_primaries $(OBJP)
 
 realclean: clean
-		rm -f $B/fmute $B/marchenko $B/marchenko2
+		rm -f $B/fmute $B/marchenko $B/marchenko2 $B/marchenko_primaries
 
 
 
-- 
GitLab