diff --git a/.gitignore b/.gitignore
index a0caee250f5bbd368528285a1171eea3b070906c..c3ba6facbcab7a47d2251f83fbd6d0672fd6eb5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.[oa]
+*.eps
 bin/*
 *.su
 *.bin
@@ -20,6 +21,7 @@ utils/makemod
 utils/makewave
 utils/mat2su
 utils/syn2d
+utils/green3D
 fdelmodc/fdelmodc
 include/genfft.h
 Make_include
@@ -29,3 +31,18 @@ marchenko_full/fmute
 marchenko_full/marchenko
 corrvir/corrvir
 fdemmodc/fdemmodc
+FFTlib/test/cc1test
+FFTlib/test/cc2dtest
+FFTlib/test/ccmtest
+FFTlib/test/rc1test
+FFTlib/test/rc2dtest
+FFTlib/test/rcmtest
+marchenko_applications/HomG
+marchenko_applications/MuteSnap
+marchenko_applications/combine
+marchenko_applications/gmshift
+marchenko_applications/iba
+marchenko_applications/marchenko_app
+marchenko_applications/reshape_su
+raytime3d/raytime3d
+MDD/mdd
diff --git a/FFTlib/cc1fft.c b/FFTlib/cc1fft.c
index d4658464b5aff9d2af055655c411293f4c768ab9..5367191b0f9c0a0ea49460e9ace61f63520ee321 100644
--- a/FFTlib/cc1fft.c
+++ b/FFTlib/cc1fft.c
@@ -1,4 +1,8 @@
 #include "genfft.h"
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     cc1fft
@@ -56,6 +60,10 @@ void cc1fft(complex *data, int n, int sign)
     static complex *work;
     REAL scl;
 	complex *y;
+#elif defined(MKL)
+	static DFTI_DESCRIPTOR_HANDLE handle=0;
+    static int nprev=0;
+    MKL_LONG Status;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -90,6 +98,28 @@ void cc1fft(complex *data, int n, int sign)
 		nprev = n;
     }
 	acmlcc1fft(sign, scl, inpl, n, data, 1, y, 1, work, &isys);
+#elif defined(MKL)
+    if (n != nprev) {
+        DftiFreeDescriptor(&handle);
+
+        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCreateDescriptor FAIL\n");
+        }
+        Status = DftiCommitDescriptor(handle);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCommitDescriptor FAIL\n");
+        }
+        nprev = n;
+    }
+	if (sign < 0) {
+    	Status = DftiComputeBackward(handle, data);
+	}
+	else {
+    	Status = DftiComputeForward(handle, data);
+	}
 #else
 	cc1_fft(data, n, sign);
 #endif
diff --git a/FFTlib/cc2dfft.c b/FFTlib/cc2dfft.c
index d65fc26d6ca5a3d720c68df39cf28ad632b985dd..8ccd2775793d9f5e3c2d060a81994f6560bd8040 100644
--- a/FFTlib/cc2dfft.c
+++ b/FFTlib/cc2dfft.c
@@ -1,4 +1,8 @@
 #include "genfft.h"
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     cc2dfft
@@ -60,6 +64,10 @@ void cc2dfft(complex *data, int nx, int ny, int ldx, int sign)
 	REAL scl;
 	complex *y;
 	complex *tmp;
+#elif defined(MKL)
+    static DFTI_DESCRIPTOR_HANDLE handle=0;
+    static int nxprev=0, nyprev=0;
+    MKL_LONG Status, N[2];
 #else
 	int i, j;
 	complex *tmp;
@@ -110,6 +118,30 @@ void cc2dfft(complex *data, int nx, int ny, int ldx, int sign)
 	else {
 		cc1fft(data, nx, sign);
 	}
+#elif defined(MKL)
+	if (nx != nxprev || ny != nyprev) {
+        DftiFreeDescriptor(&handle);
+          
+		N[0] = nx; N[1] = ny;
+        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 2, N);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCreateDescriptor FAIL\n");
+        } 
+        Status = DftiCommitDescriptor(handle);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCommitDescriptor FAIL\n");
+        }
+        nxprev = nx;
+        nyprev = ny;
+    }
+    if (sign < 0) {
+    	Status = DftiComputeBackward(handle, data);
+    }
+    else {
+    	Status = DftiComputeForward(handle, data);
+    }   
 #else 
 	if (ny != 1) {
 		ccmfft(data, nx, ny, ldx, sign);
diff --git a/FFTlib/ccmfft.c b/FFTlib/ccmfft.c
index 9c8623506304cb4bb9bb02858e1bdf52adb2717a..cca95cba96ed969a67eaa42f58ab1b359063abc8 100644
--- a/FFTlib/ccmfft.c
+++ b/FFTlib/ccmfft.c
@@ -1,4 +1,8 @@
 #include "genfft.h"
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     ccmfft
@@ -59,6 +63,11 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
 	static complex *work;
 	REAL scl;
 	complex *y;
+#elif defined(MKL)
+    static DFTI_DESCRIPTOR_HANDLE handle=0;
+    static int nprev=0;
+    MKL_LONG Status;
+	int j;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -89,6 +98,32 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
 		nprev = n1;
 	}
 	acmlccmfft(sign, scl, inpl, n2, n1, data, 1, ld1, y, 1, ld1, work, &isys);
+#elif defined(MKL)
+    if (n1 != nprev) {
+        DftiFreeDescriptor(&handle);
+        
+        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCreateDescriptor FAIL\n");
+        }
+        Status = DftiCommitDescriptor(handle);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCommitDescriptor FAIL\n");
+        }
+        nprev = n1;
+    }
+    if (sign < 0) {
+    	for (j=0; j<n2; j++) {
+        	Status = DftiComputeBackward(handle, &data[j*ld1]);
+		}
+    }
+    else {
+    	for (j=0; j<n2; j++) {
+        	Status = DftiComputeForward(handle, &data[j*ld1]);
+		}
+    }
 #else
 	ccm_fft(data, n1, n2, ld1, sign);
 #endif
diff --git a/FFTlib/cr1fft.c b/FFTlib/cr1fft.c
index 9aebffad8c00c1ff4d481005b5def42776edb5b8..67b8f67839bb4098e83c6224ed5603383a9c1b40 100644
--- a/FFTlib/cr1fft.c
+++ b/FFTlib/cr1fft.c
@@ -1,4 +1,8 @@
 #include "genfft.h"
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     cr1fft
@@ -52,10 +56,12 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign)
 	static int isys;
 	static REAL *work, *table, scale=1.0;
 	REAL scl;
-#elif defined(FFTW3)
-	static int nprev=0;
-	int   iopt, ier;
-	static float *work;
+#elif defined(MKL)
+	static DFTI_DESCRIPTOR_HANDLE handle=0;
+    static int nprev=0;
+	REAL *tmp;
+    MKL_LONG Status;
+	int i;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -91,6 +97,47 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign)
 		nprev = n;
 	}
 	acmlcrfft(one, n, rdata, work, &isys);
+#elif defined(MKL)
+    if (n != nprev) {
+        DftiFreeDescriptor(&handle);
+
+        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCreateDescriptor FAIL\n");
+        }
+        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiSetValue FAIL\n");
+        }
+
+        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
+            dfti_status_print(Status);
+            printf(" DftiSetValue FAIL\n");
+        }
+        Status = DftiCommitDescriptor(handle);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCommitDescriptor FAIL\n");
+        }
+        nprev = n;
+    }
+	tmp = (float *)malloc(n*sizeof(float));
+    Status = DftiComputeBackward(handle, (MKL_Complex8 *)cdata, tmp);
+    if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+        dfti_status_print(Status);
+        printf(" DftiComputeBackward FAIL\n");
+    }
+    rdata[0] = tmp[0];
+	if (sign < 0) {
+    	for (i=1; i<n; i++) rdata[i] = -sign*tmp[n-i];
+	}
+	else {
+    	for (i=1; i<n; i++) rdata[i] = tmp[i];
+	}
+	free(tmp);
 #else
 	cr1_fft(cdata, rdata, n, sign);
 #endif
diff --git a/FFTlib/crmfft.c b/FFTlib/crmfft.c
index b1ca8904ca966a6d40ff29020e4b10d687ae103a..a9d4d64ea3d9535cc937104f16222da65adf81e8 100644
--- a/FFTlib/crmfft.c
+++ b/FFTlib/crmfft.c
@@ -1,5 +1,9 @@
 #include "genfft.h"
 #include <string.h>
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     crmfft
@@ -66,6 +70,12 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s
     static int isys;
     static REAL *work;
     REAL scl, *data;
+#elif defined(MKL)
+    static DFTI_DESCRIPTOR_HANDLE handle=0;
+    static int nprev=0;
+    REAL *tmp;
+    MKL_LONG Status;
+	int i, j;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -127,6 +137,51 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s
     for (j=0; j<n2; j++) {
         memcpy(&rdata[j*ldr],&data[j*n1],n1*sizeof(REAL));
     }
+#elif defined(MKL)
+    if (n1 != nprev) {
+        DftiFreeDescriptor(&handle);
+
+        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCreateDescriptor FAIL\n");
+        }
+        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiSetValue FAIL\n");
+        }
+
+        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        //This options is what we would like, but is depreciated in the future
+        //Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL);
+        if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
+            dfti_status_print(Status);
+            printf(" DftiSetValue FAIL\n");
+        }
+        Status = DftiCommitDescriptor(handle);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCommitDescriptor FAIL\n");
+        }
+        nprev = n1;
+    }
+    tmp = (float *)malloc(n1*sizeof(float));
+    for (j=0; j<n2; j++) {
+    	Status = DftiComputeBackward(handle, (MKL_Complex8 *)&cdata[j*ldc], tmp);
+    	rdata[j*ldr] = tmp[0];
+		if (sign < 0) {
+    		for (i=1; i<n1; i++) rdata[j*ldr+i] = -sign*tmp[n1-i];
+		}
+		else {
+    		for (i=1; i<n1; i++) rdata[j*ldr+i] = tmp[i];
+		}
+	}
+    free(tmp);
+    if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+        dfti_status_print(Status);
+        printf(" DftiComputeBackward FAIL\n");
+    }
 #else
 	crm_fft(cdata, rdata, n1, n2, ldc, ldr, sign);
 #endif
diff --git a/FFTlib/rc1fft.c b/FFTlib/rc1fft.c
index efddc3a73b32cec99cd76214f0502a539e30d89e..fa7386d7936e35dbdcf2b0f08038b58e3e5be57f 100644
--- a/FFTlib/rc1fft.c
+++ b/FFTlib/rc1fft.c
@@ -1,5 +1,9 @@
 #include "genfft.h"
 #include <string.h>
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     rc1fft
@@ -52,6 +56,11 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	static int isys;
 	static REAL *work, *table, scale=1.0;
 	REAL scl, *data;
+#elif defined(MKL)
+	static DFTI_DESCRIPTOR_HANDLE handle=0;
+	static int nprev=0;
+    MKL_LONG Status;
+	int i;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -92,6 +101,49 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	}
 	cdata[n/2].i=0.0;
 	free(data);
+#elif defined(MKL)
+	if (n != nprev) {
+		DftiFreeDescriptor(&handle);
+
+		Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
+    	if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+        	dfti_status_print(Status);
+        	printf(" DftiCreateDescriptor FAIL\n");
+    	}
+		Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+			dfti_status_print(Status);
+			printf(" DftiSetValue FAIL\n");
+		}
+/*
+		Status = DftiSetValue(handle, DFTI_FORWARD_DOMAIN, DFTI_REAL);
+		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+			dfti_status_print(Status);
+			printf(" DftiSetValue FAIL\n");
+		}
+*/
+
+		Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+		if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
+			dfti_status_print(Status);
+			printf(" DftiSetValue FAIL\n");
+		}
+		Status = DftiCommitDescriptor(handle);
+		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+			dfti_status_print(Status);
+			printf(" DftiCommitDescriptor FAIL\n");
+		}
+		nprev = n;
+	}
+	Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata);
+	if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+		dfti_status_print(Status);
+		printf(" DftiComputeForward FAIL\n");
+	}
+	for (i=1; i<((n-1)/2)+1; i++) {
+		cdata[i].i *= -sign;
+	}
+
 #else
 	rc1_fft(rdata, cdata, n, sign);
 #endif
@@ -99,6 +151,18 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	return;
 }
 
+#ifdef MKL
+void dfti_status_print(MKL_LONG status)
+{
+    MKL_LONG class_error;
+    char*   error_message;
+
+    error_message = DftiErrorMessage(status);
+     printf("error_message = %s \n", error_message);
+    return;
+}
+#endif
+
 
 /****************** NO COMPLEX DEFINED ******************/
 
diff --git a/FFTlib/rcmfft.c b/FFTlib/rcmfft.c
index e336eb722736d0e5f9dafd4e2ccf2fc092633b8a..5c492faff59d28c7f69fe4d886c85e0f0b47ca6c 100644
--- a/FFTlib/rcmfft.c
+++ b/FFTlib/rcmfft.c
@@ -1,5 +1,9 @@
 #include "genfft.h"
 #include <string.h>
+#ifdef MKL
+#include "mkl_dfti.h"
+void dfti_status_print(MKL_LONG status);
+#endif
 
 /**
 *   NAME:     rcmfft
@@ -64,6 +68,11 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s
 	static int isys;
 	static REAL *work;
 	REAL scl, *data;
+#elif defined(MKL)
+    static DFTI_DESCRIPTOR_HANDLE handle=0;
+    static int nprev=0;
+    MKL_LONG Status;
+	int i,j;
 #endif
 
 #if defined(HAVE_LIBSCS)
@@ -120,6 +129,44 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s
 		cdata[j*ldc+n1/2].i=0.0;
 	}
 	free(data);
+#elif defined(MKL)
+    if (n1 != nprev) {
+        DftiFreeDescriptor(&handle);
+        
+        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCreateDescriptor FAIL\n");
+        }
+        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiSetValue FAIL\n");
+        }
+        
+        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
+            dfti_status_print(Status);
+            printf(" DftiSetValue FAIL\n");
+        }
+        Status = DftiCommitDescriptor(handle);
+        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+            dfti_status_print(Status);
+            printf(" DftiCommitDescriptor FAIL\n");
+        }
+        nprev = n1;
+    }
+    Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata);
+    if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
+        dfti_status_print(Status);
+        printf(" DftiComputeForward FAIL\n");
+    }
+	for (j=0; j<n2; j++) {
+    	Status = DftiComputeForward(handle, &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]);
+    	for (i=1; i<((n1-1)/2)+1; i++) {
+        	cdata[j*ldc+i].i *= -sign;
+    	}
+    }
 #else
 	rcm_fft(rdata, cdata, n1, n2, ldr, ldc, sign);
 #endif
diff --git a/FFTlib/test/Makefile b/FFTlib/test/Makefile
index dc1c31fda3b99b480debd4fa1b7696d8ca7051cd..bfca6f33111dea66fa64634c1a28aeff482ca5a4 100644
--- a/FFTlib/test/Makefile
+++ b/FFTlib/test/Makefile
@@ -2,7 +2,7 @@
 
 include ../../Make_include
 
-LIBS    += -L$L -lgenfft -lm
+LIBS += -L$L -lgenfft $(FFT) -lm
 
 ALL: cc1test cc2dtest ccmtest rc1test rc2dtest rcmtest rc1loop cc1loop
 
diff --git a/FFTlib/test/cc1loop.c b/FFTlib/test/cc1loop.c
index f076afdd7da22470a39d2aed3fca3d4f728ec71c..040fa70375df1e2872911ebec4ca4bc0f74bb521 100644
--- a/FFTlib/test/cc1loop.c
+++ b/FFTlib/test/cc1loop.c
@@ -1,8 +1,7 @@
 #include <genfft.h>
 #include <time.h>
-#include <kiss_fft.h>
 
-main () {
+int main () {
 
 	int j,i,n,sign, isign;
 	int N, Nmax=600, Nitcc; 
@@ -65,5 +64,6 @@ main () {
 		N += 1;
 
    	} 
+	return 0;
 }
 
diff --git a/FFTlib/test/cc1test.c b/FFTlib/test/cc1test.c
index 346f112b290af05771b8571dd94ba1e372e79282..8e076508dd5efa0d8c31245151854c01e6618725 100644
--- a/FFTlib/test/cc1test.c
+++ b/FFTlib/test/cc1test.c
@@ -1,7 +1,7 @@
 #include <genfft.h>
 #include <time.h>
 
-main () {
+int main () {
 
 	int j,i,n,sign, isign;
 	int N, Nmax=8192, Nitcc; 
@@ -79,5 +79,6 @@ main () {
 		k += 1.0;
 
    	} 
+	return 0;
 }
 
diff --git a/FFTlib/test/cc2dtest.c b/FFTlib/test/cc2dtest.c
index 157a0674f20670d6db71a5de648448a3b2419fbd..f714d32c5cb2405df8b18c7803efe061df28a5c7 100644
--- a/FFTlib/test/cc2dtest.c
+++ b/FFTlib/test/cc2dtest.c
@@ -1,7 +1,7 @@
 #include <genfft.h>
 #include <time.h>
 
-main () {
+int main () {
 
 	int j,i,n,sign, isign;
 	int N, Nmax=1024, Nitcc, Nlot=1024, ld1;
@@ -142,5 +142,6 @@ main () {
 		Nlot *= 2;
 
    	} 
+	return 0;
 }
 
diff --git a/FFTlib/test/ccmtest.c b/FFTlib/test/ccmtest.c
index 39b10059f8101012a1d2e4d21f6c1345ff0acc69..5ea472e4e3d1381b0e290ed8fc148630c742a3cd 100644
--- a/FFTlib/test/ccmtest.c
+++ b/FFTlib/test/ccmtest.c
@@ -1,7 +1,7 @@
 #include <genfft.h>
 #include <time.h>
 
-main () {
+int main () {
 
 	int j,i,n,sign, isign;
 	int N, Nmax=4097, Nitcc, Nlot=513, ld1; 
@@ -128,5 +128,6 @@ main () {
 		k += 1.0;
 
    	} 
+	return 0;
 }
 
diff --git a/FFTlib/test/rc1loop.c b/FFTlib/test/rc1loop.c
index a2209a191b919c8ae75b6dd590ebaab53ba09573..9018e0c46a55af8c89088883673880849f3f9b84 100644
--- a/FFTlib/test/rc1loop.c
+++ b/FFTlib/test/rc1loop.c
@@ -5,7 +5,7 @@ void crdft(complex *cdata, REAL *rdata, int n, int sign);
 void rcdft(REAL *rdata, complex *cdata, int n, int sign);
 
 
-main () {
+int main () {
 
 	int j,i,n,k,sign, isign;
 	int N, Nmax=600, Nitcc; 
@@ -94,5 +94,6 @@ main () {
 		N += 1;
 
    	} 
+	return 0;
 }
 
diff --git a/FFTlib/test/rc1test.c b/FFTlib/test/rc1test.c
index c375793c6f6fc181e27414338dd5e13e530c66a4..8771a371dc526006dc44dc7cf64b057412c47a9b 100644
--- a/FFTlib/test/rc1test.c
+++ b/FFTlib/test/rc1test.c
@@ -1,7 +1,7 @@
 #include <genfft.h>
 #include <time.h>
 
-main () {
+int main () {
 
 	int j,i,n,sign, isign;
 	int N, Nmax=8192, Nitcc; 
@@ -24,25 +24,41 @@ main () {
 
 	N = 16;
 	k = 5.0;
-	sign = 1;
-	isign = -1;
+	sign = -1;
+	isign = 1;
 	while (N <= Nmax) {
 
 		/* Initialize the data */
 
 		for (i=0;i<N;i++) {
-			c_data[i]   = (float)-0.1+0.5*(N/2-i);
+			c_data[i]   = (float)-0.1+0.5*(N/3-i)*sin(i*M_PI/N);
 //			c_data[i]   = 0.0;
 		}
 //		c_data[0]   = 1.0;
 		t = 0.0;
 
+/*
+N=16;
+			scl = 1.0/(float)N;
+			for (j=0;j<N;j++) data[j] = c_data[j];
+			rc1fft(data, cdata, N, sign);
+			cr1fft(cdata, data, N, isign);
+			for (i=0; i<N; i++) 
+            fprintf(stderr,"%s: i = %d data = %f Ref-data = %f Complex=%f,%f\n", machine, i, data[i]*scl, c_data[i],cdata[i/2].r,cdata[i/2].i);
+			for (j=0;j<N;j++) data[j] = c_data[j];
+			rc1_fft(data, cdata, N, sign);
+			cr1_fft(cdata, data, N, isign);
+			for (i=0; i<N; i++) 
+            fprintf(stderr,"%s: i = %d data = %f Ref-data = %f Complex=%f,%f\n", machine, i, data[i]*scl, c_data[i],cdata[i/2].r,cdata[i/2].i);
+			return;
+*/
         /* FFT */
 		for (i=0; i<2500; i++) {
 			for (j=0;j<N;j++) data[j] = c_data[j];
 			t0 = wallclock_time();
 			rc1fft(data, cdata, N, sign);
 			cr1fft(cdata, data, N, isign);
+//		cr1_fft(cdata, data, N, isign);
 			t1 = wallclock_time();
 			t += t1-t0;
 		}
@@ -51,9 +67,9 @@ main () {
 		scl = 1.0/(float)N;
 		for (i=0; i<N; i++) {
 /*
-            fprintf(stderr,"%s: i = %d data = %f C-data = %f\n", machine, i, data[i].r*scl, c_data[i].r);
-            fprintf(stderr,"%s: i = %d data = %f C-data = %f\n", machine, i, data[i].i*scl, c_data[i].i);
+            fprintf(stderr,"%s: i = %d data = %f Ref-data = %f Complex=%f,%f\n", machine, i, data[i]*scl, c_data[i],cdata[i/2].r,cdata[i/2].i);
 */
+
 			if (c_data[i] != 0.0) {
 				diff = fabs((data[i]*scl - c_data[i]) / c_data[i]);
 			}
@@ -77,5 +93,6 @@ main () {
 		k += 1.0;
 
    	} 
+	return 0;
 }
 
diff --git a/FFTlib/test/rc2dtest.c b/FFTlib/test/rc2dtest.c
index e9965f34611a098d5593f70c3182f1675bf6106e..685b8071a78e7e190c8eca6736aae23bd8dff89d 100644
--- a/FFTlib/test/rc2dtest.c
+++ b/FFTlib/test/rc2dtest.c
@@ -1,7 +1,7 @@
 #include <genfft.h>
 #include <time.h>
 
-main () {
+int main () {
 
 	int l,j,i,n,sign, isign, ldr, ldc;
 	int N, Nmax=2048, Nlot=150, Nitcc; 
@@ -141,6 +141,6 @@ main () {
         Nlot *= 2;
 
     }
-
+	return 0;
 }
 
diff --git a/FFTlib/test/rcmtest.c b/FFTlib/test/rcmtest.c
index f8ef83dc5c14cf8493b30bd360c5d20f72c7eb4a..9602ceb8acf0db0f1d024a5301afeb21549dffb1 100644
--- a/FFTlib/test/rcmtest.c
+++ b/FFTlib/test/rcmtest.c
@@ -1,7 +1,7 @@
 #include <genfft.h>
 #include <time.h>
 
-main () {
+int main () {
 
 	int l,j,i,n,sign, isign, ldr, ldc;
 	int N, Nmax=8193, Nlot=150, Nitcc; 
@@ -82,5 +82,6 @@ main () {
 		k += 1.0;
 
    	} 
+	return 0;
 }
 
diff --git a/MDD/Makefile b/MDD/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..561b309ee2e3272ff12013336db442c01840db4b
--- /dev/null
+++ b/MDD/Makefile
@@ -0,0 +1,61 @@
+# Makefile
+
+include ../Make_include
+
+########################################################################
+# define general include and system library
+ALLINC  = -I.
+#BLAS libs with Intel compiler
+#LIBS += -mkl -L$L -lgenfft $(LIBSM)
+#General BLAS library
+#LIBS += -L$L -lgenfft $(LIBSM)
+#General BLAS library
+#LIBS += $(BLAS)
+
+#CFLAGS += -I$(MKLROOT)/include  
+#LIBS += -lblas -llapack -L$L -lgenfft $(LIBSM) -lc -lm
+
+all: mdd 
+
+PRG = mdd
+
+SRCC =  $(PRG).c \
+	atopkge.c \
+	docpkge.c \
+	getpars.c \
+	readShotData.c \
+	writeEigen.c \
+	deconvolve.c \
+	computeMatrixInverse.c \
+	getFileInfo.c \
+	verbosepkg.c \
+	name_ext.c \
+	wallclock_time.c
+
+OBJC	= $(SRCC:%.c=%.o)
+
+$(PRG):	$(OBJC) 
+	$(CC) $(LDFLAGS) $(CFLAGS) $(OPTC) -o $(PRG) $(OBJC) $(LIBS)
+
+install: $(PRG) 
+	cp $(PRG) $B
+
+clean:
+		rm -f core $(OBJC) $(OBJM) $(PRG) 
+
+realclean:
+		rm -f core $(OBJC) $(OBJM) $(PRG) $B/$(PRG) 
+
+
+print:	Makefile $(SRC)
+	$(PRINT) $?
+	@touch print
+
+count:
+	@wc $(SRC)
+
+tar:
+	@tar cf $(PRG).tar Makefile $(SRC) && compress $(PRG).tar
+
+
+
diff --git a/MDD/atopkge.c b/MDD/atopkge.c
new file mode 120000
index 0000000000000000000000000000000000000000..5107e2b2ccd382ede29d397838d8fad88126a516
--- /dev/null
+++ b/MDD/atopkge.c
@@ -0,0 +1 @@
+../utils/atopkge.c
\ No newline at end of file
diff --git a/MDD/computeMatrixInverse.c b/MDD/computeMatrixInverse.c
new file mode 100644
index 0000000000000000000000000000000000000000..a4ba57fd53d3d54e75e2cbf2f3e85d1dd1f88800
--- /dev/null
+++ b/MDD/computeMatrixInverse.c
@@ -0,0 +1,524 @@
+#include<math.h>
+#include<stdlib.h>
+#include<stdio.h>
+#include <assert.h>
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+/* Cholesky based inverse */
+void cpotrf_(char *uplo, int *N, float *A, int *lda, int *info);
+void cpotri_(char *uplo, int *N, float *A, int *lda, int *info);
+				
+/* LU based inverse */
+void cgetrf_(int *M, int *N, float *A, int *lda, int *ipvt, int *info);
+void cgetri_(int *N, float *A, int *lda, int *ipvt, float *work, int *lwork, int *info);
+void zgetrf_(int *M, int *N, double *A, int *lda, int *ipvt, int *info);
+void zgetri_(int *N, double *A, int *lda, int *ipvt, double *work, int *lwork, int *info);
+int ilaenv_(int *ispec, char *name, char *opts, int *n1, int *n2, int *n3, int *n4);
+
+/* SVD based inverse */
+void cgesvd_(char *jobu, char *jobvt, int *M, int *N, float *A, int *lda, float *S, float *U, int *ldu, float *vt, int *ldvt, float *work, int *lwork, float *rwork, int *info);
+void zgesvd_(char *jobu, char *jobvt, int *M, int *N, double *A, int *lda, double *S, double *U, int *ldu, double *vt, int *ldvt, double *work, int *lwork, double *rwork, int *info);
+void cgesdd_(char *jobz, int *M, int *N, float *A, int *lda, float *S, float *U, int *ldu, float *vt, int *ldvt, float *work, int *lwork, float *rwork, int *iwork, int *info);
+
+/* Eigenvalues */
+void zgeev_(char *jobvl, char *jobvr, int *N, double *A, int *lda, double *S, double *vl, int *ldvl, double *vr, int *ldvr, 
+			double *work, int *lwork, double *rwork, int *info);
+
+typedef struct { /* complex number */
+	float r,i;
+} complex;
+
+void computeMatrixInverse(complex *matrix, int nxm, int rthm, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int iw, int verbose)
+{
+	int i,j,k,N,lda,info,lwork,*ipvt;
+	float energy;
+	complex tmp, one, *work;
+	char *uplo;
+
+	uplo = "U";
+	lda = N = nxm;
+	one.r = 1.0;
+	one.i = 0.0;
+
+	if (rthm==0) {
+		energy=0.0;
+		if (eps_r != 0.0) {
+			for (i=0; i<nxm; i++) {
+				for (j=0; j<nxm; j++) {
+					tmp = matrix[i*nxm+j];
+					energy += sqrt(tmp.r*tmp.r+tmp.i*tmp.i);
+				}
+//		fprintf(stderr,"i=%d energy=%e\n", i, energy);
+			}
+		}
+		if (verbose>1) fprintf(stderr,"energy=%e eps_r=%e eps_a=%e\n", energy, eps_r*energy, eps_a);
+		/* add small value at diagonal */
+#pragma ivdep
+		for (i=0; i<nxm; i++) {
+			tmp.r = eps_r*energy+eps_a;
+			matrix[i*nxm+i].r+=tmp.r;
+		}
+		/* Cholesky based matrix inversion */
+		cpotrf_(uplo, &N, &matrix[0].r, &lda, &info);
+		assert (info == 0);
+		cpotri_(uplo, &N, &matrix[0].r, &lda, &info);
+		assert (info == 0);
+		/* fill lower part of inverse matrix */
+		for (i=0; i<nxm; i++) {
+#pragma ivdep
+			for (j=i+1; j<nxm; j++) {
+					matrix[i*nxm+j].r=matrix[j*nxm+i].r;
+					matrix[i*nxm+j].i=-1.0*matrix[j*nxm+i].i;
+			}
+		}
+
+	}
+	else if (rthm==1) {
+		int ispec, n1, nb;
+		char *name , *opts;
+
+		ispec = 1;
+		name = "CGETRI";
+		n1 = nxm;
+		nb = ilaenv_(&ispec, name, opts, &n1, &n1, &n1, &n1);
+		nb = MAX(1,nb);
+		lwork = nb*nxm;
+		ipvt = (int *)malloc(nxm*sizeof(int));
+		work = (complex *)malloc(lwork*sizeof(complex));
+
+		energy=0.0;
+		if (eps_r != 0.0) {
+			for (i=0; i<nxm; i++) {
+				for (j=0; j<nxm; j++) {
+					tmp = matrix[i*nxm+j];
+					energy += sqrt(tmp.r*tmp.r+tmp.i*tmp.i);
+				}
+			}
+		}
+		if (verbose>1) fprintf(stderr,"eps_r=%e eps_a=%e\n", eps_r*energy, eps_a);
+		/* add small value at diagonal */
+		for (i=0; i<nxm; i++) {
+			tmp.r = eps_r*energy+eps_a;
+			matrix[i*nxm+i].r+=tmp.r;
+		}
+		/* LU based matrix inversion */
+		cgetrf_(&nxm, &nxm, &matrix[0].r, &nxm, ipvt, &info);
+		assert (info == 0);
+		cgetri_(&nxm, &matrix[0].r, &nxm, ipvt, &work[0].r, &lwork, &info);
+		assert (info == 0);
+
+		free(ipvt);
+		free(work);
+	}
+	else if (rthm==2) { /* SVD general algorithm most accurate */
+		float *rwork, *S;
+		double S0,Si;
+		complex *U, *VT, a, b;
+		char *jobu, *jobvt;
+		int neig;
+
+		energy=0.0;
+		if (eps_r != 0.0) {
+			for (i=0; i<nxm; i++) {
+				for (j=0; j<nxm; j++) {
+					tmp = matrix[i*nxm+j];
+					energy += sqrt(tmp.r*tmp.r+tmp.i*tmp.i);
+				}
+			}
+			fprintf(stderr,"energy = %e\n", energy);
+		}
+		if (verbose>1) fprintf(stderr,"eps_r=%e eps_a=%e\n", eps_r*energy, eps_a);
+		/* add small value at diagonal */
+		for (i=0; i<nxm; i++) {
+			tmp.r = eps_r*energy+eps_a;
+			matrix[i*nxm+i].r+=tmp.r;
+		}
+
+		jobu = "A";
+		jobvt = "A";
+		lda = N = nxm;
+		lwork = N*8;
+		S = (float *)malloc(N*sizeof(float));
+		U = (complex *)malloc(N*N*sizeof(complex));
+		VT = (complex *)malloc(N*N*sizeof(complex));
+		work = (complex *)malloc(lwork*sizeof(complex));
+		rwork = (float *)malloc(5*N*sizeof(float));
+
+		/* Compute SVD */
+		cgesvd_(jobu, jobvt, &N, &N, &matrix[0].r, &lda, S, &U[0].r, &lda, &VT[0].r, 
+			&lda, &work[0].r, &lwork, rwork, &info);
+		assert (info == 0);
+
+		if (eigenvalues) {
+			for (i=0; i<N; i++) {
+					eigen[i] = S[i];
+			}
+		}
+
+		/* Compute inverse */
+		S0 = S[0];
+		neig = 0;
+		for (i=0; i<N; i++) {
+/*			fprintf(stderr,"S[%d] = %e ",i,S[i]);*/
+			Si = S[i];
+			if ((Si/S0) > numacc) { S[i]=1.0/S[i]; neig++; }
+			else S[i] = 0.0;
+			/*S[i]=1.0/(S[i]+eps_r*S[0]);*/
+/*			fprintf(stderr,"S^-1[%d] = %e\n",i,S[i]);*/
+		}
+		if(verbose) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
+
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				U[j*N+i].r=S[j]*U[j*N+i].r;
+				U[j*N+i].i=-1.0*S[j]*U[j*N+i].i;
+			}
+		}
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				tmp.r = tmp.i = 0.0;
+				for (k=0; k<N; k++) {
+					a = U[k*N+j];
+					b.r = VT[i*N+k].r;
+					b.i = -1.0*VT[i*N+k].i;
+					tmp.r += (a.r*b.r-a.i*b.i);
+					tmp.i += (a.r*b.i+a.i*b.r);
+				}
+				matrix[j*nxm+i] = tmp;
+			}
+		}
+
+		free(U);
+		free(VT);
+		free(S);
+		free(work);
+		free(rwork);
+	}
+	else if (rthm==3) { /* SVD algorithm Divide and Conquerer less accurate */
+		/* CGESDD*/
+		int *iwork;
+		int neig;
+		float *rwork, *S;
+		double S0,Si;
+		complex *U, *VT, a, b;
+		char *jobz;
+
+		energy=0.0;
+		if (eps_r != 0.0) {
+			for (i=0; i<nxm; i++) {
+				for (j=0; j<nxm; j++) {
+					tmp = matrix[i*nxm+j];
+					energy += sqrt(tmp.r*tmp.r+tmp.i*tmp.i);
+				}
+			}
+		}
+		if (verbose>1) fprintf(stderr,"eps_r=%e eps_a=%e\n", eps_r*energy, eps_a);
+		/* add small value at diagonal */
+		for (i=0; i<nxm; i++) {
+			tmp.r = eps_r*energy+eps_a;
+			matrix[i*nxm+i].r+=tmp.r;
+		}
+
+		jobz = "A";
+		lda = N = nxm;
+		lwork = N*N+4*N;
+		S = (float *)malloc(N*sizeof(float));
+		U = (complex *)malloc(N*N*sizeof(complex));
+		VT = (complex *)malloc(N*N*sizeof(complex));
+		work = (complex *)malloc(lwork*sizeof(complex));
+		rwork = (float *)malloc(5*(N*N+N)*sizeof(float));
+		iwork = (int *)malloc(8*N*sizeof(int));
+
+		/* Compute SVD */
+		cgesdd_(jobz, &N, &N, &matrix[0].r, &lda, S, &U[0].r, &lda, &VT[0].r, 
+			&lda, &work[0].r, &lwork, rwork, iwork, &info);
+		assert (info == 0);
+
+		if (eigenvalues) {
+			for (i=0; i<N; i++) {
+					eigen[i] = S[i];
+			}
+		}
+
+		/* Compute inverse */
+		S0 = S[0];
+		neig = 0;
+		for (i=0; i<N; i++) {
+/*			fprintf(stderr,"S[%d] = %e S0 = %e\n ",i,S[i], S0);*/
+			Si = S[i];
+			if ((Si/S0) > numacc) { S[i]=1.0/S[i]; neig++; }
+			else S[i] = 0.0;
+/*			fprintf(stderr,"S^-1[%d] = %e\n",i,S[i]);*/
+		}
+		if(verbose) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
+
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				U[j*N+i].r=S[j]*U[j*N+i].r;
+				U[j*N+i].i=-1.0*S[j]*U[j*N+i].i;
+			}
+		}
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				tmp.r = tmp.i = 0.0;
+				for (k=0; k<N; k++) {
+					a = U[k*N+j];
+					b.r = VT[i*N+k].r;
+					b.i = -1.0*VT[i*N+k].i;
+					tmp.r += (a.r*b.r-a.i*b.i);
+					tmp.i += (a.r*b.i+a.i*b.r);
+				}
+				matrix[j*nxm+i] = tmp;
+			}
+		}
+
+		free(U);
+		free(VT);
+		free(S);
+		free(work);
+		free(rwork);
+		free(iwork);
+	}
+	else if (rthm==4) { /* SVD general algorithm double precission most accurate */
+		double *rwork, *S, *U, *VT, ar, ai, br, bi, tmpr, tmpi;
+		double S0,Si,*Mat,*dwork;
+		int neig;
+		char *jobu, *jobvt;
+
+		energy=0.0;
+		if (eps_r != 0.0) {
+			for (i=0; i<nxm; i++) {
+				for (j=0; j<nxm; j++) {
+					tmp = matrix[i*nxm+j];
+					energy += sqrt(tmp.r*tmp.r+tmp.i*tmp.i);
+				}
+			}
+		}
+		if (verbose>1) fprintf(stderr,"eps_r=%e eps_a=%e\n", eps_r*energy, eps_a);
+		/* add small value at diagonal */
+		for (i=0; i<nxm; i++) {
+			tmp.r = eps_r*energy+eps_a;
+			matrix[i*nxm+i].r+=tmp.r;
+		}
+
+		Mat = (double *)malloc(2*N*N*sizeof(double));
+		/* convert to doubles */
+		for (i=0; i<nxm; i++) {
+			for (j=0; j<nxm; j++) {
+				Mat[i*2*nxm+j*2] = (double)matrix[i*nxm+j].r;
+				Mat[i*2*nxm+j*2+1] = (double)matrix[i*nxm+j].i;
+			}
+		}
+		jobu = "A";
+		jobvt = "A";
+		lda = N = nxm;
+		lwork = N*8;
+		S = (double *)malloc(N*sizeof(double));
+		U = (double *)malloc(2*N*N*sizeof(double));
+		VT = (double *)malloc(2*N*N*sizeof(double));
+		dwork = (double *)malloc(2*lwork*sizeof(double));
+		rwork = (double *)malloc(5*N*sizeof(double));
+
+		/* Compute SVD */
+		zgesvd_(jobu, jobvt, &N, &N, &Mat[0], &lda, S, &U[0], &lda, &VT[0], 
+			&lda, &dwork[0], &lwork, rwork, &info);
+		assert (info == 0);
+
+		if (eigenvalues) {
+			for (i=0; i<N; i++) {
+					eigen[i] = (float)S[i];
+			}
+		}
+
+		/* Compute inverse */
+		S0 = S[0];
+		neig = 0;
+		for (i=0; i<N; i++) {
+			if (verbose=4) fprintf(stderr,"S[%d] = %e ",i,S[i]);
+			Si = S[i];
+			if ((Si/S0) > numacc) { S[i]=1.0/S[i]; neig++; }
+			else S[i] = 0.0;
+			/*S[i]=1.0/(S[i]+eps_r*S[0]);*/
+/*			fprintf(stderr,"S^-1[%d] = %e\n",i,S[i]);*/
+		}
+		if(verbose) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
+
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				U[j*2*N+2*i]=S[j]*U[j*2*N+2*i];
+				U[j*2*N+2*i+1]=-1.0*S[j]*U[j*2*N+2*i+1];
+			}
+		}
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				tmpr = tmpi = 0.0;
+				for (k=0; k<N; k++) {
+					ar = U[k*2*N+2*j];
+					ai = U[k*2*N+2*j+1];
+					br = VT[i*2*N+2*k];
+					bi = -1.0*VT[i*2*N+2*k+1];
+					tmpr += (ar*br-ai*bi);
+					tmpi += (ar*bi+ai*br);
+				}
+				matrix[j*nxm+i].r = (float)tmpr;
+				matrix[j*nxm+i].i = (float)tmpi;
+			}
+		}
+
+		free(U);
+		free(VT);
+		free(S);
+		free(dwork);
+		free(rwork);
+		free(Mat);
+	}
+	else if (rthm==5) { /* double precission LU decomposition */
+		int ispec, n1, nb;
+		char *name , *opts;
+		double *Mat, *dwork;
+
+		ispec = 1;
+		name = "ZGETRI";
+		n1 = nxm;
+		nb = ilaenv_(&ispec, name, opts, &n1, &n1, &n1, &n1);
+		nb = MAX(1,nb);
+		lwork = nb*nxm;
+		ipvt = (int *)malloc(nxm*sizeof(int));
+		dwork = (double *)malloc(2*lwork*sizeof(double));
+		Mat = (double *)malloc(2*N*N*sizeof(double));
+
+		energy=0.0;
+		if (eps_r != 0.0) {
+			for (i=0; i<nxm; i++) {
+				for (j=0; j<nxm; j++) {
+					tmp = matrix[i*nxm+j];
+					energy += sqrt(tmp.r*tmp.r+tmp.i*tmp.i);
+				}
+			}
+		}
+		if (verbose>1) fprintf(stderr,"eps_r=%e eps_a=%e\n", eps_r*energy, eps_a);
+		/* convert to doubles */
+		for (i=0; i<nxm; i++) {
+			for (j=0; j<nxm; j++) {
+				Mat[i*2*nxm+j*2] = (double)matrix[i*nxm+j].r;
+				Mat[i*2*nxm+j*2+1] = (double)matrix[i*nxm+j].i;
+			}
+		}
+
+		/* add small value at diagonal */
+		for (i=0; i<nxm; i++) {
+			Mat[i*2*nxm+i*2]  +=eps_r*energy+eps_a;
+//			Mat[i*2*nxm+i*2+1]+=eps_r*energy+eps_a;
+		}
+
+		/* LU based matrix inversion */
+		zgetrf_(&nxm, &nxm, &Mat[0], &nxm, ipvt, &info);
+		if (info != 0) fprintf(stderr,"error in zgetrf %d at frequency %d\n", info, iw);
+		assert (info == 0);
+		zgetri_(&nxm, &Mat[0], &nxm, ipvt, &dwork[0], &lwork, &info);
+		if (info != 0) fprintf(stderr,"error in zgetri %d at frequency %d\n", info, iw);
+		assert (info == 0);
+
+		/* convert back to floats */
+		for (i=0; i<nxm; i++) {
+			for (j=0; j<nxm; j++) {
+				matrix[i*nxm+j].r = (float)Mat[i*2*nxm+j*2];
+				matrix[i*nxm+j].i = (float)Mat[i*2*nxm+j*2+1];
+			}
+		}
+
+		free(ipvt);
+		free(dwork);
+		free(Mat);
+	}
+	else if (rthm==6) { /* eigenvalue decomposition */
+		int *iwork;
+		int neig;
+		double *work, *vr, *vl;
+		double *rwork, *S, *U, *VT, ar, ai, br, bi, tmpr, tmpi;
+		double S0,Si,nxi,*Mat;
+		char *jobvl, *jobvr;
+
+		jobvl = "V";
+		jobvr = "V";
+		lwork = N*N+2*N;
+		work  = (double *)malloc(2*lwork*sizeof(double));
+		rwork = (double *)malloc(N*2*sizeof(double));
+		vr    = (double *)malloc(2*N*N*sizeof(double));
+		vl    = (double *)malloc(2*N*N*sizeof(double));
+		S = (double *)malloc(2*N*sizeof(double));
+		U = (double *)malloc(2*N*N*sizeof(double));
+
+		Mat = (double *)malloc(2*N*N*sizeof(double));
+		/* convert to doubles */
+		for (i=0; i<nxm; i++) {
+			for (j=0; j<nxm; j++) {
+				Mat[i*2*nxm+j*2] = (double)matrix[i*nxm+j].r;
+				Mat[i*2*nxm+j*2+1] = (double)matrix[i*nxm+j].i;
+			}
+		}
+
+		zgeev_(jobvl, jobvr, &N, Mat, &N, S, vl, &N, vr, &N, 
+			work, &lwork, rwork, &info);
+		assert (info == 0);
+
+		nxi = 1.0/N;
+		for (i=0; i<N; i++) {
+			S[2*i] = (float)S[2*i]*nxi;
+			S[2*i+1] = (float)S[2*i+1]*nxi;
+		}
+		
+		for (i=0; i<N; i++) {
+			for (j=0; j<N; j++) {
+				U[i*2*N+2*j]  = (float)vr[(j)*2*N+2*i];
+				U[i*2*N+2*j+1]  = (float)vr[(i)*2*N+2*j+1];
+			}
+		}
+
+		/* Compute inverse */
+		S0 = S[0];
+		neig = 0;
+		for (i=0; i<N; i++) {
+/*			fprintf(stderr,"S[%d] = %e ",i,S[i]);*/
+			Si = S[i];
+			if ((Si/S0) > numacc) { S[i]=1.0/S[i]; neig++; }
+			else S[i] = 0.0;
+/*			fprintf(stderr,"S^-1[%d] = %e\n",i,S[i]);*/
+		}
+		if(verbose) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
+
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				U[j*2*N+2*i]=S[j]*U[j*2*N+2*i];
+				U[j*2*N+2*i+1]=-1.0*S[j]*U[j*2*N+2*i+1];
+			}
+		}
+		for (j=0; j<N; j++) {
+			for (i=0; i<N; i++) {
+				tmpr = tmpi = 0.0;
+				for (k=0; k<N; k++) {
+					ar = U[k*2*N+2*j];
+					ai = U[k*2*N+2*j+1];
+					br = U[i*2*N+2*k];
+					bi = U[i*2*N+2*k+1];
+					tmpr += (ar*br-ai*bi);
+					tmpi += (ar*bi+ai*br);
+				}
+				matrix[j*nxm+i].r = (float)tmpr;
+				matrix[j*nxm+i].i = (float)tmpi;
+			}
+		}
+
+
+		free(work);
+		free(rwork);
+		free(vr);
+		free(Mat);
+		free(S);
+		free(U);
+	}
+
+	return;
+}
+
diff --git a/MDD/deconvolve.c b/MDD/deconvolve.c
new file mode 100644
index 0000000000000000000000000000000000000000..147fc7b6da2909c40e18485881cf0f5de472215f
--- /dev/null
+++ b/MDD/deconvolve.c
@@ -0,0 +1,194 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#ifdef MKL
+#include<mkl_cblas.h>
+#endif
+
+typedef struct { /* complex number */
+	float r,i;
+} complex;
+
+/*
+cblas interface
+void cgemm(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
+           const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda,
+           const MKL_Complex8 *b, const MKL_INT *ldb, const MKL_Complex8 *beta,
+           MKL_Complex8 *c, const MKL_INT *ldc);
+*/
+
+void cgemm_(char *transA, char *transb, int *M, int *N, int *K, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc);
+/*
+CGEMM - perform one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C,
+
+Synopsis
+
+SUBROUTINE CGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
+
+CHARACTER*1 TRANSA, TRANSB
+
+INTEGER M, N, K, LDA, LDB, LDC
+
+COMPLEX ALPHA, BETA
+
+COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * )
+
+TRANSA - CHARACTER*1. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows:
+
+TRANSA = 'N' or 'n', op( A ) = A.
+
+TRANSA = 'T' or 't', op( A ) = A'.
+
+TRANSA = 'C' or 'c', op( A ) = conjg( A' ).
+
+Unchanged on exit.
+
+TRANSB - CHARACTER*1. On entry, TRANSB specifies the form of op( B ) to be used in the matrix multiplication as follows:
+
+TRANSB = 'N' or 'n', op( B ) = B.
+
+TRANSB = 'T' or 't', op( B ) = B'.
+
+TRANSB = 'C' or 'c', op( B ) = conjg( B' ).
+
+Unchanged on exit.
+
+M - INTEGER.
+On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. Unchanged on exit.
+
+N - INTEGER.
+On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. Unchanged on exit.
+
+K - INTEGER.
+On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. Unchanged on exit.
+
+ALPHA - COMPLEX .
+On entry, ALPHA specifies the scalar alpha. Unchanged on exit.
+
+A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is k when TRANSA = 'N' or 'n', and is m otherwise. Before entry with TRANSA = 'N' or 'n', the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. Unchanged on exit.
+
+LDA - INTEGER.
+On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANSA = 'N' or 'n' then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ). Unchanged on exit.
+
+B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. Unchanged on exit.
+
+LDB - INTEGER.
+On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANSB = 'N' or 'n' then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ). Unchanged on exit.
+
+BETA - COMPLEX .
+On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. Unchanged on exit.
+
+C - COMPLEX array of DIMENSION ( LDC, n ).
+Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
+
+LDC - INTEGER.
+On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ).  Unchanged on exit.
+
+*/
+
+void computeMatrixInverse(complex *matrix, int nxm, int rthm, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int iw, int verbose);
+
+int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose)
+{
+	int istation, jstation, i, j, k, icc, ibb, NA, NB, NC, nshots;
+	size_t  iwnA, iw, iwnB, iwAB, iwBB;
+	complex *AB, *BB;
+	char *transa, *transb,*transN;
+	complex beta, alpha, tmp, a, b;
+
+	AB = (complex *)calloc(nstationA*nstationB,sizeof(complex));
+	BB = (complex *)calloc(nstationB*nstationB,sizeof(complex));
+
+	if (conjgA == 1) transa = "C";
+	else if (conjgA == 0) transa = "N";
+	else transa = "T";
+	if (conjgB == 1) transb = "C";
+	else if(conjgB ==0) transb = "N";
+	else transb = "T";
+	transN = "N";
+	alpha.r = 1.0; alpha.i = 0.0;
+	beta.r = 0.0; beta.i = 0.0;
+	nshots = nblock;
+	NA = nstationA;
+	NB = nstationB;
+	if (conjgA) NC = nshots;
+	else NC = nstationB;
+
+//	if (verbose) fprintf(stderr,"transa=%s transb=%s %d %d %d\n", transa, transb, NA, NB, nshots);
+
+#pragma omp for schedule(static) \
+private(iw, iwnA, iwnB, iwAB, iwBB) 
+	for (iw=0; iw< nfreq; iw++) {
+
+		iwnA = iw*nstationA*nshots;
+		iwnB = iw*nstationB*nshots;
+		iwAB = iw*NC*NC;
+		if (mdd==0) { /* Correlation */
+				/* cblas_cgemm(CblasRowMajor,CblasNoTrans, CblasConjTrans, NA, NB, nshots, &alpha.r, 
+				&cA[iwnA].r, NA, 
+				&cB[iwnB].r, NB, &beta.r,
+				&cC[iwAB].r, NC); */
+			cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&cB[iwnB].r, &NB, &beta.r, 
+				&cC[iwAB].r, &NC); 	
+//				memcpy(&cC[iwAB].r, &cB[iwnA].r, sizeof(float)*2*nstationA*nshots);
+		}
+		else if (mdd==1) { /* Multi Dimensional deconvolution */
+            /* compute AB^h and BB^h */
+			iwBB = iw*nstationB*nstationB;
+			cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&cB[iwnB].r, &NB, &beta.r, 
+				&AB[0].r, &NA);
+	
+			cgemm_(transa, transb, &NB, &NB, &nshots, &alpha.r, 
+				&cB[iwnB].r, &NB, 
+				&cB[iwnB].r, &NB, &beta.r, 
+				&BB[0].r, &NB);
+	
+			if (oBB!=NULL) memcpy(&oBB[iwBB].r, &BB[0].r, nstationB*nstationB*sizeof(complex));
+
+			/* compute inverse of BB^h as [BB^h+eps]^-1 */
+			computeMatrixInverse(BB, NB, rthm, eps_a, eps_r, numacc, eigenvalues, &eigen[iw*NB], iw, verbose);
+	
+			/* multiply with AB to get Least Squares inversion */
+			/* C = A/B => AB^h/(BB^h+eps) */
+			cgemm_(transa, transa, &NA, &NB, &NB, &alpha.r, 
+				&AB[0].r, &NA, 
+				&BB[0].r, &NB, &beta.r, 
+				&cC[iwAB].r, &NA);
+		}
+		else if (mdd==2) { /* Multi Dimensional deconvolution, but AB^H en BB^H already computed */
+
+			memcpy(&BB[0].r, &cB[iwnB].r, nstationB*nshots*sizeof(complex));
+
+			computeMatrixInverse(BB, NB, rthm, eps_a, eps_r, numacc, eigenvalues, &eigen[iw*NB], iw, verbose);
+	
+			transN = "N";
+			transN = "N";
+			cgemm_(transN, transN, &NA, &NB, &NB, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&BB[0].r, &NB, &beta.r, 
+				&cC[iwAB].r, &NA);
+		}
+		else if (mdd==3) { /* Copy matrix A or B to memory for testing purposes */
+			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
+		}
+		else if (mdd==4) {
+			memcpy(&cC[iwAB].r, &cB[iwnB].r, sizeof(complex)*nstationB*nshots);
+		}
+		else if (mdd==5) {
+			cblas_cdotu_sub(nshots, &cA[iwnA].r, NA, &cB[iwnB].r, NB, &cC[iwnA].r);
+		}
+
+	}
+
+	free(AB);
+	free(BB);
+
+	return 0;
+}
+
diff --git a/MDD/docpkge.c b/MDD/docpkge.c
new file mode 120000
index 0000000000000000000000000000000000000000..5384bb3801703c3f0db8fcc032235ca6130fa08b
--- /dev/null
+++ b/MDD/docpkge.c
@@ -0,0 +1 @@
+../utils/docpkge.c
\ No newline at end of file
diff --git a/MDD/getFileInfo.c b/MDD/getFileInfo.c
new file mode 120000
index 0000000000000000000000000000000000000000..ae38ea27f17697d65d7248c8e89038b632314182
--- /dev/null
+++ b/MDD/getFileInfo.c
@@ -0,0 +1 @@
+../utils/getFileInfo.c
\ No newline at end of file
diff --git a/MDD/getpars.c b/MDD/getpars.c
new file mode 120000
index 0000000000000000000000000000000000000000..fa7dc3355428e8ea9013fafad6e319dde3a48ebb
--- /dev/null
+++ b/MDD/getpars.c
@@ -0,0 +1 @@
+../utils/getpars.c
\ No newline at end of file
diff --git a/MDD/mdd.c b/MDD/mdd.c
new file mode 100644
index 0000000000000000000000000000000000000000..93a6e277feabffc68a1ded5e8b09d8686693f58b
--- /dev/null
+++ b/MDD/mdd.c
@@ -0,0 +1,593 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "par.h"
+#include "segy.h"
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+
+#ifdef _OPENMP
+int omp_get_thread_num(void);
+#endif
+double wallclock_time(void);
+void name_ext(char *filename, char *extension);
+
+typedef struct { /* complex number */
+        float r,i;
+} complex;
+
+void cr1fft(complex *cdata, float *rdata, int n, int sign);
+int optncr(int n);
+
+int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *d2, float *f1, float *f2, float *xmin, float *xmax, float *sclsxgx, int *nxm);
+
+int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc, int *xnx, complex *cdata, int nw, int nw_low, int ngath, int nx, int nxm, int ntfft, float alpha, float scl, float conjg, int transpose, int verbose);
+
+int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose);
+
+void writeEigen(char *file_out, float df, int nw_low, int nw_high, int nw, float *eigen, int nx, float dx, float xmin);
+void writeDatamatrix(char *file_out, complex *P, int ntfft, int ntc, int Nrec, int Nshot, int nfreq, int nw_low, float dt, int verbose);
+
+void gausstaper(float *taper, float dx, int n, float enddecay);
+
+/**************
+* ntc output samples of deconvolution result
+* note that nt (the number of samples read by the IO routine)
+* should be 2*ntc and a number efficient for FFT's
+*/
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+" ",
+" mdd - multi-dimensional deconvolution (OpenMP)",
+"  ",
+" mdd file_A= file_B= file_out= [optional parameters]",
+"  ",
+" Required parameters: ",
+" ",
+"   file_A= .................. name of file(s) which store the data in location A",
+"   file_B= .................. name of file(s) which store the data in location B",
+" ",
+" Optional parameters: ",
+" ",
+"   ntc=nt ................... number of output time samples",
+"   ntfft=nt ................. number of samples used in fft",
+"   fmin=0 ................... minimum frequency",
+"   fmax=70 .................. maximum frequency to use in deconvolution",
+" INPUT DEFINITION ",
+"   cjA=1 .................... -1 => apply complex conjugate to A",
+"   sclA=1 ................... apply scaling factor to A",
+"   tranposeA=0 .............. apply transpose to A",
+"   cjB=1 .................... -1 => apply complex conjugate to B",
+"   sclB=1 ................... apply scaling factor to B",
+"   tranposeB=0 .............. apply transpose to B",
+" MATRIX INVERSION CALCULATION ",
+"   conjgA=0 ................. apply complex conjugate-transpose to A",
+"   conjgB=1 ................. apply complex conjugate-transpose to B",
+"   rthm=0 ................... see below for options",
+"   eps_a=1e-5 ............... absolute stabilization factor for LS",
+"   eps_r=1e-4 ............... relative stabilization factor for LS",
+"   numacc=1e-6 .............. numerical accurary for SVD",
+"   ntap=0 ................... number of taper points matrix",
+"   ftap=0 ................... percentage for tapering",
+"   tap=0 .................... type of taper: 0=cos 1=exp",
+"   eigenvalues= ............. write SVD eigenvalues to file ",
+"   mdd=1 .................... mdd=0 => computes correlation ",
+" OUTPUT DEFINITION ",
+"   file_out= ................ output base name ",
+"   causal=1 ................. output causal(1), non-causal(2), both(3), or summed(4)",
+"   one_file=1 ............... write all shots into one file ",
+"   file_dmat= ............... if defined writes matrix in frequency domain",
+"   verbose=0 ................ silent option; >0 displays info",
+" ",
+" Notes: ",
+"    ntc output samples of deconvolution result",
+"    nt (the number of samples read by the IO routine)",
+" ",
+" Options for mdd= ",
+"     2 = A/(B + eps) ",
+"     1 = A*B^H/(B*B^H + eps) ",
+"     0 = A*B^H ",
+" ",
+" Option for rthm= ",
+"     0 = Least Squares QR based inversion",
+"     1 = Least Squares LU based inversion",
+"     2 = SVD inversion single precision",
+"     3 = SVD divide-and-conquer method",
+"     4 = SVD inversion double precision",
+"     5 = Least Squares LU based inversion double precision",
+"     6 = Eigenvalue based (not yet working)",
+" ",
+" author  : Jan Thorbecke : 2008 (j.w.thorbecke@tudelft.nl)",
+" ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main (int argc, char **argv)
+{
+	FILE    *fpin, *fpout;
+	int		i, j, k, ret, nshots, ntraces;
+	int		size, n1, n2, ntfft, nf, causal;
+	int     verbose, fullcorr, ncorstat, err;
+	int     nt, nc, ncc, ntc, nshotA, nshotB;
+	size_t  nstationA, nstationB, nfreq, istation, jstation, iw;
+	int     pgsz, istep,jstep;
+	int     mdd;
+	int		conjgA, conjgB;
+	int 	ntap, nxm, ngath, nw, nw_low, nw_high, eigenvalues, rthm, combine, distance;
+	size_t  nwrite, cdatainSize, datainSize, cdataoutSize, stationSize, is;
+	float	dx, dt, fmin, fmax, df, eps_r, eps_a, ftap, numacc;
+	float	*rC, scl, *rl, *eigen;
+	float   f1, f2, d1, d2, sclsxgx, xmin, xmax, alpha, wshot, wpi, wrec;
+ 	float   *xrcvA, *xsrcA, *xrcvB, *xsrcB;
+	float	*taper;
+    int     *xnx;
+	float sclA,sclB, cjA, cjB;
+	int  transposeA, transposeB;
+
+
+	complex *cdataout;
+	double  t0, t1, t2, t3, tinit, twrite, tread, tdec, tfft;
+	char	*file_A, *file_B, *file_out, *file_dmat, filename[1024], number[128], *rthmName;
+	int     pe=0, root_pe=0, npes=1, ipe, size_s, one_file;
+	complex *cA, *cB, *oBB;
+	segy *hdr;
+
+	t0 = wallclock_time();
+	initargs(argc, argv);
+	requestdoc(1);
+
+	if (!getparint("verbose", &verbose)) verbose = 0;
+	if (!getparstring("file_A", &file_A)) file_A=NULL;
+	assert(file_A != NULL);
+	if (!getparstring("file_B", &file_B)) file_B=NULL;
+	assert(file_B != NULL);
+	if (!getparstring("file_out", &file_out)) file_out=NULL;
+	if (!getparstring("file_dmat", &file_dmat)) file_dmat=NULL;
+	if (!getparint("one_file", &one_file)) one_file = 1;
+
+	if (!getparfloat("fmin", &fmin)) fmin = 0.0;
+	if (!getparint("rthm", &rthm)) rthm = 0;
+	if (!getparint("combine", &combine)) combine = 0;
+	if (!getparint("causal", &causal)) causal = 1;
+	if (!getparint("ntap", &ntap)) ntap = 0;
+	if (!getparfloat("ftap", &ftap)) ftap = 0.;
+	if (!getparfloat("eps_r", &eps_r)) eps_r = 1e-4;
+	if (!getparfloat("eps_a", &eps_a)) eps_a = 1e-5;
+	if (!getparfloat("numacc", &numacc)) numacc = 1e-6;
+	if (!getparint("eigenvalues", &eigenvalues)) eigenvalues = 0;
+	if (!getparint("mdd", &mdd)) mdd = 1;
+
+	if (!getparint("transposeA", &transposeA)) transposeA = 0;
+	if (!getparfloat("sclA", &sclA)) sclA = 1.;
+	if (!getparfloat("cjA", &cjA)) cjA = 1.;
+	if (!getparint("transposeB", &transposeB)) transposeB = 0;
+	if (!getparfloat("sclB", &sclB)) sclB = 1.;
+	if (!getparfloat("cjB", &cjB)) cjB = 1.;
+
+#ifdef _OPENMP
+	npes = atoi(getenv("OMP_NUM_THREADS"));
+	assert(npes != 0);
+	if (verbose) fprintf(stderr,"Number of OpenMP thread's is %d\n", npes);
+#else
+   npes=1;
+#endif
+
+/* get information from input files */
+
+	nshotA = 0;
+	getFileInfo(file_A, &n1, &n2, &nshotA, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
+	if (!getparint("nt", &nt)) nt=n1;
+	if (!getparint("ntc", &ntc)) ntc = n1;
+	if (!getparint("conjgA", &conjgA)) conjgA = 0;
+	if (!getparint("conjgB", &conjgB)) conjgB = 1;
+	if (!getparfloat("dt", &dt)) dt = d1;
+	if (!getparfloat("dx", &dx)) dx = d2;
+	if (!getparfloat("fmax", &fmax)) fmax = 1.0/(2.0*dt);
+
+	nstationA = n2;
+
+	nshotB = 0;
+	getFileInfo(file_B, &n1, &n2, &nshotB, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
+	assert( n1 == nt);
+	nstationB = n2;
+	assert( nshotA == nshotB);
+
+/*================ initializations ================*/
+
+	tinit = 0.0;
+	tfft  = 0.0;
+	tread = 0.0;
+	tdec = 0.0;
+
+    if (!getparint("ntfft", &ntfft)) ntfft = nt;
+	ntfft = optncr(ntfft);
+	nf    = ntfft/2+1;
+	df    = 1.0/(ntfft*dt);
+    nw_high  = MIN( (int)((fmax)/df), nf );
+    nw_low   = MAX( (int)(fmin/df), 1 );
+    nw       = nw_high - nw_low + 1;
+	nfreq = MIN(nf,nw);
+
+/* scaling of the results by Johno van IJsseldijk */ 
+    if (mdd == 0) scl = dx*dt/((float)ntfft); //correlation
+    else if (mdd==1) scl = 1/((float)ntfft)/dx/dt; // MDD
+    else if (mdd==2) scl = 1/((float)ntfft)/dx/dt; // MDD with A and B already computed (NOT TESTED)
+    else scl = 1.0/((float)ntfft); // Passing A or B through
+
+/* allocate in shared memory the in- and output data */
+
+	jstep        = nfreq*nshotA;
+	cdatainSize  = nfreq*nshotA*sizeof(complex);
+	cdataoutSize = nstationA*nstationB*nfreq*sizeof(complex);
+	cdataout     = (complex *)malloc(cdataoutSize);
+	cA           = (complex *)malloc(nstationA*cdatainSize);
+	cB           = (complex *)malloc(nstationB*cdatainSize);
+	taper        = (float *)malloc(2*nstationB*sizeof(float));
+	if (file_dmat!=NULL) oBB = (complex *)malloc(nstationB*nstationB*nfreq*sizeof(complex));
+	else oBB = NULL;
+	assert(cdataout != NULL);
+	assert(cA != NULL);
+	assert(cB != NULL);
+
+
+/* for first touch binding of allocated memory */
+#pragma omp parallel for schedule(static) private(jstation,is) default(shared)
+	for (jstation=0; jstation<nstationB; jstation++) {
+		stationSize=nstationA*nfreq*sizeof(complex);
+		is = jstation*nstationA*nfreq;
+		memset(&cdataout[is],0,stationSize);
+		memset(&cB[jstation*jstep],0,jstep*sizeof(complex));
+	}
+
+#pragma omp parallel for schedule(static) private(jstation) default(shared)
+	for (jstation=0; jstation<nstationA; jstation++) {
+		memset(&cA[jstation*jstep],0,jstep*sizeof(complex));
+	}
+
+    if (verbose) {
+        if (rthm==0) rthmName="Cholesky";
+        else if (rthm==1) rthmName="LU";
+        else if (rthm==2) rthmName="SVD single precision";
+        else if (rthm==3) rthmName="SVD divide-and-conquer";
+        else if (rthm==4) rthmName="SVD double precision";
+        else if (rthm==5) rthmName="LU double precision";
+        else if (rthm==6) rthmName="Eigenvalue double precision";
+        fprintf(stderr,"--- Input Information ---\n");
+        fprintf(stderr,"  dt nt ............ : %f : %d\n", dt, nt);
+        fprintf(stderr,"  dx ............... : %f\n", dx);
+        fprintf(stderr,"  nshotA ........... : %d\n", nshotA );
+        fprintf(stderr,"  nstationA ........ : %ld\n", nstationA );
+        fprintf(stderr,"  nshotB ........... : %d\n", nshotB );
+        fprintf(stderr,"  nstationB ........ : %ld\n", nstationB );
+        fprintf(stderr,"  number t-fft ..... : %d\n", ntfft);
+		fprintf(stderr,"  Input  size ...... : %ld MB\n", (nstationA+nstationB)*cdatainSize/(1024*1024));
+		fprintf(stderr,"  Output size ...... : %ld MB\n", (cdataoutSize/((size_t)1024*1024)));
+        fprintf(stderr,"  taper points ..... : %d (%.2f %%)\n", ntap, ftap*100.0);
+        fprintf(stderr,"  process number ... : %d\n", pe);
+        fprintf(stderr,"  fmin ............. : %.3f (%d)\n", fmin, nw_low);
+        fprintf(stderr,"  fmax ............. : %.3f (%d)\n", fmax, nw_high);
+        fprintf(stderr,"  nfreq  ........... : %ld\n", nfreq);
+        if (mdd) fprintf(stderr,"  Matrix inversion . : %s\n", rthmName);
+        else  fprintf(stderr,"  Correlation ...... : \n");
+        fprintf(stderr,"  eps_r ............ : %e\n", eps_r);
+        fprintf(stderr,"  eps_a ............ : %e\n", eps_a);
+        fprintf(stderr,"  mdd .............. : %d\n", mdd);
+    }
+
+	t1 = wallclock_time();
+	tinit += t1-t0;
+
+/* read in first nt samples, and store in data */
+
+    xsrcA     = (float *)calloc(nshotA,sizeof(float));
+    xrcvA     = (float *)calloc(nshotA*nstationA,sizeof(float));
+    xnx       = (int *)calloc(nshotA,sizeof(int));
+	alpha = 0.0;
+    readShotData(file_A, xmin, dx, xrcvA, xsrcA, xnx, cA, nw, nw_low, nshotA, nstationA, nstationA, ntfft, alpha, sclA, cjA, transposeA, verbose);
+
+    xsrcB     = (float *)calloc(nshotB,sizeof(float));
+    xrcvB     = (float *)calloc(nshotB*nstationB,sizeof(float));
+	alpha = 0.0;
+    readShotData(file_B, xmin, dx, xrcvB, xsrcB, xnx, cB, nw, nw_low, nshotB, nstationB, nstationB, ntfft, alpha, sclB, cjB, transposeB, verbose);
+
+	//cB = cA;
+
+	eigen = (float *)malloc(nfreq*nstationB*sizeof(float));
+
+	t2 = wallclock_time();
+	tread += t2-t1;
+
+#pragma omp parallel default(none) \
+	private(t1,t2,pe) \
+	shared(cA,cB,eigen,eigenvalues,numacc,eps_r,eps_a) \
+	shared(nstationA,nstationB,verbose,cdatainSize) \
+	shared(rthm,mdd,nfreq,nshotA,conjgA,conjgB) \
+	shared(cdataout,oBB)
+{ /* start of OpenMP parallel part */
+
+
+#ifdef _OPENMP
+	pe = omp_get_thread_num();
+#endif
+
+	/* compute deconvolution */
+	deconvolve(cA, cB, cdataout, oBB, nfreq, nshotA, nstationA, nstationB, 
+		eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgB, verbose);
+
+} /*end of parallel OpenMP part */
+
+	fflush(stderr);
+	fflush(stdout);
+
+	t3 = wallclock_time();
+	tdec += t3-t2;
+	if (verbose>=1) {
+		fprintf(stderr,"************* PE %d ************* \n", pe);
+		fprintf(stderr,"CPU-time read data         = %.3f\n", tread);
+		fprintf(stderr,"CPU-time deconvolution     = %.3f\n", tdec);
+	}
+
+/* for writing out combined shots cA */
+	free(cA);
+	free(cB);
+
+/* Inverse FFT of deconvolution results */
+/* This is done for every deconvolution component seperately */
+
+	rC = (float *)malloc(nstationA*ntc*sizeof(float));
+	assert(rC != NULL);
+
+/*
+#pragma omp parallel default(none) \
+	private(istation,jstation,pe,j,i,t1,t2,t3,hdr,rl) \
+	private(filename, k, fpout, nwrite, cA, iw,number) \
+	shared(tfft) \
+	shared(rC,dt,ntc,file_out) \
+	shared(nt,nstationA,nstationB,verbose,err,ntfft,t0,twrite) \
+	shared(nfreq,stderr,stdout, nshotA, nshotB, nw_low, causal) \
+	shared(cdataout,istep,jstep,one_file)
+*/
+//{ /* start of OpenMP parallel part */
+//#ifdef _OPENMP
+//	pe = omp_get_thread_num();
+//#else 
+    pe = 0;
+//#endif
+
+	rl  = (float *)calloc(ntfft,sizeof(float));
+	cA  = (complex *)calloc(ntfft,sizeof(complex));
+	hdr = (segy *)calloc(1,sizeof(segy));
+
+/* for writing out combined shots cA */
+
+	tfft   = 0.0;
+	twrite = 0.0;
+	if (one_file && pe==0) {
+		strcpy(filename, file_out);
+		if (verbose>2) fprintf(stderr,"writing all output shot into file %s\n", filename);
+		fpout = fopen( filename, "w+" );
+	}
+//#pragma omp for
+	for (jstation=0; jstation<nstationB; jstation++) {
+		/* FFT */
+		t1 = wallclock_time();
+		for (istation=0; istation<nstationA; istation++) {
+			memset(cA,0,ntfft*sizeof(complex));
+			for (iw=0;iw<nfreq;iw++) {
+				cA[iw+nw_low].r = cdataout[(iw*nstationB+jstation)*nstationA+istation].r*scl;
+				cA[iw+nw_low].i = cdataout[(iw*nstationB+jstation)*nstationA+istation].i*scl;
+			}
+			cr1fft(cA, rl, ntfft, 1);
+			memcpy(&rC[istation*ntc],rl,ntc*sizeof(float));
+
+			if (causal==1) {
+				memcpy(&rC[istation*ntc],rl,ntc*sizeof(float));
+			}
+			else if (causal==2) {
+				rC[istation*ntc] = rl[0];
+				for (j=1;j<ntc; j++) {
+					rC[istation*ntc+j] = rl[ntfft-j];
+				}
+			}
+			else if (causal==3) {
+				for (j=1;j<=(ntc/2); j++) {
+					rC[istation*ntc+ntc/2-j] = rl[ntfft-j];
+				}
+				for (j=ntc/2;j<ntc; j++) {
+					rC[istation*ntc+j] = rl[j-ntc/2];
+				}
+			}
+			else if (causal==4) {
+				rC[istation*ntc] = rl[0];
+				for (j=1;j<ntc; j++) {
+					rC[istation*ntc+j] = rl[ntfft-j] + rl[j];
+				}
+			}
+		}
+		t2 = wallclock_time();
+		tfft += t2-t1;
+
+		if (pe == 0) {
+			/* write data to file */
+			hdr[0].d1  = dt;
+			if (causal == 3) hdr[0].f1=-0.5*ntc*dt;
+			else hdr[0].f1=0.0;
+			hdr[0].dt  = (int)(dt*1000000);
+			hdr[0].ns  = ntc;
+			hdr[0].fldr  = jstation+1;
+			hdr[0].scalco = -1000;
+			hdr[0].scalel = -1000;
+			hdr[0].trid = 1;
+			hdr[0].f2 = f2;
+			hdr[0].d2 = dx;
+//			hdr[0].trwf = nstationA;
+			hdr[0].sx = NINT((f2+dx*jstation)*1000);
+			hdr[0].ntr = nstationA*nstationB;
+			if (!one_file) {
+				strcpy(filename, file_out);
+				sprintf(number,"Station%03d\0",jstation+1);
+				name_ext(filename, number);
+				if (verbose>3) fprintf(stderr,"writing to file %s\n", filename);
+				fpout = fopen( filename, "w+" );
+			}
+			for (istation=0; istation<nstationA; istation++) {
+				hdr[0].tracl = istation+1;
+				hdr[0].gx = NINT((f2+dx*istation)*1000);
+				hdr[0].offset = NINT((f2+dx*istation));
+				nwrite = fwrite( hdr, 1, TRCBYTES, fpout );
+				assert (nwrite == TRCBYTES);
+				nwrite = fwrite( &rC[istation*ntc], sizeof(float), ntc, fpout );
+				assert (nwrite == ntc);
+			}
+			if (!one_file) {
+				fflush(fpout);
+				fclose(fpout);
+			}
+			t3 = wallclock_time();
+			twrite += t3-t2;
+//			fprintf(stderr,"write %f and fft %f for %d\n",twrite, tfft, jstation);
+		}
+	}
+	if (one_file && pe==0) {
+		fflush(fpout);
+		fclose(fpout);
+	}
+	free(cA);
+	free(rl);
+//}
+
+	free(rC);
+	free(cdataout);
+
+	if (eigenvalues) {
+		writeEigen(file_out, df, nw_low, nw_high, nfreq, eigen, nstationB, dx, f2);
+	}
+	free(eigen);
+
+	/* if file_dmat write frequency slices of matrix */
+	if (file_dmat!=NULL) {
+		t2 = wallclock_time();
+		strcpy(filename, file_dmat);
+		fpout = fopen( filename, "w+" );
+		hdr[0].d1  = df;
+		hdr[0].dt  = (int)(df*1000000);
+		hdr[0].ns  = nfreq;
+		hdr[0].trid  = 111;
+/*
+		for (iw=0;iw<nfreq;iw++) {
+			hdr[0].fldr  = iw+1;
+//			sprintf(number,"Station%03d\0",jstation+1);
+//			name_ext(filename, number);
+//			if (verbose>3) fprintf(stderr,"writing to file %s\n", filename);
+//			fpout = fopen( filename, "w+" );
+			twrite = 0.0;
+			for (istation=0; istation<nstationB; istation++) {
+				hdr[0].tracl = istation+1;
+				nwrite = fwrite( hdr, 1, TRCBYTES, fpout );
+				assert (nwrite == TRCBYTES);
+//				nwrite = fwrite( &oBB[iw*nstationB*nstationB+istation].r, sizeof(complex), nfreq, fpout );
+//				assert (nwrite == nfreq);
+			}
+		}
+*/
+		fflush(fpout);
+		fclose(fpout);
+		t3 = wallclock_time();
+		twrite += t3-t2;
+		free(oBB);
+	}
+	free(hdr);
+
+/*================ end ================*/
+
+	if (verbose) {
+		t3 = wallclock_time();
+		fprintf(stderr,"CPU-time inverse FFT's     = %.3f\n", tfft);
+		fprintf(stderr,"CPU-time write data        = %.3f\n", twrite);
+		fprintf(stderr,"CPU-time initialization    = %.3f\n", tinit);
+		fprintf(stderr,"Total CPU-time             = %.3f\n", t3-t0);
+	}
+
+	return 0;
+}
+
+void gausstaper(float *taper, float dx, int n, float enddecay)
+{
+	int 	ix, hn;
+	float 	dist, sigma2;
+
+	if (enddecay > 0.999) {
+		for (ix = 0; ix < n; ix++) taper[ix] = 1.0;
+		return;
+	}
+
+	hn = (n-1)/2;
+	sigma2 = (hn*dx*hn*dx)/(log(enddecay));
+
+	for (ix = 0; ix <= hn; ix++) {
+		dist = ix*dx;
+		taper[hn+ix] = exp(dist*dist/sigma2);
+	}
+
+	for (ix = 0; ix < hn; ix++) 
+		taper[ix] = taper[n-1-ix];
+
+	return;
+}
+
+void writeDatamatrix(char *file_out, complex *P, int ntfft, int ntc, int Nrec, int Nshot, int nfreq, int nw_low, float dt, int verbose)
+{
+	FILE *fpout;
+	char filename[1024];
+	size_t  nwrite;
+	int jstation, istation, iw;
+	float *rl, *rC;
+	complex *cA;
+	segy *hdr;
+
+	rC = (float *)malloc(Nrec*ntc*sizeof(float));
+	rl  = (float *)calloc(ntfft,sizeof(float));
+	cA  = (complex *)calloc(ntfft,sizeof(complex));
+	hdr = (segy *)calloc(1,sizeof(segy));
+
+/* for writing out combined shots cA */
+
+	strcpy(filename, file_out);
+	if (verbose>2) fprintf(stderr,"writing all output shot into file %s\n", filename);
+	fpout = fopen( file_out, "w+" );
+	for (jstation=0; jstation<Nshot; jstation++) {
+
+		/* FFT */
+		for (istation=0; istation<Nrec; istation++) {
+			memset(cA,0,ntfft*sizeof(complex));
+			for (iw=0;iw<nfreq;iw++) {
+				cA[iw+nw_low] = P[(iw*Nshot+jstation)*Nrec+istation];
+			}
+			cr1fft(cA, rl, ntfft, 1);
+			memcpy(&rC[istation*ntc],rl,ntc*sizeof(float));
+		}
+
+		/* write data to file */
+		hdr[0].d1  = dt;
+		hdr[0].dt  = (int)(dt*1000000);
+		hdr[0].ns  = ntc;
+		hdr[0].fldr  = jstation+1;
+		for (istation=0; istation<Nrec; istation++) {
+			hdr[0].tracl = istation+1;
+			nwrite = fwrite( hdr, 1, TRCBYTES, fpout );
+			assert (nwrite == TRCBYTES);
+			nwrite = fwrite( &rC[istation*ntc], sizeof(float), ntc, fpout );
+			assert (nwrite == ntc);
+		}
+	}
+
+	free(cA);
+	free(rl);
+	free(rC);
+	return;
+}
+
diff --git a/MDD/name_ext.c b/MDD/name_ext.c
new file mode 120000
index 0000000000000000000000000000000000000000..83ac1f8ddf2ec6a316557877ae7db38720a5ca53
--- /dev/null
+++ b/MDD/name_ext.c
@@ -0,0 +1 @@
+../utils/name_ext.c
\ No newline at end of file
diff --git a/MDD/par.h b/MDD/par.h
new file mode 120000
index 0000000000000000000000000000000000000000..0fa273cea748f9ead16e0e231201941174a3dd46
--- /dev/null
+++ b/MDD/par.h
@@ -0,0 +1 @@
+../utils/par.h
\ No newline at end of file
diff --git a/MDD/readShotData.c b/MDD/readShotData.c
new file mode 100644
index 0000000000000000000000000000000000000000..20f953e2ac140a6032ed3c71198e482ee06bb69f
--- /dev/null
+++ b/MDD/readShotData.c
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "segy.h"
+#include <assert.h>
+
+typedef struct { /* complex number */
+        float r,i;
+} complex;
+
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+
+int optncr(int n);
+void cc1fft(complex *data, int n, int sign);
+void rc1fft(float *rdata, complex *cdata, int n, int sign);
+
+int compare(const void *a, const void *b) 
+{ return (*(float *)b-*(float *)a); }
+
+int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc, int *xnx, complex *cdata, int nw, int nw_low, int ngath, int nx, int nxm, int ntfft, float alpha, float scale, float conjg, int transpose, int verbose)
+{
+	FILE *fp;
+	segy hdr;
+	size_t nread;
+	int fldr_shot, sx_shot, itrace, one_shot, igath, iw, i, j, k;
+	int end_of_file, nt, ir, is;
+	float scl, dt, *trace;
+	complex *ctrace;
+
+	/* Reading first header  */
+
+	if (filename == NULL) fp = stdin;
+	else fp = fopen( filename, "r" );
+	if ( fp == NULL ) {
+		fprintf(stderr,"input file %s has an error\n", filename);
+		perror("error in opening file: ");
+		fflush(stderr);
+		return -1;
+	}
+
+	fseek(fp, 0, SEEK_SET);
+	nread = fread( &hdr, 1, TRCBYTES, fp );
+	assert(nread == TRCBYTES);
+	if (hdr.scalco < 0) scl = 1.0/fabs(hdr.scalco);
+	else if (hdr.scalco == 0) scl = 1.0;
+	else scl = hdr.scalco;
+	fseek(fp, 0, SEEK_SET);
+
+	nt        = hdr.ns;
+
+	trace  = (float *)calloc(nx*ntfft,sizeof(float));
+	ctrace = (complex *)malloc(ntfft*sizeof(complex));
+
+	end_of_file = 0;
+	one_shot    = 1;
+	igath       = 0;
+
+	/* Read shots in file */
+
+	while (!end_of_file) {
+
+		/* start reading data (shot records) */
+		itrace = 0;
+		nread = fread( &hdr, 1, TRCBYTES, fp );
+		if (nread != TRCBYTES) { /* no more data in file */
+			break;
+		}
+
+		sx_shot  = hdr.sx;
+		fldr_shot  = hdr.fldr;
+		xsrc[igath] = sx_shot*scl;
+		xnx[igath]=0;
+		/* read in all traces within a shot */
+		while (one_shot) {
+			xrcv[igath*nxm+itrace] = hdr.gx*scl;
+			nread = fread( &trace[itrace*ntfft], sizeof(float), nt, fp );
+			assert (nread == hdr.ns);
+			itrace++;
+			xnx[igath]+=1;
+
+			/* read next hdr of next trace */
+			nread = fread( &hdr, 1, TRCBYTES, fp );
+			if (nread != TRCBYTES) { 
+				one_shot = 0;
+				end_of_file = 1;
+				break;
+			}
+			if ((sx_shot != hdr.sx) || (fldr_shot != hdr.fldr) ) break;
+		}
+
+		for (i=0; i<itrace; i++) {
+			/* apply alpha factor */
+			if (alpha != 0.0) {
+        		for (j=0; j<nt; j++) {
+					trace[i*ntfft+j] *= exp(alpha*j*dt);
+				}
+			}
+        	for (j=nt; j<ntfft; j++) {
+				trace[i*ntfft+j] = 0.0;
+			}
+
+			/* transform to frequency domain */
+        	rc1fft(&trace[i*ntfft],ctrace,ntfft,-1);
+
+			if (transpose == 0) {
+        		for (iw=0; iw<nw; iw++) {
+        			cdata[iw*ngath*nx+igath*nx+i].r = scale*ctrace[nw_low+iw].r;
+        			cdata[iw*ngath*nx+igath*nx+i].i = conjg*scale*ctrace[nw_low+iw].i;
+        		}
+			}
+			else {
+        		for (iw=0; iw<nw; iw++) {
+        			cdata[iw*ngath*nx+i*ngath+igath].r = scale*ctrace[nw_low+iw].r;
+        			cdata[iw*ngath*nx+i*ngath+igath].i = conjg*scale*ctrace[nw_low+iw].i;
+        		}
+			}
+		}
+
+		if (verbose>2) {
+			fprintf(stderr,"finished reading shot %d (%d) with %d traces\n",sx_shot,igath,itrace);
+		}
+
+		if (itrace != 0) { /* end of shot record */
+			fseek( fp, -TRCBYTES, SEEK_CUR );
+			igath++;
+		}
+		else {
+			end_of_file = 1;
+		}
+	}
+
+	free(ctrace);
+	free(trace);
+
+	return 0;
+}
+
+
diff --git a/MDD/segy.h b/MDD/segy.h
new file mode 100644
index 0000000000000000000000000000000000000000..d0a0d769d1548115b04396076b4a15d5be0ee687
--- /dev/null
+++ b/MDD/segy.h
@@ -0,0 +1,849 @@
+/* Copyright (c) Colorado School of Mines, 2011.*/
+/* All rights reserved.                       */
+
+/* segy.h - include file for SEGY traces
+ *
+ * declarations for:
+ *	typedef struct {} segy - the trace identification header
+ *	typedef struct {} bhed - binary header
+ *
+ * Note:
+ *	If header words are added, run the makefile in this directory
+ *	to recreate hdr.h.
+ *
+ * Reference:
+ *	K. M. Barry, D. A. Cavers and C. W. Kneale, "Special Report:
+ *		Recommended Standards for Digital Tape Formats",
+ *		Geophysics, vol. 40, no. 2 (April 1975), P. 344-352.
+ *	
+ * $Author: john $
+ * $Source: /usr/local/cwp/src/su/include/RCS/segy.h,v $
+ * $Revision: 1.33 $ ; $Date: 2011/11/11 23:56:14 $
+ */ 
+
+#include <limits.h>
+#include "par.h"
+
+#ifndef SEGY_H
+#define SEGY_H
+#define TRCBYTES		240
+
+#define SU_NFLTS	32767	/* Arbitrary limit on data array size	*/
+
+
+/* TYPEDEFS */
+typedef struct {	/* segy - trace identification header */
+
+	int tracl;	/* Trace sequence number within line
+			   --numbers continue to increase if the
+			   same line continues across multiple
+			   SEG Y files.
+			   byte# 1-4
+			 */
+
+	int tracr;	/* Trace sequence number within SEG Y file
+			   ---each file starts with trace sequence
+			   one
+			   byte# 5-8
+			 */
+
+	int fldr;	/* Original field record number
+			   byte# 9-12 
+			*/
+
+	int tracf;	/* Trace number within original field record 
+			   byte# 13-16
+			*/
+
+	int ep;		/* energy source point number
+			   ---Used when more than one record occurs
+			   at the same effective surface location.
+			   byte# 17-20
+			 */
+
+	int cdp;	/* Ensemble number (i.e. CDP, CMP, CRP,...) 
+			   byte# 21-24
+			*/
+
+	int cdpt;	/* trace number within the ensemble
+			   ---each ensemble starts with trace number one.
+			   byte# 25-28
+			 */
+
+	short trid;	/* trace identification code:
+			-1 = Other
+		         0 = Unknown
+			 1 = Seismic data
+			 2 = Dead
+			 3 = Dummy
+			 4 = Time break
+			 5 = Uphole
+			 6 = Sweep
+			 7 = Timing
+			 8 = Water break
+			 9 = Near-field gun signature
+			10 = Far-field gun signature
+			11 = Seismic pressure sensor
+			12 = Multicomponent seismic sensor
+				- Vertical component
+			13 = Multicomponent seismic sensor
+				- Cross-line component 
+			14 = Multicomponent seismic sensor
+				- in-line component 
+			15 = Rotated multicomponent seismic sensor
+				- Vertical component
+			16 = Rotated multicomponent seismic sensor
+				- Transverse component
+			17 = Rotated multicomponent seismic sensor
+				- Radial component
+			18 = Vibrator reaction mass
+			19 = Vibrator baseplate
+			20 = Vibrator estimated ground force
+			21 = Vibrator reference
+			22 = Time-velocity pairs
+			23 ... N = optional use 
+				(maximum N = 32,767)
+
+			Following are CWP id flags:
+
+			109 = autocorrelation
+			110 = Fourier transformed - no packing
+			     xr[0],xi[0], ..., xr[N-1],xi[N-1]
+			111 = Fourier transformed - unpacked Nyquist
+			     xr[0],xi[0],...,xr[N/2],xi[N/2]
+			112 = Fourier transformed - packed Nyquist
+	 		     even N:
+			     xr[0],xr[N/2],xr[1],xi[1], ...,
+				xr[N/2 -1],xi[N/2 -1]
+				(note the exceptional second entry)
+			     odd N:
+			     xr[0],xr[(N-1)/2],xr[1],xi[1], ...,
+				xr[(N-1)/2 -1],xi[(N-1)/2 -1],xi[(N-1)/2]
+				(note the exceptional second & last entries)
+			113 = Complex signal in the time domain
+			     xr[0],xi[0], ..., xr[N-1],xi[N-1]
+			114 = Fourier transformed - amplitude/phase
+			     a[0],p[0], ..., a[N-1],p[N-1]
+			115 = Complex time signal - amplitude/phase
+			     a[0],p[0], ..., a[N-1],p[N-1]
+			116 = Real part of complex trace from 0 to Nyquist
+			117 = Imag part of complex trace from 0 to Nyquist
+			118 = Amplitude of complex trace from 0 to Nyquist
+			119 = Phase of complex trace from 0 to Nyquist
+			121 = Wavenumber time domain (k-t)
+			122 = Wavenumber frequency (k-omega)
+			123 = Envelope of the complex time trace
+			124 = Phase of the complex time trace
+			125 = Frequency of the complex time trace
+			130 = Depth-Range (z-x) traces
+			201 = Seismic data packed to bytes (by supack1)
+			202 = Seismic data packed to 2 bytes (by supack2)
+			   byte# 29-30
+			*/
+
+	short nvs;	/* Number of vertically summed traces yielding
+			   this trace. (1 is one trace, 
+			   2 is two summed traces, etc.)
+			   byte# 31-32
+			 */
+
+	short nhs;	/* Number of horizontally summed traces yielding
+			   this trace. (1 is one trace
+			   2 is two summed traces, etc.)
+			   byte# 33-34
+			 */
+
+	short duse;	/* Data use:
+				1 = Production
+				2 = Test
+			   byte# 35-36
+			 */
+
+	int offset;	/* Distance from the center of the source point 
+			   to the center of the receiver group 
+			   (negative if opposite to direction in which 
+			   the line was shot).
+			   byte# 37-40
+			 */
+
+	int gelev;	/* Receiver group elevation from sea level
+			   (all elevations above the Vertical datum are 
+			   positive and below are negative).
+			   byte# 41-44
+			 */
+
+	int selev;	/* Surface elevation at source.
+			   byte# 45-48
+			 */
+
+	int sdepth;	/* Source depth below surface (a positive number).
+			   byte# 49-52
+			 */
+
+	int gdel;	/* Datum elevation at receiver group.
+			   byte# 53-56
+			*/
+
+	int sdel;	/* Datum elevation at source.
+			   byte# 57-60
+			*/
+
+	int swdep;	/* Water depth at source.
+			   byte# 61-64
+			*/
+
+	int gwdep;	/* Water depth at receiver group.
+			   byte# 65-68
+			*/
+
+	short scalel;	/* Scalar to be applied to the previous 7 entries
+			   to give the real value. 
+			   Scalar = 1, +10, +100, +1000, +10000.
+			   If positive, scalar is used as a multiplier,
+			   if negative, scalar is used as a divisor.
+			   byte# 69-70
+			 */
+
+	short scalco;	/* Scalar to be applied to the next 4 entries
+			   to give the real value. 
+			   Scalar = 1, +10, +100, +1000, +10000.
+			   If positive, scalar is used as a multiplier,
+			   if negative, scalar is used as a divisor.
+			   byte# 71-72
+			 */
+
+	int  sx;	/* Source coordinate - X 
+			   byte# 73-76
+			*/
+
+	int  sy;	/* Source coordinate - Y 
+			   byte# 77-80
+			*/
+
+	int  gx;	/* Group coordinate - X 
+			   byte# 81-84
+			*/
+
+	int  gy;	/* Group coordinate - Y 
+			   byte# 85-88
+			*/
+
+	short counit;	/* Coordinate units: (for previous 4 entries and
+				for the 7 entries before scalel)
+			   1 = Length (meters or feet)
+			   2 = Seconds of arc
+			   3 = Decimal degrees
+			   4 = Degrees, minutes, seconds (DMS)
+
+			In case 2, the X values are longitude and 
+			the Y values are latitude, a positive value designates
+			the number of seconds east of Greenwich
+				or north of the equator
+
+			In case 4, to encode +-DDDMMSS
+			counit = +-DDD*10^4 + MM*10^2 + SS,
+			with scalco = 1. To encode +-DDDMMSS.ss
+			counit = +-DDD*10^6 + MM*10^4 + SS*10^2 
+			with scalco = -100.
+			   byte# 89-90
+			*/
+
+	short wevel;	/* Weathering velocity. 
+			   byte# 91-92
+			*/
+
+	short swevel;	/* Subweathering velocity. 
+			   byte# 93-94
+			*/
+
+	short sut;	/* Uphole time at source in milliseconds. 
+			   byte# 95-96
+			*/
+
+	short gut;	/* Uphole time at receiver group in milliseconds. 
+			   byte# 97-98
+			*/
+
+	short sstat;	/* Source static correction in milliseconds. 
+			   byte# 99-100
+			*/
+
+	short gstat;	/* Group static correction  in milliseconds.
+			   byte# 101-102
+			*/
+
+	short tstat;	/* Total static applied  in milliseconds.
+			   (Zero if no static has been applied.)
+			   byte# 103-104
+			*/
+
+	short laga;	/* Lag time A, time in ms between end of 240-
+			   byte trace identification header and time
+			   break, positive if time break occurs after
+			   end of header, time break is defined as
+			   the initiation pulse which maybe recorded
+			   on an auxiliary trace or as otherwise
+			   specified by the recording system 
+			   byte# 105-106
+			*/
+
+	short lagb;	/* lag time B, time in ms between the time break
+			   and the initiation time of the energy source,
+			   may be positive or negative 
+			   byte# 107-108
+			*/
+
+	short delrt;	/* delay recording time, time in ms between
+			   initiation time of energy source and time
+			   when recording of data samples begins
+			   (for deep water work if recording does not
+			   start at zero time) 
+			   byte# 109-110
+			*/
+
+	short muts;	/* mute time--start 
+			   byte# 111-112
+			*/
+
+	short mute;	/* mute time--end 
+			   byte# 113-114
+			*/
+
+	unsigned short ns;	/* number of samples in this trace 
+			   byte# 115-116
+			*/
+
+	unsigned short dt;	/* sample interval; in micro-seconds
+			   byte# 117-118
+			*/
+
+	short gain;	/* gain type of field instruments code:
+				1 = fixed
+				2 = binary
+				3 = floating point
+				4 ---- N = optional use 
+			   byte# 119-120
+			*/
+
+	short igc;	/* instrument gain constant 
+			   byte# 121-122
+			*/
+
+	short igi;	/* instrument early or initial gain 
+			   byte# 123-124
+			*/
+
+	short corr;	/* correlated:
+				1 = no
+				2 = yes 
+			   byte# 125-126
+			*/
+
+	short sfs;	/* sweep frequency at start 
+			   byte# 127-128
+			*/
+
+	short sfe;	/* sweep frequency at end
+			   byte# 129-130
+			*/
+
+	short slen;	/* sweep length in ms 
+			   byte# 131-132
+			*/
+
+	short styp;	/* sweep type code:
+				1 = linear
+				2 = cos-squared
+				3 = other
+			   byte# 133-134
+			*/
+
+	short stas;	/* sweep trace length at start in ms
+			   byte# 135-136
+			*/
+
+	short stae;	/* sweep trace length at end in ms 
+			   byte# 137-138
+			*/
+
+	short tatyp;	/* taper type: 1=linear, 2=cos^2, 3=other 
+			   byte# 139-140
+			*/
+
+	short afilf;	/* alias filter frequency if used 
+			   byte# 141-142
+			*/
+
+	short afils;	/* alias filter slope
+			   byte# 143-144
+			*/
+
+	short nofilf;	/* notch filter frequency if used
+			   byte# 145-146
+			*/
+
+	short nofils;	/* notch filter slope
+			   byte# 147-148
+			*/
+
+	short lcf;	/* low cut frequency if used
+			   byte# 149-150
+			*/
+
+	short hcf;	/* high cut frequncy if used
+			   byte# 151-152
+			*/
+
+	short lcs;	/* low cut slope
+			   byte# 153-154
+			*/
+
+	short hcs;	/* high cut slope
+			   byte# 155-156
+			*/
+
+	short year;	/* year data recorded
+			   byte# 157-158
+			*/
+
+	short day;	/* day of year
+			   byte# 159-160
+			*/
+
+	short hour;	/* hour of day (24 hour clock) 
+			   byte# 161-162
+			*/
+
+	short minute;	/* minute of hour
+			   byte# 163-164
+			*/
+
+	short sec;	/* second of minute
+			   byte# 165-166
+			*/
+
+	short timbas;	/* time basis code:
+				1 = local
+				2 = GMT
+				3 = other
+			   byte# 167-168
+			*/
+
+	short trwf;	/* trace weighting factor, defined as 1/2^N
+			   volts for the least sigificant bit
+			   byte# 169-170
+			*/
+
+	short grnors;	/* geophone group number of roll switch
+			   position one
+			   byte# 171-172
+			*/
+
+	short grnofr;	/* geophone group number of trace one within
+			   original field record
+			   byte# 173-174
+			*/
+
+	short grnlof;	/* geophone group number of last trace within
+			   original field record
+			   byte# 175-176
+			*/
+
+	short gaps;	/* gap size (total number of groups dropped)
+			   byte# 177-178
+			*/
+
+	short otrav;	/* overtravel taper code:
+				1 = down (or behind)
+				2 = up (or ahead)
+			   byte# 179-180
+			*/
+
+#ifdef SLTSU_SEGY_H  /* begin Unocal SU segy.h differences */
+
+
+	/* cwp local assignments */
+	float d1;	/* sample spacing for non-seismic data
+			   byte# 181-184
+			*/
+
+	float f1;	/* first sample location for non-seismic data
+			   byte# 185-188
+			*/
+
+	float d2;	/* sample spacing between traces
+			   byte# 189-192
+			*/
+
+	float f2;	/* first trace location
+			   byte# 193-196
+			*/
+
+	float ungpow;	/* negative of power used for dynamic
+			   range compression
+			   byte# 197-200
+			*/
+
+	float unscale;	/* reciprocal of scaling factor to normalize
+			   range
+			   byte# 201-204
+			*/
+
+	short mark;	/* mark selected traces
+			   byte# 205-206
+			*/
+
+	/* SLTSU local assignments */ 
+	short mutb;	/* mute time at bottom (start time)
+			   bottom mute ends at last sample
+			   byte# 207-208
+			*/
+	float dz;	/* depth sampling interval in (m or ft)
+			if =0.0, input are time samples
+			   byte# 209-212
+			*/
+
+	float fz;	/* depth of first sample in (m or ft)
+			   byte# 213-116
+			*/
+
+	short n2;	/* number of traces per cdp or per shot
+			   byte# 217-218
+			*/
+
+        short shortpad; /* alignment padding
+			   byte# 219-220
+			*/
+
+	int ntr; 	/* number of traces
+			   byte# 221-224
+			*/
+
+	/* SLTSU local assignments end */ 
+
+	short unass[8];	/* unassigned
+			   byte# 225-240
+			*/
+
+#else
+
+	/* cwp local assignments */
+	float d1;	/* sample spacing for non-seismic data
+			   byte# 181-184
+			*/
+
+	float f1;	/* first sample location for non-seismic data
+			   byte# 185-188
+			*/
+
+	float d2;	/* sample spacing between traces
+			   byte# 189-192
+			*/
+
+	float f2;	/* first trace location
+			   byte# 193-196
+			*/
+
+	float ungpow;	/* negative of power used for dynamic
+			   range compression
+			   byte# 197-200
+			*/
+
+	float unscale;	/* reciprocal of scaling factor to normalize
+			   range
+			   byte# 201-204
+			*/
+
+	int ntr; 	/* number of traces
+			   byte# 205-208
+			*/
+
+	short mark;	/* mark selected traces
+			   byte# 209-210
+			*/
+
+        short shortpad; /* alignment padding
+			   byte# 211-212
+			*/
+
+
+	short unass[14];	/* unassigned--NOTE: last entry causes 
+			   a break in the word alignment, if we REALLY
+			   want to maintain 240 bytes, the following
+			   entry should be an odd number of short/UINT2
+			   OR do the insertion above the "mark" keyword
+			   entry
+			   byte# 213-240
+			*/
+#endif
+
+} segy;
+
+
+typedef struct {	/* bhed - binary header */
+
+	int jobid;	/* job identification number */
+
+	int lino;	/* line number (only one line per reel) */
+
+	int reno;	/* reel number */
+
+	short ntrpr;	/* number of data traces per record */
+
+        short nart;	/* number of auxiliary traces per record */
+
+	unsigned short hdt; /* sample interval in micro secs for this reel */
+
+	unsigned short dto; /* same for original field recording */
+
+	unsigned short hns; /* number of samples per trace for this reel */
+
+	unsigned short nso; /* same for original field recording */
+
+	short format;	/* data sample format code:
+				1 = floating point, 4 byte (32 bits)
+				2 = fixed point, 4 byte (32 bits)
+				3 = fixed point, 2 byte (16 bits)
+				4 = fixed point w/gain code, 4 byte (32 bits)
+				5 = IEEE floating point, 4 byte (32 bits)
+				8 = two's complement integer, 1 byte (8 bits)
+			*/
+
+	short fold;	/* CDP fold expected per CDP ensemble */
+
+	short tsort;	/* trace sorting code: 
+				1 = as recorded (no sorting)
+				2 = CDP ensemble
+				3 = single fold continuous profile
+				4 = horizontally stacked */
+
+	short vscode;	/* vertical sum code:
+				1 = no sum
+				2 = two sum ...
+				N = N sum (N = 32,767) */
+
+	short hsfs;	/* sweep frequency at start */
+
+	short hsfe;	/* sweep frequency at end */
+
+	short hslen;	/* sweep length (ms) */
+
+	short hstyp;	/* sweep type code:
+				1 = linear
+				2 = parabolic
+				3 = exponential
+				4 = other */
+
+	short schn;	/* trace number of sweep channel */
+
+	short hstas;	/* sweep trace taper length at start if
+			   tapered (the taper starts at zero time
+			   and is effective for this length) */
+
+	short hstae;	/* sweep trace taper length at end (the ending
+			   taper starts at sweep length minus the taper
+			   length at end) */
+
+	short htatyp;	/* sweep trace taper type code:
+				1 = linear
+				2 = cos-squared
+				3 = other */
+
+	short hcorr;	/* correlated data traces code:
+				1 = no
+				2 = yes */
+
+	short bgrcv;	/* binary gain recovered code:
+				1 = yes
+				2 = no */
+
+	short rcvm;	/* amplitude recovery method code:
+				1 = none
+				2 = spherical divergence
+				3 = AGC
+				4 = other */
+
+	short mfeet;	/* measurement system code:
+				1 = meters
+				2 = feet */
+
+	short polyt;	/* impulse signal polarity code:
+				1 = increase in pressure or upward
+				    geophone case movement gives
+				    negative number on tape
+				2 = increase in pressure or upward
+				    geophone case movement gives
+				    positive number on tape */
+
+	short vpol;	/* vibratory polarity code:
+				code	seismic signal lags pilot by
+				1	337.5 to  22.5 degrees
+				2	 22.5 to  67.5 degrees
+				3	 67.5 to 112.5 degrees
+				4	112.5 to 157.5 degrees
+				5	157.5 to 202.5 degrees
+				6	202.5 to 247.5 degrees
+				7	247.5 to 292.5 degrees
+				8	293.5 to 337.5 degrees */
+
+	short hunass[170];	/* unassigned */
+
+} bhed;
+
+/* DEFINES */
+#define gettr(x)	fgettr(stdin, (x))
+#define vgettr(x)	fvgettr(stdin, (x))
+#define puttr(x)	fputtr(stdout, (x))
+#define vputtr(x)	fvputtr(stdout, (x))
+#define gettra(x, y)    fgettra(stdin, (x), (y))
+
+
+/* TOTHER represents "other"					*/
+#define		TOTHER		-1	
+/* TUNK represents time traces of an unknown type		*/
+#define		TUNK		0
+/* TREAL represents real time traces 				*/
+#define		TREAL		1
+/* TDEAD represents dead time traces 				*/
+#define		TDEAD		2
+/* TDUMMY represents dummy time traces 				*/
+#define		TDUMMY		3
+/* TBREAK represents time break traces 				*/
+#define		TBREAK		4
+/* UPHOLE represents uphole traces 				*/
+#define		UPHOLE		5
+/* SWEEP represents sweep traces 				*/
+#define		SWEEP		6
+/* TIMING represents timing traces 				*/
+#define		TIMING		7
+/* WBREAK represents timing traces 				*/
+#define		WBREAK		8
+/* NFGUNSIG represents near field gun signature 		*/
+#define		NFGUNSIG	9	
+/* FFGUNSIG represents far field gun signature	 		*/
+#define		FFGUNSIG	10
+/* SPSENSOR represents seismic pressure sensor	 		*/
+#define		SPSENSOR	11
+/* TVERT represents multicomponent seismic sensor 
+	- vertical component */
+#define		TVERT		12
+/* TXLIN represents multicomponent seismic sensor 
+	- cross-line component */
+#define		TXLIN		13
+/* TINLIN represents multicomponent seismic sensor 
+	- in-line component */
+#define		TINLIN	14
+/* ROTVERT represents rotated multicomponent seismic sensor 
+	- vertical component */
+#define		ROTVERT		15
+/* TTRANS represents rotated multicomponent seismic sensor 
+	- transverse component */
+#define		TTRANS		16
+/* TRADIAL represents rotated multicomponent seismic sensor 
+	- radial component */
+#define		TRADIAL		17
+/* VRMASS represents vibrator reaction mass */
+#define		VRMASS		18
+/* VBASS represents vibrator baseplate */
+#define		VBASS		19
+/* VEGF represents vibrator estimated ground force */
+#define		VEGF		20
+/* VREF represents vibrator reference */
+#define		VREF		21
+
+/*** CWP trid assignments ***/
+/* ACOR represents autocorrelation  */
+#define		ACOR		109
+/* FCMPLX represents fourier transformed - no packing 
+   xr[0],xi[0], ..., xr[N-1],xi[N-1] */
+#define		FCMPLX		110
+/* FUNPACKNYQ represents fourier transformed - unpacked Nyquist
+   xr[0],xi[0],...,xr[N/2],xi[N/2] */
+#define		FUNPACKNYQ	111
+/* FTPACK represents fourier transformed - packed Nyquist
+   even N: xr[0],xr[N/2],xr[1],xi[1], ...,
+	xr[N/2 -1],xi[N/2 -1]
+   (note the exceptional second entry)
+    odd N:
+     xr[0],xr[(N-1)/2],xr[1],xi[1], ...,
+     xr[(N-1)/2 -1],xi[(N-1)/2 -1],xi[(N-1)/2]
+   (note the exceptional second & last entries)
+*/
+#define		FTPACK		112
+/* TCMPLX represents complex time traces 			*/
+#define		TCMPLX		113
+/* FAMPH represents freq domain data in amplitude/phase form	*/
+#define		FAMPH		114
+/* TAMPH represents time domain data in amplitude/phase form	*/
+#define		TAMPH		115
+/* REALPART represents the real part of a trace to Nyquist	*/
+#define		REALPART	116
+/* IMAGPART represents the real part of a trace to Nyquist	*/
+#define		IMAGPART	117
+/* AMPLITUDE represents the amplitude of a trace to Nyquist	*/
+#define		AMPLITUDE	118
+/* PHASE represents the phase of a trace to Nyquist		*/
+#define		PHASE		119
+/* KT represents wavenumber-time domain data 			*/
+#define		KT		121
+/* KOMEGA represents wavenumber-frequency domain data		*/
+#define		KOMEGA		122
+/* ENVELOPE represents the envelope of the complex time trace	*/
+#define		ENVELOPE	123
+/* INSTPHASE represents the phase of the complex time trace	*/
+#define		INSTPHASE	124
+/* INSTFREQ represents the frequency of the complex time trace	*/
+#define		INSTFREQ	125
+/* DEPTH represents traces in depth-range (z-x)			*/
+#define		TRID_DEPTH	130
+/* 3C data...  v,h1,h2=(11,12,13)+32 so a bitmask will convert  */
+/* between conventions */
+/* CHARPACK represents byte packed seismic data from supack1	*/
+#define		CHARPACK	201
+/* SHORTPACK represents 2 byte packed seismic data from supack2	*/
+#define		SHORTPACK	202
+
+
+#define ISSEISMIC(id) (( (id)==TUNK || (id)==TREAL || (id)==TDEAD || (id)==TDUMMY || (id)==TBREAK || (id)==UPHOLE || (id)==SWEEP || (id)==TIMING || (id)==WBREAK || (id)==NFGUNSIG || (id)==FFGUNSIG || (id)==SPSENSOR || (id)==TVERT || (id)==TXLIN || (id)==TINLIN || (id)==ROTVERT || (id)==TTRANS || (id)==TRADIAL || (id)==ACOR ) ? cwp_true : cwp_false ) 
+
+/* FUNCTION PROTOTYPES */
+#ifdef __cplusplus /* if C++, specify external linkage to C functions */
+extern "C" {
+#endif
+
+/* get trace and put trace */
+int fgettr(FILE *fp, segy *tp);
+int fvgettr(FILE *fp, segy *tp);
+void fputtr(FILE *fp, segy *tp);
+void fvputtr(FILE *fp, segy *tp);
+int fgettra(FILE *fp, segy *tp, int itr);
+
+/* get gather and put gather */
+segy **fget_gather(FILE *fp, cwp_String *key,cwp_String *type,Value *n_val,
+                        int *nt,int *ntr, float *dt,int *first);
+segy **get_gather(cwp_String *key, cwp_String *type, Value *n_val,
+			int *nt, int *ntr, float *dt, int *first);
+segy **fput_gather(FILE *fp, segy **rec,int *nt, int *ntr);
+segy **put_gather(segy **rec,int *nt, int *ntr);
+
+/* hdrpkge */
+void gethval(const segy *tp, int index, Value *valp);
+void puthval(segy *tp, int index, Value *valp);
+void getbhval(const bhed *bhp, int index, Value *valp);
+void putbhval(bhed *bhp, int index, Value *valp);
+void gethdval(const segy *tp, char *key, Value *valp);
+void puthdval(segy *tp, char *key, Value *valp);
+char *hdtype(const char *key);
+char *getkey(const int index);
+int getindex(const char *key);
+void swaphval(segy *tp, int index);
+void swapbhval(bhed *bhp, int index);
+void printheader(const segy *tp);
+
+void tabplot(segy *tp, int itmin, int itmax);
+
+#ifdef __cplusplus /* if C++, end external linkage specification */
+}
+#endif
+
+#endif
diff --git a/MDD/verbosepkg.c b/MDD/verbosepkg.c
new file mode 120000
index 0000000000000000000000000000000000000000..248253edebc2c7b207e139ecf16b68b318f057df
--- /dev/null
+++ b/MDD/verbosepkg.c
@@ -0,0 +1 @@
+../utils/verbosepkg.c
\ No newline at end of file
diff --git a/MDD/wallclock_time.c b/MDD/wallclock_time.c
new file mode 120000
index 0000000000000000000000000000000000000000..0bd00b4c2878f007a8dc398f0af7c7cb44f50717
--- /dev/null
+++ b/MDD/wallclock_time.c
@@ -0,0 +1 @@
+../utils/wallclock_time.c
\ No newline at end of file
diff --git a/MDD/writeEigen.c b/MDD/writeEigen.c
new file mode 100644
index 0000000000000000000000000000000000000000..e2ede69a44b6d140fde868b3ce89f7e28331cb28
--- /dev/null
+++ b/MDD/writeEigen.c
@@ -0,0 +1,55 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "segy.h"
+#include <assert.h>
+
+
+void writeEigen(char *file_out, float df, int nw_low, int nw_high, int nw, float *eigen, int nx, float dx, float xmin)
+{
+    static FILE *out_file;
+	float *trace, scl, re, im;
+	int sign, ntfft, i, j, ie, iw, count;
+	segy *hdrs_out;
+	size_t nwrite;
+    char filename[256], ext[32];
+
+    trace  = (float *)malloc(nx*sizeof(float));
+	hdrs_out  = (segy *)calloc(TRCBYTES,1);
+    
+	hdrs_out[0].dt=df*1000000;
+	hdrs_out[0].trid = 1;
+	hdrs_out[0].ns = nx;
+	hdrs_out[0].d1 = 1;
+	hdrs_out[0].f1 = 1;
+	hdrs_out[0].f2 = nw_low*df;
+	hdrs_out[0].d2 = df;
+	hdrs_out[0].trwf = nw;
+	hdrs_out[0].fldr = 1;
+
+	strcpy(filename, file_out);
+	sprintf(ext,"%s.su", "_eigen");
+	strcpy(strstr(filename, ".su"), ext);
+	out_file = fopen(filename, "w+"); assert( out_file );
+	fprintf(stderr,"writing eigenvalues of matrix to %s\n", filename);
+	count=1;
+
+	for (iw=0; iw<nw; iw++) {
+		hdrs_out[0].tracl = iw+1;
+		for (i = 0; i < nx; i++) {
+           	trace[i] = eigen[iw*nx+i];
+		}
+		nwrite = fwrite(&hdrs_out[0], 1, TRCBYTES, out_file);
+		assert( nwrite == TRCBYTES );
+		nwrite = fwrite(trace, sizeof(float), nx, out_file);
+		assert( nwrite == nx );
+    }
+    fflush(out_file);
+   	fclose(out_file);
+
+    free(hdrs_out);
+    free(trace);
+
+	return;
+}
+
diff --git a/Make_include_template b/Make_include_template
index 6d0d98175c624b2cd9899cd774f3a7b7e876d679..e0ed07d7fa23c55dff0fd27a15f9a1bd97092419 100644
--- a/Make_include_template
+++ b/Make_include_template
@@ -8,6 +8,12 @@
 # the current directory (in vi ":r!pwd")
 ROOT=/Users/jan/src/OpenSource
 
+#############################################################################
+# Some convenient abbreviations
+B = $(ROOT)/bin
+I = $(ROOT)/include
+L = $(ROOT)/lib
+
 ########################################################################
 # C compiler; change this only if you are using a different C-compiler
 
@@ -41,7 +47,7 @@ OPTC += -fopenmp
 ### Linux
 ##OPTC = -O3 -no-prec-div -qopt-report-phase=vec,openmp
 ##OPTF = -O3 -no-prec-div -qopt-report-phase=vec,openmp
-#OPTC = -O3 -no-prec-div -xCORE-AVX2
+#OPTC = -O3 -no-prec-div -xCORE-AVX2 
 #OPTF = -O3 -no-prec-div -xCORE-AVX2
 ##to include parallelisation with OpenMP
 #OPTC += -qopenmp
@@ -75,19 +81,32 @@ OPTC += -fopenmp
 #OPTF = -Ofast
 #LDFLAGS = -static -Ofast
 
+#############################################################################
+# BLAS and LAPACK libraries 
+MKLROOT=/opt/intel/mkl/
+#for GNU compilers
+BLAS = ${MKLROOT}/lib/libmkl_gf_lp64.a ${MKLROOT}/lib/libmkl_sequential.a ${MKLROOT}/lib/libmkl_core.a -lpthread -lm -ldl
+#for intel compilers
+#BLAS = -mkl
+
 #############################################################################
 # FOR FFT LIBRARIES
 #AMD ACML 4.4.0
 #AMDROOT = /home/thorbcke/amdsdk/v1.0/acml/open64_64
 #OPTC += -DACML440 -I$(AMDROOT)/include
-#LIBSM = -L$(AMDROOT)/lib -lacml -lfortran -lffio -lrt -lm
+#BLAS = -L$(AMDROOT)/lib -lacml -lfortran -lffio -lrt -lm
+#Intel MKL
+MKLROOT=/opt/intel/mkl/
+OPTC += -DMKL -I$(MKLROOT)/include
+#for GNU compilers
+BLAS = ${MKLROOT}/lib/libmkl_gf_lp64.a ${MKLROOT}/lib/libmkl_sequential.a ${MKLROOT}/lib/libmkl_core.a -lpthread -lm -ldl
+#for intel compilers
+#FFT = ${MKLROOT}/lib/libmkl_intel_lp64.a ${MKLROOT}/lib/libmkl_sequential.a ${MKLROOT}/lib/libmkl_core.a -lpthread -lm -ldl
+
+#LIBARIES
+LIBS= -L$L -lgenfft $(FFT) $(BLAS)
 
-#############################################################################
-# Some convenient abbreviations
 
-B = $(ROOT)/bin
-I = $(ROOT)/include
-L = $(ROOT)/lib
 
 ########################################################################
 # standard CFLAGS
diff --git a/Makefile b/Makefile
index b85040fdfba64a2b9366a7c20369765ba55c5119..74e45defbad119dcd86d34c28ca92712ca38f366 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ all: mkdirs
 	cd marchenko	; $(MAKE) install
 	cd corrvir		; $(MAKE) install
 	cd raytime		; $(MAKE) install
+	cd MDD			; $(MAKE) install
 
 mkdirs:
 	-mkdir -p lib
@@ -20,6 +21,7 @@ clean:
 	cd marchenko	; $(MAKE) $@
 	cd corrvir		; $(MAKE) $@
 	cd raytime		; $(MAKE) $@
+	cd MDD			; $(MAKE) $@
 
 realclean:
 	cd FFTlib 		; $(MAKE) $@
@@ -28,6 +30,7 @@ realclean:
 	cd marchenko	; $(MAKE) $@
 	cd corrvir		; $(MAKE) $@
 	cd raytime		; $(MAKE) $@
+	cd MDD			; $(MAKE) $@
 	rm -f lib/*
 	rm -f include/*
 	rm -f bin/*
diff --git a/README b/README
index 6b9f9f5bd6bd80c034c062022d7a334f3266a46e..16f6b372e69c416c32efe670171e40dc75afd046 100644
--- a/README
+++ b/README
@@ -88,6 +88,18 @@ To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Mar
 
 To reproduce the Figures shown in the Scientific Reports paper "Virtual acoustics in inhomogeneous media with single-sided access" the scripts in marchenko/demo/ScientificReports directory can be used. The README in this directory gives more instructions and guidelines. 
 
+MDD
+---
+The MDD kernels depend on BLAS and LAPACK calls. Free downloads of these libraries can be found on 
+
+https://www.netlib.org/blas/index.html
+https://www.netlib.org/lapack/index.html
+
+If you are running on Intel processors you can download (for free) Intel's highly optimised MKL package:
+
+https://software.intel.com/en-us/mkl/choose-download
+
+
 MISC
 ----
 Other make commands which can be useful:
diff --git a/corrvir/Makefile b/corrvir/Makefile
index a8866219482d7e6fc03fcd94f46b8077b8f8e1d9..de45912b8eac959ba611b109b3b240c7d13810b6 100644
--- a/corrvir/Makefile
+++ b/corrvir/Makefile
@@ -3,7 +3,7 @@
 include ../Make_include
 
 ALLINC  = -I.
-LIBS    += -L$L -lgenfft -lm
+#LIBS    += -L$L -lgenfft -lm
 #OPTC += -g -O0 -m64
 #OPTC = -g -O0 -fno-omit-frame-pointer
 
diff --git a/fdelmodc/Makefile b/fdelmodc/Makefile
index 56b3c4db58b1edee66c9341b593d29f36afe820d..4214ad0836e2eb3fc6b8d426ffa5ec1138c22c57 100644
--- a/fdelmodc/Makefile
+++ b/fdelmodc/Makefile
@@ -5,8 +5,7 @@ include ../Make_include
 ########################################################################
 # define general include and system library
 ALLINC  = -I.
-LIBS    += -L$L -lgenfft -lm $(LIBSM)
-#LIBS    += -L$L -lgenfft -lm -lc
+#LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #OPTC = -g -Wall -fsignaling-nans -O0 -fopenmp
 #OPTC += -fopenmp -Waddress
 #OPTC := $(subst -O3 -ffast-math, -O1 -g ,$(OPTC))
diff --git a/fdelmodc/threadAffinity.c b/fdelmodc/threadAffinity.c
index bd9913ca40cb9d42b3151b53b62810fa1cbabf50..0fcee39c331948fa03d1754f623b03c2d77641a9 100644
--- a/fdelmodc/threadAffinity.c
+++ b/fdelmodc/threadAffinity.c
@@ -10,6 +10,9 @@
 #include <sched.h>
 #include <sys/types.h>
 #include <sys/sysctl.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
 
 #define CPU_SETSIZE	1024
 #define SYSCTL_CORE_COUNT   "machdep.cpu.core_count"
diff --git a/fdemmodc/Makefile b/fdemmodc/Makefile
index 762538283e1353e719731c662a3e473e22dd4ddb..60fc989b6bdea945895eb84ca3598b7c360b48ea 100644
--- a/fdemmodc/Makefile
+++ b/fdemmodc/Makefile
@@ -5,7 +5,7 @@ include ../Make_include
 ########################################################################
 # define general include and system library
 ALLINC  = -I.
-LIBS    += -L$L -lgenfft -lm $(LIBSM)
+#LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #LIBS    += -L$L -lgenfft -lm -lc
 #OPTC = -g -Wall -fsignaling-nans -O0
 #OPTC += -fopenmp -Waddress
diff --git a/marchenko/Makefile b/marchenko/Makefile
index 237c134e904d7c4b7311e63065c4c09282ace67f..260e2c02845925df6222452cf89035f50faf8704 100644
--- a/marchenko/Makefile
+++ b/marchenko/Makefile
@@ -2,7 +2,7 @@
 
 include ../Make_include
 
-LIBS    += -L$L -lgenfft -lm $(LIBSM)
+#LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #OPTC += -g -O0 -Wall 
 
 #ALL: fmute marchenko marchenko2
diff --git a/raytime/Makefile b/raytime/Makefile
index b3aa6f39b0b3a26f00e0c892e352eebbbe223768..398bd1fedc4198afb9c050c4acdfc0423f76a15e 100644
--- a/raytime/Makefile
+++ b/raytime/Makefile
@@ -5,7 +5,7 @@ include ../Make_include
 ########################################################################
 # define general include and system library
 ALLINC  = -I.
-LIBS    += -L$L -lgenfft -lm $(LIBSM)
+#LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #LIBS    += -L$L -lgenfft -lm -lc
 #OPTC = -g -Wall -fsignaling-nans -O0
 #OPTC += -fopenmp -Waddress
diff --git a/raytime3d/Makefile b/raytime3d/Makefile
index be0bb6e043b09ee22c89254f9c54afceb990ad92..a71b7a36a1a2cd6c4cf0927efaad5e231d10e9e3 100644
--- a/raytime3d/Makefile
+++ b/raytime3d/Makefile
@@ -5,7 +5,7 @@ include ../Make_include
 ########################################################################
 # define general include and system library
 ALLINC  = -I.
-LIBS    += -L$L -lgenfft -lm $(LIBSM)
+#LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #LIBS    += -L$L -lgenfft -lm -lc
 #OPTC = -g -Wall -fsignaling-nans -O0
 #OPTC += -fopenmp -Waddress
diff --git a/utils/Makefile b/utils/Makefile
index 9d47eceee936c6e5dffd52824bbd667ed5a4adea..7b056b16b5e646d7af1ae0f0101b8f2c1ead0027 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -2,7 +2,7 @@
 
 include ../Make_include
 
-LIBS    += -L$L -lgenfft -lm $(LIBSM)
+#LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #OPTC += -openmp 
 #OPTC += -g -O0
 
diff --git a/utils/getFileInfo.c b/utils/getFileInfo.c
index 490ba4ad7e382117ca7612b6f28b039cc4f4b7f1..61ff7bae8cff39ed8251ba37d335a42cdfba3395 100644
--- a/utils/getFileInfo.c
+++ b/utils/getFileInfo.c
@@ -1,6 +1,3 @@
-#define _FILE_OFFSET_BITS 64
-#define _LARGEFILE_SOURCE
-
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>