diff --git a/.gitignore b/.gitignore
index c3ba6facbcab7a47d2251f83fbd6d0672fd6eb5a..4c7346a6893ef92090ec48fe78f4054f4488c9fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,7 @@ bin/*
 *.su
 *.bin
 */.DS*
-*/*/.DS*
+**/.DS*
 .DS*
 Make_include
 fdelmodc_cuda.tgz
diff --git a/FFTlib/cc1fft.c b/FFTlib/cc1fft.c
index 5367191b0f9c0a0ea49460e9ace61f63520ee321..c8bb2e6a88be9b20bfe8505b8778cb299af519bd 100644
--- a/FFTlib/cc1fft.c
+++ b/FFTlib/cc1fft.c
@@ -61,10 +61,17 @@ void cc1fft(complex *data, int n, int sign)
     REAL scl;
 	complex *y;
 #elif defined(MKL)
-	static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
 #endif
+	int id;
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
 
 #if defined(HAVE_LIBSCS)
 	pe = mp_my_threadnum();
@@ -99,26 +106,26 @@ void cc1fft(complex *data, int n, int sign)
     }
 	acmlcc1fft(sign, scl, inpl, n, data, 1, y, 1, work, &isys);
 #elif defined(MKL)
-    if (n != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
 
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n;
+        nprev[id] = n;
     }
 	if (sign < 0) {
-    	Status = DftiComputeBackward(handle, data);
+    	Status = DftiComputeBackward(handle[id], data);
 	}
 	else {
-    	Status = DftiComputeForward(handle, data);
+    	Status = DftiComputeForward(handle[id], data);
 	}
 #else
 	cc1_fft(data, n, sign);
diff --git a/FFTlib/ccmfft.c b/FFTlib/ccmfft.c
index cca95cba96ed969a67eaa42f58ab1b359063abc8..cbb0d29dcbf4f15a3762498d5c2f8af11a363145 100644
--- a/FFTlib/ccmfft.c
+++ b/FFTlib/ccmfft.c
@@ -64,11 +64,18 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
 	REAL scl;
 	complex *y;
 #elif defined(MKL)
-    static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
 	int j;
 #endif
+	int id;
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
 
 #if defined(HAVE_LIBSCS)
 	if (n1 != nprev) {
@@ -99,29 +106,29 @@ void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
 	}
 	acmlccmfft(sign, scl, inpl, n2, n1, data, 1, ld1, y, 1, ld1, work, &isys);
 #elif defined(MKL)
-    if (n1 != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n1 != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
         
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n1;
+        nprev[id] = n1;
     }
     if (sign < 0) {
     	for (j=0; j<n2; j++) {
-        	Status = DftiComputeBackward(handle, &data[j*ld1]);
+        	Status = DftiComputeBackward(handle[id], &data[j*ld1]);
 		}
     }
     else {
     	for (j=0; j<n2; j++) {
-        	Status = DftiComputeForward(handle, &data[j*ld1]);
+        	Status = DftiComputeForward(handle[id], &data[j*ld1]);
 		}
     }
 #else
diff --git a/FFTlib/cr1fft.c b/FFTlib/cr1fft.c
index 67b8f67839bb4098e83c6224ed5603383a9c1b40..7d7c19e680e37571b5b329a2c5e546107352b670 100644
--- a/FFTlib/cr1fft.c
+++ b/FFTlib/cr1fft.c
@@ -57,12 +57,20 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign)
 	static REAL *work, *table, scale=1.0;
 	REAL scl;
 #elif defined(MKL)
-	static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
 	REAL *tmp;
     MKL_LONG Status;
 	int i;
 #endif
+	int id;
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
+
 
 #if defined(HAVE_LIBSCS)
 	if (n != nprev) {
@@ -98,34 +106,34 @@ void cr1fft(complex *cdata, REAL *rdata, int n, int sign)
 	}
 	acmlcrfft(one, n, rdata, work, &isys);
 #elif defined(MKL)
-    if (n != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
 
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
 
-        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
         if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n;
+        nprev[id] = n;
     }
 	tmp = (float *)malloc(n*sizeof(float));
-    Status = DftiComputeBackward(handle, (MKL_Complex8 *)cdata, tmp);
+    Status = DftiComputeBackward(handle[id], (MKL_Complex8 *)cdata, tmp);
     if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
         dfti_status_print(Status);
         printf(" DftiComputeBackward FAIL\n");
diff --git a/FFTlib/crmfft.c b/FFTlib/crmfft.c
index a9d4d64ea3d9535cc937104f16222da65adf81e8..d628b3e24906b106a4d983f9317fdb615a36e785 100644
--- a/FFTlib/crmfft.c
+++ b/FFTlib/crmfft.c
@@ -71,12 +71,20 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s
     static REAL *work;
     REAL scl, *data;
 #elif defined(MKL)
-    static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     REAL *tmp;
     MKL_LONG Status;
 	int i, j;
 #endif
+	int id;
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
+
 
 #if defined(HAVE_LIBSCS)
 	nmp = mp_my_threadnum();
@@ -138,37 +146,37 @@ void crmfft(complex *cdata, REAL *rdata, int n1, int n2, int ldc, int ldr, int s
         memcpy(&rdata[j*ldr],&data[j*n1],n1*sizeof(REAL));
     }
 #elif defined(MKL)
-    if (n1 != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n1 != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
 
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
 
-        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
         //This options is what we would like, but is depreciated in the future
-        //Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL);
+        //Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_REAL);
         if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n1;
+        nprev[id] = n1;
     }
     tmp = (float *)malloc(n1*sizeof(float));
     for (j=0; j<n2; j++) {
-    	Status = DftiComputeBackward(handle, (MKL_Complex8 *)&cdata[j*ldc], tmp);
+    	Status = DftiComputeBackward(handle[id], (MKL_Complex8 *)&cdata[j*ldc], tmp);
     	rdata[j*ldr] = tmp[0];
 		if (sign < 0) {
     		for (i=1; i<n1; i++) rdata[j*ldr+i] = -sign*tmp[n1-i];
diff --git a/FFTlib/rc1fft.c b/FFTlib/rc1fft.c
index fa7386d7936e35dbdcf2b0f08038b58e3e5be57f..f37fb3d5678335aa120b5b8544ae0ee64b2fba0d 100644
--- a/FFTlib/rc1fft.c
+++ b/FFTlib/rc1fft.c
@@ -57,11 +57,18 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	static REAL *work, *table, scale=1.0;
 	REAL scl, *data;
 #elif defined(MKL)
-	static DFTI_DESCRIPTOR_HANDLE handle=0;
-	static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
 	int i;
 #endif
+	int id;
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
 
 #if defined(HAVE_LIBSCS)
 	if (n != nprev) {
@@ -102,40 +109,33 @@ void rc1fft(REAL *rdata, complex *cdata, int n, int sign)
 	cdata[n/2].i=0.0;
 	free(data);
 #elif defined(MKL)
-	if (n != nprev) {
-		DftiFreeDescriptor(&handle);
+	if (n != nprev[id]) {
+
+		DftiFreeDescriptor(&handle[id]);
 
-		Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
+		Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n);
     	if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
         	dfti_status_print(Status);
         	printf(" DftiCreateDescriptor FAIL\n");
     	}
-		Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+		Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
 		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
 			dfti_status_print(Status);
 			printf(" DftiSetValue FAIL\n");
 		}
-/*
-		Status = DftiSetValue(handle, DFTI_FORWARD_DOMAIN, DFTI_REAL);
-		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
-			dfti_status_print(Status);
-			printf(" DftiSetValue FAIL\n");
-		}
-*/
-
-		Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+		Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
 		if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
 			dfti_status_print(Status);
 			printf(" DftiSetValue FAIL\n");
 		}
-		Status = DftiCommitDescriptor(handle);
+		Status = DftiCommitDescriptor(handle[id]);
 		if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
 			dfti_status_print(Status);
 			printf(" DftiCommitDescriptor FAIL\n");
 		}
-		nprev = n;
+		nprev[id] = n;
 	}
-	Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata);
+	Status = DftiComputeForward(handle[id], rdata, (MKL_Complex8 *)cdata);
 	if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
 		dfti_status_print(Status);
 		printf(" DftiComputeForward FAIL\n");
diff --git a/FFTlib/rcmfft.c b/FFTlib/rcmfft.c
index 5c492faff59d28c7f69fe4d886c85e0f0b47ca6c..91da4c6ee9d286508ad3051983aded5b72dd004d 100644
--- a/FFTlib/rcmfft.c
+++ b/FFTlib/rcmfft.c
@@ -69,11 +69,19 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s
 	static REAL *work;
 	REAL scl, *data;
 #elif defined(MKL)
-    static DFTI_DESCRIPTOR_HANDLE handle=0;
-    static int nprev=0;
+	static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
+	static int nprev[MAX_NUMTHREADS];
     MKL_LONG Status;
-	int i,j;
+	int i, j;
 #endif
+	int id;
+
+#ifdef _OPENMP
+	id = omp_get_thread_num();
+#else
+	id = 0;
+#endif
+
 
 #if defined(HAVE_LIBSCS)
 	nmp = mp_my_threadnum();
@@ -130,39 +138,39 @@ void rcmfft(REAL *rdata, complex *cdata, int n1, int n2, int ldr, int ldc, int s
 	}
 	free(data);
 #elif defined(MKL)
-    if (n1 != nprev) {
-        DftiFreeDescriptor(&handle);
+    if (n1 != nprev[id]) {
+        DftiFreeDescriptor(&handle[id]);
         
-        Status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
+        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_REAL, 1, (MKL_LONG)n1);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCreateDescriptor FAIL\n");
         }
-        Status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
+        Status = DftiSetValue(handle[id], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
         
-        Status = DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
+        Status = DftiSetValue(handle[id], DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
         if (! DftiErrorClass(Status, DFTI_NO_ERROR)) {
             dfti_status_print(Status);
             printf(" DftiSetValue FAIL\n");
         }
-        Status = DftiCommitDescriptor(handle);
+        Status = DftiCommitDescriptor(handle[id]);
         if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
             dfti_status_print(Status);
             printf(" DftiCommitDescriptor FAIL\n");
         }
-        nprev = n1;
+        nprev[id] = n1;
     }
-    Status = DftiComputeForward(handle, rdata, (MKL_Complex8 *)cdata);
+    Status = DftiComputeForward(handle[id], rdata, (MKL_Complex8 *)cdata);
     if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
         dfti_status_print(Status);
         printf(" DftiComputeForward FAIL\n");
     }
 	for (j=0; j<n2; j++) {
-    	Status = DftiComputeForward(handle, &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]);
+    	Status = DftiComputeForward(handle[id], &rdata[j*ldr], (MKL_Complex8 *)&cdata[j*ldc]);
     	for (i=1; i<((n1-1)/2)+1; i++) {
         	cdata[j*ldc+i].i *= -sign;
     	}
diff --git a/fdelmodc3D/.vscode/settings.json b/fdelmodc3D/.vscode/settings.json
new file mode 100644
index 0000000000000000000000000000000000000000..871aedf628d81ee20a28f494175a0b13b0d82556
--- /dev/null
+++ b/fdelmodc3D/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "files.associations": {
+        "*.tcc": "c"
+    }
+}
\ No newline at end of file
diff --git a/fdelmodc3D/acoustic4_3D.c b/fdelmodc3D/acoustic4_3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..453d59f0267d9ae4321129ee7b807a8750b95171
--- /dev/null
+++ b/fdelmodc3D/acoustic4_3D.c
@@ -0,0 +1,174 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc3D.h"
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+long applySource(modPar mod, srcPar src, wavPar wav, bndPar bnd, long itime, long ixsrc, long izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float **src_nwav, long verbose);
+
+long storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, long ixsrc, long izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, long verbose);
+
+long reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, long ixsrc, long izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, long verbose);
+
+long boundariesP(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, long itime, long verbose);
+
+long boundariesV(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, long itime, long verbose);
+
+long acoustic4_3D(modPar mod, srcPar src, wavPar wav, bndPar bnd, long itime, long ixsrc, long iysrc, long izsrc, float **src_nwav, float *vx, float *vy, float *vz, float *p, float *rox, float *roy, float *roz, float *l2m, long verbose)
+{
+/*********************************************************************
+       COMPUTATIONAL OVERVIEW OF THE 4th ORDER STAGGERED GRID: 
+
+  The captial symbols T (=P) Txz,Vx,Vz represent the actual grid
+  The indices ix,iz are related to the T grid, so the capital T 
+  symbols represent the actual modelled grid.
+
+  one cel (iz,ix)
+       |
+       V                              extra column of vx,txz
+                                                      |
+    -------                                           V
+   | txz vz| txz vz  txz vz  txz vz  txz vz  txz vz txz
+   |       |      
+   | vx  t | vx  t   vx  t   vx  t   vx  t   vx  t  vx
+    -------
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   | 
+     txz vz  txz Vz--Txz-Vz--Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  t   vx  t   vx  t   vx  t   vx  t  vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz  <--| 
+                                                             |
+                                         extra row of txz/vz |
+
+   AUTHOR:
+           Jan Thorbecke (janth@xs4all.nl)
+           The Netherlands 
+
+***********************************************************************/
+
+	float   c1, c2;
+	long    ix, iy, iz;
+	long    n1, n2;
+	long    ioXx, ioXy, ioXz, ioYx, ioYy, ioYz, ioZx, ioZy, ioZz, ioPx, ioPy, ioPz;
+
+	c1 = 9.0/8.0; 
+	c2 = -1.0/24.0;
+	n1  = mod.naz;
+    n2  = mod.nay;
+
+	/* calculate vx for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iy, iz) nowait schedule(guided,1)
+	for (iy=mod.ioXy; iy<mod.ieXy; iy++) {
+#pragma ivdep
+        for (ix=mod.ioXx; ix<mod.ieXx; ix++) {
+            for (iz=mod.ioXz; iz<mod.ieXz; iz++) {
+                vx[iy*n2*n1+ix*n1+iz] -= rox[iy*n2*n1+ix*n1+iz]*(
+                    c1*(p[iy*n2*n1+ix*n1+iz]     - p[iy*n2*n1+(ix-1)*n1+iz]) +
+                    c2*(p[iy*n2*n1+(ix+1)*n1+iz] - p[iy*n2*n1+(ix-2)*n1+iz]));
+            }
+        }
+    }
+
+	/* calculate vy for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iy, iz) nowait schedule(guided,1)
+	for (iy=mod.ioYy; iy<mod.ieYy; iy++) {
+#pragma ivdep
+        for (ix=mod.ioYx; ix<mod.ieYx; ix++) {
+            for (iz=mod.ioYz; iz<mod.ieYz; iz++) {
+                vy[iy*n2*n1+ix*n1+iz] -= roy[iy*n2*n1+ix*n1+iz]*(
+                    c1*(p[iy*n2*n1+ix*n1+iz]     - p[(iy-1)*n2*n1+ix*n1+iz]) +
+                    c2*(p[(iy+1)*n2*n1+ix*n1+iz] - p[(iy-2)*n2*n1+ix*n1+iz]));
+            }
+        }
+    }
+
+	/* calculate vz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iy, iz) schedule(guided,1) 
+	for (iy=mod.ioZy; iy<mod.ieZy; iy++) {
+#pragma ivdep
+        for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+            for (iz=mod.ioZz; iz<mod.ieZz; iz++) {
+                vz[iy*n2*n1+ix*n1+iz] -= roz[iy*n2*n1+ix*n1+iz]*(
+                    c1*(p[iy*n2*n1+ix*n1+iz]   - p[iy*n2*n1+ix*n1+iz-1]) +
+                    c2*(p[iy*n2*n1+ix*n1+iz+1] - p[iy*n2*n1+ix*n1+iz-2]));
+            }
+        }
+    }
+        
+	/* boundary condition clears velocities on boundaries */
+    boundariesP3D(mod, bnd, vx, vy, vz, p, NULL, NULL, NULL, NULL, NULL, rox, roy, roz, l2m, NULL, NULL, itime, verbose);
+
+	/* Add force source */
+	if (src.type > 5) {
+         applySource3D(mod, src, wav, bnd, itime, ixsrc, iysrc, izsrc, vx, vy, vz, p, NULL, NULL, NULL, NULL, NULL, rox, roy, roz, l2m, src_nwav, verbose);
+	}
+
+	/* this is needed because the P fields are not using tapered boundaries (bnd....=4) */
+    if (bnd.top==2) mod.ioPz += bnd.npml;
+    if (bnd.bot==2) mod.iePz -= bnd.npml;
+    if (bnd.fro==2) mod.ioPy += bnd.npml;
+    if (bnd.bac==2) mod.iePy -= bnd.npml;
+    if (bnd.lef==2) mod.ioPx += bnd.npml;
+    if (bnd.rig==2) mod.iePx -= bnd.npml;
+
+	/* calculate p/tzz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iy, iz) schedule(guided,1) 
+//#pragma omp for private (ix, iz) schedule(dynamic) 
+#pragma ivdep
+	for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+#pragma ivdep
+		for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+            for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+                p[iy*n1*n2+ix*n1+iz] -= l2m[iy*n1*n2+ix*n1+iz]*(
+                            c1*(vx[iy*n1*n2+(ix+1)*n1+iz] - vx[iy*n1*n2+ix*n1+iz]) +
+                            c2*(vx[iy*n1*n2+(ix+2)*n1+iz] - vx[iy*n1*n2+(ix-1)*n1+iz]) +
+                            c1*(vy[(iy+1)*n1*n2+ix*n1+iz] - vy[iy*n1*n2+ix*n1+iz]) +
+                            c2*(vy[(iy+2)*n1*n2+ix*n1+iz] - vy[(iy-1)*n1*n2+ix*n1+iz]) +
+                            c1*(vz[iy*n1*n2+ix*n1+iz+1]   - vz[iy*n1*n2+ix*n1+iz]) +
+                            c2*(vz[iy*n1*n2+ix*n1+iz+2]   - vz[iy*n1*n2+ix*n1+iz-1]));
+            }
+        }
+	}
+    if (bnd.top==2) mod.ioPz -= bnd.npml;
+    if (bnd.bot==2) mod.iePz += bnd.npml;
+    if (bnd.fro==2) mod.ioPy -= bnd.npml;
+    if (bnd.bac==2) mod.iePy += bnd.npml;
+    if (bnd.lef==2) mod.ioPx -= bnd.npml;
+    if (bnd.rig==2) mod.iePx += bnd.npml;
+
+	/* Add stress source */
+	if (src.type < 6) {
+        applySource3D(mod, src, wav, bnd, itime, ixsrc, iysrc, izsrc, vx, vy, vz, p, NULL, NULL, NULL, NULL, NULL, rox, roy, roz, l2m, src_nwav, verbose);
+	}
+    
+/* Free surface: calculate free surface conditions for stresses */
+
+	/* check if there are sources placed on the free surface */
+    storeSourceOnSurface3D(mod, src, bnd, ixsrc, iysrc, izsrc, vx, vy, vz, p, NULL, NULL, NULL, NULL, NULL, verbose);
+
+	/* Free surface: calculate free surface conditions for stresses */
+    boundariesV3D(mod, bnd, vx, vy, vz, p, NULL, NULL, NULL, NULL, NULL, rox, roy, roz, l2m, NULL, NULL, itime, verbose);
+
+	/* restore source positions on the edge */
+    reStoreSourceOnSurface3D(mod, src, bnd, ixsrc, iysrc, izsrc, vx, vy, vz, p, NULL, NULL, NULL, NULL, NULL, verbose);
+
+	return 0;
+}
diff --git a/fdelmodc3D/applySource3D.c b/fdelmodc3D/applySource3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..80298b31c9b5a1281a30a69ab044c35b25df9c88
--- /dev/null
+++ b/fdelmodc3D/applySource3D.c
@@ -0,0 +1,365 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc3D.h"
+
+void vmess(char *fmt, ...);
+
+#define c1 (9.0/8.0)
+#define c2 (-1.0/24.0)
+
+/*********************************************************************
+ * 
+ * Add's the source amplitude(s) to the grid.
+ * 
+ * For the acoustic schemes, the source-type must not be txx tzz or txz.
+ *
+ *   AUTHOR:
+ *           Jan Thorbecke (janth@xs4all.nl)
+ *           The Netherlands 
+ *
+ **********************************************************************/
+
+long applySource3D(modPar mod, srcPar src, wavPar wav, bndPar bnd, long itime, long ixsrc, long iysrc, long izsrc, float *vx, float *vy, float *vz, float *tzz, float *tyy, float *txx, float *txz, float *txy, float *tyz, float *rox, float *roy, float *roz, float *l2m, float **src_nwav, long verbose)
+{
+	long is0, ibndz, ibndy, ibndx;
+	long isrc, ix, iy, iz, n1, n2;
+	long id1, id2, id3;
+	float src_ampl, time, scl, dt, sdx;
+	float Mxx, Myy, Mzz, Mxz, Myz, Mxy, rake;
+	static long first=1;
+
+	if (src.type==6) {
+    	ibndz = mod.ioXz;
+    	ibndy = mod.ioXy;
+    	ibndx = mod.ioXx;
+	}
+	else if (src.type==7) {
+    	ibndz = mod.ioZz;
+    	ibndy = mod.ioZy;
+    	ibndx = mod.ioZx;
+	}
+	else if (src.type==2) {
+    	ibndz = mod.ioTz;
+    	ibndy = mod.ioTy;
+    	ibndx = mod.ioTx;
+    	if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
+    	if (bnd.bac==4 || bnd.fro==2) ibndy += bnd.ntap;
+    	if (bnd.top==4 || bnd.top==2) ibndz += bnd.ntap;
+	}
+	else {	
+    	ibndz = mod.ioPz;
+    	ibndy = mod.ioPy;
+    	ibndx = mod.ioPx;
+    	if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
+    	if (bnd.bac==4 || bnd.fro==2) ibndy += bnd.ntap;
+    	if (bnd.top==4 || bnd.top==2) ibndz += bnd.ntap;
+	}
+
+	n1   = mod.naz;
+    n2   = mod.nax;
+	dt   = mod.dt;
+	sdx  = 1.0/mod.dx;
+
+	/* special txz source activated? */
+
+	if ((bnd.top==1) && (src.type==2)) {
+		iz = izsrc + ibndz;
+		if (iz==ibndz) {
+            if (src.orient != 1) {
+				if (first) {
+					vmess("Only monopole Txz source allowed at surface. Reset to monopole");
+					first = 0;
+				}
+				src.orient=1;
+			}
+		}
+	}
+             
+/*
+* for plane wave sources the sources are placed 
+* around the central shot position 
+* the first source position has an offset in x of is0
+*
+* itime = 0 corresponds with time=0
+* itime = 1 corresponds with time=dt
+* src[0] (the first sample) corresponds with time = 0
+*/
+
+	is0 = -1*floor((src.n-1)/2);
+#pragma omp	for private (isrc, src_ampl, ix, iy, iz, time, id1, id2, id3, scl) 
+	for (isrc=0; isrc<src.n; isrc++) {
+		src_ampl=0.0;
+		/* calculate the source position */
+		if (src.random || src.multiwav) {
+			ix = src.x[isrc] + ibndx;
+			iy = src.y[isrc] + ibndy;
+			iz = src.z[isrc] + ibndz;
+		}
+		else { /* plane wave and point sources */
+            ix = ixsrc + ibndx + is0 + isrc;
+            iy = iysrc + ibndy + is0 + isrc;
+            iz = izsrc + ibndz;
+		}
+		time = itime*dt - src.tbeg[isrc];
+		id1 = floor(time/dt);
+		id2 = id1+1;
+        
+		/* delay not reached or no samples left in source wavelet? */
+		if ( (time < 0.0) || ( (itime*dt) >= src.tend[isrc]) ) continue;
+
+//		fprintf(stderr,"isrc=%li ix=%li iz=%li src.x=%li src.z=%li\n", isrc, ix, iz, src.x[isrc], src.z[isrc]);
+
+		if (!src.multiwav) { /* only one wavelet for all sources */
+			src_ampl = src_nwav[0][id1]*(id2-time/dt) + src_nwav[0][id2]*(time/dt-id1);
+		}
+		else { /* multi-wavelet sources */
+			src_ampl = src_nwav[isrc][id1]*(id2-time/dt) + src_nwav[isrc][id2]*(time/dt-id1);
+		}
+
+		if (src_ampl==0.0) continue;
+		if ( ((ix-ibndx)<0) || ((ix-ibndx)>mod.nx) ) continue; /* source outside grid */
+        if ( ((iy-ibndy)<0) || ((iy-ibndy)>mod.ny) ) continue; /* source outside grid */
+
+		if (verbose>=4 && itime==0) {
+			vmess("Source %li positioned at grid ix=%li iy=%li iz=%li",isrc, ix, iy, iz);
+		}
+
+		/* cosine squared windowing to reduce edge effects on shot arrays */
+		if ( (src.n>1) && src.window) {
+            scl = 1.0;
+			if (isrc < src.window) {
+				scl = cos(0.5*M_PI*(src.window - isrc)/src.window);
+			}
+			else if (isrc > src.n-src.window+1) {
+				scl = cos(0.5*M_PI*(src.window - (src.n-isrc+1))/src.window);
+			}
+			src_ampl *= scl*scl;
+		}
+
+		/* source scaling factor to compensate for discretisation */
+
+		/* old amplitude setting does not obey reciprocity */
+		// src_ampl *= rox[ix*n1+iz]*l2m[ix*n1+iz]/(dt);
+
+/* in older version added factor 2.0 to be compliant with defined Green's functions in Marchenko algorithm */
+/* this is now set to 1.0 */
+		src_ampl *= (1.0/mod.dx)*l2m[iy*n1*n2+ix*n1+iz];
+
+		if (verbose>5) {
+			vmess("Source %li at grid [ix=%li,iy=%li,iz=%li] at itime %li has value %e",isrc, ix, iy, iz, itime, src_ampl);
+		}
+
+		/* Force source */
+
+		if (src.type == 6) {
+			vx[iy*n1*n2+ix*n1+iz] += src_ampl*rox[iy*n1*n2+ix*n1+iz]/(l2m[iy*n1*n2+ix*n1+iz]);
+			/* stable implementation from "Numerical Techniques for Conservation Laws with Source Terms" by Justin Hudson */
+			//vx[ix*n1+iz] = 0.5*(vx[(ix+1)*n1+iz]+vx[(ix-1)*n1+iz])+src_ampl*rox[ix*n1+iz]/(l2m[ix*n1+iz]);
+		}
+		else if (src.type == 7) {
+			vz[iy*n1*n2+ix*n1+iz] += src_ampl*roz[iy*n1*n2+ix*n1+iz]/(l2m[iy*n1*n2+ix*n1+iz]);
+			/* stable implementation from "Numerical Techniques for Conservation Laws with Source Terms" by Justin Hudson */
+			/* stable implementation changes amplitude and more work is needed */
+			//vz[ix*n1+iz] = 0.5*(vz[ix*n1+iz-1]+vz[ix*n1+iz+1])+src_ampl*roz[ix*n1+iz]/(l2m[ix*n1+iz]);
+			//vz[ix*n1+iz] = 0.25*(vz[ix*n1+iz-2]+vz[ix*n1+iz-1]+vz[ix*n1+iz]+vz[ix*n1+iz+1])+src_ampl*roz[ix*n1+iz]/(l2m[ix*n1+iz]);
+        } /* src.type */
+
+        
+		/* Stress source */
+
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			/* Compressional source */
+			if (src.type == 1) {
+				if (src.orient != 1) src_ampl=src_ampl/mod.dx;
+
+				if (src.orient==1) { /* monopole */
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+				}
+				else if (src.orient==2) { /* dipole +/- */
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+ix*n1+iz+1] -= src_ampl;
+				}
+				else if (src.orient==3) { /* dipole - + */
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+(ix-1)*n1+iz] -= src_ampl;
+				}
+				else if (src.orient==4) { /* dipole +/0/- */
+					if (iz > ibndz) 
+						tzz[iy*n1*n2+ix*n1+iz-1]+= 0.5*src_ampl;
+					if (iz < mod.nz+ibndz-1) 
+						tzz[iy*n1*n2+ix*n1+iz+1] -= 0.5*src_ampl;
+				}
+				else if (src.orient==5) { /* dipole + - */
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+(ix+1)*n1+iz] -= src_ampl;
+				}
+			}
+		}
+		else { /* Elastic scheme */
+			/* Compressional source */
+			if (src.type == 1) {
+				if (src.orient==1) { /* monopole */
+					txx[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+				}
+				else if (src.orient==2) { /* dipole +/- */
+					txx[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+					txx[iy*n1*n2+ix*n1+iz+1] -= src_ampl;
+					tzz[iy*n1*n2+ix*n1+iz+1] -= src_ampl;
+				}
+				else if (src.orient==3) { /* dipole - + */
+					txx[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+					txx[iy*n1*n2+(ix-1)*n1+iz] -= src_ampl;
+					tzz[iy*n1*n2+(ix-1)*n1+iz] -= src_ampl;
+				}
+				else if (src.orient==4) { /* dipole +/0/- */
+					if (iz > ibndz) {
+						txx[iy*n1*n2+ix*n1+iz-1]+= 0.5*src_ampl;
+						tzz[iy*n1*n2+ix*n1+iz-1]+= 0.5*src_ampl;
+					}
+					if (iz < mod.nz+ibndz-1) {
+						txx[iy*n1*n2+ix*n1+iz+1] -= 0.5*src_ampl;
+						tzz[iy*n1*n2+ix*n1+iz+1] -= 0.5*src_ampl;
+					}
+				}
+				else if (src.orient==5) { /* dipole + - */
+					txx[iy*n1*n2+ix*n1+iz] += src_ampl;
+					tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+					txx[iy*n1*n2+(ix+1)*n1+iz] -= src_ampl;
+					tzz[iy*n1*n2+(ix+1)*n1+iz] -= src_ampl;
+				}
+			}
+			else if (src.type == 2) {
+				/* Txz source */
+				if ((iz == ibndz) && bnd.top==1) {
+					txz[iy*n1*n2+(ix-1)*n1+iz-1] += src_ampl;
+					txz[iy*n1*n2+ix*n1+iz-1] += src_ampl;
+				}
+				else {
+					txz[iy*n1*n2+ix*n1+iz] += src_ampl;
+				}
+				/* possible dipole orientations for a txz source */
+				if (src.orient == 2) { /* dipole +/- */
+					txz[iy*n1*n2+ix*n1+iz+1] -= src_ampl;
+				}
+				else if (src.orient == 3) { /* dipole - + */
+					txz[iy*n1*n2+(ix-1)*n1+iz] -= src_ampl;
+				}
+				else if (src.orient == 4) { /*  dipole +/O/- */
+					/* correction: subtrace previous value to prevent z-1 values. */
+					txz[iy*n1*n2+ix*n1+iz] -= 2.0*src_ampl;
+					txz[iy*n1*n2+ix*n1+iz+1] += src_ampl;
+				}
+				else if (src.orient == 5) { /* dipole + - */
+					txz[iy*n1*n2+(ix+1)*n1+iz] -= src_ampl;
+				}
+			}
+			/* Tzz source */
+			else if(src.type == 3) {
+				tzz[iy*n1*n2+ix*n1+iz] += src_ampl;
+			} 
+			/* Txx source */
+			else if(src.type == 4) {
+				txx[iy*n1*n2+ix*n1+iz] += src_ampl;
+			} 
+
+/***********************************************************************
+* pure potential shear S source (experimental)
+* Curl S-pot = CURL(F) = dF_x/dz - dF_z/dx
+***********************************************************************/
+			else if(src.type == 5) {
+				src_ampl = src_ampl*rox[iy*n1*n2+ix*n1+iz]/(l2m[iy*n1*n2+ix*n1+iz]);
+				if (src.orient == 3) src_ampl = -src_ampl;
+                /* first order derivatives */
+				vx[iy*n1*n2+ix*n1+iz]         += src_ampl*sdx;
+				vx[(iy-1)*n1*n2+ix*n1+iz-1]   -= src_ampl*sdx;
+				vy[iy*n1*n2+ix*n1+iz]         += src_ampl*sdx;
+				vy[iy*n1*n2+(ix-1)*n1+iz-1]   -= src_ampl*sdx;
+				vz[iy*n1*n2+ix*n1+iz]         -= src_ampl*sdx;
+				vz[(iy-1)*n1*n2+(ix-1)*n1+iz] += src_ampl*sdx;
+                
+                /* second order derivatives */
+                /*
+				vx[ix*n1+iz]     += c1*src_ampl*sdx;
+                vx[ix*n1+iz-1]   -= c1*src_ampl*sdx;
+				vx[ix*n1+iz+1]   += c2*src_ampl*sdx;
+                vx[ix*n1+iz-2]   -= c2*src_ampl*sdx;
+
+                vz[ix*n1+iz]     -= c1*src_ampl*sdx;
+				vz[(ix-1)*n1+iz] += c1*src_ampl*sdx;
+				vz[(ix+1)*n1+iz] -= c2*src_ampl*sdx;
+				vz[(ix-2)*n1+iz] += c2*src_ampl*sdx;
+                 */
+
+				/* determine second position of dipole */
+				if (src.orient == 2) { /* dipole +/- vertical */
+					iz += 1;
+                    vx[iy*n1*n2+ix*n1+iz]         -= src_ampl*sdx;
+                    vx[(iy-1)*n1*n2+ix*n1+iz-1]   += src_ampl*sdx;
+                    vy[iy*n1*n2+ix*n1+iz]         -= src_ampl*sdx;
+                    vy[iy*n1*n2+(ix-1)*n1+iz-1]   += src_ampl*sdx;
+                    vz[iy*n1*n2+ix*n1+iz]         += src_ampl*sdx;
+                    vz[(iy-1)*n1*n2+(ix-1)*n1+iz] -= src_ampl*sdx;
+				}
+				else if (src.orient == 3) { /* dipole - + horizontal */
+					ix += 1;
+                    vx[iy*n1*n2+ix*n1+iz]         -= src_ampl*sdx;
+                    vx[(iy-1)*n1*n2+ix*n1+iz-1]   += src_ampl*sdx;
+                    vy[iy*n1*n2+ix*n1+iz]         -= src_ampl*sdx;
+                    vy[iy*n1*n2+(ix-1)*n1+iz-1]   += src_ampl*sdx;
+                    vz[iy*n1*n2+ix*n1+iz]         += src_ampl*sdx;
+                    vz[(iy-1)*n1*n2+(ix-1)*n1+iz] -= src_ampl*sdx;
+				}
+            }
+/***********************************************************************
+* pure potential pressure P source (experimental)
+* Divergence P-pot = DIV(F) = dF_x/dx + dF_z/dz
+***********************************************************************/
+            else if(src.type == 8) {
+			    src_ampl = src_ampl*rox[iy*n1*n2+ix*n1+iz]/(l2m[iy*n1*n2+ix*n1+iz]);
+                if (src.orient == 3) src_ampl = -src_ampl;
+                vx[iy*n1*n2+(ix+1)*n1+iz] += src_ampl*sdx;
+                vx[iy*n1*n2+ix*n1+iz]     -= src_ampl*sdx;
+                vy[(iy+1)*n1*n2+ix*n1+iz] += src_ampl*sdx;
+                vy[iy*n1*n2+ix*n1+iz]     -= src_ampl*sdx;
+                vz[iy*n1*n2+ix*n1+iz+1]   += src_ampl*sdx;
+                vz[iy*n1*n2+ix*n1+iz]     -= src_ampl*sdx;
+                /* determine second position of dipole */
+                if (src.orient == 2) { /* dipole +/- */
+                    iz += 1;
+                    vx[iy*n1*n2+(ix+1)*n1+iz] -= src_ampl*sdx;
+                    vx[iy*n1*n2+ix*n1+iz]     += src_ampl*sdx;
+                    vy[(iy+1)*n1*n2+ix*n1+iz] -= src_ampl*sdx;
+                    vy[iy*n1*n2+ix*n1+iz]     += src_ampl*sdx;
+                    vz[iy*n1*n2+ix*n1+iz+1]   -= src_ampl*sdx;
+                    vz[iy*n1*n2+ix*n1+iz]     += src_ampl*sdx;
+                }
+                else if (src.orient == 3) { /* dipole - + */
+                    ix += 1;
+                    vx[iy*n1*n2+(ix+1)*n1+iz] -= src_ampl*sdx;
+                    vx[iy*n1*n2+ix*n1+iz]     += src_ampl*sdx;
+                    vy[(iy+1)*n1*n2+ix*n1+iz] -= src_ampl*sdx;
+                    vy[iy*n1*n2+ix*n1+iz]     += src_ampl*sdx;
+                    vz[iy*n1*n2+ix*n1+iz+1]   -= src_ampl*sdx;
+                    vz[iy*n1*n2+ix*n1+iz]     += src_ampl*sdx;
+                }
+			}
+            else if(src.type == 9) {
+				rake = 0.5*M_PI;
+				Mxx = -1.0*(sin(src.dip)*cos(rake)*sin(2.0*src.strike)+sin(src.dip*2.0)*sin(rake)*sin(src.strike)*sin(src.strike));
+				Mxz = -1.0*(cos(src.dip)*cos(rake)*cos(src.strike)+cos(src.dip*2.0)*sin(rake)*sin(src.strike));
+				Mzz = sin(src.dip*2.0)*sin(rake);
+
+				txx[iy*n1*n2+ix*n1+iz] -= Mxx*src_ampl;
+				tzz[iy*n1*n2+ix*n1+iz] -= Mzz*src_ampl;
+				txz[iy*n1*n2+ix*n1+iz] -= Mxz*src_ampl;
+			} /* src.type */
+		} /* ischeme */
+	} /* loop over isrc */
+
+	return 0;
+}
diff --git a/fdelmodc3D/boundaries3D.c b/fdelmodc3D/boundaries3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..3f4f55549582762f00f308d9728eb1dc6a7b3410
--- /dev/null
+++ b/fdelmodc3D/boundaries3D.c
@@ -0,0 +1,2253 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc3D.h"
+
+void vmess(char *fmt, ...);
+
+long boundariesP3D(modPar mod, bndPar bnd, float *vx, float *vy, float *vz, float *tzz, float *tyy, float *txx, float *txz, float *txy, float *tyz, float *rox, float *roy, float *roz, float *l2m, float *lam, float *mul, long itime, long verbose)
+{
+/*********************************************************************
+
+   AUTHOR:
+		   Jan Thorbecke (janth@xs4all.nl)
+		   The Netherlands 
+
+***********************************************************************/
+
+	float c1, c2;
+	float dp, dvx, dvy, dvz;
+	long   ix, iy, iz, ixs, iys, izs, ibnd, ib, ibx, iby, ibz;
+	long   nx, ny, nz, n1, n2, n3;
+	long   is0, isrc;
+	long   ixo, ixe, iyo, iye, izo, ize;
+    long   npml, ipml, pml;
+    float kappu, alphu, sigmax, R, a, m, fac, dx, dy, dt;
+    float dpx, dpy, dpz, *p;
+    static float *Vxpml, *Vypml, *Vzpml, *sigmu, *RA;
+	static long allocated=0;
+    float Jx, Jy, Jz, rho, d;
+
+	c1 = 9.0/8.0;
+	c2 = -1.0/24.0;
+	nx  = mod.nx;
+    ny  = mod.ny;
+    nz  = mod.nz;
+    n1  = mod.naz;
+    n2  = mod.nax;
+    n3  = mod.nay;
+    dx  = mod.dx;
+    dy  = mod.dy;
+    dt  = mod.dt;
+    fac = dt/dx;
+    if ( (bnd.top==2) || (bnd.bot==2) || (bnd.lef==2) || (bnd.rig==2) ) pml=1;
+	else pml=0;
+
+	ibnd = mod.iorder/2-1;
+
+	if (mod.ischeme <= 2) { /* Acoustic scheme */
+		if (bnd.top==1) { /* free surface at top */
+#pragma omp	for private (ix,iy) nowait
+			for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+                for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+                    iz = bnd.surface[ix];
+                    vz[iy*n2*n1+ix*n1+iz]   = vz[iy*n2*n1+ix*n1+iz+1];
+                    vz[iy*n2*n1+ix*n1+iz-1] = vz[iy*n2*n1+ix*n1+iz+2];
+                }
+            }
+		}
+	}
+
+/************************************************************/
+/* rigid boundary condition clears velocities on boundaries */
+/************************************************************/
+
+	if (bnd.top==3) { /* rigid surface at top */
+#pragma omp for private (ix, iy, iz) nowait
+#pragma ivdep
+        for (iy=1; iy<=ny; iy++) {
+            for (ix=1; ix<=nx; ix++) {
+                vx[iy*n2*n1+ix*n1+ibnd] = 0.0;
+                vy[iy*n2*n1+ix*n1+ibnd] = 0.0;
+                vz[iy*n2*n1+ix*n1+ibnd] = -vz[iy*n2*n1+ix*n1+ibnd+1];
+                if (mod.iorder >= 4) vz[iy*n2*n1+ix*n1+ibnd-1] = -vz[iy*n2*n1+ix*n1+ibnd+2];
+                if (mod.iorder >= 6) vz[iy*n2*n1+ix*n1+ibnd-2] = -vz[iy*n2*n1+ix*n1+ibnd+3];
+            }
+        }
+	}
+	if (bnd.rig==3) { /* rigid surface at right */
+#pragma omp for private (ix, iy, iz) nowait
+#pragma ivdep
+        for (iy=1; iy<=ny; iy++) {
+            for (iz=1; iz<=nz; iz++) {
+                vz[iy*n2*n1+(nx+ibnd-1)*n1+iz] = 0.0;
+                vy[iy*n2*n1+(nx+ibnd-1)*n1+iz] = 0.0;
+                vx[iy*n2*n1+(nx+ibnd)*n1+iz]   = -vx[iy*n2*n1+(nx+ibnd-1)*n1+iz];
+                if (mod.iorder == 4) vx[iy*n2*n1+(nx+2)*n1+iz] = -vx[iy*n2*n1+(nx-1)*n1+iz];
+                if (mod.iorder == 6) {
+                    vx[iy*n2*n1+(nx+1)*n1+iz] = -vx[iy*n2*n1+(nx)*n1+iz];
+                    vx[iy*n2*n1+(nx+3)*n1+iz] = -vx[iy*n2*n1+(nx-2)*n1+iz];
+                }
+            }
+        }
+	}if (bnd.bac==3) { /* rigid surface at back */
+#pragma omp for private (ix, iy, iz) nowait
+#pragma ivdep
+        for (ix=1; ix<=nx; ix++) {
+            for (iz=1; iz<=nz; iz++) {
+                vz[(ny+ibnd-1)*n2*n1+ix*n1+iz] = 0.0;
+                vx[(ny+ibnd-1)*n2*n1+ix*n1+iz] = 0.0;
+                vy[(ny+ibnd)*n2*n1+ix*n1+iz]   = -vy[(ny+ibnd-1)*n2*n1+ix*n1+iz];
+                if (mod.iorder == 4) vy[(ny+2)*n2*n1+ix*n1+iz] = -vy[(ny-1)*n2*n1+iy*n1+iz];
+                if (mod.iorder == 6) {
+                    vy[(ny+1)*n2*n1+ix*n1+iz] = -vy[ny*n2*n1+ix*n1+iz];
+                    vy[(ny+3)*n2*n1+ix*n1+iz] = -vy[(ny-2)*n2*n1+ix*n1+iz];
+                }
+            }
+        }
+	}
+	if (bnd.bot==3) { /* rigid surface at bottom */
+#pragma omp for private (ix, iy, iz) nowait
+#pragma ivdep
+        for (iy=1; iy<=ny; iy++) {
+            for (ix=1; ix<=nx; ix++) {
+                vx[iy*n2*n1+ix*n1+nz+ibnd-1] = 0.0;
+                vy[iy*n2*n1+ix*n1+nz+ibnd-1] = 0.0;
+                vz[iy*n2*n1+ix*n1+nz+ibnd]   = -vz[iy*n2*n1+ix*n1+nz+ibnd-1];
+                if (mod.iorder == 4) vz[iy*n2*n1+ix*n1+nz+2] = -vz[iy*n2*n1+ix*n1+nz-1];
+                if (mod.iorder == 6) {
+                    vz[iy*n2*n1+ix*n1+nz+1] = -vz[iy*n2*n1+ix*n1+nz];
+                    vz[iy*n2*n1+ix*n1+nz+3] = -vz[iy*n2*n1+ix*n1+nz-2];
+                }
+            }
+        }
+	}
+	if (bnd.lef==3) { /* rigid surface at left */
+#pragma omp for private (ix, iy, iz) nowait
+#pragma ivdep
+        for (iy=1; iy<=ny; iy++) {
+            for (iz=1; iz<=nz; iz++) {
+                vz[iy*n2*n1+ibnd*n1+iz] = 0.0;
+                vy[iy*n2*n1+ibnd*n1+iz] = 0.0;
+                vx[iy*n2*n1+ibnd*n1+iz] = -vx[iy*n2*n1+(ibnd+1)*n1+iz];
+                if (mod.iorder == 4) vx[iy*n2*n1+0*n1+iz] = -vx[iy*n2*n1+3*n1+iz];
+                if (mod.iorder == 6) {
+                    vx[iy*n2*n1+1*n1+iz] = -vx[iy*n2*n1+4*n1+iz];
+                    vx[iy*n2*n1+0*n1+iz] = -vx[iy*n2*n1+5*n1+iz];
+                }
+            }
+        }
+	}
+    if (bnd.fro==3) { /* rigid surface at front */
+#pragma omp for private (ix, iy, iz) nowait
+#pragma ivdep
+        for (ix=1; ix<=nx; ix++) {
+            for (iz=1; iz<=nz; iz++) {
+                vz[ibnd*n2*n1+ix*n1+iz] = 0.0;
+                vx[ibnd*n2*n1+ix*n1+iz] = 0.0;
+                vy[ibnd*n2*n1+ix*n1+iz] = -vy[(ibnd+1)*n2*n1+ix*n1+iz];
+                if (mod.iorder == 4) vy[0*n2*n1+ix*n1+iz] = -vy[3*n2*n1+ix*n1+iz];
+                if (mod.iorder == 6) {
+                    vy[1*n2*n1+ix*n1+iz] = -vy[4*n2*n1+ix*n1+iz];
+                    vy[0*n2*n1+ix*n1+iz] = -vy[5*n2*n1+ix*n1+iz];
+                }
+            }
+        }
+	}
+    
+/************************************************************/
+/* Tapered boundaries for both elastic and acoustic schemes */
+/* compute all field values in tapered areas				*/
+/************************************************************/
+
+	/*********/
+	/*  Top  */
+	/*********/
+	if (bnd.top==4) {
+		
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			
+			/* Vx field */
+			ixo = mod.ioXx;
+			ixe = mod.ieXx;
+			iyo = mod.ioXy;
+			iye = mod.ieXy;
+			izo = mod.ioXz-bnd.ntap;
+			ize = mod.ioXz;
+	
+			ibz = (bnd.ntap+izo-1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+
+						vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapx[ibz-iz];
+					}
+				}
+			}
+			/* right top corner */
+			if (bnd.rig==4) {
+				ixo = mod.ieXx;
+				ixe = ixo+bnd.ntap;
+				ibz = (bnd.ntap+izo-1);
+				ibx = (ixo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ix-ibx)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+				/* right top front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioXy-bnd.ntap;
+					iye = mod.ioXy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+				/* right top back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieXy;
+					iye = mod.ieXy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+			}
+			ixo = mod.ioXx;
+			ixe = mod.ieXx;
+			iyo = mod.ioXy;
+			iye = mod.ieXy;
+			izo = mod.ioXz-bnd.ntap;
+			ize = mod.ioXz;
+
+			/* left top corner */
+			if (bnd.lef==4) {
+				ixo = mod.ioXx-bnd.ntap;
+				ixe = mod.ioXx;
+				ibz = (bnd.ntap+izo-1);
+				ibx = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+							
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ibx-ix)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+				/* left top front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioXy-bnd.ntap;
+					iye = mod.ioXy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+				/* left top back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieXy;
+					iye = mod.ieXy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+			}
+			/* front top corner */
+			if (bnd.fro==4) {
+				ixo = mod.ioXx;
+				ixe = mod.ieXx;
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibz = (bnd.ntap+izo-1);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+			}
+			/* Back top corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ioXy+bnd.ntap;
+				ibz = (bnd.ntap+izo-1);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+			}
+
+			/* Vy field */
+			ixo = mod.ioYx;
+			ixe = mod.ieYx;
+			iyo = mod.ioYy;
+			iye = mod.ieYy;
+			izo = mod.ioYz-bnd.ntap;
+			ize = mod.ioYz;
+	
+			ib = (bnd.ntap+izo-1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+
+						vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapy[ib-iz];
+					}
+				}
+			}
+			/* right top corner */
+			if (bnd.rig==4) {
+				ixo = mod.ieYx;
+				ixe = ixo+bnd.ntap;
+				ibz = (bnd.ntap+izo-1);
+				ibx = (ixo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ix-ibx)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+				/* right top front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioYy-bnd.ntap;
+					iye = mod.ioYy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+				/* right top back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieYy;
+					iye = mod.ieYy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+			}
+			ixo = mod.ioYx;
+			ixe = mod.ieYx;
+			iyo = mod.ioYy;
+			iye = mod.ieYy;
+			izo = mod.ioYz-bnd.ntap;
+			ize = mod.ioYz;
+
+			/* left top corner */
+			if (bnd.lef==4) {
+				ixo = mod.ioYx-bnd.ntap;
+				ixe = mod.ioYx;
+				ibz = (bnd.ntap+izo-1);
+				ibx = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+							
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ibx-ix)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+				/* left top front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioYy-bnd.ntap;
+					iye = mod.ioYy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+				/* left top back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieYy;
+					iye = mod.ieYy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+			}
+			/* front top corner */
+			if (bnd.fro==4) {
+				ixo = mod.ioXx;
+				ixe = mod.ieXx;
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibz = (bnd.ntap+izo-1);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+			}
+			/* Back top corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ioXy+bnd.ntap;
+				ibz = (bnd.ntap+izo-1);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+			}
+
+
+			/* Vz field */
+			ixo = mod.ioZx;
+			ixe = mod.ieZx;
+			iyo = mod.ioZy;
+			iye = mod.ieZy;
+			izo = mod.ioZz-bnd.ntap;
+			ize = mod.ioZz;
+	
+			ib = (bnd.ntap+izo-1);
+#pragma omp for private (ix, iy, iz) 
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+
+						vz[iy*n1*n2+ix*n1+iz] *= bnd.tapz[ib-iz];
+					}
+				}
+			}
+			/* right top corner */
+			if (bnd.rig==4) {
+				ixo = mod.ieZx;
+				ixe = ixo+bnd.ntap;
+				ibz = (bnd.ntap+izo-1);
+				ibx = (ixo);
+#pragma omp for private(ix,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ix-ibx)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+				/* right top front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioZy-bnd.ntap;
+					iye = mod.ioZy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+				/* right top back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieZy;
+					iye = mod.ieZy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+			}
+
+			ixo = mod.ioZx;
+			ixe = mod.ieZx;
+			iyo = mod.ioZy;
+			iye = mod.ieZy;
+			izo = mod.ioZz-bnd.ntap;
+			ize = mod.ioZz;
+
+			/* left top corner */
+			if (bnd.lef==4) {
+				ixo = mod.ioZx-bnd.ntap;
+				ixe = mod.ioZx;
+				ibz = (bnd.ntap+izo-1);
+				ibx = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+						
+						vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ibx-ix)*bnd.ntap+(ibz-iz)];
+					}
+				}
+				/* left top front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioZy-bnd.ntap;
+					iye = mod.ioZy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+				/* left top back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieZy;
+					iye = mod.ieZy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(ibz-iz)];
+							}
+						}
+					}
+				}
+			}
+			/* front top corner */
+			if (bnd.fro==4) {
+				ixo = mod.ioXx;
+				ixe = mod.ieXx;
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibz = (bnd.ntap+izo-1);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+			}
+			/* Back top corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ioXy+bnd.ntap;
+				ibz = (bnd.ntap+izo-1);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ibz-iz)];
+						}
+					}
+				}
+			}
+		}
+		
+	}
+	
+	/*********/
+	/* Bottom */
+	/*********/
+	if (bnd.bot==4) {
+		
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			
+			/* Vx field */
+			ixo = mod.ioXx;
+			ixe = mod.ieXx;
+			iyo = mod.ioXy;
+			iye = mod.ieXy;
+			izo = mod.ieXz;
+			ize = mod.ieXz+bnd.ntap;
+			
+			ib = (ize-bnd.ntap);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+									c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+
+						vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapx[iz-ib];
+					}
+				}
+			}
+			/* right bottom corner */
+			if (bnd.rig==4) {
+				ixo = mod.ieXx;
+				ixe = ixo+bnd.ntap;
+				ibz = (izo);
+				ibx = (ixo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ix-ibx)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+				/* right bottom front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioXy-bnd.ntap;
+					iye = mod.ioXy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+				/* right bottom back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieXy;
+					iye = mod.ieXy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+			}
+
+			ixo = mod.ioXx;
+			ixe = mod.ieXx;
+			iyo = mod.ioXy;
+			iye = mod.ieXy;
+			izo = mod.ieXz;
+			ize = mod.ieXz+bnd.ntap;
+
+			/* left bottom corner */
+			if (bnd.lef==4) {
+				ixo = mod.ioXx-bnd.ntap;
+				ixe = mod.ioXx;
+				ibz = (izo);
+				ibx = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+							
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ibx-ix)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+				/* left bottom front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioXy-bnd.ntap;
+					iye = mod.ioXy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+				/* left bottom back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieXy;
+					iye = mod.ieXy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+											c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+			
+								vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+			}
+			/* front bottom corner */
+			if (bnd.fro==4) {
+				ixo = mod.ioXx;
+				ixe = mod.ieXx;
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibz = (izo);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+			}
+			/* Back bottom corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ioXy+bnd.ntap;
+				ibz = (izo);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+			}
+
+			/* Vy field */
+			ixo = mod.ioYx;
+			ixe = mod.ieYx;
+			iyo = mod.ioYy;
+			iye = mod.ieYy;
+			izo = mod.ioYz;
+			ize = mod.ioYz+bnd.ntap;
+	
+			ib = (bnd.ntap+izo-1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+
+						vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapy[iz-ib];
+					}
+				}
+			}
+			/* right bottom corner */
+			if (bnd.rig==4) {
+				ixo = mod.ieYx;
+				ixe = ixo+bnd.ntap;
+				ibz = (izo);
+				ibx = (ixo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ix-ibx)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+				/* right bottom front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioYy-bnd.ntap;
+					iye = mod.ioYy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+				/* right bottom back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieYy;
+					iye = mod.ieYy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+			}
+			ixo = mod.ioYx;
+			ixe = mod.ieYx;
+			iyo = mod.ioYy;
+			iye = mod.ieYy;
+			izo = mod.ioYz;
+			ize = mod.ioYz+bnd.ntap;
+
+			/* left bottom corner */
+			if (bnd.lef==4) {
+				ixo = mod.ioYx-bnd.ntap;
+				ixe = mod.ioYx;
+				ibz = (izo);
+				ibx = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+							
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ibx-ix)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+				/* left bottom front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioYy-bnd.ntap;
+					iye = mod.ioYy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+				/* left bottom back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieYy;
+					iye = mod.ieYy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+											c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+			}
+			/* front bottom corner */
+			if (bnd.fro==4) {
+				ixo = mod.ioXx;
+				ixe = mod.ieXx;
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibz = (izo);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+			}
+			/* Back bottom corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ioXy+bnd.ntap;
+				ibz = (izo);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+			}
+
+			/* Vz field */
+			ixo = mod.ioZx;
+			ixe = mod.ieZx;
+			iyo = mod.ioZy;
+			iye = mod.ieZy;
+			izo = mod.ieZz;
+			ize = mod.ieZz+bnd.ntap;
+			
+			ib = (ize-bnd.ntap);
+#pragma omp for private (ix, iz) 
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iz=izo; iz<ize; iz++) {
+					vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+								c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+								c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+					vz[iy*n1*n2+ix*n1+iz] *= bnd.tapz[iz-ib];
+				}
+			}
+			/* right bottom corner */
+			if (bnd.rig==4) {
+				ixo = mod.ieZx;
+				ixe = ixo+bnd.ntap;
+				ibz = (izo);
+				ibx = (ixo);
+#pragma omp for private(ix,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+						
+						vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ix-ibx)*bnd.ntap+(iz-ibz)];
+					}
+				}
+				/* right bottom front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioYy-bnd.ntap;
+					iye = mod.ioYy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+				/* right bottom back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieYy;
+					iye = mod.ieYy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ix-ibx)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+			}
+			/* left bottom corner */
+			if (bnd.lef==4) {
+				ixo = mod.ioZx-bnd.ntap;
+				ixe = mod.ioZx;
+				ibz = (izo);
+				ibx = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+						
+						vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(ibx-ix)*bnd.ntap+(iz-ibz)];
+					}
+				}
+				/* left bottom front corner */
+				if (bnd.fro==4) {
+					iyo = mod.ioYy-bnd.ntap;
+					iye = mod.ioYy;
+					iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iby-iy)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+				/* left bottom back corner */
+				if (bnd.bac==4) {
+					iyo = mod.ieYy;
+					iye = mod.ieYy+bnd.ntap;
+					iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+					for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+						for (iy=iyo; iy<iye; iy++) {
+							for (iz=izo; iz<ize; iz++) {
+								vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+											c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+											c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+			
+								vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxyz[(iy-iby)*bnd.ntap*bnd.ntap+(ibx-ix)*bnd.ntap+(iz-ibz)];
+							}
+						}
+					}
+				}
+			}
+			/* front bottom corner */
+			if (bnd.fro==4) {
+				ixo = mod.ioXx;
+				ixe = mod.ieXx;
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibz = (izo);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+			}
+			/* Back bottom corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ioXy+bnd.ntap;
+				ibz = (izo);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(iz-ibz)];
+						}
+					}
+				}
+			}
+			
+		}
+	}
+	
+	/*********/
+	/* Left  */
+	/*********/
+	if (bnd.lef==4) {
+		
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			
+			/* Vx field */
+			ixo = mod.ioXx-bnd.ntap;
+			ixe = mod.ioXx;
+			iyo = mod.ioXy;
+			iye = mod.ieXy;
+			izo = mod.ioXz;
+			ize = mod.ieXz;
+			
+			ib = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+									c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+						
+						vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapx[ib-ix];
+					}
+				}
+			}
+			/* left front corner */
+			if (bnd.fro==4) {
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibx = (bnd.ntap+ixo-1);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ibx-ix)];
+						}
+					}
+				}
+			}
+			/* left back corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ieXy+bnd.ntap;
+				ibx = (bnd.ntap+ixo-1);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ibx-ix)];
+						}
+					}
+				}
+			}
+
+			/* Vy field */
+			ixo = mod.ioYx-bnd.ntap;
+			ixe = mod.ioYx;
+			iyo = mod.ioYy;
+			iye = mod.ieYy;
+			izo = mod.ioYz;
+			ize = mod.ieYz;
+			
+			ib = (bnd.ntap+ixo-1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+									c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+						
+						vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapx[ib-ix];
+					}
+				}
+			}
+			/* left front corner */
+			if (bnd.fro==4) {
+				iyo = mod.ioYy-bnd.ntap;
+				iye = mod.ioYy;
+				ibx = (bnd.ntap+ixo-1);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ibx-ix)];
+						}
+					}
+				}
+			}
+			/* left back corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieYy;
+				iye = mod.ieYy+bnd.ntap;
+				ibx = (bnd.ntap+ixo-1);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ibx-ix)];
+						}
+					}
+				}
+			}
+			
+			/* Vz field */
+			ixo = mod.ioZx-bnd.ntap;
+			ixe = mod.ioZx;
+			iyo = mod.ioZy;
+			iye = mod.ieZy;
+			izo = mod.ioZz;
+			ize = mod.ieZz;
+
+			ib = (bnd.ntap+ixo-1);
+#pragma omp for private (ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+						
+						vz[iy*n1*n2+ix*n1+iz] *= bnd.tapz[ib-ix];
+					}
+				}
+			}
+			/* left front corner */
+			if (bnd.fro==4) {
+				iyo = mod.ioZy-bnd.ntap;
+				iye = mod.ioZy;
+				ibx = (bnd.ntap+ixo-1);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ibx-ix)];
+						}
+					}
+				}
+			}
+			/* left back corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieZy;
+				iye = mod.ieZy+bnd.ntap;
+				ibx = (bnd.ntap+ixo-1);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ibx-ix)];
+						}
+					}
+				}
+			}
+
+		}
+		
+	}
+
+	/*********/
+	/* Right */
+	/*********/
+	if (bnd.rig==4) {
+		
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			
+			/* Vx field */
+			ixo = mod.ieXx;
+			ixe = mod.ieXx+bnd.ntap;
+			iyo = mod.ioXy;
+			iye = mod.ieXy;
+			izo = mod.ioXz;
+			ize = mod.ieXz;
+		
+			ib = (ixe-bnd.ntap);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+									c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+						vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapx[ix-ib];
+					}
+				}
+			}
+			/* left front corner */
+			if (bnd.fro==4) {
+				iyo = mod.ioXy-bnd.ntap;
+				iye = mod.ioXy;
+				ibx = (ixo);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ix-ibx)];
+						}
+					}
+				}
+			}
+			/* left back corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieXy;
+				iye = mod.ieXy+bnd.ntap;
+				ibx = (ixo);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+										c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+							vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ix-ibx)];
+						}
+					}
+				}
+			}
+
+			/* Vy field */
+			ixo = mod.ieYx;
+			ixe = mod.ieYx+bnd.ntap;
+			iyo = mod.ioYy;
+			iye = mod.ieYy;
+			izo = mod.ioYz;
+			ize = mod.ieYz;
+		
+			ib = (ixe-bnd.ntap);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+									c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+						vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapx[ix-ib];
+					}
+				}
+			}
+			/* left front corner */
+			if (bnd.fro==4) {
+				iyo = mod.ioYy-bnd.ntap;
+				iye = mod.ioYy;
+				ibx = (ixo);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ix-ibx)];
+						}
+					}
+				}
+			}
+			/* left back corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieYy;
+				iye = mod.ieYy+bnd.ntap;
+				ibx = (ixo);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+										c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+							vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ix-ibx)];
+						}
+					}
+				}
+			}
+
+			/* Vz field */
+			ixo = mod.ieZx;
+			ixe = mod.ieZx+bnd.ntap;
+			iyo = mod.ioZy;
+			iye = mod.ieZy;
+			izo = mod.ioZz;
+			ize = mod.ieZz;
+			
+			ib = (ixe-bnd.ntap);
+#pragma omp for private (ix,iy,iz) 
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+						vz[iy*n1*n2+ix*n1+iz] *= bnd.tapz[ix-ib];
+					}
+				}
+			}
+			/* left front corner */
+			if (bnd.fro==4) {
+				iyo = mod.ioZy-bnd.ntap;
+				iye = mod.ioZy;
+				ibx = (ixo);
+				iby = (bnd.ntap+iyo-1);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iby-iy)*bnd.ntap+(ix-ibx)];
+						}
+					}
+				}
+			}
+			/* left back corner */
+			if (bnd.bac==4) {
+				iyo = mod.ieZy;
+				iye = mod.ieZy+bnd.ntap;
+				ibx = (ixo);
+				iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+				for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+					for (iy=iyo; iy<iye; iy++) {
+						for (iz=izo; iz<ize; iz++) {
+							vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+										c1*(tzz[iy*n1*n2+ix*n1+iz]	 - tzz[iy*n1*n2+ix*n1+iz-1]) +
+										c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+							vz[iy*n1*n2+ix*n1+iz]   *= bnd.tapxz[(iy-iby)*bnd.ntap+(ix-ibx)];
+						}
+					}
+				}
+			}
+		
+		}
+		
+	}
+
+	/*********/
+	/* Front */
+	/*********/
+	if (bnd.fro==4) {
+		
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			
+			/* Vx field */
+			ixo = mod.ioXx;
+			ixe = mod.ieXx;
+			iyo = mod.ioXy-bnd.ntap;
+			iye = mod.ioXy;
+			izo = mod.ioXz;
+			ize = mod.ieXz;
+		
+			iby = (bnd.ntap+iyo+1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+									c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+						vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapy[iby-iy];
+					}
+				}
+			}
+
+			/* Vy field */
+			ixo = mod.ioYx;
+			ixe = mod.ieYx;
+			iyo = mod.ioYy-bnd.ntap;
+			iye = mod.ioYy;
+			izo = mod.ioYz;
+			ize = mod.ieYz;
+		
+			iby = (bnd.ntap+iyo+1);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+									c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+						vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapy[iby-iy];
+					}
+				}
+			}
+
+			/* Vz field */
+			ixo = mod.ioZx;
+			ixe = mod.ieZx;
+			iyo = mod.ioZy-bnd.ntap;
+			iye = mod.ioZy;
+			izo = mod.ioZz;
+			ize = mod.ieZz;
+			
+			iby = (bnd.ntap+iyo+1);
+#pragma omp for private (ix,iy,iz) 
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+						vz[iy*n1*n2+ix*n1+iz] *= bnd.tapy[iby-iy];
+					}
+				}
+			}
+		}
+		
+	}
+
+	/********/
+	/* Back */
+	/********/
+	if (bnd.bac==4) {
+		
+		if (mod.ischeme <= 2) { /* Acoustic scheme */
+			
+			/* Vx field */
+			ixo = mod.ioXx;
+			ixe = mod.ieXx;
+			iyo = mod.ieXy;
+			iye = mod.ieXy+bnd.ntap;
+			izo = mod.ioXz;
+			ize = mod.ieXz;
+		
+			iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vx[iy*n1*n2+ix*n1+iz] -= rox[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+									c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+		
+						vx[iy*n1*n2+ix*n1+iz]   *= bnd.tapy[iy-iby];
+					}
+				}
+			}
+
+			/* Vy field */
+			ixo = mod.ioYx;
+			ixe = mod.ieYx;
+			iyo = mod.ieYy;
+			iye = mod.ieYy+bnd.ntap;
+			izo = mod.ioYz;
+			ize = mod.ieYz;
+		
+			iby = (iyo);
+#pragma omp for private(ix,iy,iz)
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vy[iy*n1*n2+ix*n1+iz] -= roy[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]	   - tzz[(iy-1)*n1*n2+ix*n1+iz]) +
+									c2*(tzz[(iy+1)*n1*n2+ix*n1+iz] - tzz[(iy-2)*n1*n2+ix*n1+iz]));
+		
+						vy[iy*n1*n2+ix*n1+iz]   *= bnd.tapy[iy-iby];
+					}
+				}
+			}
+
+			/* Vz field */
+			ixo = mod.ioZx;
+			ixe = mod.ieZx;
+			iyo = mod.ioZy;
+			iye = mod.ieZy+bnd.ntap;
+			izo = mod.ioZz;
+			ize = mod.ieZz;
+			
+			iby = (iyo);
+#pragma omp for private (ix,iy,iz) 
+			for (ix=ixo; ix<ixe; ix++) {
+#pragma ivdep
+				for (iy=iyo; iy<iye; iy++) {
+					for (iz=izo; iz<ize; iz++) {
+						vz[iy*n1*n2+ix*n1+iz] -= roz[iy*n1*n2+ix*n1+iz]*(
+									c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+									c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+		
+						vz[iy*n1*n2+ix*n1+iz] *= bnd.tapy[iy-iby];
+					}
+				}
+			}
+		}
+		
+	}
+
+    if ( (npml != 0) && (itime==mod.nt-1) && pml) {
+#pragma omp master
+{
+		if (allocated) {
+            free(Vxpml);
+        	free(Vzpml);
+        	free(sigmu);
+        	free(RA);
+			allocated=0;
+		}
+}
+	}
+
+	return 0;
+} 
+
+long boundariesV3D(modPar mod, bndPar bnd, float *vx, float *vy, float *vz, float *tzz, float *tyy, float *txx, float *txz, float *txy, float *tyz, float *rox, float *roy, float *roz, float *l2m, float *lam, float *mul, long itime, long verbose)
+{
+/*********************************************************************
+	 
+	AUTHOR:
+	Jan Thorbecke (janth@xs4all.nl)
+	 The Netherlands 
+	 
+***********************************************************************/
+
+	float c1, c2;
+	float dp, dvx, dvy, dvz;
+	long   ix, iy, iz, ixs, iys, izs, izp, ib;
+    long   nx, ny, nz, n1, n2, n3;
+	long   is0, isrc;
+	long   ixo, ixe, iyo, iye, izo, ize;
+    long   npml, ipml, ipml2, pml;
+    float kappu, alphu, sigmax, R, a, m, fac, dx, dy, dt;
+    float *p;
+    static float *Pxpml, *Pypml, *Pzpml, *sigmu, *RA;
+	static long allocated=0;
+    float Jx, Jy, Jz, rho, d;
+    
+    c1 = 9.0/8.0;
+    c2 = -1.0/24.0;
+    nx  = mod.nx;
+    ny  = mod.ny;
+    nz  = mod.nz;
+    n1  = mod.naz;
+    n2  = mod.nax;
+    n3  = mod.nay;
+    dx  = mod.dx;
+    dy  = mod.dy;
+    dt  = mod.dt;
+    fac = dt/dx;
+    if ( (bnd.top==2) || (bnd.bot==2) || (bnd.lef==2) || (bnd.rig==2) ) pml=1;
+	else pml=0;
+
+/************************************************************/
+/* PML boundaries for acoustic schemes                      */
+/* compute all field values in tapered areas				*/
+/************************************************************/	
+   
+    npml=bnd.npml; /* lenght of pml in grid-points */
+    if ( (npml != 0) && (itime==0) && pml) {
+#pragma omp master
+{
+		if (allocated) {
+            free(Pxpml);
+            free(Pypml);
+        	free(Pzpml);
+        	free(sigmu);
+        	free(RA);
+		}
+        Pxpml = (float *)calloc(2*n1*n3*npml,sizeof(float));
+        Pypml = (float *)calloc(2*n2*n1*npml,sizeof(float));
+        Pzpml = (float *)calloc(2*n3*n2*npml,sizeof(float));
+        sigmu = (float *)calloc(npml,sizeof(float));
+        RA    = (float *)calloc(npml,sizeof(float));
+		allocated = 1;
+        
+        /* calculate sigmu and RA only once with fixed velocity Cp */
+        m=bnd.m; /* scaling order */
+        R=bnd.R; /* the theoretical reflection coefficient after discretization */
+        kappu = 1.0; /* auxiliary attenuation coefficient for small angles */
+        alphu=0.0; /* auxiliary attenuation coefficient  for low frequencies */
+        d = (npml-1)*dx; /* depth of pml */
+        /* sigmu attenuation factor representing the loss in the PML depends on the grid position in the PML */
+        
+        sigmax = ((3.0*mod.cp_min)/(2.0*d))*log(1.0/R);
+        for (ib=0; ib<npml; ib++) { /* ib=0 interface between PML and interior */
+            a = (float) (ib/(npml-1.0));
+            sigmu[ib] = sigmax*pow(a,m);
+            RA[ib] = (1.0)/(1.0+0.5*dt*sigmu[ib]);
+        }
+}
+    }
+
+#pragma omp barrier
+    if (mod.ischeme == 1 && pml) { /* Acoustic scheme PML's */
+        p = tzz; /* Tzz array pointer points to P-field */
+        
+        if (bnd.top==2) mod.ioPz += bnd.npml;
+        if (bnd.bot==2) mod.iePz -= bnd.npml;
+        if (bnd.lef==2) mod.ioPx += bnd.npml;
+        if (bnd.rig==2) mod.iePx -= bnd.npml;
+        if (bnd.fro==2) mod.ioPy += bnd.npml;
+        if (bnd.bac==2) mod.iePy -= bnd.npml;
+
+        /* PML top P */
+        if (bnd.top == 2) {
+            /* PML top P-Vz-component */
+#pragma omp for private (ix, iy, iz, dvx, dvy, dvz, Jz, ipml) 
+			for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+				ipml2 = npml-1;
+				for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+					ipml = npml-1;
+					for (iz=mod.ioPz-npml; iz<mod.ioPz; iz++) {
+						dvx = c1*(vx[iy*n2*n1+(ix+1)*n1+iz] - vx[iy*n2*n1+ix*n1+iz]) +
+							  c2*(vx[iy*n2*n1+(ix+2)*n1+iz] - vx[iy*n2*n1+(ix-1)*n1+iz]);
+						dvy = c1*(vy[(iy+1)*n2*n1+ix*n1+iz] - vy[iy*n2*n1+ix*n1+iz]) +
+							  c2*(vy[(iy+2)*n2*n1+ix*n1+iz] - vy[(iy-1)*n2*n1+ix*n1+iz]);
+						dvz = c1*(vz[iy*n2*n1+ix*n1+iz+1]   - vz[iy*n2*n1+ix*n1+iz]) +
+							  c2*(vz[iy*n2*n1+ix*n1+iz+2]   - vz[iy*n2*n1+ix*n1+iz-1]);
+						Jz = RA[ipml2]*RA[ipml]*dvz - RA[ipml2]*RA[ipml]*dt*Pzpml[ix*npml+ipml];
+						Pzpml[ix*npml+ipml] += sigmu[ipml]*Jz;
+						p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jz+dvx);
+						ipml--;
+					}
+				}
+			}
+        }
+        
+        /* PML left P */
+        if (bnd.lef == 2) {
+            /* PML left P-Vx-component */
+#pragma omp for private (ix, iz, dvx, dvz, Jx, ipml) 
+            for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+                ipml = npml-1;
+                for (ix=mod.ioPx-npml; ix<mod.ioPx; ix++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jx = RA[ipml]*dvx - RA[ipml]*dt*Pxpml[iz*npml+ipml];
+                    Pxpml[iz*npml+ipml] += sigmu[ipml]*Jx;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jx+dvz);
+                    ipml--;
+                }
+            }
+        }
+        
+        /* PML corner left-top P */
+        if (bnd.lef == 2 && bnd.top == 2) {
+            /* PML left P-Vx-component */
+#pragma omp for private (ix, iz, dvx, Jx, ipml) 
+            for (iz=mod.ioPz-npml; iz<mod.ioPz; iz++) {
+                ipml = npml-1;
+                for (ix=mod.ioPx-npml; ix<mod.ioPx; ix++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    Jx = RA[ipml]*dvx - RA[ipml]*dt*Pxpml[iz*npml+ipml];
+                    Pxpml[iz*npml+ipml] += sigmu[ipml]*Jx;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jx);
+                    ipml--;
+                }
+            }
+            /* PML top P-Vz-component */
+#pragma omp for private (ix, iz, dvz, Jz, ipml) 
+            for (ix=mod.ioPx-npml; ix<mod.ioPx; ix++) {
+                ipml = npml-1;
+                for (iz=mod.ioPz-npml; iz<mod.ioPz; iz++) {
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jz = RA[ipml]*dvz - RA[ipml]*dt*Pzpml[ix*npml+ipml];
+                    Pzpml[ix*npml+ipml] += sigmu[ipml]*Jz;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jz);
+                    ipml--;
+                }
+            }
+        }
+        
+        /* PML right P */
+        if (bnd.rig == 2) {
+            /* PML right P Vx-component */
+#pragma omp for private (ix, iz, dvx, dvz, Jx, ipml) 
+            for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+                ipml = 0;
+                for (ix=mod.iePx; ix<mod.iePx+npml; ix++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jx = RA[ipml]*dvx - RA[ipml]*dt*Pxpml[n1*npml+iz*npml+ipml];
+                    Pxpml[n1*npml+iz*npml+ipml] += sigmu[ipml]*Jx;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jx+dvz);
+                    ipml++;
+                }
+            }
+        }
+        
+        /* PML corner right-top P */
+        if (bnd.rig == 2 && bnd.top == 2) {
+            /* PML right P Vx-component */
+#pragma omp for private (ix, iz, dvx, Jx, ipml) 
+            for (iz=mod.ioPz-npml; iz<mod.ioPz; iz++) {
+                ipml = 0;
+                for (ix=mod.iePx; ix<mod.iePx+npml; ix++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    Jx = RA[ipml]*dvx - RA[ipml]*dt*Pxpml[n1*npml+iz*npml+ipml];
+                    Pxpml[n1*npml+iz*npml+ipml] += sigmu[ipml]*Jx;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jx);
+                    ipml++;
+                }
+            }
+            /* PML top P-Vz-component */
+#pragma omp for private (ix, iz, dvz, Jz, ipml) 
+            for (ix=mod.iePx; ix<mod.iePx+npml; ix++) {
+                ipml = npml-1;
+                for (iz=mod.ioPz-npml; iz<mod.ioPz; iz++) {
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jz = RA[ipml]*dvz - RA[ipml]*dt*Pzpml[ix*npml+ipml];
+                    Pzpml[ix*npml+ipml] += sigmu[ipml]*Jz;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jz);
+                    ipml--;
+                }
+            }
+        }
+        
+        /* PML bottom P */
+        if (bnd.bot == 2) {
+            /* PML bottom P Vz-component */
+#pragma omp for private (ix, iz, dvx, dvz, Jz, ipml)
+            for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+                ipml = 0;
+                for (iz=mod.iePz; iz<mod.iePz+npml; iz++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jz = RA[ipml]*dvz - RA[ipml]*dt*Pzpml[n2*npml+ix*npml+ipml];
+                    Pzpml[n2*npml+ix*npml+ipml] += sigmu[ipml]*Jz;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jz+dvx);
+                    ipml++;
+                }
+            }
+        }
+        
+        /* PML corner bottom-right P */
+        if (bnd.bot == 2 && bnd.rig == 2) {
+            /* PML bottom P Vz-component */
+#pragma omp for private (ix, iz, dvz, Jz, ipml)
+            for (ix=mod.iePx; ix<mod.iePx+npml; ix++) {
+                ipml = 0;
+                for (iz=mod.iePz; iz<mod.iePz+npml; iz++) {
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jz = RA[ipml]*dvz - RA[ipml]*dt*Pzpml[n2*npml+ix*npml+ipml];
+                    Pzpml[n2*npml+ix*npml+ipml] += sigmu[ipml]*Jz;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jz);
+                    ipml++;
+                }
+            }
+            /* PML right P Vx-component */
+#pragma omp for private (ix, iz, dvx, Jx, ipml)
+            for (iz=mod.iePz; iz<mod.iePz+npml; iz++) {
+                ipml = 0;
+                for (ix=mod.iePx; ix<mod.iePx+npml; ix++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    Jx = RA[ipml]*dvx - RA[ipml]*dt*Pxpml[n1*npml+iz*npml+ipml];
+                    Pxpml[n1*npml+iz*npml+ipml] += sigmu[ipml]*Jx;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jx);
+                    //p[ix*n1+iz] -= l2m[ix*n1+iz]*(dvx);
+                    ipml++;
+                }
+            }
+        }
+        
+        /* PML corner left-bottom P */
+        if (bnd.bot == 2 && bnd.lef == 2) {
+            /* PML bottom P Vz-component */
+#pragma omp for private (ix, iz, dvz, Jz, ipml)
+            for (ix=mod.ioPx-npml; ix<mod.ioPx; ix++) {
+                ipml = 0;
+                for (iz=mod.iePz; iz<mod.iePz+npml; iz++) {
+                    dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+                          c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+                    Jz = RA[ipml]*dvz - RA[ipml]*dt*Pzpml[n2*npml+ix*npml+ipml];
+                    Pzpml[n2*npml+ix*npml+ipml] += sigmu[ipml]*Jz;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jz);
+                    ipml++;
+                }
+            }
+            /* PML left P Vx-component */
+#pragma omp for private (ix, iz, dvx, Jx, ipml)
+            for (iz=mod.iePz; iz<mod.iePz+npml; iz++) {
+                ipml = npml-1;
+                for (ix=mod.ioPx-npml; ix<mod.ioPx; ix++) {
+                    dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+                          c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+                    Jx = RA[ipml]*dvx - RA[ipml]*dt*Pxpml[iz*npml+ipml];
+                    Pxpml[iz*npml+ipml] += sigmu[ipml]*Jx;
+                    p[ix*n1+iz] -= l2m[ix*n1+iz]*(Jx);
+                    ipml--;
+                }
+            }
+        }
+        if (bnd.top==2) mod.ioPz -= bnd.npml;
+        if (bnd.bot==2) mod.iePz += bnd.npml;
+        if (bnd.lef==2) mod.ioPx -= bnd.npml;
+        if (bnd.rig==2) mod.iePx += bnd.npml;
+
+    } /* end acoustic PML */
+
+
+	
+/****************************************************************/	
+/* Free surface: calculate free surface conditions for stresses */
+/****************************************************************/
+
+	
+	ixo = mod.ioPx;
+	ixe = mod.iePx;
+	iyo = mod.ioPy;
+	iye = mod.iePy;
+	izo = mod.ioPz;
+	ize = mod.iePz;
+
+	if (mod.ischeme <= 2) { /* Acoustic scheme */
+		if (bnd.top==1) { /* free surface at top */
+#pragma omp	for private (ix,iy) nowait
+			for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+				for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+					iz = bnd.surface[iy*n2+ix];
+					tzz[iy*n2*n1+ix*n1+iz] = 0.0;
+					//vz[ix*n1+iz] = -vz[ix*n1+iz+1];
+					//vz[ix*n1+iz-1] = -vz[ix*n1+iz+2];
+				}
+			}
+		}
+		if (bnd.rig==1) { /* free surface at right */
+#pragma omp	for private (iy,iz) nowait
+			for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+				for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+					tzz[iy*n1*n2+(mod.iePx-1)*n1+iz] = 0.0;
+				}
+			}
+		}
+		if (bnd.fro==1) { /* free surface at front */
+#pragma omp	for private (ix,iz) nowait
+			for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+				for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+					tzz[(mod.ioPy-1)*n1*n2+ix*n1+iz] = 0.0;
+				}
+			}
+		}
+		if (bnd.bot==1) { /* free surface at bottom */
+#pragma omp	for private (ix,iy) nowait
+			for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+				for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+					tzz[iy*n1*n2+ix*n1+mod.iePz-1] = 0.0;
+				}
+			}
+		}
+		if (bnd.lef==1) { /* free surface at left */
+#pragma omp	for private (iy,iz) nowait
+			for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+				for (iy=mod.ioPy; iy<mod.iePy; iy++) {
+					tzz[iy*n1*n2+(mod.ioPx-1)*n1+iz] = 0.0;
+				}
+			}
+		}
+		if (bnd.bac==1) { /* free surface at back */
+#pragma omp	for private (ix,iz) nowait
+			for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+				for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+					tzz[(mod.iePy-1)*n1*n2+ix*n1+iz] = 0.0;
+				}
+			}
+		}
+	}
+	
+    if ( (npml != 0) && (itime==mod.nt-1) && pml) {
+#pragma omp master
+{
+		if (allocated) {
+            free(Pxpml);
+			free(Pypml);
+        	free(Pzpml);
+        	free(sigmu);
+        	free(RA);
+            allocated=0;
+		}
+}
+	}
+
+	return 0;
+}
diff --git a/fdelmodc3D/fdelmodc3D.c b/fdelmodc3D/fdelmodc3D.c
index a79ede1973ad96f941f5eeb096f8486954c7a5d2..1bb3ed3fc890c62e4548ad1867f72114d52647a4 100644
--- a/fdelmodc3D/fdelmodc3D.c
+++ b/fdelmodc3D/fdelmodc3D.c
@@ -589,51 +589,41 @@ shared (shot, bnd, mod, src, wav, rec, ixsrc, iysrc, izsrc, it, src_nwav, verbos
 //						vx, vz, tzz, rox, roz, l2m, verbose);
 //					break;
 				case -1 : /* Acoustic dissipative media FD kernel */
-					acoustic4_qr(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-						vx, vz, tzz, rox, roz, l2m, verbose);
+					vmess("Acoustic dissipative not yet available");
 					break;
 				case 1 : /* Acoustic FD kernel */
 					if (mod.iorder==2) {
-						acoustic2(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-							vx, vz, tzz, rox, roz, l2m, verbose);
+						vmess("Acoustic order 2 not yet available");
 					}
 					else if (mod.iorder==4) {
                         if (mod.sh) {
-                            acousticSH4(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-                                  vx, vz, tzz, rox, roz, l2m, verbose);
+                            vmess("SH order 4 not yet available");
                         }
                         else {
-                            acoustic4(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-                                      vx, vz, tzz, rox, roz, l2m, verbose);
+							acoustic4_3D(mod, src, wav, bnd, it, ixsrc, iysrc, izsrc, src_nwav,
+									vx, vy, vz, tzz, rox, roy, roz, l2m, verbose);
                         }
 					}
 					else if (mod.iorder==6) {
-						acoustic6(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-							vx, vz, tzz, rox, roz, l2m, verbose);
+						vmess("Acoustic order 6 not yet available");
 					}
 					break;
 				case 2 : /* Visco-Acoustic FD kernel */
-					viscoacoustic4(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-							vx, vz, tzz, rox, roz, l2m, tss, tep, q, verbose);
+						vmess("Visco-Acoustic order 4 not yet available");
 					break;
 				case 3 : /* Elastic FD kernel */
                     if (mod.iorder==4) {
-                        elastic4(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-                            vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, verbose);
+						vmess("Elastic order 4 not yet available");
 					}
 					else if (mod.iorder==6) {
-                        elastic6(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-							vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, verbose);
+						vmess("Elastic order 6 not yet available");
                     }
 					break;
 				case 4 : /* Visco-Elastic FD kernel */
-					viscoelastic4(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-						vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, 
-						tss, tep, tes, r, q, p, verbose);
+						vmess("Visco-Elastic order 4 not yet available");
 					break;
 				case 5 : /* Elastic FD kernel with S-velocity set to zero*/
-                     elastic4dc(mod, src, wav, bnd, it, ixsrc, izsrc, src_nwav, 
-                            vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, verbose);
+						vmess("DC-Elastic order 4 not yet available");
 					break;
 			}
 
@@ -649,29 +639,33 @@ shared (shot, bnd, mod, src, wav, rec, ixsrc, iysrc, izsrc, it, src_nwav, verbos
                 /* Note that time step it=0 (t=0 for t**-fields t=-1/2 dt for v*-field) is not recorded */
 				isam        = (it-rec.delay-itwritten)/rec.skipdt+1;
 				/* store time at receiver positions */
-				getRecTimes(mod, rec, bnd, it, isam, vx, vz, tzz, txx, txz, 
-					l2m, rox, roz, 
-					rec_vx, rec_vz, rec_txx, rec_tzz, rec_txz, 
+				getRecTimes3D(mod, rec, bnd, it, isam, vx, vy, vz,
+					tzz, tyy, txx, txz, txy, tyz,
+					l2m, rox, roy, roz,
+					rec_vx, rec_vy, rec_vz,
+					rec_txx, rec_tyy, rec_tzz, rec_txz, rec_txy, rec_tyz,
 					rec_p, rec_pp, rec_ss, rec_udp, rec_udvz, verbose);
 			
 				/* at the end of modeling a shot, write receiver array to output file(s) */
 				if (writeToFile && (it+rec.skipdt <= it1-1) ) {
 					fileno = ( ((it-rec.delay)/rec.skipdt)+1)/rec.nt;
-					writeRec(rec, mod, bnd, wav, ixsrc, izsrc, isam+1, ishot, fileno,
-						rec_vx, rec_vz, rec_txx, rec_tzz, rec_txz, 
-						rec_p, rec_pp, rec_ss, rec_udp, rec_udvz, verbose);
+					writeRec3D(rec, mod, bnd, wav, ixsrc, iysrc, izsrc, isam+1, ishot, fileno, 
+             			rec_vx, rec_vy, rec_vz, rec_txx, rec_tyy, rec_tzz, rec_txz, rec_tyz, rec_txy,
+             			rec_p, rec_pp, rec_ss, rec_udp, rec_udvz, verbose);
 				}
 			}
 
 			/* write snapshots to output file(s) */
 			if (sna.nsnap) {
-				writeSnapTimes(mod, sna, bnd, wav, ixsrc, izsrc, it, vx, vz, tzz, txx, txz, verbose);
+				writeSnapTimes3D(mod, sna, bnd, wav, ixsrc, iysrc, izsrc, it,
+								vx, vy, vz, tzz, tyy, txx, txz, tyz, txy, verbose);
+
 			}
 
 			/* calculate beams */
 			if(sna.beam) {
-				getBeamTimes(mod, sna, vx, vz, tzz, txx,  txz, 
-					beam_vx, beam_vz, beam_txx, beam_tzz, beam_txz, 
+				getBeamTimes3D(mod, sna, vx, vy, vz, tzz, tyy, txx, txz, tyz, txy,
+				 	beam_vx, beam_vy, beam_vz, beam_txx, beam_tyy, beam_tzz, beam_txz, beam_tyz, beam_txy,
 					beam_p, beam_pp, beam_ss, verbose);
 			}
 }
@@ -696,27 +690,29 @@ shared (shot, bnd, mod, src, wav, rec, ixsrc, iysrc, izsrc, it, src_nwav, verbos
 		if (fileno) fileno++;
 		
 		if (rec.scale==1) { /* scale receiver with distance src-rcv */
-			float xsrc, zsrc, Rrec, rdx, rdz;
+			float xsrc, ysrc, zsrc, Rrec, rdx, rdy, rdz;
 			long irec;
 			xsrc=mod.x0+mod.dx*ixsrc;
+			ysrc=mod.y0+mod.dy*iysrc;
 			zsrc=mod.z0+mod.dz*izsrc;
 			for (irec=0; irec<rec.n; irec++) {
 				rdx=mod.x0+rec.xr[irec]-xsrc;
+				rdy=mod.y0+rec.yr[irec]-ysrc;
 				rdz=mod.z0+rec.zr[irec]-zsrc;
-				Rrec = sqrt(rdx*rdx+rdz*rdz);
-				fprintf(stderr,"Rec %li is scaled with distance %f R=%.2f,%.2f S=%.2f,%.2f\n", irec, Rrec,rdx,rdz,xsrc,zsrc);
+				Rrec = sqrt(rdx*rdx+rdy*rdy+rdz*rdz);
+				fprintf(stderr,"Rec %li is scaled with distance %f R=%.2f,%.2f,%.2f S=%.2f,%.2f,%.2f\n", irec, Rrec,rdx,rdy,rdz,xsrc,ysrc,zsrc);
 				for (it=0; it<rec.nt; it++) {
 					rec_p[irec*rec.nt+it] *= sqrt(Rrec);
 				}
 			}
 		}
-		writeRec(rec, mod, bnd, wav, ixsrc, izsrc, isam+1, ishot, fileno,
-			rec_vx, rec_vz, rec_txx, rec_tzz, rec_txz, 
-			rec_p, rec_pp, rec_ss, rec_udp, rec_udvz, verbose);
+		writeRec3D(rec, mod, bnd, wav, ixsrc, iysrc, izsrc, isam+1, ishot, fileno, 
+            rec_vx, rec_vy, rec_vz, rec_txx, rec_tyy, rec_tzz, rec_txz, rec_tyz, rec_txy,
+            rec_p, rec_pp, rec_ss, rec_udp, rec_udvz, verbose);
 		
-		writeBeams(mod, sna, ixsrc, izsrc, ishot, fileno, 
-				   beam_vx, beam_vz, beam_txx, beam_tzz, beam_txz, 
-				   beam_p, beam_pp, beam_ss, verbose);
+		writeBeams3D(mod, sna, ixsrc, iysrc, izsrc, ishot, fileno, 
+			   		beam_vx, beam_vy, beam_vz, beam_txx, beam_tyy, beam_tzz, beam_txz, beam_tyz, beam_txy, 
+			   		beam_p, beam_pp, beam_ss, verbose);
 		
 
 	} /* end of loop over number of shots */
@@ -731,20 +727,26 @@ shared (shot, bnd, mod, src, wav, rec, ixsrc, iysrc, izsrc, it, src_nwav, verbos
 
 	initargs(argc,argv); /* this will free the arg arrays declared */
 	free(rox);
+	free(roy);
 	free(roz);
 	free(l2m);
 	free(src_nwav[0]);
 	free(src_nwav);
 	free(vx);
+	free(vy);
 	free(vz);
 	free(tzz);
 	freeStoreSourceOnSurface();
 	if (rec.type.vz)  free(rec_vz);
+	if (rec.type.vy)  free(rec_vy);
 	if (rec.type.vx)  free(rec_vx);
 	if (rec.type.p)   free(rec_p);
 	if (rec.type.txx) free(rec_txx);
+	if (rec.type.tyy) free(rec_tyy);
 	if (rec.type.tzz) free(rec_tzz);
 	if (rec.type.txz) free(rec_txz);
+	if (rec.type.tyz) free(rec_tyz);
+	if (rec.type.txy) free(rec_txy);
 	if (rec.type.pp)  free(rec_pp);
 	if (rec.type.ss)  free(rec_ss);
 	if (rec.type.ud)  {
@@ -753,11 +755,15 @@ shared (shot, bnd, mod, src, wav, rec, ixsrc, iysrc, izsrc, it, src_nwav, verbos
 	}
 	if(sna.beam) {
 		if (sna.type.vz)  free(beam_vz);
+		if (sna.type.vy)  free(beam_vy);
 		if (sna.type.vx)  free(beam_vx);
 		if (sna.type.p)   free(beam_p);
 		if (sna.type.txx) free(beam_txx);
+		if (sna.type.tyy) free(beam_tyy);
 		if (sna.type.tzz) free(beam_tzz);
 		if (sna.type.txz) free(beam_txz);
+		if (sna.type.tyz) free(beam_tyz);
+		if (sna.type.txy) free(beam_txy);
 		if (sna.type.pp)  free(beam_pp);
 		if (sna.type.ss)  free(beam_ss);
 	}
@@ -771,7 +777,10 @@ shared (shot, bnd, mod, src, wav, rec, ixsrc, iysrc, izsrc, it, src_nwav, verbos
 		free(lam);
 		free(mul);
 		free(txz);
+		free(tyz);
+		free(txy);
 		free(txx);
+		free(tyy);
 	}
 	if (mod.ischeme==4) {
 		free(tss);
diff --git a/fdelmodc3D/getBeamTimes3D.c b/fdelmodc3D/getBeamTimes3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..f23ef7b7a4e4f163cdef3dc4b8d367b06e910743
--- /dev/null
+++ b/fdelmodc3D/getBeamTimes3D.c
@@ -0,0 +1,246 @@
+#define _FILE_OFFSET_BITS 64
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <math.h>
+#include <string.h>
+#include "segy.h"
+#include "fdelmodc3D.h"
+
+/**
+*  getBeamTimes: stores energy fields (beams) in arrays at certain time steps 
+*  writeBeams: writes the stored fields to output file(s) 
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+
+FILE *fileOpen(char *file, char *ext, int append);
+int traceWrite(segy *hdr, float *data, int n, FILE *fp);
+void name_ext(char *filename, char *extension);
+void vmess(char *fmt, ...);
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+long getBeamTimes3D(modPar mod, snaPar sna, float *vx, float *vy, float *vz, float *tzz, float *tyy, float *txx, float *txz, float *tyz, float *txy,
+				 float *beam_vx, float *beam_vy, float *beam_vz, float *beam_txx, float *beam_tyy, float *beam_tzz, float *beam_txz, float *beam_tyz, float *beam_txy,
+				 float *beam_p, float *beam_pp, float *beam_ss, long verbose)
+{
+	long n1, n2, ibndx, ibndy, ibndz, ixs, iys, izs, ize, i, j, l;
+	long ix, iy, iz, ix2, iy2, iz2;
+	float sdx, s, p;
+
+    ibndx = mod.ioPx;
+    ibndy = mod.ioPy;
+    ibndz = mod.ioPz;
+	n1   = mod.naz;
+	n2   = mod.nax;
+	sdx  = 1.0/mod.dx;
+	izs = sna.z1+ibndx;
+	ize = sna.z2+ibndz;
+
+    for (iys=sna.y1, l=0; iys<=sna.y2; iys+=sna.skipdy, l++) {
+        iy  = iys+ibndy;
+        iy2 = iy+1;
+        for (ixs=sna.x1, i=0; ixs<=sna.x2; ixs+=sna.skipdx, i++) {
+            ix  = ixs+ibndx;
+            ix2 = ix+1;
+
+            if (sna.type.vx) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_vx[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(vx[iy*n1*n2+ix2*n1+iz]*vx[iy*n1*n2+ix2*n1+iz]);
+                }
+            }
+            if (sna.type.vy) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_vy[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(vy[iy2*n1*n2+ix*n1+iz]*vx[iy2*n1*n2+ix*n1+iz]);
+                }
+            }
+            if (sna.type.vz) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_vz[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(vz[iy*n1*n2+ix*n1+iz+1]*vz[iy*n1*n2+ix*n1+iz+1]);
+                }
+            }
+            if (sna.type.p) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_p[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(tzz[iy*n1*n2+ix*n1+iz]*tzz[iy*n1*n2+ix*n1+iz]);
+                }
+            }
+            if (sna.type.tzz) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_tzz[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(tzz[iy*n1*n2+ix*n1+iz]*tzz[iy*n1*n2+ix*n1+iz]);
+                }
+            }
+            if (sna.type.tyy) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_tyy[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(tyy[iy*n1*n2+ix*n1+iz]*tyy[iy*n1*n2+ix*n1+iz]);
+                }
+            }
+            if (sna.type.txx) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_txx[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(txx[iy*n1*n2+ix*n1+iz]*txx[iy*n1*n2+ix*n1+iz]);
+                }
+            }
+            if (sna.type.txz) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_txz[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(txz[iy*n1*n2+ix2*n1+iz+1]*txz[iy*n1*n2+ix2*n1+iz+1]);
+                }
+            }
+            if (sna.type.tyz) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_tyz[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(tyz[iy2*n1*n2+ix*n1+iz+1]*tyz[iy2*n1*n2+ix*n1+iz+1]);
+                }
+            }
+            if (sna.type.txz) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    beam_txy[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(txy[iy2*n1*n2+ix2*n1+iz]*txy[iy2*n1*n2+ix2*n1+iz]);
+                }
+            }
+            /* calculate divergence of velocity field */
+            if (sna.type.pp) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    iz2 = iz+1;
+                    p =    sdx*((vx[iy*n1*n2+ix2*n1+iz]-vx[iy*n1*n2+ix*n1+iz])+
+                                (vy[iy2*n1*n2+ix*n1+iz]-vy[iy*n1*n2+ix*n1+iz])+
+                                (vz[iy*n1*n2+ix*n1+iz2]-vz[iy*n1*n2+ix*n1+iz]));
+                    beam_pp[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(p*p);
+                }
+            }
+            /* calculate rotation of velocity field */
+            if (sna.type.ss) {
+                for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+                    iz2 = iz+1;
+                    s =    sdx*((vx[iy2*n1*n2+ix2*n1+iz2]-vx[iy*n1*n2+ix2*n1+iz])-
+                                (vy[iy2*n1*n2+ix2*n1+iz2]-vy[iy2*n1*n2+ix*n1+iz])-
+                                (vz[iy2*n1*n2+ix2*n1+iz2]-vz[iy*n1*n2+ix*n1+iz2]));
+                    beam_ss[l*sna.nz*sna.nz+i*sna.nz+j] += sqrt(s*s);
+                }
+            }
+        }
+    }
+	return 0;
+}
+
+
+long writeBeams3D(modPar mod, snaPar sna, long ixsrc, long iysrc, long izsrc, long ishot, long fileno, 
+			   float *beam_vx, float *beam_vy, float *beam_vz, float *beam_txx, float *beam_tyy, float *beam_tzz, float *beam_txz, float *beam_tyz, float *beam_txy, 
+			   float *beam_p, float *beam_pp, float *beam_ss, long verbose)
+{
+	FILE    *fpvx, *fpvy, *fpvz, *fptxx, *fptyy, *fptzz, *fptxz, *fptyz, *fptxy, *fpp, *fppp, *fpss;
+	long append;
+	long ix, iy;
+	char number[16], filename[1024];
+	segy hdr;
+
+	if (sna.beam==0) return 0;
+	/* all beam snapshots are written to the same output file(s) */
+	if (ishot) append=1;
+	else append=0;
+	
+	strcpy(filename, sna.file_beam);
+	if (fileno) {
+		sprintf(number,"_%03d",fileno);
+		name_ext(filename, number);
+	}
+	if (verbose>2) vmess("Writing beam data to file %s", filename);
+
+
+	if (sna.type.vx)  fpvx  = fileOpen(filename, "_bvx", (int)append);
+	if (sna.type.vy)  fpvy  = fileOpen(filename, "_bvy", (int)append);
+	if (sna.type.vz)  fpvz  = fileOpen(filename, "_bvz", (int)append);
+	if (sna.type.p)   fpp   = fileOpen(filename, "_bp", (int)append);
+	if (sna.type.txx) fptxx = fileOpen(filename, "_btxx", (int)append);
+	if (sna.type.tyy) fptyy = fileOpen(filename, "_btyy", (int)append);
+	if (sna.type.tzz) fptzz = fileOpen(filename, "_btzz", (int)append);
+	if (sna.type.txz) fptxz = fileOpen(filename, "_btxz", (int)append);
+	if (sna.type.tyz) fptyz = fileOpen(filename, "_btyz", (int)append);
+	if (sna.type.txy) fptxy = fileOpen(filename, "_btxy", (int)append);
+	if (sna.type.pp)  fppp  = fileOpen(filename, "_bpp", (int)append);
+	if (sna.type.ss)  fpss  = fileOpen(filename, "_bss", (int)append);
+	
+	memset(&hdr,0,TRCBYTES);
+	hdr.dt     = 1000000*(mod.dt);
+	hdr.scalco = -1000;
+	hdr.scalel = -1000;
+	hdr.sx     = 1000*(mod.x0+ixsrc*mod.dx);
+	hdr.sy     = 1000*(mod.y0+iysrc*mod.dy);
+	hdr.sdepth = 1000*(mod.z0+izsrc*mod.dz);
+	hdr.fldr   = ishot+1;
+	hdr.trid   = 1;
+	hdr.ns     = sna.nz;
+	hdr.trwf   = sna.nx*sna.ny;
+	hdr.ntr    = sna.nx*sna.ny;
+	hdr.f1     = sna.z1*mod.dz+mod.z0;
+	hdr.f2     = sna.x1*mod.dx+mod.x0;
+	hdr.d1     = mod.dz*sna.skipdz;
+	hdr.d2     = mod.dx*sna.skipdx;
+
+    for (iy=0; iy<sna.ny; iy++) {
+        for (ix=0; ix<sna.nx; ix++) {
+            hdr.tracf  = iy*sna.nx+ix+1;
+            hdr.tracl  = iy*sna.nx+ix+1;
+            hdr.gx     = 1000*(mod.x0+(sna.x1+ix)*mod.dx);
+            hdr.gy     = 1000*(mod.y0+(sna.y1+ix)*mod.dy);
+
+            if (sna.type.vx) {
+                traceWrite( &hdr, &beam_vx[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fpvx) ;
+            }
+            if (sna.type.vy) {
+                traceWrite( &hdr, &beam_vy[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fpvy) ;
+            }
+            if (sna.type.vz) {
+                traceWrite( &hdr, &beam_vz[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fpvz) ;
+            }
+            if (sna.type.p) {
+                traceWrite( &hdr, &beam_p[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fpp) ;
+            }
+            if (sna.type.tzz) {
+                traceWrite( &hdr, &beam_tzz[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fptzz) ;
+            }
+            if (sna.type.tyy) {
+                traceWrite( &hdr, &beam_tyy[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fptyy) ;
+            }
+            if (sna.type.txx) {
+                traceWrite( &hdr, &beam_txx[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fptxx) ;
+            }
+            if (sna.type.txz) {
+                traceWrite( &hdr, &beam_txz[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fptxz) ;
+            }
+            if (sna.type.txy) {
+                traceWrite( &hdr, &beam_txy[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fptxy) ;
+            }
+            if (sna.type.tyz) {
+                traceWrite( &hdr, &beam_tyz[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fptyz) ;
+            }
+            if (sna.type.pp) {
+                traceWrite( &hdr, &beam_pp[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fppp) ;
+            }
+            if (sna.type.ss) {
+                traceWrite( &hdr, &beam_ss[iy*sna.nx*sna.nz+ix*sna.nz], (int)sna.nz, fpss) ;
+            }
+        }
+	}
+
+	if (sna.type.vx) fclose(fpvx);
+	if (sna.type.vy) fclose(fpvy);
+	if (sna.type.vz) fclose(fpvz);
+	if (sna.type.p) fclose(fpp);
+	if (sna.type.txx) fclose(fptxx);
+	if (sna.type.tyy) fclose(fptyy);
+	if (sna.type.tzz) fclose(fptzz);
+	if (sna.type.txz) fclose(fptxz);
+	if (sna.type.tyz) fclose(fptyz);
+	if (sna.type.txy) fclose(fptxy);
+	if (sna.type.pp) fclose(fppp);
+	if (sna.type.ss) fclose(fpss);
+
+	return 0;
+}
\ No newline at end of file
diff --git a/fdelmodc3D/getRecTimes3D.c b/fdelmodc3D/getRecTimes3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..33f5578732303b8951f70b416b89e703fdd1ccbc
--- /dev/null
+++ b/fdelmodc3D/getRecTimes3D.c
@@ -0,0 +1,514 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc3D.h"
+#include"par.h"
+
+/**
+*  Stores the wavefield at the receiver positions.
+*
+*  On a staggered grid the fields are all on different positions, 
+*  to compensate for that the rec.int_vx and rec.int_vz options
+*  can be set.
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+long getRecTimes3D(modPar mod, recPar rec, bndPar bnd, long itime, long isam, float *vx, float *vy, float *vz, float *tzz, float *tyy, float *txx, float *txz, float *txy, float *tyz, float *l2m, float *rox, float *roy, float *roz, float *rec_vx, float *rec_vy, float *rec_vz, float *rec_txx, float *rec_tyy, float *rec_tzz, float *rec_txz, float *rec_txy, float *rec_tyz, float *rec_p, float *rec_pp, float *rec_ss, float *rec_udp, float *rec_udvz, long verbose)
+{
+	long n1, n2, ibndx, ibndy, ibndz;
+	long irec, ix, iy, iz, ix2, iy2, iz2, ix1, iy1, iz1;
+	float dvx, dvy, dvz, rdz, rdy, rdx;
+    float C000, C100, C010, C001, C110, C101, C011, C111;
+	float *vz_t, c1, c2, lroz, field;
+
+    ibndx = mod.ioPx;
+    ibndy = mod.ioPy;
+    ibndz = mod.ioPz;
+    if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
+    if (bnd.bac==4 || bnd.fro==2) ibndy += bnd.ntap;
+    if (bnd.top==4 || bnd.top==2) ibndz += bnd.ntap;
+	n1    = mod.naz;
+    n2    = mod.nax;
+	c1 = 9.0/8.0;
+	c2 = -1.0/24.0;
+
+	if (!rec.n) return 0;
+
+/***********************************************************************
+* velocity or txz or potential registrations issues:
+* rec_x and rec_z are related to actual txx/tzz/p positions.
+* offsets from virtual boundaries must be taken into account.
+*
+* vx velocities have one sample less in x-direction
+* vz velocities have one sample less in z-direction
+* txz stresses have one sample less in z-direction and x-direction
+*
+* Note, in the acoustic scheme P is stored in the Tzz array.
+***********************************************************************/
+
+	for (irec=0; irec<rec.n; irec++) {
+		iz = rec.z[irec]+ibndz;
+		iy = rec.y[irec]+ibndy;
+		ix = rec.x[irec]+ibndx;
+		iz1 = iz-1;
+		iy1 = iy-1;
+		ix1 = ix-1;
+		iz2 = iz+1;
+		iy2 = iy+1;
+		ix2 = ix+1;
+		/* interpolation to precise (not necessary on a grid point) position */
+		if ( rec.int_p==3 ) {
+
+			iz = (long)floorf(rec.zr[irec]/mod.dz)+ibndz;
+			iy = (long)floorf(rec.yr[irec]/mod.dy)+ibndy;
+			ix = (long)floorf(rec.xr[irec]/mod.dx)+ibndx;
+			rdz = (rec.zr[irec] - (iz-ibndz)*mod.dz)/mod.dz;
+			rdy = (rec.yr[irec] - (iy-ibndy)*mod.dy)/mod.dy;
+			rdx = (rec.xr[irec] - (ix-ibndx)*mod.dx)/mod.dx;
+			iz1 = iz-1;
+			iy1 = iy-1;
+			ix1 = ix-1;
+			iz2 = iz+1;
+			iy2 = iy+1;
+			ix2 = ix+1;
+			
+			/*
+			 // Interpolate according to Dirk Kraaijpool's scheme 
+			 // Reference:  "Seismic ray fields and ray field maps : theory and algorithms" , 
+			 // PhD thesis Utrecht University,Faculty of Geosciences, 2003) 
+			 
+			 C00 = tzz[ix*n1+iz]      + 0.5*((tzz[(ix+1)*n1+iz]   +tzz[(ix-1)*n1+iz]+ 
+			 tzz[(ix  )*n1+iz+1] +tzz[(ix  )*n1+iz-1])/(2.0*mod.dx));
+			 C10 = tzz[(ix+1)*n1+iz]  + 0.5*((tzz[(ix+2)*n1+iz]   +tzz[(ix  )*n1+iz]+
+			 tzz[(ix+1)*n1+iz+1] +tzz[(ix+1)*n1+iz-1])/(2.0*mod.dz));
+			 C01 = tzz[ix*n1+iz+1]    + 0.5*((tzz[(ix+1)*n1+iz+1] +tzz[(ix-1)*n1+iz+1]+
+			 tzz[(ix)*n1+iz+2]   +tzz[(ix  )*n1+iz])/(2.0*mod.dx));
+			 C11 = tzz[(ix+1)*n1+iz+1]+ 0.5*((tzz[(ix+2)*n1+iz+1] +tzz[(ix  )*n1+iz+1]+
+			 tzz[(ix+1)*n1+iz+2] +tzz[(ix+1)*n1+iz])/(2.0*mod.dz));
+			 */
+			
+			if (rec.type.p){
+				/* bi-linear interpolation */
+				C000 = tzz[iy*n1*n2+ix*n1+iz];
+				C100 = tzz[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = tzz[iy*n1*n2+ix*n1+iz+1];
+				C001 = tzz[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = tzz[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = tzz[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = tzz[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = tzz[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_p[irec*rec.nt+isam] =   C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.txx) {
+				C000 = txx[iy*n1*n2+ix*n1+iz];
+				C100 = txx[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = txx[iy*n1*n2+ix*n1+iz+1];
+				C001 = txx[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = txx[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = txx[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = txx[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = txx[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_txx[irec*rec.nt+isam] = C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.tyy) {
+				C000 = tyy[iy*n1*n2+ix*n1+iz];
+				C100 = tyy[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = tyy[iy*n1*n2+ix*n1+iz+1];
+				C001 = tyy[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = tyy[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = tyy[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = tyy[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = tyy[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_tyy[irec*rec.nt+isam] = C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.tzz) {
+				C000 = tzz[iy*n1*n2+ix*n1+iz];
+				C100 = tzz[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = tzz[iy*n1*n2+ix*n1+iz+1];
+				C001 = tzz[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = tzz[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = tzz[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = tzz[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = tzz[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_tzz[irec*rec.nt+isam] = C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.txz) {
+				C000 = txz[iy*n1*n2+ix*n1+iz];
+				C100 = txz[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = txz[iy*n1*n2+ix*n1+iz+1];
+				C001 = txz[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = txz[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = txz[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = txz[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = txz[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_txz[irec*rec.nt+isam] = C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.txy) {
+				C000 = txy[iy*n1*n2+ix*n1+iz];
+				C100 = txy[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = txy[iy*n1*n2+ix*n1+iz+1];
+				C001 = txy[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = txy[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = txy[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = txy[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = txy[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_txy[irec*rec.nt+isam] = C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.tyz) {
+				C000 = tyz[iy*n1*n2+ix*n1+iz];
+				C100 = tyz[iy*n1*n2+(ix+1)*n1+iz];
+				C010 = tyz[iy*n1*n2+ix*n1+iz+1];
+				C001 = tyz[(iy+1)*n1*n2+ix*n1+iz];
+				C110 = tyz[iy*n1*n2+(ix+1)*n1+iz+1];
+				C101 = tyz[(iy+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = tyz[(iy+1)*n1*n2+ix*n1+iz+1];
+				C111 = tyz[(iy+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_tyz[irec*rec.nt+isam] = C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.pp) {
+				C000 = (vx[iy*n1*n2+ix2*n1+iz]-vx[iy*n1*n2+ix*n1+iz] +
+						vy[iy2*n1*n2+ix*n1+iz]-vy[iy*n1*n2+ix*n1+iz] +
+						vz[iy*n1*n2+ix*n1+iz2]-vz[iy*n1*n2+ix*n1+iz])/mod.dx;
+				C100 = (vx[iy*n1*n2+(ix2+1)*n1+iz]-vx[iy*n1*n2+(ix+1)*n1+iz] +
+						vy[iy2*n1*n2+(ix+1)*n1+iz]-vy[iy*n1*n2+(ix+1)*n1+iz] +
+						vz[iy*n1*n2+(ix+1)*n1+iz2]-vz[iy*n1*n2+(ix+1)*n1+iz])/mod.dx;
+				C010 = (vx[iy*n1*n2+ix2*n1+iz+1]-vx[iy*n1*n2+ix*n1+iz+1] +
+						vy[iy2*n1*n2+ix*n1+iz+1]-vy[iy*n1*n2+ix*n1+iz+1] +
+						vz[iy*n1*n2+ix*n1+iz2+1]-vz[iy*n1*n2+ix*n1+iz+1])/mod.dx;
+				C001 = (vx[(iy+1)*n1*n2+ix2*n1+iz]-vx[(iy+1)*n1*n2+ix*n1+iz] +
+						vy[(iy2+1)*n1*n2+ix*n1+iz]-vy[(iy+1)*n1*n2+ix*n1+iz] +
+						vz[(iy+1)*n1*n2+ix*n1+iz2]-vz[(iy+1)*n1*n2+ix*n1+iz])/mod.dx;
+				C110 = (vx[iy*n1*n2+(ix2+1)*n1+iz+1]-vx[iy*n1*n2+(ix+1)*n1+iz+1] +
+						vy[iy2*n1*n2+(ix+1)*n1+iz+1]-vy[iy*n1*n2+(ix+1)*n1+iz+1] +
+						vz[iy*n1*n2+(ix+1)*n1+iz2+1]-vz[iy*n1*n2+(ix+1)*n1+iz+1])/mod.dx;
+				C101 = (vx[(iy+1)*n1*n2+(ix2+1)*n1+iz]-vx[(iy+1)*n1*n2+(ix+1)*n1+iz] +
+						vy[(iy2+1)*n1*n2+(ix+1)*n1+iz]-vy[(iy+1)*n1*n2+(ix+1)*n1+iz] +
+						vz[(iy+1)*n1*n2+(ix+1)*n1+iz2]-vz[(iy+1)*n1*n2+(ix+1)*n1+iz])/mod.dx;
+				C011 = (vx[(iy+1)*n1*n2+ix2*n1+iz+1]-vx[(iy+1)*n1*n2+ix*n1+iz+1] +
+						vy[(iy2+1)*n1*n2+ix*n1+iz+1]-vy[(iy+1)*n1*n2+ix*n1+iz+1] +
+						vz[(iy+1)*n1*n2+ix*n1+iz2+1]-vz[(iy+1)*n1*n2+ix*n1+iz+1])/mod.dx;
+				C111 = (vx[(iy+1)*n1*n2+(ix2+1)*n1+iz+1]-vx[(iy+1)*n1*n2+(ix+1)*n1+iz+1] +
+						vy[(iy2+1)*n1*n2+(ix+1)*n1+iz+1]-vy[(iy+1)*n1*n2+(ix+1)*n1+iz+1] +
+						vz[(iy+1)*n1*n2+(ix+1)*n1+iz2+1]-vz[(iy+1)*n1*n2+(ix+1)*n1+iz+1])/mod.dx;
+				rec_pp[irec*rec.nt+isam] =  C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.ss) {
+				C000 = 	(vx[iy2*n1*n2+ix2*n1+iz2]-vx[iy*n1*n2+ix2*n1+iz] -
+						(vy[iy2*n1*n2+ix2*n1+iz2]-vy[iy2*n1*n2+ix*n1+iz]) -
+						(vz[iy2*n1*n2+ix2*n1+iz2]-vz[iy*n1*n2+ix*n1+iz2]))/mod.dx;
+				C100 =	(vx[iy2*n1*n2+(ix2+1)*n1+iz2]-vx[iy*n1*n2+(ix2+1)*n1+iz] -
+						(vy[iy2*n1*n2+(ix2+1)*n1+iz2]-vy[iy2*n1*n2+(ix+1)*n1+iz]) -
+						(vz[iy2*n1*n2+(ix2+1)*n1+iz2]-vz[iy*n1*n2+(ix+1)*n1+iz2]))/mod.dx;
+				C010 =	(vx[iy2*n1*n2+ix2*n1+iz2+1]-vx[iy*n1*n2+ix2*n1+iz+1] -
+						(vy[iy2*n1*n2+ix2*n1+iz2+1]-vy[iy2*n1*n2+ix*n1+iz+1]) -
+						(vz[iy2*n1*n2+ix2*n1+iz2+1]-vz[iy*n1*n2+ix*n1+iz2+1]))/mod.dx;
+				C001 =	(vx[(iy2+1)*n1*n2+ix2*n1+iz2]-vx[(iy+1)*n1*n2+ix2*n1+iz] -
+						(vy[(iy2+1)*n1*n2+ix2*n1+iz2]-vy[(iy2+1)*n1*n2+ix*n1+iz]) -
+						(vz[(iy2+1)*n1*n2+ix2*n1+iz2]-vz[(iy+1)*n1*n2+ix*n1+iz2]))/mod.dx;
+				C110 =	(vx[iy2*n1*n2+(ix2+1)*n1+iz2+1]-vx[iy*n1*n2+(ix2+1)*n1+iz+1] -
+						(vy[iy2*n1*n2+(ix2+1)*n1+iz2+1]-vy[iy2*n1*n2+(ix+1)*n1+iz+1]) -
+						(vz[iy2*n1*n2+(ix2+1)*n1+iz2+1]-vz[iy*n1*n2+(ix+1)*n1+iz2+1]))/mod.dx;
+				C101 =	(vx[(iy2+1)*n1*n2+(ix2+1)*n1+iz2]-vx[(iy+1)*n1*n2+(ix2+1)*n1+iz] -
+						(vy[(iy2+1)*n1*n2+(ix2+1)*n1+iz2]-vy[(iy2+1)*n1*n2+(ix+1)*n1+iz]) -
+						(vz[(iy2+1)*n1*n2+(ix2+1)*n1+iz2]-vz[(iy+1)*n1*n2+(ix+1)*n1+iz2]))/mod.dx;
+				C011 =	(vx[(iy2+1)*n1*n2+ix2*n1+iz2+1]-vx[(iy+1)*n1*n2+ix2*n1+iz+1] -
+						(vy[(iy2+1)*n1*n2+ix2*n1+iz2+1]-vy[(iy2+1)*n1*n2+ix*n1+iz+1]) -
+						(vz[(iy2+1)*n1*n2+ix2*n1+iz2+1]-vz[(iy+1)*n1*n2+ix*n1+iz2+1]))/mod.dx;
+				C111 =	(vx[(iy2+1)*n1*n2+(ix2+1)*n1+iz2+1]-vx[(iy+1)*n1*n2+(ix2+1)*n1+iz+1] -
+						(vy[(iy2+1)*n1*n2+(ix2+1)*n1+iz2+1]-vy[(iy2+1)*n1*n2+(ix+1)*n1+iz+1]) -
+						(vz[(iy2+1)*n1*n2+(ix2+1)*n1+iz2+1]-vz[(iy+1)*n1*n2+(ix+1)*n1+iz2+1]))/mod.dx;
+				rec_ss[irec*rec.nt+isam] =  C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.vz) {
+				C000 = vz[iy*n1*n2+ix*n1+iz2];
+				C100 = vz[iy*n1*n2+(ix+1)*n1+iz2];
+				C010 = vz[iy*n1*n2+ix*n1+iz2+1];
+				C001 = vz[(iy+1)*n1*n2+ix*n1+iz2];
+				C110 = vz[iy*n1*n2+(ix+1)*n1+iz2+1];
+				C101 = vz[(iy+1)*n1*n2+(ix+1)*n1+iz2];
+				C011 = vz[(iy+1)*n1*n2+ix*n1+iz2+1];
+				C111 = vz[(iy+1)*n1*n2+(ix+1)*n1+iz2+1];
+				rec_vz[irec*rec.nt+isam] =  C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.vy) {
+				C000 = vy[iy2*n1*n2+ix*n1+iz];
+				C100 = vy[iy2*n1*n2+(ix+1)*n1+iz];
+				C010 = vy[iy2*n1*n2+ix*n1+iz+1];
+				C001 = vy[(iy2+1)*n1*n2+ix*n1+iz];
+				C110 = vy[iy2*n1*n2+(ix+1)*n1+iz+1];
+				C101 = vy[(iy2+1)*n1*n2+(ix+1)*n1+iz];
+				C011 = vy[(iy2+1)*n1*n2+ix*n1+iz+1];
+				C111 = vy[(iy2+1)*n1*n2+(ix+1)*n1+iz+1];
+				rec_vy[irec*rec.nt+isam] =  C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			if (rec.type.vx) {
+				C000 = vy[iy*n1*n2+ix2*n1+iz];
+				C100 = vy[iy*n1*n2+(ix2+1)*n1+iz];
+				C010 = vy[iy*n1*n2+ix2*n1+iz+1];
+				C001 = vy[(iy+1)*n1*n2+ix2*n1+iz];
+				C110 = vy[iy*n1*n2+(ix2+1)*n1+iz+1];
+				C101 = vy[(iy+1)*n1*n2+(ix2+1)*n1+iz];
+				C011 = vy[(iy+1)*n1*n2+ix2*n1+iz+1];
+				C111 = vy[(iy+1)*n1*n2+(ix2+1)*n1+iz+1];
+				rec_vx[irec*rec.nt+isam] =  C000*(1.0-rdx)*(1.0-rdz)*(1.0-rdy) +
+                                            C100*rdx*(1.0-rdz)*(1.0-rdy) +
+										    C010*(1.0-rdx)*rdz*(1.0-rdy) +
+										    C001*(1.0-rdx)*(1.0-rdz)*rdy +
+                                            C110*rdx*rdz*(1.0-rdy) +
+											C101*rdx*(1.0-rdz)*rdy +
+											C011*(1.0-rdx)*rdz*rdy +
+											C111*rdx*rdz*rdy;
+			}
+			
+		}
+		else { /* read values directly from the grid points */
+			if (verbose>=4 && isam==0) {
+				vmess("Receiver %li read at gridpoint ix=%li iy=%li iz=%li",irec, ix, iy, iz);
+			}
+			/* interpolation of receivers to same time step is only done for acoustic scheme */
+			if (rec.type.p) {
+				if (rec.int_p == 1) {
+					if (mod.ischeme == 1) { /* interpolate Tzz times -1/2 Dt backward to Vz times */
+                        dvx = c1*(vx[iy*n1*n2+(ix+1)*n1+iz] - vx[iy*n1*n2+ix*n1+iz]) +
+                              c2*(vx[iy*n1*n2+(ix+2)*n1+iz] - vx[iy*n1*n2+(ix-1)*n1+iz]);
+                        dvy = c1*(vy[(iy+1)*n1*n2+ix*n1+iz] - vy[iy*n1*n2+ix*n1+iz]) +
+                              c2*(vy[(iy+2)*n1*n2+ix*n1+iz] - vy[(iy-1)*n1*n2+ix*n1+iz]);
+                        dvz = c1*(vz[iy*n1*n2+ix*n1+iz+1]   - vz[iy*n1*n2+ix*n1+iz]) +
+                              c2*(vz[iy*n1*n2+ix*n1+iz+2]   - vz[iy*n1*n2+ix*n1+iz-1]);
+                        field = tzz[iy*n1*n2+ix*n1+iz] + (1.0/2.0)*l2m[iy*n1*n2+ix*n1+iz]*(dvx+dvy+dvz);
+                        dvx = c1*(vx[iy*n1*n2+(ix+1)*n1+iz1] - vx[iy*n1*n2+ix*n1+iz1]) +
+                              c2*(vx[iy*n1*n2+(ix+2)*n1+iz1] - vx[iy*n1*n2+(ix-1)*n1+iz1]);
+                        dvy = c1*(vy[(iy+1)*n1*n2+ix*n1+iz1] - vy[iy*n1*n2+ix*n1+iz1]) +
+                              c2*(vy[(iy+2)*n1*n2+ix*n1+iz1] - vy[(iy-1)*n1*n2+ix*n1+iz1]);
+                        dvz = c1*(vz[iy*n1*n2+ix*n1+iz1+1]   - vz[iy*n1*n2+ix*n1+iz1]) +
+                              c2*(vz[iy*n1*n2+ix*n1+iz1+2]   - vz[iy*n1*n2+ix*n1+iz1-1]);
+                        field += tzz[iy*n1*n2+ix*n1+iz1] + (1.0/2.0)*l2m[iy*n1*n2+ix*n1+iz1]*(dvx+dvy+dvz);
+						rec_p[irec*rec.nt+isam] = 0.5*field;
+					}
+					else {
+						rec_p[irec*rec.nt+isam] = 0.5*(tzz[iy*n1*n2+ix*n1+iz1]+tzz[iy*n1*n2+ix*n1+iz]);
+					}
+				}
+				else if (rec.int_p == 2) {
+					if (mod.ischeme == 1) { /* interpolate Tzz times -1/2 Dt backward to Vx times */
+                        dvx = c1*(vx[iy*n1*n2+(ix+1)*n1+iz] - vx[iy*n1*n2+ix*n1+iz]) +
+                              c2*(vx[iy*n1*n2+(ix+2)*n1+iz] - vx[iy*n1*n2+(ix-1)*n1+iz]);
+                        dvy = c1*(vy[(iy+1)*n1*n2+ix*n1+iz] - vy[iy*n1*n2+ix*n1+iz]) +
+                              c2*(vy[(iy+2)*n1*n2+ix*n1+iz] - vy[(iy-1)*n1*n2+ix*n1+iz]);
+                        dvz = c1*(vz[iy*n1*n2+ix*n1+iz+1]   - vz[iy*n1*n2+ix*n1+iz]) +
+                              c2*(vz[iy*n1*n2+ix*n1+iz+2]   - vz[iy*n1*n2+ix*n1+iz-1]);
+                        field = tzz[iy*n1*n2+ix*n1+iz] + (1.0/2.0)*l2m[iy*n1*n2+ix*n1+iz]*(dvx+dvy+dvz);
+                        dvx = c1*(vx[iy*n1*n2+(ix1+1)*n1+iz] - vx[iy*n1*n2+ix1*n1+iz]) +
+                              c2*(vx[iy*n1*n2+(ix1+2)*n1+iz] - vx[iy*n1*n2+(ix1-1)*n1+iz]);
+                        dvy = c1*(vy[(iy+1)*n1*n2+ix1*n1+iz] - vy[iy*n1*n2+ix1*n1+iz]) +
+                              c2*(vy[(iy+2)*n1*n2+ix1*n1+iz] - vy[(iy-1)*n1*n2+ix1*n1+iz]);
+                        dvz = c1*(vz[iy*n1*n2+ix1*n1+iz+1]   - vz[iy*n1*n2+ix1*n1+iz]) +
+                              c2*(vz[iy*n1*n2+ix1*n1+iz+2]   - vz[iy*n1*n2+ix1*n1+iz-1]);
+                        field += tzz[iy*n1*n2+ix1*n1+iz] + (1.0/2.0)*l2m[iy*n1*n2+ix1*n1+iz]*(dvx+dvy+dvz);
+						rec_p[irec*rec.nt+isam] = 0.5*field;
+					}
+					else {
+						rec_p[irec*rec.nt+isam] = 0.5*(tzz[iy*n1*n2+ix1*n1+iz]+tzz[iy*n1*n2+ix*n1+iz]);
+					}
+				}
+				else {
+					rec_p[irec*rec.nt+isam] = tzz[iy*n1*n2+ix*n1+iz];
+				}
+			}
+			if (rec.type.txx) rec_txx[irec*rec.nt+isam] = txx[iy*n1*n2+ix*n1+iz];
+			if (rec.type.tzz) rec_tzz[irec*rec.nt+isam] = tzz[iy*n1*n2+ix*n1+iz];
+			if (rec.type.txz) { /* time interpolation to be done */
+				if (rec.int_vz == 2 || rec.int_vx == 2) {
+					rec_txz[irec*rec.nt+isam] = 0.125*(
+							txz[iy*n1*n2+ix*n1+iz]  + txz[iy*n1*n2+ix*n1+iz2]+
+							txz[iy2*n1*n2+ix*n1+iz] + txz[iy2*n1*n2+ix*n1+iz2]+
+							txz[iy*n1*n2+ix2*n1+iz]  + txz[iy*n1*n2+ix2*n1+iz2]+
+							txz[iy2*n1*n2+ix2*n1+iz] + txz[iy2*n1*n2+ix2*n1+iz2]);
+				}
+				else {
+					rec_txz[irec*rec.nt+isam] = txz[iy2*n1*n2+ix2*n1+iz2];
+				}
+			}
+			if (rec.type.pp) {
+				rec_pp[irec*rec.nt+isam] = (vx[iy*n1*n2+ix2*n1+iz]-vx[iy*n1*n2+ix*n1+iz] +
+											vy[iy2*n1*n2+ix*n1+iz]-vy[iy*n1*n2+ix*n1+iz] +
+											vz[iy*n1*n2+ix*n1+iz2]-vz[iy*n1*n2+ix*n1+iz])/mod.dx;
+			}
+			if (rec.type.ss) {
+				rec_ss[irec*rec.nt+isam] = (vx[iy2*n1*n2+ix2*n1+iz2]-vx[iy*n1*n2+ix2*n1+iz] -
+										   (vy[iy2*n1*n2+ix2*n1+iz2]-vy[iy2*n1*n2+ix*n1+iz]) -
+										   (vz[iy2*n1*n2+ix2*n1+iz2]-vz[iy*n1*n2+ix*n1+iz2]))/mod.dx;
+			}
+			if (rec.type.vz) {
+/* interpolate vz to vx position to the right and above of vz */
+				if (rec.int_vz == 1) {
+					rec_vz[irec*rec.nt+isam] = 0.125*(
+							vz[iy*n1*n2+ix*n1+iz2]+vz[iy*n1*n2+ix1*n1+iz2]+
+							vz[iy*n1*n2+ix*n1+iz] +vz[iy*n1*n2+ix1*n1+iz]+
+							vz[iy1*n1*n2+ix*n1+iz2]+vz[iy1*n1*n2+ix1*n1+iz2]+
+							vz[iy1*n1*n2+ix*n1+iz] +vz[iy1*n1*n2+ix1*n1+iz]);
+				}
+/* interpolate vz to Txx/Tzz position by taking the mean of 2 values */
+				else if (rec.int_vz == 2) {
+					if (mod.ischeme == 1) { /* interpolate Vz times +1/2 Dt forward to P times */
+                        field = vz[iy*n1*n2+ix*n1+iz] - 0.5*roz[iy*n1*n2+ix*n1+iz]*(
+                        	c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+                        	c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+                        field += vz[iy*n1*n2+ix*n1+iz2] - 0.5*roz[iy*n1*n2+ix*n1+iz2]*(
+                        	c1*(tzz[iy*n1*n2+ix*n1+iz2]   - tzz[iy*n1*n2+ix*n1+iz2-1]) +
+                        	c2*(tzz[iy*n1*n2+ix*n1+iz2+1] - tzz[iy*n1*n2+ix*n1+iz2-2]));
+						rec_vz[irec*rec.nt+isam] = 0.5*field;
+					}
+					else {
+						rec_vz[irec*rec.nt+isam] = 0.5*(vz[iy*n1*n2+ix*n1+iz2]+vz[iy*n1*n2+ix*n1+iz]);
+					}
+				}
+				else {
+					rec_vz[irec*rec.nt+isam] = vz[iy*n1*n2+ix*n1+iz2];
+					//rec_vz[irec*rec.nt+isam] = vz[ix*n1+iz];
+					//fprintf(stderr,"isam=%li vz[%li]=%e vz[%li]=%e vz[%li]=%e \n",isam, iz-1,vz[ix*n1+iz-1],iz,vz[ix*n1+iz], iz+1, vz[ix*n1+iz+1]);
+				}
+			}
+			if (rec.type.vx) {
+/* interpolate vx to vz position to the left and below of vx */
+				if (rec.int_vx == 1) {
+					rec_vx[irec*rec.nt+isam] = 0.125*(
+							vx[iy*n1*n2+ix2*n1+iz]+vx[iy*n1*n2+ix2*n1+iz1]+
+							vx[iy*n1*n2+ix*n1+iz]+vx[iy*n1*n2+ix*n1+iz1]+
+							vx[iy2*n1*n2+ix2*n1+iz]+vx[iy2*n1*n2+ix2*n1+iz1]+
+							vx[iy2*n1*n2+ix*n1+iz]+vx[iy2*n1*n2+ix*n1+iz1]);
+				}
+/* interpolate vx to Txx/Tzz position by taking the mean of 2 values */
+				else if (rec.int_vx == 2) {
+					if (mod.ischeme == 1) { /* interpolate Vx times +1/2 Dt forward to P times */
+            			field = vx[iy*n1*n2+ix*n1+iz] - 0.5*rox[iy*n1*n2+ix*n1+iz]*(
+                			c1*(tzz[iy*n1*n2+ix*n1+iz]     - tzz[iy*n1*n2+(ix-1)*n1+iz]) +
+                			c2*(tzz[iy*n1*n2+(ix+1)*n1+iz] - tzz[iy*n1*n2+(ix-2)*n1+iz]));
+            			field += vx[ix2*n1+iz] - 0.5*rox[ix2*n1+iz]*(
+                			c1*(tzz[iy*n1*n2+ix2*n1+iz]     - tzz[iy*n1*n2+(ix2-1)*n1+iz]) +
+                			c2*(tzz[iy*n1*n2+(ix2+1)*n1+iz] - tzz[iy*n1*n2+(ix2-2)*n1+iz]));
+						rec_vx[irec*rec.nt+isam] = 0.5*field;
+					}
+					else {
+						rec_vx[irec*rec.nt+isam] = 0.5*(vx[iy*n1*n2+ix2*n1+iz]+vx[iy*n1*n2+ix*n1+iz]);
+					}
+				}
+				else {
+					rec_vx[irec*rec.nt+isam] = vx[iy*n1*n2+ix2*n1+iz];
+				}
+			}
+		}
+
+	} /* end of irec loop */
+
+	/* store all x-values on z-level for P Vz for up-down decomposition */
+	if (rec.type.ud) {
+		iz = rec.z[0]+ibndz;
+		iz2 = iz+1;
+		vz_t = (float *)calloc(2*mod.nax*mod.nay,sizeof(float));
+		/* P and Vz are staggered in time and need to correct for this */
+		/* -1- compute Vz at next time step and average with current time step */
+		lroz = mod.dt/(mod.dx*rec.rho);
+    	for (iy=mod.ioZy; iy<mod.ieZy; iy++) {
+			for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+				vz_t[iy*mod.nax+ix] = vz[iy*n1*n2+ix*n1+iz] - lroz*(
+							c1*(tzz[iy*n1*n2+ix*n1+iz]   - tzz[iy*n1*n2+ix*n1+iz-1]) +
+							c2*(tzz[iy*n1*n2+ix*n1+iz+1] - tzz[iy*n1*n2+ix*n1+iz-2]));
+				vz_t[iy*mod.nax+mod.nay*mod.nax+ix] = vz[iy*n1*n2+ix*n1+iz2] - lroz*(
+							c1*(tzz[iy*n1*n2+ix*n1+iz2]   - tzz[iy*n1*n2+ix*n1+iz2-1]) +
+							c2*(tzz[iy*n1*n2+ix*n1+iz2+1] - tzz[iy*n1*n2+ix*n1+iz2-2]));
+			}
+		}
+		for (iy=0; iy<mod.nay; iy++) {
+			for (ix=0; ix<mod.nax; ix++) {
+				/* -2- compute average in time and depth to get Vz at same depth and time as P */
+				rec_udvz[iy*mod.nax*rec.nt+ix*rec.nt+isam] = 0.25*(vz[iy*n1*n2+ix*n1+iz2]+vz[iy*n1*n2+ix*n1+iz]+vz_t[iy*mod.nax+mod.nay*mod.nax+ix]+vz_t[iy*mod.nax+ix]);
+				rec_udp[iy*mod.nax*rec.nt+ix*rec.nt+isam]  = tzz[iy*n1*n2+ix*n1+iz];
+			}
+		}
+		free(vz_t);
+	}
+
+	return 0;
+}
diff --git a/fdelmodc3D/writeRec3D.c b/fdelmodc3D/writeRec3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..4dd9cb4ff5debb11049f283506aac02d57946ea4
--- /dev/null
+++ b/fdelmodc3D/writeRec3D.c
@@ -0,0 +1,255 @@
+#define _FILE_OFFSET_BITS 64
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <math.h>
+#include <string.h>
+#include "segy.h"
+#include "fdelmodc3D.h"
+#include <genfft.h>
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+typedef struct _dcomplexStruct { /* complex number */
+    double r,i;
+} dcomplex;
+#endif/* complex */
+
+/**
+*  Writes the receiver array(s) to output file(s)
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+FILE *fileOpen(char *file, char *ext, int append);
+int traceWrite(segy *hdr, float *data, int n, FILE *fp) ;
+void name_ext(char *filename, char *extension);
+void kxwdecomp(complex *rp, complex *rvz, complex *up, complex *down,
+               int nkx, float dx, int nt, float dt, float fmin, float fmax,
+               float cp, float rho, int vznorm, int verbose);
+
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+long writeRec3D(recPar rec, modPar mod, bndPar bnd, wavPar wav, long ixsrc, long iysrc, long izsrc, long nsam, long ishot, long fileno, 
+             float *rec_vx, float *rec_vy, float *rec_vz, float *rec_txx, float *rec_tyy, float *rec_tzz, float *rec_txz,  float *rec_tyz,  float *rec_txy,
+             float *rec_p, float *rec_pp, float *rec_ss, float *rec_udp, float *rec_udvz, long verbose)
+{
+    FILE    *fpvx, *fpvy, *fpvz, *fptxx, *fptyy, *fptzz, *fptxz, *fptyz, *fptxy, *fpp, *fppp, *fpss, *fpup, *fpdown;
+    float *rec_up, *rec_down, *trace, *rec_vze, *rec_pe;
+    float dx, dy, dt, cp, rho, fmin, fmax;
+    complex *crec_vz, *crec_p, *crec_up, *crec_dw;
+    long irec, ntfft, nfreq, nkx, nky, xorig, yorig, ix, iy, iz, it, ibndx, ibndy;
+    long append, vznorm, sx, sy;
+    double ddt;
+    char number[16], filename[1024];
+    segy hdr;
+    
+    if (!rec.n) return 0;
+    if (ishot) append=1;
+    else append=0;
+
+    /* if the total number of samples exceeds rec_ntsam then a new (numbered) file is opened */
+    /* fileno has a non-zero value (from fdelmodc.c) if the number of samples exceeds rec_ntsam. */
+    strcpy(filename, rec.file_rcv);
+    if (fileno) {
+        sprintf(number,"_%03d",fileno);
+        name_ext(filename, number);
+    }
+#ifdef MPI
+    sx = (long)mod.x0+ixsrc*mod.dx;
+    sprintf(number,"_%06d",sx);
+    name_ext(filename, number);
+#endif
+
+    if (verbose>2) vmess("Writing receiver data to file %s", filename);
+    if (nsam != rec.nt && verbose) vmess("Number of samples written to last file = %li",nsam);
+
+    memset(&hdr,0,TRCBYTES);
+    ddt = (double)mod.dt;/* to avoid rounding in 32 bit precision */
+    dt  = (float)ddt*rec.skipdt;
+    dx  = (rec.x[1]-rec.x[0])*mod.dx;
+    dy  = (rec.y[1]-rec.y[0])*mod.dy;
+    hdr.dt     = (unsigned short)lround((((double)1.0e6*ddt*rec.skipdt)));
+    hdr.scalco = -1000;
+    hdr.scalel = -1000;
+    hdr.sx     = 1000*(mod.x0+ixsrc*mod.dx);
+    hdr.sy     = 1000*(mod.y0+ixsrc*mod.dy);
+    hdr.sdepth = 1000*(mod.z0+izsrc*mod.dz);
+    hdr.selev  = (int)(-1000.0*(mod.z0+izsrc*mod.dz));
+    hdr.fldr   = ishot+1;
+    hdr.trid   = 1;
+    hdr.ns     = nsam;
+    hdr.trwf   = rec.n;
+    hdr.ntr    = rec.n;
+    if (mod.grid_dir) { /* reverse time modeling */
+        hdr.f1 = (-mod.nt+1)*mod.dt;
+    }
+    else {
+        hdr.f1 = 0.0;
+    }
+    hdr.d1     = mod.dt*rec.skipdt;
+    hdr.d2     = (rec.x[1]-rec.x[0])*mod.dx;
+    hdr.f2     = mod.x0+rec.x[0]*mod.dx;
+
+    if (rec.type.vx)  fpvx  = fileOpen(filename, "_rvx", (int)append);
+    if (rec.type.vy)  fpvy  = fileOpen(filename, "_rvy", (int)append);
+    if (rec.type.vz)  fpvz  = fileOpen(filename, "_rvz", (int)append);
+    if (rec.type.p)   fpp   = fileOpen(filename, "_rp", (int)append);
+    if (rec.type.txx) fptxx = fileOpen(filename, "_rtxx", (int)append);
+    if (rec.type.tyy) fptyy = fileOpen(filename, "_rtyy", (int)append);
+    if (rec.type.tzz) fptzz = fileOpen(filename, "_rtzz", (int)append);
+    if (rec.type.txz) fptxz = fileOpen(filename, "_rtxz", (int)append);
+    if (rec.type.tyz) fptyz = fileOpen(filename, "_rtyz", (int)append);
+    if (rec.type.txy) fptxy = fileOpen(filename, "_rtxy", (int)append);
+    if (rec.type.pp)  fppp  = fileOpen(filename, "_rpp", (int)append);
+    if (rec.type.ss)  fpss  = fileOpen(filename, "_rss", (int)append);
+
+    /* decomposed wavefield */
+    if (rec.type.ud && (mod.ischeme==1 || mod.ischeme==2) )  {
+        fpup   = fileOpen(filename, "_ru", (int)append);
+        fpdown = fileOpen(filename, "_rd", (int)append);
+        ntfft = optncr(nsam);
+        nfreq = ntfft/2+1;
+        fmin = 0.0;
+        fmax = wav.fmax;
+        nkx = optncc(2*mod.nax);
+        nky = optncc(2*mod.nay);
+        ibndx = mod.ioPx;
+        ibndy = mod.ioPy;
+        if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
+        if (bnd.fro==4 || bnd.fro==2) ibndy += bnd.ntap;
+        cp  = rec.cp;
+        rho = rec.rho;
+		if (rec.type.ud==2) vznorm=1;
+		else vznorm=0;
+        if (verbose) vmess("Decomposition array at z=%.2f with cp=%.2f rho=%.2f", rec.zr[0]+mod.z0, cp, rho);
+        rec_up  = (float *)calloc(ntfft*nkx*nky,sizeof(float));
+        rec_down= (float *)calloc(ntfft*nkx*nky,sizeof(float));
+        crec_vz = (complex *)malloc(nfreq*nkx*nky*sizeof(complex));
+        crec_p  = (complex *)malloc(nfreq*nkx*nky*sizeof(complex));
+        crec_up = (complex *)malloc(nfreq*nkx*nky*sizeof(complex));
+        crec_dw = (complex *)malloc(nfreq*nkx*nky*sizeof(complex));
+
+        rec_vze = rec_up;
+        rec_pe  = rec_down;
+        /* copy input data into extended arrays with padded zeroes */
+        for (iy=0; iy<mod.nay; iy++) {
+            for (ix=0; ix<mod.nax; ix++) {
+                memcpy(&rec_vze[iy*mod.nax*ntfft+ix*ntfft],&rec_udvz[iy*mod.nax*rec.nt+ix*rec.nt],nsam*sizeof(float));
+                memcpy(&rec_pe[iy*mod.nax*ntfft+ix*ntfft], &rec_udp[iy*mod.nax*rec.nt+ix*rec.nt], nsam*sizeof(float));
+            }
+        }
+
+        /* transform from t-x to kx-w */
+        xorig = ixsrc+ibndx;
+        xt2wkx(rec_vze, crec_vz, ntfft, nkx, ntfft, nkx, xorig);
+        xt2wkx(rec_pe, crec_p, ntfft, nkx, ntfft, nkx, xorig);
+
+        /* apply decomposition operators */
+        kxwdecomp(crec_p, crec_vz, crec_up, crec_dw,
+            nkx, mod.dx, nsam, dt, fmin, fmax, cp, rho, vznorm, verbose);
+
+        /* transform back to t-x */
+        wkx2xt(crec_up, rec_up, ntfft, nkx, nkx, ntfft, xorig);
+        wkx2xt(crec_dw, rec_down, ntfft, nkx, nkx, ntfft, xorig);
+
+        /* reduce array to rec.nt samples rec.n traces */
+        for (irec=0; irec<rec.n; irec++) {
+            ix = rec.x[irec]+ibndx;
+            iy = rec.y[irec]+ibndy;
+            for (it=0; it<rec.nt; it++) {
+                rec_up[irec*rec.nt+it]   = rec_up[iy*nkx*ntfft+ix*ntfft+it];
+                rec_down[irec*rec.nt+it] = rec_down[iy*nkx*ntfft+ix*ntfft+it];
+            }
+        }
+        free(crec_vz);
+        free(crec_p);
+        free(crec_up);
+        free(crec_dw);
+    }
+    if (rec.type.ud && (mod.ischeme==3 || mod.ischeme==4) )  {
+    }
+
+    for (irec=0; irec<rec.n; irec++) {
+        hdr.tracf  = irec+1;
+        hdr.tracl  = ishot*rec.n+irec+1;
+        hdr.gx     = 1000*(mod.x0+rec.x[irec]*mod.dx);
+        hdr.gy     = 1000*(mod.y0+rec.y[irec]*mod.dy);
+        hdr.offset = (rec.x[irec]-ixsrc)*mod.dx;
+        hdr.gelev  = (int)(-1000*(mod.z0+rec.z[irec]*mod.dz));
+
+        if (rec.type.vx) {
+            traceWrite( &hdr, &rec_vx[irec*rec.nt], (int)nsam, fpvx) ;
+        }
+        if (rec.type.vy) {
+            traceWrite( &hdr, &rec_vy[irec*rec.nt], (int)nsam, fpvy) ;
+        }
+        if (rec.type.vz) {
+            traceWrite( &hdr, &rec_vz[irec*rec.nt], (int)nsam, fpvz) ;
+        }
+        if (rec.type.p) {
+            traceWrite( &hdr, &rec_p[irec*rec.nt], (int)nsam, fpp) ;
+        }
+        if (rec.type.txx) {
+            traceWrite( &hdr, &rec_txx[irec*rec.nt], (int)nsam, fptxx) ;
+        }
+        if (rec.type.tyy) {
+            traceWrite( &hdr, &rec_tyy[irec*rec.nt], (int)nsam, fptyy) ;
+        }
+        if (rec.type.tzz) {
+            traceWrite( &hdr, &rec_tzz[irec*rec.nt], (int)nsam, fptzz) ;
+        }
+        if (rec.type.txz) {
+            traceWrite( &hdr, &rec_txz[irec*rec.nt], (int)nsam, fptxz) ;
+        }
+        if (rec.type.txy) {
+            traceWrite( &hdr, &rec_txy[irec*rec.nt], (int)nsam, fptxy) ;
+        }
+        if (rec.type.tyz) {
+            traceWrite( &hdr, &rec_tyz[irec*rec.nt], (int)nsam, fptyz) ;
+        }
+        if (rec.type.pp) {
+            traceWrite( &hdr, &rec_pp[irec*rec.nt], (int)nsam, fppp) ;
+        }
+        if (rec.type.ss) {
+            traceWrite( &hdr, &rec_ss[irec*rec.nt], (int)nsam, fpss) ;
+        }
+        if (rec.type.ud && mod.ischeme==1)  {
+            traceWrite( &hdr, &rec_up[irec*rec.nt], (int)nsam, fpup) ;
+            traceWrite( &hdr, &rec_down[irec*rec.nt], (int)nsam, fpdown) ;
+        }
+    }
+
+    if (rec.type.vx) fclose(fpvx);
+    if (rec.type.vy) fclose(fpvy);
+    if (rec.type.vz) fclose(fpvz);
+    if (rec.type.p) fclose(fpp);
+    if (rec.type.txx) fclose(fptxx);
+    if (rec.type.tyy) fclose(fptyy);
+    if (rec.type.tzz) fclose(fptzz);
+    if (rec.type.txz) fclose(fptxz);
+    if (rec.type.txy) fclose(fptxy);
+    if (rec.type.tyz) fclose(fptyz);
+    if (rec.type.pp) fclose(fppp);
+    if (rec.type.ss) fclose(fpss);
+    if (rec.type.ud) {
+        fclose(fpup);
+        fclose(fpdown);
+        free(rec_up);
+        free(rec_down);
+    }
+
+    return 0;
+}
+
diff --git a/fdelmodc3D/writeSnapTimes3D.c b/fdelmodc3D/writeSnapTimes3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..56c81f5bec79781651efa9a0aaad1bf1e93f86f2
--- /dev/null
+++ b/fdelmodc3D/writeSnapTimes3D.c
@@ -0,0 +1,261 @@
+#define _FILE_OFFSET_BITS 64
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#define ISODD(n) ((n) & 01)
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <math.h>
+#include <string.h>
+#include "par.h"
+#include "segy.h"
+#include "fdelmodc3D.h"
+
+/**
+*  Writes gridded wavefield(s) at a desired time to output file(s) 
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+
+FILE *fileOpen(char *file, char *ext, int append);
+int traceWrite(segy *hdr, float *data, int n, FILE *fp);
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+long writeSnapTimes3D(modPar mod, snaPar sna, bndPar bnd, wavPar wav, long ixsrc, long iysrc, long izsrc, long itime, float *vx, float *vy, float *vz, float *tzz, float *tyy, float *txx, float *txz, float *tyz, float *txy, long verbose)
+{
+	FILE    *fpvx, *fpvy, *fpvz, *fptxx, *fptyy, *fptzz, *fptxz, *fptyz, *fptxy, *fpp, *fppp, *fpss;
+	long append, isnap;
+	static long first=1;
+	long n1, n2, ibndx, ibndy, ibndz, ixs, iys, izs, ize, i, j, l;
+	long ix, iy, iz, ix2, iy2;
+	float *snap, sdx, stime;
+	segy hdr;
+
+	if (sna.nsnap==0) return 0;
+
+    ibndx = mod.ioXx;
+    ibndy = mod.ioXy;
+    ibndz = mod.ioXz;
+	n1    = mod.naz;
+	n2    = mod.nax;
+	sdx   = 1.0/mod.dx;
+
+	if (sna.withbnd) {
+		sna.nz=mod.naz;
+		sna.z1=0;
+		sna.z2=mod.naz-1;
+		sna.skipdz=1;
+
+		sna.ny=mod.nax;
+		sna.y1=0;
+		sna.y2=mod.nay-1;
+		sna.skipdy=1;
+
+		sna.nx=mod.nax;
+		sna.x1=0;
+		sna.x2=mod.nax-1;
+		sna.skipdx=1;
+	}
+
+	/* check if this itime is a desired snapshot time */
+	if ( (((itime-sna.delay) % sna.skipdt)==0) && 
+		  (itime >= sna.delay) &&
+		  (itime <= sna.delay+(sna.nsnap-1)*sna.skipdt) ) {
+
+		isnap = NINT((itime-sna.delay)/sna.skipdt);
+
+        if (mod.grid_dir) stime = (-wav.nt+1+itime+1)*mod.dt;  /* reverse time modeling */
+        else  stime = itime*mod.dt;
+		if (verbose) vmess("Writing snapshot(%li) at time=%.4f", isnap+1, stime);
+	
+		if (first) {
+			append=0;
+			first=0;
+		}
+		else {
+			append=1;
+		}
+
+		if (sna.type.vx)  fpvx  = fileOpen(sna.file_snap, "_svx", (int)append);
+		if (sna.type.vy)  fpvy  = fileOpen(sna.file_snap, "_svy", (int)append);
+		if (sna.type.vz)  fpvz  = fileOpen(sna.file_snap, "_svz", (int)append);
+		if (sna.type.p)   fpp   = fileOpen(sna.file_snap, "_sp", (int)append);
+		if (sna.type.txx) fptxx = fileOpen(sna.file_snap, "_stxx", (int)append);
+		if (sna.type.tyy) fptyy = fileOpen(sna.file_snap, "_styy", (int)append);
+		if (sna.type.tzz) fptzz = fileOpen(sna.file_snap, "_stzz", (int)append);
+		if (sna.type.txz) fptxz = fileOpen(sna.file_snap, "_stxz", (int)append);
+		if (sna.type.tyz) fptyz = fileOpen(sna.file_snap, "_styz", (int)append);
+		if (sna.type.txy) fptxy = fileOpen(sna.file_snap, "_stxy", (int)append);
+		if (sna.type.pp)  fppp  = fileOpen(sna.file_snap, "_spp", (int)append);
+		if (sna.type.ss)  fpss  = fileOpen(sna.file_snap, "_sss", (int)append);
+	
+		memset(&hdr,0,TRCBYTES);
+		hdr.dt     = 1000000*(sna.skipdt*mod.dt);
+		hdr.ungpow  = (sna.delay*mod.dt);
+		hdr.scalco = -1000;
+		hdr.scalel = -1000;
+		hdr.sx     = 1000*(mod.x0+ixsrc*mod.dx);
+		hdr.sy     = 1000*(mod.y0+iysrc*mod.dy);
+		hdr.sdepth = 1000*(mod.z0+izsrc*mod.dz);
+		hdr.fldr   = isnap+1;
+		hdr.trid   = 1;
+		hdr.ns     = sna.nz;
+		hdr.trwf   = sna.nx*sna.nx;
+		hdr.ntr    = (isnap+1)*sna.nx;
+		hdr.f1     = sna.z1*mod.dz+mod.z0;
+		hdr.f2     = sna.x1*mod.dx+mod.x0;
+		hdr.d1     = mod.dz*sna.skipdz;
+		hdr.d2     = mod.dx*sna.skipdx;
+		if (sna.withbnd) {
+        	if ( !ISODD(bnd.top)) hdr.f1 = mod.z0 - bnd.ntap*mod.dz;
+        	if ( !ISODD(bnd.lef)) hdr.f2 = mod.x0 - bnd.ntap*mod.dx;
+        	//if ( !ISODD(bnd.rig)) ;
+        	//if ( !ISODD(bnd.bot)) store=1;
+		}
+
+/***********************************************************************
+* vx velocities have one sample less in x-direction
+* vz velocities have one sample less in z-direction
+* txz stresses have one sample less in z-direction and x-direction
+***********************************************************************/
+
+		snap = (float *)malloc(sna.nz*sizeof(float));
+
+		/* Decimate, with skipdx and skipdz, the number of gridpoints written to file 
+		   and write to file. */
+		for (iys=sna.y1, l=0; iys<=sna.x2; iys+=sna.skipdx, l++) {
+			for (ixs=sna.x1, i=0; ixs<=sna.x2; ixs+=sna.skipdx, i++) {
+				hdr.tracf  = l*sna.nx+i+1;
+				hdr.tracl  = isnap*sna.nx*sna.ny+l*sna.nx+i+1;
+				hdr.gx     = 1000*(mod.x0+ixs*mod.dx);
+				hdr.gy     = 1000*(mod.y0+ixs*mod.dy);
+				ix  = ixs+ibndx;
+				ix2 = ix+1;
+				iy  = iys+ibndy;
+				iy2 = iy+1;
+
+				izs = sna.z1+ibndz;
+				ize = sna.z2+ibndz;
+
+				if (sna.withbnd) {
+					izs = 0;
+					ize = sna.z2;
+					ix  = ixs;
+					ix2 = ix;
+					iy  = iys;
+					iy2 = iy;
+					if (sna.type.vz || sna.type.txz || sna.type.tyz) izs = -1;
+					if ( !ISODD(bnd.lef)) hdr.gx = 1000*(mod.x0 - bnd.ntap*mod.dx);
+					if ( !ISODD(bnd.fro)) hdr.gy = 1000*(mod.y0 - bnd.ntap*mod.dy);
+				}
+
+				if (sna.type.vx) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = vx[iy*n1*n2+ix2*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fpvx);
+				}
+				if (sna.type.vy) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = vy[iy2*n1*n2+ix*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fpvy);
+				}
+				if (sna.type.vz) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = vz[iy*n1*n2+ix*n1+iz+1];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fpvz);
+				}
+				if (sna.type.p) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = tzz[iy*n1*n2+ix*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fpp);
+				}
+				if (sna.type.tzz) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = tzz[iy*n1*n2+ix*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fptzz);
+				}
+				if (sna.type.tyy) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = tyy[iy*n1*n2+ix*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fptyy);
+				}
+				if (sna.type.txx) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = txx[iy*n1*n2+ix*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fptxx);
+				}
+				if (sna.type.txz) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = txz[iy*n1*n2+ix2*n1+iz+1];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fptxz);
+				}
+				if (sna.type.txy) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = txy[iy2*n1*n2+ix2*n1+iz];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fptxy);
+				}
+				if (sna.type.tyz) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] = tyz[iy2*n1*n2+ix*n1+iz+1];
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fptyz);
+				}
+				/* calculate divergence of velocity field */
+				if (sna.type.pp) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] =  sdx*((vx[iy*n1*n2+(ix+1)*n1+iz]-vx[iy*n1*n2+ix*n1+iz])+
+										(vy[(iy+1)*n1*n2+ix*n1+iz]-vy[iy*n1*n2+ix*n1+iz])+
+										(vz[iy*n1*n2+ix*n1+iz+1]-vz[iy*n1*n2+ix*n1+iz]));
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fppp);
+				}
+				/* calculate rotation of velocity field */
+				if (sna.type.ss) {
+					for (iz=izs, j=0; iz<=ize; iz+=sna.skipdz, j++) {
+						snap[j] =  sdx*((vx[iy*n1*n2+ix*n1+iz]-vx[(iy-1)*n1*n2+ix*n1+iz-1])-
+										(vy[iy*n1*n2+ix*n1+iz]-vy[iy*n1*n2+(ix-1)*n1+iz-1])-
+										(vz[iy*n1*n2+ix*n1+iz]-vz[(iy-1)*n1*n2+(ix-1)*n1+iz]));
+					}
+					traceWrite(&hdr, snap, (int)sna.nz, fpss);
+				}
+
+			}
+		}
+
+		if (sna.type.vx) fclose(fpvx);
+		if (sna.type.vy) fclose(fpvy);
+		if (sna.type.vz) fclose(fpvz);
+		if (sna.type.p) fclose(fpp);
+		if (sna.type.txx) fclose(fptxx);
+		if (sna.type.tyy) fclose(fptyy);
+		if (sna.type.tzz) fclose(fptzz);
+		if (sna.type.txz) fclose(fptxz);
+		if (sna.type.tyz) fclose(fptyz);
+		if (sna.type.txy) fclose(fptxy);
+		if (sna.type.pp) fclose(fppp);
+		if (sna.type.ss) fclose(fpss);
+
+		free(snap);
+	}
+
+	return 0;
+}
+
diff --git a/fdelmodc3D/writeSrcRecPos3D.c b/fdelmodc3D/writeSrcRecPos3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..92b20b60344721b1895a36c7b0a85d4b38784312
--- /dev/null
+++ b/fdelmodc3D/writeSrcRecPos3D.c
@@ -0,0 +1,160 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"par.h"
+#include"fdelmodc3D.h"
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+/**
+* Writes the source and receiver positions into a gridded file,
+* which has the same size as the input gridded model files. 
+* Source positions have a value +1 and receivers -1.
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+long writesufile3D(char *filename, float *data, long n1, long n2, float f1, float f2, float d1, float d2);
+
+long writeSrcRecPos3D(modPar *mod, recPar *rec, srcPar *src, shotPar *shot)
+{
+	FILE *fp;
+	float *dum, sub_x0, sub_y0, sub_z0, dx, dy, dz;
+	long is, nx, ny, nz, is0, ish, ix, iy, iz, ndot, idx, idy, idz;
+	char tmpname[1024];
+
+ 	ndot = 2;
+	nx = mod->nx;
+    ny = mod->ny;
+	nz = mod->nz;
+	dx = mod->dx;
+    dy = mod->dy;
+	dz = mod->dz;
+	sub_x0 = mod->x0;
+	sub_y0 = mod->y0;
+	sub_z0 = mod->z0;
+
+	/* write velocity field with positions of the sources */
+	dum = (float *)calloc(nx*ny*nz, sizeof(float));
+	vmess("Positions: shot=%li src=%li rec=%li", shot->n, src->n, rec->n);
+	/* source positions for random shots */
+	if (src->random) {
+		sprintf(tmpname,"SrcPositions%li.txt",src->n);
+		fp = fopen(tmpname, "w+");
+		for (is=0; is<src->n; is++) {
+            for (idy=0; idy<=ndot; idy++) {
+                for (idx=0; idx<=ndot; idx++) {
+                    for (idz=0; idz<=ndot; idz++) {
+                        dum[(MAX(0,src->y[is]-idy))*nz*nx+(MAX(0,src->x[is]-idx))*nz+MAX(0,src->z[is]-idz)] = 1.0;
+                        dum[(MAX(0,src->y[is]-idy))*nz*nx+(MAX(0,src->x[is]-idx))*nz+MIN(nz-1,src->z[is]+idz)] = 1.0;
+                        dum[(MAX(0,src->y[is]-idy))*nz*nx+(MIN(nx-1,src->x[is]+idx))*nz+MAX(0,src->z[is]-idz)] = 1.0;
+                        dum[(MAX(0,src->y[is]-idy))*nz*nx+(MIN(nx-1,src->x[is]+idx))*nz+MIN(nz-1,src->z[is]+idz)] = 1.0;
+                        dum[(MIN(ny-1,src->y[is]+idy))*nz*nx+(MAX(0,src->x[is]-idx))*nz+MIN(nz-1,src->z[is]+idz)] = 1.0;
+                        dum[(MIN(ny-1,src->y[is]+idy))*nz*nx+(MAX(0,src->x[is]-idx))*nz+MAX(0,src->z[is]-idz)] = 1.0;
+                        dum[(MIN(ny-1,src->y[is]+idy))*nz*nx+(MIN(nx-1,src->x[is]+idx))*nz+MIN(nz-1,src->z[is]+idz)] = 1.0;
+                        dum[(MIN(ny-1,src->y[is]+idy))*nz*nx+(MIN(nx-1,src->x[is]+idx))*nz+MAX(0,src->z[is]-idz)] = 1.0;
+                    }
+                }
+            }
+			fprintf(fp, "%f %f %f\n", src->z[is]*dz+sub_z0, src->y[is]*dy+sub_y0, src->x[is]*dx+sub_x0);
+		}
+		fclose(fp);
+	}
+	/* source positions for single shot sources with plane waves */
+	else if (src->plane) {
+    	is0 = -1*floor((src->n-1)/2);
+		sprintf(tmpname,"SrcPositions%li.txt",shot->n);
+		fp = fopen(tmpname, "w+");
+		for (ish=0; ish<shot->n; ish++) {
+			for (is=0; is<src->n; is++) {
+				ix = shot->x[ish] + 1 + is0 + is;
+				iy = shot->y[ish] + 1 + is0 + is;
+				iz = shot->z[ish] + 1;
+				dum[iy*nx*nz+ix*nz+iz] = 1.0;
+
+                dum[(MAX(0,iy-1))*nx*nz+ix*nz+iz] = 1.0;
+				dum[(MIN(ny-1,iy+1))*nx*nz+ix*nz+iz] = 1.0;
+				dum[iy*nx*nz+(MAX(0,ix-1))*nz+iz] = 1.0;
+				dum[iy*nx*nz+(MIN(nx-1,ix+1))*nz+iz] = 1.0;
+				dum[iy*nx*nz+ix*nz+MAX(0,iz-1)] = 1.0;
+				dum[iy*nx*nz+ix*nz+MIN(nz-1,iz+1)] = 1.0;
+				fprintf(fp, "(%f, %f, %f)\n", ix*dx+sub_x0, iy*dy+sub_y0, iz*dz+sub_z0);
+			}
+		}
+		fclose(fp);
+	}
+	else if (src->multiwav) {
+	/* source positions for single shot sources with multiple wavelets */
+		sprintf(tmpname,"SrcPositions%li.txt",shot->n);
+		fp = fopen(tmpname, "w+");
+		for (ish=0; ish<shot->n; ish++) {
+			for (is=0; is<src->n; is++) {
+				ix = src->x[is];
+                iy = src->y[is];
+				iz = src->z[is];
+				dum[iy*nx*nz+ix*nz+iz] = 1.0;
+
+                dum[(MAX(0,iy-1))*nx*nz+ix*nz+iz] = 1.0;
+				dum[(MIN(ny-1,iy+1))*nx*nz+ix*nz+iz] = 1.0;
+				dum[iy*nx*nz+(MAX(0,ix-1))*nz+iz] = 1.0;
+				dum[iy*nx*nz+(MIN(nx-1,ix+1))*nz+iz] = 1.0;
+				dum[iy*nx*nz+ix*nz+MAX(0,iz-1)] = 1.0;
+				dum[iy*nx*nz+ix*nz+MIN(nz-1,iz+1)] = 1.0;
+				fprintf(fp, "(%f, %f, %f)\n", ix*dx+sub_x0, iy*dy+sub_y0, iz*dz+sub_z0);
+			}
+		}
+		fclose(fp);
+	}
+	else {
+		sprintf(tmpname,"SrcPositions%li.txt",shot->n);
+		fp = fopen(tmpname, "w+");
+		for (is=0; is<shot->n; is++) {
+            for (idy=0; idy<=ndot; idy++) {
+                for (idx=0; idx<=ndot; idx++) {
+                    for (idz=0; idz<=ndot; idz++) {
+                        dum[(MAX(0,shot->y[is]-idy))*nz*nx+(MAX(0,shot->x[is]-idx))*nz+MAX(0,shot->z[is]-idz)] = 1.0;
+                        dum[(MAX(0,shot->y[is]-idy))*nz*nx+(MAX(0,shot->x[is]-idx))*nz+MIN(nz-1,shot->z[is]+idz)] = 1.0;
+                        dum[(MAX(0,shot->y[is]-idy))*nz*nx+(MIN(nx-1,shot->x[is]+idx))*nz+MAX(0,shot->z[is]-idz)] = 1.0;
+                        dum[(MAX(0,shot->y[is]-idy))*nz*nx+(MIN(nx-1,shot->x[is]+idx))*nz+MIN(nz-1,shot->z[is]+idz)] = 1.0;
+                        dum[(MIN(ny-1,shot->y[is]+idy))*nz*nx+(MAX(0,shot->x[is]-idx))*nz+MIN(nz-1,shot->z[is]+idz)] = 1.0;
+                        dum[(MIN(ny-1,shot->y[is]+idy))*nz*nx+(MAX(0,shot->x[is]-idx))*nz+MAX(0,shot->z[is]-idz)] = 1.0;
+                        dum[(MIN(ny-1,shot->y[is]+idy))*nz*nx+(MIN(nx-1,shot->x[is]+idx))*nz+MIN(nz-1,shot->z[is]+idz)] = 1.0;
+                        dum[(MIN(ny-1,shot->y[is]+idy))*nz*nx+(MIN(nx-1,shot->x[is]+idx))*nz+MAX(0,shot->z[is]-idz)] = 1.0;
+                    }
+                }
+            }
+			fprintf(fp, "%f %f %f\n", shot->z[is]*dz+sub_z0, shot->y[is]*dy+sub_y0, shot->x[is]*dx+sub_x0);
+		}
+		fclose(fp);
+	}
+
+	/* receiver positions */
+	sprintf(tmpname,"RcvPositions%li.txt",rec->n);
+	fp = fopen(tmpname, "w+");
+	for (is=0; is<rec->n; is++) {
+		dum[rec->y[is]*nx*nz+rec->x[is]*nz+rec->z[is]] = -1.0;
+		dum[(MAX(0,rec->y[is]-1))*nx*nz+rec->x[is]*nz+rec->z[is]] = -1.0;
+		dum[(MIN(ny-1,rec->y[is]+1))*nx*nz+rec->x[is]*nz+rec->z[is]] = -1.0;
+		dum[rec->y[is]*nx*nz+(MAX(0,rec->x[is]-1))*nz+rec->z[is]] = -1.0;
+		dum[rec->y[is]*nx*nz+(MIN(nx-1,rec->x[is]+1))*nz+rec->z[is]] = -1.0;
+		dum[rec->y[is]*nx*nz+rec->x[is]*nz+MAX(0,rec->z[is]-1)] = -1.0;
+		dum[rec->y[is]*nx*nz+rec->x[is]*nz+MIN(nz-1,rec->z[is]+1)] = -1.0;
+
+		if (rec->int_vx==3) {
+			fprintf(fp, "(%f, %f, %f)\n", rec->xr[is]*dx+sub_x0, rec->yr[is]*dy+sub_y0, rec->zr[is]*dz+sub_z0);
+		}
+		else {
+			fprintf(fp, "(%f, %f, %f)\n", rec->x[is]*dx+sub_x0, rec->y[is]*dy+sub_y0, rec->z[is]*dz+sub_z0);
+		}
+	}
+	fclose(fp);
+	writesufile3D("SrcRecPositions.su", dum, nz, nx*ny, sub_z0, sub_x0, dz, dx);
+	free(dum);
+
+	return 0;
+}
diff --git a/fdelmodc3D/writesufile3D.c b/fdelmodc3D/writesufile3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..6f2af263839aa33dc179bfe6c74763209a94d445
--- /dev/null
+++ b/fdelmodc3D/writesufile3D.c
@@ -0,0 +1,162 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include "par.h"
+#include "fdelmodc3D.h"
+#include "SUsegy.h"
+#include "segy.h"
+
+/**
+*  Writes an 2D array to a SU file
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+#define TRCBYTES 240
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define ISODD(n) ((n) & 01)
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+long writesufile3D(char *filename, float *data, long n1, long n2, float f1, float f2, float d1, float d2)
+{
+	FILE    *file_out;
+	size_t  nwrite, itrace;
+	long     ns;
+	segy    *hdr;
+
+/* Read in parameters */
+
+
+	
+	if (n1 > USHRT_MAX) {
+		vwarn("Output file %s: number of samples is truncated from %li to USHRT_MAX.", filename, n1);
+	}
+	ns = MIN(n1,USHRT_MAX);
+
+	file_out = fopen( filename, "w+" );
+	assert( file_out );
+
+	hdr = (segy *)calloc(1,TRCBYTES);
+	hdr->ns = ns;
+	hdr->dt = NINT(1000000*(d1));
+	hdr->d1 = d1;
+	hdr->d2 = d2;
+	hdr->f1 = f1;
+	hdr->f2 = f2;
+	hdr->fldr = 1;
+	hdr->trwf = n2;
+
+	for (itrace=0; itrace<n2; itrace++) {
+		hdr->tracl = itrace+1;
+		nwrite = fwrite( hdr, 1, TRCBYTES, file_out );
+		assert (nwrite == TRCBYTES);
+		nwrite = fwrite( &data[itrace*n1], sizeof(float), ns, file_out );
+		assert (nwrite == ns);
+	} 
+	fclose(file_out);
+	free(hdr);
+
+	return 0;
+}
+
+/**
+*  Writes an 2D array to a SU file
+*  special routine for src_nwav array which has a different number of samples for each shot 
+*
+**/
+
+long writesufilesrcnwav3D(char *filename, float **src_nwav, wavPar wav, long n1, long n2, float f1, float f2, float d1, float d2)
+{
+	FILE    *file_out;
+	size_t  nwrite, itrace;
+	float   *trace;
+	long     ns;
+	segy    *hdr;
+
+/* Read in parameters */
+
+	if (n1 > USHRT_MAX) {
+		vwarn("Output file %s: number of samples is truncated from %li to USHRT_MAX.", filename, n1);
+	}
+	ns = MIN(n1,USHRT_MAX);
+
+	file_out = fopen( filename, "w+" );
+	assert( file_out );
+
+	trace = (float *)malloc(n1*sizeof(float));
+	hdr = (segy *)calloc(1,TRCBYTES);
+	hdr->ns = ns;
+	hdr->dt = NINT(1000000*(d1));
+	hdr->d1 = d1;
+	hdr->d2 = d2;
+	hdr->f1 = f1;
+	hdr->f2 = f2;
+	hdr->fldr = 1;
+	hdr->trwf = n2;
+
+	for (itrace=0; itrace<n2; itrace++) {
+		hdr->tracl = itrace+1;
+		nwrite = fwrite( hdr, 1, TRCBYTES, file_out );
+		assert (nwrite == TRCBYTES);
+		memset(trace, 0, n1*sizeof(float));
+		memcpy(trace, &src_nwav[itrace][0], wav.nsamp[itrace]*sizeof(float));
+		nwrite = fwrite( &trace[0], sizeof(float), ns, file_out );
+		assert (nwrite == ns);
+	} 
+	fclose(file_out);
+	free(hdr);
+	free(trace);
+
+	return 0;
+}
+
+/**
+*  Writes an 2D array to a SU file
+*  special routine which used segyhdrs which have ns defined as integer (32 bit)
+*  to handle more than 2^16 samples per trace.
+*
+**/
+
+long writeSUfile3D(char *filename, float *data, long n1, long n2, float f1, float f2, float d1, float d2)
+{
+	FILE    *file_out;
+	size_t  nwrite, itrace;
+	SUsegy  *SUhdr;
+	char    *ptr;
+
+/* Read in parameters */
+
+    ptr = strstr(filename, " ");
+    *ptr = '\0';
+
+	file_out = fopen( filename, "w+" );
+	assert( file_out );
+
+	SUhdr = (SUsegy *)calloc(1,TRCBYTES);
+	SUhdr->ns = n1;
+	SUhdr->dt = NINT(1000000*(d1));
+	SUhdr->d1 = d1;
+	SUhdr->d2 = d2;
+	SUhdr->f1 = f1;
+	SUhdr->f2 = f2;
+	SUhdr->fldr = 1;
+	SUhdr->trwf = n2;
+
+	for (itrace=0; itrace<n2; itrace++) {
+		SUhdr->tracl = itrace+1;
+		nwrite = fwrite( SUhdr, 1, TRCBYTES, file_out );
+		assert (nwrite == TRCBYTES);
+		nwrite = fwrite( &data[itrace*n1], sizeof(float), n1, file_out );
+		assert (nwrite == n1);
+	} 
+	fclose(file_out);
+	free(SUhdr);
+
+	return 0;
+}
+
diff --git a/marchenko/Makefile b/marchenko/Makefile
index 260e2c02845925df6222452cf89035f50faf8704..20354ed6b6a81e3b9a5632e318bb970ef484693b 100644
--- a/marchenko/Makefile
+++ b/marchenko/Makefile
@@ -7,7 +7,7 @@ include ../Make_include
 
 #ALL: fmute marchenko marchenko2
 
-ALL: fmute marchenko 
+ALL: fmute marchenko marchenko_primaries
 
 SRCJ	= fmute.c \
 		getFileInfo.c  \
@@ -35,6 +35,20 @@ SRCH	= marchenko.c \
 		docpkge.c \
 		getpars.c
 
+SRCP	= marchenko_primaries.c \
+		getFileInfo.c  \
+		readData.c \
+		readShotData.c \
+		readTinvData.c \
+		writeData.c \
+		writeDataIter.c \
+		wallclock_time.c \
+		name_ext.c  \
+		verbosepkg.c  \
+		atopkge.c \
+		docpkge.c \
+		getpars.c
+
 OBJJ	= $(SRCJ:%.c=%.o)
 
 fmute:	$(OBJJ) 
@@ -45,22 +59,28 @@ OBJH	= $(SRCH:%.c=%.o)
 marchenko:	$(OBJH) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko $(OBJH) $(LIBS)
 
+OBJP	= $(SRCP:%.c=%.o)
+
+marchenko_primaries:	$(OBJP) 
+	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko_primaries $(OBJP) $(LIBS)
+
 OBJH2	= $(SRCH2:%.c=%.o)
 
 marchenko2:	$(OBJH2) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o marchenko2 $(OBJH2) $(LIBS)
 
-install: fmute marchenko 
+install: fmute marchenko marchenko_primaries
 	cp fmute $B
 	cp marchenko $B
+	cp marchenko_primaries $B
 
 #	cp marchenko2 $B
 
 clean:
-		rm -f core fmute $(OBJJ) marchenko $(OBJH) marchenko2 $(OBJH2)
+		rm -f core fmute $(OBJJ) marchenko $(OBJH) marchenko2 $(OBJH2) marchenko_primaries $(OBJP)
 
 realclean: clean
-		rm -f $B/fmute $B/marchenko $B/marchenko2
+		rm -f $B/fmute $B/marchenko $B/marchenko2 $B/marchenko_primaries
 
 
 
diff --git a/marchenko/marchenko_primaries.c b/marchenko/marchenko_primaries.c
new file mode 100644
index 0000000000000000000000000000000000000000..392f921a4f8e5246fe82d0d52032c738e16cf0f1
--- /dev/null
+++ b/marchenko/marchenko_primaries.c
@@ -0,0 +1,998 @@
+/*
+ * Copyright (c) 2017 by the Society of Exploration Geophysicists.
+ * For more information, go to http://software.seg.org/2017/00XX .
+ * You must read and accept usage terms at:
+ * http://software.seg.org/disclaimer.txt before use.
+ */
+
+#include "par.h"
+#include "segy.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include <genfft.h>
+
+int omp_get_max_threads(void);
+int omp_get_num_threads(void);
+void omp_set_num_threads(int num_threads);
+
+#ifndef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+int compareInt(const void *a, const void *b) 
+{ return (*(int *)a-*(int *)b); }
+
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+#endif/* complex */
+
+int readShotData(char *filename, float *xrcv, float *xsrc, float *zsrc, int *xnx, complex *cdata, int nw, int nw_low, int nshots,
+int nx, int nxs, float fxsb, float dxs, int ntfft, int mode, float scale, float tsq, float Q, float f0, int reci, int *nshots_r, int *isxcount, int *reci_xsrc,  int *reci_xrcv, float *ixmask, int verbose);
+int readTinvData(char *filename, float *xrcv, float *xsrc, float *zsrc, int *xnx, int Nfoc, int nx, int ntfft, int mode, int *maxval, float *tinv, int hw, int verbose);
+int writeDataIter(char *file_iter, float *data, segy *hdrs, int n1, int n2, float d2, float f2, int n2out, int Nfoc, float *xsyn,
+float *zsyn, int *ixpos, int npos, int iter);
+
+void name_ext(char *filename, char *extension);
+
+int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *d2, float *f1, float *f2, float *xmin, float *xmax, float *sclsxgx, int *ntraces);
+int readData(FILE *fp, float *data, segy *hdrs, int n1);
+int writeData(FILE *fp, float *data, segy *hdrs, int n1, int n2);
+int disp_fileinfo(char *file, int n1, int n2, float f1, float f2, float d1, float d2, segy *hdrs);
+double wallclock_time(void);
+
+void synthesis(complex *Refl, complex *Fop, float *Top, float *iRN, int nx, int nt, int nxs, int nts, float dt, float *xsyn, int
+Nfoc, float *xrcv, float *xsrc, int *xnx, float fxse, float fxsb, float dxs, float dxsrc, float dx, int ntfft, int
+nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int npos, double *tfft, int *isxcount, int
+*reci_xsrc,  int *reci_xrcv, float *ixmask, int verbose);
+
+void synthesisPosistions(int nx, int nt, int nxs, int nts, float dt, float *xsyn, int Nfoc, float *xrcv, float *xsrc, int *xnx,
+float fxse, float fxsb, float dxs, float dxsrc, float dx, int nshots, int *ixpos, int *npos, int *isxcount, int countmin, int reci, int verbose);
+
+int linearsearch(int *array, size_t N, int value);
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+" ",
+" MARCHENKO_primaries - Iterative primary reflections retrieval",
+" ",
+" marchenko_primaries file_tinv= file_shot= [optional parameters]",
+" ",
+" Required parameters: ",
+" ",
+"   file_shot= ............... Reflection response: R",
+" ",
+" Optional parameters: ",
+" ",
+" INTEGRATION ",
+"   ishot=nshots/2 ........... shot position(s) to remove internal multiples ",
+"   file_src=spike ........... convolve ishot(s) with source wavelet",
+"   file_tinv= ............... use file_tinv to remove internal multiples",
+" COMPUTATION",
+"   cgemm=0 .................. 1: use BLAS's cgemm to compute R*ishot",
+"   tap=0 .................... lateral taper focusing(1), shot(2) or both(3)",
+"   ntap=0 ................... number of taper points at boundaries",
+"   fmin=0 ................... minimum frequency in the Fourier transform",
+"   fmax=70 .................. maximum frequency in the Fourier transform",
+"   file_src= ................ optional source wavelet to convolve selected ishot's",
+" MARCHENKO ITERATIONS ",
+"   niter=10 ................. number of iterations",
+"   istart=20 ................ start sample of iterations for primaries",
+"   iend=nt .................. end sample of iterations for primaries",
+" MUTE-WINDOW ",
+"   shift=20 ................. number of points to account for wavelet (epsilon in papers)",
+"   smooth=5 ................. number of points to smooth mute with cosine window",
+" REFLECTION RESPONSE CORRECTION ",
+"   tsq=0.0 .................. scale factor n for t^n for true amplitude recovery",
+"   Q=0.0 .......,............ Q correction factor",
+"   f0=0.0 ................... ... for Q correction factor",
+"   scale=2 .................. scale factor of R for summation of Ni with G_d",
+"   pad=0 .................... amount of samples to pad the reflection series",
+"   reci=0 ................... 1; add receivers as shots 2; only use receivers as shot positions",
+"   countmin=0 ............... 0.3*nxrcv; minumum number of reciprocal traces for a contribution",
+" OUTPUT DEFINITION ",
+"   file_rr= ................. output file with primary only shot record",
+"   T=0 ...................... :1 compute transmission-losses compensated primaries ",
+"   verbose=0 ................ silent option; >0 displays info",
+" ",
+" ",
+" author  : Lele Zhang & Jan Thorbecke : 2019 ",
+" ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main (int argc, char **argv)
+{
+    FILE    *fp_out, *fp_rr, *fp_w;
+    size_t nread;
+    int     i, j, l, ret, nshots, Nfoc, nt, nx, nts, nxs, ngath;
+    int     size, n1, n2, ntap, tap, di, ntraces, pad;
+    int     nw, nw_low, nw_high, nfreq, *xnx, *xnxsyn;
+    int     reci, countmin, mode, n2out, verbose, ntfft;
+    int     iter, niter, tracf, *muteW, *tsynW;
+	int		hw, ii, ishot, istart, iend, k, m;
+    int     smooth, shift, *ixpos, npos, ix, cgemm;
+    int     nshots_r, *isxcount, *reci_xsrc, *reci_xrcv;
+    float   fmin, fmax, *tapersh, *tapersy, fxf, dxf, *xsrc, *xrcv, *zsyn, *zsrc, *xrcvsyn;
+    double  t0, t1, t2, t3, tsyn, tread, tfft, tcopy, tii, energyNi, energyN0;
+    float   d1, d2, f1, f2, fxsb, fxse, ft, fx, *xsyn, dxsrc;
+    float   *G_d, *DD, *RR, dt, dx, dxs, scl, mem, T;
+    float   *f1min, *iRN, *Ni, *rtrace, *tmpdata;
+    float   xmin, xmax, scale, tsq, Q, f0;
+    float   *ixmask;
+    float   grad2rad, p, src_angle, src_velo;
+    complex *Refl, *Fop, *ctrace, *cwave;
+    char    *file_tinv, *file_shot, *file_rr, *file_src;
+    segy    *hdrs_out, hdr;
+
+    initargs(argc, argv);
+    requestdoc(1);
+
+    tsyn = tread = tfft = tcopy = tii = 0.0;
+    t0   = wallclock_time();
+
+    if (!getparstring("file_shot", &file_shot)) file_shot = NULL;
+    if (!getparstring("file_tinv", &file_tinv)) file_tinv = NULL;
+    if(!getparstring("file_src", &file_src)) file_src = NULL;
+    if (!getparstring("file_rr", &file_rr)) file_rr = NULL;
+    if (!getparint("verbose", &verbose)) verbose = 0;
+    if (file_tinv == NULL && file_shot == NULL) 
+        verr("file_tinv and file_shot cannot be both input pipe");
+    if (!getparstring("file_rr", &file_rr)) {
+        if (verbose) vwarn("parameter file_rr not found, assume pipe");
+        file_rr = NULL;
+    }
+    if (!getparfloat("fmin", &fmin)) fmin = 0.0;
+    if (!getparfloat("fmax", &fmax)) fmax = 70.0;
+    if (!getparint("reci", &reci)) reci = 0;
+    if (!getparfloat("scale", &scale)) scale = 2.0;
+    if (!getparfloat("tsq", &tsq)) tsq = 0.0;
+    if (!getparfloat("Q", &Q)) Q = 0.0;
+    if (!getparfloat("f0", &f0)) f0 = 0.0;
+    if (!getparint("tap", &tap)) tap = 0;
+    if (!getparint("ntap", &ntap)) ntap = 0;
+    if (!getparint("pad", &pad)) pad = 0;
+    if (!getparfloat("T", &T)) T = 1.0;
+	if (T>=0) T=1.0;
+	else T=-1.0;
+
+    if(!getparint("niter", &niter)) niter = 10;
+    if(!getparint("hw", &hw)) hw = 15;
+    if(!getparint("smooth", &smooth)) smooth = 5;
+    if(!getparint("shift", &shift)) shift=20;
+    if(!getparint("cgemm", &cgemm)) cgemm=0;
+
+    if (reci && ntap) vwarn("tapering influences the reciprocal result");
+
+/*================ Reading info about shot and initial operator sizes ================*/
+
+    ngath = 0; /* setting ngath=0 scans all traces; nx contains maximum traces/gather */
+    ret = getFileInfo(file_shot, &nt, &nx, &ngath, &d1, &dx, &ft, &fx, &xmin, &xmax, &scl, &ntraces);
+    nshots = ngath;
+//    assert (nxs >= nshots);
+
+    if (!getparfloat("dt", &dt)) dt = d1;
+    if(!getparint("istart", &istart)) istart=20;
+    if(!getparint("iend", &iend)) iend=nt;
+
+	if (file_tinv != NULL) {
+    	ngath = 0; /* setting ngath=0 scans all traces; n2 contains maximum traces/gather */
+    	ret = getFileInfo(file_tinv, &n1, &n2, &ngath, &d1, &d2, &f1, &f2, &xmin, &xmax, &scl, &ntraces);
+        Nfoc = ngath;
+        nxs  = n2;
+        nts  = n1;
+        dxs  = d2;
+        fxsb = f2;
+	}
+	else {
+		/* 'G_d' is one of the shot records */
+    	if(!getparint("ishot", &ishot)) ishot=(nshots)/2;
+    	Nfoc = 1;
+    	nxs  = nx; 
+    	nts  = nt;
+    	dxs  = dx; 
+    	fxsb = fx; 
+	}
+
+    ntfft = optncr(MAX(nt+pad, nts+pad)); 
+    nfreq = ntfft/2+1;
+    nw_low = (int)MIN((fmin*ntfft*dt), nfreq-1);
+    nw_low = MAX(nw_low, 1);
+    nw_high = MIN((int)(fmax*ntfft*dt), nfreq-1);
+    nw  = nw_high - nw_low + 1;
+    if (!getparint("countmin", &countmin)) countmin = 0.3*nx;
+
+/* ========================= Opening wavelet file ====================== */
+
+    cwave = (complex *)calloc(ntfft,sizeof(complex));
+    if (file_src != NULL){
+        if (verbose) vmess("Reading wavelet from file %s.", file_src);
+
+        fp_w = fopen(file_src, "r");
+        if (fp_w == NULL) verr("error on opening input file_src=%s", file_src);
+    	nread = fread(&hdr, 1, TRCBYTES, fp_w);
+        assert (nread == TRCBYTES);
+        tmpdata = (float *)malloc(hdr.ns*sizeof(float));
+        nread = fread(tmpdata, sizeof(float), hdr.ns, fp_w);
+        assert (nread == hdr.ns);
+        fclose(fp_w);
+
+        dt = d1; // To Do check dt wavelet is the same as reflection data
+    	rtrace = (float *)calloc(ntfft,sizeof(float));
+
+		/* To Do add samples in middle */
+        if (hdr.ns <= ntfft) {
+            for (i = 0; i < hdr.ns; i++) rtrace[i] = tmpdata[i];
+            for (i = hdr.ns; i < ntfft; i++) rtrace[i] = 0.0;
+        }
+        else {
+            vwarn("file_src has more samples than reflection data: truncated to ntfft = %d", ntfft);
+            for (i = 0; i < ntfft; i++) rtrace[i] = tmpdata[i];
+        }
+        rc1fft(rtrace, cwave, ntfft, -1);
+        free(tmpdata);
+        free(rtrace);
+    }
+	else {
+        for (i = 0; i < nfreq; i++) cwave[i].r = 1.0;
+	}
+    
+/*================ Allocating all data arrays ================*/
+
+    Fop     = (complex *)calloc(nxs*nw*Nfoc,sizeof(complex));
+    f1min   = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    iRN     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    Ni      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    G_d     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    DD      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    RR      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    muteW   = (int *)calloc(Nfoc*nxs,sizeof(int));
+    tsynW   = (int *)malloc(Nfoc*nxs*sizeof(int)); // time-shift for Giovanni's plane-wave on non-zero times
+    tapersy = (float *)malloc(nxs*sizeof(float));
+    xrcvsyn = (float *)calloc(Nfoc*nxs,sizeof(float)); // x-rcv postions of focal points
+    xsyn    = (float *)malloc(Nfoc*sizeof(float)); // x-src position of focal points
+    zsyn    = (float *)malloc(Nfoc*sizeof(float)); // z-src position of focal points
+    xnxsyn  = (int *)calloc(Nfoc,sizeof(int)); // number of traces per focal point
+    ixpos   = (int *)calloc(nxs,sizeof(int)); // x-position of source of shot in G_d domain (nxs with dxs)
+
+    Refl    = (complex *)malloc(nw*nx*nshots*sizeof(complex));
+    tapersh = (float *)malloc(nx*sizeof(float));
+    xrcv    = (float *)calloc(nshots*nx,sizeof(float)); // x-rcv postions of shots
+    xsrc    = (float *)calloc(nshots,sizeof(float)); //x-src position of shots
+    zsrc    = (float *)calloc(nshots,sizeof(float)); // z-src position of shots
+    xnx     = (int *)calloc(nshots,sizeof(int)); // number of traces per shot
+
+	if (reci!=0) {
+        reci_xsrc = (int *)malloc((nxs*nxs)*sizeof(int));
+        reci_xrcv = (int *)malloc((nxs*nxs)*sizeof(int));
+        isxcount  = (int *)calloc(nxs,sizeof(int));
+        ixmask  = (float *)calloc(nxs,sizeof(float));
+    }
+
+/*================ Reading shot records ================*/
+
+    mode=1;
+    readShotData(file_shot, xrcv, xsrc, zsrc, xnx, Refl, nw, nw_low, nshots, nx, nxs, fxsb, dxs, ntfft, 
+         mode, scale, tsq, Q, f0, reci, &nshots_r, isxcount, reci_xsrc, reci_xrcv, ixmask, verbose);
+
+    tapersh = (float *)malloc(nx*sizeof(float));
+    if (tap == 2 || tap == 3) {
+        for (j = 0; j < ntap; j++)
+            tapersh[j] = (cos(PI*(j-ntap)/ntap)+1)/2.0;
+        for (j = ntap; j < nx-ntap; j++)
+            tapersh[j] = 1.0;
+        for (j = nx-ntap; j < nx; j++)
+            tapersh[j] =(cos(PI*(j-(nx-ntap))/ntap)+1)/2.0;
+    }
+    else {
+        for (j = 0; j < nx; j++) tapersh[j] = 1.0;
+    }
+    if (tap == 2 || tap == 3) {
+        if (verbose) vmess("Taper for shots applied ntap=%d", ntap);
+        for (l = 0; l < nshots; l++) {
+            for (j = 1; j < nw; j++) {
+                for (i = 0; i < nx; i++) {
+                    Refl[l*nx*nw+j*nx+i].r *= tapersh[i];
+                    Refl[l*nx*nw+j*nx+i].i *= tapersh[i];
+                }   
+            }   
+        }
+    }
+    free(tapersh);
+
+    /* check consistency of header values */
+    fxf = xsrc[0];
+    if (nx > 1) dxf = (xrcv[nx-1] - xrcv[0])/(float)(nx-1);
+    else dxf = d2;
+    if (NINT(dx*1e3) != NINT(fabs(dxf)*1e3)) {
+        vmess("dx in hdr.d1 (%.3f) and hdr.gx (%.3f) not equal",dx, dxf);
+        if (dxf != 0) dx = fabs(dxf);
+        else verr("gx hdrs not set");
+        vmess("dx used => %f", dx);
+    }
+    
+    dxsrc = (float)xsrc[1] - xsrc[0];
+    if (dxsrc == 0) {
+        vwarn("sx hdrs are not filled in!!");
+        dxsrc = dx;
+    }
+
+/*================ Define focusing operator(s) ================*/
+/* G_d = -R(ishot,-t)*/
+
+/* select ishot from R */
+//        for (k=0; k<nFoc; k++) {
+
+/*================ Read and define mute window based on focusing operator(s) ================*/
+/* G_d = p_0^+ = G_d (-t) ~ Tinv */
+
+	if (file_tinv != NULL) {
+    	mode=-1; /* apply complex conjugate to read in data */
+    	readTinvData(file_tinv, xrcvsyn, xsyn, zsyn, xnxsyn, Nfoc, nxs, ntfft,
+         	mode, muteW, G_d, hw, verbose);
+    	/* reading data added zero's to the number of time samples to be the same as ntfft */
+    	nts   = ntfft;
+        /* copy G_d to DD */
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < nxs; i++) { 
+				for (j = 0; j < nts; j++) { 
+					DD[l*nxs*nts+i*nts+j]  = G_d[l*nxs*nts+i*nts+j]; 
+					G_d[l*nxs*nts+i*nts+j] = 0.0; 
+				}
+			} 
+		}
+        /* check consistency of header values */
+        if (xrcvsyn[0] != 0 || xrcvsyn[1] != 0 ) fxsb = xrcvsyn[0];
+        fxse = fxsb + (float)(nxs-1)*dxs;
+        dxf = (xrcvsyn[nxs-1] - xrcvsyn[0])/(float)(nxs-1);
+        if (NINT(dxs*1e3) != NINT(fabs(dxf)*1e3)) {
+            vmess("dx in hdr.d1 (%.3f) and hdr.gx (%.3f) not equal",d2, dxf);
+            if (dxf != 0) dxs = fabs(dxf);
+            vmess("dx in operator => %f", dxs);
+        }
+	}
+	else { /* use ishot from Refl and optionally convolve with wavelet */
+        /* reading data added zero's to the number of time samples to be the same as ntfft */
+        nts   = ntfft;
+
+        l = 0;
+        k = ishot;    
+        scl   = 1.0/((float)ntfft);
+        rtrace = (float *)calloc(ntfft,sizeof(float));
+        ctrace = (complex *)calloc(ntfft,sizeof(complex));
+        memset(&ctrace[0].r,0,nfreq*2*sizeof(float));
+        for (i = 0; i < xnx[k]; i++) {
+            for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
+                ctrace[j].r =  Refl[k*nw*nx+m*nx+i].r*cwave[j].r + Refl[k*nw*nx+m*nx+i].i*cwave[j].i;
+                ctrace[j].i = -Refl[k*nw*nx+m*nx+i].i*cwave[j].r + Refl[k*nw*nx+m*nx+i].r*cwave[j].i;;
+            }
+    	    /* transfrom result back to time domain */
+            cr1fft(ctrace, rtrace, ntfft, 1);
+            for (j = 0; j < nts; j++) {
+                DD[l*nxs*nts+i*nts+j] = -1.0*scl*rtrace[j];
+            }
+        }
+	    free(ctrace);
+	    free(rtrace);
+
+	    fxse = fxsb = xrcv[0];
+        /* check consistency of header values */
+        for (k=0; k<nshots; k++) {
+            for (i = 0; i < nx; i++) {
+                fxsb = MIN(xrcv[k*nx+i],fxsb);
+                fxse = MAX(xrcv[k*nx+i],fxse);
+            }
+        }
+        fxse = fxsb + (float)(nxs-1)*dxs;
+        dxf = dx;
+	}
+
+	/* just fill with zero's */
+	for (i=0; i<nxs*Nfoc; i++) {
+		tsynW[i] = 0;
+	}
+
+    /* define tapers to taper edges of acquisition */
+    if (tap == 1 || tap == 3) {
+        for (j = 0; j < ntap; j++)
+            tapersy[j] = (cos(PI*(j-ntap)/ntap)+1)/2.0;
+        for (j = ntap; j < nxs-ntap; j++)
+            tapersy[j] = 1.0;
+        for (j = nxs-ntap; j < nxs; j++)
+            tapersy[j] =(cos(PI*(j-(nxs-ntap))/ntap)+1)/2.0;
+    }
+    else {
+        for (j = 0; j < nxs; j++) tapersy[j] = 1.0;
+    }
+    if (tap == 1 || tap == 3) {
+        if (verbose) vmess("Taper for operator applied ntap=%d", ntap);
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < nxs; i++) {
+                for (j = 0; j < nts; j++) {
+                    DD[l*nxs*nts+i*nts+j] *= tapersy[i];
+                }   
+            }   
+        }   
+    }
+
+/*================ Check the size of the files ================*/
+
+    if (NINT(dxsrc/dx)*dx != NINT(dxsrc)) {
+        vwarn("source (%.2f) and receiver step (%.2f) don't match",dxsrc,dx);
+        if (reci == 2) vwarn("step used from operator (%.2f) ",dxs);
+    }
+    di = NINT(dxf/dxs);
+    if ((NINT(di*dxs) != NINT(dxf)) && verbose) 
+        vwarn("dx in receiver (%.2f) and operator (%.2f) don't match",dx,dxs);
+    if (nt != nts) 
+        vmess("Time samples in shot (%d) and focusing operator (%d) are not equal",nt, nts);
+    if (verbose) {
+        vmess("Number of focusing operators   = %d", Nfoc);
+        vmess("Number of receivers in focusop = %d", nxs);
+        vmess("number of shots                = %d", nshots);
+        vmess("number of receiver/shot        = %d", nx);
+        vmess("first model position           = %.2f", fxsb);
+        vmess("last model position            = %.2f", fxse);
+        vmess("first source position fxf      = %.2f", fxf);
+        vmess("source distance dxsrc          = %.2f", dxsrc);
+        vmess("last source position           = %.2f", fxf+(nshots-1)*dxsrc);
+        vmess("receiver distance     dxf      = %.2f", dxf);
+        vmess("direction of increasing traces = %d", di);
+        vmess("number of time samples (nt,nts) = %d (%d,%d)", ntfft, nt, nts);
+        vmess("time sampling                  = %e ", dt);
+        if (file_rr != NULL)    vmess("RR output file                 = %s ", file_rr);
+    }
+
+/*================ initializations ================*/
+
+    if (reci) n2out = nxs;
+    else n2out = nshots;
+    mem = Nfoc*n2out*ntfft*sizeof(float)/1048576.0;
+    if (verbose) {
+        vmess("number of output traces        = %d", n2out);
+        vmess("number of output samples       = %d", ntfft);
+        vmess("Size of output data/file       = %.1f MB", mem);
+    }
+
+
+    /* dry-run of synthesis to get all x-positions calcalated by the integration */
+    synthesisPosistions(nx, nt, nxs, nts, dt, xsyn, Nfoc, xrcv, xsrc, xnx, fxse, fxsb, 
+        dxs, dxsrc, dx, nshots, ixpos, &npos, isxcount, countmin, reci, verbose);
+    if (verbose) {
+        vmess("synthesisPosistions: nshots=%d npos=%d", nshots, npos);
+    }
+
+/*================ set variables for output data ================*/
+
+    n1 = nts; n2 = n2out;
+    f1 = ft; f2 = fxsb+dxs*ixpos[0];
+    d1 = dt;
+    if (reci == 0) d2 = dxsrc; // distance between sources in R
+    else if (reci == 1) d2 = dxs; // distance between traces in G_d 
+    else if (reci == 2) d2 = dx; // distance between receivers in R
+
+    hdrs_out = (segy *) calloc(n2,sizeof(segy));
+    if (hdrs_out == NULL) verr("allocation for hdrs_out");
+    size  = nxs*nts;
+
+    for (i = 0; i < n2; i++) {
+        hdrs_out[i].ns     = n1;
+        hdrs_out[i].trid   = 1;
+        hdrs_out[i].dt     = dt*1000000;
+        hdrs_out[i].f1     = f1;
+        hdrs_out[i].f2     = f2;
+        hdrs_out[i].d1     = d1;
+        hdrs_out[i].d2     = d2;
+        hdrs_out[i].trwf   = n2out;
+        hdrs_out[i].scalco = -1000;
+        hdrs_out[i].gx = NINT(1000*(f2+i*d2));
+        hdrs_out[i].scalel = -1000;
+        hdrs_out[i].tracl = i+1;
+    }
+    t1    = wallclock_time();
+    tread = t1-t0;
+
+
+/*================ start loop over number of time-samples ================*/
+
+    for (ii=istart; ii<iend; ii++) {
+
+/*================ initialization ================*/
+
+		/* To Do copy to G_d is not needed */
+
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < nxs; i++) {
+                for (j = 0; j < nts; j++) {
+                    G_d[l*nxs*nts+i*nts+j] = DD[l*nxs*nts+i*nts+j];
+                }
+                for (j = 0; j < nts-ii+shift; j++) { 
+                    G_d[l*nxs*nts+i*nts+j] = 0.0;
+                }
+            }
+            for (i = 0; i < npos; i++) {
+                    j = 0;
+                    ix = ixpos[i];
+                    f1min[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts+j];
+                    for (j = 1; j < nts; j++) {
+                       f1min[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts+nts-j];
+                  }
+            }
+        }
+    	memcpy(Ni, G_d, Nfoc*nxs*ntfft*sizeof(float));
+
+/*================ start Marchenko iterations ================*/
+
+        for (iter=0; iter<niter; iter++) {
+
+            t2    = wallclock_time();
+    
+/*================ construction of Ni(-t) = - \int R(x,t) Ni(t)  ================*/
+
+            synthesis(Refl, Fop, Ni, iRN, nx, nt, nxs, nts, dt, xsyn, Nfoc, 
+                xrcv, xsrc, xnx, fxse, fxsb, dxs, dxsrc, dx, ntfft, nw, nw_low, nw_high, mode,
+                reci, nshots, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv, ixmask, verbose);
+
+            t3 = wallclock_time();
+            tsyn +=  t3 - t2;
+
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+                    j = 0;
+                    ix = ixpos[i]; 
+                    Ni[l*nxs*nts+i*nts+j]    = -iRN[l*nxs*nts+ix*nts+j];
+                    for (j = 1; j < nts; j++) {
+                        Ni[l*nxs*nts+i*nts+j]    = -iRN[l*nxs*nts+ix*nts+nts-j];
+                    }
+                }
+            }
+
+            /* primaries only scheme */
+            if (iter % 2 == 0) { /* even iterations: => f_1^-(t) */
+            	/* apply muting for the acausal part */
+                for (l = 0; l < Nfoc; l++) {
+                    for (i = 0; i < npos; i++) {
+                        for (j = ii-shift; j < nts; j++) {
+                            Ni[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                        for (j = 0; j < shift; j++) {
+                            Ni[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                    }
+                }
+            }
+            else {/* odd iterations: => f_1^+(t)  */
+                for (l = 0; l < Nfoc; l++) {
+                    for (i = 0; i < npos; i++) {
+                        j = 0;
+                        f1min[l*nxs*nts+i*nts+j] -= Ni[l*nxs*nts+i*nts+j];
+                        for (j = 1; j < nts; j++) {
+                            f1min[l*nxs*nts+i*nts+j] -= Ni[l*nxs*nts+i*nts+nts-j];
+                        }
+                    }
+                }
+                for (l = 0; l < Nfoc; l++) {
+                    for (i = 0; i < npos; i++) {
+                        for (j = nts-shift; j < nts; j++) {
+                            Ni[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                        for (j = 0; j < nts-ii+shift; j++) {
+                            Ni[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                    }
+                }
+            } /* end else (iter) branch */
+
+            t2 = wallclock_time();
+            tcopy +=  t2 - t3;
+    
+            if (verbose>2) vmess("*** Iteration %d finished ***", iter);
+    
+        } /* end of iterations */
+
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < nxs; i++) {
+                 RR[l*nxs*nts+i*nts+ii] = f1min[l*nxs*nts+i*nts+ii];
+            }
+        }
+
+		/* To Do optional write intermediate RR results to file */
+
+        if (verbose) {
+            t3=wallclock_time();
+            tii=(t3-t1)*((float)(iend-istart)/(ii-istart+1.0))-(t3-t1);
+            vmess("Remaining compute time at time-sample %d = %.2f s.",ii, tii);
+        }
+
+	} /* end of time iterations ii */
+
+    free(Ni);
+    free(G_d);
+
+    free(muteW);
+    free(tsynW);
+
+    t2 = wallclock_time();
+    if (verbose) {
+        vmess("Total CPU-time marchenko = %.3f", t2-t0);
+        vmess("with CPU-time synthesis  = %.3f", tsyn);
+        vmess("with CPU-time copy array = %.3f", tcopy);
+        vmess("     CPU-time fft data   = %.3f", tfft);
+        vmess("and CPU-time read data   = %.3f", tread);
+    }
+
+/*================ write output files ================*/
+
+
+    fp_rr = fopen(file_rr, "w+");
+    if (fp_rr==NULL) verr("error on creating output file %s", file_rr);
+
+    tracf = 1;
+    for (l = 0; l < Nfoc; l++) {
+        if (reci) f2 = fxsb;
+        else f2 = fxf;
+
+        for (i = 0; i < n2; i++) {
+            hdrs_out[i].fldr   = l+1;
+            hdrs_out[i].sx     = NINT(xsyn[l]*1000);
+            hdrs_out[i].offset = (long)NINT((f2+i*d2) - xsyn[l]);
+            hdrs_out[i].tracf  = tracf++;
+            hdrs_out[i].selev  = NINT(zsyn[l]*1000);
+            hdrs_out[i].sdepth = NINT(-zsyn[l]*1000);
+            hdrs_out[i].f1     = f1;
+        }
+
+        ret = writeData(fp_rr, (float *)&RR[l*size], hdrs_out, n1, n2);
+        if (ret < 0 ) verr("error on writing output file.");
+    }
+    ret = fclose(fp_rr);
+    if (ret < 0) verr("err %d on closing output file",ret);
+
+    if (verbose) {
+        t1 = wallclock_time();
+        vmess("and CPU-time write data  = %.3f", t1-t2);
+    }
+
+/*================ free memory ================*/
+
+    free(hdrs_out);
+    free(tapersy);
+
+    exit(0);
+}
+
+
+/*================ Convolution and Integration ================*/
+
+void synthesis(complex *Refl, complex *Fop, float *Top, float *iRN, int nx, int nt, int nxs, int nts, float dt, float *xsyn, int
+Nfoc, float *xrcv, float *xsrc, int *xnx, float fxse, float fxsb, float dxs, float dxsrc, float dx, int ntfft, int
+nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int npos, double *tfft, int *isxcount, int
+*reci_xsrc,  int *reci_xrcv, float *ixmask, int verbose)
+{
+    int     nfreq, size, inx;
+    float   scl;
+    int     i, j, l, m, iw, ix, k, ixsrc, il, ik;
+    float   *rtrace, idxs;
+    complex *sum, *ctrace;
+    int     npe;
+    static int first=1, *ixrcv;
+    static double t0, t1, t;
+
+    size  = nxs*nts;
+    nfreq = ntfft/2+1;
+    /* scale factor 1/N for backward FFT,
+     * scale dt for correlation/convolution along time, 
+     * scale dx (or dxsrc) for integration over receiver (or shot) coordinates */
+    scl   = 1.0*dt/((float)ntfft);
+
+#ifdef _OPENMP
+    npe   = omp_get_max_threads();
+    /* parallelisation is over number of virtual source positions (Nfoc) */
+    if (npe > nshots) {
+        vmess("Number of OpenMP threads set to %d (was %d)", Nfoc, npe);
+        omp_set_num_threads(Nfoc);
+    }
+#endif
+
+    t0 = wallclock_time();
+
+    /* reset output data to zero */
+    memset(&iRN[0], 0, Nfoc*nxs*nts*sizeof(float));
+    ctrace = (complex *)calloc(ntfft,sizeof(complex));
+
+    if (!first) {
+    /* transform muted Ni (Top) to frequency domain, input for next iteration  */
+        for (l = 0; l < Nfoc; l++) {
+            /* set Fop to zero, so new operator can be defined within ixpos points */
+            memset(&Fop[l*nxs*nw].r, 0, nxs*nw*2*sizeof(float));
+            for (i = 0; i < npos; i++) {
+                   rc1fft(&Top[l*size+i*nts],ctrace,ntfft,-1);
+                   ix = ixpos[i];
+                   for (iw=0; iw<nw; iw++) {
+                       Fop[l*nxs*nw+iw*nxs+ix].r = ctrace[nw_low+iw].r;
+                       Fop[l*nxs*nw+iw*nxs+ix].i = mode*ctrace[nw_low+iw].i;
+                   }
+            }
+        }
+    }
+    else { /* only for first call to synthesis using all nxs traces in G_d */
+    /* transform G_d to frequency domain, over all nxs traces */
+        first=0;
+        for (l = 0; l < Nfoc; l++) {
+            /* set Fop to zero, so new operator can be defined within all ix points */
+            memset(&Fop[l*nxs*nw].r, 0, nxs*nw*2*sizeof(float));
+            for (i = 0; i < nxs; i++) {
+                   rc1fft(&Top[l*size+i*nts],ctrace,ntfft,-1);
+                   for (iw=0; iw<nw; iw++) {
+                       Fop[l*nxs*nw+iw*nxs+i].r = ctrace[nw_low+iw].r;
+                       Fop[l*nxs*nw+iw*nxs+i].i = mode*ctrace[nw_low+iw].i;
+                   }
+            }
+        }
+        idxs = 1.0/dxs;
+        ixrcv = (int *)malloc(nshots*nx*sizeof(int));
+        for (k=0; k<nshots; k++) {
+            for (i = 0; i < nx; i++) {
+                ixrcv[k*nx+i] = NINT((xrcv[k*nx+i]-fxsb)*idxs);
+            }
+        }
+    }
+    free(ctrace);
+    t1 = wallclock_time();
+    *tfft += t1 - t0;
+
+/* Loop over total number of shots */
+    if (reci == 0 || reci == 1) {
+#pragma omp parallel default(none) \
+ shared(iRN, dx, npe, nw, verbose, nshots, xnx) \
+ shared(Refl, Nfoc, reci, xrcv, xsrc, xsyn, fxsb, fxse, nxs, dxs) \
+ shared(nx, dxsrc, nfreq, nw_low, nw_high) \
+ shared(Fop, size, nts, ntfft, scl, ixrcv) \
+ private(l, ix, j, m, i, sum, rtrace, k, ixsrc, inx)
+{ /* start of parallel region */
+        sum   = (complex *)malloc(nfreq*sizeof(complex));
+        rtrace = (float *)calloc(ntfft,sizeof(float));
+
+#pragma omp for schedule(guided,1)
+        for (k=0; k<nshots; k++) {
+            if ((xsrc[k] < 0.999*fxsb) || (xsrc[k] > 1.001*fxse)) continue;
+            ixsrc = NINT((xsrc[k] - fxsb)/dxs);
+            inx = xnx[k]; /* number of traces per shot */
+
+/*================ SYNTHESIS ================*/
+
+            for (l = 0; l < Nfoc; l++) {
+		        /* compute integral over receiver positions */
+                /* multiply R with Fop and sum over nx */
+                memset(&sum[0].r,0,nfreq*2*sizeof(float));
+                for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
+                    for (i = 0; i < inx; i++) {
+                        ix = ixrcv[k*nx+i];
+                        sum[j].r += Refl[k*nw*nx+m*nx+i].r*Fop[l*nw*nxs+m*nxs+ix].r -
+                                    Refl[k*nw*nx+m*nx+i].i*Fop[l*nw*nxs+m*nxs+ix].i;
+                        sum[j].i += Refl[k*nw*nx+m*nx+i].i*Fop[l*nw*nxs+m*nxs+ix].r +
+                                    Refl[k*nw*nx+m*nx+i].r*Fop[l*nw*nxs+m*nxs+ix].i;
+                    }
+                }
+
+                /* transfrom result back to time domain */
+                cr1fft(sum, rtrace, ntfft, 1);
+
+                /* place result at source position ixsrc; dx = receiver distance */
+                for (j = 0; j < nts; j++) 
+                    iRN[l*size+ixsrc*nts+j] += rtrace[j]*scl*dx;
+            
+            } /* end of parallel Nfoc loop */
+
+
+        if (verbose>4) vmess("*** Shot gather %d processed ***", k);
+
+        } /* end of nshots (k) loop */
+
+        free(sum);
+        free(rtrace);
+#ifdef _OPENMP
+#pragma omp single 
+            npe   = omp_get_num_threads();
+#endif
+} /* end of parallel region */
+
+
+
+    }     /* end of if reci */
+
+/* if reciprocal traces are enabled start a new loop over reciprocal shot positions */
+    if (reci != 0) {
+        for (k=0; k<nxs; k++) {
+            if (isxcount[k] == 0) continue;
+            ixsrc = k;
+            inx = isxcount[ixsrc]; /* number of traces per reciprocal shot */
+
+#pragma omp parallel default(none) \
+ shared(iRN, dx, nw, verbose) \
+ shared(Refl, Nfoc, reci, xrcv, xsrc, xsyn, fxsb, fxse, nxs, dxs) \
+ shared(nx, dxsrc, inx, k, nfreq, nw_low, nw_high) \
+ shared(reci_xrcv, reci_xsrc, ixmask) \
+ shared(Fop, size, nts, ntfft, scl, ixrcv, ixsrc) \
+ private(l, ix, j, m, i, sum, rtrace, ik, il)
+{ /* start of parallel region */
+            sum   = (complex *)malloc(nfreq*sizeof(complex));
+            rtrace = (float *)calloc(ntfft,sizeof(float));
+
+#pragma omp for schedule(guided,1)
+            for (l = 0; l < Nfoc; l++) {
+
+		        /* compute integral over (reciprocal) source positions */
+                /* multiply R with Fop and sum over nx */
+                memset(&sum[0].r,0,nfreq*2*sizeof(float));
+                for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
+                    for (i = 0; i < inx; i++) {
+                        il = reci_xrcv[ixsrc*nxs+i];
+                        ik = reci_xsrc[ixsrc*nxs+i];
+            			ix = NINT((xsrc[il] - fxsb)/dxs);
+                        sum[j].r += Refl[il*nw*nx+m*nx+ik].r*Fop[l*nw*nxs+m*nxs+ix].r -
+                                    Refl[il*nw*nx+m*nx+ik].i*Fop[l*nw*nxs+m*nxs+ix].i;
+                        sum[j].i += Refl[il*nw*nx+m*nx+ik].i*Fop[l*nw*nxs+m*nxs+ix].r +
+                                    Refl[il*nw*nx+m*nx+ik].r*Fop[l*nw*nxs+m*nxs+ix].i;
+                    }
+                }
+
+                /* transfrom result back to time domain */
+                cr1fft(sum, rtrace, ntfft, 1);
+
+                /* place result at source position ixsrc; dxsrc = shot distance */
+                for (j = 0; j < nts; j++) 
+                    iRN[l*size+ixsrc*nts+j] = ixmask[ixsrc]*(iRN[l*size+ixsrc*nts+j]+rtrace[j]*scl*dxsrc);
+                
+            } /* end of parallel Nfoc loop */
+        
+            free(sum);
+            free(rtrace);
+        
+ } /* end of parallel region */
+
+        } /* end of reciprocal shots (k) loop */
+	} /* end of if reci */
+
+    t = wallclock_time() - t0;
+    if (verbose>3) {
+        vmess("OMP: parallel region = %f seconds (%d threads)", t, npe);
+    }
+
+    return;
+}
+
+void synthesisPosistions(int nx, int nt, int nxs, int nts, float dt, float *xsyn, int Nfoc, float *xrcv, float *xsrc, int *xnx,
+float fxse, float fxsb, float dxs, float dxsrc, float dx, int nshots, int *ixpos, int *npos, int *isxcount, int countmin, int reci, int verbose)
+{
+    int     i, j, l, ixsrc, ixrcv, dosrc, k, *count;
+    float   x0, x1;
+
+    count   = (int *)calloc(nxs,sizeof(int)); // number of traces that contribute to the integration over x
+
+/*================ SYNTHESIS ================*/
+
+    for (l = 0; l < 1; l++) { /* assuming all focal operators cover the same lateral area */
+//    for (l = 0; l < Nfoc; l++) {
+        *npos=0;
+
+        if (reci == 0 || reci == 1) {
+            for (k=0; k<nshots; k++) {
+
+                ixsrc = NINT((xsrc[k] - fxsb)/dxs);
+                if (verbose>=3) {
+                    vmess("source position:     %.2f in operator %d", xsrc[k], ixsrc);
+                    vmess("receiver positions:  %.2f <--> %.2f", xrcv[k*nx+0], xrcv[k*nx+nx-1]);
+                    vmess("focal point positions:  %.2f <--> %.2f", fxsb, fxse);
+                }
+        
+                if ((NINT(xsrc[k]-fxse) > 0) || (NINT(xrcv[k*nx+nx-1]-fxse) > 0) ||
+                    (NINT(xrcv[k*nx+nx-1]-fxsb) < 0) || (NINT(xsrc[k]-fxsb) < 0) || 
+                    (NINT(xrcv[k*nx+0]-fxsb) < 0) || (NINT(xrcv[k*nx+0]-fxse) > 0) ) {
+                    vwarn("source/receiver positions are outside synthesis aperture");
+                    vmess("xsrc = %.2f xrcv_1 = %.2f xrvc_N = %.2f", xsrc[k], xrcv[k*nx+0], xrcv[k*nx+nx-1]);
+                    vmess("source position:     %.2f in operator %d", xsrc[k], ixsrc);
+                    vmess("receiver positions:  %.2f <--> %.2f", xrcv[k*nx+0], xrcv[k*nx+nx-1]);
+                    vmess("focal point positions:  %.2f <--> %.2f", fxsb, fxse);
+                }
+        
+                if ( (xsrc[k] >= 0.999*fxsb) && (xsrc[k] <= 1.001*fxse) ) {
+				    j = linearsearch(ixpos, *npos, ixsrc);
+				    if (j < *npos) { /* the position (at j) is already included */
+					    count[j] += xnx[k];
+				    }
+				    else { /* add new postion */
+            		    ixpos[*npos]=ixsrc;
+					    count[*npos] += xnx[k];
+                   	    *npos += 1;
+				    }
+    //                vmess("source position %d is inside synthesis model %f *npos=%d count=%d", k, xsrc[k], *npos, count[*npos]);
+			    }
+    
+    	    } /* end of nshots (k) loop */
+   	    } /* end of reci branch */
+
+        /* if reci=1 or reci=2 source-receive reciprocity is used and new (reciprocal-)sources are added */
+        if (reci != 0) {
+            for (k=0; k<nxs; k++) { /* check count in total number of shots added by reciprocity */
+                if (isxcount[k] >= countmin) {
+				    j = linearsearch(ixpos, *npos, k);
+				    if (j < *npos) { /* the position (at j) is already included */
+					    count[j] += isxcount[k];
+				    }
+				    else { /* add new postion */
+            		    ixpos[*npos]=k;
+					    count[*npos] += isxcount[k];
+                   	    *npos += 1;
+				    }
+                }
+                else {
+                    isxcount[k] = 0;
+                }
+            }
+   	    } /* end of reci branch */
+    } /* end of Nfoc loop */
+
+    if (verbose>=4) {
+	    for (j=0; j < *npos; j++) { 
+            vmess("ixpos[%d] = %d count=%d", j, ixpos[j], count[j]);
+		}
+    }
+    free(count);
+
+/* sort ixpos into increasing values */
+    qsort(ixpos, *npos, sizeof(int), compareInt);
+
+
+    return;
+}
+
+int linearsearch(int *array, size_t N, int value)
+{
+	int j;
+/* Check is position is already in array */
+    j = 0;
+    while (j < N && value != array[j]) {
+        j++;
+    }
+	return j;
+}
+
+/*
+void update(float *field, float *term, int Nfoc, int nx, int nt, int reverse, int ixpos)
+{
+    int   i, j, l, ix;
+
+    if (reverse) {
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < npos; i++) {
+                j = 0;
+                Ni[l*nxs*nts+i*nts+j] = -iRN[l*nxs*nts+i*nts+j];
+                for (j = 1; j < nts; j++) {
+                    Ni[l*nxs*nts+i*nts+j] = -iRN[l*nxs*nts+i*nts+nts-j];
+                }
+            }
+        }
+    }
+    else {
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < npos; i++) {
+                j = 0;
+                Ni[l*nxs*nts+i*nts+j] = -iRN[l*nxs*nts+i*nts+j];
+                for (j = 1; j < nts; j++) {
+                    Ni[l*nxs*nts+i*nts+j] = -iRN[l*nxs*nts+i*nts+nts-j];
+                }
+            }
+        }
+    }
+    return;
+}
+*/
diff --git a/marchenko3D/ampest3D_backup.c b/marchenko3D/ampest3D_backup.c
new file mode 100644
index 0000000000000000000000000000000000000000..08b480b642ec861b4b0d6222e5ed238c6b7a1a17
--- /dev/null
+++ b/marchenko3D/ampest3D_backup.c
@@ -0,0 +1,298 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "segy.h"
+#include <assert.h>
+#include "par.h"
+#include <genfft.h>
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+#endif/* complex */
+
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+long loptncr(long n);
+long maxest3D(float *data, long nt);
+long readData3D(FILE *fp, float *data, segy *hdrs, long n1);
+void scl_data(float *data, long nsam, long nrec, float scl, float *datout, long nsamout);
+void pad_data(float *data, long nsam, long nrec, long nsamout, float *datout);
+void corr(float *data1, float *data2, float *cov, long nrec, long nsam, float dt, long shift);
+void convol(float *data1, float *data2, float *con, long nrec, long nsam, float dt, long shift);
+
+void AmpEst3D(float *f1d, float *Gd, float *ampest, long Nfoc, long nxs, long nys, long ntfft, long *ixpos, long npos,
+    char *file_wav, float dx, float dy, float dt)
+{
+	
+	long 	l, i, ix, iw, nfreq;
+	float 	scl, sclt, *wavelet, *scaled, *conv, *f1dsamp;
+	float   dtm, dxm, cpm, rom, *trace;
+	FILE 	*fp_wav;
+	segy 	*hdrs_wav;
+
+	scl = dx*dy;
+    sclt = 1.0*dt/((float)ntfft);
+
+	conv	= (float *)calloc(nys*nxs*ntfft,sizeof(float));
+	wavelet	= (float *)calloc(ntfft,sizeof(float));
+	scaled	= (float *)calloc(ntfft,sizeof(float));
+	f1dsamp	= (float *)calloc(nys*nxs*ntfft,sizeof(float));
+
+	if (file_wav!=NULL) {
+		trace	= (float *)calloc(ntfft,sizeof(float));
+		hdrs_wav = (segy *)calloc(1, sizeof(segy));
+    	fp_wav = fopen(file_wav, "r");
+        if (fp_wav==NULL) verr("error on opening wavelet file %s", file_wav);
+    	readData3D(fp_wav, trace, hdrs_wav, 0);
+    	fclose(fp_wav);
+		corr(trace, trace, wavelet,  1, ntfft, dt, 0);
+		free(hdrs_wav); free(trace);
+		/* For a monopole source the scaling is (2.0*dt*cp*cp*rho)/(dx*dx) */
+		for (iw=0; iw<ntfft; iw++){
+			wavelet[iw] *= dt;
+		}
+	}
+
+	for (l=0; l<Nfoc; l++) {
+		for (i=0; i<npos; i++) {
+			ix = ixpos[i];
+			for (iw=0; iw<ntfft; iw++) {
+				f1dsamp[i*ntfft+iw] = f1d[l*nxs*nys*ntfft+ix*ntfft+iw];
+			}
+		}
+		if (file_wav==NULL){
+			corr(f1dsamp, f1dsamp, conv,  nxs*nys, ntfft, dt, 0);
+			for (i=0; i<nxs*nys; i++) {
+				for (iw=0; iw<ntfft; iw++) {
+					wavelet[iw] += dt*scl*conv[i*ntfft+iw];
+				}
+			}
+		}
+		memset(&conv[0],0.0, sizeof(float)*ntfft*nxs*nys);
+		convol(f1dsamp, &Gd[l*nxs*nys*ntfft], conv, nxs*nys, ntfft, dt, 0);
+		for (i=0; i<nxs*nys; i++) {
+			for (iw=0; iw<ntfft; iw++) {
+				scaled[iw] += dt*scl*conv[i*ntfft+iw];
+			}
+		}
+		ampest[l] = sqrtf(wavelet[0]/scaled[0]);
+		memset(&conv[0],0.0,    sizeof(float)*ntfft*nxs*nys);
+		memset(&scaled[0],0.0,  sizeof(float)*ntfft);
+	}
+	free(wavelet);free(scaled);free(conv);free(f1dsamp);
+
+	return;
+}
+
+long maxest3D(float *data, long nt)
+{
+	float maxt;
+	long it;
+
+	maxt = data[0];
+	for (it = 0; it < nt; it++) {
+		if (fabs(data[it]) > fabs(maxt)) maxt=data[it];
+	}
+
+	return maxt;
+}
+
+/**
+* Calculates the time convolution of two arrays by 
+* transforming the arrayis to frequency domain,
+* multiplies the arrays and transform back to time.
+*
+**/
+
+void convol(float *data1, float *data2, float *con, long nrec, long nsam, float dt, long shift)
+{
+	long 	i, j, n, optn, nfreq, sign;
+	float  	df, dw, om, tau, scl;
+	float 	*qr, *qi, *p1r, *p1i, *p2r, *p2i, *rdata1, *rdata2;
+	complex *cdata1, *cdata2, *ccon, tmp;
+
+	optn = loptncr(nsam);
+	nfreq = optn/2+1;
+
+	
+	cdata1 = (complex *)malloc(nfreq*nrec*sizeof(complex));
+	if (cdata1 == NULL) verr("memory allocation error for cdata1");
+	cdata2 = (complex *)malloc(nfreq*nrec*sizeof(complex));
+	if (cdata2 == NULL) verr("memory allocation error for cdata2");
+	ccon = (complex *)malloc(nfreq*nrec*sizeof(complex));
+	if (ccon == NULL) verr("memory allocation error for ccov");
+	
+	rdata1 = (float *)malloc(optn*nrec*sizeof(float));
+	if (rdata1 == NULL) verr("memory allocation error for rdata1");
+	rdata2 = (float *)malloc(optn*nrec*sizeof(float));
+	if (rdata2 == NULL) verr("memory allocation error for rdata2");
+
+	/* pad zeroes until Fourier length is reached */
+	pad_data(data1, nsam, nrec, optn, rdata1);
+	pad_data(data2, nsam, nrec, optn, rdata2);
+
+	/* forward time-frequency FFT */
+	sign = -1;
+	rcmfft(&rdata1[0], &cdata1[0], (int)optn, (int)nrec, (int)optn, (int)nfreq, (int)sign);
+	rcmfft(&rdata2[0], &cdata2[0], (int)optn, (int)nrec, (int)optn, (int)nfreq, (int)sign);
+
+	/* apply convolution */
+	p1r = (float *) &cdata1[0];
+	p2r = (float *) &cdata2[0];
+	qr = (float *) &ccon[0].r;
+	p1i = p1r + 1;
+	p2i = p2r + 1;
+	qi = qr + 1;
+	n = nrec*nfreq;
+	for (j = 0; j < n; j++) {
+		*qr = (*p2r**p1r-*p2i**p1i);
+		*qi = (*p2r**p1i+*p2i**p1r);
+		qr += 2;
+		qi += 2;
+		p1r += 2;
+		p1i += 2;
+		p2r += 2;
+		p2i += 2;
+	}
+	free(cdata1);
+	free(cdata2);
+
+	if (shift) {
+		df = 1.0/(dt*optn);
+		dw = 2*PI*df;
+//		tau = 1.0/(2.0*df);
+		tau = dt*(nsam/2);
+		for (j = 0; j < nrec; j++) {
+			om = 0.0;
+			for (i = 0; i < nfreq; i++) {
+				tmp.r = ccon[j*nfreq+i].r*cos(om*tau) + ccon[j*nfreq+i].i*sin(om*tau);
+				tmp.i = ccon[j*nfreq+i].i*cos(om*tau) - ccon[j*nfreq+i].r*sin(om*tau);
+				ccon[j*nfreq+i] = tmp;
+				om += dw;
+			}
+		}
+	}
+
+        /* inverse frequency-time FFT and scale result */
+	sign = 1;
+	scl = 1.0/((float)(optn));
+	crmfft(&ccon[0], &rdata1[0], (int)optn, (int)nrec, (int)nfreq, (int)optn, (int)sign);
+	scl_data(rdata1,optn,nrec,scl,con,nsam);
+
+	free(ccon);
+	free(rdata1);
+	free(rdata2);
+	return;
+}
+
+/**
+* Calculates the time correlation of two arrays by 
+* transforming the arrayis to frequency domain,
+* multiply the arrays and transform back to time.
+*
+**/
+
+
+void corr(float *data1, float *data2, float *cov, long nrec, long nsam, float dt, long shift)
+{
+	long 	i, j, n, optn, nfreq, sign;
+	float  	df, dw, om, tau, scl;
+	float 	*qr, *qi, *p1r, *p1i, *p2r, *p2i, *rdata1, *rdata2;
+	complex *cdata1, *cdata2, *ccov, tmp;
+
+	optn = loptncr(nsam);
+	nfreq = optn/2+1;
+
+	cdata1 = (complex *)malloc(nfreq*nrec*sizeof(complex));
+	if (cdata1 == NULL) verr("memory allocation error for cdata1");
+	cdata2 = (complex *)malloc(nfreq*nrec*sizeof(complex));
+	if (cdata2 == NULL) verr("memory allocation error for cdata2");
+	ccov = (complex *)malloc(nfreq*nrec*sizeof(complex));
+	if (ccov == NULL) verr("memory allocation error for ccov");
+
+	rdata1 = (float *)malloc(optn*nrec*sizeof(float));
+	if (rdata1 == NULL) verr("memory allocation error for rdata1");
+	rdata2 = (float *)malloc(optn*nrec*sizeof(float));
+	if (rdata2 == NULL) verr("memory allocation error for rdata2");
+
+	/* pad zeroes until Fourier length is reached */
+	pad_data(data1, nsam, nrec, optn, rdata1);
+	pad_data(data2, nsam, nrec, optn, rdata2);
+
+	/* forward time-frequency FFT */
+	sign = -1;
+	rcmfft(&rdata1[0], &cdata1[0], (int)optn, (int)nrec, (int)optn, (int)nfreq, (int)sign);
+	rcmfft(&rdata2[0], &cdata2[0], (int)optn, (int)nrec, (int)optn, (int)nfreq, (int)sign);
+
+	/* apply correlation */
+	p1r = (float *) &cdata1[0];
+	p2r = (float *) &cdata2[0];
+	qr  = (float *) &ccov[0].r;
+	p1i = p1r + 1;
+	p2i = p2r + 1;
+	qi = qr + 1;
+	n = nrec*nfreq;
+	for (j = 0; j < n; j++) {
+		*qr = (*p1r * *p2r + *p1i * *p2i);
+		*qi = (*p1i * *p2r - *p1r * *p2i);
+		qr += 2;
+		qi += 2;
+		p1r += 2;
+		p1i += 2;
+		p2r += 2;
+		p2i += 2;
+	}
+	free(cdata1);
+	free(cdata2);
+
+	/* shift t=0 to middle of time window (nsam/2)*/
+	if (shift) {
+		df = 1.0/(dt*optn);
+		dw = 2*PI*df;
+		tau = dt*(nsam/2);
+
+		for (j = 0; j < nrec; j++) {
+			om = 0.0;
+			for (i = 0; i < nfreq; i++) {
+				tmp.r = ccov[j*nfreq+i].r*cos(om*tau) + ccov[j*nfreq+i].i*sin(om*tau);
+				tmp.i = ccov[j*nfreq+i].i*cos(om*tau) - ccov[j*nfreq+i].r*sin(om*tau);
+				ccov[j*nfreq+i] = tmp;
+				om += dw;
+			}
+		}
+	}
+
+	/* inverse frequency-time FFT and scale result */
+	sign = 1;
+	scl = 1.0/(float)optn;
+	crmfft(&ccov[0], &rdata1[0], (int)optn, (int)nrec, (int)nfreq, (int)optn, (int)sign);
+	scl_data(rdata1,optn,nrec,scl,cov,nsam);
+
+	free(ccov);
+	free(rdata1);
+	free(rdata2);
+	return;
+}
+
+void pad_data(float *data, long nsam, long nrec, long nsamout, float *datout)
+{
+	long it,ix;
+	for (ix=0;ix<nrec;ix++) {
+	   for (it=0;it<nsam;it++)
+		datout[ix*nsamout+it]=data[ix*nsam+it];
+	   for (it=nsam;it<nsamout;it++)
+		datout[ix*nsamout+it]=0.0;
+	}
+}
+
+void scl_data(float *data, long nsam, long nrec, float scl, float *datout, long nsamout)
+{
+	long it,ix;
+	for (ix = 0; ix < nrec; ix++) {
+		for (it = 0 ; it < nsamout ; it++)
+			datout[ix*nsamout+it] = scl*data[ix*nsam+it];
+	}
+}
\ No newline at end of file
diff --git a/marchenko3D/imaging3D.c b/marchenko3D/imaging3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..6245d6e07f7da9f3c88332cfef0ec63629941acd
--- /dev/null
+++ b/marchenko3D/imaging3D.c
@@ -0,0 +1,49 @@
+#include "par.h"
+#include "segy.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include <genfft.h>
+
+double wallclock_time(void);
+
+void scl_data(float *data, long nsam, long nrec, float scl, float *datout, long nsamout);
+void pad_data(float *data, long nsam, long nrec, long nsamout, float *datout);
+void convol(float *data1, float *data2, float *con, long nrec, long nsam, float dt, long shift);
+
+
+void imaging3D(float *Image, float *Gmin, float *f1plus, long nx, long ny, long nt, float dx, float dy, float dt, long Nfoc, long verbose)
+{
+    long     i, l, count=0;
+	float   *conv;
+    double  t0, t2;
+
+    t0   = wallclock_time();
+
+#pragma omp parallel default(shared) \
+  private(i,conv) 
+{	
+	conv	= (float *)calloc(nx*ny*nt,sizeof(float));
+
+#pragma omp for
+	for (l = 0; l < Nfoc; l++) {
+		count+=1;
+		if (verbose > 2) vmess("Imaging location %d out of %d",count,Nfoc);
+
+		convol(&Gmin[l*nx*ny*nt], &f1plus[l*nx*ny*nt], conv, nx*ny, nt, dt, 0);
+		for (i=0; i<nx*ny; i++) {
+        	Image[l] += conv[i*nt]*dx*dy*dt;
+		}
+	}
+    free(conv);
+}
+
+    t2 = wallclock_time();
+    if (verbose) {
+        vmess("Total Imaging time = %.3f", t2-t0);
+    }
+
+    return;
+}
\ No newline at end of file
diff --git a/marchenko3D/makeWindow3D.c b/marchenko3D/makeWindow3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd5ba82b5dbc34204ec762823880b5b0605e0da5
--- /dev/null
+++ b/marchenko3D/makeWindow3D.c
@@ -0,0 +1,155 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "segy.h"
+#include <assert.h>
+#include <genfft.h>
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+#endif/* complex */
+
+#ifndef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+void findShotInMute(float *xrcvMute, float xrcvShot, long nxs, long *imute);
+long readData3D(FILE *fp, float *data, segy *hdrs, long n1);
+long readSnapData3D(char *filename, float *data, segy *hdrs, long nsnaps, long nx, long ny, long nz, long sx, long ex, long sy, long ey, long sz, long ez);
+
+
+
+void makeWindow3D(char *file_ray, char *file_amp, char *file_wav, float dt, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc, 
+    long *xnx, long Nfoc, long nx, long ny, long ntfft, long *maxval, float *tinv, long verbose)
+{
+	FILE *fp;
+	segy hdr, *hdrs_mute, *hdrs_amp;
+	size_t nread;
+	long ig, is, i, iy, ix, j, l, nfreq, ntwav;
+	float *wavelet, *wavtmp, scl, *timeval, dw, *amp;
+	complex *cmute, *cwav;
+
+	/*Define parameters*/
+	nfreq   = ntfft/2+1;	
+	wavelet = (float *)calloc(ntfft,sizeof(float));
+	cwav	= (complex *)malloc(nfreq*sizeof(complex));
+	cmute	= (complex *)malloc(nfreq*sizeof(complex));
+	dw  	= 2*M_PI/(ntfft*dt);
+
+	/*Create wavelet using parameters or read in wavelet*/
+    if (file_wav != NULL) {
+        //Determine the amount of sample
+        if (verbose>0) vmess("Reading in wavelet");
+        fp = fopen( file_wav, "r" );
+        if ( fp == NULL ) {
+            perror("Error opening file containing wavelet");
+        }
+        nread = fread( &hdr, 1, TRCBYTES, fp );
+        ntwav = hdr.ns;
+        fclose(fp);
+        //Read in the wavelet
+        fp = fopen( file_wav, "r" );
+	    wavtmp = (float *)calloc(ntwav,sizeof(float));
+        readData3D(fp, wavtmp, &hdr, ntwav);
+        //Fit the wavelet into the same time-axis as the Marchenko scheme
+        for (i=0; i<ntwav; i++) {
+            wavelet[i] = wavtmp[i];
+            wavelet[ntfft-1-i] = wavtmp[ntwav-1-i];
+        }
+        rc1fft(wavelet,cwav,ntfft,-1);
+        free(wavtmp);
+        free(wavelet);
+    }
+
+
+	timeval = (float *)calloc(Nfoc*nx*ny,sizeof(float));
+	if (file_amp!=NULL) amp = (float *)calloc(Nfoc*nx*ny,sizeof(float));
+
+
+    /* Defining mute window using raytimes */
+    vmess("Using raytime for mutewindow");
+    hdrs_mute = (segy *) calloc(Nfoc,sizeof(segy));
+    fp = fopen( file_ray, "r" );
+    if ( fp == NULL ) {
+        perror("Error opening file containing ray");
+    }
+    fclose(fp);
+    readSnapData3D(file_ray, timeval, hdrs_mute, Nfoc, 1, 1, nx, 0, 1, 0, 1, 0, nx);
+    // for (is=0; is<Nfoc; is++) {
+    //     readData3D(fp, &timeval[is*nx*ny], &hdrs_mute[is], nx);
+    // }
+
+    /*Check whether the amplitude is also used*/
+    if (file_amp != NULL) {
+        vmess("Using ray-amplitudes");
+        hdrs_amp = (segy *) calloc(Nfoc,sizeof(segy));
+        fp = fopen( file_amp, "r" );
+        if ( fp == NULL ) {
+            perror("Error opening file containing ray-amplitude");
+        }
+        fclose(fp);
+        readSnapData3D(file_amp, amp, hdrs_amp, Nfoc, 1, 1, nx, 0, 1, 0, 1, 0, nx);
+        // for (is=0; is<Nfoc; is++) {
+        //     readData3D(fp, &amp[is*nx*ny], &hdrs_amp[is], nx);
+        // }
+    }
+
+    /*Define source and receiver locations from the raytime*/
+    for (is=0; is<Nfoc; is++) {
+        for (iy=0; iy<ny; iy++) {
+            for (ix=0; ix<nx; ix++) {
+                xrcv[is*nx*ny+iy*nx+ix] = (hdrs_mute[is].f1 + hdrs_mute[is].d1*((float)ix));
+                yrcv[is*nx*ny+iy*nx+ix] = hdrs_mute[is].gy;
+            }
+        }
+        xnx[is]=hdrs_mute[is].ns;
+        if (hdrs_mute[is].scalco < 0) scl=-1.0/hdrs_mute[is].scalco;
+        else scl=hdrs_mute[is].scalco;
+        xsrc[is] = hdrs_mute[is].sx*scl;
+        ysrc[is] = hdrs_mute[is].sy*scl;
+        zsrc[is] = hdrs_mute[is].sdepth*scl;
+    }
+
+
+	/*Determine the mutewindow*/
+	for (j=0; j<Nfoc; j++) {
+        for (l=0; l<ny; l++) {
+            for (i=0; i<nx; i++) {
+                maxval[j*ny*nx+l*nx+i] = (long)roundf(timeval[j*ny*nx+l*nx+i]/dt);
+                if (maxval[j*ny*nx+l*nx+i] > ntfft-1) maxval[j*ny*nx+l*nx+i] = ntfft-1;
+                if (file_wav!=NULL) { /*Apply the wavelet to create a first arrival*/
+                    if (file_amp != NULL) {
+                        for (ig=0; ig<nfreq; ig++) {
+                            cmute[ig].r = (dt/sqrtf((float)ntfft))*(cwav[ig].r*cos(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0)-cwav[ig].i*sin(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0))/(amp[j*ny*nx+l*nx+i]*amp[j*ny*nx+l*nx+i]);
+                            cmute[ig].i = (dt/sqrtf((float)ntfft))*(cwav[ig].i*cos(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0)+cwav[ig].r*sin(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0))/(amp[j*ny*nx+l*nx+i]*amp[j*ny*nx+l*nx+i]);
+                        }
+                    }
+                    else { /*Use the raytime only to determine the mutewindow*/
+                        for (ig=0; ig<nfreq; ig++) {
+                            cmute[ig].r = (dt/sqrtf((float)ntfft))*(cwav[ig].r*cos(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0)-cwav[ig].i*sin(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0));
+                            cmute[ig].i = (dt/sqrtf((float)ntfft))*(cwav[ig].i*cos(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0)+cwav[ig].r*sin(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0));
+                        }
+                    }
+                }
+                else {
+                    for (ig=0; ig<nfreq; ig++) {
+                        cmute[ig].r = (1.0/sqrtf((float)ntfft))*cos(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0);
+                        cmute[ig].i = (1.0/sqrtf((float)ntfft))*sin(ig*dw*timeval[j*ny*nx+l*nx+i]-M_PI/4.0);
+                    }
+                }
+                cr1fft(cmute,&tinv[j*ny*nx*ntfft+l*nx*ntfft+i*ntfft],ntfft,1);
+            }
+        }
+    }
+
+
+	return;
+}
+
diff --git a/marchenko3D/marchenko3D_backup.c b/marchenko3D/marchenko3D_backup.c
new file mode 100644
index 0000000000000000000000000000000000000000..ace2b3179f7110a2d0884615c06fefa5ac7096bc
--- /dev/null
+++ b/marchenko3D/marchenko3D_backup.c
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2017 by the Society of Exploration Geophysicists.
+ * For more information, go to http://software.seg.org/2017/00XX .
+ * You must read and accept usage terms at:
+ * http://software.seg.org/disclaimer.txt before use.
+ */
+
+#include "par.h"
+#include "segy.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include <genfft.h>
+
+int omp_get_max_threads(void);
+int omp_get_num_threads(void);
+void omp_set_num_threads(int num_threads);
+
+#ifndef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+#endif/* complex */
+
+long readShotData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc, long *xnx, complex *cdata,
+    long nw, long nw_low, long nshots, long nx, long ny, long ntfft, long mode, float scale, long verbose);
+long readTinvData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc,
+    long *xnx, long Nfoc, long nx, long ny, long ntfft, long mode, long *maxval, float *tinv, long hw, long verbose);
+// int writeDataIter(char *file_iter, float *data, segy *hdrs, int n1, int n2, float d2, float f2, int n2out, int Nfoc, float *xsyn,
+//     float *zsyn, int *ixpos, int npos, int iter);
+long unique_elements(float *arr, long len);
+
+void name_ext(char *filename, char *extension);
+
+void applyMute3D(float *data, long *mute, long smooth, long above, long Nfoc, long nxs, long nt, long *xrcvsyn, long npos, long shift);
+
+long getFileInfo3D(char *filename, long *n1, long *n2, long *n3, long *ngath, float *d1, float *d2, float *d3, float *f1, float *f2, float *f3,
+    float *sclsxgxsygy, long *nxm);
+long readData3D(FILE *fp, float *data, segy *hdrs, long n1);
+long writeData3D(FILE *fp, float *data, segy *hdrs, long n1, long n2);
+long disp_fileinfo3D(char *file, long n1, long n2, long n3, float f1, float f2, float f3, float d1, float d2, float d3, segy *hdrs);
+double wallclock_time(void);
+
+void AmpEst3D(float *f1d, float *Gd, float *ampest, long Nfoc, long nxs, long nys, long ntfft, long *ixpos, long npos,
+    char *file_wav, float dx, float dy, float dt);
+
+void synthesisPositions3D(long nx, long ny, long nxs, long nys, long Nfoc, float *xrcv, float *yrcv, float *xsrc, float *ysrc,
+    long *xnx, float fxse, float fyse, float fxsb, float fysb, float dxs, float dys, long nshots, long nxsrc, long nysrc,
+    long *ixpos, long *npos, long reci, long verbose);
+void synthesis3D(complex *Refl, complex *Fop, float *Top, float *iRN, long nx, long ny, long nt, long nxs, long nys, long nts, float dt,
+    float *xsyn, float *ysyn, long Nfoc, float *xrcv, float *yrcv, float *xsrc, float *ysrc, long *xnx,
+    float fxse, float fxsb, float fyse, float fysb, float dxs, float dys, float dxsrc, float dysrc, 
+    float dx, float dy, long ntfft, long nw, long nw_low, long nw_high,  long mode, long reci, long nshots, long nxsrc, long nysrc, 
+    long *ixpos, long npos, double *tfft, long *isxcount, long *reci_xsrc,  long *reci_xrcv, float *ixmask, long verbose);
+
+long linearsearch(long *array, size_t N, long value);
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+" ",
+" MARCHENKO3D - Iterative Green's function and focusing functions retrieval in 3D",
+" ",
+" marchenko3D file_tinv= file_shot= [optional parameters]",
+" ",
+" Required parameters: ",
+" ",
+"   file_tinv= ............... direct arrival from focal point: G_d",
+"   file_shot= ............... Reflection response: R",
+" ",
+" Optional parameters: ",
+" ",
+" INTEGRATION ",
+"   tap=0 .................... lateral taper focusing(1), shot(2) or both(3)",
+"   ntap=0 ................... number of taper points at boundaries",
+"   fmin=0 ................... minimum frequency in the Fourier transform",
+"   fmax=70 .................. maximum frequency in the Fourier transform",
+" MARCHENKO ITERATIONS ",
+"   niter=10 ................. number of iterations",
+" MUTE-WINDOW ",
+"   above=0 .................. mute above(1), around(0) or below(-1) the first travel times of file_tinv",
+"   shift=12 ................. number of points above(positive) / below(negative) travel time for mute",
+"   hw=8 ..................... window in time samples to look for maximum in next trace",
+"   smooth=5 ................. number of points to smooth mute with cosine window",
+" REFLECTION RESPONSE CORRECTION ",
+"   scale=2 .................. scale factor of R for summation of Ni with G_d",
+"   pad=0 .................... amount of samples to pad the reflection series",
+"   reci=0 ................... 1; add receivers as shots 2; only use receivers as shot positions",
+"   countmin=0 ............... 0.3*nxrcv; minumum number of reciprocal traces for a contribution",
+" OUTPUT DEFINITION ",
+"   file_green= .............. output file with full Green function(s)",
+"   file_gplus= .............. output file with G+ ",
+"   file_gmin= ............... output file with G- ",
+"   file_f1plus= ............. output file with f1+ ",
+"   file_f1min= .............. output file with f1- ",
+"   file_f2= ................. output file with f2 (=p+) ",
+"   file_pplus= .............. output file with p+ ",
+"   file_pmin= ............... output file with p- ",
+"   file_iter= ............... output file with -Ni(-t) for each iteration",
+"   verbose=0 ................ silent option; >0 displays info",
+" ",
+" ",
+" author  : Jan Thorbecke     : 2016 (j.w.thorbecke@tudelft.nl)",
+" author  : Joeri Brackenhoff : 2019 (j.a.brackenhoff@tudelft.nl)",
+" ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main (int argc, char **argv)
+{
+    FILE    *fp_out, *fp_f1plus, *fp_f1min;
+    FILE    *fp_gmin, *fp_gplus, *fp_f2, *fp_pmin;
+    long    i, j, l, k, ret, nshots, nxshot, nyshot, Nfoc, nt, nx, ny, nts, nxs, nys, ngath;
+    long    size, n1, n2, n3, ntap, tap, dxi, dyi, ntraces, pad;
+    long    nw, nw_low, nw_high, nfreq, *xnx, *xnxsyn;
+    long    reci, countmin, mode, n2out, n3out, verbose, ntfft;
+    long    iter, niter, tracf, *muteW, ampest;
+    long    hw, smooth, above, shift, *ixpos, npos, ix;
+    long    nshots_r, *isxcount, *reci_xsrc, *reci_xrcv;
+    float   fmin, fmax, *tapersh, *tapersy, fxf, fyf, dxf, dyf, *xsrc, *ysrc, *xrcv, *yrcv, *zsyn, *zsrc, *xrcvsyn, *yrcvsyn;
+    double  t0, t1, t2, t3, tsyn, tread, tfft, tcopy, energyNi, energyN0;
+    float   d1, d2, d3, f1, f2, f3, fxsb, fxse, fysb, fyse, ft, fx, fy, *xsyn, *ysyn, dxsrc, dysrc;
+    float   *green, *f2p, *pmin, *G_d, dt, dx, dy, dxs, dys, scl, mem;
+    float   *f1plus, *f1min, *iRN, *Ni, *trace, *Gmin, *Gplus;
+    float   xmin, xmax, ymin, ymax, scale, tsq, Q, f0;
+    float   *ixmask, *iymask, *ampscl, *Gd;
+    complex *Refl, *Fop;
+    char    *file_tinv, *file_shot, *file_green, *file_iter, *file_wav;
+    char    *file_f1plus, *file_f1min, *file_gmin, *file_gplus, *file_f2, *file_pmin;
+    segy    *hdrs_out;
+
+    initargs(argc, argv);
+    requestdoc(1);
+
+    tsyn = tread = tfft = tcopy = 0.0;
+    t0   = wallclock_time();
+
+    if (!getparstring("file_shot", &file_shot)) file_shot = NULL;
+    if (!getparstring("file_tinv", &file_tinv)) file_tinv = NULL;
+    if (!getparstring("file_f1plus", &file_f1plus)) file_f1plus = NULL;
+    if (!getparstring("file_f1min", &file_f1min)) file_f1min = NULL;
+    if (!getparstring("file_gplus", &file_gplus)) file_gplus = NULL;
+    if (!getparstring("file_gmin", &file_gmin)) file_gmin = NULL;
+    if (!getparstring("file_pplus", &file_f2)) file_f2 = NULL;
+    if (!getparstring("file_f2", &file_f2)) file_f2 = NULL;
+    if (!getparstring("file_pmin", &file_pmin)) file_pmin = NULL;
+    if (!getparstring("file_iter", &file_iter)) file_iter = NULL;
+    if (!getparstring("file_wav", &file_wav)) file_wav = NULL;
+    if (!getparlong("verbose", &verbose)) verbose = 0;
+    if (file_tinv == NULL && file_shot == NULL) 
+        verr("file_tinv and file_shot cannot be both input pipe");
+    if (!getparstring("file_green", &file_green)) {
+        if (verbose) vwarn("parameter file_green not found, assume pipe");
+        file_green = NULL;
+    }
+    if (!getparfloat("fmin", &fmin)) fmin = 0.0;
+    if (!getparfloat("fmax", &fmax)) fmax = 70.0;
+    if (!getparlong("reci", &reci)) reci = 0;
+    if (!getparfloat("scale", &scale)) scale = 2.0;
+    if (!getparfloat("tsq", &tsq)) tsq = 0.0;
+    if (!getparfloat("Q", &Q)) Q = 0.0;
+    if (!getparfloat("f0", &f0)) f0 = 0.0;
+    if (!getparlong("tap", &tap)) tap = 0;
+    if (!getparlong("ntap", &ntap)) ntap = 0;
+    if (!getparlong("pad", &pad)) pad = 0;
+    if (!getparlong("ampest", &ampest)) ampest = 0;
+
+    if(!getparlong("niter", &niter)) niter = 10;
+    if(!getparlong("hw", &hw)) hw = 15;
+    if(!getparlong("smooth", &smooth)) smooth = 5;
+    if(!getparlong("above", &above)) above = 0;
+    if(!getparlong("shift", &shift)) shift=12;
+
+    if (reci && ntap) vwarn("tapering influences the reciprocal result");
+
+/*================ Reading info about shot and initial operator sizes ================*/
+
+    ngath = 0; /* setting ngath=0 scans all traces; n2 contains maximum traces/gather */
+    ret = getFileInfo3D(file_tinv, &n1, &n2, &n3, &ngath, &d1, &d2, &d3, &f1, &f2, &f3, &scl, &ntraces);
+    Nfoc = ngath;
+    nxs  = n2; 
+    nys  = n3;
+    nts  = n1;
+    dxs  = d2;
+    dys  = d3; 
+    fxsb = f2;
+    fysb = f3;
+
+    ngath = 0; /* setting ngath=0 scans all traces; nx contains maximum traces/gather */
+    ret = getFileInfo3D(file_shot, &nt, &nx, &ny, &ngath, &d1, &dx, &dy, &ft, &fx, &fy, &scl, &ntraces);
+    nshots = ngath;
+    assert (nxs*nys >= nshots);
+
+    if (!getparfloat("dt", &dt)) dt = d1;
+
+    ntfft = loptncr(MAX(nt+pad, nts+pad)); 
+    nfreq = ntfft/2+1;
+    nw_low = (long)MIN((fmin*ntfft*dt), nfreq-1);
+    nw_low = MAX(nw_low, 1);
+    nw_high = MIN((long)(fmax*ntfft*dt), nfreq-1);
+    nw  = nw_high - nw_low + 1;
+    scl   = 1.0/((float)ntfft);
+    if (!getparlong("countmin", &countmin)) countmin = 0.3*nx*ny;
+    
+/*================ Allocating all data arrays ================*/
+
+    Fop     = (complex *)calloc(nys*nxs*nw*Nfoc,sizeof(complex));
+    green   = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    f2p     = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    pmin    = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    f1plus  = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    f1min   = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    iRN     = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    Ni      = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    G_d     = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+    muteW   = (long *)calloc(Nfoc*nys*nxs,sizeof(long));
+    trace   = (float *)malloc(ntfft*sizeof(float));
+    tapersy = (float *)malloc(nxs*sizeof(float));
+    xrcvsyn = (float *)calloc(Nfoc*nys*nxs,sizeof(float)); // x-rcv postions of focal points
+    yrcvsyn = (float *)calloc(Nfoc*nys*nxs,sizeof(float)); // x-rcv postions of focal points
+    xsyn    = (float *)malloc(Nfoc*sizeof(float)); // x-src position of focal points
+    ysyn    = (float *)malloc(Nfoc*sizeof(float)); // x-src position of focal points
+    zsyn    = (float *)malloc(Nfoc*sizeof(float)); // z-src position of focal points
+    xnxsyn  = (long *)calloc(Nfoc,sizeof(long)); // number of traces per focal point
+    ixpos   = (long *)calloc(nys*nxs,sizeof(long)); // x-position of source of shot in G_d domain (nxs*nys with dxs, dys)
+
+    Refl    = (complex *)malloc(nw*ny*nx*nshots*sizeof(complex));
+    tapersh = (float *)malloc(nx*sizeof(float));
+    xrcv    = (float *)calloc(nshots*ny*nx,sizeof(float)); // x-rcv postions of shots
+    yrcv    = (float *)calloc(nshots*ny*nx,sizeof(float)); // x-rcv postions of shots
+    xsrc    = (float *)calloc(nshots,sizeof(float)); //x-src position of shots
+    ysrc    = (float *)calloc(nshots,sizeof(float)); //x-src position of shots
+    zsrc    = (float *)calloc(nshots,sizeof(float)); //z-src position of shots
+    xnx     = (long *)calloc(nshots,sizeof(long)); // number of traces per shot
+
+	if (reci!=0) {
+        reci_xsrc = (long *)malloc((nxs*nxs*nys*nys)*sizeof(long));
+        reci_xrcv = (long *)malloc((nxs*nxs*nys*nys)*sizeof(long));
+        isxcount  = (long *)calloc(nxs*nys,sizeof(long));
+        ixmask  = (float *)calloc(nxs*nys,sizeof(float));
+    }
+
+/*================ Read and define mute window based on focusing operator(s) ================*/
+/* G_d = p_0^+ = G_d (-t) ~ Tinv */
+
+    mode=-1; /* apply complex conjugate to read in data */
+    readTinvData3D(file_tinv, xrcvsyn, yrcvsyn, xsyn, ysyn, zsyn, xnxsyn, Nfoc,
+        nxs, nys, ntfft, mode, muteW, G_d, hw, verbose);
+    /* reading data added zero's to the number of time samples to be the same as ntfft */
+    nts   = ntfft;
+                             
+    /* define tapers to taper edges of acquisition */
+    if (tap == 1 || tap == 3) {
+        for (j = 0; j < ntap; j++)
+            tapersy[j] = (cos(PI*(j-ntap)/ntap)+1)/2.0;
+        for (j = ntap; j < nxs-ntap; j++)
+            tapersy[j] = 1.0;
+        for (j = nxs-ntap; j < nxs; j++)
+            tapersy[j] =(cos(PI*(j-(nxs-ntap))/ntap)+1)/2.0;
+    }
+    else {
+        for (j = 0; j < nxs; j++) tapersy[j] = 1.0;
+    }
+    if (tap == 1 || tap == 3) {
+        if (verbose) vmess("Taper for operator applied ntap=%li", ntap);
+        for (l = 0; l < Nfoc; l++) {
+            for (k = 0; k < nys; k++) {
+                for (i = 0; i < nxs; i++) {
+                    for (j = 0; j < nts; j++) {
+                        G_d[l*nys*nxs*nts+k*nxs*nts+i*nts+j] *= tapersy[i];
+                    }
+                }   
+            }   
+        }   
+    }
+
+    /* check consistency of header values */
+    if (xrcvsyn[0] != 0 || xrcvsyn[1] != 0 )    fxsb = xrcvsyn[0];
+    if (yrcvsyn[0] != 0 || yrcvsyn[nys*nxs-1] != 0 )  fysb = yrcvsyn[0];
+    if (nxs>1) { 
+        fxse = fxsb + (float)(nxs-1)*dxs;
+        dxf = (xrcvsyn[nys*nxs-1] - xrcvsyn[0])/(float)(nxs-1);
+    }
+    else {
+        fxse = fxsb;
+        dxs = 1.0;
+        dx = 1.0;
+        d2 = 1.0;
+        dxf = 1.0;
+    }
+    if (nys>1) {
+        fyse = fysb + (float)(nys-1)*dys;
+        dyf = (yrcvsyn[nys*nxs-1] - yrcvsyn[0])/(float)(nys-1);
+    }
+    else {
+        fyse = fysb;
+        dys = 1.0;
+        d3 = 1.0;
+        dy = 1.0;
+        dyf = 1.0;
+    }
+    if (NINT(dxs*1e3) != NINT(fabs(dxf)*1e3)) {
+        vmess("dx in hdr.d2 (%.3f) and hdr.gx (%.3f) not equal",d2, dxf);
+        if (dxf != 0) dxs = fabs(dxf);
+        vmess("dx in operator => %f", dxs);
+    }
+    if (NINT(dys*1e3) != NINT(fabs(dyf)*1e3)) {
+        vmess("dy in hdr.d3 (%.3f) and hdr.gy (%.3f) not equal",d3, dyf);
+        if (dyf != 0) dys = fabs(dyf);
+        vmess("dy in operator => %f", dys);
+    }
+
+/*================ Reading shot records ================*/
+
+    mode=1;
+    readShotData3D(file_shot, xrcv, yrcv, xsrc, ysrc, zsrc, xnx, Refl, nw,
+        nw_low, nshots, nx, ny, ntfft, mode, scale, verbose);
+
+    tapersh = (float *)malloc(nx*sizeof(float));
+    if (tap == 2 || tap == 3) {
+        for (j = 0; j < ntap; j++)
+            tapersh[j] = (cos(PI*(j-ntap)/ntap)+1)/2.0;
+        for (j = ntap; j < nx-ntap; j++)
+            tapersh[j] = 1.0;
+        for (j = nx-ntap; j < nx; j++)
+            tapersh[j] =(cos(PI*(j-(nx-ntap))/ntap)+1)/2.0;
+    }
+    else {
+        for (j = 0; j < nx; j++) tapersh[j] = 1.0;
+    }
+    if (tap == 2 || tap == 3) {
+        if (verbose) vmess("Taper for shots applied ntap=%li", ntap);
+        for (l = 0; l < nshots; l++) {
+            for (j = 1; j < nw; j++) {
+                for (k = 0; k < ny; k++) {
+                    for (i = 0; i < nx; i++) {
+                        Refl[l*nx*ny*nw+j*nx*ny+k*nx+i].r *= tapersh[i];
+                        Refl[l*nx*ny*nw+j*nx*ny+k*nx+i].i *= tapersh[i];
+                    }
+                }   
+            }   
+        }
+    }
+    free(tapersh);
+
+    /* check consistency of header values */
+    nxshot = unique_elements(xsrc,nshots);
+    nyshot = nshots/nxshot;
+
+    fxf = xsrc[0];
+    if (nx > 1) dxf = (xrcv[ny*nx-1] - xrcv[0])/(float)(nx-1);
+    else dxf = d2;
+    if (NINT(dx*1e3) != NINT(fabs(dxf)*1e3)) {
+        vmess("dx in hdr.d2 (%.3f) and hdr.gx (%.3f) not equal",dx, dxf);
+        if (dxf != 0) dx = fabs(dxf);
+        else verr("gx hdrs not set");
+        vmess("dx used => %f", dx);
+    }
+    fyf = ysrc[0];
+    if (ny > 1) dyf = (yrcv[ny*nx-1] - yrcv[0])/(float)(ny-1);
+    else dyf = d3;
+    if (NINT(dy*1e3) != NINT(fabs(dyf)*1e3)) {
+        vmess("dy in hdr.d3 (%.3f) and hdr.gy (%.3f) not equal",dy, dyf);
+        if (dyf != 0) dy = fabs(dyf);
+        else verr("gy hdrs not set");
+        vmess("dy used => %f", dy);
+    }
+    
+    dxsrc = (float)xsrc[1] - xsrc[0];
+    if (dxsrc == 0) {
+        vwarn("sx hdrs are not filled in!!");
+        dxsrc = dx;
+    }
+    dysrc = (float)ysrc[nxshot-1] - ysrc[0];
+    if (dysrc == 0) {
+        vwarn("sy hdrs are not filled in!!");
+        dysrc = dy;
+    }
+
+/*================ Check the size of the files ================*/
+
+    if (NINT(dxsrc/dx)*dx != NINT(dxsrc)) {
+        vwarn("x: source (%.2f) and receiver step (%.2f) don't match",dxsrc,dx);
+        if (reci == 2) vwarn("x: step used from operator (%.2f) ",dxs);
+    }
+    if (NINT(dysrc/dy)*dy != NINT(dysrc)) {
+        vwarn("y: source (%.2f) and receiver step (%.2f) don't match",dysrc,dy);
+        if (reci == 2) vwarn("y: step used from operator (%.2f) ",dys);
+    }
+    dxi = NINT(dxf/dxs);
+    if ((NINT(dxi*dxs) != NINT(dxf)) && verbose) 
+        vwarn("dx in receiver (%.2f) and operator (%.2f) don't match",dx,dxs);
+    dyi = NINT(dyf/dys);
+    if ((NINT(dyi*dys) != NINT(dyf)) && verbose) 
+        vwarn("dy in receiver (%.2f) and operator (%.2f) don't match",dy,dys);
+    if (nt != nts) 
+        vmess("Time samples in shot (%li) and focusing operator (%li) are not equal",nt, nts);
+    if (verbose) {
+        vmess("Number of focusing operators    = %li", Nfoc);
+        vmess("Number of receivers in focusop  = x:%li y:%li total:%li", nxs, nys, nxs*nys);
+        vmess("number of shots                 = %li", nshots);
+        vmess("number of receiver/shot         = x:%li y:%li total:%li", nx, ny, nx*ny);
+        vmess("first model position            = x:%.2f y:%.2f", fxsb, fysb);
+        vmess("last model position             = x:%.2f y:%.2f", fxse, fyse);
+        vmess("first source position           = x:%.2f y:%.2f", fxf, fyf);
+        vmess("source distance                 = x:%.2f y:%.2f", dxsrc, dysrc);
+        vmess("last source position            = x:%.2f y:%.2f", fxf+(nxshot-1)*dxsrc, fyf+(nyshot-1)*dysrc);
+        vmess("receiver distance               = x:%.2f y:%.2f", dxf, dyf);
+        vmess("direction of increasing traces  = x:%li y:%li", dxi, dyi);
+        vmess("number of time samples (nt,nts) = %li (%li,%li)", ntfft, nt, nts);
+        vmess("time sampling                   = %e ", dt);
+        if (file_green != NULL) vmess("Green output file              = %s ", file_green);
+        if (file_gmin != NULL)  vmess("Gmin output file               = %s ", file_gmin);
+        if (file_gplus != NULL) vmess("Gplus output file              = %s ", file_gplus);
+        if (file_pmin != NULL)  vmess("Pmin output file               = %s ", file_pmin);
+        if (file_f2 != NULL)    vmess("f2 (=pplus) output file        = %s ", file_f2);
+        if (file_f1min != NULL) vmess("f1min output file              = %s ", file_f1min);
+        if (file_f1plus != NULL)vmess("f1plus output file             = %s ", file_f1plus);
+        if (file_iter != NULL)  vmess("Iterations output file         = %s ", file_iter);
+    }
+
+/*================ initializations ================*/
+
+    if (reci) { 
+        n2out = nxs; 
+        n3out = nys;
+    }
+    else { 
+        n2out = nxshot; 
+        n3out = nyshot;
+    }
+    mem = Nfoc*n2out*n3out*ntfft*sizeof(float)/1048576.0;
+    if (verbose) {
+        vmess("number of output traces        = x:%li y:%li total:%li", n2out, n3out, n2out*n3out);
+        vmess("number of output samples       = %li", ntfft);
+        vmess("Size of output data/file       = %.1f MB", mem);
+    }
+
+
+    /* dry-run of synthesis to get all x-positions calcalated by the integration */
+    synthesisPositions3D(nx, ny, nxs, nys, Nfoc, xrcv, yrcv, xsrc, ysrc, xnx, 
+        fxse, fyse, fxsb, fysb, dxs, dys, nshots, nxshot, nyshot, ixpos, &npos, reci, verbose);
+    if (verbose) {
+        vmess("synthesisPosistions: nxshot=%li nyshot=%li nshots=%li npos=%li", nxshot, nyshot, nshots, npos);
+    }
+
+/*================ set variables for output data ================*/
+
+    n1 = nts; n2 = n2out; n3 = n3out;
+    f1 = ft; f2 = xrcvsyn[ixpos[0]]; f3 = yrcvsyn[ixpos[0]];
+    d1 = dt;
+    if (reci == 0) {      // distance between sources in R
+        d2 = dxsrc; 
+        d3 = dysrc;
+    }
+    else if (reci == 1) { // distance between traces in G_d 
+        d2 = dxs; 
+        d3 = dys;
+    }
+    else if (reci == 2) { // distance between receivers in R
+        d2 = dx; 
+        d3 = dy;
+    }
+
+    hdrs_out = (segy *) calloc(n2*n3,sizeof(segy));
+    if (hdrs_out == NULL) verr("allocation for hdrs_out");
+    size  = nys*nxs*nts;
+
+    for (k = 0; k < n3; k++) {
+        for (i = 0; i < n2; i++) {
+            hdrs_out[k*n2+i].ns     = n1;
+            hdrs_out[k*n2+i].trid   = 1;
+            hdrs_out[k*n2+i].dt     = dt*1000000;
+            hdrs_out[k*n2+i].f1     = f1;
+            hdrs_out[k*n2+i].f2     = f2;
+            hdrs_out[k*n2+i].d1     = d1;
+            hdrs_out[k*n2+i].d2     = d2;
+            hdrs_out[k*n2+i].trwf   = n2out*n3out;
+            hdrs_out[k*n2+i].scalco = -1000;
+            hdrs_out[k*n2+i].gx     = NINT(1000*(f2+i*d2));
+            hdrs_out[k*n2+i].gy     = NINT(1000*(f3+k*d3));
+            hdrs_out[k*n2+i].scalel = -1000;
+            hdrs_out[k*n2+i].tracl  = k*n2+i+1;
+        }
+    }
+    t1    = wallclock_time();
+    tread = t1-t0;
+
+/*================ initialization ================*/
+
+    memcpy(Ni, G_d, Nfoc*nys*nxs*ntfft*sizeof(float));
+    for (l = 0; l < Nfoc; l++) {
+        for (i = 0; i < npos; i++) {
+            j = 0;
+            ix = ixpos[i]; /* select the traces that have an output trace after integration */
+            f2p[l*nys*nxs*nts+i*nts+j] = G_d[l*nys*nxs*nts+ix*nts+j];
+            f1plus[l*nys*nxs*nts+i*nts+j] = G_d[l*nys*nxs*nts+ix*nts+j];
+            for (j = 1; j < nts; j++) {
+                f2p[l*nys*nxs*nts+i*nts+j] = G_d[l*nys*nxs*nts+ix*nts+j];
+                f1plus[l*nys*nxs*nts+i*nts+j] = G_d[l*nys*nxs*nts+ix*nts+j];
+            }
+        }
+    }
+
+/*================ start Marchenko iterations ================*/
+
+    for (iter=0; iter<niter; iter++) {
+
+        t2    = wallclock_time();
+    
+/*================ construction of Ni(-t) = - \int R(x,t) Ni(t)  ================*/
+
+        synthesis3D(Refl, Fop, Ni, iRN, nx, ny, nt, nxs, nys, nts, dt, xsyn, ysyn,
+            Nfoc, xrcv, yrcv, xsrc, ysrc, xnx, fxse, fxsb, fyse, fysb, dxs, dys,
+            dxsrc, dysrc, dx, dy, ntfft, nw, nw_low, nw_high, mode, reci, nshots,
+            nxshot, nyshot, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv,
+            ixmask, verbose);
+
+        t3 = wallclock_time();
+        tsyn +=  t3 - t2;
+
+        // if (file_iter != NULL) {
+        //     writeDataIter(file_iter, iRN, hdrs_out, ntfft, nxs*nys, d2, f2, n2out*n3out, Nfoc, xsyn, zsyn, ixpos, npos, iter);
+        // }
+        /* N_k(x,t) = -N_(k-1)(x,-t) */
+        /* p0^-(x,t) += iRN = (R * T_d^inv)(t) */
+        for (l = 0; l < Nfoc; l++) {
+			energyNi = 0.0;
+            for (i = 0; i < npos; i++) {
+                j = 0;
+                ix = ixpos[i]; 
+                Ni[l*nys*nxs*nts+i*nts+j]    = -iRN[l*nys*nxs*nts+ix*nts+j];
+                pmin[l*nys*nxs*nts+i*nts+j] += iRN[l*nys*nxs*nts+ix*nts+j];
+                energyNi += iRN[l*nys*nxs*nts+ix*nts+j]*iRN[l*nys*nxs*nts+ix*nts+j];
+                for (j = 1; j < nts; j++) {
+                    Ni[l*nys*nxs*nts+i*nts+j]    = -iRN[l*nys*nxs*nts+ix*nts+nts-j];
+                    pmin[l*nys*nxs*nts+i*nts+j] += iRN[l*nys*nxs*nts+ix*nts+j];
+                    energyNi += iRN[l*nys*nxs*nts+ix*nts+j]*iRN[l*nys*nxs*nts+ix*nts+j];
+                }
+            }
+            if (iter==0) energyN0 = energyNi;
+            if (verbose >=2) vmess(" - iSyn %li: Ni at iteration %li has energy %e; relative to N0 %e",
+                l, iter, sqrt(energyNi), sqrt(energyNi/energyN0));
+        }
+
+        /* apply mute window based on times of direct arrival (in muteW) */
+        applyMute3D(Ni, muteW, smooth, above, Nfoc, nxs*nys, nts, ixpos, npos, shift);
+
+        /* update f2 */
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < npos; i++) {
+                j = 0;
+                f2p[l*nys*nxs*nts+i*nts+j] += Ni[l*nys*nxs*nts+i*nts+j];
+                for (j = 1; j < nts; j++) {
+                    f2p[l*nys*nxs*nts+i*nts+j] += Ni[l*nys*nxs*nts+i*nts+j];
+                }
+            }
+        }
+
+        if (iter % 2 == 0) { /* even iterations update: => f_1^-(t) */
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+                    j = 0;
+                    f1min[l*nys*nxs*nts+i*nts+j] -= Ni[l*nys*nxs*nts+i*nts+j];
+                    for (j = 1; j < nts; j++) {
+                        f1min[l*nys*nxs*nts+i*nts+j] -= Ni[l*nys*nxs*nts+i*nts+nts-j];
+                    }
+                }
+            }
+        }
+        else {/* odd iterations update: => f_1^+(t)  */
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+                    j = 0;
+                    f1plus[l*nys*nxs*nts+i*nts+j] += Ni[l*nys*nxs*nts+i*nts+j];
+                    for (j = 1; j < nts; j++) {
+                        f1plus[l*nys*nxs*nts+i*nts+j] += Ni[l*nys*nxs*nts+i*nts+j];
+                    }
+                }
+            }
+        }
+
+        t2 = wallclock_time();
+        tcopy +=  t2 - t3;
+
+        if (verbose) vmess("*** Iteration %li finished ***", iter);
+
+    } /* end of iterations */
+
+    free(Ni);
+    if (ampest < 1) free(G_d);
+
+    /* compute full Green's function G = int R * f2(t) + f2(-t) = Pplus + Pmin */
+    for (l = 0; l < Nfoc; l++) {
+        for (i = 0; i < npos; i++) {
+            j = 0;
+            /* set green to zero if mute-window exceeds nt/2 */
+            if (muteW[l*nys*nxs+ixpos[i]] >= nts/2) {
+                memset(&green[l*nys*nxs*nts+i*nts],0, sizeof(float)*nt);
+                continue;
+            }
+            green[l*nys*nxs*nts+i*nts+j] = f2p[l*nys*nxs*nts+i*nts+j] + pmin[l*nys*nxs*nts+i*nts+j];
+            for (j = 1; j < nts; j++) {
+                green[l*nys*nxs*nts+i*nts+j] = f2p[l*nys*nxs*nts+i*nts+nts-j] + pmin[l*nys*nxs*nts+i*nts+j];
+            }
+        }
+    }
+    applyMute3D(green, muteW, smooth, 4, Nfoc, nxs*nys, nts, ixpos, npos, shift);
+
+    /* compute upgoing Green's function G^+,- */
+    if (file_gmin != NULL) {
+        Gmin    = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+
+        /* use f1+ as operator on R in frequency domain */
+        mode=1;
+        synthesis3D(Refl, Fop, f1plus, iRN, nx, ny, nt, nxs, nys, nts, dt, xsyn, ysyn,
+            Nfoc, xrcv, yrcv, xsrc, ysrc, xnx, fxse, fxsb, fyse, fysb, dxs, dys,
+            dxsrc, dysrc, dx, dy, ntfft, nw, nw_low, nw_high, mode, reci, nshots,
+            nxshot, nyshot, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv,
+            ixmask, verbose);
+
+        /* compute upgoing Green's G^-,+ */
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < npos; i++) {
+                j=0;
+                ix = ixpos[i]; 
+                Gmin[l*nys*nxs*nts+i*nts+j] = iRN[l*nys*nxs*nts+ix*nts+j] - f1min[l*nys*nxs*nts+i*nts+j];
+                for (j = 1; j < nts; j++) {
+                    Gmin[l*nys*nxs*nts+i*nts+j] = iRN[l*nys*nxs*nts+ix*nts+j] - f1min[l*nys*nxs*nts+i*nts+j];
+                }
+            }
+        }
+        /* Apply mute with window for Gmin */
+        applyMute3D(Gmin, muteW, smooth, 4, Nfoc, nxs*nys, nts, ixpos, npos, shift);
+    } /* end if Gmin */
+
+    /* compute downgoing Green's function G^+,+ */
+    if (file_gplus != NULL || ampest > 0) {
+        Gplus   = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
+
+        /* use f1-(*) as operator on R in frequency domain */
+        mode=-1;
+        synthesis3D(Refl, Fop, f1min, iRN, nx, ny, nt, nxs, nys, nts, dt, xsyn, ysyn,
+            Nfoc, xrcv, yrcv, xsrc, ysrc, xnx, fxse, fxsb, fyse, fysb, dxs, dys,
+            dxsrc, dysrc, dx, dy, ntfft, nw, nw_low, nw_high, mode, reci, nshots,
+            nxshot, nyshot, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv,
+            ixmask, verbose);
+
+        /* compute downgoing Green's G^+,+ */
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < npos; i++) {
+                j=0;
+                ix = ixpos[i]; 
+                Gplus[l*nys*nxs*nts+i*nts+j] = -iRN[l*nys*nxs*nts+ix*nts+j] + f1plus[l*nys*nxs*nts+i*nts+j];
+                for (j = 1; j < nts; j++) {
+                    Gplus[l*nys*nxs*nts+i*nts+j] = -iRN[l*nys*nxs*nts+ix*nts+j] + f1plus[l*nys*nxs*nts+i*nts+nts-j];
+                }
+            }
+        }
+        /* Apply mute with window for Gplus */
+        applyMute3D(Gplus, muteW, smooth, 4, Nfoc, nxs*nys, nts, ixpos, npos, shift);
+    } /* end if Gplus */
+
+    /* Estimate the amplitude of the Marchenko Redatuming */
+	if (ampest>0) {
+        if (verbose>0) vmess("Estimating amplitude scaling");
+		Gd		= (float *)calloc(Nfoc*nxs*nys*ntfft,sizeof(float));
+		memcpy(Gd,Gplus,sizeof(float)*Nfoc*nxs*nys*ntfft);
+		applyMute3D(Gd, muteW, smooth, 2, Nfoc, nxs*nys, nts, ixpos, npos, shift);
+		ampscl	= (float *)calloc(Nfoc,sizeof(float));
+		AmpEst3D(G_d,Gd,ampscl,Nfoc,nxs,nys,ntfft,ixpos,npos,file_wav,dxs,dys,dt);
+		for (l=0; l<Nfoc; l++) {
+			for (j=0; j<nxs*nys*nts; j++) {
+				green[l*nxs*nts+j] *= ampscl[l];
+				if (file_gplus != NULL) Gplus[l*nxs*nys*nts+j] *= ampscl[l];
+    			if (file_gmin != NULL) Gmin[l*nxs*nys*nts+j] *= ampscl[l];
+    			if (file_f2 != NULL) f2p[l*nxs*nys*nts+j] *= ampscl[l];
+    			if (file_pmin != NULL) pmin[l*nxs*nys*nts+j] *= ampscl[l];
+    			if (file_f1plus != NULL) f1plus[l*nxs*nys*nts+j] *= ampscl[l];
+    			if (file_f1min != NULL) f1min[l*nxs*nys*nts+j] *= ampscl[l];
+			}
+            if (verbose>1) vmess("Amplitude of focal position %li is equal to %.3e",l,ampscl[l]);
+		}
+        free(Gd);
+        if (file_gplus == NULL) free(Gplus);
+	}
+
+    t2 = wallclock_time();
+    if (verbose) {
+        vmess("Total CPU-time marchenko = %.3f", t2-t0);
+        vmess("with CPU-time synthesis  = %.3f", tsyn);
+        vmess("with CPU-time copy array = %.3f", tcopy);
+        vmess("     CPU-time fft data   = %.3f", tfft);
+        vmess("and CPU-time read data   = %.3f", tread);
+    }
+
+/*================ write output files ================*/
+
+
+    fp_out = fopen(file_green, "w+");
+    if (fp_out==NULL) verr("error on creating output file %s", file_green);
+    if (file_gmin != NULL) {
+        fp_gmin = fopen(file_gmin, "w+");
+        if (fp_gmin==NULL) verr("error on creating output file %s", file_gmin);
+    }
+    if (file_gplus != NULL) {
+        fp_gplus = fopen(file_gplus, "w+");
+        if (fp_gplus==NULL) verr("error on creating output file %s", file_gplus);
+    }
+    if (file_f2 != NULL) {
+        fp_f2 = fopen(file_f2, "w+");
+        if (fp_f2==NULL) verr("error on creating output file %s", file_f2);
+    }
+    if (file_pmin != NULL) {
+        fp_pmin = fopen(file_pmin, "w+");
+        if (fp_pmin==NULL) verr("error on creating output file %s", file_pmin);
+    }
+    if (file_f1plus != NULL) {
+        fp_f1plus = fopen(file_f1plus, "w+");
+        if (fp_f1plus==NULL) verr("error on creating output file %s", file_f1plus);
+    }
+    if (file_f1min != NULL) {
+        fp_f1min = fopen(file_f1min, "w+");
+        if (fp_f1min==NULL) verr("error on creating output file %s", file_f1min);
+    }
+
+
+    tracf = 1;
+    for (l = 0; l < Nfoc; l++) {
+        if (reci) {
+            f2 = fxsb;
+            f3 = fysb;
+        }
+        else {
+            f2 = fxf;
+            f3 = fyf;
+        }
+
+        for (k = 0; k < n3; k++) {
+            for (i = 0; i < n2; i++) {
+                hdrs_out[k*n2+i].fldr   = l+1;
+                hdrs_out[k*n2+i].sx     = NINT(xsyn[l]*1000);
+                hdrs_out[k*n2+i].sy     = NINT(ysyn[l]*1000);
+                hdrs_out[k*n2+i].offset = (long)NINT((f2+i*d2) - xsyn[l]);
+                hdrs_out[k*n2+i].tracf  = tracf++;
+                hdrs_out[k*n2+i].selev  = NINT(zsyn[l]*1000);
+                hdrs_out[k*n2+i].sdepth = NINT(-zsyn[l]*1000);
+                hdrs_out[k*n2+i].f1     = f1;
+            }
+        }
+
+        ret = writeData3D(fp_out, (float *)&green[l*size], hdrs_out, n1, n2*n3);
+        if (ret < 0 ) verr("error on writing output file.");
+
+        if (file_gmin != NULL) {
+            ret = writeData3D(fp_gmin, (float *)&Gmin[l*size], hdrs_out, n1, n2*n3);
+            if (ret < 0 ) verr("error on writing output file.");
+        }
+        if (file_gplus != NULL) {
+            ret = writeData3D(fp_gplus, (float *)&Gplus[l*size], hdrs_out, n1, n2*n3);
+            if (ret < 0 ) verr("error on writing output file.");
+        }
+        if (file_f2 != NULL) {
+            ret = writeData3D(fp_f2, (float *)&f2p[l*size], hdrs_out, n1, n2*n3);
+            if (ret < 0 ) verr("error on writing output file.");
+        }
+        if (file_pmin != NULL) {
+            ret = writeData3D(fp_pmin, (float *)&pmin[l*size], hdrs_out, n1, n2*n3);
+            if (ret < 0 ) verr("error on writing output file.");
+        }
+        if (file_f1plus != NULL) {
+            /* rotate to get t=0 in the middle */
+            for (i = 0; i < n2*n3; i++) {
+                hdrs_out[i].f1     = -n1*0.5*dt;
+                memcpy(&trace[0],&f1plus[l*size+i*nts],nts*sizeof(float));
+                for (j = 0; j < n1/2; j++) {
+                    f1plus[l*size+i*nts+n1/2+j] = trace[j];
+                }
+                for (j = n1/2; j < n1; j++) {
+                    f1plus[l*size+i*nts+j-n1/2] = trace[j];
+                }
+            }
+            ret = writeData3D(fp_f1plus, (float *)&f1plus[l*size], hdrs_out, n1, n2*n3);
+            if (ret < 0 ) verr("error on writing output file.");
+        }
+        if (file_f1min != NULL) {
+            /* rotate to get t=0 in the middle */
+            for (i = 0; i < n2*n3; i++) {
+                hdrs_out[i].f1     = -n1*0.5*dt;
+                memcpy(&trace[0],&f1min[l*size+i*nts],nts*sizeof(float));
+                for (j = 0; j < n1/2; j++) {
+                    f1min[l*size+i*nts+n1/2+j] = trace[j];
+                }
+                for (j = n1/2; j < n1; j++) {
+                    f1min[l*size+i*nts+j-n1/2] = trace[j];
+                }
+            }
+            ret = writeData3D(fp_f1min, (float *)&f1min[l*size], hdrs_out, n1, n2*n3);
+            if (ret < 0 ) verr("error on writing output file.");
+        }
+    }
+    ret = fclose(fp_out);
+    if (file_gplus != NULL) {ret += fclose(fp_gplus);}
+    if (file_gmin != NULL) {ret += fclose(fp_gmin);}
+    if (file_f2 != NULL) {ret += fclose(fp_f2);}
+    if (file_pmin != NULL) {ret += fclose(fp_pmin);}
+    if (file_f1plus != NULL) {ret += fclose(fp_f1plus);}
+    if (file_f1min != NULL) {ret += fclose(fp_f1min);}
+    if (ret < 0) verr("err %li on closing output file",ret);
+
+    if (verbose) {
+        t1 = wallclock_time();
+        vmess("and CPU-time write data  = %.3f", t1-t2);
+    }
+
+/*================ free memory ================*/
+
+    free(hdrs_out);
+    free(tapersy);
+
+    exit(0);
+}
+
+long unique_elements(float *arr, long len)
+{
+     if (len <= 0) return 0;
+     long unique = 1;
+     long outer, inner, is_unique;
+
+     for (outer = 1; outer < len; ++outer)
+     {
+        is_unique = 1;
+        for (inner = 0; is_unique && inner < outer; ++inner)
+        {  
+             if ((arr[inner] >= arr[outer]-1.0) && (arr[inner] <= arr[outer]+1.0)) is_unique = 0;
+        }
+        if (is_unique) ++unique;
+     }
+     return unique;
+}
\ No newline at end of file
diff --git a/marchenko3D/readSnapData3D.c b/marchenko3D/readSnapData3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..503d4905a4cf14d46d03f6a572f8acc3390da289
--- /dev/null
+++ b/marchenko3D/readSnapData3D.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "segy.h"
+#include <assert.h>
+
+typedef struct { /* complex number */
+        float r,i;
+} complex;
+
+long readSnapData3D(char *filename, float *data, segy *hdrs, long nsnaps, long nx, long ny, long nz, long sx, long ex, long sy, long ey, long sz, long ez)
+{
+	FILE *fp;
+	segy hdr;
+	size_t nread;
+	long nt, it, ix, iy, iz, dx, dy, dz;
+	float *tmpdata;
+
+	tmpdata = (float *)malloc(nsnaps*nx*ny*nz*sizeof(float));
+	/* Reading first header  */
+	if (filename == NULL) fp = stdin;
+	else fp = fopen( filename, "r" );
+	if ( fp == NULL ) {
+		fprintf(stderr,"input file %s has an error\n", filename);
+		perror("error in opening file: ");
+		fflush(stderr);
+		return -1;
+	}
+    //nread = fread(&hdr, 1, TRCBYTES, fp);
+    for (it = 0; it < nsnaps*nx*ny; it++) {
+		nread = fread(&hdr, 1, TRCBYTES, fp);
+		if (nread != TRCBYTES) {
+			break;
+		}
+		assert(nread == TRCBYTES);
+        nread = fread(&tmpdata[it*nz], sizeof(float), nz, fp);
+        assert (nread == nz);
+		memcpy(&hdrs[it], &hdr, TRCBYTES);
+    }
+	dx = ex-sx;
+	dy = ey-sy;
+	dz = ez-sz;
+	for (iz = sz; iz < ez; iz++) {
+        for (iy = sy; iy < ey; iy++) {
+            for (ix = sx; ix < ex; ix++) {
+                for (it = 0; it < nsnaps; it++) {
+                    data[it*dy*dx*dz+(iy-sy)*dx*dz+(ix-sx)*dz+iz-sz]=tmpdata[it*ny*nx*nz+iy*nx*nz+ix*nz+iz];
+                }
+            }
+        }
+    }
+	fclose(fp);
+	free(tmpdata);
+	return 0;
+}
diff --git a/utils/Makefile b/utils/Makefile
index 7b056b16b5e646d7af1ae0f0101b8f2c1ead0027..1eb7b80f47f9622dca28aba432da738da126e5d7 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -6,7 +6,7 @@ include ../Make_include
 #OPTC += -openmp 
 #OPTC += -g -O0
 
-ALL: makemod makewave extendModel fconv correigen green green3D basop syn2d mat2su ftr1d
+ALL: makemod makewave extendModel fconv correigen green green3D basop syn2d mat2su ftr1d ampdet
 
 SRCM	= \
 		makemod.c  \
@@ -133,6 +133,15 @@ SRCT	= ftr1d.c \
 		docpkge.c \
 		getpars.c
 
+SRCAD	= ampdet.c \
+		getFileInfo.c  \
+		readData.c \
+		writeData.c \
+		verbosepkg.c  \
+		atopkge.c \
+		docpkge.c \
+		getpars.c
+
 OBJM	= $(SRCM:%.c=%.o)
 
 makemod:	$(OBJM) 
@@ -188,7 +197,12 @@ OBJT	= $(SRCT:%.c=%.o)
 ftr1d:	$(OBJT) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o ftr1d $(OBJT) $(LIBS)
 
-install: makemod makewave extendModel fconv correigen green green3D basop syn2d mat2su ftr1d
+OBJAD	= $(SRCAD:%.c=%.o)
+
+ampdet:	$(OBJAD) 
+	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o ampdet $(OBJAD) $(LIBS)
+
+install: makemod makewave extendModel fconv correigen green green3D basop syn2d mat2su ftr1d 
 	cp makemod $B
 	cp makewave $B
 	cp extendModel $B
@@ -202,7 +216,7 @@ install: makemod makewave extendModel fconv correigen green green3D basop syn2d
 	cp ftr1d $B
 
 clean:
-		rm -f core $(OBJM) makemod $(OBJW) makewave $(OBJE) extendModel $(OBJF) fconv $(OBJG) $(OBJC) correigen green $(OBJG3) green3D $(OBJB) basop $(OBJJ) syn2d $(OBJS) mat2su $(OBJA) ftr1d $(OBJT)
+		rm -f core $(OBJM) makemod $(OBJW) makewave $(OBJE) extendModel $(OBJF) fconv $(OBJG) $(OBJC) correigen green $(OBJG3) green3D $(OBJB) basop $(OBJJ) syn2d $(OBJS) mat2su $(OBJA) ftr1d $(OBJT) ampdet $(OBJAD)
 
 realclean: clean
 		rm -f $B/makemod $B/makewave $B/extendModel $B/fconv $B/correigen $B/green $B/green3D $B/basop $B/syn2d $B/mat2su $B/ftr1d
diff --git a/utils/ampdet.c b/utils/ampdet.c
new file mode 100644
index 0000000000000000000000000000000000000000..d00a7c4265721cdb8deab4cf9285deb80df2a73c
--- /dev/null
+++ b/utils/ampdet.c
@@ -0,0 +1,323 @@
+#include "par.h"
+#include "segy.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include <genfft.h>
+
+#ifndef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+#endif/* complex */
+
+int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *d2, float *f1, float *f2, float *xmin, float *xmax, float *sclsxgx, int *nxm);
+int readData(FILE *fp, float *data, segy *hdrs, int n1);
+int writeData(FILE *fp, float *data, segy *hdrs, int n1, int n2);
+void complex_sqrt(complex *z);
+void deconv_small(complex *c1, complex *c2, complex *c3, float nkx, float nfreq, float reps, float eps);
+void conv_small(complex *c1, complex *c2, complex *c3, float nkx, float nfreq);
+void corr_small(complex *c1, complex *c2, complex *c3, float nkx, float nfreq);
+void scl_data(float *data, int nsam, int nrec, float scl, float *datout, int nsamout);
+void pad2d_data(float *data, int nsam, int nrec, int nsamout, int nrecout, float *datout);
+void pad_data(float *data, int nsam, int nrec, int nsamout, float *datout);
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+" ",
+" ampdet - Determine amplitude",
+" ",
+" author  : Jan Thorbecke : 19-04-1995 (janth@xs4all.nl)",
+" product : Originates from DELPHI software",
+"                         : revision 2010",
+" ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main (int argc, char **argv)
+{
+    FILE    *fp;
+	char    *file_gp, *file_fp, *file_wav;
+    int     nx, nt, ngath, ntraces, ret, size, nxwav;
+    int     ntfft, nfreq, nxfft, nkx, i, j, n;
+    float   dx, dt, fx, ft, xmin, xmax, scl;
+    float   df, dw, dkx, eps, reps;
+    float   *Gpd, *f1pd, *G_pad, *f_pad, *wav, *wav_pad;
+    complex *G_w, *f_w, *Gf, *amp, *wav_w, *S, *ZS, *SS;
+    segy    *hdr_gp, *hdr_fp, *hdr_wav;
+
+	initargs(argc, argv);
+	requestdoc(1);
+
+	if(!getparstring("file_gp", &file_gp)) file_gp=NULL;
+    if (file_gp==NULL) verr("file %s does not exist",file_gp);
+    if(!getparstring("file_gp", &file_fp)) file_fp=NULL;
+    if (file_fp==NULL) verr("file %s does not exist",file_fp);
+    if(!getparstring("file_wav", &file_wav)) file_wav=NULL;
+    if (file_wav==NULL) verr("file %s does not exist",file_wav);
+	if(!getparfloat("eps", &eps)) eps=0.00;
+	if(!getparfloat("reps", &reps)) reps=0.01;
+
+    ngath = 1;
+    ret = getFileInfo(file_gp, &nt, &nx, &ngath, &dt, &dx, &ft, &fx, &xmin, &xmax, &scl, &ntraces);
+
+    size    = nt*nx;
+
+	Gpd     = (float *)malloc(size*sizeof(float));
+	hdr_gp  = (segy *) calloc(nx,sizeof(segy));
+    fp      = fopen(file_gp, "r");
+	if (fp == NULL) verr("error on opening input file_in1=%s", file_gp);
+    nx      = readData(fp, Gpd, hdr_gp, nt);
+    fclose(fp);
+
+	f1pd    = (float *)malloc(size*sizeof(float));
+	hdr_fp  = (segy *) calloc(nx,sizeof(segy));
+    fp      = fopen(file_fp, "r");
+	if (fp == NULL) verr("error on opening input file_in1=%s", file_fp);
+    nx      = readData(fp, f1pd, hdr_fp, nt);
+    fclose(fp);
+
+    wav     = (float *)malloc(nt*sizeof(float));
+	hdr_wav = (segy *) calloc(1,sizeof(segy));
+    fp      = fopen(file_wav, "r");
+	if (fp == NULL) verr("error on opening input file_in1=%s", file_fp);
+    nxwav   = readData(fp, wav, hdr_wav, nt);
+    fclose(fp);
+    vmess("test:%d",nxwav);
+
+    /* Start the scaling */
+    ntfft   = optncr(nt);
+	nfreq   = ntfft/2+1;
+	df      = 1.0/(ntfft*dt);
+    dw      = 2.0*PI*df;
+	nkx     = optncc(nx);
+	dkx     = 2.0*PI/(nkx*dx);
+
+    vmess("ntfft:%d, nfreq:%d, nkx:%d",ntfft,nfreq,nkx);
+
+    /* Allocate the arrays */
+    G_pad = (float *)malloc(ntfft*nkx*sizeof(float));
+	if (G_pad == NULL) verr("memory allocation error for G_pad");
+    f_pad = (float *)malloc(ntfft*nkx*sizeof(float));
+	if (f_pad == NULL) verr("memory allocation error for f_pad");
+    wav_pad = (float *)malloc(ntfft*sizeof(float));
+	if (wav_pad == NULL) verr("memory allocation error for wav_pad");
+    G_w   = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (G_w == NULL) verr("memory allocation error for G_w");
+    f_w   = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (f_w == NULL) verr("memory allocation error for f_w");
+    Gf    = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (Gf == NULL) verr("memory allocation error for Gf");
+    wav_w = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (wav_w == NULL) verr("memory allocation error for wav_w");
+    amp   = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (amp == NULL) verr("memory allocation error for amp");
+    S   = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (S == NULL) verr("memory allocation error for S");
+    ZS   = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (ZS == NULL) verr("memory allocation error for ZS");
+    SS   = (complex *)malloc(nfreq*nkx*sizeof(complex));
+	if (SS == NULL) verr("memory allocation error for SS");
+
+    /* pad zeroes in 2 directions to reach FFT lengths */
+	pad2d_data(Gpd, nt,nx,ntfft,nkx,G_pad);
+	pad2d_data(f1pd,nt,nx,ntfft,nkx,f_pad);
+    pad_data(wav, nt, 1, ntfft, wav_pad);
+
+    /* double forward FFT */
+	xt2wkx(&G_pad[0], &G_w[0], ntfft, nkx, ntfft, nkx, 0);
+	xt2wkx(&f_pad[0], &f_w[0], ntfft, nkx, ntfft, nkx, 0);
+    rcmfft(&wav_pad[0], &wav_w[0], ntfft, 1, ntfft, nfreq, -1);
+
+    for (i=1; i<nkx; i++) {
+        for (j=0; j<nfreq; j++) {
+            wav_w[i*nfreq+j] = wav_w[j];
+        }
+    }
+
+    /* Create Z*(|S|*)/(|S|*(|S|*)) */
+    conv_small(  G_w,   f_w,   Gf,  nkx, nfreq); // Z
+    corr_small(  wav_w, wav_w, S,   nkx, nfreq); //|S|
+    corr_small(  Gf,    G_w,   ZS,  nkx, nfreq); // Z *(|S|*)
+    corr_small(  G_w,   G_w,   SS,  nkx, nfreq); //|S|*(|S|*)
+    deconv_small(ZS,    SS,    amp, nkx, nfreq, reps, eps); // amp
+
+    for (i=0; i<nkx*nfreq; i++) {
+        complex_sqrt(&amp[i]);
+    }
+    
+    conv_small(G_w, amp, Gf, nkx, nfreq); // Scaled data
+
+    /* inverse double FFT */
+	wkx2xt(&Gf[0], &G_pad[0], ntfft, nkx, nkx, ntfft, 0);
+	/* select original samples and traces */
+	scl = 1.0;
+	scl_data(G_pad,ntfft,nx,scl,Gpd ,nt);
+
+    fp      = fopen("out.su", "w+");
+    ret = writeData(fp, Gpd, hdr_gp, nt, nx);
+	if (ret < 0 ) verr("error on writing output file.");
+    fclose(fp);
+
+    free(f1pd);free(Gpd);free(hdr_gp);free(hdr_fp);
+
+	return 0;
+}
+
+void conv_small(complex *c1, complex *c2, complex *c3, float nkx, float nfreq)
+{
+
+    float   *qr, *qi, *p1r, *p1i, *p2r, *p2i;
+    int     n, j;
+
+    /* apply convolution */
+	p1r = (float *) &c1[0];
+	p2r = (float *) &c2[0];
+	qr = (float *) &c3[0].r;
+	p1i = p1r + 1;
+	p2i = p2r + 1;
+	qi = qr + 1;
+	n = nkx*nfreq;
+	for (j = 0; j < n; j++) {
+		*qr = (*p2r**p1r - *p2i**p1i);
+		*qi = (*p2r**p1i + *p2i**p1r);
+		qr += 2;
+		qi += 2;
+		p1r += 2;
+		p1i += 2;
+		p2r += 2;
+		p2i += 2;
+	}
+}
+
+void corr_small(complex *c1, complex *c2, complex *c3, float nkx, float nfreq)
+{
+
+    float   *qr, *qi, *p1r, *p1i, *p2r, *p2i;
+    int     n, j;
+
+    /* apply convolution */
+	p1r = (float *) &c1[0];
+	p2r = (float *) &c2[0];
+	qr = (float *) &c3[0].r;
+	p1i = p1r + 1;
+	p2i = p2r + 1;
+	qi = qr + 1;
+	n = nkx*nfreq;
+	for (j = 0; j < n; j++) {
+		*qr = (*p2r**p1r + *p2i**p1i);
+		*qi = (*p2r**p1i - *p2i**p1r);
+		qr += 2;
+		qi += 2;
+		p1r += 2;
+		p1i += 2;
+		p2r += 2;
+		p2i += 2;
+	}
+}
+
+void deconv_small(complex *c1, complex *c2, complex *c3, float nkx, float nfreq, float reps, float eps)
+{
+
+    float   *qr, *qi, *p1r, *p1i, *p2r, *p2i, maxden, *den, leps;
+    int     n, j;
+
+    den = (float *)malloc(nfreq*nkx*sizeof(float));
+	if (den == NULL) verr("memory allocation error for den");
+
+    /* apply deconvolution */
+	p1r = (float *) &c1[0];
+	p2r = (float *) &c2[0];
+	p1i = p1r + 1;
+	p2i = p2r + 1;
+	n = nkx*nfreq;
+	maxden=0.0;
+	for (j = 0; j < n; j++) {
+		den[j] = *p2r**p2r + *p2i**p2i;
+		maxden = MAX(den[j], maxden);
+		p2r += 2;
+		p2i += 2;
+	}
+	p1r = (float *) &c1[0];
+	p2r = (float *) &c2[0];
+	qr = (float *) &c3[0].r;
+	p1i = p1r + 1;
+	p2i = p2r + 1;
+    qi = qr + 1;
+	leps = reps*maxden+eps;
+	for (j = 0; j < n; j++) {
+
+		if (fabs(*p2r)>=fabs(*p2i)) {
+			*qr = (*p2r**p1r+*p2i**p1i)/(den[j]+leps);
+			*qi = (*p2r**p1i-*p2i**p1r)/(den[j]+leps);
+		} else {
+			*qr = (*p1r**p2r+*p1i**p2i)/(den[j]+leps);
+			*qi = (*p1i**p2r-*p1r**p2i)/(den[j]+leps);
+		}
+		qr += 2;
+		qi += 2;
+		p1r += 2;
+		p1i += 2;
+		p2r += 2;
+		p2i += 2;
+	}
+}
+
+void complex_sqrt(complex *z)
+{
+    float zmod, zmodr, zzmr, zzmi, zzm;
+
+    zmod  = sqrtf(z[0].r*z[0].r+z[0].i*z[0].i);
+    zmodr = sqrtf(zmod);
+    zzmr  = z[0].r + zmod;
+    zzmi  = z[0].i;
+    zzm   = sqrtf(zzmr*zzmr+zzmi*zzmi);
+
+    z[0].r = (zmodr*zzmr)/zzm;
+    z[0].i = (zmodr*zzmi)/zzm;
+}
+
+void pad_data(float *data, int nsam, int nrec, int nsamout, float *datout)
+{
+	int it,ix;
+	for (ix=0;ix<nrec;ix++) {
+		for (it=0;it<nsam;it++)
+			datout[ix*nsamout+it]=data[ix*nsam+it];
+		for (it=nsam;it<nsamout;it++)
+			datout[ix*nsamout+it]=0.0;
+	}
+}
+
+void pad2d_data(float *data, int nsam, int nrec, int nsamout, int nrecout, float *datout)
+{
+	int it,ix;
+	for (ix=0;ix<nrec;ix++) {
+		for (it=0;it<nsam;it++)
+			datout[ix*nsam+it]=data[ix*nsam+it];
+		for (it=nsam;it<nsamout;it++)
+			datout[ix*nsam+it]=0.0;
+	}
+	for (ix=nrec;ix<nrecout;ix++) {
+		for (it=0;it<nsamout;it++)
+			datout[ix*nsam+it]=0.0;
+	}
+}
+
+void scl_data(float *data, int nsam, int nrec, float scl, float *datout, int nsamout)
+{
+	int it,ix;
+	for (ix = 0; ix < nrec; ix++) {
+		for (it = 0 ; it < nsamout ; it++)
+			datout[ix*nsamout+it] = scl*data[ix*nsam+it];
+	}
+}
\ No newline at end of file