diff --git a/marchenko3D/Makefile b/marchenko3D/Makefile
index 491a1c0b10c7b333768fb81720759f0a0fa71b13..6a8acbd37b6d7fbef7ae55138ff5e673656ca4eb 100644
--- a/marchenko3D/Makefile
+++ b/marchenko3D/Makefile
@@ -87,7 +87,7 @@ OBJJ3	= $(SRCJ3:%.c=%.o)
 fmute3D:	$(OBJJ3) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o fmute3D $(OBJJ3) $(LIBS)
 
-install: fmute marchenko test fmute3D
+install: fmute marchenko marchenko3D fmute3D
 	cp fmute $B
 	cp marchenko $B
 	cp marchenko3D $B
diff --git a/marchenko3D/demo/oneD/marchenko.scr b/marchenko3D/demo/oneD/marchenko.scr
index 6fd6cacab5f12eda0d2ecafb39c5ac9d78313f46..8538681e22d6a73dbaef67b3c09efeebfbce2916 100755
--- a/marchenko3D/demo/oneD/marchenko.scr
+++ b/marchenko3D/demo/oneD/marchenko.scr
@@ -7,10 +7,10 @@ export OMP_NUM_THREADS=1
 fmute file_shot=iniFocus_rp.su file_out=p0plus.su above=-1 shift=-8 verbose=1 check=0 hw=8
 
 #apply the Marchenko algorithm
-~/OpenSource/marchenko3D/test file_shot=shotsy.su file_tinv=p0y.su nshots=901 verbose=10 \
-	tap=0 niter=8 hw=8 shift=12 smooth=3 \
-	file_green=pgreen2.su file_gplus=Gplus02.su file_gmin=Gmin02.su  \
-	file_f1plus=f1plus02.su file_f1min=f1min02.su file_f2=f22.su 
+marchenko file_shot=shotsdx5_rp.su file_tinv=p0plus.su nshots=901 verbose=2 \
+	tap=0 niter=8 hw=8 shift=12 smooth=3 scale=4 \
+	file_green=pgreen3.su file_gplus=Gplus03.su file_gmin=Gmin03.su  \
+	file_f1plus=f1plus03.su file_f1min=f1min03.su file_f2=f23.su 
 
 exit
 
diff --git a/marchenko3D/demo/oneD/p5all.scr b/marchenko3D/demo/oneD/p5all.scr
index 333be5510ec6a203c098595abfdabe5cdba2466b..c749523043cbeb7f11ea3a1a86f72b397b805271 100755
--- a/marchenko3D/demo/oneD/p5all.scr
+++ b/marchenko3D/demo/oneD/p5all.scr
@@ -4,9 +4,9 @@ export PATH=$HOME/src/OpenSource/bin:$PATH:
 
 # Generate the full R matrix for a fixed spread geometry.
 
-dxshot=5000 # with scalco factor of 1000
+dxshot=10000 # with scalco factor of 1000
 ishot=0
-nshots=901
+nshots=451
 
 echo $1
 
@@ -16,16 +16,15 @@ while (( ishot < nshots ))
 do
 
 	(( xsrc = -2250000 + ${ishot}*${dxshot} ))
-	(( tr1 = 901 - ${ishot} ))
-	(( tr2 = ${tr1} + 900 ))
+	(( tr1 = $nshots - ${ishot} ))
+	(( tr2 = ${tr1} + $nshots - 1 ))
 	echo xsrc=$xsrc tr1=$tr1 tr2=$tr2
 
 	(( ishot = $ishot + 1))
 
-	suwind < shot5_rp.su key=tracl min=$tr1 max=$tr2 | \
+	suwind < shot_kxky.su key=tracl min=$tr1 max=$tr2 | \
 	sushw key=sx,gx,fldr,trwf \
-	a=$xsrc,-2250000,$ishot,901 b=0,5000,0,0 j=0,901,0,0 | \
+	a=$xsrc,-2250000,$ishot,$nshots b=0,$dxshot,0,0 j=0,$nshots,0,0 | \
 	suchw key1=offset key2=gx key3=sx c=-1 d=1000 >> shotsdx5_rp.su
 
 done
-
diff --git a/marchenko3D/marchenko3D.c b/marchenko3D/marchenko3D.c
index 7323516501465457ae78f8bea999ba224fdd5c92..983aa15579a63adbc16529cf527ce878c1068e57 100644
--- a/marchenko3D/marchenko3D.c
+++ b/marchenko3D/marchenko3D.c
@@ -24,7 +24,7 @@ void omp_set_num_threads(int num_threads);
 #ifndef MIN
 #define MIN(x,y) ((x) < (y) ? (x) : (y))
 #endif
-#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
 
 
 
@@ -34,36 +34,42 @@ typedef struct _complexStruct { /* complex number */
 } complex;
 #endif/* complex */
 
-int readShotData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc, int *xnx, complex *cdata, int nw, int nw_low, int nshots, int nx, int ny, int ntfft, int mode, float scale, int verbose);
-int readTinvData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc, int *xnx, int Nfoc, int nx, int ny, int ntfft, int mode, int *maxval, float *tinv, int hw, int verbose);
-int writeDataIter(char *file_iter, float *data, segy *hdrs, int n1, int n2, float d2, float f2, int n2out, int Nfoc, float *xsyn,
-float *zsyn, int *ixpos, int npos, int iter);
-int unique_elements(float *arr, int len);
+long readShotData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc, long *xnx, complex *cdata,
+    long nw, long nw_low, long nshots, long nx, long ny, long ntfft, long mode, float scale, long verbose);
+long readTinvData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float *ysrc, float *zsrc,
+    long *xnx, long Nfoc, long nx, long ny, long ntfft, long mode, long *maxval, float *tinv, long hw, long verbose);
+// int writeDataIter(char *file_iter, float *data, segy *hdrs, int n1, int n2, float d2, float f2, int n2out, int Nfoc, float *xsyn,
+//     float *zsyn, int *ixpos, int npos, int iter);
+long unique_elements(float *arr, long len);
 
 void name_ext(char *filename, char *extension);
 
-void applyMute(float *data, int *mute, int smooth, int above, int Nfoc, int nxs, int nt, int *xrcvsyn, int npos, int shift);
+void applyMute3D(float *data, long *mute, long smooth, long above, long Nfoc, long nxs, long nt, long *xrcvsyn, long npos, long shift);
 
-int getFileInfo3D(char *filename, int *n1, int *n2, int *n3, int *ngath, float *d1, float *d2, float *d3, float *f1, float *f2, float *f3, float *sclsxgxsygy, int *nxm);
-int readData(FILE *fp, float *data, segy *hdrs, int n1);
-int writeData(FILE *fp, float *data, segy *hdrs, int n1, int n2);
-int disp_fileinfo3D(char *file, int n1, int n2, int n3, float f1, float f2, float f3, float d1, float d2, float d3, segy *hdrs);
+long getFileInfo3D(char *filename, long *n1, long *n2, long *n3, long *ngath, float *d1, float *d2, float *d3, float *f1, float *f2, float *f3,
+    float *sclsxgxsygy, long *nxm);
+long readData3D(FILE *fp, float *data, segy *hdrs, long n1);
+long writeData3D(FILE *fp, float *data, segy *hdrs, long n1, long n2);
+long disp_fileinfo3D(char *file, long n1, long n2, long n3, float f1, float f2, float f3, float d1, float d2, float d3, segy *hdrs);
 double wallclock_time(void);
 
-void synthesisPositions3D(int nx, int ny, int nxs, int nys, int Nfoc, float *xrcv, float *yrcv, float *xsrc, float *ysrc, int *xnx, float fxse, float fyse, float fxsb, float fysb, float dxs, float dys, int nshots, int nxsrc, int nysrc, int *ixpos, int *npos, int reci, int verbose);
-void synthesis3D(complex *Refl, complex *Fop, float *Top, float *iRN, int nx, int ny, int nt, int nxs, int nys, int nts, float dt, float *xsyn, float *ysyn, 
-int Nfoc, float *xrcv, float *yrcv, float *xsrc, float *ysrc, int *xnx, float fxse, float fxsb, float fyse, float fysb, float dxs, float dys, float dxsrc, 
-float dysrc, float dx, float dy, int ntfft, int nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int nxsrc, int nysrc, 
-int *ixpos, int npos, double *tfft, int *isxcount, int *reci_xsrc,  int *reci_xrcv, float *ixmask, int verbose);
+void synthesisPositions3D(long nx, long ny, long nxs, long nys, long Nfoc, float *xrcv, float *yrcv, float *xsrc, float *ysrc,
+    long *xnx, float fxse, float fyse, float fxsb, float fysb, float dxs, float dys, long nshots, long nxsrc, long nysrc,
+    long *ixpos, long *npos, long reci, long verbose);
+void synthesis3D(complex *Refl, complex *Fop, float *Top, float *iRN, long nx, long ny, long nt, long nxs, long nys, long nts, float dt,
+    float *xsyn, float *ysyn, long Nfoc, float *xrcv, float *yrcv, float *xsrc, float *ysrc, long *xnx,
+    float fxse, float fxsb, float fyse, float fysb, float dxs, float dys, float dxsrc, float dysrc, 
+    float dx, float dy, long ntfft, long nw, long nw_low, long nw_high,  long mode, long reci, long nshots, long nxsrc, long nysrc, 
+    long *ixpos, long npos, double *tfft, long *isxcount, long *reci_xsrc,  long *reci_xrcv, float *ixmask, long verbose);
 
-int linearsearch(int *array, size_t N, int value);
+long linearsearch(long *array, size_t N, long value);
 
 /*********************** self documentation **********************/
 char *sdoc[] = {
 " ",
-" MARCHENKO - Iterative Green's function and focusing functions retrieval",
+" MARCHENKO3D - Iterative Green's function and focusing functions retrieval in 3D",
 " ",
-" marchenko file_tinv= file_shot= [optional parameters]",
+" marchenko3D file_tinv= file_shot= [optional parameters]",
 " ",
 " Required parameters: ",
 " ",
@@ -102,7 +108,8 @@ char *sdoc[] = {
 "   verbose=0 ................ silent option; >0 displays info",
 " ",
 " ",
-" author  : Jan Thorbecke : 2016 (j.w.thorbecke@tudelft.nl)",
+" author  : Jan Thorbecke     : 2016 (j.w.thorbecke@tudelft.nl)",
+" author  : Joeri Brackenhoff : 2019 (j.a.brackenhoff@tudelft.nl)",
 " ",
 NULL};
 /**************** end self doc ***********************************/
@@ -111,13 +118,13 @@ int main (int argc, char **argv)
 {
     FILE    *fp_out, *fp_f1plus, *fp_f1min;
     FILE    *fp_gmin, *fp_gplus, *fp_f2, *fp_pmin;
-    int     i, j, l, k, ret, nshots, nxshot, nyshot, Nfoc, nt, nx, ny, nts, nxs, nys, ngath;
-    int     size, n1, n2, n3, ntap, tap, dxi, dyi, ntraces, pad;
-    int     nw, nw_low, nw_high, nfreq, *xnx, *xnxsyn;
-    int     reci, countmin, mode, n2out, n3out, verbose, ntfft;
-    int     iter, niter, tracf, *muteW;
-    int     hw, smooth, above, shift, *ixpos, npos, ix;
-    int     nshots_r, *isxcount, *reci_xsrc, *reci_xrcv;
+    long    i, j, l, k, ret, nshots, nxshot, nyshot, Nfoc, nt, nx, ny, nts, nxs, nys, ngath;
+    long    size, n1, n2, n3, ntap, tap, dxi, dyi, ntraces, pad;
+    long    nw, nw_low, nw_high, nfreq, *xnx, *xnxsyn;
+    long    reci, countmin, mode, n2out, n3out, verbose, ntfft;
+    long    iter, niter, tracf, *muteW;
+    long    hw, smooth, above, shift, *ixpos, npos, ix;
+    long    nshots_r, *isxcount, *reci_xsrc, *reci_xrcv;
     float   fmin, fmax, *tapersh, *tapersy, fxf, fyf, dxf, dyf, *xsrc, *ysrc, *xrcv, *yrcv, *zsyn, *zsrc, *xrcvsyn, *yrcvsyn;
     double  t0, t1, t2, t3, tsyn, tread, tfft, tcopy, energyNi, energyN0;
     float   d1, d2, d3, f1, f2, f3, fxsb, fxse, fysb, fyse, ft, fx, fy, *xsyn, *ysyn, dxsrc, dysrc;
@@ -146,7 +153,7 @@ int main (int argc, char **argv)
     if (!getparstring("file_f2", &file_f2)) file_f2 = NULL;
     if (!getparstring("file_pmin", &file_pmin)) file_pmin = NULL;
     if (!getparstring("file_iter", &file_iter)) file_iter = NULL;
-    if (!getparint("verbose", &verbose)) verbose = 0;
+    if (!getparlong("verbose", &verbose)) verbose = 0;
     if (file_tinv == NULL && file_shot == NULL) 
         verr("file_tinv and file_shot cannot be both input pipe");
     if (!getparstring("file_green", &file_green)) {
@@ -155,20 +162,20 @@ int main (int argc, char **argv)
     }
     if (!getparfloat("fmin", &fmin)) fmin = 0.0;
     if (!getparfloat("fmax", &fmax)) fmax = 70.0;
-    if (!getparint("reci", &reci)) reci = 0;
+    if (!getparlong("reci", &reci)) reci = 0;
     if (!getparfloat("scale", &scale)) scale = 2.0;
     if (!getparfloat("tsq", &tsq)) tsq = 0.0;
     if (!getparfloat("Q", &Q)) Q = 0.0;
     if (!getparfloat("f0", &f0)) f0 = 0.0;
-    if (!getparint("tap", &tap)) tap = 0;
-    if (!getparint("ntap", &ntap)) ntap = 0;
-    if (!getparint("pad", &pad)) pad = 0;
+    if (!getparlong("tap", &tap)) tap = 0;
+    if (!getparlong("ntap", &ntap)) ntap = 0;
+    if (!getparlong("pad", &pad)) pad = 0;
 
-    if(!getparint("niter", &niter)) niter = 10;
-    if(!getparint("hw", &hw)) hw = 15;
-    if(!getparint("smooth", &smooth)) smooth = 5;
-    if(!getparint("above", &above)) above = 0;
-    if(!getparint("shift", &shift)) shift=12;
+    if(!getparlong("niter", &niter)) niter = 10;
+    if(!getparlong("hw", &hw)) hw = 15;
+    if(!getparlong("smooth", &smooth)) smooth = 5;
+    if(!getparlong("above", &above)) above = 0;
+    if(!getparlong("shift", &shift)) shift=12;
 
     if (reci && ntap) vwarn("tapering influences the reciprocal result");
 
@@ -194,12 +201,12 @@ int main (int argc, char **argv)
 
     ntfft = optncr(MAX(nt+pad, nts+pad)); 
     nfreq = ntfft/2+1;
-    nw_low = (int)MIN((fmin*ntfft*dt), nfreq-1);
+    nw_low = (long)MIN((fmin*ntfft*dt), nfreq-1);
     nw_low = MAX(nw_low, 1);
-    nw_high = MIN((int)(fmax*ntfft*dt), nfreq-1);
+    nw_high = MIN((long)(fmax*ntfft*dt), nfreq-1);
     nw  = nw_high - nw_low + 1;
     scl   = 1.0/((float)ntfft);
-    if (!getparint("countmin", &countmin)) countmin = 0.3*nx*ny;
+    if (!getparlong("countmin", &countmin)) countmin = 0.3*nx*ny;
     
 /*================ Allocating all data arrays ================*/
 
@@ -212,7 +219,7 @@ int main (int argc, char **argv)
     iRN     = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
     Ni      = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
     G_d     = (float *)calloc(Nfoc*nys*nxs*ntfft,sizeof(float));
-    muteW   = (int *)calloc(Nfoc*nys*nxs,sizeof(int));
+    muteW   = (long *)calloc(Nfoc*nys*nxs,sizeof(long));
     trace   = (float *)malloc(ntfft*sizeof(float));
     tapersy = (float *)malloc(nxs*sizeof(float));
     xrcvsyn = (float *)calloc(Nfoc*nys*nxs,sizeof(float)); // x-rcv postions of focal points
@@ -220,8 +227,8 @@ int main (int argc, char **argv)
     xsyn    = (float *)malloc(Nfoc*sizeof(float)); // x-src position of focal points
     ysyn    = (float *)malloc(Nfoc*sizeof(float)); // x-src position of focal points
     zsyn    = (float *)malloc(Nfoc*sizeof(float)); // z-src position of focal points
-    xnxsyn  = (int *)calloc(Nfoc,sizeof(int)); // number of traces per focal point
-    ixpos   = (int *)calloc(nys*nxs,sizeof(int)); // x-position of source of shot in G_d domain (nxs*nys with dxs, dys)
+    xnxsyn  = (long *)calloc(Nfoc,sizeof(long)); // number of traces per focal point
+    ixpos   = (long *)calloc(nys*nxs,sizeof(long)); // x-position of source of shot in G_d domain (nxs*nys with dxs, dys)
 
     Refl    = (complex *)malloc(nw*ny*nx*nshots*sizeof(complex));
     tapersh = (float *)malloc(nx*sizeof(float));
@@ -230,12 +237,12 @@ int main (int argc, char **argv)
     xsrc    = (float *)calloc(nshots,sizeof(float)); //x-src position of shots
     ysrc    = (float *)calloc(nshots,sizeof(float)); //x-src position of shots
     zsrc    = (float *)calloc(nshots,sizeof(float)); // z-src position of shots
-    xnx     = (int *)calloc(nshots,sizeof(int)); // number of traces per shot
+    xnx     = (long *)calloc(nshots,sizeof(long)); // number of traces per shot
 
 	if (reci!=0) {
-        reci_xsrc = (int *)malloc((nxs*nxs*nys*nys)*sizeof(int));
-        reci_xrcv = (int *)malloc((nxs*nxs*nys*nys)*sizeof(int));
-        isxcount  = (int *)calloc(nxs*nys,sizeof(int));
+        reci_xsrc = (long *)malloc((nxs*nxs*nys*nys)*sizeof(long));
+        reci_xrcv = (long *)malloc((nxs*nxs*nys*nys)*sizeof(long));
+        isxcount  = (long *)calloc(nxs*nys,sizeof(long));
         ixmask  = (float *)calloc(nxs*nys,sizeof(float));
     }
 
@@ -518,9 +525,9 @@ int main (int argc, char **argv)
         t3 = wallclock_time();
         tsyn +=  t3 - t2;
 
-        if (file_iter != NULL) {
-            writeDataIter(file_iter, iRN, hdrs_out, ntfft, nxs*nys, d2, f2, n2out*n3out, Nfoc, xsyn, zsyn, ixpos, npos, iter);
-        }
+        // if (file_iter != NULL) {
+        //     writeDataIter(file_iter, iRN, hdrs_out, ntfft, nxs*nys, d2, f2, n2out*n3out, Nfoc, xsyn, zsyn, ixpos, npos, iter);
+        // }
         /* N_k(x,t) = -N_(k-1)(x,-t) */
         /* p0^-(x,t) += iRN = (R * T_d^inv)(t) */
         for (l = 0; l < Nfoc; l++) {
@@ -604,6 +611,7 @@ int main (int argc, char **argv)
             }
         }
     }
+    applyMute(green, muteW, smooth, 4, Nfoc, nxs*nys, nts, ixpos, npos, shift);
 
     /* compute upgoing Green's function G^+,- */
     if (file_gmin != NULL) {
@@ -629,7 +637,7 @@ int main (int argc, char **argv)
             }
         }
         /* Apply mute with window for Gmin */
-        applyMute(Gmin, muteW, smooth, 1, Nfoc, nxs*nys, nts, ixpos, npos, shift);
+        applyMute(Gmin, muteW, smooth, 4, Nfoc, nxs*nys, nts, ixpos, npos, shift);
     } /* end if Gmin */
 
     /* compute downgoing Green's function G^+,+ */
@@ -655,6 +663,8 @@ int main (int argc, char **argv)
                 }
             }
         }
+        /* Apply mute with window for Gplus */
+        applyMute(Gplus, muteW, smooth, 4, Nfoc, nxs*nys, nts, ixpos, npos, shift);
     } /* end if Gplus */
 
     t2 = wallclock_time();
diff --git a/marchenko3D/readTinvData3D.c b/marchenko3D/readTinvData3D.c
index c6a9e45811c6d24b25dc65ffca7d257444a46e06..845d08076656e6987299f3fdbbbcf5d0aa55ec1f 100644
--- a/marchenko3D/readTinvData3D.c
+++ b/marchenko3D/readTinvData3D.c
@@ -122,25 +122,28 @@ int readTinvData3D(char *filename, float *xrcv, float *yrcv, float *xsrc, float
 		xnx[isyn]=itrace;
 
         /* alternative find maximum at source position */
-        dxrcv = (gx1 - gx0)*scl/(float)(nx1-1);
-        dyrcv = (gy1 - gy0)*scl/(float)(ny1-1);
-        //imax = NINT(((sx_shot-gx0)*scl)/dxrcv);
+		if (nx1>1) dxrcv = (gx1 - gx0)*scl/(float)(nx1-1);
+        else dxrcv = (gx1 - gx0)*scl/(float)(1);
+		if (dxrcv==0.0) dxrcv=1.0;
         ixmax = NINT(((sx_shot-gx0)*scl)/dxrcv);
+        if (ny1>1) dyrcv = (gy1 - gy0)*scl/(float)(ny1-1);
+		else dyrcv = (gy1 - gy0)*scl/(float)(1);
+		if (dyrcv==0.0) dyrcv=1.0;
         iymax = NINT(((sy_shot-gy0)*scl)/dyrcv);
 		if (iymax > ny1-1) {
-            vmess("source of y is past array, snapping to nearest y");
+            vmess("source of y (%d) is past array, snapping to nearest y (%d)",iymax,ny1-1);
             iymax = ny1-1;
         }
         if (iymax < 0) {
-            vmess("source of y is before array, snapping to nearest y");
+            vmess("source of y (%d) is before array, snapping to nearest y (%d)",iymax,0);
             iymax = 0;
         }
         if (ixmax > nx1-1) {
-            vmess("source of x is past array, snapping to nearest x");
+            vmess("source of x (%d) is past array, snapping to nearest x (%d)",ixmax,nx1-1);
             ixmax = nx1-1;
         }
         if (ixmax < 0) {
-            vmess("source of x is before array, snapping to nearest x");
+            vmess("source of x (%d) is before array, snapping to nearest x (%d)",ixmax,0); /* ixmax is clamped to 0 on the next line */
             ixmax = 0;
         }
         tmax=0.0;
diff --git a/marchenko3D/synthesis3D.c b/marchenko3D/synthesis3D.c
index 8ab12a94806fe3663dc8d4f27d6f3f388d71afd2..19d4c80b2ade811d3afb0b139a35d68cb34aafc5 100644
--- a/marchenko3D/synthesis3D.c
+++ b/marchenko3D/synthesis3D.c
@@ -136,12 +136,21 @@ int *ixpos, int npos, double *tfft, int *isxcount, int *reci_xsrc,  int *reci_xr
     int     nfreq, size, inx;
     float   scl;
     int     i, j, l, m, iw, ix, k, isrc, il, ik, nxy, nxys;
-    float   *rtrace, idxs, idys;
+    float   *rtrace, idxs, idys, fxb, fyb, fxe, fye;
     complex *sum, *ctrace;
     int     npe;
     static int first=1, *ircv;
     static double t0, t1, t;
 
+    if (fxsb < 0) fxb = 1.001*fxsb;
+    else          fxb = 0.999*fxsb;
+    if (fysb < 0) fyb = 1.001*fysb;
+    else          fyb = 0.999*fysb;
+    if (fxse > 0) fxe = 1.001*fxse;
+    else          fxe = 0.999*fxse;
+    if (fyse > 0) fye = 1.001*fyse;
+    else          fye = 0.999*fyse;
+
     nxy     = nx*ny;
     nxys    = nxs*nys;
 
@@ -149,7 +158,7 @@ int *ixpos, int npos, double *tfft, int *isxcount, int *reci_xsrc,  int *reci_xr
     nfreq = ntfft/2+1;
     /* scale factor 1/N for backward FFT,
      * scale dt for correlation/convolution along time, 
-     * scale dx (or dxsrc) for integration over receiver (or shot) coordinates */
+     * scale dx*dy (or dxsrc*dysrc) for integration over receiver (or shot) coordinates */
     scl   = 1.0*dt/((float)ntfft);
 
 #ifdef _OPENMP
@@ -212,7 +221,7 @@ int *ixpos, int npos, double *tfft, int *isxcount, int *reci_xsrc,  int *reci_xr
 /* Loop over total number of shots */
     if (reci == 0 || reci == 1) {
         for (k=0; k<nshots; k++) {
-            if ((xsrc[k] < 0.999*fxsb) || (xsrc[k] > 1.001*fxse) || (ysrc[k] < 0.999*fysb) || (ysrc[k] > 1.001*fyse)) continue;
+            if ((xsrc[k] < fxb) || (xsrc[k] > fxe) || (ysrc[k] < fyb) || (ysrc[k] > fye)) continue;
             isrc = NINT((ysrc[k] - fysb)/dys)*nxs+NINT((xsrc[k] - fxsb)/dxs);
             inx = xnx[k]; /* number of traces per shot */
 
@@ -234,8 +243,8 @@ int *ixpos, int npos, double *tfft, int *isxcount, int *reci_xsrc,  int *reci_xr
 		        /* compute integral over receiver positions */
                 /* multiply R with Fop and sum over nx */
                 memset(&sum[0].r,0,nfreq*2*sizeof(float));
-                for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
-                    for (i = 0; i < inx; i++) {
+                for (i = 0; i < inx; i++) {
+                    for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
                         ix = ircv[k*nxy+i];
                         sum[j].r += Refl[k*nw*nxy+m*nxy+i].r*Fop[l*nw*nxys+m*nxys+ix].r -
                                     Refl[k*nw*nxy+m*nxy+i].i*Fop[l*nw*nxys+m*nxys+ix].i;
diff --git a/utils/Makefile b/utils/Makefile
index 3b59b4f71f844acc56c9b9851deba24861d82e5b..9d47eceee936c6e5dffd52824bbd667ed5a4adea 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -6,7 +6,7 @@ LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #OPTC += -openmp 
 #OPTC += -g -O0
 
-ALL: makemod makewave extendModel fconv correigen green basop syn2d mat2su ftr1d
+ALL: makemod makewave extendModel fconv correigen green green3D basop syn2d mat2su ftr1d
 
 SRCM	= \
 		makemod.c  \
@@ -84,6 +84,17 @@ SRCG	= green.c \
 		docpkge.c \
 		getpars.c
 
+SRCG3	= green3D.c \
+		getFileInfo.c  \
+		getrecpos3D.c  \
+		readData.c \
+		writeData.c \
+		wallclock_time.c \
+		verbosepkg.c  \
+		atopkge.c \
+		docpkge.c \
+		getpars.c
+
 SRCB	= basop.c \
 		getFileInfo.c  \
 		kxwfilter.c  \
@@ -152,6 +163,11 @@ OBJG	= $(SRCG:%.c=%.o)
 green:	$(OBJG) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o green $(OBJG) $(LIBS)
 
+OBJG3	= $(SRCG3:%.c=%.o)
+
+green3D:	$(OBJG3) 
+	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o green3D $(OBJG3) $(LIBS)
+
 OBJB	= $(SRCB:%.c=%.o)
 
 basop:	$(OBJB) 
@@ -172,23 +188,24 @@ OBJT	= $(SRCT:%.c=%.o)
 ftr1d:	$(OBJT) 
 	$(CC) $(LDFLAGS) $(OPTC) $(CFLAGS) -o ftr1d $(OBJT) $(LIBS)
 
-install: makemod makewave extendModel fconv correigen green basop syn2d mat2su ftr1d
+install: makemod makewave extendModel fconv correigen green green3D basop syn2d mat2su ftr1d
 	cp makemod $B
 	cp makewave $B
 	cp extendModel $B
 	cp fconv $B
 	cp correigen $B
 	cp green $B
+	cp green3D $B
 	cp basop $B
 	cp syn2d $B
 	cp mat2su $B
 	cp ftr1d $B
 
 clean:
-		rm -f core $(OBJM) makemod $(OBJW) makewave $(OBJE) extendModel $(OBJF) fconv $(OBJG) $(OBJC) correigen green $(OBJB) basop $(OBJJ) syn2d $(OBJS) mat2su $(OBJA) ftr1d $(OBJT)
+		rm -f core $(OBJM) makemod $(OBJW) makewave $(OBJE) extendModel $(OBJF) fconv $(OBJG) $(OBJC) correigen green $(OBJG3) green3D $(OBJB) basop $(OBJJ) syn2d $(OBJS) mat2su $(OBJA) ftr1d $(OBJT)
 
 realclean: clean
-		rm -f $B/makemod $B/makewave $B/extendModel $B/fconv $B/correigen $B/green $B/basop $B/syn2d $B/mat2su $B/ftr1d
+		rm -f $B/makemod $B/makewave $B/extendModel $B/fconv $B/correigen $B/green $B/green3D $B/basop $B/syn2d $B/mat2su $B/ftr1d
 
 
 
diff --git a/utils/getrecpos3D.c b/utils/getrecpos3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..85e9623fbc57864eb7be454d66520faf5e88c28b
--- /dev/null
+++ b/utils/getrecpos3D.c
@@ -0,0 +1,157 @@
+#include "par.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+/**
+* read receiver positions used in green
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*           The Netherlands 
+**/
+
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+#ifndef MAX
+#define	MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN
+#define	MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define SGN(x) ((x) < 0 ? -1.0 : 1.0)
+#ifndef ABS
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+#endif
+
+/**
+* Fill xi, yi, zi with receiver coordinates, driven by command-line parameters.
+*
+* Parameters read here: xrcv and yrcv (only their value counts), dxrcv and
+* dyrcv (default 15), var (default 0), lint (default 1), seed (default 0) and,
+* when rrcv is given: dphi (default 2.0), oxrcv, oyrcv, ozrcv (default 0.0).
+*
+* xi, yi, zi ....... output receiver coordinates
+* nx, ny ........... loop bounds / row stride used when indexing the outputs
+* xrcv, yrcv, zrcv . input receiver position values
+* verbose .......... non-zero prints a summary; >4 and ==13 print per-receiver values
+*
+* Layouts: rrcv given -> circle; var<=0 -> stepping between input points
+* (lint==1) or plain copy (otherwise); var>0 -> stepping in x and z with a random offset on x.
+**/
+void getrecpos3D(float *xi, float *yi, float *zi, int nx, int ny, float *xrcv, float *yrcv, float *zrcv, int verbose)
+{
+	int		nrx, nry, i, j, l, ndeltx, ndelty, np, lint, seed;
+	long    idum;
+	float	xprev, yprev, zprev, deltx, delty, deltz, dxrcv, dyrcv, dzrcv, var, irr, maxirr;
+	float   rrcv, dphi, oxrcv, oyrcv, ozrcv;
+
+	nrx = countparval("xrcv");
+	nry = countparval("yrcv");
+	if(!getparfloat("dxrcv",&dxrcv)) dxrcv = 15;
+	if(!getparfloat("dyrcv",&dyrcv)) dyrcv = 15;
+	if(!getparfloat("var", &var)) var=0;
+	if(!getparint("lint", &lint)) lint=1;
+	if(!getparint("seed", &seed)) seed=0;
+
+	/* check if receiver positions on a circle are defined */
+	if (getparfloat("rrcv", &rrcv)) {
+		if (!getparfloat("dphi",&dphi)) dphi=2.0;
+		if (!getparfloat("oxrcv",&oxrcv)) oxrcv=0.0;
+		if (!getparfloat("oyrcv",&oyrcv)) oyrcv=0.0;
+		if (!getparfloat("ozrcv",&ozrcv)) ozrcv=0.0;
+
+		np = 0;
+		/* NOTE(review): both loops run to ny and j does not enter the
+		 * coordinates; confirm whether the inner bound should be nx. */
+		for (i=0; i<ny; i++) {
+			for (j=0; j<ny; j++) {
+				xi[np]   = oxrcv+rrcv*cos(((i*dphi)/360.0)*(2.0*M_PI));
+				yi[np]   = oyrcv+rrcv*cos(((i*dphi)/360.0)*(2.0*M_PI));
+				zi[np++] = ozrcv+rrcv*sin(((i*dphi)/360.0)*(2.0*M_PI));
+				/* np was advanced on the line above: report the point just stored */
+				if (verbose>4) fprintf(stderr,"Receiver Circle: xrcv[%d]=%f yrcv=%f zrcv=%f\n", np-1, xi[np-1], yi[np-1], zi[np-1]);
+			}
+		}
+		return;
+	}
+
+	if (var <= 0) {
+		if (lint == 1) {
+			/* NOTE(review): xrcv/yrcv/zrcv are indexed both with i (the yrcv
+			 * counter) and with i*nx+l, and ndelty is never used; confirm the
+			 * intended input layout before relying on this branch. */
+			xprev = xrcv[0];
+			yprev = yrcv[0];
+			zprev = zrcv[0];
+			np = 0;
+			for (i = 1; i < nry; i++) {
+				for (l = 1; l < nrx; l++) {
+					deltx = xrcv[i] - xprev;
+					delty = yrcv[i] - yprev;
+					deltz = zrcv[i] - zprev;
+					ndeltx = NINT(ABS(deltx/dxrcv));
+					ndelty = NINT(ABS(delty/dyrcv));
+					dzrcv = deltz/ndeltx;
+					for (j = 0; j < ndeltx; j++) {
+						zi[np]   = zprev + j*dzrcv;
+						yi[np]   = yprev + i*dyrcv;
+						xi[np++] = xprev + j*dxrcv;
+					}
+					xprev = xrcv[i*nx+l];
+					yprev = yrcv[i*nx+l];
+					zprev = zrcv[i*nx+l];
+				}
+				xi[i*nx+nx-1] = xrcv[nrx-1];
+				yi[i*nx+nx-1] = yrcv[nrx-1];
+				zi[i*nx+nx-1] = zrcv[nrx-1];
+			}
+		}
+		else {
+			for (i = 0; i < nry; i++) {
+				for (l = 0; l < nrx; l++) {
+					xi[i*nx+l] = xrcv[l];
+					yi[i*nx+l] = yrcv[i];
+					zi[i*nx+l] = zrcv[l];
+				}
+			}
+			/* nry*nrx points were copied; the summary below prints np+1 */
+			np = nrx*nry - 1;
+		}
+	}
+	else {
+		xprev = xrcv[0];
+		yprev = yrcv[0];
+		zprev = zrcv[0];
+		np = 0;
+		maxirr = 0;
+		idum = (long) seed;
+		srand48(idum);
+		for (i = 1; i < nrx; i++) {
+			deltx = xrcv[i] - xprev;
+			deltz = zrcv[i] - zprev;
+			ndeltx = NINT(ABS(deltx/dxrcv));
+			dzrcv = deltz/ndeltx;
+			for (j = 0; j < ndeltx; j++) {
+				irr = var*((float)drand48());
+				if (fabs(irr) > maxirr) maxirr = fabs(irr);
+				zi[np]   = zprev + j*dzrcv;
+				xi[np++] = xprev + j*dxrcv + irr;
+				if (verbose==13)vmess("xrcv %d = %f (%f)",np-1,xi[np-1], irr);
+			}
+			xprev = xrcv[i];
+			zprev = zrcv[i];
+		}
+		irr = var*((float)drand48());
+		if (fabs(irr) > maxirr) maxirr = fabs(irr);
+		xi[nx-1] = xrcv[nrx-1] + irr;
+		zi[nx-1] = zrcv[nrx-1];
+		if (verbose) vmess("maximum error in receiver position %f", maxirr);
+		if (verbose==13) vmess("xrcv %d = %f (%f)", nx-1, xi[nx-1], irr);
+	}
+
+	if (verbose) vmess("getrecpos number of receivers = %d", np+1);
+
+	return;
+}
diff --git a/utils/green3D b/utils/green3D
new file mode 100755
index 0000000000000000000000000000000000000000..cc36382df2db967794d33a3b4afe6e4bb4e56e07
Binary files /dev/null and b/utils/green3D differ
diff --git a/utils/green3D.c b/utils/green3D.c
new file mode 100644
index 0000000000000000000000000000000000000000..69daa8ea3552b7083eb3871d87ac1ee87694fe0a
--- /dev/null
+++ b/utils/green3D.c
@@ -0,0 +1,761 @@
+#include <genfft.h>
+#include "par.h"
+#include "segy.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5)) /* round to nearest int, halves away from zero; x is evaluated more than once */
+#ifndef MAX /* only defined here when no earlier header supplied MAX */
+#define	MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN /* only defined here when no earlier header supplied MIN */
+#define	MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define SGN(x) ((x) < 0 ? -1.0 : 1.0) /* -1.0 for negative x, +1.0 otherwise (zero maps to +1.0) */
+
+
+#ifndef COMPLEX /* skipped when a COMPLEX macro is already defined */
+typedef struct _complexStruct { /* complex number */
+    float r,i; /* combined as real (r) and imaginary (i) parts in the complex products of xwgreen3D */
+} complex;
+#endif/* complex */
+
+int disp_fileinfo(char *file, int n1, int n2, float f1, float f2, float d1, float d2, segy *hdrs); /* declared only; not defined in green3D.c */
+int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *d2, float *f1, float *f2, float *xmin, float *xmax, float *sclsxgx, int *nxm); /* declared only; not defined in green3D.c */
+int readData(FILE *fp, float *data, segy *hdrs, int n1); /* declared only; main uses its return value as the trace count */
+
+void xwgreen3D(float *data, int nt, int nx, int ny, float dt, float fmin, float fmax, float *xi, float xsrc,
+			float dx, float *yi, float ysrc, float dy, float *zi, float zsrc, float c, float cs, float rho,
+			float *wavelet, float dipx, float maxdip, int far, int p_vz, int dip, int verbose); /* defined further down in this file */
+
+
+/*********************** self documentation **********************/
+char *sdoc[] = { /* self-doc printed by requestdoc(); NOTE(review): zrcv, var, seed, lint, rrcv, o[xyz]rcv, dphi and dipy are listed below but main() never acts on them */
+" 								",
+" green3D - calculation of 2D Greens function in homogeneous medium based on exact expressions",
+" 								",
+" green3D c= zsrc1= [optional parameters]",
+" 							        ",
+" Required parameters:",
+" ",
+"   c= ....................... P-wave velocity",
+"   zsrc1= ................... depth of source",
+" 							        ",
+" Optional parameters:",
+" ",
+"   cs=0.7*c ................. S-wave velocity",
+"   file_out= ................ output file (default SU-pipe)",
+" RECEIVER POSITIONS ",
+"   xrcv=-1500,1500 .......... x-position's of receivers (array)",
+"   yrcv=-1500,1500 .......... y-position's of receivers (array)",
+"   zrcv=0,0 ................. z-position's of receivers (array)",
+"   dxrcv=15 ................. step in receiver x-direction",
+"   dyrcv=15 ................. step in receiver y-direction",
+"   var=0 .................... variance for irregular sampling (dxrcv +- var)",
+"   seed=0 ................... seed for random generator",
+"   lint=1 ................... linear interpolate between the rcv points",
+"   rrcv= .................... radius for receivers on a circle ",
+"   oxrcv=0.0 ................ x-center position of circle",
+"   oyrcv=0.0 ................ y-center position of circle",
+"   ozrcv=0.0 ................ z-center position of circle",
+"   dphi=2 ................... angle between receivers on circle ",
+" SOURCE POSITIONS ",
+"   xsrc1=0.0 ................ x-position of first source",
+"   xsrc2=xsrc1 .............. x-position of last source",
+"   dxsrc=0.0 ................ step in source x-direction",
+"   ysrc1=0.0 ................ y-position of first source",
+"   ysrc2=ysrc1 .............. y-position of last source",
+"   dysrc=0.0 ................ step in source y-direction",
+"   zsrc2=zsrc1 .............. depth position (z) of last source",
+"   dzsrc=0.0 ................ step in source z-direction",
+" SAMPLING AND SOURCE DEFINITION ",
+"   file_src=spike ........... source wavelet (overrules dt)",
+"   nt=256 ................... number of samples",
+"   dt=0.004 ................. stepsize in time-direction ",
+"   fmin=0 ................... minimum frequency",
+"   fmax=70 .................. maximum frequency",
+"   dipx=0 ................... local dip of the dipole in x-direction",
+"   dipy=0 ................... local dip of the dipole in y-direction",
+"   dip=1 .................... 1=dipole, 0=monopole source",
+"   rho=1000 ................. density",
+" FIELD DEFINITION ",
+"   far=0 .................... farfield approximation (0=off)",
+"   p_vz=0  .................. P or Vz field (0 = P field, 1 = Vz field)",
+"   Fz=0  .................... Force source in z with Vz receivers",
+"   Fx=0  .................... Force source in x with Vz receivers",
+"   maxdip=90 ................ maximum angle (degrees) to be computed ",
+"   sum=0 .................... sum all sources",
+"   verbose=0 ................ silent option; >0 display info",
+"",
+"  The P or Vz field of a dipole source at depth z below the receivers",
+"  in a homogeneous 2-D medium is calculated.",
+"   ",
+" author  : Jan Thorbecke : 23-03-1995 (janth@xs4all.nl)",
+"                         : revision 2010",
+" ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main(int argc, char **argv)
+{
+	FILE	*fp_in, *fp_out;
+	int     n1, n2, i, j, l, nrx, nry, nrz, dip;
+	int     far, p_vz, nt, nx, ny, Nsx, Nsy, is, isy, sum, lint, verbose;
+	int     size, ntraces, ngath, Fz, Fx;
+	float   scl, xmin, xmax;
+	float   dx, dy, dt, d1, d2, fmin, fmax, f1, f2, c, cs, rho;
+	float 	*data, *wavelet, *tmpdata, dipx, dipy, xsrc1, xsrc2, ysrc1, ysrc2;
+	float 	*xrcv = NULL, *yrcv = NULL, *xi, *yi, *zi, x0, y0, maxdip;
+	/* xrcv/yrcv start as NULL so the clean-up at the end is safe when the default grid is used */
+	float	zsrc1, zsrc2, dxsrc, dysrc, dzsrc, xsrc, ysrc, zsrc, dxrcv, dyrcv;
+	char    *file_src, *file_out;
+	size_t  nwrite;
+	segy	*hdrs;
+	/* Builds a regular nx*ny receiver grid, calls xwgreen3D once per source position and writes every trace (header + nt floats) to file_out or stdout. */
+/* ========================= Reading parameters ====================== */
+
+	initargs(argc, argv);
+	requestdoc(1);
+
+	if(!getparint("verbose", &verbose)) verbose = 0;
+	if(!getparstring("file_out", &file_out)){
+		if (verbose) vwarn("parameter file_out not found, assume pipe");
+		file_out = NULL;
+	}
+	if(!getparstring("file_src", &file_src)) file_src = NULL;
+	if(!getparfloat("c", &c)) verr("velocity must be specified.");
+    if(!getparfloat("cs", &cs)) cs=0.7*c;
+	if(!getparfloat("zsrc1", &zsrc1)) verr("zsrc1(depth) must be specified.");
+	if(!getparint("lint", &lint)) lint=1; /* read but not used further in this function */
+	if(!getparfloat("maxdip", &maxdip)) maxdip=90.0;
+
+	nrx  = countparval("xrcv");
+    nry  = countparval("yrcv");
+	nrz = 0; /* zrcv is not read: every receiver is placed at z = 0 below */
+	if(!getparfloat("dxrcv",&dxrcv)) dxrcv = 15;
+    if(!getparfloat("dyrcv",&dyrcv)) dyrcv = 15;
+	if (dxrcv == 0.0 || dyrcv == 0.0) verr("dxrcv and dyrcv must be non-zero"); /* both are used as divisors for nx and ny */
+
+	if (nrx != 0 && nry != 0 && nrz == 0) { /* user supplied the first and last receiver position on both axes */
+		if (nrx != 2) verr("xrcv should have only two values");
+        if (nry != 2) verr("yrcv should have only two values");
+		xrcv = (float *)malloc(nrx*sizeof(float));
+        yrcv = (float *)malloc(nry*sizeof(float));
+		getparfloat("xrcv",xrcv);
+        getparfloat("yrcv",yrcv);
+		nx = NINT((xrcv[1] - xrcv[0])/dxrcv) + 1;
+        ny = NINT((yrcv[1] - yrcv[0])/dyrcv) + 1;
+		xi = (float *)malloc(nx*ny*sizeof(float));
+        yi = (float *)malloc(nx*ny*sizeof(float));
+		zi = (float *)malloc(nx*ny*sizeof(float));
+		x0 = xrcv[0];
+        y0 = yrcv[0];
+		for (i = 0; i < ny; i++) { /* i runs over y (slow axis), j over x (fast axis) */
+            for (j = 0; j < nx; j++) {
+                xi[i*nx+j] = x0 + j*dxrcv;
+                yi[i*nx+j] = y0 + i*dyrcv;
+			    zi[i*nx+j] = 0;
+            }
+		}
+	}
+	else if (nrx == 0 && nry == 0 && nrz == 0) { /* neither xrcv nor yrcv given: default grid from -1500 to +1500 */
+		nx = NINT((3000)/dxrcv) + 1;
+		ny = NINT((3000)/dyrcv) + 1;
+		xi = (float *)malloc(nx*ny*sizeof(float));
+		yi = (float *)malloc(nx*ny*sizeof(float));
+		zi = (float *)malloc(nx*ny*sizeof(float));
+		x0 = -1500;
+		y0 = -1500;
+		for (i = 0; i < ny; i++) {
+            for (j = 0; j < nx; j++) {
+                xi[i*nx+j] = x0 + j*dxrcv;
+                yi[i*nx+j] = y0 + i*dyrcv;
+			    zi[i*nx+j] = 0;
+            }
+		}
+	}
+	else verr("Number of xrcv and yrcv values are not equal"); /* reached when only one of xrcv/yrcv was given */
+
+	if (verbose) vmess("number of receivers nx = %d, ny = %d total = %d", nx, ny, nx*ny);
+	if (verbose == 13) {
+		for (j = 0; j < ny; j++) {
+			for (i = 0; i < nx; i++) {
+				vmess("xi = %d yi = %d x = %f y=%f z = %f", i, j, xi[j*nx+i], yi[j*nx+i], zi[j*nx+i]);
+			}
+		}
+	}
+
+	if(!getparfloat("xsrc1", &xsrc1)) xsrc1=0;
+	if(!getparfloat("xsrc2", &xsrc2)) xsrc2=xsrc1;
+	if(!getparfloat("dxsrc", &dxsrc)) dxsrc=0.0;
+    if(!getparfloat("ysrc1", &ysrc1)) ysrc1=0;
+	if(!getparfloat("ysrc2", &ysrc2)) ysrc2=ysrc1;
+	if(!getparfloat("dysrc", &dysrc)) dysrc=0.0;
+	if(!getparfloat("zsrc2", &zsrc2)) zsrc2=zsrc1;
+	if(!getparfloat("dzsrc", &dzsrc)) dzsrc=0;
+	if(!getparint("nt", &nt)) nt = 256;
+	if(!getparfloat("fmin", &fmin)) fmin = 0.0;
+	if(!getparfloat("fmax", &fmax)) fmax = 70.0;
+	if(!getparfloat("dipx", &dipx)) dipx = 0.0;
+    if(!getparfloat("dipy", &dipy)) dipy = 0.0; /* read but not passed on to xwgreen3D */
+	if(!getparfloat("rho", &rho)) rho = 1000.0;
+	if(!getparint("far", &far)) far = 0;
+	if(!getparint("p_vz", &p_vz)) p_vz = 0;
+    if(!getparint("Fz", &Fz)) Fz = 0;
+    if(!getparint("Fx", &Fx)) Fx = 0;
+	if(!getparint("dip", &dip)) dip = 1;
+	if(!getparint("sum", &sum)) sum = 0; /* read but not used further in this function */
+    if(Fz) p_vz=2; /* force sources are encoded as extra p_vz modes */
+    if(Fx) p_vz=3; /* Fx wins when both Fz and Fx are set */
+
+/* ========================= Opening wavelet file ====================== */
+
+	if (file_src == NULL){ /* no wavelet file: use a unit spike at the first sample */
+		if(!getparfloat("dt", &dt)) dt = 0.004;
+		wavelet = (float *)calloc(nt,sizeof(float));
+		wavelet[0] = 1.0;
+	}
+	else {
+		if (verbose) vmess("Reading wavelet from file %s.", file_src);
+		ngath = 1;
+		getFileInfo(file_src, &n1, &n2, &ngath, &d1, &d2, &f1, &f2, &xmin, &xmax, &scl, &ntraces);
+		
+		fp_in = fopen(file_src, "r");
+		if (fp_in == NULL) verr("error on opening input file_src=%s", file_src);
+		
+		tmpdata = (float *)calloc(n1*n2,sizeof(float));
+		hdrs = (segy *) calloc(n2,sizeof(segy));
+		
+		n2 = readData(fp_in, tmpdata, hdrs, n1);
+		fclose(fp_in);
+		if (verbose) {
+			disp_fileinfo(file_src, n1, n2, f1,  f2,  d1,  d2, hdrs);
+		}
+		dt = d1; /* the sampling interval of the wavelet file overrules the dt parameter */
+		wavelet = (float *)calloc(nt,sizeof(float));
+
+		if (n1 <= nt) { /* only the first trace of the file is used; it is zero-padded to nt */
+			for (i = 0; i < n1; i++) wavelet[i] = tmpdata[i];
+			for (i = n1; i < nt; i++) wavelet[i] = 0.0;
+		}
+		else {
+			vwarn("file_src has more samples than output");
+			for (i = 0; i < nt; i++) wavelet[i] = tmpdata[i];
+		}
+		if( tmpdata ) free(tmpdata);
+		if( hdrs ) free( (void *) hdrs);
+	}
+
+/* ============ INITIALIZE AND CHECK PARAMETERS =============== */
+
+	if (dxsrc == 0.0 && xsrc2 != xsrc1) verr("dxsrc must be non-zero when xsrc2 differs from xsrc1"); /* dxsrc is a divisor below */
+	if (dysrc == 0.0 && ysrc2 != ysrc1) verr("dysrc must be non-zero when ysrc2 differs from ysrc1"); /* dysrc is a divisor below */
+	Nsx = (xsrc2==xsrc1) ? 1 : NINT((xsrc2 - xsrc1)/dxsrc) + 1;
+	Nsy = (ysrc2==ysrc1) ? 1 : NINT((ysrc2 - ysrc1)/dysrc) + 1;
+
+	if (verbose) vmess("Number of shot records to generate x = %d y = %d", Nsx, Nsy);
+	if (Nsx > 1 && Nsy > 1) { /* NOTE(review): steps are re-derived from the end points only when BOTH axes have several sources; otherwise zsrc2 is ignored - confirm intent */
+		dxsrc = (xsrc2-xsrc1)/(Nsx-1);
+		dysrc = (ysrc2-ysrc1)/(Nsy-1);
+		dzsrc = (zsrc2-zsrc1)/(Nsx-1);
+		if (verbose) {
+			vmess("dxsrc = %f", dxsrc);
+			vmess("dysrc = %f", dysrc);
+			vmess("dzsrc = %f", dzsrc);
+		}
+	}
+
+	size = nt * nx *ny;
+	dx   = dxrcv;
+	dy   = dyrcv;
+	tmpdata = (float *)calloc(size,sizeof(float));
+	data = (float *)calloc(size,sizeof(float));
+	hdrs = (segy *) calloc(nx*ny,sizeof(segy));
+	for (i = 0; i < ny; i++) { /* header fields that do not depend on the source */
+		for(j = 0; j < nx; j++) {
+			hdrs[i*nx+j].f1= 0.0;
+			hdrs[i*nx+j].f2= x0;
+			hdrs[i*nx+j].d1= dt;
+			hdrs[i*nx+j].d2= dx;
+			hdrs[i*nx+j].ns= nt;
+			hdrs[i*nx+j].dt= NINT(1000000*dt); /* round the product; the old (int) cast bound to the constant only and the value was truncated */
+			hdrs[i*nx+j].trwf= nx*ny;
+			hdrs[i*nx+j].tracl= i*nx+j+1;
+			hdrs[i*nx+j].tracf= i*nx+j+1;
+			hdrs[i*nx+j].gx = (x0 + j*dx)*1000;
+			hdrs[i*nx+j].gy = (y0 + i*dy)*1000;
+			hdrs[i*nx+j].scalco = -1000; /* coordinates are stored multiplied by 1000 */
+			hdrs[i*nx+j].trid = TREAL;
+		}
+	}
+	if (file_out==NULL) fp_out=stdout;
+	else fp_out = fopen(file_out,"w");
+	if (fp_out == NULL) verr("error in creating output file");
+
+	for (isy = 0; isy < Nsy; isy++) {
+		for (is = 0; is < Nsx; is++) {
+			xsrc = xsrc1 + is*dxsrc;
+			ysrc = ysrc1 + isy*dysrc;
+			zsrc = zsrc1 + is*dzsrc; /* source depth varies with the x index only */
+			if (verbose) vmess("xsrc = %f ysrc=%f zsrc = %f", xsrc, ysrc, zsrc);
+
+			xwgreen3D(data,nt,nx,ny,dt,fmin,fmax,xi,xsrc,dx,yi,ysrc,dy,zi,zsrc,c,cs,rho,wavelet,
+				dipx, maxdip, far, p_vz, dip, verbose);
+
+			for (l = 0; l < ny; l++) {
+				for (i = 0; i < nx; i++) {
+					for (j = 0; j < nt; j++) tmpdata[l*nx*nt+i*nt+j] = data[l*nx*nt+i*nt+j];
+					hdrs[l*nx+i].sx = NINT(xsrc*1000);
+					hdrs[l*nx+i].sy = NINT(ysrc*1000);
+					hdrs[l*nx+i].scalco = -1000;
+					hdrs[l*nx+i].offset = xi[l*nx+i]-xsrc; /* x-offset only; the y distance is not included */
+					hdrs[l*nx+i].gx = NINT(xi[l*nx+i]*1000);
+					hdrs[l*nx+i].gy = NINT(yi[l*nx+i]*1000);
+					hdrs[l*nx+i].fldr = isy*Nsx+is+1; /* 1-based shot number, x index fastest */
+					hdrs[l*nx+i].trwf = nx*ny;
+					nwrite = fwrite( &hdrs[l*nx+i], 1, TRCBYTES, fp_out);
+					assert(nwrite == TRCBYTES);
+					nwrite = fwrite( &tmpdata[l*nx*nt+i*nt], sizeof(float), nt, fp_out);
+					assert(nwrite == nt);
+				}
+			}
+		}
+	}
+
+	if( xrcv ) free(xrcv);	/* only allocated when xrcv= and yrcv= were given */
+	if( yrcv ) free(yrcv);
+	if( xi ) free(xi);
+	if( yi ) free(yi);
+	if( zi ) free(zi);
+	if( wavelet ) free( wavelet );
+	fclose(fp_out);
+	if( data ) free(data);
+	if( tmpdata ) free(tmpdata);
+	if( hdrs ) free( hdrs);
+
+	exit ( 0 );
+}
+
+/***************************************************************************
+*   xwgreen3D: pulse response in a homogeneous medium, evaluated per frequency
+*   for every receiver (xi,yi,zi) and one source (xsrc,ysrc,zsrc), multiplied
+*   by the wavelet spectrum and transformed back to time. The first nt samples
+*   of each of the nx*ny traces are written to data (nt samples per trace).
+***************************************************************************/
+
+void xwgreen3D(float *data, int nt, int nx, int ny, float dt, float fmin, float fmax, float *xi, float xsrc, float dx, float *yi, float ysrc, float dy, float *zi, float zsrc, float c, float cs, float rho, float *wavelet, float dipx, float maxdip, int far, int p_vz, int dip, int verbose)
+{
+	int    	iomin, iomax, iom, ix, iy, nfreq, i, sign, optn;
+	float  	df, deltom, om, k, r, x, y, invr, phi, phi2, cosphi;
+	float	*rwave, *rdata, cos2, scl, z, kp, ks, sclr;
+	complex	*cwave, *cdata, tmp, tmp2, sum;
+    complex H02p, H12p, H02s, H12s, Gp, Gs;
+	/* dx and dy are not referenced in this body; dipx and maxdip are in degrees (converted with M_PI/180 below) */
+	optn	= optncr(nt); /* transform length; presumably >= nt, since rwave is padded from nt to optn below - confirm in genfft */
+	nfreq	= 1+(optn/2);
+	df		= 1.0/(dt*optn);
+	deltom	= 2.*M_PI*df;
+	iomin	= (int)MIN((fmin*dt*optn), (nfreq-1));
+	iomin	= MAX(iomin, 1); /* never below 1, so the zero-frequency sample is never evaluated */
+	iomax	= MIN((int)(fmax*dt*optn), (nfreq-1));
+
+	rdata = (float *)calloc(optn*nx*ny,sizeof(float));
+	cdata = (complex *)calloc(nfreq*nx*ny,sizeof(complex));
+	rwave = (float *)calloc(optn,sizeof(float));
+	cwave = (complex *)calloc(nfreq,sizeof(complex));
+
+	for (i = 0; i < nt; i++) rwave[i] = wavelet[i]*dt;
+	for (i = nt; i < optn; i++) rwave[i] = 0.0;
+	
+	sign = -1;
+	rc1fft(rwave, cwave, optn, sign); /* wavelet spectrum: real input rwave, complex output cwave */
+
+	for (iy = 0; iy < ny; iy++) { /* clear the samples below iomin */
+		for (ix = 0; ix < nx; ix++) {
+			for (iom = 0; iom < iomin; iom++) {
+				cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+				cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+			}
+		}
+	}
+	for (iy = 0; iy < ny; iy++) { /* clear the samples from iomax upward; iomax itself is overwritten by the branches below */
+		for (ix = 0; ix < nx; ix++) {
+			for (iom = iomax; iom < nfreq; iom++) {
+				cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+				cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+			}
+		}
+	}
+
+	if (p_vz == 0) { /* P field; far and dip select one of four variants */
+		if (far == 0 && dip == 1) {
+			if (verbose) vmess("near and far P field of dipole");
+			for (iy = 0; iy < ny; iy++) {
+				for (ix = 0; ix < nx; ix++) {
+					x      = xi[iy*nx+ix] - xsrc;
+					y      = yi[iy*nx+ix] - ysrc;
+					z      = fabs(zi[iy*nx+ix] - zsrc);
+					r      = sqrt(x*x + y*y + z*z);
+					if (r != 0) phi = acos(z/r); /* angle between the source-receiver line and the vertical */
+					else phi = M_PI/2;
+					phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+					cosphi = 0.25*cos(phi2)*rho;
+					if (fabs(phi) < maxdip*M_PI/180.0) { /* receivers at or beyond maxdip get zero output */
+						for (iom = iomin; iom <= iomax; iom++) {
+							om = iom*deltom;
+							k = om/c;
+							tmp.r = -k*cosphi*y1(k*r);
+							tmp.i = -k*cosphi*j1(k*r);
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r - 
+											tmp.i*cwave[iom].i;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+											tmp.i*cwave[iom].r;
+						}
+					}
+					else {
+						for (iom = iomin; iom <= iomax; iom++) {
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+						}
+					}
+				}
+			}
+		}
+		else if (far == 1 && dip == 1){
+			if (verbose) vmess("far P field of dipole");
+			for (iy = 0; iy < ny; iy++) {
+				for (ix = 0; ix < nx; ix++) {
+					x = xi[iy*nx+ix] - xsrc; /* was xi[ix]: index the full nx*ny grid like yi/zi and every other branch */
+					y = yi[iy*nx+ix] - ysrc;
+					z = fabs(zi[iy*nx+ix] - zsrc);
+					r = sqrt(x*x + y*y + z*z);
+					if (r != 0) phi = acos(z/r);
+					else phi = M_PI/2;
+					phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+					cosphi = 0.5*cos(phi2)*rho/sqrt(r);
+					if (fabs(phi) < maxdip*M_PI/180.0) {
+						for (iom = iomin; iom <= iomax; iom++) {
+							om = iom*deltom;
+							k = om/c;
+							tmp.r = sqrt(k/(2.0*M_PI))*cosphi*cos(k*r-M_PI/4.0);
+							tmp.i = -sqrt(k/(2.0*M_PI))*cosphi*sin(k*r-M_PI/4.0);
+
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r -
+											tmp.i*cwave[iom].i;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+											tmp.i*cwave[iom].r;
+						}
+					}
+					else {
+						for (iom = iomin; iom <= iomax; iom++) {
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+						}
+					}
+				}
+			}
+		}
+		else if (far == 0 && dip == 0){
+			if (verbose) vmess("near and far P field of monopole");
+			for (iy = 0; iy < ny; iy++) {
+				for (ix = 0; ix < nx; ix++) {
+					x = xi[iy*nx+ix] - xsrc;
+					y = yi[iy*nx+ix] - ysrc;
+					z = fabs(zi[iy*nx+ix] - zsrc);
+					r = sqrt(x*x + y*y + z*z);
+					if (r != 0) phi = acos(z/r);
+					else phi = M_PI/2;
+					scl = 0.25*rho;
+					if (fabs(phi) < maxdip*M_PI/180.0) {
+						for (iom = iomin; iom <= iomax; iom++) {
+							om = iom*deltom;
+							k  = om/c;
+							tmp.r = -scl*y0(k*r);
+							tmp.i = -scl*j0(k*r);
+
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r - 
+											tmp.i*cwave[iom].i;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+											tmp.i*cwave[iom].r;
+						}
+					}
+					else {
+						for (iom = iomin; iom <= iomax; iom++) {
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+						}
+					}
+				}
+			}
+		}
+		else if (far == 1 && dip == 0){
+			if (verbose) vmess("far P field of monopole");
+			for (iy = 0; iy < ny; iy++) {
+				for (ix = 0; ix < nx; ix++) {
+					x = xi[iy*nx+ix] - xsrc;
+					y = yi[iy*nx+ix] - ysrc;
+					z = fabs(zi[iy*nx+ix] - zsrc);
+					r = sqrt(x*x + y*y + z*z);
+					if (r != 0) phi = acos(z/r);
+					else phi = M_PI*0.5;
+					scl = 0.5*rho/sqrt(r);
+					if (fabs(phi) <= M_PI*(maxdip/180.0)) { /* this branch uses <= where the others use < */
+						for (iom = iomin; iom <= iomax; iom++) {
+							om = iom*deltom;
+							k = om/c;
+							tmp.r = -sqrt(1.0/(2.0*M_PI*k))*scl*sin(k*r-M_PI/4.0);
+							tmp.i = -sqrt(1.0/(2.0*M_PI*k))*scl*cos(k*r-M_PI/4.0);
+
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r -
+											tmp.i*cwave[iom].i;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+											tmp.i*cwave[iom].r;
+						}
+					}
+					else {
+						for (iom = iomin; iom <= iomax; iom++) {
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+						}
+					}
+				}
+			}
+		}
+	}
+	else if (p_vz == 1) { /* Vz field */
+		if (dip == 1) {
+	    	if (far == 0) {
+	    		if (verbose) vmess("near and far Vz field of dipole");
+				for (iy = 0; iy < ny; iy++) {
+					for (ix = 0; ix < nx; ix++) {
+						x = xi[iy*nx+ix] - xsrc;
+						y = yi[iy*nx+ix] - ysrc;
+						z = fabs(zi[iy*nx+ix] - zsrc);
+						r = sqrt(x*x + y*y + z*z);
+						invr   = -0.25/(c);
+						if (r != 0) phi = acos(z/r);
+						else phi = M_PI/2;
+						phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+						cosphi = cos(phi2);
+						cos2 = cosphi*cosphi;
+						if (fabs(phi) < maxdip*M_PI/180.0) {
+							for (iom = iomin; iom <= iomax; iom++) {
+								om = iom*deltom;
+								k = om/c;
+								tmp.r = k*cos2*invr*j0(k*r);
+								tmp.i = -k*cos2*invr*y0(k*r);
+								tmp2.r = k*(1-2*cos2)*invr*j1(k*r)/(k*r);
+								tmp2.i = -k*(1-2*cos2)*invr*y1(k*r)/(k*r);
+								sum.r = tmp.r + tmp2.r;
+								sum.i = tmp.i + tmp2.i;
+
+								cdata[iy*nx*nfreq+ix*nfreq+iom].r = sum.r*cwave[iom].r -
+												sum.i*cwave[iom].i;
+								cdata[iy*nx*nfreq+ix*nfreq+iom].i = sum.r*cwave[iom].i +
+												sum.i*cwave[iom].r;
+							}
+						}
+						else {
+							for (iom = iomin; iom <= iomax; iom++) {
+								cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+								cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+							}
+						}
+					}
+				}
+	    	}
+	    	else {
+	    		if (verbose) vmess("far Vz field of dipole");
+				for (iy = 0; iy < ny; iy++) {
+					for (ix = 0; ix < nx; ix++) {
+						x = xi[iy*nx+ix] - xsrc;
+						y = yi[iy*nx+ix] - ysrc;
+						z = fabs(zi[iy*nx+ix] - zsrc);
+						r = sqrt(x*x + y*y + z*z);
+						invr   = -0.5/(c*sqrt(r));
+						if (r != 0) phi = acos(z/r);
+						else phi = M_PI/2;
+						phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+						cosphi = cos(phi2);
+						cos2 = cosphi*cosphi;
+						if (fabs(phi) < maxdip*M_PI/180.0) {
+							for (iom = iomin; iom <= iomax; iom++) {
+								om = iom*deltom;
+								k = om/c;
+								tmp.r = sqrt(k/(2.0*M_PI))*invr*cos2*cos(k*r-M_PI/4.0);
+								tmp.i = -sqrt(k/(2.0*M_PI))*invr*cos2*sin(k*r-M_PI/4.0);
+
+								cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r -
+												tmp.i*cwave[iom].i;
+								cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+												tmp.i*cwave[iom].r;
+							}
+						}
+						else {
+							for (iom = iomin; iom <= iomax; iom++) {
+								cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+								cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+							}
+						}
+					}
+				}
+	    	}
+		}
+		else { /* monopole Vz: the far flag is not consulted here */
+	    	if (verbose) vmess("near and far Vz field of monopole");
+
+			for (iy = 0; iy < ny; iy++) {
+				for (ix = 0; ix < nx; ix++) {
+					x = xi[iy*nx+ix] - xsrc;
+					y = yi[iy*nx+ix] - ysrc;
+					z = fabs(zi[iy*nx+ix] - zsrc);
+					r = sqrt(x*x + y*y + z*z);
+					if (r != 0) phi = acos(z/r);
+					else phi = M_PI/2;
+					phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+					cosphi = cos(phi2);
+					if (fabs(phi) < maxdip*M_PI/180.0) {
+						for (iom = iomin; iom <= iomax; iom++) {
+							om = iom*deltom;
+							k = om/c;
+							tmp.i = -cosphi*y1(k*r)/(4.0*c);
+							tmp.r = cosphi*j1(k*r)/(4.0*c);
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r - 
+											tmp.i*cwave[iom].i;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+											tmp.i*cwave[iom].r;
+						}
+					}
+					else {
+						for (iom = iomin; iom <= iomax; iom++) {
+							cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+							cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+						}
+					}
+				}
+			}
+		}
+	}
+    else if (p_vz == 2) { /* Fz source with Vz receivers Fz=1 == p_vz=2 */
+        for (iy = 0; iy < ny; iy++) {
+			for (ix = 0; ix < nx; ix++) {
+				x = xi[iy*nx+ix] - xsrc;
+				y = yi[iy*nx+ix] - ysrc;
+				z = fabs(zi[iy*nx+ix] - zsrc);
+				r = sqrt(x*x + y*y + z*z);
+
+				if (r != 0) phi = acos(z/r);
+				else phi = M_PI/2;
+				phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+				cosphi = cos(phi2); /* computed but not used in this branch */
+				sclr = (z*z-x*x-y*y)/(r);
+				if (fabs(phi) < maxdip*M_PI/180.0) {
+					for (iom = iomin; iom <= iomax; iom++) {
+						om = iom*deltom;
+						kp = om/c; /* P wavenumber */
+						ks = om/cs; /* S wavenumber */
+						H02p.r = j0(kp*r);
+						H02p.i = -y0(kp*r);
+						H12p.r = j1(kp*r);
+						H12p.i = -y1(kp*r);
+						
+						H02s.r = j0(ks*r);
+						H02s.i = -y0(ks*r);
+						H12s.r = j1(ks*r);
+						H12s.i = -y1(ks*r);
+
+						Gp.r = kp/(4*om*rho*r*r)*(-z*z*kp*H02p.r + sclr*H12p.r);
+						Gp.i = kp/(4*om*rho*r*r)*(-z*z*kp*H02p.i + sclr*H12p.i);
+
+						Gs.r = ks/(4*om*rho*r*r)*(-z*z*ks*H02s.r + sclr*H12s.r);
+						Gs.i = ks/(4*om*rho*r*r)*(-z*z*ks*H02s.i + sclr*H12s.i);
+
+						tmp.i = (-1.0/om)*((om/(4*rho*cs*cs))*(H02s.r) - Gp.r + Gs.r);
+						tmp.r = ( 1.0/om)*((om/(4*rho*cs*cs))*(H02s.i) - Gp.i + Gs.i);
+
+						cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r - 
+						tmp.i*cwave[iom].i;
+						cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+						tmp.i*cwave[iom].r;
+					}
+				}
+				else {
+					for (iom = iomin; iom <= iomax; iom++) {
+						cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+						cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+					}
+				}
+			}
+        }
+
+    }
+    else if (p_vz == 3) { /* Fx source with Vz receivers Fx=1 == p_vz=3 */
+        for (iy = 0; iy < ny; iy++) {
+			for (ix = 0; ix < nx; ix++) {
+				x = xi[iy*nx+ix] - xsrc;
+				y = yi[iy*nx+ix] - ysrc;
+				z = fabs(zi[iy*nx+ix] - zsrc);
+				r = sqrt(x*x + y*y + z*z);
+
+				if (r != 0) phi = acos(z/r);
+				else phi = M_PI/2;
+				phi2   = SGN(x)*phi - (dipx*M_PI/180.0);
+				cosphi = cos(phi2); /* computed but not used in this branch */
+				scl = (z*x*y)/(4.0*r*r*rho);
+				if (fabs(phi) < maxdip*M_PI/180.0) {
+					for (iom = iomin; iom <= iomax; iom++) {
+						om = iom*deltom;
+						kp = om/c;
+						ks = om/cs;
+						H02p.r = kp*kp*j0(kp*r);
+						H02p.i = -kp*kp*y0(kp*r);
+						H12p.r = 2.0*kp*j1(kp*r)/r;
+						H12p.i = -2.0*kp*y1(kp*r)/r;
+						
+						H02s.r = ks*ks*j0(ks*r);
+						H02s.i = -ks*ks*y0(ks*r);
+						H12s.r = 2.0*ks*j1(ks*r)/r;
+						H12s.i = -2.0*ks*y1(ks*r)/r;
+						
+						tmp.i = (scl/(om*om))*((H02p.r-H12p.r) - (H02s.r-H12s.r));
+						tmp.r = -(scl/(om*om))*((H02p.i-H12p.i) - (H02s.i-H12s.i));
+						
+						cdata[iy*nx*nfreq+ix*nfreq+iom].r = tmp.r*cwave[iom].r - 
+						tmp.i*cwave[iom].i;
+						cdata[iy*nx*nfreq+ix*nfreq+iom].i = tmp.r*cwave[iom].i +
+						tmp.i*cwave[iom].r;
+					}
+				}
+				else {
+					for (iom = iomin; iom <= iomax; iom++) {
+						cdata[iy*nx*nfreq+ix*nfreq+iom].r = 0.0;
+						cdata[iy*nx*nfreq+ix*nfreq+iom].i = 0.0;
+					}
+				}
+			}
+        }
+
+    }
+
+
+	scl  = df; /* every output sample is scaled by the frequency step */
+	sign = 1;
+	crmfft(&cdata[0], &rdata[0], optn, nx*ny, nfreq, optn, sign); /* complex cdata to real rdata for all nx*ny traces in one call */
+	for (iy = 0; iy < ny; iy++) {
+		for (ix = 0; ix < nx; ix++) {
+			for (i = 0; i < nt; i++) { /* keep the first nt of the optn samples of each trace */
+				data[iy*nx*nt+ix*nt+i] = scl*rdata[iy*nx*optn+ix*optn+i];
+			}
+		}
+	}
+
+	free(cdata);
+	free(cwave);
+	free(rdata);
+	free(rwave);
+
+	return;
+}