diff --git a/extrap3d/.DS_Store b/extrap3d/.DS_Store
index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..593965622f18068d430cffa965e3c613e0c08ef2 100644
Binary files a/extrap3d/.DS_Store and b/extrap3d/.DS_Store differ
diff --git a/extrap3d/test/forwardbeam.scr b/extrap3d/demo/forwardbeam.scr
similarity index 97%
rename from extrap3d/test/forwardbeam.scr
rename to extrap3d/demo/forwardbeam.scr
index a6f177d3514719049ed614b8713f6ffd218a7d01..5328c0af9a9bac6324c78d7024369054668e392f 100644
--- a/extrap3d/test/forwardbeam.scr
+++ b/extrap3d/demo/forwardbeam.scr
@@ -5,7 +5,7 @@
 
 # Directories
 
-export datadir=/home/thorbcke/src/OpenSource/extrap3d/test
+export datadir=/home/thorbcke/src/OpenSource/extrap3d/demo
 export PATH=/home/thorbcke/src/OpenSource/bin:$PATH
 
 cd $datadir
diff --git a/extrap3d/test/saltvel_zxy.bin.gz b/extrap3d/demo/saltvel_zxy.bin.gz
similarity index 100%
rename from extrap3d/test/saltvel_zxy.bin.gz
rename to extrap3d/demo/saltvel_zxy.bin.gz
diff --git a/extrap3d/lib/Makefile b/extrap3d/lib/Makefile
index 7f409d5b8ad5f4d5ec9c06b706be582f20b791e3..4802cc3ff1bbb26eda3e9c0f7216498f755f0d59 100644
--- a/extrap3d/lib/Makefile
+++ b/extrap3d/lib/Makefile
@@ -9,6 +9,7 @@ CFLAGS += -D_GNU_SOURCE
 CSRC = \
 	xwExtr3d.c \
 	xwCFP3d.c \
+	xwMigr3d.c \
 	getreccfp.c \
 	SolveAxb.c \
 	W3d.c \
diff --git a/extrap3d/lib/xwMigr3d.c b/extrap3d/lib/xwMigr3d.c
new file mode 100644
index 0000000000000000000000000000000000000000..72aca8ae3e99d453ee1a4a46d7544039dc9c6d05
--- /dev/null
+++ b/extrap3d/lib/xwMigr3d.c
@@ -0,0 +1,97 @@
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "Area.h"
+
+/***** Direct Convolution *****/
+void conv2D(complex *data, float *velmod, int oplx, 
+	int oply, float om, Area *area, int mode);
+
+/***** McClellan *****/
+void conv2DMcC(complex *field, float *velmod, int order, 
+	float omega, Area *area, int mode);
+
+void conv2DMcC_2(complex *field, float *velocity, int order, 
+	float omega, Area *area, int mode);
+
+/***** Split-Step Fourier *****/
+void conv_kxw_sr(complex *src, complex *rec, float *velmod, 
+	float om, int ntap, Area *area);
+
+void conv_kxw(complex *data, float *velmod, float om, int ntap, Area *area, int mode);
+
+/***** Finite Difference (Salvo implementation) *****/
+/*
+void conv_FD(complex *rec, float *velocity, float vmin,
+	float om, int nterms, int filter_type, Area *ar, int mode);
+*/
+
+void taperedges(int tap_opt, int ntap, complex *data, Area *area);
+
+
+void xwMigr3d(complex *src, complex *rec, float *velocity, float vmin,
+	int oplx, int oply, int order, int McC, float om, int nterms,
+	int filter_inc, int ntap, int tap_opt, Area *area, int zomigr, int method)
+{
+	/* Extrapolate the data */
+
+	if (!zomigr) {
+		if (method == 1) {
+			conv2D(src,velocity,oplx,oply,om,area,1);
+			conv2D(rec,velocity,oplx,oply,om,area,-1);
+		} 
+		else if (method == 2) {
+			if (McC == 1) {
+				conv2DMcC(src,velocity,order,om,area,1);
+				conv2DMcC(rec,velocity,order,om,area,-1);
+			}
+			else if (McC == 2) {
+				conv2DMcC_2(src,velocity,order,om,area,1);
+				conv2DMcC_2(rec,velocity,order,om,area,-1);
+			}
+		}
+		else if (method == 3) {
+			conv_kxw_sr(src,rec,velocity,om,ntap,area);
+		}
+/*
+		else if (method == 4) {
+			conv_FD( rec, velocity, vmin, om, nterms, filter_inc, area, -1);
+			conv_FD( src, velocity, vmin, om, nterms, filter_inc, area, 1);
+		}
+*/
+
+		if (ntap) {
+			taperedges(tap_opt, ntap, rec, area);
+			taperedges(tap_opt, ntap, src, area);
+		}
+
+	}
+	else {
+		if (method == 1) {
+			conv2D(rec,velocity,oplx,oply,om,area,-1);
+		} 
+		else if (method == 2) {
+			if (McC == 1) {
+				conv2DMcC(rec,velocity,order,om,area,-1);
+			}
+			else if (McC == 2) {
+				conv2DMcC_2(rec,velocity,order,om,area,-1);
+			}
+		}
+		else if (method == 3) {
+			conv_kxw(rec,velocity,om,ntap,area,-1);
+		}
+/*
+		else if (method == 4) {
+			conv_FD( rec, velocity, vmin, om, nterms, filter_inc, area, -1);
+		}
+*/
+		
+		if (ntap) {
+			taperedges(tap_opt, ntap, rec, area);
+		}
+
+	}
+
+	return;
+}
diff --git a/extrap3d/main/Makefile b/extrap3d/main/Makefile
index a10ed526638e8b6e2bb9f88a30f4afbed75824d9..4ed828591f063fa0d414c9b783593507830bc1fc 100644
--- a/extrap3d/main/Makefile
+++ b/extrap3d/main/Makefile
@@ -10,17 +10,23 @@ CPROGS = \
 	cfpmod3d \
 	extrap3d \
 	tablecalc3d \
-
+	migr3d \
+	zomodel3d \
 
 CTARGET	  = %: %.c
 
-INSTALL: $(CPROGS) 
+INSTALL: $(CPROGS) migr3d_mpi
 
 $(CPROGS):	$(CTARGET) $L/libextrap3d.a
 	$(CC) $(CFLAGS) $(OPTC) $(CPPFLAGS) $@.c $(LDFLAGS) -o $@.tmp
 	test ! -f $@ || $(RM) $@
 	mv $@.tmp $B/$@
 
+migr3d_mpi: migr3d.c  $L/libextrap3d.a
+	mpicc $(CFLAGS) -DMPI $(OPTC) $(CPPFLAGS) migr3d.c $(LDFLAGS) -o $@.tmp
+	test ! -f $@ || $(RM) $@
+	mv $@.tmp $B/$@
+
 clean:
 	-rm -f *.o
 
diff --git a/extrap3d/main/extrap3d.c b/extrap3d/main/extrap3d.c
index 46213f6b1325c8d0b00851cceca7be7ab0edb2f7..17f6dab3b55b671f1d4021c07cd18fbef4184e94 100644
--- a/extrap3d/main/extrap3d.c
+++ b/extrap3d/main/extrap3d.c
@@ -673,7 +673,7 @@ int main(int argc, char *argv[])
 	MPI_Finalize();
 #endif
 
-	return;
+	return 0;
 }
 
 
diff --git a/extrap3d/main/migr3d.c b/extrap3d/main/migr3d.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4b9bec1f6e50002f335a38468eb7fccaea79b4c
--- /dev/null
+++ b/extrap3d/main/migr3d.c
@@ -0,0 +1,768 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <time.h>
+#include <math.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include "par.h"
+#include "segy.h"
+#include "genfft.h"
+#include "Area.h"
+#ifdef MPI
+#include <mpi.h>
+#endif
+
+int optncr(int n);
+double wallclock_time(void);
+void rc1fft(float *rdata, complex *cdata, int n, int sign);
+int frequency_distribution(int nw_low, int nw, int npes, int pe, int maxlw, int *freq_index, int type );
+
+/****** IO routines *******/
+int openVelocityFile(char *file_vel, FILE **fp, Area *vel_area, int verbose);
+
+void readVelocitySlice(FILE *fp, float *velocity, int iz, int nyv, int nxv);
+
+int read_FFT_DataFile(FILE *fp, complex *data, Area vel_area, int nfft, 
+	int nw, int nw_low, int *tr_read_in, int *tr_shot, int *ixmin, 
+	int *ixmax, int *iymin, int *iymax, int *sx, int *sy, int conjg, int verbose);
+
+int write_ImageFile(FILE *fp, float *data, Area data_area, int fldr, 
+	int d, int out_su, int verbose);
+
+void write_image(float *image, int d, FILE *image_file, int stackmigr, 
+	int image_su, float *tot_image, segy *hdri, Area *ar, float yvmin);
+
+/***** Operator tables *****/
+void tablecalc_2D(int oplx, int oply, int nx, float dx, int ny, float dy, 
+	float dz, float alpha, float fmin, float fmax, float cmin, float cmax, 
+	float df, float weight, int fine, int method, char *file_table, int verbose);
+
+void tablecalc_1D(int order, int nx, float dx, float dz, float theta, 
+	float fmin, float fmax, float vmin, float vmax, float df, int fine, 
+	int oper_opt, int verbose);
+
+/***** Imaging *****/
+void image_condition(complex *src, complex *rec, Area *ar,
+	float *image, float om, float wmax, float epsilon, int image_type);
+
+
+/***** Wave field extrapolation and imaging *****/
+
+void xwMigr3d(complex *src, complex *rec, float *velocity, float vmin,
+	int oplx, int oply, int order, int McC, float om, int nterms,
+	int filter_inc, int ntap, int tap_opt, Area *shot_area, int zomigr, int method);
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+"  ",
+" MIGR3D - 3D pre- and post-stack depth migration (x,y-w).",
+"  ",
+" migr3d file_shot= file_vel= [optional parameters]",
+"  ",
+" Required parameters:",
+" ",
+"   file_shot= ............... input data to be migrated",
+"   file_vel= ................ gridded velocity file ",
+"   vx,vy,vz ................. these 3 parameters must be set correct",
+"  ",
+" Optional parameters:",
+" ",
+"   conjg=0 .................. 1: take complex conjugate of input data",
+" VELOCITY MODEL ",
+"   dxv=dxv .................. stepsize in x-direction of velocity model ",
+"   dyv=dyv .................. stepsize in y-direction of velocity model ",
+"   dzv=dzv .................. stepsize in z-direction of velocity model ",
+"   nxv= ..................... number of samples in the x-direction file_vel",
+"   nyv= ..................... number of samples in the y-direction file_vel",
+"   nzv= ..................... number of samples in the z-direction file_vel",
+"   xvmin=0 .................. first x-position of velocity sampling point",
+"   yvmin=0 .................. first y-position of velocity sampling point",
+"   zvmin=0 .................. first z-position of velocity sampling point",
+"   vmin=1500 ................ minimum velocity in file_vel ",
+"   vmax=4800 ................ maximum velocity in file_vel ",
+"   vx=1 ..................... dimension number for x axis (1=sample)",
+"   vy=2 ..................... dimension number for y axis (2=trace)",
+"   vz=3 ..................... dimension number for z axis (3=gather)",
+"   tmp_dir=/tmp ............. tmp directory to store local velocity file",
+" MIGRATION ",
+"   imc=0 .................... image condition (see * below)",
+"   ndepth=all ............... number of depth steps",
+"   dstep=10 ................. number of depth steps per frequency region",
+"   area=0 ................... number of points around acquisition aperture",
+"   ntap=0 ................... number of taper points at boundaries",
+"   tap_opt=1 ................ 0: exponential, 1: cosinus 2: linear",
+"   eps_a=0.0 ................ absolute stabilization factor for imc=[1,2,3]",
+"   eps_r=0.001 .............. relative stabilization factor for imc=[1,2,3]",
+"   stackmigr=0 .............. 1; stacks migrated shot records :0 don't stack",
+" ZERO OFFSET MIGRATION ",
+"   imc=4 .................... image condition fixed",
+"   zomigr=0 ................. 1: zero-offset migration (=> velocity *= 0.5)",
+"            ................. 2: zero-offset migration (=> velocity *= 1.0)",
+" SOURCE DEFINITION ",
+"   file_src=<file_name> ..... wavelet used ",
+"   fmin=0 ................... minimum frequency ",
+"   fmax=45 .................. maximum frequency",
+"   conjgs=0 ................. 1: take complex conjugate of source wavefield",
+" EXTRAPOLATION OPERATOR DEFINITION ",
+"   file_table= .............. file which contains pre-computed operators ",
+"   method=1 ................. type of 3D extrapolation method (see below) ",
+"   oplx=25 .................. length of the convolution operator in x (odd)",
+"   oply=oplx ................ length of the convolution operator in y (odd)",
+"   order=13 ................. order in McClellan and Series expansion",
+"   McC=1 .................... type of McClellan (cos(kr)) operator",
+"   oper_opt=1 ............... 1D operator in McC 1:smooth 2:filter 3:remez",
+"   alpha=65 ................. maximum angle of interest (used in operator)",
+"   weight=5e-5 .............. weight factor in WLSQ operator optimization",
+"   weights=1e-2 ............. weight factor in series expansion optimization",
+"   fine=2 ................... fine sampling in operator table",
+"   filter_inc=1 ............. the increment in dz for the Li filter (0=off)",
+"   nterms=1 ................. the number of terms in the paraxial expansion",
+" OUTPUT DEFINITION ",
+"   file_image= .............. output file with migrated result(s)",
+"   verbose=0 ................ >1: shows various parameters and results",
+"  ",
+"   Options for method:",
+"         - 1  = Direct 2D convolution (Walter's scheme)",
+"         - 2  = McCLellan transformation with Chebyshev recursion",
+"         - 3  = Split-Step Fourier domain extrapolation",
+//"         - 4  = Finite Difference with Li's (1991) filter",
+"   Options for McC (only for method=2 and method=3):",
+"         - (1) 1 st order McClellan (3x3 stencil,  9 points)",
+"         - (2) 2 nd order McClellan (5x5 stencil, 17 points)",
+"  ",
+"   Imaging condition (imc) :",
+"         - 0 = correlation",
+"         - 1 = stabilized inversion",
+"         - 2 = the derivative imaging condition",
+"         - 3 = stabilized Least Squares (not yet implemented!)",
+"         - 4 = 2*data.r for zero-offset migration",
+" ",
+"  The shot and receiver positions in the model are determined by",
+"  the hdr values gx,gy and sx,sy. The data from file_shot is extrapolated ",
+"  backward, the data from file_src is extrapolated forward.",
+"  The velocity file is assumed to have the data ordered per depth slice.",
+" ",
+"      Jan Thorbecke 2006",
+"      TU Delft ",
+"      E-mail: janth@xs4all.com ",
+"  ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main(int argc, char *argv[])
+{
+	FILE    *shot_file, *vel_file, *src_file, *image_file;
+	size_t  nread, bytes, size, trace_sz, size_out;
+	int     verbose,  method, zomigr, ntraces, MB;
+	int     nxv, nyv, nzv, dstep, id, id1, id2, err;
+	int     d, nt, ndepth, i, j, conjg, conjgs;
+	int     ntap, tap_opt, order, McC, oplx, oply, fine, pgsz;
+	int     stackmigr, imc, area, ixmin, ixmax, iymin, iymax, ns;
+	int     nfft, nfreq, nw_high, nw_low, nw, sx, sy, ix, iy;
+	int     npages_w, iw, one_shot, traces_shot, sign; 
+	int     traces_read_in, nxy, fd, nx, ny, num_threads, nel, oper_opt;
+	int     traces_read_in_src, traces_shot_src, is;
+	int		fldr_s, fldr_w, power_of_2, image_su, nterms, filter_inc;
+	Area    shot_area;
+	float   alpha, weight, scl, sclw;
+	float   eps_r, eps_a;
+	float   fmin, fmax, dt;
+	float   *tot_image;
+	float   *velocity, *image, weights;
+	float   xvmin, yvmin, zvmin, dxv, dyv, dzv, vmin, vmax;
+	float   dw, df, om, dtw, wmax, tdw, t_w, tr, ti;
+	double  t0, t1, t2, t_migr=0, t_io=0, t_table=0, t_init=0;
+	double  t_comm=0;
+	complex *src_field, *rec_field, *src, *rec;
+	char    *file_vel, *file_shot, *file_src, *file_table;
+	char    *tmp_dir, sys_call[256];
+	char    *file_image;
+	segy    *hdr, *hdrw;
+	int     npes, pe, root_pe=0, nlw, maxlw, *freq_index, fdist, ipe;
+#ifdef MPI
+	int *nlwcounts, *recvcounts, *displacements;
+	int nlw_tag;
+	complex  *gath_rec_field;
+	MPI_Status status;
+	MPI_Request request;
+
+	MPI_Init( &argc, &argv );
+	MPI_Comm_size( MPI_COMM_WORLD, &npes );
+	MPI_Comm_rank( MPI_COMM_WORLD, &pe );
+#else
+	npes = 1;
+	pe   = 0;
+#endif
+
+	t0 = wallclock_time();
+
+/* Read in parameters */
+
+	initargs(argc,argv);
+	requestdoc(0);
+
+	if(!getparstring("file_shot", &file_shot)) file_shot=NULL;
+	if(!getparstring("file_vel", &file_vel)) file_vel=NULL; 
+	if(!getparstring("file_image", &file_image)) file_image=NULL;
+	if(!getparstring("file_src", &file_src)) file_src = NULL;
+	if(!getparstring("file_table", &file_table)) file_table=NULL;
+	if(!getparstring("tmp_dir", &tmp_dir)) tmp_dir="/tmp";
+	if(!getparfloat("fmin", &fmin)) fmin = 0.0;
+	if(!getparfloat("fmax", &fmax)) fmax = 45.0;
+	if(!getparfloat("eps_a", &eps_a)) eps_a = 0.001;
+	if(!getparfloat("eps_r", &eps_r)) eps_r = 0.001;
+	if(!getparint("method", &method)) method = 1;
+	if(!getparint("conjg", &conjg)) conjg = 0;
+	if(!getparint("conjgs", &conjgs)) conjgs = 0;
+	if(!getparint("area", &area)) area = 0;
+	if(!getparint("imc", &imc)) imc = 0;
+	if(!getparint("zomigr", &zomigr)) zomigr = 0;
+	if(!getparint("ntap", &ntap)) ntap = 0;
+	if(!getparint("tap_opt", &tap_opt)) tap_opt = 1;
+	if(!getparint("oplx", &oplx)) oplx = 25;
+	if(!getparint("oply", &oply)) oply = oplx;
+	if(!getparint("order", &order)) order = 13;
+	if(!getparint("McC", &McC)) McC = 1;
+	if(!getparint("oper_opt", &oper_opt)) oper_opt = 1;
+	if(!getparint("nterms", &nterms)) nterms = 1;
+	if(!getparint("filter_inc", &filter_inc)) filter_inc = 1;
+	if(!getparfloat("alpha", &alpha)) alpha = 65.0;
+	if(!getparfloat("weight", &weight)) weight = 5e-5;
+	if(!getparfloat("weights", &weights)) weights = 1e-2;
+	if(!getparint("fine", &fine)) fine = 2;
+	if(!getparint("stackmigr", &stackmigr)) stackmigr = 0;
+	if(!getparint("verbose", &verbose)) verbose = 0;
+	
+	if(!ISODD(oplx)) oplx += 1;
+	if(!ISODD(oply)) oply += 1;
+	if(conjg)  conjg  = -1; else  conjg = 1;
+	if(conjgs) conjgs = -1; else conjgs = 1;
+	assert(McC <= 2 && McC >= 1);
+	assert(method <= 3 && method >= 1);
+	assert(file_vel != NULL);
+	assert(file_image != NULL);
+	assert(imc >= 0 && imc < 4);
+	if (zomigr && file_src != NULL) 
+		fprintf(stderr,"\nFor zero-offset migration file_src is not used\n");
+
+	if (zomigr) { imc = 4; stackmigr=0; }
+
+	t2 = wallclock_time(); t_init += t2-t0;
+
+
+/* Open velocity file to read size and area information */
+
+	openVelocityFile(file_vel, &vel_file, &shot_area, verbose);
+
+	xvmin = shot_area.xmin;
+	yvmin = shot_area.ymin;
+	zvmin = shot_area.zmin;
+	nxv   = shot_area.nx;
+	nyv   = shot_area.ny;
+	nzv   = shot_area.nz;
+	dxv   = shot_area.dx;  
+	dyv   = shot_area.dy;
+	dzv   = shot_area.dz;
+	nxy   = shot_area.sxy;
+
+	t1 = wallclock_time(); t_io += t1-t2;
+
+	if(!getparint("ndepth", &ndepth)) ndepth = nzv;
+	else assert (ndepth <= nzv);
+	if(!getparint("dstep", &dstep)) dstep = MIN(10, ndepth);
+	if(!getparfloat("vmin", &vmin)) vmin = 1500;
+	if(!getparfloat("vmax", &vmax)) vmax = 4800;
+
+	image_su = (strstr(file_image, ".su")!=NULL);
+
+	t2 = wallclock_time(); t_init += t2 - t1;
+
+/* Open receiver file and read first hdr */
+	
+	hdr  = (segy *)malloc(TRCBYTES);
+	if (file_shot == NULL) shot_file = stdin;
+	else shot_file = fopen( file_shot, "r" );
+	assert( shot_file );
+	nread = fread( hdr, 1, TRCBYTES, shot_file );
+	assert (nread == TRCBYTES);
+	fseek ( shot_file, 0, SEEK_END );
+	bytes = ftell(shot_file); 
+
+	nt       = hdr[0].ns;
+	dt       = 1e-6*hdr[0].dt;
+	trace_sz = sizeof(float)*nt+TRCBYTES;
+	ntraces  = (int) (bytes/trace_sz);
+	nfft     = optncr(nt);
+	nfreq    = nfft/2 + 1;
+	df       = 1.0/(nfft*dt);
+	dw       = 2.*M_PI*df;
+	nw_high  = MIN( (int)(fmax/df), nfreq );
+	nw_low   = MAX( (int)((fmin)/df), 1 );
+	nw       = nw_high - nw_low + 1;
+	wmax     = 2.*M_PI*nw_low*df;
+	sx       = hdr[0].sx;
+	sy       = hdr[0].sy;
+	fldr_s   = hdr[0].fldr;
+	if (hdr[0].scalco < 0) scl = 1.0/fabs(hdr[0].scalco);
+	else if (hdr[0].scalco == 0) scl = 1.0;
+	else scl = hdr[0].scalco;
+	free(hdr);
+
+/*======= compute frequency distribution for multiple CPU's ========*/
+
+	fdist = 0;
+	maxlw = ceil((float)nw/(float)npes);
+	freq_index = (int *)malloc(maxlw*sizeof(int));
+	nlw = frequency_distribution(nw_low, nw, npes, pe, maxlw, 
+		freq_index, fdist );
+
+#ifdef MPI
+	if( verbose ) {
+		/* print out all the frequencies for each process */
+		MPI_Barrier(MPI_COMM_WORLD);
+		for( ipe=0; ipe<npes; ipe++ ) {
+			if( pe == ipe ) {
+				fprintf(stderr, "pe=%d:\tf[%d] = df*{", pe, nlw );
+				if( nlw > 0 )
+				for( iw=0; iw<nlw; iw++ )
+					fprintf(stderr, " %d", freq_index[iw] );
+				fprintf( stderr, " }\n" );
+				fflush(stderr);
+			}
+			MPI_Barrier(MPI_COMM_WORLD);
+		}
+	}
+	if (npes == 1) nlw = nw;
+	nlwcounts = (int *)malloc(npes*sizeof(int));
+	recvcounts = (int *)malloc(npes*sizeof(int));
+	displacements = (int *)malloc(npes*sizeof(int));
+	nlw_tag = 1;
+	if (pe == root_pe) {
+		displacements[0] = 0;
+		nlwcounts[0] = nlw;
+		for( ipe=1; ipe<npes; ipe++ ) {
+			MPI_Recv(&nlwcounts[ipe], 1, MPI_INT, ipe, nlw_tag, 
+				MPI_COMM_WORLD, &status);
+			displacements[ipe] = displacements[ipe-1]+nlwcounts[ipe-1];
+		}
+	}
+	else {
+		MPI_Send(&nlw, 1, MPI_INT, root_pe, nlw_tag, MPI_COMM_WORLD);
+	}
+#else
+	nlw = nw;
+#endif
+
+	fmin = MAX(0,-df + df*freq_index[0]);
+	fmax =  df + df*freq_index[nlw-1];
+
+	assert( fmax < 1.0/(2.0*dt) ); /* Nyguist in time */
+	if( (2.0*fmax)/vmin > 1.0/dxv ){ /* Nyguist in space */
+		fprintf(stderr,"spatial aliasing fmax < %f (vmin/2*dx)\n", vmin/(2*dxv));
+	}
+	if( (2.0*fmax)/vmin > 1.0/dyv ){ /* Nyguist in space */
+		fprintf(stderr,"spatial aliasing fmax < %f (vmin/2*dy)\n", vmin/(2*dyv));
+	}
+//	assert( (2.0*fmax)/vmin < 1.0/dxv ); /* Nyguist in space */
+//	assert( (2.0*fmax)/vmin < 1.0/dyv ); /* Nyguist in space */
+
+	if (zomigr==1) { vmin *= 0.5; vmax *= 0.5; }
+	size = (size_t)nlw*nxy*sizeof(complex);
+	if (verbose) {
+		MB = 1024*1024;
+		if (zomigr)  fprintf(stderr," for zero-offset migration \n");
+		fprintf(stderr," minimum velocity = %.2f\n", vmin);
+		fprintf(stderr," maximum velocity = %.2f\n", vmax);
+		fprintf(stderr,"\n    DATA INFORMATION\n");
+		fprintf(stderr," nw = %d nlw = %d\n", nw, nlw);
+		fprintf(stderr," fmin = %.3f fmax = %.3f\n", nw_low*df, nw_high*df);
+		fprintf(stderr," dt = %.4f nt = %d nfft = %d\n", dt, nt, nfft);
+		fprintf(stderr," size of rec_field = %ld Mbytes\n", size/MB);
+		size_out = (size_t)(nzv)*(size_t)(nxy)*sizeof(float);
+		if (image_su) size_out += (TRCBYTES*nxy);
+		fprintf(stderr," size of image file = %ld Mbytes\n", size_out/MB);
+	}
+
+	t1 = wallclock_time(); t_io += t1-t2;
+
+/* Calculate operator tables */
+
+	if (method == 1) {
+		tablecalc_2D(oplx, oply, nxv, dxv, nyv, dyv, dzv, alpha, fmin, fmax,
+			vmin, vmax, df, weight, fine, oper_opt, file_table, verbose);
+	} 
+	else if (method == 2) {
+		tablecalc_1D(order, nxv, dxv, dzv, alpha, fmin, fmax, vmin, vmax, df,
+			fine, oper_opt, verbose);
+	}
+
+	t2 = wallclock_time(); t_table = t2-t1;
+	if (verbose) 
+		fprintf(stderr," time to calculate tables      : %.3f s.\n", t_table);
+
+/* ============ INITIALIZE AND CHECK PARAMETERS =============== */
+
+/* allocate rec and src field to zero */
+
+	rec_field = (complex *)calloc(nxy*nlw, sizeof(complex));
+	assert(rec_field != NULL);
+	velocity = (float *)malloc(dstep*nxy*sizeof(float));
+	assert(velocity != NULL);
+	image = (float *)malloc(dstep*nxy*sizeof(float));
+	assert(image != NULL);
+#ifdef MPI
+	tot_image = (float *)calloc(nxy*dstep, sizeof(float));
+	assert(tot_image != NULL);
+#endif
+
+	if (!zomigr) {
+		src_field = (complex *)calloc(nxy*nlw, sizeof(complex));
+		assert(src_field != NULL);
+	}
+	else {
+		src_field = NULL;
+	}
+
+	one_shot    = 1;
+	traces_shot = 0;
+	traces_read_in = 0;
+	ixmin = nxv-1; ixmax = 0;
+	iymin = nxv-1; iymax = 0;
+
+	fseek(shot_file, 0, SEEK_SET);
+
+	read_FFT_DataFile(shot_file, rec_field, shot_area, nfft, nlw, 
+		freq_index[0], &traces_read_in, &traces_shot, 
+		&ixmin, &ixmax, &iymin, &iymax, &sx, &sy, conjg, verbose);
+
+/* Open src file and read source field */
+
+	if (!zomigr) {
+
+		if (file_src) {
+			hdrw = (segy *)malloc(TRCBYTES);
+			src_file = fopen( file_src, "r" );
+			assert( src_file );
+			nread = fread( hdrw, 1, TRCBYTES, src_file );
+			assert (nread == TRCBYTES);
+		
+			if (hdrw[0].scalco < 0) sclw = 1.0/fabs(hdrw[0].scalco);
+			else if (hdrw[0].scalco == 0) sclw = 1.0;
+			else sclw = hdrw[0].scalco;
+
+			fldr_w = hdrw[0].fldr;
+			ns  = hdrw[0].ns;
+			dtw = 1e-6*hdrw[0].dt;
+			if (ns < nt) fprintf(stderr,"WARNING: nt of file_src < nt of file_shot: zero's will be added to %d samples\n", nfft);
+
+			assert (nt >= ns); /* TO DO */
+			assert (dt == dtw);
+			free(hdrw);
+
+			traces_read_in_src = 0;
+			traces_shot_src = 0;
+			fseek(src_file, 0, SEEK_SET);
+			read_FFT_DataFile(src_file, src_field, shot_area, nfft, nlw, 
+				freq_index[0], &traces_read_in_src, &traces_shot_src, 
+				&ixmin, &ixmax, &iymin, &iymax, &sx, &sy, conjgs, verbose);
+
+			if (verbose) {
+				fprintf(stderr,"\n    SOURCE INFORMATION\n");
+				fprintf(stderr," number of traces in gather : %d\n", traces_shot_src);
+				fprintf(stderr," dt = %.4f nt = %d\n", dtw, ns);
+			}
+
+		}
+		else {
+			ix = (sx*scl-xvmin)/dxv;
+			iy = (sy*scl-yvmin)/dyv;
+			if (ix >=0 && ix<nxv && iy>=0 && iy<nyv) {
+				for (iw=0; iw<nlw; iw++) {
+					src_field[iw*nxy+iy*nxv+ix].r = 1.0;
+				}
+				ixmin = MIN(ix,ixmin);
+				iymin = MIN(iy,iymin);
+				ixmax = MAX(ix,ixmax);
+				iymax = MAX(iy,iymax);
+			}
+			else {
+				fprintf(stderr,"*** source at %.2f, %.2f outside model\n",
+					sx*scl, sy*scl);
+			}
+		}
+
+
+	} /* end of zomigr */
+	one_shot=1;
+
+/* open image file */
+
+	if (stackmigr) {
+		image_file = fopen( file_image, "w+" ); 
+		assert( image_file );
+		fprintf(stderr,"WARNING: stackmigr does not yet work on LINUX.\n");
+		stackmigr = 0;
+
+	}
+	else {
+		image_file = fopen( file_image, "w+" ); 
+		assert( image_file );
+	}
+
+	t1 = wallclock_time(); t_io += t1-t2;
+
+	if (verbose) 
+		fprintf(stderr," time to initialize migration  : %.3f s.\n", t1-t0);
+
+/* Loop over input traces */
+
+	is = 0;
+	while (one_shot) {
+		t1 = wallclock_time(); t_init += t1-t2;
+
+		if (verbose) {
+			fprintf(stderr,"\n    MIGRATION INFORMATION\n");
+			fprintf(stderr," source position (x,y) : %.2f, %.2f\n", 
+				sx*scl, sy*scl);
+			fprintf(stderr," number of traces in shot : %d\n", traces_shot);
+			fprintf(stderr," traces done = %d to do %d\n", 
+				traces_read_in-traces_shot, ntraces-traces_read_in+traces_shot);
+			fprintf(stderr," shot region is      x:%d-%d        y:%d-%d\n", 
+				ixmin, ixmax, iymin, iymax);
+		}
+		
+	/* determine aperture to be extrapolated */
+
+		if (area>0) {
+			ixmin = MAX(0,ixmin-area);
+			ixmax = MIN(nxv-1,ixmax+area);
+			iymin = MAX(0,iymin-area);
+			iymax = MIN(nyv-1,iymax+area);
+		}
+		else {
+			ixmin = 0;
+			iymin = 0;
+			ixmax = nxv-1;
+			iymax = nyv-1;
+		}
+		nx = ixmax-ixmin+1;
+		ny = iymax-iymin+1;
+
+		shot_area.ixmin = ixmin;
+		shot_area.ixmax = ixmax;
+		shot_area.iymin = iymin;
+		shot_area.iymax = iymax;
+		shot_area.sxy   = nxy;
+	
+		if (verbose) {
+			fprintf(stderr," work area is x:%d-%d (%d) y:%d-%d (%d)\n",
+				ixmin, ixmax, nx, iymin, iymax, ny);
+		}
+
+		t2 = wallclock_time(); t_init += t2-t1;
+
+		/* Imaging for z=0 */
+
+		memset( &image[0], 0, nxy*sizeof(float) );
+
+		for (iw=0; iw<nlw; iw++) {
+			om = freq_index[iw]*dw;
+			rec = (complex*) (rec_field + iw*nxy);
+			if (!zomigr) src = (complex*) (src_field + iw*nxy);
+            
+			image_condition(src, rec, &shot_area, &image[0], om,
+				wmax, eps_a, imc);
+		}
+		t1 = wallclock_time(); t_migr += t1-t2;
+
+		/* collect partial_image from other PE's */
+#ifdef MPI
+		MPI_Reduce(image, tot_image, nxy, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+#else
+		tot_image = image;
+#endif
+		t2 = wallclock_time(); t_comm += t2-t1;
+
+	/* write image to output file */
+		if (pe == root_pe) {
+			write_ImageFile(image_file, tot_image, shot_area, 
+				is, 0, image_su, verbose);
+		}
+
+		t1 = wallclock_time(); t_io += t1-t2;
+
+	/* Start of depth loop */
+
+		for (d=0; d<ndepth; d+=dstep) {
+			id1 = d;
+			id2 = MIN(id1+dstep, ndepth);
+			nel = (id2-id1)*nxy;
+
+			/* Read dstep depth slices */
+
+			t2 = wallclock_time(); t_init += t2-t1;
+			for (id=id1,i=0; id<id2; id++,i++) {
+				readVelocitySlice(vel_file, &velocity[i*nxy], id, nyv, nxv);
+			}
+
+			if (zomigr==1) {
+				for (i=0; i<nel; i++) velocity[i] *= 0.5;
+			}
+
+			if (verbose > 1) {
+				fprintf(stderr," migrating depth levels ");
+				fprintf(stderr,"%d (%.2f) to %d (%.2f) \n",
+					id1, zvmin+dzv*id1, id2, zvmin+dzv*id2);
+			}
+			t1 = wallclock_time(); t_io += t1-t2;
+
+			memset( &image[0], 0, nxy*dstep*sizeof(float) );
+
+			for (iw=0; iw<nlw; iw++) {
+				om = freq_index[iw]*dw;
+				rec = (complex*) (rec_field + iw*nxy);
+				if (!zomigr) src = (complex*) (src_field + iw*nxy);
+
+				for (id=id1,i=0; id<id2; id++,i++) {
+
+					/* Extrapolation */
+					xwMigr3d(src, rec, &velocity[i*nxy], vmin, oplx, oply,
+						order, McC, om, nterms, filter_inc, 
+						ntap, tap_opt, &shot_area, zomigr, method);
+
+					/* Imaging */
+					image_condition(src, rec, &shot_area,
+						&image[i*nxy], om, wmax, eps_a, imc);
+
+				} /* end of small depth loop */
+			} /* end of frequency loop */
+
+			t2 = wallclock_time(); t_migr += t2-t1;
+#ifdef MPI
+		MPI_Reduce(image, tot_image, dstep*nxy, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+#else
+		tot_image = image;
+#endif
+
+			t1 = wallclock_time(); t_comm += t1-t2;
+
+		/* write image to output file */
+
+			if (pe == root_pe) {
+				for (id=id1,i=0; id<id2; id++,i++) {
+					write_ImageFile(image_file, &tot_image[i*nxy], shot_area, 
+						id, id, image_su, verbose);
+				}
+			}
+			t2 = wallclock_time(); t_io += t2-t1;
+			t1 = t2;
+
+		} /* end of (dstep) depth loop */
+
+		t1 = wallclock_time(); t_init += t1-t2;
+
+		if (verbose) {
+			fprintf(stderr," subtime for migration     : %.3f s.\n", t_migr);
+			fprintf(stderr," subtime for io            : %.3f s.\n", t_io);
+			fprintf(stderr," subtime for communication : %.3f s.\n\n", t_comm);
+		}
+
+		/* Read next shot record */
+
+		if (traces_read_in == ntraces) {
+			one_shot = 0;
+		}
+		else {
+			traces_shot = 0;
+			memset( &rec_field[0], 0, nlw*nxy*sizeof(complex) );
+			read_FFT_DataFile(shot_file, rec_field, shot_area, nfft, 
+				nlw, freq_index[0], &traces_read_in, &traces_shot,
+				&ixmin, &ixmax, &iymin, &iymax, &sx, &sy, conjg, verbose);
+		}
+		is++;
+
+	/* source wavefield */
+
+		if (one_shot && !zomigr) {
+
+			memset( &src_field[0], 0, nlw*nxy*sizeof(complex) );
+
+			if (file_src) {
+				traces_shot_src = 0;
+				err = read_FFT_DataFile(src_file, src_field, shot_area, nfft, nlw, 
+					freq_index[0], &traces_read_in_src, &traces_shot_src, 
+					&ixmin, &ixmax, &iymin, &iymax, &sx, &sy, conjgs, verbose);
+				if (err == -1) {
+					file_src = NULL;
+				}
+			}
+			if (file_src == NULL) {
+				ix = (sx*scl-xvmin)/dxv;
+				iy = (sy*scl-yvmin)/dyv;
+				if (ix >=0 && ix<nxv && iy>=0 && iy<nyv) {
+					for (iw=0; iw<nlw; iw++) {
+						src_field[iw*nxy+iy*nxv+ix].r = 1.0;
+						src_field[iw*nxy+iy*nxv+ix].i = 0.0;
+					}
+					ixmin = MIN(ix,ixmin);
+					iymin = MIN(iy,iymin);
+					ixmax = MAX(ix,ixmax);
+					iymax = MAX(iy,iymax);
+				}
+				else {
+					fprintf(stderr,"*** source at %.2f, %.2f outside model\n",
+						sx*scl, sy*scl);
+				}
+			}
+		}
+
+		t2 = wallclock_time(); t_io += t2-t1;
+
+	} /* end of while loop over input traces */
+
+	t1 = wallclock_time();
+
+/* Write total image result to output file */
+
+/*	if (stackmigr) close(fd);
+	else fclose(image_file); */
+
+	fclose(image_file);
+	if (file_src) fclose(src_file);
+	if (!zomigr) free(src_field);
+	fclose(vel_file);
+	fclose(shot_file);
+
+	t2 = wallclock_time(); t_io += t2-t1;
+
+	free(velocity);
+	free(rec_field);
+	free(image);
+/* clean temporary velocity files */
+	sprintf(sys_call,"rm -rf %s/velocity%d.bin\n",tmp_dir, getpid());
+	system(sys_call);
+#ifdef MPI
+	MPI_Barrier(MPI_COMM_WORLD);
+	free(tot_image);
+#endif
+	t1 = wallclock_time(); t_init += t1-t2;
+
+/* print the timing results */
+
+	if (verbose) {
+		fprintf(stderr,"Time for total migration  : %.3f s.\n", t2-t0);
+		fprintf(stderr,"  time for migration      : %.3f s.\n", t_migr);
+		fprintf(stderr,"  time for tables         : %.3f s.\n", t_table);
+		fprintf(stderr,"  time for io             : %.3f s.\n", t_io);
+		fprintf(stderr,"  time for communication  : %.3f s.\n", t_comm);
+		fprintf(stderr,"  time for initialization : %.3f s.\n", t_init);
+	}
+
+	return 0;
+}
+
+
diff --git a/extrap3d/main/zomodel3d.c b/extrap3d/main/zomodel3d.c
new file mode 100644
index 0000000000000000000000000000000000000000..39f66a2591032f7728c48c8d3d6795d284441fad
--- /dev/null
+++ b/extrap3d/main/zomodel3d.c
@@ -0,0 +1,746 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <time.h>
+#include <math.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include "par.h"
+#include "segy.h"
+//#include "genfft.h"
+#include "Area.h"
+#ifdef MPI
+#include <mpi.h>
+#endif
+
+double wallclock_time(void);
+int optncr(int n);
+void rc1fft(float *rdata, complex *cdata, int n, int sign);
+void cr1fft(complex *cdata, float *rdata, int n, int sign);
+
+/****** IO routines *******/
+int openVelocityFile(char *file_vel, FILE **fp, Area *vel_area, int verbose);
+void readVelocitySlice(FILE *fp, float *velocity, int iz, int nyv, int nxv);
+int write_FFT_DataFile(FILE *fp, complex *data, Area data_area, int fldr, int nt, int nfft, int nw, int nw_low, float dt, int out_su, int conjg, int verbose);
+
+/***** Operator tables *****/
+void tablecalc_2D(int oplx, int oply, int nx, float dx, int ny, float dy, 
+	float dz, float alpha, float fmin, float fmax, float cmin, float cmax, 
+    float df, float weight, int fine, int method, char *file_table, int verbose);
+
+void tablecalc_1D(int order, int nx, float dx, float dz, float theta, 
+	float fmin, float fmax, float vmin, float vmax, float df, int fine, 
+	int oper_opt, int verbose);
+
+/***** Wave field extrapolation *****/
+void xwExtr3d(complex *rec, float *velocity, float vmin,
+	int oplx, int oply, int order, int McC, float om, int nterms,
+	int filter_inc, int ntap, int tap_opt, Area *shot_area, int method);
+
+/***** Interpolation *****/
+void interpolateXY(float *velin, int nx, float dx, float dy, float *velout, int nxo, int nyo, float dxo, float dyo);
+void interpolateZ(float *velin0, float *velin1, int nx, int ny, float z0, float z1, float depth, float *velout);
+
+/* for MPI version */
+int frequency_distribution(int nw_low, int nw, int npes, int pe, int maxlw, 
+	int *freq_index, int type );
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+"  ",
+" ZOMODEL3D - 3D modeling of ZO-gathers (x,y-w).",
+"  ",
+" zomodel3d file_out= file_vel= [optional parameters]",
+"  ",
+" Required parameters:",
+" ",
+"   file_out= ................ output file with the modelling result",
+"   file_vel= ................ gridded velocity file ",
+"   vx,vy,vz ................. these 3 parameters must be set correct",
+"  ",
+" Optional parameters:",
+" ",
+"   conjg=0 .................. 1: take complex conjugate of input data",
+"   verbose=0 ................ >1: shows various parameters and results",
+" VELOCITY MODEL ",
+"   dxv=dxv .................. stepsize in x-direction of velocity model ",
+"   dyv=dyv .................. stepsize in y-direction of velocity model ",
+"   dzv=dzv .................. stepsize in z-direction of velocity model ",
+"   nxv= ..................... number of samples in the x-direction file_vel",
+"   nyv= ..................... number of samples in the y-direction file_vel",
+"   nzv= ..................... number of samples in the z-direction file_vel",
+"   xvmin=0 .................. first x-position of velocity sampling point",
+"   yvmin=0 .................. first y-position of velocity sampling point",
+"   zvmin=0 .................. first z-position of velocity sampling point",
+"   vmin=1500 ................ minimum velocity in file_vel ",
+"   vmax=4800 ................ maximum velocity in file_vel ",
+"   vx=1 ..................... dimension number for x axis (1=sample)",
+"   vy=2 ..................... dimension number for y axis (2=trace)",
+"   vz=3 ..................... dimension number for z axis (3=gather)",
+"   tmp_dir=/tmp ............. tmp directory to store local velocity file",
+" DENISITY MODEL #",
+"   file_den= ................ gridded density file ",
+"   alphar= .................. acoustic angle reflectivity (degrees)",
+" MODELLING ",
+"   ndepth=all ............... number of depth steps",
+"   ntap=0 ................... number of taper points at boundaries",
+"   tap_opt=1 ................ 0: exponential, 1: cosinus 2: linear",
+"   dxo=dxv .................. stepsize in x-direction in modelling ",
+"   dyo=dyv .................. stepsize in y-direction in modelling ",
+"   dzo=dzv .................. stepsize in z-direction in modelling ",
+" SOURCE DEFINITION ",
+"   file_src=<file_name> ..... wavelet used ",
+"   fmin=0 ................... minimum frequency ",
+"   fmax=45 .................. maximum frequency",
+"   tshift=0.0 ............... time shift for source wavelet",
+"   dt=0.004 ................. time sampling",
+"   nt=512 ................... number of time samples",
+"   conjgs=0 ................. 1: take complex conjugate of source wavefield",
+" EXTRAPOLATION OPERATOR DEFINITION ",
+"   file_table= .............. file which contains pre-computed operators ",
+"   method=1 ................. type of 3D extrapolation method (see below)",
+"   oplx=25 .................. length of the convolution operator in x (odd)",
+"   oply=oplx ................ length of the convolution operator in y (odd)",
+"   order=13 ................. order in McClellan and Series expansion",
+"   McC=1 .................... type of McClellan (cos(kr)) operator",
+"   oper_opt=1 ............... 1D operator in McC 1:smooth 2:filter 3:remez",
+"   alpha=65 ................. maximum angle of interest (used in operator)",
+"   weight=5e-5 .............. weight factor in WLSQ operator optimization",
+"   weights=1e-2 ............. weight factor in series expansion optimization",
+"   fine=2 ................... fine sampling in operator table",
+"   filter_inc=1 ............. the increment in dz for the Li filter (0=off)",
+"   nterms=1 ................. the number of terms in the paraxial expansion",
+"  ",
+"   Options for method:",
+"         - 1  = Direct 2D convolution (Walter's scheme)",
+"         - 2  = McCLellan transformation with Chebyshev recursion",
+"         - 3  = Split-Step Fourier domain extrapolation",
+"         - 4  = Finite Difference with Li's (1991) filter",
+"   Options for McC (only for method=2):",
+"         - (1) 1 st order McClellan (3x3 stencil,  9 points)",
+"         - (2) 2 nd order McClellan (5x5 stencil, 17 points)",
+" ",
+"  The positions in the model are determined by the hdr values gx,gy. ",
+"  The velocity file is assumed to have the data ordered per depth slice.",
+"  # Note,stepsize, number of samples and starting position are assumed ",
+"    to be the same as VELOCITY MODEL",
+" ",
+"      Jan Thorbecke 2006",
+"      TU Delft / Cray ",
+"      E-mail: janth@xs4all.com ",
+"  ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main(int argc, char *argv[])
+{
+	FILE    *vel_file, *src_file, *out_file, *den_file;
+	size_t  nread, size, size_out;
+	int     verbose,  method, MB;
+	int     nxv, nyv, nzv;
+	int     nxo, nyo, nzo;
+	int     d, nt, ndepth, i, conjg, conjgs;
+	int     ntap, tap_opt, order, McC, oplx, oply, fine;
+	int     area, ixmin, ixmax, iymin, iymax;
+	int     nfft, nfreq, nw_high, nw_low, nw, ix, iy;
+	int     sxy, iw, sign, ixv, iyv; 
+	int     nxy, pos, id, idp, oper_opt;
+	int		out_su, nterms, filter_inc, interpol;
+	Area    shot_area;
+	float   alpha, weight;
+	float	depth1,depth2;
+	float   fmin, fmax, dt;
+	float   *velocity, *velint, *velfield, weights, tshift;
+	float   *density, *denint, *denfield, alphar, alphac, wortel;
+    float   *vel0, *vel1, *vel0i, *vel1i, *z0, *z1, *pt, *R;
+	float   *den0, *den1, *den0i, *den1i, *d0, *d1;
+	float   dxv, dyv, dzv, vmin, vmax;
+    float   dxo, dyo, dzo, idxv, idyv, xo, yo, xt, yt, zt;
+    float   depth;
+	float   dw, df, om, *trace, tdw, t_w, tr, ti;
+	double  t0, t1, t2, t_migr=0, t_io=0, t_table=0, t_init=0;
+	complex *ctrace, *src_trace, *rec_field, *rec;
+	char    *file_vel, *file_src, *file_out, *file_den, *file_table;
+	char    *tmp_dir, sys_call[256];
+	segy    *hdrw;
+	int     npes, pe, root_pe=0, nlw, maxlw, *freq_index, fdist, ipe;
+#ifdef MPI
+	int *nlwcounts, *recvcounts, *displacements;
+	int nlw_tag;
+	complex  *gath_rec_field;
+	MPI_Status status;
+
+	MPI_Init( &argc, &argv );
+	MPI_Comm_size( MPI_COMM_WORLD, &npes );
+	MPI_Comm_rank( MPI_COMM_WORLD, &pe );
+#else
+	npes = 1;
+	pe   = 0;
+#endif
+
+	t0 = wallclock_time();
+
+/* Read in parameters */
+
+	initargs(argc,argv);
+	requestdoc(0);
+
+	if(!getparstring("file_vel", &file_vel)) file_vel=NULL;
+	if(!getparstring("file_den", &file_den)) file_den=NULL;
+	if(!getparstring("file_out", &file_out)) file_out=NULL;
+	if(!getparstring("file_src", &file_src)) file_src=NULL;
+	if(!getparstring("file_table", &file_table)) file_table=NULL;
+	if(!getparstring("tmp_dir", &tmp_dir)) tmp_dir="/tmp";
+	if(!getparfloat("fmin", &fmin)) fmin = 0.0;
+	if(!getparfloat("fmax", &fmax)) fmax = 45.0;
+	if(!getparfloat("tshift",&tshift)) tshift = 0.;
+	if(!getparint("method", &method)) method = 1;
+	if(!getparint("conjg", &conjg)) conjg = 0;
+	if(!getparint("conjgs", &conjgs)) conjgs = 0;
+	if(!getparint("area", &area)) area = 0;
+	if(!getparint("ntap", &ntap)) ntap = 0;
+	if(!getparint("tap_opt", &tap_opt)) tap_opt = 1;
+	if(!getparint("oplx", &oplx)) oplx = 25;
+	if(!getparint("oply", &oply)) oply = oplx;
+	if(!getparint("order", &order)) order = 13;
+	if(!getparint("McC", &McC)) McC = 1;
+	if(!getparint("oper_opt", &oper_opt)) oper_opt = 1;
+	if(!getparint("nterms", &nterms)) nterms = 1;
+	if(!getparint("filter_inc", &filter_inc)) filter_inc = 1;
+	if(!getparfloat("alpha", &alpha)) alpha = 65.0;
+	if(!getparfloat("alphar", &alphar)) alphar = 0.0;
+	if(!getparfloat("weight", &weight)) weight = 5e-5;
+	if(!getparfloat("weights", &weights)) weights = 1e-2;
+	if(!getparint("fine", &fine)) fine = 2;
+	if(!getparfloat("dt", &dt)) dt = 0.004;
+	if(!getparint("nt", &nt)) nt = 512;
+	if(!getparint("verbose", &verbose)) verbose = 0;
+
+	if(!ISODD(oplx)) oplx += 1;
+	if(!ISODD(oply)) oply += 1;
+	if(conjg)  conjg  = -1; else  conjg = 1;
+	if(conjgs) conjgs = -1; else conjgs = 1;
+	assert(McC <= 2 && McC >= 1);
+	assert(method <= 5 && method >= 1);
+	assert(file_vel != NULL);
+	out_su = (strstr(file_out, ".su")!=NULL);
+
+	t2 = wallclock_time(); t_init += t2-t0;
+
+/* Open velocity file to read size and area information */
+	if (file_den != NULL) {
+		openVelocityFile(file_den, &den_file, &shot_area, verbose);
+		alphar = (alphar*M_PI)/180.0;
+	}
+
+	openVelocityFile(file_vel, &vel_file, &shot_area, verbose);
+
+	nxv   = shot_area.nx;
+	nyv   = shot_area.ny;
+	nzv   = shot_area.nz;
+	dxv   = shot_area.dx;  
+	dyv   = shot_area.dy;
+	dzv   = shot_area.dz;
+	nxy   = shot_area.sxy;
+
+/* Get parameters for actual velocity field (to be interpolated) */ 
+
+	if(!getparfloat("dxo", &dxo)) dxo = dxv;
+	if(!getparfloat("dyo", &dyo)) dyo = dyv;
+	if(!getparfloat("dzo", &dzo)) dzo = dzv;
+    nxo = (int)(((nxv-1.0)*dxv)/dxo)+1;
+    nyo = (int)(((nyv-1.0)*dyv)/dyo)+1;
+    nzo = (int)(((nzv-1.0)*dzv)/dzo)+1;
+	nxy = nxo*nyo;
+
+    if ( (nxo != nxv) || (nyo != nyv) || (nzo != nzv) ) interpol = 1;
+    else interpol = 0;
+
+/*================ Define time wavelet ================*/
+
+/* Open src file and read source field */
+
+	hdrw = (segy *)malloc(TRCBYTES);
+	if (file_src) {
+		src_file = fopen( file_src, "r" );
+		assert( src_file );
+		nread = fread( hdrw, 1, TRCBYTES, src_file );
+		assert (nread == TRCBYTES);
+		nt = hdrw[0].ns;
+		dt = 1e-6*hdrw[0].dt;
+    }
+
+	nfft     = optncr(nt);
+	nfreq    = nfft/2 + 1;
+	df       = 1.0/(nfft*dt);
+	dw       = 2.*M_PI*df;
+	nw_high  = MIN( (int)(fmax/df), nfreq );
+	nw_low   = MAX( (int)((fmin)/df), 1 );
+	nw       = nw_high - nw_low + 1;
+    sign     = -1;
+
+	/* read wavelet */
+
+    trace     =   (float *)calloc(nfft,sizeof(float));
+    ctrace    = (complex *)malloc(nfreq*sizeof(complex));
+	src_trace = (complex *)malloc(nfreq*sizeof(complex));
+
+	if (file_src) {
+        nread = fread( trace, sizeof(float), nt, src_file );
+        assert (nread == nt);
+        rc1fft(trace,ctrace,nfft,sign);
+        for (iw=0; iw<nfreq; iw++) {
+            src_trace[iw].r = ctrace[iw].r;
+            src_trace[iw].i = conjgs*ctrace[iw].i;
+        }
+	    fclose(src_file);
+	}
+    else {
+        for (iw=0; iw<nfreq; iw++) {
+            src_trace[iw].r = 1.0;
+            src_trace[iw].i = 0.0;
+		}
+    }
+	t1 = wallclock_time(); t_io += t1-t2;
+
+	if (tshift != 0) { /* time shift of wavelet */
+		tdw = -tshift*dw;
+		for (iw=0; iw<nfreq; iw++) {
+			t_w = iw*tdw;
+			tr = src_trace[iw].r*cos(t_w) - src_trace[iw].i*sin(t_w);
+			ti = src_trace[iw].r*sin(t_w) - src_trace[iw].i*cos(t_w);
+			src_trace[iw].r = tr;
+			src_trace[iw].i = ti;
+		}
+	}
+	free(hdrw);
+	free(trace);
+	free(ctrace);
+
+/*======= compute frequency distribution for multiple CPU's ========*/
+
+	fdist = 0;
+	maxlw = ceil((float)nw/(float)npes);
+	freq_index = (int *)malloc(maxlw*sizeof(int));
+	nlw = frequency_distribution(nw_low, nw, npes, pe, maxlw, 
+		freq_index, fdist );
+
+#ifdef MPI
+	if( verbose ) {
+		/* print out all the frequencies for each process */
+		MPI_Barrier(MPI_COMM_WORLD);
+		for( ipe=0; ipe<npes; ipe++ ) {
+			if( pe == ipe ) {
+				fprintf(stderr, "pe=%d:\tf[%d] = df*{", pe, nlw );
+				if( nlw > 0 )
+				for( iw=0; iw<nlw; iw++ )
+					fprintf(stderr, " %d", freq_index[iw] );
+				fprintf( stderr, " }\n" );
+				fflush(stderr);
+			}
+			MPI_Barrier(MPI_COMM_WORLD);
+		}
+	}
+	if (npes == 1) nlw = nw;
+	nlwcounts = (int *)malloc(npes*sizeof(int));
+	recvcounts = (int *)malloc(npes*sizeof(int));
+	displacements = (int *)malloc(npes*sizeof(int));
+	nlw_tag = 1;
+	if (pe == root_pe) {
+		displacements[0] = 0;
+		nlwcounts[0] = nlw;
+		for( ipe=1; ipe<npes; ipe++ ) {
+			MPI_Recv(&nlwcounts[ipe], 1, MPI_INT, ipe, nlw_tag, 
+				MPI_COMM_WORLD, &status);
+			displacements[ipe] = displacements[ipe-1]+nlwcounts[ipe-1];
+		}
+	}
+	else {
+		MPI_Send(&nlw, 1, MPI_INT, root_pe, nlw_tag, MPI_COMM_WORLD);
+	}
+#else
+	nlw = nw;
+#endif
+	fmin = MAX(0,-df + df*freq_index[0]);
+	fmax =  df + df*freq_index[nlw-1];
+
+	if(!getparint("ndepth", &ndepth)) ndepth = nzo-1;
+	else assert (ndepth <= nzo-1);
+	if(!getparfloat("depth1",&depth1)) depth1 = 0;
+	if(!getparfloat("depth2",&depth2)) depth2 = ndepth*dzv;
+
+	if(!getparfloat("vmin", &vmin)) vmin = 1500;
+	if(!getparfloat("vmax", &vmax)) vmax = 4800;
+	vmin *= 0.5; vmax *= 0.5; 
+
+	assert( fmax < 1.0/(2.0*dt) ); /* Nyguist in time */
+	assert( (2.0*fmax)/vmin < 1.0/dxo ); /* Nyguist in space */
+	assert( (2.0*fmax)/vmin < 1.0/dyo ); /* Nyguist in space */
+
+	if (verbose && pe==root_pe) {
+        if (interpol) {
+            fprintf(stderr," Interpolation to new grid:\n");
+		    fprintf(stderr,"   nxo = %d dxo = %.2f\n", nxo, dxo);
+		    fprintf(stderr,"   nyo = %d dyo = %.2f\n", nyo, dyo);
+		    fprintf(stderr,"   nzo = %d dzo = %.2f\n", nzo, dzo);
+        }
+		fprintf(stderr," minimum velocity = %.2f\n", vmin);
+		fprintf(stderr," maximum velocity = %.2f\n", vmax);
+	}
+
+	sxy  = nxy;
+	size = (size_t)nlw*sxy*sizeof(complex);
+
+	t2 = wallclock_time(); t_init += t2-t1;
+	if (verbose && pe==root_pe) {
+		MB = 1024*1024;
+		fprintf(stderr,"\n    DATA INFORMATION\n");
+		fprintf(stderr," sxy      = %d nw = %d\n", sxy, nw);
+		fprintf(stderr," fmin = %.3f fmax = %.3f\n", nw_low*df, nw_high*df);
+		fprintf(stderr," fmin = %.3f fmax = %.3f\n", fmin, fmax);
+		fprintf(stderr," dt = %.4f nt = %d nfft = %d\n", dt, nt, nfft);
+		fprintf(stderr," size of rec_field = %ld Mbytes\n", size/MB);
+		size_out = (size_t)(nt)*(size_t)(nxy)*sizeof(float);
+		if (out_su) size_out += (TRCBYTES*nxy);
+		fprintf(stderr," size of output file = %ld Mbytes\n", size_out/MB);
+		fprintf(stderr," time to initialize modelling  : %.3f s.\n", t2-t0);
+	}
+
+/* =============== Make operator Table ================= */
+
+	if (method == 1) {
+		fine = 1;
+		tablecalc_2D(oplx, oply, nxo, dxo, nyo, dyo, dzo, alpha, fmin, fmax,
+			vmin, vmax, df, weight, fine, oper_opt, file_table, verbose);
+	} 
+	else if (method == 2) {
+		tablecalc_1D(order, nxo, dxo, dzo, alpha, fmin, fmax, vmin, vmax, df,
+			fine, oper_opt, verbose);
+	}
+	t1 = wallclock_time(); t_table = t1-t2;
+	if (verbose) 
+		fprintf(stderr," time to calculate tables      : %.3f s.\n", t_table);
+
+/* ============ INITIALIZE AND CHECK PARAMETERS =============== */
+
+/* allocate data fields */
+
+	rec_field = (complex *)calloc(size, sizeof(float));
+	assert(rec_field != NULL);
+	velocity = (float *)malloc(2*nxv*nyv*sizeof(float));
+	assert(velocity != NULL);
+	velfield = (float *)malloc(2*nxy*sizeof(float));
+	assert(velfield != NULL);
+	velint = (float *)malloc(2*nxy*sizeof(float));
+    assert(velint != NULL);
+	R = (float *)malloc(nxy*sizeof(float));
+	assert(R != NULL);
+	if (file_den != NULL) {
+		density = (float *)malloc(2*nxv*nyv*sizeof(float));
+		assert(density != NULL);
+		denfield = (float *)malloc(2*nxy*sizeof(float));
+		assert(denfield != NULL);
+		denint = (float *)malloc(2*nxy*sizeof(float));
+		assert(denint != NULL);
+	}
+
+/* determine aperture to be extrapolated */
+
+	ixmin = 0;
+	iymin = 0;
+	ixmax = nxo-1;
+	iymax = nyo-1;
+
+	shot_area.ixmin = ixmin;
+	shot_area.ixmax = ixmax;
+	shot_area.iymin = iymin;
+	shot_area.iymax = iymax;
+	shot_area.dx    = dxo;
+	shot_area.dy    = dyo;
+	shot_area.dz    = dzo;
+	shot_area.nx    = nxo;
+	shot_area.ny    = nyo;
+	shot_area.sxy   = sxy;
+
+	t2 = wallclock_time(); t_init += t2-t1;
+
+/* read depth slice at ndepth */
+
+    depth = (ndepth)*dzo;
+    id = (int)(depth/dzv);
+
+    vel1 = &velocity[0];
+    vel0 = &velocity[nxv*nyv];
+	readVelocitySlice(vel_file, vel1, id, nyv, nxv);
+
+	if (file_den != NULL) {
+		den1 = &density[0];
+		den0 = &density[nxv*nyv];
+		readVelocitySlice(den_file, den1, id, nyv, nxv);
+	}
+
+    idp = id;
+    id  = idp-1;
+	if (verbose) fprintf(stderr," depth = %f idp = %d\n", depth, idp);
+
+	t1 = wallclock_time(); t_io += t1-t2;
+
+/* interpolate in x-y directions deepest depth level*/
+
+	/*Velocity*/
+    vel1i = &velint[0];
+    vel0i = &velint[nxy];
+	interpolateXY(vel1, nxv, dxv, dyv, vel1i, nxo, nyo, dxo, dyo);
+    z1 = &velfield[0];
+    z0 = &velfield[nxy];
+	for (i=0; i<nxy; i++) z1[i] = vel1i[i];
+
+	/*Density*/
+    if (file_den != NULL) {
+    	den1i = &denint[0];
+    	den0i = &denint[nxy];
+		interpolateXY(den1, nxv, dxv, dyv, den1i, nxo, nyo, dxo, dyo);
+    	d1 = &denfield[0];
+    	d0 = &denfield[nxy];
+		for (i=0; i<nxy; i++) d1[i] = den1i[i];
+     }
+
+
+/* Start modelling */
+
+/* Start of depth loop */
+
+	for (d=ndepth-1; d>=0; d--) {
+
+        /* Read depth slices and interpolate in XY*/
+
+        if (id < idp) {
+			readVelocitySlice(vel_file, vel0, id, nyv, nxv);
+            idp = id;
+			interpolateXY(vel0, nxv, dxv, dyv, vel0i, nxo, nyo, dxo, dyo);
+			if (file_den != NULL) { 
+				readVelocitySlice(den_file, den0, id, nyv, nxv);
+				interpolateXY(den0, nxv, dxv, dyv, den0i, nxo, nyo, dxo, dyo);
+			}
+        }
+		t2 = wallclock_time(); t_io += t2-t1;
+
+        /* interpolate depth slices to desired depth level */
+
+        depth = d*dzo;
+        zt = ((idp+1)*dzv-depth)/dzv;
+        for (iy=0; iy<nyo; iy++) {
+            for (ix=0; ix<nxo; ix++) {
+                pos = iy*nxo+ix;
+                z0[pos] = zt*vel0i[pos] + (1-zt)*vel1i[pos];
+            }   
+        }
+		if (file_den != NULL) { 
+        	for (iy=0; iy<nyo; iy++) {
+            	for (ix=0; ix<nxo; ix++) {
+                	pos = iy*nxo+ix;
+                	d0[pos] = zt*den0i[pos] + (1-zt)*den1i[pos];
+            	}	   
+        	}
+		}
+        if (verbose>2 && pe==root_pe) {
+            fprintf(stderr," d = %d depth = %f idp = %d zt = %f idp_depth = %f\n", d, depth, idp, zt, idp*dzv );
+        }
+
+        /* calculate Reflection coefficient */
+
+		memset(R,0,nxy*sizeof(float));
+		if (depth <= depth2 || depth >= depth1) {  
+			if (file_den != NULL) {
+				for (iy=0; iy<nyo; iy++) {
+					for (ix=0; ix<nxo; ix++) {
+						pos = iy*nxo+ix;
+						if ( (z0[pos] != z1[pos]) || (d0[pos] != d1[pos]) ) {
+							alphac = asin(z0[pos]/z1[pos]);
+							if (alphar < alphac) {
+							wortel = sqrt(z0[pos]*z0[pos]- 
+									z1[pos]*z1[pos]*sin(alphar)*sin(alphar));
+							R[pos] = (z1[pos]*d1[pos]*cos(alphar)-d0[pos]*wortel) /
+							 (z1[pos]*d1[pos]*cos(alphar)+d0[pos]*wortel);
+/*					fprintf(stderr,"ix %d iy %d alphac %f alphar %f R = %e\n",ix, iy, 180.*alphac/M_PI, 180.*alphar/M_PI, R[pos]);*/
+							}
+						}
+					}   
+				}
+			}
+			else {
+        		for (iy=0; iy<nyo; iy++) {
+            		for (ix=0; ix<nxo; ix++) {
+						if ( (z0[pos] != z1[pos]) ) {
+                			pos = iy*nxo+ix;
+                			R[pos] = (z1[pos]-z0[pos])/(z0[pos]+z1[pos]);
+						}
+            		}   
+        		}
+			}
+		}
+
+	    for (i=0; i<nxy; i++) z0[i] *= 0.5;
+
+		if (verbose>1 && pe==root_pe) {
+			fprintf(stderr,"   starting to extrapolate to depth level [%d] = %.3f m.\n", d, d*dzo);
+			fprintf(stderr,"   compute time of levels %.3f s.\n", t_migr);
+		}
+		t1 = wallclock_time(); t_init += t1-t2;
+
+		for (iw=0; iw<nlw; iw++) {
+			om = freq_index[iw]*dw;
+			rec = (complex*) (rec_field + iw*sxy);
+
+               /* add reflection field */
+			   for (iy=0; iy<nyo; iy++) {
+			       for (ix=0; ix<nxo; ix++) {
+			    	   pos = iy*nxo+ix;
+			    	   rec[pos].r += R[pos];
+			       }
+			   }
+
+			/* Extrapolation */
+			xwExtr3d(rec, z0, vmin, oplx, oply,
+				order, McC, om, nterms, filter_inc, 
+				ntap, tap_opt, &shot_area, method);
+
+		} /* end of frequency loop */
+
+		t2 = wallclock_time(); t_migr += t2-t1;
+        t1 = wallclock_time();
+        if (d == ndepth-1 && verbose && pe==root_pe) {
+		    fprintf(stderr," Estimated compute time, %.0f s.\n",
+                1.05*t_migr*(ndepth-1));
+        }
+
+        /* switch depth levels */
+
+        depth = (d-1)*dzo;
+        id = (int)(depth/dzv);
+        if (id < idp) {
+            pt = vel1;
+            vel1 = vel0;
+            vel0 = pt;
+            pt = vel1i;
+            vel1i = vel0i;
+            vel0i = pt;
+        }
+	    for (i=0; i<nxy; i++) z1[i] = z0[i]*2;
+		if (file_den != NULL) {
+			if (id < idp) {
+				pt = den1;
+				den1 = den0;
+				den0 = pt;
+				pt = den1i;
+				den1i = den0i;
+				den0i = pt;
+			}
+			for (i=0; i<nxy; i++) d1[i] = d0[i];
+		}
+
+
+	} /* end of depth loop */
+
+	/* convolve with wavelet */
+	if (file_src) { 
+    	for (iy=0; iy<nyo; iy++) {
+        	for (ix=0; ix<nxo; ix++) {
+				pos = iy*nxo+ix;
+				for (iw=0; iw<nlw; iw++) {
+					i = freq_index[iw];
+					tr = rec_field[iw*sxy+pos].r*src_trace[i].r -
+						 rec_field[iw*sxy+pos].i*src_trace[i].i;
+					ti = rec_field[iw*sxy+pos].r*src_trace[i].i +
+						 rec_field[iw*sxy+pos].i*src_trace[i].r;
+					rec_field[iw*sxy+pos].r = tr;
+					rec_field[iw*sxy+pos].i = ti;
+				}
+			}
+		}
+	}
+
+	t2 = wallclock_time(); t_init += t2-t1;
+
+/* communicate data from all PE's to root_pe */
+
+#ifdef MPI
+	fprintf(stderr,"pe %d: collecting all frequencies \n", pe);
+	fflush(stderr);
+	MPI_Barrier(MPI_COMM_WORLD);
+	if (pe == root_pe) {
+		gath_rec_field = (complex *)calloc(sxy*nw,sizeof(complex));
+		assert(gath_rec_field != NULL);
+	}
+
+	displacements[0] = 0;
+	recvcounts[0] = nlw*sxy*2;
+	for( ipe=1; ipe<npes; ipe++ ) {
+		recvcounts[ipe] = nlwcounts[ipe]*sxy*2;
+		displacements[ipe] = displacements[ipe-1]+recvcounts[ipe-1];
+	}
+	MPI_Gatherv(rec_field, sxy*nlw*2, MPI_FLOAT, gath_rec_field, recvcounts, 
+		displacements, MPI_FLOAT, root_pe, MPI_COMM_WORLD);
+	MPI_Barrier(MPI_COMM_WORLD);
+	if (pe == root_pe) {
+		free(rec_field);
+		rec_field = gath_rec_field;
+	}
+#endif
+
+	if (pe == root_pe) {
+		/* Write modelling result to output file */
+		if (verbose) 
+			fprintf(stderr," End of depth loop, writing data.\n");
+
+		out_file = fopen( file_out, "w+" ); assert( out_file );
+
+		write_FFT_DataFile(out_file, rec_field, shot_area, 1,  
+			nt, nfft, nw, nw_low, dt, out_su, conjg, verbose);
+
+		fclose(out_file);
+	}
+	fclose(vel_file);
+
+	free(velocity);
+	free(velfield);
+	free(velint);
+	free(R);
+	free(src_trace);
+	free(rec_field);
+	free(freq_index);
+	if (file_den != NULL) {
+		fclose(den_file);
+		free(density);
+		free(denfield);
+		free(denint);
+	}
+
+	t1 = wallclock_time(); t_io += t1-t2;
+
+/* print the timing results */
+
+	if (verbose && pe==root_pe) {
+		fprintf(stderr,"Time for total modeling   : %.3f s.\n", t2-t0);
+		fprintf(stderr,"  time for extrapolation  : %.3f s.\n", t_migr);
+		fprintf(stderr,"  time for tables         : %.3f s.\n", t_table);
+		fprintf(stderr,"  time for initialization : %.3f s.\n", t_init);
+		fprintf(stderr,"  time for I/0            : %.3f s.\n", t_io);
+	}
+/* clean temporary velocity files */
+	sprintf(sys_call,"rm -rf %s/velocity%d.bin\n",tmp_dir, getpid());
+	system(sys_call);
+#ifdef MPI
+	MPI_Barrier(MPI_COMM_WORLD);
+	free(nlwcounts);
+	free(recvcounts);
+	free(displacements);
+	MPI_Finalize();
+#endif
+	return 0;
+}
+
+
diff --git a/fdelmodc/sourceOnSurface.c b/fdelmodc/sourceOnSurface.c
index 4d420eb469c47fe0c8233f4f5c92e6e7a963e69e..a2062dc3f89f95a8f2ac6998b5fbc62768f4de07 100644
--- a/fdelmodc/sourceOnSurface.c
+++ b/fdelmodc/sourceOnSurface.c
@@ -58,10 +58,12 @@ int storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsr
 	else if (src.type==2) {
     	ibndz = mod.ioTz;
     	ibndx = mod.ioTx;
+    	if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
 	}
 	else {	
     	ibndz = mod.ioPz;
     	ibndx = mod.ioPx;
+    	if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
 	}
 
 /* check if there are sources placed on the boundaries */
@@ -294,10 +296,12 @@ int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int iz
 	else if (src.type==2) {
     	ibndz = mod.ioTz;
     	ibndx = mod.ioTx;
+    	if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
 	}
 	else {	
     	ibndz = mod.ioPz;
     	ibndx = mod.ioPx;
+    	if (bnd.lef==4 || bnd.lef==2) ibndx += bnd.ntap;
 	}
 
 	/* restore source positions on the edge */
diff --git a/marchenko/demo/twoD/README b/marchenko/demo/twoD/README
index ff1a0b1cbc6a66369faa72ebe2152a0e7ec48e09..a4c7852f088a25f6f53418e9043f5cf5565d6bb4 100644
--- a/marchenko/demo/twoD/README
+++ b/marchenko/demo/twoD/README
@@ -5,6 +5,6 @@ Description of files:
 2) direct.scr creates the direct arrival to be removed from the shots
 3) remove_direct.scr remove the direct wave from the shots 
 4) initialFocus.scr model G_d the intitial focusing function => iniFocus_z1100_x0_rp.su
-5) referenceShot.scr creates the reference Green's function at focal point => referenceP_z1100_x0_rp.su
-5) marchenko.scr perform the Marchenko scheme
+5) referenceShot.scr creates the reference Green's function at focal point => referenceP_rp.su
+6) marchenko.scr perform the Marchenko scheme