diff --git a/fdelmodc3D/3dfd.c b/fdelmodc3D/3dfd.c
new file mode 100644
index 0000000000000000000000000000000000000000..bfda4b58577fb2bce99aa84e612f5fbcfa7889ec
--- /dev/null
+++ b/fdelmodc3D/3dfd.c
@@ -0,0 +1,1221 @@
+#include<mpi.h>
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<time.h>
+#include<assert.h>
+#include<sys/time.h>
+#include"par.h"
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include"fdelmodc3D.h"
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+
+
+#define STRIPE_COUNT "4" /* must be an ascii string */
+#define STRIPE_SIZE "1048576" /* 1 MB must be an ascii string */
+//#define STRIPE_SIZE "268435456" /* 256 MB must be an ascii string */
+#define C1 (9.0/8.0)
+#define C2 (1.0/24.0)
+#define Dx(f,ix,iy,iz,nz) C1*(f[iy+ix*nz+iz] - f[iy+(ix-1)*nz+iz]) - C2*(f[iy+(ix+1)*nz+iz] - f[iy+(ix-2)*nz+iz])
+#define Dy(f,ix,iy,iz,nxz) C1*(f[iy*nxz+ix+iz] - f[(iy-1)*nxz+ix+iz]) - C2*(f[(iy+1)*nxz+ix+iz] - f[(iy-2)*nxz+ix+iz])
+#define Dz(f,ix,iy,iz) C1*(f[iy+ix+iz] - f[iy+ix+iz-1]) - C2*(f[iy+ix+iz+1] - f[iy+ix+iz-2])
+
+#define Dv(vx,vz,ix,iy,iz,nz,nxz) C1*((vx[iy*nxz+(ix+1)*nz+iz] - vx[iy*nxz+ix*nz+iz]) + \
+				      (vy[(iy+1)*nxz+ix*nz+iz] - vy[iy*nxz+ix*nz+iz]) + \
+				      (vz[iy*nxz+ix*nz+iz+1]   - vz[iy*nxz+ix*nz+iz])) - \
+				  C2*((vx[iy*nxz+(ix+2)*nz+iz] - vx[iy*nxz+(ix-1)*nz+iz]) + \
+				      (vy[(iy+2)*nxz+ix*nz+iz] - vy[(iy-1)*nxz+ix*nz+iz]) + \
+				      (vz[iy*nxz+ix*nz+iz+2]   - vz[iy*nxz+ix*nz+iz-1]))
+
+int getParameters3D(modPar *mod, recPar *rec, snaPar *sna, wavPar *wav, srcPar *src, shotPar *shot, bndPar *bnd, int verbose);
+int readModel3D(modPar mod, bndPar bnd, float *rox, float *roz, float *l2m);
+
+void vinit();
+int updateVelocitiesHalo(float *vx, float *vy, float *vz, float *p, float *ro, int halo, int npx, int npy, int npz);
+int updateVelocities(float *vx, float *vy, float *vz, float *p, float *ro, int halo, int npx, int npy, int npz);
+int updatePressureHalo(float *vx, float *vy, float *vz, float *p, float *l2m, int halo, int npx, int npy, int npz);
+int updatePressure(float *vx, float *vy, float *vz, float *p, float *l2m, int halo, int npx, int npy, int npz);
+int exchangeHalo(float *leftRecv, float *leftSend, float *rightRecv, float *rightSend, int size, int leftrank, int rightrank, MPI_Request *reqRecv, MPI_Request *reqSend, int tag);
+int newHaloVxVz(float *vx, float *vz, int npx, int npy, int npz, int halo, float *leftRecv, float *rightRecv, float *frontRecv, float *backRecv, float *topRecv, float *bottomRecv);
+int newHaloP(float *p, int npx, int npy, int npz, int halo, float *leftRecv, float *rightRecv, float *frontRecv, float *backRecv, float *topRecv, float *bottomRecv);
+int copyHaloVxVz(float *vx, float *vz, int npx, int npy, int npz, int halo, float *leftSend, float *rightSend, float *frontSend, float *backSend, float *topSend, float *bottomSend);
+int copyHaloP(float *p, int npx, int npy, int npz, int halo, float *leftSend, float *rightSend, float *frontSend, float *backSend, float *topSend, float *bottomSend);
+int waitForHalo(MPI_Request *reqRecv, MPI_Request *reqSend);
+float gauss2time(float t, float f, float t0);
+double wallclock_time(void);
+void name_ext(char *filename, char *extension);
+
+
+/* Self documentation */
+char *sdoc[] = {
+" ",
+"   file_rcv=recv.su .. base name for receiver files",
+"   file_snap=snap.su . base name for snapshot files",
+"   nx=256 ............ number of samples in x-direction",
+"   ny=nx ............. number of samples in y-direction",
+"   nz=nx ............. number of samples in z-direction",
+"   dx=5 .............. spatial sampling in x-direction",
+"   dy=5 .............. spatial sampling in y-direction",
+"   dz=5 .............. spatial sampling in z-direction",
+"" ,
+"   verbose=0 ......... silent mode; =1: display info",
+" ",
+"      Jan Thorbecke 2016",
+"      Cray / TU Delft",
+"      E-mail: janth@xs4all.nl ",
+"",
+NULL};
+
+int main (int argc, char *argv[])
+{
+	modPar mod;
+	recPar rec;
+	snaPar sna;
+	wavPar wav;
+	srcPar src;
+	bndPar bnd;
+	shotPar shot;
+	float *wavelet;
+	int nx, ny, nz, dims[3], period[3], reorder, coord[3], ndims=3;
+	int npx, npy, npz, halo, nt;
+	int my_rank, size, source, dest, snapwritten;
+	int left, right, front, back, top, bottom;
+	int direction, displ, halosizex, halosizey, halosizez;
+	int ioXx, ioXz, ioYx, ioYz, ioZz, ioZx, ioPx, ioPz;
+	int it, ix, iy, iz, iyp, ixp, izp, isrc, ixsrc, iysrc, izsrc, c1, c2;
+	int sizes[3], subsizes[3], starts[3];
+	int gsizes[3], gsubsizes[3], gstarts[3];
+	int error, rc, verbose;
+	float fx, fy, fz, dx, dy, dz, flx, fly, flz;
+	float *p, *vx, *vy, *vz, *rox, *roz, *roy, *l2m, hcp, hro, fac;
+	float *leftRecv, *leftSend, *rightRecv, *rightSend;
+	float *frontRecv, *frontSend, *backRecv, *backSend;
+	float *topRecv, *topSend, *bottomRecv, *bottomSend;
+	float dt, src_ampl, fmax, fpeaksrc, t0src, time, snaptime;
+	double t00, t0, t1, t2, tcomm, tcomp, thcomp, tcopy, ttot, tio;
+	char err_buffer[MPI_MAX_ERROR_STRING];
+	int resultlen;
+	MPI_Comm COMM_CART;
+	MPI_Request reqSend[6], reqRecv[6];
+	MPI_Status status[12];
+	MPI_Datatype local_array, global_array;
+	MPI_Offset disp;
+	MPI_Info fileinfo;
+	MPI_File fh;
+	char filename[1000], *file_snap, *file_rcv;
+
+	MPI_Init(&argc, &argv);
+	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	vinit();
+
+	t0= wallclock_time();
+	initargs(argc,argv);
+	requestdoc(0);
+
+	if (!getparint("verbose",&verbose)) verbose=0;
+	if (!getparstring("file_snap",&file_snap)) file_snap="snap.su";
+	if (!getparstring("file_rcv",&file_rcv)) file_rcv="recv.su";
+
+	getParameters3D(&mod, &rec, &sna, &wav, &src, &shot, &bnd, verbose);
+
+	/* divide 3D cube among available processors */
+	dims[0]=0; 
+	dims[1]=0;
+	dims[2]=0;
+	MPI_Dims_create(size, ndims, dims);
+
+	/* dims contains the number of MPI-tasks in each direction */
+	/* set number of grid points based on number of procs in dims */
+	if (!getparint("nx",&nx)) nx=256;
+	if (!getparint("ny",&ny)) ny=nx;
+	if (!getparint("nz",&nz)) nz=nx;
+
+	if (!getparfloat("dx",&dx)) dx=5.;
+	if (!getparfloat("dy",&dy)) dy=5.;
+	if (!getparfloat("dz",&dz)) dz=5.;
+
+	halo = 2;
+
+	/* new larger dimensions to fit with the domain-decomposition */
+	nz=dims[0]*ceil(mod.naz/dims[0]);
+	nx=dims[1]*ceil(mod.nax/dims[1]); 
+	ny=dims[2]*ceil(mod.nay/dims[2]); 
+
+//	dt=0.001;
+	nt=4096;
+	t0src=0.50;
+	hcp=2000.0;
+	hro=1800.0;
+	tcomm=tcomp=thcomp=tcopy=tio=0.0;
+
+	/* for stability 10 points per wavelenght */
+	fmax=hcp/(mod.dx*8);
+	dt=0.295*mod.dx/hcp;
+	fpeaksrc=0.2*fmax; /* Ricker wavelet has peak at 1/3 of fmax */
+	fac = mod.dt/mod.dx;
+
+	fx=-mod.dx*nx/2; fy=-mod.dy*ny/2; fz=0;
+	npz = 2*halo+nz/dims[0];
+	npx = 2*halo+nx/dims[1];
+	npy = 2*halo+ny/dims[2];
+	wavelet = (float *)calloc(nt,sizeof(float));
+
+	/* find which MPI-task has the source position */
+
+
+	snaptime = t0src+1.80*npx*dx*0.5/hcp;
+	snapwritten=0;
+	nt = (int) 1.1*(t0src+snaptime)/dt;
+	nt = (int) (t0src+1.5)/dt;
+
+	if (verbose && my_rank==0) {
+		fprintf(stderr,"fmax=%f fpeak=%f dt=%e\n", fmax, fpeaksrc, dt);
+		fprintf(stderr,"nx=%d nprocx=%d ny=%d nprocy=%d nz=%d nprocz=%d\n", nx, dims[1], ny, dims[2], nz, dims[0]);
+		fprintf(stderr,"npx=%d npy=%d npz=%d nt=%d time=%f\n", npx, npy, npz, nt, nt*dt);
+		fprintf(stderr,"source expected at local boundary at %f seconds\n", npx*dx*0.5/hcp);
+		fflush(stderr);
+	}
+
+	if (my_rank==0) fprintf(stderr,"memory per MPI task is %ld MB\n", (6*npx*npy*npz*4/(1024*1024)));
+
+	/* allocate wavefields and medium properties for local grid domains */
+	p   = (float *)calloc(npx*npy*npz,sizeof(float));
+	vx  = (float *)calloc(npx*npy*npz,sizeof(float));
+	vy  = (float *)calloc(npx*npy*npz,sizeof(float));
+	vz  = (float *)calloc(npx*npy*npz,sizeof(float));
+
+    /* read velocity and density files */
+
+    readModel3D(mod, bnd, rox, roz, l2m);
+
+/* for 2.5 D model npy=1 */
+	rox = (float *)calloc(npx*npy*npz,sizeof(float));
+	roy= (float *)calloc(npx*npy*npz,sizeof(float));
+	roz= (float *)calloc(npx*npy*npz,sizeof(float));
+	l2m = (float *)calloc(npx*npy*npz,sizeof(float));
+
+	/* define homogeneus model */
+	for (ix=0; ix<npx*1*npz; ix++) {
+		rox[ix]  = fac/hro;
+		roy[ix]  = fac/hro;
+		roz[ix]  = fac/hro;
+		l2m[ix] = fac*hcp*hcp*hro;
+	}
+
+	/* create cartesian domain decomposition */
+	period[0]=0; 
+	period[1]=0;
+	period[2]=0;
+	reorder=0;
+	MPI_Cart_create(MPI_COMM_WORLD, 3, dims, period, reorder, &COMM_CART);
+
+	/* find out coordinates of the rank */
+	MPI_Cart_coords(COMM_CART, my_rank, 3, coord);
+	flz = fz+(dz*nz/dims[0])*coord[0];
+	flx = fx+(dx*nx/dims[1])*coord[1];
+	fly = fy+(dy*ny/dims[2])*coord[2];
+	if (verbose>=2) fprintf(stderr,"Rank %d coordinates are %d %d %d orig=(%5.2F, %5.2f, %5.2f) \n", my_rank, coord[0], coord[1], coord[2], flx, fly, flz);
+	fflush(stderr);
+
+	/* find out neighbours of the rank, MPI_PROC_NULL is a hard boundary of the model */ 
+	displ=1;
+	MPI_Cart_shift(COMM_CART, 1, 1, &left, &right);
+	MPI_Cart_shift(COMM_CART, 2, 1, &top, &bottom);
+	MPI_Cart_shift(COMM_CART, 0, 1, &front, &back);
+	if (verbose>=2) fprintf(stderr, "Rank %d in direction 0 has LR neighbours %d %d FB %d %d TB %d %d\n", my_rank, left, right, front, back, top, bottom);
+	fflush(stderr);
+
+	/* allocate of halo areas */
+	halosizex = npy*npz*halo;
+	leftRecv  = (float *)calloc(3*halosizex,sizeof(float));
+	rightRecv = (float *)calloc(3*halosizex,sizeof(float));
+	leftSend  = (float *)calloc(3*halosizex,sizeof(float));
+	rightSend = (float *)calloc(3*halosizex,sizeof(float));
+
+	halosizey = npx*npz*halo;
+	frontRecv = (float *)calloc(3*halosizey,sizeof(float));
+	backRecv  = (float *)calloc(3*halosizey,sizeof(float));
+	frontSend = (float *)calloc(3*halosizey,sizeof(float));
+	backSend  = (float *)calloc(3*halosizey,sizeof(float));
+
+	halosizez = npy*npx*halo;
+	topRecv    = (float *)calloc(3*halosizez,sizeof(float));
+	bottomRecv = (float *)calloc(3*halosizez,sizeof(float));
+	topSend    = (float *)calloc(3*halosizez,sizeof(float));
+	bottomSend = (float *)calloc(3*halosizez,sizeof(float));
+
+	if (my_rank==0) fprintf(stderr,"memory per MPI task for halo exchange is %ld MB\n", ((12*(halosizex+halosizey+halosizez))*4/(1024*1024)));
+
+	/* create subarrays(excluding halo areas) to write to file with MPI-IO */
+	/* data in the local array */
+	sizes[0]=npz; 
+	sizes[1]=npx; 
+	sizes[2]=npy;
+	subsizes[0]=sizes[0]-2*halo; 
+	subsizes[1]=sizes[1]-2*halo;  
+	subsizes[2]=sizes[2]-2*halo;
+	starts[0]=halo; 
+	starts[1]=halo; 
+	starts[2]=halo; 
+	MPI_Type_create_subarray(3, sizes, subsizes, starts, MPI_ORDER_C, 
+                            MPI_FLOAT, &local_array); 
+	MPI_Type_commit(&local_array);
+
+	/* data in the global array */
+	gsizes[0]=nz; 
+	gsizes[1]=nx; 
+	gsizes[2]=ny;
+	gsubsizes[0]=subsizes[0]; 
+	gsubsizes[1]=subsizes[1];  
+	gsubsizes[2]=subsizes[2];
+	gstarts[0]=subsizes[0]*coord[0]; 
+	gstarts[1]=subsizes[1]*coord[1]; 
+	gstarts[2]=subsizes[2]*coord[2]; 
+	MPI_Type_create_subarray(3, gsizes, gsubsizes, gstarts, MPI_ORDER_C, 
+                            MPI_FLOAT, &global_array); 
+	MPI_Type_commit(&global_array);
+
+
+	/* compute field of the inner grid excluding halo areas */
+	ioXx=2;
+	ioXz=ioXx-1;
+	ioYx=2;
+	ioYz=ioYx-1;
+	ioZz=2;
+	ioZx=ioZz-1;
+	ioPx=1;
+	ioPz=ioPx;
+
+	t00 = wallclock_time();
+	for (it=0; it<nt; it++) {
+		time = it*dt;
+		wavelet[it] = gauss2time(time,fpeaksrc,t0src);
+	}
+	if (my_rank==0) {
+		FILE *fp;
+		fp = fopen("src.bin", "w+");
+		fwrite( wavelet, sizeof(float), nt, fp);
+		fflush(fp);
+		fclose(fp);
+	}
+
+/*
+	nt =1;
+			sprintf(filename,"snap_nz%d_nx%d_ny%d.bin",nz, nx, ny);
+
+	for (ix=0; ix<npx*npy*npz; ix++) {
+		p[ix]  = my_rank;
+	}
+			MPI_Info_create(&fileinfo);
+			MPI_Info_set(fileinfo, "striping_factor", STRIPE_COUNT);
+			MPI_Info_set(fileinfo, "striping_unit", STRIPE_SIZE);
+			MPI_File_delete(filename, MPI_INFO_NULL);
+			rc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDWR|MPI_MODE_CREATE, fileinfo, &fh);
+			if (rc != MPI_SUCCESS) {
+  				fprintf(stderr, "could not open input file\n");
+  				MPI_Abort(MPI_COMM_WORLD, 2);
+			}
+			disp = 0;
+			rc = MPI_File_set_view(fh, disp, MPI_FLOAT, global_array, "native", fileinfo);
+			if (rc != MPI_SUCCESS) {
+				fprintf(stderr, "error setting view on results file\n");
+				MPI_Abort(MPI_COMM_WORLD, 4);
+			}
+			rc = MPI_File_write_all(fh, p, 1, local_array, status);
+			if (rc != MPI_SUCCESS) {
+				MPI_Error_string(rc,err_buffer,&resultlen);
+				fprintf(stderr,err_buffer);
+				MPI_Abort(MPI_COMM_WORLD, 5);
+			}
+			MPI_File_close(&fh);
+*/
+
+
+	/* Main loop over the number of time steps */
+	for (it=0; it<nt; it++) {
+		t0 = wallclock_time();
+		time = it*dt;
+		//fprintf(stderr,"modeling time step %d for time %f\n", it, time);
+
+		/* define source wavelet */
+		wavelet[it] = gauss2time(time,fpeaksrc,t0src);
+
+		/* update of grid values on halo areas */
+		updateVelocitiesHalo(vx, vy, vz, p, rox, halo, npx, npy, npz);
+		t1 = wallclock_time();
+		thcomp += t1-t0;
+
+		/* copy of halo areas  */
+		copyHaloVxVz(vx, vz, npx, npy, npz, halo, leftSend, rightSend, frontSend, backSend, topSend, bottomSend);
+		t2 = wallclock_time();
+		tcopy += t2-t1;
+
+		/* start a-synchronous communication of halo areas to neighbours */
+		/* this is done first for Vx,Vz fields only */
+		exchangeHalo(leftRecv, leftSend, rightRecv, rightSend, 2*halosizex, left, right, &reqRecv[0], &reqSend[0], 0);
+		exchangeHalo(frontRecv, frontSend, backRecv, backSend, 2*halosizey, front, back, &reqRecv[2], &reqSend[2], 4);
+		exchangeHalo(topRecv, topSend, bottomRecv, bottomSend, 2*halosizez, top, bottom, &reqRecv[4], &reqSend[4], 8);
+		t1 = wallclock_time();
+		tcomm += t1-t2;
+
+		/* compute update on grid values excluding halo areas */
+		updateVelocities(vx, vy, vz, p, rox, halo, npx, npy, npz);
+		t2 = wallclock_time();
+		tcomp += t2-t1;
+
+		/* wait for Vx.Vz halo exchange */
+		waitForHalo(&reqRecv[0], &reqSend[0]);
+		t1 = wallclock_time();
+		tcomm += t1-t2;
+
+		/* copy of halo areas  back to arrays */
+		newHaloVxVz(vx, vz, npx, npy, npz, halo, leftRecv, rightRecv, frontRecv, backRecv, topRecv, bottomRecv);
+		t2 = wallclock_time();
+		tcopy += t2-t1;
+	
+		/* add Force source on the Vz grid */
+		src_ampl = wavelet[it];
+	
+		/* check if source position is in local domain */
+		/* for the moment place a source in the middle of each domain */
+		ixsrc = npx/2;
+		iysrc = npy/2;
+		izsrc = npz/2;
+		isrc  = iysrc*npx*npz+ixsrc*npz+izsrc;
+//		fprintf(stderr,"npz=%d npx=%d npy=%d isrc=%d\n", npz, npx, npy, isrc);
+	
+		/* source scaling factor to compensate for discretisation */
+		src_ampl *= rox[isrc]*l2m[isrc]/(dt);
+	
+		/* Force source */
+		//if (my_rank == 0) vz[isrc] += 0.25*src_ampl*ro[isrc]*dz;
+		vz[isrc] += 0.25*src_ampl*rox[isrc]*dz;
+	
+		/* compute field on the grid of the halo areas */
+		updatePressureHalo(vx, vy, vz, p, l2m, halo, npx, npy, npz);
+		t1 = wallclock_time();
+		thcomp += t1-t2;
+
+		/* copy p-field and sent to neighbours */
+		copyHaloP(p, npx, npy, npz, halo, leftSend, rightSend, frontSend, backSend, topSend, bottomSend);
+		exchangeHalo(leftRecv, leftSend, rightRecv, rightSend, halosizex, left, right, &reqRecv[0], &reqSend[0], 0);
+		exchangeHalo(frontRecv, frontSend, backRecv, backSend, halosizey, front, back, &reqRecv[2], &reqSend[2], 4);
+		exchangeHalo(topRecv, topSend, bottomRecv, bottomSend, halosizez, top, bottom, &reqRecv[4], &reqSend[4], 8);
+		t2 = wallclock_time();
+		tcomm += t2-t1;
+
+		/* compute update on grid values excluding halo areas */
+		updatePressure(vx, vy, vz, p, l2m, halo, npx, npy, npz);
+		t1 = wallclock_time();
+		tcomp += t1-t2;
+
+		/* wait for P halo exchange */
+		waitForHalo(&reqRecv[0], &reqSend[0]);
+		t2 = wallclock_time();
+		tcomm += t2-t1;
+
+		newHaloP(p, npx, npy, npz, halo, leftRecv, rightRecv, frontRecv, backRecv, topRecv, bottomRecv);
+		t1 = wallclock_time();
+		tcopy += t1-t2;
+	
+//		fprintf(stderr,"rank %d did time step %d in %f seconds\n", my_rank, it, t1-t0);
+//		fflush(stderr);
+
+		/* write snapshots to file */
+//		if (time >= snaptime && !snapwritten) {
+		if ((it+1)%100==0 ) {
+
+			t1 = wallclock_time();
+			sprintf(filename,"snap_nz%d_nx%d_ny%d_it%4d.bin",nz, nx, ny, it);
+
+			MPI_Info_create(&fileinfo);
+			MPI_Info_set(fileinfo, "striping_factor", STRIPE_COUNT);
+			MPI_Info_set(fileinfo, "striping_unit", STRIPE_SIZE);
+			MPI_File_delete(filename, MPI_INFO_NULL);
+			rc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDWR|MPI_MODE_CREATE, fileinfo, &fh);
+			if (rc != MPI_SUCCESS) {
+  				fprintf(stderr, "could not open input file\n");
+  				MPI_Abort(MPI_COMM_WORLD, 2);
+			}
+			disp = 0;
+			rc = MPI_File_set_view(fh, disp, MPI_FLOAT, global_array, "native", fileinfo);
+			if (rc != MPI_SUCCESS) {
+				fprintf(stderr, "error setting view on results file\n");
+				MPI_Abort(MPI_COMM_WORLD, 4);
+			}
+			rc = MPI_File_write_all(fh, p, 1, local_array, status);
+			if (rc != MPI_SUCCESS) {
+				MPI_Error_string(rc,err_buffer,&resultlen);
+				fprintf(stderr,err_buffer);
+				MPI_Abort(MPI_COMM_WORLD, 5);
+			}
+			MPI_File_close(&fh);
+
+
+/*			MPI_Info_create(&fileinfo);
+			MPI_File_delete(filename, MPI_INFO_NULL);
+			MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDWR|MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
+			MPI_File_set_view(fh, 0, MPI_FLOAT, global_array, "native", MPI_INFO_NULL);
+			MPI_File_write_all(fh, p, npz*npx*npy, local_array, status);
+			MPI_File_close(&fh);
+*/
+			snapwritten+=1;
+			t2 = wallclock_time();
+			tio += t2-t1;
+		}
+
+	}
+	ttot = wallclock_time() - t00;
+
+	if (my_rank == 0) {
+		fprintf(stderr,"rank %d total time in %f seconds\n", my_rank, ttot);
+		fprintf(stderr,"rank %d comm  time in %f seconds\n", my_rank, tcomm);
+		fprintf(stderr,"rank %d comp  time in %f seconds\n", my_rank, tcomp);
+		fprintf(stderr,"rank %d hcomp time in %f seconds\n", my_rank, thcomp);
+		fprintf(stderr,"rank %d copy  time in %f seconds\n", my_rank, tcopy);
+		fprintf(stderr,"rank %d io    time in %f seconds\n", my_rank, tio);
+		fprintf(stderr,"rank %d snaphsots written to file\n", snapwritten);
+	}
+
+
+	MPI_Finalize();
+	return 0;
+}
+
+
+
+int updateVelocities(float *vx, float *vy, float *vz, float *p, float *ro, int halo, int npx, int npy, int npz)
+{
+	int ix, iy, iz, iyp, ixp, izp, c1, c2, nxz;
+	int ixs, ixe, iys, iye, izs, ize;
+	float DpDx, DpDy, DpDz;
+
+	nxz=npx*npz;
+	c1 = 9.0/8.0;
+	c2 = -1.0/24.0;
+
+	ixs=2*halo; ixe=npx-2*halo;
+	iys=2*halo; iye=npy-2*halo;
+	izs=2*halo; ize=npz-2*halo;
+
+	/* calculate vx,vy,vz for all grid points except on the virtual boundary*/
+#pragma omp for private (iy, ix, iz) nowait
+#pragma ivdep
+	for (iy=iys; iy<iye; iy++) {
+		iyp=iy*nxz;
+		for (ix=ixs; ix<ixe; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=izs; iz<ize; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+	return 0;
+}
+
+int updateVelocitiesHalo(float *vx, float *vy, float *vz, float *p, float *ro, int halo, int npx, int npy, int npz)
+{
+	int ix, iy, iz, iyp, ixp, izp, c1, c2, nxz;
+	float DpDx, DpDy, DpDz;
+
+	nxz=npx*npz;
+	c1 = 9.0/8.0;
+	c2 = -1.0/24.0;
+
+	/* calculate vx,vy,vz for all halo grid points */
+
+	/* compute halo areas at left side */
+#pragma omp for private (iy, ix, iz) nowait
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=halo; ix<2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+	/* compute halo areas at right side */
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=npx-2*halo; ix<npx-halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+
+	/* compute halo areas at front side */
+	for (iy=halo; iy<2*halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+	/* compute halo areas at back side */
+	for (iy=npy-2*halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+	/* compute halo areas at top side */
+	for (iy=2*halo; iy<npy-2*halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<2*halo; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+	/* compute halo areas at bottom side */
+	for (iy=2*halo; iy<npy-2*halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=npz-2*halo; iz<npz-halo; iz++) {
+				DpDx = Dx(p,ix,iyp,iz,npz);
+				DpDy = Dy(p,ixp,iy,iz,nxz);
+				DpDz = Dz(p,ixp,iyp,iz);
+
+				vz[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDz;
+				vx[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDx;
+				vy[iyp+ixp+iz] += ro[iyp+ixp+iz]*DpDy;
+			}
+		}
+	}
+
+	return 0;
+}
+
+int updatePressure(float *vx, float *vy, float *vz, float *p, float *l2m, int halo, int npx, int npy, int npz)
+{
+	int ix, iy, iz, iyp, ixp, izp, c1, c2, nxz;
+	int ixs, ixe, iys, iye, izs, ize;
+
+	nxz=npx*npz;
+	c1 = 9.0/8.0;
+	c2 = -1.0/24.0;
+
+	ixs=2*halo; ixe=npx-2*halo;
+	iys=2*halo; iye=npy-2*halo;
+	izs=2*halo; ize=npz-2*halo;
+
+/* calculate p/tzz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iz)
+#pragma ivdep
+	for (iy=iys; iy<iye; iy++) {
+		iyp=iy*nxz;
+		for (ix=ixs; ix<ixe; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=izs; iz<ize; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+	return 0;
+}
+
+int updatePressureHalo(float *vx, float *vy, float *vz, float *p, float *l2m, int halo, int npx, int npy, int npz)
+{
+	int ix, iy, iz, iyp, ixp, izp, c1, c2, nxz;
+
+	nxz=npx*npz;
+	c1 = 9.0/8.0;
+	c2 = -1.0/24.0;
+
+	/* calculate p/tzz for all grid points except on the virtual boundary */
+
+	/* compute halo areas at left side */
+#pragma omp for private (iy, ix, iz) nowait
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=halo; ix<2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+	/* compute halo areas at right side */
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=npx-2*halo; ix<npx-halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+
+	/* compute halo areas at front side */
+	for (iy=halo; iy<2*halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+	/* compute halo areas at back side */
+	for (iy=npy-2*halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+	/* compute halo areas at top side */
+	for (iy=2*halo; iy<npy-2*halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<2*halo; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+	/* compute halo areas at bottom side */
+	for (iy=2*halo; iy<npy-2*halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=2*halo; ix<npx-2*halo; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=npz-2*halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] += l2m[iyp+ixp+iz]*(Dv(vx,vz,ix,iy,iz,npz,nxz));
+			}
+		}
+	}
+
+	return 0;
+}
+
+int exchangeHalo(float *leftRecv, float *leftSend, float *rightRecv, float *rightSend, int size, int leftrank, int rightrank, MPI_Request *reqRecv, MPI_Request *reqSend, int tag)
+{
+	int error, my_rank, ltag;
+	MPI_Status status;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+	if (leftrank != MPI_PROC_NULL) {
+		ltag = tag;
+		error = MPI_Irecv(leftRecv, size, MPI_FLOAT, leftrank, ltag, MPI_COMM_WORLD, &reqRecv[0]);
+		assert (error == MPI_SUCCESS);
+//		fprintf(stderr,"rank %d recv data from %d left\n", my_rank, leftrank);
+		ltag = tag+1;
+		error = MPI_Isend(leftSend, size, MPI_FLOAT, leftrank, ltag, MPI_COMM_WORLD, &reqSend[0]);
+		assert (error == MPI_SUCCESS);
+//		fprintf(stderr,"rank %d send data to %d left\n", my_rank, leftrank);
+	}
+	else {
+		reqRecv[0] = MPI_REQUEST_NULL;
+		reqSend[0] = MPI_REQUEST_NULL;
+	}
+
+	if (rightrank != MPI_PROC_NULL) {
+		ltag = tag+1;
+		error = MPI_Irecv(rightRecv, size, MPI_FLOAT, rightrank, ltag, MPI_COMM_WORLD, &reqRecv[1]);
+//		fprintf(stderr,"rank %d recv data from %d right\n", my_rank, rightrank);
+		assert (error == MPI_SUCCESS);
+		ltag = tag;
+		error = MPI_Isend(rightSend, size, MPI_FLOAT, rightrank, ltag, MPI_COMM_WORLD, &reqSend[1]);
+		assert (error == MPI_SUCCESS);
+//		fprintf(stderr,"rank %d send data to %d right\n", my_rank, rightrank);
+	}
+	else {
+		reqRecv[1] = MPI_REQUEST_NULL;
+		reqSend[1] = MPI_REQUEST_NULL;
+	}
+
+	return 0;
+}
+
+int waitForHalo(MPI_Request *reqRecv, MPI_Request *reqSend)
+{
+	int i;
+	MPI_Status status;
+	int error;
+
+	for (i=0; i<6; i++) {
+		error = MPI_Wait(&reqSend[i], &status);
+		assert (error == MPI_SUCCESS);
+	}
+
+//	MPI_Barrier(MPI_COMM_WORLD);
+
+	for (i=0; i<6; i++) {
+		error = MPI_Wait(&reqRecv[i], &status);
+		assert (error == MPI_SUCCESS);
+	}
+
+	return 0;
+}
+
+int copyHaloVxVz(float *vx, float *vz, int npx, int npy, int npz, int halo, float *leftSend, float *rightSend, float *frontSend, float *backSend, float *topSend, float *bottomSend)
+{
+	int ix, iy, iz, ih, iyp, ixp, halosizex, halosizey, halosizez, nxz;
+
+	nxz = npx*npz;
+
+	/* copy halo areas at left side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=halo; ix<2*halo; ix++) {
+			ixp=ix*npz;
+			ih=(ix-halo)*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				leftSend[iy*npz*halo+ih+iz]             = vx[iyp+ixp+iz];
+				leftSend[halosizex+iy*npz*halo+ih+iz]   = vz[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at right side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=npx-2*halo; ix<npx-halo; ix++) {
+			ixp=ix*npz;
+			ih=(ix-(npx-2*halo))*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				rightSend[iy*npz*halo+ih+iz]             = vx[iyp+ixp+iz];
+				rightSend[halosizex+iy*npz*halo+ih+iz]   = vz[iyp+ixp+iz];
+			}
+		}
+	}
+
+
+	/* copy halo areas at front side */
+	halosizey = npx*npz*halo;
+	for (iy=halo; iy<2*halo; iy++) {
+		iyp=iy*nxz;
+		ih=(iy-halo)*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				frontSend[ih+ixp+iz]             = vx[iyp+ixp+iz];
+				frontSend[halosizey+ih+ixp+iz]   = vz[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at back side */
+	for (iy=npy-2*halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		ih=(iy-(npy-2*halo))*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				backSend[ih+ixp+iz]             = vx[iyp+ixp+iz];
+				backSend[halosizey+ih+ixp+iz]   = vz[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at top side */
+	halosizez = npy*npx*halo;
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<2*halo; iz++) {
+				ih=iz-halo;
+				topSend[iy*npx*halo+ix*halo+ih]             = vx[iyp+ixp+iz];
+				topSend[halosizez+iy*npx*halo+ix*halo+ih]   = vz[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at bottom side */
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=npz-2*halo; iz<npz-halo; iz++) {
+				ih=(iz-(npz-2*halo));
+				bottomSend[iy*npx*halo+ix*halo+ih]             = vx[iyp+ixp+iz];
+				bottomSend[halosizez+iy*npx*halo+ix*halo+ih]   = vz[iyp+ixp+iz];
+			}
+		}
+	}
+
+	return 0;
+}
+
+int copyHaloP(float *p, int npx, int npy, int npz, int halo, float *leftSend, float *rightSend, float *frontSend, float *backSend, float *topSend, float *bottomSend)
+{
+	int ix, iy, iz, ih, iyp, ixp, halosizex, halosizey, halosizez, nxz;
+
+	nxz = npx*npz;
+
+	/* copy halo areas at left side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=halo; ix<2*halo; ix++) {
+			ixp=ix*npz;
+			ih=(ix-halo)*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				leftSend[iy*npz*halo+ih+iz]             = p[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at right side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=npx-2*halo; ix<npx-halo; ix++) {
+			ixp=ix*npz;
+			ih=(ix-(npx-2*halo))*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				rightSend[iy*npz*halo+ih+iz]             = p[iyp+ixp+iz];
+			}
+		}
+	}
+
+
+	/* copy halo areas at front side */
+	halosizey = npx*npz*halo;
+	for (iy=halo; iy<2*halo; iy++) {
+		iyp=iy*nxz;
+		ih=(iy-halo)*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				frontSend[ih+ixp+iz]             = p[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at back side */
+	for (iy=npy-2*halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		ih=(iy-(npy-2*halo))*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				backSend[ih+ixp+iz]             = p[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at top side */
+	halosizez = npy*npx*halo;
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<2*halo; iz++) {
+				ih=iz-halo;
+				topSend[iy*npx*halo+ix*halo+ih]             = p[iyp+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at bottom side */
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=npz-2*halo; iz<npz-halo; iz++) {
+				ih=(iz-(npz-2*halo));
+				bottomSend[iy*npx*halo+ix*halo+ih]             = p[iyp+ixp+iz];
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* copy communicated halo areas back to compute grids */
+int newHaloVxVz(float *vx, float *vz, int npx, int npy, int npz, int halo, float *leftRecv, float *rightRecv, float *frontRecv, float *backRecv, float *topRecv, float *bottomRecv)
+{
+	int ix, iy, iz, ih, iyp, ixp, halosizex, halosizey, halosizez, nxz;
+
+	nxz = npx*npz;
+
+	/* copy halo areas at left side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<halo; ix++) {
+			ixp=ix*npz;
+			ih=ixp;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				vx[iyp+ixp+iz] = leftRecv[iy*npz*halo+ih+iz];
+				vz[iyp+ixp+iz] = leftRecv[halosizex+iy*npz*halo+ih+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at right side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=npx-halo; ix<npx; ix++) {
+			ixp=ix*npz;
+			ih=(ix-(npx-halo))*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				vx[iyp+ixp+iz] = rightRecv[iy*npz*halo+ih+iz];
+				vz[iyp+ixp+iz] = rightRecv[halosizex+iy*npz*halo+ih+iz];
+			}
+		}
+	}
+
+
+	/* copy halo areas at front side */
+	halosizey = npx*npz*halo;
+	for (iy=0; iy<halo; iy++) {
+		iyp=iy*nxz;
+		ih=iyp;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				vx[iyp+ixp+iz] = frontRecv[ih+ixp+iz];
+				vz[iyp+ixp+iz] = frontRecv[halosizey+ih+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at back side */
+	for (iy=npy-halo; iy<npy; iy++) {
+		iyp=iy*nxz;
+		ih=(iy-(npy-halo))*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				vx[iyp+ixp+iz] = backRecv[ih+ixp+iz];
+				vz[iyp+ixp+iz] = backRecv[halosizey+ih+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at top side */
+	halosizez = npy*npx*halo;
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=0; iz<halo; iz++) {
+				ih=iz;
+				vx[iyp+ixp+iz] = topRecv[iy*npx*halo+ix*halo+ih];
+				vz[iyp+ixp+iz] = topRecv[halosizez+iy*npx*halo+ix*halo+ih];
+			}
+		}
+	}
+
+	/* copy halo areas at bottom side */
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=npz-halo; iz<npz; iz++) {
+				ih=(iz-(npz-halo));
+				vx[iyp+ixp+iz] = bottomRecv[iy*npx*halo+ix*halo+ih];
+				vz[iyp+ixp+iz] = bottomRecv[halosizez+iy*npx*halo+ix*halo+ih];
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* copy communicated halo areas back to compute grids */
+int newHaloP(float *p, int npx, int npy, int npz, int halo, float *leftRecv, float *rightRecv, float *frontRecv, float *backRecv, float *topRecv, float *bottomRecv)
+{
+	int ix, iy, iz, ih, iyp, ixp, halosizex, halosizey, halosizez, nxz;
+
+	nxz = npx*npz;
+
+	/* copy halo areas at left side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<halo; ix++) {
+			ixp=ix*npz;
+			ih=ixp;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] = leftRecv[iy*npz*halo+ih+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at right side */
+	halosizex = npy*npz*halo;
+	for (iy=halo; iy<npy-halo; iy++) {
+		iyp=iy*nxz;
+		for (ix=npx-halo; ix<npx; ix++) {
+			ixp=ix*npz;
+			ih=(ix-(npx-halo))*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] = rightRecv[iy*npz*halo+ih+iz];
+			}
+		}
+	}
+
+
+	/* copy halo areas at front side */
+	halosizey = npx*npz*halo;
+	for (iy=0; iy<halo; iy++) {
+		iyp=iy*nxz;
+		ih=iyp;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] = frontRecv[ih+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at back side */
+	for (iy=npy-halo; iy<npy; iy++) {
+		iyp=iy*nxz;
+		ih=(iy-(npy-halo))*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=halo; iz<npz-halo; iz++) {
+				p[iyp+ixp+iz] = backRecv[ih+ixp+iz];
+			}
+		}
+	}
+
+	/* copy halo areas at top side */
+	halosizez = npy*npx*halo;
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=0; iz<halo; iz++) {
+				ih=iz;
+				p[iyp+ixp+iz] = topRecv[iy*npx*halo+ix*halo+ih];
+			}
+		}
+	}
+
+	/* copy halo areas at bottom side */
+	for (iy=0; iy<npy; iy++) {
+		iyp=iy*nxz;
+		for (ix=0; ix<npx; ix++) {
+			ixp=ix*npz;
+#pragma ivdep
+			for (iz=npz-halo; iz<npz; iz++) {
+				ih=(iz-(npz-halo));
+				p[iyp+ixp+iz] = bottomRecv[iy*npx*halo+ix*halo+ih];
+			}
+		}
+	}
+
+	return 0;
+}
+float gauss2time(float t, float f, float t0)
+{
+    float value, time;
+
+	time = t-t0;
+    value = ((1.0-2.0*M_PI*M_PI*f*f*time*time)*exp(-M_PI*M_PI*f*f*time*time));
+    return value;
+}
+
diff --git a/fdelmodc3D/elastic4.c b/fdelmodc3D/elastic4.c
new file mode 100644
index 0000000000000000000000000000000000000000..bfaee6e6780cff467a1a910ffb5c8524e0845485
--- /dev/null
+++ b/fdelmodc3D/elastic4.c
@@ -0,0 +1,158 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc.h"
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+int applySource(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float *vx, float *vz, float *tzz,
+float *txx, float *txz, float *rox, float *roz, float *l2m, float **src_nwav, int verbose);
+
+int storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int boundariesP(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int boundariesV(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int elastic4(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float **src_nwav, float *vx,
+float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float
+*l2m, float *lam, float *mul, int verbose)
+{
+/*********************************************************************
+       COMPUTATIONAL OVERVIEW OF THE 4th ORDER STAGGERED GRID: 
+
+  The captial symbols T (=Txx,Tzz) Txz,Vx,Vz represent the actual grid
+  The indices ix,iz are related to the T grid, so the capital T 
+  symbols represent the actual modelled grid.
+
+  one cel (iz,ix)
+       |
+       V                              extra column of vx,txz
+                                                      |
+    -------                                           V
+   | txz vz| txz vz  txz vz  txz vz  txz vz  txz vz txz
+   |       |      
+   | vx  t | vx  t   vx  t   vx  t   vx  t   vx  t  vx
+    -------
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   | 
+     txz vz  txz Vz--Txz-Vz--Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  t   vx  t   vx  t   vx  t   vx  t  vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz  <--| 
+                                                             |
+                                         extra row of txz/vz |
+
+   AUTHOR:
+           Jan Thorbecke (janth@xs4all.nl)
+           The Netherlands 
+
+***********************************************************************/
+
+	float c1, c2;
+	float dvx, dvz;
+	int   ix, iz;
+	int   n1;
+
+	c1 = 9.0/8.0; 
+	c2 = -1.0/24.0;
+	n1  = mod.naz;
+
+	/* calculate vx for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iz) nowait schedule(guided,1)
+	for (ix=mod.ioXx; ix<mod.ieXx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioXz; iz<mod.ieXz; iz++) {
+			vx[ix*n1+iz] -= rox[ix*n1+iz]*(
+						c1*(txx[ix*n1+iz]     - txx[(ix-1)*n1+iz] +
+							txz[ix*n1+iz+1]   - txz[ix*n1+iz])    +
+						c2*(txx[(ix+1)*n1+iz] - txx[(ix-2)*n1+iz] +
+							txz[ix*n1+iz+2]   - txz[ix*n1+iz-1])  );
+		}
+	}
+
+	/* calculate vz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iz)  schedule(guided,1)
+	for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioZz; iz<mod.ieZz; iz++) {
+			vz[ix*n1+iz] -= roz[ix*n1+iz]*(
+						c1*(tzz[ix*n1+iz]     - tzz[ix*n1+iz-1] +
+							txz[(ix+1)*n1+iz] - txz[ix*n1+iz])  +
+						c2*(tzz[ix*n1+iz+1]   - tzz[ix*n1+iz-2] +
+							txz[(ix+2)*n1+iz] - txz[(ix-1)*n1+iz])  );
+		}
+	}
+
+	/* Add force source */
+	if (src.type > 5) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+
+    
+	/* boundary condition clears velocities on boundaries */
+	boundariesP(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* calculate Txx/tzz for all grid points except on the virtual boundary */
+#pragma omp	for private (ix, iz, dvx, dvz) nowait schedule(guided,1)
+	for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+			      c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+			dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+			      c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+			txx[ix*n1+iz] -= l2m[ix*n1+iz]*dvx + lam[ix*n1+iz]*dvz;
+			tzz[ix*n1+iz] -= l2m[ix*n1+iz]*dvz + lam[ix*n1+iz]*dvx;
+		}
+	}
+
+    
+    
+	/* calculate Txz for all grid points except on the virtual boundary */
+#pragma omp	for private (ix, iz)  schedule(guided,1)
+	for (ix=mod.ioTx; ix<mod.ieTx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioTz; iz<mod.ieTz; iz++) {
+			txz[ix*n1+iz] -= mul[ix*n1+iz]*(
+					c1*(vx[ix*n1+iz]     - vx[ix*n1+iz-1] +
+						vz[ix*n1+iz]     - vz[(ix-1)*n1+iz]) +
+					c2*(vx[ix*n1+iz+1]   - vx[ix*n1+iz-2] +
+						vz[(ix+1)*n1+iz] - vz[(ix-2)*n1+iz]) );
+		}
+	}
+
+	/* Add stress source */
+	if (src.type < 6) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+    
+	/* check if there are sources placed on the boundaries */
+    storeSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+    
+    /* Free surface: calculate free surface conditions for stresses */
+    boundariesV(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* restore source positions on the edge */
+	reStoreSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+
+    return 0;
+}
diff --git a/fdelmodc3D/elastic4dc.c b/fdelmodc3D/elastic4dc.c
new file mode 100644
index 0000000000000000000000000000000000000000..d119cfb3db178a7f3d5ade881188634191b1c686
--- /dev/null
+++ b/fdelmodc3D/elastic4dc.c
@@ -0,0 +1,160 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc.h"
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+int applySource(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float *vx, float *vz, float *tzz,
+float *txx, float *txz, float *rox, float *roz, float *l2m, float **src_nwav, int verbose);
+
+int storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int boundariesP(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int boundariesV(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int elastic4dc(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float **src_nwav, float *vx,
+float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float
+*l2m, float *lam, float *mul, int verbose)
+{
+/*********************************************************************
+       COMPUTATIONAL OVERVIEW OF THE 4th ORDER STAGGERED GRID: 
+
+  The captial symbols T (=Txx,Tzz) Txz,Vx,Vz represent the actual grid
+  The indices ix,iz are related to the T grid, so the capital T 
+  symbols represent the actual modelled grid.
+
+  one cel (iz,ix)
+       |
+       V                              extra column of vx,txz
+                                                      |
+    -------                                           V
+   | txz vz| txz vz  txz vz  txz vz  txz vz  txz vz txz
+   |       |      
+   | vx  t | vx  t   vx  t   vx  t   vx  t   vx  t  vx
+    -------
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   | 
+     txz vz  txz Vz--Txz-Vz--Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  t   vx  t   vx  t   vx  t   vx  t  vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz  <--| 
+                                                             |
+                                         extra row of txz/vz |
+
+   AUTHOR:
+           Jan Thorbecke (janth@xs4all.nl)
+           The Netherlands 
+
+***********************************************************************/
+
+	float c1, c2;
+	float dvx, dvz;
+	int   ix, iz;
+	int   n1;
+
+	c1 = 9.0/8.0; 
+	c2 = -1.0/24.0;
+	n1  = mod.naz;
+
+	/* calculate vx for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iz) nowait schedule(guided,1)
+	for (ix=mod.ioXx; ix<mod.ieXx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioXz; iz<mod.ieXz; iz++) {
+			vx[ix*n1+iz] -= rox[ix*n1+iz]*(
+						c1*(txx[ix*n1+iz]     - txx[(ix-1)*n1+iz] +
+							txz[ix*n1+iz+1]   - txz[ix*n1+iz])    +
+						c2*(txx[(ix+1)*n1+iz] - txx[(ix-2)*n1+iz] +
+							txz[ix*n1+iz+2]   - txz[ix*n1+iz-1])  );
+		}
+	}
+
+	/* calculate vz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iz)  schedule(guided,1)
+	for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioZz; iz<mod.ieZz; iz++) {
+			vz[ix*n1+iz] -= roz[ix*n1+iz]*(
+						c1*(tzz[ix*n1+iz]     - tzz[ix*n1+iz-1] +
+							txz[(ix+1)*n1+iz] - txz[ix*n1+iz])  +
+						c2*(tzz[ix*n1+iz+1]   - tzz[ix*n1+iz-2] +
+							txz[(ix+2)*n1+iz] - txz[(ix-1)*n1+iz])  );
+		}
+	}
+
+	/* Add force source */
+	if (src.type > 5) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+
+    
+	/* boundary condition clears velocities on boundaries */
+	boundariesP(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* calculate Txx/tzz for all grid points except on the virtual boundary */
+#pragma omp	for private (ix, iz, dvx, dvz) nowait schedule(guided,1)
+	for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+			      c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+			dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+			      c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+			txx[ix*n1+iz] -= l2m[ix*n1+iz]*dvx + l2m[ix*n1+iz]*dvz;
+			tzz[ix*n1+iz] -= l2m[ix*n1+iz]*dvz + l2m[ix*n1+iz]*dvx;
+		}
+	}
+
+    
+    
+	/* calculate Txz for all grid points except on the virtual boundary */
+/*
+#pragma omp	for private (ix, iz)  schedule(guided,1)
+	for (ix=mod.ioTx; ix<mod.ieTx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioTz; iz<mod.ieTz; iz++) {
+			txz[ix*n1+iz] -= mul[ix*n1+iz]*(
+					c1*(vx[ix*n1+iz]     - vx[ix*n1+iz-1] +
+						vz[ix*n1+iz]     - vz[(ix-1)*n1+iz]) +
+					c2*(vx[ix*n1+iz+1]   - vx[ix*n1+iz-2] +
+						vz[(ix+1)*n1+iz] - vz[(ix-2)*n1+iz]) );
+		}
+	}
+*/
+
+	/* Add stress source */
+	if (src.type < 6) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+    
+	/* check if there are sources placed on the boundaries */
+    storeSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+    
+    /* Free surface: calculate free surface conditions for stresses */
+    boundariesV(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* restore source positions on the edge */
+	reStoreSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+
+    return 0;
+}
diff --git a/fdelmodc3D/elastic6.c b/fdelmodc3D/elastic6.c
new file mode 100644
index 0000000000000000000000000000000000000000..00aaf5660a413521604c102fe8915f895caf1606
--- /dev/null
+++ b/fdelmodc3D/elastic6.c
@@ -0,0 +1,182 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc.h"
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+int applySource(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float *vx, float *vz, float *tzz,
+float *txx, float *txz, float *rox, float *roz, float *l2m, float **src_nwav, int verbose);
+
+int storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int boundariesP(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int boundariesV(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int elastic6(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float **src_nwav, float *vx,
+float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float
+*l2m, float *lam, float *mul, int verbose)
+{
+/*********************************************************************
+       COMPUTATIONAL OVERVIEW OF THE 4th ORDER STAGGERED GRID: 
+
+  The captial symbols T (=Txx,Tzz) Txz,Vx,Vz represent the actual grid
+  The indices ix,iz are related to the T grid, so the capital T 
+  symbols represent the actual modelled grid.
+
+  one cel (iz,ix)
+       |
+       V                              extra column of vx,txz
+                                                      |
+    -------                                           V
+   | txz vz| txz vz  txz vz  txz vz  txz vz  txz vz txz
+   |       |      
+   | vx  t | vx  t   vx  t   vx  t   vx  t   vx  t  vx
+    -------
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   | 
+     txz vz  txz Vz--Txz-Vz--Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  t   vx  t   vx  t   vx  t   vx  t  vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz  <--| 
+                                                             |
+                                         extra row of txz/vz |
+
+   AUTHOR:
+           Jan Thorbecke (janth@xs4all.nl)
+           The Netherlands 
+
+***********************************************************************/
+
+	float c1, c2, c3;
+	float dvx, dvz;
+	int   ix, iz;
+	int   n1;
+	int   ioXx, ioXz, ioZz, ioZx, ioPx, ioPz, ioTx, ioTz;
+
+	c1 = 75.0/64.0;
+	c2 = -25.0/384.0;
+	c3 = 3.0/640.0;
+	n1  = mod.naz;
+
+	/* Vx: rox */
+	ioXx=mod.iorder/2;
+	ioXz=ioXx-1;
+	/* Vz: roz */
+	ioZz=mod.iorder/2;
+	ioZx=ioZz-1;
+	/* P, Txx, Tzz: lam, l2m */
+	ioPx=mod.iorder/2-1;
+	ioPz=ioPx;
+	/* Txz: muu */
+	ioTx=mod.iorder/2;
+	ioTz=ioTx;
+
+	/* calculate vx for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iz) nowait
+	for (ix=mod.ioXx; ix<mod.ieXx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioXz; iz<mod.ieXz; iz++) {
+			vx[ix*n1+iz] -= rox[ix*n1+iz]*(
+						c1*(txx[ix*n1+iz]     - txx[(ix-1)*n1+iz] +
+							txz[ix*n1+iz+1]   - txz[ix*n1+iz])    +
+						c2*(txx[(ix+1)*n1+iz] - txx[(ix-2)*n1+iz] +
+							txz[ix*n1+iz+2]   - txz[ix*n1+iz-1])  +
+                        c3*(txx[(ix+2)*n1+iz] - txx[(ix-3)*n1+iz] +
+                            txz[ix*n1+iz+3]   - txz[ix*n1+iz-2])  );
+		}
+	}
+
+	/* calculate vz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iz) 
+	for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioZz; iz<mod.ieZz; iz++) {
+			vz[ix*n1+iz] -= roz[ix*n1+iz]*(
+						c1*(tzz[ix*n1+iz]     - tzz[ix*n1+iz-1]    +
+							txz[(ix+1)*n1+iz] - txz[ix*n1+iz])     +
+						c2*(tzz[ix*n1+iz+1]   - tzz[ix*n1+iz-2]    +
+							txz[(ix+2)*n1+iz] - txz[(ix-1)*n1+iz]) +
+                        c3*(tzz[ix*n1+iz+2]   - tzz[ix*n1+iz-3]    +
+                            txz[(ix+3)*n1+iz] - txz[(ix-2)*n1+iz])  );
+            
+            
+		}
+	}
+
+	/* Add force source */
+	if (src.type > 5) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+
+    /* boundary condition clears velocities on boundaries */
+	boundariesP(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* calculate Txx/tzz for all grid points except on the virtual boundary */
+#pragma omp	for private (ix, iz, dvx, dvz) nowait
+	for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dvx = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+			      c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]) +
+                  c3*(vx[(ix+3)*n1+iz] - vx[(ix-2)*n1+iz]);  
+			dvz = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+			      c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]) +
+                  c3*(vz[ix*n1+iz+3]   - vz[ix*n1+iz-2]);
+			txx[ix*n1+iz] -= l2m[ix*n1+iz]*dvx + lam[ix*n1+iz]*dvz;
+			tzz[ix*n1+iz] -= l2m[ix*n1+iz]*dvz + lam[ix*n1+iz]*dvx;
+		}
+	}
+
+	/* calculate Txz for all grid points except on the virtual boundary */
+#pragma omp	for private (ix, iz) 
+	for (ix=mod.ioTx; ix<mod.ieTx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioTz; iz<mod.ieTz; iz++) {
+			txz[ix*n1+iz] -= mul[ix*n1+iz]*(
+					c1*(vx[ix*n1+iz]     - vx[ix*n1+iz-1] +
+						vz[ix*n1+iz]     - vz[(ix-1)*n1+iz]) +
+					c2*(vx[ix*n1+iz+1]   - vx[ix*n1+iz-2] +
+						vz[(ix+1)*n1+iz] - vz[(ix-2)*n1+iz]) +
+                    c3*(vx[ix*n1+iz+2]   - vx[ix*n1+iz-3] +
+                        vz[(ix+2)*n1+iz] - vz[(ix-3)*n1+iz]) );
+
+		}
+	}
+
+	/* Add stress source */
+	if (src.type < 6) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+
+
+	/* check if there are sources placed on the free surface */
+    storeSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+
+    /* Free surface: calculate free surface conditions for stresses */
+    boundariesV(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* restore source positions on the edge */
+	reStoreSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+
+      return 0;
+}
diff --git a/fdelmodc3D/recvPar3D2.c b/fdelmodc3D/recvPar3D2.c
new file mode 100644
index 0000000000000000000000000000000000000000..4419a48db462f01a8cc9e520bc9d22ebca9cb702
--- /dev/null
+++ b/fdelmodc3D/recvPar3D2.c
@@ -0,0 +1,514 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "fdelmodc3D.h"
+#include "par.h"
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+/**
+*  Calculates the receiver positions based on the input parameters
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*
+*   Ammendments:
+*           Max Holicki changing the allocation receiver array (2-2016)
+*           Joeri Brackenhoff adding the 3D extension
+*           The Netherlands 
+**/
+
+
+void name_ext(char *filename, char *extension);
+
+long recvPar3D(recPar *rec, float sub_x0, float sub_y0, float sub_z0, 
+	float dx, float dy, float dz, long nx, long ny, long nz)
+{
+	float   *xrcv1, *xrcv2, *yrcv1, *yrcv2, *zrcv1, *zrcv2;
+	long    i, ix, iy, ir, verbose;
+	float   dxrcv, dyrcv, dzrcv, *dxr, *dyr, *dzr;
+	float   rrcv, dphi, oxrcv, oyrcv, ozrcv, arcv;
+	double  circ, h, a, b, e, s, xr, yr, zr, dr, srun, phase;
+	float   xrange, yrange, zrange, sub_x1, sub_y1, sub_z1;
+	long    Nx1, Nx2, Ny1, Ny2, Nz1, Nz2, Ndx, Ndy, Ndz, iarray, nrec, nh;
+	long    nxrcv, nyrcv, nzrcv, ncrcv, nrcv, ntrcv, *nlrcv;
+	float   *xrcva, *yrcva, *zrcva;
+	char*   rcv_txt;
+	FILE    *fp;
+
+	if (!getparlong("verbose", &verbose)) verbose = 0;
+
+    /* Calculate Model Dimensions */
+    sub_x1=sub_x0+(nx-1)*dx;
+    sub_y1=sub_y0+(ny-1)*dy;
+    sub_z1=sub_z0+(nz-1)*dz;
+
+/* Compute how many receivers are defined and then allocate the receiver arrays */
+
+    /* Receiver Array */
+    nxrcv=countparval("xrcva");
+    nyrcv=countparval("yrcva");
+    nzrcv=countparval("zrcva");
+    if (nxrcv!=nzrcv) verr("Number of receivers in array xrcva (%li), yrcva (%li), zrcva(%li) are not equal",nxrcv,nyrcv,nzrcv);
+    if (verbose&&nxrcv) vmess("Total number of array receivers: %li",nxrcv);
+
+    /* Linear Receiver Arrays */
+	Nx1 = countparval("xrcv1");
+	Nx2 = countparval("xrcv2");
+	Ny1 = countparval("yrcv1");
+	Ny2 = countparval("yrcv2");
+	Nz1 = countparval("zrcv1");
+	Nz2 = countparval("zrcv2");
+    if (Nx1!=Nx2) verr("Number of receivers starting points in 'xrcv1' (%li) and number of endpoint in 'xrcv2' (%li) are not equal",Nx1,Nx2);
+    if (Ny1!=Ny2) verr("Number of receivers starting points in 'yrcv1' (%li) and number of endpoint in 'yrcv2' (%li) are not equal",Ny1,Ny2);
+    if (Nz1!=Nz2) verr("Number of receivers starting points in 'zrcv1' (%li) and number of endpoint in 'zrcv2' (%li) are not equal",Nz1,Nz2);
+    if (Nx1!=Ny2) verr("Number of receivers starting points in 'xrcv1' (%li) and number of endpoint in 'yrcv2' (%li) are not equal",Nx1,Ny2);
+    if (Nx1!=Nz2) verr("Number of receivers starting points in 'xrcv1' (%li) and number of endpoint in 'zrcv2' (%li) are not equal",Nx1,Nz2);
+
+    rec->max_nrec=nyrcv*nxrcv;
+
+	/* no receivers are defined use default linear array of receivers on top of model */
+    if (!rec->max_nrec && Nx1==0) Nx1=1; // Default is to use top of model to record data
+    if (!rec->max_nrec && Ny1==0) Ny1=1;
+
+    if (Nx1) {
+        /* Allocate Start & End Points of Linear Arrays */
+        xrcv1=(float *)malloc(Nx1*sizeof(float));
+        xrcv2=(float *)malloc(Nx1*sizeof(float));
+        yrcv1=(float *)malloc(Nx1*sizeof(float));
+        yrcv2=(float *)malloc(Nx1*sizeof(float));
+        zrcv1=(float *)malloc(Nx1*sizeof(float));
+        zrcv2=(float *)malloc(Nx1*sizeof(float));
+        if (!getparfloat("xrcv1",xrcv1)) xrcv1[0]=sub_x0;
+        if (!getparfloat("xrcv2",xrcv2)) xrcv2[0]=sub_x1;
+        if (!getparfloat("yrcv1",yrcv1)) yrcv1[0]=sub_y0;
+        if (!getparfloat("yrcv2",yrcv2)) yrcv2[0]=sub_y1;
+        if (!getparfloat("zrcv1",zrcv1)) zrcv1[0]=sub_z0;
+        if (!getparfloat("zrcv2",zrcv2)) zrcv2[0]=zrcv1[0];
+
+		/* check if receiver arrays fit into model */
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrcv1[iarray] = MAX(sub_x0,      xrcv1[iarray]);
+			xrcv1[iarray] = MIN(sub_x0+nx*dx,xrcv1[iarray]);
+			xrcv2[iarray] = MAX(sub_x0,      xrcv2[iarray]);
+			xrcv2[iarray] = MIN(sub_x0+nx*dx,xrcv2[iarray]);
+			
+			yrcv1[iarray] = MAX(sub_y0,      yrcv1[iarray]);
+			yrcv1[iarray] = MIN(sub_y0+ny*dy,yrcv1[iarray]);
+			yrcv2[iarray] = MAX(sub_y0,      yrcv2[iarray]);
+			yrcv2[iarray] = MIN(sub_y0+ny*dy,yrcv2[iarray]);
+
+			zrcv1[iarray] = MAX(sub_z0,      zrcv1[iarray]);
+			zrcv1[iarray] = MIN(sub_z0+nz*dz,zrcv1[iarray]);
+			zrcv2[iarray] = MAX(sub_z0,      zrcv2[iarray]);
+			zrcv2[iarray] = MIN(sub_z0+nz*dz,zrcv2[iarray]);
+		}
+
+        /* Crop to Fit Model */
+/* Max's addtion still have to check if it has the same fucntionality */
+        for (iarray=0;iarray<Nx1;iarray++) {
+            if (xrcv1[iarray]<sub_x0) {
+                if (xrcv2[iarray]<sub_x0) {
+                    verr("Linear array %li outside model bounds",iarray);
+                }
+				else {
+                    vwarn("Cropping element %li of 'xrcv1' (%f) to model bounds (%f)",iarray,xrcv1[iarray],sub_x0);
+                    xrcv1[iarray]=sub_x0;
+                }
+            } 
+			else if (xrcv1[iarray] > sub_x1) {
+                verr("Linear array %li outside model bounds",iarray);
+            }
+            if ( (xrcv2[iarray] < xrcv1[iarray]) ) {
+                verr("Ill defined linear array %li, 'xrcv1' (%f) greater than 'xrcv2' (%f)",iarray,xrcv1[iarray],xrcv2[iarray]);
+            }
+			else if (xrcv2[iarray]>sub_x1) {
+                vwarn("Cropping element %li of 'xrcv2' (%f) to model bounds (%f)",iarray,xrcv2[iarray],sub_x1);
+                xrcv2[iarray]=sub_x1;
+            }
+
+            if (yrcv1[iarray]<sub_y0) {
+                if (yrcv2[iarray]<sub_y0) {
+                    verr("Linear array %li outside model bounds",iarray);
+                }
+				else {
+                    vwarn("Cropping element %li of 'yrcv1' (%f) to model bounds (%f)",iarray,yrcv1[iarray],sub_y0);
+                    yrcv1[iarray]=sub_y0;
+                }
+            } 
+			else if (yrcv1[iarray] > sub_y1) {
+                verr("Linear array %li outside model bounds",iarray);
+            }
+            if ( (yrcv2[iarray] < yrcv1[iarray]) ) {
+                verr("Ill defined linear array %li, 'yrcv1' (%f) greater than 'yrcv2' (%f)",iarray,yrcv1[iarray],yrcv2[iarray]);
+            }
+			else if (yrcv2[iarray]>sub_y1) {
+                vwarn("Cropping element %li of 'yrcv2' (%f) to model bounds (%f)",iarray,yrcv2[iarray],sub_y1);
+                yrcv2[iarray]=sub_y1;
+            }
+
+            if (zrcv1[iarray] < sub_z0) {
+                if (zrcv2[iarray] < sub_z0) {
+                    verr("Linear array %li outside model bounds",iarray);
+                }
+				else {
+               		vwarn("Cropping element %li of 'zrcv1' (%f) to model bounds (%f)",iarray,zrcv1[iarray],sub_z0);
+                	zrcv1[iarray]=sub_z0;
+                }
+            }
+			else if (zrcv1[iarray] > sub_z1) {
+                verr("Linear array %li outside model bounds",iarray);
+            }
+            if ( (zrcv2[iarray] < zrcv1[iarray]) ) {
+                verr("Ill defined linear array %li, 'zrcv1' (%f) greater than 'zrcv2' (%f)",iarray,zrcv1[iarray],zrcv2[iarray]);
+            }
+			else if (zrcv2[iarray]>sub_z1) {
+                vwarn("Cropping element %li of 'xrcv2' (%f) to model bounds (%f)",iarray,zrcv2[iarray],sub_z1);
+                zrcv2[iarray]=sub_z1;
+            }
+        }
+
+        /* Get Sampling Rates */
+		Ndx = countparval("dxrcv");
+		Ndy = countparval("dyrcv");
+		Ndz = countparval("dzrcv");
+
+		dxr = (float *)malloc(Nx1*sizeof(float));
+		dyr = (float *)malloc(Nx1*sizeof(float));
+		dzr = (float *)malloc(Nx1*sizeof(float));
+		if(!getparfloat("dxrcv", dxr)) dxr[0]=dx;
+		if(!getparfloat("dyrcv", dyr)) dyr[0]=dy;
+		if(!getparfloat("dzrcv", dzr)) dzr[0]=0.0;
+		if ( (Ndx<=1) && (Ndy<=1) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndx=1;
+            Ndy=1;
+			Ndz=1;
+		}
+		else if ( (Ndz==1) && (Ndx==0) && (Ndy==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndz=1;
+            Ndy=1;
+			Ndx=1;
+		}
+		else { /* make sure that each array has dzrcv or dxrcv defined for each line or receivers */
+			if (Ndx!=Ndz) {
+				verr("Number of 'dxrcv' (%li) is not equal to number of 'dzrcv' (%li) or 1",Ndx,Ndz);
+			}
+			if (Ndx!=Ndy) {
+				verr("Number of 'dxrcv' (%li) is not equal to number of 'dyrcv' (%li) or 1",Ndx,Ndy);
+			}
+			if (Ndx!=Nx1 && Ndx!=1) {
+				verr("Number of 'dxrcv' (%li) is not equal to number of starting points in 'xrcv1' (%li) or 1",Ndx,Nx1);
+			}
+			if (Ndy!=Ny1 && Ndy!=1) {
+				verr("Number of 'dyrcv' (%li) is not equal to number of starting points in 'yrcv1' (%li) or 1",Ndy,Ny1);
+			}
+		}
+
+		/* check consistency of receiver steps */
+        for (iarray=0; iarray<Ndx; iarray++) {
+            if (dxr[iarray]<0) {
+				dxr[i]=dx;
+				vwarn("'dxrcv' element %li (%f) is less than zero, changing it to %f'",iarray,dxr[iarray],dx);
+			}
+        }
+        for (iarray=0; iarray<Ndy; iarray++) {
+            if (dyr[iarray]<0) {
+				dyr[i]=dx;
+				vwarn("'dyrcv' element %li (%f) is less than zero, changing it to %f'",iarray,dyr[iarray],dy);
+			}
+        }
+        for (iarray=0;iarray<Ndz;iarray++) {
+            if (dzr[iarray]<0) {
+				dzr[iarray]=dz;
+				vwarn("'dzrcv' element %li (%f) is less than zero, changing it to %f'",iarray,dzr[iarray],dz);
+			}
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (dxr[iarray]==0 && dzr[iarray]==0) {
+                xrcv2[iarray]=xrcv1[iarray];
+				dxr[iarray]=1.;
+                vwarn("'dxrcv' element %li & 'dzrcv' element 1 are both 0.",iarray+1);
+                vmess("Placing 1 receiver at (%li,%li)",xrcv1[iarray],zrcv1[iarray]);
+            }
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (xrcv1[iarray]==xrcv2[iarray] && dxr[iarray]!=0) {
+                dxr[iarray]=0.;
+                vwarn("Linear array %li: 'xrcv1'='xrcv2' and 'dxrcv' is not 0. Setting 'dxrcv'=0",iarray+1);
+            }
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (yrcv1[iarray]==yrcv2[iarray] && dyr[iarray]!=0) {
+                dyr[iarray]=0.;
+                vwarn("Linear array %li: 'yrcv1'='yrcv2' and 'dyrcv' is not 0. Setting 'dyrcv'=0",iarray+1);
+            }
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (zrcv1[iarray]==zrcv2[iarray] && dzr[iarray]!=0.){
+                dzr[iarray]=0.;
+                vwarn("Linear array %li: 'zrcv1'='zrcv2' and 'dzrcv' is not 0. Setting 'dzrcv'=0",iarray+1);
+            }
+        }
+
+        /* Calculate Number of Receivers */
+		nrcv = 0;
+        nlrcv=(long *)malloc(Nx1*sizeof(long));
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrange = (xrcv2[iarray]-xrcv1[iarray]); 
+			yrange = (yrcv2[iarray]-yrcv1[iarray]); 
+			zrange = (zrcv2[iarray]-zrcv1[iarray]); 
+			if (dxr[iarray] != 0.0 && dyr[iarray] != 0.0) {
+				nlrcv[iarray] = NINT(fabs(yrange/dyr[iarray]))*NINT(fabs(xrange/dxr[iarray]))+1;
+			}
+			else if (dxr[iarray] != 0.0) {
+				nlrcv[iarray] = NINT(fabs(xrange/dxr[iarray]))+1;
+			}
+			else if (dyr[iarray] != 0.0) {
+				nlrcv[iarray] = NINT(fabs(yrange/dyr[iarray]))+1;
+			}
+			else {
+				if (dzr[iarray] == 0) {
+					verr("For receiver array %li: receiver distance dzrcv is not given", iarray);
+				}
+				nlrcv[iarray] = NINT(fabs(zrange/dzr[iarray]))+1;
+			}
+            nrcv+=nlrcv[iarray];
+		}
+
+        /* Calculate Number of Receivers */
+        if (verbose) vmess("Total number of linear array receivers: %li",nrcv);
+        if (!nrcv) {
+            free(xrcv1);
+            free(xrcv2);
+            free(yrcv1);
+            free(yrcv2);
+            free(zrcv1);
+            free(zrcv2);
+            free(dxr);
+            free(dyr);
+            free(dzr);
+            free(nlrcv);
+        }
+        rec->max_nrec+=nrcv;
+    } 
+	else {
+		nrcv=0;
+	}
+
+/* allocate the receiver arrays */
+
+    /* Total Number of Receivers */
+    if (verbose) vmess("Total number of receivers: %li",rec->max_nrec);
+
+    /* Allocate Arrays */
+    rec->x  = (long *)calloc(rec->max_nrec,sizeof(long));
+    rec->y  = (long *)calloc(rec->max_nrec,sizeof(long));
+    rec->z  = (long *)calloc(rec->max_nrec,sizeof(long));
+    rec->xr = (float *)calloc(rec->max_nrec,sizeof(float));
+    rec->yr = (float *)calloc(rec->max_nrec,sizeof(float));
+    rec->zr = (float *)calloc(rec->max_nrec,sizeof(float));
+
+/* read in the receiver postions */
+
+	nrec=0;
+    /* Receiver Array */
+	if (nxrcv != 0 && nyrcv != 0) {
+		/* receiver array is defined */
+		xrcva = (float *)malloc(nxrcv*sizeof(float));
+		yrcva = (float *)malloc(nxrcv*sizeof(float));
+		zrcva = (float *)malloc(nxrcv*sizeof(float));
+		getparfloat("xrcva", xrcva);
+		getparfloat("yrcva", yrcva);
+		getparfloat("zrcva", zrcva);
+		for (iy=0; iy<nyrcv; iy++) {
+            for (ix=0; ix<nxrcv; ix++) {
+                rec->xr[nrec+iy*nxrcv+ix] = xrcva[ix]-sub_x0;
+                rec->yr[nrec+iy*nxrcv+ix] = yrcva[iy]-sub_y0;
+                rec->zr[nrec+iy*nxrcv+ix] = zrcva[ix]-sub_z0;
+                rec->x[nrec+iy*nxrcv+ix] = NINT((xrcva[ix]-sub_x0)/dx);
+                rec->y[nrec+iy*nxrcv+ix] = NINT((yrcva[iy]-sub_y0)/dy);
+                rec->z[nrec+iy*nxrcv+ix] = NINT((zrcva[ix]-sub_z0)/dz);
+                if (verbose>4) fprintf(stderr,"Receiver Array: xrcv[%li]=%f yrcv[%li]=%f zrcv=%f\n", ix, rec->xr[nrec+ix]+sub_x0, iy, rec->yr[nrec+ix]+sub_y0, rec->zr[nrec+ix]+sub_z0);
+            }
+        }
+		free(xrcva);
+		free(yrcva);
+		free(zrcva);
+		nrec += nyrcv*nxrcv;
+	}
+
+    /* Linear Receiver Arrays */
+    if (nrcv!=0) {
+		xrcv1 = (float *)malloc(Nx1*sizeof(float));
+		xrcv2 = (float *)malloc(Nx1*sizeof(float));
+		yrcv1 = (float *)malloc(Nx1*sizeof(float));
+		yrcv2 = (float *)malloc(Nx1*sizeof(float));
+		zrcv1 = (float *)malloc(Nx1*sizeof(float));
+		zrcv2 = (float *)malloc(Nx1*sizeof(float));
+		
+		if(!getparfloat("xrcv1", xrcv1)) xrcv1[0]=sub_x0;
+		if(!getparfloat("xrcv2", xrcv2)) xrcv2[0]=(nx-1)*dx+sub_x0;
+		if(!getparfloat("yrcv1", yrcv1)) yrcv1[0]=sub_y0;
+		if(!getparfloat("yrcv2", yrcv2)) yrcv2[0]=(ny-1)*dy+sub_y0;
+		if(!getparfloat("zrcv1", zrcv1)) zrcv1[0]=sub_z0;
+		if(!getparfloat("zrcv2", zrcv2)) zrcv2[0]=zrcv1[0];		
+		
+		Ndx = countparval("dxrcv");
+		Ndy = countparval("dyrcv");
+		Ndz = countparval("dzrcv");
+
+		dxr = (float *)malloc(Nx1*sizeof(float));
+		dyr = (float *)malloc(Nx1*sizeof(float));
+		dzr = (float *)malloc(Nx1*sizeof(float));
+		if(!getparfloat("dxrcv", dxr)) dxr[0]=dx;
+		if(!getparfloat("dyrcv", dyr)) dyr[0]=dy;
+		if(!getparfloat("dzrcv", dzr)) dzr[0]=0.0;
+		if ( (Ndx<=1) && (Ndy<=1) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndx=1;
+            Ndy=1;
+		}
+        else if ( (Ndx<=1) && (Ndy==0) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndx=1;
+		}
+        else if ( (Ndy<=1) && (Ndx==0) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndy=1;
+		}
+		else if ( (Ndz==1) && (Ndy==0) && (Ndx==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndz=1;
+		}
+		else { /* make sure that each array has dzrcv or dxrcv defined for each line or receivers */
+			if (Ndx>1) assert(Ndx==Nx1);
+			if (Ndy>1) assert(Ndy==Ny1);
+			if (Ndz>1) assert(Ndz==Nx1);
+		}
+		
+		/* check if receiver arrays fit into model */
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrcv1[iarray] = MAX(sub_x0,      xrcv1[iarray]);
+			xrcv1[iarray] = MIN(sub_x0+nx*dx,xrcv1[iarray]);
+			xrcv2[iarray] = MAX(sub_x0,      xrcv2[iarray]);
+			xrcv2[iarray] = MIN(sub_x0+nx*dx,xrcv2[iarray]);
+
+			yrcv1[iarray] = MAX(sub_y0,      yrcv1[iarray]);
+			yrcv1[iarray] = MIN(sub_y0+ny*dy,yrcv1[iarray]);
+			yrcv2[iarray] = MAX(sub_y0,      yrcv2[iarray]);
+			yrcv2[iarray] = MIN(sub_y0+ny*dy,yrcv2[iarray]);
+			
+			zrcv1[iarray] = MAX(sub_z0,      zrcv1[iarray]);
+			zrcv1[iarray] = MIN(sub_z0+nz*dz,zrcv1[iarray]);
+			zrcv2[iarray] = MAX(sub_z0,      zrcv2[iarray]);
+			zrcv2[iarray] = MIN(sub_z0+nz*dz,zrcv2[iarray]);
+		}
+
+		/* calculate receiver array and store into rec->x,y,z */
+
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrange = (xrcv2[iarray]-xrcv1[iarray]); 
+			yrange = (yrcv2[iarray]-yrcv1[iarray]); 
+			zrange = (zrcv2[iarray]-zrcv1[iarray]); 
+			if (dxr[iarray] != 0.0) {
+				nrcv = nlrcv[iarray];
+				dxrcv = dxr[iarray];
+				dyrcv = yrange/(nrcv-1);
+				dzrcv = zrange/(nrcv-1);
+				if (dyrcv != dyr[iarray]) {
+					vwarn("For receiver array %li: calculated dyrcv=%f given=%f", iarray, dyrcv, dyr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dyrcv);
+				}
+				if (dzrcv != dzr[iarray]) {
+					vwarn("For receiver array %li: calculated dzrcv=%f given=%f", iarray, dzrcv, dzr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dzrcv);
+				}
+			}
+            else if (dyr[iarray] != 0.0) {
+				nrcv = nlrcv[iarray];
+				dxrcv = xrange/(nrcv-1);
+				dyrcv = dyr[iarray];
+				dzrcv = zrange/(nrcv-1);
+				if (dxrcv != dxr[iarray]) {
+					vwarn("For receiver array %li: calculated dxrcv=%f given=%f", iarray, dxrcv, dxr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dxrcv);
+				}
+				if (dzrcv != dzr[iarray]) {
+					vwarn("For receiver array %li: calculated dzrcv=%f given=%f", iarray, dzrcv, dzr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dzrcv);
+				}
+			}
+			else {
+				if (dzr[iarray] == 0) {
+					verr("For receiver array %li: receiver distance dzrcv is not given", iarray);
+				}
+				nrcv = nlrcv[iarray];
+				dxrcv = xrange/(nrcv-1);
+				dyrcv = yrange/(nrcv-1);
+				dzrcv = dzr[iarray];
+				if (dxrcv != dxr[iarray]) {
+					vwarn("For receiver array %li: calculated dxrcv=%f given=%f", iarray, dxrcv, dxr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dxrcv);
+				}
+				if (dyrcv != dyr[iarray]) {
+					vwarn("For receiver array %li: calculated dyrcv=%f given=%f", iarray, dyrcv, dyr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dyrcv);
+				}
+			}
+
+			// calculate coordinates
+			for (ir=0; ir<nrcv; ir++) {
+				rec->xr[nrec]=xrcv1[iarray]-sub_x0+ir*dxrcv;
+				rec->yr[nrec]=yrcv1[iarray]-sub_y0+ir*dyrcv;
+				rec->zr[nrec]=zrcv1[iarray]-sub_z0+ir*dzrcv;
+
+				rec->x[nrec]=NINT((rec->xr[nrec])/dx);
+				rec->y[nrec]=NINT((rec->yr[nrec])/dy);
+				rec->z[nrec]=NINT((rec->zr[nrec])/dz);
+				nrec++;
+			}
+		}
+		free(xrcv1);
+		free(xrcv2);
+		free(yrcv1);
+		free(yrcv2);
+		free(zrcv1);
+		free(zrcv2);
+		free(dxr);
+		free(dyr);
+		free(dzr);
+        free(nlrcv);
+	}
+
+    rec->n=rec->max_nrec;
+	return 0;
+}
diff --git a/fdelmodc3D/recvPar3D3.c b/fdelmodc3D/recvPar3D3.c
new file mode 100644
index 0000000000000000000000000000000000000000..f5130332823bb7399af74fb6344ed54e6e3f55d3
--- /dev/null
+++ b/fdelmodc3D/recvPar3D3.c
@@ -0,0 +1,523 @@
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#include "fdelmodc3D.h"
+#include "par.h"
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
+
+/**
+*  Calculates the receiver positions based on the input parameters
+*
+*   AUTHOR:
+*           Jan Thorbecke (janth@xs4all.nl)
+*
+*   Ammendments:
+*           Max Holicki changing the allocation receiver array (2-2016)
+*           Joeri Brackenhoff adding the 3D extension
+*           The Netherlands 
+**/
+
+
+void name_ext(char *filename, char *extension);
+
+long recvPar3D(recPar *rec, float sub_x0, float sub_y0, float sub_z0, 
+	float dx, float dy, float dz, long nx, long ny, long nz)
+{
+	float   *xrcv1, *xrcv2, *yrcv1, *yrcv2, *zrcv1, *zrcv2;
+	long    i, ix, iy, ir, verbose;
+	float   dxrcv, dyrcv, dzrcv, *dxr, *dyr, *dzr;
+	float   rrcv, dphi, oxrcv, oyrcv, ozrcv, arcv;
+	double  circ, h, a, b, e, s, xr, yr, zr, dr, srun, phase;
+	float   xrange, yrange, zrange, sub_x1, sub_y1, sub_z1;
+	long    Nx1, Nx2, Ny1, Ny2, Nz1, Nz2, Ndx, Ndy, Ndz, iarray, nrec, nh;
+	long    nxrcv, nyrcv, nzrcv, ncrcv, nrcv, ntrcv, *nlxrcv, *nlyrcv;
+	float   *xrcva, *yrcva, *zrcva;
+	char*   rcv_txt;
+	FILE    *fp;
+
+	if (!getparlong("verbose", &verbose)) verbose = 0;
+
+    /* Calculate Model Dimensions */
+    sub_x1=sub_x0+(nx-1)*dx;
+    sub_y1=sub_y0+(ny-1)*dy;
+    sub_z1=sub_z0+(nz-1)*dz;
+
+/* Compute how many receivers are defined and then allocate the receiver arrays */
+
+    /* Receiver Array */
+    nxrcv=countparval("xrcva");
+    nyrcv=countparval("yrcva");
+    nzrcv=countparval("zrcva");
+    if (nxrcv!=nzrcv) verr("Number of receivers in array xrcva (%li), yrcva (%li), zrcva(%li) are not equal",nxrcv,nyrcv,nzrcv);
+    if (verbose&&nxrcv) vmess("Total number of array receivers: %li",nxrcv);
+
+    /* Linear Receiver Arrays */
+	Nx1 = countparval("xrcv1");
+	Nx2 = countparval("xrcv2");
+	Ny1 = countparval("yrcv1");
+	Ny2 = countparval("yrcv2");
+	Nz1 = countparval("zrcv1");
+	Nz2 = countparval("zrcv2");
+    if (Nx1!=Nx2) verr("Number of receivers starting points in 'xrcv1' (%li) and number of endpoint in 'xrcv2' (%li) are not equal",Nx1,Nx2);
+    if (Ny1!=Ny2) verr("Number of receivers starting points in 'yrcv1' (%li) and number of endpoint in 'yrcv2' (%li) are not equal",Ny1,Ny2);
+    if (Nz1!=Nz2) verr("Number of receivers starting points in 'zrcv1' (%li) and number of endpoint in 'zrcv2' (%li) are not equal",Nz1,Nz2);
+    if (Nx1!=Ny2) verr("Number of receivers starting points in 'xrcv1' (%li) and number of endpoint in 'yrcv2' (%li) are not equal",Nx1,Ny2);
+    if (Nx1!=Nz2) verr("Number of receivers starting points in 'xrcv1' (%li) and number of endpoint in 'zrcv2' (%li) are not equal",Nx1,Nz2);
+
+    rec->max_nrec=nyrcv*nxrcv;
+
+	/* no receivers are defined use default linear array of receivers on top of model */
+    if (!rec->max_nrec && Nx1==0) Nx1=1; // Default is to use top of model to record data
+    if (!rec->max_nrec && Ny1==0) Ny1=1;
+
+    if (Nx1) {
+        /* Allocate Start & End Points of Linear Arrays */
+        xrcv1=(float *)malloc(Nx1*sizeof(float));
+        xrcv2=(float *)malloc(Nx1*sizeof(float));
+        yrcv1=(float *)malloc(Nx1*sizeof(float));
+        yrcv2=(float *)malloc(Nx1*sizeof(float));
+        zrcv1=(float *)malloc(Nx1*sizeof(float));
+        zrcv2=(float *)malloc(Nx1*sizeof(float));
+        if (!getparfloat("xrcv1",xrcv1)) xrcv1[0]=sub_x0;
+        if (!getparfloat("xrcv2",xrcv2)) xrcv2[0]=sub_x1;
+        if (!getparfloat("yrcv1",yrcv1)) yrcv1[0]=sub_y0;
+        if (!getparfloat("yrcv2",yrcv2)) yrcv2[0]=sub_y1;
+        if (!getparfloat("zrcv1",zrcv1)) zrcv1[0]=sub_z0;
+        if (!getparfloat("zrcv2",zrcv2)) zrcv2[0]=zrcv1[0];
+
+		/* check if receiver arrays fit into model */
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrcv1[iarray] = MAX(sub_x0,      xrcv1[iarray]);
+			xrcv1[iarray] = MIN(sub_x0+nx*dx,xrcv1[iarray]);
+			xrcv2[iarray] = MAX(sub_x0,      xrcv2[iarray]);
+			xrcv2[iarray] = MIN(sub_x0+nx*dx,xrcv2[iarray]);
+			
+			yrcv1[iarray] = MAX(sub_y0,      yrcv1[iarray]);
+			yrcv1[iarray] = MIN(sub_y0+ny*dy,yrcv1[iarray]);
+			yrcv2[iarray] = MAX(sub_y0,      yrcv2[iarray]);
+			yrcv2[iarray] = MIN(sub_y0+ny*dy,yrcv2[iarray]);
+
+			zrcv1[iarray] = MAX(sub_z0,      zrcv1[iarray]);
+			zrcv1[iarray] = MIN(sub_z0+nz*dz,zrcv1[iarray]);
+			zrcv2[iarray] = MAX(sub_z0,      zrcv2[iarray]);
+			zrcv2[iarray] = MIN(sub_z0+nz*dz,zrcv2[iarray]);
+		}
+
+        /* Crop to Fit Model */
+/* Max's addtion still have to check if it has the same fucntionality */
+        for (iarray=0;iarray<Nx1;iarray++) {
+            if (xrcv1[iarray]<sub_x0) {
+                if (xrcv2[iarray]<sub_x0) {
+                    verr("Linear array %li outside model bounds",iarray);
+                }
+				else {
+                    vwarn("Cropping element %li of 'xrcv1' (%f) to model bounds (%f)",iarray,xrcv1[iarray],sub_x0);
+                    xrcv1[iarray]=sub_x0;
+                }
+            } 
+			else if (xrcv1[iarray] > sub_x1) {
+                verr("Linear array %li outside model bounds",iarray);
+            }
+            if ( (xrcv2[iarray] < xrcv1[iarray]) ) {
+                verr("Ill defined linear array %li, 'xrcv1' (%f) greater than 'xrcv2' (%f)",iarray,xrcv1[iarray],xrcv2[iarray]);
+            }
+			else if (xrcv2[iarray]>sub_x1) {
+                vwarn("Cropping element %li of 'xrcv2' (%f) to model bounds (%f)",iarray,xrcv2[iarray],sub_x1);
+                xrcv2[iarray]=sub_x1;
+            }
+
+            if (yrcv1[iarray]<sub_y0) {
+                if (yrcv2[iarray]<sub_y0) {
+                    verr("Linear array %li outside model bounds",iarray);
+                }
+				else {
+                    vwarn("Cropping element %li of 'yrcv1' (%f) to model bounds (%f)",iarray,yrcv1[iarray],sub_y0);
+                    yrcv1[iarray]=sub_y0;
+                }
+            } 
+			else if (yrcv1[iarray] > sub_y1) {
+                verr("Linear array %li outside model bounds",iarray);
+            }
+            if ( (yrcv2[iarray] < yrcv1[iarray]) ) {
+                verr("Ill defined linear array %li, 'yrcv1' (%f) greater than 'yrcv2' (%f)",iarray,yrcv1[iarray],yrcv2[iarray]);
+            }
+			else if (yrcv2[iarray]>sub_y1) {
+                vwarn("Cropping element %li of 'yrcv2' (%f) to model bounds (%f)",iarray,yrcv2[iarray],sub_y1);
+                yrcv2[iarray]=sub_y1;
+            }
+
+            if (zrcv1[iarray] < sub_z0) {
+                if (zrcv2[iarray] < sub_z0) {
+                    verr("Linear array %li outside model bounds",iarray);
+                }
+				else {
+               		vwarn("Cropping element %li of 'zrcv1' (%f) to model bounds (%f)",iarray,zrcv1[iarray],sub_z0);
+                	zrcv1[iarray]=sub_z0;
+                }
+            }
+			else if (zrcv1[iarray] > sub_z1) {
+                verr("Linear array %li outside model bounds",iarray);
+            }
+            if ( (zrcv2[iarray] < zrcv1[iarray]) ) {
+                verr("Ill defined linear array %li, 'zrcv1' (%f) greater than 'zrcv2' (%f)",iarray,zrcv1[iarray],zrcv2[iarray]);
+            }
+			else if (zrcv2[iarray]>sub_z1) {
+                vwarn("Cropping element %li of 'xrcv2' (%f) to model bounds (%f)",iarray,zrcv2[iarray],sub_z1);
+                zrcv2[iarray]=sub_z1;
+            }
+        }
+
+        /* Get Sampling Rates */
+		Ndx = countparval("dxrcv");
+		Ndy = countparval("dyrcv");
+		Ndz = countparval("dzrcv");
+
+		dxr = (float *)malloc(Nx1*sizeof(float));
+		dyr = (float *)malloc(Nx1*sizeof(float));
+		dzr = (float *)malloc(Nx1*sizeof(float));
+		if(!getparfloat("dxrcv", dxr)) dxr[0]=dx;
+		if(!getparfloat("dyrcv", dyr)) dyr[0]=dy;
+		if(!getparfloat("dzrcv", dzr)) dzr[0]=0.0;
+		if ( (Ndx<=1) && (Ndy<=1) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndx=1;
+            Ndy=1;
+			Ndz=1;
+		}
+		else if ( (Ndz==1) && (Ndx==0) && (Ndy==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndz=1;
+            Ndy=1;
+			Ndx=1;
+		}
+		else { /* make sure that each array has dzrcv or dxrcv defined for each line or receivers */
+			if (Ndx!=Ndz) {
+				verr("Number of 'dxrcv' (%li) is not equal to number of 'dzrcv' (%li) or 1",Ndx,Ndz);
+			}
+			if (Ndx!=Ndy) {
+				verr("Number of 'dxrcv' (%li) is not equal to number of 'dyrcv' (%li) or 1",Ndx,Ndy);
+			}
+			if (Ndx!=Nx1 && Ndx!=1) {
+				verr("Number of 'dxrcv' (%li) is not equal to number of starting points in 'xrcv1' (%li) or 1",Ndx,Nx1);
+			}
+			if (Ndy!=Ny1 && Ndy!=1) {
+				verr("Number of 'dyrcv' (%li) is not equal to number of starting points in 'yrcv1' (%li) or 1",Ndy,Ny1);
+			}
+		}
+
+		/* check consistency of receiver steps */
+        for (iarray=0; iarray<Ndx; iarray++) {
+            if (dxr[iarray]<0) {
+				dxr[i]=dx;
+				vwarn("'dxrcv' element %li (%f) is less than zero, changing it to %f'",iarray,dxr[iarray],dx);
+			}
+        }
+        for (iarray=0; iarray<Ndy; iarray++) {
+            if (dyr[iarray]<0) {
+				dyr[i]=dx;
+				vwarn("'dyrcv' element %li (%f) is less than zero, changing it to %f'",iarray,dyr[iarray],dy);
+			}
+        }
+        for (iarray=0;iarray<Ndz;iarray++) {
+            if (dzr[iarray]<0) {
+				dzr[iarray]=dz;
+				vwarn("'dzrcv' element %li (%f) is less than zero, changing it to %f'",iarray,dzr[iarray],dz);
+			}
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (dxr[iarray]==0 && dzr[iarray]==0) {
+                xrcv2[iarray]=xrcv1[iarray];
+				dxr[iarray]=1.;
+                vwarn("'dxrcv' element %li & 'dzrcv' element 1 are both 0.",iarray+1);
+                vmess("Placing 1 receiver at (%li,%li)",xrcv1[iarray],zrcv1[iarray]);
+            }
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (xrcv1[iarray]==xrcv2[iarray] && dxr[iarray]!=0) {
+                dxr[iarray]=0.;
+                vwarn("Linear array %li: 'xrcv1'='xrcv2' and 'dxrcv' is not 0. Setting 'dxrcv'=0",iarray+1);
+            }
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (yrcv1[iarray]==yrcv2[iarray] && dyr[iarray]!=0) {
+                dyr[iarray]=0.;
+                vwarn("Linear array %li: 'yrcv1'='yrcv2' and 'dyrcv' is not 0. Setting 'dyrcv'=0",iarray+1);
+            }
+        }
+        for (iarray=0;iarray<Ndx;iarray++){
+            if (zrcv1[iarray]==zrcv2[iarray] && dzr[iarray]!=0.){
+                dzr[iarray]=0.;
+                vwarn("Linear array %li: 'zrcv1'='zrcv2' and 'dzrcv' is not 0. Setting 'dzrcv'=0",iarray+1);
+            }
+        }
+
+        /* Calculate Number of Receivers */
+		nrcv = 0;
+        nlxrcv=(long *)malloc(Nx1*sizeof(long));
+        nlyrcv=(long *)malloc(Nx1*sizeof(long));
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrange = (xrcv2[iarray]-xrcv1[iarray]); 
+			yrange = (yrcv2[iarray]-yrcv1[iarray]); 
+			zrange = (zrcv2[iarray]-zrcv1[iarray]); 
+			if (dxr[iarray] != 0.0 && dyr[iarray] != 0.0) {
+				nlxrcv[iarray] = NINT(fabs(xrange/dxr[iarray]))+1;
+				nlyrcv[iarray] = NINT(fabs(yrange/dyr[iarray]))+1;
+			}
+			else if (dxr[iarray] != 0.0) {
+				nlxrcv[iarray] = NINT(fabs(xrange/dxr[iarray]))+1;
+				nlyrcv[iarray] = 1;
+			}
+			else if (dyr[iarray] != 0.0) {
+				nlxrcv[iarray] = 1;
+				nlyrcv[iarray] = NINT(fabs(yrange/dyr[iarray]))+1;
+			}
+			else {
+				if (dzr[iarray] == 0) {
+					verr("For receiver array %li: receiver distance dzrcv is not given", iarray);
+				}
+				nlxrcv[iarray] = NINT(fabs(zrange/dzr[iarray]))+1;
+				nlyrcv[iarray] = NINT(fabs(zrange/dzr[iarray]))+1;
+			}
+            nrcv+=nlyrcv[iarray]*nlxrcv[iarray];
+		}
+
+        /* Calculate Number of Receivers */
+        if (verbose) vmess("Total number of linear array receivers: %li",nrcv);
+        if (!nrcv) {
+            free(xrcv1);
+            free(xrcv2);
+            free(yrcv1);
+            free(yrcv2);
+            free(zrcv1);
+            free(zrcv2);
+            free(dxr);
+            free(dyr);
+            free(dzr);
+            free(nlxrcv);
+            free(nlyrcv);
+        }
+        rec->max_nrec+=nrcv;
+    } 
+	else {
+		nrcv=0;
+	}
+
+/* allocate the receiver arrays */
+
+    /* Total Number of Receivers */
+    if (verbose) vmess("Total number of receivers: %li",rec->max_nrec);
+
+    /* Allocate Arrays */
+    rec->x  = (long *)calloc(rec->max_nrec,sizeof(long));
+    rec->y  = (long *)calloc(rec->max_nrec,sizeof(long));
+    rec->z  = (long *)calloc(rec->max_nrec,sizeof(long));
+    rec->xr = (float *)calloc(rec->max_nrec,sizeof(float));
+    rec->yr = (float *)calloc(rec->max_nrec,sizeof(float));
+    rec->zr = (float *)calloc(rec->max_nrec,sizeof(float));
+
+/* read in the receiver postions */
+
+	nrec=0;
+    /* Receiver Array */
+	if (nxrcv != 0 && nyrcv != 0) {
+		/* receiver array is defined */
+		xrcva = (float *)malloc(nxrcv*sizeof(float));
+		yrcva = (float *)malloc(nxrcv*sizeof(float));
+		zrcva = (float *)malloc(nxrcv*sizeof(float));
+		getparfloat("xrcva", xrcva);
+		getparfloat("yrcva", yrcva);
+		getparfloat("zrcva", zrcva);
+		for (iy=0; iy<nyrcv; iy++) {
+            for (ix=0; ix<nxrcv; ix++) {
+                rec->xr[nrec+iy*nxrcv+ix] = xrcva[ix]-sub_x0;
+                rec->yr[nrec+iy*nxrcv+ix] = yrcva[iy]-sub_y0;
+                rec->zr[nrec+iy*nxrcv+ix] = zrcva[ix]-sub_z0;
+                rec->x[nrec+iy*nxrcv+ix] = NINT((xrcva[ix]-sub_x0)/dx);
+                rec->y[nrec+iy*nxrcv+ix] = NINT((yrcva[iy]-sub_y0)/dy);
+                rec->z[nrec+iy*nxrcv+ix] = NINT((zrcva[ix]-sub_z0)/dz);
+                if (verbose>4) fprintf(stderr,"Receiver Array: xrcv[%li]=%f yrcv[%li]=%f zrcv=%f\n", ix, rec->xr[nrec+ix]+sub_x0, iy, rec->yr[nrec+ix]+sub_y0, rec->zr[nrec+ix]+sub_z0);
+            }
+        }
+		free(xrcva);
+		free(yrcva);
+		free(zrcva);
+		nrec += nyrcv*nxrcv;
+	}
+
+    /* Linear Receiver Arrays */
+    if (nrcv!=0) {
+		xrcv1 = (float *)malloc(Nx1*sizeof(float));
+		xrcv2 = (float *)malloc(Nx1*sizeof(float));
+		yrcv1 = (float *)malloc(Nx1*sizeof(float));
+		yrcv2 = (float *)malloc(Nx1*sizeof(float));
+		zrcv1 = (float *)malloc(Nx1*sizeof(float));
+		zrcv2 = (float *)malloc(Nx1*sizeof(float));
+		
+		if(!getparfloat("xrcv1", xrcv1)) xrcv1[0]=sub_x0;
+		if(!getparfloat("xrcv2", xrcv2)) xrcv2[0]=(nx-1)*dx+sub_x0;
+		if(!getparfloat("yrcv1", yrcv1)) yrcv1[0]=sub_y0;
+		if(!getparfloat("yrcv2", yrcv2)) yrcv2[0]=(ny-1)*dy+sub_y0;
+		if(!getparfloat("zrcv1", zrcv1)) zrcv1[0]=sub_z0;
+		if(!getparfloat("zrcv2", zrcv2)) zrcv2[0]=zrcv1[0];		
+		
+		Ndx = countparval("dxrcv");
+		Ndy = countparval("dyrcv");
+		Ndz = countparval("dzrcv");
+
+		dxr = (float *)malloc(Nx1*sizeof(float));
+		dyr = (float *)malloc(Nx1*sizeof(float));
+		dzr = (float *)malloc(Nx1*sizeof(float));
+		if(!getparfloat("dxrcv", dxr)) dxr[0]=dx;
+		if(!getparfloat("dyrcv", dyr)) dyr[0]=dy;
+		if(!getparfloat("dzrcv", dzr)) dzr[0]=0.0;
+		if ( (Ndx<=1) && (Ndy<=1) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndx=1;
+            Ndy=1;
+		}
+        else if ( (Ndx<=1) && (Ndy==0) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndx=1;
+		}
+        else if ( (Ndy<=1) && (Ndx==0) && (Ndz==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndy=1;
+		}
+		else if ( (Ndz==1) && (Ndy==0) && (Ndx==0) ){ /* default values are set */
+			for (i=1; i<Nx1; i++) {
+				dxr[i] = dxr[0];
+				dyr[i] = dyr[0];
+				dzr[i] = dzr[0];
+			}
+			Ndz=1;
+		}
+		else { /* make sure that each array has dzrcv or dxrcv defined for each line or receivers */
+			if (Ndx>1) assert(Ndx==Nx1);
+			if (Ndy>1) assert(Ndy==Ny1);
+			if (Ndz>1) assert(Ndz==Nx1);
+		}
+		
+		/* check if receiver arrays fit into model */
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrcv1[iarray] = MAX(sub_x0,      xrcv1[iarray]);
+			xrcv1[iarray] = MIN(sub_x0+nx*dx,xrcv1[iarray]);
+			xrcv2[iarray] = MAX(sub_x0,      xrcv2[iarray]);
+			xrcv2[iarray] = MIN(sub_x0+nx*dx,xrcv2[iarray]);
+
+			yrcv1[iarray] = MAX(sub_y0,      yrcv1[iarray]);
+			yrcv1[iarray] = MIN(sub_y0+ny*dy,yrcv1[iarray]);
+			yrcv2[iarray] = MAX(sub_y0,      yrcv2[iarray]);
+			yrcv2[iarray] = MIN(sub_y0+ny*dy,yrcv2[iarray]);
+			
+			zrcv1[iarray] = MAX(sub_z0,      zrcv1[iarray]);
+			zrcv1[iarray] = MIN(sub_z0+nz*dz,zrcv1[iarray]);
+			zrcv2[iarray] = MAX(sub_z0,      zrcv2[iarray]);
+			zrcv2[iarray] = MIN(sub_z0+nz*dz,zrcv2[iarray]);
+		}
+
+		/* calculate receiver array and store into rec->x,y,z */
+
+		for (iarray=0; iarray<Nx1; iarray++) {
+			xrange = (xrcv2[iarray]-xrcv1[iarray]); 
+			yrange = (yrcv2[iarray]-yrcv1[iarray]); 
+			zrange = (zrcv2[iarray]-zrcv1[iarray]); 
+			if (dxr[iarray] != 0.0) {
+				nrcv = nlyrcv[iarray]*nlxrcv[iarray];
+				dxrcv = dxr[iarray];
+				dyrcv = yrange/(nrcv-1);
+				dzrcv = zrange/(nrcv-1);
+				if (dyrcv != dyr[iarray]) {
+					vwarn("For receiver array %li: calculated dyrcv=%f given=%f", iarray, dyrcv, dyr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dyrcv);
+				}
+				if (dzrcv != dzr[iarray]) {
+					vwarn("For receiver array %li: calculated dzrcv=%f given=%f", iarray, dzrcv, dzr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dzrcv);
+				}
+			}
+            else if (dyr[iarray] != 0.0) {
+				nrcv = nlyrcv[iarray]*nlxrcv[iarray];
+				dxrcv = xrange/(nrcv-1);
+				dyrcv = dyr[iarray];
+				dzrcv = zrange/(nrcv-1);
+				if (dxrcv != dxr[iarray]) {
+					vwarn("For receiver array %li: calculated dxrcv=%f given=%f", iarray, dxrcv, dxr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dxrcv);
+				}
+				if (dzrcv != dzr[iarray]) {
+					vwarn("For receiver array %li: calculated dzrcv=%f given=%f", iarray, dzrcv, dzr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dzrcv);
+				}
+			}
+			else {
+				if (dzr[iarray] == 0) {
+					verr("For receiver array %li: receiver distance dzrcv is not given", iarray);
+				}
+				nrcv = nlyrcv[iarray]*nlxrcv[iarray];
+				dxrcv = xrange/(nrcv-1);
+				dyrcv = yrange/(nrcv-1);
+				dzrcv = dzr[iarray];
+				if (dxrcv != dxr[iarray]) {
+					vwarn("For receiver array %li: calculated dxrcv=%f given=%f", iarray, dxrcv, dxr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dxrcv);
+				}
+				if (dyrcv != dyr[iarray]) {
+					vwarn("For receiver array %li: calculated dyrcv=%f given=%f", iarray, dyrcv, dyr[iarray]);
+					vwarn("The calculated receiver distance %f is used", dyrcv);
+				}
+			}
+
+			// calculate coordinates
+			for (iy=0; iy<nlyrcv[iarray]; iy++) {
+                for (ix=0; ix<nlxrcv[iarray]; ix++) {
+                    rec->xr[nrec]=xrcv1[iarray]-sub_x0+ix*dxrcv;
+                    rec->yr[nrec]=yrcv1[iarray]-sub_y0+iy*dyrcv;
+                    rec->zr[nrec]=zrcv1[iarray]-sub_z0+ix*dzrcv;
+
+                    rec->x[nrec]=NINT((rec->xr[nrec])/dx);
+                    rec->y[nrec]=NINT((rec->yr[nrec])/dy);
+                    rec->z[nrec]=NINT((rec->zr[nrec])/dz);
+                    nrec++;
+                }
+            }
+		}
+		free(xrcv1);
+		free(xrcv2);
+		free(yrcv1);
+		free(yrcv2);
+		free(zrcv1);
+		free(zrcv2);
+		free(dxr);
+		free(dyr);
+		free(dzr);
+        free(nlxrcv);
+        free(nlyrcv);
+	}
+
+    rec->n=rec->max_nrec;
+	return 0;
+}
diff --git a/fdelmodc3D/viscoacoustic4.c b/fdelmodc3D/viscoacoustic4.c
new file mode 100644
index 0000000000000000000000000000000000000000..63207c7da09c060fccf4b3625a86e6437d744d91
--- /dev/null
+++ b/fdelmodc3D/viscoacoustic4.c
@@ -0,0 +1,175 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc.h"
+
+int applySource(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float *vx, float *vz, float *tzz,
+float *txx, float *txz, float *rox, float *roz, float *l2m, float **src_nwav, int verbose);
+
+int storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int boundariesP(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int boundariesV(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int viscoacoustic4(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float **src_nwav, float *vx, float *vz, float *p, float *rox, float *roz, float *l2m, float *tss, float *tep, float *q, int verbose)
+{
+/*********************************************************************
+       COMPUTATIONAL OVERVIEW OF THE 4th ORDER STAGGERED GRID: 
+
+  The captial symbols T (=Txx,Tzz) Txz,Vx,Vz represent the actual grid
+  The indices ix,iz are related to the T grid, so the capital T 
+  symbols represent the actual modelled grid.
+
+  one cel (iz,ix)
+       |
+       V                              extra column of vx,txz
+                                                      |
+    -------                                           V
+   | txz vz| txz vz  txz vz  txz vz  txz vz  txz vz txz
+   |       |      
+   | vx  t | vx  t   vx  t   vx  t   vx  t   vx  t  vx
+    -------
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   | 
+     txz vz  txz Vz--Txz-Vz--Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  t   vx  t   vx  t   vx  t   vx  t  vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz  <--| 
+                                                             |
+                                         extra row of txz/vz |
+
+   AUTHOR:
+           Jan Thorbecke (janth@xs4all.nl)
+           The Netherlands 
+
+***********************************************************************/
+
+	float c1, c2;
+	int   ix, iz;
+	int   n1;
+	float ddt, Tpp, *Tlm, *Tlp, *Tt1, *Tt2, *dxvx, *dzvz;
+
+
+	c1 = 9.0/8.0; 
+	c2 = -1.0/24.0;
+	n1  = mod.naz;
+	ddt = 1.0/mod.dt;
+
+	dxvx = (float *)malloc(n1*sizeof(float));
+	dzvz = (float *)malloc(n1*sizeof(float));
+	Tlm = (float *)malloc(n1*sizeof(float));
+	Tlp = (float *)malloc(n1*sizeof(float));
+	Tt1 = (float *)malloc(n1*sizeof(float));
+	Tt2 = (float *)malloc(n1*sizeof(float));
+
+	/* calculate vx for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iz) nowait schedule(guided,1)
+	for (ix=mod.ioXx; ix<mod.ieXx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioXz; iz<mod.ieXz; iz++) {
+			vx[ix*n1+iz] -= rox[ix*n1+iz]*(
+						c1*(p[ix*n1+iz]     - p[(ix-1)*n1+iz]) +
+						c2*(p[(ix+1)*n1+iz] - p[(ix-2)*n1+iz]));
+		}
+	}
+
+	/* calculate vz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iz)  schedule(guided,1)
+	for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioZz; iz<mod.ieZz; iz++) {
+			vz[ix*n1+iz] -= roz[ix*n1+iz]*(
+						c1*(p[ix*n1+iz]   - p[ix*n1+iz-1]) +
+						c2*(p[ix*n1+iz+1] - p[ix*n1+iz-2]));
+		}
+	}
+
+	/* Add force source */
+	if (src.type > 5) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, p, NULL, NULL, rox, roz, l2m, src_nwav, verbose);
+	}
+
+	/* boundary condition clears velocities on boundaries */
+	boundariesP(mod, bnd, vx, vz, p, NULL, NULL, rox, roz, l2m, NULL, NULL, itime, verbose);
+
+	/* calculate p/tzz for all grid points except on the virtual boundary */
+#pragma omp	for private (iz,ix, Tpp) nowait schedule(guided,1)
+	for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dxvx[iz] = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+					   c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+		}
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dzvz[iz] = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+					   c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+		}
+
+		/* help variables to let the compiler vectorize the loops */
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			Tpp     = tep[ix*n1+iz]*tss[ix*n1+iz];
+			Tlm[iz] = (1.0-Tpp)*tss[ix*n1+iz]*l2m[ix*n1+iz]*0.5;
+			Tlp[iz] = l2m[ix*n1+iz]*Tpp;
+		}   
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			Tt1[iz] = 1.0/(ddt+0.5*tss[ix*n1+iz]);
+			Tt2[iz] = ddt-0.5*tss[ix*n1+iz];
+		}   
+
+		/* the update with the relaxation correction */
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			p[ix*n1+iz] -= Tlp[iz]*(dzvz[iz]+dxvx[iz]) + q[ix*n1+iz];
+		}
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			q[ix*n1+iz] = (Tt2[iz]*q[ix*n1+iz] + Tlm[iz]*(dxvx[iz]+dzvz[iz]))*Tt1[iz];
+			p[ix*n1+iz] -= q[ix*n1+iz];
+		}
+	}
+
+	/* Add stress source */
+	if (src.type < 6) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, p, NULL, NULL, rox, roz, l2m, src_nwav, verbose);
+	}
+
+	/* check if there are sources placed on the free surface */
+    storeSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, p, NULL, NULL, verbose);
+
+	/* Free surface: calculate free surface conditions for stresses */
+	boundariesV(mod, bnd, vx, vz, p, NULL, NULL, rox, roz, l2m, NULL, NULL, itime, verbose);
+
+	/* restore source positions on the edge */
+	reStoreSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, p, NULL, NULL, verbose);
+
+	free(dxvx);
+	free(dzvz);
+	free(Tlm);
+	free(Tlp);
+	free(Tt1);
+	free(Tt2);
+
+	return 0;
+}
diff --git a/fdelmodc3D/viscoelastic4.c b/fdelmodc3D/viscoelastic4.c
new file mode 100644
index 0000000000000000000000000000000000000000..865aa123eae8e88e73131934527af6b88da9cce7
--- /dev/null
+++ b/fdelmodc3D/viscoelastic4.c
@@ -0,0 +1,244 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<math.h>
+#include<assert.h>
+#include"fdelmodc.h"
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+int applySource(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float *vx, float *vz, float *tzz,
+float *txx, float *txz, float *rox, float *roz, float *l2m, float **src_nwav, int verbose);
+
+int storeSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int izsrc, float *vx, float *vz, float *tzz, float *txx, float *txz, int verbose);
+
+int boundariesP(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int boundariesV(modPar mod, bndPar bnd, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, int itime, int verbose);
+
+int viscoelastic4(modPar mod, srcPar src, wavPar wav, bndPar bnd, int itime, int ixsrc, int izsrc, float **src_nwav, float *vx, float *vz, float *tzz, float *txx, float *txz, float *rox, float *roz, float *l2m, float *lam, float *mul, float *tss, float *tep, float *tes, float *r, float *q, float *p, int verbose)
+{
+/*********************************************************************
+       COMPUTATIONAL OVERVIEW OF THE 4th ORDER STAGGERED GRID: 
+
+  The captial symbols T (=Txx,Tzz) Txz,Vx,Vz represent the actual grid
+  The indices ix,iz are related to the T grid, so the capital T 
+  symbols represent the actual modelled grid.
+
+  one cel (iz,ix)
+       |
+       V                              extra column of vx,txz
+                                                      |
+    -------                                           V
+   | txz vz| txz vz  txz vz  txz vz  txz vz  txz vz txz
+   |       |      
+   | vx  t | vx  t   vx  t   vx  t   vx  t   vx  t  vx
+    -------
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   | 
+     txz vz  txz Vz--Txz-Vz--Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+                 |   |   |   |   |   |   |
+     txz vz  txz Vz  Txz-Vz  Txz-Vz  Txz-Vz  txz vz  txz
+                 |   |   |   |   |   |   |
+     vx  t   vx  T---Vx--T---Vx--T---Vx--T   vx  t   vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz
+
+     vx  t   vx  t   vx  t   vx  t   vx  t   vx  t  vx
+
+     txz vz  txz vz  txz vz  txz vz  txz vz  txz vz  txz  <--| 
+                                                             |
+                                         extra row of txz/vz |
+
+  Implementation as described in:
+
+Viscoelastic finite-difference modeling 
+Johan 0. A. Robertsson, Joakim 0. Blanch, and William W. Symes
+GEOPHYSICS, VOL. 59, NO. 9 (SEPTEMBER 1994); P. 1444-1456
+
+   AUTHOR:
+           Jan Thorbecke (janth@xs4all.nl)
+           The Netherlands 
+
+***********************************************************************/
+
+	float c1, c2;
+	float ddt;
+	float *dxvx, *dzvz, *dxvz, *dzvx;
+	float *Tpp, *Tss, *Tmu, *Tlm, *Tlp, *Tus, *Tt1, *Tt2;
+	int   ix, iz;
+	int   n1;
+//	int   ioXx, ioXz, ioZz, ioZx, ioPx, ioPz, ioTx, ioTz;
+
+
+	c1 = 9.0/8.0; 
+	c2 = -1.0/24.0;
+	n1  = mod.naz;
+	ddt = 1.0/mod.dt;
+
+	dxvx = (float *)malloc(n1*sizeof(float));
+	dzvz = (float *)malloc(n1*sizeof(float));
+	dxvz = (float *)malloc(n1*sizeof(float));
+	dzvx = (float *)malloc(n1*sizeof(float));
+	Tpp = (float *)malloc(n1*sizeof(float));
+	Tss = (float *)malloc(n1*sizeof(float));
+	Tmu = (float *)malloc(n1*sizeof(float));
+	Tlm = (float *)malloc(n1*sizeof(float));
+	Tlp = (float *)malloc(n1*sizeof(float));
+	Tus = (float *)malloc(n1*sizeof(float));
+	Tt1 = (float *)malloc(n1*sizeof(float));
+	Tt2 = (float *)malloc(n1*sizeof(float));
+
+	/* Vx: rox */
+//	ioXx=mod.iorder/2;
+//	ioXz=ioXx-1;
+	/* Vz: roz */
+//	ioZz=mod.iorder/2;
+//	ioZx=ioZz-1;
+	/* P, Txx, Tzz: lam, l2m */
+//	ioPx=mod.iorder/2-1;
+//	ioPz=ioPx;
+	/* Txz: muu */
+//	ioTx=mod.iorder/2;
+//	ioTz=ioTx;
+
+	/* calculate vx for all grid points except on the virtual boundary*/
+#pragma omp for private (ix, iz) nowait schedule(guided,1)
+	for (ix=mod.ioXx; ix<mod.ieXx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioXz; iz<mod.ieXz; iz++) {
+			vx[ix*n1+iz] -= rox[ix*n1+iz]*(
+						c1*(txx[ix*n1+iz]     - txx[(ix-1)*n1+iz] +
+							txz[ix*n1+iz+1]   - txz[ix*n1+iz])    +
+						c2*(txx[(ix+1)*n1+iz] - txx[(ix-2)*n1+iz] +
+							txz[ix*n1+iz+2]   - txz[ix*n1+iz-1])  );
+		}
+	}
+
+	/* calculate vz for all grid points except on the virtual boundary */
+#pragma omp for private (ix, iz)  schedule(guided,1)
+	for (ix=mod.ioZx; ix<mod.ieZx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioZz; iz<mod.ieZz; iz++) {
+			vz[ix*n1+iz] -= roz[ix*n1+iz]*(
+						c1*(tzz[ix*n1+iz]     - tzz[ix*n1+iz-1] +
+							txz[(ix+1)*n1+iz] - txz[ix*n1+iz])  +
+						c2*(tzz[ix*n1+iz+1]   - tzz[ix*n1+iz-2] +
+							txz[(ix+2)*n1+iz] - txz[(ix-1)*n1+iz])  );
+		}
+	}
+
+	/* Add force source */
+	if (src.type > 5) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+
+	/* boundary condition clears velocities on boundaries */
+	boundariesP(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* calculate Txx/Tzz for all grid points except on the virtual boundary */
+#pragma omp	for private (ix, iz) nowait schedule(guided,1)
+	for (ix=mod.ioPx; ix<mod.iePx; ix++) {
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dxvx[iz] = c1*(vx[(ix+1)*n1+iz] - vx[ix*n1+iz]) +
+					   c2*(vx[(ix+2)*n1+iz] - vx[(ix-1)*n1+iz]);
+		}
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			dzvz[iz] = c1*(vz[ix*n1+iz+1]   - vz[ix*n1+iz]) +
+					   c2*(vz[ix*n1+iz+2]   - vz[ix*n1+iz-1]);
+		}
+		/* help variables to let the compiler vectorize the loops */
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			Tpp[iz] = tep[ix*n1+iz]*tss[ix*n1+iz];
+			Tss[iz] = tes[ix*n1+iz]*tss[ix*n1+iz];
+			Tmu[iz] = (1.0-Tss[iz])*tss[ix*n1+iz]*mul[ix*n1+iz];
+			Tlm[iz] = (1.0-Tpp[iz])*tss[ix*n1+iz]*l2m[ix*n1+iz]*0.5;
+			Tlp[iz] = l2m[ix*n1+iz]*Tpp[iz];
+			Tus[iz] = mul[ix*n1+iz]*Tss[iz];
+		}
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			Tt1[iz] = 1.0/(ddt+0.5*tss[ix*n1+iz]);
+			Tt2[iz] = ddt-0.5*tss[ix*n1+iz];
+		}
+
+		/* the update with the relaxation correction */
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			txx[ix*n1+iz] -= Tlp[iz]*dxvx[iz] + (Tlp[iz]-2.0*Tus[iz])*dzvz[iz] + q[ix*n1+iz];
+			tzz[ix*n1+iz] -= Tlp[iz]*dzvz[iz] + (Tlp[iz]-2.0*Tus[iz])*dxvx[iz] + p[ix*n1+iz];
+		}
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			q[ix*n1+iz] = (Tt2[iz]*q[ix*n1+iz] - Tmu[iz]*dzvz[iz] + Tlm[iz]*(dxvx[iz]+dzvz[iz]))*Tt1[iz];
+			txx[ix*n1+iz] -= q[ix*n1+iz];
+		}
+#pragma ivdep
+		for (iz=mod.ioPz; iz<mod.iePz; iz++) {
+			p[ix*n1+iz] = (Tt2[iz]*p[ix*n1+iz] - Tmu[iz]*dxvx[iz] + Tlm[iz]*(dxvx[iz]+dzvz[iz]))*Tt1[iz];
+			tzz[ix*n1+iz] -= p[ix*n1+iz];
+		}
+
+		/* calculate Txz for all grid points except on the virtual boundary */
+		if (ix >= mod.ioTx) {
+#pragma ivdep
+            for (iz=mod.ioTz; iz<mod.ieTz; iz++) {
+				dzvx[iz] = c1*(vx[ix*n1+iz]     - vx[ix*n1+iz-1]) +
+						   c2*(vx[ix*n1+iz+1]   - vx[ix*n1+iz-2]);
+			}
+#pragma ivdep
+            for (iz=mod.ioTz; iz<mod.ieTz; iz++) {
+				dxvz[iz] = c1*(vz[ix*n1+iz]     - vz[(ix-1)*n1+iz]) +
+						   c2*(vz[(ix+1)*n1+iz] - vz[(ix-2)*n1+iz]);
+			}
+#pragma ivdep
+            for (iz=mod.ioTz; iz<mod.ieTz; iz++) {
+				txz[ix*n1+iz] -= Tus[iz]*(dzvx[iz]+dxvz[iz]) + r[ix*n1+iz];
+				r[ix*n1+iz] = (Tt2[iz]*r[ix*n1+iz] + 0.5*Tmu[iz]*(dzvx[iz]+dxvz[iz]))*Tt1[iz];
+				txz[ix*n1+iz] -= r[ix*n1+iz];
+			}
+		}
+	}
+
+	/* Add stress source */
+	if (src.type < 6) {
+		 applySource(mod, src, wav, bnd, itime, ixsrc, izsrc, vx, vz, tzz, txx, txz, rox, roz, l2m, src_nwav, verbose);
+	}
+
+	/* check if there are sources placed on the free surface */
+    storeSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+
+    /* Free surface: calculate free surface conditions for stresses */
+    boundariesV(mod, bnd, vx, vz, tzz, txx, txz, rox, roz, l2m, lam, mul, itime, verbose);
+
+	/* restore source positions on the edge */
+	reStoreSourceOnSurface(mod, src, bnd, ixsrc, izsrc, vx, vz, tzz, txx, txz, verbose);
+
+	free(dxvx);
+	free(dzvz);
+	free(dzvx);
+	free(dxvz);
+	free(Tpp);
+	free(Tss);
+	free(Tmu);
+	free(Tlm);
+	free(Tlp);
+	free(Tus);
+	free(Tt1);
+	free(Tt2);
+
+	return 0;
+}
+