diff --git a/.gitignore b/.gitignore
index 635fb58cbcf2d04c4fe6fc500357938a32da2a3c..d29000507baf716523e7711070baaf27c0c9194b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 bin/*
 *.su
 *.bin
+*.asci
 */.DS*
 **/.DS*
 .DS*
@@ -60,3 +61,6 @@ marchenko3D/fmute3D
 marchenko3D/marchenko3D
 fdelmodc3D/fdelmodc3D
 fdacrtmc/fdacrtmc
+marchenko3D/TWtransform
+utils/mutesnap
+utils/snap2shot
diff --git a/Make_include_template b/Make_include_template
index 002c45dc914840e33369eeefdd8cf28af97a87e4..8353d6e971869c7337e17e7fb5bcfe4d60f88049 100644
--- a/Make_include_template
+++ b/Make_include_template
@@ -97,6 +97,9 @@ BLAS = -L${MKLROOT}/lib/ -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm
 #BLAS = -Wl,-rpath ${MKLLIB} -Wl,--start-group -L${MKLLIB} -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread -lm -ldl
 #for intel compilers
 #BLAS = -mkl
+#
+#If you do not want to use MKL
+#BLAS = -llapack -lblas
 
 #############################################################################
 # AMD ACML 4.4.0
diff --git a/README b/README
index 4ec806b5c552611cf6831df4f69b9b6eb459f0a2..4587443e9c43046de6df1731342f2d43c36d0874 100644
--- a/README
+++ b/README
@@ -110,7 +110,7 @@ Usually the MKL libraries are installed in $MKL_ROOT. If that variable is not se
 find /opt/intel -name libmkl_gf_lp64.so
 
 and adjust MKLROOT in Make_include accordingly. 
-You can also completely disable the use of MKL by commenting out these parts in Make_include. 
+You can also completely disable the use of MKL by commenting out the MKL parts in Make_include. 
 
 SEISMIC UNIX
 -----------
@@ -124,7 +124,7 @@ XDRFLAG =
 
 so the XDRFLAG is empty. If you have made that change you have to remake SU with the commands:
 make remake
-make xtremane
+make xtremake
 
 ZFP
 ---
diff --git a/extrap3d/main/Makefile b/extrap3d/main/Makefile
index 4ed828591f063fa0d414c9b783593507830bc1fc..86a27c6501b8745535a3e07e8d32d111da721868 100644
--- a/extrap3d/main/Makefile
+++ b/extrap3d/main/Makefile
@@ -2,7 +2,7 @@
 
 include ../../Make_include
 
-LIBS = -L$L -lextrap3d  -lgenfft -lblas -llapack -lm 
+LIBS += -L$L -lextrap3d -lgenfft -lm 
 
 LDFLAGS += $(LIBS)
 
@@ -23,7 +23,7 @@ $(CPROGS):	$(CTARGET) $L/libextrap3d.a
 	mv $@.tmp $B/$@
 
 migr3d_mpi: migr3d.c  $L/libextrap3d.a
-	mpicc $(CFLAGS) -DMPI $(OPTC) $(CPPFLAGS) migr3d.c $(LDFLAGS) -o $@.tmp
+	mpicc -cc=icc $(CFLAGS) -DMPI $(OPTC) $(CPPFLAGS) migr3d.c $(LDFLAGS) -o $@.tmp
 	test ! -f $@ || $(RM) $@
 	mv $@.tmp $B/$@
 
diff --git a/fdacrtmc/extractMigrationSnapshots.c b/fdacrtmc/extractMigrationSnapshots.c
index 796682146a9df9ab3e819cb0c4fc0e7dbff1430c..18cf7436fd75c5f95a705dd7829f58248d18dc1c 100644
--- a/fdacrtmc/extractMigrationSnapshots.c
+++ b/fdacrtmc/extractMigrationSnapshots.c
@@ -60,7 +60,6 @@ int storeVerticalParticleVelocitySnapshot(modPar *mod, wavPar *wav, migPar *mig)
 	else farr=(float*)malloc(mig->sizem*sizeof(float));
 
 	// Store Vertical Particle Velocity Field - Interpolate to Tzz(P)-Grid
-	mig->wav[mig->it].vz=(float*)malloc(mig->sizem*sizeof(float));
 	for(ix=0,ix1=mod->ioZx+mig->x1;ix<mig->nx;ix++,ix1+=mig->skipdx){
 		for(iz=0,iz1=mod->ioZz+mig->z1;iz<mig->nz;iz++,iz1+=mig->skipdz){
 			farr[ix*mig->nz+iz]=(wav->vz[ix1*mod->naz+iz1]+wav->vz[ix1*mod->naz+iz1+1])/2.0;
@@ -77,9 +76,6 @@ int storeVerticalParticleVelocitySnapshot(modPar *mod, wavPar *wav, migPar *mig)
 int extractMigrationSnapshots(modPar *mod, wavPar *wav, migPar *mig, decompPar *decomp){
 	size_t ix, ix1, iz, iz1;
 
-//if(mig->it<100){mig->it++;return(0);}
-
-
 	switch(mig->mode){
 		case 1: /* Conventional Migration - Store Pressure Field (Tzz) */
 			// P (Tzz)
@@ -90,8 +86,10 @@ int extractMigrationSnapshots(modPar *mod, wavPar *wav, migPar *mig, decompPar *
 			storePressureSnapshot(mod,wav,mig);
 			switch(mig->orient){ //Migration Orientation
 				case 0: //Image Wavefields not travelling in the same direction
+					// Vertical Particle Velocity - Interpolate to Tzz(P)-Grid
+					storeVerticalParticleVelocitySnapshot(mod,wav,mig);
 					// Horizontal Particle Velocity - Interpolate to Tzz(P)-Grid
-					storeVerticalParticleVelocitySnapshot(mod,wav,mig); //TODO: What does this case actually do?
+					storeHorizontalParticleVelocitySnapshot(mod,wav,mig); //TODO: What does this case actually do?
 					break;
 				case 1: //Up-Down Imaging
 					// Vertical Particle Velocity - Interpolate to Tzz(P)-Grid
diff --git a/fdacrtmc/rtmImagingCondition.c b/fdacrtmc/rtmImagingCondition.c
index 01bca46ec6b692263f2afde5468dfed571ea5668..2e07f4c529dc1e4c990729a73b7cf6498c88b42e 100644
--- a/fdacrtmc/rtmImagingCondition.c
+++ b/fdacrtmc/rtmImagingCondition.c
@@ -66,6 +66,8 @@ int rtmImagingCondition(modPar *mod, wavPar *wav, migPar *mig, decompPar *decomp
 							}
 						}
 					}
+					free(mig->wav[mig->it].vx);
+					free(mig->wav[mig->it].vz);
 					break;
 				case 1: // Up-Down Imaging
 					for(ix=0,ix1=mod->ioPx+mig->x1;ix<mig->nx;ix++,ix1+=mig->skipdx){
@@ -78,6 +80,7 @@ int rtmImagingCondition(modPar *mod, wavPar *wav, migPar *mig, decompPar *decomp
 							if(pzf*pzb>0) mig->image[ix*mig->nz+iz]+=mig->dt*(wav->tzz[ix1*mod->naz+iz1]*mig->wav[mig->it].tzz[ix*mig->nz+iz]);
 						}
 					}
+					free(mig->wav[mig->it].vz);
 					break;
 				case 2: // Left-Right Imaging
 					for(ix=0,ix1=mod->ioPx+mig->x1;ix<mig->nx;ix++,ix1+=mig->skipdx){
@@ -90,6 +93,7 @@ int rtmImagingCondition(modPar *mod, wavPar *wav, migPar *mig, decompPar *decomp
 							if(pxf*pxb<0) mig->image[ix*mig->nz+iz]+=mig->dt*(wav->tzz[ix1*mod->naz+iz1]*mig->wav[mig->it].tzz[ix*mig->nz+iz]);
 						}
 					}
+					free(mig->wav[mig->it].vx);
 					break;
 				case 3: //Normal Imaging
 					//Not yet implemented
@@ -103,8 +107,6 @@ int rtmImagingCondition(modPar *mod, wavPar *wav, migPar *mig, decompPar *decomp
 				break;
 			}
 			free(mig->wav[mig->it].tzz);
-			free(mig->wav[mig->it].vx);
-			free(mig->wav[mig->it].vz);
 			break;
 		case 3: /* Wavefield decomposition zero-lag pressure cross-correlation */
 			mig->it--;break;
diff --git a/fdelmodc/FiguresPaper/FigureCCsources.scr b/fdelmodc/FiguresPaper/FigureCCsources.scr
index 8bfccfafb18bf2d9ba8e09502f3c37b3d3b8e7fd..c547e8afc7656eba4bd2de292678e82d0507d368 100755
--- a/fdelmodc/FiguresPaper/FigureCCsources.scr
+++ b/fdelmodc/FiguresPaper/FigureCCsources.scr
@@ -36,6 +36,8 @@ echo $file_shot
 zsrc1=500
 zsrc2=4090
 
+set -x
+pwd
 ../fdelmodc \
 	file_cp=simple_cp.su ischeme=1 \
 	file_den=simple_ro.su \
@@ -64,6 +66,6 @@ zsrc2=4090
 	ntaper=45 \
     left=4 right=4 top=1 bottom=4 
 
-    mv src_nwaw.su source_volume_S${nsrc}_Dt${tsrc2}_F${fmax}.su
+cp src_nwav.su source_volume_S${nsrc}_Dt${tsrc2}_F${fmax}.su
 
 
diff --git a/fdelmodc/sourceOnSurface.c b/fdelmodc/sourceOnSurface.c
index a2062dc3f89f95a8f2ac6998b5fbc62768f4de07..1b858f94508eaaf252c8d5379e51e79704727b30 100644
--- a/fdelmodc/sourceOnSurface.c
+++ b/fdelmodc/sourceOnSurface.c
@@ -306,7 +306,7 @@ int reStoreSourceOnSurface(modPar mod, srcPar src, bndPar bnd, int ixsrc, int iz
 
 	/* restore source positions on the edge */
 	is0 = -1*floor((src.n-1)/2);
-#pragma omp	for private (isrc, ixs, izs, store) 
+//#pragma omp	for private (isrc, ixs, izs, store) 
 	for (isrc=0; isrc<src.n; isrc++) {
 		/* calculate the source position */
 		if (src.random || src.multiwav) {
diff --git a/marchenko/Makefile b/marchenko/Makefile
index 7fadd2cca106c94b3ae012eb6aaa78128c749097..848b6a8624741950e794ba9e25da1e43e5ab8213 100644
--- a/marchenko/Makefile
+++ b/marchenko/Makefile
@@ -4,9 +4,10 @@ include ../Make_include
 
 #LIBS    += -L$L -lgenfft -lm $(LIBSM)
 #OPTC += -g -O0 -Wall 
+#OPTC += -g -traceback #-check-pointers=rw -check-pointers-undimensioned
 
 
-ALL: fmute marchenko #marchenko_primaries
+ALL: fmute marchenko marchenko_primaries
 
 SRCJ	= fmute.c \
 		getFileInfo.c  \
@@ -35,20 +36,20 @@ SRCH	= marchenko.c \
 		docpkge.c \
 		getpars.c
 
-#SRCP	= marchenko_primaries.c \
-#		synthesis.c  \
-#		getFileInfo.c  \
-#		readData.c \
-#		readShotData.c \
-#		readTinvData.c \
-#		writeData.c \
-#		writeDataIter.c \
-#		wallclock_time.c \
-#		name_ext.c  \
-#		verbosepkg.c  \
-#		atopkge.c \
-#		docpkge.c \
-#		getpars.c
+SRCP	= marchenko_primaries.c \
+		synthesis.c  \
+		getFileInfo.c  \
+		readData.c \
+		readShotData.c \
+		readTinvData.c \
+		writeData.c \
+		writeDataIter.c \
+		wallclock_time.c \
+		name_ext.c  \
+		verbosepkg.c  \
+		atopkge.c \
+		docpkge.c \
+		getpars.c
 
 OBJJ	= $(SRCJ:%.c=%.o)
 
diff --git a/marchenko/demo/oneD/README_PRIMARIES b/marchenko/demo/oneD/README_PRIMARIES
new file mode 100644
index 0000000000000000000000000000000000000000..5eda7197ef806033b2f1286d28a433d9d53b2c64
--- /dev/null
+++ b/marchenko/demo/oneD/README_PRIMARIES
@@ -0,0 +1,182 @@
+Description of files:
+
+
+1) model.scr computes the model and the 'basis' shot of R => shot5_rp.su
+2) p5all.scr create from basis shot full Reflection response matrix => shotsdx5_rp.su (3.3 GB)
+3) primaries.scr computes the internal multiple attenuated (middle) shot record.
+4) itertions.scr computes the intermediate results of the multiple attenutation scheme and produces all output files that are used in the manuscript.
+5) epsPrimaries.scr selected output form 4) are converted to .eps pictures that are used in the Figures to explain the method.
+   reproduce the postscript files of the manuscript using SU postscript plotting programs.
+
+extra scripts
++) primariesPlane.scr: computes the internal moval scheme for plane-waves (see Meles 2020)
++) clean: remove all produced files and start with a clean directory
+
+To reproduce the Figures in the Manuscript:
+
+--------------------------
+* Figure 2: Model + Initial wavefield
+
+==> run model.scr to generate the data .su files: this will take 3-4 minutes. The files generate are:
+	- hom_cp.su, hom_ro.su
+	- model10_cp.su, model10_ro.su
+	- shot5_fd_rp.su
+	- shot5_hom_fd_rp.su
+	- shot5_rp.su
+	- wave.su
+	- wavefw.su
+
+==> run './epsPrimaries.scr Figure2' to generate the postscript files of Figure 2 
+
+model_cp_line.eps 	=> Figure 2a 
+model_ro_line.eps 	=> Figure 2b
+shotx0_rp.eps 		=> Figure 2c
+
+--------------------------
+* Figure 3: First Iteration
+
+The full R matrix is build up from the the shot record computed with model.scr
+
+==> run p5all.scr to generate the full R matrix for a fixed spread geometry. This will take less than one minute. The file generated is
+	- shotsdx5_rp.su this file has a size of 3.3 GB
+
+This R is the only input of the Marchenko Primaries algorithm.
+
+==> run './iterations.scr ./iterations.scr Figure3467' to compute the intermediate results of the first iterations of the Marchenko Primaries algorithm.
+This will take 10 seconds. The generated files are:
+
+	- M0_276000.su
+	- iter_2760##.su (not used)
+	- Mi_2760##.su
+	- f1min_2760##.su
+	- pred_rr_276.su (not used)
+
+	where ## ranges from 01 to 33
+
+To generate the postscript files for Figure 3:
+
+==> run './epsPrimaries.scr Figure3' 
+
+This will produce the following files:
+
+shotx0_rp.eps 		=> Figure 2c == Figure 3a
+M0_276000_flip.eps	=> Figure 3b
+fconvN0fulltime.eps => Figure 3c
+fconvN0flip.eps 	=> Figure 3d
+Mi_276001.eps		=> Figure 3e
+
+
+To generate the postscript files for Figure 4:
+==> run './epsPrimaries.scr Figure4' 
+
+To generate the postscript files for Figure 6:
+==> run './epsPrimaries.scr Figure6' 
+
+To generate the postscript files for Figure 7:
+==> run './epsPrimaries.scr Figure7' 
+
+
+
+shotx0_rp.eps 		=> Figure 5 R == Figure 3c
+f1min_001.eps	 	=> Figure 5 f^-_1,0
+iter_002.eps	 	=> Figure 5 -N_1
+f1plus_002.eps	 	=> Figure 5 f^+_1,0
+
+-- Figure 6 column 1
+iter_001.eps
+iter_002.eps
+iter_003.eps
+iter_004.eps
+-- Figure 6 column 2
+f1min_001.eps
+f1min_002.eps
+f1min_003.eps
+f1min_004.eps
+-- Figure 6 column 3
+p0plus_flip.eps
+f1plus_002.eps
+f1plus_003.eps
+f1plus_004.eps
+-- Figure 6 column 4
+pgreen_001.eps
+pgreen_002.eps
+pgreen_003.eps
+pgreen_004.eps
+-- Figure 6 column 5
+compare_001.eps
+compare_002.eps
+compare_003.eps
+compare_004.eps
+
+
+Note that the script epsIterwithLabels.scr produces the same figures, but with axis-labels. 
+
+--------------------------
+* Figure 7: Comparison of Marchenko result with reference
+
+To compute the marchenko results for 8 iterations.  
+
+==> run marchenko.scr This will take less than 1 minute. The generated files are:
+	- pgreen.su, pgreen512.su
+	- diffref.su
+	- Gplus0.su
+	- Gmin0.su
+	- f1plus0.su
+	- f1min0.su
+	- f2.su 
+
+
+At the end of the run the script will display in X11 a comparison of the middle trace. 
+
+To make the postscript figure 
+
+==> run epsCompare.scr
+
+mergeGreenRef.eps 	=> Figure 7
+
+--------------------------
+* Figure 8: snapshots of back propagating f2 in actual medium 
+
+To compute the snapshots 
+
+==> run backpropf2.scr This will take about 1 minute. The generated output file is
+	- backpropf2_sp.su
+
+The postscript files of Figure 8 are generated with 
+
+==> run epsBackprop.scr
+
+-- Figure 8 column 1
+backpropf2_-0.30.eps
+backpropf2_-0.15.eps
+backpropf2_-0.03.eps
+backpropf2_-0.02.eps
+backpropf2_0.00.eps
+-- Figure 8 column 2
+backpropf2_0.30.eps
+backpropf2_0.15.eps
+backpropf2_0.03.eps
+backpropf2_0.02.eps
+backpropf2_0.00.eps
+-- Figure 8 column 3
+backpropf2sum_0.30.eps
+backpropf2sum_0.15.eps
+backpropf2sum_0.03.eps
+backpropf2sum_0.02.eps
+backpropf2_0.00.eps
+
+
+The figures in the appendix, to explain the different options in the programs, are reproduced by
+
+==> run figAppendi.scr
+
+-- Figure A-1
+noise_above0.eps
+noise_above1.eps
+noise_above-1.eps
+noise_above2.eps
+noise_above4.eps
+
+-- Figure A-2
+iniFocus_shifts.eps
+
diff --git a/marchenko/demo/oneD/epsModel.scr b/marchenko/demo/oneD/epsModel.scr
index 6fddac7469d8998493c1c41577775adf32709f5b..2ee2e5e8ff541710206b06d67456738977d55cd7 100755
--- a/marchenko/demo/oneD/epsModel.scr
+++ b/marchenko/demo/oneD/epsModel.scr
@@ -50,7 +50,7 @@ clipref=`cat nep | awk '{print $1/15}'`
 suwind key=gx min=-2250000 max=2250000 < $file | \
         supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
     	label1="time sample number" label2="lateral distance (m)" \
-        n1tic=2 d2=5 f1=0.0 x1beg=0 x1end=500 d1=1 f1=0 d1num=100 \
+        n1tic=2 d2=5 f1=0.0 x1beg=0 x1end=400 d1=1 f1=0 d1num=100 \
         f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > shotx0_rp.eps
 
 rm nep 
diff --git a/marchenko/demo/oneD/epsPrimaries.scr b/marchenko/demo/oneD/epsPrimaries.scr
new file mode 100755
index 0000000000000000000000000000000000000000..00949d8fe8d62007ba0bae6ec5dda71b5bf1ef44
--- /dev/null
+++ b/marchenko/demo/oneD/epsPrimaries.scr
@@ -0,0 +1,206 @@
+#!/bin/bash
+
+export PATH=$HOME/src/OpenSource/bin/:$PATH:
+
+echo "Making eps files for $1"
+
+if [[ "$1" == "Figure2" ]];
+then
+./epsModel.scr
+exit 0
+fi
+
+istart=276
+#set same clip factor for iteration updates
+file=Mi_${istart}002.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+ns=`surange < Mi_276014.su | grep ns | awk '{print $2}'`
+nsd=400 #number of samples to display
+(( nstart = ns - nsd ))
+
+file=f1min_${istart}002.su
+sumax < $file mode=abs outpar=nep
+clipf1=`cat nep | awk '{print $1/11}'`
+clipm1=`cat nep | awk '{print $1/22}'`
+
+#M0
+file=M0_276000.su
+file_base=${file%.su}
+sumax < $file mode=abs outpar=nep
+clipref=`cat nep | awk '{print $1/15}'`
+
+echo "clipiter="$clipiter "clipref="$clipref "clipf1="$clipf1
+clipiter=$clipref
+clipf1=$clipref
+
+# Initialisation and First iteration
+if [[ "$1" == "Figure3" ]];
+then
+file=M0_276000.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 f1=0 f1num=700 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+suflip < $file flip=3 | \
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 f1=0 f1num=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}_flip.eps
+
+#convolve M0 with middle shot record of R
+select=451
+#original shot record from Reflection matrix
+suwind key=fldr min=$select max=$select < shotsdx5_rp.su > shotsx0.su
+#first iteration
+cp M0_276000.su N0.su
+#compute R*N0
+fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN0.su verbose=1 fmax=90
+
+file=fconvN0.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 d1=1 f1=0 f1num=0 d1num=200 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}fulltime.eps
+
+file=fconvN0.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 f1=0 f1num=700 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}zoom.eps
+
+suflip < fconvN0.su flip=3 | sugain scale=-1 > fconvN0flip.su
+file=fconvN0flip.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 f1=0 f1num=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+iter=1
+piter=$(printf %03d $iter)
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+
+fi
+
+
+# second iteration
+if [[ "$1" == "Figure4" ]];
+then
+suwind itmax=1023 <  Mi_276001.su > N0.su
+#compute R*N0
+fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN1.su verbose=1 fmax=90
+
+suflip < fconvN1.su flip=3 | sugain scale=-1 > fconvN1flip.su
+file=fconvN1flip.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 f1num=600 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipm1 > $file_base.eps
+
+file=fconvN1.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 d1=1 f1=0 f1num=0 d1num=200 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipm1 > ${file_base}fulltime.eps
+fi
+
+#Iterations
+if [[ "$1" == "Figure5" ]];
+then
+for (( iter=0; iter<=21; iter+=2 ))
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+#possibly add suflip flip=3 to flip the time-axis
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure5" ]];
+then
+for (( iter=1; iter<=21; iter+=2 ))
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+done
+fi
+
+exit;
+#iterations
+for (( iter=0; iter<=31; iter+=2 ))
+do
+piter=$(printf %03d $iter)
+echo $piter
+
+file=Mi_${istart}${piter}.su
+#ns=`surange < iter_001.su | grep ns | awk '{print $2}'`
+#dtrcv=`surange < iter_001.su | grep dt | awk '{print $2/1000000.0}'`
+#shift=$(echo "scale=4; ($dtrcv*($ns/2.0-1))" | bc -l)
+#basop choice=shift shift=$shift file_in=$file | \
+file_base=${file%.su}
+suflip flip=3 < $file | supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > ${file_base}flip.eps
+
+file=f1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+
+done
+
+iter=32
+piter=$(printf %03d $iter)
+echo $piter
+for (( istart=246; istart<=316; istart+=10 ))
+do
+file=f1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+done
+
+exit;
+
+#Windows for odd and even iterations
+file=WindowOdd276.su
+file_base=${file%.su}
+suwind key=tracl min=1 max=1 < $file | suwind itmin=1024 | \
+supsgraph d1=1 style=normal f1=0 \
+    labelsize=12 label1="time sample number" label2="amplitude" \
+    d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
+file=WindowEven276.su
+file_base=${file%.su}
+suwind key=tracl min=1 max=1 < $file | suwind itmax=1024 | \
+supsgraph d1=1 style=normal f1=0 \
+    labelsize=12 label1="time sample number" label2="amplitude" \
+    d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
+
diff --git a/marchenko/demo/oneD/iterations.scr b/marchenko/demo/oneD/iterations.scr
index f8245068c6d7c759ec5cee25c1a862202dd924ea..3cc1ec7981379ceab761f5aed388890c6c2246fa 100755
--- a/marchenko/demo/oneD/iterations.scr
+++ b/marchenko/demo/oneD/iterations.scr
@@ -9,17 +9,35 @@
 
 select=451
 
+if [[ "$1" == "Figure3467" ]];
+then
 for istart in 276
-#for istart in 176 200 276 400
-#for istart in 246 256 266 276 286 296 306 316
 do
 (( iend = istart + 1 ))
 
 ../../marchenko_primaries file_shot=shotsdx5_rp.su ishot=$select file_src=wave.su \
 	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
-	pad=0 niter=4 smooth=10 niterskip=600 shift=20 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+done
+fi
 
-#suximage < f1min_${istart}043.su  clip=1 title="${istart}" &
+if [[ "$1" == "Figure8" ]];
+then
+for istart in 246 256 266 276 286 296 306 316
+do
+(( iend = istart + 1 ))
 
+../../marchenko_primaries file_shot=shotsdx5_rp.su ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
 done
+fi
 
+if [[ "$1" == "Figure10" ]];
+then
+istart=276
+(( iend = istart + 1 ))
+../../marchenko_primaries file_shot=shotsdx5_rp.su ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=1 file_iter=iterT.su 
+fi
diff --git a/marchenko/demo/oneD/primaries.scr b/marchenko/demo/oneD/primaries.scr
index f5a82f2ebb581b3ad54f6bea1ec0e3e7f0550389..e11a01563aa2872d7b539f9b7694c2b64fe31a7f 100755
--- a/marchenko/demo/oneD/primaries.scr
+++ b/marchenko/demo/oneD/primaries.scr
@@ -8,7 +8,7 @@
 #cd $SLURM_SUBMIT_DIR
 
 export PATH=$HOME/src/OpenSource/bin:$PATH:
-export OMP_NUM_THREADS=20
+export OMP_NUM_THREADS=40
 
 #shot record to remove internal multiples
 select=451
@@ -16,8 +16,10 @@ select=451
 makewave fp=20 dt=0.004 file_out=wave.su nt=2048 t0=0.0 scale=0 scfft=1
 
 ../../marchenko_primaries file_shot=shotsdx5_rp.su ishot=$select file_src=wave.su \
-	nshots=601 verbose=2 istart=40 iend=500 fmax=90 pad=1024 \
-	niter=25 smooth=10 niterskip=600 shift=20 file_rr=pred_rr.su T=0
+	nshots=901 verbose=2 istart=40 iend=500 fmax=90 pad=1024 \
+	niter=31 smooth=10 niterec=2 niterskip=1 shift=20 file_rr=pred_rr.su T=0 file_update=update.su
+
+exit;
 
 #for reference original shot record from Reflection matrix
 suwind key=fldr min=$select max=$select < shotsdx5_rp.su itmax=2047 > shotsx0.su
diff --git a/marchenko/demo/oneD/primariesPlane.scr b/marchenko/demo/oneD/primariesPlane.scr
new file mode 100755
index 0000000000000000000000000000000000000000..0ab5c64d70e0541f87459979f760c0386452840e
--- /dev/null
+++ b/marchenko/demo/oneD/primariesPlane.scr
@@ -0,0 +1,40 @@
+#!/bin/bash -x
+#SBATCH -J marchenko_primaries
+#SBATCH --cpus-per-task=40
+#SBATCH --ntasks=1
+#SBATCH --time=2:40:00
+
+
+#cd $SLURM_SUBMIT_DIR
+
+export PATH=$HOME/src/OpenSource/bin:$PATH:
+export OMP_NUM_THREADS=40
+
+#shot record to remove internal multiples
+select=451
+
+makewave fp=20 dt=0.004 file_out=wave.su nt=1024 t0=0.0 scale=0 scfft=1
+
+#../../marchenko_primaries file_shot=shotsdx5_rp.su plane_wave=1 file_src=wave.su src_angle=10 \
+#	nshots=901 verbose=2 istart=276 iend=277 fmax=90 pad=1024 \
+# xorig=900 niter=2 smooth=10 niterskip=1 file_iter=iterplane.su shift=20 file_rr=plane2_10_rr.su T=0
+#exit
+../../marchenko_primaries file_shot=shotsdx5_rp.su plane_wave=1 file_src=wave.su src_angle=-10 \
+	nshots=901 verbose=2 istart=40 iend=1000 fmax=90 pad=1024 \
+	niter=20 smooth=10 niterskip=40 niterec=0 shift=20 file_rr=plane2_10_rr.su T=0
+
+exit
+
+#for reference original shot record from Reflection matrix
+suwind key=fldr min=$select max=$select < shotsdx5_rp.su itmax=2047 > shotsx0.su
+fconv file_in1=shotsx0.su file_in2=wave.su file_out=shotw.su
+
+# for displaying results
+
+(suwind key=offset min=0 max=0 < pred_rr.su ; suwind key=offset min=0 max=0 < shotw.su) | suxgraph &
+
+sudiff shotw.su pred_rr.su > diff.su
+suximage < shotw.su  x1end=2.5 clip=1 title="original shot"&
+suximage < pred_rr.su  x1end=2.5 clip=1 title="shot with multiples removed"&
+suximage < diff.su   x1end=2.5 clip=1 title="removed multiples"&
+
diff --git a/marchenko/marchenko.c b/marchenko/marchenko.c
index 99514fd27c0ce594820ec0d83c47ddd59f6f2ae4..e9472631dce5e11f73857aec2c5ca0c491c48256 100644
--- a/marchenko/marchenko.c
+++ b/marchenko/marchenko.c
@@ -125,7 +125,7 @@ int main (int argc, char **argv)
     int     hw, smooth, above, shift, *ixpos, npos, ix, plane_wave;
     int     nshots_r, *isxcount, *reci_xsrc, *reci_xrcv;
     float   fmin, fmax, *tapersh, *tapersy, fxf, dxf, *xsrc, *xrcv, *zsyn, *zsrc, *xrcvsyn;
-    double  t0, t1, t2, t3, tsyn, tread, tfft, tcopy, energyNi, energyN0;
+    double  t0, t1, t2, t3, tsyn, tread, tfft, tcopy, energyNi, *energyN0;
     float   d1, d2, f1, f2, fxsb, fxse, ft, fx, *xsyn, dxsrc;
     float   *green, *f2p, *pmin, *G_d, dt, dx, dxs, scl, mem;
     float   *f1plus, *f1min, *iRN, *Ni, *Nig, *trace, *Gmin, *Gplus;
@@ -224,6 +224,7 @@ int main (int argc, char **argv)
     G_d     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     muteW   = (int *)calloc(Nfoc*nxs,sizeof(int));
     tsynW   = (int *)malloc(Nfoc*nxs*sizeof(int)); // time-shift for Giovanni's plane-wave on non-zero times
+    energyN0= (double *)malloc(Nfoc*sizeof(double));
     trace   = (float *)malloc(ntfft*sizeof(float));
     xrcvsyn = (float *)calloc(Nfoc*nxs,sizeof(float)); // x-rcv postions of focal points
     xsyn    = (float *)malloc(Nfoc*sizeof(float)); // x-src position of focal points
@@ -488,9 +489,9 @@ int main (int argc, char **argv)
 					}
 				}
             }
-            if (iter==0) energyN0 = energyNi;
+            if (iter==0) energyN0[Nfoc] = energyNi;
             if (verbose >=2) vmess(" - iSyn %d: Ni at iteration %d has energy %e; relative to N0 %e", l, iter, sqrt(energyNi),
-sqrt(energyNi/energyN0));
+sqrt(energyNi/energyN0[Nfoc]));
         }
 
         /* apply mute window based on times of direct arrival (in muteW) */
@@ -628,6 +629,7 @@ sqrt(energyNi/energyN0));
 
     free(muteW);
     free(tsynW);
+    free(energyN0);
 
     t2 = wallclock_time();
     if (verbose) {
diff --git a/marchenko/marchenko_primaries.c b/marchenko/marchenko_primaries.c
new file mode 100644
index 0000000000000000000000000000000000000000..cc98f668ddf199dcc92923c998d2a4bdf480b6a6
--- /dev/null
+++ b/marchenko/marchenko_primaries.c
@@ -0,0 +1,902 @@
+#include "par.h"
+#include "segy.h"
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include <genfft.h>
+
+int omp_get_max_threads(void);
+int omp_get_num_threads(void);
+void omp_set_num_threads(int num_threads);
+
+#ifndef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+#ifndef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5))
+
+#ifndef COMPLEX
+typedef struct _complexStruct { /* complex number */
+    float r,i;
+} complex;
+#endif/* complex */
+
+int readShotData(char *filename, float *xrcv, float *xsrc, float *zsrc, int *xnx, complex *cdata, int nw, int nw_low, int nshots, int nx, int nxs, float fxsb, float dxs, int ntfft, int mode, float scale, float tsq, float Q, float f0, int reci, int *nshots_r, int *isxcount, int *reci_xsrc,  int *reci_xrcv, float *ixmask, int verbose);
+
+int readTinvData(char *filename, float *xrcv, float *xsrc, float *zsrc, int *xnx, int Nfoc, int nx, int ntfft, int mode, int *maxval, float *tinv, int hw, int verbose);
+
+int writeDataIter(char *file_iter, float *data, segy *hdrs, int n1, int n2, float d2, float f2, int n2out, int Nfoc, float *xsyn, float *zsyn, int *ixpos, int npos, int t0shift, int iter);
+int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *d2, float *f1, float *f2, float *xmin, float *xmax, float *sclsxgx, int *nxm);
+int readData(FILE *fp, float *data, segy *hdrs, int n1);
+int writeData(FILE *fp, float *data, segy *hdrs, int n1, int n2);
+int disp_fileinfo(char *file, int n1, int n2, float f1, float f2, float d1, float d2, segy *hdrs);
+double wallclock_time(void);
+
+void synthesis(complex *Refl, complex *Fop, float *Top, float *RNi, int nx, int nt, int nxs, int nts, float dt, float *xsyn, int
+Nfoc, float *xrcv, float *xsrc, int *xnx, float fxse, float fxsb, float dxs, float dxsrc, float dx, int ntfft, int
+nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int npos, double *tfft, int *isxcount, int
+*reci_xsrc,  int *reci_xrcv, float *ixmask, int verbose);
+
+void synthesisPositions(int nx, int nt, int nxs, int nts, float dt, float *xsyn, int Nfoc, float *xrcv, float *xsrc, int *xnx, float fxse, float fxsb, float dxs, float dxsrc, float dx, int nshots, int *ixpos, int *npos, int *isxcount, int countmin, int reci, int verbose);
+
+
+
+/*********************** self documentation **********************/
+char *sdoc[] = {
+" ",
+" MARCHENKO_primaries - Iterative primary reflections retrieval",
+" ",
+" marchenko_primaries file_tinv= file_shot= [optional parameters]",
+" ",
+" Required parameters: ",
+" ",
+"   file_shot= ............... Reflection response: R",
+" ",
+" Optional parameters: ",
+" ",
+" INTEGRATION ",
+"   ishot=nshots/2 ........... shot number(s) to remove internal multiples ",
+"   file_tinv= ............... shot-record (from R) to remove internal multiples",
+"   file_src= ................ optional source wavelet to convolve selected ishot(s)",
+" COMPUTATION",
+"   tap=0 .................... lateral taper R_ishot(1), file_shot(2), or both(3)",
+"   ntap=0 ................... number of taper points at boundaries",
+"   fmin=0 ................... minimum frequency in the Fourier transform",
+"   fmax=70 .................. maximum frequency in the Fourier transform",
+"   plane_wave=0 ............. model plane wave",
+"   src_angle=0 .............. angle with horizontal of plane source array",
+"   src_velo=1500 ............ velocity to use in src_angle definition",
+"   t0=0.1 ................... time shift in plane-wave source wavelet for migration",
+//"   xorig=nrecv/2 ............ center of plane-wave",
+" MARCHENKO ITERATIONS ",
+"   niter=22 ................. number of iterations to initialize and restart",
+"   niterec=2 ................ number of iterations in recursive part of the time-samples",
+"   niterskip=50 ............. restart scheme each niterskip samples with niter iterations",
+"   istart=20 ................ start sample of iterations for primaries",
+"   iend=nt .................. end sample of iterations for primaries",
+" MUTE-WINDOW ",
+"   shift=20 ................. number of points to account for wavelet (epsilon in papers)",
+"   smooth=shift/2 ........... number of points to smooth mute with cosine window",
+" REFLECTION RESPONSE CORRECTION ",
+"   tsq=0.0 .................. scale factor n for t^n for true amplitude recovery",
+"   Q=0.0 .......,............ Q correction factor",
+"   f0=0.0 ................... ... for Q correction factor",
+"   scale=2 .................. scale factor of R for summation of Mi with M0",
+"   pad=0 .................... amount of samples to pad the reflection series",
+//"   reci=0 ................... 1; add receivers as shots 2; only use receivers as shot positions",
+//"   countmin=0 ............... 0.3*nxrcv; minimum number of reciprocal traces for a contribution",
+" OUTPUT DEFINITION ",
+"   file_rr= ................. output file with primary only shot record",
+"   file_iter= ............... output file with -Mi(-t) for each iteration: writes",
+"              ............... M0.su=M0 : initialisation of algorithm",
+"              ............... RMi: iterative terms ",
+"              ............... f1min.su: f1min terms ",
+"   file_update= ............. output file with updates only => removed internal multiples",
+"   T=0 ...................... :1 compute transmission-losses compensated primaries ",
+"   verbose=0 ................ silent option; >0 displays info",
+" ",
+" ",
+" author  : Lele Zhang & Jan Thorbecke : 2019 ",
+" ",
+NULL};
+/**************** end self doc ***********************************/
+
+int main (int argc, char **argv)
+{
+    FILE    *fp_out, *fp_rr, *fp_w, *fp_up;
+	size_t  nread;
+    int     i, j, k, l, ret, nshots, Nfoc, nt, nx, nts, nxs, ngath;
+    int     size, n1, n2, ntap, tap, di, ntraces;
+    int     nw, nw_low, nw_high, nfreq, *xnx, *xnxsyn;
+    int     reci, countmin, mode, ixa, ixb, n2out, verbose, ntfft;
+    int     iter, niter, niterec, recur, niterskip, niterrun, tracf, *muteW;
+    int     hw, ii, iw, ishot, istart, iend;
+    int     smooth, *ixpos, npos, ix, m, pad, T, isms, isme, perc;
+    int     nshots_r, *isxcount, *reci_xsrc, *reci_xrcv, shift, plane_wave, xorig;
+    float   fmin, fmax, tom, deltom, *tapersh, *tapersy, fxf, dxf, *xsrc, *xrcv, *zsyn, *zsrc, *xrcvsyn;
+    double  t0, t1, t2, t3, t4, tsyn, tread, tfft, tcopy, tii;
+	double  energyMi, *energyM0;
+    float   tt0, d1, d2, f1, f2, fxsb, fxse, ft, fx, *xsyn, dxsrc;
+    float   *M0, *DD, *RR, *SRC, dt, dx, dxs, scl, mem;
+    float   *rtrace, *tmpdata, *f1min, *f1plus, *RMi, *Mi, *trace;
+	float   *Mup, *Msp;
+    float   xmin, xmax, scale, tsq;
+	float   Q, f0, *ixmask, *costaper;
+	float   src_velo, src_angle, grad2rad, p, *twplane;
+    complex *Refl, *Fop, *ctrace, *cwave, csum, cwav;
+    char    *file_tinv, *file_shot, *file_rr, *file_src, *file_iter, *file_update;
+    segy    *hdrs_out, hdr;
+
+    initargs(argc, argv);
+    requestdoc(1);
+
+    tsyn = tread = tfft = tcopy = tii = 0.0;
+    t0   = wallclock_time();
+
+    if (!getparstring("file_shot", &file_shot)) file_shot = NULL;
+    if (!getparstring("file_tinv", &file_tinv)) file_tinv = NULL;
+    if(!getparstring("file_src", &file_src)) file_src = NULL;
+    if (!getparstring("file_rr", &file_rr)) verr("parameter file_rr not found");
+    if (!getparstring("file_iter", &file_iter)) file_iter = NULL;
+    if (!getparstring("file_update", &file_update)) file_update = NULL;
+    
+    if (!getparint("verbose", &verbose)) verbose = 0;
+    if (!getparfloat("fmin", &fmin)) fmin = 0.0;
+    if (!getparfloat("fmax", &fmax)) fmax = 70.0;
+    if (!getparint("reci", &reci)) reci = 0;
+    reci=0; // source-receiver reciprocity is not yet fully build into the code
+    if (!getparfloat("scale", &scale)) scale = 2.0;
+    if (!getparfloat("Q", &Q)) Q = 0.0;
+    if (!getparfloat("tsq", &tsq)) tsq = 0.0;
+    if (!getparfloat("f0", &f0)) f0 = 0.0;
+    if (!getparint("tap", &tap)) tap = 0;
+    if (!getparint("ntap", &ntap)) ntap = 0;
+    if (!getparint("pad", &pad)) pad = 0;
+    if (!getparint("T", &T)) T = 0;
+
+
+    if(!getparint("niter", &niter)) niter = 22;
+    if(!getparint("niterec", &niterec)) niterec = 2;
+    if(!getparint("niterskip", &niterskip)) niterskip = 50;
+    if(!getparint("hw", &hw)) hw = 15;
+    if(!getparint("shift", &shift)) shift=20;
+    if(!getparint("smooth", &smooth)) smooth = shift/2;
+    if(!getparint("ishot", &ishot)) ishot=300;
+    if(!getparint("plane_wave", &plane_wave)) plane_wave=0;
+    if(!getparfloat("src_angle", &src_angle)) src_angle = 0.0;
+    if (!getparfloat("src_velo",&src_velo)) src_velo=1500.;
+    if (!getparfloat("t0",&tt0)) tt0=0.1;
+
+    if (T>0) {
+		T=-1;
+		isms = -shift;
+		isme = -1*MAX(0,shift-smooth);
+	}
+    else {
+		T=1;
+		isms = MAX(0,shift-smooth);
+		isme = shift;
+	}
+
+    if (reci && ntap) vwarn("tapering influences the reciprocal result");
+
+	smooth = MIN(smooth, shift);
+    if (smooth) {
+        costaper = (float *)malloc(smooth*sizeof(float));
+        scl = M_PI/((float)smooth);
+        for (i=0; i<smooth; i++) {
+            costaper[i] = 0.5*(1.0+cos((i+1)*scl));
+        }
+    }
+
+/*================ Reading info about shot and initial operator sizes ================*/
+
+    if (file_tinv != NULL) { /* M0 is read from file_tinv */
+        ngath = 0; /* setting ngath=0 scans all traces; n2 contains maximum traces/gather */
+        ret = getFileInfo(file_tinv, &n1, &n2, &ngath, &d1, &d2, &f1, &f2, &xmin, &xmax, &scl, &ntraces);
+        Nfoc = ngath;
+        nxs = n2; 
+        nts = n1;
+        dxs = d2; 
+        fxsb = f2;
+	}
+
+    ngath = 0; /* setting ngath=0 scans all traces; nx contains maximum traces/gather */
+    ret = getFileInfo(file_shot, &nt, &nx, &ngath, &d1, &dx, &ft, &fx, &xmin, &xmax, &scl, &ntraces);
+    nshots = ngath;
+
+    if (!getparfloat("dt", &dt)) dt = d1;
+    if(!getparint("istart", &istart)) istart=20;
+    if(!getparint("iend", &iend)) iend=nt;
+    iend = MIN(iend, nt-shift-1);
+    istart = MIN(MAX(1,istart),iend);
+
+    if (file_tinv == NULL) {/* 'M0' is one of the shot records */
+        if(!getparint("ishot", &ishot)) ishot=1+(nshots-1)/2;
+		ishot -= 1; /* shot numbering starts at 0 */
+        Nfoc = 1;
+        nxs  = nx;
+        nts  = nt;
+        dxs  = dx;
+        fxsb = fx;
+    }
+    assert (nxs >= nshots); /* ToDo allow other geometries */
+    //if(!getparint("xorig", &xorig)) xorig=-(nxs-1)/2;
+
+	/* compute time delay for plane-wave responses */
+    twplane = (float *) calloc(nxs,sizeof(float));
+	if (plane_wave==1) {
+        grad2rad = 17.453292e-3;
+        p = sin(src_angle*grad2rad)/src_velo;
+        if (p < 0.0) {
+			for (i=0; i<nxs; i++) {
+				twplane[i] = fabsf((nxs-i-1)*dxs*p)+tt0;
+            }
+        }
+		else {
+			for (i=0; i<nxs; i++) {
+				twplane[i] = (i)*dxs*p+tt0;
+				//twplane[i] = dxs*(xorig+i)*tan(src_angle*grad2rad)/src_velo;
+//				fprintf(stderr,"plane-wave i=%d x=%f t=%f %f\n", i, dxs*(xorig+i), twplane[i], (xorig+i)*dxs*p);
+			}
+		}
+	}
+
+    ntfft = optncr(MAX(nt+pad, nts+pad)); 
+    nfreq = ntfft/2+1;
+    nw_low = (int)MIN((fmin*ntfft*dt), nfreq-1);
+    nw_low = MAX(nw_low, 1);
+    nw_high = MIN((int)(fmax*ntfft*dt), nfreq-1);
+    nw  = nw_high - nw_low + 1;
+    scl   = 1.0/((float)ntfft);
+    deltom = 2.0*M_PI/(ntfft*dt);
+
+    if (!getparint("countmin", &countmin)) countmin = 0.3*nx;
+
+/*================ Allocating all data arrays ================*/
+
+    Fop     = (complex *)calloc(nxs*nw*Nfoc,sizeof(complex));
+    f1min   = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    f1plus  = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    RMi     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    Mi      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    M0     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    DD      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    SRC     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    RR      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    Mup     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    Msp     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    trace   = (float *)malloc(ntfft*sizeof(float));
+    ixpos   = (int *)malloc(nxs*sizeof(int));
+    energyM0= (double *)malloc(Nfoc*sizeof(double));
+    xrcvsyn = (float *)calloc(Nfoc*nxs,sizeof(float));
+    xsyn    = (float *)malloc(Nfoc*sizeof(float));
+    zsyn    = (float *)malloc(Nfoc*sizeof(float));
+    xnxsyn  = (int *)calloc(Nfoc,sizeof(int));
+
+    Refl    = (complex *)malloc(nw*nx*nshots*sizeof(complex));
+    xsrc    = (float *)calloc(nshots,sizeof(float));
+    zsrc    = (float *)calloc(nshots,sizeof(float));
+    xrcv    = (float *)calloc(nshots*nx,sizeof(float));
+    xnx     = (int *)calloc(nshots,sizeof(int));
+
+    if (reci!=0) {
+        reci_xsrc = (int *)malloc((nxs*nxs)*sizeof(int));
+        reci_xrcv = (int *)malloc((nxs*nxs)*sizeof(int));
+        isxcount  = (int *)calloc(nxs,sizeof(int));
+        ixmask  = (float *)calloc(nxs,sizeof(float));
+    }
+
+/*================ Read focusing operator(s) ================*/
+
+    if (file_tinv != NULL) {  /*  M0 is named DD */
+        muteW   = (int *)calloc(Nfoc*nxs,sizeof(int));
+        mode=-1; /* apply complex conjugate to read in data */
+        readTinvData(file_tinv, xrcvsyn, xsyn, zsyn, xnxsyn, Nfoc, nxs, ntfft, 
+             mode, muteW, DD, hw, verbose);
+        /* reading data added zero's to the number of time samples to be the same as ntfft */
+        nts   = ntfft;
+    	free(muteW);
+
+        /* check consistency of header values */
+        if (xrcvsyn[0] != 0 || xrcvsyn[1] != 0 ) fxsb = xrcvsyn[0];
+        fxse = fxsb + (float)(nxs-1)*dxs;
+        dxf = (xrcvsyn[nxs-1] - xrcvsyn[0])/(float)(nxs-1);
+        if (NINT(dxs*1e3) != NINT(fabs(dxf)*1e3)) {
+            vmess("dx in hdr.d1 (%.3f) and hdr.gx (%.3f) not equal",d2, dxf);
+            if (dxf != 0) dxs = fabs(dxf);
+            vmess("dx in operator => %f", dxs);
+        }
+	}
+
+/* ========================= Opening optional wavelet file ====================== */
+
+    cwave = (complex *)calloc(ntfft,sizeof(complex));
+    if (file_src != NULL){
+        if (verbose) vmess("Reading wavelet from file %s.", file_src);
+
+        fp_w = fopen(file_src, "r");
+        if (fp_w == NULL) verr("error on opening input file_src=%s", file_src);
+        nread = fread(&hdr, 1, TRCBYTES, fp_w);
+        assert (nread == TRCBYTES);
+        tmpdata = (float *)malloc(hdr.ns*sizeof(float));
+        nread = fread(tmpdata, sizeof(float), hdr.ns, fp_w);
+        assert (nread == hdr.ns);
+        fclose(fp_w);
+
+        /* Check dt wavelet is the same as reflection data */
+        if ( rint(dt*1000) != rint(hdr.dt/1000.0) ) {
+            vwarn("file_src dt != dt of file_shot %e != %e", hdr.dt/1e6, dt);
+        }
+        rtrace = (float *)calloc(ntfft,sizeof(float));
+
+        /* add zero's to the number of time samples to be the same as ntfft */
+        /* Add zero-valued samples in middle */
+        if (hdr.ns <= ntfft) {
+            for (i = 0; i < hdr.ns/2; i++) {
+                rtrace[i] = tmpdata[i];
+                rtrace[ntfft-1-i] = tmpdata[hdr.ns-1-i];
+            }
+        }
+        else {
+            vwarn("file_src has more samples than reflection data: truncated to ntfft = %d samples in middle are removed ", ntfft);
+            for (i = 0; i < ntfft/2; i++) {
+                rtrace[i] = tmpdata[i];
+                rtrace[ntfft-1-i] = tmpdata[hdr.ns-1-i];
+            }
+        }
+
+        rc1fft(rtrace, cwave, ntfft, -1);
+        free(tmpdata);
+        free(rtrace);
+    }
+    else {
+        for (i = 0; i < nfreq; i++) cwave[i].r = 1.0;
+    }
+
+
+/*================ Reading shot records ================*/
+
+    mode=1;
+    readShotData(file_shot, xrcv, xsrc, zsrc, xnx, Refl, nw, nw_low, ngath, nx, nx, fxsb, dxs, ntfft, 
+         mode, scale, tsq,  Q, f0, reci, &nshots_r, isxcount, reci_xsrc, reci_xrcv, ixmask,verbose);
+
+    if (tap == 2 || tap == 3) {
+    	tapersh = (float *)malloc(nx*sizeof(float));
+        for (j = 0; j < ntap; j++)
+            tapersh[j] = (cos(PI*(j-ntap)/ntap)+1)/2.0;
+        for (j = ntap; j < nx-ntap; j++)
+            tapersh[j] = 1.0;
+        for (j = nx-ntap; j < nx; j++)
+            tapersh[j] =(cos(PI*(j-(nx-ntap))/ntap)+1)/2.0;
+        if (verbose) vmess("Taper for shots applied ntap=%d", ntap);
+        for (l = 0; l < nshots; l++) {
+            for (j = 1; j < nw; j++) {
+                for (i = 0; i < nx; i++) {
+                    Refl[l*nx*nw+j*nx+i].r *= tapersh[i];
+                    Refl[l*nx*nw+j*nx+i].i *= tapersh[i];
+                }   
+            }   
+        }
+    	free(tapersh);
+    }
+
+    /* check consistency of header values */
+    fxf = xsrc[0];
+    if (nx > 1) dxf = (xrcv[nx-1] - xrcv[0])/(float)(nx-1);
+    else dxf = d2;
+    if (NINT(dx*1e3) != NINT(fabs(dxf)*1e3)) {
+        vmess("dx in hdr.d1 (%.3f) and hdr.gx (%.3f) not equal",dx, dxf);
+        if (dxf != 0) dx = fabs(dxf);
+        else verr("gx hdrs not set");
+        vmess("dx used => %f", dx);
+    }
+    
+    dxsrc = (float)xsrc[1] - xsrc[0];
+    if (dxsrc == 0) {
+        vwarn("sx hdrs are not filled in!!");
+        dxsrc = dx;
+    }
+
+/*================ Defining focusing operator(s) from R ================*/
+/* M0 = -R(ishot,-t) */
+
+    /* use ishot from Refl, complex-conjugate(time reverse), scale with -1 and convolve with wavelet */
+    if (file_tinv == NULL) {
+        if (verbose) vmess("Selecting M0 from Refl of %s", file_shot);
+        nts   = ntfft;
+
+        scl   = 1.0/((float)2.0*ntfft);
+        rtrace = (float *)calloc(ntfft,sizeof(float));
+        ctrace = (complex *)calloc(nfreq+1,sizeof(complex));
+        for (i = 0; i < xnx[ishot]; i++) {
+            for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
+                ctrace[j].r =  Refl[ishot*nw*nx+m*nx+i].r*cwave[j].r + Refl[ishot*nw*nx+m*nx+i].i*cwave[j].i;
+                ctrace[j].i = -Refl[ishot*nw*nx+m*nx+i].i*cwave[j].r + Refl[ishot*nw*nx+m*nx+i].r*cwave[j].i;;
+            }
+            /* transfrom result back to time domain */
+            cr1fft(ctrace, rtrace, ntfft, 1);
+            for (j = 0; j < nts; j++) {
+                DD[0*nxs*nts+i*nts+j] = -1.0*scl*rtrace[j];
+            }
+        }
+
+		/* construct plane wave from all shot records */
+		if (plane_wave==1) {
+        	for (l=0; l<nshots; l++) {
+				memset(ctrace, 0, sizeof(complex)*(nfreq+1));
+            	for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
+            		tom = j*deltom*twplane[l];
+					csum.r=0.0; csum.i=0.0;
+        			for (i = 0; i < xnx[l]; i++) {
+            			tom = j*deltom*twplane[i];
+            			csum.r += Refl[l*nw*nx+m*nx+i].r*cos(-tom) - Refl[l*nw*nx+m*nx+i].i*sin(-tom);
+            			csum.i += Refl[l*nw*nx+m*nx+i].i*cos(-tom) + Refl[l*nw*nx+m*nx+i].r*sin(-tom);
+            		}
+                	cwav.r =  csum.r*cwave[j].r + csum.i*cwave[j].i;
+                	cwav.i = -csum.i*cwave[j].r + csum.r*cwave[j].i;
+					ctrace[j] = cwav;
+            	}
+            	/* transfrom result back to time domain */
+            	cr1fft(ctrace, rtrace, ntfft, 1);
+            	for (j = 0; j < nts; j++) {
+               		DD[0*nxs*nts+l*nts+j] = -1.0*scl*rtrace[j];
+        		}
+				/* compute Source wavelet for plane-wave imaging */
+            	for (j = nw_low, m = 0; j <= nw_high; j++, m++) {
+            		tom = j*deltom*twplane[l];
+            		csum.r = cos(-tom);
+            		csum.i = sin(-tom);
+                	//cwav.r = csum.r*cwave[j].r - csum.i*cwave[j].i;
+                	//cwav.i = csum.i*cwave[j].r + csum.r*cwave[j].i;
+					ctrace[j] = csum;
+				}
+            	/* transfrom result back to time domain */
+            	cr1fft(ctrace, rtrace, ntfft, 1);
+            	for (j = 0; j < nts; j++) {
+               		SRC[0*nxs*nts+l*nts+j] = scl*rtrace[j];
+        		}
+			}
+		}
+        free(ctrace);
+        free(rtrace);
+
+        xsyn[0] = xsrc[ishot];
+        zsyn[0] = zsrc[ishot];
+        xnxsyn[0] = xnx[ishot];
+        fxse = fxsb = xrcv[0];
+        /* check consistency of header values */
+        for (l=0; l<nshots; l++) {
+            for (i = 0; i < nx; i++) {
+                fxsb = MIN(xrcv[l*nx+i],fxsb);
+                fxse = MAX(xrcv[l*nx+i],fxse);
+            }
+        }
+        dxf = dx;
+        dxs = dx;
+    }
+
+    /* define tapers to taper edges of acquisition */
+    if (tap == 1 || tap == 3) {
+    	tapersy = (float *)malloc(nxs*sizeof(float));
+        for (j = 0; j < ntap; j++)
+            tapersy[j] = (cos(PI*(j-ntap)/ntap)+1)/2.0;
+        for (j = ntap; j < nxs-ntap; j++)
+            tapersy[j] = 1.0;
+        for (j = nxs-ntap; j < nxs; j++)
+            tapersy[j] =(cos(PI*(j-(nxs-ntap))/ntap)+1)/2.0;
+        if (verbose) vmess("Taper for operator applied ntap=%d", ntap);
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < nxs; i++) {
+                for (j = 0; j < nts; j++) {
+                    DD[l*nxs*nts+i*nts+j] *= tapersy[i];
+                }   
+            }   
+        }   
+    	free(tapersy);
+    }
+
+/*================ Check the size of the files ================*/
+
+    if (NINT(dxsrc/dx)*dx != NINT(dxsrc)) {
+        vwarn("source (%.2f) and receiver step (%.2f) don't match",dxsrc,dx);
+        if (reci == 2) vwarn("step used from operator (%.2f) ",dxs);
+    }
+    di = NINT(dxf/dxs);
+    if ((NINT(di*dxs) != NINT(dxf)) && verbose) 
+        vwarn("dx in receiver (%.2f) and operator (%.2f) don't match",dx,dxs);
+    if (verbose) {
+        vmess("Number of receivers in focusop  = %d", nxs);
+        vmess("number of shots                 = %d", nshots);
+        vmess("number of receiver/shot         = %d", nx);
+        vmess("first model position            = %.2f", fxsb);
+        vmess("last model position             = %.2f", fxse);
+        vmess("first source position fxf       = %.2f", fxf);
+        vmess("source distance dxsrc           = %.2f", dxsrc);
+        vmess("last source position            = %.2f", fxf+(nshots-1)*dxsrc);
+        vmess("receiver distance     dxf       = %.2f", dxf);
+        vmess("direction of increasing traces = %d", di);
+        vmess("number of time samples fft nt nts = %d %d %d", ntfft, nt, nts);
+        vmess("time sampling                   = %e ", dt);
+        vmess("smoothing taper for time-window = %d ", smooth);
+        if (file_rr != NULL) vmess("RR output file                  = %s ", file_rr);
+        if (file_iter != NULL)  vmess("Iterations output file          = %s ", file_iter);
+    }
+
+/*================ initializations ================*/
+
+    if (reci) n2out = nxs;
+    else n2out = nshots;
+    mem = Nfoc*n2out*ntfft*sizeof(float)/1048576.0;
+    if (verbose) {
+        vmess("Time-sample range processed     = %d:%d", istart, iend);
+        vmess("number of output traces         = %d", n2out);
+        vmess("number of output samples        = %d", ntfft);
+        vmess("Size of output data/file        = %.1f MB", mem);
+    }
+
+	ixa=0; ixb=0;
+
+    synthesisPositions(nx, nt, nxs, nts, dt, xsyn, Nfoc, xrcv, xsrc, xnx, fxse, fxsb,
+        dxs, dxsrc, dx, nshots, ixpos, &npos, isxcount, countmin, reci, verbose);
+
+    if (verbose) {
+        vmess("synthesisPositions: nshots=%d npos=%d", nshots, npos);
+    }
+
+/*================ set variables for output data ================*/
+
+    n1 = nts; 
+	n2 = n2out;
+    f1 = ft; 
+	f2 = fxsb+dxs*ixpos[0];
+    d1 = dt;
+    if (reci == 0) d2 = dxsrc;
+    else if (reci == 1) d2 = dxs;
+    else if (reci == 2) d2 = dx;
+
+    hdrs_out = (segy *) calloc(n2,sizeof(segy));
+    if (hdrs_out == NULL) verr("allocation for hdrs_out");
+    size  = nxs*nts;
+
+    for (i = 0; i < n2; i++) {
+        hdrs_out[i].ns     = n1;
+        hdrs_out[i].trid   = 1;
+        hdrs_out[i].dt     = dt*1000000;
+        hdrs_out[i].f1     = f1;
+        hdrs_out[i].f2     = f2;
+        hdrs_out[i].d1     = d1;
+        hdrs_out[i].d2     = d2;
+        hdrs_out[i].trwf   = n2out;
+        hdrs_out[i].scalco = -1000;
+        hdrs_out[i].gx = NINT(1000*(f2+i*d2));
+        hdrs_out[i].scalel = -1000;
+        hdrs_out[i].tracl = i+1;
+    }
+    t1    = wallclock_time();
+    tread = t1-t0;
+    if(verbose) {
+        vmess("*******************************************");
+        vmess("***** Computing Marchenko for all steps****");
+        vmess("*******************************************");
+        fprintf(stderr,"    %s: Progress: %3d%%",xargv[0],0);
+	}
+    perc=(iend-istart)/100;if(!perc)perc=1;
+
+    if (plane_wave) {
+		writeDataIter("SRCplane.su", SRC, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, NINT(src_angle));
+		writeDataIter("DDplane.su", DD, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, NINT(src_angle));
+	}
+	free(SRC);
+
+/*================ start loop over number of time-samples ================*/
+
+    for (ii=istart; ii<iend; ii++) {
+
+/*================ initialization ================*/
+
+        /* once every 'niterskip' time-steps start from fresh M0 and do niter (~20) iterations */
+		if ( ((ii-istart)%niterskip==0) || (ii==istart) ) {
+			niterrun=niter;
+			recur=0;
+			if (verbose>1) vmess("Doing %d iterations to reset recursion at time-sample %d\n",niterrun,ii);
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < nxs; i++) {
+					iw = NINT((ii*dt+twplane[i])/dt);
+					//iw = ii;
+                    for (j = 0; j < nts; j++) {
+                        M0[l*nxs*nts+i*nts+j] = DD[l*nxs*nts+i*nts+j];
+                    }
+					/* apply mute window for samples above nts-ii */
+                    for (j = 0; j < MIN(nts, nts-iw+isms); j++) {
+                        M0[l*nxs*nts+i*nts+j] = 0.0;
+                    }
+                    for (j = nts-iw+isms, k=1; j < MIN(nts, nts-iw+isme); j++, k++) {
+                        M0[l*nxs*nts+i*nts+j] *= costaper[smooth-k];
+                    }
+                }
+                for (i = 0; i < npos; i++) {
+                    ix = ixpos[i];
+                    j = 0;
+                    f1min[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts+j];
+                    for (j = 1; j < nts; j++) {
+                       f1min[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts+nts-j];
+                    }
+			    }
+			}
+        	if (file_iter != NULL) {
+           	    writeDataIter("M0.su", M0, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, 1000*ii);
+			}
+		}
+		else { /* use f1min from previous iteration as starting point and do niterec iterations */
+			niterrun=niterec;
+			recur=1;
+			if (verbose>1) vmess("Doing %d iterations using previous result at time-sample %d",niterrun,ii);
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+					j=0;
+                    ix = ixpos[i];
+					iw = NINT((ii*dt+twplane[ix])/dt);
+					//iw = ii;
+                    M0[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts] - f1min[l*nxs*nts+i*nts+j];
+                    for (j = 1; j < nts; j++) {
+                        M0[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts+nts-j] - f1min[l*nxs*nts+i*nts+nts-j];
+                    }
+					/* apply mute window for samples above nts-ii */
+                    for (j = 0; j < MIN(nts,nts-iw+isms); j++) {
+                        M0[l*nxs*nts+i*nts+j] = 0.0;
+                    }
+                    for (j = nts-iw+isms, k=1; j < MIN(nts, nts-iw+isme); j++, k++) {
+                        M0[l*nxs*nts+i*nts+j] *= costaper[smooth-k];
+                    }
+                }
+            }
+        }
+/*================ initialization ================*/
+
+        memcpy(Mi, M0, Nfoc*nxs*ntfft*sizeof(float));
+        memset(Mup, 0, Nfoc*nxs*ntfft*sizeof(float));
+
+/*================ number of Marchenko iterations ================*/
+
+        for (iter=0; iter<niterrun; iter++) {
+
+            t2    = wallclock_time();
+    
+/*================ construction of Mi(-t) = - \int R(x,t) Mi(t)  ================*/
+
+            synthesis(Refl, Fop, Mi, RMi, nx, nt, nxs, nts, dt, xsyn, Nfoc,
+                xrcv, xsrc, xnx, fxse, fxsb, dxs, dxsrc, dx, ntfft, nw, nw_low, nw_high, mode,
+                reci, nshots, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv, ixmask, verbose);
+
+        	if (file_iter != NULL) {
+            	writeDataIter(file_iter, RMi, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, 1000*ii+iter+1);
+        	}
+
+			if (verbose >=2) {
+                for (l = 0; l < Nfoc; l++) {
+			        energyMi = 0.0;
+                    for (i = 0; i < npos; i++) {
+                        for (j = 0; j < nts; j++) {
+                            energyMi += RMi[l*nxs*nts+ix*nts+j]*RMi[l*nxs*nts+ix*nts+j];
+					    }
+                    }
+            		if (iter % 2 == 0) { 
+                    	if (iter==0) energyM0[Nfoc] = energyMi;
+                        vmess(" - iSyn %d: Mi at iteration %d has energy %e; relative to M0 %e", l, iter, sqrt(energyMi), sqrt(energyMi/energyM0[Nfoc]));
+                   }
+                }
+            }
+
+            t3 = wallclock_time();
+            tsyn +=  t3 - t2;
+    
+            /* N_k(x,t) = -N_(k-1)(x,-t) */
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+                    j = 0;
+                    ix = ixpos[i];
+                    Mi[l*nxs*nts+i*nts+j]    = -RMi[l*nxs*nts+ix*nts+j];
+                    for (j = 1; j < nts; j++) {
+                        Mi[l*nxs*nts+i*nts+j]    = -RMi[l*nxs*nts+ix*nts+nts-j];
+                    }
+                }
+            }
+    
+            if (iter % 2 == 0) { /* even iterations: => f_1^+(t) */
+                /* apply muting for the acausal part */
+                for (l = 0; l < Nfoc; l++) {
+                    for (i = 0; i < npos; i++) {
+						ix = ixpos[i];
+						iw = NINT((ii*dt+twplane[ix])/dt);
+						//iw = ii;
+						/* apply mute window for samples after ii */
+                        for (j = MAX(0,iw-isme); j < nts; j++) {
+                            Mi[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                        for (j = MAX(0,iw-isme), k=0; j < iw-isms; j++, k++) {
+                            Mi[l*nxs*nts+i*nts+j] *= costaper[k];
+                        }
+						/* apply mute window for delta function at t=0*/
+						iw = NINT((twplane[ix])/dt);
+                        for (j = 0; j < MAX(0,iw+shift-smooth); j++) {
+                            Mi[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                        for (j = MAX(0,iw+shift-smooth), k=1; j < MAX(0,iw+shift); j++, k++) {
+                            Mi[l*nxs*nts+i*nts+j] *= costaper[smooth-k];
+                        }
+                        f1plus[l*nxs*nts+i*nts+j] += Mi[l*nxs*nts+i*nts+j];
+                        for (j = 1; j < nts; j++) {
+                            f1plus[l*nxs*nts+i*nts+j] += Mi[l*nxs*nts+i*nts+j];
+                        }
+                    }
+                }
+/*
+        		if (file_iter != NULL) {
+            		writeDataIter("f1plus.su", f1plus, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, 1000*ii+iter);
+				}
+*/
+            }
+            else {/* odd iterations: => f_1^-(t)  */
+                for (l = 0; l < Nfoc; l++) {
+                    for (i = 0; i < npos; i++) {
+                    	ix = ixpos[i];
+						if (recur==1) { /* use f1min from previous iteration */
+                            for (j = 0; j < nts; j++) {
+                                Mi[l*nxs*nts+i*nts+j] += DD[l*nxs*nts+ix*nts+j];
+						    }
+                            j = 0;
+                            f1min[l*nxs*nts+i*nts+j] = -Mi[l*nxs*nts+i*nts+j];
+                            for (j = 1; j < nts; j++) {
+                                f1min[l*nxs*nts+i*nts+j] = -Mi[l*nxs*nts+i*nts+nts-j];
+                            }
+        		        	if (file_update != NULL) {
+								j=0;
+                            	Mup[l*nxs*nts+i*nts+j] += f1min[l*nxs*nts+i*nts+j]+DD[l*nxs*nts+i*nts+j];
+                            	for (j = 1; j < nts; j++) {
+                                	Mup[l*nxs*nts+i*nts+j] += f1min[l*nxs*nts+i*nts+j]+DD[l*nxs*nts+i*nts+nts-j];
+                            	}
+							}
+						}
+						else {
+                            j = 0;
+                            f1min[l*nxs*nts+i*nts+j] -= Mi[l*nxs*nts+i*nts+j];
+                            for (j = 1; j < nts; j++) {
+                                f1min[l*nxs*nts+i*nts+j] -= Mi[l*nxs*nts+i*nts+nts-j];
+                            }
+        		        	if (file_update != NULL) {
+								j=0;
+                            	Mup[l*nxs*nts+i*nts+j] -= Mi[l*nxs*nts+i*nts+j];
+                            	for (j = 1; j < nts; j++) {
+                                	Mup[l*nxs*nts+i*nts+j] -= Mi[l*nxs*nts+i*nts+nts-j];
+                            	}
+							}
+					    }
+						/* apply mute window for delta function at t=0*/
+						iw = NINT((twplane[ix])/dt);
+                        for (j = nts-shift+smooth+iw; j < nts; j++) {
+                            Mi[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                        for (j = nts-shift+iw, k=0; j < MIN(nts, nts-shift+smooth+iw); j++, k++) {
+                            Mi[l*nxs*nts+i*nts+j] *= costaper[k];
+                        }
+					    /* apply mute window for samples above nts-ii */
+						iw = NINT((ii*dt+twplane[ix])/dt);
+						//iw = ii;
+                        for (j = 0; j < MIN(nts,nts-iw+isms); j++) {
+                            Mi[l*nxs*nts+i*nts+j] = 0.0;
+                        }
+                        for (j = nts-iw+isms, k=1; j < MIN(nts,nts-iw+isme); j++, k++) {
+                            Mi[l*nxs*nts+i*nts+j] *= costaper[smooth-k];
+                        }
+                    }
+                }
+        		if (file_iter != NULL) {
+            		writeDataIter("f1min.su", f1min, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, 1000*ii+iter+1);
+				}
+            } /* end else (iter) branch */
+        	if (file_iter != NULL) {
+                writeDataIter("Mi.su", Mi, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, 1000*ii+iter+1);
+			}
+
+            t2 = wallclock_time();
+            tcopy +=  t2 - t3;
+            if (verbose>2) vmess("*** Iteration %d finished ***", iter);
+    
+        } /* end of iterations */
+
+        for (l = 0; l < Nfoc; l++) {
+            for (i = 0; i < npos; i++) {
+           		ix = ixpos[i];
+				iw = NINT((ii*dt+twplane[ix])/dt);
+                RR[l*nxs*nts+i*nts+iw] = f1min[l*nxs*nts+i*nts+iw];
+       			if (file_update != NULL) Msp[l*nxs*nts+i*nts+iw] = Mup[l*nxs*nts+i*nts+iw];
+            }
+        }
+
+        /* To Do? optional write intermediate RR results to file */
+
+        if (verbose) {
+            if(!((iend-ii-istart)%perc)) fprintf(stderr,"\b\b\b\b%3d%%",(ii-istart)*100/(iend-istart));
+            if((ii-istart)==10)t4=wallclock_time();
+            if((ii-istart)==20){
+                t4=(wallclock_time()-t4)*((iend-istart)/10.0);
+                fprintf(stderr,"\r    %s: Estimated total compute time = %.2f s.\n    %s: Progress: %.0f%%",xargv[0],(float)t4,xargv[0],(ii-istart)/((iend-istart)/100.0));
+            }
+            //t4=wallclock_time();
+            tii=(t4-t1)*((float)(iend-istart)/(ii-istart+1.0))-(t4-t1);
+            //vmess("Remaining compute time at time-sample %d = %.2f s.",ii, tii);
+        }
+
+    } /* end of time iterations ii */
+
+    free(Mi);
+    free(energyM0);
+    free(M0);
+    free(f1min);
+    free(f1plus);
+
+    t2 = wallclock_time();
+    if (verbose) {
+        fprintf(stderr,"\b\b\b\b%3d%%\n",100);
+        vmess("Total CPU-time marchenko = %.3f", t2-t0);
+        vmess("with CPU-time synthesis  = %.3f", tsyn);
+        vmess("with CPU-time copy array = %.3f", tcopy);
+        vmess("     CPU-time fft data   = %.3f", tfft);
+        vmess("and CPU-time read data   = %.3f", tread);
+    }
+
+/*================ write output files ================*/
+
+    if (file_update != NULL) {
+		fp_up = fopen(file_update, "w+");
+    	if (fp_up==NULL) verr("error on creating output file %s", file_update);
+	}
+    fp_rr = fopen(file_rr, "w+");
+    if (fp_rr==NULL) verr("error on creating output file %s", file_rr);
+
+    tracf = 1;
+    for (l = 0; l < Nfoc; l++) {
+        if (reci) f2 = fxsb;
+        else f2 = fxf;
+
+        for (i = 0; i < n2; i++) {
+            hdrs_out[i].fldr   = l+1;
+            hdrs_out[i].sx     = NINT(xsyn[l]*1000);
+            hdrs_out[i].offset = (long)NINT((f2+i*d2) - xsyn[l]);
+            hdrs_out[i].tracf  = tracf++;
+            hdrs_out[i].selev  = NINT(zsyn[l]*1000);
+            hdrs_out[i].sdepth = NINT(-zsyn[l]*1000);
+            hdrs_out[i].f1     = f1;
+        }
+        ret = writeData(fp_rr, (float *)&RR[l*size], hdrs_out, n1, n2);
+        if (ret < 0 ) verr("error on writing output file.");
+    	if (file_update != NULL) {
+        	ret = writeData(fp_up, (float *)&Msp[l*size], hdrs_out, n1, n2);
+        	if (ret < 0 ) verr("error on writing output file.");
+		} 
+    }
+    ret = fclose(fp_rr);
+    if (ret < 0) verr("err %d on closing output file %s",ret, file_rr);
+	if (file_update != NULL) {
+		ret = fclose(fp_up);
+    	if (ret < 0) verr("err %d on closing output file %s",ret, file_update);
+	}
+
+    if (verbose) {
+        t1 = wallclock_time();
+        vmess("and CPU-time write data  = %.3f", t1-t2);
+    }
+
+/*================ free memory ================*/
+
+    free(hdrs_out);
+
+    exit(0);
+}
+
+
diff --git a/marchenko/synthesis.c b/marchenko/synthesis.c
index 828da0749d5d9158ad5f14314e999a6437e4c5e6..e975d8746ffa1b44fcf34a15f9b3fa0db03772fb 100644
--- a/marchenko/synthesis.c
+++ b/marchenko/synthesis.c
@@ -95,7 +95,7 @@ nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int np
     /* transform muted Ni (Top) to frequency domain, input for next iteration  */
         for (l = 0; l < Nfoc; l++) {
             /* set Fop to zero, so new operator can be defined within ixpos points */
-            memset(&Fop[l*nxs*nw].r, 0, nxs*nw*2*sizeof(float));
+           memset(&Fop[l*nxs*nw], 0, nxs*nw*sizeof(complex));
             for (i = 0; i < npos; i++) {
                 rc1fft(&Top[l*size+i*nts],ctrace,ntfft,-1);
                 ix = ixpos[i];
@@ -111,7 +111,7 @@ nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int np
         first=0;
         for (l = 0; l < Nfoc; l++) {
             /* set Fop to zero, so new operator can be defined within all ix points */
-            memset(&Fop[l*nxs*nw].r, 0, nxs*nw*2*sizeof(float));
+            memset(&Fop[l*nxs*nw], 0, nxs*nw*sizeof(complex));
             for (i = 0; i < nxs; i++) {
                 rc1fft(&Top[l*size+i*nts],ctrace,ntfft,-1);
                 for (iw=0; iw<nw; iw++) {
diff --git a/utils/syn2d.c b/utils/syn2d.c
index 2e87e9e328784546ec256521a03703b94ab555a3..f8c01b91e6322445e4a0d7e6f8d4e5b70c6fc68f 100644
--- a/utils/syn2d.c
+++ b/utils/syn2d.c
@@ -242,7 +242,6 @@ int main (int argc, char **argv)
         if (verbose>=2) vmess("dimensions used: %d x %d",ntmax,nxmax);
     }
 	if (!getparfloat("dt", &dt)) dt = d1;
-	fprintf(stderr,"dt=%e\n", dt);
     
     size     = ntmax * nxmax;
 	xrcv     = (float *)malloc(nxmax*sizeof(float));
@@ -501,6 +500,7 @@ int main (int argc, char **argv)
 
 		for (i = 0; i < n2; i++) {
             hdrs_out[i].ns     = n1;
+            hdrs_out[i].dt     = (unsigned short)(dt*1000000);
             hdrs_out[i].trid   = hdrs_in[i].trid;
             hdrs_out[i].f1     = f1;
             hdrs_out[i].f2     = f2;
@@ -510,7 +510,7 @@ int main (int argc, char **argv)
 			hdrs_out[i].trwf   = n2out;
 			hdrs_out[i].scalco = -1000;
 			hdrs_out[i].sx = NINT(xsyn[l]*1000);
-			hdrs_out[i].gx = NINT(1000*(f2+i*d2));
+			hdrs_out[i].gx = NINT(1000.0*(f2+i*d2));
 			hdrs_out[i].offset = (long)NINT((f2+i*d2) - xsyn[l]);
 			hdrs_out[i].scalel = -1000;
 			hdrs_out[i].selev  = NINT(zsyn[l]*1000);