diff --git a/README.md b/README.md
index 9dafebbb6c609509996b5a6611f7e978a1e2c211..ca30abef2b49ce06a33df512a77a457325551770 100644
--- a/README.md
+++ b/README.md
@@ -46,13 +46,19 @@ Virtual acoustics in inhomogeneous media with single-sided access:
 2018, Scientific Reports, Vol. 8, 2497. 
 Download: http://homepage.tudelft.nl/t4n4v/4_Journals/Nature/SR_18.pdf
 
--4- When you are using the marchenko_primaries algorithm developed by Lele Zhang please refer to the following paper
+-4- When you are using the marchenko_primaries algorithm developed by Lele Zhang please refer to the following papers
 
 Free-surface and internal multiple elimination in one step without adaptive subtraction
 Lele Zhang and Evert Slob
 2019, Geophysics, Vol. 84, no. 1 (January-February); p. A7-A11, doi: 10.1190/GEO2018-0548.1
 Download: http://homepage.tudelft.nl/t4n4v/BeyondInterferometry/geo_19h.pdf
 
+and
+
+Implementation of the Marchenko Multiple Elimination algorithm,
+Jan Thorbecke, Lele Zhang, Kees Wapenaar, Evert Slob,
+2021, Geophysics, Vol. 86, no. 2 (March-April); p. 1-15, doi: 10.1190/GEO2020-0196.1
+
 -5- If you use the fdacrtmc code of Max Holicki please refer to the following paper:
 
 Acoustic directional snapshot wavefield decomposition
@@ -170,6 +176,7 @@ UPDATES AND LATEST VERSION
 The latest version of the source code and manual can be found at:
 
 git clone https://github.com/JanThorbecke/OpenSource.git
+
 git clone git://github.com/JanThorbecke/OpenSource.git
 
 The code is used by many different people and if there is a request for a new option in the code, then we will try to implement, test and make it available. 
diff --git a/REPRODUCE b/REPRODUCE
index 524471a72b1760bd09fc2c481afec6b9f6359c22..b2933802fcb6735a5ddf29566ad8998191eeee72 100644
--- a/REPRODUCE
+++ b/REPRODUCE
@@ -40,6 +40,12 @@ Lele Zhang and Evert Slob
 2019, Geophysics, Vol. 84, no. 1 (January-February); p. A7-A11, doi: 10.1190/GEO2018-0548.1
 Download: http://homepage.tudelft.nl/t4n4v/BeyondInterferometry/geo_19h.pdf
 
+and the paper that belongs to the MME source code package:
+
+"Implementation of the Marchenko Multiple Elimination algorithm",
+Jan Thorbecke, Lele Zhang, Kees Wapenaar, Evert Slob,
+2021, Geophysics, Vol. 86, no. 2 (March-April); p. 1-15, doi: 10.1190/GEO2020-0196.1
+
 *** To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Marchenko Multiple Elimination algorithm" the scripts in marchenko/demo/mme directory can be used. The README_PRIMARIES in this directory gives more instructions and guidelines. 
 
 
diff --git a/extrap/.DS_Store b/extrap/.DS_Store
index 14850029b2e2aebed09cfc4fffb75769304464f8..c3b7e3a2a3d8b2fe690a6030d2e0100659cfb237 100644
Binary files a/extrap/.DS_Store and b/extrap/.DS_Store differ
diff --git a/extrap/main/Makefile b/extrap/main/Makefile
index ce4b2add3cf7ba9d7522d744bbec4aeff94ebd5d..b43c2bd7578088e454eb3fc4c52e798fcde24dd8 100644
--- a/extrap/main/Makefile
+++ b/extrap/main/Makefile
@@ -2,7 +2,7 @@
 
 include ../../Make_include
 
-LIBS += -L$L -lextrap2d  -lgenfft -lm 
+LIBS += -L$L -lextrap2d -lgenfft $(BLAS) -lm 
 
 LDFLAGS += $(LIBS)
 
diff --git a/fdelmodc/getParameters.c b/fdelmodc/getParameters.c
index 61983b0d2b39f2ed4ea83a5006cac6e9bf77d2f4..0894e7c7338f11b5a4fa779e5d7bda2cb3999583 100644
--- a/fdelmodc/getParameters.c
+++ b/fdelmodc/getParameters.c
@@ -891,6 +891,8 @@ int getParameters(modPar *mod, recPar *rec, snaPar *sna, wavPar *wav, srcPar *sr
 			for (is=0; is<nsrc; is++) {
 				src->tbeg[is] = fabsf((nsrc-is-1)*dx*p);
 			}
+			//rec->delay+=NINT(fabsf((nsrc-1)*dx*p)/mod->dt);
+			//mod->nt += NINT(fabsf((nsrc-1)*dx*p)/mod->dt);
 		}
 		else {
 			for (is=0; is<nsrc; is++) {
diff --git a/marchenko/applyMute.c b/marchenko/applyMute.c
index 64f806849ee753a7da99d4c0dc7807f7bec6787e..782df17943e244bb1a55c2f0eb47a6d030e7ecd6 100644
--- a/marchenko/applyMute.c
+++ b/marchenko/applyMute.c
@@ -14,13 +14,13 @@
 
 void applyMute( float *data, int *mute, int smooth, int above, int Nfoc, int nxs, int nt, int *ixpos, int npos, int shift, int *tsynW)
 {
-     int i, j, l, isyn;
+    int i, j, l, isyn;
     float *costaper, scl;
     int imute, tmute, ts;
 
     if (smooth) {
         costaper = (float *)malloc(smooth*sizeof(float));
-        scl = M_PI/((float)smooth);
+        scl = M_PI/((float)(smooth+1));
         for (i=0; i<smooth; i++) {
             costaper[i] = 0.5*(1.0+cos((i+1)*scl));
         }
@@ -40,7 +40,7 @@ void applyMute( float *data, int *mute, int smooth, int above, int Nfoc, int nxs
                 }
             }
         }
-        else if (above==0){
+        else if (above==0){ /* implementation of <t1-epsilon : -t1+epsilon> window */
             for (i = 0; i < npos; i++) {
                 imute = ixpos[i];
                 tmute = mute[isyn*nxs+imute];
@@ -49,18 +49,37 @@ void applyMute( float *data, int *mute, int smooth, int above, int Nfoc, int nxs
                     memset(&data[isyn*nxs*nt+i*nt],0, sizeof(float)*nt);
                     continue;
                 }
-                for (j = MAX(0,-2*ts+tmute-shift),l=0; j < MAX(0,-2*ts+tmute-shift+smooth); j++,l++) {
+                for (j = MAX(0,-2*ts+tmute-shift-smooth),l=0; j < MAX(0,-2*ts+tmute-shift); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[l];
                 }
-                for (j = MAX(0,-2*ts+tmute-shift+smooth)+1; j < MIN(nt,nt+1-tmute+shift-smooth); j++) {
+                for (j = MAX(0,-2*ts+tmute-shift); j < MIN(nt,nt+1-tmute+shift+2*ts); j++) {
                     data[isyn*nxs*nt+i*nt+j] = 0.0;
                 }
-                for (j = MIN(nt,nt-tmute+shift-smooth),l=0; j < MIN(nt,nt-tmute+shift); j++,l++) {
+                for (j = MIN(nt,nt+1-tmute+shift+2*ts),l=0; j < MIN(nt,nt+1-tmute+shift+2*ts+smooth); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
                 }
             }
         }
-        else if (above==-2){
+        else if (above==4 || above==-4) { //Psi gate which is the inverse of the Theta gate (above=0)
+            for (i = 0; i < npos; i++) {
+                imute = ixpos[i];
+                tmute = mute[isyn*nxs+imute];
+                ts = tsynW[isyn*nxs+imute];
+                for (j = 0; j < MAX(0,tmute-2*ts-shift-smooth); j++) {
+                    data[isyn*nxs*nt+i*nt+j] = 0.0;
+                }
+                for (j = MAX(0,-2*ts+tmute-shift-smooth),l=0; j < MAX(0,-2*ts+tmute-shift); j++,l++) {
+                    data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
+                }
+                for (j = MIN(nt,nt+1-tmute+shift+2*ts),l=0; j < MIN(nt,nt+1-tmute+shift+2*ts+smooth); j++,l++) {
+                    data[isyn*nxs*nt+i*nt+j] *= costaper[l];
+                }
+                for (j = MIN(nt,nt+1-tmute+shift+2*ts+smooth); j < nt; j++) {
+                    data[isyn*nxs*nt+i*nt+j] = 0.0;
+                }
+            }
+        }
+        else if (above==6){ /* implementation of <t1+epsilon : -t1+epsilon> window */
             for (i = 0; i < npos; i++) {
                 imute = ixpos[i];
                 tmute = mute[isyn*nxs+imute];
@@ -69,46 +88,86 @@ void applyMute( float *data, int *mute, int smooth, int above, int Nfoc, int nxs
                     memset(&data[isyn*nxs*nt+i*nt],0, sizeof(float)*nt);
                     continue;
                 }
-                for (j = MAX(0,-2*ts+tmute+shift),l=0; j < MAX(0,-2*ts+tmute+shift+smooth); j++,l++) {
+                for (j = MAX(0,-2*ts+tmute+shift-smooth),l=0; j < MAX(0,-2*ts+tmute+shift); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[l];
                 }
-                for (j = MAX(0,-2*ts+tmute+shift+smooth)+1; j < MIN(nt,nt+1-tmute-shift-smooth); j++) {
+                for (j = MAX(0,-2*ts+tmute+shift); j < MIN(nt,nt-tmute+shift); j++) {
                     data[isyn*nxs*nt+i*nt+j] = 0.0;
                 }
-                for (j = MIN(nt,nt-tmute-shift-smooth),l=0; j < MIN(nt,nt-tmute-shift); j++,l++) {
+                for (j = MIN(nt,nt-tmute+shift),l=0; j < MIN(nt,nt-tmute+shift+smooth); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
                 }
             }
         }
-        else if (above==-1){
+        else if (above==-6){ /* time-reversed implementation of <t1+epsilon : -t1+epsilon> window */
             for (i = 0; i < npos; i++) {
                 imute = ixpos[i];
                 tmute = mute[isyn*nxs+imute];
                 ts = tsynW[isyn*nxs+imute];
-                for (j = MAX(0,ts+tmute-shift),l=0; j < MAX(0,ts+tmute-shift+smooth); j++,l++) {
+                if (tmute >= nt/2) {
+                    memset(&data[isyn*nxs*nt+i*nt],0, sizeof(float)*nt);
+                    continue;
+                }
+                for (j = MAX(0,-2*ts+tmute-shift-smooth),l=0; j < MAX(0,-2*ts+tmute-shift); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[l];
                 }
-                for (j = MAX(0,ts+tmute-shift+smooth); j < nt; j++) {
+                for (j = MAX(0,-2*ts+tmute-shift); j < MIN(nt,nt-tmute-shift); j++) {
                     data[isyn*nxs*nt+i*nt+j] = 0.0;
                 }
+                for (j = MIN(nt,nt-tmute-shift),l=0; j < MIN(nt,nt-tmute-shift+smooth); j++,l++) {
+                    data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
+                }
             }
         }
-        else if (above==4) { //Psi gate which is the inverse of the Theta gate (above=0)
+        else if (above==10) { //Psi gate which is the inverse of the above=6 gate 
             for (i = 0; i < npos; i++) {
                 imute = ixpos[i];
                 tmute = mute[isyn*nxs+imute];
-                for (j = MAX(0,tmute-shift-smooth),l=0; j < MAX(0,tmute-shift); j++,l++) {
+                for (j = 0; j < MAX(0,tmute+shift-smooth); j++) {
+                    data[isyn*nxs*nt+i*nt+j] = 0.0;
+                }
+                for (j = MAX(0,tmute+shift-smooth),l=0; j < MAX(0,tmute+shift); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
                 }
-                for (j = 0; j < MAX(0,tmute-shift-smooth-1); j++) {
+                for (j = MIN(nt,nt-tmute+shift),l=0; j < MIN(nt,nt-tmute+shift+smooth); j++,l++) {
+                    data[isyn*nxs*nt+i*nt+j] *= costaper[l];
+                }
+                for (j = MIN(nt,nt-tmute+shift+smooth); j < nt; j++) {
                     data[isyn*nxs*nt+i*nt+j] = 0.0;
                 }
-                for (j = MIN(nt,nt+1-tmute+shift+smooth); j < nt; j++) {
+            }
+        }
+        else if (above==-2){
+            for (i = 0; i < npos; i++) {
+                imute = ixpos[i];
+                tmute = mute[isyn*nxs+imute];
+                ts = tsynW[isyn*nxs+imute];
+                if (tmute >= nt/2) {
+                    memset(&data[isyn*nxs*nt+i*nt],0, sizeof(float)*nt);
+                    continue;
+                }
+                for (j = MAX(0,-2*ts+tmute+shift),l=0; j < MAX(0,-2*ts+tmute+shift+smooth); j++,l++) {
+                    data[isyn*nxs*nt+i*nt+j] *= costaper[l];
+                }
+                for (j = MAX(0,-2*ts+tmute+shift+smooth)+1; j < MIN(nt,nt+1-tmute-shift-smooth); j++) {
                     data[isyn*nxs*nt+i*nt+j] = 0.0;
                 }
-                for (j = MIN(nt,nt-tmute+shift),l=0; j < MIN(nt,nt-tmute+shift+smooth); j++,l++) {
+                for (j = MIN(nt,nt-tmute-shift-smooth),l=0; j < MIN(nt,nt-tmute-shift); j++,l++) {
+                    data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
+                }
+            }
+        }
+        else if (above==-1){
+            for (i = 0; i < npos; i++) {
+                imute = ixpos[i];
+                tmute = mute[isyn*nxs+imute];
+                ts = tsynW[isyn*nxs+imute];
+                for (j = MAX(0,ts+tmute-shift),l=0; j < MAX(0,ts+tmute-shift+smooth); j++,l++) {
                     data[isyn*nxs*nt+i*nt+j] *= costaper[l];
                 }
+                for (j = MAX(0,ts+tmute-shift+smooth); j < nt; j++) {
+                    data[isyn*nxs*nt+i*nt+j] = 0.0;
+                }
             }
         }
         else if (above==2){//Separates the direct part of the wavefield from the coda
diff --git a/marchenko/applyMute_tshift.c b/marchenko/applyMute_tshift.c
index a2f956e0ed3fe9b72c2b13d4d6cc7db111a86bde..26cd141e72f2d170a7bfa809dbaab1a641c2fd4b 100644
--- a/marchenko/applyMute_tshift.c
+++ b/marchenko/applyMute_tshift.c
@@ -3,7 +3,9 @@
 #include <string.h>
 #include <math.h>
 #include <assert.h>
+#include "genfft.h"
 
+void verr(char *fmt, ...);
 #ifndef MAX
 #define MAX(x,y) ((x) > (y) ? (x) : (y))
 #endif
@@ -26,7 +28,7 @@ void applyMute_tshift( float *data, int *mute, int smooth, int above, int Nfoc,
 
     if (smooth) {
         costaper = (float *)malloc(smooth*sizeof(float));
-        scl = M_PI/((float)smooth);
+        scl = M_PI/((float)smooth+1);
         for (i=0; i<smooth; i++) {
             costaper[i] = 0.5*(1.0+cos((i+1)*scl));
         }
@@ -36,163 +38,180 @@ void applyMute_tshift( float *data, int *mute, int smooth, int above, int Nfoc,
 
     for (isyn = 0; isyn < Nfoc; isyn++) {
         for (i = 0; i < npos; i++) {
+            imute = ixpos[i];
+            tmute = mute[isyn*nxs+imute];
+            ts = tsynW[isyn*nxs+imute];
+            //fprintf(stderr,"i=%d tmute=%d ts=%d\n", i, tmute, ts);
+            for (j = 0; j < nt; j++) {
+                Nig[j]   = data[isyn*nxs*nt+i*nt+j];
+            }
             if (iter % 2 == 0) { 
-                for (j = 0; j < nt; j++) {
-                    Nig[j]   = data[isyn*nxs*nt+i*nt+j];
-                }
+				if (above==0) above=0;
             }
-            else { // reverse back in time
-                j=0;
-                Nig[j]   = data[isyn*nxs*nt+i*nt+j];
-                for (j = 1; j < nt; j++) {
-                    Nig[j]   = data[isyn*nxs*nt+i*nt+nt-j];
-                }
+            else { // switch angle 
+                if (above==0) above=-1;
+                if (above==4) above=-4;
+                tmute = tmute-2*ts;
             }
-            if (above==1) {
-                imute = ixpos[i];
-                tmute = mute[isyn*nxs+imute];
-				ts = tsynW[isyn*nxs+imute];
-                for (j = 0; j < MAX(0,tmute-shift-smooth); j++) {
-                    Nig[j] = 0.0;
+            if (above==-1){ /* the above=0 implementation for plane-waves at odd iterations */
+                if (tmute >= nt/2) {
+                    memset(&Nig[0],0, sizeof(float)*nt);
+                    continue;
                 }
+                /* positive time axis mute below plane-wave first arrivals */
+                /* at the zero crossing the smooth transition zone is not handled correctly
+                 * this can be implemented but makes it more complicated, better solution is to
+                 * rotate Nig with the time axis in the middle, something still to do */
                 for (j = MAX(0,tmute-shift-smooth),l=0; j < MAX(0,tmute-shift); j++,l++) {
+                    Nig[j] *= costaper[l];
+                }
+                for (j = MAX(0,tmute-shift); j < MIN(nt,nt+1-tmute+shift-2*ts); j++) {
+                    Nig[j] = 0.0;
+                }
+                for (j = MIN(nt,nt+1-(tmute+2*ts)+shift),l=0; j < MIN(nt,nt+1-(tmute+2*ts)+shift+smooth); j++,l++) {
                     Nig[j] *= costaper[smooth-l-1];
+                }
+				if (nt-(tmute+2*ts)+shift+smooth > nt) {
+                	for (j = 0; j < MAX(0,-(tmute+2*ts)+shift); j++) {
+                    	Nig[j] = 0.0;
+                	}
+                	for (j = MAX(0,-(tmute+2*ts)+shift),l=0; j < MAX(0,-(tmute+2*ts)+shift+smooth); j++,l++) {
+                    	Nig[j] *= costaper[smooth-l-1];
+                	}
+				}
+                if (tmute-shift < 0) {
+                    for (j = MIN(nt,nt+1+tmute-shift-smooth),l=0; j < MIN(nt,nt+1+tmute-shift); j++,l++) {
+                        Nig[j] *= costaper[l];
+                    }
+                    for (j = MIN(nt,nt+1+tmute-shift); j < nt; j++) {
+                        Nig[j] = 0.0;
+                    }
                 }
             }
             else if (above==0){
-                imute = ixpos[i];
-                tmute = mute[isyn*nxs+imute];
-				ts = tsynW[isyn*nxs+imute];
                 if (tmute >= nt/2) {
                     memset(&Nig[0],0, sizeof(float)*nt);
                     continue;
                 }
-                for (j = MAX(0,tmute-shift),l=0; j < MAX(0,tmute-shift+smooth); j++,l++) {
+                for (j = MAX(0,tmute-shift-smooth),l=0; j < MAX(0,tmute-shift); j++,l++) {
                     Nig[j] *= costaper[l];
                 }
-                for (j = MAX(0,tmute-shift+smooth+1); j < MIN(nt,nt+1-tmute+2*ts+shift-smooth); j++) {
+                for (j = MAX(0,tmute-shift); j < MIN(nt,nt+1-tmute+shift+2*ts); j++) {
                     Nig[j] = 0.0;
                 }
-                for (j = MIN(nt-1,nt-tmute+2*ts+shift-smooth),l=0; j < MIN(nt,nt-tmute+shift); j++,l++) {
+                for (j = MIN(nt,nt+1-tmute+shift+2*ts),l=0; j < MIN(nt,nt+1-tmute+shift+2*ts+smooth); j++,l++) {
                     Nig[j] *= costaper[smooth-l-1];
                 }
-            }
-            else if (above==-1) {
-                imute = ixpos[i];
-                tmute = mute[isyn*nxs+imute];
-				ts = tsynW[isyn*nxs+imute];
-				//ts = tsynW[isyn*nxs+ixpos[npos-1]];
-                for (j = ts+tmute-shift,l=0; j < ts+tmute-shift+smooth; j++,l++) {
-                    Nig[j] *= costaper[l];
-                }
-                for (j = ts+tmute-shift+smooth; j < nt; j++) {
-                    Nig[j] = 0.0;
+				if (nt-tmute+shift+2*ts > nt) {
+                	for (j = 0; j < MAX(0,-tmute+shift+2*ts); j++) {
+                    	Nig[j] = 0.0;
+                	}
+                	for (j = MAX(0,-tmute+shift+2*ts),l=0; j < MAX(0,-tmute+shift+2*ts+smooth); j++,l++) {
+                    	Nig[j] *= costaper[smooth-l-1];
+                	}
+				}
+                if (tmute-shift-smooth < 0) {
+                    for (j = MIN(nt,nt+1+tmute-shift-smooth),l=0; j < MIN(nt,nt+1+tmute-shift); j++,l++) {
+                        Nig[j] *= costaper[l];
+                    }
+                    for (j = MIN(nt,nt+1+tmute-shift); j < nt; j++) {
+                        Nig[j] = 0.0;
+                    }
                 }
             }
             else if (above==4) { //Psi gate which is the inverse of the Theta gate (above=0)
-                imute = ixpos[i];
-                tmute = mute[isyn*nxs+imute];
-				ts = tsynW[isyn*nxs+imute];
-                for (j = MAX(0,-2*ts+tmute-shift-smooth),l=0; j < MAX(0,-2*ts+tmute-shift); j++,l++) {
-                    Nig[j] *= costaper[smooth-l-1];
+                if (tmute >= nt/2) continue;
+                for (j = MAX(0,tmute-shift-smooth),l=0; j < MAX(0,tmute-shift); j++,l++) {
+                   	Nig[j] *= costaper[smooth-l-1];
                 }
-                for (j = 0; j < MAX(0,-2*ts+tmute-shift-smooth-1); j++) {
-                    Nig[j] = 0.0;
+                for (j = MIN(nt,nt+1-tmute+shift+2*ts-smooth),l=0; j < MIN(nt,nt+1-tmute+shift+2*ts); j++,l++) {
+                    Nig[j] *= costaper[l];
                 }
-                for (j = nt+1-tmute+shift+smooth; j < nt; j++) {
+                for (j = MIN(nt,nt+1-tmute+shift+2*ts); j < nt; j++) {
                     Nig[j] = 0.0;
                 }
-                for (j = nt-tmute+shift,l=0; j < nt-tmute+shift+smooth; j++,l++) {
-                    Nig[j] *= costaper[l];
-                }
+				if (nt-tmute+shift+2*ts >= nt) {
+                	for (j = MAX(0,-tmute+shift+2*ts-smooth),l=0; j < MAX(0,-tmute+shift+2*ts); j++,l++) {
+                    	Nig[j] *= costaper[l];
+                	}
+                	for (j = MAX(0,-tmute+shift+2*ts); j < MAX(0,tmute-shift-smooth); j++) {
+                    	Nig[j] = 0.0;
+                	}
+				}
+				else {
+                	for (j = 0; j < MAX(0,tmute-shift-smooth); j++) {
+                    	Nig[j] = 0.0;
+                	}
+				}
             }
-/* To Do above==2 is not yet adapated for plane-waves */
-            else if (above==2){//Separates the direct part of the wavefield from the coda
-                imute = ixpos[i];
-                tmute = mute[isyn*nxs+imute];
-                for (j = 0; j < tmute-shift-smooth; j++) {
-                    data[isyn*nxs*nt+i*nt+j] = 0.0;
+            else if (above==-4) { //Psi gate which is the inverse of the Theta gate (above=-1)
+                if (tmute >= nt/2) continue;
+                for (j = 0; j < MAX(0,tmute-shift-smooth); j++) {
+                    Nig[j] = 0.0;
                 }
-                for (j = tmute-shift-smooth,l=0; j < tmute-shift; j++,l++) {
-                    data[isyn*nxs*nt+i*nt+j] *= costaper[smooth-l-1];
+                for (j = MAX(0,tmute-shift-smooth), l=0; j < MAX(0,tmute-shift); j++,l++) {
+                    Nig[j] *= costaper[smooth-l-1];
                 }
-                for (j = tmute+shift,l=0; j < tmute+shift+smooth; j++,l++) {
-                    data[isyn*nxs*nt+i*nt+j] *= costaper[l];
+                if (tmute-shift-smooth < 0) {
+                	for (j = MIN(nt,nt+1+tmute-shift-smooth),l=0; j < MIN(nt,nt+1+tmute-shift); j++,l++) {
+                    	Nig[j] *= costaper[smooth-l-1];
+                    }
                 }
-                for (j = tmute+shift+smooth; j < nt; j++) {
-                    data[isyn*nxs*nt+i*nt+j] = 0.0;
+                for (j = MIN(nt,nt+1-(tmute+2*ts)+shift-smooth),l=0; j < MIN(nt,nt+1-(tmute+2*ts)+shift); j++,l++) {
+                    Nig[j] *= costaper[l];
                 }
-            }
-
-            if (iter % 2 == 0) { 
-                for (j = 0; j < nt; j++) {
-                    data[isyn*nxs*nt+i*nt+j] = Nig[j];
+                for (j = MIN(nt,nt+1-(tmute+2*ts)+shift),l=0; j < MIN(nt,nt+1+tmute-shift-smooth); j++,l++) {
+                   	Nig[j] = 0.0;
                 }
-            }
-            else { // reverse back in time
-                j=0;
+			}
+
+            for (j = 0; j < nt; j++) {
                 data[isyn*nxs*nt+i*nt+j] = Nig[j];
-                for (j = 1; j < nt; j++) {
-                    data[isyn*nxs*nt+i*nt+j] = Nig[nt-j];
-                }
             }
         } /* end if ipos */
     }
 
     if (smooth) free(costaper);
-	free(Nig);
+    free(Nig);
 
     return;
 }
 
 void timeShift(float *data, int nsam, int nrec, float dt, float shift, float fmin, float fmax)
 {
-	int 	optn, iom, iomin, iomax, nfreq, ix, it;
-	float	omin, omax, deltom, om, tom, df, *trace, scl;
-	complex *ctrace, ctmp;
+    int     optn, iom, nfreq, ix, it;
+    float    deltom, om, tom, df, *trace, scl;
+    complex *ctrace, ctmp;
 
-	optn = optncr(nsam);
-	nfreq = optn/2+1;
-	df    = 1.0/(optn*dt);
+    optn = optncr(nsam);
+    nfreq = optn/2+1;
+    df    = 1.0/(optn*dt);
 
-	ctrace = (complex *)malloc(nfreq*sizeof(complex));
-	if (ctrace == NULL) verr("memory allocation error for ctrace");
+    ctrace = (complex *)malloc(nfreq*sizeof(complex));
+    if (ctrace == NULL) verr("memory allocation error for ctrace");
 
-	trace = (float *)malloc(optn*sizeof(float));
-	if (trace == NULL) verr("memory allocation error for rdata");
+    trace = (float *)malloc(optn*sizeof(float));
+    if (trace == NULL) verr("memory allocation error for rdata");
 
-	deltom = 2.*M_PI*df;
-	omin   = 2.*M_PI*fmin;
-	omax   = 2.*M_PI*fmax;
-	iomin  = (int)MIN((omin/deltom), (nfreq));
-	iomax  = MIN((int)(omax/deltom), (nfreq));
+    deltom = 2.*M_PI*df;
     scl = 1.0/(float)optn;
 
-	for (ix = 0; ix < nrec; ix++) {
+    for (ix = 0; ix < nrec; ix++) {
         for (it=0;it<nsam;it++)    trace[it]=data[ix*nsam+it];
         for (it=nsam;it<optn;it++) trace[it]=0.0;
-	    /* Forward time-frequency FFT */
-	    rc1fft(&trace[0], &ctrace[0], optn, -1);
-
-		for (iom = 0; iom < iomin; iom++) {
-			ctrace[iom].r = 0.0;
-			ctrace[iom].i = 0.0;
-		}
-		for (iom = iomax; iom < nfreq; iom++) {
-			ctrace[iom].r = 0.0;
-			ctrace[iom].i = 0.0;
-		}
-		for (iom = iomin ; iom < iomax ; iom++) {
-			om = deltom*iom;
-			tom = om*shift;
-			ctmp = ctrace[iom];
-			ctrace[iom].r = ctmp.r*cos(-tom) - ctmp.i*sin(-tom);
-			ctrace[iom].i = ctmp.i*cos(-tom) + ctmp.r*sin(-tom);
-		}
+        /* Forward time-frequency FFT */
+        rc1fft(&trace[0], &ctrace[0], optn, -1);
+        for (iom = 0 ; iom < nfreq ; iom++) {
+            om = deltom*iom;
+            tom = om*shift;
+            ctmp = ctrace[iom];
+            ctrace[iom].r = ctmp.r*cos(-tom) - ctmp.i*sin(-tom);
+            ctrace[iom].i = ctmp.i*cos(-tom) + ctmp.r*sin(-tom);
+        }
         /* Inverse frequency-time FFT and scale result */
         cr1fft(ctrace, trace, optn, 1);
         for (it=0;it<nsam;it++) data[ix*nsam+it]=trace[it]*scl;
-	}
+    }
 
 
     free(ctrace);
diff --git a/marchenko/demo/mme/clean b/marchenko/demo/mme/clean
new file mode 100755
index 0000000000000000000000000000000000000000..d01f8d3445d00a9ac09bc0ae9cab7b265c64f6a8
--- /dev/null
+++ b/marchenko/demo/mme/clean
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+rm *.su *.bin *.eps nep line* *.asci
+rm strongContrast/*.su strongContrast/*.eps
+
diff --git a/marchenko/demo/mme/epsModel.scr b/marchenko/demo/mme/epsModel.scr
index d71acdcdc1138026e265cfd4cd220e1d870e030f..82d2785aa47a5535a2fcd72c7eb32037e1b1055f 100755
--- a/marchenko/demo/mme/epsModel.scr
+++ b/marchenko/demo/mme/epsModel.scr
@@ -21,13 +21,13 @@ cat << EOF3 > line3
 EOF3
 
 #model
-supsimage hbox=4 wbox=6 labelsize=12 < model10_cp.su \
+supsimage hbox=4 wbox=6 labelsize=16 < model10_cp.su \
         x1beg=0 x1end=1400.0 d1num=200 lstyle=vertright legend=1 threecolor=0 \
     	label1="depth (m)" label2="lateral distance (m)" \
         curve=line1,line2,line3 npair=2,2,2 curvecolor=black,black,black \
         n1tic=5 x2beg=-2250 f2num=-2000 d2num=1000 x2end=2250 > model_cp_line.eps
 
-supsimage hbox=4 wbox=6 labelsize=12 < model10_ro.su \
+supsimage hbox=4 wbox=6 labelsize=16 < model10_ro.su \
         x1beg=0 x1end=1400.0 d1num=200 lstyle=vertright legend=1 threecolor=0 \
     	label1="depth (m)" label2="lateral distance (m)" \
         curve=line1,line2,line3 npair=2,2,2 curvecolor=black,black,black \
@@ -36,12 +36,12 @@ supsimage hbox=4 wbox=6 labelsize=12 < model10_ro.su \
 #wavelet
 dt=0.0005
 supsgraph < wavefw.su \
-    labelsize=12 d1=$dt style=normal \
+    labelsize=16 d1=$dt style=normal \
     label1="time (s)" label2="amplitude" \
     d1num=0.15 wbox=6 hbox=3 x1end=0.9 > wavefw.eps
  
 sufft < wavefw.su | suamp | sugain scale=$dt | supsgraph \
-    labelsize=12 style=normal \
+    labelsize=16 style=normal \
     label1="frequency (1/s)" label2="amplitude" \
     d1num=10 wbox=6 hbox=3 x1end=125 x2end=1.1 > wavefw_freq.eps
  
diff --git a/marchenko/demo/mme/epsPrimaries.scr b/marchenko/demo/mme/epsPrimaries.scr
index 67341c26e3a1c7ec34707499ef4059eec7717c2b..e74cf8145958541410f5b4b97d7db8055c09aad3 100755
--- a/marchenko/demo/mme/epsPrimaries.scr
+++ b/marchenko/demo/mme/epsPrimaries.scr
@@ -102,6 +102,10 @@ fi
 # second iteration
 if [[ "$1" == "Figure4" ]];
 then
+if [[ ! -f "shotsx0.su" ]]; then
+echo "ERR: File shotsx0.su is not yet created, please run ./epsPrimaries.scr Figure3 first."
+exit
+fi
 suwind itmax=1023 <  Mi_276001.su > N0.su
 #compute R*N0
 fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN1.su verbose=1 fmax=90
@@ -182,7 +186,7 @@ echo $piter
 file=Mi_${istart}${piter}.su
 file_base=${file%.su}
 #possibly add suflip flip=3 to flip the time-axis
-supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+supsimage < $file hbox=6 wbox=4 labelsize=16 linewidth=0.0 \
 	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
 	label1="time sample number" label2="lateral distance" \
     f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
@@ -197,7 +201,7 @@ piter=$(printf %03d $iter)
 echo $piter
 file=Mi_${istart}${piter}.su
 file_base=${file%.su}
-supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+supsimage < $file hbox=6 wbox=4 labelsize=16 linewidth=0.0 \
 	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
 	label1="time sample number" label2="lateral distance" \
     f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
@@ -218,14 +222,14 @@ file=Mi_${istart}${piter}.su
 #shift=$(echo "scale=4; ($dtrcv*($ns/2.0-1))" | bc -l)
 #basop choice=shift shift=$shift file_in=$file | \
 file_base=${file%.su}
-suflip flip=3 < $file | supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+suflip flip=3 < $file | supsimage hbox=6 wbox=4 labelsize=16 linewidth=0.0 \
 	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
 	label1="time sample number" label2="lateral distance" \
     f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > ${file_base}flip.eps
 
 file=k1min_${istart}${piter}.su
 file_base=${file%.su}
-supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+supsimage hbox=6 wbox=4 labelsize=16 linewidth=0.0 < $file\
 	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
 	label1="time sample number" label2="lateral distance" \
     f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
@@ -242,7 +246,7 @@ for (( istart=246; istart<=316; istart+=10 ))
 do
 file=k1min_${istart}${piter}.su
 file_base=${file%.su}
-supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+supsimage hbox=6 wbox=4 labelsize=16 linewidth=0.0 < $file\
 	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
 	label1="time sample number" label2="lateral distance" \
     f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
@@ -259,7 +263,7 @@ echo $piter
 file=Mi_${istart}${piter}.su
 file_base=${file%.su}
 #possibly add suflip flip=3 to flip the time-axis
-supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+supsimage < $file hbox=6 wbox=4 labelsize=16 linewidth=0.0 \
 	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
 	label1="time sample number" label2="lateral distance" \
     f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}T.eps
@@ -296,12 +300,12 @@ file=WindowOdd276.su
 file_base=${file%.su}
 suwind key=tracl min=1 max=1 < $file | suwind itmin=1024 | \
 supsgraph d1=1 style=normal f1=0 \
-    labelsize=12 label1="time sample number" label2="amplitude" \
+    labelsize=14 label1="time sample number" label2="amplitude" \
     d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
 file=WindowEven276.su
 file_base=${file%.su}
 suwind key=tracl min=1 max=1 < $file | suwind itmax=1024 | \
 supsgraph d1=1 style=normal f1=0 \
-    labelsize=12 label1="time sample number" label2="amplitude" \
+    labelsize=14 label1="time sample number" label2="amplitude" \
     d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
 
diff --git a/marchenko/findFirstBreak.c b/marchenko/findFirstBreak.c
index 100974d81a47eb914d4c9d3e197bcd0e1b40e697..40a7c5e7a9e482c65b47bc1185a2bff714ee128c 100644
--- a/marchenko/findFirstBreak.c
+++ b/marchenko/findFirstBreak.c
@@ -4,6 +4,7 @@
 #include <math.h>
 #include <assert.h>
 
+void vmess(char *fmt, ...);
 #ifndef MAX
 #define MAX(x,y) ((x) > (y) ? (x) : (y))
 #endif
diff --git a/marchenko/fmute.c b/marchenko/fmute.c
index 4882a0222d3bb436e280fcb042ce78fc51e1ebca..7bb405617df5fbbe1c00fcf2874386b94e04a5ed 100644
--- a/marchenko/fmute.c
+++ b/marchenko/fmute.c
@@ -176,15 +176,15 @@ int main (int argc, char **argv)
     if (file_out==NULL) fp_out = stdout;
     else {
         fp_out = fopen(file_out, "w+");
-        if (fp_out==NULL) verr("error on ceating output file");
+        if (fp_out==NULL) verr("error on creating output file");
     }
     if (check!=0){
         fp_chk = fopen("check.su", "w+");
-        if (fp_chk==NULL) verr("error on ceating output file");
+        if (fp_chk==NULL) verr("error on creating output file");
         fp_psline1 = fopen("pslinepos.asci", "w+");
-        if (fp_psline1==NULL) verr("error on ceating output file");
+        if (fp_psline1==NULL) verr("error on creating output file");
         fp_psline2 = fopen("pslineneg.asci", "w+");
-        if (fp_psline2==NULL) verr("error on ceating output file");
+        if (fp_psline2==NULL) verr("error on creating output file");
         
     }
     if (smooth) {
@@ -316,6 +316,7 @@ int main (int argc, char **argv)
             if (ret < 0 ) verr("error on writing check file.");
             for (i=0; i<nx1; i++) {
                 jmax = maxval[i]-shift;
+                if (above==6) jmax = maxval[i]+shift;
                 ret = fprintf(fp_psline1, "%.5f %.5f \n",jmax*dt,hdrs_in1[i].gx*sclshot);
                 jmax =-maxval[i]+shift;
                 ret = fprintf(fp_psline2, "%.5f %.5f \n",jmax*dt,hdrs_in1[i].gx*sclshot);
diff --git a/marchenko/marchenko.c b/marchenko/marchenko.c
index e3ab59bbfeb3263d361aaeea8691dd1ab717b726..1021ecf7769a0caf2cb9bf8d79aed8e9fadd0d3f 100644
--- a/marchenko/marchenko.c
+++ b/marchenko/marchenko.c
@@ -134,7 +134,7 @@ int main (int argc, char **argv)
     float   *f1plus, *f1min, *iRN, *Ni, *trace, *Gmin, *Gplus;
     float   xmin, xmax, scale, tsq, Q, f0;
     float   *ixmask;
-    float   grad2rad, p, src_angle, src_velo, tshift;
+    float   grad2rad, p, src_angle, src_velo, tshift, tneg;
     complex *Refl, *Fop;
     char    *file_tinv, *file_shot, *file_green, *file_iter;
     char    *file_f1plus, *file_f1min, *file_gmin, *file_gplus, *file_f2, *file_pmin;
@@ -255,7 +255,6 @@ int main (int argc, char **argv)
                              
 	/* compute time shift for tilted plane waves */
 	if (plane_wave==1) {
-        itmin = nt;
 	    /* compute time shift for shifted plane waves */
         grad2rad = 17.453292e-3;
         p = sin(src_angle*grad2rad)/src_velo;
@@ -263,14 +262,29 @@ int main (int argc, char **argv)
 
 		/* compute mute window for plane waves */
 		//for (i=0; i<nxs; i++) fprintf(stderr,"i=%d window=%d\n", i, muteW[i]);
-        for (i=0; i<nxs; i++) itmin = MIN (itmin, muteW[i]);
-        for (i=0; i<nxs; i++) tsynW[i] = muteW[i]-itmin;
+        //itmin = nt;
+        //for (i=0; i<nxs; i++) itmin = MIN (itmin, muteW[i]);
+
+        /* for negative angles tshift is negative and */
+        if (src_angle < 0.0) {
+			itmin = NINT(tshift/dt);
+        	//for (i=0; i<nxs; i++) muteW[i] = MAX(0, muteW[i]-itmin);
+        	for (i=0; i<nxs; i++) muteW[i] = muteW[i]-itmin;
+            timeShift(G_d, nts, nxs, dt, tshift, fmin, fmax);
+		}
+
+        for (i=0; i<nxs; i++) tsynW[i] = NINT(i*dxs*p/dt);
+        //for (i=0; i<nxs; i++) tsynW[i] = 0.0;
 		if (Nfoc!=1) verr("For plane-wave focusing only one function can be computed at the same time");
-	    //fprintf(stderr,"itmin=%d\n", itmin);	
+	    //fprintf(stderr,"itmin=%d tshift=%f =%d \n", itmin, tshift, NINT(tshift/dt));	
 		//for (i=0; i<nxs; i++) fprintf(stderr,"i=%d window=%f\n", i, tsynW[i]*dt);
+/*		// TESTING SHIFT 
+        tshift=0.3;
+        for (i=0; i<nxs; i++) tsynW[i] = NINT(0.3/dt);
+*/
 	}
 	else { /* just fill with zero's */
-		itmin=0;
+		//itmin=0;
 		for (i=0; i<nxs*Nfoc; i++) {
 			tsynW[i] = 0;
 		}
@@ -483,17 +497,39 @@ int main (int argc, char **argv)
                     energyNi += iRN[l*nxs*nts+ix*nts+j]*iRN[l*nxs*nts+ix*nts+j];
                 }
             }
-            if (iter==0) energyN0[Nfoc] = energyNi;
-            if (verbose >=2) vmess(" - iSyn %d: Ni at iteration %d has energy %e; relative to N0 %e", l, iter, sqrt(energyNi), sqrt(energyNi/energyN0[Nfoc]));
+            if (iter==0) energyN0[l] = energyNi;
+            if (verbose >=2) vmess(" - iSyn %d: Ni at iteration %d has energy %e; relative to N0 %e", l, iter, sqrt(energyNi), sqrt(energyNi/energyN0[l]));
         }
 
+        //writeDataIter("bmute.su", Ni, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, iter+1);
         /* apply mute window based on times of direct arrival (in muteW) */
+
         if ( plane_wave==1 ) { /* use a-symmetric shift for plane waves with non-zero angles */
-            applyMute_tshift(Ni, muteW, smooth, above, Nfoc, nxs, nts, ixpos, npos, shift, iter, tsynW);
+            applyMute_tshift(Ni, muteW, smooth, 0, Nfoc, nxs, nts, ixpos, npos, shift, iter, tsynW);
         }
         else {
-            applyMute(Ni, muteW, smooth, above, Nfoc, nxs, nts, ixpos, npos, shift, tsynW);
+            applyMute(Ni, muteW, smooth, -above, Nfoc, nxs, nts, ixpos, npos, shift, tsynW);
         }
+        //writeDataIter("amute.su", Ni, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, iter+1);
+/*
+    		// for testing time-windows with dipping plane waves
+            for (i = 0; i < npos; i++) {
+                for (j = 0; j < nts; j++) {
+                    Ni[i*nts+j]    = 1.0;
+				}
+			}
+            applyMute_tshift(Ni, muteW, smooth, 0, Nfoc, nxs, nts, ixpos, npos, shift, iter, tsynW);
+            //applyMute(Ni, muteW, smooth, -above, Nfoc, nxs, nts, ixpos, npos, shift, tsynW);
+            writeDataIter("mute0.su", Ni, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, iter+1);
+            for (i = 0; i < npos; i++) {
+                for (j = 0; j < nts; j++) {
+                    Ni[i*nts+j]    = 1.0;
+				}
+			}
+            applyMute_tshift(Ni, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, iter, tsynW);
+            //applyMute(Ni, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, tsynW);
+            writeDataIter("mute4.su", Ni, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, iter+1);
+*/
 
         if (iter % 2 == 0) { /* even iterations update: => f_1^-(t) */
             for (l = 0; l < Nfoc; l++) {
@@ -568,6 +604,8 @@ int main (int argc, char **argv)
             xrcv, xsrc, xnx, fxse, fxsb, dxs, dxsrc, dx, ntfft, nw, nw_low, nw_high, mode,
             reci, nshots, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv, ixmask, verbose);
 
+        writeDataIter("iRN.su", iRN, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 1, iter+1);
+        
         /* compute upgoing Green's G^-,+ */
         for (l = 0; l < Nfoc; l++) {
             for (i = 0; i < npos; i++) {
@@ -581,9 +619,15 @@ int main (int argc, char **argv)
         }
         /* Apply mute with window for Gmin */
 		if ( plane_wave==1 ) {
-            applyMute_tshift(Gmin, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, 0, tsynW);
             /* for plane wave with angle tshift downward */
-            timeShift(Gmin, nts, npos, dt, tshift, fmin, fmax);
+            applyMute_tshift(Gmin, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, 1, tsynW);
+            if (src_angle > 0.0) {
+			    if (verbose>1) vmess("Gmin planewave tshift=%f", tshift);
+                timeShift(Gmin, nts, npos, dt, tshift, fmin, fmax);
+			}
+			else {
+			    if (verbose>1) vmess("Gmin NO planewave tshift");
+			}
 		}
 		else {
             applyMute(Gmin, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, tsynW);
@@ -613,7 +657,7 @@ int main (int argc, char **argv)
         }
         /* Apply mute with window for Gplus */
 		if ( plane_wave==1 ) {
-            applyMute_tshift(Gplus, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, 1, tsynW);
+            applyMute_tshift(Gplus, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, 0, tsynW);
         }
         else {
             applyMute(Gplus, muteW, smooth, 4, Nfoc, nxs, nts, ixpos, npos, shift, tsynW);
diff --git a/marchenko/marchenko_primaries.c b/marchenko/marchenko_primaries.c
index 3f6f9dc28c937227d4ce25965578c0dd506ceec9..7a4d68aff0ec4047dd205245b9cad6aaefbe116e 100644
--- a/marchenko/marchenko_primaries.c
+++ b/marchenko/marchenko_primaries.c
@@ -95,6 +95,10 @@ char *sdoc[] = {
 "              ............... M0.su=M0 : initialisation of algorithm",
 "              ............... RMi: iterative terms ",
 "              ............... k1min.su: k1min terms ",
+"   file_vplus= .............. output file with v+",
+"   file_vmin= ............... output file with v-",
+"   file_uplus= .............. output file with u+",
+"   file_umin= ............... output file with u-",
 "   file_update= ............. output file with updates only => removed internal multiples",
 "   T=0 ...................... :1 compute transmission-losses compensated primaries ",
 "   verbose=0 ................ silent option; >0 displays info",
@@ -105,16 +109,24 @@ char *sdoc[] = {
 NULL};
 /**************** end self doc ***********************************/
 
+/*
+ * The equation and Figure numbers mentioned in the comments refer to the paper:
+ *  "Implementation of the Marchenko Multiple Elimination algorithm", 
+ *  Thorbecke, Jan; Zhang, Lele; Wapenaar, Kees; Slob, Evert,
+ *  Accepted for publication, todo add full reference after publication 
+ */
+
 int main (int argc, char **argv)
 {
-    FILE    *fp_out, *fp_rr, *fp_w, *fp_up;
+    FILE    *fp_out, *fp_rr, *fp_w, *fp_ud;
+    FILE    *fp_um, *fp_up, *fp_vm, *fp_vp;
 	size_t  nread, size;
     int     i, j, k, l, ret, nshots, Nfoc, nt, nx, nts, nxs, ngath, nacq;
     int     n1, n2, ntap, tap, di, ntraces, tr;
     int     nw, nw_low, nw_high, nfreq, *xnx, *xnxsyn;
     int     reci, countmin, mode, n2out, verbose, ntfft;
     int     iter, niter, niterec, recur, niterskip, niterrun, tracf, *muteW;
-    int     hw, ii, iw, ishot, istart, iend;
+    int     hw, ii, iw, iw0, ishot, istart, iend;
     int     smooth, *ixpos, *ixp, npos, ix, ixrcv, m, pad, T, isms, isme, perc;
     int     nshots_r, *isxcount, *reci_xsrc, *reci_xrcv, shift, plane_wave;
     float   fmin, fmax, tom, deltom, *tapersh, *tapersy, fxf, dxf, *xsrc, *xrcv, *zsyn, *zsrc, *xrcvsyn;
@@ -122,13 +134,14 @@ int main (int argc, char **argv)
 	double  energyMi, *energyM0;
     float   tt0, d1, d2, f1, f2, fxsb, fxse, ft, fx, *xsyn, dxsrc;
     float   *M0, *DD, *RR, *SRC, dt, dx, dxs, scl, mem, scltap;
-    float   *rtrace, *tmpdata, *k1min, *v1plus, *RMi, *Mi, *trace;
+    float   *rtrace, *tmpdata, *k1min, *k1plus, *v1plus, *uv, *RMi, *Mi, *trace;
 	float   *Mup, *Msp, *maxval;
     float   xmin, xmax, scale, tsq;
 	float   Q, f0, *ixmask, *costaper;
 	float   src_velo, src_angle, grad2rad, p, *twplane;
     complex *Refl, *Fop, *ctrace, *cwave, csum, cwav;
     char    *file_tinv, *file_shot, *file_rr, *file_src, *file_iter, *file_update;
+    char    *file_umin, *file_uplus, *file_vmin, *file_vplus;
 	char    *file_dd;
     segy    *hdrs_out, hdr;
 
@@ -145,6 +158,10 @@ int main (int argc, char **argv)
     if (!getparstring("file_dd", &file_rr)) file_dd = NULL;
     if (!getparstring("file_iter", &file_iter)) file_iter = NULL;
     if (!getparstring("file_update", &file_update)) file_update = NULL;
+    if (!getparstring("file_umin", &file_umin)) file_umin = NULL;
+    if (!getparstring("file_uplus", &file_uplus)) file_uplus = NULL;
+    if (!getparstring("file_vmin", &file_vmin)) file_vmin = NULL;
+    if (!getparstring("file_vplus", &file_vplus)) file_vplus = NULL;
     
     if (!getparint("verbose", &verbose)) verbose = 0;
     if (!getparfloat("fmin", &fmin)) fmin = 0.0;
@@ -187,7 +204,7 @@ int main (int argc, char **argv)
 
     if (reci && ntap) vwarn("tapering influences the reciprocal result");
 
-/* defines the smooth transition zone of the time-window */
+/* defines the smooth transition zone of the time-window (Figure 5) */
 
 	smooth = MIN(smooth, shift);
     if (smooth) {
@@ -272,7 +289,9 @@ int main (int argc, char **argv)
     Mi      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     M0      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     k1min   = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    k1plus  = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     v1plus  = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
+    uv      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     SRC     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     RR      = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
     Mup     = (float *)calloc(Nfoc*nxs*ntfft,sizeof(float));
@@ -419,7 +438,7 @@ int main (int argc, char **argv)
     }
 
 /*================ Defining focusing operator(s) from R ================*/
-/* M0 = -R(ishot,-t) */
+/* M0 = -R(ishot,-t)  equation (3) */
 
     /* use ishot from Refl, complex-conjugate(time reverse), scale with -1 and convolve with wavelet */
     if (file_tinv == NULL) {
@@ -650,13 +669,13 @@ int main (int argc, char **argv)
     int A, W;
     A=shift*3; W=10;
 	sprintf(filename,"windowA%dW%d.txt",A, W);
-	fp_up = fopen(filename, "w+");
+	fp_ud = fopen(filename, "w+");
     ii=250;
     for (i = 0; i < npos; i++) {
 		twplane[i] = dt*A*sin(2*M_PI*i*W/npos);
-        fprintf(fp_up,"time-shift= %d %d %f\n", i, NINT(twplane[i]/dt), twplane[i]);
+        fprintf(fp_ud,"time-shift= %d %d %f\n", i, NINT(twplane[i]/dt), twplane[i]);
 	}
-	fclose(fp_up);
+	fclose(fp_ud);
 */
 
 /*================ start loop over number of time-samples ================*/
@@ -673,7 +692,9 @@ int main (int argc, char **argv)
 */
         t5 = wallclock_time();
         memset(M0, 0, Nfoc*nxs*ntfft*sizeof(float));
+        memset(k1plus, 0, Nfoc*nxs*ntfft*sizeof(float));
         memset(v1plus, 0, Nfoc*nxs*ntfft*sizeof(float));
+        memset(uv, 0, Nfoc*nxs*ntfft*sizeof(float));
 
         /* M0 equation (3) in the Geophysics implementation paper */
         /* once every 'niterskip' time-steps start from fresh M0 and do niter (~20) iterations */
@@ -688,7 +709,7 @@ int main (int argc, char **argv)
                     for (j = 0; j < nts; j++) {
                         M0[l*nxs*nts+i*nts+j] = -DD[l*nxs*nts+ix*nts+j];
                     }
-                    /* apply mute window for samples above nts-ii */
+                    /* apply mute window for samples above nts-ii : the Heaviside function in equation (3) */
                     for (j = 0; j < MIN(nts, nts-iw+isms); j++) {
                         M0[l*nxs*nts+i*nts+j] = 0.0;
                     }
@@ -696,6 +717,7 @@ int main (int argc, char **argv)
                         M0[l*nxs*nts+i*nts+j] *= costaper[smooth-k];
                     }
                 }
+				/* Initilisation of k1min: the first term (R) in the right hand side of equation(6) */
                 for (i = 0; i < npos; i++) {
                     ix = ixpos[i];
                     j = 0;
@@ -729,6 +751,7 @@ int main (int argc, char **argv)
                 }
             }
         }
+        /* If enabled this write the result of equation(3) to disk */
         if (file_iter != NULL) {
        	    writeDataIter("M0.su", M0, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixp, npos, 0, 1000*ii);
         }
@@ -746,15 +769,17 @@ int main (int argc, char **argv)
             t2    = wallclock_time();
     
 /*================ construction of Mi(-t) = - \int R(x,t) Mi(t)  ================*/
-            /* synthesis process is the compute kernel in equations (4) and (5) in the Geophysics implementation paper */
+            /* synthesis process is the compute kernel in equations (4) and (5) (rewritten from equation (2)) */
             synthesisp(Refl, Fop, Mi, RMi, nx, nt, nacq, nts, dt, xsyn, Nfoc,
                 xrcv, xsrc, xnx, fxse, fxsb, dxs, dxsrc, dx, ntfft, nw, nw_low, nw_high, mode,
                 reci, nshots, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv, ixmask, verbose);
 
+			/* If enbled the result of equation(4) or (5) (depending on odd/even) is written to disk */
         	if (file_iter != NULL) {
             	writeDataIter(file_iter, RMi, hdrs_out, ntfft, nacq, d2, f2, n2out, Nfoc, xsyn, zsyn, ixpos, npos, 0, 1000*ii+iter+1);
         	}
 
+            /* The Energy is computed to check the convergence of the scheme see Figure 6c and Figure 8 */
 			if (verbose >=2) {
                 for (l = 0; l < Nfoc; l++) {
                     if (iter % 2 == 0) {
@@ -765,8 +790,8 @@ int main (int argc, char **argv)
                                 energyMi += RMi[l*nacq*nts+ix*nts+j]*RMi[l*nacq*nts+ix*nts+j];
 					        }
                         }
-                        if ( (iter==0) ) energyM0[l] = energyMi;
-                        if ( (recur==0) ) {
+                        if ( iter==0 ) energyM0[l] = energyMi;
+                        if ( recur==0 ) {
                         	vmess(" - ii %d: Mi at iteration %d has energy %e; relative to M0 %e", ii, iter, sqrt(energyMi), sqrt(energyMi/energyM0[l]));
 						}
                     }
@@ -776,7 +801,8 @@ int main (int argc, char **argv)
             t3 = wallclock_time();
             tsyn +=  t3 - t2;
     
-            /* N_k(x,t) = -N_(k-1)(x,-t) */
+            /* M_i(x,t) = RMi_(i-1)(x,-t) : time-reversal of the outcome of the synthesis process */
+            /* see equation(3) where the time reversal of R is taken as input */
             for (l = 0; l < Nfoc; l++) {
                 for (i = 0; i < npos; i++) {
                     j = 0;
@@ -794,14 +820,19 @@ int main (int argc, char **argv)
                     for (i = 0; i < npos; i++) {
 						ix = ixpos[i];
 						iw = NINT((ii*dt+twplane[ix])/dt);
-						/* apply mute window for samples after ii */
+						/* store results in kplus the 'plus-part' of equation (6) */
+                        for (j = 0; j < nts; j++) {
+                            k1plus[l*nxs*nts+i*nts+j] += Mi[l*nxs*nts+i*nts+j];
+                        }
+						/* Apply mute window for samples after ii : the Heaviside function in equation (5) */
+                        /* This also defines v1_plus in equation (8) */
                         for (j = MAX(0,iw-isme); j < nts; j++) {
                             Mi[l*nxs*nts+i*nts+j] = 0.0;
                         }
                         for (j = MAX(0,iw-isme), k=0; j < iw-isms; j++, k++) {
                             Mi[l*nxs*nts+i*nts+j] *= costaper[k];
                         }
-						/* apply mute window for delta function at t=0*/
+						/* Apply mute window for delta function at t=0*/
 						iw = NINT((twplane[ix])/dt);
                         for (j = 0; j < MAX(0,iw+shift-smooth); j++) {
                             Mi[l*nxs*nts+i*nts+j] = 0.0;
@@ -809,6 +840,7 @@ int main (int argc, char **argv)
                         for (j = MAX(0,iw+shift-smooth), k=1; j < MAX(0,iw+shift); j++, k++) {
                             Mi[l*nxs*nts+i*nts+j] *= costaper[smooth-k];
                         }
+                        /* This defines v1_plus in equation (8) */
                         for (j = 0; j < nts; j++) {
                             v1plus[l*nxs*nts+i*nts+j] += Mi[l*nxs*nts+i*nts+j];
                         }
@@ -826,11 +858,13 @@ int main (int argc, char **argv)
                             for (j = 0; j < nts; j++) {
                                 Mi[l*nxs*nts+i*nts+j] += -DD[l*nxs*nts+ix*nts+j];
 						    }
+						    /* store results in kmin as defined in equation (6) */
                             j = 0;
                             k1min[l*nxs*nts+i*nts+j] = -Mi[l*nxs*nts+i*nts+j];
                             for (j = 1; j < nts; j++) {
                                 k1min[l*nxs*nts+i*nts+j] = -Mi[l*nxs*nts+i*nts+nts-j];
                             }
+					        /* This collects the eliminated internal multiples and is the sum in right hand-side in equation (1) */	
         		        	if (file_update != NULL) {
 								j=0;
                                 Mup[l*nxs*nts+i*nts+j] += DD[l*nxs*nts+ix*nts+j] - k1min[l*nxs*nts+i*nts+j];
@@ -840,11 +874,13 @@ int main (int argc, char **argv)
 							}
 						}
 						else {
+						    /* store results in kmin as defined in equation (6) */
                             j = 0;
                             k1min[l*nxs*nts+i*nts+j] -= Mi[l*nxs*nts+i*nts+j];
                             for (j = 1; j < nts; j++) {
                                 k1min[l*nxs*nts+i*nts+j] -= Mi[l*nxs*nts+i*nts+nts-j];
                             }
+					        /* Mup collects the eliminated internal multiples and is the sum in right hand-side in equation (1) */	
         		        	if (file_update != NULL) {
 								j=0;
                             	Mup[l*nxs*nts+i*nts+j] += Mi[l*nxs*nts+i*nts+j];
@@ -853,7 +889,8 @@ int main (int argc, char **argv)
                             	}
 							}
 					    }
-					    /* apply mute window for samples above nts-ii */
+					    /* Apply mute window for samples above nts-ii : the Heaviside function in equation (4)
+                         * This also defines u1_min, for t>=t2-epsilon in equation (7) */
 						iw = NINT((ii*dt+twplane[ix])/dt);
                         for (j = 0; j < MIN(nts,nts-iw+isms); j++) {
                             Mi[l*nxs*nts+i*nts+j] = 0.0;
@@ -889,6 +926,126 @@ int main (int argc, char **argv)
             }
         }
 
+        /* Write optional u- u+ v- v+ to disk */
+        /* Based on equation(6) and (7) k1min is split in vmin and umin */
+        if (file_vmin != NULL) {
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+					/* apply mute window for delta function at t=0*/
+					iw0 = NINT((twplane[ix])/dt);
+            		for (j = 0; j < MAX(0,iw0+shift-smooth); j++) {
+                		uv[l*nxs*nts+i*nts+j] = 0.0;
+            		}
+            		for (j = MAX(0,iw0+shift-smooth), k=1; j < MAX(0,iw0+shift); j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1min[l*nxs*nts+i*nts+j]*costaper[smooth-k];
+ 					}
+					iw = NINT((ii*dt+twplane[ix])/dt);
+            		for (j = MAX(0,iw0+shift); j < iw-isme; j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1min[l*nxs*nts+i*nts+j];
+					}
+            		for (j = iw-isme, k=0; j < iw-isms; j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1min[l*nxs*nts+i*nts+j]*costaper[k];
+            		}
+            		for (j = MAX(0,iw-isms); j < nts; j++) {
+                		uv[l*nxs*nts+i*nts+j] = 0.0;
+            		}
+
+				}
+			}
+          	writeDataIter(file_vmin, uv, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixp, npos, 0, ii);
+	    }
+        if (file_umin != NULL) {
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+					iw = NINT((ii*dt+twplane[ix])/dt);
+					/* apply mute window for samples before ii */
+            		for (j = 0; j < MAX(0,iw-isme); j++) {
+                		uv[l*nxs*nts+i*nts+j] = 0.0;
+            		}
+            		for (j = MAX(0,iw-isme), k=1; j < iw-isms; j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1min[l*nxs*nts+i*nts+j]*costaper[smooth-k];
+            		}
+            		for (j = MAX(0,iw-isms); j < nts; j++) {
+                		uv[l*nxs*nts+i*nts+j] = k1min[l*nxs*nts+i*nts+j];
+            		}
+				}
+			}
+          	writeDataIter(file_umin, uv, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixp, npos, 0, ii);
+	    }
+        /*
+         * \begin{alignat}{2}
+         * \label{k1p}
+         * k_{1,i}^+({\bf x}'_0,{\bf x}''_0,t,t_2) = & \int_{t'=0}^{+\infty} \int_{\partial \setD_0}  R({\bf x}'_0,{\bf x}_0,-t')
+         * v_{1,i}^-({\bf x}_0,{\bf x}''_0,t-t',t_2) {\rm d}t' {\rm d} {\bf x}_0.
+         * \end{alignat}
+         * 
+         * Equation (\ref{k1p}) can be further split in the time domain as follows; 
+         * %
+         * \begin{equation} \label{uv+}
+         *     k_{1,i}^+({\bf x}'''_0,{\bf x}''_0,t,t_2) = \begin{cases}
+         *                v_{1,i}^+({\bf x}'''_0,{\bf x}''_0,t,t_2)            & t < t_2-\varepsilon \\
+         *                u_{1,i}^+({\bf x}'''_0,{\bf x}''_0,t,t_2)			   & t \geq t_2-\varepsilon
+         *            \end{cases},
+         * \end{equation}
+         * %
+         *
+         */
+        /* The equation (\ref{k1p}) above is the k1plus equivalent of k1min of equation(6) 
+         * and the time window in equation (\ref{uv+}) defines vplus and uplus */
+        /* Based on these equations and k1plus is split in vplus and uplus */
+
+        if (file_vplus != NULL) {
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+					/* apply mute window for delta function at t=0*/
+					iw0 = NINT((twplane[ix])/dt);
+                    /* to apply muting window around epsilon below t=0 uncomment this part */
+                    /* This (partly) removes the delta function at t=0 */
+            	   for (j = 0; j < MAX(0,iw0+shift-smooth); j++) {
+                		uv[l*nxs*nts+i*nts+j] = 0.0;
+            		}
+            		for (j = MAX(0,iw0+shift-smooth), k=1; j < MAX(0,iw0+shift); j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1plus[l*nxs*nts+i*nts+j]*costaper[smooth-k];
+ 					}
+ 				/*  this keeps the delta pulse below and around t=0 */
+            //		for (j = 0; j < iw0+shift; j++) {
+            //    		uv[l*nxs*nts+i*nts+j] = k1plus[l*nxs*nts+i*nts+j];
+ 			//		}
+
+					iw = NINT((ii*dt+twplane[ix])/dt);
+            		for (j = MAX(0,iw0+shift); j < iw-isme; j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1plus[l*nxs*nts+i*nts+j];
+					}
+            		for (j = iw-isme, k=0; j < iw-isms; j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1plus[l*nxs*nts+i*nts+j]*costaper[k];
+            		}
+            		for (j = MAX(0,iw-isms); j < nts; j++) {
+                		uv[l*nxs*nts+i*nts+j] = 0.0;
+            		}
+
+				}
+			}
+          	writeDataIter(file_vplus, uv, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixp, npos, 0, ii);
+	    }
+        if (file_uplus != NULL) {
+            for (l = 0; l < Nfoc; l++) {
+                for (i = 0; i < npos; i++) {
+					iw = NINT((ii*dt+twplane[ix])/dt);
+					/* apply mute window for samples before ii */
+            		for (j = 0; j < MAX(0,iw-isme); j++) {
+                		uv[l*nxs*nts+i*nts+j] = 0.0;
+            		}
+            		for (j = MAX(0,iw-isme), k=1; j < iw-isms; j++, k++) {
+                		uv[l*nxs*nts+i*nts+j] = k1plus[l*nxs*nts+i*nts+j]*costaper[smooth-k];
+            		}
+            		for (j = MAX(0,iw-isms); j < nts; j++) {
+                		uv[l*nxs*nts+i*nts+j] = k1plus[l*nxs*nts+i*nts+j];
+            		}
+				}
+			}
+          	writeDataIter(file_uplus, uv, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixp, npos, 0, ii);
+	    }
+
         /* To Do? optional write intermediate RR results to file */
 
         if (verbose) {
@@ -911,7 +1068,9 @@ int main (int argc, char **argv)
     free(Mi);
     free(energyM0);
     free(M0);
+    free(uv);
     free(k1min);
+    free(k1plus);
     free(v1plus);
 
     t2 = wallclock_time();
@@ -927,8 +1086,8 @@ int main (int argc, char **argv)
 /*================ write output files ================*/
 
     if (file_update != NULL) {
-		fp_up = fopen(file_update, "w+");
-    	if (fp_up==NULL) verr("error on creating output file %s", file_update);
+		fp_ud = fopen(file_update, "w+");
+    	if (fp_ud==NULL) verr("error on creating output file %s", file_update);
 	}
     fp_rr = fopen(file_rr, "w+");
     if (fp_rr==NULL) verr("error on creating output file %s", file_rr);
@@ -950,14 +1109,14 @@ int main (int argc, char **argv)
         ret = writeData(fp_rr, (float *)&RR[l*size], hdrs_out, n1, n2);
         if (ret < 0 ) verr("error on writing output file.");
     	if (file_update != NULL) {
-        	ret = writeData(fp_up, (float *)&Msp[l*size], hdrs_out, n1, n2);
+        	ret = writeData(fp_ud, (float *)&Msp[l*size], hdrs_out, n1, n2);
         	if (ret < 0 ) verr("error on writing output file.");
 		} 
     }
     ret = fclose(fp_rr);
     if (ret < 0) verr("err %d on closing output file %s",ret, file_rr);
 	if (file_update != NULL) {
-		ret = fclose(fp_up);
+		ret = fclose(fp_ud);
     	if (ret < 0) verr("err %d on closing output file %s",ret, file_update);
 	}
 
diff --git a/marchenko/readShotData.c b/marchenko/readShotData.c
index 80e43fc5f03e2b2b25bea324a15592a0456f0990..ed1c0bc271039d0f387f7a7985340ee35ccdaf1a 100644
--- a/marchenko/readShotData.c
+++ b/marchenko/readShotData.c
@@ -131,7 +131,7 @@ int nx, int nxs, float fxsb, float dxs, int ntfft, int mode, float scale, float
             }
             if ((sx_shot != hdr.sx) || (fldr_shot != hdr.fldr) ) break;
         }
-        if (verbose>2) {
+        if (verbose>3) {
             vmess("finished reading shot %d (%d) with %d traces",sx_shot,igath,itrace);
         }
 
diff --git a/marchenko/readTinvData.c b/marchenko/readTinvData.c
index 11ef1a4585c7adb09bf3c5a4267a371ac1e99d30..c0724b10fe3e9e57f47798ad9a8499eb735e715c 100644
--- a/marchenko/readTinvData.c
+++ b/marchenko/readTinvData.c
@@ -99,7 +99,7 @@ int readTinvData(char *filename, float *xrcv, float *xsrc, float *zsrc, int *xnx
 			}
 			if ((sx_shot != hdr.sx) || (fldr_shot != hdr.fldr) ) break;
 		}
-		if (verbose>2) {
+		if (verbose>3) {
 			fprintf(stderr,"finished reading shot %d (%d) with %d traces\n",sx_shot,isyn,itrace);
 			//disp_fileinfo(filename, nt, xnx[isyn], hdr.f1, xrcv[isyn*nxm], d1, d2, &hdr);
 		}
@@ -144,7 +144,7 @@ int readTinvData(char *filename, float *xrcv, float *xsrc, float *zsrc, int *xnx
             }
         }
         maxval[isyn*nx+imax] = jmax;
-        if (verbose >= 3) vmess("Mute max at src-trace %d is sample %d", imax, maxval[imax]);
+        if (verbose >= 2) vmess("Mute max at src-trace %d is sample %d", imax, maxval[imax]);
 
         /* search forward in trace direction from maximum in file */
         for (i = imax+1; i < nx1; i++) {
diff --git a/marchenko/synthesis.c b/marchenko/synthesis.c
index 39a31504b6ad6c42b17276c057b9dbddfe8dbdc7..8d7a1636445d0c15d45555f6659f4540a5fed46c 100644
--- a/marchenko/synthesis.c
+++ b/marchenko/synthesis.c
@@ -180,7 +180,7 @@ nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int np
             
             } /* end of Nfoc loop */
 
-            if (verbose>4) vmess("*** Shot gather %d processed ***", k);
+            if (verbose>3) vmess("*** Shot gather %d processed ***", k);
 
         } /* end of nparallel shots (k) loop */
         free(sum);
@@ -375,7 +375,7 @@ nw, int nw_low, int nw_high,  int mode, int reci, int nshots, int *ixpos, int np
             
             } /* end of Nfoc loop */
 
-            if (verbose>4) vmess("*** Shot gather %d processed ***", k);
+            if (verbose>3) vmess("*** Shot gather %d processed ***", k);
 
         } /* end of nparallel shots (k) loop */
         free(sum);
diff --git a/marchenko/writeDataIter.c b/marchenko/writeDataIter.c
index b1a51f3d0aa776be14f3a765d1e23e103eb42eab..23ca41bc7bb4c51c969eac9196ca3d3b35ef61ce 100644
--- a/marchenko/writeDataIter.c
+++ b/marchenko/writeDataIter.c
@@ -25,6 +25,7 @@ int writeDataIter(char *file_iter, float *data, segy *hdrs, int n1, int n2, floa
     char number[16], filename[1024];
 	float *trace;
 
+	if (file_iter==NULL) return -1;
     trace  = (float *)malloc(n1*sizeof(float));
 	strcpy(filename, file_iter);
 	sprintf(number,"_%03d",(iter));
diff --git a/marchenko3D/TWtransform.c b/marchenko3D/TWtransform.c
index 12bcb7a790be00fb2e16d6c8eb7b4b64f759facc..72e6fb39d1c61cfab16fdb1337ec9c6e21d7c48d 100644
--- a/marchenko3D/TWtransform.c
+++ b/marchenko3D/TWtransform.c
@@ -3,14 +3,11 @@
 #include <string.h>
 #include <math.h>
 #include "segy.h"
+#include "genfft.h"
 #include "zfpmar.h"
 #include <assert.h>
 #include <zfp.h>
 
-typedef struct { /* complex number */
-        float r,i;
-} complex;
-
 #define NINT(x) ((long)((x)>0.0?(x)+0.5:(x)-0.5))
 #define MAX(x,y) ((x) > (y) ? (x) : (y))
 #define MIN(x,y) ((x) < (y) ? (x) : (y))
diff --git a/marchenko3D/applyMute3D.c b/marchenko3D/applyMute3D.c
index 7356aa6baa4b554fa227a8639ff92e78db4578bd..8e51ff2851f1a771c7778f5e8d227f387f80306b 100644
--- a/marchenko3D/applyMute3D.c
+++ b/marchenko3D/applyMute3D.c
@@ -3,7 +3,9 @@
 #include <string.h>
 #include <math.h>
 #include <assert.h>
+#include "genfft.h"
 
+void verr(char *fmt, ...);
 #ifndef MAX
 #define MAX(x,y) ((x) > (y) ? (x) : (y))
 #endif
@@ -346,4 +348,4 @@ void timeShift(float *data, long nsam, long nrec, float dt, float shift, float f
     free(trace);
 
     return;
-}
\ No newline at end of file
+}
diff --git a/marchenko3D/fmute3D.c b/marchenko3D/fmute3D.c
index 5715a5dfa1bec6caf3d32023eee014711a9283d6..c175c0192c9a026f217425b43261efede44f3768 100644
--- a/marchenko3D/fmute3D.c
+++ b/marchenko3D/fmute3D.c
@@ -184,15 +184,15 @@ int main (int argc, char **argv)
     if (file_out==NULL) fp_out = stdout;
     else {
         fp_out = fopen(file_out, "w+");
-        if (fp_out==NULL) verr("error on ceating output file");
+        if (fp_out==NULL) verr("error on creating output file");
     }
     if (check!=0){
         fp_chk = fopen("check.su", "w+");
-        if (fp_chk==NULL) verr("error on ceating output file");
+        if (fp_chk==NULL) verr("error on creating output file");
         fp_psline1 = fopen("pslinepos.asci", "w+");
-        if (fp_psline1==NULL) verr("error on ceating output file");
+        if (fp_psline1==NULL) verr("error on creating output file");
         fp_psline2 = fopen("pslineneg.asci", "w+");
-        if (fp_psline2==NULL) verr("error on ceating output file");
+        if (fp_psline2==NULL) verr("error on creating output file");
         
     }
     if (smooth) {
diff --git a/marchenko3D/getFileInfo3Dzfp.c b/marchenko3D/getFileInfo3Dzfp.c
index 36854f58f1d5d0b9dda17df7363ce2c589517cd4..105c8295351966a22794494857bc8f9870c51f15 100644
--- a/marchenko3D/getFileInfo3Dzfp.c
+++ b/marchenko3D/getFileInfo3Dzfp.c
@@ -38,7 +38,7 @@ long getFileInfo3Dzfp(char *filename, long *n1, long *n2, long *n3, long *ngath,
     
     fp_in = fopen(filename, "r");
 	if (fp_in==NULL) {
-		fprintf(stderr,"input file %s has an error\n", fp_in);
+		fprintf(stderr,"input file %s has an error\n", filename);
 		perror("error in opening file: ");
 		fflush(stderr);
 		return -1;
@@ -78,4 +78,4 @@ long getFileInfo3Dzfp(char *filename, long *n1, long *n2, long *n3, long *ngath,
     *nxm = (*n2)*(*n3);
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/marchenko3D/writeDataIter3D.c b/marchenko3D/writeDataIter3D.c
index fbaecdb2f4d71bb67a5982bae554c5aee5078955..dab35d61a802a38f9ab05b607f3c1578ec4d5622 100644
--- a/marchenko3D/writeDataIter3D.c
+++ b/marchenko3D/writeDataIter3D.c
@@ -26,7 +26,7 @@ long writeDataIter3D(char *file_iter, float *data, segy *hdrs, long n1, long n2,
 
     trace  = (float *)malloc(n1*sizeof(float));
 	strcpy(filename, file_iter);
-	sprintf(number,"_%03d",(iter));
+	sprintf(number,"_%03ld",(iter));
 	name_ext(filename, number);
 	fp_iter = fopen(filename, "w+");
 	if (fp_iter==NULL) verr("error on creating output file %s", filename);
diff --git a/utils/correigen.c b/utils/correigen.c
index 50a84c970e58b1fff50bb384e9dcbaa98bdf3bc8..26a80911d04c41a1b8d204d6b37c95e4d69d23e8 100644
--- a/utils/correigen.c
+++ b/utils/correigen.c
@@ -302,7 +302,7 @@ int main (int argc, char **argv)
 	if (file_out==NULL) fp_out = stdout;
 	else {
 		fp_out = fopen(file_out, "w+");
-		if (fp_out==NULL) verr("error on ceating output file");
+		if (fp_out==NULL) verr("error on creating output file");
     	strcpy(filename, file_out);
     	name_ext(filename, "_sort_eig");
     	fp_oute = fopen(filename,"w");
diff --git a/utils/fconv.c b/utils/fconv.c
index d44c92aaac7372422ee41f86260f09a1a82d57f3..fe3e6e77797ee19bbe742d72fb2d017d7424eb53 100644
--- a/utils/fconv.c
+++ b/utils/fconv.c
@@ -275,7 +275,7 @@ int main (int argc, char **argv)
 	if (file_out==NULL) fp_out = stdout;
 	else {
 		fp_out = fopen(file_out, "w+");
-		if (fp_out==NULL) verr("error on ceating output file");
+		if (fp_out==NULL) verr("error on creating output file");
 	}
 
 /*================ loop over all shot records ================*/
diff --git a/utils/syn2d.c b/utils/syn2d.c
index f8c01b91e6322445e4a0d7e6f8d4e5b70c6fc68f..05d6ac9bc7bb26f5950cb6d53f11069585e2fd74 100644
--- a/utils/syn2d.c
+++ b/utils/syn2d.c
@@ -489,7 +489,7 @@ int main (int argc, char **argv)
 	for (j = 0; j < n1; j++) etap[j] = exp(-alpha*j*dt);
 
     fp_out = fopen(file_cfp, "w+");
-    if (fp_out==NULL) verr("error on ceating output file");
+    if (fp_out==NULL) verr("error on creating output file");
 
 	for (l = 0; l < Nsyn; l++) {
 		if (ixa || ixb) f2 = xsyn[l]-ixb*d2;