diff --git a/.gitignore b/.gitignore
index d29000507baf716523e7711070baaf27c0c9194b..30e0780c4ddfdaddabad1284c7fffb7733735e04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.[oa]
 *.eps
+*.mod
 bin/*
 *.su
 *.bin
@@ -30,8 +31,16 @@ utils/Snap2Shot
 utils/combine
 utils/combine_induced
 utils/reshape_su
+utils/convhomg
+utils/makeR1D
+utils/padmodel
+utils/pwshift
+utils/truncate
 fdelmodc/fdelmodc
 include/genfft.h
+include/zfp.h
+include/bitstream.h
+include/zfp/
 Make_include
 marchenko_applications/fmute
 marchenko_applications/marchenko
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000000000000000000000000000000000000..cf8465c27b2721f153236f41c7f66271f5ee4c4c
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,150 @@
+INSTALLATION
+-------------
+
+1) To compile and link the code you first have to set the ROOT variable in the Make_include file which can be found in the directory where you have found this INSTALL file.
+You can use Make_include_template as a first start: 
+
+cp Make_include_template Make_include
+
+and set the ROOT directory Make_include to the current path with the following command:
+
+sed -i 's,REPLACE_WITH_PWD,'"$PWD"',' Make_include
+
+2) Check the compiler and CFLAGS options in the file Make_include and adapt to the system you are using. The default options are set for a the GNU C-compiler on a Linux system. A Fortran compiler is only needed for the MDD package. A C++ compiler is not needed to compile the code. The Makefile has been tested with GNU make. 
+If the fortran compiler (FC=) is not set the MDD package will not be build.
+
+The Make_include file contains example settings for GNU, Intel, AMD, PGI and Cray compilers. If you want to use one of these compilers don't
+forget to comment the compilers you do not want to use. 
+
+3) If the compiler options are set in the Make_include file you can type 
+
+make clean
+make 
+
+and the Makefile will make:
+
+    cd FFTlib       ; $(MAKE)
+    cd zfp          ; $(MAKE) install
+    cd fdelmodc     ; $(MAKE) install
+    cd fdelmodc3D   ; $(MAKE) install
+    cd utils        ; $(MAKE) install
+    cd marchenko    ; $(MAKE) install
+    cd marchenko3D  ; $(MAKE) install
+    cd corrvir      ; $(MAKE) install
+    cd raytime      ; $(MAKE) install
+    cd MDD          ; $(MAKE) install
+    cd fdacrtmc     ; $(MAKE) install
+
+- FFT library 
+- fdelmodc / 3D
+- marchenko / 3D
+- utils
+
+The libraries will be placed in the lib/ directory and the executables in the bin/ directory.
+
+To use the executables don't forget to include the pathname in your PATH:
+
+bash/sh:
+export PATH='path_to_this_directory'/bin:$PATH:
+csh:
+setenv PATH 'path_to_this_directory'/bin:$PATH:
+
+
+MKL libraries
+-------------
+If you are running on x64_86 processors you can download (for free) Intel's highly optimised MKL package:
+
+https://software.intel.com/en-us/mkl/choose-download
+ 
+These Libraries include highly optimised libraries of BLAS, LAPACK, and FFT(W). 
+Usually the MKL libraries are installed in $MKLROOT. If that variable is not set, you can try to find the correct path by searching for one of the libraries:
+
+find /opt/intel -name libmkl_gf_lp64.so
+
+and adjust MKLROOT in Make_include accordingly. 
+
+If MKLROOT is not set in your environment and also not set in Make_include then MKL will not be used and default options for BLAS, LAPACK, and FFT are chosen. 
+
+The Finite Different based RTM code (FDACRTMC) will not be installed if MKL is not available. This code depends of FFTW and we do want to
+include the full FFTW source code package to this. Our aim is to keep the compilation of the code in the GitHub repository
+as simple as possible with the smallest number of external libraries.
+
+
+SEISMIC UNIX
+-----------
+If you want to use the .su files with SU from CWP:
+git clone https://github.com/JohnWStockwellJr/SeisUnix
+
+==> Please make sure that SU is compiled without XDR (in $CWPROOT/Makefile.config make sure that XDRFLAG is NOT enabled). The SU output files of fdelmodc are all based on local IEEE data.
+To exclude the XDRFLAG in SU you have to use the following line in $CWPROOT/src/Makefile.config around line 35:
+
+XDRFLAG =
+
+so the XDRFLAG is empty. If you have made that change you have to remake SU with the commands:
+make remake
+make xtremake
+
+
+ZFP
+---
+The fdacrtmc and the 3D Marchenko code makes use of ZFP compression to store snaphots in CPU memory or do effient IO. This package is included in this repository for your convenience. The latest package and detailed explanation can be found on:
+
+https://github.com/LLNL/zfp
+
+MDD
+---
+The MDD kernels depend on BLAS and LAPACK calls. These libraries are part of Intel's MKL. Free downloads of the source code of these libraries can be found on 
+
+https://www.netlib.org/blas/index.html
+https://www.netlib.org/lapack/index.html
+
+
+FDACRTMC
+--------
+fdacrtmc uses FFTW and the wisdom computations are stored on disk for re-usage.  This directory is defined in fdacrtmc.h
+MKL includes an FFTW interface and in presence of MKL (and MKLROOT defined) the FDACRTMC package will also get compiled. 
+
+#ifndef WISDOMDIR
+#define WISDOMDIR "/tmp/fftw/"
+#endif
+
+For the code to run properly the directory /tmp/fftw/ must exist. It is also possible to compile the fdacrtmc code that defines
+WISDOMDIR. In fdacrtmc/Makefile add:
+
+CFLAGS  += -DWISDOMDIR="/directory/that/exists"
+
+or you can change the name of WISDOMDIR in fdacrtmc.h
+
+
+MISC
+----
+Other make commands which can be useful:
+
+make clean : removes all object files, but leaves libraries and executables
+make realclean: removes also object files, libraries and executables.
+
+
+COMPILATION PROBLEMS
+--------------------
+If you encounter missing trigometric / mathematical functions during compilation, for example;
+
+defineSource.c:(.text+0x2356): undefined reference to sin getParameters.o: In function getParameters:
+
+add  '-lm -lc' around line 109 in Make_include:
+
+LIBS= -L$L -lgenfft $(BLAS) -lm -lc
+
+
+UPDATES AND LATEST VERSION
+--------------------------
+The latest version of the source code and manual can be found at:
+
+http://www.xs4all.nl/~janth/Software/Software.html
+
+or at github:
+
+git clone https://github.com/JanThorbecke/OpenSource.git
+git clone git://github.com/JanThorbecke/OpenSource.git
+
+The code is used by many different people and if there is a request for a new option in the code, then I will try to implement, test and make it available. 
+
diff --git a/MDD/GlobalVariables.h b/MDD/GlobalVariables.h
new file mode 100644
index 0000000000000000000000000000000000000000..91c11d5afdb52b8ce27c0de845c46b3941c683a2
--- /dev/null
+++ b/MDD/GlobalVariables.h
@@ -0,0 +1,5 @@
+typedef struct { /* complex number */
+        float r,i;
+} complex;
+
+extern complex *cB;
diff --git a/MDD/Makefile b/MDD/Makefile
index 523a1f8a1c1008245e32f99dd5187e49393d9384..b601fe15bcf8a7a2f72f91b075be83316d0c3de8 100644
--- a/MDD/Makefile
+++ b/MDD/Makefile
@@ -12,7 +12,8 @@ ALLINC  = -I.
 #General BLAS library
 #LIBS += $(BLAS)
 
-CFLAGS += -I$(MKLROOT)/include
+#CFLAGS += -g # -I$(MKLROOT)/include  
+#FFLAGS += -g -mcmodel=medium
 #LIBS += -lblas -llapack -L$L -lgenfft $(LIBSM) -lc -lm
 
 all: mdd 
@@ -30,21 +31,28 @@ SRCC =  $(PRG).c \
 	getFileInfo.c \
 	verbosepkg.c \
 	name_ext.c \
-	wallclock_time.c
+	wallclock_time.c 
+
+SRCF =  lsqrDataModule.F90 \
+        zlsqrDataModule.F90 \
+        zlsqrblasInterface.F90 \
+        zlsqrModule.F90
 
 OBJC	= $(SRCC:%.c=%.o)
 
-$(PRG):	$(OBJC) 
-	$(CC) $(LDFLAGS) $(CFLAGS) $(OPTC) -o $(PRG) $(OBJC) $(LIBS)
+OBJF    = $(SRCF:%.F90=%.o)
+
+$(PRG):	$(OBJC) $(OBJF)
+	$(FC) $(LDFLAGS) -nofor-main $(CFLAGS) $(OPTC) -o $(PRG) $(OBJC) $(OBJF) $(LIBS) 
 
 install: $(PRG) 
 	cp $(PRG) $B
 
 clean:
-		rm -f core $(OBJC) $(OBJM) $(PRG) 
+		rm -f core $(OBJC) $(OBJF) $(OBJM) *.mod $(PRG) 
 
 realclean:
-		rm -f core $(OBJC) $(OBJM) $(PRG) $B/$(PRG) 
+		rm -f core $(OBJC) $(OBJF) $(OBJM) *.mod $(PRG) $B/$(PRG) 
 
 
 print:	Makefile $(SRC)
@@ -58,4 +66,3 @@ tar:
 	@tar cf $(PRG).tar Makefile $(SRC) && compress $(PRG).tar
 
 
-
diff --git a/MDD/computeMatrixInverse.c b/MDD/computeMatrixInverse.c
index fd4eee61cfb6275bd428e6beb60058d7c89f0d53..7fe2d58f16c0d18c0c5b42612c109362cd2a21e3 100644
--- a/MDD/computeMatrixInverse.c
+++ b/MDD/computeMatrixInverse.c
@@ -342,7 +342,7 @@ void computeMatrixInverse(complex *matrix, int nxm, int rthm, float eps_a, float
 			/*S[i]=1.0/(S[i]+eps_r*S[0]);*/
 /*			fprintf(stderr,"S^-1[%d] = %e\n",i,S[i]);*/
 		}
-		if(verbose) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
+		if(verbose>1) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
 
 		for (j=0; j<N; j++) {
 			for (i=0; i<N; i++) {
diff --git a/MDD/deconvolve.c b/MDD/deconvolve.c
index 13dc809c2872454ac80f70b855a151a312ce28b9..9d7d81ea229e49cf5b6c8eb99519f9c5d8544e27 100644
--- a/MDD/deconvolve.c
+++ b/MDD/deconvolve.c
@@ -9,7 +9,7 @@
 
 typedef struct { /* complex number */
 	float r,i;
-} complex;
+}complex;  
 
 /*
 cblas interface
@@ -88,19 +88,34 @@ On entry, LDC specifies the first dimension of C as declared in the calling (sub
 
 */
 
+// zLSQR implementation
+void zLSQR_(int *m, int *n, size_t *indw, void (*Aprod1)(int *,int *, complex *, complex *, size_t *), void (*Aprod2)(int *,int *, complex *, complex *, size_t *), complex *b, float *damp, int *wantse, complex *x, float *se, float *atol, float *btol, float* conlim,  int* itnlim, int *nout, int *istop, int *itn, float *Anorm, float *Acond, float *rnorm, float *Arnorm, float *xnorm); //, complex *w, complex *v);
+
+// C funcs as input for Fortran subroutines in zLSQR
+void Aprod1_(int *m, int *n, float *x, float *y, size_t *indw);
+void Aprod2_(int *m, int *n, float *x, float *y, size_t *indw);
+
 void computeMatrixInverse(complex *matrix, int nxm, int rthm, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int iw, int verbose);
 
-int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose)
+complex *cB;
+
+int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int lsqr_iter, float lsqr_damp, int k_iter, float TCscl, int verbose)
 {
 	int istation, jstation, i, j, k, icc, ibb, NA, NB, NC, nshots;
 	size_t  iwnA, iw, iwnB, iwAB, iwBB;
 	complex *AB, *BB;
 	char *transa, *transb,*transN;
 	complex beta, alpha, tmp, a, b;
+	
 
 	AB = (complex *)calloc(nstationA*nstationB,sizeof(complex));
 	BB = (complex *)calloc(nstationB*nstationB,sizeof(complex));
 
+	complex *T0Fm, *Temp; // remember to free this stuff
+
+	T0Fm = (complex *)calloc(nstationA*nstationB*nfreq,sizeof(complex));
+	Temp = (complex *)calloc(nstationB*nstationB*nfreq,sizeof(complex));
+
 	if (conjgA == 1) transa = "C";
 	else if (conjgA == 0) transa = "N";
 	else transa = "T";
@@ -129,23 +144,11 @@ private(iw, iwnA, iwnB, iwAB, iwBB)
 				/* cblas_cgemm(CblasRowMajor,CblasNoTrans, CblasConjTrans, NA, NB, nshots, &alpha.r, 
 				&cA[iwnA].r, NA, 
 				&cB[iwnB].r, NB, &beta.r,
-				ocC[iwAB].r, NC); */
-/*
-			for (i=0; i<nshots; i++) {
-				for (j=0; j<nshots; j++) {
-				    for (k=0; k<nstationB; k++) {
-					cC[iwAB+j*nshots+i].r += cA[iwnA+k*nshots+i].r*cB[iwnB+j*nshots+k].r - cA[iwnA+k*nshots+i].i*cB[iwnB+j*nshots+k].i;
-					cC[iwAB+j*nshots+i].i += cA[iwnA+k*nshots+i].r*cB[iwnB+j*nshots+k].i + cA[iwnA+k*nshots+i].i*cB[iwnB+j*nshots+k].r;
-                    }
-				}
-			}
-*/
-
+				&cC[iwAB].r, NC); */
 			cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
 				&cA[iwnA].r, &NA, 
 				&cB[iwnB].r, &NB, &beta.r, 
 				&cC[iwAB].r, &NC); 	
-
 //				memcpy(&cC[iwAB].r, &cB[iwnA].r, sizeof(float)*2*nstationA*nshots);
 		}
 		else if (mdd==1) { /* Multi Dimensional deconvolution */
@@ -186,21 +189,152 @@ private(iw, iwnA, iwnB, iwAB, iwBB)
 				&BB[0].r, &NB, &beta.r, 
 				&cC[iwAB].r, &NA);
 		}
-		else if (mdd==3) { /* Copy matrix A or B to memory for testing purposes */
-			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
+		else if (mdd==3) { /* LSQR solver */
+			// INPUT LSQR
+			int wantse, itnlim, inc, nout, mn;
+			float damp, atol, btol, conlim;
+			float *n;
+			
+			damp = lsqr_damp;
+			atol = 0;
+			btol = 0;
+			conlim = 0; 
+			wantse = 0;
+			itnlim = lsqr_iter;
+			nout = 0;
+			inc = 1;	
+			
+			mn = NA*NB;
+			
+			// OUTPUT zLSQR
+			int itn, istop;
+			float Anorm, Acond, rnorm, Arnorm, xnorm, se;
+		
+			zLSQR_(&mn, &mn, &iwnB, Aprod1_, Aprod2_, 
+						&cA[iwnA], &damp, 
+						&wantse, &cC[iwAB], &se,
+						&atol, &btol, &conlim, &itnlim, &nout,
+						&istop, &itn, &Anorm, &Acond, &rnorm, &Arnorm, &xnorm);
+		
+			//memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
 		}
 		else if (mdd==4) {
-			memcpy(&cC[iwAB].r, &cB[iwnB].r, sizeof(complex)*nstationB*nshots);
+			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
+		}
+		else if (mdd==5) { //Matrix with Single shot attempt (doesn't work)
+			cgemv_(transa, &NA, &NA, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&cB[iwnB].r, 1, &beta.r,
+				&cC[iwnA].r, 1);
 		}
-		else if (mdd==5) {
-			cblas_cdotu_sub(nshots, &cA[iwnA].r, NA, &cB[iwnB].r, NB, &cC[iwnA].r);
+		
+		if (mdd==6) { /* Transmission Calculations */
+			
+			cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&cB[iwnB].r, &NB, &beta.r, 
+				&T0Fm[iwnA].r, &NA); // construct T_0 F+m
+			
+			for (jstation = 0; jstation < nstationA; jstation++) {
+				for (istation = 0; istation < nshots; istation++) {	
+					T0Fm[iw*nstationA*nshots+jstation*nshots+istation].r *= TCscl;
+					T0Fm[iw*nstationA*nshots+jstation*nshots+istation].i *= TCscl;
+				}   
+			}
+
+
+			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots); // copy T_0
+			
+			for (k=1; k< k_iter; k++) {
+				cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
+					&T0Fm[iwnA].r, &NA, 
+					&cA[iwnB].r, &NB, &beta.r, 
+					&Temp[iwnA].r, &NA); // construct T_0 F+m T_0 (k=1)
+					
+				for (jstation = 0; jstation < nstationA; jstation++) {
+					for (istation = 0; istation < nshots; istation++) {
+						Temp[iw*nstationA*nshots+jstation*nshots+istation].r *= TCscl;
+						Temp[iw*nstationA*nshots+jstation*nshots+istation].i *= TCscl;						
+						if (k_iter % 2 == 1) {
+							
+							cC[iw*nstationA*nshots+jstation*nshots+istation].r -= Temp[iw*nstationA*nshots+jstation*nshots+istation].r; //pow(TCscl,k_iter+2);
+							cC[iw*nstationA*nshots+jstation*nshots+istation].i -= Temp[iw*nstationA*nshots+jstation*nshots+istation].i; //pow(TCscl,k_iter+2);
+						} else {
+							cC[iw*nstationA*nshots+jstation*nshots+istation].r += Temp[iw*nstationA*nshots+jstation*nshots+istation].r; //pow(TCscl,k_iter+2);
+							cC[iw*nstationA*nshots+jstation*nshots+istation].i += Temp[iw*nstationA*nshots+jstation*nshots+istation].i; //pow(TCscl,k_iter+2);
+							
+						}
+					}   
+				}
+				memcpy(&cA[iwnA].r,&Temp[iwnA].r,sizeof(complex)*nstationA*nshots);
+			}
+			
 		}
 
 	}
 
 	free(AB);
 	free(BB);
+	free(T0Fm);
+	free(Temp);
 
 	return 0;
 }
 
+
+void Aprod1_(int *m, int *n, float *x, float *y, size_t *indw) {
+	int ldc;
+	complex beta, alpha;
+	char *transa, *transb;
+
+	transa = "N";
+	transb = "N";
+	alpha.r = 1.0; alpha.i = 0.0;
+	beta.r = 1.0; beta.i = 0.0;
+	
+	ldc = sqrt(*m);	
+	
+//	fprintf(stderr,"Aprod1 \n");
+	
+	cgemm_(transa, transb, &ldc, &ldc, &ldc, &alpha.r, 
+		&x[0], &ldc, 
+		&cB[*indw].r, &ldc, &beta.r, 
+		&y[0], &ldc); 	
+	
+	/*
+	cgemv_(transa, m, n, &alpha.r, 
+		&cA[*indw].r, m, 
+		&x[0].r, &incx, &beta.r,
+		&y[0].r, &incy);
+	*/
+}
+
+void Aprod2_(int *m, int* n, float *x, float *y, size_t *indw) {
+	int ldc;
+	complex beta, alpha;
+	char *transa, *transb;
+	
+	transa = "C";
+	transb = "N";
+	alpha.r = 1.0; alpha.i = 0.0;
+	beta.r = 1.0; beta.i = 0.0;
+	
+	ldc = sqrt(*m);
+	
+//	fprintf(stderr,"Aprod2 \n");
+	
+	memset(x,0,1*(*m)*sizeof(complex));
+	
+	cgemm_(transb, transa, &ldc, &ldc, &ldc, &alpha.r, 
+		&y[0], &ldc, 
+		&cB[*indw].r, &ldc, &beta.r, 
+		&x[0], &ldc); 
+	
+	/*
+	cgemv_(transa, m, n, &alpha.r, 
+		&cA[*indw].r, m, 
+		&y[0].r, &incx, &beta.r,
+		&x[0].r, &incy);
+	*/	
+}
+
diff --git a/MDD/lsqrDataModule.F90 b/MDD/lsqrDataModule.F90
new file mode 100644
index 0000000000000000000000000000000000000000..54e0d0ae9d3a91693b88021e75f3ade1ff0d5498
--- /dev/null
+++ b/MDD/lsqrDataModule.F90
@@ -0,0 +1,31 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File lsqrDataModule.f90
+!
+! Defines real(dp) and a few constants for use in other modules.
+!
+! 24 Oct 2007: Allows floating-point precision dp to be defined
+!              in exactly one place (here).  Note that we need
+!                 use lsqrDataModule
+!              at the beginning of modules AND inside interfaces.
+!              zero and one are not currently used by LSQR,
+!              but this shows how they should be declared
+!              by a user routine that does need them.
+
+! 29 Jun 2013: Added ip and eps among others to match MINRES-QLP
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module lsqrDataModule
+
+  use iso_c_binding
+
+  implicit none
+
+  intrinsic                   ::      selected_real_kind, selected_int_kind, tiny
+
+  integer, parameter, public  :: dp = c_float !selected_real_kind(15,307) ! 64-bit real, default
+  integer,  parameter, public :: sp    = selected_real_kind(6,37)      ! 32-bit real
+  integer, parameter, public  :: ip = c_int !selected_int_kind(9)       ! R: (-10^R, 10^R)
+  real(c_float), parameter, public :: zero = 0.0_dp, one = 1.0_dp, eps = epsilon(zero)
+  real(c_float), parameter, public :: realmin = tiny(one)
+
+end module lsqrDataModule
diff --git a/MDD/mdd.c b/MDD/mdd.c
index 92d1976e08362fda7abb872db5d603ea82492fdb..4420da13dc9da07cb5c9bf4e3b7f3a8ec04aeff7 100644
--- a/MDD/mdd.c
+++ b/MDD/mdd.c
@@ -26,7 +26,9 @@ int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *
 
 int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc, int *xnx, complex *cdata, int nw, int nw_low, int ngath, int nx, int nxm, int ntfft, float alpha, float scl, float conjg, int transpose, int verbose);
 
-int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose);
+//int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose);
+int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int lsqr_iter, float lsqr_damp, int k_iter, float TCscl, int verbose);
+
 
 void writeEigen(char *file_out, float df, int nw_low, int nw_high, int nw, float *eigen, int nx, float dx, float xmin);
 void writeDatamatrix(char *file_out, complex *P, int ntfft, int ntc, int Nrec, int Nshot, int nfreq, int nw_low, float dt, int verbose);
@@ -50,6 +52,7 @@ char *sdoc[] = {
 " ",
 "   file_A= .................. name of file(s) which store the data in location A",
 "   file_B= .................. name of file(s) which store the data in location B",
+"   file_C= .................. name of file(s) which store the data in location C",
 " ",
 " Optional parameters: ",
 " ",
@@ -58,24 +61,27 @@ char *sdoc[] = {
 "   fmin=0 ................... minimum frequency",
 "   fmax=70 .................. maximum frequency to use in deconvolution",
 " INPUT DEFINITION ",
-"   cjA=1 .................... -1 => apply complex conjugate to A",
-"   sclA=1 ................... apply scaling factor to A",
-"   tranposeA=0 .............. apply transpose to A",
-"   cjB=1 .................... -1 => apply complex conjugate to B",
-"   sclB=1 ................... apply scaling factor to B",
-"   tranposeB=0 .............. apply transpose to B",
+"   cjA/B/C=1 ................ -1 => apply complex conjugate to A/B/C",
+"   sclA/B/C=1 ............... apply scaling factor to A/B/C",
+"   tranposeA/B/C=0 .......... 1 => apply transpose to A/B/C",
+"   k_iter=5 ................. Iterations for MDD=6",
 " MATRIX INVERSION CALCULATION ",
 "   conjgA=0 ................. apply complex conjugate-transpose to A",
 "   conjgB=1 ................. apply complex conjugate-transpose to B",
+"   conjgC=0 ................. apply complex conjugate-transpose to C",
 "   rthm=0 ................... see below for options",
 "   eps_a=1e-5 ............... absolute stabilization factor for LS",
 "   eps_r=1e-4 ............... relative stabilization factor for LS",
 "   numacc=1e-6 .............. numerical accurary for SVD",
-"   ntap=0 ................... number of taper points matrix",
+"   ntapA/B=0 ................ number of taper points matrix",
 "   ftap=0 ................... percentage for tapering",
 "   tap=0 .................... type of taper: 0=cos 1=exp",
 "   eigenvalues= ............. write SVD eigenvalues to file ",
 "   mdd=1 .................... mdd=0 => computes correlation ",
+"                              mdd=3 => LSQR solver ",
+" LSQR PARAMETERS ",
+"   lsqr_iter=25 ............. number of iterations for LSQR solver",
+"   lsqr_damp=1e-4 ........... damping for LSQR solver",
 " OUTPUT DEFINITION ",
 "   file_out= ................ output base name ",
 "   causal=1 ................. output causal(1), non-causal(2), both(3), or summed(4)",
@@ -88,6 +94,8 @@ char *sdoc[] = {
 "    nt (the number of samples read by the IO routine)",
 " ",
 " Options for mdd= ",
+"	  6 = Iterative Transmission Response (based on Vd Neut et al. 2018, EAGE)",
+"     3 = LSQR based solver A = Bx",
 "     2 = A/(B + eps) ",
 "     1 = A*B^H/(B*B^H + eps) ",
 "     0 = A*B^H ",
@@ -106,34 +114,40 @@ char *sdoc[] = {
 NULL};
 /**************** end self doc ***********************************/
 
+complex *cB;
+
 int main (int argc, char **argv)
 {
 	FILE    *fpin, *fpout;
 	int		i, j, k, ret, nshots, ntraces;
 	int		size, n1, n2, ntfft, nf, causal;
 	int     verbose, fullcorr, ncorstat, err;
-	int     nt, nc, ncc, ntc, nshotA, nshotB;
-	size_t  nstationA, nstationB, nfreq, istation, jstation, iw;
+ 	int     nt, nc, ncc, ntc, nshotA, nshotB, nshotC;
+ 	size_t  nstationA, nstationB, nstationC, nfreq, istation, jstation, iw;
 	int     pgsz, istep,jstep;
 	int     mdd;
-	int		conjgA, conjgB;
-	int 	ntap, nxm, ngath, nw, nw_low, nw_high, eigenvalues, rthm, combine, distance;
+	int	    conjgA, conjgB, conjgC;
+ 	int     ntapA, ntapB, nxm, ngath, nw, nw_low, nw_high, eigenvalues, rthm, combine, distance;
 	size_t  nwrite, cdatainSize, datainSize, cdataoutSize, stationSize, is;
 	float	dx, dt, fmin, fmax, df, eps_r, eps_a, ftap, numacc;
 	float	*rC, scl, *rl, *eigen;
 	float   f1, f2, d1, d2, sclsxgx, xmin, xmax, alpha, wshot, wpi, wrec;
- 	float   *xrcvA, *xsrcA, *xrcvB, *xsrcB;
+  	float   *xrcvA, *xsrcA, *xrcvB, *xsrcB, *xsrcC, *xrcvC;
 	float	*taper;
     int     *xnx;
-	float sclA,sclB, cjA, cjB;
-	int  transposeA, transposeB;
+ 	float 	sclA,sclB, cjA, cjB, sclC, cjC;
+ 	int  	transposeA, transposeB, transposeC;
+ 	float	scaling;
+ 	int  	npad;
+ 	int     k_iter, lsqr_iter;
+ 	float   TCscl, lsqr_damp;
 
+ 	complex *cdataout, *cTemp;
 
-	complex *cdataout;
 	double  t0, t1, t2, t3, tinit, twrite, tread, tdec, tfft;
-	char	*file_A, *file_B, *file_out, *file_dmat, filename[1024], number[128], *rthmName;
+ 	char	*file_A, *file_B, *file_C, *file_out, *file_dmat, filename[1024], number[128], *rthmName;
 	int     pe=0, root_pe=0, npes=1, ipe, size_s, one_file;
-	complex *cA, *cB, *oBB;
+	complex *cA, *cC, *oBB;
 	segy *hdr;
 
 	t0 = wallclock_time();
@@ -145,6 +159,7 @@ int main (int argc, char **argv)
 	assert(file_A != NULL);
 	if (!getparstring("file_B", &file_B)) file_B=NULL;
 	assert(file_B != NULL);
+ 	if (!getparstring("file_C", &file_C)) file_C=NULL;
 	if (!getparstring("file_out", &file_out)) file_out=NULL;
  	assert(file_out != NULL);
 	if (!getparstring("file_dmat", &file_dmat)) file_dmat=NULL;
@@ -154,20 +169,34 @@ int main (int argc, char **argv)
 	if (!getparint("rthm", &rthm)) rthm = 0;
 	if (!getparint("combine", &combine)) combine = 0;
 	if (!getparint("causal", &causal)) causal = 1;
-	if (!getparint("ntap", &ntap)) ntap = 0;
-	if (!getparfloat("ftap", &ftap)) ftap = 0.;
+    if (!getparint("ntapA", &ntapA)) ntapA = 0;
+    if (!getparint("ntapB", &ntapB)) ntapB = 0;
+    if (!getparfloat("ftap", &ftap)) ftap = 0.;
+    if (!getparfloat("scaling", &scaling)) scaling = 1.;
 	if (!getparfloat("eps_r", &eps_r)) eps_r = 1e-4;
 	if (!getparfloat("eps_a", &eps_a)) eps_a = 1e-5;
 	if (!getparfloat("numacc", &numacc)) numacc = 1e-6;
 	if (!getparint("eigenvalues", &eigenvalues)) eigenvalues = 0;
 	if (!getparint("mdd", &mdd)) mdd = 1;
 
+    if (!getparint("lsqr_iter", &lsqr_iter)) lsqr_iter = 25;
+    if (!getparfloat("lsqr_damp", &lsqr_damp)) lsqr_damp = 1e-4;
+
 	if (!getparint("transposeA", &transposeA)) transposeA = 0;
 	if (!getparfloat("sclA", &sclA)) sclA = 1.;
 	if (!getparfloat("cjA", &cjA)) cjA = 1.;
 	if (!getparint("transposeB", &transposeB)) transposeB = 0;
 	if (!getparfloat("sclB", &sclB)) sclB = 1.;
 	if (!getparfloat("cjB", &cjB)) cjB = 1.;
+    if (file_C != NULL) {
+        if (!getparint("transposeC", &transposeC)) transposeC = 0;
+        if (!getparfloat("sclC", &sclC)) sclC = 1.;
+        if (!getparfloat("cjC", &cjC)) cjC = 1.;
+        if (!getparint("conjgC", &conjgC)) conjgC = 0;
+    }
+
+    if (!getparint("npad", &npad)) npad = 0;
+    if (!getparint("k_iter", &k_iter)) k_iter= 5;
 
 #ifdef _OPENMP
     npes   = omp_get_max_threads();
@@ -200,7 +229,23 @@ int main (int argc, char **argv)
 	getFileInfo(file_B, &n1, &n2, &nshotB, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
 	assert( n1 == nt);
 	nstationB = n2;
-	assert( nshotA == nshotB);
+    if (!((mdd == 5) || (mdd == 3))) assert( nshotA == nshotB);
+
+    if (file_C != NULL && mdd != 3) {
+        nshotC = 0;
+        getFileInfo(file_C, &n1, &n2, &nshotC, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
+        assert( n1 == nt);
+        nstationC = n2;
+        assert( nshotA == nshotC);
+    } else if (file_C != NULL) {
+        nshotC = 0;
+        getFileInfo(file_C, &n1, &n2, &nshotC, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
+        assert( n1 == nt);
+        nstationC = n2;
+    }
+
+    if (ntapB != 0) ftap = (float)ntapB / (float)nstationA;
+    else if (ftap != 0) ntapB = NINT(ftap*nstationA);
 
 /*================ initializations ================*/
 
@@ -218,11 +263,26 @@ int main (int argc, char **argv)
     nw       = nw_high - nw_low + 1;
 	nfreq = MIN(nf,nw);
 
-/* scaling of the results by Johno van IJsseldijk */ 
-    if (mdd == 0) scl = dx*dt/((float)ntfft); //correlation
-    else if (mdd==1) scl = 1/((float)ntfft)/dx/dt; // MDD
-    else if (mdd==2) scl = 1/((float)ntfft)/dx/dt; // MDD with A and B already computed (NOT TESTED)
-    else scl = 1.0/((float)ntfft); // Passing A or B through
+/* scaling of the results by Johno van IJsseldijk */
+    if (scaling==1) {
+        if (mdd == 0) scl = dx*dt/((float)ntfft); //correlation
+        else if (mdd==1) scl = 1/((float)ntfft)/dx/dt; // MDD
+        else if (mdd==3) scl = 1/((float)ntfft)/dx/dt; // MDD LSQR
+        else if (mdd==2) scl = 1/((float)ntfft)/dx/dt; // MDD with A and B already computed (NOT TESTED)
+        else scl = 1.0/((float)ntfft); // Passing A or B through
+
+        if (file_C != NULL && mdd != 3) scl *= dx*dt; // 
+
+        TCscl=dx*dt;
+    }
+    else if (scaling==0) {
+        scl = 1/((float)ntfft);
+        TCscl=1;
+    }
+    else {
+        scl = scaling/((float)ntfft);
+        TCscl=1;
+    }
 
 /* allocate in shared memory the in- and output data */
 
@@ -232,13 +292,18 @@ int main (int argc, char **argv)
 	cdataout     = (complex *)malloc(cdataoutSize);
 	cA           = (complex *)malloc(nstationA*cdatainSize);
 	cB           = (complex *)malloc(nstationB*cdatainSize);
-	taper        = (float *)malloc(2*nstationB*sizeof(float));
 	if (file_dmat!=NULL) oBB = (complex *)malloc(nstationB*nstationB*nfreq*sizeof(complex));
 	else oBB = NULL;
 	assert(cdataout != NULL);
 	assert(cA != NULL);
 	assert(cB != NULL);
 
+    if (file_C != NULL && mdd != 3) {
+        cC = (complex *)malloc(nstationC*cdatainSize);
+        assert(cC != NULL);
+        cTemp = (complex *)malloc(cdataoutSize);
+        assert(cTemp != NULL);
+    }
 
 /* for first touch binding of allocated memory */
 #pragma omp parallel for schedule(static) private(jstation,is) default(shared)
@@ -254,8 +319,16 @@ int main (int argc, char **argv)
 		memset(&cA[jstation*jstep],0,jstep*sizeof(complex));
 	}
 
+    if (file_C != NULL && mdd != 3) {
+#pragma omp parallel for schedule(static) private(jstation) default(shared)
+        for (jstation=0; jstation<nstationC; jstation++) {
+            memset(&cC[jstation*jstep],0,jstep*sizeof(complex));
+        }
+    }
+
     if (verbose) {
-        if (rthm==0) rthmName="Cholesky";
+		if (mdd==3) rthmName="LSQR";
+        else if (rthm==0) rthmName="Cholesky";
         else if (rthm==1) rthmName="LU";
         else if (rthm==2) rthmName="SVD single precision";
         else if (rthm==3) rthmName="SVD divide-and-conquer";
@@ -269,18 +342,31 @@ int main (int argc, char **argv)
         fprintf(stderr,"  nstationA ........ : %ld\n", nstationA );
         fprintf(stderr,"  nshotB ........... : %d\n", nshotB );
         fprintf(stderr,"  nstationB ........ : %ld\n", nstationB );
+        if (file_C != NULL) {
+            fprintf(stderr,"  nshotC ........... : %d\n", nshotC );
+            fprintf(stderr,"  nstationC ........ : %ld\n", nstationC );
+        }
+        fprintf(stderr,"  Scaling .......... : %e\n", scl);
+
         fprintf(stderr,"  number t-fft ..... : %d\n", ntfft);
-		fprintf(stderr,"  Input  size ...... : %ld MB\n", (nstationA+nstationB)*cdatainSize/(1024*1024));
+        fprintf(stderr,"  Input  size ...... : %ld MB\n", ((file_C != NULL) ? (nstationA+nstationB+nstationC)*cdatainSize/(1024*1024) : (nstationA+nstationB)*cdatainSize/(1024*1024)));
+
 		fprintf(stderr,"  Output size ...... : %ld MB\n", (cdataoutSize/((size_t)1024*1024)));
-        fprintf(stderr,"  taper points ..... : %d (%.2f %%)\n", ntap, ftap*100.0);
+        if (ntapB != 0) fprintf(stderr,"  taper points ..... : %d (%.0f %%)\n", ntapB, ftap*100.0);
+        if (ntapA != 0) fprintf(stderr,"  taper points ..... : %d \n", ntapA);
         fprintf(stderr,"  process number ... : %d\n", pe);
         fprintf(stderr,"  fmin ............. : %.3f (%d)\n", fmin, nw_low);
         fprintf(stderr,"  fmax ............. : %.3f (%d)\n", fmax, nw_high);
         fprintf(stderr,"  nfreq  ........... : %ld\n", nfreq);
         if (mdd) fprintf(stderr,"  Matrix inversion . : %s\n", rthmName);
         else  fprintf(stderr,"  Correlation ...... : \n");
-        fprintf(stderr,"  eps_r ............ : %e\n", eps_r);
-        fprintf(stderr,"  eps_a ............ : %e\n", eps_a);
+        if (mdd==1) {
+            fprintf(stderr,"  eps_r ............ : %e\n", eps_r);
+            fprintf(stderr,"  eps_a ............ : %e\n", eps_a);
+        } else if (mdd==3) {
+            fprintf(stderr,"  iterations........ : %d\n", lsqr_iter);
+            fprintf(stderr,"  damping........... : %e\n", lsqr_damp);
+        }
         fprintf(stderr,"  mdd .............. : %d\n", mdd);
     }
 
@@ -300,7 +386,54 @@ int main (int argc, char **argv)
 	alpha = 0.0;
     readShotData(file_B, xmin, dx, xrcvB, xsrcB, xnx, cB, nw, nw_low, nshotB, nstationB, nstationB, ntfft, alpha, sclB, cjB, transposeB, verbose);
 
-	//cB = cA;
+    if (file_C != NULL && mdd != 3) {
+        xsrcC     = (float *)calloc(nshotC,sizeof(float));
+        xrcvC     = (float *)calloc(nshotC*nstationC,sizeof(float));
+        alpha = 0.0;
+        readShotData(file_C, xmin, dx, xrcvC, xsrcC, xnx, cC, nw, nw_low, nshotC, nstationC, nstationC, ntfft, alpha, sclC, cjC, transposeC, verbose);
+    } else if (file_C != NULL) {
+        xsrcC     = (float *)calloc(nshotC,sizeof(float));
+        xrcvC     = (float *)calloc(nshotC*nstationC,sizeof(float));
+        alpha = 0.0;
+        readShotData(file_C, xmin, dx, xrcvC, xsrcC, xnx, cdataout, nw, nw_low, nshotC, nstationC, nstationC, ntfft, alpha, sclC, cjC, transposeC, verbose);
+    }
+
+    if (ntapA != 0) {
+        taper = (float *)malloc(nstationA*sizeof(float));
+        for (j = 0; j < ntapA; j++)
+            taper[j] = (cos(M_PI*(j-ntapA)/ntapA)+1)/2.0;//(exp(j/ntap*8)-1)/(exp(8)-1);//
+        for (j = ntapA; j < nstationA-ntapA; j++)
+            taper[j] = 1.0;
+        for (j = nstationA-ntapA; j < nstationA; j++)
+            taper[j] = taper[abs(j-nstationA)-1];//(cos(M_PI*(j-(nstationA-ntap))/ntap)+1)/2.0;
+        for (istation = 0; istation < nstationA; istation++) {  // Swap for jstation?
+            for (jstation = 0; jstation < nshotA; jstation++) {
+                for (iw=0; iw<nw; iw++) {
+                    cA[iw*nstationA*nshotA+jstation*nstationA+istation].r *= taper[istation];
+                    cA[iw*nstationA*nshotA+jstation*nstationA+istation].i *= taper[istation];
+                }
+            }
+        }
+        free(taper);
+    }
+    if (ntapB != 0) {
+        taper = (float *)malloc(nstationA*sizeof(float));
+        for (j = 0; j < ntapB; j++)
+            taper[j] = (cos(M_PI*(j-ntapB)/ntapB)+1)/2.0;//(exp(j/ntap*8)-1)/(exp(8)-1);//
+        for (j = ntapB; j < nstationA-ntapB; j++)
+            taper[j] = 1.0;
+        for (j = nstationA-ntapB; j < nstationA; j++)
+            taper[j] = taper[abs(j-nstationA)-1];//(cos(M_PI*(j-(nstationA-ntap))/ntap)+1)/2.0;
+        for (jstation = 0; jstation < nstationA; jstation++) {  // Swap for jstation?
+            for (istation = 0; istation < nshotA; istation++) {
+                for (iw=0; iw<nw; iw++) {
+                    cB[iw*nstationA*nshotA+istation*nshotA+jstation].r *= taper[istation];
+                    cB[iw*nstationA*nshotA+istation*nshotA+jstation].i *= taper[istation];
+                }
+            }
+        }
+        free(taper);
+    }
 
 	eigen = (float *)malloc(nfreq*nstationB*sizeof(float));
 
@@ -309,20 +442,25 @@ int main (int argc, char **argv)
 
 #pragma omp parallel default(none) \
 	private(t1,t2,pe) \
-	shared(cA,cB,eigen,eigenvalues,numacc,eps_r,eps_a) \
-	shared(nstationA,nstationB,verbose,cdatainSize) \
-	shared(rthm,mdd,nfreq,nshotA,conjgA,conjgB) \
-	shared(cdataout,oBB)
+ 	shared(cA,cB,cC,eigen,eigenvalues,numacc,eps_r,eps_a) \
+ 	shared(nstationA,nstationB,nstationC,verbose,cdatainSize) \
+    shared(rthm,mdd,nfreq,nshotA,conjgA,conjgB,conjgC) \
+    shared(cdataout,cTemp,oBB,file_C,cdataoutSize,k_iter,stderr, lsqr_iter, lsqr_damp, TCscl)
 { /* start of OpenMP parallel part */
 
-
 #ifdef _OPENMP
 	pe = omp_get_thread_num();
 #endif
 
 	/* compute deconvolution */
 	deconvolve(cA, cB, cdataout, oBB, nfreq, nshotA, nstationA, nstationB, 
-		eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgB, verbose);
+        eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgB, lsqr_iter, lsqr_damp, k_iter, TCscl, verbose);
+
+    if (file_C != NULL && mdd != 3) {
+        deconvolve(cdataout, cC, cTemp, oBB, nfreq, nshotA, nstationA, nstationC,
+            eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgC, lsqr_iter, lsqr_damp, k_iter, TCscl, verbose);
+        memcpy(&cdataout[0].r, &cTemp[0].r, cdataoutSize);
+    }
 
 } /*end of parallel OpenMP part */
 
@@ -340,6 +478,8 @@ int main (int argc, char **argv)
 /* for writing out combined shots cA */
 	free(cA);
 	free(cB);
+    if (file_C != NULL && mdd != 3) free(cC);
+    if (file_C != NULL && mdd != 3) free(cTemp);
 
 /* Inverse FFT of deconvolution results */
 /* This is done for every deconvolution component seperately */
diff --git a/MDD/readShotData.c b/MDD/readShotData.c
index a6205bf7cbad1742c301a27e0f4e7888f208d5fa..ad5d0e452e7fb3b92a81cf1487eb940a34fd7056 100644
--- a/MDD/readShotData.c
+++ b/MDD/readShotData.c
@@ -42,7 +42,8 @@ int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc,
 	fseek(fp, 0, SEEK_SET);
 	nread = fread( &hdr, 1, TRCBYTES, fp );
 	assert(nread == TRCBYTES);
-	if (hdr.scalco < 0) scl = 1.0/fabs((float)hdr.scalco);
+ 	if (hdr.scalco < 0) scl = 1.0/fabs((float)hdr.scalco);
+
 	else if (hdr.scalco == 0) scl = 1.0;
 	else scl = hdr.scalco;
 	fseek(fp, 0, SEEK_SET);
@@ -104,13 +105,13 @@ int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc,
         	rc1fft(&trace[i*ntfft],ctrace,ntfft,-1);
 
 			if (transpose == 0) {
-        		for (iw=0; iw<nw; iw++) {
+        		for (iw=0; iw<nw; iw++) { // FREQUENCY - SHOTS - RECEIVERS
         			cdata[iw*ngath*nx+igath*nx+i].r = scale*ctrace[nw_low+iw].r;
         			cdata[iw*ngath*nx+igath*nx+i].i = conjg*scale*ctrace[nw_low+iw].i;
         		}
 			}
 			else {
-        		for (iw=0; iw<nw; iw++) {
+        		for (iw=0; iw<nw; iw++) { // FREQUENCY - RECEIVERS - SHOTS
         			cdata[iw*ngath*nx+i*ngath+igath].r = scale*ctrace[nw_low+iw].r;
         			cdata[iw*ngath*nx+i*ngath+igath].i = conjg*scale*ctrace[nw_low+iw].i;
         		}
diff --git a/MDD/zlsqrDataModule.F90 b/MDD/zlsqrDataModule.F90
new file mode 100644
index 0000000000000000000000000000000000000000..ff9f905aeca90cc1721e55f8dbff206bcb4157ba
--- /dev/null
+++ b/MDD/zlsqrDataModule.F90
@@ -0,0 +1,19 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File zlsqrDataModule.f90
+!
+! Extends lsqrDataModule.f90 for use with complex numbers
+! 29 Jun 2013: File created.
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module zlsqrDataModule
+
+  use   iso_c_binding
+  use   lsqrDataModule, only  : dp, sp, ip, zero, realmin, eps, one
+  
+  implicit none
+
+  intrinsic                      :: cmplx
+
+  complex(c_float_complex), parameter, public :: zzero = cmplx(zero,zero,dp), zone = cmplx(one,zero,dp)
+
+end module zlsqrDataModule
diff --git a/MDD/zlsqrModule.F90 b/MDD/zlsqrModule.F90
new file mode 100644
index 0000000000000000000000000000000000000000..3220fd71ff44d97d5250d54b4e48c52eb6dd194e
--- /dev/null
+++ b/MDD/zlsqrModule.F90
@@ -0,0 +1,804 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File lsqrModule.f90
+!
+!     zLSQR     d2norm
+!
+! zLSQR   solves Ax = b or min ||Ax - b|| with or without damping,
+! using the iterative algorithm of Paige and Saunders, ACM TOMS (1982).
+!
+! Maintained by
+!     Michael Saunders <saunders@stanford.edu>
+!     Systems Optimization Laboratory (SOL)
+!     Stanford University
+!     Stanford, CA 94305-4026, USA
+!     (650)723-1875
+!
+! 07 Sep 2007: Line by line translation of f77 file lsqr.f to f90
+!              by Eric Badel <badel@nancy.inra.fr>.
+! 21 Sep 2007: lsqrModule.f90 implemented.
+! 24 Oct 2007: Use real(dp) instead of compiler option -r8.
+! 19 Dec 2008: lsqrblasInterface module implemented.
+! 29 Jun 2013: Support for complex A, derived from real version
+!              by Austin Benson <arbenson@stanford.edu> and
+!                 Victor Minden <vminden@stanford.edu>
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module zlsqrModule
+
+  use  zlsqrDataModule,    only : dp, ip, one, zero, eps, zone, zzero
+  use  zlsqrblasInterface, only : dznrm2_zlsqr, zscal_zlsqr
+  use  iso_c_binding ! for C interop
+
+  implicit none
+
+  public   :: zLSQR
+  private  :: d2norm 
+
+
+contains
+
+  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 
+  subroutine zLSQR  ( m, n, indw, Aprod1, Aprod2, b, damp, wantse, x, se,  &
+                      atol, btol, conlim, itnlim, nout, istop, itn,  &
+                      Anorm, Acond, rnorm, Arnorm, xnorm) bind(c, name = "zLSQR_")!w, v ) 
+
+    integer(ip),  intent(in)  :: m, n, itnlim, nout
+    integer, intent(in)    :: indw
+    integer(ip),  intent(out) :: istop, itn
+    logical(ip),  intent(in)  :: wantse
+    complex(dp), dimension(m), intent(in)  :: b(m)
+    complex(dp), dimension(n), intent(inout) :: x(n)
+    real(dp), intent(out) :: se(*)
+    real(dp), intent(in)  :: atol, btol, conlim, damp
+    real(dp), intent(out) :: Anorm, Acond, rnorm, Arnorm, xnorm
+    
+	!complex(dp), dimension(n), intent(inout)  :: w(n)
+    !complex(dp), dimension(n), intent(inout)  :: v(n)
+	
+    interface
+       subroutine Aprod1(m,n,x,y,indw) bind(c)                  ! y := y + A*x
+         use zlsqrDataModule, only : dp, ip
+         integer(ip), intent(in)    :: m, n
+         integer, intent(in)    :: indw
+         complex(dp), intent(in)    :: x(n)
+         complex(dp), intent(inout) :: y(m)
+       end subroutine Aprod1
+
+       subroutine Aprod2(m,n,x,y,indw) bind(c)                 ! x := x + A'*y
+         use zlsqrDataModule, only : dp, ip
+         integer(ip),  intent(in)    :: m, n 
+         integer, intent(in)    :: indw
+         complex(dp), intent(inout) :: x(n)
+         complex(dp), intent(in)    :: y(m)
+       end subroutine Aprod2
+    end interface
+
+    !-------------------------------------------------------------------
+    ! zLSQR  finds a solution x to the following problems:
+    !
+    ! 1. Unsymmetric equations:    Solve  A*x = b
+    !
+    ! 2. Linear least squares:     Solve  A*x = b
+    !                              in the least-squares sense
+    !
+    ! 3. Damped least squares:     Solve  (   A    )*x = ( b )
+    !                                     ( damp*I )     ( 0 )
+    !                              in the least-squares sense
+    !
+    ! where A is a complex matrix with m rows and n columns, b is a
+    ! complex m-vector, and damp is a real scalar.
+    ! The matrix A is treated as a linear operator.  It is accessed
+    ! by means of subroutine calls with the following purpose:
+    !
+    ! call Aprod1(m,n,x,y)  must compute y = y + A*x  without altering x.
+    ! call Aprod2(m,n,x,y)  must compute x = x + A'*y without altering y.
+    !
+    ! zLSQR uses an iterative method to approximate the solution.
+    ! The number of iterations required to reach a certain accuracy
+    ! depends strongly on the scaling of the problem.  Poor scaling of
+    ! the rows or columns of A should therefore be avoided where
+    ! possible.
+    !
+    ! For example, in problem 1 the solution is unaltered by
+    ! row-scaling.  If a row of A is very small or large compared to
+    ! the other rows of A, the corresponding row of ( A  b ) should be
+    ! scaled up or down.
+    !
+    ! In problems 1 and 2, the solution x is easily recovered
+    ! following column-scaling.  Unless better information is known,
+    ! the nonzero columns of A should be scaled so that they all have
+    ! the same Euclidean norm (e.g., 1.0).
+    !
+    ! In problem 3, there is no freedom to re-scale if damp is
+    ! nonzero.  However, the value of damp should be assigned only
+    ! after attention has been paid to the scaling of A.
+    !
+    ! The parameter damp is intended to help regularize
+    ! ill-conditioned systems, by preventing the true solution from
+    ! being very large.  Another aid to regularization is provided by
+    ! the parameter Acond, which may be used to terminate iterations
+    ! before the computed solution becomes very large.
+    !
+    ! Note that x is not an input parameter.
+    ! If some initial estimate x0 is known and if damp = 0,
+    ! one could proceed as follows:
+    !
+    ! 1. Compute a residual vector     r0 = b - A*x0.
+    ! 2. Use LSQR to solve the system  A*dx = r0.
+    ! 3. Add the correction dx to obtain a final solution x = x0 + dx.
+    !
+    ! This requires that x0 be available before and after the call
+    ! to LSQR.  To judge the benefits, suppose LSQR takes k1 iterations
+    ! to solve A*x = b and k2 iterations to solve A*dx = r0.
+    ! If x0 is "good", norm(r0) will be smaller than norm(b).
+    ! If the same stopping tolerances atol and btol are used for each
+    ! system, k1 and k2 will be similar, but the final solution x0 + dx
+    ! should be more accurate.  The only way to reduce the total work
+    ! is to use a larger stopping tolerance for the second system.
+    ! If some value btol is suitable for A*x = b, the larger value
+    ! btol*norm(b)/norm(r0)  should be suitable for A*dx = r0.
+    !
+    ! Preconditioning is another way to reduce the number of iterations.
+    ! If it is possible to solve a related system M*x = b efficiently,
+    ! where M approximates A in some helpful way
+    ! (e.g. M - A has low rank or its elements are small relative to
+    ! those of A), LSQR may converge more rapidly on the system
+    !       A*M(inverse)*z = b,
+    ! after which x can be recovered by solving M*x = z.
+    !
+    ! NOTE: If A is Hermitian, LSQR should not be used!
+    ! Alternatives are the symmetric conjugate-gradient method (CG)
+    ! and/or SYMMLQ.
+    ! SYMMLQ is an implementation of symmetric CG that applies to
+    ! any symmetric A and will converge more rapidly than LSQR.
+    ! If A is positive definite, there are other implementations of
+    ! symmetric CG that require slightly less work per iteration
+    ! than SYMMLQ (but will take the same number of iterations).
+    !
+    !
+    ! Notation
+    ! --------
+    ! The following quantities are used in discussing the subroutine
+    ! parameters:
+    !
+    ! Abar   =  (  A   ),        bbar  =  (b)
+    !           (damp*I)                  (0)
+    !
+    ! r      =  b - A*x,         rbar  =  bbar - Abar*x
+    !
+    ! rnorm  =  sqrt( norm(r)**2  +  damp**2 * norm(x)**2 )
+    !        =  norm( rbar )
+    !
+    ! eps    =  the relative precision of floating-point arithmetic.
+    !           On most machines, eps is about 1.0e-7 and 1.0e-16
+    !           in single and double precision respectively.
+    !           We expect eps to be about 1e-16 always.
+    !
+    ! LSQR  minimizes the function rnorm with respect to x.
+    !
+    !
+    ! Parameters
+    ! ----------
+    ! m       input      m, the number of rows in A.
+    !
+    ! n       input      n, the number of columns in A.
+    !
+    ! Aprod1, Aprod2     See above.
+    !
+    ! damp    input      The damping parameter for problem 3 above.
+    !                    (damp should be 0.0 for problems 1 and 2.)
+    !                    If the system A*x = b is incompatible, values
+    !                    of damp in the range 0 to sqrt(eps)*norm(A)
+    !                    will probably have a negligible effect.
+    !                    Larger values of damp will tend to decrease
+    !                    the norm of x and reduce the number of 
+    !                    iterations required by LSQR.
+    !
+    !                    The work per iteration and the storage needed
+    !                    by LSQR are the same for all values of damp.
+    !
+    ! wantse  input      A logical variable to say if the array se(*)
+    !                    of standard error estimates should be computed.
+    !                    If m > n  or  damp > 0,  the system is
+    !                    overdetermined and the standard errors may be
+    !                    useful.  (See the first LSQR reference.)
+    !                    Otherwise (m <= n  and  damp = 0) they do not
+    !                    mean much.  Some time and storage can be saved
+    !                    by setting wantse = .false. and using any
+    !                    convenient array for se(*), which won't be
+    !                    touched.
+    !
+    ! b(m)    input      The rhs vector b.
+    !
+    ! x(n)    output     Returns the computed solution x.
+    !
+    ! se(*)   output     If wantse is true, the dimension of se must be
+    !         (maybe)    n or more.  se(*) then returns standard error
+    !                    estimates for the components of x.
+    !                    For each i, se(i) is set to the value
+    !                       rnorm * sqrt( sigma(i,i) / t ),
+    !                    where sigma(i,i) is an estimate of the i-th
+    !                    diagonal of the inverse of Abar(transpose)*Abar
+    !                    and  t = 1      if  m .le. n,
+    !                         t = m - n  if  m .gt. n  and  damp = 0,
+    !                         t = m      if  damp .ne. 0.
+    !
+    !                    If wantse is false, se(*) will not be touched.
+    !                    The actual parameter can be any real array.
+    !
+    ! atol    input      An estimate of the relative error in the data
+    !                    defining the matrix A.  For example, if A is
+    !                    accurate to about 6 digits, set atol = 1.0e-6.
+    !
+    ! btol    input      An estimate of the relative error in the data
+    !                    defining the rhs b.  For example, if b is
+    !                    accurate to about 6 digits, set btol = 1.0e-6.
+    !
+    ! conlim  input      An upper limit on cond(Abar), the apparent
+    !                    condition number of the matrix Abar.
+    !                    Iterations will be terminated if a computed
+    !                    estimate of cond(Abar) exceeds conlim.
+    !                    This is intended to prevent certain small or
+    !                    zero singular values of A or Abar from
+    !                    coming into effect and causing unwanted growth
+    !                    in the computed solution.
+    !
+    !                    conlim and damp may be used separately or
+    !                    together to regularize ill-conditioned systems.
+    !
+    !                    Normally, conlim should be in the range
+    !                    1000 to 1/eps.
+    !                    Suggested value:
+    !                    conlim = 1/(100*eps)  for compatible systems,
+    !                    conlim = 1/(10*sqrt(eps)) for least squares.
+    !
+    !         Note: Any or all of atol, btol, conlim may be set to zero.
+    !         The effect will be the same as the values eps, eps, 1/eps.
+    !
+    ! itnlim  input      An upper limit on the number of iterations.
+    !                    Suggested value:
+    !                    itnlim = n/2   for well-conditioned systems
+    !                                   with clustered singular values,
+    !                    itnlim = 4*n   otherwise.
+    !
+    ! nout    input      File number for printed output.  If positive,
+    !                    a summary will be printed on file nout.
+    !
+    ! istop   output     An integer giving the reason for termination:
+    !
+    !            0       x = 0  is the exact solution.
+    !                    No iterations were performed.
+    !
+    !            1       The equations A*x = b are probably compatible.
+    !                    Norm(A*x - b) is sufficiently small, given the
+    !                    values of atol and btol.
+    !
+    !            2       damp is zero.  The system A*x = b is probably
+    !                    not compatible.  A least-squares solution has
+    !                    been obtained that is sufficiently accurate,
+    !                    given the value of atol.
+    !
+    !            3       damp is nonzero.  A damped least-squares
+    !                    solution has been obtained that is sufficiently
+    !                    accurate, given the value of atol.
+    !
+    !            4       An estimate of cond(Abar) has exceeded conlim.
+    !                    The system A*x = b appears to be ill-conditioned,
+    !                    or there could be an error in Aprod1 or Aprod2.
+    !
+    !            5       The iteration limit itnlim was reached.
+    !
+    ! itn     output     The number of iterations performed.
+    !
+    ! Anorm   output     An estimate of the Frobenius norm of Abar.
+    !                    This is the square-root of the sum of squares
+    !                    of the elements of Abar.
+    !                    If damp is small and the columns of A
+    !                    have all been scaled to have length 1.0,
+    !                    Anorm should increase to roughly sqrt(n).
+    !                    A radically different value for Anorm may
+    !                    indicate an error in Aprod1 or Aprod2.
+    !
+    ! Acond   output     An estimate of cond(Abar), the condition
+    !                    number of Abar.  A very high value of Acond
+    !                    may again indicate an error in Aprod1 or Aprod2.
+    !
+    ! rnorm   output     An estimate of the final value of norm(rbar),
+    !                    the function being minimized (see notation
+    !                    above).  This will be small if A*x = b has
+    !                    a solution.
+    !
+    ! Arnorm  output     An estimate of the final value of
+    !                    norm( Abar(transpose)*rbar ), the norm of
+    !                    the residual for the normal equations.
+    !                    This should be small in all cases.  (Arnorm
+    !                    will often be smaller than the true value
+    !                    computed from the output vector x.)
+    !
+    ! xnorm   output     An estimate of norm(x) for the final solution x.
+    !
+    ! Subroutines and functions used              
+    ! ------------------------------
+    ! BLAS               zscal, dznrm2
+    ! USER               Aprod1, Aprod2
+    !
+    ! Precision
+    ! ---------
+    ! The number of iterations required by LSQR will decrease
+    ! if the computation is performed in higher precision.
+    ! At least 15-digit arithmetic should normally be used.
+    ! "complex(dp)" declarations should normally be 8-byte words.
+    ! If this ever changes, the BLAS routines  dznrm2, zscal
+    ! (Lawson, et al., 1979) will also need to be changed.
+    !
+    !
+    ! References
+    ! ----------
+    ! C.C. Paige and M.A. Saunders,  LSQR: An algorithm for sparse
+    !      linear equations and sparse least squares,
+    !      ACM Transactions on Mathematical Software 8, 1 (March 1982),
+    !      pp. 43-71.
+    !
+    ! C.C. Paige and M.A. Saunders,  Algorithm 583, LSQR: Sparse
+    !      linear equations and least-squares problems,
+    !      ACM Transactions on Mathematical Software 8, 2 (June 1982),
+    !      pp. 195-209.
+    !
+    ! C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh,
+    !      Basic linear algebra subprograms for Fortran usage,
+    !      ACM Transactions on Mathematical Software 5, 3 (Sept 1979),
+    !      pp. 308-323 and 324-325.
+    ! ------------------------------------------------------------------
+    !
+    ! LSQR development:
+    ! 22 Feb 1982: LSQR sent to ACM TOMS to become Algorithm 583.
+    ! 15 Sep 1985: Final F66 version.  LSQR sent to "misc" in netlib.
+    ! 13 Oct 1987: Bug (Robert Davies, DSIR).  Have to delete
+    !                 if ( (one + dabs(t)) .le. one ) GO TO 200
+    !              from loop 200.  The test was an attempt to reduce
+    !              underflows, but caused w(i) not to be updated.
+    ! 17 Mar 1989: First F77 version.
+    ! 04 May 1989: Bug (David Gay, AT&T).  When the second beta is zero,
+    !              rnorm = 0 and
+    !              test2 = Arnorm / (Anorm * rnorm) overflows.
+    !              Fixed by testing for rnorm = 0.
+    ! 05 May 1989: Sent to "misc" in netlib.
+    ! 14 Mar 1990: Bug (John Tomlin via IBM OSL testing).
+    !              Setting rhbar2 = rhobar**2 + dampsq can give zero
+    !              if rhobar underflows and damp = 0.
+    !              Fixed by testing for damp = 0 specially.
+    ! 15 Mar 1990: Converted to lower case.
+    ! 21 Mar 1990: d2norm introduced to avoid overflow in numerous
+    !              items like  c = sqrt( a**2 + b**2 ).
+    ! 04 Sep 1991: wantse added as an argument to LSQR, to make
+    !              standard errors optional.  This saves storage and
+    !              time when se(*) is not wanted.
+    ! 13 Feb 1992: istop now returns a value in [1,5], not [1,7].
+    !              1, 2 or 3 means that x solves one of the problems
+    !              Ax = b,  min norm(Ax - b)  or  damped least squares.
+    !              4 means the limit on cond(A) was reached.
+    !              5 means the limit on iterations was reached.
+    ! 07 Dec 1994: Keep track of dxmax = max_k norm( phi_k * d_k ).
+    !              So far, this is just printed at the end.
+    !              A large value (relative to norm(x)) indicates
+    !              significant cancellation in forming
+    !                 x = D*f = sum( phi_k * d_k ).
+    !              A large column of D need NOT be serious if the
+    !              corresponding phi_k is small.
+    ! 27 Dec 1994: Include estimate of alfa_opt in iteration log.
+    !              alfa_opt is the optimal scale factor for the
+    !              residual in the "augmented system", as described by
+    !              A. Bjorck (1992),
+    !              Pivoting and stability in the augmented system method,
+    !              in D. F. Griffiths and G. A. Watson (eds.),
+    !              "Numerical Analysis 1991",
+    !              Proceedings of the 14th Dundee Conference,
+    !              Pitman Research Notes in Mathematics 260,
+    !              Longman Scientific and Technical, Harlow, Essex, 1992.
+    !
+    ! 21 Sep 2007: Fortran 90 finally adopted seriously.
+    !              Aprod1, Aprod2 implemented via f90 interface.
+    !-------------------------------------------------------------------
+
+    intrinsic :: abs, sqrt
+
+    ! Local arrays and variables
+    complex(dp)  :: u(m)
+	complex(dp), allocatable  :: v(:), w(:)
+    complex(dp)  :: t         
+    logical   :: damped, prnt
+    integer(ip)   :: i, maxdx, nconv, nstop
+    real(dp)  :: alfopt,                                    &
+                 alpha, beta, bnorm, cs, cs1, cs2, ctol,    &
+                 delta, dknorm, dnorm, dxk, dxmax, gamma,   &
+                 gambar, phi, phibar, psi, res2, rho,       &
+                 rhobar, rhbar1, rhs, rtol, sn, sn1, sn2,   &
+                 tau, temp, test1, test2, test3, theta,     &
+                 t1, t2, t3, xnorm1, z, zbar
+
+    ! Local constants
+    character(len=*), parameter :: enter = ' Enter LSQR.  '
+    character(len=*), parameter :: exitt = ' Exit  LSQR.  '
+    character(len=*), parameter :: msg(0:5) =                     &
+      (/ 'The exact solution is  x = 0                         ', &
+         'A solution to Ax = b was found, given atol, btol     ', &
+         'A least-squares solution was found, given atol       ', &
+         'A damped least-squares solution was found, given atol', &
+         'Cond(Abar) seems to be too large, given conlim       ', &
+         'The iteration limit was reached                      ' /)
+    !-------------------------------------------------------------------
+
+    ! Initialize.
+
+    if (nout > 0) then
+       OPEN(nout, file='OUTPUT.txt', status='unknown') ! ADDED BY JOHNO
+       write(nout, 1000) enter,m,n,damp,wantse,atol,conlim,btol,itnlim
+    end if
+
+    damped =   damp > zero
+    itn    =   0
+    istop  =   0
+    nstop  =   0
+    maxdx  =   0
+    ctol   =   zero
+    if (conlim > zero) ctol = one / conlim
+    Anorm  =   zero
+    Acond  =   zero
+    dnorm  =   zero
+    dxmax  =   zero
+    res2   =   zero
+    psi    =   zero
+    xnorm  =   zero
+    xnorm1 =   zero
+    cs2    = - one
+    sn2    =   zero
+    z      =   zero
+
+    !-------------------------------------------------------------------
+    ! Set up the first vectors u and v for the bidiagonalization.
+    ! These satisfy  beta*u = b,  alpha*v = A(transpose)*u.
+    !-------------------------------------------------------------------
+    
+    u(1:m) = b(1:m)
+	allocate(v(n),w(n))
+    v(1:n) = zzero
+    !x(1:n) = zzero
+
+
+    if (wantse) then
+       se(1:n) = zero
+    end if
+
+    alpha  = zero
+    beta   = dznrm2_zlsqr (m, u, 1)
+
+    if (beta > zero) then
+       call zscal_zlsqr (m, cmplx((one/beta), 0, dp), u, 1)
+       call Aprod2(m, n, v, u, indw)          ! v = A'*u
+       alpha = dznrm2_zlsqr (n, v, 1)
+    end if
+
+    if (alpha > zero) then
+       call zscal_zlsqr (n, cmplx((one/alpha), 0, dp), v, 1)
+       w = v
+    end if
+
+    Arnorm = alpha*beta
+    if (Arnorm == zero) go to 800
+
+    rhobar = alpha
+    phibar = beta
+    bnorm  = beta
+    rnorm  = beta
+
+    if (nout > 0) then
+       if (damped) then
+          write(nout,1300)
+       else
+          write(nout,1200)
+       end if
+       test1 = one
+       test2 = alpha/beta
+       write(nout, 1500) itn,abs(x(1)),rnorm,test1,test2
+    end if
+
+    !===================================================================
+    ! Main iteration loop.
+    !===================================================================
+    do
+       itn = itn + 1
+	  
+       !----------------------------------------------------------------
+       ! Perform the next step of the bidiagonalization to obtain the
+       ! next beta, u, alpha, v.  These satisfy
+       !     beta*u = A*v  - alpha*u,
+       !    alpha*v = A'*u -  beta*v.
+       !----------------------------------------------------------------
+       call zscal_zlsqr (m,cmplx((- alpha), 0, dp), u, 1)
+       call Aprod1(m, n, v, u, indw)             ! u = A*v
+       beta   = dznrm2_zlsqr (m, u, 1)
+
+       ! Accumulate Anorm = ||Bk|| = norm([alpha beta damp]).
+
+       temp   = d2norm(alpha, beta)
+       temp   = d2norm(temp , damp)
+       Anorm  = d2norm(Anorm, temp)
+
+       if (beta > zero) then
+          call zscal_zlsqr (m, cmplx((one/beta), 0, dp), u, 1)
+          call zscal_zlsqr (n, cmplx((- beta), 0, dp), v, 1)
+          call Aprod2(m, n, v, u, indw)          ! v = A'*u
+          alpha  = dznrm2_zlsqr (n, v, 1)
+          if (alpha > zero) then
+             call zscal_zlsqr (n, cmplx((one/alpha), 0, dp), v, 1)
+          end if
+       end if
+
+       !----------------------------------------------------------------
+       ! Use a plane rotation to eliminate the damping parameter.
+       ! This alters the diagonal (rhobar) of the lower-bidiagonal matrix.
+       !----------------------------------------------------------------
+       rhbar1 = rhobar
+       if (damped) then
+          rhbar1 = d2norm(rhobar, damp)
+          cs1    = rhobar/rhbar1
+          sn1    = damp  /rhbar1
+          psi    = sn1 * phibar
+          phibar = cs1 * phibar
+       end if
+
+       !----------------------------------------------------------------
+       ! Use a plane rotation to eliminate the subdiagonal element (beta)
+       ! of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
+       !----------------------------------------------------------------
+       rho    =   d2norm(rhbar1, beta)
+       cs     =   rhbar1/rho
+       sn     =   beta  /rho
+       theta  =   sn*alpha
+       rhobar = - cs*alpha
+       phi    =   cs*phibar
+       phibar =   sn*phibar
+       tau    =   sn*phi
+
+       !----------------------------------------------------------------
+       ! Update  x, w  and (perhaps) the standard error estimates.
+       ! ---------------------------------------------------------------
+       t1     =     phi/rho
+       t2     = - theta/rho
+       t3     =     one/rho
+       dknorm =    zero
+
+       if (wantse) then
+          do i=1,n
+             t      = w(i)
+             x(i)   = t1*t +  x(i)
+             w(i)   = t2*t +  v(i)
+             t      = abs(t3*t)**2
+             se(i)  = t + se(i)
+             dknorm = t + dknorm !bug?
+          end do
+       else
+          do i=1,n
+             t      = w(i)
+             x(i)   = t1*t + x(i)
+             w(i)   = t2*t + v(i)
+             dknorm = abs(t3*t)**2 + dknorm
+          end do
+       end if
+
+       !----------------------------------------------------------------
+       ! Monitor the norm of d_k, the update to x.
+       ! dknorm = norm( d_k )
+       ! dnorm  = norm( D_k ),       where   D_k = (d_1, d_2, ..., d_k )
+       ! dxk    = norm( phi_k d_k ), where new x = x_k + phi_k d_k.
+       !----------------------------------------------------------------
+       dknorm = sqrt(dknorm)
+       dnorm  = d2norm(dnorm, dknorm)
+       dxk    = abs(phi*dknorm)
+       if (dxmax < dxk) then
+          dxmax  = dxk
+          maxdx  = itn
+       end if
+
+       !----------------------------------------------------------------
+       ! Use a plane rotation on the right to eliminate the
+       ! super-diagonal element (theta) of the upper-bidiagonal matrix.
+       ! Then use the result to estimate  norm(x).
+       !----------------------------------------------------------------
+       delta  =   sn2*rho
+       gambar = - cs2*rho
+       rhs    =   phi - delta*z
+       zbar   =   rhs   /gambar
+       xnorm  =   d2norm(xnorm1, zbar)
+       gamma  =   d2norm(gambar, theta)
+       cs2    =   gambar/gamma
+       sn2    =   theta /gamma
+       z      =   rhs   /gamma
+       xnorm1 =   d2norm(xnorm1, z)
+
+       !----------------------------------------------------------------
+       ! Test for convergence.
+       ! First, estimate the norm and condition of the matrix  Abar,
+       ! and the norms of  rbar  and  Abar(transpose)*rbar.
+       !----------------------------------------------------------------
+       Acond  = Anorm*dnorm
+       res2   = d2norm(res2, psi)
+       rnorm  = d2norm(res2, phibar)
+       rnorm  = rnorm + 1e-30         ! Prevent rnorm == 0.0
+       Arnorm = alpha*abs( tau )
+
+       ! Now use these norms to estimate certain other quantities,
+       ! some of which will be small near a solution.
+
+       alfopt = sqrt( rnorm/(dnorm*xnorm) )
+       test1  = rnorm/bnorm
+       test2  = zero
+       test2  = Arnorm/(Anorm*rnorm)
+       test3  = one   /Acond
+       t1     = test1 /(one + Anorm*xnorm/bnorm)
+       rtol   = btol  +  atol*Anorm*xnorm/bnorm
+
+       ! The following tests guard against extremely small values of
+       ! atol, btol  or  ctol.  (The user may have set any or all of
+       ! the parameters  atol, btol, conlim  to zero.)
+       ! The effect is equivalent to the normal tests using
+       ! atol = eps,  btol = eps,  conlim = 1/eps.
+
+       t3 = one + test3
+       t2 = one + test2
+       t1 = one + t1
+       if (itn >= itnlim) istop = 5
+       if (t3  <= one   ) istop = 4
+       if (t2  <= one   ) istop = 2
+       if (t1  <= one   ) istop = 1
+
+       ! Allow for tolerances set by the user.
+
+       if (test3 <= ctol) istop = 4
+       if (test2 <= atol) istop = 2
+       if (test1 <= rtol) istop = 1
+
+       !----------------------------------------------------------------
+       ! See if it is time to print something.
+       !----------------------------------------------------------------
+       prnt = .false.
+       if (nout > 0) then
+          if (n     <=        40) prnt = .true.
+          if (itn   <=        10) prnt = .true.
+          if (itn   >= itnlim-10) prnt = .true.
+          if (mod(itn,10)  ==  0) prnt = .true.
+          if (test3 <=  2.0*ctol) prnt = .true.
+          if (test2 <= 10.0*atol) prnt = .true.
+          if (test1 <= 10.0*rtol) prnt = .true.
+          if (istop /=         0) prnt = .true.
+
+          if (prnt) then   ! Print a line for this iteration.
+             write(nout,1500) itn,abs(x(1)),rnorm,test1,test2,Anorm,Acond,alfopt
+          end if
+       end if
+
+       !----------------------------------------------------------------
+       ! Stop if appropriate.
+       ! The convergence criteria are required to be met on  nconv
+       ! consecutive iterations, where  nconv  is set below.
+       ! Suggested value:  nconv = 1, 2  or  3.
+       !----------------------------------------------------------------
+       if (istop == 0) then
+          nstop = 0
+       else
+          nconv = 1
+          nstop = nstop + 1
+          if (nstop < nconv  .and.  itn < itnlim) istop = 0
+       end if
+       if (istop /= 0) exit
+
+    end do
+    !===================================================================
+    ! End of iteration loop.
+    !===================================================================
+	
+	deallocate(v,w)
+	
+    if (wantse) then                       ! Finish off the
+       t = one                             ! standard error estimates.
+       if (m > n)  t = m-n
+       if (damped) t = m
+       t = rnorm/sqrt(t)
+       do i=1,n
+          se(i) = t*sqrt(se(i))
+       end do
+    end if
+
+    ! Come here if Arnorm = 0, or if normal exit.
+
+800 if (damped .and. istop==2) istop=3     ! Decide if istop = 2 or 3.
+    if (nout > 0) then                     ! Print the stopping condition.
+       write(nout, 2000)                &
+            exitt,istop,itn,            &
+            exitt,Anorm,Acond,          &
+            exitt,bnorm, xnorm,         &
+            exitt,rnorm,Arnorm
+       write(nout, 2100)                &
+            exitt,dxmax, maxdx,         &
+            exitt,dxmax/(xnorm+1.0e-30)
+       write(nout, 3000)                &
+            exitt, msg(istop)
+    end if
+
+    return
+
+ 1000 format(// 1p, a, '     Least-squares solution of  Ax = b'   &
+      / ' The matrix  A  has', i7, ' rows   and', i7, ' columns'  &
+      / ' damp   =', e22.14, 3x,        'wantse =', l10           &
+      / ' atol   =', e10.2, 15x,        'conlim =', e10.2         &
+      / ' btol   =', e10.2, 15x,        'itnlim =', i10)
+ 1200 format(/ '   Itn  abs(x(1))           Function',            &
+      '     Compatible   LS        Norm A    Cond A')
+ 1300 format(/ '   Itn  abs(x(1))           Function',            &
+      '     Compatible   LS     Norm Abar Cond Abar alfa_opt')
+ 1500 format(1p, i6, 2e17.9, 4e10.2, e9.1)
+ 2000 format(/ 1p, a, 5x, 'istop  =', i2,   15x, 'itn    =', i8   &
+      /     a, 5x, 'Anorm  =', e12.5, 5x, 'Acond  =', e12.5       &
+      /     a, 5x, 'bnorm  =', e12.5, 5x, 'xnorm  =', e12.5       &
+      /     a, 5x, 'rnorm  =', e12.5, 5x, 'Arnorm =', e12.5)
+ 2100 format(1p, a, 5x, 'max dx =', e8.1 , ' occurred at itn ', i8 &
+      /     a, 5x, '       =', e8.1 , '*xnorm')
+ 3000 format(a, 5x, a)
+
+  end subroutine zLSQR
+
+  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+  function d2norm( a, b )
+
+    real(dp)             :: d2norm
+    real(dp), intent(in) :: a, b
+
+    !-------------------------------------------------------------------
+    ! d2norm returns sqrt( a**2 + b**2 )
+    ! with precautions to avoid overflow.
+    !
+    ! 21 Mar 1990: First version.
+    ! 17 Sep 2007: Fortran 90 version.
+    ! 24 Oct 2007: User real(dp) instead of compiler option -r8.
+    !-------------------------------------------------------------------
+
+    intrinsic            :: abs, sqrt
+    real(dp)             :: scale
+    !real(dp), parameter  :: zero = 0.0_dp
+
+    scale = abs(a) + abs(b)
+    if (scale == zero) then
+       d2norm = zero
+    else
+       d2norm = scale*sqrt((a/scale)**2 + (b/scale)**2)
+    end if
+
+  end function d2norm
+
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+!   function zd2norm( a, b )
+
+!     real(dp)             :: zd2norm
+!     complex(dp), intent(in) :: a, b
+
+!     !-------------------------------------------------------------------
+!     ! complex version of d2norm
+!     !-------------------------------------------------------------------
+
+!     intrinsic            :: abs, sqrt
+!     real(dp)             :: scale
+!     !real(dp), parameter  :: zero = 0.0_dp
+
+!     scale = abs(a) + abs(b)
+!     if (scale == zero) then
+!        zd2norm = zero
+!     else
+!        zd2norm = scale*sqrt(abs(a/scale)**2 + abs(b/scale)**2)
+!     end if
+
+!   end function zd2norm
+
+end module zLSQRmodule
diff --git a/MDD/zlsqrblasInterface.F90 b/MDD/zlsqrblasInterface.F90
new file mode 100644
index 0000000000000000000000000000000000000000..544e014af260699289c7e015c7bd605e7a34b493
--- /dev/null
+++ b/MDD/zlsqrblasInterface.F90
@@ -0,0 +1,161 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File zlsqrblasInterface.f90
+!
+!    BLAS1 Interfaces:   dznrm2    zscal
+!
+! Maintained by Michael Saunders <saunders@stanford.edu>.
+!
+! 29 Jun 2013: zlsqrblasInterface module implemented.
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module zlsqrblasInterface
+  use iso_c_binding
+  use zlsqrDataModule, only : dp, ip
+  
+  implicit none
+  public   :: dznrm2, zscal, dznrm2_zlsqr, zscal_zlsqr
+
+  interface                              ! Level 1 BLAS
+     function dznrm2 (n,x,incx) bind(c)
+       use zlsqrDataModule, only : dp, ip
+       integer(ip),  intent(in)    :: n,incx
+       complex(dp), intent(in)     :: x(*)
+       real(dp)                    :: dznrm2
+     end function dznrm2
+
+     subroutine zscal (n,za,zx,incx) bind(c)
+       use zlsqrDataModule, only : dp, ip
+       integer(ip),  intent(in)       :: n,incx
+       complex(dp), intent(in)        :: za
+       complex(dp), intent(inout)     :: zx(*)
+     end subroutine zscal
+  end interface
+  
+
+contains
+
+  function dznrm2_zlsqr ( n, x, incx ) bind(c,name="dznrm2_zlsqr_")
+  !*****************************************************************************
+  !
+  ! DZNRM2 returns the euclidean norm of a complex(8) vector.
+  !
+  !
+  !  Discussion:
+  !
+  !    DZNRM2 := sqrt ( sum ( conjg ( x(1:n) ) * x(1:n) ) )
+  !            = sqrt ( dot_product ( x(1:n), x(1:n) ) )
+  !
+  !  Parameters:
+  !
+  !    Input, integer N, the number of entries in the vector.
+  !
+  !    Input, complex(8) X(*), the vector.
+  !
+  !    Input, integer INCX, the increment between successive entries of X.
+  !
+  !    Output, real(8) DZNRM2, the norm of the vector.
+  !
+  !
+     
+    integer(ip), intent(in)     :: incx
+    integer(ip)                 :: ix
+    integer(ip), intent(in)     :: n
+    real(dp)                 :: norm
+    real(dp)                 :: scale
+    real(dp)                 :: dznrm2_zlsqr
+    real(dp)                 :: ssq
+    real(dp)                 :: temp
+    real(dp), parameter      :: one = 1.0
+    real(dp), parameter      :: zero = 0.0
+    complex(dp), intent(in)  :: x(*)
+  
+  ! 
+    
+    if ( n < 1 .or. incx < 1 ) then
+	
+      norm  = zero
+  
+    else
+  
+      scale = zero
+      ssq = one
+  
+      do ix = 1, 1 + ( n - 1 ) * incx, incx
+        if ( real(x(ix), 8) /= zero ) then
+          temp = abs ( real(x(ix), 8) )
+          if ( scale < temp ) then
+            ssq = one + ssq * ( scale / temp )**2
+            scale = temp
+          else
+            ssq = ssq + ( temp / scale )**2
+          end if
+        end if
+  
+        if ( aimag ( x(ix) ) /= zero ) then
+          temp = abs ( aimag ( x(ix) ) )
+          if ( scale < temp ) then
+            ssq = one + ssq * ( scale / temp )**2
+            scale = temp
+          else
+            ssq = ssq + ( temp / scale )**2
+          end if
+  
+        end if
+  
+      end do
+  
+      norm  = scale * sqrt ( ssq )
+  
+    end if
+	
+    dznrm2_zlsqr = norm
+	  
+    return
+  end function dznrm2_zlsqr
+
+  subroutine zscal_zlsqr(n, za, zx, incx) bind(c, name="zscal_zlsqr_")
+  !*****************************************************************************
+  !
+  ! zscal returns the euclidean norm of a complex(8) vector.
+  !
+  !
+  !  Discussion:
+  !
+  !    ZSCAL scales a vector by a constant.
+  !
+  !  Parameters:
+  !
+  !    Input, integer n, the number of entries in the vector.
+  
+  !    Input, complex(8) za(*), the scaling parameter.
+  
+  !    Input/Output, complex(8) zx(*), the vector.
+  !
+  !    Input, integer incx, the increment between successive entries of X.
+    use zlsqrDataModule, only : dp, ip
+    
+    implicit none
+    
+    
+    integer(ip), intent(in)        :: incx
+    integer(ip)                    :: i, nincx
+    integer(ip), intent(in)        :: n
+    complex(dp), intent(in)    :: za
+    complex(dp), intent(inout) :: zx(*)
+  
+    if (n .le. 0 .or. incx .le. 0) return
+    if (incx .eq. 1) then
+      do i = 1, n
+        zx(i) = za * zx(i)
+      end do
+    else
+      nincx = n * incx
+      do i = 1, nincx, incx
+        zx(i) = za * zx(i)
+      end do
+    end if
+    return
+  
+  end subroutine zscal_zlsqr
+
+end module zlsqrblasInterface
diff --git a/Make_include_template b/Make_include_template
index ab90618955c23245d274dece26335cccc4213251..99b008d365caca0bce9803002112c50d8755f386 100644
--- a/Make_include_template
+++ b/Make_include_template
@@ -6,7 +6,7 @@
 #   -3- on Solaris system use RANLIB=ranlib which is defined below
 
 # the current directory (in vi ":r!pwd")
-ROOT=/Users/jan/src/OpenSource
+ROOT=REPLACE_WITH_PWD
 
 #############################################################################
 # Some convenient abbreviations
@@ -20,18 +20,18 @@ L = $(ROOT)/lib
 #GNU 
 #CC = gcc-mp-5 
 CC = gcc
-FC = gfortran
+FC = 
+#FC = gfortran
 # Linux gcc version 4.x
 OPTC = -O3 -ffast-math
-#to include parallelisation with OpenMP
+##to include parallelisation with OpenMP
 OPTC += -fopenmp
-
 # for better performing code you can try:
-#OPTC = -O3 -fno-trapping-math -ffast-math -funroll-all-loops -fomit-frame-pointer -ftree-vectorizer-verbose=1
-# Linux gcc very old version 3.x
-#OPTC = -O3 -ffast-math -funroll-all-loops 
+#OPTC = -O3 -fno-trapping-math -ffast-math -funroll-all-loops -mavx -fomit-frame-pointer -mfpmath=sse -ftree-vectorizer-verbose=1
+# Linux gcc version 3.x
+#OPTC = -O3 -ffast-math -funroll-all-loops -mfpmath=sse
 
-#enable double precision only for use in FFTlib and emmod
+#for double precision use FFTlib and emmod
 #OPTC += -DDOUBLE
 #OPTF += -fdefault-double-8 -fdefault-real-8
 
@@ -49,12 +49,9 @@ OPTC += -fopenmp
 ##OPTF = -O3 -no-prec-div -qopt-report-phase=vec,openmp
 #OPTC = -O3 -no-prec-div -xCORE-AVX2 
 #OPTF = -O3 -no-prec-div -xCORE-AVX2
-##to include parallelisation with OpenMP
+###to include parallelisation with OpenMP
 #OPTC += -qopenmp
 
-# Apple OSX intel 11.1.076 snow leopard 10.6.2
-#OPTC = -O3 -msse3 -no-prec-div -vec-report2 -fno-builtin-__sprintf_chk 
-
 #PGI 
 #CC = pgcc
 #FC = pgf90
@@ -63,18 +60,12 @@ OPTC += -fopenmp
 #OPTF = -fast
 #LDFLAGS = -fast -Minfo=vect -Mvect=simd:256 -Msafeptr 
 
-#Pathscale 
-#CC = cc
-#FC = ftn
-#OPTC = -Ofast -OPT:Ofast -fno-math-errno
-#OPTF = -Ofast -OPT:Ofast -fno-math-errno
-
 #Apple OSX clang/gcc (10.9) llvm
 #CC = clang
 #OPTC = -Ofast
 #LDFLAGS = -Ofast
 
-#AMD Open64
+#AMD Open64 this will be replaced with the latest AOCC compilers
 #CC = opencc
 #FC = openf95
 #OPTC = -Ofast
@@ -82,40 +73,27 @@ OPTC += -fopenmp
 #LDFLAGS = -static -Ofast
 
 #############################################################################
-# BLAS and LAPACK libraries and FFT LIBRARIES
+# BLAS and LAPACK libraries 
 #MKLROOT=/opt/intel/mkl/
-MKLLIB=${MKLROOT}/lib
-OPTC += -DMKL -I$(MKLROOT)/include -I$(MKLROOT)/include/fftw
-#for GNU compilers
-#you might need to add intel64 to : ${MKLROOT}/lib/intel64
-BLAS = -L${MKLROOT}/lib/ -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
-#for GNU/clang on OSX
-#BLAS = -Wl,-rpath ${MKLLIB} -L${MKLROOT}/lib/ -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
-#on linux you want to use groups and MKL is in lib/intel64
-#MKLLIB=${MKLROOT}/lib/intel64
-#BLAS = -Wl,-rpath ${MKLLIB} -Wl,--start-group -L${MKLLIB} -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread -lm -ldl
-#for intel compilers
-#BLAS = -mkl
-#
-#If you do not want or can not use MKL you can try the basic lapack and blas libaries that are default installed on most Linux
-#systems
-#BLAS = -llapack -lblas
-
-#############################################################################
-# AMD ACML 4.4.0
-#AMDROOT = /home/thorbcke/amdsdk/v1.0/acml/open64_64
-#OPTC += -DACML440 -I$(AMDROOT)/include
-#BLAS = -L$(AMDROOT)/lib -lacml -lfortran -lffio -lrt -lm
-
-# FOR ZFP LIBRARIES
-ZFPROOT = ${ROOT}/zfp
-ZFP = -L${ZFPROOT}/lib -lzfp
-ZFPINCL = ${ZFPROOT}/include
-OPTC += -I${ZFPINCL}
+#MKLROOT=
+ifneq ($(strip $(MKLROOT)),)
+    ifeq ($(CC),icc)
+        #for intel compilers
+        OPTC += -DMKL -I$(MKLROOT)/include -I$(MKLROOT)/include/fftw
+        BLAS = -mkl
+    else
+        #for GNU and other compilers
+        #on linux you want to use groups and MKL is in lib/intel64
+        MKLLIB=${MKLROOT}/lib/intel64
+        OPTC += -DMKL -I$(MKLROOT)/include -I$(MKLROOT)/include/fftw
+        BLAS = -Wl,-rpath ${MKLLIB} -Wl,--start-group -L${MKLLIB} -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread -lm -ldl
+endif
+else
+    BLAS = -llapack -lblas 
+endif
 
 #LIBARIES
-LIBS= -L$L -lgenfft $(FFT) $(BLAS) $(ZFP)
-
+LIBS= -L$L -lgenfft $(BLAS)
 
 
 ########################################################################
diff --git a/Makefile b/Makefile
index 86415c08131202fcd87cf41f2ef7f23211bd89d6..56df78d0bc4ace710d999e9c92b410ea55fc0488 100644
--- a/Makefile
+++ b/Makefile
@@ -1,18 +1,29 @@
 #master makefile for OpenSource 
 
+include Make_include
+
 all: mkdirs 
 	cd FFTlib		; $(MAKE)
-	cd zfp			; $(MAKE) install
 	cd fdelmodc		; $(MAKE) install
-	cd fdelmodc3D	; $(MAKE) install
+	cd zfp			; $(MAKE) install
 	cd utils		; $(MAKE) install
 	cd marchenko	; $(MAKE) install
-	cd marchenko3D	; $(MAKE) install
 	cd corrvir		; $(MAKE) install
 	cd raytime		; $(MAKE) install
-	cd MDD			; $(MAKE) install
+ifneq ($(strip $(MKLROOT)),)
 	cd fdacrtmc		; $(MAKE) install
-
+else
+	@echo "***************************************************************************";
+	@echo "**** There is no MKL or other library for the FFTW calls in use by fdacrtmc";
+endif
+	cd fdelmodc3D	; $(MAKE) install
+	cd marchenko3D	; $(MAKE) install
+ifneq ($(strip $(FC)),)
+	cd MDD			; $(MAKE) install
+else
+	@echo "***************************************************************************";
+	@echo "**** There is no Fortran compiler (FC) defined in Make_include to make MDD";
+endif
 
 mkdirs:
 	-mkdir -p lib
@@ -21,30 +32,29 @@ mkdirs:
 
 clean:
 	cd FFTlib 		; $(MAKE) $@
-	cd zfp			; $(MAKE) $@
 	cd fdelmodc		; $(MAKE) $@
-	cd fdelmodc3D	; $(MAKE) $@
 	cd utils		; $(MAKE) $@
 	cd marchenko	; $(MAKE) $@
-	cd marchenko3D	; $(MAKE) $@
 	cd corrvir		; $(MAKE) $@
 	cd raytime		; $(MAKE) $@
-	cd MDD			; $(MAKE) $@
 	cd fdacrtmc		; $(MAKE) $@
+	cd zfp			; $(MAKE) $@
+	cd fdelmodc3D	; $(MAKE) $@
+	cd marchenko3D	; $(MAKE) $@
+	cd MDD			; $(MAKE) $@
 
 realclean:
 	cd FFTlib 		; $(MAKE) $@
-	cd zfp			; $(MAKE) $@
 	cd fdelmodc		; $(MAKE) $@
-	cd fdelmodc3D	; $(MAKE) $@
-	cd fdelrtmc		; $(MAKE) $@
 	cd utils		; $(MAKE) $@
 	cd marchenko	; $(MAKE) $@
-	cd marchenko3D	; $(MAKE) $@
 	cd corrvir		; $(MAKE) $@
 	cd raytime		; $(MAKE) $@
-	cd MDD			; $(MAKE) $@
 	cd fdacrtmc		; $(MAKE) $@
+	cd zfp			; $(MAKE) $@
+	cd fdelmodc3D	; $(MAKE) $@
+	cd marchenko3D	; $(MAKE) $@
+	cd MDD			; $(MAKE) $@
 	rm -f lib/*
 	rm -f include/*
 	rm -f bin/*
diff --git a/README.md b/README.md
index 28e547d72dfdaf18fde68e4d9074d73b5cd58c96..9dafebbb6c609509996b5a6611f7e978a1e2c211 100644
--- a/README.md
+++ b/README.md
@@ -39,56 +39,38 @@ Jan Thorbecke, Evert Slob, Joeri Brackenhoff, Joost van der Neut, and Kees Wapen
 2017, Geophysics, Vol. 82, no. 6 (November-December); p. WB29--WB45, doi: 10.1190/GEO2017-0108.1
 Download: https://janth.home.xs4all.nl/Publications/Articles/ThorbeckeGPY2017.pdf
 
--3- When you are using the marchenko_primaries algorithm developed by Lele Zhang please refer to the following paper
+-3- If you used the code to construct homogenoeus Green's functions, please refer to this paper in your related publications:
+
+Virtual acoustics in inhomogeneous media with single-sided access:
+ Wapenaar, K., Brackenhoff, J., Thorbecke, J., van der Neut, J., Slob, E., and Verschuur, E., 
+2018, Scientific Reports, Vol. 8, 2497. 
+Download: http://homepage.tudelft.nl/t4n4v/4_Journals/Nature/SR_18.pdf
+
+-4- When you are using the marchenko_primaries algorithm developed by Lele Zhang please refer to the following paper
 
 Free-surface and internal multiple elimination in one step without adaptive subtraction
 Lele Zhang and Evert Slob
 2019, Geophysics, Vol. 84, no. 1 (January-February); p. A7-A11, doi: 10.1190/GEO2018-0548.1
 Download: http://homepage.tudelft.nl/t4n4v/BeyondInterferometry/geo_19h.pdf
 
--4- If you use the fdacrtmc code of Max Holicki please refer to the following paper:
+-5- If you use the fdacrtmc code of Max Holicki please refer to the following paper:
 
 Acoustic directional snapshot wavefield decomposition
 Holicki, M., Drijkoningen, G., and Wapenaar, K., 2019, Acoustic directional snapshot wavefield decomposition: Geophysical
 Prospecting, Vol. 67, 32-51
 Download: http://homepage.tudelft.nl/t4n4v/4_Journals/Geophys.Prosp/GP_19a.pdf
 
--5- 
 
 INSTALLATION
 -------------
+See the seperate INSTALL file.
 
-1) To compile and link the code you first have to set the ROOT variable in the Make_include file which can be found in the directory where you have found this README.
-You can use Make_include_template as a first start: 
-
-```
-cp Make_include_template Make_include
-```
-
-2) Check the compiler and CFLAGS options in the file Make_include and adapt to the system you are using. The default options are set for a the GNU C-compiler on a Linux system. A Fortran or C++ compiler are not needed to compile the code. The Makefile has been tested with GNU make. 
-
-3) If the compiler options are set in the Make_include file you can type 
-```
-make clean
-make 
-```
-and the Makefile will make:
 
-- FFT library 
-- fdelmodc / 3D
-- marchenko / 3D
-- utils
+REPRODUCING
+----------
+Almost all Figures in the papers mentioned above can be reproduced with the sofwtare in this repository. Please see the file REPRODUCE for further instructions
 
-The libraries will be placed in the lib/ directory and the executables in the bin/ directory.
 
-To use the executables don't forget to include the pathname in your PATH:
-
-```
-bash/sh:
-export PATH='path_to_this_directory'/bin:$PATH:
-csh:
-setenv PATH 'path_to_this_directory'/bin:$PATH:
-```
 
 Finite Difference Modeling: FDELMODC
 ------------------------------------
@@ -102,6 +84,7 @@ The demo directory contains many scripts which demonstrate the different possibi
 
 To reproduce the Figures shown in the GEOPHYICS manuscript "Finite-difference modeling experiments for seismic interferometry" the scripts in FiguresPaper directory can be used. Please read the README in the FiguresPaper directory for more instructions and guidelines. 
 
+An extensive manual of fdelmodc can be found in doc/fdelmodcManual.pdf
 
 Marchenko method : MARCHENKO
 ----------------------------
@@ -111,7 +94,9 @@ To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Mar
 
 To reproduce the Figures shown in the Scientific Reports paper "Virtual acoustics in inhomogeneous media with single-sided access" the scripts in marchenko/demo/ScientificReports directory can be used. The README in this directory gives more instructions and guidelines. 
 
-To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Marchenko Multiple Elimination algorithm" the scripts in marchenko/demo/oneD directory can be used. The README_PRIMARIES in this directory gives more instructions and guidelines. 
+To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Marchenko Multiple Elimination algorithm" the scripts in marchenko/demo/mme directory can be used. The README_PRIMARIES in this directory gives more instructions and guidelines. 
+A bried manual about the MME program 'marchencko_primaries' can be found in doc/MMEmanual.pdf
+
 
 MDD
 ---
@@ -135,6 +120,8 @@ find /opt/intel -name libmkl_gf_lp64.so
 and adjust MKLROOT in Make_include accordingly. 
 You can also completely disable the use of MKL by commenting out the MKL parts in Make_include. 
 
+In case MKL is installed on your system there is no need to install the netlib packages mentioned for MDD. 
+
 
 SEISMIC UNIX
 -----------
@@ -157,6 +144,8 @@ The fdacrtmc and the 3D Marchenko code makes use of ZFP compression to store sna
 
 https://github.com/LLNL/zfp
 
+and written by Peter Lindstrom
+
 FDACRTMC
 --------
 fdacrtmc uses FFTW and the wisdom computations are stored on disk for re-usage.  This directory is defined in fdacrtmc.h
@@ -172,36 +161,16 @@ CFLAGS  += -DWISDOMDIR="/directory/that/exists"
 
 or you can change the name of WISDOMDIR in fdacrtmc.h
 
-
-MISC
-----
-Other make commands which can be useful:
-
-make clean : removes all object files, but leaves libraries and executables
-make realclean: removes also object files, libraries and executables.
-
-COMPILATION PROBLEMS
---------------------
-If you encounter missing trigometric / mathematical functions during compilation, for example;
-
-defineSource.c:(.text+0x2356): undefined reference to sin getParameters.o: In function getParameters:
-
-add  '-lm -lc' around line 109 in Make_include:
-
-LIBS= -L$L -lgenfft $(BLAS) -lm -lc
-
-
+The Finite Different based RTM code (FDACRTMC) will not be installed if MKL is not available. This code depends on FFTW and we do want to
+include the full FFTW source code package to this. Our aim is to keep the compilation of the code in the GitHub repository
+as simple as possible with the smallest number of external libraries.
 
 UPDATES AND LATEST VERSION
 --------------------------
 The latest version of the source code and manual can be found at:
 
-http://www.xs4all.nl/~janth/Software/Software.html
-
-or at github:
-
 git clone https://github.com/JanThorbecke/OpenSource.git
 git clone git://github.com/JanThorbecke/OpenSource.git
 
-The code is used by many different people and if there is a request for a new option in the code, then I will try to implement, test and make it available. 
+The code is used by many different people and if there is a request for a new option in the code, then we will try to implement, test and make it available. 
 
diff --git a/REPRODUCE b/REPRODUCE
new file mode 100644
index 0000000000000000000000000000000000000000..524471a72b1760bd09fc2c481afec6b9f6359c22
--- /dev/null
+++ b/REPRODUCE
@@ -0,0 +1,52 @@
+
+REFERENCES
+---------
+-0- DOI reference of this software release 
+https://zenodo.org/badge/latestdoi/23060862
+
+-1- If the Finite Difference code has helped you in your research please refer to the following paper in your publications:
+
+Finite-difference modeling experiments for seismic interferometry
+Jan Thorbecke and Deyan Draganov
+2011, Geophysics, Vol. 76, no. 6 (November-December); p H1--H18, doi: 10.1190/GEO2010-0039.1
+Download: https://janth.home.xs4all.nl/Publications/Articles/ThorbeckeDraganov2012.pdf
+
+*** To reproduce the Figures shown in the GEOPHYICS manuscript "Finite-difference modeling experiments for seismic interferometry" the scripts in fdelmodc/FiguresPaper directory can be used. Please read the README in the FiguresPaper directory for more instructions and guidelines. 
+
+
+-2- If the Machenko code has helped you in your research please refer to this paper in your publications:
+
+Implementation of the Marchenko method
+Jan Thorbecke, Evert Slob, Joeri Brackenhoff, Joost van der Neut, and Kees Wapenaar
+2017, Geophysics, Vol. 82, no. 6 (November-December); p. WB29--WB45, doi: 10.1190/GEO2017-0108.1
+Download: https://janth.home.xs4all.nl/Publications/Articles/ThorbeckeGPY2017.pdf
+
+*** To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Marchenko method" the scripts in marchenko/demo/oneD directory can be used. The README in this directory gives more instructions and guidelines. 
+
+
+-3- If you used the code to construct homogenoeus Green's functions, please refeer to this paper in your related publications:
+
+Virtual acoustics in inhomogeneous media with single-sided access:
+ Wapenaar, K., Brackenhoff, J., Thorbecke, J., van der Neut, J., Slob, E., and Verschuur, E., 
+2018, Scientific Reports, Vol. 8, 2497. 
+Download: http://homepage.tudelft.nl/t4n4v/4_Journals/Nature/SR_18.pdf
+
+*** To reproduce the Figures shown in the Scientific Reports paper "Virtual acoustics in inhomogeneous media with single-sided access" the scripts in marchenko/demo/ScientificReports directory can be used. The README in this directory gives more instructions and guidelines. 
+
+-4- When you are using the marchenko_primaries algorithm developed by Lele Zhang please refer to the following paper
+
+Free-surface and internal multiple elimination in one step without adaptive subtraction
+Lele Zhang and Evert Slob
+2019, Geophysics, Vol. 84, no. 1 (January-February); p. A7-A11, doi: 10.1190/GEO2018-0548.1
+Download: http://homepage.tudelft.nl/t4n4v/BeyondInterferometry/geo_19h.pdf
+
+*** To reproduce the Figures shown in the GEOPHYICS paper "Implementation of the Marchenko Multiple Elimination algorithm" the scripts in marchenko/demo/mme directory can be used. The README_PRIMARIES in this directory gives more instructions and guidelines. 
+
+
+-5- If you use the fdacrtmc code of Max Holicki please refer to the following paper:
+
+Acoustic directional snapshot wavefield decomposition
+Holicki, M., Drijkoningen, G., and Wapenaar, K., 2019, Acoustic directional snapshot wavefield decomposition: Geophysical
+Prospecting, Vol. 67, 32-51
+Download: http://homepage.tudelft.nl/t4n4v/4_Journals/Geophys.Prosp/GP_19a.pdf
+
diff --git a/doc/MMEmanual.pdf b/doc/MMEmanual.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..1950f25a3cd7e186f69c41e2d336b52e5ea8c391
Binary files /dev/null and b/doc/MMEmanual.pdf differ
diff --git a/fdelmodc/getModelInfo.c b/fdelmodc/getModelInfo.c
index 378a1b50ebac46e5b7b8a8bef4b5365ac15bef9d..b175fea56ad33657fbabc425a9d48d928c1ed603 100644
--- a/fdelmodc/getModelInfo.c
+++ b/fdelmodc/getModelInfo.c
@@ -46,7 +46,9 @@ int getModelInfo(char *file_name, int *n1, int *n2, float *d1, float *d2, float
     *f2 = hdr.f2;
 
     if ( NINT(100.0*((*d1)/(*d2)))!=100 ) {
-        verr("dx and dz are different in the model !");
+        vwarn("dx and dz are different in the model !");
+        vwarn("setting dx=%e to dz=%e ", *d2, *d1);
+        *d2 = *d1;
     }
     if ( NINT(1000.0*(*d1))==0 ) {
         if(!getparfloat("dx",d1)) {
diff --git a/marchenko/demo/oneD/README_PRIMARIES b/marchenko/demo/mme/README_PRIMARIES
similarity index 99%
rename from marchenko/demo/oneD/README_PRIMARIES
rename to marchenko/demo/mme/README_PRIMARIES
index 2e166b3c0a06278cb03af9fc05eea8a39118f903..8b6d43e5cfada2686d5b3c929408718234aaa58f 100644
--- a/marchenko/demo/oneD/README_PRIMARIES
+++ b/marchenko/demo/mme/README_PRIMARIES
@@ -52,7 +52,7 @@ This will take 15 seconds. The generated files are:
 	- pred_rr_276.su (not used)
 	- DDshot_450.su (not used): selected shot record convolved with file_src=wave.su
 
-	where ## ranges from 01 to 33
+	where ## ranges from 01 to 34
 
 To generate the postscript files for Figure 3:
 
diff --git a/marchenko/demo/mme/epsModel.scr b/marchenko/demo/mme/epsModel.scr
new file mode 100755
index 0000000000000000000000000000000000000000..d71acdcdc1138026e265cfd4cd220e1d870e030f
--- /dev/null
+++ b/marchenko/demo/mme/epsModel.scr
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+export ROOT=`grep ROOT= ../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
+# Add interface line to postscript file of model 
+cat << EOF1 > line1
+400 -2500
+400 2500
+EOF1
+
+cat << EOF2 > line2
+700 -2500
+700 2500
+EOF2
+
+cat << EOF3 > line3
+1100 -2500
+1100 2500
+EOF3
+
+#model
+supsimage hbox=4 wbox=6 labelsize=12 < model10_cp.su \
+        x1beg=0 x1end=1400.0 d1num=200 lstyle=vertright legend=1 threecolor=0 \
+    	label1="depth (m)" label2="lateral distance (m)" \
+        curve=line1,line2,line3 npair=2,2,2 curvecolor=black,black,black \
+        n1tic=5 x2beg=-2250 f2num=-2000 d2num=1000 x2end=2250 > model_cp_line.eps
+
+supsimage hbox=4 wbox=6 labelsize=12 < model10_ro.su \
+        x1beg=0 x1end=1400.0 d1num=200 lstyle=vertright legend=1 threecolor=0 \
+    	label1="depth (m)" label2="lateral distance (m)" \
+        curve=line1,line2,line3 npair=2,2,2 curvecolor=black,black,black \
+        n1tic=5 x2beg=-2250 f2num=-2000 d2num=1000 x2end=2250 > model_ro_line.eps
+
+#wavelet
+dt=0.0005
+supsgraph < wavefw.su \
+    labelsize=12 d1=$dt style=normal \
+    label1="time (s)" label2="amplitude" \
+    d1num=0.15 wbox=6 hbox=3 x1end=0.9 > wavefw.eps
+ 
+sufft < wavefw.su | suamp | sugain scale=$dt | supsgraph \
+    labelsize=12 style=normal \
+    label1="frequency (1/s)" label2="amplitude" \
+    d1num=10 wbox=6 hbox=3 x1end=125 x2end=1.1 > wavefw_freq.eps
+ 
+#shot record
+file=shot5_rp.su
+sumax < $file mode=abs outpar=nep
+clipref=`cat nep | awk '{print $1/15}'`
+suwind key=gx min=-2250000 max=2250000 < $file | \
+        supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+    	label1="time sample number" label2="lateral distance (m)" \
+        n1tic=2 d2=5 f1=0.0 x1beg=0 x1end=400 d1=1 f1=0 d1num=100 \
+        f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > shotx0_rp.eps
+
+rm nep 
diff --git a/marchenko/demo/mme/epsPrimaries.scr b/marchenko/demo/mme/epsPrimaries.scr
new file mode 100755
index 0000000000000000000000000000000000000000..67341c26e3a1c7ec34707499ef4059eec7717c2b
--- /dev/null
+++ b/marchenko/demo/mme/epsPrimaries.scr
@@ -0,0 +1,307 @@
+#!/bin/bash
+
+export ROOT=`grep ROOT= ../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
+echo "Making eps files for $1"
+
+if [[ "$1" == "Figure2" ]];
+then
+./epsModel.scr
+exit 0
+fi
+
+istart=276
+#set same clip factor for iteration updates
+file=Mi_${istart}002.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+ns=`surange < Mi_276014.su | grep ns | awk '{print $2}'`
+nsd=400 #number of samples to display
+(( nstart = ns - nsd ))
+
+file=k1min_${istart}002.su
+sumax < $file mode=abs outpar=nep
+clipf1=`cat nep | awk '{print $1/11}'`
+clipm1=`cat nep | awk '{print $1/22}'`
+
+#M0
+file=M0_276000.su
+file_base=${file%.su}
+sumax < $file mode=abs outpar=nep
+clipref=`cat nep | awk '{print $1/15}'`
+
+echo "clipiter="$clipiter "clipref="$clipref "clipf1="$clipf1 "clipm1="$clipm1
+#clipiter=$clipref
+#clipf1=$clipref
+
+# Initialisation and First iteration
+if [[ "$1" == "Figure3" ]];
+then
+file=M0_276000.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 f1=0 f1num=700 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+suflip < $file flip=3 | \
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 f1=0 f1num=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}_flip.eps
+
+#convolve M0 with middle shot record of R
+select=451
+#original shot record from Reflection matrix
+suwind key=fldr min=$select max=$select < shotsdx5_rp.su > shotsx0.su
+#first iteration
+suwind itmin=516 itmax=1539 < M0_276000.su > N0.su
+#compute R*N0
+fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN0.su verbose=1 fmax=90
+
+file=fconvN0.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+
+file=fconvN0.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 d1=1 f1=0 f1num=0 d1num=200 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > ${file_base}fulltime.eps
+
+file=fconvN0.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 f1num=700 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > ${file_base}zoom.eps
+
+suflip < fconvN0.su flip=3 | sugain scale=1 > fconvN0flip.su
+file=fconvN0flip.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 f1=0 f1num=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+
+iter=1
+piter=$(printf %03d $iter)
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+fi
+
+
+# second iteration
+if [[ "$1" == "Figure4" ]];
+then
+suwind itmax=1023 <  Mi_276001.su > N0.su
+#compute R*N0
+fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN1.su verbose=1 fmax=90
+
+file=fconvN1.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+suflip < fconvN1.su flip=3 | sugain scale=1 > fconvN1flip.su
+
+file=fconvN1flip.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 f1num=600 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+
+file=fconvN1.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 d1=1 f1=0 f1num=0 d1num=200 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > ${file_base}fulltime.eps
+
+iter=2
+piter=$(printf %03d $iter)
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+suwind itmin=516 itmax=1539 < $file > N2.su
+supsimage < N2.su hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+fi
+
+if [[ "$1" == "Figure6" ]];
+then
+#convergence plot
+rm vplus.su
+istart=200
+for (( iter=1; iter<=29; iter+=2 ))
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=v1plus_${istart}${piter}.su
+file_base=${file%.su}
+suwind < $file key=offset min=0 max=0 >> vplus.su
+done
+
+basop file_in=vplus.su file_out=vplus_env.su choice=4
+sumax < vplus_env.su verbose=1 mode=max 
+supsmax < vplus_env.su n=15 verbose=1 mode=max \
+	style=normal linewidth=2.0 f1=1 labelsize=14 label1="iteration number" label2="amplitude" \
+    d1num=2 d2num=0.2 wbox=6 hbox=3 grid1=dot grid2=dot n1tic=2 n2tic=2 x2end=2.9 x2beg=1.8 > v1plus_max.eps
+
+file=v1plus_${istart}001.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+
+file=k1min_${istart}030.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+fi
+
+#Iterations
+if [[ "$1" == "Figure9" ]];
+then
+for iter in 2 4 12 20
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+#possibly add suflip flip=3 to flip the time-axis
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure9" ]];
+then
+for iter in 1 3 11 19
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure10" ]];
+then
+#iterations
+for iter in 2 4 10 20
+do
+piter=$(printf %03d $iter)
+echo $piter
+
+file=Mi_${istart}${piter}.su
+#ns=`surange < iter_001.su | grep ns | awk '{print $2}'`
+#dtrcv=`surange < iter_001.su | grep dt | awk '{print $2/1000000.0}'`
+#shift=$(echo "scale=4; ($dtrcv*($ns/2.0-1))" | bc -l)
+#basop choice=shift shift=$shift file_in=$file | \
+file_base=${file%.su}
+suflip flip=3 < $file | supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > ${file_base}flip.eps
+
+file=k1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+
+done
+fi
+
+if [[ "$1" == "Figure11" ]];
+then
+iter=32
+piter=$(printf %03d $iter)
+echo $piter
+for (( istart=246; istart<=316; istart+=10 ))
+do
+file=k1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure13" ]];
+then
+istart=276
+for iter in 2 4 
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+#possibly add suflip flip=3 to flip the time-axis
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}T.eps
+done
+for iter in 1 3 
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}T.eps
+done
+for iter in 2 4 10 20
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=k1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > ${file_base}T.eps
+done
+
+fi
+
+exit;
+
+#Windows for odd and even iterations
+file=WindowOdd276.su
+file_base=${file%.su}
+suwind key=tracl min=1 max=1 < $file | suwind itmin=1024 | \
+supsgraph d1=1 style=normal f1=0 \
+    labelsize=12 label1="time sample number" label2="amplitude" \
+    d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
+file=WindowEven276.su
+file_base=${file%.su}
+suwind key=tracl min=1 max=1 < $file | suwind itmax=1024 | \
+supsgraph d1=1 style=normal f1=0 \
+    labelsize=12 label1="time sample number" label2="amplitude" \
+    d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
+
diff --git a/marchenko/demo/mme/iterations.scr b/marchenko/demo/mme/iterations.scr
new file mode 100755
index 0000000000000000000000000000000000000000..2f4181fe345708200a5d63f578cd6bcf100bf2b8
--- /dev/null
+++ b/marchenko/demo/mme/iterations.scr
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+#second reflector at time:
+# 800/1800+600/2300
+# .70531400966183574878 => sample 176
+#third reflector at time model.scr:
+# 800/1800+600/2300+800/2000
+# 1.10531400966183574878 sample 276
+#third reflector at time modepm.scr
+#800/1800+600/3200+520/2000
+#.96531400966183574878 sample 241
+
+export ROOT=`grep ROOT= ../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
+R=shotsdx5_rp.su
+makewave fp=20 dt=0.004 file_out=wave.su nt=1024 t0=0.0 scale=0 scfft=1
+select=451
+
+if [[ "$1" == "Figure34910" ]];
+then
+for istart in 276
+do
+(( iend = istart + 1 ))
+
+marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+done
+fi
+
+if [[ "$1" == "Figure6" ]];
+then
+istart=200
+(( iend = istart + 1 ))
+marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=30 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+fi
+
+if [[ "$1" == "Figure11" ]];
+then
+for istart in 246 256 266 276 286 296 306 316
+do
+(( iend = istart + 1 ))
+
+marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+done
+fi
+
+if [[ "$1" == "Figure13" ]];
+then
+istart=276
+(( iend = istart + 1 ))
+marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=1 file_iter=iterT.su 
+fi
+
diff --git a/marchenko/demo/mme/model.scr b/marchenko/demo/mme/model.scr
new file mode 100755
index 0000000000000000000000000000000000000000..fccd1018b3bef8edfa2216f89f9ba3a8ecc5928f
--- /dev/null
+++ b/marchenko/demo/mme/model.scr
@@ -0,0 +1,107 @@
+#!/bin/bash
+#SBATCH -J model_1.5D
+#SBATCH --cpus-per-task=8
+#SBATCH --ntasks=1
+#SBATCH --time=0:15:00
+
+
+export ROOT=`grep ROOT= ../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
+dx=2.5
+dt=0.0005
+
+#define gridded model for FD computations
+makemod sizex=10000 sizez=1400 dx=$dx dz=$dx cp0=1800 ro0=1000 \
+        orig=-5000,0 file_base=model10.su verbose=2 \
+        intt=def x=-5000,5000 z=400,400 poly=0 cp=2300 ro=3000 \
+        intt=def x=-5000,5000 z=700,700 poly=0 cp=2000 ro=1100 \
+        intt=def x=-5000,5000 z=1100,1100 poly=0 cp=2500 ro=4000
+
+#define wavelet for modeling R
+makewave w=fw fmin=0 flef=5 frig=80 fmax=100  dt=$dt file_out=wavefw.su nt=4096 t0=0.3 scale=0 scfft=1
+
+#define wavelet for reference and intial focusing field.
+makewave fp=25 dt=$dt file_out=wave.su nt=4096 t0=0.1 scale=1
+
+export OMP_NUM_THREADS=8
+
+#Model shot record in middle of model
+fdelmodc \
+    file_cp=model10_cp.su ischeme=1 iorder=4 \
+    file_den=model10_ro.su \
+    file_src=wavefw.su \
+    file_rcv=shot5_fd.su \
+    src_type=7 \
+    src_orient=1 \
+    src_injectionrate=1 \
+    rec_type_vz=0 \
+    rec_type_p=1 \
+    rec_int_vz=2 \
+    dtrcv=0.004 \
+    rec_delay=0.3 \
+    verbose=2 \
+    tmod=4.392 \
+    dxrcv=5.0 \
+    xrcv1=-4500 xrcv2=4500 \
+    zrcv1=0 zrcv2=0 \
+    xsrc=0 zsrc=0 \
+    npml=101 \
+    left=2 right=2 top=2 bottom=2 
+
+#define homogenoeus model to compute direct wave only
+makemod sizex=10000 sizez=1200 dx=$dx dz=$dx cp0=1800 ro0=1000 \
+        orig=-5000,0 file_base=hom.su verbose=2
+
+#Model direct wave only in middle of model
+fdelmodc \
+    file_cp=hom_cp.su ischeme=1 iorder=4 \
+    file_den=hom_ro.su \
+    file_src=wavefw.su \
+    file_rcv=shot5_hom_fd.su \
+    src_type=7 \
+    src_orient=1 \
+    src_injectionrate=1 \
+    rec_type_vz=0 \
+    rec_type_p=1 \
+    rec_int_vz=2 \
+    dtrcv=0.004 \
+    rec_delay=0.3 \
+    verbose=2 \
+    tmod=4.392 \
+    dxrcv=5.0 \
+    xrcv1=-4500 xrcv2=4500 \
+    zrcv1=0 zrcv2=0 \
+    xsrc=0 zsrc=0 \
+    npml=101 \
+    left=2 right=2 top=2 bottom=2 
+
+#subtract direct wave from full model shot record: this defines R
+sudiff shot5_fd_rp.su shot5_hom_fd_rp.su > shot5_rp.su
+
+#########################################################
+# Generate the full R matrix for a fixed spread geometry.
+
+dxshot=5000 # with scalco factor of 1000
+ishot=0
+nshots=901
+
+rm shotsdx5_rp.su 
+
+while (( ishot < nshots ))
+do
+
+	(( xsrc = -2250000 + ${ishot}*${dxshot} ))
+	(( tr1 = $nshots - ${ishot} ))
+	(( tr2 = ${tr1} + $nshots - 1 ))
+	echo xsrc=$xsrc tr1=$tr1 tr2=$tr2
+
+	(( ishot = $ishot + 1))
+
+	suwind < shot5_rp.su key=tracl min=$tr1 max=$tr2 | \
+	sushw key=sx,gx,fldr,trwf \
+	a=$xsrc,-2250000,$ishot,$nshots b=0,$dxshot,0,0 j=0,$nshots,0,0 | \
+	suchw key1=offset key2=gx key3=sx c=-1 d=1000 >> shotsdx5_rp.su
+
+done
diff --git a/marchenko/demo/mme/primaries.scr b/marchenko/demo/mme/primaries.scr
new file mode 100755
index 0000000000000000000000000000000000000000..17d0dda0c73da23cd47d7745b81060efa65f307f
--- /dev/null
+++ b/marchenko/demo/mme/primaries.scr
@@ -0,0 +1,43 @@
+#!/bin/bash -x
+#SBATCH -J marchenko_primaries
+#SBATCH --cpus-per-task=20
+#SBATCH --ntasks=1
+##SBATCH --time=1:29:00
+#SBATCH -p max2h
+
+# Generate the model and Reflection data
+#modelpm.scr 
+
+#cd $SLURM_SUBMIT_DIR
+
+export ROOT=`grep ROOT= ../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
+export OMP_NUM_THREADS=20
+
+#shot record to remove internal multiples
+R=shotsdx5_rp.su
+select=451
+
+makewave fp=20 dt=0.004 file_out=wave.su nt=2048 t0=0.0 scale=0 scfft=1
+
+../../marchenko_primaries1 file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=40 iend=500 fmax=90 pad=1024 \
+	niter=31 shift=20 smooth=10 niterec=2 niterskip=50 file_rr=pred_rr.su T=0 file_update=update.su
+
+exit;
+
+#for reference original shot record from Reflection matrix
+suwind key=fldr min=$select max=$select < $R itmax=2047 > shotsx0.su
+fconv file_in1=shotsx0.su file_in2=wave.su file_out=shotw.su
+
+# for displaying results
+
+(suwind key=offset min=0 max=0 < pred_rr.su ; suwind key=offset min=0 max=0 < shotw.su) | suxgraph &
+
+sudiff shotw.su pred_rr.su > diff.su
+suximage < shotw.su  x1end=2.5 clip=1 title="original shot"&
+suximage < pred_rr.su  x1end=2.5 clip=1 title="shot with multiples removed"&
+suximage < diff.su   x1end=2.5 clip=1 title="removed multiples"&
+
diff --git a/marchenko/demo/mme/primariesPlane.scr b/marchenko/demo/mme/primariesPlane.scr
new file mode 100755
index 0000000000000000000000000000000000000000..543496d7a2bde4465ae20c71fa8da15c54be002f
--- /dev/null
+++ b/marchenko/demo/mme/primariesPlane.scr
@@ -0,0 +1,21 @@
+#!/bin/bash -x
+#SBATCH -J marchenko_primaries
+#SBATCH --cpus-per-task=10
+#SBATCH --ntasks=1
+#SBATCH --time=0:30:00
+
+
+#cd $SLURM_SUBMIT_DIR
+
+export ROOT=`grep ROOT= ../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
+export OMP_NUM_THREADS=10
+
+makewave fp=20 dt=0.004 file_out=wave.su nt=1024 t0=0.0 scale=0 scfft=1
+
+../../marchenko_primaries1 file_shot=shotsdx5_rp.su plane_wave=1 file_src=wave.su src_angle=10.0 \
+	nshots=901 verbose=4 istart=40 iend=500 fmax=90 pad=1024 \
+	niter=30 smooth=10 niterskip=60 niterec=2 shift=20 file_rr=plane1_10_rr.su T=0
+
diff --git a/marchenko/demo/oneD/strongContrast/epsPrimaries.scr b/marchenko/demo/mme/strongContrast/epsPrimaries.scr
similarity index 79%
rename from marchenko/demo/oneD/strongContrast/epsPrimaries.scr
rename to marchenko/demo/mme/strongContrast/epsPrimaries.scr
index ee99f2234d6d916097aef29b45628e1eb8f233c3..863cb9f9f802a093f7301c0294d85ad688477c86 100755
--- a/marchenko/demo/oneD/strongContrast/epsPrimaries.scr
+++ b/marchenko/demo/mme/strongContrast/epsPrimaries.scr
@@ -1,6 +1,8 @@
 #!/bin/bash
 
-export PATH=$HOME/src/OpenSource/bin/:$PATH:
+export ROOT=`grep ROOT= ../../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
 
 if [[ "$1" == "Figure8" ]];
 then
diff --git a/marchenko/demo/oneD/strongContrast/iterations.scr b/marchenko/demo/mme/strongContrast/iterations.scr
similarity index 73%
rename from marchenko/demo/oneD/strongContrast/iterations.scr
rename to marchenko/demo/mme/strongContrast/iterations.scr
index 6a71918049bde156839a1f247db1a5ce10e1a63a..ed4e11be036cc4bbd83d8bf3f9ca8f2198517e3f 100755
--- a/marchenko/demo/oneD/strongContrast/iterations.scr
+++ b/marchenko/demo/mme/strongContrast/iterations.scr
@@ -10,6 +10,10 @@
 #800/1800+600/3200+520/2000
 #.96531400966183574878 sample 241
 
+export ROOT=`grep ROOT= ../../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
+
 makewave fp=20 dt=0.004 file_out=wave.su nt=2048 t0=0.0 scale=0 scfft=1
 R=shotsdx5_rp.su
 
@@ -19,7 +23,7 @@ if [[ "$1" == "Figure8" ]];
 then
 istart=200
 (( iend = istart + 1 ))
-../../../marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
 	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
 	pad=512 niter=30 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
 fi
diff --git a/marchenko/demo/oneD/strongContrast/model.scr b/marchenko/demo/mme/strongContrast/model.scr
similarity index 94%
rename from marchenko/demo/oneD/strongContrast/model.scr
rename to marchenko/demo/mme/strongContrast/model.scr
index bd651affb43e888d6fcaf5eea912f624ed124238..7a88015aaa3d0272a9edcc898927be05842a31f7 100755
--- a/marchenko/demo/oneD/strongContrast/model.scr
+++ b/marchenko/demo/mme/strongContrast/model.scr
@@ -5,8 +5,9 @@
 #SBATCH --time=1:59:00
 #SBATCH -p max2h
 
-#adjust this PATH to where the code is installed
-export PATH=$HOME/src/OpenSource/bin:$PATH:
+export ROOT=`grep ROOT= ../../../../Make_include | head -1 | sed 's/ROOT=//' `
+#adjust the PATH to where the code is installed
+export PATH=$ROOT/bin:$PATH:
 
 dx=2.5
 dt=0.0005
diff --git a/marchenko/marchenko_primaries.c b/marchenko/marchenko_primaries.c
index 1eddba67121cafede753f57e4f0a3b7670dca991..3f6f9dc28c937227d4ce25965578c0dd506ceec9 100644
--- a/marchenko/marchenko_primaries.c
+++ b/marchenko/marchenko_primaries.c
@@ -60,7 +60,7 @@ char *sdoc[] = {
 " ",
 " INTEGRATION ",
 "   ishot=nshots/2 ........... shot number(s) to remove internal multiples ",
-"   file_tinv= ............... shot-record (from R) to remove internal multiples",
+"   file_tinv= ............... shot-record to remove internal multiples",
 "   file_src= ................ optional source wavelet to convolve selected ishot(s)",
 " COMPUTATION",
 "   tap=0 .................... lateral taper R_ishot(1), file_shot(2), or both(3)",
@@ -187,6 +187,8 @@ int main (int argc, char **argv)
 
     if (reci && ntap) vwarn("tapering influences the reciprocal result");
 
+/* defines the smooth transition zone of the time-window */
+
 	smooth = MIN(smooth, shift);
     if (smooth) {
         costaper = (float *)malloc(smooth*sizeof(float));
@@ -297,6 +299,7 @@ int main (int argc, char **argv)
     }
 
 /*================ Read focusing operator(s) ================*/
+/* this is an optional functionality, typically one uses one of the shots from R */
 
     if (file_tinv != NULL) {  /*  M0 is named DD */
         muteW   = (int *)calloc(Nfoc*nxs,sizeof(int));
@@ -504,7 +507,7 @@ int main (int argc, char **argv)
         dxs = dx;
     }
 
-    /* define tapers to taper edges of acquisition */
+    /* define optional tapers to taper edges of acquisition */
     if (tap == 1 || tap == 3) {
     	tapersy = (float *)malloc(nxs*sizeof(float));
         for (j = 0; j < ntap; j++)
@@ -619,6 +622,7 @@ int main (int argc, char **argv)
 	}
     perc=(iend-istart)/100;if(!perc)perc=1;
 
+    /* for the plane-wave option write the constructed plane wave to disk */
     if (plane_wave) {
 		writeDataIter("SRCplane.su", SRC, hdrs_out, ntfft, nxs, d2, f2, n2out, Nfoc, xsyn, zsyn, ixp, npos, 0, NINT(src_angle));
 	}
@@ -656,6 +660,7 @@ int main (int argc, char **argv)
 */
 
 /*================ start loop over number of time-samples ================*/
+/* for each time sample in this loop the Marchenko equations are solved */
 
     for (ii=istart; ii<iend; ii++) {
 
@@ -669,6 +674,8 @@ int main (int argc, char **argv)
         t5 = wallclock_time();
         memset(M0, 0, Nfoc*nxs*ntfft*sizeof(float));
         memset(v1plus, 0, Nfoc*nxs*ntfft*sizeof(float));
+
+        /* M0 equation (3) in the Geophysics implementation paper */
         /* once every 'niterskip' time-steps start from fresh M0 and do niter (~20) iterations */
 		if ( ((ii-istart)%niterskip==0) || (ii==istart) ) {
 			niterrun=niter;
@@ -733,12 +740,13 @@ int main (int argc, char **argv)
 
 /*================ number of Marchenko iterations ================*/
 
+        /* niterrun is m in equation (1) in the Geophysics implementation paper */
         for (iter=0; iter<niterrun; iter++) {
 
             t2    = wallclock_time();
     
 /*================ construction of Mi(-t) = - \int R(x,t) Mi(t)  ================*/
-
+            /* synthesis process is the compute kernel in equations (4) and (5) in the Geophysics implementation paper */
             synthesisp(Refl, Fop, Mi, RMi, nx, nt, nacq, nts, dt, xsyn, Nfoc,
                 xrcv, xsrc, xnx, fxse, fxsb, dxs, dxsrc, dx, ntfft, nw, nw_low, nw_high, mode,
                 reci, nshots, ixpos, npos, &tfft, isxcount, reci_xsrc, reci_xrcv, ixmask, verbose);
diff --git a/marchenko/synthesis.c b/marchenko/synthesis.c
index 7531c25a1ad7977e7b44190a6bcd164ce15cea00..39a31504b6ad6c42b17276c057b9dbddfe8dbdc7 100644
--- a/marchenko/synthesis.c
+++ b/marchenko/synthesis.c
@@ -41,10 +41,14 @@ float fxse, float fxsb, float dxs, float dxsrc, float dx, int nshots, int *ixpos
 int linearsearch(int *array, size_t N, int value);
 
 /*================ Convolution and Integration ================*/
-/* Refl has the full acquisition grid R(x_r, x_s) 
+/* The synthesis process is the compute kernel in equations (4) and (5) in the Geophysics implementation paper 
+ * It computes M_{2m}(x'_0,x''_0,t,t_2) = \int_{t'=0}^{+\infty} \int_{\partial \setD_0} R(x'''_0, x'_0,t')
+ *                                        & M_{2m-1}(x'''_0,x''_0,t-t',t_2) d x'''_0 dt'
+ *
+ * Refl has the full acquisition grid R(x_r, x_s) 
  * Fop has the acquisition grid of the operator, ideally this should be equal to the acquisition grid of Refl, 
- *   so all traces can be used to compute R*Fop.
- * The output RNi has the traces in the grid of Fop, these are the x_s positions of R(x_r,x_s) */
+ * so all traces can be used to compute R*Fop.
+ * The output RNi has the traces in the grid of Fop, these are the x_s positions of R(x_r,x_s)  */
 
 void synthesis(complex *Refl, complex *Fop, float *Top, float *RNi, int nx, int nt, int nxs, int nts, float dt, float *xsyn, int
 Nfoc, float *xrcv, float *xsrc, int *xnx, float fxse, float fxsb, float dxs, float dxsrc, float dx, int ntfft, int
diff --git a/utils/HomG.c b/utils/HomG.c
old mode 100755
new mode 100644
diff --git a/utils/Makefile b/utils/Makefile
index 115653916d18f9d1294b731e133ae2f50b80511f..d345730903d07f5fddbdcb8384778160c0251056 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -23,7 +23,7 @@ SRCM	= \
 		plotexample.c \
 		polint.c \
 		roughint.c \
-		sinusint.c \
+		sinuousint.c \
 		spline.c \
 		name_ext.c  \
 		allocs.c  \
diff --git a/utils/ampdet b/utils/ampdet
deleted file mode 100755
index 574ead227792513d2bfdcafdf433560e4a3ee50c..0000000000000000000000000000000000000000
Binary files a/utils/ampdet and /dev/null differ
diff --git a/utils/basop.c b/utils/basop.c
index 4456acbaaf94921101fd404e8eef7307690a6ee5..18c9d2323df1292ed278db098cc72273d60ca076 100644
--- a/utils/basop.c
+++ b/utils/basop.c
@@ -48,6 +48,7 @@ double wallclock_time(void);
 void pad_data(float *data, int nsam, int nrec, int nsamout, float *datout);
 void scl_data(float *data, int nsam, int nrec, float scl, float *datout, int nsamout);
 void pad2d_data(float *data, int nsam, int nrec, int nsamout, int nrecout, float *datout);
+void kill_traces(float *data, int nrec, int nsam, int fldr, int dim, char *file_t2k);
 float rcabs(complex z);
 complex froot(float x);
 
@@ -80,6 +81,8 @@ char *sdoc[] = {
 "   nxmax=512 ................ maximum number of traces in input files",
 "   ntmax=1024 ............... maximum number of samples/trace in input files",
 "   verbose=0 ................ silent option; >0 display info",
+"   file_t2k= ................ input file with traces to kill",
+"   dim=1 .................... dimension of killing traces (1 = src, 0 = rcv)",
 "   ",
 "   Options for choice:",
 "         - 1 -  shift  = time shift",
@@ -108,6 +111,7 @@ char *sdoc[] = {
 "         - 24 - deca1  = acoustic decompostion multiply with sqrt(2 kz/(w rho)) in kx-w domain",
 "         - 25 - deca2  = acoustic decompostion multiply with sqrt((2 w rho)/(kz)) in kx-w domain",
 "         - 26 - kfilt  = dipfilter in the kx-w domain given alpha and cp",
+"         - 27 - kill   = kill multiple traces",
 "  ",
 " author  : Jan Thorbecke : 12-12-1994 (janth@xs4all.nl)",
 "           Alexander Koek (E.A.Koek@CTG.TuDelft.NL)",
@@ -121,17 +125,18 @@ NULL};
 int main(int argc, char **argv)
 {
 	FILE	*fp_in, *fp_out;
-	int     nrec, nsam, ntmax, nxmax, error, ret, verbose, i, j;
-	int     opt, size, n1, n2, first, nrot;
+	int     nrec, nsam, ntmax, nxmax, error, ret, verbose, i, j, is;
+	int     opt, size, n1, n2, first, nrot; 
 	int     ntraces, ngath;
 	float   dt, dx, dy, c, rho, d1, d2, f1, f2; 
 	float   scl, xmin, xmax, trot;
 	double  t0, t1, t2;
 	float	fmin, fmax, *data, shift, rot, alpha, perc, dz, eps, *tmpdata;
-	char  	*file_in, *file_out;
+	char  	*file_in, *file_out, *file_t2k;
 	char  	choice[10], *choicepar;
+	int		dim;
 	segy	*hdrs;
-
+	
 	initargs(argc, argv);
 	requestdoc(1);
 
@@ -147,7 +152,11 @@ int main(int argc, char **argv)
 		file_out = NULL;
 	}
 	if(!getparstring("choice", &choicepar)) verr("choice unknown.");
-
+	
+	if(!getparstring("file_t2k", &file_t2k)){
+		file_t2k = NULL;
+	}
+	
 	if(!getparfloat("fmin", &fmin)) fmin = 0.0;
 	if(!getparfloat("shift", &shift)) shift = 0.0;
 	if(!getparfloat("c", &c)) c = 1500.0;
@@ -160,6 +169,7 @@ int main(int argc, char **argv)
 	if(!getparint("nrot", &nrot)) nrot = 0;
 	if(!getparint("ntmax", &ntmax)) ntmax = 1024;
 	if(!getparint("nxmax", &nxmax)) nxmax = 512;
+	if(!getparint("dim", &dim)) dim = 1;
 	n1 = 0;
 
 /* Opening input file */
@@ -388,7 +398,50 @@ int main(int argc, char **argv)
 			perc=0.15;
 			kxwfilter(data, nsam, nrec, dt, dx, fmin, fmax, alpha, c, perc);
 		}
-		
+		else if (!strcmp(choice, "kill") || !strcmp(choice, "27")) {
+			if (verbose) vmess("Muting shot: %d in %d",hdrs[0].fldr,dim);
+			kill_traces(data, nrec, nsam, hdrs[0].fldr, dim, file_t2k);			
+		}
+		else if (!strcmp(choice, "scaletraces") || !strcmp(choice, "28")) {
+			
+			FILE	*fp_dx;
+			char	ch, buffer[32];
+			int 	*newdx;
+			
+			newdx = (int *)malloc(nrec*sizeof(int)); 
+			//memset(t2k,1,nrec*sizeof(int));
+			
+
+			fp_dx = fopen("dx.txt", "r");
+			if (fp_dx == NULL) verr("Could not open dx file");
+			i=0;
+			j=0; 
+			while(1){
+				ch = fgetc(fp_dx);
+				if (ch == EOF) break; 
+				else if (!isdigit(ch)) {
+					newdx[i] = atoi(buffer);
+					bzero(buffer,32);
+					i++;
+					j=0;
+					continue;
+				}
+				else {
+					buffer[j] = ch; 
+					j++;
+				}
+			}
+			fclose(fp_dx);
+			
+			int it,ix;
+			for (ix=0;ix<nrec;ix++) { //Loop over receivers
+				//vmess("Trace %d, scale = %d",ix,t2k[ix]);
+				for (it=0;it<nsam;it++) { //Loop over time samples
+					data[ix*nsam+it] *= (float)newdx[ix]; 
+				}
+			}
+			
+		}
 
 		t2 = wallclock_time();
 		if (verbose) vmess("CPU-time basop = %.3f", t2-t1);
@@ -1583,3 +1636,52 @@ complex froot(float x)
     }
 }
 
+void kill_traces(float *data, int nrec, int nsam, int fldr, int dim, char *file_t2k)
+{
+	char	ch, buffer[32];
+	int 	i, *t2k;
+	FILE  	*fp_t2k;
+	
+	t2k = (int *)malloc(nrec*sizeof(int)); 
+	//memset(t2k,1,nrec*sizeof(int));
+	for (i=0; i<nrec; i++) {
+		t2k[i] = 1;
+	}
+	fp_t2k = fopen(file_t2k, "r");
+	if (fp_t2k == NULL) verr("Could not open trace 2 kill file");
+	i=0;
+	while(1){
+		ch = fgetc(fp_t2k);
+		if (ch == EOF) break; 
+		else if (!isdigit(ch)) {
+			t2k[atoi(buffer)] = 0;
+			bzero(buffer,32);
+			i=0;
+			continue;
+		}
+		else {
+			buffer[i] = ch;
+			i++;
+		}
+	}
+	fclose(fp_t2k);
+
+	int it,ix;
+	if (dim == 0) {
+		for (ix=0;ix<nrec;ix++) { //Loop over receivers
+			//vmess("Trace %d, scale = %d",ix,t2k[ix]);
+			for (it=0;it<nsam;it++) { //Loop over time samples
+				data[ix*nsam+it] *= (float)t2k[ix]; 
+			}
+		}
+	}
+	else if (dim == 1) {
+		for (ix=0;ix<nrec;ix++) { //Loop over receivers
+			//vmess("Trace %d, scale = %d",ix,t2k[ix]);
+			for (it=0;it<nsam;it++) { //Loop over time samples
+				data[ix*nsam+it] *= (float)t2k[fldr]; 
+			}
+		}
+	}
+}
+
diff --git a/utils/combine.c b/utils/combine.c
old mode 100755
new mode 100644
diff --git a/utils/combine_induced.c b/utils/combine_induced.c
old mode 100755
new mode 100644
diff --git a/utils/makemod.c b/utils/makemod.c
index 574c1355e646a5e06b536f5e07ef3174914ca88b..8d1f5fe21c901319cee73315002c6bb528502600 100644
--- a/utils/makemod.c
+++ b/utils/makemod.c
@@ -32,7 +32,7 @@ void interpolation(float *x, float *z, int nxp, int nx, int poly, int *pminx, in
 
 void linearint(int *zp, int minx, int maxx, float dz, float *interface);
 
-void sinusint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel);
+void sinuousint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel);
 
 void roughint(int *zp, int minx, int maxx, float dz, float *interface, float ampl, float beta, float seed);
 
@@ -96,9 +96,9 @@ char *sdoc[] = {
   " ",
   "   Options for intt:",
   "         - def       = default interface through the points(Xi, Zi)",
-  "         - sin       = sinus shaped interface",
+  "         - sin       = sinuous shaped interface",
   "         - rough     = rough interface with beta(smoothness)",
-  "         - fract     = cosinus fractal shaped interface",
+  "         - fract     = cosinuous fractal shaped interface",
   "         - random    = define random velocities in layer",
   "         - elipse    = define elipse shaped body",
   "         - diffr     = point diffractions",
@@ -106,7 +106,7 @@ char *sdoc[] = {
   "   Options for var in case of intt =:",
   "         - sin(2)    = wavelength,amplitude",
   "         - rough(3)  = amplitude,beta,seed",
-  "         - fract(6)  = Nsinus,amplitude,dim,k0,freqscale,seed",
+  "         - fract(6)  = Nsin,amplitude,dim,k0,freqscale,seed",
   "         - random(1) = min-max variation around cp",
   "         - elipse(2) = r1, r2: vertical and horizontal radius",
   "         - diffr(1)  = width of each point, type(optional)",
@@ -555,7 +555,7 @@ int main(int argc, char **argv)
       Nv = countnparval(Nvi,"var");
       if (Nv != 2) verr("Sinus interface must have 2 variables.");
       getnparfloat(Nvi,"var", var);
-      sinusint(zp, nminx, nmaxx, dz, interface, dx, var[0], var[1]);
+      sinuousint(zp, nminx, nmaxx, dz, interface, dx, var[0], var[1]);
       if (above == 0) Noi++; else Noi--;
       if (above == 0) Nvi++; else Nvi--;
     }
diff --git a/utils/readSnapData.c b/utils/readSnapData.c
old mode 100755
new mode 100644
diff --git a/utils/reshape_su.c b/utils/reshape_su.c
old mode 100755
new mode 100644
diff --git a/utils/sinusint.c b/utils/sinuousint.c
similarity index 72%
rename from utils/sinusint.c
rename to utils/sinuousint.c
index 3018caa19a5b89c854887908f184b861d1614e0f..68adaa87c0909bfc71cb0090f318469c343ecac8 100644
--- a/utils/sinusint.c
+++ b/utils/sinuousint.c
@@ -5,7 +5,7 @@
 #define SGN(x) ((x) < 0 ? -1.0 : 1.0)
 
 /**
-* compute sinus shaped interface used in makemod
+* compute sinuous shaped interface used in makemod
 *
 *   AUTHOR:
 *           Jan Thorbecke (janth@xs4all.nl)
@@ -13,7 +13,7 @@
 **/
 
 
-void sinusint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel)
+void sinuousint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel)
 {
 	int     j, i;