diff --git a/.gitignore b/.gitignore
index d29000507baf716523e7711070baaf27c0c9194b..87bef21705ba2dff7c0fa7bb12c4ac284ec7c548 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.[oa]
 *.eps
+*.mod
 bin/*
 *.su
 *.bin
@@ -30,6 +31,11 @@ utils/Snap2Shot
 utils/combine
 utils/combine_induced
 utils/reshape_su
+utils/convhomg
+utils/makeR1D
+utils/padmodel
+utils/pwshift
+utils/truncate
 fdelmodc/fdelmodc
 include/genfft.h
 Make_include
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000000000000000000000000000000000000..53837588f88859e167972d3a22ad4b70cd43a33a
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,139 @@
+INSTALLATION
+-------------
+
+1) To compile and link the code you first have to set the ROOT variable in the Make_include file which can be found in the directory where you have found this INSTALL file.
+You can use Make_include_template as a first start: 
+
+```
+cp Make_include_template Make_include
+```
+
+2) Check the compiler and CFLAGS options in the file Make_include and adapt to the system you are using. The default options are set for a the GNU C-compiler on a Linux system. A Fortran compiler is only needed for the MDD package. A C++ compiler is not needed to compile the code. The Makefile has been tested with GNU make. 
+If the fortran compiler (FC=) is not set the MDD package will not be build.
+
+3) If the compiler options are set in the Make_include file you can type 
+```
+make clean
+make 
+```
+and the Makefile will make:
+
+    cd FFTlib       ; $(MAKE)
+    cd zfp          ; $(MAKE) install
+    cd fdelmodc     ; $(MAKE) install
+    cd fdelmodc3D   ; $(MAKE) install
+    cd utils        ; $(MAKE) install
+    cd marchenko    ; $(MAKE) install
+    cd marchenko3D  ; $(MAKE) install
+    cd corrvir      ; $(MAKE) install
+    cd raytime      ; $(MAKE) install
+    cd MDD          ; $(MAKE) install
+    cd fdacrtmc     ; $(MAKE) install
+
+- FFT library 
+- fdelmodc / 3D
+- marchenko / 3D
+- utils
+
+The libraries will be placed in the lib/ directory and the executables in the bin/ directory.
+
+To use the executables don't forget to include the pathname in your PATH:
+
+```
+bash/sh:
+export PATH='path_to_this_directory'/bin:$PATH:
+csh:
+setenv PATH 'path_to_this_directory'/bin:$PATH:
+```
+
+
+MKL libraries
+-------------
+If you are running on x64_86 processors you can download (for free) Intel's highly optimised MKL package:
+
+https://software.intel.com/en-us/mkl/choose-download
+ 
+These Libraries include highly optimised libraries of BLAS, LAPACK, and FFT(W). 
+Usually the MKL libraries are installed in $MKLROOT. If that variable is not set, you can try to find the correct path by searching for one of the libraries:
+
+find /opt/intel -name libmkl_gf_lp64.so
+
+and adjust MKLROOT in Make_include accordingly. 
+If MKLROOT is not set in your environment and also not set in Make_include then MKL will not be used and default options for BLAS, LAPACK, and FFT are chosen. 
+
+
+SEISMIC UNIX
+-----------
+If you want to use the .su files with SU from CWP:
+git clone https://github.com/JohnWStockwellJr/SeisUnix
+
+==> Please make sure that SU is compiled without XDR (in $CWPROOT/Makefile.config make sure that XDRFLAG is NOT enabled). The SU output files of fdelmodc are all based on local IEEE data.
+To exclude the XDRFLAG in SU you have to use the following line in $CWPROOT/src/Makefile.config around line 35:
+
+XDRFLAG =
+
+so the XDRFLAG is empty. If you have made that change you have to remake SU with the commands:
+make remake
+make xtremake
+
+
+ZFP
+---
+The fdacrtmc and the 3D Marchenko code makes use of ZFP compression to store snaphots in CPU memory or do effient IO. This package is included in this repository for your convenience. The latest package and detailed explanation can be found on:
+
+https://github.com/LLNL/zfp
+
+MDD
+---
+The MDD kernels depend on BLAS and LAPACK calls. These libraries are part of Intel's MKL. Free downloads of the source code of these libraries can be found on 
+
+https://www.netlib.org/blas/index.html
+https://www.netlib.org/lapack/index.html
+
+
+FDACRTMC
+--------
+fdacrtmc uses FFTW and the wisdom computations are stored on disk for re-usage.  This directory is defined in fdacrtmc.h
+
+#ifndef WISDOMDIR
+#define WISDOMDIR "/tmp/fftw/"
+#endif
+
+For the code to run properly the directory /tmp/fftw/ must exist. It is also possible to compile the fdacrtmc code that defines
+WISDOMDIR. In fdacrtmc/Makefile add:
+
+CFLAGS  += -DWISDOMDIR="/directory/that/exists"
+
+or you can change the name of WISDOMDIR in fdacrtmc.h
+
+
+MISC
+----
+Other make commands which can be useful:
+
+make clean : removes all object files, but leaves libraries and executables
+make realclean: removes also object files, libraries and executables.
+
+COMPILATION PROBLEMS
+--------------------
+If you encounter missing trigometric / mathematical functions during compilation, for example;
+
+defineSource.c:(.text+0x2356): undefined reference to sin getParameters.o: In function getParameters:
+
+add  '-lm -lc' around line 109 in Make_include:
+
+LIBS= -L$L -lgenfft $(BLAS) -lm -lc
+
+UPDATES AND LATEST VERSION
+--------------------------
+The latest version of the source code and manual can be found at:
+
+http://www.xs4all.nl/~janth/Software/Software.html
+
+or at github:
+
+git clone https://github.com/JanThorbecke/OpenSource.git
+git clone git://github.com/JanThorbecke/OpenSource.git
+
+The code is used by many different people and if there is a request for a new option in the code, then I will try to implement, test and make it available. 
+
diff --git a/MDD/GlobalVariables.h b/MDD/GlobalVariables.h
new file mode 100644
index 0000000000000000000000000000000000000000..91c11d5afdb52b8ce27c0de845c46b3941c683a2
--- /dev/null
+++ b/MDD/GlobalVariables.h
@@ -0,0 +1,5 @@
+typedef struct { /* complex number */
+        float r,i;
+} complex;
+
+extern complex *cB;
diff --git a/MDD/Makefile b/MDD/Makefile
index 523a1f8a1c1008245e32f99dd5187e49393d9384..b601fe15bcf8a7a2f72f91b075be83316d0c3de8 100644
--- a/MDD/Makefile
+++ b/MDD/Makefile
@@ -12,7 +12,8 @@ ALLINC  = -I.
 #General BLAS library
 #LIBS += $(BLAS)
 
-CFLAGS += -I$(MKLROOT)/include
+#CFLAGS += -g # -I$(MKLROOT)/include  
+#FFLAGS += -g -mcmodel=medium
 #LIBS += -lblas -llapack -L$L -lgenfft $(LIBSM) -lc -lm
 
 all: mdd 
@@ -30,21 +31,28 @@ SRCC =  $(PRG).c \
 	getFileInfo.c \
 	verbosepkg.c \
 	name_ext.c \
-	wallclock_time.c
+	wallclock_time.c 
+
+SRCF =  lsqrDataModule.F90 \
+        zlsqrDataModule.F90 \
+        zlsqrblasInterface.F90 \
+        zlsqrModule.F90
 
 OBJC	= $(SRCC:%.c=%.o)
 
-$(PRG):	$(OBJC) 
-	$(CC) $(LDFLAGS) $(CFLAGS) $(OPTC) -o $(PRG) $(OBJC) $(LIBS)
+OBJF    = $(SRCF:%.F90=%.o)
+
+$(PRG):	$(OBJC) $(OBJF)
+	$(FC) $(LDFLAGS) -nofor-main $(CFLAGS) $(OPTC) -o $(PRG) $(OBJC) $(OBJF) $(LIBS) 
 
 install: $(PRG) 
 	cp $(PRG) $B
 
 clean:
-		rm -f core $(OBJC) $(OBJM) $(PRG) 
+		rm -f core $(OBJC) $(OBJF) $(OBJM) *.mod $(PRG) 
 
 realclean:
-		rm -f core $(OBJC) $(OBJM) $(PRG) $B/$(PRG) 
+		rm -f core $(OBJC) $(OBJF) $(OBJM) *.mod $(PRG) $B/$(PRG) 
 
 
 print:	Makefile $(SRC)
@@ -58,4 +66,3 @@ tar:
 	@tar cf $(PRG).tar Makefile $(SRC) && compress $(PRG).tar
 
 
-
diff --git a/MDD/computeMatrixInverse.c b/MDD/computeMatrixInverse.c
index fd4eee61cfb6275bd428e6beb60058d7c89f0d53..7fe2d58f16c0d18c0c5b42612c109362cd2a21e3 100644
--- a/MDD/computeMatrixInverse.c
+++ b/MDD/computeMatrixInverse.c
@@ -342,7 +342,7 @@ void computeMatrixInverse(complex *matrix, int nxm, int rthm, float eps_a, float
 			/*S[i]=1.0/(S[i]+eps_r*S[0]);*/
 /*			fprintf(stderr,"S^-1[%d] = %e\n",i,S[i]);*/
 		}
-		if(verbose) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
+		if(verbose>1) fprintf(stderr,"fraction of eigenvalues used = %.3f\n",(float)(neig/((float)N)));
 
 		for (j=0; j<N; j++) {
 			for (i=0; i<N; i++) {
diff --git a/MDD/deconvolve.c b/MDD/deconvolve.c
index 13dc809c2872454ac80f70b855a151a312ce28b9..3e677c6a7a165db29193b13ebcc909584f3c0db5 100644
--- a/MDD/deconvolve.c
+++ b/MDD/deconvolve.c
@@ -9,7 +9,7 @@
 
 typedef struct { /* complex number */
 	float r,i;
-} complex;
+}complex;  
 
 /*
 cblas interface
@@ -88,19 +88,34 @@ On entry, LDC specifies the first dimension of C as declared in the calling (sub
 
 */
 
+// zLSQR implementation
+void zLSQR_(int *m, int *n, size_t *indw, void (*Aprod1)(int *,int *, complex *, complex *, size_t *), void (*Aprod2)(int *,int *, complex *, complex *, size_t *), complex *b, float *damp, int *wantse, complex *x, float *se, float *atol, float *btol, float* conlim,  int* itnlim, int *nout, int *istop, int *itn, float *Anorm, float *Acond, float *rnorm, float *Arnorm, float *xnorm); //, complex *w, complex *v);
+
+// C funcs as input for Fortran subroutines in zLSQR
+void Aprod1_(int *m, int *n, float *x, float *y, size_t *indw);
+void Aprod2_(int *m, int *n, float *x, float *y, size_t *indw);
+
 void computeMatrixInverse(complex *matrix, int nxm, int rthm, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int iw, int verbose);
 
-int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose)
+extern complex *cB;
+
+int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int lsqr_iter, float lsqr_damp, int k_iter, float TCscl, int verbose)
 {
 	int istation, jstation, i, j, k, icc, ibb, NA, NB, NC, nshots;
 	size_t  iwnA, iw, iwnB, iwAB, iwBB;
 	complex *AB, *BB;
 	char *transa, *transb,*transN;
 	complex beta, alpha, tmp, a, b;
+	
 
 	AB = (complex *)calloc(nstationA*nstationB,sizeof(complex));
 	BB = (complex *)calloc(nstationB*nstationB,sizeof(complex));
 
+	complex *T0Fm, *Temp; // remember to free this stuff
+
+	T0Fm = (complex *)calloc(nstationA*nstationB*nfreq,sizeof(complex));
+	Temp = (complex *)calloc(nstationB*nstationB*nfreq,sizeof(complex));
+
 	if (conjgA == 1) transa = "C";
 	else if (conjgA == 0) transa = "N";
 	else transa = "T";
@@ -129,23 +144,11 @@ private(iw, iwnA, iwnB, iwAB, iwBB)
 				/* cblas_cgemm(CblasRowMajor,CblasNoTrans, CblasConjTrans, NA, NB, nshots, &alpha.r, 
 				&cA[iwnA].r, NA, 
 				&cB[iwnB].r, NB, &beta.r,
-				ocC[iwAB].r, NC); */
-/*
-			for (i=0; i<nshots; i++) {
-				for (j=0; j<nshots; j++) {
-				    for (k=0; k<nstationB; k++) {
-					cC[iwAB+j*nshots+i].r += cA[iwnA+k*nshots+i].r*cB[iwnB+j*nshots+k].r - cA[iwnA+k*nshots+i].i*cB[iwnB+j*nshots+k].i;
-					cC[iwAB+j*nshots+i].i += cA[iwnA+k*nshots+i].r*cB[iwnB+j*nshots+k].i + cA[iwnA+k*nshots+i].i*cB[iwnB+j*nshots+k].r;
-                    }
-				}
-			}
-*/
-
+				&cC[iwAB].r, NC); */
 			cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
 				&cA[iwnA].r, &NA, 
 				&cB[iwnB].r, &NB, &beta.r, 
 				&cC[iwAB].r, &NC); 	
-
 //				memcpy(&cC[iwAB].r, &cB[iwnA].r, sizeof(float)*2*nstationA*nshots);
 		}
 		else if (mdd==1) { /* Multi Dimensional deconvolution */
@@ -186,21 +189,152 @@ private(iw, iwnA, iwnB, iwAB, iwBB)
 				&BB[0].r, &NB, &beta.r, 
 				&cC[iwAB].r, &NA);
 		}
-		else if (mdd==3) { /* Copy matrix A or B to memory for testing purposes */
-			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
+		else if (mdd==3) { /* LSQR solver */
+			// INPUT LSQR
+			int wantse, itnlim, inc, nout, mn;
+			float damp, atol, btol, conlim;
+			float *n;
+			
+			damp = lsqr_damp;
+			atol = 0;
+			btol = 0;
+			conlim = 0; 
+			wantse = 0;
+			itnlim = lsqr_iter;
+			nout = 0;
+			inc = 1;	
+			
+			mn = NA*NB;
+			
+			// OUTPUT zLSQR
+			int itn, istop;
+			float Anorm, Acond, rnorm, Arnorm, xnorm, se;
+		
+			zLSQR_(&mn, &mn, &iwnB, Aprod1_, Aprod2_, 
+						&cA[iwnA], &damp, 
+						&wantse, &cC[iwAB], &se,
+						&atol, &btol, &conlim, &itnlim, &nout,
+						&istop, &itn, &Anorm, &Acond, &rnorm, &Arnorm, &xnorm);
+		
+			//memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
 		}
 		else if (mdd==4) {
-			memcpy(&cC[iwAB].r, &cB[iwnB].r, sizeof(complex)*nstationB*nshots);
+			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots);
+		}
+		else if (mdd==5) { //Matrix with Single shot attempt (doesn't work)
+			cgemv_(transa, &NA, &NA, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&cB[iwnB].r, 1, &beta.r,
+				&cC[iwnA].r, 1);
 		}
-		else if (mdd==5) {
-			cblas_cdotu_sub(nshots, &cA[iwnA].r, NA, &cB[iwnB].r, NB, &cC[iwnA].r);
+		
+		if (mdd==6) { /* Transmission Calculations */
+			
+			cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
+				&cA[iwnA].r, &NA, 
+				&cB[iwnB].r, &NB, &beta.r, 
+				&T0Fm[iwnA].r, &NA); // construct T_0 F+m
+			
+			for (jstation = 0; jstation < nstationA; jstation++) {
+				for (istation = 0; istation < nshots; istation++) {	
+					T0Fm[iw*nstationA*nshots+jstation*nshots+istation].r *= TCscl;
+					T0Fm[iw*nstationA*nshots+jstation*nshots+istation].i *= TCscl;
+				}   
+			}
+
+
+			memcpy(&cC[iwAB].r, &cA[iwnA].r, sizeof(complex)*nstationA*nshots); // copy T_0
+			
+			for (k=1; k< k_iter; k++) {
+				cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r, 
+					&T0Fm[iwnA].r, &NA, 
+					&cA[iwnB].r, &NB, &beta.r, 
+					&Temp[iwnA].r, &NA); // construct T_0 F+m T_0 (k=1)
+					
+				for (jstation = 0; jstation < nstationA; jstation++) {
+					for (istation = 0; istation < nshots; istation++) {
+						Temp[iw*nstationA*nshots+jstation*nshots+istation].r *= TCscl;
+						Temp[iw*nstationA*nshots+jstation*nshots+istation].i *= TCscl;						
+						if (k_iter % 2 == 1) {
+							
+							cC[iw*nstationA*nshots+jstation*nshots+istation].r -= Temp[iw*nstationA*nshots+jstation*nshots+istation].r; //pow(TCscl,k_iter+2);
+							cC[iw*nstationA*nshots+jstation*nshots+istation].i -= Temp[iw*nstationA*nshots+jstation*nshots+istation].i; //pow(TCscl,k_iter+2);
+						} else {
+							cC[iw*nstationA*nshots+jstation*nshots+istation].r += Temp[iw*nstationA*nshots+jstation*nshots+istation].r; //pow(TCscl,k_iter+2);
+							cC[iw*nstationA*nshots+jstation*nshots+istation].i += Temp[iw*nstationA*nshots+jstation*nshots+istation].i; //pow(TCscl,k_iter+2);
+							
+						}
+					}   
+				}
+				memcpy(&cA[iwnA].r,&Temp[iwnA].r,sizeof(complex)*nstationA*nshots);
+			}
+			
 		}
 
 	}
 
 	free(AB);
 	free(BB);
+	free(T0Fm);
+	free(Temp);
 
 	return 0;
 }
 
+
+void Aprod1_(int *m, int *n, float *x, float *y, size_t *indw) {
+	int ldc;
+	complex beta, alpha;
+	char *transa, *transb;
+
+	transa = "N";
+	transb = "N";
+	alpha.r = 1.0; alpha.i = 0.0;
+	beta.r = 1.0; beta.i = 0.0;
+	
+	ldc = sqrt(*m);	
+	
+//	fprintf(stderr,"Aprod1 \n");
+	
+	cgemm_(transa, transb, &ldc, &ldc, &ldc, &alpha.r, 
+		&x[0], &ldc, 
+		&cB[*indw].r, &ldc, &beta.r, 
+		&y[0], &ldc); 	
+	
+	/*
+	cgemv_(transa, m, n, &alpha.r, 
+		&cA[*indw].r, m, 
+		&x[0].r, &incx, &beta.r,
+		&y[0].r, &incy);
+	*/
+}
+
+void Aprod2_(int *m, int* n, float *x, float *y, size_t *indw) {
+	int ldc;
+	complex beta, alpha;
+	char *transa, *transb;
+	
+	transa = "C";
+	transb = "N";
+	alpha.r = 1.0; alpha.i = 0.0;
+	beta.r = 1.0; beta.i = 0.0;
+	
+	ldc = sqrt(*m);
+	
+//	fprintf(stderr,"Aprod2 \n");
+	
+	memset(x,0,1*(*m)*sizeof(complex));
+	
+	cgemm_(transb, transa, &ldc, &ldc, &ldc, &alpha.r, 
+		&y[0], &ldc, 
+		&cB[*indw].r, &ldc, &beta.r, 
+		&x[0], &ldc); 
+	
+	/*
+	cgemv_(transa, m, n, &alpha.r, 
+		&cA[*indw].r, m, 
+		&y[0].r, &incx, &beta.r,
+		&x[0].r, &incy);
+	*/	
+}
+
diff --git a/MDD/lsqrDataModule.F90 b/MDD/lsqrDataModule.F90
new file mode 100644
index 0000000000000000000000000000000000000000..54e0d0ae9d3a91693b88021e75f3ade1ff0d5498
--- /dev/null
+++ b/MDD/lsqrDataModule.F90
@@ -0,0 +1,31 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File lsqrDataModule.f90
+!
+! Defines real(dp) and a few constants for use in other modules.
+!
+! 24 Oct 2007: Allows floating-point precision dp to be defined
+!              in exactly one place (here).  Note that we need
+!                 use lsqrDataModule
+!              at the beginning of modules AND inside interfaces.
+!              zero and one are not currently used by LSQR,
+!              but this shows how they should be declared
+!              by a user routine that does need them.
+
+! 29 Jun 2013: Added ip and eps among others to match MINRES-QLP
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module lsqrDataModule
+
+  use iso_c_binding
+
+  implicit none
+
+  intrinsic                   ::      selected_real_kind, selected_int_kind, tiny
+
+  integer, parameter, public  :: dp = c_float !selected_real_kind(15,307) ! 64-bit real, default
+  integer,  parameter, public :: sp    = selected_real_kind(6,37)      ! 32-bit real
+  integer, parameter, public  :: ip = c_int !selected_int_kind(9)       ! R: (-10^R, 10^R)
+  real(c_float), parameter, public :: zero = 0.0_dp, one = 1.0_dp, eps = epsilon(zero)
+  real(c_float), parameter, public :: realmin = tiny(one)
+
+end module lsqrDataModule
diff --git a/MDD/mdd.c b/MDD/mdd.c
index 92d1976e08362fda7abb872db5d603ea82492fdb..4420da13dc9da07cb5c9bf4e3b7f3a8ec04aeff7 100644
--- a/MDD/mdd.c
+++ b/MDD/mdd.c
@@ -26,7 +26,9 @@ int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *
 
 int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc, int *xnx, complex *cdata, int nw, int nw_low, int ngath, int nx, int nxm, int ntfft, float alpha, float scl, float conjg, int transpose, int verbose);
 
-int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose);
+//int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose);
+int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int lsqr_iter, float lsqr_damp, int k_iter, float TCscl, int verbose);
+
 
 void writeEigen(char *file_out, float df, int nw_low, int nw_high, int nw, float *eigen, int nx, float dx, float xmin);
 void writeDatamatrix(char *file_out, complex *P, int ntfft, int ntc, int Nrec, int Nshot, int nfreq, int nw_low, float dt, int verbose);
@@ -50,6 +52,7 @@ char *sdoc[] = {
 " ",
 "   file_A= .................. name of file(s) which store the data in location A",
 "   file_B= .................. name of file(s) which store the data in location B",
+"   file_C= .................. name of file(s) which store the data in location C",
 " ",
 " Optional parameters: ",
 " ",
@@ -58,24 +61,27 @@ char *sdoc[] = {
 "   fmin=0 ................... minimum frequency",
 "   fmax=70 .................. maximum frequency to use in deconvolution",
 " INPUT DEFINITION ",
-"   cjA=1 .................... -1 => apply complex conjugate to A",
-"   sclA=1 ................... apply scaling factor to A",
-"   tranposeA=0 .............. apply transpose to A",
-"   cjB=1 .................... -1 => apply complex conjugate to B",
-"   sclB=1 ................... apply scaling factor to B",
-"   tranposeB=0 .............. apply transpose to B",
+"   cjA/B/C=1 ................ -1 => apply complex conjugate to A/B/C",
+"   sclA/B/C=1 ............... apply scaling factor to A/B/C",
+"   tranposeA/B/C=0 .......... 1 => apply transpose to A/B/C",
+"   k_iter=5 ................. Iterations for MDD=6",
 " MATRIX INVERSION CALCULATION ",
 "   conjgA=0 ................. apply complex conjugate-transpose to A",
 "   conjgB=1 ................. apply complex conjugate-transpose to B",
+"   conjgC=0 ................. apply complex conjugate-transpose to C",
 "   rthm=0 ................... see below for options",
 "   eps_a=1e-5 ............... absolute stabilization factor for LS",
 "   eps_r=1e-4 ............... relative stabilization factor for LS",
 "   numacc=1e-6 .............. numerical accurary for SVD",
-"   ntap=0 ................... number of taper points matrix",
+"   ntapA/B=0 ................ number of taper points matrix",
 "   ftap=0 ................... percentage for tapering",
 "   tap=0 .................... type of taper: 0=cos 1=exp",
 "   eigenvalues= ............. write SVD eigenvalues to file ",
 "   mdd=1 .................... mdd=0 => computes correlation ",
+"                              mdd=3 => LSQR solver ",
+" LSQR PARAMETERS ",
+"   lsqr_iter=25 ............. number of iterations for LSQR solver",
+"   lsqr_damp=1e-4 ........... damping for LSQR solver",
 " OUTPUT DEFINITION ",
 "   file_out= ................ output base name ",
 "   causal=1 ................. output causal(1), non-causal(2), both(3), or summed(4)",
@@ -88,6 +94,8 @@ char *sdoc[] = {
 "    nt (the number of samples read by the IO routine)",
 " ",
 " Options for mdd= ",
+"	  6 = Iterative Transmission Response (based on Vd Neut et al. 2018, EAGE)",
+"     3 = LSQR based solver A = Bx",
 "     2 = A/(B + eps) ",
 "     1 = A*B^H/(B*B^H + eps) ",
 "     0 = A*B^H ",
@@ -106,34 +114,40 @@ char *sdoc[] = {
 NULL};
 /**************** end self doc ***********************************/
 
+complex *cB;
+
 int main (int argc, char **argv)
 {
 	FILE    *fpin, *fpout;
 	int		i, j, k, ret, nshots, ntraces;
 	int		size, n1, n2, ntfft, nf, causal;
 	int     verbose, fullcorr, ncorstat, err;
-	int     nt, nc, ncc, ntc, nshotA, nshotB;
-	size_t  nstationA, nstationB, nfreq, istation, jstation, iw;
+ 	int     nt, nc, ncc, ntc, nshotA, nshotB, nshotC;
+ 	size_t  nstationA, nstationB, nstationC, nfreq, istation, jstation, iw;
 	int     pgsz, istep,jstep;
 	int     mdd;
-	int		conjgA, conjgB;
-	int 	ntap, nxm, ngath, nw, nw_low, nw_high, eigenvalues, rthm, combine, distance;
+	int	    conjgA, conjgB, conjgC;
+ 	int     ntapA, ntapB, nxm, ngath, nw, nw_low, nw_high, eigenvalues, rthm, combine, distance;
 	size_t  nwrite, cdatainSize, datainSize, cdataoutSize, stationSize, is;
 	float	dx, dt, fmin, fmax, df, eps_r, eps_a, ftap, numacc;
 	float	*rC, scl, *rl, *eigen;
 	float   f1, f2, d1, d2, sclsxgx, xmin, xmax, alpha, wshot, wpi, wrec;
- 	float   *xrcvA, *xsrcA, *xrcvB, *xsrcB;
+  	float   *xrcvA, *xsrcA, *xrcvB, *xsrcB, *xsrcC, *xrcvC;
 	float	*taper;
     int     *xnx;
-	float sclA,sclB, cjA, cjB;
-	int  transposeA, transposeB;
+ 	float 	sclA,sclB, cjA, cjB, sclC, cjC;
+ 	int  	transposeA, transposeB, transposeC;
+ 	float	scaling;
+ 	int  	npad;
+ 	int     k_iter, lsqr_iter;
+ 	float   TCscl, lsqr_damp;
 
+ 	complex *cdataout, *cTemp;
 
-	complex *cdataout;
 	double  t0, t1, t2, t3, tinit, twrite, tread, tdec, tfft;
-	char	*file_A, *file_B, *file_out, *file_dmat, filename[1024], number[128], *rthmName;
+ 	char	*file_A, *file_B, *file_C, *file_out, *file_dmat, filename[1024], number[128], *rthmName;
 	int     pe=0, root_pe=0, npes=1, ipe, size_s, one_file;
-	complex *cA, *cB, *oBB;
+	complex *cA, *cC, *oBB;
 	segy *hdr;
 
 	t0 = wallclock_time();
@@ -145,6 +159,7 @@ int main (int argc, char **argv)
 	assert(file_A != NULL);
 	if (!getparstring("file_B", &file_B)) file_B=NULL;
 	assert(file_B != NULL);
+ 	if (!getparstring("file_C", &file_C)) file_C=NULL;
 	if (!getparstring("file_out", &file_out)) file_out=NULL;
  	assert(file_out != NULL);
 	if (!getparstring("file_dmat", &file_dmat)) file_dmat=NULL;
@@ -154,20 +169,34 @@ int main (int argc, char **argv)
 	if (!getparint("rthm", &rthm)) rthm = 0;
 	if (!getparint("combine", &combine)) combine = 0;
 	if (!getparint("causal", &causal)) causal = 1;
-	if (!getparint("ntap", &ntap)) ntap = 0;
-	if (!getparfloat("ftap", &ftap)) ftap = 0.;
+    if (!getparint("ntapA", &ntapA)) ntapA = 0;
+    if (!getparint("ntapB", &ntapB)) ntapB = 0;
+    if (!getparfloat("ftap", &ftap)) ftap = 0.;
+    if (!getparfloat("scaling", &scaling)) scaling = 1.;
 	if (!getparfloat("eps_r", &eps_r)) eps_r = 1e-4;
 	if (!getparfloat("eps_a", &eps_a)) eps_a = 1e-5;
 	if (!getparfloat("numacc", &numacc)) numacc = 1e-6;
 	if (!getparint("eigenvalues", &eigenvalues)) eigenvalues = 0;
 	if (!getparint("mdd", &mdd)) mdd = 1;
 
+    if (!getparint("lsqr_iter", &lsqr_iter)) lsqr_iter = 25;
+    if (!getparfloat("lsqr_damp", &lsqr_damp)) lsqr_damp = 1e-4;
+
 	if (!getparint("transposeA", &transposeA)) transposeA = 0;
 	if (!getparfloat("sclA", &sclA)) sclA = 1.;
 	if (!getparfloat("cjA", &cjA)) cjA = 1.;
 	if (!getparint("transposeB", &transposeB)) transposeB = 0;
 	if (!getparfloat("sclB", &sclB)) sclB = 1.;
 	if (!getparfloat("cjB", &cjB)) cjB = 1.;
+    if (file_C != NULL) {
+        if (!getparint("transposeC", &transposeC)) transposeC = 0;
+        if (!getparfloat("sclC", &sclC)) sclC = 1.;
+        if (!getparfloat("cjC", &cjC)) cjC = 1.;
+        if (!getparint("conjgC", &conjgC)) conjgC = 0;
+    }
+
+    if (!getparint("npad", &npad)) npad = 0;
+    if (!getparint("k_iter", &k_iter)) k_iter= 5;
 
 #ifdef _OPENMP
     npes   = omp_get_max_threads();
@@ -200,7 +229,23 @@ int main (int argc, char **argv)
 	getFileInfo(file_B, &n1, &n2, &nshotB, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
 	assert( n1 == nt);
 	nstationB = n2;
-	assert( nshotA == nshotB);
+    if (!((mdd == 5) || (mdd == 3))) assert( nshotA == nshotB);
+
+    if (file_C != NULL && mdd != 3) {
+        nshotC = 0;
+        getFileInfo(file_C, &n1, &n2, &nshotC, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
+        assert( n1 == nt);
+        nstationC = n2;
+        assert( nshotA == nshotC);
+    } else if (file_C != NULL) {
+        nshotC = 0;
+        getFileInfo(file_C, &n1, &n2, &nshotC, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm);
+        assert( n1 == nt);
+        nstationC = n2;
+    }
+
+    if (ntapB != 0) ftap = (float)ntapB / (float)nstationA;
+    else if (ftap != 0) ntapB = NINT(ftap*nstationA);
 
 /*================ initializations ================*/
 
@@ -218,11 +263,26 @@ int main (int argc, char **argv)
     nw       = nw_high - nw_low + 1;
 	nfreq = MIN(nf,nw);
 
-/* scaling of the results by Johno van IJsseldijk */ 
-    if (mdd == 0) scl = dx*dt/((float)ntfft); //correlation
-    else if (mdd==1) scl = 1/((float)ntfft)/dx/dt; // MDD
-    else if (mdd==2) scl = 1/((float)ntfft)/dx/dt; // MDD with A and B already computed (NOT TESTED)
-    else scl = 1.0/((float)ntfft); // Passing A or B through
+/* scaling of the results by Johno van IJsseldijk */
+    if (scaling==1) {
+        if (mdd == 0) scl = dx*dt/((float)ntfft); //correlation
+        else if (mdd==1) scl = 1/((float)ntfft)/dx/dt; // MDD
+        else if (mdd==3) scl = 1/((float)ntfft)/dx/dt; // MDD LSQR
+        else if (mdd==2) scl = 1/((float)ntfft)/dx/dt; // MDD with A and B already computed (NOT TESTED)
+        else scl = 1.0/((float)ntfft); // Passing A or B through
+
+        if (file_C != NULL && mdd != 3) scl *= dx*dt; // 
+
+        TCscl=dx*dt;
+    }
+    else if (scaling==0) {
+        scl = 1/((float)ntfft);
+        TCscl=1;
+    }
+    else {
+        scl = scaling/((float)ntfft);
+        TCscl=1;
+    }
 
 /* allocate in shared memory the in- and output data */
 
@@ -232,13 +292,18 @@ int main (int argc, char **argv)
 	cdataout     = (complex *)malloc(cdataoutSize);
 	cA           = (complex *)malloc(nstationA*cdatainSize);
 	cB           = (complex *)malloc(nstationB*cdatainSize);
-	taper        = (float *)malloc(2*nstationB*sizeof(float));
 	if (file_dmat!=NULL) oBB = (complex *)malloc(nstationB*nstationB*nfreq*sizeof(complex));
 	else oBB = NULL;
 	assert(cdataout != NULL);
 	assert(cA != NULL);
 	assert(cB != NULL);
 
+    if (file_C != NULL && mdd != 3) {
+        cC = (complex *)malloc(nstationC*cdatainSize);
+        assert(cC != NULL);
+        cTemp = (complex *)malloc(cdataoutSize);
+        assert(cTemp != NULL);
+    }
 
 /* for first touch binding of allocated memory */
 #pragma omp parallel for schedule(static) private(jstation,is) default(shared)
@@ -254,8 +319,16 @@ int main (int argc, char **argv)
 		memset(&cA[jstation*jstep],0,jstep*sizeof(complex));
 	}
 
+    if (file_C != NULL && mdd != 3) {
+#pragma omp parallel for schedule(static) private(jstation) default(shared)
+        for (jstation=0; jstation<nstationC; jstation++) {
+            memset(&cC[jstation*jstep],0,jstep*sizeof(complex));
+        }
+    }
+
     if (verbose) {
-        if (rthm==0) rthmName="Cholesky";
+		if (mdd==3) rthmName="LSQR";
+        else if (rthm==0) rthmName="Cholesky";
         else if (rthm==1) rthmName="LU";
         else if (rthm==2) rthmName="SVD single precision";
         else if (rthm==3) rthmName="SVD divide-and-conquer";
@@ -269,18 +342,31 @@ int main (int argc, char **argv)
         fprintf(stderr,"  nstationA ........ : %ld\n", nstationA );
         fprintf(stderr,"  nshotB ........... : %d\n", nshotB );
         fprintf(stderr,"  nstationB ........ : %ld\n", nstationB );
+        if (file_C != NULL) {
+            fprintf(stderr,"  nshotC ........... : %d\n", nshotC );
+            fprintf(stderr,"  nstationC ........ : %ld\n", nstationC );
+        }
+        fprintf(stderr,"  Scaling .......... : %e\n", scl);
+
         fprintf(stderr,"  number t-fft ..... : %d\n", ntfft);
-		fprintf(stderr,"  Input  size ...... : %ld MB\n", (nstationA+nstationB)*cdatainSize/(1024*1024));
+        fprintf(stderr,"  Input  size ...... : %ld MB\n", ((file_C != NULL) ? (nstationA+nstationB+nstationC)*cdatainSize/(1024*1024) : (nstationA+nstationB)*cdatainSize/(1024*1024)));
+
 		fprintf(stderr,"  Output size ...... : %ld MB\n", (cdataoutSize/((size_t)1024*1024)));
-        fprintf(stderr,"  taper points ..... : %d (%.2f %%)\n", ntap, ftap*100.0);
+        if (ntapB != 0) fprintf(stderr,"  taper points ..... : %d (%.0f %%)\n", ntapB, ftap*100.0);
+        if (ntapA != 0) fprintf(stderr,"  taper points ..... : %d \n", ntapA);
         fprintf(stderr,"  process number ... : %d\n", pe);
         fprintf(stderr,"  fmin ............. : %.3f (%d)\n", fmin, nw_low);
         fprintf(stderr,"  fmax ............. : %.3f (%d)\n", fmax, nw_high);
         fprintf(stderr,"  nfreq  ........... : %ld\n", nfreq);
         if (mdd) fprintf(stderr,"  Matrix inversion . : %s\n", rthmName);
         else  fprintf(stderr,"  Correlation ...... : \n");
-        fprintf(stderr,"  eps_r ............ : %e\n", eps_r);
-        fprintf(stderr,"  eps_a ............ : %e\n", eps_a);
+        if (mdd==1) {
+            fprintf(stderr,"  eps_r ............ : %e\n", eps_r);
+            fprintf(stderr,"  eps_a ............ : %e\n", eps_a);
+        } else if (mdd==3) {
+            fprintf(stderr,"  iterations........ : %d\n", lsqr_iter);
+            fprintf(stderr,"  damping........... : %e\n", lsqr_damp);
+        }
         fprintf(stderr,"  mdd .............. : %d\n", mdd);
     }
 
@@ -300,7 +386,54 @@ int main (int argc, char **argv)
 	alpha = 0.0;
     readShotData(file_B, xmin, dx, xrcvB, xsrcB, xnx, cB, nw, nw_low, nshotB, nstationB, nstationB, ntfft, alpha, sclB, cjB, transposeB, verbose);
 
-	//cB = cA;
+    if (file_C != NULL && mdd != 3) {
+        xsrcC     = (float *)calloc(nshotC,sizeof(float));
+        xrcvC     = (float *)calloc(nshotC*nstationC,sizeof(float));
+        alpha = 0.0;
+        readShotData(file_C, xmin, dx, xrcvC, xsrcC, xnx, cC, nw, nw_low, nshotC, nstationC, nstationC, ntfft, alpha, sclC, cjC, transposeC, verbose);
+    } else if (file_C != NULL) {
+        xsrcC     = (float *)calloc(nshotC,sizeof(float));
+        xrcvC     = (float *)calloc(nshotC*nstationC,sizeof(float));
+        alpha = 0.0;
+        readShotData(file_C, xmin, dx, xrcvC, xsrcC, xnx, cdataout, nw, nw_low, nshotC, nstationC, nstationC, ntfft, alpha, sclC, cjC, transposeC, verbose);
+    }
+
+    if (ntapA != 0) {
+        taper = (float *)malloc(nstationA*sizeof(float));
+        for (j = 0; j < ntapA; j++)
+            taper[j] = (cos(M_PI*(j-ntapA)/ntapA)+1)/2.0;//(exp(j/ntap*8)-1)/(exp(8)-1);//
+        for (j = ntapA; j < nstationA-ntapA; j++)
+            taper[j] = 1.0;
+        for (j = nstationA-ntapA; j < nstationA; j++)
+            taper[j] = taper[abs(j-nstationA)-1];//(cos(M_PI*(j-(nstationA-ntap))/ntap)+1)/2.0;
+        for (istation = 0; istation < nstationA; istation++) {  // Swap for jstation?
+            for (jstation = 0; jstation < nshotA; jstation++) {
+                for (iw=0; iw<nw; iw++) {
+                    cA[iw*nstationA*nshotA+jstation*nstationA+istation].r *= taper[istation];
+                    cA[iw*nstationA*nshotA+jstation*nstationA+istation].i *= taper[istation];
+                }
+            }
+        }
+        free(taper);
+    }
+    if (ntapB != 0) {
+        taper = (float *)malloc(nstationA*sizeof(float));
+        for (j = 0; j < ntapB; j++)
+            taper[j] = (cos(M_PI*(j-ntapB)/ntapB)+1)/2.0;//(exp(j/ntap*8)-1)/(exp(8)-1);//
+        for (j = ntapB; j < nstationA-ntapB; j++)
+            taper[j] = 1.0;
+        for (j = nstationA-ntapB; j < nstationA; j++)
+            taper[j] = taper[abs(j-nstationA)-1];//(cos(M_PI*(j-(nstationA-ntap))/ntap)+1)/2.0;
+        for (jstation = 0; jstation < nstationA; jstation++) {  // Swap for jstation?
+            for (istation = 0; istation < nshotA; istation++) {
+                for (iw=0; iw<nw; iw++) {
+                    cB[iw*nstationA*nshotA+istation*nshotA+jstation].r *= taper[istation];
+                    cB[iw*nstationA*nshotA+istation*nshotA+jstation].i *= taper[istation];
+                }
+            }
+        }
+        free(taper);
+    }
 
 	eigen = (float *)malloc(nfreq*nstationB*sizeof(float));
 
@@ -309,20 +442,25 @@ int main (int argc, char **argv)
 
 #pragma omp parallel default(none) \
 	private(t1,t2,pe) \
-	shared(cA,cB,eigen,eigenvalues,numacc,eps_r,eps_a) \
-	shared(nstationA,nstationB,verbose,cdatainSize) \
-	shared(rthm,mdd,nfreq,nshotA,conjgA,conjgB) \
-	shared(cdataout,oBB)
+ 	shared(cA,cB,cC,eigen,eigenvalues,numacc,eps_r,eps_a) \
+ 	shared(nstationA,nstationB,nstationC,verbose,cdatainSize) \
+    shared(rthm,mdd,nfreq,nshotA,conjgA,conjgB,conjgC) \
+    shared(cdataout,cTemp,oBB,file_C,cdataoutSize,k_iter,stderr, lsqr_iter, lsqr_damp, TCscl)
 { /* start of OpenMP parallel part */
 
-
 #ifdef _OPENMP
 	pe = omp_get_thread_num();
 #endif
 
 	/* compute deconvolution */
 	deconvolve(cA, cB, cdataout, oBB, nfreq, nshotA, nstationA, nstationB, 
-		eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgB, verbose);
+        eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgB, lsqr_iter, lsqr_damp, k_iter, TCscl, verbose);
+
+    if (file_C != NULL && mdd != 3) {
+        deconvolve(cdataout, cC, cTemp, oBB, nfreq, nshotA, nstationA, nstationC,
+            eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgC, lsqr_iter, lsqr_damp, k_iter, TCscl, verbose);
+        memcpy(&cdataout[0].r, &cTemp[0].r, cdataoutSize);
+    }
 
 } /*end of parallel OpenMP part */
 
@@ -340,6 +478,8 @@ int main (int argc, char **argv)
 /* for writing out combined shots cA */
 	free(cA);
 	free(cB);
+    if (file_C != NULL && mdd != 3) free(cC);
+    if (file_C != NULL && mdd != 3) free(cTemp);
 
 /* Inverse FFT of deconvolution results */
 /* This is done for every deconvolution component seperately */
diff --git a/MDD/readShotData.c b/MDD/readShotData.c
index a6205bf7cbad1742c301a27e0f4e7888f208d5fa..ad5d0e452e7fb3b92a81cf1487eb940a34fd7056 100644
--- a/MDD/readShotData.c
+++ b/MDD/readShotData.c
@@ -42,7 +42,8 @@ int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc,
 	fseek(fp, 0, SEEK_SET);
 	nread = fread( &hdr, 1, TRCBYTES, fp );
 	assert(nread == TRCBYTES);
-	if (hdr.scalco < 0) scl = 1.0/fabs((float)hdr.scalco);
+ 	if (hdr.scalco < 0) scl = 1.0/fabs((float)hdr.scalco);
+
 	else if (hdr.scalco == 0) scl = 1.0;
 	else scl = hdr.scalco;
 	fseek(fp, 0, SEEK_SET);
@@ -104,13 +105,13 @@ int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc,
         	rc1fft(&trace[i*ntfft],ctrace,ntfft,-1);
 
 			if (transpose == 0) {
-        		for (iw=0; iw<nw; iw++) {
+        		for (iw=0; iw<nw; iw++) { // FREQUENCY - SHOTS - RECEIVERS
         			cdata[iw*ngath*nx+igath*nx+i].r = scale*ctrace[nw_low+iw].r;
         			cdata[iw*ngath*nx+igath*nx+i].i = conjg*scale*ctrace[nw_low+iw].i;
         		}
 			}
 			else {
-        		for (iw=0; iw<nw; iw++) {
+        		for (iw=0; iw<nw; iw++) { // FREQUENCY - RECEIVERS - SHOTS
         			cdata[iw*ngath*nx+i*ngath+igath].r = scale*ctrace[nw_low+iw].r;
         			cdata[iw*ngath*nx+i*ngath+igath].i = conjg*scale*ctrace[nw_low+iw].i;
         		}
diff --git a/MDD/zlsqrDataModule.F90 b/MDD/zlsqrDataModule.F90
new file mode 100644
index 0000000000000000000000000000000000000000..ff9f905aeca90cc1721e55f8dbff206bcb4157ba
--- /dev/null
+++ b/MDD/zlsqrDataModule.F90
@@ -0,0 +1,19 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File zlsqrDataModule.f90
+!
+! Extends lsqrDataModule.f90 for use with complex numbers
+! 29 Jun 2013: File created.
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module zlsqrDataModule
+
+  use   iso_c_binding
+  use   lsqrDataModule, only  : dp, sp, ip, zero, realmin, eps, one
+  
+  implicit none
+
+  intrinsic                      :: cmplx
+
+  complex(c_float_complex), parameter, public :: zzero = cmplx(zero,zero,dp), zone = cmplx(one,zero,dp)
+
+end module zlsqrDataModule
diff --git a/MDD/zlsqrModule.F90 b/MDD/zlsqrModule.F90
new file mode 100644
index 0000000000000000000000000000000000000000..3220fd71ff44d97d5250d54b4e48c52eb6dd194e
--- /dev/null
+++ b/MDD/zlsqrModule.F90
@@ -0,0 +1,804 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File lsqrModule.f90
+!
+!     zLSQR     d2norm
+!
+! zLSQR   solves Ax = b or min ||Ax - b|| with or without damping,
+! using the iterative algorithm of Paige and Saunders, ACM TOMS (1982).
+!
+! Maintained by
+!     Michael Saunders <saunders@stanford.edu>
+!     Systems Optimization Laboratory (SOL)
+!     Stanford University
+!     Stanford, CA 94305-4026, USA
+!     (650)723-1875
+!
+! 07 Sep 2007: Line by line translation of f77 file lsqr.f to f90
+!              by Eric Badel <badel@nancy.inra.fr>.
+! 21 Sep 2007: lsqrModule.f90 implemented.
+! 24 Oct 2007: Use real(dp) instead of compiler option -r8.
+! 19 Dec 2008: lsqrblasInterface module implemented.
+! 29 Jun 2013: Support for complex A, derived from real version
+!              by Austin Benson <arbenson@stanford.edu> and
+!                 Victor Minden <vminden@stanford.edu>
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module zlsqrModule
+
+  use  zlsqrDataModule,    only : dp, ip, one, zero, eps, zone, zzero
+  use  zlsqrblasInterface, only : dznrm2_zlsqr, zscal_zlsqr
+  use  iso_c_binding ! for C interop
+
+  implicit none
+
+  public   :: zLSQR
+  private  :: d2norm 
+
+
+contains
+
+  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 
+  subroutine zLSQR  ( m, n, indw, Aprod1, Aprod2, b, damp, wantse, x, se,  &
+                      atol, btol, conlim, itnlim, nout, istop, itn,  &
+                      Anorm, Acond, rnorm, Arnorm, xnorm) bind(c, name = "zLSQR_")!w, v ) 
+
+    integer(ip),  intent(in)  :: m, n, itnlim, nout
+    integer, intent(in)    :: indw
+    integer(ip),  intent(out) :: istop, itn
+    logical(ip),  intent(in)  :: wantse
+    complex(dp), dimension(m), intent(in)  :: b(m)
+    complex(dp), dimension(n), intent(inout) :: x(n)
+    real(dp), intent(out) :: se(*)
+    real(dp), intent(in)  :: atol, btol, conlim, damp
+    real(dp), intent(out) :: Anorm, Acond, rnorm, Arnorm, xnorm
+    
+	!complex(dp), dimension(n), intent(inout)  :: w(n)
+    !complex(dp), dimension(n), intent(inout)  :: v(n)
+	
+    interface
+       subroutine Aprod1(m,n,x,y,indw) bind(c)                  ! y := y + A*x
+         use zlsqrDataModule, only : dp, ip
+         integer(ip), intent(in)    :: m, n
+         integer, intent(in)    :: indw
+         complex(dp), intent(in)    :: x(n)
+         complex(dp), intent(inout) :: y(m)
+       end subroutine Aprod1
+
+       subroutine Aprod2(m,n,x,y,indw) bind(c)                 ! x := x + A'*y
+         use zlsqrDataModule, only : dp, ip
+         integer(ip),  intent(in)    :: m, n 
+         integer, intent(in)    :: indw
+         complex(dp), intent(inout) :: x(n)
+         complex(dp), intent(in)    :: y(m)
+       end subroutine Aprod2
+    end interface
+
+    !-------------------------------------------------------------------
+    ! zLSQR  finds a solution x to the following problems:
+    !
+    ! 1. Unsymmetric equations:    Solve  A*x = b
+    !
+    ! 2. Linear least squares:     Solve  A*x = b
+    !                              in the least-squares sense
+    !
+    ! 3. Damped least squares:     Solve  (   A    )*x = ( b )
+    !                                     ( damp*I )     ( 0 )
+    !                              in the least-squares sense
+    !
+    ! where A is a complex matrix with m rows and n columns, b is a
+    ! complex m-vector, and damp is a real scalar.
+    ! The matrix A is treated as a linear operator.  It is accessed
+    ! by means of subroutine calls with the following purpose:
+    !
+    ! call Aprod1(m,n,x,y)  must compute y = y + A*x  without altering x.
+    ! call Aprod2(m,n,x,y)  must compute x = x + A'*y without altering y.
+    !
+    ! zLSQR uses an iterative method to approximate the solution.
+    ! The number of iterations required to reach a certain accuracy
+    ! depends strongly on the scaling of the problem.  Poor scaling of
+    ! the rows or columns of A should therefore be avoided where
+    ! possible.
+    !
+    ! For example, in problem 1 the solution is unaltered by
+    ! row-scaling.  If a row of A is very small or large compared to
+    ! the other rows of A, the corresponding row of ( A  b ) should be
+    ! scaled up or down.
+    !
+    ! In problems 1 and 2, the solution x is easily recovered
+    ! following column-scaling.  Unless better information is known,
+    ! the nonzero columns of A should be scaled so that they all have
+    ! the same Euclidean norm (e.g., 1.0).
+    !
+    ! In problem 3, there is no freedom to re-scale if damp is
+    ! nonzero.  However, the value of damp should be assigned only
+    ! after attention has been paid to the scaling of A.
+    !
+    ! The parameter damp is intended to help regularize
+    ! ill-conditioned systems, by preventing the true solution from
+    ! being very large.  Another aid to regularization is provided by
+    ! the parameter Acond, which may be used to terminate iterations
+    ! before the computed solution becomes very large.
+    !
+    ! Note that x is not an input parameter.
+    ! If some initial estimate x0 is known and if damp = 0,
+    ! one could proceed as follows:
+    !
+    ! 1. Compute a residual vector     r0 = b - A*x0.
+    ! 2. Use LSQR to solve the system  A*dx = r0.
+    ! 3. Add the correction dx to obtain a final solution x = x0 + dx.
+    !
+    ! This requires that x0 be available before and after the call
+    ! to LSQR.  To judge the benefits, suppose LSQR takes k1 iterations
+    ! to solve A*x = b and k2 iterations to solve A*dx = r0.
+    ! If x0 is "good", norm(r0) will be smaller than norm(b).
+    ! If the same stopping tolerances atol and btol are used for each
+    ! system, k1 and k2 will be similar, but the final solution x0 + dx
+    ! should be more accurate.  The only way to reduce the total work
+    ! is to use a larger stopping tolerance for the second system.
+    ! If some value btol is suitable for A*x = b, the larger value
+    ! btol*norm(b)/norm(r0)  should be suitable for A*dx = r0.
+    !
+    ! Preconditioning is another way to reduce the number of iterations.
+    ! If it is possible to solve a related system M*x = b efficiently,
+    ! where M approximates A in some helpful way
+    ! (e.g. M - A has low rank or its elements are small relative to
+    ! those of A), LSQR may converge more rapidly on the system
+    !       A*M(inverse)*z = b,
+    ! after which x can be recovered by solving M*x = z.
+    !
+    ! NOTE: If A is Hermitian, LSQR should not be used!
+    ! Alternatives are the symmetric conjugate-gradient method (CG)
+    ! and/or SYMMLQ.
+    ! SYMMLQ is an implementation of symmetric CG that applies to
+    ! any symmetric A and will converge more rapidly than LSQR.
+    ! If A is positive definite, there are other implementations of
+    ! symmetric CG that require slightly less work per iteration
+    ! than SYMMLQ (but will take the same number of iterations).
+    !
+    !
+    ! Notation
+    ! --------
+    ! The following quantities are used in discussing the subroutine
+    ! parameters:
+    !
+    ! Abar   =  (  A   ),        bbar  =  (b)
+    !           (damp*I)                  (0)
+    !
+    ! r      =  b - A*x,         rbar  =  bbar - Abar*x
+    !
+    ! rnorm  =  sqrt( norm(r)**2  +  damp**2 * norm(x)**2 )
+    !        =  norm( rbar )
+    !
+    ! eps    =  the relative precision of floating-point arithmetic.
+    !           On most machines, eps is about 1.0e-7 and 1.0e-16
+    !           in single and double precision respectively.
+    !           We expect eps to be about 1e-16 always.
+    !
+    ! LSQR  minimizes the function rnorm with respect to x.
+    !
+    !
+    ! Parameters
+    ! ----------
+    ! m       input      m, the number of rows in A.
+    !
+    ! n       input      n, the number of columns in A.
+    !
+    ! Aprod1, Aprod2     See above.
+    !
+    ! damp    input      The damping parameter for problem 3 above.
+    !                    (damp should be 0.0 for problems 1 and 2.)
+    !                    If the system A*x = b is incompatible, values
+    !                    of damp in the range 0 to sqrt(eps)*norm(A)
+    !                    will probably have a negligible effect.
+    !                    Larger values of damp will tend to decrease
+    !                    the norm of x and reduce the number of 
+    !                    iterations required by LSQR.
+    !
+    !                    The work per iteration and the storage needed
+    !                    by LSQR are the same for all values of damp.
+    !
+    ! wantse  input      A logical variable to say if the array se(*)
+    !                    of standard error estimates should be computed.
+    !                    If m > n  or  damp > 0,  the system is
+    !                    overdetermined and the standard errors may be
+    !                    useful.  (See the first LSQR reference.)
+    !                    Otherwise (m <= n  and  damp = 0) they do not
+    !                    mean much.  Some time and storage can be saved
+    !                    by setting wantse = .false. and using any
+    !                    convenient array for se(*), which won't be
+    !                    touched.
+    !
+    ! b(m)    input      The rhs vector b.
+    !
+    ! x(n)    output     Returns the computed solution x.
+    !
+    ! se(*)   output     If wantse is true, the dimension of se must be
+    !         (maybe)    n or more.  se(*) then returns standard error
+    !                    estimates for the components of x.
+    !                    For each i, se(i) is set to the value
+    !                       rnorm * sqrt( sigma(i,i) / t ),
+    !                    where sigma(i,i) is an estimate of the i-th
+    !                    diagonal of the inverse of Abar(transpose)*Abar
+    !                    and  t = 1      if  m .le. n,
+    !                         t = m - n  if  m .gt. n  and  damp = 0,
+    !                         t = m      if  damp .ne. 0.
+    !
+    !                    If wantse is false, se(*) will not be touched.
+    !                    The actual parameter can be any real array.
+    !
+    ! atol    input      An estimate of the relative error in the data
+    !                    defining the matrix A.  For example, if A is
+    !                    accurate to about 6 digits, set atol = 1.0e-6.
+    !
+    ! btol    input      An estimate of the relative error in the data
+    !                    defining the rhs b.  For example, if b is
+    !                    accurate to about 6 digits, set btol = 1.0e-6.
+    !
+    ! conlim  input      An upper limit on cond(Abar), the apparent
+    !                    condition number of the matrix Abar.
+    !                    Iterations will be terminated if a computed
+    !                    estimate of cond(Abar) exceeds conlim.
+    !                    This is intended to prevent certain small or
+    !                    zero singular values of A or Abar from
+    !                    coming into effect and causing unwanted growth
+    !                    in the computed solution.
+    !
+    !                    conlim and damp may be used separately or
+    !                    together to regularize ill-conditioned systems.
+    !
+    !                    Normally, conlim should be in the range
+    !                    1000 to 1/eps.
+    !                    Suggested value:
+    !                    conlim = 1/(100*eps)  for compatible systems,
+    !                    conlim = 1/(10*sqrt(eps)) for least squares.
+    !
+    !         Note: Any or all of atol, btol, conlim may be set to zero.
+    !         The effect will be the same as the values eps, eps, 1/eps.
+    !
+    ! itnlim  input      An upper limit on the number of iterations.
+    !                    Suggested value:
+    !                    itnlim = n/2   for well-conditioned systems
+    !                                   with clustered singular values,
+    !                    itnlim = 4*n   otherwise.
+    !
+    ! nout    input      File number for printed output.  If positive,
+    !                    a summary will be printed on file nout.
+    !
+    ! istop   output     An integer giving the reason for termination:
+    !
+    !            0       x = 0  is the exact solution.
+    !                    No iterations were performed.
+    !
+    !            1       The equations A*x = b are probably compatible.
+    !                    Norm(A*x - b) is sufficiently small, given the
+    !                    values of atol and btol.
+    !
+    !            2       damp is zero.  The system A*x = b is probably
+    !                    not compatible.  A least-squares solution has
+    !                    been obtained that is sufficiently accurate,
+    !                    given the value of atol.
+    !
+    !            3       damp is nonzero.  A damped least-squares
+    !                    solution has been obtained that is sufficiently
+    !                    accurate, given the value of atol.
+    !
+    !            4       An estimate of cond(Abar) has exceeded conlim.
+    !                    The system A*x = b appears to be ill-conditioned,
+    !                    or there could be an error in Aprod1 or Aprod2.
+    !
+    !            5       The iteration limit itnlim was reached.
+    !
+    ! itn     output     The number of iterations performed.
+    !
+    ! Anorm   output     An estimate of the Frobenius norm of Abar.
+    !                    This is the square-root of the sum of squares
+    !                    of the elements of Abar.
+    !                    If damp is small and the columns of A
+    !                    have all been scaled to have length 1.0,
+    !                    Anorm should increase to roughly sqrt(n).
+    !                    A radically different value for Anorm may
+    !                    indicate an error in Aprod1 or Aprod2.
+    !
+    ! Acond   output     An estimate of cond(Abar), the condition
+    !                    number of Abar.  A very high value of Acond
+    !                    may again indicate an error in Aprod1 or Aprod2.
+    !
+    ! rnorm   output     An estimate of the final value of norm(rbar),
+    !                    the function being minimized (see notation
+    !                    above).  This will be small if A*x = b has
+    !                    a solution.
+    !
+    ! Arnorm  output     An estimate of the final value of
+    !                    norm( Abar(transpose)*rbar ), the norm of
+    !                    the residual for the normal equations.
+    !                    This should be small in all cases.  (Arnorm
+    !                    will often be smaller than the true value
+    !                    computed from the output vector x.)
+    !
+    ! xnorm   output     An estimate of norm(x) for the final solution x.
+    !
+    ! Subroutines and functions used              
+    ! ------------------------------
+    ! BLAS               zscal, dznrm2
+    ! USER               Aprod1, Aprod2
+    !
+    ! Precision
+    ! ---------
+    ! The number of iterations required by LSQR will decrease
+    ! if the computation is performed in higher precision.
+    ! At least 15-digit arithmetic should normally be used.
+    ! "complex(dp)" declarations should normally be 8-byte words.
+    ! If this ever changes, the BLAS routines  dznrm2, zscal
+    ! (Lawson, et al., 1979) will also need to be changed.
+    !
+    !
+    ! References
+    ! ----------
+    ! C.C. Paige and M.A. Saunders,  LSQR: An algorithm for sparse
+    !      linear equations and sparse least squares,
+    !      ACM Transactions on Mathematical Software 8, 1 (March 1982),
+    !      pp. 43-71.
+    !
+    ! C.C. Paige and M.A. Saunders,  Algorithm 583, LSQR: Sparse
+    !      linear equations and least-squares problems,
+    !      ACM Transactions on Mathematical Software 8, 2 (June 1982),
+    !      pp. 195-209.
+    !
+    ! C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh,
+    !      Basic linear algebra subprograms for Fortran usage,
+    !      ACM Transactions on Mathematical Software 5, 3 (Sept 1979),
+    !      pp. 308-323 and 324-325.
+    ! ------------------------------------------------------------------
+    !
+    ! LSQR development:
+    ! 22 Feb 1982: LSQR sent to ACM TOMS to become Algorithm 583.
+    ! 15 Sep 1985: Final F66 version.  LSQR sent to "misc" in netlib.
+    ! 13 Oct 1987: Bug (Robert Davies, DSIR).  Have to delete
+    !                 if ( (one + dabs(t)) .le. one ) GO TO 200
+    !              from loop 200.  The test was an attempt to reduce
+    !              underflows, but caused w(i) not to be updated.
+    ! 17 Mar 1989: First F77 version.
+    ! 04 May 1989: Bug (David Gay, AT&T).  When the second beta is zero,
+    !              rnorm = 0 and
+    !              test2 = Arnorm / (Anorm * rnorm) overflows.
+    !              Fixed by testing for rnorm = 0.
+    ! 05 May 1989: Sent to "misc" in netlib.
+    ! 14 Mar 1990: Bug (John Tomlin via IBM OSL testing).
+    !              Setting rhbar2 = rhobar**2 + dampsq can give zero
+    !              if rhobar underflows and damp = 0.
+    !              Fixed by testing for damp = 0 specially.
+    ! 15 Mar 1990: Converted to lower case.
+    ! 21 Mar 1990: d2norm introduced to avoid overflow in numerous
+    !              items like  c = sqrt( a**2 + b**2 ).
+    ! 04 Sep 1991: wantse added as an argument to LSQR, to make
+    !              standard errors optional.  This saves storage and
+    !              time when se(*) is not wanted.
+    ! 13 Feb 1992: istop now returns a value in [1,5], not [1,7].
+    !              1, 2 or 3 means that x solves one of the problems
+    !              Ax = b,  min norm(Ax - b)  or  damped least squares.
+    !              4 means the limit on cond(A) was reached.
+    !              5 means the limit on iterations was reached.
+    ! 07 Dec 1994: Keep track of dxmax = max_k norm( phi_k * d_k ).
+    !              So far, this is just printed at the end.
+    !              A large value (relative to norm(x)) indicates
+    !              significant cancellation in forming
+    !                 x = D*f = sum( phi_k * d_k ).
+    !              A large column of D need NOT be serious if the
+    !              corresponding phi_k is small.
+    ! 27 Dec 1994: Include estimate of alfa_opt in iteration log.
+    !              alfa_opt is the optimal scale factor for the
+    !              residual in the "augmented system", as described by
+    !              A. Bjorck (1992),
+    !              Pivoting and stability in the augmented system method,
+    !              in D. F. Griffiths and G. A. Watson (eds.),
+    !              "Numerical Analysis 1991",
+    !              Proceedings of the 14th Dundee Conference,
+    !              Pitman Research Notes in Mathematics 260,
+    !              Longman Scientific and Technical, Harlow, Essex, 1992.
+    !
+    ! 21 Sep 2007: Fortran 90 finally adopted seriously.
+    !              Aprod1, Aprod2 implemented via f90 interface.
+    !-------------------------------------------------------------------
+
+    intrinsic :: abs, sqrt
+
+    ! Local arrays and variables
+    complex(dp)  :: u(m)
+	complex(dp), allocatable  :: v(:), w(:)
+    complex(dp)  :: t         
+    logical   :: damped, prnt
+    integer(ip)   :: i, maxdx, nconv, nstop
+    real(dp)  :: alfopt,                                    &
+                 alpha, beta, bnorm, cs, cs1, cs2, ctol,    &
+                 delta, dknorm, dnorm, dxk, dxmax, gamma,   &
+                 gambar, phi, phibar, psi, res2, rho,       &
+                 rhobar, rhbar1, rhs, rtol, sn, sn1, sn2,   &
+                 tau, temp, test1, test2, test3, theta,     &
+                 t1, t2, t3, xnorm1, z, zbar
+
+    ! Local constants
+    character(len=*), parameter :: enter = ' Enter LSQR.  '
+    character(len=*), parameter :: exitt = ' Exit  LSQR.  '
+    character(len=*), parameter :: msg(0:5) =                     &
+      (/ 'The exact solution is  x = 0                         ', &
+         'A solution to Ax = b was found, given atol, btol     ', &
+         'A least-squares solution was found, given atol       ', &
+         'A damped least-squares solution was found, given atol', &
+         'Cond(Abar) seems to be too large, given conlim       ', &
+         'The iteration limit was reached                      ' /)
+    !-------------------------------------------------------------------
+
+    ! Initialize.
+
+    if (nout > 0) then
+       OPEN(nout, file='OUTPUT.txt', status='unknown') ! ADDED BY JOHNO
+       write(nout, 1000) enter,m,n,damp,wantse,atol,conlim,btol,itnlim
+    end if
+
+    damped =   damp > zero
+    itn    =   0
+    istop  =   0
+    nstop  =   0
+    maxdx  =   0
+    ctol   =   zero
+    if (conlim > zero) ctol = one / conlim
+    Anorm  =   zero
+    Acond  =   zero
+    dnorm  =   zero
+    dxmax  =   zero
+    res2   =   zero
+    psi    =   zero
+    xnorm  =   zero
+    xnorm1 =   zero
+    cs2    = - one
+    sn2    =   zero
+    z      =   zero
+
+    !-------------------------------------------------------------------
+    ! Set up the first vectors u and v for the bidiagonalization.
+    ! These satisfy  beta*u = b,  alpha*v = A(transpose)*u.
+    !-------------------------------------------------------------------
+    
+    u(1:m) = b(1:m)
+	allocate(v(n),w(n))
+    v(1:n) = zzero
+    !x(1:n) = zzero
+
+
+    if (wantse) then
+       se(1:n) = zero
+    end if
+
+    alpha  = zero
+    beta   = dznrm2_zlsqr (m, u, 1)
+
+    if (beta > zero) then
+       call zscal_zlsqr (m, cmplx((one/beta), 0, dp), u, 1)
+       call Aprod2(m, n, v, u, indw)          ! v = A'*u
+       alpha = dznrm2_zlsqr (n, v, 1)
+    end if
+
+    if (alpha > zero) then
+       call zscal_zlsqr (n, cmplx((one/alpha), 0, dp), v, 1)
+       w = v
+    end if
+
+    Arnorm = alpha*beta
+    if (Arnorm == zero) go to 800
+
+    rhobar = alpha
+    phibar = beta
+    bnorm  = beta
+    rnorm  = beta
+
+    if (nout > 0) then
+       if (damped) then
+          write(nout,1300)
+       else
+          write(nout,1200)
+       end if
+       test1 = one
+       test2 = alpha/beta
+       write(nout, 1500) itn,abs(x(1)),rnorm,test1,test2
+    end if
+
+    !===================================================================
+    ! Main iteration loop.
+    !===================================================================
+    do
+       itn = itn + 1
+	  
+       !----------------------------------------------------------------
+       ! Perform the next step of the bidiagonalization to obtain the
+       ! next beta, u, alpha, v.  These satisfy
+       !     beta*u = A*v  - alpha*u,
+       !    alpha*v = A'*u -  beta*v.
+       !----------------------------------------------------------------
+       call zscal_zlsqr (m,cmplx((- alpha), 0, dp), u, 1)
+       call Aprod1(m, n, v, u, indw)             ! u = A*v
+       beta   = dznrm2_zlsqr (m, u, 1)
+
+       ! Accumulate Anorm = ||Bk|| = norm([alpha beta damp]).
+
+       temp   = d2norm(alpha, beta)
+       temp   = d2norm(temp , damp)
+       Anorm  = d2norm(Anorm, temp)
+
+       if (beta > zero) then
+          call zscal_zlsqr (m, cmplx((one/beta), 0, dp), u, 1)
+          call zscal_zlsqr (n, cmplx((- beta), 0, dp), v, 1)
+          call Aprod2(m, n, v, u, indw)          ! v = A'*u
+          alpha  = dznrm2_zlsqr (n, v, 1)
+          if (alpha > zero) then
+             call zscal_zlsqr (n, cmplx((one/alpha), 0, dp), v, 1)
+          end if
+       end if
+
+       !----------------------------------------------------------------
+       ! Use a plane rotation to eliminate the damping parameter.
+       ! This alters the diagonal (rhobar) of the lower-bidiagonal matrix.
+       !----------------------------------------------------------------
+       rhbar1 = rhobar
+       if (damped) then
+          rhbar1 = d2norm(rhobar, damp)
+          cs1    = rhobar/rhbar1
+          sn1    = damp  /rhbar1
+          psi    = sn1 * phibar
+          phibar = cs1 * phibar
+       end if
+
+       !----------------------------------------------------------------
+       ! Use a plane rotation to eliminate the subdiagonal element (beta)
+       ! of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
+       !----------------------------------------------------------------
+       rho    =   d2norm(rhbar1, beta)
+       cs     =   rhbar1/rho
+       sn     =   beta  /rho
+       theta  =   sn*alpha
+       rhobar = - cs*alpha
+       phi    =   cs*phibar
+       phibar =   sn*phibar
+       tau    =   sn*phi
+
+       !----------------------------------------------------------------
+       ! Update  x, w  and (perhaps) the standard error estimates.
+       ! ---------------------------------------------------------------
+       t1     =     phi/rho
+       t2     = - theta/rho
+       t3     =     one/rho
+       dknorm =    zero
+
+       if (wantse) then
+          do i=1,n
+             t      = w(i)
+             x(i)   = t1*t +  x(i)
+             w(i)   = t2*t +  v(i)
+             t      = abs(t3*t)**2
+             se(i)  = t + se(i)
+             dknorm = t + dknorm !bug?
+          end do
+       else
+          do i=1,n
+             t      = w(i)
+             x(i)   = t1*t + x(i)
+             w(i)   = t2*t + v(i)
+             dknorm = abs(t3*t)**2 + dknorm
+          end do
+       end if
+
+       !----------------------------------------------------------------
+       ! Monitor the norm of d_k, the update to x.
+       ! dknorm = norm( d_k )
+       ! dnorm  = norm( D_k ),       where   D_k = (d_1, d_2, ..., d_k )
+       ! dxk    = norm( phi_k d_k ), where new x = x_k + phi_k d_k.
+       !----------------------------------------------------------------
+       dknorm = sqrt(dknorm)
+       dnorm  = d2norm(dnorm, dknorm)
+       dxk    = abs(phi*dknorm)
+       if (dxmax < dxk) then
+          dxmax  = dxk
+          maxdx  = itn
+       end if
+
+       !----------------------------------------------------------------
+       ! Use a plane rotation on the right to eliminate the
+       ! super-diagonal element (theta) of the upper-bidiagonal matrix.
+       ! Then use the result to estimate  norm(x).
+       !----------------------------------------------------------------
+       delta  =   sn2*rho
+       gambar = - cs2*rho
+       rhs    =   phi - delta*z
+       zbar   =   rhs   /gambar
+       xnorm  =   d2norm(xnorm1, zbar)
+       gamma  =   d2norm(gambar, theta)
+       cs2    =   gambar/gamma
+       sn2    =   theta /gamma
+       z      =   rhs   /gamma
+       xnorm1 =   d2norm(xnorm1, z)
+
+       !----------------------------------------------------------------
+       ! Test for convergence.
+       ! First, estimate the norm and condition of the matrix  Abar,
+       ! and the norms of  rbar  and  Abar(transpose)*rbar.
+       !----------------------------------------------------------------
+       Acond  = Anorm*dnorm
+       res2   = d2norm(res2, psi)
+       rnorm  = d2norm(res2, phibar)
+       rnorm  = rnorm + 1e-30         ! Prevent rnorm == 0.0
+       Arnorm = alpha*abs( tau )
+
+       ! Now use these norms to estimate certain other quantities,
+       ! some of which will be small near a solution.
+
+       alfopt = sqrt( rnorm/(dnorm*xnorm) )
+       test1  = rnorm/bnorm
+       test2  = zero
+       test2  = Arnorm/(Anorm*rnorm)
+       test3  = one   /Acond
+       t1     = test1 /(one + Anorm*xnorm/bnorm)
+       rtol   = btol  +  atol*Anorm*xnorm/bnorm
+
+       ! The following tests guard against extremely small values of
+       ! atol, btol  or  ctol.  (The user may have set any or all of
+       ! the parameters  atol, btol, conlim  to zero.)
+       ! The effect is equivalent to the normal tests using
+       ! atol = eps,  btol = eps,  conlim = 1/eps.
+
+       t3 = one + test3
+       t2 = one + test2
+       t1 = one + t1
+       if (itn >= itnlim) istop = 5
+       if (t3  <= one   ) istop = 4
+       if (t2  <= one   ) istop = 2
+       if (t1  <= one   ) istop = 1
+
+       ! Allow for tolerances set by the user.
+
+       if (test3 <= ctol) istop = 4
+       if (test2 <= atol) istop = 2
+       if (test1 <= rtol) istop = 1
+
+       !----------------------------------------------------------------
+       ! See if it is time to print something.
+       !----------------------------------------------------------------
+       prnt = .false.
+       if (nout > 0) then
+          if (n     <=        40) prnt = .true.
+          if (itn   <=        10) prnt = .true.
+          if (itn   >= itnlim-10) prnt = .true.
+          if (mod(itn,10)  ==  0) prnt = .true.
+          if (test3 <=  2.0*ctol) prnt = .true.
+          if (test2 <= 10.0*atol) prnt = .true.
+          if (test1 <= 10.0*rtol) prnt = .true.
+          if (istop /=         0) prnt = .true.
+
+          if (prnt) then   ! Print a line for this iteration.
+             write(nout,1500) itn,abs(x(1)),rnorm,test1,test2,Anorm,Acond,alfopt
+          end if
+       end if
+
+       !----------------------------------------------------------------
+       ! Stop if appropriate.
+       ! The convergence criteria are required to be met on  nconv
+       ! consecutive iterations, where  nconv  is set below.
+       ! Suggested value:  nconv = 1, 2  or  3.
+       !----------------------------------------------------------------
+       if (istop == 0) then
+          nstop = 0
+       else
+          nconv = 1
+          nstop = nstop + 1
+          if (nstop < nconv  .and.  itn < itnlim) istop = 0
+       end if
+       if (istop /= 0) exit
+
+    end do
+    !===================================================================
+    ! End of iteration loop.
+    !===================================================================
+	
+	deallocate(v,w)
+	
+    if (wantse) then                       ! Finish off the
+       t = one                             ! standard error estimates.
+       if (m > n)  t = m-n
+       if (damped) t = m
+       t = rnorm/sqrt(t)
+       do i=1,n
+          se(i) = t*sqrt(se(i))
+       end do
+    end if
+
+    ! Come here if Arnorm = 0, or if normal exit.
+
+800 if (damped .and. istop==2) istop=3     ! Decide if istop = 2 or 3.
+    if (nout > 0) then                     ! Print the stopping condition.
+       write(nout, 2000)                &
+            exitt,istop,itn,            &
+            exitt,Anorm,Acond,          &
+            exitt,bnorm, xnorm,         &
+            exitt,rnorm,Arnorm
+       write(nout, 2100)                &
+            exitt,dxmax, maxdx,         &
+            exitt,dxmax/(xnorm+1.0e-30)
+       write(nout, 3000)                &
+            exitt, msg(istop)
+    end if
+
+    return
+
+ 1000 format(// 1p, a, '     Least-squares solution of  Ax = b'   &
+      / ' The matrix  A  has', i7, ' rows   and', i7, ' columns'  &
+      / ' damp   =', e22.14, 3x,        'wantse =', l10           &
+      / ' atol   =', e10.2, 15x,        'conlim =', e10.2         &
+      / ' btol   =', e10.2, 15x,        'itnlim =', i10)
+ 1200 format(/ '   Itn  abs(x(1))           Function',            &
+      '     Compatible   LS        Norm A    Cond A')
+ 1300 format(/ '   Itn  abs(x(1))           Function',            &
+      '     Compatible   LS     Norm Abar Cond Abar alfa_opt')
+ 1500 format(1p, i6, 2e17.9, 4e10.2, e9.1)
+ 2000 format(/ 1p, a, 5x, 'istop  =', i2,   15x, 'itn    =', i8   &
+      /     a, 5x, 'Anorm  =', e12.5, 5x, 'Acond  =', e12.5       &
+      /     a, 5x, 'bnorm  =', e12.5, 5x, 'xnorm  =', e12.5       &
+      /     a, 5x, 'rnorm  =', e12.5, 5x, 'Arnorm =', e12.5)
+ 2100 format(1p, a, 5x, 'max dx =', e8.1 , ' occurred at itn ', i8 &
+      /     a, 5x, '       =', e8.1 , '*xnorm')
+ 3000 format(a, 5x, a)
+
+  end subroutine zLSQR
+
+  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+  function d2norm( a, b )
+
+    real(dp)             :: d2norm
+    real(dp), intent(in) :: a, b
+
+    !-------------------------------------------------------------------
+    ! d2norm returns sqrt( a**2 + b**2 )
+    ! with precautions to avoid overflow.
+    !
+    ! 21 Mar 1990: First version.
+    ! 17 Sep 2007: Fortran 90 version.
+    ! 24 Oct 2007: User real(dp) instead of compiler option -r8.
+    !-------------------------------------------------------------------
+
+    intrinsic            :: abs, sqrt
+    real(dp)             :: scale
+    !real(dp), parameter  :: zero = 0.0_dp
+
+    scale = abs(a) + abs(b)
+    if (scale == zero) then
+       d2norm = zero
+    else
+       d2norm = scale*sqrt((a/scale)**2 + (b/scale)**2)
+    end if
+
+  end function d2norm
+
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+!   function zd2norm( a, b )
+
+!     real(dp)             :: zd2norm
+!     complex(dp), intent(in) :: a, b
+
+!     !-------------------------------------------------------------------
+!     ! complex version of d2norm
+!     !-------------------------------------------------------------------
+
+!     intrinsic            :: abs, sqrt
+!     real(dp)             :: scale
+!     !real(dp), parameter  :: zero = 0.0_dp
+
+!     scale = abs(a) + abs(b)
+!     if (scale == zero) then
+!        zd2norm = zero
+!     else
+!        zd2norm = scale*sqrt(abs(a/scale)**2 + abs(b/scale)**2)
+!     end if
+
+!   end function zd2norm
+
+end module zLSQRmodule
diff --git a/MDD/zlsqrblasInterface.F90 b/MDD/zlsqrblasInterface.F90
new file mode 100644
index 0000000000000000000000000000000000000000..544e014af260699289c7e015c7bd605e7a34b493
--- /dev/null
+++ b/MDD/zlsqrblasInterface.F90
@@ -0,0 +1,161 @@
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+! File zlsqrblasInterface.f90
+!
+!    BLAS1 Interfaces:   dznrm2    zscal
+!
+! Maintained by Michael Saunders <saunders@stanford.edu>.
+!
+! 29 Jun 2013: zlsqrblasInterface module implemented.
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+module zlsqrblasInterface
+  use iso_c_binding
+  use zlsqrDataModule, only : dp, ip
+  
+  implicit none
+  public   :: dznrm2, zscal, dznrm2_zlsqr, zscal_zlsqr
+
+  interface                              ! Level 1 BLAS
+     function dznrm2 (n,x,incx) bind(c)
+       use zlsqrDataModule, only : dp, ip
+       integer(ip),  intent(in)    :: n,incx
+       complex(dp), intent(in)     :: x(*)
+       real(dp)                    :: dznrm2
+     end function dznrm2
+
+     subroutine zscal (n,za,zx,incx) bind(c)
+       use zlsqrDataModule, only : dp, ip
+       integer(ip),  intent(in)       :: n,incx
+       complex(dp), intent(in)        :: za
+       complex(dp), intent(inout)     :: zx(*)
+     end subroutine zscal
+  end interface
+  
+
+contains
+
+  function dznrm2_zlsqr ( n, x, incx ) bind(c,name="dznrm2_zlsqr_")
+  !*****************************************************************************
+  !
+  ! DZNRM2 returns the euclidean norm of a complex(8) vector.
+  !
+  !
+  !  Discussion:
+  !
+  !    DZNRM2 := sqrt ( sum ( conjg ( x(1:n) ) * x(1:n) ) )
+  !            = sqrt ( dot_product ( x(1:n), x(1:n) ) )
+  !
+  !  Parameters:
+  !
+  !    Input, integer N, the number of entries in the vector.
+  !
+  !    Input, complex(8) X(*), the vector.
+  !
+  !    Input, integer INCX, the increment between successive entries of X.
+  !
+  !    Output, real(8) DZNRM2, the norm of the vector.
+  !
+  !
+     
+    integer(ip), intent(in)     :: incx
+    integer(ip)                 :: ix
+    integer(ip), intent(in)     :: n
+    real(dp)                 :: norm
+    real(dp)                 :: scale
+    real(dp)                 :: dznrm2_zlsqr
+    real(dp)                 :: ssq
+    real(dp)                 :: temp
+    real(dp), parameter      :: one = 1.0
+    real(dp), parameter      :: zero = 0.0
+    complex(dp), intent(in)  :: x(*)
+  
+  ! 
+    
+    if ( n < 1 .or. incx < 1 ) then
+	
+      norm  = zero
+  
+    else
+  
+      scale = zero
+      ssq = one
+  
+      do ix = 1, 1 + ( n - 1 ) * incx, incx
+        if ( real(x(ix), 8) /= zero ) then
+          temp = abs ( real(x(ix), 8) )
+          if ( scale < temp ) then
+            ssq = one + ssq * ( scale / temp )**2
+            scale = temp
+          else
+            ssq = ssq + ( temp / scale )**2
+          end if
+        end if
+  
+        if ( aimag ( x(ix) ) /= zero ) then
+          temp = abs ( aimag ( x(ix) ) )
+          if ( scale < temp ) then
+            ssq = one + ssq * ( scale / temp )**2
+            scale = temp
+          else
+            ssq = ssq + ( temp / scale )**2
+          end if
+  
+        end if
+  
+      end do
+  
+      norm  = scale * sqrt ( ssq )
+  
+    end if
+	
+    dznrm2_zlsqr = norm
+	  
+    return
+  end function dznrm2_zlsqr
+
+  subroutine zscal_zlsqr(n, za, zx, incx) bind(c, name="zscal_zlsqr_")
+  !*****************************************************************************
+  !
+  ! zscal returns the euclidean norm of a complex(8) vector.
+  !
+  !
+  !  Discussion:
+  !
+  !    ZSCAL scales a vector by a constant.
+  !
+  !  Parameters:
+  !
+  !    Input, integer n, the number of entries in the vector.
+  
+  !    Input, complex(8) za(*), the scaling parameter.
+  
+  !    Input/Output, complex(8) zx(*), the vector.
+  !
+  !    Input, integer incx, the increment between successive entries of X.
+    use zlsqrDataModule, only : dp, ip
+    
+    implicit none
+    
+    
+    integer(ip), intent(in)        :: incx
+    integer(ip)                    :: i, nincx
+    integer(ip), intent(in)        :: n
+    complex(dp), intent(in)    :: za
+    complex(dp), intent(inout) :: zx(*)
+  
+    if (n .le. 0 .or. incx .le. 0) return
+    if (incx .eq. 1) then
+      do i = 1, n
+        zx(i) = za * zx(i)
+      end do
+    else
+      nincx = n * incx
+      do i = 1, nincx, incx
+        zx(i) = za * zx(i)
+      end do
+    end if
+    return
+  
+  end subroutine zscal_zlsqr
+
+end module zlsqrblasInterface
diff --git a/Makefile b/Makefile
index 86415c08131202fcd87cf41f2ef7f23211bd89d6..047c71741b3de43ffb961b58bb944d12d0e99618 100644
--- a/Makefile
+++ b/Makefile
@@ -1,18 +1,29 @@
 #master makefile for OpenSource 
 
+include Make_include
+
 all: mkdirs 
 	cd FFTlib		; $(MAKE)
-	cd zfp			; $(MAKE) install
 	cd fdelmodc		; $(MAKE) install
-	cd fdelmodc3D	; $(MAKE) install
 	cd utils		; $(MAKE) install
 	cd marchenko	; $(MAKE) install
-	cd marchenko3D	; $(MAKE) install
 	cd corrvir		; $(MAKE) install
 	cd raytime		; $(MAKE) install
-	cd MDD			; $(MAKE) install
+ifneq ($(strip $(MKLROOT)),)
 	cd fdacrtmc		; $(MAKE) install
-
+else
+	@echo "***************************************************************************";
+	@echo "**** There is no MKL or other library for the FFTW calls in use by fdacrtmc";
+endif
+	cd zfp			; $(MAKE) install
+	cd fdelmodc3D	; $(MAKE) install
+	cd marchenko3D	; $(MAKE) install
+ifneq ($(strip $(FC)),)
+	cd MDD			; $(MAKE) install
+else
+	@echo "***************************************************************************";
+	@echo "**** There is no Fortran compiler (FC) defined in Make_include to make MDD";
+endif
 
 mkdirs:
 	-mkdir -p lib
@@ -21,30 +32,29 @@ mkdirs:
 
 clean:
 	cd FFTlib 		; $(MAKE) $@
-	cd zfp			; $(MAKE) $@
 	cd fdelmodc		; $(MAKE) $@
-	cd fdelmodc3D	; $(MAKE) $@
 	cd utils		; $(MAKE) $@
 	cd marchenko	; $(MAKE) $@
-	cd marchenko3D	; $(MAKE) $@
 	cd corrvir		; $(MAKE) $@
 	cd raytime		; $(MAKE) $@
-	cd MDD			; $(MAKE) $@
 	cd fdacrtmc		; $(MAKE) $@
+	cd zfp			; $(MAKE) $@
+	cd fdelmodc3D	; $(MAKE) $@
+	cd marchenko3D	; $(MAKE) $@
+	cd MDD			; $(MAKE) $@
 
 realclean:
 	cd FFTlib 		; $(MAKE) $@
-	cd zfp			; $(MAKE) $@
 	cd fdelmodc		; $(MAKE) $@
-	cd fdelmodc3D	; $(MAKE) $@
-	cd fdelrtmc		; $(MAKE) $@
 	cd utils		; $(MAKE) $@
 	cd marchenko	; $(MAKE) $@
-	cd marchenko3D	; $(MAKE) $@
 	cd corrvir		; $(MAKE) $@
 	cd raytime		; $(MAKE) $@
-	cd MDD			; $(MAKE) $@
 	cd fdacrtmc		; $(MAKE) $@
+	cd zfp			; $(MAKE) $@
+	cd fdelmodc3D	; $(MAKE) $@
+	cd marchenko3D	; $(MAKE) $@
+	cd MDD			; $(MAKE) $@
 	rm -f lib/*
 	rm -f include/*
 	rm -f bin/*
diff --git a/fdelmodc/getModelInfo.c b/fdelmodc/getModelInfo.c
index 378a1b50ebac46e5b7b8a8bef4b5365ac15bef9d..b175fea56ad33657fbabc425a9d48d928c1ed603 100644
--- a/fdelmodc/getModelInfo.c
+++ b/fdelmodc/getModelInfo.c
@@ -46,7 +46,9 @@ int getModelInfo(char *file_name, int *n1, int *n2, float *d1, float *d2, float
     *f2 = hdr.f2;
 
     if ( NINT(100.0*((*d1)/(*d2)))!=100 ) {
-        verr("dx and dz are different in the model !");
+        vwarn("dx and dz are different in the model !");
+        vwarn("setting dx=%e to dz=%e ", *d2, *d1);
+        *d2 = *d1;
     }
     if ( NINT(1000.0*(*d1))==0 ) {
         if(!getparfloat("dx",d1)) {
diff --git a/marchenko/demo/oneD/README_PRIMARIES b/marchenko/demo/mme/README_PRIMARIES
similarity index 100%
rename from marchenko/demo/oneD/README_PRIMARIES
rename to marchenko/demo/mme/README_PRIMARIES
diff --git a/marchenko/demo/mme/epsModel.scr b/marchenko/demo/mme/epsModel.scr
new file mode 100755
index 0000000000000000000000000000000000000000..2ee2e5e8ff541710206b06d67456738977d55cd7
--- /dev/null
+++ b/marchenko/demo/mme/epsModel.scr
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+export PATH=$HOME/src/OpenSource/bin/:$PATH:
+
+# Add interface line to postscript file of model 
+cat << EOF1 > line1
+400 -2500
+400 2500
+EOF1
+
+cat << EOF2 > line2
+700 -2500
+700 2500
+EOF2
+
+cat << EOF3 > line3
+1100 -2500
+1100 2500
+EOF3
+
+#model
+supsimage hbox=4 wbox=6 labelsize=12 < model10_cp.su \
+        x1beg=0 x1end=1400.0 d1num=200 lstyle=vertright legend=1 threecolor=0 \
+    	label1="depth (m)" label2="lateral distance (m)" \
+        curve=line1,line2,line3 npair=2,2,2 curvecolor=black,black,black \
+        n1tic=5 x2beg=-2250 f2num=-2000 d2num=1000 x2end=2250 > model_cp_line.eps
+
+supsimage hbox=4 wbox=6 labelsize=12 < model10_ro.su \
+        x1beg=0 x1end=1400.0 d1num=200 lstyle=vertright legend=1 threecolor=0 \
+    	label1="depth (m)" label2="lateral distance (m)" \
+        curve=line1,line2,line3 npair=2,2,2 curvecolor=black,black,black \
+        n1tic=5 x2beg=-2250 f2num=-2000 d2num=1000 x2end=2250 > model_ro_line.eps
+
+#wavelet
+dt=0.0005
+supsgraph < wavefw.su \
+    labelsize=12 d1=$dt style=normal \
+    label1="time (s)" label2="amplitude" \
+    d1num=0.15 wbox=6 hbox=3 x1end=0.9 > wavefw.eps
+ 
+sufft < wavefw.su | suamp | sugain scale=$dt | supsgraph \
+    labelsize=12 style=normal \
+    label1="frequency (1/s)" label2="amplitude" \
+    d1num=10 wbox=6 hbox=3 x1end=125 x2end=1.1 > wavefw_freq.eps
+ 
+#shot record
+file=shot5_rp.su
+sumax < $file mode=abs outpar=nep
+clipref=`cat nep | awk '{print $1/15}'`
+suwind key=gx min=-2250000 max=2250000 < $file | \
+        supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+    	label1="time sample number" label2="lateral distance (m)" \
+        n1tic=2 d2=5 f1=0.0 x1beg=0 x1end=400 d1=1 f1=0 d1num=100 \
+        f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > shotx0_rp.eps
+
+rm nep 
diff --git a/marchenko/demo/mme/epsPrimaries.scr b/marchenko/demo/mme/epsPrimaries.scr
new file mode 100755
index 0000000000000000000000000000000000000000..cd2f11d144c9e1c8dc89ce9a398b751ed29ee9af
--- /dev/null
+++ b/marchenko/demo/mme/epsPrimaries.scr
@@ -0,0 +1,305 @@
+#!/bin/bash
+
+export PATH=$HOME/src/OpenSource/bin/:$PATH:
+
+echo "Making eps files for $1"
+
+if [[ "$1" == "Figure2" ]];
+then
+./epsModel.scr
+exit 0
+fi
+
+istart=276
+#set same clip factor for iteration updates
+file=Mi_${istart}002.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+ns=`surange < Mi_276014.su | grep ns | awk '{print $2}'`
+nsd=400 #number of samples to display
+(( nstart = ns - nsd ))
+
+file=k1min_${istart}002.su
+sumax < $file mode=abs outpar=nep
+clipf1=`cat nep | awk '{print $1/11}'`
+clipm1=`cat nep | awk '{print $1/22}'`
+
+#M0
+file=M0_276000.su
+file_base=${file%.su}
+sumax < $file mode=abs outpar=nep
+clipref=`cat nep | awk '{print $1/15}'`
+
+echo "clipiter="$clipiter "clipref="$clipref "clipf1="$clipf1 "clipm1="$clipm1
+#clipiter=$clipref
+#clipf1=$clipref
+
+# Initialisation and First iteration
+if [[ "$1" == "Figure3" ]];
+then
+file=M0_276000.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 f1=0 f1num=700 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+suflip < $file flip=3 | \
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 f1=0 f1num=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}_flip.eps
+
+#convolve M0 with middle shot record of R
+select=451
+#original shot record from Reflection matrix
+suwind key=fldr min=$select max=$select < shotsdx5_rp.su > shotsx0.su
+#first iteration
+suwind itmin=516 itmax=1539 < M0_276000.su > N0.su
+#compute R*N0
+fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN0.su verbose=1 fmax=90
+
+file=fconvN0.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+
+file=fconvN0.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 d1=1 f1=0 f1num=0 d1num=200 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > ${file_base}fulltime.eps
+
+file=fconvN0.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 f1num=700 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > ${file_base}zoom.eps
+
+suflip < fconvN0.su flip=3 | sugain scale=1 > fconvN0flip.su
+file=fconvN0flip.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 f1=0 f1num=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+
+iter=1
+piter=$(printf %03d $iter)
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+fi
+
+
+# second iteration
+if [[ "$1" == "Figure4" ]];
+then
+suwind itmax=1023 <  Mi_276001.su > N0.su
+#compute R*N0
+fconv file_in1=shotsx0.su file_in2=N0.su file_out=fconvN1.su verbose=1 fmax=90
+
+file=fconvN1.su
+sumax < $file mode=abs outpar=nep
+clipiter=`cat nep | awk '{print $1/15}'`
+suflip < fconvN1.su flip=3 | sugain scale=1 > fconvN1flip.su
+
+file=fconvN1flip.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 f1num=600 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > $file_base.eps
+
+file=fconvN1.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 d1=1 f1=0 f1num=0 d1num=200 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipiter > ${file_base}fulltime.eps
+
+iter=2
+piter=$(printf %03d $iter)
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+suwind itmin=516 itmax=1539 < $file > N2.su
+supsimage < N2.su hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=624 x1end=1024 d1=1 f1=0 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+
+fi
+
+if [[ "$1" == "Figure6" ]];
+then
+#convergence plot
+rm vplus.su
+istart=200
+for (( iter=1; iter<=29; iter+=2 ))
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=v1plus_${istart}${piter}.su
+file_base=${file%.su}
+suwind < $file key=offset min=0 max=0 >> vplus.su
+done
+
+basop file_in=vplus.su file_out=vplus_env.su choice=4
+sumax < vplus_env.su verbose=1 mode=max 
+supsmax < vplus_env.su n=15 verbose=1 mode=max \
+	style=normal linewidth=2.0 f1=1 labelsize=14 label1="iteration number" label2="amplitude" \
+    d1num=2 d2num=0.2 wbox=6 hbox=3 grid1=dot grid2=dot n1tic=2 n2tic=2 x2end=2.9 x2beg=1.8 > v1plus_max.eps
+
+file=v1plus_${istart}001.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+
+file=k1min_${istart}030.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+fi
+
+#Iterations
+if [[ "$1" == "Figure9" ]];
+then
+for iter in 2 4 12 20
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+#possibly add suflip flip=3 to flip the time-axis
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure9" ]];
+then
+for iter in 1 3 11 19
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure10" ]];
+then
+#iterations
+for iter in 2 4 10 20
+do
+piter=$(printf %03d $iter)
+echo $piter
+
+file=Mi_${istart}${piter}.su
+#ns=`surange < iter_001.su | grep ns | awk '{print $2}'`
+#dtrcv=`surange < iter_001.su | grep dt | awk '{print $2/1000000.0}'`
+#shift=$(echo "scale=4; ($dtrcv*($ns/2.0-1))" | bc -l)
+#basop choice=shift shift=$shift file_in=$file | \
+file_base=${file%.su}
+suflip flip=3 < $file | supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > ${file_base}flip.eps
+
+file=k1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+
+done
+fi
+
+if [[ "$1" == "Figure11" ]];
+then
+iter=32
+piter=$(printf %03d $iter)
+echo $piter
+for (( istart=246; istart<=316; istart+=10 ))
+do
+file=k1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > $file_base.eps
+done
+fi
+
+if [[ "$1" == "Figure13" ]];
+then
+istart=276
+for iter in 2 4 
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+#possibly add suflip flip=3 to flip the time-axis
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=$nstart x1end=$ns d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}T.eps
+done
+for iter in 1 3 
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=Mi_${istart}${piter}.su
+file_base=${file%.su}
+supsimage < $file hbox=6 wbox=4 labelsize=14 linewidth=0.0 \
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipref > ${file_base}T.eps
+done
+for iter in 2 4 10 20
+do
+piter=$(printf %03d $iter)
+echo $piter
+file=k1min_${istart}${piter}.su
+file_base=${file%.su}
+supsimage hbox=6 wbox=4 labelsize=14 linewidth=0.0 < $file\
+	n1tic=2 d2=5 x1beg=0 x1end=$nsd d1=1 d1num=100 \
+	label1="time sample number" label2="lateral distance" \
+    f2=-2250 f2num=-2000 d2num=1000 clip=$clipf1 > ${file_base}T.eps
+done
+
+fi
+
+exit;
+
+#Windows for odd and even iterations
+file=WindowOdd276.su
+file_base=${file%.su}
+suwind key=tracl min=1 max=1 < $file | suwind itmin=1024 | \
+supsgraph d1=1 style=normal f1=0 \
+    labelsize=12 label1="time sample number" label2="amplitude" \
+    d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
+file=WindowEven276.su
+file_base=${file%.su}
+suwind key=tracl min=1 max=1 < $file | suwind itmax=1024 | \
+supsgraph d1=1 style=normal f1=0 \
+    labelsize=12 label1="time sample number" label2="amplitude" \
+    d1num=100 wbox=6 hbox=3 x2end=1.2 > $file_base.eps
+
diff --git a/marchenko/demo/mme/iterations.scr b/marchenko/demo/mme/iterations.scr
new file mode 100755
index 0000000000000000000000000000000000000000..671376345f4a18dd8218288e2170095d6695ae54
--- /dev/null
+++ b/marchenko/demo/mme/iterations.scr
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+#second reflector at time:
+# 800/1800+600/2300
+# .70531400966183574878 => sample 176
+#third reflector at time model.scr:
+# 800/1800+600/2300+800/2000
+# 1.10531400966183574878 sample 276
+#third reflector at time modepm.scr
+#800/1800+600/3200+520/2000
+#.96531400966183574878 sample 241
+
+
+R=shotsdx5_rp.su
+makewave fp=20 dt=0.004 file_out=wave.su nt=2048 t0=0.0 scale=0 scfft=1
+select=451
+
+if [[ "$1" == "Figure34910" ]];
+then
+for istart in 276
+do
+(( iend = istart + 1 ))
+
+../../marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+done
+fi
+
+if [[ "$1" == "Figure6" ]];
+then
+istart=200
+(( iend = istart + 1 ))
+../../marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=30 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+fi
+
+if [[ "$1" == "Figure11" ]];
+then
+for istart in 246 256 266 276 286 296 306 316
+do
+(( iend = istart + 1 ))
+
+../../marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=0 file_iter=iter.su 
+done
+fi
+
+if [[ "$1" == "Figure13" ]];
+then
+istart=276
+(( iend = istart + 1 ))
+../../marchenko_primaries file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=$istart iend=$iend fmax=90 \
+	pad=512 niter=33 smooth=8 niterskip=600 shift=16 file_rr=pred_rr_${istart}.su T=1 file_iter=iterT.su 
+fi
+
diff --git a/marchenko/demo/mme/model.scr b/marchenko/demo/mme/model.scr
new file mode 100755
index 0000000000000000000000000000000000000000..b97d41800605d7af489dac030e90265174f5b17b
--- /dev/null
+++ b/marchenko/demo/mme/model.scr
@@ -0,0 +1,106 @@
+#!/bin/bash
+#SBATCH -J model_1.5D
+#SBATCH --cpus-per-task=8
+#SBATCH --ntasks=1
+#SBATCH --time=0:15:00
+
+
+#adjust this PATH to where the code is installed
+export PATH=$HOME/src/OpenSource/bin:$PATH:
+
+dx=2.5
+dt=0.0005
+
+#define gridded model for FD computations
+makemod sizex=10000 sizez=1400 dx=$dx dz=$dx cp0=1800 ro0=1000 \
+        orig=-5000,0 file_base=model10.su verbose=2 \
+        intt=def x=-5000,5000 z=400,400 poly=0 cp=2300 ro=3000 \
+        intt=def x=-5000,5000 z=700,700 poly=0 cp=2000 ro=1100 \
+        intt=def x=-5000,5000 z=1100,1100 poly=0 cp=2500 ro=4000
+
+#define wavelet for modeling R
+makewave w=fw fmin=0 flef=5 frig=80 fmax=100  dt=$dt file_out=wavefw.su nt=4096 t0=0.3 scale=0 scfft=1
+
+#define wavelet for reference and intial focusing field.
+makewave fp=25 dt=$dt file_out=wave.su nt=4096 t0=0.1 scale=1
+
+export OMP_NUM_THREADS=8
+
+#Model shot record in middle of model
+fdelmodc \
+    file_cp=model10_cp.su ischeme=1 iorder=4 \
+    file_den=model10_ro.su \
+    file_src=wavefw.su \
+    file_rcv=shot5_fd.su \
+    src_type=7 \
+    src_orient=1 \
+    src_injectionrate=1 \
+    rec_type_vz=0 \
+    rec_type_p=1 \
+    rec_int_vz=2 \
+    dtrcv=0.004 \
+    rec_delay=0.3 \
+    verbose=2 \
+    tmod=4.392 \
+    dxrcv=5.0 \
+    xrcv1=-4500 xrcv2=4500 \
+    zrcv1=0 zrcv2=0 \
+    xsrc=0 zsrc=0 \
+    npml=101 \
+    left=2 right=2 top=2 bottom=2 
+
+#define homogenoeus model to compute direct wave only
+makemod sizex=10000 sizez=1200 dx=$dx dz=$dx cp0=1800 ro0=1000 \
+        orig=-5000,0 file_base=hom.su verbose=2
+
+#Model direct wave only in middle of model
+fdelmodc \
+    file_cp=hom_cp.su ischeme=1 iorder=4 \
+    file_den=hom_ro.su \
+    file_src=wavefw.su \
+    file_rcv=shot5_hom_fd.su \
+    src_type=7 \
+    src_orient=1 \
+    src_injectionrate=1 \
+    rec_type_vz=0 \
+    rec_type_p=1 \
+    rec_int_vz=2 \
+    dtrcv=0.004 \
+    rec_delay=0.3 \
+    verbose=2 \
+    tmod=4.392 \
+    dxrcv=5.0 \
+    xrcv1=-4500 xrcv2=4500 \
+    zrcv1=0 zrcv2=0 \
+    xsrc=0 zsrc=0 \
+    npml=101 \
+    left=2 right=2 top=2 bottom=2 
+
+#subtract direct wave from full model shot record: this defines R
+sudiff shot5_fd_rp.su shot5_hom_fd_rp.su > shot5_rp.su
+
+#########################################################
+# Generate the full R matrix for a fixed spread geometry.
+
+dxshot=5000 # with scalco factor of 1000
+ishot=0
+nshots=901
+
+rm shotsdx5_rp.su 
+
+while (( ishot < nshots ))
+do
+
+	(( xsrc = -2250000 + ${ishot}*${dxshot} ))
+	(( tr1 = $nshots - ${ishot} ))
+	(( tr2 = ${tr1} + $nshots - 1 ))
+	echo xsrc=$xsrc tr1=$tr1 tr2=$tr2
+
+	(( ishot = $ishot + 1))
+
+	suwind < shot5_rp.su key=tracl min=$tr1 max=$tr2 | \
+	sushw key=sx,gx,fldr,trwf \
+	a=$xsrc,-2250000,$ishot,$nshots b=0,$dxshot,0,0 j=0,$nshots,0,0 | \
+	suchw key1=offset key2=gx key3=sx c=-1 d=1000 >> shotsdx5_rp.su
+
+done
diff --git a/marchenko/demo/mme/primaries.scr b/marchenko/demo/mme/primaries.scr
new file mode 100755
index 0000000000000000000000000000000000000000..55ff6255c733218bb040ac5add82e5153c93f7af
--- /dev/null
+++ b/marchenko/demo/mme/primaries.scr
@@ -0,0 +1,40 @@
+#!/bin/bash -x
+#SBATCH -J marchenko_primaries
+#SBATCH --cpus-per-task=20
+#SBATCH --ntasks=1
+##SBATCH --time=1:29:00
+#SBATCH -p max2h
+
+# Generate the model and Reflection data
+#modelpm.scr 
+
+#cd $SLURM_SUBMIT_DIR
+
+export PATH=$HOME/src/OpenSource/bin:$PATH:
+export OMP_NUM_THREADS=20
+
+#shot record to remove internal multiples
+R=shotsdx5_rp.su
+select=451
+
+makewave fp=20 dt=0.004 file_out=wave.su nt=2048 t0=0.0 scale=0 scfft=1
+
+../../marchenko_primaries1 file_shot=$R ishot=$select file_src=wave.su \
+	nshots=901 verbose=2 istart=40 iend=500 fmax=90 pad=1024 \
+	niter=31 shift=20 smooth=10 niterec=2 niterskip=50 file_rr=pred_rr.su T=0 file_update=update.su
+
+exit;
+
+#for reference original shot record from Reflection matrix
+suwind key=fldr min=$select max=$select < $R itmax=2047 > shotsx0.su
+fconv file_in1=shotsx0.su file_in2=wave.su file_out=shotw.su
+
+# for displaying results
+
+(suwind key=offset min=0 max=0 < pred_rr.su ; suwind key=offset min=0 max=0 < shotw.su) | suxgraph &
+
+sudiff shotw.su pred_rr.su > diff.su
+suximage < shotw.su  x1end=2.5 clip=1 title="original shot"&
+suximage < pred_rr.su  x1end=2.5 clip=1 title="shot with multiples removed"&
+suximage < diff.su   x1end=2.5 clip=1 title="removed multiples"&
+
diff --git a/marchenko/demo/mme/primariesPlane.scr b/marchenko/demo/mme/primariesPlane.scr
new file mode 100755
index 0000000000000000000000000000000000000000..94fe87958e2e20a687b2b4c6c6b2af0e50a74f85
--- /dev/null
+++ b/marchenko/demo/mme/primariesPlane.scr
@@ -0,0 +1,18 @@
+#!/bin/bash -x
+#SBATCH -J marchenko_primaries
+#SBATCH --cpus-per-task=10
+#SBATCH --ntasks=1
+#SBATCH --time=0:30:00
+
+
+#cd $SLURM_SUBMIT_DIR
+
+export PATH=$HOME/src/OpenSource/bin:$PATH:
+export OMP_NUM_THREADS=10
+
+makewave fp=20 dt=0.004 file_out=wave.su nt=1024 t0=0.0 scale=0 scfft=1
+
+../../marchenko_primaries1 file_shot=shotsdx5_rp.su plane_wave=1 file_src=wave.su src_angle=10.0 \
+	nshots=901 verbose=4 istart=40 iend=500 fmax=90 pad=1024 \
+	niter=30 smooth=10 niterskip=60 niterec=2 shift=20 file_rr=plane1_10_rr.su T=0
+
diff --git a/utils/Makefile b/utils/Makefile
index 115653916d18f9d1294b731e133ae2f50b80511f..d345730903d07f5fddbdcb8384778160c0251056 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -23,7 +23,7 @@ SRCM	= \
 		plotexample.c \
 		polint.c \
 		roughint.c \
-		sinusint.c \
+		sinuousint.c \
 		spline.c \
 		name_ext.c  \
 		allocs.c  \
diff --git a/utils/makemod.c b/utils/makemod.c
index 574c1355e646a5e06b536f5e07ef3174914ca88b..8d1f5fe21c901319cee73315002c6bb528502600 100644
--- a/utils/makemod.c
+++ b/utils/makemod.c
@@ -32,7 +32,7 @@ void interpolation(float *x, float *z, int nxp, int nx, int poly, int *pminx, in
 
 void linearint(int *zp, int minx, int maxx, float dz, float *interface);
 
-void sinusint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel);
+void sinuousint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel);
 
 void roughint(int *zp, int minx, int maxx, float dz, float *interface, float ampl, float beta, float seed);
 
@@ -96,9 +96,9 @@ char *sdoc[] = {
   " ",
   "   Options for intt:",
   "         - def       = default interface through the points(Xi, Zi)",
-  "         - sin       = sinus shaped interface",
+  "         - sin       = sinuous shaped interface",
   "         - rough     = rough interface with beta(smoothness)",
-  "         - fract     = cosinus fractal shaped interface",
+  "         - fract     = cosinuous fractal shaped interface",
   "         - random    = define random velocities in layer",
   "         - elipse    = define elipse shaped body",
   "         - diffr     = point diffractions",
@@ -106,7 +106,7 @@ char *sdoc[] = {
   "   Options for var in case of intt =:",
   "         - sin(2)    = wavelength,amplitude",
   "         - rough(3)  = amplitude,beta,seed",
-  "         - fract(6)  = Nsinus,amplitude,dim,k0,freqscale,seed",
+  "         - fract(6)  = Nsin,amplitude,dim,k0,freqscale,seed",
   "         - random(1) = min-max variation around cp",
   "         - elipse(2) = r1, r2: vertical and horizontal radius",
   "         - diffr(1)  = width of each point, type(optional)",
@@ -555,7 +555,7 @@ int main(int argc, char **argv)
       Nv = countnparval(Nvi,"var");
       if (Nv != 2) verr("Sinus interface must have 2 variables.");
       getnparfloat(Nvi,"var", var);
-      sinusint(zp, nminx, nmaxx, dz, interface, dx, var[0], var[1]);
+      sinuousint(zp, nminx, nmaxx, dz, interface, dx, var[0], var[1]);
       if (above == 0) Noi++; else Noi--;
       if (above == 0) Nvi++; else Nvi--;
     }
diff --git a/utils/sinusint.c b/utils/sinuousint.c
similarity index 72%
rename from utils/sinusint.c
rename to utils/sinuousint.c
index 3018caa19a5b89c854887908f184b861d1614e0f..68adaa87c0909bfc71cb0090f318469c343ecac8 100644
--- a/utils/sinusint.c
+++ b/utils/sinuousint.c
@@ -5,7 +5,7 @@
 #define SGN(x) ((x) < 0 ? -1.0 : 1.0)
 
 /**
-* compute sinus shaped interface used in makemod
+* compute sinuous shaped interface used in makemod
 *
 *   AUTHOR:
 *           Jan Thorbecke (janth@xs4all.nl)
@@ -13,7 +13,7 @@
 **/
 
 
-void sinusint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel)
+void sinuousint(int *zp, int minx, int maxx, float dz, float *interface, float dx, float ampl, float wavel)
 {
 	int     j, i;