-
JanThorbecke authoredJanThorbecke authored
Marchenko_Iterations.c 16.32 KiB
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#ifdef MKL
#include<mkl_cblas.h>
#endif
typedef struct { /* complex number */
float r,i;
} complex;
/*
cblas interface
void cgemm(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
const MKL_Complex8 *alpha, const MKL_Complex8 *a, const MKL_INT *lda,
const MKL_Complex8 *b, const MKL_INT *ldb, const MKL_Complex8 *beta,
MKL_Complex8 *c, const MKL_INT *ldc);
*/
void cgemm_(char *transA, char *transb, int *M, int *N, int *K, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc);
/*
CGEMM - perform one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C,
Synopsis
SUBROUTINE CGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
CHARACTER*1 TRANSA, TRANSB
INTEGER M, N, K, LDA, LDB, LDC
COMPLEX ALPHA, BETA
COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * )
TRANSA - CHARACTER*1. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows:
TRANSA = 'N' or 'n', op( A ) = A.
TRANSA = 'T' or 't', op( A ) = A'.
TRANSA = 'C' or 'c', op( A ) = conjg( A' ).
Unchanged on exit.
TRANSB - CHARACTER*1. On entry, TRANSB specifies the form of op( B ) to be used in the matrix multiplication as follows:
TRANSB = 'N' or 'n', op( B ) = B.
TRANSB = 'T' or 't', op( B ) = B'.
TRANSB = 'C' or 'c', op( B ) = conjg( B' ).
Unchanged on exit.
M - INTEGER.
On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. Unchanged on exit.
N - INTEGER.
On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. Unchanged on exit.
K - INTEGER.
On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. Unchanged on exit.
ALPHA - COMPLEX .
On entry, ALPHA specifies the scalar alpha. Unchanged on exit.
A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is k when TRANSA = 'N' or 'n', and is m otherwise. Before entry with TRANSA = 'N' or 'n', the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. Unchanged on exit.
LDA - INTEGER.
On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANSA = 'N' or 'n' then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ). Unchanged on exit.
B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. Unchanged on exit.
LDB - INTEGER.
On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANSB = 'N' or 'n' then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ). Unchanged on exit.
BETA - COMPLEX .
On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. Unchanged on exit.
C - COMPLEX array of DIMENSION ( LDC, n ).
Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
LDC - INTEGER.
On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ). Unchanged on exit.
*/
void cgemv_(char *transA, int *M, int *N, float *alpha, float *A, int *lda, float *X, int *incx, float *beta, float *Y, int *incy);
void cr1fft(complex *cdata, float *rdata, int n, int sign);
void rc1fft(float *rdata, complex *cdata, int n, int sign);
int Marchenko_Iterations(float *inif, float *WinA, float *WinB, float *rdatavp, float *rdatavm, float *rdatagm, float *rdatagp, complex *Reflw, complex *cjReflw, float fftscl, int ntfft, int nw, int nw_low, int nblock, size_t nstationA, size_t nstationB, int niter, int squaremat, int verbose)
{
int iA, iB, it, i, j, k, iter, icc, ibb, NA, NB, NC, nshots, incx, incy, pe;
size_t iw, iwnA, iwnB, iwAB, iwBB;
float scale;
char *transa, *transb, *transN;
complex beta, alpha;
complex *ctrace;
float *trace;
complex *cTemp1, *cTemp2;
int jstep;
size_t jstation;
int inc=1;
transN = "N";
alpha.r = 1.0; alpha.i = 0.0;
beta.r = 0.0; beta.i = 0.0;
nshots = nblock;
NA = nstationA;
NB = nstationB;
NC = nshots;
if (squaremat==1) {
cTemp1 = (complex *)malloc(nstationA*nw*nshots*sizeof(complex));
assert(cTemp1 != NULL);
cTemp2 = (complex *)malloc(nstationA*nw*nshots*sizeof(complex));
assert(cTemp2 != NULL);
// for first touch binding of allocated memory
jstep = nshots*nw;
#pragma omp parallel for schedule(static) private(jstation) default(shared)
for (jstation=0; jstation<nstationB; jstation++) {
memset(&cTemp1[jstation*jstep],0,jstep*sizeof(complex));
memset(&cTemp2[jstation*jstep],0,jstep*sizeof(complex));
//memset(&cdataout[is],0,stationSize);
}
} else if (squaremat==0) {
cTemp1 = (complex *)malloc(nstationA*nw*sizeof(complex));
assert(cTemp1 != NULL);
cTemp2 = (complex *)malloc(nstationA*nw*sizeof(complex));
assert(cTemp2 != NULL);
memset(&cTemp1[0],0,nstationA*nw*sizeof(complex));
memset(&cTemp2[0],0,nstationA*nw*sizeof(complex));
}
//NC = nstationB;
// if (verbose) fprintf(stderr,"transa=%s transb=%s %d %d %d\n", transa, transb, NA, NB, nshots);
if (squaremat==1) { // LSQR with square geometry using cgemm (all shots at once)
for (iter=0; iter<=niter; iter++) {
if (iter % 2 == 1) {
if (verbose >= 1) fprintf(stderr," ########## ITERATION %d: update to downgoing focus ##########\n", iter);
#pragma omp parallel default(none) \
private(scale,pe,ctrace,trace) \
shared(cjReflw,WinA,rdatavm,rdatavp,rdatagp,inif) \
shared(nstationA,nstationB,nw,nw_low,ntfft) \
shared(NA,NB,NC,alpha,beta,nshots,transa,transb) \
shared(cTemp1,cTemp2,fftscl,stderr)
{ /* start of OpenMP parallel part */
#ifdef _OPENMP
pe = omp_get_thread_num();
#endif
ctrace = (complex *)malloc(ntfft*sizeof(complex));
trace = (float *)malloc(ntfft*sizeof(float));
assert(trace != NULL);
assert(ctrace != NULL);
memset(trace,0,ntfft*sizeof(float));
memset(ctrace,0,ntfft*sizeof(complex));
//fprintf(stderr," ########## into FFT ##########\n");
#pragma omp for schedule(static) \
private(iB, iA, it, iw)
for (iB=0; iB<nstationB; iB++) {
/* FFT */
for (iA=0; iA<nstationA; iA++) {
for (it=0;it<ntfft;it++) {
trace[it] = rdatavm[iB*nstationA*ntfft+iA*ntfft+it];
}
rc1fft(trace,ctrace,ntfft,-1);
for (iw=0;iw<nw; iw++) {
cTemp1[iw*nstationA*nstationB+iB*nstationB+iA].r = ctrace[nw_low+iw].r;
cTemp1[iw*nstationA*nstationB+iB*nstationB+iA].i = ctrace[nw_low+iw].i;
}
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
#pragma omp for schedule(static) \
private(iw, iwnA, iwnB, iwAB)
for (iw=0; iw<nw; iw++) {
iwnA = iw*nstationA*nshots;
iwnB = iw*nstationB*nshots;
iwAB = iw*NC*NC;
transa = "N";
transb = "N";
cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r,
&cjReflw[iwnB].r, &NB,
&cTemp1[iwnA].r, &NA, &beta.r,
&cTemp2[iwAB].r, &NC);
}
//fprintf(stderr," ########## MDC Performed ##########\n");
#pragma omp for schedule(static) \
private(iB, iA, it, iw)
for (iB=0; iB<nstationB; iB++) {
/* FFT */
for (iA=0; iA<nstationA; iA++) {
for (iw=0;iw<nw; iw++) {
ctrace[nw_low+iw].r = cTemp2[iw*nstationA*nstationB+iB*nstationB+iA].r;
ctrace[nw_low+iw].i = cTemp2[iw*nstationA*nstationB+iB*nstationB+iA].i;
}
cr1fft(ctrace,trace,ntfft,1);
for (it=0;it<ntfft;it++) {
scale = WinA[iB*nstationA*ntfft+iA*ntfft+it]*fftscl;
rdatavp[iB*nstationA*ntfft+iA*ntfft+it] = trace[it]*scale + inif[iB*nstationA*ntfft+iA*ntfft+it];
rdatagp[iB*nstationA*ntfft+iA*ntfft+it] = -1*(trace[ntfft-1-it]*fftscl-rdatavp[iB*nstationA*ntfft+iA*ntfft+(ntfft-1-it)]);
}
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
free(ctrace);
free(trace);
} // END OpenMP
} // END IF
else {
if (verbose >= 1) fprintf(stderr," ########## ITERATION %d: update to upgoing focus ##########\n", iter);
#pragma omp parallel default(none) \
private(scale,pe,ctrace,trace) \
shared(Reflw,WinB,rdatavm,rdatavp,rdatagm) \
shared(nstationA,nstationB,nw,nw_low,ntfft) \
shared(NA,NB,NC,alpha,beta,nshots,transa,transb) \
shared(cTemp1,cTemp2,fftscl,stderr)
{ /* start of OpenMP parallel part */
#ifdef _OPENMP
pe = omp_get_thread_num();
#endif
ctrace = (complex *)malloc(ntfft*sizeof(complex));
assert(ctrace != NULL);
trace = (float *)malloc(ntfft*sizeof(float));
assert(trace != NULL);
memset(trace,0,ntfft*sizeof(float));
memset(ctrace,0,ntfft*sizeof(complex));
//fprintf(stderr," ########## into FFT ##########\n");
#pragma omp for schedule(static) \
private(iB, iA, it, iw)
for (iB=0; iB<nstationB; iB++) {
/* FFT */
for (iA=0; iA<nstationA; iA++) {
for (it=0;it<ntfft;it++) {
trace[it] = rdatavp[iB*nstationB*ntfft+iA*ntfft+it];
}
rc1fft(trace,ctrace,ntfft,-1);
for (iw=0;iw<nw; iw++) {
cTemp1[iw*nstationA*nstationB+iB*nstationB+iA].r = ctrace[nw_low+iw].r;
cTemp1[iw*nstationA*nstationB+iB*nstationB+iA].i = ctrace[nw_low+iw].i;
}
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
#pragma omp for schedule(static) \
private(iw, iwnA, iwnB, iwAB)
for (iw=0; iw<nw; iw++) {
iwnA = iw*nstationA*nshots;
iwnB = iw*nstationB*nshots;
iwAB = iw*NC*NC;
transa = "N";
transb = "N";
cgemm_(transa, transb, &NA, &NB, &nshots, &alpha.r,
&Reflw[iwnB].r, &NB,
&cTemp1[iwnA].r, &NA, &beta.r,
&cTemp2[iwAB].r, &NC);
}
//fprintf(stderr," ########## Convolution done ##########\n");
#pragma omp for schedule(static) \
private(iB, iA, it, iw)
for (iB=0; iB<nstationB; iB++) {
/* FFT */
for (iA=0; iA<nstationA; iA++) {
for (iw=0;iw<nw; iw++) {
ctrace[nw_low+iw].r = cTemp2[iw*nstationA*nstationB+iB*nstationB+iA].r;
ctrace[nw_low+iw].i = cTemp2[iw*nstationA*nstationB+iB*nstationB+iA].i;
}
cr1fft(ctrace,trace,ntfft,1);
for (it=0;it<ntfft;it++) {
scale = WinB[iB*nstationA*ntfft+iA*ntfft+it]*fftscl;
rdatavm[iB*nstationA*ntfft+iA*ntfft+it] = trace[it]*scale;
rdatagm[iB*nstationA*ntfft+iA*ntfft+it] = trace[it]*fftscl - rdatavm[iB*nstationA*ntfft+iA*ntfft+it];
}
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
free(ctrace);
free(trace);
} // END OpenMP
} // END ELSE
} // END ITER
} // END SQUARE==1
else if (squaremat==0) { // LSQR with non-square geometry using cgemv (single shot)
// ##################################################################################################################################################
// ##################################################################################################################################################
// ##################################################################################################################################################
for (iter=0; iter<=niter; iter++) {
if (iter % 2 == 1) {
if (verbose >= 1) fprintf(stderr," ########## ITERATION %d: update to downgoing focus ##########\n", iter);
#pragma omp parallel default(none) \
private(scale,pe,ctrace,trace) \
shared(cjReflw,WinA,rdatavm,rdatavp,rdatagp,inif) \
shared(nstationA,nstationB,nw,nw_low,ntfft) \
shared(NA,alpha,beta,nshots,transa) \
shared(cTemp1,cTemp2,fftscl,stderr,inc)
{ /* start of OpenMP parallel part */
#ifdef _OPENMP
pe = omp_get_thread_num();
#endif
ctrace = (complex *)malloc(ntfft*sizeof(complex));
trace = (float *)malloc(ntfft*sizeof(float));
assert(trace != NULL);
assert(ctrace != NULL);
memset(trace,0,ntfft*sizeof(float));
memset(ctrace,0,ntfft*sizeof(complex));
//fprintf(stderr," ########## into FFT ##########\n");
#pragma omp for schedule(static) \
private(iB, it, iw)
for (iB=0; iB<nstationB; iB++) {
for (it=0;it<ntfft;it++) {
trace[it] = rdatavm[iB*ntfft+it];
}
rc1fft(trace,ctrace,ntfft,-1);
for (iw=0;iw<nw; iw++) {
cTemp1[iw*nstationB+iB].r = ctrace[nw_low+iw].r;
cTemp1[iw*nstationB+iB].i = ctrace[nw_low+iw].i;
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
#pragma omp for schedule(static) \
private(iw, iwnA, iwnB)
for (iw=0; iw<nw; iw++) {
iwnB = iw*nstationB*nshots;
iwnA = iw*nstationA*1;
transa = "N";
cgemv_(transa, &NA, &nshots, &alpha.r,
&cjReflw[iwnB].r, &NA,
&cTemp1[iwnA].r, &inc, &beta.r,
&cTemp2[iwnA].r, &inc);
}
//fprintf(stderr," ########## MDC Performed ##########\n");
#pragma omp for schedule(static) \
private(iB, it, iw)
for (iB=0; iB<nstationB; iB++) {
for (iw=0;iw<nw; iw++) {
ctrace[nw_low+iw].r = cTemp2[iw*nstationB+iB].r;
ctrace[nw_low+iw].i = cTemp2[iw*nstationB+iB].i;
}
cr1fft(ctrace,trace,ntfft,1);
for (it=0;it<ntfft;it++) {
scale = WinA[iB*ntfft+it]*fftscl;
rdatavp[iB*ntfft+it] = trace[it]*scale + inif[iB*ntfft+it];
rdatagp[iB*ntfft+(ntfft - 1 - it)] = -1*(trace[it]*fftscl - rdatavp[iB*ntfft+it]);//-1*(trace[ntfft-1-it]*fftscl-rdatavp[iB*ntfft+(ntfft-1-it)]);
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
free(ctrace);
free(trace);
} // END OpenMP
} // END IF
else {
if (verbose >= 1) fprintf(stderr," ########## ITERATION %d: update to upgoing focus ##########\n", iter);
#pragma omp parallel default(none) \
private(scale,pe,ctrace,trace) \
shared(Reflw,WinB,rdatavm,rdatavp,rdatagm) \
shared(nstationA,nstationB,nw,nw_low,ntfft) \
shared(NA,NB,alpha,beta,nshots,transa) \
shared(cTemp1,cTemp2,fftscl,stderr,inc)
{ /* start of OpenMP parallel part */
#ifdef _OPENMP
pe = omp_get_thread_num();
#endif
ctrace = (complex *)malloc(ntfft*sizeof(complex));
assert(ctrace != NULL);
trace = (float *)malloc(ntfft*sizeof(float));
assert(trace != NULL);
memset(trace,0,ntfft*sizeof(float));
memset(ctrace,0,ntfft*sizeof(complex));
//fprintf(stderr," ########## into FFT ##########\n");
#pragma omp for schedule(static) \
private(iB, it, iw)
for (iB=0; iB<nstationB; iB++) {
/* FFT */
for (it=0;it<ntfft;it++) {
trace[it] = rdatavp[iB*ntfft+it];
}
rc1fft(trace,ctrace,ntfft,-1);
for (iw=0;iw<nw; iw++) {
cTemp1[iw*nstationB+iB].r = ctrace[nw_low+iw].r;
cTemp1[iw*nstationB+iB].i = ctrace[nw_low+iw].i;
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
#pragma omp for schedule(static) \
private(iw, iwnA, iwnB)
for (iw=0; iw<nw; iw++) {
iwnA = iw*nstationA*1;
iwnB = iw*nstationB*nshots;
transa = "N";
cgemv_(transa, &NA, &nshots, &alpha.r,
&Reflw[iwnB].r, &NB,
&cTemp1[iwnA].r, &inc, &beta.r,
&cTemp2[iwnA].r, &inc);
}
//fprintf(stderr," ########## Convolution done ##########\n");
#pragma omp for schedule(static) \
private(iB, it, iw)
for (iB=0; iB<nstationB; iB++) {
for (iw=0;iw<nw; iw++) {
ctrace[nw_low+iw].r = cTemp2[iw*nstationB+iB].r;
ctrace[nw_low+iw].i = cTemp2[iw*nstationB+iB].i;
}
cr1fft(ctrace,trace,ntfft,1);
for (it=0;it<ntfft;it++) {
scale = WinB[iB*ntfft+it]*fftscl;
rdatavm[iB*ntfft+it] = trace[it]*scale;
rdatagm[iB*ntfft+it] = trace[it]*fftscl - rdatavm[iB*ntfft+it];
}
}
//fprintf(stderr," ########## FFT converted ##########\n");
free(ctrace);
free(trace);
} // END OpenMP
} // END ELSE
} // END ITER
} // END SQUARE==0
free(cTemp1);
free(cTemp2);
return 0;
}