#include <stdio.h> #include <stdlib.h> #include <assert.h> #include <math.h> #include "par.h" #include "segy.h" #define MIN(x,y) ((x) < (y) ? (x) : (y)) #define MAX(x,y) ((x) > (y) ? (x) : (y)) #define NINT(x) ((int)((x)>0.0?(x)+0.5:(x)-0.5)) #ifdef _OPENMP int omp_get_thread_num(void); #endif double wallclock_time(void); void name_ext(char *filename, char *extension); typedef struct { /* complex number */ float r,i; } complex; void cr1fft(complex *cdata, float *rdata, int n, int sign); int optncr(int n); int getFileInfo(char *filename, int *n1, int *n2, int *ngath, float *d1, float *d2, float *f1, float *f2, float *xmin, float *xmax, float *sclsxgx, int *nxm); int readShotData(char *filename, float xmin, float dx, float *xrcv, float *xsrc, int *xnx, complex *cdata, int nw, int nw_low, int ngath, int nx, int nxm, int ntfft, float alpha, float scl, float conjg, int transpose, int verbose); int deconvolve(complex *cA, complex *cB, complex *cC, complex *oBB, int nfreq, int nblock, size_t nstationA, size_t nstationB, float eps_a, float eps_r, float numacc, int eigenvalues, float *eigen, int rthm, int mdd, int conjgA, int conjgB, int verbose); void writeEigen(char *file_out, float df, int nw_low, int nw_high, int nw, float *eigen, int nx, float dx, float xmin); void writeDatamatrix(char *file_out, complex *P, int ntfft, int ntc, int Nrec, int Nshot, int nfreq, int nw_low, float dt, int verbose); void gausstaper(float *taper, float dx, int n, float enddecay); /************** * ntc output samples of deconvolution result * note that nt (the number of samples read by the IO routine) * should be 2*ntc and a number efficient for FFT's */ /*********************** self documentation **********************/ char *sdoc[] = { " ", " mdd - multi-dimensional deconvolution (OpenMP)", " ", " mdd file_A= file_B= file_out= [optional parameters]", " ", " Required parameters: ", " ", " file_A= .................. name of file(s) which store the data in location A", " file_B= .................. name of file(s) which store the data in location B", " ", " Optional parameters: ", " ", " ntc=nt ................... number of output time samples", " ntfft=nt ................. number of samples used in fft", " fmin=0 ................... minimum frequency", " fmax=70 .................. maximum frequency to use in deconvolution", " INPUT DEFINITION ", " cjA=1 .................... -1 => apply complex conjugate to A", " sclA=1 ................... apply scaling factor to A", " tranposeA=0 .............. apply transpose to A", " cjB=1 .................... -1 => apply complex conjugate to B", " sclB=1 ................... apply scaling factor to B", " tranposeB=0 .............. apply transpose to B", " MATRIX INVERSION CALCULATION ", " conjgA=0 ................. apply complex conjugate-transpose to A", " conjgB=1 ................. apply complex conjugate-transpose to B", " rthm=0 ................... see below for options", " eps_a=1e-5 ............... absolute stabilization factor for LS", " eps_r=1e-4 ............... relative stabilization factor for LS", " numacc=1e-6 .............. numerical accurary for SVD", " ntap=0 ................... number of taper points matrix", " ftap=0 ................... percentage for tapering", " tap=0 .................... type of taper: 0=cos 1=exp", " eigenvalues= ............. write SVD eigenvalues to file ", " mdd=1 .................... mdd=0 => computes correlation ", " OUTPUT DEFINITION ", " file_out= ................ output base name ", " causal=1 ................. output causal(1), non-causal(2), both(3), or summed(4)", " one_file=1 ............... write all shots into one file ", " file_dmat= ............... if defined writes matrix in frequency domain", " verbose=0 ................ silent option; >0 displays info", " ", " Notes: ", " ntc output samples of deconvolution result", " nt (the number of samples read by the IO routine)", " ", " Options for mdd= ", " 2 = A/(B + eps) ", " 1 = A*B^H/(B*B^H + eps) ", " 0 = A*B^H ", " ", " Option for rthm= ", " 0 = Least Squares QR based inversion", " 1 = Least Squares LU based inversion", " 2 = SVD inversion single precision", " 3 = SVD divide-and-conquer method", " 4 = SVD inversion double precision", " 5 = Least Squares LU based inversion double precision", " 6 = Eigenvalue based (not yet working)", " ", " author : Jan Thorbecke : 2008 (j.w.thorbecke@tudelft.nl)", " ", NULL}; /**************** end self doc ***********************************/ int main (int argc, char **argv) { FILE *fpin, *fpout; int i, j, k, ret, nshots, ntraces; int size, n1, n2, ntfft, nf, causal; int verbose, fullcorr, ncorstat, err; int nt, nc, ncc, ntc, nshotA, nshotB; size_t nstationA, nstationB, nfreq, istation, jstation, iw; int pgsz, istep,jstep; int mdd; int conjgA, conjgB; int ntap, nxm, ngath, nw, nw_low, nw_high, eigenvalues, rthm, combine, distance; size_t nwrite, cdatainSize, datainSize, cdataoutSize, stationSize, is; float dx, dt, fmin, fmax, df, eps_r, eps_a, ftap, numacc; float *rC, scl, *rl, *eigen; float f1, f2, d1, d2, sclsxgx, xmin, xmax, alpha, wshot, wpi, wrec; float *xrcvA, *xsrcA, *xrcvB, *xsrcB; float *taper; int *xnx; float sclA,sclB, cjA, cjB; int transposeA, transposeB; complex *cdataout; double t0, t1, t2, t3, tinit, twrite, tread, tdec, tfft; char *file_A, *file_B, *file_out, *file_dmat, filename[1024], number[128], *rthmName; int pe=0, root_pe=0, npes=1, ipe, size_s, one_file; complex *cA, *cB, *oBB; segy *hdr; t0 = wallclock_time(); initargs(argc, argv); requestdoc(1); if (!getparint("verbose", &verbose)) verbose = 0; if (!getparstring("file_A", &file_A)) file_A=NULL; assert(file_A != NULL); if (!getparstring("file_B", &file_B)) file_B=NULL; assert(file_B != NULL); if (!getparstring("file_out", &file_out)) file_out=NULL; assert(file_out != NULL); if (!getparstring("file_dmat", &file_dmat)) file_dmat=NULL; if (!getparint("one_file", &one_file)) one_file = 1; if (!getparfloat("fmin", &fmin)) fmin = 0.0; if (!getparint("rthm", &rthm)) rthm = 0; if (!getparint("combine", &combine)) combine = 0; if (!getparint("causal", &causal)) causal = 1; if (!getparint("ntap", &ntap)) ntap = 0; if (!getparfloat("ftap", &ftap)) ftap = 0.; if (!getparfloat("eps_r", &eps_r)) eps_r = 1e-4; if (!getparfloat("eps_a", &eps_a)) eps_a = 1e-5; if (!getparfloat("numacc", &numacc)) numacc = 1e-6; if (!getparint("eigenvalues", &eigenvalues)) eigenvalues = 0; if (!getparint("mdd", &mdd)) mdd = 1; if (!getparint("transposeA", &transposeA)) transposeA = 0; if (!getparfloat("sclA", &sclA)) sclA = 1.; if (!getparfloat("cjA", &cjA)) cjA = 1.; if (!getparint("transposeB", &transposeB)) transposeB = 0; if (!getparfloat("sclB", &sclB)) sclB = 1.; if (!getparfloat("cjB", &cjB)) cjB = 1.; #ifdef _OPENMP npes = atoi(getenv("OMP_NUM_THREADS")); assert(npes != 0); if (verbose) fprintf(stderr,"Number of OpenMP thread's is %d\n", npes); #else npes=1; #endif /* get information from input files */ nshotA = 0; getFileInfo(file_A, &n1, &n2, &nshotA, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm); if (!getparint("nt", &nt)) nt=n1; if (!getparint("ntc", &ntc)) ntc = n1; if (!getparint("conjgA", &conjgA)) conjgA = 0; if (!getparint("conjgB", &conjgB)) conjgB = 1; if (!getparfloat("dt", &dt)) dt = d1; if (!getparfloat("dx", &dx)) dx = d2; if (!getparfloat("fmax", &fmax)) fmax = 1.0/(2.0*dt); nstationA = n2; nshotB = 0; getFileInfo(file_B, &n1, &n2, &nshotB, &d1, &d2, &f1, &f2, &xmin, &xmax, &sclsxgx, &nxm); assert( n1 == nt); nstationB = n2; assert( nshotA == nshotB); /*================ initializations ================*/ tinit = 0.0; tfft = 0.0; tread = 0.0; tdec = 0.0; if (!getparint("ntfft", &ntfft)) ntfft = nt; ntfft = optncr(ntfft); nf = ntfft/2+1; df = 1.0/(ntfft*dt); nw_high = MIN( (int)((fmax)/df), nf ); nw_low = MAX( (int)(fmin/df), 1 ); nw = nw_high - nw_low + 1; nfreq = MIN(nf,nw); /* scaling of the results by Johno van IJsseldijk */ if (mdd == 0) scl = dx*dt/((float)ntfft); //correlation else if (mdd==1) scl = 1/((float)ntfft)/dx/dt; // MDD else if (mdd==2) scl = 1/((float)ntfft)/dx/dt; // MDD with A and B already computed (NOT TESTED) else scl = 1.0/((float)ntfft); // Passing A or B through /* allocate in shared memory the in- and output data */ jstep = nfreq*nshotA; cdatainSize = nfreq*nshotA*sizeof(complex); cdataoutSize = nstationA*nstationB*nfreq*sizeof(complex); cdataout = (complex *)malloc(cdataoutSize); cA = (complex *)malloc(nstationA*cdatainSize); cB = (complex *)malloc(nstationB*cdatainSize); taper = (float *)malloc(2*nstationB*sizeof(float)); if (file_dmat!=NULL) oBB = (complex *)malloc(nstationB*nstationB*nfreq*sizeof(complex)); else oBB = NULL; assert(cdataout != NULL); assert(cA != NULL); assert(cB != NULL); /* for first touch binding of allocated memory */ #pragma omp parallel for schedule(static) private(jstation,is) default(shared) for (jstation=0; jstation<nstationB; jstation++) { stationSize=nstationA*nfreq*sizeof(complex); is = jstation*nstationA*nfreq; memset(&cdataout[is],0,stationSize); memset(&cB[jstation*jstep],0,jstep*sizeof(complex)); } #pragma omp parallel for schedule(static) private(jstation) default(shared) for (jstation=0; jstation<nstationA; jstation++) { memset(&cA[jstation*jstep],0,jstep*sizeof(complex)); } if (verbose) { if (rthm==0) rthmName="Cholesky"; else if (rthm==1) rthmName="LU"; else if (rthm==2) rthmName="SVD single precision"; else if (rthm==3) rthmName="SVD divide-and-conquer"; else if (rthm==4) rthmName="SVD double precision"; else if (rthm==5) rthmName="LU double precision"; else if (rthm==6) rthmName="Eigenvalue double precision"; fprintf(stderr,"--- Input Information ---\n"); fprintf(stderr," dt nt ............ : %f : %d\n", dt, nt); fprintf(stderr," dx ............... : %f\n", dx); fprintf(stderr," nshotA ........... : %d\n", nshotA ); fprintf(stderr," nstationA ........ : %ld\n", nstationA ); fprintf(stderr," nshotB ........... : %d\n", nshotB ); fprintf(stderr," nstationB ........ : %ld\n", nstationB ); fprintf(stderr," number t-fft ..... : %d\n", ntfft); fprintf(stderr," Input size ...... : %ld MB\n", (nstationA+nstationB)*cdatainSize/(1024*1024)); fprintf(stderr," Output size ...... : %ld MB\n", (cdataoutSize/((size_t)1024*1024))); fprintf(stderr," taper points ..... : %d (%.2f %%)\n", ntap, ftap*100.0); fprintf(stderr," process number ... : %d\n", pe); fprintf(stderr," fmin ............. : %.3f (%d)\n", fmin, nw_low); fprintf(stderr," fmax ............. : %.3f (%d)\n", fmax, nw_high); fprintf(stderr," nfreq ........... : %ld\n", nfreq); if (mdd) fprintf(stderr," Matrix inversion . : %s\n", rthmName); else fprintf(stderr," Correlation ...... : \n"); fprintf(stderr," eps_r ............ : %e\n", eps_r); fprintf(stderr," eps_a ............ : %e\n", eps_a); fprintf(stderr," mdd .............. : %d\n", mdd); } t1 = wallclock_time(); tinit += t1-t0; /* read in first nt samples, and store in data */ xsrcA = (float *)calloc(nshotA,sizeof(float)); xrcvA = (float *)calloc(nshotA*nstationA,sizeof(float)); xnx = (int *)calloc(nshotA,sizeof(int)); alpha = 0.0; readShotData(file_A, xmin, dx, xrcvA, xsrcA, xnx, cA, nw, nw_low, nshotA, nstationA, nstationA, ntfft, alpha, sclA, cjA, transposeA, verbose); xsrcB = (float *)calloc(nshotB,sizeof(float)); xrcvB = (float *)calloc(nshotB*nstationB,sizeof(float)); alpha = 0.0; readShotData(file_B, xmin, dx, xrcvB, xsrcB, xnx, cB, nw, nw_low, nshotB, nstationB, nstationB, ntfft, alpha, sclB, cjB, transposeB, verbose); //cB = cA; eigen = (float *)malloc(nfreq*nstationB*sizeof(float)); t2 = wallclock_time(); tread += t2-t1; #pragma omp parallel default(none) \ private(t1,t2,pe) \ shared(cA,cB,eigen,eigenvalues,numacc,eps_r,eps_a) \ shared(nstationA,nstationB,verbose,cdatainSize) \ shared(rthm,mdd,nfreq,nshotA,conjgA,conjgB) \ shared(cdataout,oBB) { /* start of OpenMP parallel part */ #ifdef _OPENMP pe = omp_get_thread_num(); #endif /* compute deconvolution */ deconvolve(cA, cB, cdataout, oBB, nfreq, nshotA, nstationA, nstationB, eps_a, eps_r, numacc, eigenvalues, eigen, rthm, mdd, conjgA, conjgB, verbose); } /*end of parallel OpenMP part */ fflush(stderr); fflush(stdout); t3 = wallclock_time(); tdec += t3-t2; if (verbose>=1) { fprintf(stderr,"************* PE %d ************* \n", pe); fprintf(stderr,"CPU-time read data = %.3f\n", tread); fprintf(stderr,"CPU-time deconvolution = %.3f\n", tdec); } /* for writing out combined shots cA */ free(cA); free(cB); /* Inverse FFT of deconvolution results */ /* This is done for every deconvolution component seperately */ rC = (float *)malloc(nstationA*ntc*sizeof(float)); assert(rC != NULL); /* #pragma omp parallel default(none) \ private(istation,jstation,pe,j,i,t1,t2,t3,hdr,rl) \ private(filename, k, fpout, nwrite, cA, iw,number) \ shared(tfft) \ shared(rC,dt,ntc,file_out) \ shared(nt,nstationA,nstationB,verbose,err,ntfft,t0,twrite) \ shared(nfreq,stderr,stdout, nshotA, nshotB, nw_low, causal) \ shared(cdataout,istep,jstep,one_file) */ //{ /* start of OpenMP parallel part */ //#ifdef _OPENMP // pe = omp_get_thread_num(); //#else pe = 0; //#endif rl = (float *)calloc(ntfft,sizeof(float)); cA = (complex *)calloc(ntfft,sizeof(complex)); hdr = (segy *)calloc(1,sizeof(segy)); /* for writing out combined shots cA */ tfft = 0.0; twrite = 0.0; if (one_file && pe==0) { strcpy(filename, file_out); if (verbose>2) fprintf(stderr,"writing all output shot into file %s\n", filename); fpout = fopen( filename, "w+" ); assert(fpout != NULL); } //#pragma omp for for (jstation=0; jstation<nstationB; jstation++) { /* FFT */ t1 = wallclock_time(); for (istation=0; istation<nstationA; istation++) { memset(cA,0,ntfft*sizeof(complex)); for (iw=0;iw<nfreq;iw++) { cA[iw+nw_low].r = cdataout[(iw*nstationB+jstation)*nstationA+istation].r*scl; cA[iw+nw_low].i = cdataout[(iw*nstationB+jstation)*nstationA+istation].i*scl; } cr1fft(cA, rl, ntfft, 1); memcpy(&rC[istation*ntc],rl,ntc*sizeof(float)); if (causal==1) { memcpy(&rC[istation*ntc],rl,ntc*sizeof(float)); } else if (causal==2) { rC[istation*ntc] = rl[0]; for (j=1;j<ntc; j++) { rC[istation*ntc+j] = rl[ntfft-j]; } } else if (causal==3) { for (j=1;j<=(ntc/2); j++) { rC[istation*ntc+ntc/2-j] = rl[ntfft-j]; } for (j=ntc/2;j<ntc; j++) { rC[istation*ntc+j] = rl[j-ntc/2]; } } else if (causal==4) { rC[istation*ntc] = rl[0]; for (j=1;j<ntc; j++) { rC[istation*ntc+j] = rl[ntfft-j] + rl[j]; } } } t2 = wallclock_time(); tfft += t2-t1; if (pe == 0) { /* write data to file */ hdr[0].d1 = dt; if (causal == 3) hdr[0].f1=-0.5*ntc*dt; else hdr[0].f1=0.0; hdr[0].dt = (int)(dt*1000000); hdr[0].ns = ntc; hdr[0].fldr = jstation+1; hdr[0].scalco = -1000; hdr[0].scalel = -1000; hdr[0].trid = 1; hdr[0].f2 = f2; hdr[0].d2 = dx; // hdr[0].trwf = nstationA; hdr[0].sx = NINT((f2+dx*jstation)*1000); hdr[0].ntr = nstationA*nstationB; if (!one_file) { strcpy(filename, file_out); sprintf(number,"Station%03d\0",jstation+1); name_ext(filename, number); if (verbose>3) fprintf(stderr,"writing to file %s\n", filename); fpout = fopen( filename, "w+" ); assert(fpout != NULL); } for (istation=0; istation<nstationA; istation++) { hdr[0].tracl = istation+1; hdr[0].gx = NINT((f2+dx*istation)*1000); hdr[0].offset = NINT((f2+dx*istation)); nwrite = fwrite( hdr, 1, TRCBYTES, fpout ); assert (nwrite == TRCBYTES); nwrite = fwrite( &rC[istation*ntc], sizeof(float), ntc, fpout ); assert (nwrite == ntc); } if (!one_file) { fflush(fpout); fclose(fpout); } t3 = wallclock_time(); twrite += t3-t2; // fprintf(stderr,"write %f and fft %f for %d\n",twrite, tfft, jstation); } } if (one_file && pe==0) { fflush(fpout); fclose(fpout); } free(cA); free(rl); //} free(rC); free(cdataout); if (eigenvalues) { writeEigen(file_out, df, nw_low, nw_high, nfreq, eigen, nstationB, dx, f2); } free(eigen); /* if file_dmat write frequency slices of matrix */ if (file_dmat!=NULL) { t2 = wallclock_time(); strcpy(filename, file_dmat); fpout = fopen( filename, "w+" ); hdr[0].d1 = df; hdr[0].dt = (int)(df*1000000); hdr[0].ns = nfreq; hdr[0].trid = 111; /* for (iw=0;iw<nfreq;iw++) { hdr[0].fldr = iw+1; // sprintf(number,"Station%03d\0",jstation+1); // name_ext(filename, number); // if (verbose>3) fprintf(stderr,"writing to file %s\n", filename); // fpout = fopen( filename, "w+" ); twrite = 0.0; for (istation=0; istation<nstationB; istation++) { hdr[0].tracl = istation+1; nwrite = fwrite( hdr, 1, TRCBYTES, fpout ); assert (nwrite == TRCBYTES); // nwrite = fwrite( &oBB[iw*nstationB*nstationB+istation].r, sizeof(complex), nfreq, fpout ); // assert (nwrite == nfreq); } } */ fflush(fpout); fclose(fpout); t3 = wallclock_time(); twrite += t3-t2; free(oBB); } free(hdr); /*================ end ================*/ if (verbose) { t3 = wallclock_time(); fprintf(stderr,"CPU-time inverse FFT's = %.3f\n", tfft); fprintf(stderr,"CPU-time write data = %.3f\n", twrite); fprintf(stderr,"CPU-time initialization = %.3f\n", tinit); fprintf(stderr,"Total CPU-time = %.3f\n", t3-t0); } return 0; } void gausstaper(float *taper, float dx, int n, float enddecay) { int ix, hn; float dist, sigma2; if (enddecay > 0.999) { for (ix = 0; ix < n; ix++) taper[ix] = 1.0; return; } hn = (n-1)/2; sigma2 = (hn*dx*hn*dx)/(log(enddecay)); for (ix = 0; ix <= hn; ix++) { dist = ix*dx; taper[hn+ix] = exp(dist*dist/sigma2); } for (ix = 0; ix < hn; ix++) taper[ix] = taper[n-1-ix]; return; } void writeDatamatrix(char *file_out, complex *P, int ntfft, int ntc, int Nrec, int Nshot, int nfreq, int nw_low, float dt, int verbose) { FILE *fpout; char filename[1024]; size_t nwrite; int jstation, istation, iw; float *rl, *rC; complex *cA; segy *hdr; rC = (float *)malloc(Nrec*ntc*sizeof(float)); rl = (float *)calloc(ntfft,sizeof(float)); cA = (complex *)calloc(ntfft,sizeof(complex)); hdr = (segy *)calloc(1,sizeof(segy)); /* for writing out combined shots cA */ strcpy(filename, file_out); if (verbose>2) fprintf(stderr,"writing all output shot into file %s\n", filename); fpout = fopen( file_out, "w+" ); for (jstation=0; jstation<Nshot; jstation++) { /* FFT */ for (istation=0; istation<Nrec; istation++) { memset(cA,0,ntfft*sizeof(complex)); for (iw=0;iw<nfreq;iw++) { cA[iw+nw_low] = P[(iw*Nshot+jstation)*Nrec+istation]; } cr1fft(cA, rl, ntfft, 1); memcpy(&rC[istation*ntc],rl,ntc*sizeof(float)); } /* write data to file */ hdr[0].d1 = dt; hdr[0].dt = (int)(dt*1000000); hdr[0].ns = ntc; hdr[0].fldr = jstation+1; for (istation=0; istation<Nrec; istation++) { hdr[0].tracl = istation+1; nwrite = fwrite( hdr, 1, TRCBYTES, fpout ); assert (nwrite == TRCBYTES); nwrite = fwrite( &rC[istation*ntc], sizeof(float), ntc, fpout ); assert (nwrite == ntc); } } free(cA); free(rl); free(rC); return; }