-
Jan Thorbecke authoredJan Thorbecke authored
corrvir.c 28.15 KiB
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include "par.h"
#include "corr.h"
#include "segy.h"
#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
double wallclock_time(void);
int omp_get_max_threads(void);
int omp_get_num_threads(void);
int omp_get_thread_num(void);
void omp_set_num_threads(int num_threads);
typedef struct { /* complex number */
float r,i;
} complex;
void read_sutrace_at_position(FILE *fp, int itrace, int ircvnum, int isrcnum, complex *tracedata, complex *tracebuffer, int *trace_in_buffer, size_t *nbuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy);
void get_sutrace_at_position(FILE *fp, size_t itrace, complex *tracedata, complex *tracebuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy);
int optncr(int n);
void cr1_fft(complex *cdata, float *data, int n, int sign);
void rc1_fft(float *rdata, complex *cdata, int n, int sign);
int getFileInfo(char *file_name, int *n1, int *n2, float *d1, float *d2, int verbose);
int computeFFT(float *datain, int ld1, int nStart, int ntfft, int nfreq, int nstation, complex *cA, complex *cB, double *tfft, int verbose);
int correlate(complex *cmaster, complex *cslaves, int nfreq, int ncor, double *tcorr, int verbose);
int coherence(complex *cmaster, complex *cslaves, int nfreq, int ncor, float reps, float epsmax, double *tcorr, int verbose);
/**************
* ntc output samples of correlation result
* note that nt (the number of samples read by the IO routine)
* should be 2*ntc and a number efficient for FFT's
*/
/*********************** self documentation **********************/
char *sdoc[] = {
" ",
" corrvir - correlation to compute virtual shot records",
" ",
" corrvir file_shots= file_out= [optional parameters]",
" ",
" Required parameters: ",
" ",
" file_shots= ....... filename of shots used to calculate virtual shots",
" file_out= ......... filename of the output file with the virtual shot records",
" nsources= ......... maximum number of actual shots in a common-receiver gather",
" nreceivers= ....... maximum number of receiver positions",
" xsrcv= ............ defines virtual-source x-positions",
" ysrcv= ............ defines virtual-source y-positions",
" ",
" Optional parameters: ",
" ",
" dtolerr = 0.1 ....... tolerated error distance (in m) between above requested virtual-source position and actual receiver positions",
" fmax = 70 ........... maximum frequency to use in correlation",
" nbm = 4 ............. amount of memory (in GB) to buffer traces for reusage",
" nsrc_ctr = 1 ...... . minumum number of sources to contribute to Fresnel stack",
" normsrc = 0 ....... . normalize each correlated trace (before source summation)",
" normalize = 1 ....... normalize the virtual trace with the number of contributing sources",
" cohr = 0 ............ use cross-coherence: {f1(w)*f2(w)/(|f1(w)|*|f2(w)|+eps)} ",
" reps = 0.0 .......... relative stabilization factor for cohr; eps=reps*|f1(w)|*|f2(w)|",
" epsmax = 0.1 ........ cut off range above which spectrum is flattened",
" src_sel = 0 ......... use all sources that are contributing ",
" = 1 ......... use only sources that are outside square area enclosing master and slave positions",
" = 2 ......... use only sources that are in donut",
" bmin = 1 ............ define inner circle of donut",
" bmax = 4 ............ define outer circle of donut",
" ",
" OUTPUT DEFINITION ",
" causal=1 .......... output causal(1), non-causal(2), both(3), or summed(4)",
" verbose=0 ......... silent option; >0 displays info",
" ",
" Notes: ",
" ntc is the number of output samples of correlation result",
" nt is the number of samples per trace read by the IO routine, should be 2*ntc and a number efficient for FFT's",
" For causal=3: t=0 is sample nt/2 and trace goes from [-nt/2*dt - 0 - nt/2*dt]",
" ",
" Authors : Jan Thorbecke : 2013 (j.w.thorbecke@tudelft.nl)",
" Boris Boullenger : 2015 (b.boullenger@tudelft.nl)",
" ",
NULL};
/**************** end self doc ***********************************/
int main (int argc, char **argv)
{
int i, j, k, ret, nshots, maxnfiles, nfiles;
int size, n1, n2, ntfft, nf, ifreq;
int verbose, causal;
int nt, nstation, ntc, nb, istation, jstation;
int pgsz, istep, nsrc_ctr, nsrc, nsrcMaster, nsrcSlave;
size_t outputSize, nwrite, cdatainSize, datainSize, cdataoutSize;
size_t jstep, lsz, offset, trace_sz, nread;
int ierr, itrace_out, done, cohr, normalize, normsrc;
int storedrcv, storedsrc, src_sel, read_trace;
int nsources, nreceivers, jsource, ivirtual, ivrcv, ivsrc, isrcs, isrcm, ivs;
int sxv, syv, sx, sy, svelev, sxm, sym, sxs, sys;
int cx3, cy3, gxv, gyv, gvelev, gx, gy, gelev;
double cx1, cy1, cx2, cy2;
int distmc2, distsc2, distsc2min, distsc2max;
int bmin, bmax;
int rpeg, speg, vrpeg, vspeg;
int xmin, xmax, ymin, ymax;
int *trace_in_buffer;
size_t nbuffer, nbufmax, nbufmemory;
size_t ntrace, bufsize, nsamp, ipos, nfreq;
int nbm;
crgPos *rcvSrc;
int ircv, *isrc, *nsrc_crg, *vsrc;
float dx, fmax, df, d1, d2, scl, fscalco, sclfft;
float dt, reps, epsmax;
float dtolerr;
float *r, *vtrace;
float diffx, diffy, dd, ddmin;
float *xsrcv, *ysrcv;
float rms2, sclrms;
complex tmpcc;
int nxsrcv, nysrcv, nvsrc;
int scalco;
complex *c, *cmaster, *cslaves, *tracebuffer;
double t0, t1, t2, t3, ttotal, tinit, tread, tfft, tcorr, twrite, tlogic, trdloc, tftloc, tcorrloc, tmemcpy, tselect, tmemset, tloop, troutine0, troutine;
double tread_ivs, tfft_ivs, tcorr_ivs, twrite_ivs, tlogic_ivs, t1_ivs, t3_ivs, t1_ivs_ivr, t2_ivs_ivr, t3_ivs_ivr, t4_ivs_ivr, t5_ivs_ivr;
char *file_shots, *file_out, filename[1024], basename[1200], base[1200], *pbase;
int pe=0, root_pe=0, npes=1, ipe, size_s;
float *trace;
FILE *fpin, *fpout;
segy hdr;
segy *segy_hdrs;
segy outputhdr;
t0 = wallclock_time();
tinit = twrite = tread = tcorr = tfft = tlogic = 0.0;
initargs(argc, argv);
requestdoc(1);
if (!getparint("verbose", &verbose)) verbose = 0;
if (!getparstring("file_shots", &file_shots)) file_shots=NULL;
assert(file_shots != NULL);
if (!getparstring("file_out", &file_out)) file_out=NULL;
if (!getparint("nsources", &nsources)) nsources = 0;
assert(nsources!=0);
if (!getparint("nreceivers", &nreceivers)) nreceivers = 0;
assert(nreceivers!=0);
if (!getparint("nsrc_ctr", &nsrc_ctr)) nsrc_ctr = 1;
if (!getparint("normsrc", &normsrc)) normsrc = 0;
if (!getparint("causal", &causal)) causal = 1;
if (!getparint("normalize", &normalize)) normalize = 1;
if (!getparint("cohr", &cohr)) cohr = 0;
if (!getparint("src_sel", &src_sel)) src_sel = 0;
if (!getparint("bmin", &bmin)) bmin = 1;
if (!getparint("nbm", &nbm)) nbm = 0;
if (!getparint("bmax", &bmax)) bmax = 4;
if (!getparfloat("reps", &reps)) reps = 0.0;
if (!getparfloat("epsmax", &epsmax)) epsmax = 0.1;
if (!getparfloat("fmax", &fmax)) fmax = 70;
if (!getparfloat("dtolerr", &dtolerr)) dtolerr = 0.1;
nxsrcv = countparval("xsrcv");
nysrcv = countparval("ysrcv");
if (nxsrcv != nysrcv) {
verr("Number of sources in array xsrcv (%d), ysrcv(%d) are not equal",nxsrcv, nysrcv);
}
xsrcv = (float *)malloc(nxsrcv*sizeof(float));
ysrcv = (float *)malloc(nxsrcv*sizeof(float));
getparfloat("xsrcv", xsrcv);
getparfloat("ysrcv", ysrcv);
getFileInfo(file_shots, &n1, &n2, &d1, &d2, verbose);
ntrace = n2;
nt = n1;
dt = d1;
fpin = fopen(file_shots, "r");
assert( fpin != NULL);
nread = fread( &hdr, 1, TRCBYTES, fpin );
assert(nread == TRCBYTES);
fpout = fopen(file_out, "w+");
fprintf(stderr,"nt=%d dt=%f ntrace=%ld\n",nt, dt, (long) ntrace);
ntfft = optncr(2*nt);
nf = ntfft/2+1;
df = 1.0/(ntfft*dt);
nfreq = MIN(nf,(int)(fmax/df)+1);
sclfft= 1.0/((float)ntfft);
trace_sz = sizeof(float)*(n1)+TRCBYTES;
segy_hdrs = (segy *)calloc(ntrace,sizeof(segy));
assert(segy_hdrs != NULL);
ret = fseeko(fpin , 0, SEEK_SET);
t1 = wallclock_time();
tinit += t1-t0;
/* check if data is fully buffered */
if (nbm==0) { /* read only headers */
nbufmax = 0;
for (i=0; i<ntrace; i++) {
if(i % 100000 == 0) fprintf(stderr,"i=%d out of %d\n", i, ntrace);
offset = i*trace_sz;
ret = fseeko(fpin , offset, SEEK_SET);
if (ret<0) perror("fseeko");
nread = fread( &segy_hdrs[i], 1, TRCBYTES, fpin );
}
}
else { /* buffer is allocated to store all data traces */
bufsize = (size_t)(ntrace*nfreq*sizeof(complex));
nbufmax = ntrace;
fprintf(stderr,"memory allocated to buffer all traces = %.2f GB\n", (float)(bufsize/(1024*1024*1024)));
tracebuffer = (complex *)malloc(bufsize);
assert (tracebuffer != NULL);
/*================ Read geometry of all traces in file_shots ================*/
r = (float *)calloc(ntfft,sizeof(float));
c = (complex *)calloc((nf),sizeof(complex));
for (i=0; i<ntrace; i++) {
if(i % 100000 == 0) fprintf(stderr,"i=%d out of %d\n", i, ntrace);
offset = i*trace_sz;
ret = fseeko(fpin , offset, SEEK_SET);
if (ret<0) perror("fseeko");
nread = fread( &segy_hdrs[i], 1, TRCBYTES, fpin );
assert(nread == TRCBYTES);
nsamp = segy_hdrs[i].ns;
memset(r,0,ntfft*sizeof(float));
nread = fread( r, sizeof(float), nt, fpin );
assert(nread == nt);
rc1_fft(r,c,ntfft,-1);
memcpy(&tracebuffer[i*nfreq],&c[0],nfreq*sizeof(complex));
}
free(r);
free(c);
}
t2 = wallclock_time();
tread += t2-t1;
/*================ Sort traces in common receiver gathers ================*/
/* nreceivers is total number of receiver positions which is (much) smaller than the number of traces in the file */
/* Every receiver position has a number of sources: common receiver gather */
rcvSrc = (crgPos *)calloc(nreceivers,sizeof(crgPos));
for (i=0; i<nreceivers; i++) {
rcvSrc[i].src = (traceCoord *)calloc(nsources,sizeof(traceCoord));
}
rcvSrc[0].gx = segy_hdrs[0].gx;
rcvSrc[0].gy = segy_hdrs[0].gy;
rcvSrc[0].rpeg = segy_hdrs[0].gdel;
rcvSrc[0].gelev = segy_hdrs[0].gelev;
rcvSrc[0].src[0].x = segy_hdrs[0].sx;
rcvSrc[0].src[0].y = segy_hdrs[0].sy;
rcvSrc[0].src[0].peg = segy_hdrs[0].sdel;
scalco = segy_hdrs[0].scalco;
rcvSrc[0].src[0].fpos = 0;
rcvSrc[0].nsrc = 1;
ircv=1;
if (scalco < 0) fscalco = 1.0/(-1.0*scalco);
else fscalco = (float)scalco;
for (jstation=0; jstation<ntrace; jstation++) {
gx = segy_hdrs[jstation].gx;
gy = segy_hdrs[jstation].gy;
rpeg = segy_hdrs[jstation].gdel;
gelev = segy_hdrs[jstation].gelev;
sx = segy_hdrs[jstation].sx;
sy = segy_hdrs[jstation].sy;
speg = segy_hdrs[jstation].sdel;
/* bookkeeping: find out how many sources are contributing to each receiver position */
storedrcv = 0;
for (i=0; i<ircv; i++) {
if ( (rcvSrc[i].gx == gx) && (rcvSrc[i].gy == gy) ) {
storedrcv = 1;
storedsrc = 0;
for (j=0; j<rcvSrc[i].nsrc; j++) {
if ( (rcvSrc[i].src[j].x == sx) && (rcvSrc[i].src[j].y == sy) ) {
storedsrc=1;
break;
}
}
if ( !storedsrc ) {
rcvSrc[i].src[rcvSrc[i].nsrc].x = sx;
rcvSrc[i].src[rcvSrc[i].nsrc].y = sy;
rcvSrc[i].src[rcvSrc[i].nsrc].peg = speg;
rcvSrc[i].src[rcvSrc[i].nsrc].fpos = jstation;
rcvSrc[i].nsrc += 1;
}
break;
}
}
if ( !storedrcv ) {
if (verbose>=2) fprintf(stderr,"for jstation = %d number of receiver positions %d\n",jstation, ircv+1);
rcvSrc[ircv].gx = gx;
rcvSrc[ircv].gy = gy;
rcvSrc[ircv].rpeg = rpeg;
rcvSrc[ircv].gelev = gelev;
rcvSrc[ircv].src[0].x = sx;
rcvSrc[ircv].src[0].y = sy;
rcvSrc[ircv].src[0].peg = speg;
rcvSrc[ircv].src[0].fpos = jstation;
rcvSrc[ircv].nsrc = 1;
ircv++;
}
}
fprintf(stderr,"number of receiver positions in file = %d \n",ircv);
nreceivers=ircv;
nsources = 0;
nsrc_crg = (int *)calloc(nreceivers,sizeof(int));
for (i=0; i<nreceivers; i++) {
nsrc_crg[i] = rcvSrc[i].nsrc;
nsources = MAX(nsources,rcvSrc[i].nsrc);
if(verbose>=2) fprintf(stderr,"number of sources in receiver position %d = %d \n",i+1, rcvSrc[i].nsrc);
}
/*================ Find receiver positions corresponding to requested virtual-sources ================*/
if (nxsrcv) {
nvsrc = nxsrcv;
vsrc = (int *)malloc(nvsrc*sizeof(int));
for (j=0; j<nxsrcv; j++) {
diffx = xsrcv[j]- rcvSrc[0].gx*fscalco;
diffy = ysrcv[j]- rcvSrc[0].gy*fscalco;
ddmin = diffx*diffx+diffy*diffy;
vsrc[j] = 0;
for (i=1; i<nreceivers; i++) {
diffx = xsrcv[j]- rcvSrc[i].gx*fscalco;
diffy = ysrcv[j]- rcvSrc[i].gy*fscalco;
dd = diffx*diffx+diffy*diffy;
if (dd < ddmin ) {
ddmin = dd;
vsrc[j] = i;
}
}
fprintf(stderr,"ddmin for requested vsrc position (%.2f,%.2f) = %.2f\n",xsrcv[j],ysrcv[j],ddmin);
if (ddmin <= dtolerr) {
xsrcv[j]=rcvSrc[vsrc[j]].gx*fscalco;
ysrcv[j]=rcvSrc[vsrc[j]].gy*fscalco;
fprintf(stderr,"Requested vsrc position coincide with rcv position (%.2f,%.2f)\n",xsrcv[j],ysrcv[j]);
}
else {
fprintf(stderr,"Error: Requested vsrc position do not coincide with a rcv position\n");
}
}
}
else { /* all receivers are made virtual sources */
nvsrc = nreceivers;
vsrc = (int *)malloc(nvsrc*sizeof(int));
for (i=0; i<nreceivers; i++) {
vsrc[i] = i;
}
}
/*================ initializations ================*/
/*================ for nvirtual shots find suitable receivers ================*/
t1 = wallclock_time();
tinit += t1-t2;
#pragma omp parallel default(shared) \
private(cmaster,cslaves,vtrace,r,c, t1_ivs, t3_ivs) \
private(ivrcv, gxv, gyv, vrpeg, gelev, nsrcSlave) \
private(xmin,ymin,xmax,ymax, cx1, cy1, cx2, cy2, cx3, cy3, distmc2, distsc2min, distsc2max, distsc2) \
private(nsrc, trdloc, tftloc, tcorrloc) \
private(isrcm, isrcs, sxm, sym, sxs, sys, read_trace) \
private(jsource, rms2, ifreq, tmpcc, sclrms, scl) \
private(j, outputhdr, itrace_out, nwrite)
{
cmaster = (complex *)malloc(nsources*nfreq*sizeof(complex));
cslaves = (complex *)malloc(nsources*nfreq*sizeof(complex));
vtrace = (float *)malloc(nt*sizeof(float));
r = (float *)malloc(ntfft*sizeof(float));
c = (complex *)malloc((ntfft/2+1)*sizeof(complex));
//itrace_out=0;
fprintf(stderr,"Number of OpenMP threads set = %d number=%d\n", omp_get_max_threads(), omp_get_thread_num());
ivs = 0;
while (ivs<nvsrc) {/* loop over the number of virtual source positions to be created */
t1_ivs = wallclock_time();
//tread_ivs = tfft_ivs = tcorr_ivs = twrite_ivs = tlogic_ivs = tmemcpy = tselect = tmemset = tloop = troutine = 0.0;
ivsrc=vsrc[ivs];
sxv = rcvSrc[ivsrc].gx;
syv = rcvSrc[ivsrc].gy;
vspeg = rcvSrc[ivsrc].rpeg;
svelev = rcvSrc[ivsrc].gelev;
nsrcMaster = nsrc_crg[ivsrc];
#pragma omp for
for (ivrcv=0; ivrcv<nreceivers; ivrcv++) {
//t1_ivs_ivr = wallclock_time();
gxv = rcvSrc[ivrcv].gx;
gyv = rcvSrc[ivrcv].gy;
vrpeg = rcvSrc[ivrcv].rpeg;
gvelev = rcvSrc[ivrcv].gelev;
nsrcSlave = nsrc_crg[ivrcv];
if (src_sel == 1) {
xmin = MIN(gxv,sxv);
ymin = MIN(gyv,syv);
xmax = MAX(gxv,sxv);
ymax = MAX(gyv,syv);
}
if (src_sel == 2) {
cx1 = 0.5*(sxv+gxv);
cy1 = 0.5*(syv+gyv);
cx2 = ceil(cx1);
cy2 = ceil(cy1);
cx3 = cx2;
cy3 = cy2;
distmc2 = (sxv-cx3)*(sxv-cx3)+(syv-cy3)*(syv-cy3);
distsc2min = distmc2+bmin*bmin*distmc2;
distsc2max = distmc2+bmax*bmax*distmc2;
distsc2 = (sxv-cx3)*(sxv-cx3)+(syv-cy3)*(syv-cy3);
}
//fprintf(stderr,"ivsrc=%d sxv=%d nsrcMaster=%d ivrcv=%d gxv=%d nsrcSlave=%d\n", ivsrc, sxv, nsrcMaster, ivrcv, gxv, nsrcSlave);
nsrc = 0;
memset(cslaves,0,nfreq*nsources*sizeof(complex));
memset(cmaster,0,nfreq*nsources*sizeof(complex));
//t0 = wallclock_time();
//tmemset += t0-t1_ivs_ivr;
//trdloc = tftloc = tcorrloc = 0.0;
for (isrcm=0; isrcm<nsrcMaster; isrcm++) { /* for all traces find which traces are recorded by both virtual source and receiver position */
sxm = rcvSrc[ivsrc].src[isrcm].x;
sym = rcvSrc[ivsrc].src[isrcm].y;
for (isrcs=0; isrcs<nsrcSlave; isrcs++) {
sxs = rcvSrc[ivrcv].src[isrcs].x;
sys = rcvSrc[ivrcv].src[isrcs].y;
if ( (sxm == sxs) && (sym == sys) ) { /* master and slave have same contributing source */
/* if the source is outside the inclosing area of the virtual-receiver and virtual-source coordinates
then the source is accepted for contribution to the final summation */
if (src_sel == 0) {
read_trace=1;
}
else if (src_sel == 1) {
read_trace = 0;
if ( !( (sxs>xmin) && (sxs<xmax) ) ) {
if ( !( (sys>ymin) && (sys<ymax) ) ) {
read_trace=1;
}
}
}
else if (src_sel == 2) {
read_trace = 0;
if ( !( (distsc2 < distsc2min) && (distsc2 > distsc2max) ) ) {
read_trace = 1;
}
}
else if (src_sel == 3) {
read_trace = 0;
if ( (sxv < gxv) && (syv < gyv) ) {
if ( (sxm < sxv) && (sym < syv) ) {
read_trace = 1;
}
}
else if ( (sxv < gxv) && (syv > gyv) ) {
if ( (sxm < sxv) && (sym > syv) ) {
read_trace = 1;
}
}
else if ( (sxv > gxv) && (syv < gyv) ) {
if ( (sxm > sxv) && (sym < syv) ) {
read_trace = 1;
}
}
else if ( (sxv > gxv) && (syv > gyv) ) {
if ( (sxm > sxv) && (sym > syv) ) {
read_trace = 1;
}
}
}
else if (src_sel == 4) {
read_trace = 0;
if ( (sxv < gxv) && (syv < gyv) ) {
if ( (sxm < gxv) && (sym < gyv) ) {
read_trace = 1;
}
}
else if ( (sxv < gxv) && (syv > gyv) ) {
if ( (sxm < gxv) && (sym > gyv) ) {
read_trace = 1;
}
}
else if ( (sxv > gxv) && (syv < gyv) ) {
if ( (sxm > gxv) && (sym < gyv) ) {
read_trace = 1;
}
}
else if ( (sxv > gxv) && (syv > gyv) ) {
if ( (sxm > gxv) && (sym > gyv) ) {
read_trace = 1;
}
}
}
if (read_trace) { /* read data from file for sources that passed the tests above */
//#pragma omp critical
//{
//troutine0 = wallclock_time();
/* read master trace */
//memcpy(&cmaster[nsrc*nfreq], &tracebuffer[rcvSrc[ivsrc].src[isrcm].fpos*nfreq],nfreq*sizeof(complex));
get_sutrace_at_position(fpin, rcvSrc[ivsrc].src[isrcm].fpos, &cmaster[nsrc*nfreq], tracebuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &tread, &tfft, &tmemcpy);
//read_sutrace_at_position(fpin, rcvSrc[ivsrc].src[isrcm].fpos, ivsrc, isrcm, &cmaster[nsrc*nfreq], tracebuffer, trace_in_buffer, &nbuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &trdloc, &tftloc, &tmemcpy);
/* read slave trace */
//ipos = (rcvSrc[ivrcv].src[isrcs].fpos)*nfreq;
//memcpy(&cslaves[nsrc*nfreq], &tracebuffer[ipos],nfreq*sizeof(complex));
get_sutrace_at_position(fpin, rcvSrc[ivrcv].src[isrcs].fpos, &cslaves[nsrc*nfreq], tracebuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &tread, &tfft, &tmemcpy);
//read_sutrace_at_position(fpin, rcvSrc[ivrcv].src[isrcs].fpos, ivrcv, isrcs, &cslaves[nsrc*nfreq], tracebuffer, trace_in_buffer, &nbuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &trdloc, &tftloc, &tmemcpy);
//troutine += wallclock_time()-troutine0;
//}
nsrc++;
}
} /* end of if test that slave and master have same contributing source */
} /* end of nsrcSlave loop */
} /* end of nsrcMaster loop */
//tread_ivs += trdloc;
//tfft_ivs += tftloc;
//tloop += wallclock_time()-t0;
//t1 = wallclock_time();
//fprintf(stderr,"ivrcv = %d out of %d\n",ivrcv, nreceivers);
if (nsrc < nsrc_ctr) nsrc = -1; /* only compute virtual receiver when there are sufficient active sources contributing */
/* correlation of virtual source ivsrc with virtual receiver ivcrv */
if (cohr) {
coherence(cmaster, cslaves, nfreq, nsrc, reps, epsmax, &tcorrloc, verbose);
}
else {
correlate(cmaster, cslaves, nfreq, nsrc, &tcorrloc, verbose);
}
tcorr_ivs += wallclock_time()-t1;
/* summation over the contributing sources */
memset(c,0,nf*sizeof(complex));
for (jsource=0; jsource<nsrc; jsource++) {
if (normsrc == 1) {
rms2 = 0.0;
for (ifreq=0; ifreq<nfreq; ifreq++) {
tmpcc = cslaves[jsource*nfreq+ifreq];
rms2 += tmpcc.r*tmpcc.r+tmpcc.i*tmpcc.i;
}
if (rms2 > 0) sclrms = 1.0/sqrt(rms2);
else sclrms = 1.0;
}
else (sclrms = 1.0);
for (ifreq=0; ifreq<nfreq; ifreq++) {
c[ifreq].r += cslaves[jsource*nfreq+ifreq].r*sclrms;
c[ifreq].i += cslaves[jsource*nfreq+ifreq].i*sclrms;
}
}
//t2_ivs_ivr = wallclock_time();
cr1_fft(&c[0],r,ntfft,1); /* transform virtual trace back to time */
//t3_ivs_ivr = wallclock_time();
//tfft_ivs += t3_ivs_ivr-t2_ivs_ivr;
if (normalize && nsrc>0) scl=sclfft/nsrc; /* normalize by the number of contributing sources */
else scl = sclfft;
if (causal==1) {
for (j=0;j<nt; j++) {
vtrace[j] = r[j]*scl;
}
}
else if (causal==2) {
vtrace[0] = r[0]*scl;
for (j=1;j<nt; j++) {
vtrace[j] = r[ntfft-j]*scl;
}
}
else if (causal==3) {
for (j=1;j<=(nt/2); j++) {
vtrace[nt/2-j] = r[ntfft-j]*scl;
}
for (j=nt/2;j<nt; j++) {
vtrace[j] = r[j-nt/2]*scl;
}
}
else if (causal==4) {
vtrace[0] = r[0]*scl;
for (j=1;j<nt; j++) {
vtrace[j] = 0.5*(r[ntfft-j] + r[j])*scl;
}
} /* one virtual trace (virtual source and virtualreceiver) is now calculated and is written to disk below */
//t4_ivs_ivr = wallclock_time();
//tselect += t4_ivs_ivr-t3_ivs_ivr;
memcpy(&outputhdr,&segy_hdrs[(rcvSrc[ivrcv].src[0].fpos)],TRCBYTES);
outputhdr.fldr = ivsrc+1;
itrace_out=ivsrc*nreceivers+ivrcv+1;
outputhdr.tracl = itrace_out; // wrong value in principle
outputhdr.tracf = ivrcv+1;
outputhdr.sx = sxv;
outputhdr.sy = syv;
outputhdr.selev = svelev;
outputhdr.gx = gxv;
outputhdr.gy = gyv;
outputhdr.sdel = vspeg;
outputhdr.gdel = vrpeg;
outputhdr.gelev = gvelev;
outputhdr.ns = nt;
// outputhdr.offset = (gxv - sxv)/1000;
#pragma omp critical
{
nwrite = fwrite( &outputhdr, 1, TRCBYTES, fpout );
assert (nwrite == TRCBYTES);
nwrite = fwrite( &vtrace[0], sizeof(float), nt, fpout );
assert (nwrite == nt);
fflush(fpout);
}
} /* end of virtual receiver loop */
#pragma omp master
{
if (verbose>=3) {
t3_ivs = wallclock_time();
fprintf(stderr,"Number of OpenMP threads set = %d number=%d\n", omp_get_max_threads(), omp_get_thread_num());
//tlogic_ivs = ((t3_ivs-t1_ivs)-tread_ivs-tfft_ivs-tcorr_ivs-twrite_ivs-tmemcpy);
fprintf(stderr,"************* Timings ************* vshot= %d (ivs = %d)\n", vspeg, ivs);
//fprintf(stderr,"CPU-time read data = %.3f\n", tread_ivs);
//fprintf(stderr,"CPU-time FFT's = %.3f\n", tfft_ivs);
//fprintf(stderr,"CPU-time memcpy = %.3f\n", tmemcpy);
//fprintf(stderr,"CPU-time memset = %.3f\n", tmemset);
//fprintf(stderr,"CPU-time select = %.3f\n", tselect);
//fprintf(stderr,"CPU-time loop = %.3f\n", tloop);
//fprintf(stderr,"CPU-time routine = %.3f\n", troutine);
//fprintf(stderr,"CPU-time correlation = %.3f\n", tcorr_ivs);
//fprintf(stderr,"CPU-time write data = %.3f\n", twrite_ivs);
//fprintf(stderr,"CPU-time logic = %.3f\n", tlogic_ivs);
fprintf(stderr,"Total CPU-time this shot = %.3f\n", t3_ivs-t1_ivs);
fflush(stderr);
}
ivs++;
}
#pragma omp barrier
} /* end of virtual source loop */
free(cmaster);
free(cslaves);
free(vtrace);
free(r);
free(c);
} /* end of parallel region */
fclose(fpin);
fclose(fpout);
if (verbose) {
t3 = wallclock_time();
//tlogic = ((t3-t1)-tread-tfft-tcorr-twrite);
ttotal = t3-t1;
fprintf(stderr,"************* Total Timings ************* \n");
//fprintf(stderr,"CPU-time initilization = %.3f\n", tinit);
//fprintf(stderr,"CPU-time read data = %.3f\n", tread);
//fprintf(stderr,"CPU-time FFT's = %.3f\n", tfft);
//fprintf(stderr,"CPU-time correlation = %.3f\n", tcorr);
//fprintf(stderr,"CPU-time write data = %.3f\n", twrite);
//fprintf(stderr,"CPU-time logic = %.3f\n", tlogic);
fprintf(stderr,"Total CPU-time = %.3f\n", ttotal);
}
/*================ end ================*/
return 0;
}
void get_sutrace_at_position(FILE *fp, size_t itrace, complex *tracedata, complex *tracebuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy)
{
static size_t freebuffer=0;
size_t i, inbuffer, offset, nread, nwrite;
int ret;
float *trace;
float *r;
double t0, t1, t2, t3, t0_t, t1_t;
complex *c;
FILE *fpt;
char filename[1024];
if (nbufmax == 0) {
t0_t = wallclock_time();
//trace = (float *)calloc(nt, sizeof(float));
r = (float *)malloc(ntfft*sizeof(float));
c = (complex *)malloc((ntfft/2+1)*sizeof(complex));
/***** read trace from file *****/
offset = itrace*trace_sz+TRCBYTES;
ret = fseeko(fp, offset, SEEK_SET);
if (ret<0) perror("fseeko");
memset(r,0,ntfft*sizeof(float));
nread = fread(r, sizeof(float), nt, fp);
assert(nread == nt);
t1_t = wallclock_time();
*tread += t1_t-t0_t;
/***** transform to frequency domain *****/
//memcpy(r,&trace[0],nt*sizeof(float)); /* might not be needed if segy_read_traces_c is nice */
rc1_fft(r,c,ntfft,-1);
memcpy(&tracedata[0].r,&c[0].r,nfreq*sizeof(complex));
t2 = wallclock_time();
*tfft += t2-t1_t;
//free(trace);
free(r);
free(c);
}
else { /**** point to trace from buffer *****/
t0 = wallclock_time();
//tracedata = (complex *)&tracebuffer[itrace*nfreq];
memcpy(tracedata,&tracebuffer[itrace*nfreq],nfreq*sizeof(complex));
t1 = wallclock_time();
*tmemcpy += t1-t0;
}
return;
}
/* older implementation when part od traces is kept in memory buffer */
void read_sutrace_at_position_old(FILE *fp, int itrace, int ircvnum, int isrcnum, complex *tracedata, complex *tracebuffer, int *trace_in_buffer, size_t *nbuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy)
{
static size_t freebuffer=0;
size_t i, inbuffer, offset, nread, nwrite;
int ret;
float *trace;
float *r;
double t0, t1, t2, t3, t0_t, t1_t;
complex *c;
FILE *fpt;
char filename[1024];
inbuffer = -1;
//fprintf(stderr,"reading trace %d\n", itrace);
t0_t = wallclock_time();
for (i=0; i<nbufmax; i++) {
if (itrace == trace_in_buffer[i]) {
inbuffer = i;
break;
}
}
t1_t = wallclock_time();
*tread += t1_t-t0_t;
//fprintf(stderr,"itrace=%8d inbuffer=%8d nbufmax=%d ircvnum=%8d isrcnum=%8d time=%f\n", itrace, inbuffer, nbufmax, ircvnum, isrcnum, t1_t-t0_t);
if (inbuffer == -1) {
t0 = wallclock_time();
trace = (float *)calloc(nt, sizeof(float));
r = (float *)malloc(ntfft*sizeof(float));
c = (complex *)malloc((ntfft/2+1)*sizeof(complex));
/***** read trace from file *****/
offset = itrace*trace_sz+TRCBYTES;
ret = fseeko(fp, offset, SEEK_SET);
if (ret<0) perror("fseeko");
nread = fread(trace, sizeof(float), nt, fp);
assert(nread == nt);
/***** transform to frequency domain *****/
memset(r,0,ntfft*sizeof(float));
memcpy(r,&trace[0],nt*sizeof(float)); /* might not be needed if segy_read_traces_c is nice */
t1 = wallclock_time();
*tread += t1-t0;
rc1_fft(r,c,ntfft,-1);
t2 = wallclock_time();
*tfft += t2-t1;
memcpy(&tracedata[0].r,&c[0].r,nfreq*sizeof(complex));
/***** reset buffer counter when buffer is full *****/
if (nbufmax != 0) {
t1 = wallclock_time();
if (freebuffer<nbufmax-1) {
freebuffer++;
}
else freebuffer=0;
//fprintf(stderr,"freebuffer = %d\n", freebuffer);
//fprintf(stderr,"nbuffer = %d\n", *nbuffer);
memcpy(&tracebuffer[freebuffer*nfreq].r,&c[0].r,nfreq*sizeof(complex));
trace_in_buffer[freebuffer]=itrace;
t2 = wallclock_time();
*tmemcpy += t2-t1;
}
free(trace);
free(r);
free(c);
t1 = wallclock_time();
*tread += t1-t2;
}
else { /**** read trace from buffer *****/
t0 = wallclock_time();
tracedata = (complex *)&tracebuffer[inbuffer*nfreq]; /* this is better, but need some more changes TODO */
//fprintf(stderr,"read from inbuffer=%d nbufmax=%d\n", inbuffer,nbufmax);
//memcpy(&tracedata[0].r,&tracebuffer[inbuffer*nfreq].r,nfreq*sizeof(complex));
t1 = wallclock_time();
*tread += t1-t0;
}
return;
}