Skip to content
Snippets Groups Projects
corrvir.c 28.15 KiB
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include "par.h"
#include "corr.h"
#include "segy.h" 

#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define MAX(x,y) ((x) > (y) ? (x) : (y))

double wallclock_time(void);

int omp_get_max_threads(void);
int omp_get_num_threads(void);
int omp_get_thread_num(void);
void omp_set_num_threads(int num_threads);

typedef struct { /* complex number */
        float r,i;
} complex;

void read_sutrace_at_position(FILE *fp, int itrace, int ircvnum, int isrcnum, complex *tracedata, complex *tracebuffer, int *trace_in_buffer, size_t *nbuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy);

void get_sutrace_at_position(FILE *fp, size_t itrace, complex *tracedata, complex *tracebuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy);

int optncr(int n);
void cr1_fft(complex *cdata, float *data, int n, int sign);
void rc1_fft(float *rdata, complex *cdata, int n, int sign);

int getFileInfo(char *file_name, int *n1, int *n2, float *d1, float *d2, int verbose);

int computeFFT(float *datain, int ld1, int nStart, int ntfft, int nfreq, int nstation, complex *cA, complex *cB, double *tfft, int verbose);

int correlate(complex *cmaster, complex *cslaves, int nfreq, int ncor, double *tcorr, int verbose);
int coherence(complex *cmaster, complex *cslaves, int nfreq, int ncor, float reps, float epsmax, double *tcorr, int verbose);

/**************
* ntc output samples of correlation result
* note that nt (the number of samples read by the IO routine)
* should be 2*ntc and a number efficient for FFT's
*/

/*********************** self documentation **********************/
char *sdoc[] = {
" ",
" corrvir - correlation to compute virtual shot records",
"  ",
" corrvir file_shots= file_out= [optional parameters]",
"  ",
" Required parameters: ",
" ",
"   file_shots= ....... filename of shots used to calculate virtual shots",
"   file_out= ......... filename of the output file with the virtual shot records",
"   nsources= ......... maximum number of actual shots in a common-receiver gather",
"   nreceivers= ....... maximum number of receiver positions",
"   xsrcv= ............ defines virtual-source x-positions",
"   ysrcv= ............ defines virtual-source y-positions",
" ",
" Optional parameters: ",
" ",
"   dtolerr = 0.1 ....... tolerated error distance (in m) between above requested virtual-source position and actual receiver positions",
"   fmax = 70 ........... maximum frequency to use in correlation",
"   nbm = 4 ............. amount of memory (in GB) to buffer traces for reusage",
"   nsrc_ctr = 1 ...... . minumum number of sources to contribute to Fresnel stack",
"   normsrc = 0 ....... . normalize each correlated trace (before source summation)",
"   normalize = 1 ....... normalize the virtual trace with the number of contributing sources",
"   cohr = 0 ............ use cross-coherence:  {f1(w)*f2(w)/(|f1(w)|*|f2(w)|+eps)} ",
"   reps = 0.0 .......... relative stabilization factor for cohr; eps=reps*|f1(w)|*|f2(w)|",
"   epsmax = 0.1 ........ cut off range above which spectrum is flattened",
"   src_sel = 0 ......... use all sources that are contributing ",
"           = 1 ......... use only sources that are outside square area enclosing master and slave positions",
"           = 2 ......... use only sources that are in donut",	
"   bmin = 1 ............ define inner circle of donut",
"   bmax = 4 ............ define outer circle of donut",
" ",
" OUTPUT DEFINITION ",
"   causal=1 .......... output causal(1), non-causal(2), both(3), or summed(4)",
"   verbose=0 ......... silent option; >0 displays info",
" ",
" Notes: ",
"    ntc is the number of output samples of correlation result",
"    nt  is the number of samples per trace read by the IO routine, should be 2*ntc and a number efficient for FFT's",
"    For causal=3: t=0 is sample nt/2 and trace goes from [-nt/2*dt - 0 - nt/2*dt]",
" ",
" Authors  : Jan Thorbecke : 2013 (j.w.thorbecke@tudelft.nl)",
"            Boris Boullenger : 2015 (b.boullenger@tudelft.nl)",
" ",
NULL};
/**************** end self doc ***********************************/

int main (int argc, char **argv)
{
	int	i, j, k, ret, nshots, maxnfiles, nfiles;
	int	size, n1, n2, ntfft, nf, ifreq;
	int     verbose, causal;
	int     nt, nstation, ntc, nb, istation, jstation;
	int     pgsz, istep, nsrc_ctr, nsrc, nsrcMaster, nsrcSlave;
	size_t  outputSize, nwrite, cdatainSize, datainSize, cdataoutSize;
	size_t  jstep, lsz, offset, trace_sz, nread;
	int     ierr, itrace_out, done, cohr, normalize, normsrc;
	int	storedrcv, storedsrc, src_sel, read_trace;
	int 	nsources, nreceivers, jsource, ivirtual, ivrcv, ivsrc, isrcs, isrcm, ivs;
	int	sxv, syv, sx, sy, svelev, sxm, sym, sxs, sys;
	int	cx3, cy3, gxv, gyv, gvelev, gx, gy, gelev;
	double	cx1, cy1, cx2, cy2;
	int	distmc2, distsc2, distsc2min, distsc2max;
	int 	bmin, bmax;  
	int 	rpeg, speg, vrpeg, vspeg;
	int	xmin, xmax, ymin, ymax;
	int     *trace_in_buffer;
	size_t  nbuffer, nbufmax, nbufmemory;
	size_t ntrace, bufsize, nsamp, ipos, nfreq;
	int nbm;
	crgPos *rcvSrc;
	int ircv, *isrc, *nsrc_crg, *vsrc;
	float	dx, fmax, df, d1, d2, scl, fscalco, sclfft;
	float   dt, reps, epsmax;
	float 	dtolerr;
	float	*r, *vtrace;
	float 	diffx, diffy, dd, ddmin;
	float   *xsrcv, *ysrcv;
	float   rms2, sclrms;
	complex tmpcc; 
	int     nxsrcv, nysrcv, nvsrc;
	int	 	scalco;
	complex *c, *cmaster, *cslaves, *tracebuffer;
	double  t0, t1, t2, t3, ttotal, tinit, tread, tfft, tcorr, twrite, tlogic, trdloc, tftloc, tcorrloc, tmemcpy, tselect, tmemset, tloop, troutine0, troutine;
	double  tread_ivs, tfft_ivs, tcorr_ivs, twrite_ivs, tlogic_ivs, t1_ivs, t3_ivs, t1_ivs_ivr, t2_ivs_ivr, t3_ivs_ivr, t4_ivs_ivr, t5_ivs_ivr;
	char	*file_shots, *file_out, filename[1024], basename[1200], base[1200], *pbase;
	int     pe=0, root_pe=0, npes=1, ipe, size_s;
	float *trace;
	FILE *fpin, *fpout;
	segy hdr;
	segy *segy_hdrs;
	segy outputhdr;

	t0 = wallclock_time();
	tinit = twrite = tread = tcorr = tfft = tlogic = 0.0;
	initargs(argc, argv);
	requestdoc(1);
	
	if (!getparint("verbose", &verbose)) verbose = 0;
	if (!getparstring("file_shots", &file_shots)) file_shots=NULL;
	assert(file_shots != NULL);
	if (!getparstring("file_out", &file_out)) file_out=NULL;
	if (!getparint("nsources", &nsources)) nsources = 0;
	assert(nsources!=0);
	if (!getparint("nreceivers", &nreceivers)) nreceivers = 0;
	assert(nreceivers!=0);
	if (!getparint("nsrc_ctr", &nsrc_ctr)) nsrc_ctr = 1;
	if (!getparint("normsrc", &normsrc)) normsrc = 0;
	if (!getparint("causal", &causal)) causal = 1;
	if (!getparint("normalize", &normalize)) normalize = 1;
	if (!getparint("cohr", &cohr)) cohr = 0;
	if (!getparint("src_sel", &src_sel)) src_sel = 0;
	if (!getparint("bmin", &bmin)) bmin = 1;
	if (!getparint("nbm", &nbm)) nbm = 0;
	if (!getparint("bmax", &bmax)) bmax = 4;
	if (!getparfloat("reps", &reps)) reps = 0.0;
	if (!getparfloat("epsmax", &epsmax)) epsmax = 0.1;
	if (!getparfloat("fmax", &fmax)) fmax = 70;
	if (!getparfloat("dtolerr", &dtolerr)) dtolerr = 0.1;
	nxsrcv = countparval("xsrcv");
	nysrcv = countparval("ysrcv");
	if (nxsrcv != nysrcv) {
		verr("Number of sources in array xsrcv (%d), ysrcv(%d) are not equal",nxsrcv, nysrcv);
	}
	xsrcv = (float *)malloc(nxsrcv*sizeof(float));
	ysrcv = (float *)malloc(nxsrcv*sizeof(float));

	getparfloat("xsrcv", xsrcv);
	getparfloat("ysrcv", ysrcv);

	getFileInfo(file_shots, &n1, &n2, &d1, &d2, verbose);
	ntrace = n2;
	nt = n1;
	dt = d1;
	fpin = fopen(file_shots, "r");
	assert( fpin != NULL);
	nread = fread( &hdr, 1, TRCBYTES, fpin );
	assert(nread == TRCBYTES);
	fpout = fopen(file_out, "w+");

	fprintf(stderr,"nt=%d dt=%f ntrace=%ld\n",nt, dt, (long) ntrace);

    ntfft = optncr(2*nt);
	nf    = ntfft/2+1;
	df    = 1.0/(ntfft*dt);
	nfreq = MIN(nf,(int)(fmax/df)+1);
	sclfft= 1.0/((float)ntfft);
	trace_sz = sizeof(float)*(n1)+TRCBYTES;
	segy_hdrs = (segy *)calloc(ntrace,sizeof(segy));
	assert(segy_hdrs != NULL);
	ret = fseeko(fpin , 0, SEEK_SET);

	t1 = wallclock_time();
	tinit += t1-t0;

	/* check if data is fully buffered */
	if (nbm==0) { /* read only headers */
		nbufmax = 0;
		for (i=0; i<ntrace; i++) {
			if(i % 100000 == 0) fprintf(stderr,"i=%d out of %d\n", i, ntrace);
			offset = i*trace_sz;
			ret = fseeko(fpin , offset, SEEK_SET);
			if (ret<0) perror("fseeko");
   			nread = fread( &segy_hdrs[i], 1, TRCBYTES, fpin );
		}
	} 
	else { /* buffer is allocated to store all data traces */
        bufsize = (size_t)(ntrace*nfreq*sizeof(complex));
		nbufmax = ntrace;
		fprintf(stderr,"memory allocated to buffer all traces = %.2f GB\n", (float)(bufsize/(1024*1024*1024)));
		tracebuffer = (complex *)malloc(bufsize);
		assert (tracebuffer != NULL);

		/*================ Read geometry of all traces in file_shots ================*/

		r = (float *)calloc(ntfft,sizeof(float));
		c = (complex *)calloc((nf),sizeof(complex));
		for (i=0; i<ntrace; i++) {
			if(i % 100000 == 0) fprintf(stderr,"i=%d out of %d\n", i, ntrace);
			offset = i*trace_sz;
			ret = fseeko(fpin , offset, SEEK_SET);
			if (ret<0) perror("fseeko");
   			nread = fread( &segy_hdrs[i], 1, TRCBYTES, fpin );
			assert(nread == TRCBYTES);
			
			nsamp =  segy_hdrs[i].ns;
			memset(r,0,ntfft*sizeof(float));
       		nread = fread( r, sizeof(float), nt, fpin );
       		assert(nread == nt);
        	rc1_fft(r,c,ntfft,-1);
        	memcpy(&tracebuffer[i*nfreq],&c[0],nfreq*sizeof(complex));
		}
		free(r);
		free(c);
	}
	t2 = wallclock_time();
	tread += t2-t1;

	
	/*================ Sort traces in common receiver gathers ================*/
	
	/* nreceivers is total number of receiver positions which is (much) smaller than the number of traces in the file */
	/* Every receiver position has a number of sources: common receiver gather */

	rcvSrc = (crgPos *)calloc(nreceivers,sizeof(crgPos));
	for (i=0; i<nreceivers; i++) {
		rcvSrc[i].src = (traceCoord *)calloc(nsources,sizeof(traceCoord));
	}
		
	rcvSrc[0].gx = segy_hdrs[0].gx;
	rcvSrc[0].gy = segy_hdrs[0].gy;
	rcvSrc[0].rpeg = segy_hdrs[0].gdel;
	rcvSrc[0].gelev = segy_hdrs[0].gelev;
	rcvSrc[0].src[0].x = segy_hdrs[0].sx;
	rcvSrc[0].src[0].y = segy_hdrs[0].sy;
	rcvSrc[0].src[0].peg = segy_hdrs[0].sdel;
	scalco = segy_hdrs[0].scalco;
	rcvSrc[0].src[0].fpos = 0;
	rcvSrc[0].nsrc = 1;
	ircv=1;
	if (scalco < 0) fscalco = 1.0/(-1.0*scalco);
	else fscalco = (float)scalco;

	for (jstation=0; jstation<ntrace; jstation++) {
		gx = segy_hdrs[jstation].gx;
		gy = segy_hdrs[jstation].gy;
		rpeg = segy_hdrs[jstation].gdel;
		gelev = segy_hdrs[jstation].gelev;
		sx = segy_hdrs[jstation].sx;
		sy = segy_hdrs[jstation].sy;
		speg = segy_hdrs[jstation].sdel;
		
		/* bookkeeping: find out how many sources are contributing to each receiver position */
		storedrcv = 0;
		for (i=0; i<ircv; i++) {
			if ( (rcvSrc[i].gx == gx) && (rcvSrc[i].gy == gy) ) {
				storedrcv = 1;
				storedsrc = 0;
				for (j=0; j<rcvSrc[i].nsrc; j++) {
					if ( (rcvSrc[i].src[j].x == sx) && (rcvSrc[i].src[j].y == sy) ) {
						storedsrc=1;
						break;
					}
				}
				if ( !storedsrc ) {
					rcvSrc[i].src[rcvSrc[i].nsrc].x = sx;
					rcvSrc[i].src[rcvSrc[i].nsrc].y = sy;
					rcvSrc[i].src[rcvSrc[i].nsrc].peg = speg;
					rcvSrc[i].src[rcvSrc[i].nsrc].fpos = jstation;
					rcvSrc[i].nsrc += 1;
				}
				break;
			}
		}
		if ( !storedrcv ) {
			if (verbose>=2) fprintf(stderr,"for jstation = %d  number of receiver positions %d\n",jstation, ircv+1);
			rcvSrc[ircv].gx = gx;
			rcvSrc[ircv].gy = gy;
			rcvSrc[ircv].rpeg = rpeg;
			rcvSrc[ircv].gelev = gelev;
			rcvSrc[ircv].src[0].x = sx;
			rcvSrc[ircv].src[0].y = sy;
			rcvSrc[ircv].src[0].peg = speg;
			rcvSrc[ircv].src[0].fpos = jstation;
			rcvSrc[ircv].nsrc = 1;
			ircv++;
		}
	}
	fprintf(stderr,"number of receiver positions in file = %d \n",ircv);
	nreceivers=ircv;
	nsources = 0;
	nsrc_crg = (int *)calloc(nreceivers,sizeof(int));
	for (i=0; i<nreceivers; i++) {
		nsrc_crg[i] = rcvSrc[i].nsrc;
		nsources = MAX(nsources,rcvSrc[i].nsrc);
        	if(verbose>=2) fprintf(stderr,"number of sources in receiver position %d = %d \n",i+1, rcvSrc[i].nsrc);
	}

	
	
	/*================ Find receiver positions corresponding to requested virtual-sources ================*/
	if (nxsrcv) {
		nvsrc = nxsrcv;
		vsrc = (int *)malloc(nvsrc*sizeof(int));
		for (j=0; j<nxsrcv; j++) {
			diffx = xsrcv[j]- rcvSrc[0].gx*fscalco;
			diffy = ysrcv[j]- rcvSrc[0].gy*fscalco;
			ddmin = diffx*diffx+diffy*diffy;
			vsrc[j] = 0;
			for (i=1; i<nreceivers; i++) {
				diffx = xsrcv[j]- rcvSrc[i].gx*fscalco;
				diffy = ysrcv[j]- rcvSrc[i].gy*fscalco;
				dd = diffx*diffx+diffy*diffy;
				if (dd < ddmin ) {
					ddmin = dd; 
					vsrc[j] = i;
				}
			}
			fprintf(stderr,"ddmin for requested vsrc position (%.2f,%.2f) = %.2f\n",xsrcv[j],ysrcv[j],ddmin);
			if (ddmin <= dtolerr) {
				xsrcv[j]=rcvSrc[vsrc[j]].gx*fscalco;
				ysrcv[j]=rcvSrc[vsrc[j]].gy*fscalco;
				fprintf(stderr,"Requested vsrc position coincide with rcv position (%.2f,%.2f)\n",xsrcv[j],ysrcv[j]);		 	
			}
			else {
			fprintf(stderr,"Error: Requested vsrc position do not coincide with a rcv position\n");
			}
		}
	}
	else { /* all receivers are made virtual sources */
		nvsrc = nreceivers;
		vsrc = (int *)malloc(nvsrc*sizeof(int));
		for (i=0; i<nreceivers; i++) {
			vsrc[i] = i;
		}
	}

	/*================ initializations ================*/

	
	/*================ for nvirtual shots find suitable receivers ================*/
	t1 = wallclock_time();
	tinit += t1-t2;	

	#pragma omp parallel default(shared) \
	private(cmaster,cslaves,vtrace,r,c, t1_ivs, t3_ivs) \
	private(ivrcv, gxv, gyv, vrpeg, gelev, nsrcSlave) \
	private(xmin,ymin,xmax,ymax, cx1, cy1, cx2, cy2, cx3, cy3, distmc2, distsc2min, distsc2max, distsc2) \
	private(nsrc, trdloc, tftloc, tcorrloc) \
	private(isrcm, isrcs, sxm, sym, sxs, sys, read_trace) \
	private(jsource, rms2, ifreq, tmpcc, sclrms, scl) \
	private(j, outputhdr, itrace_out, nwrite)
{

	cmaster = (complex *)malloc(nsources*nfreq*sizeof(complex));
	cslaves = (complex *)malloc(nsources*nfreq*sizeof(complex));
	vtrace = (float *)malloc(nt*sizeof(float));
	r = (float *)malloc(ntfft*sizeof(float));
	c = (complex *)malloc((ntfft/2+1)*sizeof(complex));
	//itrace_out=0;

        fprintf(stderr,"Number of OpenMP threads set  = %d number=%d\n", omp_get_max_threads(), omp_get_thread_num());

	ivs = 0;
	while (ivs<nvsrc) {/* loop over the number of virtual source positions to be created */
		
		t1_ivs = wallclock_time();
		//tread_ivs = tfft_ivs = tcorr_ivs = twrite_ivs = tlogic_ivs = tmemcpy = tselect = tmemset = tloop = troutine = 0.0;

		ivsrc=vsrc[ivs];
		sxv = rcvSrc[ivsrc].gx;
		syv = rcvSrc[ivsrc].gy;
		vspeg = rcvSrc[ivsrc].rpeg;
		svelev = rcvSrc[ivsrc].gelev;
		nsrcMaster = nsrc_crg[ivsrc];

		#pragma omp for
		for (ivrcv=0; ivrcv<nreceivers; ivrcv++) {
			
			//t1_ivs_ivr = wallclock_time();
			
			gxv = rcvSrc[ivrcv].gx;
			gyv = rcvSrc[ivrcv].gy;
			vrpeg = rcvSrc[ivrcv].rpeg;
			gvelev = rcvSrc[ivrcv].gelev;
			nsrcSlave = nsrc_crg[ivrcv];
			
			if (src_sel == 1) {
				xmin = MIN(gxv,sxv);
				ymin = MIN(gyv,syv);
				xmax = MAX(gxv,sxv);
				ymax = MAX(gyv,syv);
			}

			if (src_sel == 2) {
				cx1 = 0.5*(sxv+gxv);
				cy1 = 0.5*(syv+gyv);
				cx2 = ceil(cx1);
				cy2 = ceil(cy1);
				cx3 = cx2;
				cy3 = cy2;
				distmc2 = (sxv-cx3)*(sxv-cx3)+(syv-cy3)*(syv-cy3);
				distsc2min = distmc2+bmin*bmin*distmc2;
				distsc2max = distmc2+bmax*bmax*distmc2;
				distsc2 = (sxv-cx3)*(sxv-cx3)+(syv-cy3)*(syv-cy3);
			}

			//fprintf(stderr,"ivsrc=%d sxv=%d nsrcMaster=%d ivrcv=%d gxv=%d nsrcSlave=%d\n", ivsrc, sxv, nsrcMaster, ivrcv, gxv, nsrcSlave);
			
			nsrc = 0;
			memset(cslaves,0,nfreq*nsources*sizeof(complex));
			memset(cmaster,0,nfreq*nsources*sizeof(complex));
			//t0 = wallclock_time();
			//tmemset += t0-t1_ivs_ivr;

			//trdloc = tftloc = tcorrloc = 0.0;
			for (isrcm=0; isrcm<nsrcMaster; isrcm++) { /* for all traces find which traces are recorded by both virtual source and receiver position */
				sxm = rcvSrc[ivsrc].src[isrcm].x;
				sym = rcvSrc[ivsrc].src[isrcm].y;
				for (isrcs=0; isrcs<nsrcSlave; isrcs++) {
					sxs = rcvSrc[ivrcv].src[isrcs].x;
					sys = rcvSrc[ivrcv].src[isrcs].y;
					if ( (sxm == sxs) && (sym == sys) ) { /* master and slave have same contributing source */

						/* if the source is outside the inclosing area of the virtual-receiver and virtual-source coordinates 
				 		   then the source is accepted for contribution to the final summation */
						if (src_sel == 0) {
							read_trace=1;
						}
						else if (src_sel == 1) {
							read_trace = 0;
							if ( !( (sxs>xmin) && (sxs<xmax) ) ) {
								if ( !( (sys>ymin) && (sys<ymax) ) ) {
									read_trace=1;
								}
							}
						}
						else if (src_sel == 2) {
							read_trace = 0;
							if ( !( (distsc2 < distsc2min) && (distsc2 > distsc2max) ) ) {
								read_trace = 1;
							}
						}
						else if (src_sel == 3) {
							read_trace = 0;
							if ( (sxv < gxv) && (syv < gyv) ) {
								if ( (sxm < sxv) && (sym < syv) ) {
									read_trace = 1;
								}
							}
							else if ( (sxv < gxv) && (syv > gyv) ) {
								if ( (sxm < sxv) && (sym > syv) ) {
									read_trace = 1;
								}
							}
							else if ( (sxv > gxv) && (syv < gyv) ) {
								if ( (sxm > sxv) && (sym < syv) ) {
									read_trace = 1;
								}
							}
							else if ( (sxv > gxv) && (syv > gyv) ) {
								if ( (sxm > sxv) && (sym > syv) ) {
									read_trace = 1;
								}
							}
							
						}
						else if (src_sel == 4) {
							read_trace = 0;
							if ( (sxv < gxv) && (syv < gyv) ) {
								if ( (sxm < gxv) && (sym < gyv) ) {
									read_trace = 1;
								}
							}
							else if ( (sxv < gxv) && (syv > gyv) ) {
								if ( (sxm < gxv) && (sym > gyv) ) {
									read_trace = 1;
								}
							}
							else if ( (sxv > gxv) && (syv < gyv) ) {
								if ( (sxm > gxv) && (sym < gyv) ) {
									read_trace = 1;
								}
							}
							else if ( (sxv > gxv) && (syv > gyv) ) {
								if ( (sxm > gxv) && (sym > gyv) ) {
									read_trace = 1;
								}
							}
						}
						if (read_trace) { /* read data from file for sources that passed the tests above */
//#pragma omp critical
//{
							//troutine0 = wallclock_time();
							/* read master trace */
							//memcpy(&cmaster[nsrc*nfreq], &tracebuffer[rcvSrc[ivsrc].src[isrcm].fpos*nfreq],nfreq*sizeof(complex));
							get_sutrace_at_position(fpin, rcvSrc[ivsrc].src[isrcm].fpos, &cmaster[nsrc*nfreq], tracebuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &tread, &tfft, &tmemcpy);
							//read_sutrace_at_position(fpin, rcvSrc[ivsrc].src[isrcm].fpos, ivsrc, isrcm, &cmaster[nsrc*nfreq], tracebuffer, trace_in_buffer, &nbuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &trdloc, &tftloc, &tmemcpy);

							/* read slave trace */
							//ipos = (rcvSrc[ivrcv].src[isrcs].fpos)*nfreq;
							//memcpy(&cslaves[nsrc*nfreq], &tracebuffer[ipos],nfreq*sizeof(complex));
							get_sutrace_at_position(fpin, rcvSrc[ivrcv].src[isrcs].fpos, &cslaves[nsrc*nfreq], tracebuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &tread, &tfft, &tmemcpy);
							//read_sutrace_at_position(fpin, rcvSrc[ivrcv].src[isrcs].fpos, ivrcv, isrcs, &cslaves[nsrc*nfreq], tracebuffer, trace_in_buffer, &nbuffer, ntfft, nt, nfreq, nbufmax, trace_sz, &trdloc, &tftloc, &tmemcpy);
							//troutine += wallclock_time()-troutine0;
//}
							nsrc++;
						}
					} /* end of if test that slave and master have same contributing source */
				} /* end of nsrcSlave loop */
			} /* end of nsrcMaster loop */
			//tread_ivs += trdloc;
			//tfft_ivs += tftloc;
			//tloop += wallclock_time()-t0;
			//t1 = wallclock_time();
			//fprintf(stderr,"ivrcv = %d out of %d\n",ivrcv, nreceivers);

			if (nsrc < nsrc_ctr) nsrc = -1; /* only compute virtual receiver when there are sufficient active sources contributing */

			/* correlation of virtual source ivsrc with virtual receiver ivcrv */
			if (cohr) {
				coherence(cmaster, cslaves, nfreq, nsrc, reps, epsmax, &tcorrloc, verbose);
			}
			else {
				correlate(cmaster, cslaves, nfreq, nsrc, &tcorrloc, verbose);
			}
			tcorr_ivs += wallclock_time()-t1;
			
			/* summation over the contributing sources */
			memset(c,0,nf*sizeof(complex));
			for (jsource=0; jsource<nsrc; jsource++) {
				if (normsrc == 1) {
					rms2 = 0.0;
					for (ifreq=0; ifreq<nfreq; ifreq++) {
						tmpcc = cslaves[jsource*nfreq+ifreq];
						rms2 += tmpcc.r*tmpcc.r+tmpcc.i*tmpcc.i;
					}
					if (rms2 > 0) sclrms = 1.0/sqrt(rms2);
					else sclrms = 1.0;
				}
				else (sclrms = 1.0);
				for (ifreq=0; ifreq<nfreq; ifreq++) {
					c[ifreq].r += cslaves[jsource*nfreq+ifreq].r*sclrms;
					c[ifreq].i += cslaves[jsource*nfreq+ifreq].i*sclrms;
				}
			}
		
			//t2_ivs_ivr = wallclock_time();

			cr1_fft(&c[0],r,ntfft,1); /* transform virtual trace back to time */
			
			//t3_ivs_ivr = wallclock_time();
			//tfft_ivs += t3_ivs_ivr-t2_ivs_ivr;
			
			if (normalize && nsrc>0) scl=sclfft/nsrc; /* normalize by the number of contributing sources */
			else scl = sclfft;
			
			if (causal==1) {
				for (j=0;j<nt; j++) {
					vtrace[j] = r[j]*scl;
				}
			}
			else if (causal==2) {
				vtrace[0] = r[0]*scl;
				for (j=1;j<nt; j++) {
					vtrace[j] = r[ntfft-j]*scl;
				}
			}
			else if (causal==3) {
				for (j=1;j<=(nt/2); j++) {
					vtrace[nt/2-j] = r[ntfft-j]*scl;
				}
				for (j=nt/2;j<nt; j++) {
					vtrace[j] = r[j-nt/2]*scl;
				}
			}
			else if (causal==4) {
				vtrace[0] = r[0]*scl;
				for (j=1;j<nt; j++) {
					vtrace[j] = 0.5*(r[ntfft-j] + r[j])*scl;
				}
			} /* one virtual trace (virtual source and virtualreceiver) is now calculated and is written to disk below */
			
			//t4_ivs_ivr = wallclock_time();
			//tselect += t4_ivs_ivr-t3_ivs_ivr;
			
			memcpy(&outputhdr,&segy_hdrs[(rcvSrc[ivrcv].src[0].fpos)],TRCBYTES);
			outputhdr.fldr = ivsrc+1;
			itrace_out=ivsrc*nreceivers+ivrcv+1;
			outputhdr.tracl = itrace_out; // wrong value in principle
			outputhdr.tracf = ivrcv+1;
			outputhdr.sx = sxv;
			outputhdr.sy = syv;
			outputhdr.selev = svelev;
			outputhdr.gx = gxv;
			outputhdr.gy = gyv;
			outputhdr.sdel = vspeg;
			outputhdr.gdel = vrpeg;
			outputhdr.gelev = gvelev;
			outputhdr.ns = nt;
			// outputhdr.offset = (gxv - sxv)/1000;
			#pragma omp critical
{
			nwrite = fwrite( &outputhdr, 1, TRCBYTES, fpout );
			assert (nwrite == TRCBYTES);
			nwrite = fwrite( &vtrace[0], sizeof(float), nt, fpout );
			assert (nwrite == nt);
			fflush(fpout);
}
		} /* end of virtual receiver loop */

		#pragma omp master
{
		if (verbose>=3) {
			t3_ivs = wallclock_time();
        		fprintf(stderr,"Number of OpenMP threads set  = %d number=%d\n", omp_get_max_threads(), omp_get_thread_num());
			//tlogic_ivs = ((t3_ivs-t1_ivs)-tread_ivs-tfft_ivs-tcorr_ivs-twrite_ivs-tmemcpy);
			fprintf(stderr,"************* Timings ************* vshot= %d (ivs = %d)\n", vspeg, ivs);
			//fprintf(stderr,"CPU-time read data         = %.3f\n", tread_ivs);
			//fprintf(stderr,"CPU-time FFT's             = %.3f\n", tfft_ivs);
			//fprintf(stderr,"CPU-time memcpy            = %.3f\n", tmemcpy);
			//fprintf(stderr,"CPU-time memset            = %.3f\n", tmemset);
			//fprintf(stderr,"CPU-time select            = %.3f\n", tselect);
			//fprintf(stderr,"CPU-time loop              = %.3f\n", tloop);
			//fprintf(stderr,"CPU-time routine           = %.3f\n", troutine);
			//fprintf(stderr,"CPU-time correlation       = %.3f\n", tcorr_ivs);
			//fprintf(stderr,"CPU-time write data        = %.3f\n", twrite_ivs);
			//fprintf(stderr,"CPU-time logic             = %.3f\n", tlogic_ivs);
			fprintf(stderr,"Total CPU-time this shot   = %.3f\n", t3_ivs-t1_ivs);
			fflush(stderr);
		}
		ivs++;
}
		#pragma omp barrier
	} /* end of virtual source loop */

	free(cmaster);
	free(cslaves);
	free(vtrace);
	free(r);
	free(c);

} /* end of parallel region */

	fclose(fpin);
	fclose(fpout);

	if (verbose) {
		t3 = wallclock_time();
		//tlogic = ((t3-t1)-tread-tfft-tcorr-twrite);
		ttotal = t3-t1;
		fprintf(stderr,"************* Total Timings ************* \n");
		//fprintf(stderr,"CPU-time initilization     = %.3f\n", tinit);
		//fprintf(stderr,"CPU-time read data         = %.3f\n", tread);
		//fprintf(stderr,"CPU-time FFT's             = %.3f\n", tfft);
		//fprintf(stderr,"CPU-time correlation       = %.3f\n", tcorr);
		//fprintf(stderr,"CPU-time write data        = %.3f\n", twrite);
		//fprintf(stderr,"CPU-time logic             = %.3f\n", tlogic);
		fprintf(stderr,"Total CPU-time             = %.3f\n", ttotal);
	}
/*================ end ================*/
	return 0;
}

void get_sutrace_at_position(FILE *fp, size_t itrace, complex *tracedata, complex *tracebuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy)
{
	static size_t freebuffer=0;
	size_t i, inbuffer, offset, nread, nwrite;
	int ret;
	float *trace;
	float *r;
	double t0, t1, t2, t3, t0_t, t1_t;
	complex *c;
	FILE *fpt;
	char filename[1024];
	if (nbufmax == 0) {
		t0_t = wallclock_time();	
		//trace = (float *)calloc(nt, sizeof(float));
		r = (float *)malloc(ntfft*sizeof(float));
		c = (complex *)malloc((ntfft/2+1)*sizeof(complex));

		/***** read trace from file *****/
       	offset = itrace*trace_sz+TRCBYTES;
       	ret = fseeko(fp, offset, SEEK_SET);
       	if (ret<0) perror("fseeko");
		memset(r,0,ntfft*sizeof(float));
       	nread = fread(r, sizeof(float), nt, fp);
       	assert(nread == nt);
		t1_t = wallclock_time();
		*tread += t1_t-t0_t;

		/***** transform to frequency domain *****/

		//memcpy(r,&trace[0],nt*sizeof(float)); /* might not be needed if segy_read_traces_c is nice */
		rc1_fft(r,c,ntfft,-1);
		memcpy(&tracedata[0].r,&c[0].r,nfreq*sizeof(complex));

		t2 = wallclock_time();
		*tfft += t2-t1_t;

		//free(trace);
		free(r);
		free(c);
	}
	else {  /**** point to trace from buffer *****/
		t0 = wallclock_time();
		//tracedata = (complex *)&tracebuffer[itrace*nfreq]; 
		memcpy(tracedata,&tracebuffer[itrace*nfreq],nfreq*sizeof(complex));
		t1 = wallclock_time();
		*tmemcpy += t1-t0;
	}
	return;
}

/* older implementation when part od traces is kept in memory buffer */

void read_sutrace_at_position_old(FILE *fp, int itrace, int ircvnum, int isrcnum, complex *tracedata, complex *tracebuffer, int *trace_in_buffer, size_t *nbuffer, int ntfft, int nt, int nfreq, size_t nbufmax, size_t trace_sz, double *tread, double *tfft, double *tmemcpy)
{
	static size_t freebuffer=0;
	size_t i, inbuffer, offset, nread, nwrite;
	int ret;
	float *trace;
	float *r;
	double t0, t1, t2, t3, t0_t, t1_t;
	complex *c;
	FILE *fpt;
	char filename[1024];

	inbuffer = -1;
	//fprintf(stderr,"reading trace %d\n", itrace);

	t0_t = wallclock_time();	
	for (i=0; i<nbufmax; i++) {
		if (itrace == trace_in_buffer[i]) {
			inbuffer = i;
			break;
		}
	}
	t1_t = wallclock_time();
	*tread += t1_t-t0_t;
	//fprintf(stderr,"itrace=%8d inbuffer=%8d nbufmax=%d ircvnum=%8d isrcnum=%8d time=%f\n", itrace, inbuffer, nbufmax, ircvnum, isrcnum, t1_t-t0_t);

	if (inbuffer == -1) {
		t0 = wallclock_time();
		trace = (float *)calloc(nt, sizeof(float));
		r = (float *)malloc(ntfft*sizeof(float));
		c = (complex *)malloc((ntfft/2+1)*sizeof(complex));

		/***** read trace from file *****/
        	offset = itrace*trace_sz+TRCBYTES;
        	ret = fseeko(fp, offset, SEEK_SET);
        	if (ret<0) perror("fseeko");
        	nread = fread(trace, sizeof(float), nt, fp);
        	assert(nread == nt);

		/***** transform to frequency domain *****/
		memset(r,0,ntfft*sizeof(float));
		memcpy(r,&trace[0],nt*sizeof(float)); /* might not be needed if segy_read_traces_c is nice */
		t1 = wallclock_time();
		*tread += t1-t0;

		rc1_fft(r,c,ntfft,-1);

		t2 = wallclock_time();
		*tfft += t2-t1;

		memcpy(&tracedata[0].r,&c[0].r,nfreq*sizeof(complex));

		/***** reset buffer counter when buffer is full *****/
		if (nbufmax != 0) {
			t1 = wallclock_time();
			if (freebuffer<nbufmax-1) {
				freebuffer++;
			}
			else freebuffer=0;
		//fprintf(stderr,"freebuffer = %d\n", freebuffer);
		//fprintf(stderr,"nbuffer = %d\n", *nbuffer);

			memcpy(&tracebuffer[freebuffer*nfreq].r,&c[0].r,nfreq*sizeof(complex));
			trace_in_buffer[freebuffer]=itrace;
			t2 = wallclock_time();
			*tmemcpy += t2-t1;
		}

		free(trace);
		free(r);
		free(c);

		t1 = wallclock_time();
		*tread += t1-t2;
	}
	else {  /**** read trace from buffer *****/
		t0 = wallclock_time();
		tracedata = (complex *)&tracebuffer[inbuffer*nfreq]; /* this is better, but need some more changes TODO */
		//fprintf(stderr,"read from inbuffer=%d nbufmax=%d\n", inbuffer,nbufmax);

		//memcpy(&tracedata[0].r,&tracebuffer[inbuffer*nfreq].r,nfreq*sizeof(complex));
		
		t1 = wallclock_time();
		*tread += t1-t0;
	}
	return;
}