#include"fdacrtmc.h" int mvAvg2d3(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d3(n1,n2,in,out) computes the 3x3 moving average * of 2d input "in" and returns it in output "out". "n1" * is the size of the slow dimension while "n2" is the * size of the fast dimension. The boundaries are not * touched. The array size should be at least 3-by-3, * this is not checked. Input & output may NOT be equal. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[3]; float c=0.111111111111111111111111111111111111111111111; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[i2]=in[i2]; //Copy Data for(i1=1;i1<n1-1;i1++){ //Loop over slow dimension sum[0]=in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]; sum[1]=in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]; out[i1*n2]=in[i1*n2]; for(i2=1,j=0;j<(n2-2)/3;j++){ //Loop over fast dimension - Inner 3-loop unrolled sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]); sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]); sum[1]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]); } // Take care of remaining values if(i2<n2-1){ sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]); }else continue; if(i2<n2-1){ sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]); } out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d3Sgn(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d3Sgn(n1,n2,in,out) signs the input values according * to the sign of the local 3-by-3 average at that point. See * mvAvg2d3 for details. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[3]; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[i2]=in[i2]; //Copy Data for(i1=1;i1<n1-1;i1++){ //Loop over slow dimension sum[0]=in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]; sum[1]=in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]; out[i1*n2]=in[i1*n2]; for(i2=1,j=0;j<(n2-2)/3;j++){ //Loop over fast dimension - Inner 3-loop unrolled sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]);i2++; sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]);i2++; sum[1]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]);i2++; } // Take care of remaining values if(i2<n2-1){ sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]);i2++; }else continue; if(i2<n2-1){ sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]);i2++; } out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d3Embd(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d3Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d3(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>0 & e1<n1 & e2<n2 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float *tmp; float sum[3]; float c=0.111111111111111111111111111111111111111111111; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2-1]; sum[1]=in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2 ]; for(i2=s2,i2e=0,j=0;j<n2e/3;j++){ //Loop over fast dimension - Inner 3-loop unrolled sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]); sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]); sum[1]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]); } // Take care of remaining values if(i2<e2){ sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]); }else continue; if(i2<e2){ sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d3EmbdSgn(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d3EmbdSgn(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d3Sgn(n1,n2,in,out). See * mvAvg2d3Embd for details. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float *tmp; float sum[3]; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2-1]; sum[1]=in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2 ]; for(i2=s2,i2e=0,j=0;j<n2e/3;j++){ //Loop over fast dimension - Inner 3-loop unrolled sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]); sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]); sum[1]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]); } // Take care of remaining values if(i2<e2){ sum[2]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]); }else continue; if(i2<e2){ sum[0]=in[(i1-1)*n2+i2+1]+in[i1*n2+i2+1]+in[(i1+1)*n2+i2+1]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d5(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d5(n1,n2,in,out) computes the 5x5 moving average * of 2d input "in" and returns it in output "out". "n1" * is the size of the slow dimension while "n2" is the * size of the fast dimension. The boundaries are not * touched. The array size should be at least 5-by-5, * this is not checked. Input & output may NOT be equal. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[5]; float c=0.04; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[ i2]=in[ i2]; //Copy Data for(i2=0;i2<n2;i2++)out[n2+i2]=in[n2+i2]; //Copy Data for(i1=2;i1<n1-2;i1++){ //Loop over slow dimension sum[0]=in[(i1-2)*n2 ]+in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]+in[(i1+2)*n2 ]; sum[1]=in[(i1-2)*n2+1]+in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]+in[(i1+2)*n2+1]; sum[2]=in[(i1-2)*n2+2]+in[(i1-1)*n2+2]+in[i1*n2+2]+in[(i1+1)*n2+2]+in[(i1+2)*n2+2]; sum[3]=in[(i1-2)*n2+3]+in[(i1-1)*n2+3]+in[i1*n2+3]+in[(i1+1)*n2+3]+in[(i1+2)*n2+3]; out[i1*n2]=in[i1*n2];out[i1*n2+1]=in[i1*n2+1]; for(i2=2,j=0;j<(n2-4)/5;j++){ //Loop over fast dimension - Inner 5-loop unrolled sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[3]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); } // Take care of remaining values if(i2<n2-2){ sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<n2-2){ sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<n2-2){ sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<n2-2){ sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); } out[(i1+1)*n2-2]=in[(i1+1)*n2-2];out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-2)*n2+i2]=in[(n1-2)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d5Sgn(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d5Sgn(n1,n2,in,out) signs the input values according * to the sign of the local 5-by-5 average at that point. See * mvAvg2d5 for details. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[5]; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[ i2]=in[ i2]; //Copy Data for(i2=0;i2<n2;i2++)out[n2+i2]=in[n2+i2]; //Copy Data for(i1=2;i1<n1-2;i1++){ //Loop over slow dimension sum[0]=in[(i1-2)*n2 ]+in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]+in[(i1+2)*n2 ]; sum[1]=in[(i1-2)*n2+1]+in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]+in[(i1+2)*n2+1]; sum[2]=in[(i1-2)*n2+2]+in[(i1-1)*n2+2]+in[i1*n2+2]+in[(i1+1)*n2+2]+in[(i1+2)*n2+2]; sum[3]=in[(i1-2)*n2+3]+in[(i1-1)*n2+3]+in[i1*n2+3]+in[(i1+1)*n2+3]+in[(i1+2)*n2+3]; out[i1*n2]=in[i1*n2];out[i1*n2+1]=in[i1*n2+1]; for(i2=2,j=0;j<(n2-4)/5;j++){ //Loop over fast dimension - Inner 5-loop unrolled sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; sum[3]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; } // Take care of remaining values if(i2<n2-2){ sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; }else continue; if(i2<n2-2){ sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; }else continue; if(i2<n2-2){ sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; }else continue; if(i2<n2-2){ sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]);i2++; } out[(i1+1)*n2-2]=in[(i1+1)*n2-2];out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-2)*n2+i2]=in[(n1-2)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d5Embd(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d5Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d5(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>1 & e1<n1-1 & e2<n2-1 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[5]; float *tmp; float c=0.04; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-2)*n2+s2-2]+in[(i1-1)*n2+s2-2]+in[i1*n2+s2-2]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2-2]; sum[1]=in[(i1-2)*n2+s2-1]+in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2-1]; sum[2]=in[(i1-2)*n2+s2 ]+in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2 ]; sum[3]=in[(i1-2)*n2+s2+1]+in[(i1-1)*n2+s2+1]+in[i1*n2+s2+1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2+1]; for(i2=s2,i2e=0,j=0;j<n2e/5;j++){ //Loop over fast dimension - Inner 5-loop unrolled sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[3]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); } // Take care of remaining values if(i2<e2){ sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<e2){ sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<e2){ sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<e2){ sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d5EmbdSgn(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d5Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d5(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>1 & e1<n1-1 & e2<n2-1 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[5]; float *tmp; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-2)*n2+s2-2]+in[(i1-1)*n2+s2-2]+in[i1*n2+s2-2]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2-2]; sum[1]=in[(i1-2)*n2+s2-1]+in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2-1]; sum[2]=in[(i1-2)*n2+s2 ]+in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2 ]; sum[3]=in[(i1-2)*n2+s2+1]+in[(i1-1)*n2+s2+1]+in[i1*n2+s2+1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2+1]; for(i2=s2,i2e=0,j=0;j<n2e/5;j++){ //Loop over fast dimension - Inner 5-loop unrolled sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); sum[3]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); } // Take care of remaining values if(i2<e2){ sum[4]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<e2){ sum[0]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<e2){ sum[1]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); }else continue; if(i2<e2){ sum[2]=in[(i1-2)*n2+i2+2]+in[(i1-1)*n2+i2+2]+in[i1*n2+i2+2]+in[(i1+1)*n2+i2+2]+in[(i1+2)*n2+i2+2]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d7(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d7(n1,n2,in,out) computes the 7x7 moving average * of 2d input "in" and returns it in output "out". "n1" * is the size of the slow dimension while "n2" is the * size of the fast dimension. The boundaries are not * touched. The array size should be at least 7-by-7, * this is not checked. Input & output may NOT be equal. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[7]; float c=1.0/49.0; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[ i2]=in[ i2]; //Copy Data for(i2=0;i2<n2;i2++)out[ n2+i2]=in[ n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[2*n2+i2]=in[2*n2+i2]; //Copy Data for(i1=3;i1<n1-3;i1++){ //Loop over slow dimension sum[0]=in[(i1-3)*n2 ]+in[(i1-2)*n2 ]+in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]+in[(i1+2)*n2 ]+in[(i1+3)*n2 ]; sum[1]=in[(i1-3)*n2+1]+in[(i1-2)*n2+1]+in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]+in[(i1+2)*n2+1]+in[(i1+3)*n2+1]; sum[2]=in[(i1-3)*n2+2]+in[(i1-2)*n2+2]+in[(i1-1)*n2+2]+in[i1*n2+2]+in[(i1+1)*n2+2]+in[(i1+2)*n2+2]+in[(i1+3)*n2+2]; sum[3]=in[(i1-3)*n2+3]+in[(i1-2)*n2+3]+in[(i1-1)*n2+3]+in[i1*n2+3]+in[(i1+1)*n2+3]+in[(i1+2)*n2+3]+in[(i1+3)*n2+3]; sum[4]=in[(i1-3)*n2+4]+in[(i1-2)*n2+4]+in[(i1-1)*n2+4]+in[i1*n2+4]+in[(i1+1)*n2+4]+in[(i1+2)*n2+4]+in[(i1+3)*n2+4]; sum[5]=in[(i1-3)*n2+5]+in[(i1-2)*n2+5]+in[(i1-1)*n2+5]+in[i1*n2+5]+in[(i1+1)*n2+5]+in[(i1+2)*n2+5]+in[(i1+3)*n2+5]; out[i1*n2]=in[i1*n2];out[i1*n2+1]=in[i1*n2+1];out[i1*n2+2]=in[i1*n2+2]; for(i2=3,j=0;j<(n2-6)/7;j++){ //Loop over fast dimension - Inner 7-loop unrolled sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[5]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); } // Take care of remaining values if(i2<n2-3){ sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<n2-3){ sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<n2-3){ sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<n2-3){ sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<n2-3){ sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<n2-3){ sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); } out[(i1+1)*n2-3]=in[(i1+1)*n2-3];out[(i1+1)*n2-2]=in[(i1+1)*n2-2];out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-3)*n2+i2]=in[(n1-3)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-2)*n2+i2]=in[(n1-2)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d7Sgn(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d7Sgn(n1,n2,in,out) signs the input values according * to the sign of the local 7-by-7 average at that point. See * mvAvg2d7 for details. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[7]; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[ i2]=in[ i2]; //Copy Data for(i2=0;i2<n2;i2++)out[ n2+i2]=in[ n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[2*n2+i2]=in[2*n2+i2]; //Copy Data for(i1=3;i1<n1-3;i1++){ //Loop over slow dimension sum[0]=in[(i1-3)*n2 ]+in[(i1-2)*n2 ]+in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]+in[(i1+2)*n2 ]+in[(i1+3)*n2 ]; sum[1]=in[(i1-3)*n2+1]+in[(i1-2)*n2+1]+in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]+in[(i1+2)*n2+1]+in[(i1+3)*n2+1]; sum[2]=in[(i1-3)*n2+2]+in[(i1-2)*n2+2]+in[(i1-1)*n2+2]+in[i1*n2+2]+in[(i1+1)*n2+2]+in[(i1+2)*n2+2]+in[(i1+3)*n2+2]; sum[3]=in[(i1-3)*n2+3]+in[(i1-2)*n2+3]+in[(i1-1)*n2+3]+in[i1*n2+3]+in[(i1+1)*n2+3]+in[(i1+2)*n2+3]+in[(i1+3)*n2+3]; sum[4]=in[(i1-3)*n2+4]+in[(i1-2)*n2+4]+in[(i1-1)*n2+4]+in[i1*n2+4]+in[(i1+1)*n2+4]+in[(i1+2)*n2+4]+in[(i1+3)*n2+4]; sum[5]=in[(i1-3)*n2+5]+in[(i1-2)*n2+5]+in[(i1-1)*n2+5]+in[i1*n2+5]+in[(i1+1)*n2+5]+in[(i1+2)*n2+5]+in[(i1+3)*n2+5]; out[i1*n2]=in[i1*n2];out[i1*n2+1]=in[i1*n2+1];out[i1*n2+2]=in[i1*n2+2]; for(i2=3,j=0;j<(n2-6)/5;j++){ //Loop over fast dimension - Inner 7-loop unrolled sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; sum[5]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]);i2++; } // Take care of remaining values if(i2<n2-3){ sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]);i2++; }else continue; if(i2<n2-3){ sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]);i2++; }else continue; if(i2<n2-3){ sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]);i2++; }else continue; if(i2<n2-3){ sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]);i2++; }else continue; if(i2<n2-3){ sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]);i2++; }else continue; if(i2<n2-3){ sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]);i2++; } out[(i1+1)*n2-3]=in[(i1+1)*n2-3];out[(i1+1)*n2-2]=in[(i1+1)*n2-2];out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-3)*n2+i2]=in[(n1-3)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-2)*n2+i2]=in[(n1-2)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d7Embd(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d7Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d7(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>1 & e1<n1-1 & e2<n2-1 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[7]; float *tmp; float c=1.0/49.0; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-3)*n2-3]+in[(i1-2)*n2+s2-3]+in[(i1-1)*n2+s2-3]+in[i1*n2+s2-3]+in[(i1+1)*n2+s2-3]+in[(i1+2)*n2+s2-3]+in[(i1+3)*n2+s2-3]; sum[1]=in[(i1-3)*n2-2]+in[(i1-2)*n2+s2-2]+in[(i1-1)*n2+s2-2]+in[i1*n2+s2-2]+in[(i1+1)*n2+s2-2]+in[(i1+2)*n2+s2-2]+in[(i1+3)*n2+s2-2]; sum[2]=in[(i1-3)*n2-1]+in[(i1-2)*n2+s2-1]+in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2-1]+in[(i1+2)*n2+s2-1]+in[(i1+3)*n2+s2-1]; sum[3]=in[(i1-3)*n2 ]+in[(i1-2)*n2+s2 ]+in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2 ]+in[(i1+2)*n2+s2 ]+in[(i1+3)*n2+s2 ]; sum[4]=in[(i1-3)*n2+1]+in[(i1-2)*n2+s2+1]+in[(i1-1)*n2+s2+1]+in[i1*n2+s2+1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2+1]+in[(i1+3)*n2+s2+1]; sum[5]=in[(i1-3)*n2+2]+in[(i1-2)*n2+s2+2]+in[(i1-1)*n2+s2+2]+in[i1*n2+s2+2]+in[(i1+1)*n2+s2+2]+in[(i1+2)*n2+s2+2]+in[(i1+3)*n2+s2+2]; for(i2=s2,i2e=0,j=0;j<n2e/7;j++){ //Loop over fast dimension - Inner 7-loop unrolled sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[5]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); } // Take care of remaining values if(i2<e2){ sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d7EmbdSgn(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d7Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d7(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>1 & e1<n1-1 & e2<n2-1 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[7]; float *tmp; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-3)*n2-3]+in[(i1-2)*n2+s2-3]+in[(i1-1)*n2+s2-3]+in[i1*n2+s2-3]+in[(i1+1)*n2+s2-3]+in[(i1+2)*n2+s2-3]+in[(i1+3)*n2+s2-3]; sum[1]=in[(i1-3)*n2-2]+in[(i1-2)*n2+s2-2]+in[(i1-1)*n2+s2-2]+in[i1*n2+s2-2]+in[(i1+1)*n2+s2-2]+in[(i1+2)*n2+s2-2]+in[(i1+3)*n2+s2-2]; sum[2]=in[(i1-3)*n2-1]+in[(i1-2)*n2+s2-1]+in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2-1]+in[(i1+2)*n2+s2-1]+in[(i1+3)*n2+s2-1]; sum[3]=in[(i1-3)*n2 ]+in[(i1-2)*n2+s2 ]+in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2 ]+in[(i1+2)*n2+s2 ]+in[(i1+3)*n2+s2 ]; sum[4]=in[(i1-3)*n2+1]+in[(i1-2)*n2+s2+1]+in[(i1-1)*n2+s2+1]+in[i1*n2+s2+1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2+1]+in[(i1+3)*n2+s2+1]; sum[5]=in[(i1-3)*n2+2]+in[(i1-2)*n2+s2+2]+in[(i1-1)*n2+s2+2]+in[i1*n2+s2+2]+in[(i1+1)*n2+s2+2]+in[(i1+2)*n2+s2+2]+in[(i1+3)*n2+s2+2]; for(i2=s2,i2e=0,j=0;j<n2e/7;j++){ //Loop over fast dimension - Inner 7-loop unrolled sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]); sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]); sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]); sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]); sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]); sum[5]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[1]+sum[2]+sum[3]+sum[4]+sum[5]); } // Take care of remaining values if(i2<e2){ sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[0]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[1]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d9(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d9(n1,n2,in,out) computes the 9x9 moving average * of 2d input "in" and returns it in output "out". "n1" * is the size of the slow dimension while "n2" is the * size of the fast dimension. The boundaries are not * touched. The array size should be at least 9-by-9, * this is not checked. Input & output may NOT be equal. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[9]; float c=1.0/81.0; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[ i2]=in[ i2]; //Copy Data for(i2=0;i2<n2;i2++)out[ n2+i2]=in[ n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[2*n2+i2]=in[2*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[3*n2+i2]=in[3*n2+i2]; //Copy Data for(i1=4;i1<n1-4;i1++){ //Loop over slow dimension sum[0]=in[(i1-4)*n2 ]+in[(i1-3)*n2 ]+in[(i1-2)*n2 ]+in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]+in[(i1+2)*n2 ]+in[(i1+3)*n2 ]+in[(i1+4)*n2 ]; sum[1]=in[(i1-4)*n2+1]+in[(i1-3)*n2+1]+in[(i1-2)*n2+1]+in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]+in[(i1+2)*n2+1]+in[(i1+3)*n2+1]+in[(i1+4)*n2+1]; sum[2]=in[(i1-4)*n2+2]+in[(i1-3)*n2+2]+in[(i1-2)*n2+2]+in[(i1-1)*n2+2]+in[i1*n2+2]+in[(i1+1)*n2+2]+in[(i1+2)*n2+2]+in[(i1+3)*n2+2]+in[(i1+4)*n2+2]; sum[3]=in[(i1-4)*n2+3]+in[(i1-3)*n2+3]+in[(i1-2)*n2+3]+in[(i1-1)*n2+3]+in[i1*n2+3]+in[(i1+1)*n2+3]+in[(i1+2)*n2+3]+in[(i1+3)*n2+3]+in[(i1+4)*n2+3]; sum[4]=in[(i1-4)*n2+4]+in[(i1-3)*n2+4]+in[(i1-2)*n2+4]+in[(i1-1)*n2+4]+in[i1*n2+4]+in[(i1+1)*n2+4]+in[(i1+2)*n2+4]+in[(i1+3)*n2+4]+in[(i1+4)*n2+4]; sum[5]=in[(i1-4)*n2+5]+in[(i1-3)*n2+5]+in[(i1-2)*n2+5]+in[(i1-1)*n2+5]+in[i1*n2+5]+in[(i1+1)*n2+5]+in[(i1+2)*n2+5]+in[(i1+3)*n2+5]+in[(i1+4)*n2+5]; sum[6]=in[(i1-4)*n2+6]+in[(i1-3)*n2+6]+in[(i1-2)*n2+6]+in[(i1-1)*n2+6]+in[i1*n2+6]+in[(i1+1)*n2+6]+in[(i1+2)*n2+6]+in[(i1+3)*n2+6]+in[(i1+4)*n2+6]; sum[7]=in[(i1-4)*n2+7]+in[(i1-3)*n2+7]+in[(i1-2)*n2+7]+in[(i1-1)*n2+7]+in[i1*n2+7]+in[(i1+1)*n2+7]+in[(i1+2)*n2+7]+in[(i1+3)*n2+7]+in[(i1+4)*n2+7]; out[i1*n2]=in[i1*n2];out[i1*n2+1]=in[i1*n2+1];out[i1*n2+2]=in[i1*n2+2];out[i1*n2+3]=in[i1*n2+3]; for(i2=4,j=0;j<(n2-8)/9;j++){ //Loop over fast dimension - Inner 9-loop unrolled sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[7]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); } // Take care of remaining values if(i2<n2-4){ sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<n2-4){ sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); } out[(i1+1)*n2-4]=in[(i1+1)*n2-4];out[(i1+1)*n2-3]=in[(i1+1)*n2-3];out[(i1+1)*n2-2]=in[(i1+1)*n2-2];out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-4)*n2+i2]=in[(n1-4)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-3)*n2+i2]=in[(n1-3)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-2)*n2+i2]=in[(n1-2)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d9Sgn(size_t n1, size_t n2, float* in, float* out){ /* mvAvg2d9Sgn(n1,n2,in,out) signs the input values according * to the sign of the local 9-by-9 average at that point. See * mvAvg2d7 for details. * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[9]; size_t i1,i2,j; for(i2=0;i2<n2;i2++)out[ i2]=in[ i2]; //Copy Data for(i2=0;i2<n2;i2++)out[ n2+i2]=in[ n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[2*n2+i2]=in[2*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[3*n2+i2]=in[3*n2+i2]; //Copy Data for(i1=4;i1<n1-2;i1++){ //Loop over slow dimension sum[0]=in[(i1-3)*n2 ]+in[(i1-2)*n2 ]+in[(i1-1)*n2 ]+in[i1*n2 ]+in[(i1+1)*n2 ]+in[(i1+2)*n2 ]+in[(i1+3)*n2 ]; sum[1]=in[(i1-3)*n2+1]+in[(i1-2)*n2+1]+in[(i1-1)*n2+1]+in[i1*n2+1]+in[(i1+1)*n2+1]+in[(i1+2)*n2+1]+in[(i1+3)*n2+1]; sum[2]=in[(i1-3)*n2+2]+in[(i1-2)*n2+2]+in[(i1-1)*n2+2]+in[i1*n2+2]+in[(i1+1)*n2+2]+in[(i1+2)*n2+2]+in[(i1+3)*n2+2]; sum[3]=in[(i1-3)*n2+3]+in[(i1-2)*n2+3]+in[(i1-1)*n2+3]+in[i1*n2+3]+in[(i1+1)*n2+3]+in[(i1+2)*n2+3]+in[(i1+3)*n2+3]; sum[4]=in[(i1-3)*n2+4]+in[(i1-2)*n2+4]+in[(i1-1)*n2+4]+in[i1*n2+4]+in[(i1+1)*n2+4]+in[(i1+2)*n2+4]+in[(i1+3)*n2+4]; sum[5]=in[(i1-3)*n2+5]+in[(i1-2)*n2+5]+in[(i1-1)*n2+5]+in[i1*n2+5]+in[(i1+1)*n2+5]+in[(i1+2)*n2+5]+in[(i1+3)*n2+5]; out[i1*n2]=in[i1*n2];out[i1*n2+1]=in[i1*n2+1];out[i1*n2+2]=in[i1*n2+2];out[i1*n2+3]=in[i1*n2+3]; for(i2=3,j=0;j<(n2-6)/5;j++){ //Loop over fast dimension - Inner 7-loop unrolled sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; sum[7]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; } // Take care of remaining values if(i2<n2-4){ sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; }else continue; if(i2<n2-4){ sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; out[i1*n2+i2]=copysignf(in[i1*n2+i2],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]);i2++; } out[(i1+1)*n2-4]=in[(i1+1)*n2-4];out[(i1+1)*n2-3]=in[(i1+1)*n2-3];out[(i1+1)*n2-2]=in[(i1+1)*n2-2];out[(i1+1)*n2-1]=in[(i1+1)*n2-1]; } for(i2=0;i2<n2;i2++)out[(n1-4)*n2+i2]=in[(n1-4)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-3)*n2+i2]=in[(n1-3)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-2)*n2+i2]=in[(n1-2)*n2+i2]; //Copy Data for(i2=0;i2<n2;i2++)out[(n1-1)*n2+i2]=in[(n1-1)*n2+i2]; //Copy Data return(0); } int mvAvg2d9Embd(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d9Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d7(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>1 & e1<n1-1 & e2<n2-1 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[9]; float *tmp; float c=1.0/81.0; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-4)*n2-4]+in[(i1-3)*n2+s2-4]+in[(i1-2)*n2+s2-4]+in[(i1-1)*n2+s2-3]+in[i1*n2+s2-3]+in[(i1+1)*n2+s2-3]+in[(i1+2)*n2+s2-3]+in[(i1+3)*n2+s2-3]+in[(i1+4)*n2+s2-3]; sum[1]=in[(i1-4)*n2-3]+in[(i1-3)*n2+s2-3]+in[(i1-2)*n2+s2-3]+in[(i1-1)*n2+s2-3]+in[i1*n2+s2-3]+in[(i1+1)*n2+s2-3]+in[(i1+2)*n2+s2-3]+in[(i1+3)*n2+s2-3]+in[(i1+4)*n2+s2-3]; sum[2]=in[(i1-4)*n2-2]+in[(i1-3)*n2+s2-2]+in[(i1-2)*n2+s2-2]+in[(i1-1)*n2+s2-2]+in[i1*n2+s2-2]+in[(i1+1)*n2+s2-2]+in[(i1+2)*n2+s2-2]+in[(i1+3)*n2+s2-2]+in[(i1+4)*n2+s2-2]; sum[3]=in[(i1-4)*n2-1]+in[(i1-3)*n2+s2-1]+in[(i1-2)*n2+s2-1]+in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2-1]+in[(i1+2)*n2+s2-1]+in[(i1+3)*n2+s2-1]+in[(i1+4)*n2+s2-1]; sum[4]=in[(i1-4)*n2 ]+in[(i1-3)*n2+s2 ]+in[(i1-2)*n2+s2 ]+in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2 ]+in[(i1+2)*n2+s2 ]+in[(i1+3)*n2+s2 ]+in[(i1+4)*n2+s2 ]; sum[5]=in[(i1-4)*n2+1]+in[(i1-3)*n2+s2+1]+in[(i1-2)*n2+s2+1]+in[(i1-1)*n2+s2+1]+in[i1*n2+s2+1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2+1]+in[(i1+3)*n2+s2+1]+in[(i1+4)*n2+s2+1]; sum[6]=in[(i1-4)*n2+2]+in[(i1-3)*n2+s2+2]+in[(i1-2)*n2+s2+2]+in[(i1-1)*n2+s2+2]+in[i1*n2+s2+2]+in[(i1+1)*n2+s2+2]+in[(i1+2)*n2+s2+2]+in[(i1+3)*n2+s2+2]+in[(i1+4)*n2+s2+2]; sum[7]=in[(i1-4)*n2+3]+in[(i1-3)*n2+s2+3]+in[(i1-2)*n2+s2+3]+in[(i1-1)*n2+s2+3]+in[i1*n2+s2+3]+in[(i1+1)*n2+s2+3]+in[(i1+2)*n2+s2+3]+in[(i1+3)*n2+s2+3]+in[(i1+4)*n2+s2+3]; for(i2=s2,i2e=0,j=0;j<n2e/9;j++){ //Loop over fast dimension - Inner 9-loop unrolled sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[7]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); } // Take care of remaining values if(i2<e2){ sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[2]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[3]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[4]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[5]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); }else continue; if(i2<e2){ sum[6]=in[(i1-3)*n2+i2+3]+in[(i1-2)*n2+i2+3]+in[(i1-1)*n2+i2+3]+in[i1*n2+i2+3]+in[(i1+1)*n2+i2+3]+in[(i1+2)*n2+i2+3]+in[(i1+3)*n2+i2+3];i2++; tmp[i1e*n2e+i2e++]=c*(sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); } int mvAvg2d9EmbdSgn(size_t n2, float* in, float* out, size_t s1, size_t s2, size_t e1, size_t e2){ /* mvAvg2d9Embd(n2,in,out,s1,s2,e1,e2) is the embedded form of mvAvg2d7(n1,n2,in,out). "s1" is * the start of the area to average in the slow dimension. "s2" is the start of the area to * average in the fast dimension. "e1" is the end of the area to average in the slow dimension. * "e2" is the end of the area to average in the fast dimension. Input & output MAY be equal. * Requires additional memory. Note: s1,s2>1 & e1<n1-1 & e2<n2-1 * * AUTHOR: * Max Holicki * Delft University of Technology * 21.11.2017 */ float sum[9]; float *tmp; size_t i1,i1e,i2,i2e,j; size_t n1e=e1-s1; size_t n2e=e2-s2; tmp=(float*)malloc(n1e*n2e*sizeof(float)); for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ //Loop over slow dimension sum[0]=in[(i1-4)*n2-4]+in[(i1-3)*n2+s2-4]+in[(i1-2)*n2+s2-4]+in[(i1-1)*n2+s2-3]+in[i1*n2+s2-3]+in[(i1+1)*n2+s2-3]+in[(i1+2)*n2+s2-3]+in[(i1+3)*n2+s2-3]+in[(i1+4)*n2+s2-3]; sum[1]=in[(i1-4)*n2-3]+in[(i1-3)*n2+s2-3]+in[(i1-2)*n2+s2-3]+in[(i1-1)*n2+s2-3]+in[i1*n2+s2-3]+in[(i1+1)*n2+s2-3]+in[(i1+2)*n2+s2-3]+in[(i1+3)*n2+s2-3]+in[(i1+4)*n2+s2-3]; sum[2]=in[(i1-4)*n2-2]+in[(i1-3)*n2+s2-2]+in[(i1-2)*n2+s2-2]+in[(i1-1)*n2+s2-2]+in[i1*n2+s2-2]+in[(i1+1)*n2+s2-2]+in[(i1+2)*n2+s2-2]+in[(i1+3)*n2+s2-2]+in[(i1+4)*n2+s2-2]; sum[3]=in[(i1-4)*n2-1]+in[(i1-3)*n2+s2-1]+in[(i1-2)*n2+s2-1]+in[(i1-1)*n2+s2-1]+in[i1*n2+s2-1]+in[(i1+1)*n2+s2-1]+in[(i1+2)*n2+s2-1]+in[(i1+3)*n2+s2-1]+in[(i1+4)*n2+s2-1]; sum[4]=in[(i1-4)*n2 ]+in[(i1-3)*n2+s2 ]+in[(i1-2)*n2+s2 ]+in[(i1-1)*n2+s2 ]+in[i1*n2+s2 ]+in[(i1+1)*n2+s2 ]+in[(i1+2)*n2+s2 ]+in[(i1+3)*n2+s2 ]+in[(i1+4)*n2+s2 ]; sum[5]=in[(i1-4)*n2+1]+in[(i1-3)*n2+s2+1]+in[(i1-2)*n2+s2+1]+in[(i1-1)*n2+s2+1]+in[i1*n2+s2+1]+in[(i1+1)*n2+s2+1]+in[(i1+2)*n2+s2+1]+in[(i1+3)*n2+s2+1]+in[(i1+4)*n2+s2+1]; sum[6]=in[(i1-4)*n2+2]+in[(i1-3)*n2+s2+2]+in[(i1-2)*n2+s2+2]+in[(i1-1)*n2+s2+2]+in[i1*n2+s2+2]+in[(i1+1)*n2+s2+2]+in[(i1+2)*n2+s2+2]+in[(i1+3)*n2+s2+2]+in[(i1+4)*n2+s2+2]; sum[7]=in[(i1-4)*n2+3]+in[(i1-3)*n2+s2+3]+in[(i1-2)*n2+s2+3]+in[(i1-1)*n2+s2+3]+in[i1*n2+s2+3]+in[(i1+1)*n2+s2+3]+in[(i1+2)*n2+s2+3]+in[(i1+3)*n2+s2+3]+in[(i1+4)*n2+s2+3]; for(i2=s2,i2e=0,j=0;j<n2e/9;j++){ //Loop over fast dimension - Inner 5-loop unrolled sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); sum[7]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); } // Take care of remaining values if(i2<e2){ sum[8]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[0]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[1]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[2]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[3]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[4]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[5]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); }else continue; if(i2<e2){ sum[6]=in[(i1-4)*n2+i2+4]+in[(i1-3)*n2+i2+4]+in[(i1-2)*n2+i2+4]+in[(i1-1)*n2+i2+4]+in[i1*n2+i2+4]+in[(i1+1)*n2+i2+4]+in[(i1+2)*n2+i2+4]+in[(i1+3)*n2+i2+4]+in[(i1+4)*n2+i2+4]; tmp[i1e*n2e+i2e++]=copysignf(in[i1*n2+i2++],sum[0]+sum[1]+sum[2]+sum[3]+sum[4]+sum[5]+sum[6]+sum[7]+sum[8]); } } //Copy to output for(i1=s1,i1e=0;i1<e1;i1++,i1e++){ for(i2=s2,i2e=0;i2<e2;i2++,i2e++){ out[i1*n2+i2]=tmp[i1e*n2e+i2e]; } } free(tmp); return(0); }