97 struct timeval start,stage1,stage2,stage3,stage4;
103 MPI_Comm *comm = (MPI_Comm*) m;
108 MPI_Comm_size(*comm,&nproc);
109 MPI_Comm_rank(*comm,&rank);
117 fprintf(stderr,
"Error in tridiagLU(): NULL pointer passed for parameters.\n");
122 gettimeofday(&start,NULL);
124 if ((ns == 0) || (n == 0))
return(0);
126 xs1 = (
double*) calloc (ns,
sizeof(
double));
127 xp1 = (
double*) calloc (ns,
sizeof(
double));
128 for (i=0; i<ns; i++) xs1[i] = xp1[i] = 0;
131 istart = (rank == 0 ? 1 : 2);
133 for (i = istart; i < iend; i++) {
134 for (d = 0; d < ns; d++) {
135 if (b[(i-1)*ns+d] == 0)
return(-1);
136 double factor = a[i*ns+d] / b[(i-1)*ns+d];
137 b[i*ns+d] -= factor * c[(i-1)*ns+d];
138 a[i*ns+d] = -factor * a[(i-1)*ns+d];
139 x[i*ns+d] -= factor * x[(i-1)*ns+d];
141 double factor = c[d] / b[(i-1)*ns+d];
142 c[d] = -factor * c[(i-1)*ns+d];
143 b[d] -= factor * a[(i-1)*ns+d];
144 x[d] -= factor * x[(i-1)*ns+d];
150 gettimeofday(&stage1,NULL);
155 double *sendbuf, *recvbuf;
156 sendbuf = (
double*) calloc (ns*nvar,
sizeof(
double));
157 recvbuf = (
double*) calloc (ns*nvar,
sizeof(
double));
158 for (d=0; d<ns; d++) {
159 sendbuf[d*nvar+0] = a[(n-1)*ns+d];
160 sendbuf[d*nvar+1] = b[(n-1)*ns+d];
161 sendbuf[d*nvar+2] = c[(n-1)*ns+d];
162 sendbuf[d*nvar+3] = x[(n-1)*ns+d];
165 MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
166 if (rank) MPI_Irecv(recvbuf,nvar*ns,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
167 if (rank != nproc-1) MPI_Isend(sendbuf,nvar*ns,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
168 MPI_Waitall(2,&req[0],MPI_STATUS_IGNORE);
172 for (d = 0; d < ns; d++) {
173 double am1, bm1, cm1, xm1;
174 am1 = recvbuf[d*nvar+0];
175 bm1 = recvbuf[d*nvar+1];
176 cm1 = recvbuf[d*nvar+2];
177 xm1 = recvbuf[d*nvar+3];
179 if (bm1 == 0)
return(-1);
181 b[d] -= factor * cm1;
182 a[d] = -factor * am1;
183 x[d] -= factor * xm1;
184 if (b[(n-1)*ns+d] == 0)
return(-1);
185 factor = c[d] / b[(n-1)*ns+d];
186 b[d] -= factor * a[(n-1)*ns+d];
187 c[d] = -factor * c[(n-1)*ns+d];
188 x[d] -= factor * x[(n-1)*ns+d];
196 gettimeofday(&stage2,NULL);
202 zero = (
double*) calloc (ns,
sizeof(
double));
203 one = (
double*) calloc (ns,
sizeof(
double));
204 for (d=0; d<ns; d++) {
210 if (rank) ierr =
tridiagLUGS(a,b,c,x,1,ns,params,comm);
211 else ierr =
tridiagLUGS(zero,one,zero,zero,1,ns,params,comm);
212 if (ierr)
return(ierr);
222 MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
223 for (d=0; d<ns; d++) xs1[d] = x[d];
224 if (rank+1 < nproc) MPI_Irecv(xp1,ns,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
225 if (rank) MPI_Isend(xs1,ns,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
226 MPI_Waitall(2,&req[0],MPI_STATUS_IGNORE);
230 fprintf(stderr,
"Error: nproc > 1 for a serial run!\n");
235 gettimeofday(&stage3,NULL);
239 iend = (rank == 0 ? 0 : 1);
241 for (d = 0; d < ns; d++) {
242 if (b[istart*ns+d] == 0)
return(-1);
243 x[istart*ns+d] = (x[istart*ns+d]-a[istart*ns+d]*x[d]-c[istart*ns+d]*xp1[d]) / b[istart*ns+d];
245 for (i = istart-1; i > iend-1; i--) {
246 for (d = 0; d < ns; d++) {
247 if (b[i*ns+d] == 0)
return(-1);
248 x[i*ns+d] = (x[i*ns+d]-c[i*ns+d]*x[(i+1)*ns+d]-a[i*ns+d]*x[d]) / b[i*ns+d];
253 gettimeofday(&stage4,NULL);
261 walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
262 params->
stage1_time = (double) walltime / 1000000.0;
263 walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
264 params->
stage2_time = (double) walltime / 1000000.0;
265 walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
266 params->
stage3_time = (double) walltime / 1000000.0;
267 walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
268 params->
stage4_time = (double) walltime / 1000000.0;
269 walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
270 params->
total_time = (double) walltime / 1000000.0;
int tridiagLUGS(double *, double *, double *, double *, int, int, void *, void *)
int tridiagLU(double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
Header file for TridiagLU.
char reducedsolvetype[50]
int tridiagIterJacobi(double *, double *, double *, double *, int, int, void *, void *)