121 int d,i,j,istart,iend,size;
122 int rank,nproc,bs2=bs*bs,nsbs=ns*bs;
123 struct timeval start,stage1,stage2,stage3,stage4;
129 MPI_Comm *comm = (MPI_Comm*) m;
134 MPI_Comm_size(*comm,&nproc);
135 MPI_Comm_rank(*comm,&rank);
143 fprintf(stderr,
"Error in tridiagLU(): NULL pointer passed for parameters.\n");
148 gettimeofday(&start,NULL);
150 if ((ns == 0) || (n == 0) || (bs == 0))
return(0);
152 xs1 = (
double*) calloc (nsbs,
sizeof(
double));
153 xp1 = (
double*) calloc (nsbs,
sizeof(
double));
154 for (i=0; i<nsbs; i++) xs1[i] = xp1[i] = 0;
157 istart = (rank == 0 ? 1 : 2);
159 for (i = istart; i < iend; i++) {
160 double binv[bs2], factor[bs2];
161 for (d = 0; d < ns; d++) {
179 gettimeofday(&stage1,NULL);
184 double *sendbuf, *recvbuf;
185 size = ns*bs2*(nvar-1)+nsbs;
186 sendbuf = (
double*) calloc (size,
sizeof(
double));
187 recvbuf = (
double*) calloc (size,
sizeof(
double));
188 for (d=0; d<ns; d++) {
189 for (i=0; i<bs2; i++) {
190 sendbuf[(0*ns+d)*bs2+i] = a[((n-1)*ns+d)*bs2+i];
191 sendbuf[(1*ns+d)*bs2+i] = b[((n-1)*ns+d)*bs2+i];
192 sendbuf[(2*ns+d)*bs2+i] = c[((n-1)*ns+d)*bs2+i];
194 for (i=0; i<bs; i++) sendbuf[3*ns*bs2+d*bs+i] = x[((n-1)*ns+d)*bs+i];
197 MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
198 if (rank) MPI_Irecv(recvbuf,size,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
199 if (rank != nproc-1) MPI_Isend(sendbuf,size,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
200 MPI_Waitall(2,&req[0],MPI_STATUS_IGNORE);
204 for (d = 0; d < ns; d++) {
205 double am1[bs2], bm1[bs2], cm1[bs2], xm1[bs];
206 for (i=0; i<bs2; i++) {
207 am1[i] = recvbuf[(0*ns+d)*bs2+i];
208 bm1[i] = recvbuf[(1*ns+d)*bs2+i];
209 cm1[i] = recvbuf[(2*ns+d)*bs2+i];
211 for (i=0; i<bs; i++) xm1[i] = recvbuf[3*ns*bs2+d*bs+i];
212 double factor[bs2], binv[bs2];
220 _MatrixInvert_ (b+((n-1)*ns+d)*bs2,binv,bs);
if (ierr)
return(ierr);
233 gettimeofday(&stage2,NULL);
239 zero = (
double*) calloc (ns*bs2,
sizeof(
double));
240 eye = (
double*) calloc (ns*bs2,
sizeof(
double));
241 for (d=0; d<ns*bs2; d++) zero[d] = eye[d] = 0.0;
242 for (d=0; d<ns; d++) {
243 for (i=0; i<bs; i++) eye[d*bs2+(i*bs+i)] = 1.0;
248 fprintf(stderr,
"Error in blocktridiagLU(): Gather-and-solve for reduced system not available.\n");
259 MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
260 for (d=0; d<nsbs; d++) xs1[d] = x[d];
261 if (rank+1 < nproc) MPI_Irecv(xp1,nsbs,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
262 if (rank) MPI_Isend(xs1,nsbs,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
263 MPI_Waitall(2,&req[0],MPI_STATUS_IGNORE);
267 fprintf(stderr,
"Error: nproc > 1 for a serial run!\n");
272 gettimeofday(&stage3,NULL);
276 iend = (rank == 0 ? 0 : 1);
278 for (d = 0; d < ns; d++) {
279 double binv[bs2],xt[bs];
284 for (j=0; j<bs; j++) x[(istart*ns+d)*bs+j]=xt[j];
286 for (i = istart-1; i > iend-1; i--) {
287 for (d = 0; d < ns; d++) {
288 double binv[bs2],xt[bs];
293 for (j=0; j<bs; j++) x[(i*ns+d)*bs+j] = xt[j];
298 gettimeofday(&stage4,NULL);
306 walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
307 params->
stage1_time = (double) walltime / 1000000.0;
308 walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
309 params->
stage2_time = (double) walltime / 1000000.0;
310 walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
311 params->
stage3_time = (double) walltime / 1000000.0;
312 walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
313 params->
stage4_time = (double) walltime / 1000000.0;
314 walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
315 params->
total_time = (double) walltime / 1000000.0;
#define _MatrixMultiplySubtract_(C, A, B, N)
#define _MatrixMultiply_(A, B, C, N)
int blocktridiagLU(double *a, double *b, double *c, double *x, int n, int ns, int bs, void *r, void *m)
Header file for TridiagLU.
Contains macros and function definitions for common matrix operations.
#define _MatrixZero_(A, N)
char reducedsolvetype[50]
int blocktridiagIterJacobi(double *, double *, double *, double *, int, int, int, void *, void *)
#define _MatVecMultiplySubtract_(y, A, x, N)
#define _MatVecMultiply_(A, x, y, N)
#define _MatrixInvert_(A, B, N)