TridiagLU  1.0
Scalable, parallel solver for tridiagonal system of equations
tridiagLU.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <sys/time.h>
9 #include <string.h>
10 #ifndef serial
11 #include <mpi.h>
12 #endif
13 #include <tridiagLU.h>
14 
84  double *a,
85  double *b,
86  double *c,
87  double *x,
88  int n,
89  int ns,
90  void *r,
91  void *m
92  )
93 {
94  TridiagLU *params = (TridiagLU*) r;
95  int d,i,istart,iend;
96  int rank,nproc;
97  struct timeval start,stage1,stage2,stage3,stage4;
98 
99 #ifdef serial
100  rank = 0;
101  nproc = 1;
102 #else
103  MPI_Comm *comm = (MPI_Comm*) m;
104  int ierr = 0;
105  const int nvar = 4;
106 
107  if (comm) {
108  MPI_Comm_size(*comm,&nproc);
109  MPI_Comm_rank(*comm,&rank);
110  } else {
111  rank = 0;
112  nproc = 1;
113  }
114 #endif
115 
116  if (!params) {
117  fprintf(stderr,"Error in tridiagLU(): NULL pointer passed for parameters.\n");
118  return(1);
119  }
120 
121  /* start */
122  gettimeofday(&start,NULL);
123 
124  if ((ns == 0) || (n == 0)) return(0);
125  double *xs1, *xp1;
126  xs1 = (double*) calloc (ns, sizeof(double));
127  xp1 = (double*) calloc (ns, sizeof(double));
128  for (i=0; i<ns; i++) xs1[i] = xp1[i] = 0;
129 
130  /* Stage 1 - Parallel elimination of subdiagonal entries */
131  istart = (rank == 0 ? 1 : 2);
132  iend = n;
133  for (i = istart; i < iend; i++) {
134  for (d = 0; d < ns; d++) {
135  if (b[(i-1)*ns+d] == 0) return(-1);
136  double factor = a[i*ns+d] / b[(i-1)*ns+d];
137  b[i*ns+d] -= factor * c[(i-1)*ns+d];
138  a[i*ns+d] = -factor * a[(i-1)*ns+d];
139  x[i*ns+d] -= factor * x[(i-1)*ns+d];
140  if (rank) {
141  double factor = c[d] / b[(i-1)*ns+d];
142  c[d] = -factor * c[(i-1)*ns+d];
143  b[d] -= factor * a[(i-1)*ns+d];
144  x[d] -= factor * x[(i-1)*ns+d];
145  }
146  }
147  }
148 
149  /* end of stage 1 */
150  gettimeofday(&stage1,NULL);
151 
152  /* Stage 2 - Eliminate the first sub- & super-diagonal entries */
153  /* This needs the last (a,b,c,x) from the previous process */
154 #ifndef serial
155  double *sendbuf, *recvbuf;
156  sendbuf = (double*) calloc (ns*nvar, sizeof(double));
157  recvbuf = (double*) calloc (ns*nvar, sizeof(double));
158  for (d=0; d<ns; d++) {
159  sendbuf[d*nvar+0] = a[(n-1)*ns+d];
160  sendbuf[d*nvar+1] = b[(n-1)*ns+d];
161  sendbuf[d*nvar+2] = c[(n-1)*ns+d];
162  sendbuf[d*nvar+3] = x[(n-1)*ns+d];
163  }
164  if (nproc > 1) {
165  MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
166  if (rank) MPI_Irecv(recvbuf,nvar*ns,MPI_DOUBLE,rank-1,1436,*comm,&req[0]);
167  if (rank != nproc-1) MPI_Isend(sendbuf,nvar*ns,MPI_DOUBLE,rank+1,1436,*comm,&req[1]);
168  MPI_Waitall(2,&req[0],MPI_STATUS_IGNORE);
169  }
170  /* The first process sits this one out */
171  if (rank) {
172  for (d = 0; d < ns; d++) {
173  double am1, bm1, cm1, xm1;
174  am1 = recvbuf[d*nvar+0];
175  bm1 = recvbuf[d*nvar+1];
176  cm1 = recvbuf[d*nvar+2];
177  xm1 = recvbuf[d*nvar+3];
178  double factor;
179  if (bm1 == 0) return(-1);
180  factor = a[d] / bm1;
181  b[d] -= factor * cm1;
182  a[d] = -factor * am1;
183  x[d] -= factor * xm1;
184  if (b[(n-1)*ns+d] == 0) return(-1);
185  factor = c[d] / b[(n-1)*ns+d];
186  b[d] -= factor * a[(n-1)*ns+d];
187  c[d] = -factor * c[(n-1)*ns+d];
188  x[d] -= factor * x[(n-1)*ns+d];
189  }
190  }
191  free(sendbuf);
192  free(recvbuf);
193 #endif
194 
195  /* end of stage 2 */
196  gettimeofday(&stage2,NULL);
197 
198  /* Stage 3 - Solve the reduced (nproc-1) X (nproc-1) tridiagonal system */
199 #ifndef serial
200  if (nproc > 1) {
201  double *zero, *one;
202  zero = (double*) calloc (ns, sizeof(double));
203  one = (double*) calloc (ns, sizeof(double));
204  for (d=0; d<ns; d++) {
205  zero[d] = 0.0;
206  one [d] = 1.0;
207  }
208  if (!strcmp(params->reducedsolvetype,_TRIDIAG_GS_)) {
209  /* Solving the reduced system by gather-and-solve algorithm */
210  if (rank) ierr = tridiagLUGS(a,b,c,x,1,ns,params,comm);
211  else ierr = tridiagLUGS(zero,one,zero,zero,1,ns,params,comm);
212  if (ierr) return(ierr);
213  } else if (!strcmp(params->reducedsolvetype,_TRIDIAG_JACOBI_)) {
214  /* Solving the reduced system iteratively with the Jacobi method */
215  if (rank) ierr = tridiagIterJacobi(a,b,c,x,1,ns,params,comm);
216  else ierr = tridiagIterJacobi(zero,one,zero,zero,1,ns,params,comm);
217  }
218  free(zero);
219  free(one);
220 
221  /* Each process, get the first x of the next process */
222  MPI_Request req[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
223  for (d=0; d<ns; d++) xs1[d] = x[d];
224  if (rank+1 < nproc) MPI_Irecv(xp1,ns,MPI_DOUBLE,rank+1,1323,*comm,&req[0]);
225  if (rank) MPI_Isend(xs1,ns,MPI_DOUBLE,rank-1,1323,*comm,&req[1]);
226  MPI_Waitall(2,&req[0],MPI_STATUS_IGNORE);
227  }
228 #else
229  if (nproc > 1) {
230  fprintf(stderr,"Error: nproc > 1 for a serial run!\n");
231  return(1);
232  }
233 #endif /* if not serial */
234  /* end of stage 3 */
235  gettimeofday(&stage3,NULL);
236 
237  /* Stage 4 - Parallel back-substitution to get the solution */
238  istart = n-1;
239  iend = (rank == 0 ? 0 : 1);
240 
241  for (d = 0; d < ns; d++) {
242  if (b[istart*ns+d] == 0) return(-1);
243  x[istart*ns+d] = (x[istart*ns+d]-a[istart*ns+d]*x[d]-c[istart*ns+d]*xp1[d]) / b[istart*ns+d];
244  }
245  for (i = istart-1; i > iend-1; i--) {
246  for (d = 0; d < ns; d++) {
247  if (b[i*ns+d] == 0) return(-1);
248  x[i*ns+d] = (x[i*ns+d]-c[i*ns+d]*x[(i+1)*ns+d]-a[i*ns+d]*x[d]) / b[i*ns+d];
249  }
250  }
251 
252  /* end of stage 4 */
253  gettimeofday(&stage4,NULL);
254 
255  /* Done - now x contains the solution */
256  free(xs1);
257  free(xp1);
258 
259  /* save runtimes if needed */
260  long long walltime;
261  walltime = ((stage1.tv_sec * 1000000 + stage1.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
262  params->stage1_time = (double) walltime / 1000000.0;
263  walltime = ((stage2.tv_sec * 1000000 + stage2.tv_usec) - (stage1.tv_sec * 1000000 + stage1.tv_usec));
264  params->stage2_time = (double) walltime / 1000000.0;
265  walltime = ((stage3.tv_sec * 1000000 + stage3.tv_usec) - (stage2.tv_sec * 1000000 + stage2.tv_usec));
266  params->stage3_time = (double) walltime / 1000000.0;
267  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (stage3.tv_sec * 1000000 + stage3.tv_usec));
268  params->stage4_time = (double) walltime / 1000000.0;
269  walltime = ((stage4.tv_sec * 1000000 + stage4.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
270  params->total_time = (double) walltime / 1000000.0;
271  return(0);
272 }
double stage3_time
Definition: tridiagLU.h:101
int tridiagLUGS(double *, double *, double *, double *, int, int, void *, void *)
Definition: tridiagLUGS.c:64
int tridiagLU(double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
Definition: tridiagLU.c:83
double stage2_time
Definition: tridiagLU.h:100
double stage1_time
Definition: tridiagLU.h:99
Header file for TridiagLU.
#define _TRIDIAG_JACOBI_
Definition: tridiagLU.h:71
double stage4_time
Definition: tridiagLU.h:102
char reducedsolvetype[50]
Definition: tridiagLU.h:88
#define _TRIDIAG_GS_
Definition: tridiagLU.h:73
int tridiagIterJacobi(double *, double *, double *, double *, int, int, void *, void *)
double total_time
Definition: tridiagLU.h:98