TridiagLU  1.0
Scalable, parallel solver for tridiagonal system of equations
blocktridiagIterJacobi.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <math.h>
9 #ifndef serial
10 #include <mpi.h>
11 #endif
12 #include <tridiagLU.h>
13 #include <matops.h>
14 
89  double *a,
90  double *b,
91  double *c,
92  double *x,
93  int n,
95  int ns,
96  int bs,
97  void *r,
98  void *m
99  )
100 {
101  TridiagLU *context = (TridiagLU*) r;
102  int iter,d,i,j,NT,bs2=bs*bs,nsbs=ns*bs;
103  double norm=0,norm0=0,global_norm=0;
104 
105 #ifndef serial
106  MPI_Comm *comm = (MPI_Comm*) m;
107  int rank,nproc;
108 
109  if (comm) {
110  MPI_Comm_size(*comm,&nproc);
111  MPI_Comm_rank(*comm,&rank);
112  } else {
113  rank = 0;
114  nproc = 1;
115  }
116 #endif
117 
118  if (!context) {
119  fprintf(stderr,"Error in tridiagIterJacobi(): NULL pointer passed for parameters!\n");
120  return(-1);
121  }
122 
123  double *rhs = (double*) calloc (ns*n*bs, sizeof(double));
124  for (i=0; i<ns*n*bs; i++) rhs[i] = x[i]; /* save a copy of the rhs */
125  /* initial guess */
126  for (i=0; i<n; i++) {
127  for (d=0; d<ns; d++) {
128  double binv[bs2];
129  _MatrixInvert_ (b+(i*ns+d)*bs2,binv,bs);
130  _MatVecMultiply_ (binv,rhs+(i*ns+d)*bs,x+(i*ns+d)*bs,bs);
131  }
132  }
133 
134  double *recvbufL, *recvbufR, *sendbufL, *sendbufR;
135  recvbufL = (double*) calloc (nsbs, sizeof(double));
136  recvbufR = (double*) calloc (nsbs, sizeof(double));
137  sendbufL = (double*) calloc (nsbs, sizeof(double));
138  sendbufR = (double*) calloc (nsbs, sizeof(double));
139 
140  /* total number of points */
141 #ifdef serial
142  if (context->evaluate_norm) NT = n;
143  else NT = 0;
144 #else
145  if (context->evaluate_norm) MPI_Allreduce(&n,&NT,1,MPI_INT,MPI_SUM,*comm);
146  else NT = 0;
147 #endif
148 
149 #ifdef serial
150  if (context->verbose) printf("\n");
151 #else
152  if (context->verbose && (!rank)) printf("\n");
153 #endif
154 
155  iter = 0;
156  while(1) {
157 
158  /* evaluate break conditions */
159  if ( (iter >= context->maxiter)
160  || (iter && context->evaluate_norm && (global_norm < context->atol))
161  || (iter && context->evaluate_norm && (global_norm/norm0 < context->rtol)) ) {
162  break;
163  }
164 
165  /* Communicate the boundary x values between processors */
166  for (d=0; d<nsbs; d++) recvbufL[d] = recvbufR[d] = 0;
167 #ifndef serial
168  MPI_Request req[4] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL,MPI_REQUEST_NULL,MPI_REQUEST_NULL};
169  if (rank) MPI_Irecv(recvbufL,nsbs,MPI_DOUBLE,rank-1,2,*comm,&req[0]);
170  if (rank != nproc-1) MPI_Irecv(recvbufR,nsbs,MPI_DOUBLE,rank+1,3,*comm,&req[1]);
171  for (d=0; d<nsbs; d++) {
172  sendbufL[d] = x[d];
173  sendbufR[d] = x[(n-1)*nsbs+d];
174  }
175  if (rank) MPI_Isend(sendbufL,nsbs,MPI_DOUBLE,rank-1,3,*comm,&req[2]);
176  if (rank != nproc-1) MPI_Isend(sendbufR,nsbs,MPI_DOUBLE,rank+1,2,*comm,&req[3]);
177 #endif
178 
179  /* calculate error norm - interior */
180  if (context->evaluate_norm) {
181  norm = 0;
182  for (i=1; i<n-1; i++) {
183  for (d=0; d<ns; d++) {
184  double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[(i*ns+d)*bs+j];
185  _MatVecMultiplySubtract_(err,a+(i*ns+d)*bs2,x+((i-1)*ns+d)*bs,bs);
186  _MatVecMultiplySubtract_(err,b+(i*ns+d)*bs2,x+((i )*ns+d)*bs,bs);
187  _MatVecMultiplySubtract_(err,c+(i*ns+d)*bs2,x+((i+1)*ns+d)*bs,bs);
188  for (j=0; j<bs; j++) norm += (err[j]*err[j]);
189  }
190  }
191  }
192  /* calculate error norm - boundary */
193 #ifndef serial
194  MPI_Waitall(4,req,MPI_STATUS_IGNORE);
195 #endif
196  if (context->evaluate_norm) {
197  if (n > 1) {
198  for (d=0; d<ns; d++) {
199  double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[d*bs+j];
200  _MatVecMultiplySubtract_(err,a+d*bs2,recvbufL+d*bs,bs);
201  _MatVecMultiplySubtract_(err,b+d*bs2,x+d*bs,bs);
202  _MatVecMultiplySubtract_(err,c+d*bs2,x+(ns+d)*bs,bs);
203  for (j=0; j<bs; j++) norm += (err[j]*err[j]);
204  }
205  for (d=0; d<ns; d++) {
206  double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[(d+ns*(n-1))*bs+j];
207  _MatVecMultiplySubtract_(err,a+(d+ns*(n-1))*bs2,x+(d+ns*(n-2))*bs,bs);
208  _MatVecMultiplySubtract_(err,b+(d+ns*(n-1))*bs2,x+(d+ns*(n-1))*bs,bs);
209  _MatVecMultiplySubtract_(err,c+(d+ns*(n-1))*bs2,recvbufR+d*bs,bs);
210  for (j=0; j<bs; j++) norm += (err[j]*err[j]);
211  }
212  } else {
213  for (d=0; d<ns; d++) {
214  double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[d*bs+j];
215  _MatVecMultiplySubtract_(err,a+d*bs2,recvbufL+d*bs,bs);
216  _MatVecMultiplySubtract_(err,b+d*bs2,x+d*bs,bs);
217  _MatVecMultiplySubtract_(err,c+d*bs2,recvbufR+d*bs,bs);
218  for (j=0; j<bs; j++) norm += (err[j]*err[j]);
219  }
220  }
221  /* sum over all processes */
222 #ifdef serial
223  global_norm = norm;
224 #else
225  MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,*comm);
226 #endif
227  global_norm = sqrt(global_norm/NT);
228  if (!iter) norm0 = global_norm;
229  } else {
230  norm = -1.0;
231  global_norm = -1.0;
232  }
233 
234 #ifdef serial
235  if (context->verbose)
236 #else
237  if (context->verbose && (!rank))
238 #endif
239  printf("\t\titer: %d, norm: %1.16E\n",iter,global_norm);
240 
241  /* correct the solution for this iteration */
242  if (n > 1) {
243  for (d=0; d<ns; d++) {
244  double xt[bs],binv[bs2];
245 
246  i = 0;
247  for (j=0; j<bs; j++) xt[j] = rhs[(i*ns+d)*bs+j];
248  _MatVecMultiplySubtract_(xt,a+(i*ns+d)*bs2,recvbufL+d*bs,bs);
249  _MatVecMultiplySubtract_(xt,c+(i*ns+d)*bs2,x+(d+ns*(i+1))*bs,bs);
250  _MatrixInvert_(b+(i*ns+d)*bs2,binv,bs);
251  _MatVecMultiply_(binv,xt,x+(i*ns+d)*bs,bs);
252 
253  i = n-1;
254  for (j=0; j<bs; j++) xt[j] = rhs[(i*ns+d)*bs+j];
255  _MatVecMultiplySubtract_(xt,a+(i*ns+d)*bs2,x+(d+ns*(i-1))*bs,bs);
256  _MatVecMultiplySubtract_(xt,c+(i*ns+d)*bs2,recvbufR+d*bs,bs);
257  _MatrixInvert_(b+(i*ns+d)*bs2,binv,bs);
258  _MatVecMultiply_(binv,xt,x+(i*ns+d)*bs,bs);
259  }
260  for (i=1; i<n-1; i++) {
261  for (d=0; d<ns; d++) {
262  double xt[bs],binv[bs2];
263  for (j=0; j<bs; j++) xt[j] = rhs[(i*ns+d)*bs+j];
264  _MatVecMultiplySubtract_(xt,a+(i*ns+d)*bs2,x+(d+ns*(i-1))*bs,bs);
265  _MatVecMultiplySubtract_(xt,c+(i*ns+d)*bs2,x+(d+ns*(i+1))*bs,bs);
266  _MatrixInvert_(b+(i*ns+d)*bs2,binv,bs);
267  _MatVecMultiply_(binv,xt,x+(i*ns+d)*bs,bs);
268  }
269  }
270  } else {
271  for (d=0; d<ns; d++) {
272  double xt[bs],binv[bs2];
273  for (j=0; j<bs; j++) xt[j] = rhs[d*bs+j];
274  _MatVecMultiplySubtract_(xt,a+d*bs2,recvbufL+d*bs,bs);
275  _MatVecMultiplySubtract_(xt,c+d*bs2,recvbufR+d*bs,bs);
276  _MatrixInvert_(b+d*bs2,binv,bs);
277  _MatVecMultiply_(binv,xt,x+d*bs,bs);
278  }
279  }
280 
281  /* finished with this iteration */
282  iter++;
283  }
284 
285  /* save convergence information */
286  context->exitnorm = (context->evaluate_norm ? global_norm : -1.0);
287  context->exititer = iter;
288 
289  free(rhs);
290  free(sendbufL);
291  free(sendbufR);
292  free(recvbufL);
293  free(recvbufR);
294 
295  return(0);
296 }
int maxiter
Definition: tridiagLU.h:91
int exititer
Definition: tridiagLU.h:94
Header file for TridiagLU.
int evaluate_norm
Definition: tridiagLU.h:90
Contains macros and function definitions for common matrix operations.
#define _MatVecMultiplySubtract_(y, A, x, N)
Definition: matops.h:78
#define _MatVecMultiply_(A, x, y, N)
Definition: matops.h:64
int verbose
Definition: tridiagLU.h:96
double exitnorm
Definition: tridiagLU.h:95
#define _MatrixInvert_(A, B, N)
Definition: matops.h:90
int blocktridiagIterJacobi(double *a, double *b, double *c, double *x, int n, int ns, int bs, void *r, void *m)