TridiagLU  1.0
Scalable, parallel solver for tridiagonal system of equations
blocktridiagIterJacobi.c File Reference

Solve a block tridiagonal system with the Jacobi method.

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <mpi.h>
#include <tridiagLU.h>
#include <matops.h>


Functions

int blocktridiagIterJacobi (double *a, double *b, double *c, double *x, int n, int ns, int bs, void *r, void *m)
 

Detailed Description

Solve a block tridiagonal system with the Jacobi method.

Author
Debojyoti Ghosh

Definition in file blocktridiagIterJacobi.c.

Function Documentation

int blocktridiagIterJacobi ( double *a,
                             double *b,
                             double *c,
                             double *x,
                             int     n,
                             int     ns,
                             int     bs,
                             void   *r,
                             void   *m )

Solve block tridiagonal (non-periodic) systems of equations using point Jacobi iterations. This function can solve multiple independent systems with one call; the systems need not share the same left- or right-hand sides. The initial guess is taken as the solution of

\begin{equation} {\rm diag}\left[{\bf b}\right]{\bf x} = {\bf r} \end{equation}

where \({\bf b}\) represents the diagonal elements of the tridiagonal system, and \({\bf r}\) is the right-hand-side, stored in \({\bf x}\) at the start of this function.
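
For reference, each subsequent Jacobi iteration updates the solution block-by-block, inverting only the diagonal blocks; the superscripts "old" and "new" below denote the previous and updated iterates and are introduced here only for illustration. Neighboring blocks \(X_{i-1}\) and \(X_{i+1}\) that lie on adjacent processors are obtained through MPI communication, and are taken as zero at the ends of the global system.

\begin{equation} X_i^{\rm new} = B_i^{-1}\left( R_i - A_i X_{i-1}^{\rm old} - C_i X_{i+1}^{\rm old} \right) \end{equation}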

Array layout: The arguments a, b, and c are local 1D arrays (containing this processor's parts of the subdiagonal, diagonal, and superdiagonal, respectively), each of size (n X ns X bs^2), and x is a local 1D array (containing this processor's part of the right-hand side; it contains the solution on exit) of size (n X ns X bs), where n is the local size of the system, ns is the number of independent systems to solve, and bs is the block size. The elements in these arrays are ordered as follows:

  • Each block is stored in the row-major format.
  • Blocks of the same row for each of the independent systems are stored adjacent to each other.

For example, consider the following systems:

\begin{equation} \left[\begin{array}{ccccc} B_0^k & C_0^k & & & \\ A_1^k & B_1^k & C_1^k & & \\ & A_2^k & B_2^k & C_2^k & & \\ & & A_3^k & B_3^k & C_3^k & \\ & & & A_4^k & B_4^k & C_4^k \\ \end{array}\right] \left[\begin{array}{c} X_0^k \\ X_1^k \\ X_2^k \\ X_3^k \\ X_4^k \end{array}\right] = \left[\begin{array}{c} R_0^k \\ R_1^k \\ R_2^k \\ R_3^k \\ R_4^k \end{array}\right]; \ \ k= 1,\cdots,ns \end{equation}

where \(A\), \(B\), and \(C\) are matrices of size bs = 2 (say), and let \( ns = 3\). In the equation above, we have

\begin{equation} B_i^k = \left[\begin{array}{cc} b_{00,i}^k & b_{01,i}^k \\ b_{10,i}^k & b_{11,i}^k \end{array}\right], X_i^k = \left[\begin{array}{c} x_{0,i}^k \\ x_{1,i}^k \end{array} \right], R_i^k = \left[\begin{array}{c} r_{0,i}^k \\ r_{1,i}^k \end{array} \right] \end{equation}

Note that in the code, \(X\) and \(R\) are the same array x.

Then, the array b must be a 1D array with the following layout of elements:
[
b_{00,0}^0, b_{01,0}^0, b_{10,0}^0, b_{11,0}^0, b_{00,0}^1, b_{01,0}^1, b_{10,0}^1, b_{11,0}^1, b_{00,0}^2, b_{01,0}^2, b_{10,0}^2, b_{11,0}^2,
b_{00,1}^0, b_{01,1}^0, b_{10,1}^0, b_{11,1}^0, b_{00,1}^1, b_{01,1}^1, b_{10,1}^1, b_{11,1}^1, b_{00,1}^2, b_{01,1}^2, b_{10,1}^2, b_{11,1}^2,
...,
b_{00,n-1}^0, b_{01,n-1}^0, b_{10,n-1}^0, b_{11,n-1}^0, b_{00,n-1}^1, b_{01,n-1}^1, b_{10,n-1}^1, b_{11,n-1}^1, b_{00,n-1}^2, b_{01,n-1}^2, b_{10,n-1}^2, b_{11,n-1}^2
]
The arrays a and c are stored similarly.

The solution/right-hand-side vector x must be a 1D array with the following layout of elements:
[
x_{0,0}^0, x_{1,0}^0, x_{0,0}^1, x_{1,0}^1,x_{0,0}^2, x_{1,0}^2,
x_{0,1}^0, x_{1,1}^0, x_{0,1}^1, x_{1,1}^1,x_{0,1}^2, x_{1,1}^2,
...,
x_{0,n-1}^0, x_{1,n-1}^0, x_{0,n-1}^1, x_{1,n-1}^1,x_{0,n-1}^2, x_{1,n-1}^2
]
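
As a minimal indexing sketch (the macros below are illustrative and not part of the library), element \(b_{pq,i}^k\) of a diagonal block and component \(x_{p,i}^k\) of the vector can be located in these 1D arrays as follows, using 0-based indices throughout:

/* entry (p,q) of the block in block-row i of system k (same layout for a, b, and c) */
#define _BlockEntry_(arr,i,k,ns,bs,p,q) ((arr)[((i)*(ns)+(k))*(bs)*(bs)+(p)*(bs)+(q)])
/* component p of the vector block in block-row i of system k (layout of x) */
#define _VecEntry_(arr,i,k,ns,bs,p)     ((arr)[((i)*(ns)+(k))*(bs)+(p)])

/* examples for the layouts shown above (ns = 3, bs = 2): */
double b_01 = _BlockEntry_(b,2,0,3,2,0,1);  /* b_{01,2}^0 */
double x_1  = _VecEntry_  (x,4,1,3,2,1);    /* x_{1,4}^1  */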

Notes:

  • This function does not preserve the sub-diagonal, diagonal, and super-diagonal elements, or the right-hand-sides.
  • The input array x contains the right-hand-side on entering the function, and the solution on exiting it.
Parameters
    a    Array containing the sub-diagonal elements
    b    Array containing the diagonal elements
    c    Array containing the super-diagonal elements
    x    Right-hand side; will contain the solution on exit
    n    Local size of the system on this processor (not multiplied by the block size)
    ns   Number of systems to solve
    bs   Block size
    r    Object of type TridiagLU
    m    MPI communicator
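
The following is a minimal calling sketch, not taken from the library; the sizes, fill values, and solver settings are placeholders. The TridiagLU fields referenced here (maxiter, atol, rtol, evaluate_norm, verbose, exititer, exitnorm) are the ones this function reads or writes; in a real application they may instead be set through the library's initialization routine or an input file.

#include <stdlib.h>
#include <mpi.h>
#include <tridiagLU.h>

/* ... inside an MPI program, after MPI_Init() ... */
int     n = 100, ns = 3, bs = 2;  /* local block-rows, number of systems, block size */
double  *a = (double*) calloc(n*ns*bs*bs, sizeof(double)); /* sub-diagonal blocks    */
double  *b = (double*) calloc(n*ns*bs*bs, sizeof(double)); /* diagonal blocks        */
double  *c = (double*) calloc(n*ns*bs*bs, sizeof(double)); /* super-diagonal blocks  */
double  *x = (double*) calloc(n*ns*bs   , sizeof(double)); /* right-hand side        */
/* ... fill a, b, c, and x in the layout described above ... */

TridiagLU context;
context.maxiter       = 500;    /* maximum number of Jacobi iterations         */
context.atol          = 1e-12;  /* absolute tolerance on the residual norm     */
context.rtol          = 1e-10;  /* relative tolerance on the residual norm     */
context.evaluate_norm = 1;      /* compute the residual norm at each iteration */
context.verbose       = 0;      /* do not print per-iteration norms            */

MPI_Comm comm = MPI_COMM_WORLD;
int ierr = blocktridiagIterJacobi(a, b, c, x, n, ns, bs, &context, &comm);
/* on return (ierr == 0), x holds the solution; context.exititer and
   context.exitnorm report the iterations taken and the final residual norm */

free(a); free(b); free(c); free(x);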

Definition at line 88 of file blocktridiagIterJacobi.c.

{
  TridiagLU *context = (TridiagLU*) r;
  int       iter,d,i,j,NT,bs2=bs*bs,nsbs=ns*bs;
  double    norm=0,norm0=0,global_norm=0;

#ifndef serial
  MPI_Comm  *comm = (MPI_Comm*) m;
  int       rank,nproc;

  if (comm) {
    MPI_Comm_size(*comm,&nproc);
    MPI_Comm_rank(*comm,&rank);
  } else {
    rank  = 0;
    nproc = 1;
  }
#endif

  if (!context) {
    fprintf(stderr,"Error in tridiagIterJacobi(): NULL pointer passed for parameters!\n");
    return(-1);
  }

  double *rhs = (double*) calloc (ns*n*bs, sizeof(double));
  for (i=0; i<ns*n*bs; i++) rhs[i] = x[i]; /* save a copy of the rhs */
  /* initial guess */
  for (i=0; i<n; i++) {
    for (d=0; d<ns; d++) {
      double binv[bs2];
      _MatrixInvert_   (b+(i*ns+d)*bs2,binv,bs);
      _MatVecMultiply_ (binv,rhs+(i*ns+d)*bs,x+(i*ns+d)*bs,bs);
    }
  }

  double *recvbufL, *recvbufR, *sendbufL, *sendbufR;
  recvbufL = (double*) calloc (nsbs, sizeof(double));
  recvbufR = (double*) calloc (nsbs, sizeof(double));
  sendbufL = (double*) calloc (nsbs, sizeof(double));
  sendbufR = (double*) calloc (nsbs, sizeof(double));

  /* total number of points */
#ifdef serial
  if (context->evaluate_norm) NT = n;
  else NT = 0;
#else
  if (context->evaluate_norm) MPI_Allreduce(&n,&NT,1,MPI_INT,MPI_SUM,*comm);
  else NT = 0;
#endif

#ifdef serial
  if (context->verbose) printf("\n");
#else
  if (context->verbose && (!rank)) printf("\n");
#endif

  iter = 0;
  while(1) {

    /* evaluate break conditions */
    if (    (iter >= context->maxiter)
        ||  (iter && context->evaluate_norm && (global_norm < context->atol))
        ||  (iter && context->evaluate_norm && (global_norm/norm0 < context->rtol))  ) {
      break;
    }

    /* Communicate the boundary x values between processors */
    for (d=0; d<nsbs; d++) recvbufL[d] = recvbufR[d] = 0;
#ifndef serial
    MPI_Request req[4] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL,MPI_REQUEST_NULL,MPI_REQUEST_NULL};
    if (rank)            MPI_Irecv(recvbufL,nsbs,MPI_DOUBLE,rank-1,2,*comm,&req[0]);
    if (rank != nproc-1) MPI_Irecv(recvbufR,nsbs,MPI_DOUBLE,rank+1,3,*comm,&req[1]);
    for (d=0; d<nsbs; d++) {
      sendbufL[d] = x[d];
      sendbufR[d] = x[(n-1)*nsbs+d];
    }
    if (rank)            MPI_Isend(sendbufL,nsbs,MPI_DOUBLE,rank-1,3,*comm,&req[2]);
    if (rank != nproc-1) MPI_Isend(sendbufR,nsbs,MPI_DOUBLE,rank+1,2,*comm,&req[3]);
#endif

    /* calculate error norm - interior */
    if (context->evaluate_norm) {
      norm = 0;
      for (i=1; i<n-1; i++) {
        for (d=0; d<ns; d++) {
          double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[(i*ns+d)*bs+j];
          _MatVecMultiplySubtract_(err,a+(i*ns+d)*bs2,x+((i-1)*ns+d)*bs,bs);
          _MatVecMultiplySubtract_(err,b+(i*ns+d)*bs2,x+((i  )*ns+d)*bs,bs);
          _MatVecMultiplySubtract_(err,c+(i*ns+d)*bs2,x+((i+1)*ns+d)*bs,bs);
          for (j=0; j<bs; j++) norm += (err[j]*err[j]);
        }
      }
    }
    /* calculate error norm - boundary */
#ifndef serial
    MPI_Waitall(4,req,MPI_STATUS_IGNORE);
#endif
    if (context->evaluate_norm) {
      if (n > 1) {
        for (d=0; d<ns; d++) {
          double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[d*bs+j];
          _MatVecMultiplySubtract_(err,a+d*bs2,recvbufL+d*bs,bs);
          _MatVecMultiplySubtract_(err,b+d*bs2,x+d*bs,bs);
          _MatVecMultiplySubtract_(err,c+d*bs2,x+(ns+d)*bs,bs);
          for (j=0; j<bs; j++) norm += (err[j]*err[j]);
        }
        for (d=0; d<ns; d++) {
          double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[(d+ns*(n-1))*bs+j];
          _MatVecMultiplySubtract_(err,a+(d+ns*(n-1))*bs2,x+(d+ns*(n-2))*bs,bs);
          _MatVecMultiplySubtract_(err,b+(d+ns*(n-1))*bs2,x+(d+ns*(n-1))*bs,bs);
          _MatVecMultiplySubtract_(err,c+(d+ns*(n-1))*bs2,recvbufR+d*bs,bs);
          for (j=0; j<bs; j++) norm += (err[j]*err[j]);
        }
      } else {
        for (d=0; d<ns; d++) {
          double err[bs]; for (j=0; j<bs; j++) err[j] = rhs[d*bs+j];
          _MatVecMultiplySubtract_(err,a+d*bs2,recvbufL+d*bs,bs);
          _MatVecMultiplySubtract_(err,b+d*bs2,x+d*bs,bs);
          _MatVecMultiplySubtract_(err,c+d*bs2,recvbufR+d*bs,bs);
          for (j=0; j<bs; j++) norm += (err[j]*err[j]);
        }
      }
      /* sum over all processes */
#ifdef serial
      global_norm = norm;
#else
      MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,*comm);
#endif
      global_norm = sqrt(global_norm/NT);
      if (!iter) norm0 = global_norm;
    } else {
      norm = -1.0;
      global_norm = -1.0;
    }

#ifdef serial
    if (context->verbose)
#else
    if (context->verbose && (!rank))
#endif
      printf("\t\titer: %d, norm: %1.16E\n",iter,global_norm);

    /* correct the solution for this iteration */
    if (n > 1) {
      for (d=0; d<ns; d++) {
        double xt[bs],binv[bs2];

        i = 0;
        for (j=0; j<bs; j++) xt[j] = rhs[(i*ns+d)*bs+j];
        _MatVecMultiplySubtract_(xt,a+(i*ns+d)*bs2,recvbufL+d*bs,bs);
        _MatVecMultiplySubtract_(xt,c+(i*ns+d)*bs2,x+(d+ns*(i+1))*bs,bs);
        _MatrixInvert_(b+(i*ns+d)*bs2,binv,bs);
        _MatVecMultiply_(binv,xt,x+(i*ns+d)*bs,bs);

        i = n-1;
        for (j=0; j<bs; j++) xt[j] = rhs[(i*ns+d)*bs+j];
        _MatVecMultiplySubtract_(xt,a+(i*ns+d)*bs2,x+(d+ns*(i-1))*bs,bs);
        _MatVecMultiplySubtract_(xt,c+(i*ns+d)*bs2,recvbufR+d*bs,bs);
        _MatrixInvert_(b+(i*ns+d)*bs2,binv,bs);
        _MatVecMultiply_(binv,xt,x+(i*ns+d)*bs,bs);
      }
      for (i=1; i<n-1; i++) {
        for (d=0; d<ns; d++) {
          double xt[bs],binv[bs2];
          for (j=0; j<bs; j++) xt[j] = rhs[(i*ns+d)*bs+j];
          _MatVecMultiplySubtract_(xt,a+(i*ns+d)*bs2,x+(d+ns*(i-1))*bs,bs);
          _MatVecMultiplySubtract_(xt,c+(i*ns+d)*bs2,x+(d+ns*(i+1))*bs,bs);
          _MatrixInvert_(b+(i*ns+d)*bs2,binv,bs);
          _MatVecMultiply_(binv,xt,x+(i*ns+d)*bs,bs);
        }
      }
    } else {
      for (d=0; d<ns; d++) {
        double xt[bs],binv[bs2];
        for (j=0; j<bs; j++) xt[j] = rhs[d*bs+j];
        _MatVecMultiplySubtract_(xt,a+d*bs2,recvbufL+d*bs,bs);
        _MatVecMultiplySubtract_(xt,c+d*bs2,recvbufR+d*bs,bs);
        _MatrixInvert_(b+d*bs2,binv,bs);
        _MatVecMultiply_(binv,xt,x+d*bs,bs);
      }
    }

    /* finished with this iteration */
    iter++;
  }

  /* save convergence information */
  context->exitnorm = (context->evaluate_norm ? global_norm : -1.0);
  context->exititer = iter;

  free(rhs);
  free(sendbufL);
  free(sendbufR);
  free(recvbufL);
  free(recvbufR);

  return(0);
}