TridiagLU  1.0
Scalable, parallel solver for tridiagonal system of equations
tridiagIterJacobi.c File Reference

Solve tridiagonal systems of equations with the point Jacobi method.

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <mpi.h>
#include <tridiagLU.h>

Go to the source code of this file.

Functions

int tridiagIterJacobi (double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
 

Detailed Description

Solve tridiagonal systems of equations with the point Jacobi method.

Author
Debojyoti Ghosh

Definition in file tridiagIterJacobi.c.

Function Documentation

int tridiagIterJacobi ( double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m )

Solve tridiagonal (non-periodic) systems of equations using point Jacobi iterations. This function can solve multiple independent systems with one call; the systems need not share the same left- or right-hand sides. The initial guess is taken as the solution of

\begin{equation} {\rm diag}\left[{\bf b}\right]{\bf x} = {\bf r} \end{equation}

where \({\bf b}\) represents the diagonal elements of the tridiagonal system, and \({\bf r}\) is the right-hand-side, stored in \({\bf x}\) at the start of this function.
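
For reference, every subsequent sweep applies the standard point Jacobi update to each row; this is a restatement of the correction step in the source listing below, with "old" and "new" denoting successive iterates, and with the neighboring values at the first and last local rows taken from the adjacent processors:

\begin{equation} x_i^{\rm new} = \frac{1}{b_i}\left( r_i - a_i\, x_{i-1}^{\rm old} - c_i\, x_{i+1}^{\rm old} \right) \end{equation}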

Array layout: The arguments a, b, c, and x are local 1D arrays (containing this processor's part of the subdiagonal, diagonal, superdiagonal, and right-hand-side) of size (n X ns), where n is the local size of the system, and ns is the number of independent systems to solve. The ordering of the elements in these arrays is as follows:

  • Elements of the same row for each of the independent systems are stored adjacent to each other.

For example, consider the following systems:

\begin{equation} \left[\begin{array}{ccccc} b_0^k & c_0^k & & & \\ a_1^k & b_1^k & c_1^k & & \\ & a_2^k & b_2^k & c_2^k & \\ & & a_3^k & b_3^k & c_3^k \\ & & & a_4^k & b_4^k \end{array}\right] \left[\begin{array}{c} x_0^k \\ x_1^k \\ x_2^k \\ x_3^k \\ x_4^k \end{array}\right] = \left[\begin{array}{c} r_0^k \\ r_1^k \\ r_2^k \\ r_3^k \\ r_4^k \end{array}\right]; \ \ k = 0,\cdots,ns-1 \end{equation}

and let \( ns = 3\). Note that in the code, \(x\) and \(r\) are the same array x.

Then, the array b must be a 1D array with the following layout of elements:
[
b_0^0, b_0^1, b_0^2, (diagonal element of the first row in each system)
b_1^0, b_1^1, b_1^2, (diagonal element of the second row in each system)
...,
b_{n-1}^0, b_{n-1}^1, b_{n-1}^2 (diagonal element of the last row in each system)
]
The arrays a, c, and x are stored similarly.
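
To make this indexing concrete, here is a minimal sketch (illustration only, not part of the library) of packing per-system data into the interleaved layout; the source array src[d][i], holding element i of system d, and the helper name pack_interleaved are hypothetical:

/* Hypothetical helper: copy element i of system d, src[d][i], into the
   interleaved location dest[i*ns+d] expected by tridiagIterJacobi(). */
void pack_interleaved(double **src, double *dest, int n, int ns)
{
  int i, d;
  for (i = 0; i < n; i++) {
    for (d = 0; d < ns; d++) {
      dest[i*ns+d] = src[d][i];
    }
  }
}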

Notes:

  • This function does not preserve the right-hand-side; the sub-diagonal, diagonal, and super-diagonal arrays are not modified.
  • The input array x contains the right-hand-side on entering the function, and the solution on exiting it.
Parameters
    a    Array containing the sub-diagonal elements
    b    Array containing the diagonal elements
    c    Array containing the super-diagonal elements
    x    Right-hand side; will contain the solution on exit
    n    Local size of the system on this processor
    ns   Number of systems to solve
    r    Object of type TridiagLU
    m    MPI communicator
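
A minimal calling sketch (assumptions: the interleaved arrays are already filled as described above; the helper name solve_with_jacobi and the tolerance values are hypothetical; only the context fields that this function reads are set, whereas in practice the context would typically be initialized through the library's setup routine):

#include <mpi.h>
#include <tridiagLU.h>

/* Illustration only: solve ns independent local systems of size n.
   x holds the right-hand sides on entry and the solutions on return. */
int solve_with_jacobi(double *a, double *b, double *c, double *x,
                      int n, int ns, MPI_Comm comm)
{
  TridiagLU context = {0};       /* set only the fields this solver reads   */
  context.maxiter       = 1000;  /* iteration limit (hypothetical value)    */
  context.atol          = 1e-12; /* absolute tolerance on the residual norm */
  context.rtol          = 1e-10; /* relative tolerance on the residual norm */
  context.evaluate_norm = 1;     /* compute the global residual each sweep  */
  context.verbose       = 0;     /* suppress per-iteration output           */

  int ierr = tridiagIterJacobi(a, b, c, x, n, ns, &context, &comm);

  /* context.exititer and context.exitnorm now hold the number of
     iterations taken and the final residual norm. */
  return ierr;
}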

Definition at line 64 of file tridiagIterJacobi.c.

{
  TridiagLU *context = (TridiagLU*) r;
  int       iter,d,i,NT;
  double    norm=0,norm0=0,global_norm=0;

#ifndef serial
  MPI_Comm *comm = (MPI_Comm*) m;
  int      rank,nproc;

  if (comm) {
    MPI_Comm_size(*comm,&nproc);
    MPI_Comm_rank(*comm,&rank);
  } else {
    rank  = 0;
    nproc = 1;
  }
#endif

  if (!context) {
    fprintf(stderr,"Error in tridiagIterJacobi(): NULL pointer passed for parameters!\n");
    return(-1);
  }

  /* check for zero along the diagonal */
  for (i=0; i<n; i++) {
    for (d=0; d<ns; d++) {
      if (b[i*ns+d]*b[i*ns+d] < context->atol*context->atol) {
        fprintf(stderr,"Error in tridiagIterJacobi(): Encountered zero on main diagonal!\n");
        return(1);
      }
    }
  }

  double *rhs = (double*) calloc (ns*n, sizeof(double));
  for (i=0; i<n; i++) {
    for (d=0; d<ns; d++) {
      rhs[i*ns+d] = x[i*ns+d]; /* save a copy of the rhs */
      x[i*ns+d] /= b[i*ns+d];  /* initial guess */
    }
  }

  double *recvbufL, *recvbufR, *sendbufL, *sendbufR;
  recvbufL = (double*) calloc (ns, sizeof(double));
  recvbufR = (double*) calloc (ns, sizeof(double));
  sendbufL = (double*) calloc (ns, sizeof(double));
  sendbufR = (double*) calloc (ns, sizeof(double));

  /* total number of points */
#ifdef serial
  if (context->evaluate_norm) NT = n;
  else                        NT = 0;
#else
  if (context->evaluate_norm) MPI_Allreduce(&n,&NT,1,MPI_INT,MPI_SUM,*comm);
  else                        NT = 0;
#endif

#ifdef serial
  if (context->verbose) printf("\n");
#else
  if (context->verbose && (!rank)) printf("\n");
#endif

  iter = 0;
  while(1) {

    /* evaluate break conditions */
    if (   (iter >= context->maxiter)
        || (iter && context->evaluate_norm && (global_norm < context->atol))
        || (iter && context->evaluate_norm && (global_norm/norm0 < context->rtol)) ) {
      break;
    }

    /* Communicate the boundary x values between processors */
    for (d=0; d<ns; d++) recvbufL[d] = recvbufR[d] = 0;
#ifndef serial
    MPI_Request req[4] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL,MPI_REQUEST_NULL,MPI_REQUEST_NULL};
    if (rank)            MPI_Irecv(recvbufL,ns,MPI_DOUBLE,rank-1,2,*comm,&req[0]);
    if (rank != nproc-1) MPI_Irecv(recvbufR,ns,MPI_DOUBLE,rank+1,3,*comm,&req[1]);
    for (d=0; d<ns; d++) { sendbufL[d] = x[d]; sendbufR[d] = x[(n-1)*ns+d]; }
    if (rank)            MPI_Isend(sendbufL,ns,MPI_DOUBLE,rank-1,3,*comm,&req[2]);
    if (rank != nproc-1) MPI_Isend(sendbufR,ns,MPI_DOUBLE,rank+1,2,*comm,&req[3]);
#endif

    /* calculate error norm - interior */
    if (context->evaluate_norm) {
      norm = 0;
      for (i=1; i<n-1; i++) {
        for (d=0; d<ns; d++) {
          norm += ( (a[i*ns+d]*x[(i-1)*ns+d] + b[i*ns+d]*x[i*ns+d] + c[i*ns+d]*x[(i+1)*ns+d] - rhs[i*ns+d])
                  * (a[i*ns+d]*x[(i-1)*ns+d] + b[i*ns+d]*x[i*ns+d] + c[i*ns+d]*x[(i+1)*ns+d] - rhs[i*ns+d]) );
        }
      }
    }
    /* calculate error norm - boundary */
#ifndef serial
    MPI_Waitall(4,req,MPI_STATUS_IGNORE);
#endif
    if (context->evaluate_norm) {
      if (n > 1) {
        for (d=0; d<ns; d++) {
          norm += ( (a[d]*recvbufL[d] + b[d]*x[d] + c[d]*x[d+ns*1] - rhs[d])
                  * (a[d]*recvbufL[d] + b[d]*x[d] + c[d]*x[d+ns*1] - rhs[d]) );
        }
        for (d=0; d<ns; d++) {
          norm += ( (a[d+ns*(n-1)]*x[d+ns*(n-2)] + b[d+ns*(n-1)]*x[d+ns*(n-1)] + c[d+ns*(n-1)]*recvbufR[d] - rhs[d+ns*(n-1)])
                  * (a[d+ns*(n-1)]*x[d+ns*(n-2)] + b[d+ns*(n-1)]*x[d+ns*(n-1)] + c[d+ns*(n-1)]*recvbufR[d] - rhs[d+ns*(n-1)]) );
        }
      } else {
        for (d=0; d<ns; d++) {
          norm += ( (a[d]*recvbufL[d] + b[d]*x[d] + c[d]*recvbufR[d] - rhs[d])
                  * (a[d]*recvbufL[d] + b[d]*x[d] + c[d]*recvbufR[d] - rhs[d]) );
        }
      }
      /* sum over all processes */
#ifdef serial
      global_norm = norm;
#else
      MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,*comm);
#endif
      global_norm = sqrt(global_norm/NT);
      if (!iter) norm0 = global_norm;
    } else {
      norm = -1.0;
      global_norm = -1.0;
    }

#ifdef serial
    if (context->verbose)
#else
    if (context->verbose && (!rank))
#endif
      printf("\t\titer: %d, norm: %1.16E\n",iter,global_norm);

    /* correct the solution for this iteration */
    if (n > 1) {
      for (d=0; d<ns; d++) {
        i = 0;   x[i*ns+d] = (rhs[i*ns+d] - a[i*ns+d]*recvbufL[d]   - c[i*ns+d]*x[d+ns*(i+1)]) / b[i*ns+d];
        i = n-1; x[i*ns+d] = (rhs[i*ns+d] - a[i*ns+d]*x[d+ns*(i-1)] - c[i*ns+d]*recvbufR[d]  ) / b[i*ns+d];
      }
      for (i=1; i<n-1; i++) {
        for (d=0; d<ns; d++) {
          x[i*ns+d] = (rhs[i*ns+d] - a[i*ns+d]*x[d+ns*(i-1)] - c[i*ns+d]*x[d+ns*(i+1)]) / b[i*ns+d];
        }
      }
    } else {
      for (d=0; d<ns; d++) x[d] = (rhs[d] - a[d]*recvbufL[d] - c[d]*recvbufR[d]) / b[d];
    }

    /* finished with this iteration */
    iter++;
  }

  /* save convergence information */
  context->exitnorm = (context->evaluate_norm ? global_norm : -1.0);
  context->exititer = iter;

  free(rhs);
  free(sendbufL);
  free(sendbufR);
  free(recvbufL);
  free(recvbufR);

  return(0);
}