TridiagLU  1.0
Scalable, parallel solver for tridiagonal systems of equations
tridiagLUGS.c File Reference

Solve tridiagonal systems of equations in parallel using "gather-and-solve".

#include <time.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <tridiagLU.h>


Functions

int tridiagLUGS (double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
 

Detailed Description

Solve tridiagonal systems of equations in parallel using "gather-and-solve".

Author
Debojyoti Ghosh

Definition in file tridiagLUGS.c.

Function Documentation

int tridiagLUGS (double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)

Solve tridiagonal (non-periodic) systems of equations using the gather-and-solve approach: this function can solve multiple independent systems with one call, and the systems need not share the same left- or right-hand sides. The "gather-and-solve" approach gathers each tridiagonal system onto one processor and solves it there using tridiagLU() (passing NULL as the MPI communicator argument to indicate that a serial solution is desired). Given multiple tridiagonal systems (ns > 1), this function distributes the gathered systems across the processors as evenly as possible, so that each processor solves a share of them. After the systems are solved, the solutions are scattered back to the original processors.
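
As an illustration, the following is a minimal usage sketch (not part of this file) showing how tridiagLUGS() might be called from an MPI program. It assumes that the TridiagLU parameter object can be filled with defaults through the library's tridiagLUInit() initializer, and that a, b, c, and x have already been filled in the interleaved layout described below; the wrapper name is hypothetical.

#include <stdlib.h>
#include <mpi.h>
#include <tridiagLU.h>

/* Illustrative wrapper (hypothetical): a, b, c, x are the local (n x ns)
   interleaved arrays; x holds the right-hand sides on entry and is
   overwritten with the solutions on return.                            */
int solve_my_systems(double *a, double *b, double *c, double *x,
                     int n, int ns, MPI_Comm comm)
{
  TridiagLU params;                            /* solver parameters      */
  int ierr = tridiagLUInit(&params, &comm);    /* assumed default set-up */
  if (ierr) return ierr;

  return tridiagLUGS(a, b, c, x, n, ns, &params, &comm);
}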

Array layout: The arguments a, b, c, and x are local 1D arrays (containing this processor's part of the subdiagonal, diagonal, superdiagonal, and right-hand-side) of size (n X ns), where n is the local size of the system, and ns is the number of independent systems to solve. The ordering of the elements in these arrays is as follows:

  • Elements of the same row for each of the independent systems are stored adjacent to each other.

For example, consider the following systems:

\begin{equation} \left[\begin{array}{ccccc} b_0^k & c_0^k & & & \\ a_1^k & b_1^k & c_1^k & & \\ & a_2^k & b_2^k & c_2^k & \\ & & a_3^k & b_3^k & c_3^k \\ & & & a_4^k & b_4^k \end{array}\right] \left[\begin{array}{c} x_0^k \\ x_1^k \\ x_2^k \\ x_3^k \\ x_4^k \end{array}\right] = \left[\begin{array}{c} r_0^k \\ r_1^k \\ r_2^k \\ r_3^k \\ r_4^k \end{array}\right]; \ \ k = 0,\cdots,ns-1 \end{equation}

and let \( ns = 3\). Note that in the code, \(x\) and \(r\) are the same array x.

Then, the array b must be a 1D array with the following layout of elements:
[
b_0^0, b_0^1, b_0^2, (diagonal element of the first row in each system)
b_1^0, b_1^1, b_1^2, (diagonal element of the second row in each system)
...,
b_{n-1}^0, b_{n-1}^1, b_{n-1}^2 (diagonal element of the last row in each system)
]
The arrays a, c, and x are stored similarly; an indexing sketch follows.
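
To make the interleaving concrete, here is a small, hypothetical sketch of filling the local arrays for ns systems of local size n; subdiag(), diag(), superdiag(), and rhs() are placeholders for whatever produces the coefficients of system k at row i.

/* Interleaved layout: row i of system k lives at index i*ns + k. */
for (int i = 0; i < n; i++) {
  for (int k = 0; k < ns; k++) {
    a[i*ns + k] = subdiag(i, k);     /* a_i^k */
    b[i*ns + k] = diag(i, k);        /* b_i^k */
    c[i*ns + k] = superdiag(i, k);   /* c_i^k */
    x[i*ns + k] = rhs(i, k);         /* r_i^k (right-hand side) */
  }
}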

Notes:

  • This function does not preserve the sub-diagonal, diagonal, or super-diagonal elements, or the right-hand sides.
  • The input array x contains the right-hand side on entering the function, and the solution on exiting it (see the sketch after these notes for preserving the inputs).
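
Because the coefficient arrays and right-hand sides are overwritten, a caller that needs them after the solve has to keep copies. A minimal sketch, with illustrative names (params and comm are assumed to be set up as in the usage sketch above):

#include <stdlib.h>  /* malloc, free */
#include <string.h>  /* memcpy */

/* keep a copy of the right-hand side before the solve overwrites x */
double *rhs_copy = (double*) malloc(n*ns*sizeof(double));
memcpy(rhs_copy, x, n*ns*sizeof(double));

/* ... tridiagLUGS(a, b, c, x, n, ns, &params, &comm) ... */

/* x now holds the solutions; rhs_copy still holds the original RHS;
   free(rhs_copy) when it is no longer needed                        */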
Parameters
    a    Array containing the sub-diagonal elements
    b    Array containing the diagonal elements
    c    Array containing the super-diagonal elements
    x    Right-hand side; will contain the solution on exit
    n    Local size of the system on this processor
    ns   Number of systems to solve
    r    Object of type TridiagLU
    m    MPI communicator

Definition at line 64 of file tridiagLUGS.c.

int tridiagLUGS(double *a, double *b, double *c, double *x, int n, int ns, void *r, void *m)
{
  TridiagLU *context = (TridiagLU*) r;
  if (!context) {
    fprintf(stderr,"Error in tridiagLUGS(): NULL pointer passed for parameters.\n");
    return(-1);
  }
#ifdef serial

  /* Serial compilation */
  return(tridiagLU(a,b,c,x,n,ns,context,m));

#else

  int d,i,ierr = 0,dstart,istart,p,q;
  const int nvar = 4;
  double *sendbuf,*recvbuf;
  int rank,nproc;

  /* Parallel compilation */
  MPI_Comm *comm = (MPI_Comm*) m;
  if (!comm) return(tridiagLU(a,b,c,x,n,ns,context,NULL));
  MPI_Comm_size(*comm,&nproc);
  MPI_Comm_rank(*comm,&rank);

  if ((ns == 0) || (n == 0)) return(0);

  /*
    each process needs to know the local sizes of every other process
    and total size of the system
  */
  int *N = (int*) calloc (nproc, sizeof(int));
  MPI_Allgather(&n,1,MPI_INT,N,1,MPI_INT,*comm);
  int NT = 0; for (i=0; i<nproc; i++) NT += N[i];

  /* counts and displacements for gather and scatter operations */
  int *counts = (int*) calloc (nproc, sizeof(int));
  int *displ  = (int*) calloc (nproc, sizeof(int));

  /* on all processes, calculate the number of systems each */
  /* process has to solve                                   */
  int *ns_local = (int*) calloc (nproc,sizeof(int));
  for (p=0; p<nproc; p++)    ns_local[p] = ns / nproc;
  for (p=0; p<ns%nproc; p++) ns_local[p]++;

  /* allocate the arrays for the gathered tridiagonal systems */
  double *ra=NULL,*rb=NULL,*rc=NULL,*rx=NULL;
  if (ns_local[rank] > 0) {
    ra = (double*) calloc (ns_local[rank]*NT,sizeof(double));
    rb = (double*) calloc (ns_local[rank]*NT,sizeof(double));
    rc = (double*) calloc (ns_local[rank]*NT,sizeof(double));
    rx = (double*) calloc (ns_local[rank]*NT,sizeof(double));
  }

  /* Gather the systems on each process */
  /* allocate receive buffer */
  if (ns_local[rank] > 0)
    recvbuf = (double*) calloc (ns_local[rank]*nvar*NT,sizeof(double));
  else recvbuf = NULL;
  dstart = 0;
  for (p = 0; p < nproc; p++) {
    if (ns_local[p] > 0) {
      /* allocate send buffer and form the send packet of data */
      sendbuf = (double*) calloc (nvar*n*ns_local[p],sizeof(double));
      for (d = 0; d < ns_local[p]; d++) {
        for (i = 0; i < n; i++) {
          sendbuf[n*nvar*d+n*0+i] = a[d+dstart+ns*i];
          sendbuf[n*nvar*d+n*1+i] = b[d+dstart+ns*i];
          sendbuf[n*nvar*d+n*2+i] = c[d+dstart+ns*i];
          sendbuf[n*nvar*d+n*3+i] = x[d+dstart+ns*i];
        }
      }
      dstart += ns_local[p];

      /* gather these reduced systems on process with rank = p */
      if (rank == p) {
        for (q = 0; q < nproc; q++) {
          counts[q] = nvar*N[q]*ns_local[p];
          displ [q] = (q == 0 ? 0 : displ[q-1]+counts[q-1]);
        }
      }
      MPI_Gatherv(sendbuf,nvar*n*ns_local[p],MPI_DOUBLE,
                  recvbuf,counts,displ,MPI_DOUBLE,p,*comm);

      /* deallocate send buffer */
      free(sendbuf);
    }
  }
  /* extract the data from the recvbuf and solve */
  istart = 0;
  for (q = 0; q < nproc; q++) {
    for (d = 0; d < ns_local[rank]; d++) {
      for (i = 0; i < N[q]; i++) {
        ra[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+0*N[q]+i];
        rb[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+1*N[q]+i];
        rc[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+2*N[q]+i];
        rx[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+3*N[q]+i];
      }
    }
    istart += N[q];
  }
  /* deallocate receive buffer */
  if (recvbuf) free(recvbuf);

  /* solve the gathered systems in serial */
  ierr = tridiagLU(ra,rb,rc,rx,NT,ns_local[rank],context,NULL);
  if (ierr) return(ierr);

  /* allocate send buffer and save the data to send */
  if (ns_local[rank] > 0)
    sendbuf = (double*) calloc (ns_local[rank]*NT,sizeof(double));
  else sendbuf = NULL;
  istart = 0;
  for (q = 0; q < nproc; q++) {
    for (i = 0; i < N[q]; i++) {
      for (d = 0; d < ns_local[rank]; d++) {
        sendbuf[istart*ns_local[rank]+d*N[q]+i] = rx[d+ns_local[rank]*(istart+i)];
      }
    }
    istart += N[q];
  }
  dstart = 0;
  for (p = 0; p < nproc; p++) {
    if (ns_local[p] > 0) {

      /* allocate receive buffer */
      recvbuf = (double*) calloc (ns_local[p]*n, sizeof(double));

      /* scatter the solution back */
      for (q = 0; q < nproc; q++) {
        counts[q] = ns_local[p]*N[q];
        displ[q]  = (q == 0 ? 0 : displ[q-1]+counts[q-1]);
      }
      MPI_Scatterv(sendbuf,counts,displ,MPI_DOUBLE,
                   recvbuf,ns_local[p]*n,MPI_DOUBLE,
                   p,*comm);
      /* save the solution on all root processes */
      for (d = 0; d < ns_local[p]; d++) {
        for (i = 0; i < n; i++) {
          x[d+dstart+ns*i] = recvbuf[d*n+i];
        }
      }
      dstart += ns_local[p];
      /* deallocate receive buffer */
      free(recvbuf);
    }
  }
  /* deallocate send buffer */
  if (sendbuf) free(sendbuf);

  /* clean up */
  if (ns_local[rank] > 0) {
    free(ra);
    free(rb);
    free(rc);
    free(rx);
  }
  free(ns_local);
  free(N);
  free(displ);
  free(counts);

  return(0);
#endif
}
References tridiagLU(): int tridiagLU (double *, double *, double *, double *, int, int, void *, void *), defined at line 83 of tridiagLU.c.