TridiagLU  1.0
Scalable, parallel solver for tridiagonal systems of equations
tridiagLUGS.c
#include <time.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#ifndef serial
#include <mpi.h>
#endif
#include <tridiagLU.h>
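
/*
  tridiagLUGS() solves tridiagonal systems of equations by gathering
  them on individual processes: the ns systems, each distributed over
  the MPI ranks with n local rows per rank, are divided among the
  ranks; every rank gathers the complete rows of its share of the
  systems, solves them locally with tridiagLU(), and the solutions
  are scattered back to the original distributed layout.
*/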
int tridiagLUGS(
                  double *a,  /* array of sub-diagonal elements (size n*ns)    */
                  double *b,  /* array of diagonal elements (size n*ns)        */
                  double *c,  /* array of super-diagonal elements (size n*ns)  */
                  double *x,  /* right-hand side on input, solution on output  */
                  int    n,   /* local size of each system on this process     */
                  int    ns,  /* number of systems to solve                    */
                  void   *r,  /* pointer to the TridiagLU solver context       */
                  void   *m   /* pointer to the MPI communicator (MPI_Comm*)   */
               )
{
  TridiagLU *context = (TridiagLU*) r;
  if (!context) {
    fprintf(stderr,"Error in tridiagLUGS(): NULL pointer passed for parameters.\n");
    return(-1);
  }
#ifdef serial

  /* Serial compilation */
  return(tridiagLU(a,b,c,x,n,ns,context,m));

#else

  int       d,i,ierr = 0,dstart,istart,p,q;
  const int nvar = 4;
  double    *sendbuf,*recvbuf;
  int       rank,nproc;

  /* Parallel compilation */
  MPI_Comm *comm = (MPI_Comm*) m;
  if (!comm) return(tridiagLU(a,b,c,x,n,ns,context,NULL));
  MPI_Comm_size(*comm,&nproc);
  MPI_Comm_rank(*comm,&rank);

  if ((ns == 0) || (n == 0)) return(0);

  /*
    each process needs to know the local sizes of every other process
    and the total size of the system
  */
  int *N = (int*) calloc (nproc, sizeof(int));
  MPI_Allgather(&n,1,MPI_INT,N,1,MPI_INT,*comm);
  int NT = 0; for (i=0; i<nproc; i++) NT += N[i];

  /* counts and displacements for gather and scatter operations */
  int *counts = (int*) calloc (nproc, sizeof(int));
  int *displ  = (int*) calloc (nproc, sizeof(int));

  /* on all processes, calculate the number of systems each */
  /* process has to solve                                   */
  int *ns_local = (int*) calloc (nproc,sizeof(int));
  for (p=0; p<nproc; p++)    ns_local[p] = ns / nproc;
  for (p=0; p<ns%nproc; p++) ns_local[p]++;

  /* allocate the arrays for the gathered tridiagonal systems */
  double *ra=NULL,*rb=NULL,*rc=NULL,*rx=NULL;
  if (ns_local[rank] > 0) {
    ra = (double*) calloc (ns_local[rank]*NT,sizeof(double));
    rb = (double*) calloc (ns_local[rank]*NT,sizeof(double));
    rc = (double*) calloc (ns_local[rank]*NT,sizeof(double));
    rx = (double*) calloc (ns_local[rank]*NT,sizeof(double));
  }

  /* Gather the systems on each process */
  /* allocate receive buffer */
  if (ns_local[rank] > 0)
        recvbuf = (double*) calloc (ns_local[rank]*nvar*NT,sizeof(double));
  else  recvbuf = NULL;
  dstart = 0;
  for (p = 0; p < nproc; p++) {
    if (ns_local[p] > 0) {
      /* allocate send buffer and form the send packet of data */
      sendbuf = (double*) calloc (nvar*n*ns_local[p],sizeof(double));
      for (d = 0; d < ns_local[p]; d++) {
        for (i = 0; i < n; i++) {
          sendbuf[n*nvar*d+n*0+i] = a[d+dstart+ns*i];
          sendbuf[n*nvar*d+n*1+i] = b[d+dstart+ns*i];
          sendbuf[n*nvar*d+n*2+i] = c[d+dstart+ns*i];
          sendbuf[n*nvar*d+n*3+i] = x[d+dstart+ns*i];
        }
      }
      dstart += ns_local[p];

      /* gather these reduced systems on process with rank = p */
      if (rank == p) {
        for (q = 0; q < nproc; q++) {
          counts[q] = nvar*N[q]*ns_local[p];
          displ [q] = (q == 0 ? 0 : displ[q-1]+counts[q-1]);
        }
      }
      MPI_Gatherv(sendbuf,nvar*n*ns_local[p],MPI_DOUBLE,
                  recvbuf,counts,displ,MPI_DOUBLE,p,*comm);

      /* deallocate send buffer */
      free(sendbuf);
    }
  }
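
  /*
    recvbuf on this rank now holds the rows of its ns_local[rank]
    systems contributed by every rank q (in rank order); within each
    rank's chunk the data is ordered system-by-system, with the N[q]
    values of a, b, c and x stored contiguously, in that order
  */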
  /* extract the data from the recvbuf and solve */
  istart = 0;
  for (q = 0; q < nproc; q++) {
    for (d = 0; d < ns_local[rank]; d++) {
      for (i = 0; i < N[q]; i++) {
        ra[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+0*N[q]+i];
        rb[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+1*N[q]+i];
        rc[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+2*N[q]+i];
        rx[d+ns_local[rank]*(istart+i)] = recvbuf[istart*nvar*ns_local[rank]+d*nvar*N[q]+3*N[q]+i];
      }
    }
    istart += N[q];
  }
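
  /*
    ra, rb, rc and rx now hold ns_local[rank] complete systems of
    global size NT each, stored in the same interleaved layout used
    for a, b, c and x above (row i of system d at index
    d + ns_local[rank]*i)
  */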
  /* deallocate receive buffer */
  if (recvbuf) free(recvbuf);

  /* solve the gathered systems in serial */
  ierr = tridiagLU(ra,rb,rc,rx,NT,ns_local[rank],context,NULL);
  if (ierr) return(ierr);

  /* allocate send buffer and save the data to send */
  if (ns_local[rank] > 0)
        sendbuf = (double*) calloc (ns_local[rank]*NT,sizeof(double));
  else  sendbuf = NULL;
  istart = 0;
  for (q = 0; q < nproc; q++) {
    for (i = 0; i < N[q]; i++) {
      for (d = 0; d < ns_local[rank]; d++) {
        sendbuf[istart*ns_local[rank]+d*N[q]+i] = rx[d+ns_local[rank]*(istart+i)];
      }
    }
    istart += N[q];
  }
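
  /*
    sendbuf now holds this rank's solved systems, ordered by destination
    rank; in the loop below, each rank p in turn scatters its ns_local[p]
    solutions back, and every rank copies its own n rows of them into x
  */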
  dstart = 0;
  for (p = 0; p < nproc; p++) {
    if (ns_local[p] > 0) {

      /* allocate receive buffer */
      recvbuf = (double*) calloc (ns_local[p]*n, sizeof(double));

      /* scatter the solution back */
      for (q = 0; q < nproc; q++) {
        counts[q] = ns_local[p]*N[q];
        displ[q]  = (q == 0 ? 0 : displ[q-1]+counts[q-1]);
      }
      MPI_Scatterv(sendbuf,counts,displ,MPI_DOUBLE,
                   recvbuf,ns_local[p]*n,MPI_DOUBLE,
                   p,*comm);

      /* save the solution on all root processes */
      for (d = 0; d < ns_local[p]; d++) {
        for (i = 0; i < n; i++) {
          x[d+dstart+ns*i] = recvbuf[d*n+i];
        }
      }
      dstart += ns_local[p];

      /* deallocate receive buffer */
      free(recvbuf);
    }
  }

  /* deallocate send buffer */
  if (sendbuf) free(sendbuf);

  /* clean up */
  if (ns_local[rank] > 0) {
    free(ra);
    free(rb);
    free(rc);
    free(rx);
  }
  free(ns_local);
  free(N);
  free(displ);
  free(counts);

  return(0);
#endif
}
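
A minimal usage sketch follows (it is not part of tridiagLUGS.c). It assumes the interleaved storage convention used above, i.e. row i of system d is stored at index i*ns+d in each array, and it assumes the solver context can be initialized with the library's tridiagLUInit() routine declared in tridiagLU.h; the communicator, sizes, and coefficient values are illustrative placeholders.

#include <stdlib.h>
#include <mpi.h>
#include <tridiagLU.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc,&argv);
  MPI_Comm comm = MPI_COMM_WORLD;

  int rank, nproc;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&nproc);

  int n  = 100;   /* local number of rows on this process (illustrative) */
  int ns = 10;    /* number of independent systems (illustrative)        */

  /* interleaved storage: row i of system d is at index i*ns+d */
  double *a = (double*) calloc (n*ns,sizeof(double));  /* sub-diagonal    */
  double *b = (double*) calloc (n*ns,sizeof(double));  /* diagonal        */
  double *c = (double*) calloc (n*ns,sizeof(double));  /* super-diagonal  */
  double *x = (double*) calloc (n*ns,sizeof(double));  /* RHS -> solution */

  /* fill a diagonally dominant test problem */
  int d, i;
  for (d = 0; d < ns; d++) {
    for (i = 0; i < n; i++) {
      a[i*ns+d] = -1.0;
      b[i*ns+d] =  4.0;
      c[i*ns+d] = -1.0;
      x[i*ns+d] =  1.0;
    }
    if (rank == 0)       a[0*ns+d]     = 0.0; /* first global row has no sub-diagonal  */
    if (rank == nproc-1) c[(n-1)*ns+d] = 0.0; /* last global row has no super-diagonal */
  }

  /* solver context; tridiagLUInit() is assumed here to fill it with
     default parameters (see tridiagLU.h for the actual interface) */
  TridiagLU params;
  tridiagLUInit(&params,&comm);

  /* solve; on return, x holds the solutions */
  int ierr = tridiagLUGS(a,b,c,x,n,ns,&params,&comm);

  free(a); free(b); free(c); free(x);
  MPI_Finalize();
  return(ierr);
}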