TridiagLU  1.0
Scalable, parallel solver for tridiagonal system of equations
test.c
Go to the documentation of this file.
1 
177 #include <stdio.h>
178 #include <stdlib.h>
179 #include <time.h>
180 #ifndef serial
181 #include <mpi.h>
182 #endif
183 #include <tridiagLU.h>
184 #include <matops.h>
185 
189 int MAX_BS = 5;
190 
191 /* Function declarations */
192 static void CopyArray (double*,double*,int,int);
193 static void CopyArraySimple (double*,double*,int);
194 #ifdef serial
195 static int main_serial (int,int);
196 static int test_serial (int,int,int(*)(double*,double*,double*,double*,int,int,void*,void*));
197 static int test_block_serial (int,int,int,int(*)(double*,double*,double*,double*,int,int,int,void*,void*));
198 static double CalculateError (double*,double*,double*,double*,double*,int,int);
199 static double CalculateErrorBlock (double*,double*,double*,double*,double*,int,int,int);
200 #else
201 static int main_mpi (int,int,int,int,int,int);
202 static int test_mpi (int,int,int,int,int,int,int,int(*)(double*,double*,double*,double*,int,int,void*,void*),const char*);
203 static int test_block_mpi (int,int,int,int,int,int,int,int(*)(double*,double*,double*,double*,int,int,int,void*,void*),const char*);
204 static int partition1D (int,int,int,int*);
205 static double CalculateError (double*,double*,double*,double*,double*,int,int,int,int);
206 static double CalculateErrorBlock (double*,double*,double*,double*,double*,int,int,int,int,int);
207 #endif
208 
209 #ifdef with_scalapack
210 
211 extern void Cblacs_pinfo();
213 extern void Cblacs_get();
215 extern void Cblacs_gridinit();
217 extern void Cblacs_gridexit();
219 extern void Cblacs_exit();
220 #endif
221 
227 int main(int argc, char *argv[])
228 {
229  int ierr,N,Ns,NRuns;
230 
231 #ifdef serial
232 
233  /* If compiled in serial, run the serial tests */
234  /* read in size of system, number of system and number of solves */
235  FILE *in; in=fopen("input","r");
236  ierr = fscanf (in,"%d %d %d",&N,&Ns,&NRuns);
237  if (ierr != 3) {
238  fprintf(stderr,"Invalid input file.\n");
239  return(0);
240  }
241  fclose(in);
242  /* call the test function */
243  ierr = main_serial(N,Ns);
244  if (ierr) fprintf(stderr,"main_serial() returned with an error code of %d.\n",ierr);
245 
246 #else
247 
248  /* If compiled in parallel, run the MPI tests */
249  int rank,nproc;
250  MPI_Init(&argc,&argv);
251  MPI_Comm_size(MPI_COMM_WORLD,&nproc);
252  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
253 
254 #ifdef with_scalapack
255  /* initialize BLACS */
256  int rank_blacs,nproc_blacs,blacs_context;
257  Cblacs_pinfo(&rank_blacs,&nproc_blacs);
258  Cblacs_get(-1,0,&blacs_context);
259  Cblacs_gridinit(&blacs_context,"R",1,nproc_blacs);
260 #else
261  int blacs_context = -1;
262 #endif
263 
264  /* read in size of system, number of system and number of solves */
265 
266  if (!rank) {
267  FILE *in; in=fopen("input","r");
268  ierr = fscanf (in,"%d %d %d",&N,&Ns,&NRuns);
269  if (ierr != 3) {
270  fprintf(stderr,"Invalid input file.\n");
271  return(0);
272  }
273  fclose(in);
274  }
275  /* Broadcast the input values to all the processes */
276  MPI_Bcast(&N ,1,MPI_INT,0,MPI_COMM_WORLD);
277  MPI_Bcast(&Ns,1,MPI_INT,0,MPI_COMM_WORLD);
278  MPI_Bcast(&NRuns,1,MPI_INT,0,MPI_COMM_WORLD);
279 
280  /* call the test function */
281  ierr = main_mpi(N,Ns,NRuns,rank,nproc,blacs_context);
282  if (ierr) fprintf(stderr,"main_mpi() returned with an error code of %d on rank %d.\n",ierr,rank);
283 
284 #ifdef with_scalapack
285  Cblacs_gridexit(blacs_context);
286  Cblacs_exit(0);
287 #else
288  MPI_Finalize();
289 #endif
290 #endif
291  return(0);
292 }
293 
294 #ifdef serial
295 
296 /*
297  THIS FUNCTION CALLS THE TEST FUNCTION FOR THE DIFFERENT
298  TRIDIAGONAL SOLVERS (Serial)
299 */
300 int main_serial(int N,int Ns)
301 {
302  int ierr = 0;
303 
304  printf("Testing serial tridiagLUGS() with N=%d, Ns=%d\n",N,Ns);
305  ierr = test_serial(N,Ns,&tridiagLUGS); if(ierr) return(ierr);
306 
307  printf("Testing serial tridiagIterJacobi() with N=%d, Ns=%d\n",N,Ns);
308  ierr = test_serial(N,Ns,&tridiagIterJacobi); if(ierr) return(ierr);
309 
310  printf("Testing serial tridiagLU() with N=%d, Ns=%d\n",N,Ns);
311  ierr = test_serial(N,Ns,&tridiagLU); if(ierr) return(ierr);
312 
313  int bs;
314  for (bs=1; bs <= MAX_BS; bs++) {
315  printf("Testing serial blocktridiagIterJacobi() with N=%d, Ns=%d, bs=%d\n",N,Ns,bs);
316  ierr = test_block_serial(N,Ns,bs,&blocktridiagIterJacobi); if(ierr) return(ierr);
317  }
318  for (bs=1; bs <= MAX_BS; bs++) {
319  printf("Testing serial blocktridiagLU() with N=%d, Ns=%d, bs=%d\n",N,Ns,bs);
320  ierr = test_block_serial(N,Ns,bs,&blocktridiagLU); if(ierr) return(ierr);
321  }
322 
323  /* Return */
324  return(0);
325 }
326 
327 /*
328  THIS FUNCTION TESTS THE SERIAL IMPLEMENTATION OF A
329  TRIDIAGONAL SOLVER
330 */
331 int test_serial(int N,int Ns,int (*LUSolver)(double*,double*,double*,double*,int,int,void*,void*))
332 {
333  int d,i,ierr=0;
334  double error;
335  TridiagLU context;
336 
337  /* Initialize tridiagonal solver parameters */
338  ierr = tridiagLUInit(&context,NULL); if (ierr) return(ierr);
339 
340  /* Variable declarations */
341  double *a1; /* sub-diagonal */
342  double *b1; /* diagonal */
343  double *c1; /* super-diagonal */
344  double *x; /* right hand side, will contain the solution */
345 
346  /*
347  Since a,b,c and x are not preserved, declaring variables to
348  store a copy of them to calculate error after the solve
349  */
350  double *a2,*b2,*c2,*y;
351 
352  /* Initialize random number generator */
353  srand(time(NULL));
354 
355  /*
356  Allocate arrays of dimension (Ns x N)
357  Ns -> number of systems
358  N -> size of each system
359  */
360  a1 = (double*) calloc (N*Ns,sizeof(double));
361  b1 = (double*) calloc (N*Ns,sizeof(double));
362  c1 = (double*) calloc (N*Ns,sizeof(double));
363  a2 = (double*) calloc (N*Ns,sizeof(double));
364  b2 = (double*) calloc (N*Ns,sizeof(double));
365  c2 = (double*) calloc (N*Ns,sizeof(double));
366  x = (double*) calloc (N*Ns,sizeof(double));
367  y = (double*) calloc (N*Ns,sizeof(double));
368 
369  /*
370  TEST 1: Solution of an identity matrix with random
371  right hand side
372  [I]x = b => x = b
373  */
374 
375  /*
376  Set the values of the matrix elements and the
377  right hand side
378  */
379  for (d = 0; d < Ns; d++) {
380  for (i = 0; i < N; i++) {
381  a1[i*Ns+d] = 0.0;
382  b1[i*Ns+d] = 1.0;
383  c1[i*Ns+d] = 0.0;
384  x [i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
385  }
386  }
387 
388 
389  /*
390  Copy the original values to calculate error later
391  */
392  CopyArray(a1,a2,N,Ns);
393  CopyArray(b1,b2,N,Ns);
394  CopyArray(c1,c2,N,Ns);
395  CopyArray(x ,y ,N,Ns);
396 
397  /* Solve */
398  printf("TridiagLU Serial test 1 ([I]x = b => x = b): \t");
399  ierr = LUSolver(a1,b1,c1,x,N,Ns,&context,NULL);
400  if (ierr == -1) printf("Error - system is singular\t");
401 
402  /*
403  Calculate Error
404  */
405  error = CalculateError(a2,b2,c2,y,x,N,Ns);
406  printf("error=%E\n",error);
407 
408  /*
409  TEST 2: Solution of an upper triangular matrix
410  [U]x = b
411  */
412 
413  /*
414  Set the values of the matrix elements and the
415  right hand side
416  */
417  for (d = 0; d < Ns; d++) {
418  for (i = 0; i < N; i++) {
419  a1[i*Ns+d] = 0.0;
420  b1[i*Ns+d] = 100.0;
421  c1[i*Ns+d] = (i == N-1 ? 0 : 0.5);
422  x [i*Ns+d] = 1.0;
423  }
424  }
425 
426  /*
427  Copy the original values to calculate error later
428  */
429  CopyArray(a1,a2,N,Ns);
430  CopyArray(b1,b2,N,Ns);
431  CopyArray(c1,c2,N,Ns);
432  CopyArray(x ,y ,N,Ns);
433 
434  /* Solve */
435  printf("TridiagLU Serial test 2 ([U]x = b => x = [U]^(-1)b):\t");
436  ierr = LUSolver(a1,b1,c1,x,N,Ns,&context,NULL);
437  if (ierr == -1) printf("Error - system is singular\t");
438 
439  /*
440  Calculate Error
441  */
442  error = CalculateError(a2,b2,c2,y,x,N,Ns);
443  printf("error=%E\n",error);
444 
445  /*
446  TEST 3: Solution of a tridiagonal matrix with random
447  entries and right hand side
448  [A]x = b => x = b
449  */
450 
451  /*
452  Set the values of the matrix elements and the
453  right hand side
454  */
455  for (d = 0; d < Ns; d++) {
456  for (i = 0; i < N; i++) {
457  a1[i*Ns+d] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
458  b1[i*Ns+d] = 100.0*(1.0+((double) rand()) / ((double) RAND_MAX));
459  c1[i*Ns+d] = (i == N-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
460  x [i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
461  }
462  }
463 
464  /*
465  Copy the original values to calculate error later
466  */
467  CopyArray(a1,a2,N,Ns);
468  CopyArray(b1,b2,N,Ns);
469  CopyArray(c1,c2,N,Ns);
470  CopyArray(x ,y ,N,Ns);
471 
472  /* Solve */
473  printf("TridiagLU Serial test 3 ([A]x = b => x = [A]^(-1)b):\t");
474  ierr = LUSolver(a1,b1,c1,x,N,Ns,&context,NULL);
475  if (ierr == -1) printf("Error - system is singular\t");
476 
477  /*
478  Calculate Error
479  */
480  error = CalculateError(a2,b2,c2,y,x,N,Ns);
481  printf("error=%E\n",error);
482 
483  /*
484  DEALLOCATE ALL ARRAYS
485  */
486  free(a1);
487  free(b1);
488  free(c1);
489  free(a2);
490  free(b2);
491  free(c2);
492  free(x);
493  free(y);
494 
495  /* Return */
496  return(0);
497 }
498 
499 /*
500  THIS FUNCTION TESTS THE SERIAL IMPLEMENTATION OF A
501  BLOCK TRIDIAGONAL SOLVER
502 */
503 int test_block_serial(int N,int Ns,int bs,int (*LUSolver)(double*,double*,double*,double*,int,int,int,void*,void*))
504 {
505  int d,i,j,k,ierr=0;
506  double error;
507  TridiagLU context;
508 
509  /* Initialize tridiagonal solver parameters */
510  ierr = tridiagLUInit(&context,NULL); if (ierr) return(ierr);
511 
512  /* Variable declarations */
513  double *a1; /* sub-diagonal */
514  double *b1; /* diagonal */
515  double *c1; /* super-diagonal */
516  double *x; /* right hand side, will contain the solution */
517 
518  /*
519  Since a,b,c and x are not preserved, declaring variables to
520  store a copy of them to calculate error after the solve
521  */
522  double *a2,*b2,*c2,*y;
523 
524  /* Initialize random number generator */
525  srand(time(NULL));
526 
527  /*
528  Allocate arrays of dimension (Ns x N)
529  Ns -> number of systems
530  N -> size of each system
531  */
532  a1 = (double*) calloc (N*Ns*bs*bs,sizeof(double));
533  b1 = (double*) calloc (N*Ns*bs*bs,sizeof(double));
534  c1 = (double*) calloc (N*Ns*bs*bs,sizeof(double));
535  a2 = (double*) calloc (N*Ns*bs*bs,sizeof(double));
536  b2 = (double*) calloc (N*Ns*bs*bs,sizeof(double));
537  c2 = (double*) calloc (N*Ns*bs*bs,sizeof(double));
538  x = (double*) calloc (N*Ns*bs ,sizeof(double));
539  y = (double*) calloc (N*Ns*bs ,sizeof(double));
540 
541  /*
542  TEST 1: Solution of an identity matrix with random
543  right hand side
544  [I]x = b => x = b
545  */
546 
547  /*
548  Set the values of the matrix elements and the
549  right hand side
550  */
551  for (d = 0; d < Ns; d++) {
552  for (i = 0; i < N; i++) {
553  for (j=0; j<bs; j++) {
554  for (k=0; k<bs; k++) {
555  a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
556  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 1.0;
557  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
558  c1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
559  }
560  x [(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
561  }
562  }
563  }
564 
565 
566  /*
567  Copy the original values to calculate error later
568  */
569  CopyArraySimple(a2,a1,N*Ns*bs*bs);
570  CopyArraySimple(b2,b1,N*Ns*bs*bs);
571  CopyArraySimple(c2,c1,N*Ns*bs*bs);
572  CopyArraySimple(y ,x ,N*Ns*bs );
573 
574  /* Solve */
575  printf("Block TridiagLU Serial test 1 ([I]x = b => x = b): \t");
576  ierr = LUSolver(a1,b1,c1,x,N,Ns,bs,&context,NULL);
577  if (ierr == -1) printf("Error - system is singular\t");
578 
579  /*
580  Calculate Error
581  */
582  error = CalculateErrorBlock(a2,b2,c2,y,x,N,Ns,bs);
583  printf("error=%E\n",error);
584 
585  /*
586  TEST 2: Solution of an upper triangular matrix
587  [U]x = b
588  */
589 
590  /*
591  Set the values of the matrix elements and the
592  right hand side
593  */
594  for (d = 0; d < Ns; d++) {
595  for (i = 0; i < N; i++) {
596  for (j=0; j<bs; j++) {
597  for (k=0; k<bs; k++) {
598  a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
599  b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0 + ((double) rand()) / ((double) RAND_MAX);
600  c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == N-1 ? 0 : 0.5*(((double) rand()) / ((double) RAND_MAX)));
601  }
602  x [(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
603  }
604  }
605  }
606 
607  /*
608  Copy the original values to calculate error later
609  */
610  CopyArraySimple(a2,a1,N*Ns*bs*bs);
611  CopyArraySimple(b2,b1,N*Ns*bs*bs);
612  CopyArraySimple(c2,c1,N*Ns*bs*bs);
613  CopyArraySimple(y ,x ,N*Ns*bs );
614 
615  /* Solve */
616  printf("Block TridiagLU Serial test 2 ([U]x = b => x = [U]^(-1)b):\t");
617  ierr = LUSolver(a1,b1,c1,x,N,Ns,bs,&context,NULL);
618  if (ierr == -1) printf("Error - system is singular\t");
619 
620  /*
621  Calculate Error
622  */
623  error = CalculateErrorBlock(a2,b2,c2,y,x,N,Ns,bs);
624  printf("error=%E\n",error);
625 
626  /*
627  TEST 3: Solution of a tridiagonal matrix with random
628  entries and right hand side
629  [A]x = b => x = b
630  */
631 
632  /*
633  Set the values of the matrix elements and the
634  right hand side
635  */
636  for (d = 0; d < Ns; d++) {
637  for (i = 0; i < N; i++) {
638  for (j=0; j<bs; j++) {
639  for (k=0; k<bs; k++) {
640  a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
641  b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0+((double) rand()) / ((double) RAND_MAX));
642  c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == N-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
643  }
644  x [(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
645  }
646  }
647  }
648 
649  /*
650  Copy the original values to calculate error later
651  */
652  CopyArraySimple(a2,a1,N*Ns*bs*bs);
653  CopyArraySimple(b2,b1,N*Ns*bs*bs);
654  CopyArraySimple(c2,c1,N*Ns*bs*bs);
655  CopyArraySimple(y ,x ,N*Ns*bs );
656 
657  /* Solve */
658  printf("Block TridiagLU Serial test 3 ([A]x = b => x = [A]^(-1)b):\t");
659  ierr = LUSolver(a1,b1,c1,x,N,Ns,bs,&context,NULL);
660  if (ierr == -1) printf("Error - system is singular\t");
661 
662  /*
663  Calculate Error
664  */
665  error = CalculateErrorBlock(a2,b2,c2,y,x,N,Ns,bs);
666  printf("error=%E\n",error);
667 
668  /*
669  DEALLOCATE ALL ARRAYS
670  */
671  free(a1);
672  free(b1);
673  free(c1);
674  free(a2);
675  free(b2);
676  free(c2);
677  free(x);
678  free(y);
679 
680  /* Return */
681  return(0);
682 }
683 
684 #else
685 
690  int N,
691  int Ns,
692  int NRuns,
693  int rank,
694  int nproc,
695  int blacs_context
696  )
697 {
698  int ierr = 0;
699 
700 #ifdef test_LUGS
701  if (!rank) printf("\nTesting MPI tridiagLUGS() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
702  ierr = test_mpi(N,Ns,NRuns,rank,nproc,0,blacs_context,&tridiagLUGS,"walltimes_tridiagLUGS.dat"); if (ierr) return(ierr);
703  MPI_Barrier(MPI_COMM_WORLD);
704 #else
705  if (!rank) printf("\nSkipping tridiagLUGS(). Compile with -Dtest_LUGS flag to enable.\n");
706 #endif
707 
708 #ifdef test_IterJacobi
709  if (!rank) printf("\nTesting MPI tridiagIterJacobi() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
710  ierr = test_mpi(N,Ns,NRuns,rank,nproc,0,blacs_context,&tridiagIterJacobi,"walltimes_tridiagIterJac.dat"); if (ierr) return(ierr);
711  MPI_Barrier(MPI_COMM_WORLD);
712 #else
713  if (!rank) printf("\nSkipping tridiagIterJacobi(). Compile with -Dtest_IterJacobi flag to enable.\n");
714 #endif
715 
716 #ifdef test_LU
717  if (!rank) printf("\nTesting MPI tridiagLU() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
718  ierr = test_mpi(N,Ns,NRuns,rank,nproc,1,blacs_context,&tridiagLU,"walltimes_tridiagLU.dat"); if (ierr) return(ierr);
719  MPI_Barrier(MPI_COMM_WORLD);
720 #else
721  if (!rank) printf("\nSkipping tridiagLU(). Compile with -Dtest_LU flag to enable.\n");
722 #endif
723 
724 #ifdef with_scalapack
725 #ifdef test_scalapack
726  if (!rank) printf("\nTesting MPI tridiagScaLPK() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
727  ierr = test_mpi(N,Ns,NRuns,rank,nproc,1,blacs_context,&tridiagScaLPK,"walltimes_tridiagScaLPK.dat"); if (ierr) return(ierr);
728  MPI_Barrier(MPI_COMM_WORLD);
729 #else
730  if (!rank) printf("\nSkipping tridiagScaLPK(). Compile with -Dtest_scalapack flag to enable.\n");
731 #endif
732 #endif
733 
734  if (!rank) printf("-----------------------------------------------------------------\n");
735  MPI_Barrier(MPI_COMM_WORLD);
736 
737 #ifdef test_blockIterJacobi
738  int bs;
739  for (bs=1; bs <= MAX_BS; bs++) {
740  if (!rank) printf("\nTesting MPI blocktridiagIterJacobi() with N=%d, Ns=%d, bs=%d on %d processes\n",N,Ns,bs,nproc);
741  ierr = test_block_mpi(N,Ns,bs,NRuns,rank,nproc,0,&blocktridiagIterJacobi,"walltimes_blocktridiagIterJac.dat"); if(ierr) return(ierr);
742  MPI_Barrier(MPI_COMM_WORLD);
743  }
744 #else
745  if (!rank) printf("\nSkipping blocktridiagIterJacobi(). Compile with -Dtest_blockIterJacobi flag to enable.\n");
746 #endif
747 
748 #ifdef testblockLU
749  for (bs=1; bs <= MAX_BS; bs++) {
750  if (!rank) printf("\nTesting MPI blocktridiagLU() with N=%d, Ns=%d, bs=%d on %d processes\n",N,Ns,bs,nproc);
751  ierr = test_block_mpi(N,Ns,bs,NRuns,rank,nproc,1,&blocktridiagLU,"walltimes_blocktridiagLU.dat"); if(ierr) return(ierr);
752  MPI_Barrier(MPI_COMM_WORLD);
753  }
754 #else
755  if (!rank) printf("\nSkipping blocktridiagLU(). Compile with -Dtest_blockLU flag to enable.\n");
756 #endif
757  /* Return */
758  return(0);
759 }
760 
773  int N,
774  int Ns,
775  int NRuns,
776  int rank,
777  int nproc,
778  int flag,
779  int blacs_context,
782  int(*LUSolver)(double*,double*,double*,double*,int,int,void*,void*),
783  const char *filename
784  )
785 {
786  int i,d,ierr=0,nlocal;
787  double error;
788  MPI_Comm world;
789  TridiagLU context;
790  /* Variable declarations */
791  double *a1; /* sub-diagonal */
792  double *b1; /* diagonal */
793  double *c1; /* super-diagonal */
794  double *x; /* right hand side, will contain the solution */
795 
796  /*
797  Since a,b,c and x are not preserved, declaring variables to
798  store a copy of them to calculate error after the solve
799  */
800  double *a2,*b2,*c2,*y;
801 
802  /* Creating a duplicate communicator */
803  MPI_Comm_dup(MPI_COMM_WORLD,&world);
804 
805  /* Initialize tridiagonal solver parameters */
806  ierr = tridiagLUInit(&context,&world); if (ierr) return(ierr);
807 
808 #ifdef with_scalapack
809  context.blacs_ctxt = blacs_context;
810 #endif
811 
812  /* Initialize random number generator */
813  srand(time(NULL));
814 
815  /*
816  Calculate local size on this process, given
817  the total size N and number of processes
818  nproc
819  */
820  ierr = partition1D(N,nproc,rank,&nlocal);
821  MPI_Barrier(MPI_COMM_WORLD);
822 
823  /*
824  Allocate arrays of dimension (Ns x nlocal)
825  Ns -> number of systems
826  nlocal -> local size of each system
827  */
828  a1 = (double*) calloc (Ns*nlocal,sizeof(double));
829  b1 = (double*) calloc (Ns*nlocal,sizeof(double));
830  c1 = (double*) calloc (Ns*nlocal,sizeof(double));
831  a2 = (double*) calloc (Ns*nlocal,sizeof(double));
832  b2 = (double*) calloc (Ns*nlocal,sizeof(double));
833  c2 = (double*) calloc (Ns*nlocal,sizeof(double));
834  x = (double*) calloc (Ns*nlocal,sizeof(double));
835  y = (double*) calloc (Ns*nlocal,sizeof(double));
836 
837 
838  /*
839  TESTING THE LU SOLVER
840  */
841  MPI_Barrier(MPI_COMM_WORLD);
842 #ifndef speed_test_only
843  /*
844  TEST 1: Solution of an identity matrix with random
845  right hand side
846  [I]x = b => x = b
847  */
848 
849  /*
850  Set the values of the matrix elements and the
851  right hand side
852  */
853  for (d = 0; d < Ns; d++) {
854  for (i = 0; i < nlocal; i++) {
855  a1[i*Ns+d] = 0.0;
856  b1[i*Ns+d] = 1.0;
857  c1[i*Ns+d] = 0.0;
858  x [i*Ns+d] = rand();
859  }
860  }
861 
862  /*
863  Copy the original values to calculate error later
864  */
865  CopyArray(a1,a2,nlocal,Ns);
866  CopyArray(b1,b2,nlocal,Ns);
867  CopyArray(c1,c2,nlocal,Ns);
868  CopyArray(x ,y ,nlocal,Ns);
869 
870  /* Solve */
871  if (!rank) printf("MPI test 1 ([I]x = b => x = b): \t");
872  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
873  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
874 
875  /*
876  Calculate Error
877  */
878  error = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
879  if (!rank) printf("error=%E\n",error);
880  MPI_Barrier(MPI_COMM_WORLD);
881 
882  /*
883  TEST 2: Solution of an upper triangular matrix
884  [U]x = b
885  */
886 
887  /*
888  Set the values of the matrix elements and the
889  right hand side
890  */
891  for (d = 0; d < Ns; d++) {
892  for (i = 0; i < nlocal; i++) {
893  a1[i*Ns+d] = 0.0;
894  b1[i*Ns+d] = 400.0;
895  if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : 0.5);
896  else c1[i*Ns+d] = 0.5;
897  x[i*Ns+d] = 1.0;
898  }
899  }
900 
901  /*
902  Copy the original values to calculate error later
903  */
904  CopyArray(a1,a2,nlocal,Ns);
905  CopyArray(b1,b2,nlocal,Ns);
906  CopyArray(c1,c2,nlocal,Ns);
907  CopyArray(x ,y ,nlocal,Ns);
908 
909  /* Solve */
910  if (!rank) printf("MPI test 2 ([U]x = b => x = [U]^(-1)b):\t");
911  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
912  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
913 
914  /*
915  Calculate Error
916  */
917  error = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
918  if (!rank) printf("error=%E\n",error);
919  MPI_Barrier(MPI_COMM_WORLD);
920 
921  /*
922  TEST 3: Solution of a tridiagonal matrix with random
923  entries and right hand side
924  [A]x = b => x = b
925  */
926 
927  /*
928  Set the values of the matrix elements and the
929  right hand side
930  */
931  for (d = 0; d < Ns; d++) {
932  for (i = 0; i < nlocal; i++) {
933  if (!rank ) a1[i*Ns+d] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
934  else a1[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
935  b1[i*Ns+d] = 100.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
936  if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
937  else c1[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
938  x[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
939  }
940  }
941 
942  /*
943  Copy the original values to calculate error later
944  */
945  CopyArray(a1,a2,nlocal,Ns);
946  CopyArray(b1,b2,nlocal,Ns);
947  CopyArray(c1,c2,nlocal,Ns);
948  CopyArray(x ,y ,nlocal,Ns);
949 
950  /* Solve */
951  if (!rank) printf("MPI test 3 ([A]x = b => x = [A]^(-1)b):\t");
952  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
953  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
954 
955  /*
956  Calculate Error
957  */
958  error = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
959  if (!rank) printf("error=%E\n",error);
960  MPI_Barrier(MPI_COMM_WORLD);
961 
962  /*
963  DONE TESTING THE LU SOLVER
964  */
965 #else
966  if (!rank) printf("Skipping accuracy tests. Remove -Dspeed_test_only compilation flag to enable.\n");
967 #endif
968 
969  /*
970  TESTING WALLTIMES FOR NRuns NUMBER OF RUNS OF TRIDIAGLU()
971  FOR SCALABILITY CHECK IF REQUIRED
972  */
973 
974  if (flag) {
975 
976  /*
977  TEST 4: Same as TEST 3
978  Set the values of the matrix elements and the
979  right hand side
980  */
981  for (d = 0; d < Ns; d++) {
982  for (i = 0; i < nlocal; i++) {
983 
984  if (!rank ) a1[i*Ns+d] = (i == 0 ? 0.0 : 0.3 );
985  else a1[i*Ns+d] = 0.3;
986 
987  if (!rank) b1[i*Ns+d] = (i == 0 ? 1.0 : 0.6 );
988  else if (rank == nproc-1) b1[i*Ns+d] = (i == nlocal-1 ? 1.0 : 0.6 );
989  else b1[i*Ns+d] = 0.6;
990 
991  if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : 0.1 );
992  else c1[i*Ns+d] = 0.1;
993 
994  x[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
995  }
996  }
997 
998  /*
999  Keep a copy of the original values
1000  */
1001  CopyArray(a1,a2,nlocal,Ns);
1002  CopyArray(b1,b2,nlocal,Ns);
1003  CopyArray(c1,c2,nlocal,Ns);
1004  CopyArray(x ,y ,nlocal,Ns);
1005 
1006  if (!rank)
1007  printf("\nMPI test 4 (Speed test - %d Tridiagonal Solves):\n",NRuns);
1008  double runtimes[5] = {0.0,0.0,0.0,0.0,0.0};
1009  error = 0;
1010  /*
1011  Solve the systen NRuns times
1012  */
1013  for (i = 0; i < NRuns; i++) {
1014  /* Copy the original values */
1015  CopyArray(a2,a1,nlocal,Ns);
1016  CopyArray(b2,b1,nlocal,Ns);
1017  CopyArray(c2,c1,nlocal,Ns);
1018  CopyArray(y ,x ,nlocal,Ns);
1019  /* Solve the system */
1020  MPI_Barrier(MPI_COMM_WORLD);
1021  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
1022  MPI_Barrier(MPI_COMM_WORLD);
1023  /* Calculate errors */
1024  double err = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
1025  /* Add the walltimes to the cumulative total */
1026  runtimes[0] += context.total_time;
1027  runtimes[1] += context.stage1_time;
1028  runtimes[2] += context.stage2_time;
1029  runtimes[3] += context.stage3_time;
1030  runtimes[4] += context.stage4_time;
1031  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1032  error += err;
1033  }
1034 
1035  /* Calculate average error */
1036  error /= NRuns;
1037 
1038  /* Calculate maximum value of walltime across all processes */
1039  MPI_Allreduce(MPI_IN_PLACE,&runtimes[0],5,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
1040 
1041  /* Print results */
1042  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1043  if (!rank) {
1044  printf("\t\tTotal walltime = %E\n",runtimes[0]);
1045  printf("\t\tStage1 walltime = %E\n",runtimes[1]);
1046  printf("\t\tStage2 walltime = %E\n",runtimes[2]);
1047  printf("\t\tStage3 walltime = %E\n",runtimes[3]);
1048  printf("\t\tStage4 walltime = %E\n",runtimes[4]);
1049  printf("\t\tAverage error = %E\n",error);
1050  FILE *out;
1051  out = fopen(filename,"w");
1052  fprintf(out,"%5d %1.16E %1.16E %1.16E %1.16E %1.16E %1.16E\n",nproc,runtimes[0],
1053  runtimes[1],runtimes[2],runtimes[3],runtimes[4],error);
1054  fclose(out);
1055  }
1056  MPI_Barrier(MPI_COMM_WORLD);
1057  }
1058 
1059  /*
1060  DONE TESTING TRIDIAGLU() WALLTIMES
1061  */
1062 
1063  /*
1064  DEALLOCATE ALL ARRAYS
1065  */
1066  free(a1);
1067  free(b1);
1068  free(c1);
1069  free(a2);
1070  free(b2);
1071  free(c2);
1072  free(x);
1073  free(y);
1074  MPI_Comm_free(&world);
1075 
1076  /* Return */
1077  return(0);
1078 }
1079 
1093  int N,
1094  int Ns,
1095  int bs,
1096  int NRuns,
1097  int rank,
1098  int nproc,
1099  int flag,
1102  int(*LUSolver)(double*,double*,double*,double*,int,int,int,void*,void*),
1103  const char *filename
1104  )
1105 {
1106  int i,j,k,d,ierr=0,nlocal;
1107  double error;
1108  MPI_Comm world;
1109  TridiagLU context;
1110  /* Variable declarations */
1111  double *a1; /* sub-diagonal */
1112  double *b1; /* diagonal */
1113  double *c1; /* super-diagonal */
1114  double *x; /* right hand side, will contain the solution */
1115 
1116  /*
1117  Since a,b,c and x are not preserved, declaring variables to
1118  store a copy of them to calculate error after the solve
1119  */
1120  double *a2,*b2,*c2,*y;
1121 
1122  /* Creating a duplicate communicator */
1123  MPI_Comm_dup(MPI_COMM_WORLD,&world);
1124 
1125  /* Initialize tridiagonal solver parameters */
1126  ierr = tridiagLUInit(&context,&world); if (ierr) return(ierr);
1127 
1128  /* Initialize random number generator */
1129  srand(time(NULL));
1130 
1131  /*
1132  Calculate local size on this process, given
1133  the total size N and number of processes
1134  nproc
1135  */
1136  ierr = partition1D(N,nproc,rank,&nlocal);
1137  MPI_Barrier(MPI_COMM_WORLD);
1138 
1139  /*
1140  Allocate arrays of dimension (Ns x nlocal)
1141  Ns -> number of systems
1142  nlocal -> local size of each system
1143  */
1144  a1 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1145  b1 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1146  c1 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1147  a2 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1148  b2 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1149  c2 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1150  x = (double*) calloc (Ns*nlocal*bs ,sizeof(double));
1151  y = (double*) calloc (Ns*nlocal*bs ,sizeof(double));
1152 
1153 
1154  /*
1155  TESTING THE LU SOLVER
1156  */
1157  MPI_Barrier(MPI_COMM_WORLD);
1158 
1159 #ifndef speed_test_only
1160  /*
1161  TEST 1: Solution of an identity matrix with random
1162  right hand side
1163  [I]x = b => x = b
1164  */
1165 
1166  /*
1167  Set the values of the matrix elements and the
1168  right hand side
1169  */
1170  for (d = 0; d < Ns; d++) {
1171  for (i = 0; i < nlocal; i++) {
1172  for (j=0; j<bs; j++) {
1173  for (k=0; k<bs; k++) {
1174  a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1175  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 1.0;
1176  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1177  c1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1178  }
1179  x [(i*Ns+d)*bs+j] = rand();
1180  }
1181  }
1182  }
1183 
1184  /*
1185  Copy the original values to calculate error later
1186  */
1187  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1188  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1189  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1190  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1191 
1192  /* Solve */
1193  if (!rank) printf("Block MPI test 1 ([I]x = b => x = b): \t");
1194  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1195  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1196 
1197  /*
1198  Calculate Error
1199  */
1200  error = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1201  if (!rank) printf("error=%E\n",error);
1202  MPI_Barrier(MPI_COMM_WORLD);
1203 
1204  /*
1205  TEST 2: Solution of an upper triangular matrix
1206  [U]x = b
1207  */
1208 
1209  /*
1210  Set the values of the matrix elements and the
1211  right hand side
1212  */
1213  for (d = 0; d < Ns; d++) {
1214  for (i = 0; i < nlocal; i++) {
1215  for (j=0; j<bs; j++) {
1216  for (k=0; k<bs; k++) {
1217  a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1218  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 400.0 + ((double) rand()) / ((double) RAND_MAX);
1219  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0 + ((double) rand()) / ((double) RAND_MAX);
1220  if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
1221  else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1222  }
1223  x[(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
1224  }
1225  }
1226  }
1227 
1228  /*
1229  Copy the original values to calculate error later
1230  */
1231  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1232  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1233  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1234  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1235 
1236  /* Solve */
1237  if (!rank) printf("Block MPI test 2 ([U]x = b => x = [U]^(-1)b):\t");
1238  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1239  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1240 
1241  /*
1242  Calculate Error
1243  */
1244  error = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1245  if (!rank) printf("error=%E\n",error);
1246  MPI_Barrier(MPI_COMM_WORLD);
1247 
1248  /*
1249  TEST 3: Solution of a tridiagonal matrix with random
1250  entries and right hand side
1251  [A]x = b => x = b
1252  */
1253 
1254  /*
1255  Set the values of the matrix elements and the
1256  right hand side
1257  */
1258  for (d = 0; d < Ns; d++) {
1259  for (i = 0; i < nlocal; i++) {
1260  for (j=0; j<bs; j++) {
1261  for (k=0; k<bs; k++) {
1262  if (!rank ) a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
1263  else a1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1264  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 200.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1265  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1266  if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
1267  else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1268  }
1269  x[(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
1270  }
1271  }
1272  }
1273 
1274  /*
1275  Copy the original values to calculate error later
1276  */
1277  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1278  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1279  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1280  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1281 
1282  /* Solve */
1283  if (!rank) printf("Block MPI test 3 ([A]x = b => x = [A]^(-1)b):\t");
1284  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1285  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1286 
1287  /*
1288  Calculate Error
1289  */
1290  error = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1291  if (!rank) printf("error=%E\n",error);
1292  MPI_Barrier(MPI_COMM_WORLD);
1293 #else
1294  if (!rank) printf("Skipping accuracy tests. Remove -Dspeed_test_only compilation flag to enable.\n");
1295 #endif
1296  /*
1297  DONE TESTING THE LU SOLVER
1298  */
1299 
1300 
1301  /*
1302  TESTING WALLTIMES FOR NRuns NUMBER OF RUNS OF TRIDIAGLU()
1303  FOR SCALABILITY CHECK IF REQUIRED
1304  */
1305 
1306  if (flag) {
1307 
1308  /*
1309  TEST 4: Same as TEST 3
1310  Set the values of the matrix elements and the
1311  right hand side
1312  */
1313  for (d = 0; d < Ns; d++) {
1314  for (i = 0; i < nlocal; i++) {
1315  for (j=0; j<bs; j++) {
1316  for (k=0; k<bs; k++) {
1317  if (!rank ) a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
1318  else a1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1319  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 200.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1320  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1321  if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
1322  else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1323  }
1324  x[(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
1325  }
1326  }
1327  }
1328 
1329  /*
1330  Keep a copy of the original values
1331  */
1332  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1333  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1334  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1335  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1336 
1337  if (!rank)
1338  printf("\nBlock MPI test 4 (Speed test - %d Tridiagonal Solves):\n",NRuns);
1339  double runtimes[5] = {0.0,0.0,0.0,0.0,0.0};
1340  error = 0;
1341  /*
1342  Solve the systen NRuns times
1343  */
1344  for (i = 0; i < NRuns; i++) {
1345  /* Copy the original values */
1346  CopyArraySimple(a1,a2,nlocal*Ns*bs*bs);
1347  CopyArraySimple(b1,b2,nlocal*Ns*bs*bs);
1348  CopyArraySimple(c1,c2,nlocal*Ns*bs*bs);
1349  CopyArraySimple(x ,y ,nlocal*Ns*bs );
1350  /* Solve the system */
1351  MPI_Barrier(MPI_COMM_WORLD);
1352  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1353  MPI_Barrier(MPI_COMM_WORLD);
1354  /* Calculate errors */
1355  double err = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1356  /* Add the walltimes to the cumulative total */
1357  runtimes[0] += context.total_time;
1358  runtimes[1] += context.stage1_time;
1359  runtimes[2] += context.stage2_time;
1360  runtimes[3] += context.stage3_time;
1361  runtimes[4] += context.stage4_time;
1362  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1363  error += err;
1364  }
1365 
1366  /* Calculate average error */
1367  error /= NRuns;
1368 
1369  /* Calculate maximum value of walltime across all processes */
1370  MPI_Allreduce(MPI_IN_PLACE,&runtimes[0],5,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
1371 
1372  /* Print results */
1373  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1374  if (!rank) {
1375  printf("\t\tTotal walltime = %E\n",runtimes[0]);
1376  printf("\t\tStage1 walltime = %E\n",runtimes[1]);
1377  printf("\t\tStage2 walltime = %E\n",runtimes[2]);
1378  printf("\t\tStage3 walltime = %E\n",runtimes[3]);
1379  printf("\t\tStage4 walltime = %E\n",runtimes[4]);
1380  printf("\t\tAverage error = %E\n",error);
1381  FILE *out;
1382  out = fopen(filename,"w");
1383  fprintf(out,"%5d %1.16E %1.16E %1.16E %1.16E %1.16E %1.16E\n",nproc,runtimes[0],
1384  runtimes[1],runtimes[2],runtimes[3],runtimes[4],error);
1385  fclose(out);
1386  }
1387  MPI_Barrier(MPI_COMM_WORLD);
1388  }
1389  /*
1390  DONE TESTING TRIDIAGLU() WALLTIMES
1391  */
1392 
1393  /*
1394  DEALLOCATE ALL ARRAYS
1395  */
1396  free(a1);
1397  free(b1);
1398  free(c1);
1399  free(a2);
1400  free(b2);
1401  free(c2);
1402  free(x);
1403  free(y);
1404  MPI_Comm_free(&world);
1405 
1406  /* Return */
1407  return(0);
1408 }
1409 
1410 #endif
1411 
1412 
1417  double *x,
1418  double *y,
1419  int N
1420  )
1421 {
1422  int i;
1423  for (i = 0; i < N; i++) x[i] = y[i];
1424  return;
1425 }
1426 
1432  double *x,
1433  double *y,
1434  int N,
1435  int Ns
1436  )
1437 {
1438  int i,d;
1439  for (d = 0; d < Ns; d++) {
1440  for (i = 0; i < N; i++) {
1441  y[i*Ns+d] = x[i*Ns+d];
1442  }
1443  }
1444  return;
1445 }
1446 
1447 /*
1448  Functions to calculate the error in the computed solution
1449 */
1450 #ifdef serial
1451 
1452 double CalculateError(double *a,double *b,double *c,double *y,double *x,
1453  int N,int Ns)
1454 {
1455  int i,d;
1456  double error = 0;
1457  for (d = 0; d < Ns; d++) {
1458  for (i = 0; i < N; i++) {
1459  double val;
1460  if (i == 0) val = y[i*Ns+d] - (b[i*Ns+d]*x[i*Ns+d]+c[i*Ns+d]*x[(i+1)*Ns+d]);
1461  else if (i == N-1) val = y[i*Ns+d] - (a[i*Ns+d]*x[(i-1)*Ns+d]+b[i*Ns+d]*x[i*Ns+d]);
1462  else val = y[i*Ns+d] - (a[i*Ns+d]*x[(i-1)*Ns+d]+b[i*Ns+d]*x[i*Ns+d]+c[i*Ns+d]*x[(i+1)*Ns+d]);
1463  error += val * val;
1464  }
1465  }
1466  return(error);
1467 }
1468 
1469 
1470 double CalculateErrorBlock(double *a,double *b,double *c,double *y,double *x,
1471  int N,int Ns,int bs)
1472 {
1473  int i,d,j;
1474  double error = 0;
1475  for (d = 0; d < Ns; d++) {
1476  for (i = 0; i < N; i++) {
1477  double val[bs]; for (j=0; j<bs; j++) val[j] = y[(i*Ns+d)*bs+j];
1478  _MatVecMultiplySubtract_(val,b+(i*Ns+d)*bs*bs,x+(i*Ns+d)*bs,bs);
1479  if (i != 0) _MatVecMultiplySubtract_(val,a+(i*Ns+d)*bs*bs,x+((i-1)*Ns+d)*bs,bs);
1480  if (i != N-1) _MatVecMultiplySubtract_(val,c+(i*Ns+d)*bs*bs,x+((i+1)*Ns+d)*bs,bs);
1481  for (j=0; j<bs; j++) error += val[j] * val[j];
1482  }
1483  }
1484  return(error);
1485 }
1486 
1487 #else
1488 
1496  double *a,
1497  double *b,
1498  double *c,
1499  double *y,
1500  double *x,
1501  int N,
1502  int Ns,
1503  int rank,
1504  int nproc
1505  )
1506 {
1507  double error = 0, norm = 0;
1508  int i,d;
1509  double xp1, xm1; /* solution from neighboring processes */
1510 
1511  for (d = 0; d < Ns; d++) {
1512  xp1 = 0;
1513  if (nproc > 1) {
1514  MPI_Request request = MPI_REQUEST_NULL;
1515  if (rank != nproc-1) MPI_Irecv(&xp1,1,MPI_DOUBLE,rank+1,1738,MPI_COMM_WORLD,&request);
1516  if (rank) MPI_Send(&x[d],1,MPI_DOUBLE,rank-1,1738,MPI_COMM_WORLD);
1517  MPI_Wait(&request,MPI_STATUS_IGNORE);
1518  }
1519 
1520  xm1 = 0;
1521  if (nproc > 1) {
1522  MPI_Request request = MPI_REQUEST_NULL;
1523  if (rank) MPI_Irecv(&xm1,1,MPI_DOUBLE,rank-1,1739,MPI_COMM_WORLD,&request);
1524  if (rank != nproc-1) MPI_Send(&x[d+(N-1)*Ns],1,MPI_DOUBLE,rank+1,1739,MPI_COMM_WORLD);
1525  MPI_Wait(&request,MPI_STATUS_IGNORE);
1526  }
1527 
1528  error = 0;
1529  norm = 0;
1530  for (i = 0; i < N; i++) {
1531  double val = 0;
1532  if (i == 0) val += a[i*Ns+d]*xm1;
1533  else val += a[i*Ns+d]*x[(i-1)*Ns+d];
1534  val += b[i*Ns+d]*x[i*Ns+d];
1535  if (i == N-1) val += c[i*Ns+d]*xp1;
1536  else val += c[i*Ns+d]*x[(i+1)*Ns+d];
1537  val = y[i*Ns+d] - val;
1538  error += val * val;
1539  norm += y[i*Ns+d] * y[i*Ns+d];
1540  }
1541  }
1542 
1543  double global_error = 0, global_norm = 0;
1544  if (nproc > 1) MPI_Allreduce(&error,&global_error,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1545  else global_error = error;
1546  if (nproc > 1) MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1547  else global_norm = norm;
1548  if (global_norm != 0.0) global_error /= global_norm;
1549 
1550  return(global_error);
1551 }
1552 
1553 
1561  double *a,
1562  double *b,
1563  double *c,
1564  double *y,
1565  double *x,
1566  int N,
1568  int Ns,
1569  int bs,
1570  int rank,
1571  int nproc
1572  )
1573 {
1574  double error = 0, norm = 0;
1575  int i,j,d;
1576  double xp1[bs], xm1[bs]; /* solution from neighboring processes */
1577 
1578  for (d = 0; d < Ns; d++) {
1579  for (i=0; i<bs; i++) xp1[i] = 0;
1580  if (nproc > 1) {
1581  MPI_Request request = MPI_REQUEST_NULL;
1582  if (rank != nproc-1) MPI_Irecv(&xp1,bs,MPI_DOUBLE,rank+1,1738,MPI_COMM_WORLD,&request);
1583  if (rank) MPI_Send(&x[d*bs],bs,MPI_DOUBLE,rank-1,1738,MPI_COMM_WORLD);
1584  MPI_Wait(&request,MPI_STATUS_IGNORE);
1585  }
1586 
1587  for (i=0; i<bs; i++) xm1[i] = 0;
1588  if (nproc > 1) {
1589  MPI_Request request = MPI_REQUEST_NULL;
1590  if (rank) MPI_Irecv(&xm1,bs,MPI_DOUBLE,rank-1,1739,MPI_COMM_WORLD,&request);
1591  if (rank != nproc-1) MPI_Send (&x[(d+(N-1)*Ns)*bs],bs,MPI_DOUBLE,rank+1,1739,MPI_COMM_WORLD);
1592  MPI_Wait(&request,MPI_STATUS_IGNORE);
1593  }
1594 
1595  error = 0;
1596  norm = 0;
1597  for (i = 0; i < N; i++) {
1598  double val[bs]; for (j=0; j<bs; j++) val[j] = y[(i*Ns+d)*bs+j];
1599  _MatVecMultiplySubtract_(val,b+(i*Ns+d)*bs*bs,x+(i*Ns+d)*bs,bs);
1600  if (i == 0) _MatVecMultiplySubtract_(val,a+(i*Ns+d)*bs*bs,xm1,bs)
1601  else _MatVecMultiplySubtract_(val,a+(i*Ns+d)*bs*bs,x+((i-1)*Ns+d)*bs,bs)
1602  if (i == N-1) _MatVecMultiplySubtract_(val,c+(i*Ns+d)*bs*bs,xp1,bs)
1603  else _MatVecMultiplySubtract_(val,c+(i*Ns+d)*bs*bs,x+((i+1)*Ns+d)*bs,bs)
1604  for (j=0; j<bs; j++) error += val[j] * val[j];
1605  for (j=0; j<bs; j++) norm += y[(i*Ns+d)*bs+j] * y[(i*Ns+d)*bs+j];
1606  }
1607  }
1608 
1609  double global_error = 0, global_norm = 0;
1610  if (nproc > 1) MPI_Allreduce(&error,&global_error,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1611  else global_error = error;
1612  if (nproc > 1) MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1613  else global_norm = norm;
1614  if (global_norm != 0.0) global_error /= global_norm;
1615 
1616  return(global_error);
1617 }
1618 #endif
1619 
1620 #ifndef serial
1621 
1628  int nglobal,
1629  int nproc,
1630  int rank,
1631  int *nlocal
1632  )
1633 {
1634  if (nglobal%nproc == 0) *nlocal = nglobal/nproc;
1635  else {
1636  if (rank == nproc-1) *nlocal = nglobal/nproc+nglobal%nproc;
1637  else *nlocal = nglobal/nproc;
1638  }
1639  return(0);
1640 }
1641 
1642 #endif
static void CopyArraySimple(double *, double *, int)
Definition: test.c:1416
void Cblacs_gridinit()
double stage3_time
Definition: tridiagLU.h:101
int tridiagLUGS(double *, double *, double *, double *, int, int, void *, void *)
Definition: tridiagLUGS.c:64
int tridiagScaLPK(double *, double *, double *, double *, int, int, void *, void *)
Definition: tridiagScaLPK.c:71
static double CalculateErrorBlock(double *, double *, double *, double *, double *, int, int, int, int, int)
Definition: test.c:1560
int tridiagLUInit(void *, void *)
Definition: tridiagLUInit.c:39
static int main_mpi(int, int, int, int, int, int)
Definition: test.c:689
int MAX_BS
Definition: test.c:189
double stage2_time
Definition: tridiagLU.h:100
int blacs_ctxt
Definition: tridiagLU.h:105
static void CopyArray(double *, double *, int, int)
Definition: test.c:1431
int tridiagLU(double *, double *, double *, double *, int, int, void *, void *)
Definition: tridiagLU.c:83
double stage1_time
Definition: tridiagLU.h:99
Header file for TridiagLU.
void Cblacs_get()
Contains macros and function definitions for common matrix operations.
void Cblacs_pinfo()
static int test_mpi(int, int, int, int, int, int, int, int(*)(double *, double *, double *, double *, int, int, void *, void *), const char *)
Definition: test.c:772
double stage4_time
Definition: tridiagLU.h:102
void Cblacs_exit()
int blocktridiagIterJacobi(double *, double *, double *, double *, int, int, int, void *, void *)
#define _MatVecMultiplySubtract_(y, A, x, N)
Definition: matops.h:78
static double CalculateError(double *, double *, double *, double *, double *, int, int, int, int)
Definition: test.c:1495
int tridiagIterJacobi(double *, double *, double *, double *, int, int, void *, void *)
static int test_block_mpi(int, int, int, int, int, int, int, int(*)(double *, double *, double *, double *, int, int, int, void *, void *), const char *)
Definition: test.c:1092
static int partition1D(int, int, int, int *)
Definition: test.c:1627
double total_time
Definition: tridiagLU.h:98
int blocktridiagLU(double *, double *, double *, double *, int, int, int, void *, void *)
int main(int argc, char *argv[])
Definition: test.c:227
void Cblacs_gridexit()