TridiagLU  1.0
Scalable, parallel solver for tridiagonal systems of equations
test.c File Reference

Tests for tridiagLU solvers.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include <tridiagLU.h>
#include <matops.h>


Functions

static void CopyArray (double *, double *, int, int)
 
static void CopyArraySimple (double *, double *, int)
 
static int main_mpi (int, int, int, int, int, int)
 
static int test_mpi (int, int, int, int, int, int, int, int(*)(double *, double *, double *, double *, int, int, void *, void *), const char *)
 
static int test_block_mpi (int, int, int, int, int, int, int, int(*)(double *, double *, double *, double *, int, int, int, void *, void *), const char *)
 
static int partition1D (int, int, int, int *)
 
static double CalculateError (double *, double *, double *, double *, double *, int, int, int, int)
 
static double CalculateErrorBlock (double *, double *, double *, double *, double *, int, int, int, int, int)
 
void Cblacs_pinfo ()
 
void Cblacs_get ()
 
void Cblacs_gridinit ()
 
void Cblacs_gridexit ()
 
void Cblacs_exit ()
 
int main (int argc, char *argv[])
 

Variables

int MAX_BS = 5
 

Detailed Description

Tests for tridiagLU solvers.

Author
Debojyoti Ghosh

Definition in file test.c.

Function Documentation

void CopyArray ( double *  x,
double *  y,
int  N,
int  Ns 
)
static

Function to copy the values of one logically 2D array into another, where each 2D array of dimensions N x Ns is stored as a 1D array with element (i,d) at index i*Ns+d (i.e., the systems are interleaved, with the system index d varying fastest).

Parameters
x   Array to copy from
y   Array to copy to
N   Size along the first dimension
Ns  Size along the second dimension (number of systems)

Definition at line 1431 of file test.c.

1437 {
1438  int i,d;
1439  for (d = 0; d < Ns; d++) {
1440  for (i = 0; i < N; i++) {
1441  y[i*Ns+d] = x[i*Ns+d];
1442  }
1443  }
1444  return;
1445 }
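The tests in this file use this interleaved layout throughout: element i of system d is stored at index i*Ns+d, so values with the same index i from different systems sit next to each other in memory. A self-contained sketch (illustrative values only, not part of test.c):

 /* Sketch: filling a logically 2D (N x Ns) array stored in the
    interleaved 1D form that CopyArray() above operates on. */
 #include <stdlib.h>

 int main(void)
 {
   int N = 4, Ns = 2, i, d;
   double *x = (double*) calloc(N*Ns, sizeof(double));
   for (d = 0; d < Ns; d++) {
     for (i = 0; i < N; i++) {
       x[i*Ns+d] = 10.0*d + i;  /* element i of system d */
     }
   }
   /* x is now { 0, 10, 1, 11, 2, 12, 3, 13 } */
   free(x);
   return 0;
 }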
void CopyArraySimple ( double *  x,
double *  y,
int  N 
)
static

Function to copy the values of one array into another. Note the argument order: unlike CopyArray(), the first argument is the destination.

Parameters
x   Array to copy to
y   Array to copy from
N   Array size

Definition at line 1416 of file test.c.

1421 {
1422  int i;
1423  for (i = 0; i < N; i++) x[i] = y[i];
1424  return;
1425 }
int main_mpi ( int  N,
int  Ns,
int  NRuns,
int  rank,
int  nproc,
int  blacs_context 
)
static

This function calls the test functions for the various tridiagonal solvers. Each solver's test is enabled at compile time by a preprocessor flag (for example, compiling with -Dtest_LU enables the tridiagLU() test, as seen in the listing below).

Parameters
N              Global size of system
Ns             Number of systems
NRuns          Number of repeated solves (to measure average wall time)
rank           MPI rank of this process
nproc          Number of MPI ranks
blacs_context  ScaLAPACK context (irrelevant if not using ScaLAPACK)

Definition at line 689 of file test.c.

697 {
698  int ierr = 0;
699 
700 #ifdef test_LUGS
701  if (!rank) printf("\nTesting MPI tridiagLUGS() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
702  ierr = test_mpi(N,Ns,NRuns,rank,nproc,0,blacs_context,&tridiagLUGS,"walltimes_tridiagLUGS.dat"); if (ierr) return(ierr);
703  MPI_Barrier(MPI_COMM_WORLD);
704 #else
705  if (!rank) printf("\nSkipping tridiagLUGS(). Compile with -Dtest_LUGS flag to enable.\n");
706 #endif
707 
708 #ifdef test_IterJacobi
709  if (!rank) printf("\nTesting MPI tridiagIterJacobi() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
710  ierr = test_mpi(N,Ns,NRuns,rank,nproc,0,blacs_context,&tridiagIterJacobi,"walltimes_tridiagIterJac.dat"); if (ierr) return(ierr);
711  MPI_Barrier(MPI_COMM_WORLD);
712 #else
713  if (!rank) printf("\nSkipping tridiagIterJacobi(). Compile with -Dtest_IterJacobi flag to enable.\n");
714 #endif
715 
716 #ifdef test_LU
717  if (!rank) printf("\nTesting MPI tridiagLU() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
718  ierr = test_mpi(N,Ns,NRuns,rank,nproc,1,blacs_context,&tridiagLU,"walltimes_tridiagLU.dat"); if (ierr) return(ierr);
719  MPI_Barrier(MPI_COMM_WORLD);
720 #else
721  if (!rank) printf("\nSkipping tridiagLU(). Compile with -Dtest_LU flag to enable.\n");
722 #endif
723 
724 #ifdef with_scalapack
725 #ifdef test_scalapack
726  if (!rank) printf("\nTesting MPI tridiagScaLPK() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
727  ierr = test_mpi(N,Ns,NRuns,rank,nproc,1,blacs_context,&tridiagScaLPK,"walltimes_tridiagScaLPK.dat"); if (ierr) return(ierr);
728  MPI_Barrier(MPI_COMM_WORLD);
729 #else
730  if (!rank) printf("\nSkipping tridiagScaLPK(). Compile with -Dtest_scalapack flag to enable.\n");
731 #endif
732 #endif
733 
734  if (!rank) printf("-----------------------------------------------------------------\n");
735  MPI_Barrier(MPI_COMM_WORLD);
736 
737  int bs;
738 #ifdef test_blockIterJacobi
739  for (bs=1; bs <= MAX_BS; bs++) {
740  if (!rank) printf("\nTesting MPI blocktridiagIterJacobi() with N=%d, Ns=%d, bs=%d on %d processes\n",N,Ns,bs,nproc);
741  ierr = test_block_mpi(N,Ns,bs,NRuns,rank,nproc,0,&blocktridiagIterJacobi,"walltimes_blocktridiagIterJac.dat"); if(ierr) return(ierr);
742  MPI_Barrier(MPI_COMM_WORLD);
743  }
744 #else
745  if (!rank) printf("\nSkipping blocktridiagIterJacobi(). Compile with -Dtest_blockIterJacobi flag to enable.\n");
746 #endif
747 
748 #ifdef test_blockLU
749  for (bs=1; bs <= MAX_BS; bs++) {
750  if (!rank) printf("\nTesting MPI blocktridiagLU() with N=%d, Ns=%d, bs=%d on %d processes\n",N,Ns,bs,nproc);
751  ierr = test_block_mpi(N,Ns,bs,NRuns,rank,nproc,1,&blocktridiagLU,"walltimes_blocktridiagLU.dat"); if(ierr) return(ierr);
752  MPI_Barrier(MPI_COMM_WORLD);
753  }
754 #else
755  if (!rank) printf("\nSkipping blocktridiagLU(). Compile with -Dtest_blockLU flag to enable.\n");
756 #endif
757  /* Return */
758  return(0);
759 }
int test_mpi ( int  N,
int  Ns,
int  NRuns,
int  rank,
int  nproc,
int  flag,
int  blacs_context,
int(*)(double *, double *, double *, double *, int, int, void *, void *)  LUSolver,
const char *  filename 
)
static

This function tests the implementation of a parallel (distributed memory) solver for tridiagonal (non-periodic) systems of equations. Three tests are performed and the errors are reported:

  • Solve the identity matrix with a random right-hand side.
  • Solve an upper triangular (tridiagonal) matrix.
  • Solve a diagonally dominant tridiagonal system with random entries and right-hand side.

Finally, if flag = 1 (in the function arguments), a wall time test is run: a diagonally dominant system (similar to the 3rd test above) is solved NRuns times and the average wall time is reported. Compile with "-Dspeed_test_only" to run the wall time test only.
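As an illustration of the calling convention (a hypothetical sketch: mySolver and the output filename are placeholders, not part of test.c), any solver matching the tridiagLU() interface can be passed in:

 /* Hypothetical solver with the tridiagLU() interface:
    a,b,c = sub-/main-/super-diagonals, x = right-hand side
    (overwritten with the solution), n = local size,
    ns = number of systems. */
 int mySolver(double *a, double *b, double *c, double *x,
              int n, int ns, void *params, void *comm)
 {
   /* ... solve the ns tridiagonal systems in place ... */
   return 0;
 }

 /* called from inside test.c (e.g., from main_mpi() above): */
 ierr = test_mpi(N, Ns, NRuns, rank, nproc, 1 /* time it */,
                 blacs_context, &mySolver, "walltimes_mySolver.dat");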

Parameters
N              Global size of system
Ns             Number of systems
NRuns          Number of repeated solves (to measure average wall time)
rank           MPI rank of this process
nproc          Number of MPI ranks
flag           Flag to turn the wall time measurement test on/off
blacs_context  ScaLAPACK context (irrelevant if not using ScaLAPACK)
LUSolver       Function pointer to the solver to test (despite the name, any tridiagonal solver with this interface can be tested)
filename       Filename to write out wall times data

Definition at line 772 of file test.c.

785 {
786  int i,d,ierr=0,nlocal;
787  double error;
788  MPI_Comm world;
789  TridiagLU context;
790  /* Variable declarations */
791  double *a1; /* sub-diagonal */
792  double *b1; /* diagonal */
793  double *c1; /* super-diagonal */
794  double *x; /* right hand side, will contain the solution */
795 
796  /*
797  Since a,b,c and x are not preserved, declaring variables to
798  store a copy of them to calculate error after the solve
799  */
800  double *a2,*b2,*c2,*y;
801 
802  /* Creating a duplicate communicator */
803  MPI_Comm_dup(MPI_COMM_WORLD,&world);
804 
805  /* Initialize tridiagonal solver parameters */
806  ierr = tridiagLUInit(&context,&world); if (ierr) return(ierr);
807 
808 #ifdef with_scalapack
809  context.blacs_ctxt = blacs_context;
810 #endif
811 
812  /* Initialize random number generator */
813  srand(time(NULL));
814 
815  /*
816  Calculate local size on this process, given
817  the total size N and number of processes
818  nproc
819  */
820  ierr = partition1D(N,nproc,rank,&nlocal);
821  MPI_Barrier(MPI_COMM_WORLD);
822 
823  /*
824  Allocate arrays of dimension (Ns x nlocal)
825  Ns -> number of systems
826  nlocal -> local size of each system
827  */
828  a1 = (double*) calloc (Ns*nlocal,sizeof(double));
829  b1 = (double*) calloc (Ns*nlocal,sizeof(double));
830  c1 = (double*) calloc (Ns*nlocal,sizeof(double));
831  a2 = (double*) calloc (Ns*nlocal,sizeof(double));
832  b2 = (double*) calloc (Ns*nlocal,sizeof(double));
833  c2 = (double*) calloc (Ns*nlocal,sizeof(double));
834  x = (double*) calloc (Ns*nlocal,sizeof(double));
835  y = (double*) calloc (Ns*nlocal,sizeof(double));
836 
837 
838  /*
839  TESTING THE LU SOLVER
840  */
841  MPI_Barrier(MPI_COMM_WORLD);
842 #ifndef speed_test_only
843  /*
844  TEST 1: Solution of an identity matrix with random
845  right hand side
846  [I]x = b => x = b
847  */
848 
849  /*
850  Set the values of the matrix elements and the
851  right hand side
852  */
853  for (d = 0; d < Ns; d++) {
854  for (i = 0; i < nlocal; i++) {
855  a1[i*Ns+d] = 0.0;
856  b1[i*Ns+d] = 1.0;
857  c1[i*Ns+d] = 0.0;
858  x [i*Ns+d] = rand();
859  }
860  }
861 
862  /*
863  Copy the original values to calculate error later
864  */
865  CopyArray(a1,a2,nlocal,Ns);
866  CopyArray(b1,b2,nlocal,Ns);
867  CopyArray(c1,c2,nlocal,Ns);
868  CopyArray(x ,y ,nlocal,Ns);
869 
870  /* Solve */
871  if (!rank) printf("MPI test 1 ([I]x = b => x = b): \t");
872  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
873  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
874 
875  /*
876  Calculate Error
877  */
878  error = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
879  if (!rank) printf("error=%E\n",error);
880  MPI_Barrier(MPI_COMM_WORLD);
881 
882  /*
883  TEST 2: Solution of an upper triangular matrix
884  [U]x = b
885  */
886 
887  /*
888  Set the values of the matrix elements and the
889  right hand side
890  */
891  for (d = 0; d < Ns; d++) {
892  for (i = 0; i < nlocal; i++) {
893  a1[i*Ns+d] = 0.0;
894  b1[i*Ns+d] = 400.0;
895  if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : 0.5);
896  else c1[i*Ns+d] = 0.5;
897  x[i*Ns+d] = 1.0;
898  }
899  }
900 
901  /*
902  Copy the original values to calculate error later
903  */
904  CopyArray(a1,a2,nlocal,Ns);
905  CopyArray(b1,b2,nlocal,Ns);
906  CopyArray(c1,c2,nlocal,Ns);
907  CopyArray(x ,y ,nlocal,Ns);
908 
909  /* Solve */
910  if (!rank) printf("MPI test 2 ([U]x = b => x = [U]^(-1)b):\t");
911  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
912  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
913 
914  /*
915  Calculate Error
916  */
917  error = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
918  if (!rank) printf("error=%E\n",error);
919  MPI_Barrier(MPI_COMM_WORLD);
920 
921  /*
922  TEST 3: Solution of a tridiagonal matrix with random
923  entries and right hand side
924  [A]x = b => x = [A]^(-1)b
925  */
926 
927  /*
928  Set the values of the matrix elements and the
929  right hand side
930  */
931  for (d = 0; d < Ns; d++) {
932  for (i = 0; i < nlocal; i++) {
933  if (!rank ) a1[i*Ns+d] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
934  else a1[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
935  b1[i*Ns+d] = 100.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
936  if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
937  else c1[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
938  x[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
939  }
940  }
941 
942  /*
943  Copy the original values to calculate error later
944  */
945  CopyArray(a1,a2,nlocal,Ns);
946  CopyArray(b1,b2,nlocal,Ns);
947  CopyArray(c1,c2,nlocal,Ns);
948  CopyArray(x ,y ,nlocal,Ns);
949 
950  /* Solve */
951  if (!rank) printf("MPI test 3 ([A]x = b => x = [A]^(-1)b):\t");
952  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
953  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
954 
955  /*
956  Calculate Error
957  */
958  error = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
959  if (!rank) printf("error=%E\n",error);
960  MPI_Barrier(MPI_COMM_WORLD);
961 
962  /*
963  DONE TESTING THE LU SOLVER
964  */
965 #else
966  if (!rank) printf("Skipping accuracy tests. Remove -Dspeed_test_only compilation flag to enable.\n");
967 #endif
968 
969  /*
970  TESTING WALLTIMES FOR NRuns NUMBER OF RUNS OF TRIDIAGLU()
971  FOR SCALABILITY CHECK IF REQUIRED
972  */
973 
974  if (flag) {
975 
976  /*
977  TEST 4: Similar to TEST 3 (diagonally dominant, fixed coefficients)
978  Set the values of the matrix elements and the
979  right hand side
980  */
981  for (d = 0; d < Ns; d++) {
982  for (i = 0; i < nlocal; i++) {
983 
984  if (!rank ) a1[i*Ns+d] = (i == 0 ? 0.0 : 0.3 );
985  else a1[i*Ns+d] = 0.3;
986 
987  if (!rank) b1[i*Ns+d] = (i == 0 ? 1.0 : 0.6 );
988  else if (rank == nproc-1) b1[i*Ns+d] = (i == nlocal-1 ? 1.0 : 0.6 );
989  else b1[i*Ns+d] = 0.6;
990 
991  if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : 0.1 );
992  else c1[i*Ns+d] = 0.1;
993 
994  x[i*Ns+d] = ((double) rand()) / ((double) RAND_MAX);
995  }
996  }
997 
998  /*
999  Keep a copy of the original values
1000  */
1001  CopyArray(a1,a2,nlocal,Ns);
1002  CopyArray(b1,b2,nlocal,Ns);
1003  CopyArray(c1,c2,nlocal,Ns);
1004  CopyArray(x ,y ,nlocal,Ns);
1005 
1006  if (!rank)
1007  printf("\nMPI test 4 (Speed test - %d Tridiagonal Solves):\n",NRuns);
1008  double runtimes[5] = {0.0,0.0,0.0,0.0,0.0};
1009  error = 0;
1010  /*
1011  Solve the system NRuns times
1012  */
1013  for (i = 0; i < NRuns; i++) {
1014  /* Copy the original values */
1015  CopyArray(a2,a1,nlocal,Ns);
1016  CopyArray(b2,b1,nlocal,Ns);
1017  CopyArray(c2,c1,nlocal,Ns);
1018  CopyArray(y ,x ,nlocal,Ns);
1019  /* Solve the system */
1020  MPI_Barrier(MPI_COMM_WORLD);
1021  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
1022  MPI_Barrier(MPI_COMM_WORLD);
1023  /* Calculate errors */
1024  double err = CalculateError(a2,b2,c2,y,x,nlocal,Ns,rank,nproc);
1025  /* Add the walltimes to the cumulative total */
1026  runtimes[0] += context.total_time;
1027  runtimes[1] += context.stage1_time;
1028  runtimes[2] += context.stage2_time;
1029  runtimes[3] += context.stage3_time;
1030  runtimes[4] += context.stage4_time;
1031  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1032  error += err;
1033  }
1034 
1035  /* Calculate average error */
1036  error /= NRuns;
1037 
1038  /* Calculate maximum value of walltime across all processes */
1039  MPI_Allreduce(MPI_IN_PLACE,&runtimes[0],5,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
1040 
1041  /* Print results */
1042  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1043  if (!rank) {
1044  printf("\t\tTotal walltime = %E\n",runtimes[0]);
1045  printf("\t\tStage1 walltime = %E\n",runtimes[1]);
1046  printf("\t\tStage2 walltime = %E\n",runtimes[2]);
1047  printf("\t\tStage3 walltime = %E\n",runtimes[3]);
1048  printf("\t\tStage4 walltime = %E\n",runtimes[4]);
1049  printf("\t\tAverage error = %E\n",error);
1050  FILE *out;
1051  out = fopen(filename,"w");
1052  fprintf(out,"%5d %1.16E %1.16E %1.16E %1.16E %1.16E %1.16E\n",nproc,runtimes[0],
1053  runtimes[1],runtimes[2],runtimes[3],runtimes[4],error);
1054  fclose(out);
1055  }
1056  MPI_Barrier(MPI_COMM_WORLD);
1057  }
1058 
1059  /*
1060  DONE TESTING TRIDIAGLU() WALLTIMES
1061  */
1062 
1063  /*
1064  DEALLOCATE ALL ARRAYS
1065  */
1066  free(a1);
1067  free(b1);
1068  free(c1);
1069  free(a2);
1070  free(b2);
1071  free(c2);
1072  free(x);
1073  free(y);
1074  MPI_Comm_free(&world);
1075 
1076  /* Return */
1077  return(0);
1078 }
int test_block_mpi ( int  N,
int  Ns,
int  bs,
int  NRuns,
int  rank,
int  nproc,
int  flag,
int(*)(double *, double *, double *, double *, int, int, int, void *, void *)  LUSolver,
const char *  filename 
)
static

This function tests the implementation of a parallel (distributed memory) solver for block tridiagonal (non-periodic) systems of equations. Three tests are performed and the errors are reported:

  • Solve the identity matrix with a random right-hand side.
  • Solve a block upper triangular (block tridiagonal) matrix.
  • Solve a diagonally dominant block tridiagonal system with random entries and right-hand side.

Finally, if flag = 1 (in the function arguments), a wall time test is run: the 3rd test above is repeated NRuns times and the average wall time is reported. Compile with "-Dspeed_test_only" to run the wall time test only.
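In the block variants, each bs×bs block is stored contiguously in row-major order: entry (j,k) of block-row i of system d lives at index (i*Ns+d)*bs*bs + j*bs + k, and component j of the corresponding right-hand-side block at (i*Ns+d)*bs + j (this is the indexing used in the listing below). A self-contained sketch of addressing one block entry (illustrative values only):

 /* Sketch: addressing entry (j,k) of the diagonal block in
    block-row i of system d, following test_block_mpi()'s layout. */
 #include <stdlib.h>

 int main(void)
 {
   int bs = 3, Ns = 2, nlocal = 10;  /* block size, systems, local rows */
   double *b1 = (double*) calloc(Ns*nlocal*bs*bs, sizeof(double));
   int i = 5, d = 1, j = 1, k = 2;
   b1[(i*Ns+d)*bs*bs + j*bs + k] = 1.0;  /* entry (1,2) of block (5,1) */
   free(b1);
   return 0;
 }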

Parameters
N         Global size of system
Ns        Number of systems
bs        Block size
NRuns     Number of repeated solves (to measure average wall time)
rank      MPI rank of this process
nproc     Number of MPI ranks
flag      Flag to turn the wall time measurement test on/off
LUSolver  Function pointer to the solver to test (despite the name, any block tridiagonal solver with this interface can be tested)
filename  Filename to write out wall times data

Definition at line 1092 of file test.c.

1105 {
1106  int i,j,k,d,ierr=0,nlocal;
1107  double error;
1108  MPI_Comm world;
1109  TridiagLU context;
1110  /* Variable declarations */
1111  double *a1; /* sub-diagonal */
1112  double *b1; /* diagonal */
1113  double *c1; /* super-diagonal */
1114  double *x; /* right hand side, will contain the solution */
1115 
1116  /*
1117  Since a,b,c and x are not preserved, declaring variables to
1118  store a copy of them to calculate error after the solve
1119  */
1120  double *a2,*b2,*c2,*y;
1121 
1122  /* Creating a duplicate communicator */
1123  MPI_Comm_dup(MPI_COMM_WORLD,&world);
1124 
1125  /* Initialize tridiagonal solver parameters */
1126  ierr = tridiagLUInit(&context,&world); if (ierr) return(ierr);
1127 
1128  /* Initialize random number generator */
1129  srand(time(NULL));
1130 
1131  /*
1132  Calculate local size on this process, given
1133  the total size N and number of processes
1134  nproc
1135  */
1136  ierr = partition1D(N,nproc,rank,&nlocal);
1137  MPI_Barrier(MPI_COMM_WORLD);
1138 
1139  /*
1140  Allocate arrays of dimension (Ns x nlocal)
1141  Ns -> number of systems
1142  nlocal -> local size of each system
1143  */
1144  a1 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1145  b1 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1146  c1 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1147  a2 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1148  b2 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1149  c2 = (double*) calloc (Ns*nlocal*bs*bs,sizeof(double));
1150  x = (double*) calloc (Ns*nlocal*bs ,sizeof(double));
1151  y = (double*) calloc (Ns*nlocal*bs ,sizeof(double));
1152 
1153 
1154  /*
1155  TESTING THE LU SOLVER
1156  */
1157  MPI_Barrier(MPI_COMM_WORLD);
1158 
1159 #ifndef speed_test_only
1160  /*
1161  TEST 1: Solution of an identity matrix with random
1162  right hand side
1163  [I]x = b => x = b
1164  */
1165 
1166  /*
1167  Set the values of the matrix elements and the
1168  right hand side
1169  */
1170  for (d = 0; d < Ns; d++) {
1171  for (i = 0; i < nlocal; i++) {
1172  for (j=0; j<bs; j++) {
1173  for (k=0; k<bs; k++) {
1174  a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1175  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 1.0;
1176  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1177  c1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1178  }
1179  x [(i*Ns+d)*bs+j] = rand();
1180  }
1181  }
1182  }
1183 
1184  /*
1185  Copy the original values to calculate error later
1186  */
1187  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1188  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1189  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1190  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1191 
1192  /* Solve */
1193  if (!rank) printf("Block MPI test 1 ([I]x = b => x = b): \t");
1194  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1195  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1196 
1197  /*
1198  Calculate Error
1199  */
1200  error = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1201  if (!rank) printf("error=%E\n",error);
1202  MPI_Barrier(MPI_COMM_WORLD);
1203 
1204  /*
1205  TEST 2: Solution of an upper triangular matrix
1206  [U]x = b
1207  */
1208 
1209  /*
1210  Set the values of the matrix elements and the
1211  right hand side
1212  */
1213  for (d = 0; d < Ns; d++) {
1214  for (i = 0; i < nlocal; i++) {
1215  for (j=0; j<bs; j++) {
1216  for (k=0; k<bs; k++) {
1217  a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1218  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 400.0 + ((double) rand()) / ((double) RAND_MAX);
1219  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0 + ((double) rand()) / ((double) RAND_MAX);
1220  if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
1221  else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1222  }
1223  x[(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
1224  }
1225  }
1226  }
1227 
1228  /*
1229  Copy the original values to calculate error later
1230  */
1231  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1232  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1233  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1234  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1235 
1236  /* Solve */
1237  if (!rank) printf("Block MPI test 2 ([U]x = b => x = [U]^(-1)b):\t");
1238  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1239  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1240 
1241  /*
1242  Calculate Error
1243  */
1244  error = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1245  if (!rank) printf("error=%E\n",error);
1246  MPI_Barrier(MPI_COMM_WORLD);
1247 
1248  /*
1249  TEST 3: Solution of a block tridiagonal matrix with random
1250  entries and right hand side
1251  [A]x = b => x = [A]^(-1)b
1252  */
1253 
1254  /*
1255  Set the values of the matrix elements and the
1256  right hand side
1257  */
1258  for (d = 0; d < Ns; d++) {
1259  for (i = 0; i < nlocal; i++) {
1260  for (j=0; j<bs; j++) {
1261  for (k=0; k<bs; k++) {
1262  if (!rank ) a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
1263  else a1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1264  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 200.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1265  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1266  if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
1267  else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1268  }
1269  x[(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
1270  }
1271  }
1272  }
1273 
1274  /*
1275  Copy the original values to calculate error later
1276  */
1277  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1278  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1279  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1280  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1281 
1282  /* Solve */
1283  if (!rank) printf("Block MPI test 3 ([A]x = b => x = [A]^(-1)b):\t");
1284  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1285  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1286 
1287  /*
1288  Calculate Error
1289  */
1290  error = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1291  if (!rank) printf("error=%E\n",error);
1292  MPI_Barrier(MPI_COMM_WORLD);
1293 #else
1294  if (!rank) printf("Skipping accuracy tests. Remove -Dspeed_test_only compilation flag to enable.\n");
1295 #endif
1296  /*
1297  DONE TESTING THE LU SOLVER
1298  */
1299 
1300 
1301  /*
1302  TESTING WALLTIMES FOR NRuns NUMBER OF RUNS OF TRIDIAGLU()
1303  FOR SCALABILITY CHECK IF REQUIRED
1304  */
1305 
1306  if (flag) {
1307 
1308  /*
1309  TEST 4: Same as TEST 3
1310  Set the values of the matrix elements and the
1311  right hand side
1312  */
1313  for (d = 0; d < Ns; d++) {
1314  for (i = 0; i < nlocal; i++) {
1315  for (j=0; j<bs; j++) {
1316  for (k=0; k<bs; k++) {
1317  if (!rank ) a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((double) rand()) / ((double) RAND_MAX));
1318  else a1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1319  if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 200.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1320  else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0 + ((double) rand()) / ((double) RAND_MAX));
1321  if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((double) rand()) / ((double) RAND_MAX));
1322  else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((double) rand()) / ((double) RAND_MAX);
1323  }
1324  x[(i*Ns+d)*bs+j] = ((double) rand()) / ((double) RAND_MAX);
1325  }
1326  }
1327  }
1328 
1329  /*
1330  Keep a copy of the original values
1331  */
1332  CopyArraySimple(a2,a1,nlocal*Ns*bs*bs);
1333  CopyArraySimple(b2,b1,nlocal*Ns*bs*bs);
1334  CopyArraySimple(c2,c1,nlocal*Ns*bs*bs);
1335  CopyArraySimple(y ,x ,nlocal*Ns*bs );
1336 
1337  if (!rank)
1338  printf("\nBlock MPI test 4 (Speed test - %d Tridiagonal Solves):\n",NRuns);
1339  double runtimes[5] = {0.0,0.0,0.0,0.0,0.0};
1340  error = 0;
1341  /*
1342  Solve the system NRuns times
1343  */
1344  for (i = 0; i < NRuns; i++) {
1345  /* Copy the original values */
1346  CopyArraySimple(a1,a2,nlocal*Ns*bs*bs);
1347  CopyArraySimple(b1,b2,nlocal*Ns*bs*bs);
1348  CopyArraySimple(c1,c2,nlocal*Ns*bs*bs);
1349  CopyArraySimple(x ,y ,nlocal*Ns*bs );
1350  /* Solve the system */
1351  MPI_Barrier(MPI_COMM_WORLD);
1352  ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1353  MPI_Barrier(MPI_COMM_WORLD);
1354  /* Calculate errors */
1355  double err = CalculateErrorBlock(a2,b2,c2,y,x,nlocal,Ns,bs,rank,nproc);
1356  /* Add the walltimes to the cumulative total */
1357  runtimes[0] += context.total_time;
1358  runtimes[1] += context.stage1_time;
1359  runtimes[2] += context.stage2_time;
1360  runtimes[3] += context.stage3_time;
1361  runtimes[4] += context.stage4_time;
1362  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1363  error += err;
1364  }
1365 
1366  /* Calculate average error */
1367  error /= NRuns;
1368 
1369  /* Calculate maximum value of walltime across all processes */
1370  MPI_Allreduce(MPI_IN_PLACE,&runtimes[0],5,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
1371 
1372  /* Print results */
1373  if (ierr == -1) printf("Error - system is singular on process %d\t",rank);
1374  if (!rank) {
1375  printf("\t\tTotal walltime = %E\n",runtimes[0]);
1376  printf("\t\tStage1 walltime = %E\n",runtimes[1]);
1377  printf("\t\tStage2 walltime = %E\n",runtimes[2]);
1378  printf("\t\tStage3 walltime = %E\n",runtimes[3]);
1379  printf("\t\tStage4 walltime = %E\n",runtimes[4]);
1380  printf("\t\tAverage error = %E\n",error);
1381  FILE *out;
1382  out = fopen(filename,"w");
1383  fprintf(out,"%5d %1.16E %1.16E %1.16E %1.16E %1.16E %1.16E\n",nproc,runtimes[0],
1384  runtimes[1],runtimes[2],runtimes[3],runtimes[4],error);
1385  fclose(out);
1386  }
1387  MPI_Barrier(MPI_COMM_WORLD);
1388  }
1389  /*
1390  DONE TESTING TRIDIAGLU() WALLTIMES
1391  */
1392 
1393  /*
1394  DEALLOCATE ALL ARRAYS
1395  */
1396  free(a1);
1397  free(b1);
1398  free(c1);
1399  free(a2);
1400  free(b2);
1401  free(c2);
1402  free(x);
1403  free(y);
1404  MPI_Comm_free(&world);
1405 
1406  /* Return */
1407  return(0);
1408 }
int partition1D ( int  nglobal,
int  nproc,
int  rank,
int *  nlocal 
)
static

Function to calculate the local size on each process, given the global size, the number of processes, and the process rank. The remainder nglobal%nproc goes to the last rank: for example, nglobal=10 on nproc=4 processes gives nlocal=2 on ranks 0-2 and nlocal=4 on rank 3.

Parameters
nglobal  Global size
nproc    Number of MPI ranks
rank     MPI rank
nlocal   Local size (computed)

Definition at line 1627 of file test.c.

1633 {
1634  if (nglobal%nproc == 0) *nlocal = nglobal/nproc;
1635  else {
1636  if (rank == nproc-1) *nlocal = nglobal/nproc+nglobal%nproc;
1637  else *nlocal = nglobal/nproc;
1638  }
1639  return(0);
1640 }
double CalculateError ( double *  a,
double *  b,
double *  c,
double *  y,
double *  x,
int  N,
int  Ns,
int  rank,
int  nproc 
)
static

This function computes the error in the solutions of tridiagonal systems of equations: \({\bf \epsilon}^k = A^k {\bf x}^k - {\bf y}^k ,\ k=1,\cdots,Ns\). The square of the L2 norm of the error, normalized by the square of the L2 norm of the right-hand side, is returned. If \(Ns > 1\), the norm is computed over all the systems ( \(k=1,\cdots,Ns\)).
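In terms of the quantities above, the value returned (per the listing below) is

\[ E = \frac{\sum_{k=1}^{Ns} \left\| A^k {\bf x}^k - {\bf y}^k \right\|_2^2}{\sum_{k=1}^{Ns} \left\| {\bf y}^k \right\|_2^2}, \]

where each norm is accumulated over the grid points on all processes; if the denominator is zero, the unnormalized numerator is returned.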

Parameters
a      Array of subdiagonal elements
b      Array of diagonal elements
c      Array of superdiagonal elements
y      Right-hand side
x      Solution
N      Local size of system
Ns     Number of systems
rank   MPI rank of this process
nproc  Total number of MPI ranks

Definition at line 1495 of file test.c.

1506 {
1507  double error = 0, norm = 0;
1508  int i,d;
1509  double xp1, xm1; /* solution from neighboring processes */
1510 
1511  for (d = 0; d < Ns; d++) {
1512  xp1 = 0;
1513  if (nproc > 1) {
1514  MPI_Request request = MPI_REQUEST_NULL;
1515  if (rank != nproc-1) MPI_Irecv(&xp1,1,MPI_DOUBLE,rank+1,1738,MPI_COMM_WORLD,&request);
1516  if (rank) MPI_Send(&x[d],1,MPI_DOUBLE,rank-1,1738,MPI_COMM_WORLD);
1517  MPI_Wait(&request,MPI_STATUS_IGNORE);
1518  }
1519 
1520  xm1 = 0;
1521  if (nproc > 1) {
1522  MPI_Request request = MPI_REQUEST_NULL;
1523  if (rank) MPI_Irecv(&xm1,1,MPI_DOUBLE,rank-1,1739,MPI_COMM_WORLD,&request);
1524  if (rank != nproc-1) MPI_Send(&x[d+(N-1)*Ns],1,MPI_DOUBLE,rank+1,1739,MPI_COMM_WORLD);
1525  MPI_Wait(&request,MPI_STATUS_IGNORE);
1526  }
1527 
1528  /* error and norm are initialized at declaration and
1529     accumulate over all Ns systems */
1530  for (i = 0; i < N; i++) {
1531  double val = 0;
1532  if (i == 0) val += a[i*Ns+d]*xm1;
1533  else val += a[i*Ns+d]*x[(i-1)*Ns+d];
1534  val += b[i*Ns+d]*x[i*Ns+d];
1535  if (i == N-1) val += c[i*Ns+d]*xp1;
1536  else val += c[i*Ns+d]*x[(i+1)*Ns+d];
1537  val = y[i*Ns+d] - val;
1538  error += val * val;
1539  norm += y[i*Ns+d] * y[i*Ns+d];
1540  }
1541  }
1542 
1543  double global_error = 0, global_norm = 0;
1544  if (nproc > 1) MPI_Allreduce(&error,&global_error,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1545  else global_error = error;
1546  if (nproc > 1) MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1547  else global_norm = norm;
1548  if (global_norm != 0.0) global_error /= global_norm;
1549 
1550  return(global_error);
1551 }
double CalculateErrorBlock ( double *  a,
double *  b,
double *  c,
double *  y,
double *  x,
int  N,
int  Ns,
int  bs,
int  rank,
int  nproc 
)
static

This function computes the error in the solutions of block tridiagonal systems of equations: \({\bf \epsilon}^k = A^k {\bf x}^k - {\bf y}^k ,\ k=1,\cdots,Ns\). The square of the L2 norm of the error, normalized by the square of the L2 norm of the right-hand side, is returned. If \(Ns > 1\), the norm is computed over all the systems ( \(k=1,\cdots,Ns\)).

Parameters
a      Array of subdiagonal elements
b      Array of diagonal elements
c      Array of superdiagonal elements
y      Right-hand side
x      Solution
N      Local size of system (not multiplied by the block size)
Ns     Number of systems
bs     Block size
rank   MPI rank of this process
nproc  Total number of MPI ranks

Definition at line 1560 of file test.c.

1573 {
1574  double error = 0, norm = 0;
1575  int i,j,d;
1576  double xp1[bs], xm1[bs]; /* solution from neighboring processes */
1577 
1578  for (d = 0; d < Ns; d++) {
1579  for (i=0; i<bs; i++) xp1[i] = 0;
1580  if (nproc > 1) {
1581  MPI_Request request = MPI_REQUEST_NULL;
1582  if (rank != nproc-1) MPI_Irecv(&xp1,bs,MPI_DOUBLE,rank+1,1738,MPI_COMM_WORLD,&request);
1583  if (rank) MPI_Send(&x[d*bs],bs,MPI_DOUBLE,rank-1,1738,MPI_COMM_WORLD);
1584  MPI_Wait(&request,MPI_STATUS_IGNORE);
1585  }
1586 
1587  for (i=0; i<bs; i++) xm1[i] = 0;
1588  if (nproc > 1) {
1589  MPI_Request request = MPI_REQUEST_NULL;
1590  if (rank) MPI_Irecv(&xm1,bs,MPI_DOUBLE,rank-1,1739,MPI_COMM_WORLD,&request);
1591  if (rank != nproc-1) MPI_Send (&x[(d+(N-1)*Ns)*bs],bs,MPI_DOUBLE,rank+1,1739,MPI_COMM_WORLD);
1592  MPI_Wait(&request,MPI_STATUS_IGNORE);
1593  }
1594 
1595  /* error and norm are initialized at declaration and
1596     accumulate over all Ns systems */
1597  for (i = 0; i < N; i++) {
1598  double val[bs]; for (j=0; j<bs; j++) val[j] = y[(i*Ns+d)*bs+j];
1599  _MatVecMultiplySubtract_(val,b+(i*Ns+d)*bs*bs,x+(i*Ns+d)*bs,bs);
1600  if (i == 0) _MatVecMultiplySubtract_(val,a+(i*Ns+d)*bs*bs,xm1,bs)
1601  else _MatVecMultiplySubtract_(val,a+(i*Ns+d)*bs*bs,x+((i-1)*Ns+d)*bs,bs)
1602  if (i == N-1) _MatVecMultiplySubtract_(val,c+(i*Ns+d)*bs*bs,xp1,bs)
1603  else _MatVecMultiplySubtract_(val,c+(i*Ns+d)*bs*bs,x+((i+1)*Ns+d)*bs,bs)
1604  for (j=0; j<bs; j++) error += val[j] * val[j];
1605  for (j=0; j<bs; j++) norm += y[(i*Ns+d)*bs+j] * y[(i*Ns+d)*bs+j];
1606  }
1607  }
1608 
1609  double global_error = 0, global_norm = 0;
1610  if (nproc > 1) MPI_Allreduce(&error,&global_error,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1611  else global_error = error;
1612  if (nproc > 1) MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1613  else global_norm = norm;
1614  if (global_norm != 0.0) global_error /= global_norm;
1615 
1616  return(global_error);
1617 }
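The listing uses _MatVecMultiplySubtract_(y, A, x, N) from matops.h, which, as used here, updates y by subtracting the product of the N×N row-major block A with the vector x. A plain-C sketch of these assumed semantics (an illustration, not the actual macro from matops.h):

 /* Sketch: y <- y - A*x for an N x N block A stored row-major,
    matching how _MatVecMultiplySubtract_ is used above. */
 static void mat_vec_multiply_subtract(double *y, const double *A,
                                       const double *x, int N)
 {
   int j, k;
   for (j = 0; j < N; j++) {
     for (k = 0; k < N; k++) y[j] -= A[j*N+k] * x[k];
   }
 }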
void Cblacs_gridinit ( )
void Cblacs_gridexit ( )
int main ( int  argc,
char *  argv[] 
)

Main function: reads in an input file called "input", which should contain three integers: the global size of the system to test, the number of systems, and the number of repeated runs (the last helps obtain an average wall time). For example, an input file containing the line "100 10 1000" tests systems of global size 100, with 10 systems solved simultaneously and wall times averaged over 1000 solves.

Definition at line 227 of file test.c.

228 {
229  int ierr,N,Ns,NRuns;
230 
231 #ifdef serial
232 
233  /* If compiled in serial, run the serial tests */
234  /* read in size of system, number of system and number of solves */
235  FILE *in; in=fopen("input","r");
236  ierr = fscanf (in,"%d %d %d",&N,&Ns,&NRuns);
237  if (ierr != 3) {
238  fprintf(stderr,"Invalid input file.\n");
239  return(0);
240  }
241  fclose(in);
242  /* call the test function */
243  ierr = main_serial(N,Ns);
244  if (ierr) fprintf(stderr,"main_serial() returned with an error code of %d.\n",ierr);
245 
246 #else
247 
248  /* If compiled in parallel, run the MPI tests */
249  int rank,nproc;
250  MPI_Init(&argc,&argv);
251  MPI_Comm_size(MPI_COMM_WORLD,&nproc);
252  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
253 
254 #ifdef with_scalapack
255  /* initialize BLACS */
256  int rank_blacs,nproc_blacs,blacs_context;
257  Cblacs_pinfo(&rank_blacs,&nproc_blacs);
258  Cblacs_get(-1,0,&blacs_context);
259  Cblacs_gridinit(&blacs_context,"R",1,nproc_blacs);
260 #else
261  int blacs_context = -1;
262 #endif
263 
264  /* read in size of system, number of system and number of solves */
265 
266  if (!rank) {
267  FILE *in; in=fopen("input","r");
268  ierr = fscanf (in,"%d %d %d",&N,&Ns,&NRuns);
269  if (ierr != 3) {
270  fprintf(stderr,"Invalid input file.\n");
271  return(0);
272  }
273  fclose(in);
274  }
275  /* Broadcast the input values to all the processes */
276  MPI_Bcast(&N ,1,MPI_INT,0,MPI_COMM_WORLD);
277  MPI_Bcast(&Ns,1,MPI_INT,0,MPI_COMM_WORLD);
278  MPI_Bcast(&NRuns,1,MPI_INT,0,MPI_COMM_WORLD);
279 
280  /* call the test function */
281  ierr = main_mpi(N,Ns,NRuns,rank,nproc,blacs_context);
282  if (ierr) fprintf(stderr,"main_mpi() returned with an error code of %d on rank %d.\n",ierr,rank);
283 
284 #ifdef with_scalapack
285  Cblacs_gridexit(blacs_context);
286  Cblacs_exit(0);
287 #else
288  MPI_Finalize();
289 #endif
290 #endif
291  return(0);
292 }

Variable Documentation

int MAX_BS = 5

Maximum block size to test: the block tridiagonal tests are run for block sizes 1, ..., MAX_BS.

Definition at line 189 of file test.c.