192 static void CopyArray (
double*,
double*,
int,
int);
195 static int main_serial (
int,
int);
196 static int test_serial (
int,
int,
int(*)(
double*,
double*,
double*,
double*,
int,
int,
void*,
void*));
197 static int test_block_serial (
int,
int,
int,
int(*)(
double*,
double*,
double*,
double*,
int,
int,
int,
void*,
void*));
198 static double CalculateError (
double*,
double*,
double*,
double*,
double*,
int,
int);
201 static int main_mpi (
int,
int,
int,
int,
int,
int);
202 static int test_mpi (
int,
int,
int,
int,
int,
int,
int,
int(*)(
double*,
double*,
double*,
double*,
int,
int,
void*,
void*),
const char*);
203 static int test_block_mpi (
int,
int,
int,
int,
int,
int,
int,
int(*)(
double*,
double*,
double*,
double*,
int,
int,
int,
void*,
void*),
const char*);
205 static double CalculateError (
double*,
double*,
double*,
double*,
double*,
int,
int,
int,
int);
206 static double CalculateErrorBlock (
double*,
double*,
double*,
double*,
double*,
int,
int,
int,
int,
int);
209 #ifdef with_scalapack
227 int main(
int argc,
char *argv[])
235 FILE *in; in=fopen(
"input",
"r");
236 ierr = fscanf (in,
"%d %d %d",&N,&Ns,&NRuns);
238 fprintf(stderr,
"Invalid input file.\n");
243 ierr = main_serial(N,Ns);
244 if (ierr) fprintf(stderr,
"main_serial() returned with an error code of %d.\n",ierr);
250 MPI_Init(&argc,&argv);
251 MPI_Comm_size(MPI_COMM_WORLD,&nproc);
252 MPI_Comm_rank(MPI_COMM_WORLD,&rank);
254 #ifdef with_scalapack
256 int rank_blacs,nproc_blacs,blacs_context;
261 int blacs_context = -1;
267 FILE *in; in=fopen(
"input",
"r");
268 ierr = fscanf (in,
"%d %d %d",&N,&Ns,&NRuns);
270 fprintf(stderr,
"Invalid input file.\n");
276 MPI_Bcast(&N ,1,MPI_INT,0,MPI_COMM_WORLD);
277 MPI_Bcast(&Ns,1,MPI_INT,0,MPI_COMM_WORLD);
278 MPI_Bcast(&NRuns,1,MPI_INT,0,MPI_COMM_WORLD);
281 ierr =
main_mpi(N,Ns,NRuns,rank,nproc,blacs_context);
282 if (ierr) fprintf(stderr,
"main_mpi() returned with an error code of %d on rank %d.\n",ierr,rank);
284 #ifdef with_scalapack
300 int main_serial(
int N,
int Ns)
304 printf(
"Testing serial tridiagLUGS() with N=%d, Ns=%d\n",N,Ns);
305 ierr = test_serial(N,Ns,&
tridiagLUGS);
if(ierr)
return(ierr);
307 printf(
"Testing serial tridiagIterJacobi() with N=%d, Ns=%d\n",N,Ns);
310 printf(
"Testing serial tridiagLU() with N=%d, Ns=%d\n",N,Ns);
311 ierr = test_serial(N,Ns,&
tridiagLU);
if(ierr)
return(ierr);
314 for (bs=1; bs <=
MAX_BS; bs++) {
315 printf(
"Testing serial blocktridiagIterJacobi() with N=%d, Ns=%d, bs=%d\n",N,Ns,bs);
318 for (bs=1; bs <=
MAX_BS; bs++) {
319 printf(
"Testing serial blocktridiagLU() with N=%d, Ns=%d, bs=%d\n",N,Ns,bs);
320 ierr = test_block_serial(N,Ns,bs,&
blocktridiagLU);
if(ierr)
return(ierr);
331 int test_serial(
int N,
int Ns,
int (*LUSolver)(
double*,
double*,
double*,
double*,
int,
int,
void*,
void*))
350 double *a2,*b2,*c2,*y;
360 a1 = (
double*) calloc (N*Ns,
sizeof(
double));
361 b1 = (
double*) calloc (N*Ns,
sizeof(
double));
362 c1 = (
double*) calloc (N*Ns,
sizeof(
double));
363 a2 = (
double*) calloc (N*Ns,
sizeof(
double));
364 b2 = (
double*) calloc (N*Ns,
sizeof(
double));
365 c2 = (
double*) calloc (N*Ns,
sizeof(
double));
366 x = (
double*) calloc (N*Ns,
sizeof(
double));
367 y = (
double*) calloc (N*Ns,
sizeof(
double));
379 for (d = 0; d < Ns; d++) {
380 for (i = 0; i < N; i++) {
384 x [i*Ns+d] = ((double) rand()) / ((
double) RAND_MAX);
398 printf(
"TridiagLU Serial test 1 ([I]x = b => x = b): \t");
399 ierr = LUSolver(a1,b1,c1,x,N,Ns,&context,NULL);
400 if (ierr == -1) printf(
"Error - system is singular\t");
406 printf(
"error=%E\n",error);
417 for (d = 0; d < Ns; d++) {
418 for (i = 0; i < N; i++) {
421 c1[i*Ns+d] = (i == N-1 ? 0 : 0.5);
435 printf(
"TridiagLU Serial test 2 ([U]x = b => x = [U]^(-1)b):\t");
436 ierr = LUSolver(a1,b1,c1,x,N,Ns,&context,NULL);
437 if (ierr == -1) printf(
"Error - system is singular\t");
443 printf(
"error=%E\n",error);
455 for (d = 0; d < Ns; d++) {
456 for (i = 0; i < N; i++) {
457 a1[i*Ns+d] = (i == 0 ? 0.0 : ((double) rand()) / ((
double) RAND_MAX));
458 b1[i*Ns+d] = 100.0*(1.0+((double) rand()) / ((
double) RAND_MAX));
459 c1[i*Ns+d] = (i == N-1 ? 0 : ((double) rand()) / ((
double) RAND_MAX));
460 x [i*Ns+d] = ((double) rand()) / ((
double) RAND_MAX);
473 printf(
"TridiagLU Serial test 3 ([A]x = b => x = [A]^(-1)b):\t");
474 ierr = LUSolver(a1,b1,c1,x,N,Ns,&context,NULL);
475 if (ierr == -1) printf(
"Error - system is singular\t");
481 printf(
"error=%E\n",error);
503 int test_block_serial(
int N,
int Ns,
int bs,
int (*LUSolver)(
double*,
double*,
double*,
double*,
int,
int,
int,
void*,
void*))
522 double *a2,*b2,*c2,*y;
532 a1 = (
double*) calloc (N*Ns*bs*bs,
sizeof(
double));
533 b1 = (
double*) calloc (N*Ns*bs*bs,
sizeof(
double));
534 c1 = (
double*) calloc (N*Ns*bs*bs,
sizeof(
double));
535 a2 = (
double*) calloc (N*Ns*bs*bs,
sizeof(
double));
536 b2 = (
double*) calloc (N*Ns*bs*bs,
sizeof(
double));
537 c2 = (
double*) calloc (N*Ns*bs*bs,
sizeof(
double));
538 x = (
double*) calloc (N*Ns*bs ,
sizeof(
double));
539 y = (
double*) calloc (N*Ns*bs ,
sizeof(
double));
551 for (d = 0; d < Ns; d++) {
552 for (i = 0; i < N; i++) {
553 for (j=0; j<bs; j++) {
554 for (k=0; k<bs; k++) {
555 a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
556 if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 1.0;
557 else b1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
558 c1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
560 x [(i*Ns+d)*bs+j] = ((
double) rand()) / ((
double) RAND_MAX);
575 printf("Block
TridiagLU Serial test 1 ([I]x = b => x = b): \t");
576 ierr = LUSolver(a1,b1,c1,x,N,Ns,bs,&context,NULL);
577 if (ierr == -1) printf("Error - system is singular\t");
583 printf("error=%E\n",error);
594 for (d = 0; d < Ns; d++) {
595 for (i = 0; i < N; i++) {
596 for (j=0; j<bs; j++) {
597 for (k=0; k<bs; k++) {
598 a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
599 b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0 + ((
double) rand()) / ((
double) RAND_MAX);
600 c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == N-1 ? 0 : 0.5*(((
double) rand()) / ((
double) RAND_MAX)));
602 x [(i*Ns+d)*bs+j] = ((
double) rand()) / ((
double) RAND_MAX);
616 printf("Block
TridiagLU Serial test 2 ([U]x = b => x = [U]^(-1)b):\t");
617 ierr = LUSolver(a1,b1,c1,x,N,Ns,bs,&context,NULL);
618 if (ierr == -1) printf("Error - system is singular\t");
624 printf("error=%E\n",error);
636 for (d = 0; d < Ns; d++) {
637 for (i = 0; i < N; i++) {
638 for (j=0; j<bs; j++) {
639 for (k=0; k<bs; k++) {
640 a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((
double) rand()) / ((
double) RAND_MAX));
641 b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0+((
double) rand()) / ((
double) RAND_MAX));
642 c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == N-1 ? 0 : ((
double) rand()) / ((
double) RAND_MAX));
644 x [(i*Ns+d)*bs+j] = ((
double) rand()) / ((
double) RAND_MAX);
658 printf("Block
TridiagLU Serial test 3 ([A]x = b => x = [A]^(-1)b):\t");
659 ierr = LUSolver(a1,b1,c1,x,N,Ns,bs,&context,NULL);
660 if (ierr == -1) printf("Error - system is singular\t");
666 printf("error=%E\n",error);
701 if (!rank) printf(
"\nTesting MPI tridiagLUGS() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
702 ierr =
test_mpi(N,Ns,NRuns,rank,nproc,0,blacs_context,&
tridiagLUGS,
"walltimes_tridiagLUGS.dat");
if (ierr)
return(ierr);
703 MPI_Barrier(MPI_COMM_WORLD);
705 if (!rank) printf(
"\nSkipping tridiagLUGS(). Compile with -Dtest_LUGS flag to enable.\n");
708 #ifdef test_IterJacobi
709 if (!rank) printf(
"\nTesting MPI tridiagIterJacobi() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
710 ierr =
test_mpi(N,Ns,NRuns,rank,nproc,0,blacs_context,&
tridiagIterJacobi,
"walltimes_tridiagIterJac.dat");
if (ierr)
return(ierr);
711 MPI_Barrier(MPI_COMM_WORLD);
713 if (!rank) printf(
"\nSkipping tridiagIterJacobi(). Compile with -Dtest_IterJacobi flag to enable.\n");
717 if (!rank) printf(
"\nTesting MPI tridiagLU() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
718 ierr =
test_mpi(N,Ns,NRuns,rank,nproc,1,blacs_context,&
tridiagLU,
"walltimes_tridiagLU.dat");
if (ierr)
return(ierr);
719 MPI_Barrier(MPI_COMM_WORLD);
721 if (!rank) printf(
"\nSkipping tridiagLU(). Compile with -Dtest_LU flag to enable.\n");
724 #ifdef with_scalapack
725 #ifdef test_scalapack
726 if (!rank) printf(
"\nTesting MPI tridiagScaLPK() with N=%d, Ns=%d on %d processes\n",N,Ns,nproc);
727 ierr =
test_mpi(N,Ns,NRuns,rank,nproc,1,blacs_context,&
tridiagScaLPK,
"walltimes_tridiagScaLPK.dat");
if (ierr)
return(ierr);
728 MPI_Barrier(MPI_COMM_WORLD);
730 if (!rank) printf(
"\nSkipping tridiagScaLPK(). Compile with -Dtest_scalapack flag to enable.\n");
734 if (!rank) printf(
"-----------------------------------------------------------------\n");
735 MPI_Barrier(MPI_COMM_WORLD);
737 #ifdef test_blockIterJacobi
739 for (bs=1; bs <=
MAX_BS; bs++) {
740 if (!rank) printf(
"\nTesting MPI blocktridiagIterJacobi() with N=%d, Ns=%d, bs=%d on %d processes\n",N,Ns,bs,nproc);
742 MPI_Barrier(MPI_COMM_WORLD);
745 if (!rank) printf(
"\nSkipping blocktridiagIterJacobi(). Compile with -Dtest_blockIterJacobi flag to enable.\n");
749 for (bs=1; bs <=
MAX_BS; bs++) {
750 if (!rank) printf(
"\nTesting MPI blocktridiagLU() with N=%d, Ns=%d, bs=%d on %d processes\n",N,Ns,bs,nproc);
752 MPI_Barrier(MPI_COMM_WORLD);
755 if (!rank) printf(
"\nSkipping blocktridiagLU(). Compile with -Dtest_blockLU flag to enable.\n");
782 int(*LUSolver)(
double*,
double*,
double*,
double*,
int,
int,
void*,
void*),
786 int i,d,ierr=0,nlocal;
800 double *a2,*b2,*c2,*y;
803 MPI_Comm_dup(MPI_COMM_WORLD,&world);
806 ierr =
tridiagLUInit(&context,&world);
if (ierr)
return(ierr);
808 #ifdef with_scalapack
821 MPI_Barrier(MPI_COMM_WORLD);
828 a1 = (
double*) calloc (Ns*nlocal,
sizeof(
double));
829 b1 = (
double*) calloc (Ns*nlocal,
sizeof(
double));
830 c1 = (
double*) calloc (Ns*nlocal,
sizeof(
double));
831 a2 = (
double*) calloc (Ns*nlocal,
sizeof(
double));
832 b2 = (
double*) calloc (Ns*nlocal,
sizeof(
double));
833 c2 = (
double*) calloc (Ns*nlocal,
sizeof(
double));
834 x = (
double*) calloc (Ns*nlocal,
sizeof(
double));
835 y = (
double*) calloc (Ns*nlocal,
sizeof(
double));
841 MPI_Barrier(MPI_COMM_WORLD);
842 #ifndef speed_test_only
853 for (d = 0; d < Ns; d++) {
854 for (i = 0; i < nlocal; i++) {
871 if (!rank) printf(
"MPI test 1 ([I]x = b => x = b): \t");
872 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
873 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
879 if (!rank) printf(
"error=%E\n",error);
880 MPI_Barrier(MPI_COMM_WORLD);
891 for (d = 0; d < Ns; d++) {
892 for (i = 0; i < nlocal; i++) {
895 if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : 0.5);
896 else c1[i*Ns+d] = 0.5;
910 if (!rank) printf(
"MPI test 2 ([U]x = b => x = [U]^(-1)b):\t");
911 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
912 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
918 if (!rank) printf(
"error=%E\n",error);
919 MPI_Barrier(MPI_COMM_WORLD);
931 for (d = 0; d < Ns; d++) {
932 for (i = 0; i < nlocal; i++) {
933 if (!rank ) a1[i*Ns+d] = (i == 0 ? 0.0 : ((double) rand()) / ((
double) RAND_MAX));
934 else a1[i*Ns+d] = ((double) rand()) / ((
double) RAND_MAX);
935 b1[i*Ns+d] = 100.0*(1.0 + ((double) rand()) / ((
double) RAND_MAX));
936 if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : ((double) rand()) / ((
double) RAND_MAX));
937 else c1[i*Ns+d] = ((double) rand()) / ((
double) RAND_MAX);
938 x[i*Ns+d] = ((double) rand()) / ((
double) RAND_MAX);
951 if (!rank) printf(
"MPI test 3 ([A]x = b => x = [A]^(-1)b):\t");
952 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
953 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
959 if (!rank) printf(
"error=%E\n",error);
960 MPI_Barrier(MPI_COMM_WORLD);
966 if (!rank) printf(
"Skipping accuracy tests. Remove -Dspeed_test_only compilation flag to enable.\n");
981 for (d = 0; d < Ns; d++) {
982 for (i = 0; i < nlocal; i++) {
984 if (!rank ) a1[i*Ns+d] = (i == 0 ? 0.0 : 0.3 );
985 else a1[i*Ns+d] = 0.3;
987 if (!rank) b1[i*Ns+d] = (i == 0 ? 1.0 : 0.6 );
988 else if (rank == nproc-1) b1[i*Ns+d] = (i == nlocal-1 ? 1.0 : 0.6 );
989 else b1[i*Ns+d] = 0.6;
991 if (rank == nproc-1) c1[i*Ns+d] = (i == nlocal-1 ? 0 : 0.1 );
992 else c1[i*Ns+d] = 0.1;
994 x[i*Ns+d] = ((double) rand()) / ((
double) RAND_MAX);
1007 printf(
"\nMPI test 4 (Speed test - %d Tridiagonal Solves):\n",NRuns);
1008 double runtimes[5] = {0.0,0.0,0.0,0.0,0.0};
1013 for (i = 0; i < NRuns; i++) {
1020 MPI_Barrier(MPI_COMM_WORLD);
1021 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,&context,&world);
1022 MPI_Barrier(MPI_COMM_WORLD);
1031 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1039 MPI_Allreduce(MPI_IN_PLACE,&runtimes[0],5,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
1042 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1044 printf(
"\t\tTotal walltime = %E\n",runtimes[0]);
1045 printf(
"\t\tStage1 walltime = %E\n",runtimes[1]);
1046 printf(
"\t\tStage2 walltime = %E\n",runtimes[2]);
1047 printf(
"\t\tStage3 walltime = %E\n",runtimes[3]);
1048 printf(
"\t\tStage4 walltime = %E\n",runtimes[4]);
1049 printf(
"\t\tAverage error = %E\n",error);
1051 out = fopen(filename,
"w");
1052 fprintf(out,
"%5d %1.16E %1.16E %1.16E %1.16E %1.16E %1.16E\n",nproc,runtimes[0],
1053 runtimes[1],runtimes[2],runtimes[3],runtimes[4],error);
1056 MPI_Barrier(MPI_COMM_WORLD);
1074 MPI_Comm_free(&world);
1102 int(*LUSolver)(
double*,
double*,
double*,
double*,
int,
int,
int,
void*,
void*),
1103 const char *filename
1106 int i,j,k,d,ierr=0,nlocal;
1120 double *a2,*b2,*c2,*y;
1123 MPI_Comm_dup(MPI_COMM_WORLD,&world);
1126 ierr =
tridiagLUInit(&context,&world);
if (ierr)
return(ierr);
1137 MPI_Barrier(MPI_COMM_WORLD);
1144 a1 = (
double*) calloc (Ns*nlocal*bs*bs,
sizeof(
double));
1145 b1 = (
double*) calloc (Ns*nlocal*bs*bs,
sizeof(
double));
1146 c1 = (
double*) calloc (Ns*nlocal*bs*bs,
sizeof(
double));
1147 a2 = (
double*) calloc (Ns*nlocal*bs*bs,
sizeof(
double));
1148 b2 = (
double*) calloc (Ns*nlocal*bs*bs,
sizeof(
double));
1149 c2 = (
double*) calloc (Ns*nlocal*bs*bs,
sizeof(
double));
1150 x = (
double*) calloc (Ns*nlocal*bs ,
sizeof(
double));
1151 y = (
double*) calloc (Ns*nlocal*bs ,
sizeof(
double));
1157 MPI_Barrier(MPI_COMM_WORLD);
1159 #ifndef speed_test_only
1170 for (d = 0; d < Ns; d++) {
1171 for (i = 0; i < nlocal; i++) {
1172 for (j=0; j<bs; j++) {
1173 for (k=0; k<bs; k++) {
1174 a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1175 if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 1.0;
1176 else b1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1177 c1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1179 x [(i*Ns+d)*bs+j] = rand();
1193 if (!rank) printf(
"Block MPI test 1 ([I]x = b => x = b): \t");
1194 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1195 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1201 if (!rank) printf(
"error=%E\n",error);
1202 MPI_Barrier(MPI_COMM_WORLD);
1213 for (d = 0; d < Ns; d++) {
1214 for (i = 0; i < nlocal; i++) {
1215 for (j=0; j<bs; j++) {
1216 for (k=0; k<bs; k++) {
1217 a1[(i*Ns+d)*bs*bs+j*bs+k] = 0.0;
1218 if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 400.0 + ((
double) rand()) / ((
double) RAND_MAX);
1219 else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0 + ((
double) rand()) / ((
double) RAND_MAX);
1220 if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((
double) rand()) / ((
double) RAND_MAX));
1221 else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((
double) rand()) / ((
double) RAND_MAX);
1223 x[(i*Ns+d)*bs+j] = ((
double) rand()) / ((
double) RAND_MAX);
1237 if (!rank) printf(
"Block MPI test 2 ([U]x = b => x = [U]^(-1)b):\t");
1238 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1239 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1245 if (!rank) printf(
"error=%E\n",error);
1246 MPI_Barrier(MPI_COMM_WORLD);
1258 for (d = 0; d < Ns; d++) {
1259 for (i = 0; i < nlocal; i++) {
1260 for (j=0; j<bs; j++) {
1261 for (k=0; k<bs; k++) {
1262 if (!rank ) a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((
double) rand()) / ((double) RAND_MAX));
1263 else a1[(i*Ns+d)*bs*bs+j*bs+k] = ((
double) rand()) / ((
double) RAND_MAX);
1264 if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 200.0*(1.0 + ((
double) rand()) / ((
double) RAND_MAX));
1265 else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0 + ((
double) rand()) / ((double) RAND_MAX));
1266 if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((
double) rand()) / ((
double) RAND_MAX));
1267 else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((
double) rand()) / ((
double) RAND_MAX);
1269 x[(i*Ns+d)*bs+j] = ((
double) rand()) / ((
double) RAND_MAX);
1283 if (!rank) printf(
"Block MPI test 3 ([A]x = b => x = [A]^(-1)b):\t");
1284 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1285 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1291 if (!rank) printf(
"error=%E\n",error);
1292 MPI_Barrier(MPI_COMM_WORLD);
1294 if (!rank) printf(
"Skipping accuracy tests. Remove -Dspeed_test_only compilation flag to enable.\n");
1313 for (d = 0; d < Ns; d++) {
1314 for (i = 0; i < nlocal; i++) {
1315 for (j=0; j<bs; j++) {
1316 for (k=0; k<bs; k++) {
1317 if (!rank ) a1[(i*Ns+d)*bs*bs+j*bs+k] = (i == 0 ? 0.0 : ((
double) rand()) / ((double) RAND_MAX));
1318 else a1[(i*Ns+d)*bs*bs+j*bs+k] = ((
double) rand()) / ((
double) RAND_MAX);
1319 if (j==k) b1[(i*Ns+d)*bs*bs+j*bs+k] = 200.0*(1.0 + ((
double) rand()) / ((
double) RAND_MAX));
1320 else b1[(i*Ns+d)*bs*bs+j*bs+k] = 100.0*(1.0 + ((
double) rand()) / ((double) RAND_MAX));
1321 if (rank == nproc-1) c1[(i*Ns+d)*bs*bs+j*bs+k] = (i == nlocal-1 ? 0 : ((
double) rand()) / ((
double) RAND_MAX));
1322 else c1[(i*Ns+d)*bs*bs+j*bs+k] = ((
double) rand()) / ((
double) RAND_MAX);
1324 x[(i*Ns+d)*bs+j] = ((
double) rand()) / ((
double) RAND_MAX);
1338 printf(
"\nBlock MPI test 4 (Speed test - %d Tridiagonal Solves):\n",NRuns);
1339 double runtimes[5] = {0.0,0.0,0.0,0.0,0.0};
1344 for (i = 0; i < NRuns; i++) {
1351 MPI_Barrier(MPI_COMM_WORLD);
1352 ierr = LUSolver(a1,b1,c1,x,nlocal,Ns,bs,&context,&world);
1353 MPI_Barrier(MPI_COMM_WORLD);
1362 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1370 MPI_Allreduce(MPI_IN_PLACE,&runtimes[0],5,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
1373 if (ierr == -1) printf(
"Error - system is singular on process %d\t",rank);
1375 printf(
"\t\tTotal walltime = %E\n",runtimes[0]);
1376 printf(
"\t\tStage1 walltime = %E\n",runtimes[1]);
1377 printf(
"\t\tStage2 walltime = %E\n",runtimes[2]);
1378 printf(
"\t\tStage3 walltime = %E\n",runtimes[3]);
1379 printf(
"\t\tStage4 walltime = %E\n",runtimes[4]);
1380 printf(
"\t\tAverage error = %E\n",error);
1382 out = fopen(filename,
"w");
1383 fprintf(out,
"%5d %1.16E %1.16E %1.16E %1.16E %1.16E %1.16E\n",nproc,runtimes[0],
1384 runtimes[1],runtimes[2],runtimes[3],runtimes[4],error);
1387 MPI_Barrier(MPI_COMM_WORLD);
1404 MPI_Comm_free(&world);
1423 for (i = 0; i < N; i++) x[i] = y[i];
1439 for (d = 0; d < Ns; d++) {
1440 for (i = 0; i < N; i++) {
1441 y[i*Ns+d] = x[i*Ns+d];
1452 double CalculateError(
double *a,
double *b,
double *c,
double *y,
double *x,
1457 for (d = 0; d < Ns; d++) {
1458 for (i = 0; i < N; i++) {
1460 if (i == 0) val = y[i*Ns+d] - (b[i*Ns+d]*x[i*Ns+d]+c[i*Ns+d]*x[(i+1)*Ns+d]);
1461 else if (i == N-1) val = y[i*Ns+d] - (a[i*Ns+d]*x[(i-1)*Ns+d]+b[i*Ns+d]*x[i*Ns+d]);
1462 else val = y[i*Ns+d] - (a[i*Ns+d]*x[(i-1)*Ns+d]+b[i*Ns+d]*x[i*Ns+d]+c[i*Ns+d]*x[(i+1)*Ns+d]);
1471 int N,
int Ns,
int bs)
1475 for (d = 0; d < Ns; d++) {
1476 for (i = 0; i < N; i++) {
1477 double val[bs];
for (j=0; j<bs; j++) val[j] = y[(i*Ns+d)*bs+j];
1481 for (j=0; j<bs; j++) error += val[j] * val[j];
1507 double error = 0, norm = 0;
1511 for (d = 0; d < Ns; d++) {
1514 MPI_Request request = MPI_REQUEST_NULL;
1515 if (rank != nproc-1) MPI_Irecv(&xp1,1,MPI_DOUBLE,rank+1,1738,MPI_COMM_WORLD,&request);
1516 if (rank) MPI_Send(&x[d],1,MPI_DOUBLE,rank-1,1738,MPI_COMM_WORLD);
1517 MPI_Wait(&request,MPI_STATUS_IGNORE);
1522 MPI_Request request = MPI_REQUEST_NULL;
1523 if (rank) MPI_Irecv(&xm1,1,MPI_DOUBLE,rank-1,1739,MPI_COMM_WORLD,&request);
1524 if (rank != nproc-1) MPI_Send(&x[d+(N-1)*Ns],1,MPI_DOUBLE,rank+1,1739,MPI_COMM_WORLD);
1525 MPI_Wait(&request,MPI_STATUS_IGNORE);
1530 for (i = 0; i < N; i++) {
1532 if (i == 0) val += a[i*Ns+d]*xm1;
1533 else val += a[i*Ns+d]*x[(i-1)*Ns+d];
1534 val += b[i*Ns+d]*x[i*Ns+d];
1535 if (i == N-1) val += c[i*Ns+d]*xp1;
1536 else val += c[i*Ns+d]*x[(i+1)*Ns+d];
1537 val = y[i*Ns+d] - val;
1539 norm += y[i*Ns+d] * y[i*Ns+d];
1543 double global_error = 0, global_norm = 0;
1544 if (nproc > 1) MPI_Allreduce(&error,&global_error,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1545 else global_error = error;
1546 if (nproc > 1) MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1547 else global_norm = norm;
1548 if (global_norm != 0.0) global_error /= global_norm;
1550 return(global_error);
1574 double error = 0, norm = 0;
1576 double xp1[bs], xm1[bs];
1578 for (d = 0; d < Ns; d++) {
1579 for (i=0; i<bs; i++) xp1[i] = 0;
1581 MPI_Request request = MPI_REQUEST_NULL;
1582 if (rank != nproc-1) MPI_Irecv(&xp1,bs,MPI_DOUBLE,rank+1,1738,MPI_COMM_WORLD,&request);
1583 if (rank) MPI_Send(&x[d*bs],bs,MPI_DOUBLE,rank-1,1738,MPI_COMM_WORLD);
1584 MPI_Wait(&request,MPI_STATUS_IGNORE);
1587 for (i=0; i<bs; i++) xm1[i] = 0;
1589 MPI_Request request = MPI_REQUEST_NULL;
1590 if (rank) MPI_Irecv(&xm1,bs,MPI_DOUBLE,rank-1,1739,MPI_COMM_WORLD,&request);
1591 if (rank != nproc-1) MPI_Send (&x[(d+(N-1)*Ns)*bs],bs,MPI_DOUBLE,rank+1,1739,MPI_COMM_WORLD);
1592 MPI_Wait(&request,MPI_STATUS_IGNORE);
1597 for (i = 0; i < N; i++) {
1598 double val[bs];
for (j=0; j<bs; j++) val[j] = y[(i*Ns+d)*bs+j];
1604 for (j=0; j<bs; j++) error += val[j] * val[j];
1605 for (j=0; j<bs; j++) norm += y[(i*Ns+d)*bs+j] * y[(i*Ns+d)*bs+j];
1609 double global_error = 0, global_norm = 0;
1610 if (nproc > 1) MPI_Allreduce(&error,&global_error,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1611 else global_error = error;
1612 if (nproc > 1) MPI_Allreduce(&norm,&global_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
1613 else global_norm = norm;
1614 if (global_norm != 0.0) global_error /= global_norm;
1616 return(global_error);
1634 if (nglobal%nproc == 0) *nlocal = nglobal/nproc;
1636 if (rank == nproc-1) *nlocal = nglobal/nproc+nglobal%nproc;
1637 else *nlocal = nglobal/nproc;
static void CopyArraySimple(double *, double *, int)
int tridiagLUGS(double *, double *, double *, double *, int, int, void *, void *)
int tridiagScaLPK(double *, double *, double *, double *, int, int, void *, void *)
static double CalculateErrorBlock(double *, double *, double *, double *, double *, int, int, int, int, int)
int tridiagLUInit(void *, void *)
static int main_mpi(int, int, int, int, int, int)
static void CopyArray(double *, double *, int, int)
int tridiagLU(double *, double *, double *, double *, int, int, void *, void *)
Header file for TridiagLU.
Contains macros and function definitions for common matrix operations.
static int test_mpi(int, int, int, int, int, int, int, int(*)(double *, double *, double *, double *, int, int, void *, void *), const char *)
int blocktridiagIterJacobi(double *, double *, double *, double *, int, int, int, void *, void *)
#define _MatVecMultiplySubtract_(y, A, x, N)
static double CalculateError(double *, double *, double *, double *, double *, int, int, int, int)
int tridiagIterJacobi(double *, double *, double *, double *, int, int, void *, void *)
static int test_block_mpi(int, int, int, int, int, int, int, int(*)(double *, double *, double *, double *, int, int, int, void *, void *), const char *)
static int partition1D(int, int, int, int *)
int blocktridiagLU(double *, double *, double *, double *, int, int, int, void *, void *)
int main(int argc, char *argv[])