/* * Example LAM/MPI CR self program * Josh Hursey * April 9, 2006 * * To compile: * $ mpicc -g -export -o personal-cr personal-cr.c * * To Run: * $ mpirun -np 2 \ * -ssi cr_self_prefix my_personal \ * -ssi rpi crtcp \ * -ssi cr self \ * personal-cr * * To Checkpoint: * $ lamcheckpoint -ssi cr self -pid XXXX * * To Restart: (Don't use lamrestart) * $ mpirun -np 2 \ * -ssi cr_self_prefix my_personal \ * -ssi rpi crtcp \ * -ssi cr self \ * -ssi cr_self_do_restart 1 * personal-cr */ #include #include #define LIMIT 20 /************************ * Function Declarations ************************/ /* Default LAM/MPI cr self callback functions */ int lam_cr_self_checkpoint(void); int lam_cr_self_continue(void); int lam_cr_self_restart(void); /* LAM/MPI cr self callback functions */ int my_personal_checkpoint(void); int my_personal_continue(void); int my_personal_restart(void); /******************* * Global Variables *******************/ int current_step = 0; char ckpt_file[128] = "my-personal-cr-file.ckpt"; /********* * Main *********/ int main(int argc, char *argv[]) { int rank, size; current_step = 0; /* * If we are restarting, then our callback is called * from inside MPI_Init. * It is safe to initialize global variables before calling * MPI_Init with out fear of overwriting the values that * the restart callback might change them too. */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); for(; current_step < LIMIT; current_step += 1) { printf("%d of %d: Step %d\n", rank, size, current_step); sleep(1); } MPI_Finalize(); return 0; } /* LAM/MPI default cr self callbacks for checkpoint */ int lam_cr_self_checkpoint(void) { printf("lam_cr_self_checkpoint callback...\n"); return 0; } int lam_cr_self_continue(void) { printf("lam_cr_self_continue callback...\n"); return 0; } int lam_cr_self_restart(void) { printf("lam_cr_self_restart callback...\n"); return 0; } /* LAM/MPI cr self callback for checkpoint */ int my_personal_checkpoint() { FILE *fp; printf("my_personal_checkpoint callback...\n"); /* * Open our checkpoint file */ if( NULL == (fp = fopen(ckpt_file, "w")) ) { fprintf(stderr, "Error: Unable to open file (%s)\n", ckpt_file); return; } /* * Save the process state */ fprintf(fp, "%d\n", current_step); /* * Close the checkpoint file */ fclose(fp); return 0; } int my_personal_continue() { printf("my_personal_continue callback...\n"); /* Don't need to do anything here since we are in the * state that we want to be in already. */ return 0; } int my_personal_restart() { FILE *fp; printf("my_personal_restart callback...\n"); /* * Open our checkpoint file */ if( NULL == (fp = fopen(ckpt_file, "r")) ) { fprintf(stderr, "Error: Unable to open file (%s)\n", ckpt_file); return; } /* * Access the process state that we saved and * update the current step variable. */ fscanf(fp, "%d", ¤t_step); fclose(fp); printf("my_personal_restart: Restarting from step %d\n", current_step); return 0; }