%%file hello_world_mpi.c
#include <stdio.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
    int rank_id, ierr, num_ranks;
    double start_time, wall_time;

    printf("C MPI minimal working example\n");

    ierr = MPI_Init(&argc, &argv);
    start_time = MPI_Wtime();
    ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id);

    if (rank_id == 0) {
        printf("Number of available processors = %d.\n", num_ranks);
    }
    printf("\tProcess number %d branching off.\n", rank_id);

    wall_time = MPI_Wtime() - start_time;
    if (rank_id == 0) {
        printf("Elapsed wallclock time = %8.6fs.\n", wall_time);
    }

    MPI_Finalize();
    return 0;
}

!mpicc -o hello_world_mpi hello_world_mpi.c
!./hello_world_mpi
!mpiexec ./hello_world_mpi
!mpiexec -np 8 ./hello_world_mpi

%%file darts_pi.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>

#define N 100000
#define R 1

double uniform_rng() { return (double)rand() / (double)RAND_MAX; } // Not great, but not the point of this exercise.

int main(int argc, char *argv[]) {
    int rank_id, ierr, num_ranks;
    MPI_Status status;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id);

    srand(time(NULL) + rank_id);

    // Calculate some number of darts thrown into the circle.
    int n_circle = 0;
    double x, y;
    for (int t = 0; t < N; t++) {
        x = uniform_rng();
        y = uniform_rng();
        if (x*x + y*y < (double)R*R) n_circle++;
    }

    // If this is the first process, then gather everyone's data. Otherwise, send it to the first process.
    int total_circle[num_ranks]; // C99 variable-length array; compile with `-std=c99` or else use `malloc`.
    if (rank_id == 0) {
        total_circle[0] = n_circle;
        for (int i = 1; i < num_ranks; i++) {
            ierr = MPI_Recv(&total_circle[i], 1, MPI_INT, i, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            //printf("\t%d: recv %d from %d\n", rank_id, total_circle[i], i);
        }
    } else {
        ierr = MPI_Send(&n_circle, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
        //printf("\t%d: send %d to %d\n", rank_id, n_circle, 0);
    }

    // Now sum over the data and calculate the approximation of pi.
    int total = 0;
    if (rank_id == 0) {
        for (int i = 0; i < num_ranks; i++) {
            total += total_circle[i];
        }
        printf("With %d trials, the resulting approximation to pi = %f.\n",
               num_ranks*N, 4.0*(double)total/((double)N*(double)num_ranks));
    }

    MPI_Finalize();
    return 0;
}

# To run this code from within the notebook.
!mpicc -std=c99 -o darts_pi darts_pi.c
!mpiexec -n 4 ./darts_pi

%%file darts_pi.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>

#define N 100000
#define R 1

double uniform_rng() { return (double)rand() / (double)RAND_MAX; } // Not good, but not the point of this exercise.

int main(int argc, char *argv[]) {
    int rank_id, ierr, num_ranks;
    MPI_Status status;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id);

    srand(time(NULL) + rank_id);

    // Calculate some number of darts thrown into the circle.
    int n_circle = 0;
    double x, y;
    for (int t = 0; t < N; t++) {
        x = uniform_rng();
        y = uniform_rng();
        if (x*x + y*y < (double)R*R) n_circle++;
    }

    // Reduce by summation over all data and output the result.
    int total;
    ierr = MPI_Reduce(&n_circle, &total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank_id == 0) {
        printf("With %d trials, the resulting approximation to pi = %f.\n",
               num_ranks*N, 4.0*(double)total/((double)N*(double)num_ranks));
    }

    MPI_Finalize();
    return 0;
}
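As an aside, the explicit receive loop in the first darts_pi version reimplements by hand what the collective MPI_Gather does in a single call. A minimal sketch of that substitution (hypothetical, reusing the variable names from the code above):

    // Possible replacement for the rank-0 receive loop and the MPI_Send branch:
    // every rank contributes its n_circle, and rank 0 receives one count per rank, in rank order.
    int total_circle[num_ranks];
    MPI_Gather(&n_circle, 1, MPI_INT,      // each rank sends one int
               total_circle, 1, MPI_INT,   // rank 0 receives num_ranks ints
               0, MPI_COMM_WORLD);         // root is rank 0

The second version below goes one step further and lets MPI_Reduce perform the summation itself, so rank 0 only ever sees the total.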
# To run this code from within the notebook.
!mpicc -std=c99 -o darts_pi darts_pi.c
!mpiexec -n 4 ./darts_pi

%%file fd_charge.c
// This is a serial version of the code for reference.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h> // for memcpy()
#include <math.h>

#define BOUNDS 1.0

double uniform_rng() { return (double)rand() / (double)RAND_MAX; } // Not good, but not the point of this exercise.

struct point_t {
    int x, y;
    double mag;
};

int main(int argc, char *argv[]) {
    int N = 10;
    if (argc > 1) N = atoi(argv[1]);
    int size = N % 2 == 0 ? N : N+1; // ensure an even grid size
    int iter = 10;
    if (argc > 2) iter = atoi(argv[2]);
    int out_step = 1000;
    if (argc > 3) out_step = atoi(argv[3]);
    double eps = 8.8541878176e-12; // permittivity of the material

    srand(time(NULL));

    // Initialize the grid with an initial guess of zeros as well as the coordinates.
    double phi[size][size]; // C99 variable-length arrays; compile with `-std=c99` or else use `malloc`.
    double oldphi[size][size];
    double x[size], y[size];
    double dx = (2*BOUNDS)/size, dy = dx;
    for (int i = 0; i < size; i++) {
        x[i] = (double)i*dx - BOUNDS;
        for (int j = 0; j < size; j++) {
            if (i == 0) y[j] = (double)j*dy - BOUNDS;
            phi[i][j] = 0.0;
        }
    }

    // Set up a few random points and magnitudes for the electrostatics problem.
    int K = 20;
    struct point_t pt_srcs[K];
    for (int k = 0; k < K; k++) {
        pt_srcs[k].x = (int)(uniform_rng() * N);
        pt_srcs[k].y = (int)(uniform_rng() * N);
        pt_srcs[k].mag = uniform_rng() * 2.0 - 1.0;
        printf("(%f, %f) @ %f\n", x[pt_srcs[k].x], y[pt_srcs[k].y], pt_srcs[k].mag);
    }

    // Iterate forward.
    int n_steps = 0;           // total number of steps iterated
    double inveps = 1.0 / eps; // saves a division every iteration over the square loop
    double pt_src = 0.0;       // accumulator for whether a point source is located at a specific (i,j) index site
    while (n_steps < iter) {
        memcpy(oldphi, phi, size*size*sizeof(double)); // note: the third argument is a byte count
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < size; j++) {
                // Calculate point source contributions.
                pt_src = 0;
                for (int k = 0; k < K; k++) {
                    pt_src = pt_src + ((pt_srcs[k].x == i && pt_srcs[k].y == j) ? pt_srcs[k].mag : 0.0);
                }

                phi[i][j] = 0.25*dx*dx * pt_src * inveps
                          + 0.25*(i == 0      ? 0.0 : phi[i-1][j])
                          + 0.25*(i == size-1 ? 0.0 : phi[i+1][j])
                          + 0.25*(j == 0      ? 0.0 : phi[i][j-1])
                          + 0.25*(j == size-1 ? 0.0 : phi[i][j+1]);
            }
        }

        if (n_steps % out_step == 0) {
            printf("Iteration #%d:\n", n_steps);
            printf("\tphi(%f, %f) = %24.20f\n",
                   x[(int)(0.5*N-1)], y[(int)(0.25*N+1)],
                   phi[(int)(0.5*N-1)][(int)(0.25*N+1)]);
        }
        n_steps++;
    }

    // Write the final condition out to disk and terminate.
    printf("Terminated after %d steps.\n", n_steps);
    FILE* f;
    f = fopen("./data.txt", "w"); // plain-text output
    if (f != NULL) {
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < size; j++) {
                fprintf(f, "%f\t", phi[i][j]);
            }
            fprintf(f, "\n");
        }
        fclose(f);
    } else {
        // failed to create the file
    }

    return 0;
}

!gcc -std=c99 -o fd_charge fd_charge.c
!./fd_charge 200 30000 1000

# Use Python to visualize the result quickly.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
#mpl.rcParams['figure.figsize']=[20,20]

data = np.loadtxt('./data.txt')
grdt = np.gradient(data)
mx = 10**np.floor(np.log10(data.max()))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20,20))
axes[0].imshow(data, cmap=cm.seismic, vmin=-mx, vmax=mx, extent=[-1,1,-1,1])
axes[1].imshow(data, cmap=cm.seismic, vmin=-mx, vmax=mx)
axes[1].contour(data, cmap=cm.bwr, vmin=-mx, vmax=mx, levels=np.arange(-mx,mx+1,(2*mx)/1e2))
fig.show()
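For reference, the update inside the relaxation loop of fd_charge.c above is a sweep of the discretized Poisson equation $\nabla^2 \phi = -\rho/\varepsilon$ on a uniform grid with $dx = dy$, treating each point-source magnitude as the local charge density $\rho_{i,j}$ (an identification the code itself does not state explicitly):

$$\phi_{i,j} \;\leftarrow\; \tfrac{1}{4}\left(\phi_{i-1,j} + \phi_{i+1,j} + \phi_{i,j-1} + \phi_{i,j+1}\right) \;+\; \frac{dx^2\,\rho_{i,j}}{4\,\varepsilon},$$

with $\phi = 0$ imposed just outside the domain boundary. The parallel version below keeps exactly this update and splits the domain between two processes stacked in a column.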
%%file fd_charge.c
// This is a parallel version of the code. The domain consists of a column of
// square domains abutting each other.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h> // for memcpy()
#include <math.h>
#include <mpi.h>

#define BOUNDS 1.0

double uniform_rng() { return (double)rand() / (double)RAND_MAX; } // Not good, but not the point of this exercise.

struct point_t {
    int x, y;
    double mag;
};

int main(int argc, char *argv[]) {
    // Get the number of discrete steps along the domain.
    int size = 50;
    if (argc > 1) size = atoi(argv[1]);
    // Get the number of iterations to use in the solver.
    int iter = 10000;
    if (argc > 2) iter = atoi(argv[2]);
    // Get the interval for output of the test value used to watch convergence.
    int out_step = iter/10;
    if (argc > 3) out_step = atoi(argv[3]);
    double eps = 8.8541878176e-12; // permittivity of the material

    int rank_id, ierr, num_ranks;
    MPI_Request reqs[4]; // request handles for the nonblocking ghost-row exchange
    int n_reqs;

    ierr = MPI_Init(&argc, &argv);
    ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
    ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id);

    srand(time(NULL) + rank_id);

    // We restrict the domain (and thus processes) to only two neighboring processes for the sake of pedagogy.
    if (num_ranks != 2) {
        printf("Expected 2 processes; actual number of processes %d.\n", num_ranks);
        MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER);
        return 0;
    }

    // Calculate the processor grid preparatory to the domain partition. We'll make the assignment into a
    // 2 x 1 array, making determination of neighboring processes trivial.
    int nbr_t, nbr_b;
    if (rank_id == 0) {
        nbr_t = -1;
        nbr_b = 1;
    } else {
        nbr_t = 0;
        nbr_b = -1;
    }

    // Initialize the _local_ grid with an initial guess of zeros as well as the coordinates. We keep each
    // local domain the same size as the original domain, 2x2 in coordinates, and just translate them to make a
    // larger domain---in this case, adding processes increases the domain size rather than the mesh resolution.
    double phi[size][size];      // potential grid---compile with `-std=c99` or else use `malloc`.
    double x[size],              // x-domain vector
           y[size];              // y-domain vector
    double dx = (2*BOUNDS)/size, // increment in x-direction
           dy = dx;              // increment in y-direction
    double base_x = -BOUNDS,     // [-1,1) for both processes
           base_y = 2*BOUNDS*(rank_id % 2) - BOUNDS*2; // [-2,0) for process 0; [0,2) for process 1
    for (int i = 0; i < size; i++) {
        x[i] = (double)i*dx + base_x;
        for (int j = 0; j < size; j++) {
            if (i == 0) y[j] = (double)j*dy + base_y;
            phi[i][j] = 0.0;
        }
    }

    double phi_t_in[size],  // ghost-cell messaging arrays
           phi_b_in[size],
           phi_t_out[size],
           phi_b_out[size];
    for (int i = 0; i < size; i++) {
        phi_t_in[i] = 0.0;
        phi_b_in[i] = 0.0;
    }

    // Set up a few random points and magnitudes for the electrostatics problem.
    int K = 10;
    struct point_t pt_srcs[K];
    for (int k = 0; k < K; k++) {
        (uniform_rng()); // demonstration that the RNG is bad: without this, the first number is the same on every process
        int x_rn = (int)(uniform_rng() * size);
        int y_rn = (int)(uniform_rng() * size);
        pt_srcs[k].x = x_rn;
        pt_srcs[k].y = y_rn;
        pt_srcs[k].mag = uniform_rng() * 2.0 - 1.0;
    }
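    // A note on the ghost ("halo") rows before the main loop: the five-point update below needs
    // phi at j-1 and j+1, so the row adjoining the neighboring subdomain must come from that
    // neighbor. The phi_*_out buffers hold this process's border rows and the phi_*_in buffers
    // receive the neighbor's; they are exchanged once per sweep with nonblocking MPI_Isend/MPI_Irecv
    // followed by MPI_Waitall. A blocking alternative (a sketch only, not used here) is
    // MPI_Sendrecv, with MPI_PROC_NULL substituted for the -1 sentinel so that a missing
    // neighbor becomes a no-op:
    //
    //   MPI_Sendrecv(phi_t_out, size, MPI_DOUBLE, /*dest*/ nbr_t, 0,
    //                phi_b_in,  size, MPI_DOUBLE, /*src*/  nbr_b, 0,
    //                MPI_COMM_WORLD, MPI_STATUS_IGNORE);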
    // Iterate forward.
    int n_steps = 0;           // total number of steps iterated
    double inveps = 1.0 / eps; // saves a division every iteration over the square loop
    double pt_src = 0.0;       // accumulator for whether a point source is located at a specific (i,j) index site
    while (n_steps < iter) {
        // Propagate the matrix equation forward towards a solution.
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < size; j++) {
                // Calculate point source contributions.
                pt_src = 0;
                for (int k = 0; k < K; k++) {
                    pt_src = pt_src + ((pt_srcs[k].x == i && pt_srcs[k].y == j) ? pt_srcs[k].mag : 0.0);
                }

                phi[i][j] = 0.25*dx*dx * pt_src * inveps
                          + 0.25*(i == 0      ? 0.0 : phi[i-1][j])
                          + 0.25*(i == size-1 ? 0.0 : phi[i+1][j])
                          + 0.25*(j == 0      ? (nbr_b < 0 ? 0.0 : phi_b_in[i]) : phi[i][j-1])
                          + 0.25*(j == size-1 ? (nbr_t < 0 ? 0.0 : phi_t_in[i]) : phi[i][j+1]);
            }
        }

        // Communicate the border-cell information to neighboring processes with nonblocking sends
        // and receives, then wait for all of the messages to complete before the buffers are reused.
        for (int i = 0; i < size; i++) {
            phi_t_out[i] = phi[i][size-1];
            phi_b_out[i] = phi[i][0];
        }
        n_reqs = 0;
        // Pass data up.
        if (nbr_t >= 0) MPI_Isend(phi_t_out, size, MPI_DOUBLE, nbr_t, 0, MPI_COMM_WORLD, &reqs[n_reqs++]);
        if (nbr_b >= 0) MPI_Irecv(phi_b_in,  size, MPI_DOUBLE, nbr_b, MPI_ANY_TAG, MPI_COMM_WORLD, &reqs[n_reqs++]);
        // Pass data down.
        if (nbr_b >= 0) MPI_Isend(phi_b_out, size, MPI_DOUBLE, nbr_b, 0, MPI_COMM_WORLD, &reqs[n_reqs++]);
        if (nbr_t >= 0) MPI_Irecv(phi_t_in,  size, MPI_DOUBLE, nbr_t, MPI_ANY_TAG, MPI_COMM_WORLD, &reqs[n_reqs++]);
        MPI_Waitall(n_reqs, reqs, MPI_STATUSES_IGNORE);
        MPI_Barrier(MPI_COMM_WORLD);

        // Output information periodically.
        if (rank_id == 0 && n_steps % out_step == 0) {
            printf("Iteration #%d:\n", n_steps);
            printf("\tphi(%6.3f, %6.3f) = %24.20f\n",
                   x[(int)(0.5*size-1)], y[(int)(0.25*size+1)],
                   phi[(int)(0.5*size-1)][(int)(0.25*size+1)]);
        }
        n_steps++;
    }

    if (rank_id == 0) {
        printf("Terminated after %d steps.\n", n_steps);
        printf("\tphi(%6.3f, %6.3f) = %24.20f\n",
               x[(int)(0.5*size-1)], y[(int)(0.25*size+1)],
               phi[(int)(0.5*size-1)][(int)(0.25*size+1)]);
    }

    // Write the final condition out to disk and terminate.
    // Parallel I/O, while supported by MPI, is a whole other ball game and we won't go into that here.
    // Thus we will gather all of the data to process rank 0 and output it from there.
    double phi_vector[size*2*size];
    // We actually have to transpose each process's data into a column-major format to align properly.
    double phi_trans[size][size];
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            phi_trans[size-j-1][i] = phi[i][j];
        }
    }
    MPI_Gather(&phi_trans[0][0], size*size, MPI_DOUBLE, &phi_vector[0], size*size, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (rank_id == 0) {
        // At this point, the data are in one array on process rank 0 but not in a two-dimensional array, so fix that.
        double phi_new[size*num_ranks][size];
        int offset;
        for (int p = 0; p < num_ranks; p++) {       // source process for data
            offset = p*size;
            for (int i = 0; i < size; i++) {        // x-index of process data
                for (int j = 0; j < size; j++) {    // y-index of process data
                    phi_new[offset+i][j] = phi_vector[p*size*size + i*size + j];
                }
            }
        }

        FILE* f;
        f = fopen("data.txt", "w"); // plain-text output
        if (f != NULL) {
            for (int i = 0; i < 2*size; i++) {
                for (int j = 0; j < size; j++) {
                    fprintf(f, "%f\t", phi_new[i][j]);
                }
                fprintf(f, "\n");
            }
            fclose(f);
        } else {
            // failed to create the file
        }
    }

    MPI_Finalize();
    return 0;
}

!mpicc -std=c99 -o fd_charge fd_charge.c
!mpiexec -np 2 ./fd_charge 100 10001 1000

# Use Python to visualize the result quickly.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

data = np.loadtxt('./data.txt')
#mx = 10**np.floor(np.log10(data.max()))
mx = 0.5*data.max()

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8,8))
axes[0].imshow(data, cmap=cm.seismic, vmin=-mx, vmax=mx)
axes[1].contour(data[::-1,:], cmap=cm.bwr, vmin=-mx, vmax=mx, levels=np.arange(-8*mx,8*mx,mx/10))
fig.show()

%%file parprefix.c
// your code here