c++ - Creating a counter that stays synchronized across MPI processes -
I have quite a bit of experience using the basic comm and group MPI-2 methods, and I do quite a bit of embarrassingly parallel simulation work using MPI. Up until now, I have structured my code to have a dispatch node and a bunch of worker nodes. The dispatch node has a list of parameter files that will be run with the simulator. It seeds each worker node with a parameter file. The worker nodes run their simulation, then request another parameter file, which the dispatch node provides. Once all parameter files have been run, the dispatch node shuts down each worker node, before shutting itself down.
The parameter files are typically named "Par_N.txt", where N is the identifying integer (e.g., N = 1-1000). So I was thinking: if I could create a counter, and could have this counter synchronized across all of my nodes, I could eliminate the need to have a dispatch node and make the system a bit simpler. As simple as this sounds in theory, in practice I suspect it is a bit more difficult, as I'd need to ensure the counter is locked while being changed, etc., and I thought there might be a built-in way for MPI to handle this. Any thoughts? Am I overthinking this?
Implementing a shared counter isn't trivial, but once you do it and have it in a library somewhere, you can do a lot with it.
In the "Using MPI-2" book, which you should have to hand if you're going to implement this stuff, one of the examples (the code is available online) is a shared counter. The "non-scalable" one should work well out to several dozens of processes -- the counter is an array of 0..size-1 integers, one per rank, and the "get next work item #" operation consists of locking the window, reading everyone else's contribution to the counter (in this case, how many items they've taken), updating your own (++), closing the window, and calculating the total. This is all done with passive one-sided operations. (The better-scaling one just uses a tree rather than a 1-D array.)
So the use would be: you have, say, rank 0 host the counter, and everyone keeps doing work units and updating the counter to get the next one until there's no more work; then you wait at a barrier or something and finalize.
Once you have something like this - using a shared value to get the next available work unit - working, you can generalize to a more sophisticated approach. As suzterpatt suggested, everyone taking "their share" of work units at the start works great, but what do you do if some finish faster than others? The usual answer now is work-stealing: everyone keeps their list of work units in a dequeue, and when one runs out of work, it steals work units from the other end of someone else's dequeue, until there's no more work left. This is really the completely distributed version of master-worker, where there's no longer a single master partitioning the work. Once you have a single shared counter working, you can make mutexes from those, and from that you can implement the dequeue. But if the simple shared counter works well enough, you may not need to go there.
Update: OK, here's a hacky attempt at doing the shared counter - my version of the simple one in the MPI-2 book. It seems to work, but I wouldn't say anything much stronger than that (I haven't played with this stuff for a long time). There's a simple counter implementation (corresponding to the non-scaling version in the MPI-2 book) with two simple tests, one corresponding roughly to your work case: each process updates the counter to get a work item, then does the "work" (sleeps for a random amount of time). At the end of each test, the counter data structure is printed out, which is the number of increments each rank has done.
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Shared counter built on MPI passive-target one-sided communication.
 *
 * One rank (the "host") exposes an array with one int slot per rank through
 * an MPI window. Each rank adds only to its own slot; the counter's value is
 * the sum of all slots.
 */
struct mpi_counter_t {
    MPI_Win win;     /* window over the host's data[] array */
    int hostrank;    /* rank whose memory backs the window */
    int myval;       /* this rank's own accumulated contribution */
    int *data;       /* host only: one slot per rank; NULL on other ranks */
    int rank, size;  /* this process's rank in, and the size of, MPI_COMM_WORLD */
};

/* malloc wrapper: aborts the whole MPI job on allocation failure. */
static void *counter_xmalloc(size_t nbytes)
{
    void *p = malloc(nbytes);
    if (p == NULL) {
        fprintf(stderr, "mpi_counter: out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    return p;
}

/*
 * Create a counter hosted on rank `hostrank`.
 *
 * Collective over MPI_COMM_WORLD (MPI_Win_create is collective), so every
 * rank must call it with the same hostrank. The caller owns the returned
 * object and releases it with delete_counter().
 */
struct mpi_counter_t *create_counter(int hostrank)
{
    struct mpi_counter_t *count = counter_xmalloc(sizeof *count);

    count->hostrank = hostrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &(count->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(count->size));

    if (count->rank == hostrank) {
        /* The host exposes size ints, addressed in units of sizeof(int). */
        MPI_Aint nbytes = (MPI_Aint)count->size * (MPI_Aint)sizeof(int);

        MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &(count->data));
        for (int i = 0; i < count->size; i++) {
            count->data[i] = 0;
        }
        MPI_Win_create(count->data, nbytes, sizeof(int),
                       MPI_INFO_NULL, MPI_COMM_WORLD, &(count->win));
    } else {
        /* Non-host ranks take part in the window but expose no memory. */
        count->data = NULL;
        MPI_Win_create(count->data, 0, 1,
                       MPI_INFO_NULL, MPI_COMM_WORLD, &(count->win));
    }
    count->myval = 0;

    return count;
}

/*
 * Add `increment` to this rank's slot and return the counter total.
 *
 * Under an exclusive lock on the host's window, this accumulates into the
 * caller's own slot and fetches every other rank's slot. The return value is
 * the sum of the fetched slots plus this rank's updated contribution.
 */
int increment_counter(struct mpi_counter_t *count, int increment)
{
    const int host = count->hostrank;
    int *vals = counter_xmalloc((size_t)count->size * sizeof *vals);
    int total = 0;

    /* All RMA calls target the host rank, the same rank that is locked. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, host, 0, count->win);
    for (int i = 0; i < count->size; i++) {
        if (i == count->rank) {
            MPI_Accumulate(&increment, 1, MPI_INT, host, i, 1, MPI_INT,
                           MPI_SUM, count->win);
        } else {
            MPI_Get(&vals[i], 1, MPI_INT, host, i, 1, MPI_INT, count->win);
        }
    }
    /* The MPI_Get results in vals[] are only valid once the epoch is closed. */
    MPI_Win_unlock(host, count->win);

    /* Our own slot was not fetched; use the locally tracked value instead. */
    count->myval += increment;
    vals[count->rank] = count->myval;

    for (int i = 0; i < count->size; i++) {
        total += vals[i];
    }

    free(vals);
    return total;
}

/*
 * Destroy a counter created by create_counter() and set *count to NULL.
 *
 * Collective over MPI_COMM_WORLD (MPI_Win_free is collective).
 */
void delete_counter(struct mpi_counter_t **count)
{
    if (count == NULL || *count == NULL) {
        return;
    }

    struct mpi_counter_t *c = *count;

    /* Free the window first; its backing memory must outlive the window. */
    MPI_Win_free(&(c->win));
    if (c->rank == c->hostrank) {
        MPI_Free_mem(c->data);
    }
    free(c);
    *count = NULL;
}

/* On the host rank, print every rank's slot on one line; no-op elsewhere. */
void print_counter(struct mpi_counter_t *count)
{
    if (count->rank != count->hostrank) {
        return;
    }
    for (int i = 0; i < count->size; i++) {
        printf("%2d ", count->data[i]);
    }
    puts("");
}

/* Each rank increments the counter once and reports the value it got. */
int test1()
{
    struct mpi_counter_t *c;
    int rank;
    int result;

    c = create_counter(0);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    result = increment_counter(c, 1);
    printf("%d got counter %d\n", rank, result);

    MPI_Barrier(MPI_COMM_WORLD);
    print_counter(c);
    delete_counter(&c);

    return 0;
}

/*
 * Work-queue test: ranks repeatedly claim the next item number from the
 * shared counter and "work" on it (sleep a random time) until the claimed
 * number exceeds the number of work items.
 */
int test2()
{
    const int workitems = 50;

    struct mpi_counter_t *c;
    int rank;
    int result = 0;

    c = create_counter(0);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    srandom(rank);

    while (result < workitems) {
        result = increment_counter(c, 1);
        if (result <= workitems) {
            printf("%d working on item %d...\n", rank, result);
            sleep(random() % 10);
        } else {
            printf("%d done\n", rank);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    print_counter(c);
    delete_counter(&c);

    return 0;
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    test1();
    test2();

    MPI_Finalize();
    return 0;
}
Comments
Post a Comment