Commit f2fa8d28 authored by Alexandru Dura's avatar Alexandru Dura

Parallelize local computation using OpenMP

parent 26c9df1a
......@@ -31,8 +31,8 @@ public:
}
void print(std::ostream &os) {
for (int i = 0; i < numRows(); ++i) {
for (int j = 0; j < numCols(); ++j) {
for (unsigned i = 0; i < numRows(); ++i) {
for (unsigned j = 0; j < numCols(); ++j) {
os << (*this)[i][j] << " ";
}
os << "\n";
......@@ -42,10 +42,19 @@ public:
std::pair<double, double> minMax() const {
double maxval = std::numeric_limits<double>::min();
double minval = std::numeric_limits<double>::max();
for (int y = 0; y < ydim; ++y) {
for (int x = 0; x < xdim; ++x) {
maxval = std::max(maxval, (*this)[y+1][x+1]);
minval = std::min(minval, (*this)[y+1][x+1]);
#pragma omp for schedule(static)
for (unsigned y = 1; y < ydim + 1; ++y) {
double lMaxval = std::numeric_limits<double>::min();
double lMinval = std::numeric_limits<double>::max();
for (unsigned x = 1; x < xdim + 1; ++x) {
lMaxval = std::max(lMaxval, (*this)[y][x]);
lMinval = std::min(lMinval, (*this)[y][x]);
}
#pragma omp critical
{
maxval = std::max(lMaxval, maxval);
minval = std::min(lMinval, minval);
}
}
return std::make_pair(minval, maxval);
......@@ -56,9 +65,10 @@ public:
double norm = (double)(bitdepth - 1) / (maxval - minval);
for (int y = 0; y < ydim; ++y) {
for (int x = 0; x < xdim; ++x) {
ret[y][x] = norm * ((*this)[y+1][x+1] - minval);
#pragma omp for schedule(static)
for (unsigned y = 1; y < ydim + 1; ++y) {
for (unsigned x = 1; x < xdim + 1; ++x) {
ret[y-1][x-1] = norm * ((*this)[y][x] - minval);
}
}
......
......@@ -49,7 +49,7 @@ struct GlobalProblemConfig {
}
};
void exchangeHalos(int rank, int nsize, Problem &p) {
static void exchangeHalos(int rank, int nsize, Problem &p) {
MPI_Request request_up, request_dn;
Field &f = p.getField();
int upNeighbour = rank + 1;
......@@ -71,7 +71,38 @@ void exchangeHalos(int rank, int nsize, Problem &p) {
MPI_Wait(&request_dn, MPI_STATUS_IGNORE);
}
static void readData(int rank, Problem &p, const char *restartFile) {
printf("%d: Restarting from file %s\n", rank, restartFile);
MPI_File fh;
int dataSize = p.getField().numCols() * (p.getField().numRows() - 2);
double *data = p.getField().firstRow().raw();
MPI_File_open(DEFAULT_COMM, restartFile, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
MPI_File_set_view(fh, rank * dataSize * sizeof(double),
MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
MPI_File_read(fh, data, dataSize, MPI_DOUBLE, MPI_STATUS_IGNORE);
MPI_File_close(&fh);
}
static void writeData(int rank, Problem &p, const char *pauseFile) {
MPI_File fh;
int dataSize = p.getField().numCols() * (p.getField().numRows() - 2);
double *data = p.getField().firstRow().raw();
MPI_File_open(DEFAULT_COMM, pauseFile, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
MPI_File_set_view(fh, rank * dataSize * sizeof(double),
MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
MPI_File_write(fh, data, dataSize, MPI_DOUBLE, MPI_STATUS_IGNORE);
MPI_File_close(&fh);
printf("%d: Problem written out to file %s\n", rank, pauseFile);
}
static bool isMainThread() {
int isMainThread = 0;
MPI_Is_thread_main(&isMainThread);
return isMainThread;
}
int main(int argc, char **argv) {
// Process arguments for checkpoint facility
......@@ -87,9 +118,11 @@ int main(int argc, char **argv) {
}
// Initialize the MPI machinery
MPI_Init(nullptr, nullptr);
int nsize, rank;
int providedSupport;
MPI_Init_thread(nullptr, nullptr, MPI_THREAD_FUNNELED, &providedSupport);
assert(providedSupport & MPI_THREAD_FUNNELED);
int nsize, rank;
MPI_Comm_size(DEFAULT_COMM, &nsize);
MPI_Comm_rank(DEFAULT_COMM, &rank);
......@@ -115,40 +148,41 @@ int main(int argc, char **argv) {
double gateVal = 100;
unsigned nGates = 2;
Problem p(nsize, rank, localXdim, localYdim, nGates, nGates, gateVal);
if (restartFile) {
printf("%d: Restarting from file %s\n", rank, restartFile);
MPI_File fh;
int dataSize = p.getField().numCols() * localYdim;
double *data = p.getField().firstRow().raw();
MPI_File_open(DEFAULT_COMM, restartFile, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
MPI_File_set_view(fh, rank * dataSize * sizeof(double),
MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
MPI_File_read(fh, data, dataSize, MPI_DOUBLE, MPI_STATUS_IGNORE);
MPI_File_close(&fh);
}
// Simulate
for (int i = 0; i < config.maxiter; ++i) {
exchangeHalos(rank, nsize, p);
p.step(config.cx, config.cy);
}
if (pauseFile) {
// Write the problem out, for later restart
MPI_File fh;
int dataSize = p.getField().numCols() * localYdim;
double *data = p.getField().firstRow().raw();
MPI_File_open(DEFAULT_COMM, pauseFile, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
MPI_File_set_view(fh, rank * dataSize * sizeof(double),
MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
MPI_File_write(fh, data, dataSize, MPI_DOUBLE, MPI_STATUS_IGNORE);
MPI_File_close(&fh);
#pragma omp parallel default(none) \
shared(p, restartFile, pauseFile, rank, config, nsize)
{
printf("%d: Problem written out to file %s\n", rank, pauseFile);
}
// Initialize inside the parallel region. This is the first
// time data is touched, so we want to make sure it ends up
// close to the core that performs the computation.
p.initialize();
#pragma omp single
if (restartFile)
readData(rank, p, restartFile);
// Simulate
for (int i = 0; i < config.maxiter; ++i) {
// Check if this is the main thread, if so, exchange the halos
if (isMainThread())
exchangeHalos(rank, nsize, p);
// step
p.step(config.cx, config.cy);
}
#pragma omp single
if (pauseFile) {
// Write the problem out, for later restart
writeData(rank, p, pauseFile);
}
} // end of pramga omp parallel
// Compute the global min and max
double localMin, localMax;
......
......@@ -8,16 +8,21 @@ MPI_LD_FLAGS = $(shell mpic++ -showme:link)
CXX=g++
MPICXX=mpic++
CXX_FLAGS=-O3 -I/usr/lib/x86_64-linux-gnu/openmpi/include/
CXX_FLAGS=-fopenmp
all: main main-debug test
main-debug : Main.cpp $(CPP_SOURCE_FILES) $(HEADER_FILES)
$(CXX) $(MPIFLAGS) -O0 -g -Wall Main.cpp $(CPP_SOURCE_FILES) $(MPI_LD_FLAGS) -o $@
$(CXX) $(MPIFLAGS) $(CXX_FLAGS) -O0 -g -Wall Main.cpp $(CPP_SOURCE_FILES) $(MPI_LD_FLAGS) -o $@
main : Main.cpp $(CPP_SOURCE_FILES) $(HEADER_FILES)
$(CXX) $(MPIFLAGS) -O3 Main.cpp $(CPP_SOURCE_FILES) $(MPI_LD_FLAGS) -o main
$(CXX) $(MPIFLAGS) $(CXX_FLAGS) -O3 Main.cpp $(CPP_SOURCE_FILES) $(MPI_LD_FLAGS) -o main
test : Test.cpp $(CPP_SOURCE_FILES) $(HEADER_FILES)
$(CXX) -g -O0 Test.cpp $(CPP_SOURCE_FILES) -o test
clean :
rm *.o
rm main-debug
rm main
rm test
#include "Problem.h"
void Problem::initField(Field &f) {
for (int i = 0; i < ydim + 2; ++i) {
for (int j = 0; j < xdim + 2; ++j) {
// This is the first touch of our data; make sure that it matches the access
// patterns in step().
#pragma omp for schedule(static)
for (unsigned i = 1; i < ydim + 1; ++i) {
for (unsigned j = 0; j < xdim + 2; ++j) {
f[i][j] = 0;
}
}
// Initialize the rest
#pragma omp single
{
for (unsigned j = 0; j < xdim + 2; ++j) {
f[0][j] = f[ydim + 1][j] = 0;
}
}
}
void Problem::addGates(Field &f, unsigned xGates, unsigned yGates, double value) {
......@@ -45,6 +56,7 @@ void Problem::step(double cx, double cy) {
auto &nextf = *this->nextf;
auto &f = *this->currentf;
#pragma omp for schedule(static)
for (unsigned y = 1; y < ydim + 1; y++) {
for (unsigned x = 1; x < xdim + 1; x++) {
nextf[y][x] = 0.25 * ((1.0 + 0.5 * cx) * f[y][x+1] + (1.0 - 0.5 * cx) * f[y][x-1]
......@@ -52,7 +64,7 @@ void Problem::step(double cx, double cy) {
}
}
#pragma omp single
std::swap(this->nextf, this->currentf);
}
......@@ -60,7 +72,9 @@ double Problem::residualSquared(double cx, double cy) const {
double res_sqr = 0.0;
const auto &solution = *this->currentf;
#pragma omp for schedule(static)
for(unsigned iy = 1 ; iy < ydim + 1; iy++) {
double res_local = 0.0;
for(unsigned ix = 1; ix < xdim + 1; ix++) {
double res_i = (1.0+0.5*cy) * solution[iy+1][ix]
+ (1.0-0.5*cy) * solution[iy-1][ix]
......@@ -68,8 +82,10 @@ double Problem::residualSquared(double cx, double cy) const {
+ (1.0-0.5*cx) * solution[iy][ix-1]
- 4.0 * solution[iy][ix];
res_sqr += res_i*res_i;
res_local += res_i*res_i;
}
#pragma omp atomic
res_sqr += res_local;
}
return res_sqr;
......
......@@ -33,7 +33,9 @@ public:
f1(xdim, ydim),
currentf(&f0),
nextf(&f1), xgates(xgates), ygates(ygates), gateVal(gateVal) {
}
void initialize() {
initField(f0);
initField(f1);
addGates(f0, xgates, ygates, gateVal);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment