Wednesday, May 13, 2009

Matrix Multiplication using MPI

MPI(Message Passing Interface) is a library specification for message-passing. MPI was designed for high performance on both massively parallel machines and on workstation clusters. Following matrix multiplication is written in accordance to MPI. You could download the code from here.

/******************************************************************************
* Matrix Multiplication Program
* Heshan Suriyaarachchi
* ABOUT:
* Master task distributes a matrix multiply
* operation to numtasks-1 worker tasks.
*
******************************************************************************/

#include <stdio.h>
#include "mpi.h"
#define NRA 512 /* number of rows in matrix A */
#define NCA 512 /* number of columns in matrix A */
#define NCB 512 /* number of columns in matrix B */
#define MASTER 0 /* taskid of first task */
#define FROM_MASTER 1 /* setting a message type */
#define FROM_WORKER 2 /* setting a message type */

MPI_Status status;

double a[NRA][NCA], /* matrix A to be multiplied */
b[NCA][NCB], /* matrix B to be multiplied */
c[NRA][NCB]; /* result matrix C */

main(int argc, char **argv)
{
int numtasks, /* number of tasks in partition */
taskid, /* a task identifier */
numworkers, /* number of worker tasks */
source, /* task id of message source */
dest, /* task id of message destination */
nbytes, /* number of bytes in message */
mtype, /* message type */
intsize, /* size of an integer in bytes */
dbsize, /* size of a double float in bytes */
rows, /* rows of matrix A sent to each worker */
averow, extra, offset, /* used to determine rows sent to each worker */
i, j, k, /* misc */
count;

struct timeval start, stop;

intsize = sizeof(int);
dbsize = sizeof(double);

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
numworkers = numtasks-1;

//printf(" size of matrix A = %d by %d\n",NRA,NCA);
//printf(" size of matrix B = %d by %d\n",NRA,NCB);
/*---------------------------- master ----------------------------*/
if (taskid == MASTER) {
printf("Number of worker tasks = %d\n",numworkers);
for (i=0; i<NRA; i++)
for (j=0; j<NCA; j++)
a[i][j]= i+j;
for (i=0; i<NCA; i++)
for (j=0; j<NCB; j++)
b[i][j]= i*j;

gettimeofday(&start, 0);

/* send matrix data to the worker tasks */
averow = NRA/numworkers;
extra = NRA%numworkers;
offset = 0;
mtype = FROM_MASTER;
for (dest=1; dest<=numworkers; dest++) {
rows = (dest <= extra) ? averow+1 : averow;
//printf(" Sending %d rows to task %d\n",rows,dest);
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
count = rows*NCA;
MPI_Send(&a[offset][0], count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
count = NCA*NCB;
MPI_Send(&b, count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);

offset = offset + rows;
}

/* wait for results from all worker tasks */
mtype = FROM_WORKER;
for (i=1; i<=numworkers; i++) {
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
count = rows*NCB;
MPI_Recv(&c[offset][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD,
&status);

}

#ifdef PRINT
printf("Here is the result matrix\n");
for (i=0; i<NRA; i++) {
printf("\n");
for (j=0; j<NCB; j++)
printf("%6.2f ", c[i][j]);
}
printf ("\n");
#endif

gettimeofday(&stop, 0);


fprintf(stdout,"Time = %.6f\n\n",
(stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));

} /* end of master section */

/*---------------------------- worker (slave)----------------------------*/
if (taskid > MASTER) {
mtype = FROM_MASTER;
source = MASTER;
#ifdef PRINT
printf ("Master =%d, mtype=%d\n", source, mtype);
#endif
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("offset =%d\n", offset);
#endif
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("row =%d\n", rows);
#endif
count = rows*NCA;
MPI_Recv(&a, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("a[0][0] =%e\n", a[0][0]);
#endif
count = NCA*NCB;
MPI_Recv(&b, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("b=\n");
#endif
for (k=0; k<NCB; k++)
for (i=0; i<rows; i++) {
c[i][k] = 0.0;
for (j=0; j<NCA; j++)
c[i][k] = c[i][k] + a[i][j] * b[j][k];
}

//mtype = FROM_WORKER;
#ifdef PRINT
printf ("after computer\n");
#endif
//MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&offset, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
//MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
//MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, FROM_WORKER, MPI_COMM_WORLD);
#ifdef PRINT
printf ("after send\n");
#endif
} /* end of worker */

MPI_Finalize();
} /* end of main */

17 comments:

Saliya Ekanayake said...

This is cool. I was searching for some beginner stuff on MPI :)

Heshan Suriyaarachchi said...

Thanks :). If you need further help, drop in a comment ;).

Unknown said...

hi, very nice work.
When i am trying to run the program says:
program.c: In function 'main':
program.c:41: error: storage size of 'start' isn't known
program.c:41: error: storage size of 'stop' isn't known

Where is the error?
Thank you

Azure said...

Hello,

We are trying to run your code on Visual studio 2008.

We installed mpich2. But we can not get it to work with the libraries.

Is there a ready solution you have that we can run the code? We also get the error for timeval not defined.

Is it because we didn't install cygwin ? is that necessary?

Although mpi is very useful and famous, i cannot find one single source to help me get started.

We would really appreciate you help..

Thanks,
this is my email:
reza.azure@gmail.com

Tac said...

Hi,
This is a good code for large matrix multiplication.
Clannis & Asure have a trouble on timeval.
I'll suggest just to modify the codes related timeval with MPI_Wtime().

Tac

Tac said...

#include <sys/time.h> is neccessary for timeval,etc.

Trang blog cua Huy Hoang said...

thanks a lot for you code. I'm a newbie in mpi programming.

Heshan Suriyaarachchi said...

You are welcome! :)

Sabareesh Vysyaraju said...

Sir...Thank you very much for your reply. Sir I have to use one node as Master and the other two nodes as Slaves.So I am asking whether the program "Matrix Multiplication using MPI in C" can be used in the above stated environment????????

abhi said...
This comment has been removed by the author.
Priyadarshini Narasimhan said...

How many number of processors should be given to run this program?
I compiled like this
mpicc -o mm MatrixMultiplication.c
and when i need to run
mpirun -np # mm
here how many number of processors # should be given?
Thanks

Unknown said...

Hi

I tried the code but when i compile on aix i got this error

"mul_matrix.c", line 48.8: 1506-007 (S) "struct timeval" is undefined.
"mul_matrix.c", line 48.8: 1506-007 (S) "struct timeval" is undefined.
"mul_matrix.c", line 61.1: 1506-046 (S) Syntax error.
"mul_matrix.c", line 63.1: 1506-046 (S) Syntax error.
"mul_matrix.c", line 102.1: 1506-046 (S) Syntax error.
"mul_matrix.c", line 241.1: 1506-046 (S) Syntax error.
"mul_matrix.c", line 251.1: 1506-046 (S) Syntax error.
"mul_matrix.c", line 255.1: 1506-046 (S) Syntax error.

Tahir Rauf said...

Hi,
May be it is little bit irrelevant, but I am stuck at one question.
I have a situation in which every processor wants to send a particular "column" to every other processor, what function/method should I use.
Any help will be appreciated.
Regards

Unknown said...

SIR I have done it with mpi and threads but i now i want to do it for hybrid memory.that is shared as well as distributed. is it posible
hd2361@gmail.com

Rahul said...

Hello Sir, my name is Rahul. I'm working on my project i.e; Matrix multiplication using MPI. I'm using LAM-7.1.4 version and working on Fedora-12. The problem is that when I configured the systems and ran LAMBOOT command it was not able to configure the remote hosts. It finished booting n0 but for n1 it aborted. Pls, help me out sir. I've to give a demo on the project. Pls, Help me, asap.

Sinta said...

Hai sir. I want to ask, if it's any Matrix Multiplication using MPI that running in Windows platform ? Thanks

kunal singh said...

thanks for sharing the code :)
but,I need to display the parts of matrix being received and the computation done on the worker nodes.... is this possible???
plz help...