Thursday, May 21, 2009

Wednesday, May 13, 2009

Matrix Multiplication using MPI

MPI(Message Passing Interface) is a library specification for message-passing. MPI was designed for high performance on both massively parallel machines and on workstation clusters. Following matrix multiplication is written in accordance to MPI. You could download the code from here.

/******************************************************************************
* Matrix Multiplication Program
* Heshan Suriyaarachchi
* ABOUT:
* Master task distributes a matrix multiply
* operation to numtasks-1 worker tasks.
*
******************************************************************************/

#include <stdio.h>
#include "mpi.h"
#define NRA 512 /* number of rows in matrix A */
#define NCA 512 /* number of columns in matrix A */
#define NCB 512 /* number of columns in matrix B */
#define MASTER 0 /* taskid of first task */
#define FROM_MASTER 1 /* setting a message type */
#define FROM_WORKER 2 /* setting a message type */

MPI_Status status;

double a[NRA][NCA], /* matrix A to be multiplied */
b[NCA][NCB], /* matrix B to be multiplied */
c[NRA][NCB]; /* result matrix C */

main(int argc, char **argv)
{
int numtasks, /* number of tasks in partition */
taskid, /* a task identifier */
numworkers, /* number of worker tasks */
source, /* task id of message source */
dest, /* task id of message destination */
nbytes, /* number of bytes in message */
mtype, /* message type */
intsize, /* size of an integer in bytes */
dbsize, /* size of a double float in bytes */
rows, /* rows of matrix A sent to each worker */
averow, extra, offset, /* used to determine rows sent to each worker */
i, j, k, /* misc */
count;

struct timeval start, stop;

intsize = sizeof(int);
dbsize = sizeof(double);

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
numworkers = numtasks-1;

//printf(" size of matrix A = %d by %d\n",NRA,NCA);
//printf(" size of matrix B = %d by %d\n",NRA,NCB);
/*---------------------------- master ----------------------------*/
if (taskid == MASTER) {
printf("Number of worker tasks = %d\n",numworkers);
for (i=0; i<NRA; i++)
for (j=0; j<NCA; j++)
a[i][j]= i+j;
for (i=0; i<NCA; i++)
for (j=0; j<NCB; j++)
b[i][j]= i*j;

gettimeofday(&start, 0);

/* send matrix data to the worker tasks */
averow = NRA/numworkers;
extra = NRA%numworkers;
offset = 0;
mtype = FROM_MASTER;
for (dest=1; dest<=numworkers; dest++) {
rows = (dest <= extra) ? averow+1 : averow;
//printf(" Sending %d rows to task %d\n",rows,dest);
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
count = rows*NCA;
MPI_Send(&a[offset][0], count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
count = NCA*NCB;
MPI_Send(&b, count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);

offset = offset + rows;
}

/* wait for results from all worker tasks */
mtype = FROM_WORKER;
for (i=1; i<=numworkers; i++) {
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
count = rows*NCB;
MPI_Recv(&c[offset][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD,
&status);

}

#ifdef PRINT
printf("Here is the result matrix\n");
for (i=0; i<NRA; i++) {
printf("\n");
for (j=0; j<NCB; j++)
printf("%6.2f ", c[i][j]);
}
printf ("\n");
#endif

gettimeofday(&stop, 0);


fprintf(stdout,"Time = %.6f\n\n",
(stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));

} /* end of master section */

/*---------------------------- worker (slave)----------------------------*/
if (taskid > MASTER) {
mtype = FROM_MASTER;
source = MASTER;
#ifdef PRINT
printf ("Master =%d, mtype=%d\n", source, mtype);
#endif
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("offset =%d\n", offset);
#endif
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("row =%d\n", rows);
#endif
count = rows*NCA;
MPI_Recv(&a, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("a[0][0] =%e\n", a[0][0]);
#endif
count = NCA*NCB;
MPI_Recv(&b, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
#ifdef PRINT
printf ("b=\n");
#endif
for (k=0; k<NCB; k++)
for (i=0; i<rows; i++) {
c[i][k] = 0.0;
for (j=0; j<NCA; j++)
c[i][k] = c[i][k] + a[i][j] * b[j][k];
}

//mtype = FROM_WORKER;
#ifdef PRINT
printf ("after computer\n");
#endif
//MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&offset, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
//MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
//MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, FROM_WORKER, MPI_COMM_WORLD);
#ifdef PRINT
printf ("after send\n");
#endif
} /* end of worker */

MPI_Finalize();
} /* end of main */

Matrix Multiplication using pthread

Following code shows how pthreads can be used for matrix multiplication. Pthread is a POSIX standard for threads. The standard defines an API for creating and manipulating threads.You can download the code here.

/******************************************************************************
* Matrix Multiplication Program
* Heshan Suriyaarachchi
* ABOUT:
* Matrix multiplication is done via pthreads
*
******************************************************************************/

#include <stdio.h>
#include <sys/types.h>
#include <pthread.h>

#define MAX_THREAD 20 /* number of threads */
#define NDIM 200 /* number of NDIMs */
double a[NDIM][NDIM];
double b[NDIM][NDIM];
double c[NDIM][NDIM];

typedef struct
{
int id; /* identification */
int noproc; /* process */
int dim; /* number of threads */
double (*a)[NDIM][NDIM],(*b)[NDIM][NDIM],(*c)[NDIM][NDIM];
} parm;

void mm(int me_no, int noproc, int n, double a[NDIM][NDIM], double b[NDIM][NDIM], double c[NDIM][NDIM])
{
int i,j,k;
double sum;
i=me_no;
while (i<n) {

for (j = 0; j < n; j++) {
sum = 0.0;
for (k = 0; k < n; k++) {
sum = sum + a[i][k] * b[k][j];
}
c[i][j] = sum;

}
i+=noproc;
}
}

void * worker(void *arg)
{
parm *p = (parm *) arg;
mm(p->id, p->noproc, p->dim, *(p->a), *(p->b), *(p->c));
}


void main(int argc, char *argv[])
{
int j, k, noproc, me_no;
double sum;
double t1, t2;

pthread_t *threads;

parm *arg;
int n, i;

for (i = 0; i < NDIM; i++)
for (j = 0; j < NDIM; j++)
{
a[i][j] = i + j;
b[i][j] = i + j;
}

if (argc != 2)
{
printf("Usage: %s n\n where n is no. of thread\n", argv[0]);
exit(1);
}
n = atoi(argv[1]);

if ((n < 1) || (n > MAX_THREAD))
{
printf("The no of thread should between 1 and %d.\n", MAX_THREAD);
exit(1);
}
threads = (pthread_t *) malloc(n * sizeof(pthread_t));

arg=(parm *)malloc(sizeof(parm)*n);
/* setup barrier */

/* Start up thread */

/* Spawn thread */
for (i = 0; i < n; i++)
{
arg[i].id = i;
arg[i].noproc = n;
arg[i].dim = NDIM;
arg[i].a = &a;
arg[i].b = &b;
arg[i].c = &c;
pthread_create(&threads[i], NULL, worker, (void *)(arg+i));
}

for (i = 0; i < n; i++)
{
pthread_join(threads[i], NULL);

}
/* print_matrix(NDIM); */
check_matrix(NDIM);
free(arg);
}

print_matrix(dim)
int dim;
{
int i,j;

printf("The %d * %d matrix is\n", dim,dim);
for(i=0;i<dim;i++){
for(j=0;j<dim;j++)
printf("%lf ", c[i][j]);
printf("\n");
}
}

check_matrix(dim)
int dim;
{
int i,j,k;
int error=0;

printf("Now checking the results\n");
for(i=0;i<dim;i++)
for(j=0;j<dim;j++) {
double e=0.0;

for (k=0;k<dim;k++)
e+=a[i][k]*b[k][j];

if (e!=c[i][j]) {
printf("(%d,%d) error\n",i,j);
error++;
}
}
if (error)
printf("%d elements error\n",error);
else
printf("success\n");
}


Monday, May 11, 2009

Monday, May 4, 2009

Setting up NS2 in Ubuntu

1. Download ns-allinone-2.33.tar from here.

2. Place it in somewhere and extract it.
$ cd /home/installations
$ tar -xvf ns-allinone-2.33.tar
3. Download & install some packages from repository.
$ sudo apt-get install build-essential autoconf automake libxmu-dev
4. Install the ns2.
$ cd ns-allinone-2.33
$ ./install
5. Edit paths.
$ gedit ~/.bashrc
Add follwoing lines on that file.
# LD_LIBRARY_PATH
OTCL_LIB=/home/installations/ns-allinone-2.33/otcl-1.13
NS2_LIB=/home/installations/ns-allinone-2.33/lib
X11_LIB=/usr/X11R6/lib
USR_LOCAL_LIB=/usr/local/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:
$OTCL_LIB:$NS2_LIB:$X11_LIB:$USR_LOCAL_LIB

# TCL_LIBRARY
TCL_LIB=/home/installations/ns-allinone-2.33/tcl8.4.18/library
USR_LIB=/usr/lib
export TCL_LIBRARY=$TCL_LIB:$USR_LIB

# PATH
XGRAPH=/home/installations/ns-allinone-2.33/bin:/home/installations/ns-allinone-2.33/tcl8.4.18/unix:/home/installations/ns-allinone-2.33/tk8.4.18/unix
NS=/home/installations/ns-allinone-2.33/ns-2.33/
NAM=/home/installations/ns-allinone-2.33/nam-1.13/
export PATH=$PATH:$XGRAPH:$NS:$NAM
6. Validate it.
$ cd ns-2.33
$ ./validate
7. Create a symlink.
$ sudo ln -s /home/installations/ns-allinone-2.33/ns-2.33/ns /usr/bin/ns
8. Run it.
$ ns