I did a performance comparison between my MPI & pthread codes for Matrix multiplication. Matrices used were 1000*1000 and MPI code was run on UCSC's swelanka cluster (using 4 CPUs). While the MPI code took only 2.6 seconds to multiply two 1000*1000 matrices, the pthread code took more that 45 seconds(on avg).
Showing posts with label pthread. Show all posts
Showing posts with label pthread. Show all posts
Tuesday, June 2, 2009
Wednesday, May 13, 2009
Matrix Multiplication using pthread
Following code shows how pthreads can be used for matrix multiplication. Pthread is a POSIX standard for threads. The standard defines an API for creating and manipulating threads.You can download the code here.
/******************************************************************************
* Matrix Multiplication Program
* Heshan Suriyaarachchi
* ABOUT:
* Matrix multiplication is done via pthreads
*
******************************************************************************/
#include <stdio.h>
#include <sys/types.h>
#include <pthread.h>
#define MAX_THREAD 20 /* number of threads */
#define NDIM 200 /* number of NDIMs */
double a[NDIM][NDIM];
double b[NDIM][NDIM];
double c[NDIM][NDIM];
typedef struct
{
int id; /* identification */
int noproc; /* process */
int dim; /* number of threads */
double (*a)[NDIM][NDIM],(*b)[NDIM][NDIM],(*c)[NDIM][NDIM];
} parm;
void mm(int me_no, int noproc, int n, double a[NDIM][NDIM], double b[NDIM][NDIM], double c[NDIM][NDIM])
{
int i,j,k;
double sum;
i=me_no;
while (i<n) {
for (j = 0; j < n; j++) {
sum = 0.0;
for (k = 0; k < n; k++) {
sum = sum + a[i][k] * b[k][j];
}
c[i][j] = sum;
}
i+=noproc;
}
}
void * worker(void *arg)
{
parm *p = (parm *) arg;
mm(p->id, p->noproc, p->dim, *(p->a), *(p->b), *(p->c));
}
void main(int argc, char *argv[])
{
int j, k, noproc, me_no;
double sum;
double t1, t2;
pthread_t *threads;
parm *arg;
int n, i;
for (i = 0; i < NDIM; i++)
for (j = 0; j < NDIM; j++)
{
a[i][j] = i + j;
b[i][j] = i + j;
}
if (argc != 2)
{
printf("Usage: %s n\n where n is no. of thread\n", argv[0]);
exit(1);
}
n = atoi(argv[1]);
if ((n < 1) || (n > MAX_THREAD))
{
printf("The no of thread should between 1 and %d.\n", MAX_THREAD);
exit(1);
}
threads = (pthread_t *) malloc(n * sizeof(pthread_t));
arg=(parm *)malloc(sizeof(parm)*n);
/* setup barrier */
/* Start up thread */
/* Spawn thread */
for (i = 0; i < n; i++)
{
arg[i].id = i;
arg[i].noproc = n;
arg[i].dim = NDIM;
arg[i].a = &a;
arg[i].b = &b;
arg[i].c = &c;
pthread_create(&threads[i], NULL, worker, (void *)(arg+i));
}
for (i = 0; i < n; i++)
{
pthread_join(threads[i], NULL);
}
/* print_matrix(NDIM); */
check_matrix(NDIM);
free(arg);
}
print_matrix(dim)
int dim;
{
int i,j;
printf("The %d * %d matrix is\n", dim,dim);
for(i=0;i<dim;i++){
for(j=0;j<dim;j++)
printf("%lf ", c[i][j]);
printf("\n");
}
}
check_matrix(dim)
int dim;
{
int i,j,k;
int error=0;
printf("Now checking the results\n");
for(i=0;i<dim;i++)
for(j=0;j<dim;j++) {
double e=0.0;
for (k=0;k<dim;k++)
e+=a[i][k]*b[k][j];
if (e!=c[i][j]) {
printf("(%d,%d) error\n",i,j);
error++;
}
}
if (error)
printf("%d elements error\n",error);
else
printf("success\n");
}
Subscribe to:
Posts (Atom)