heat2d.f90
program heat2D
use mpi
implicit none
 
 integer :: niter,i,j,t,rank, ndim, nbproc, ierror,MPI_COMM_CART
 real(8), allocatable, dimension(:,:) :: U0,U1
 real(8) :: alpha,totaltime,Dt,Stab,DtAlpha
 integer, dimension(1:2) :: Nx, Nxl, nbproc_axe, cart_position
 real(8), dimension(1:2) :: Lx,Dx,DxDx,InvDxDx
 character(len=4) :: charbuff1,charbuff2
 integer, dimension(-1:1,-1:1) :: cart_neigh
 logical, dimension(1:2) :: cart_boundaries
 
 cart_boundaries(1) = .false.
 cart_boundaries(2) = .true.
 niter = 8000
 ndim = 2
 Nx(:) = 800
 
 nbproc_axe(1) = 2
 nbproc_axe(2) = 2
 
 ! Assuming we can divide number of points by number of processes per axes
 ! Nxl is the number of grid points for eaxh process
 Nxl(:) = Nx(:)/nbproc_axe(:)
 
 alpha = 0.4d0
 totaltime = 0.005d0
 Dt = totaltime/((niter-1)*1.0d0)
 Lx(:) = 1.0d0
 Dx(:) = Lx(:)/((Nx(:)-1)*1.0d0)
 InvDxDx(:) = 1.0d0/(Dx(:)*Dx(:))
 DxDx(:) = Dx(:)*Dx(:)
 Stab = alpha*Dt*(InvDxDx(1)) + alpha*Dt*(InvDxDx(2))
 DtAlpha = Dt*alpha
 print *, "Stability factor (need < 0.5): ",stab
 if(stab > 0.5d0) stop
 print *,"Calculating, please wait..."
 
 allocate(U0(0:Nxl(1)+1,0:Nxl(2)+1),U1(0:Nxl(1)+1,0:Nxl(2)+1))
 
 
 call MPI_INIT( ierror )
 call MPI_COMM_SIZE( MPI_COMM_WORLD , nbproc , ierror )
 
 call MPI_CART_CREATE( MPI_COMM_WORLD , ndim , nbproc_axe(1:ndim) , &
 & cart_boundaries(1:ndim) , .true. , MPI_COMM_CART , ierror )
 
 call MPI_COMM_RANK( MPI_COMM_CART , rank , ierror )
 
 call MPI_CART_COORDS( MPI_COMM_CART , rank , ndim , cart_position(1:ndim) , ierror )
 
 call MPI_CART_SHIFT (MPI_COMM_CART, 0, 1, cart_neigh(-1,0), cart_neigh(+1,0), ierror)
 call MPI_CART_SHIFT (MPI_COMM_CART, 1, 1, cart_neigh(0,-1), cart_neigh(0,+1), ierror)
 
 U0(:,:) = 1.0d0
 if(rank==0) U0(100:150,200:250) = -10.0d0
 if(rank==2) U0(300:350,150:200) = +12.0d0
 U1(:,:) = U0(:,:)
 
 write(charbuff1,'(I4.4)') cart_position(1)
 write(charbuff2,'(I4.4)') cart_position(2)
 open(10,file=trim(adjustl('IN'//'.'//trim(adjustl(charbuff1))//'.'//trim(adjustl(charbuff2))//'.dat')))
 do j=1,Nxl(2),10 ! do not write all data
    do i=1,Nxl(1),10
       write(10,*) i+(cart_position(1)*Nxl(1)),j+(cart_position(2)*Nxl(2)),U0(i,j)
    end do
 end do
 close(10)
 
 do t=1,niter
 
    ! Recv left, send right
    call MPI_SENDRECV ( U0(Nxl(1),1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(1,0) , 7777 , &
    & U0(0,1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(-1,0) , 7777 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
    ! Recv right, send left
    call MPI_SENDRECV ( U0(1,1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(-1,0) , 7778 , &
    & U0(Nxl(1)+1,1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(1,0) , 7778 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
 
    ! Recv down, send up
    call MPI_SENDRECV ( U0(1:Nxl(1),Nxl(2)) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,1) , 7779 , &
    & U0(1:Nxl(1),0) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,-1) , 7779 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
    ! Recv up, send down
    call MPI_SENDRECV ( U0(1:Nxl(1),1) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,-1) , 7780 , &
    & U0(1:Nxl(1),Nxl(2)+1) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,1) , 7780 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
 
    do j=1,Nxl(2)
       do i=1,Nxl(1)
 
          U1(i,j) = U0(i,j) + DtAlpha* &
          & ( (U0(i+1,j)-2.0*U0(i,j)+U0(i-1,j))*InvDxDx(1) + &
          &   (U0(i,j+1)-2.0*U0(i,j)+U0(i,j-1))*InvDxDx(2) )
 
       end do
    end do
 
    ! Recv left, send right
    call MPI_SENDRECV ( U1(Nxl(1),1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(1,0) , 7781 , &
    & U1(0,1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(-1,0) , 7781 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
    ! Recv right, send left
    call MPI_SENDRECV ( U1(1,1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(-1,0) , 7782 , &
    & U1(Nxl(1)+1,1:Nxl(2)) , Nxl(2) , MPI_DOUBLE_PRECISION , cart_neigh(1,0) , 7782 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
 
    ! Recv down, send up
    call MPI_SENDRECV ( U1(1:Nxl(1),Nxl(2)) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,1) , 7783 , &
    & U1(1:Nxl(1),0) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,-1) , 7783 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
    ! Recv up, send down
    call MPI_SENDRECV ( U1(1:Nxl(1),1) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,-1) , 7784 , &
    & U1(1:Nxl(1),Nxl(2)+1) , Nxl(1) , MPI_DOUBLE_PRECISION , cart_neigh(0,1) , 7784 , MPI_COMM_CART , MPI_STATUS_IGNORE , ierror )
 
    do j=1,Nxl(2)
       do i=1,Nxl(1)
 
          U0(i,j) = U1(i,j) + DtAlpha* &
          & ( (U1(i+1,j)-2.0*U1(i,j)+U1(i-1,j))*InvDxDx(1) + &
          &   (U1(i,j+1)-2.0*U1(i,j)+U1(i,j-1))*InvDxDx(2) )
 
       end do
    end do
 
 end do
 
 write(charbuff1,'(I4.4)') cart_position(1)
 write(charbuff2,'(I4.4)') cart_position(2)
 open(10,file=trim(adjustl('OUT'//'.'//trim(adjustl(charbuff1))//'.'//trim(adjustl(charbuff2))//'.dat')))
 do j=1,Nxl(2),10 ! do not write all data
    do i=1,Nxl(1),10
       write(10,*) i+(cart_position(1)*Nxl(1)),j+(cart_position(2)*Nxl(2)),U0(i,j)
    end do
 end do
 close(10)
 
 call MPI_FINALIZE ( ierror ) ! Close MPI
 
 deallocate(U0,U1)
 
end program heat2D
heat2d.c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
 
#define LEFT   0
#define RIGHT  1
#define TOP    2
#define BOTTOM 3
 
int main(int argc, char** argv) 
{
	int niter,i,j,t,rank;
	double alpha,totaltime,Dt,Stab,DtAlpha;
	int Nx[2], NxPerProc[2];
	double Lx[2], Dx[2], DxDx[2], InvDxDx[2];
 
	int mpiSize, mpiRank;
	MPI_Comm MPI_COMM_CART;
	int dims[2] = {2, 2};
	int periods[2] = {0, 0};
	int cartPos[2];
	int neighbors[4];
 
	MPI_Init(NULL, NULL);
	MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
 
	if(mpiSize != 4) { printf("This program is design to be run with 4 processes only"); exit(0);}
 
	MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &MPI_COMM_CART);
	MPI_Comm_rank  (MPI_COMM_CART, &mpiRank);	
	MPI_Cart_coords(MPI_COMM_CART, mpiRank, 2, cartPos);
	MPI_Cart_shift (MPI_COMM_CART, 0, 1, &neighbors[LEFT], &neighbors[RIGHT]);
	MPI_Cart_shift (MPI_COMM_CART, 1, 1, &neighbors[TOP ], &neighbors[BOTTOM]);
 
	// parameters
	niter     = 8000;
	Nx[0]     = 800; 
	Nx[1]     = 800;
	alpha     = 0.4;
	totaltime = 0.005;
 
	// initializations computation
	Dt = totaltime/((niter-1)*1.0);
	Lx[0] = 1.0; 
	Lx[1] = 1.0;
 
	Dx[0] = Lx[0]/((Nx[0]-1)*1.0); 
	Dx[1] = Lx[1]/((Nx[1]-1)*1.0);
 
	InvDxDx[0] = 1.0/(Dx[0]*Dx[0]); 
	InvDxDx[1] = 1.0/(Dx[1]*Dx[1]);
 
	DxDx[0] = Dx[0]*Dx[0]; 
	DxDx[1] = Dx[1]*Dx[1];
 
	Stab    = alpha*Dt*(InvDxDx[0]) + alpha*Dt*(InvDxDx[1]);
	DtAlpha = Dt*alpha;
 
	// verify stability condition
	printf("Stability factor (need < 0.5): %lf\n", Stab);
	if(Stab > 0.5) 
		exit(0);
 
	NxPerProc[0] = Nx[0] / dims[0];
	NxPerProc[1] = Nx[1] / dims[1];
 
	MPI_Datatype lineType, columnType;
 
	MPI_Type_contiguous(NxPerProc[0]+2, MPI_DOUBLE, &lineType);
	MPI_Type_commit(&lineType);
 
	MPI_Type_vector(NxPerProc[1]+2, 1, NxPerProc[1]+2, MPI_DOUBLE, &columnType);
	MPI_Type_commit(&columnType);
 
	double *U0 = malloc((NxPerProc[0]+2) * (NxPerProc[1]+2) * sizeof(double));
	double *U1 = malloc((NxPerProc[0]+2) * (NxPerProc[1]+2) * sizeof(double));
 
	// init grid
	if(mpiRank == 0)
	{
		for(j = 0; j < NxPerProc[1]+2; ++j)
			for(i = 0; i < NxPerProc[0]+2; ++i) 
			{ 
				U0[i+j*NxPerProc[0]] = 1.0;
				U1[i+j*NxPerProc[0]] = 1.0;
				if(i >= 100 && i <= 150 && j>=200 && j<=250) { U0[i+j*NxPerProc[0]] = -10.0; U1[i+j*NxPerProc[0]] = -10.0;}
				if(i >= 300 && i <= 350 && j>=300 && j<=350) { U0[i+j*NxPerProc[0]] =  12.0; U1[i+j*NxPerProc[0]] =  12.0;}
			}
	}	
	else
	{
		for(j = 0; j < NxPerProc[1]+2; ++j)
			for(i = 0; i < NxPerProc[0]+2; ++i)
			{
				U0[i+j*NxPerProc[0]] = 1.0;
				U1[i+j*NxPerProc[0]] = 1.0;
			}
	}
 
	char fileName[2048];
	sprintf(fileName, "IN.%d.dat", mpiRank);
 
	FILE *file = NULL;
	file = fopen(fileName, "w");
	for(j = 1; j < NxPerProc[0]+1; j += 10)
		for(i = 1; i < NxPerProc[1]+1; i += 10)
			fprintf(file, "%d\t%d\t%lf\n", i, j, U0[i+j*NxPerProc[0]]);
	fclose(file);
 
	printf("Calculating, please wait...\n");
	for(t = 0; t < niter; ++t) 
	{
		MPI_Sendrecv(&U0[1                                    ], 1, columnType, neighbors[LEFT  ], 1234,
		             &U0[NxPerProc[1] +1                      ], 1, columnType, neighbors[RIGHT ], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		MPI_Sendrecv(&U0[NxPerProc[1] +0                      ], 1, columnType, neighbors[RIGHT ], 1234,
		             &U0[0                                    ], 1, columnType, neighbors[LEFT  ], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		MPI_Sendrecv(&U0[1                                    ], 1, lineType,   neighbors[TOP   ], 1234,
		             &U0[(NxPerProc[0] +1) * (NxPerProc[1] +2)], 1, lineType,   neighbors[BOTTOM], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		MPI_Sendrecv(&U0[(NxPerProc[0] +0) * (NxPerProc[1] +2)], 1, lineType,   neighbors[BOTTOM], 1234,
		             &U0[0                                    ], 1, lineType,   neighbors[TOP   ], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		for(j = 1; j < NxPerProc[1]+1; ++j)
			for(i = 1; i < NxPerProc[0]+1; ++i) 
				U1[i+j*NxPerProc[0]] = U0[i+j*NxPerProc[0]] + DtAlpha * 
                          ((U0[i+1+j*NxPerProc[0]]   -2.0*U0[i+j*NxPerProc[0]] + U0[i-1+j*NxPerProc[0]]  )*InvDxDx[0] + 
				           (U0[i+(j+1)*NxPerProc[0]] -2.0*U0[i+j*NxPerProc[0]] + U0[i+(j-1)*NxPerProc[0]])*InvDxDx[1]);
 
		MPI_Barrier(MPI_COMM_CART);
 
		MPI_Sendrecv(&U1[1                                    ], 1, columnType, neighbors[LEFT  ], 1234,
		             &U1[NxPerProc[1] +1                      ], 1, columnType, neighbors[RIGHT ], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		MPI_Sendrecv(&U1[NxPerProc[1]                         ], 1, columnType, neighbors[RIGHT ], 1234,
		             &U1[0                                    ], 1, columnType, neighbors[LEFT  ], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		MPI_Sendrecv(&U1[1                                    ], 1, lineType,   neighbors[TOP   ], 1234,
		             &U1[(NxPerProc[0] +1) * (NxPerProc[1] +2)], 1, lineType,   neighbors[BOTTOM], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		MPI_Sendrecv(&U1[(NxPerProc[0] +0) * (NxPerProc[1] +2)], 1, lineType,   neighbors[BOTTOM], 1234,
		             &U1[0                                    ], 1, lineType,   neighbors[TOP   ], 1234, MPI_COMM_CART, MPI_STATUSES_IGNORE);
 
		for(j = 1; j < NxPerProc[1]+1; ++j)
			for(i = 1; i < NxPerProc[0]+1; ++i) 
				U0[i+j*NxPerProc[0]] = U1[i+j*NxPerProc[0]] + DtAlpha * 
                           ((U1[i+1+j*NxPerProc[0]]   -2.0*U1[i+j*NxPerProc[0]] + U1[i-1+j*NxPerProc[0]]  )*InvDxDx[0] + 
				            (U1[i+(j+1)*NxPerProc[0]] -2.0*U1[i+j*NxPerProc[0]] + U1[i+(j-1)*NxPerProc[0]])*InvDxDx[1]);
 
		MPI_Barrier(MPI_COMM_CART);
	}
 
	sprintf(fileName, "OUT.%d.dat", mpiRank);
	file = fopen(fileName, "w");
	for(j = 1; j < NxPerProc[1]+1; j += 10)
		for(i = 1; i < NxPerProc[0]+1; i += 10)
			fprintf(file, "%d\t%d\t%lf\n", i, j, U0[i+j*NxPerProc[0]]);
	fclose(file);
 
	free(U0);
	free(U1);
 
	MPI_Finalize();
 
	return 0;
}