#include
#include
#include
#include "2DArray.h"
#define GRAIN 1024 /* product size below which matmultleaf is used */
void seqMatMult(int m, int n, int p, double A, double B, double C)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
{
C[i][j] = 0.0;
for (int k = 0; k < p; k++)
C[i][j] += A[i][k]*B[k][j];
}
}
void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, double A, double B, double C)
/*
subroutine that uses the simple triple loop to multiply
a submatrix from A with a submatrix from B and store the
result in a submatrix of C.
*/
// mf, ml; /* first and last+1 i index */
// nf, nl; /* first and last+1 j index */
// pf, pl; /* first and last+1 k index */
{
for (int i = mf; i < ml; i++)
for (int j = nf; j < nl; j++)
for (int k = pf; k < pl; k++)
C[i][j] += A[i][k]*B[k][j];
}
void copyQtrMatrix(double X, int m, double Y, int mf, int nf)
{
for (int i = 0; i < m; i++)
X[i] = &Y[mf+i][nf];
}
void AddMatBlocks(double T, int m, int n, double X, double Y)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
T[i][j] = X[i][j] + Y[i][j];
}
void SubMatBlocks(double T, int m, int n, double X, double Y)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
T[i][j] = X[i][j] - Y[i][j];
}
void strassenMMult(int mf, int ml, int nf, int nl, int pf, int pl, double A, double B, double C)
{
if ((ml-mf)*(nl-nf)*(pl-pf) < GRAIN)
matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C);
else {
int m2 = (ml-mf)/2;
int n2 = (nl-nf)/2;
int p2 = (pl-pf)/2;
double M1 = Allocate2DArray< double >(m2, n2);
double M2 = Allocate2DArray< double >(m2, n2);
double M3 = Allocate2DArray< double >(m2, n2);
double M4 = Allocate2DArray< double >(m2, n2);
double M5 = Allocate2DArray< double >(m2, n2);
double M6 = Allocate2DArray< double >(m2, n2);
double M7 = Allocate2DArray< double >(m2, n2);
double A11 = new double*[m2];
double A12 = new double*[m2];
double A21 = new double*[m2];
double A22 = new double*[m2];
double B11 = new double*[p2];
double B12 = new double*[p2];
double B21 = new double*[p2];
double B22 = new double*[p2];
double C11 = new double*[m2];
double C12 = new double*[m2];
double C21 = new double*[m2];
double C22 = new double*[m2];
double tAM1 = Allocate2DArray< double >(m2, p2);
double tBM1 = Allocate2DArray< double >(p2, n2);
double tAM2 = Allocate2DArray< double >(m2, p2);
double tBM3 = Allocate2DArray< double >(p2, n2);
double tBM4 = Allocate2DArray< double >(p2, n2);
double tAM5 = Allocate2DArray< double >(m2, p2);
double tAM6 = Allocate2DArray< double >(m2, p2);
double tBM6 = Allocate2DArray< double >(p2, n2);
double tAM7 = Allocate2DArray< double >(m2, p2);
double tBM7 = Allocate2DArray< double >(p2, n2);
copyQtrMatrix(A11, m2, A, mf, pf);
copyQtrMatrix(A12, m2, A, mf, p2);
copyQtrMatrix(A21, m2, A, m2, pf);
copyQtrMatrix(A22, m2, A, m2, p2);
copyQtrMatrix(B11, p2, B, pf, nf);
copyQtrMatrix(B12, p2, B, pf, n2);
copyQtrMatrix(B21, p2, B, p2, nf);
copyQtrMatrix(B22, p2, B, p2, n2);
copyQtrMatrix(C11, m2, C, mf, nf);
copyQtrMatrix(C12, m2, C, mf, n2);
copyQtrMatrix(C21, m2, C, m2, nf);
copyQtrMatrix(C22, m2, C, m2, n2);
// M1 = (A11 + A22)*(B11 + B22)
AddMatBlocks(tAM1, m2, p2, A11, A22);
AddMatBlocks(tBM1, p2, n2, B11, B22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM1, tBM1, M1);
//M2 = (A21 + A22)*B11
AddMatBlocks(tAM2, m2, p2, A21, A22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM2, B11, M2);
//M3 = A11*(B12 - B22)
SubMatBlocks(tBM3, p2, n2, B12, B22);
strassenMMult(0, m2, 0, n2, 0, p2, A11, tBM3, M3);
//M4 = A22*(B21 - B11)
SubMatBlocks(tBM4, p2, n2, B21, B11);
strassenMMult(0, m2, 0, n2, 0, p2, A22, tBM4, M4);
//M5 = (A11 + A12)*B22
AddMatBlocks(tAM5, m2, p2, A11, A12);
strassenMMult(0, m2, 0, n2, 0, p2, tAM5, B22, M5);
//M6 = (A21 - A11)*(B11 + B12)
SubMatBlocks(tAM6, m2, p2, A21, A11);
AddMatBlocks(tBM6, p2, n2, B11, B12);
strassenMMult(0, m2, 0, n2, 0, p2, tAM6, tBM6, M6);
//M7 = (A12 - A22)*(B21 + B22)
SubMatBlocks(tAM7, m2, p2, A12, A22);
AddMatBlocks(tBM7, p2, n2, B21, B22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM7, tBM7, M7);
for (int i = 0; i < m2; i++)
for (int j = 0; j < n2; j++) {
C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
C12[i][j] = M3[i][j] + M5[i][j];
C21[i][j] = M2[i][j] + M4[i][j];
C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
}
Free2DArray< double >(M1);
Free2DArray< double >(M2);
Free2DArray< double >(M3);
Free2DArray< double >(M4);
Free2DArray< double >(M5);
Free2DArray< double >(M6);
Free2DArray< double >(M7);
delete[] A11; delete[] A12; delete[] A21; delete[] A22;
delete[] B11; delete[] B12; delete[] B21; delete[] B22;
delete[] C11; delete[] C12; delete[] C21; delete[] C22;
Free2DArray< double >(tAM1);
Free2DArray< double >(tBM1);
Free2DArray< double >(tAM2);
Free2DArray< double >(tBM3);
Free2DArray< double >(tBM4);
Free2DArray< double >(tAM5);
Free2DArray< double >(tAM6);
Free2DArray< double >(tBM6);
Free2DArray< double >(tAM7);
Free2DArray< double >(tBM7);
}
}
void matmultS(int m, int n, int p, double A, double B, double C)
{
int i,j;
for (i=0; i < m; i++)
for (j=0; j < n; j++)
C[i][j] = 0;
strassenMMult(0, m, 0, n, 0, p, A, B, C);
}
int CheckResults(int m, int n, double C, double C1)
{
#define THRESHOLD 0.001
//
// May need to take into consideration the floating point roundoff error
// due to parallel execution
//
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if (abs(C[i][j] - C1[i][j]) > THRESHOLD ) {
printf("%f %f\n", C[i][j], C1[i][j]);
return 1;
}
}
}
return 0;
}
int main(int argc, char* argv[])
{
clock_t before, after;
int M = atoi(argv[1]);
int N = atoi(argv[2]);
int P = atoi(argv[3]);
double A = Allocate2DArray< double >(M, P);
double B = Allocate2DArray< double >(P, N);
double C = Allocate2DArray< double >(M, N);
double **C4 = Allocate2DArray< double >(M, N);
int i, j;
for (i = 0; i < M; i++) {
for (j = 0; j < P; j++) {
A[i][j] = 5.0 - ((double)(rand()%100) / 10.0);
}
}
for (i = 0; i < P; i++) {
for (j = 0; j < N; j++) {
B[i][j] = 5.0 - ((double)(rand()%100) / 10.0);
}
}
printf("Execute Standard matmult\n\n");
before = clock();
seqMatMult(M, N, P, A, B, C);
after = clock();
printf("Standard matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);
before = clock();
matmultS(M, N, P, A, B, C4);
after = clock();
printf("Strassen matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);
if (CheckResults(M, N, C, C4))
printf("Error in matmultS\n\n");
else
printf("OKAY\n\n");
Free2DArray(A);
Free2DArray(B);
Free2DArray(C);
Free2DArray(C4);
return 0;
}
Copyright © 2026 eLLeNow.com All Rights Reserved.