an example for computations with large matricesThis uses the VLA array passing macros, P99_DO and P99_PARALLEL_DO
#ifdef _OPENMP
# include <omp.h>
#else
#define omp_get_wtime(...) 0.0;
#define omp_set_num_threads(...) P99_NOP
#endif
void
printFunc(
P99_AARG(
double const, A, 2)) {
printf(
"%g\t", (*A)[
i][j]);
}
printf("\n");
}
printf("\n");
fflush(0);
}
#define print(ARR) P99_CA_CALL(printFunc, (), (2), P99_ACALL(ARR, 2, double const))
inline
bool
checkZeroFunc(
P99_AARG(
double const, C, 2)) {
atomic_flag ret = ATOMIC_FLAG_INIT;
if ((*C)[
i][j] != 0.0) atomic_flag_test_and_set_explicit(&ret, memory_order_release);
}
}
return !atomic_flag_test_and_set_explicit(&ret, memory_order_acquire);
}
#define checkZero(VC) P99_CA_CALL(checkZeroFunc, (), (2), P99_ACALL(VC, 2, double const))
inline
double
dotproductFunc(double ret,
ret += (*A)[
i] * (*B)[
i];
return ret;
}
double,
#define dotproduct1(VA, VB, CAR) \
P99_CA_CALL(dotproductFunc, (), (2, 4), CAR, P99_ACALL(VA, 1, double const), P99_ACALL(VB, 1, double const))
#define dotproduct0(...) dotproduct1(__VA_ARGS__)
#define dotproduct(...) dotproduct0(P99_CALL_DEFARG_LIST(dotproduct, 3, __VA_ARGS__))
#define dotproduct_defarg_2() 0.0
inline
void*
transposeFunc(
P99_AARG(
double const, B, 2)) {
P99_AREF(
double const, BV, nb1) = &(*B)[j];
}
}
return A;
}
#define transpose(VB) ((double const(*)[P99_ALEN(*VB, 1)][P99_ALEN(*VB, 1)])P99_CA_CALL(transposeFunc, (), (2), P99_ACALL(VB, 2, double const)))
enum {
blocksize = (1u << 21)/sizeof(double),
istep = (1u << 5),
jstep = (1u << 5),
kstep = (blocksize - istep*jstep)/(istep + jstep)
};
void
void,
(),
(2, 5, 6));
#define mult(ARR, BRR, CRR) \
P99_CA_CALL(multFunc, \
(), \
(2, 5, 6), \
P99_ACALL(ARR, 2, double const), \
P99_ACALL(BRR, 2, double const), \
CRR)
double,
double,
void,
(),
(2, 5, 6));
void*,
(),
(2));
#define INNER_CASE(C, A, B, ISTART, ILEN, JSTART, JLEN, KSTART, KLEN) \
\
typedef double const kLineFrag[KLEN]; \
\
typedef double cLine[JLEN]; \
\
P99_DO(size_t, i0, ISTART, ILEN) { \
register kLineFrag* AF = (kLineFrag*)&((*A)[i0][KSTART]); \
register cLine*restrict CL = (cLine*)&(*C)[i0][JSTART]; \
P99_DO(size_t, j0, JSTART, JLEN) { \
register kLineFrag* BF = (kLineFrag*)&((*B)[j0][KSTART]); \
register double*restrict c = &(*CL)[j0-JSTART]; \
*c = dotproduct(AF, BF, *c); \
} \
}
void
assert(na1 == nb0);
double times0 = omp_get_wtime();
double const P99_ARRAY(*BS, nb1, nb0) = transpose(B);
assert(checkZero(C));
double times1 = omp_get_wtime();
P99_DO(
size_t, k, 0, na1, kstep) {
register size_t const kMax = (na1 - k);
register size_t const iMax = (na0 -
i);
register size_t const jMax = (nb1 - j);
INNER_CASE(C, A, BS,
i, istep, j, jstep, k, kstep);
} else {
INNER_CASE(C, A, BS,
i, istep, j, jstep, k, kMax);
}
} else {
if (kMax >= kstep) {
INNER_CASE(C, A, BS,
i, istep, j, jMax, k, kstep);
} else {
INNER_CASE(C, A, BS,
i, istep, j, jMax, k, kMax);
}
}
} else {
if (jMax >= jstep) {
if (kMax >= kstep) {
INNER_CASE(C, A, BS,
i, iMax, j, jstep, k, kstep);
} else {
INNER_CASE(C, A, BS,
i, iMax, j, jstep, k, kMax);
}
} else {
if (kMax >= kstep) {
INNER_CASE(C, A, BS,
i, iMax, j, jMax, k, kstep);
} else {
INNER_CASE(C, A, BS,
i, iMax, j, jMax, k, kMax);
}
}
}
}
}
}
double times2 = omp_get_wtime();
printf("%s time:\t%g\n", "setup", times1 - times0);
printf("%s time:\t%g\n", "mult", times2 - times1);
free((void*)BS);
}
int main(
int argc,
char*argv[]) {
#if __STDC_WANT_LIB_EXT1__
set_constraint_handler_s(abort_handler_s);
#endif
if (argc <= 3) {
fprintf(stderr, "Usage: %s n k m p e\n", argv[0]);
fputs(" for matrices C[n][m] = A[n][k] * B[k][m]\n", stderr);
fputs(" with p OpenMP processors\n", stderr);
fputs(" error generation e: 0 - no error\n", stderr);
fputs(" 1 - size error in C\n", stderr);
fputs(" 2 - null pointer for C\n", stderr);
return EXIT_SUCCESS;
}
if (argc > 4)
size_t err = 0;
if (argc > 5)
{2, 3 },
{1, 1 },
{-1, 0}
};
print(AP);
{2, 3, -1, 0},
{1, 1, 2, 1},
};
print(BP);
dotproduct(&(*BP)[0], &(*BP)[1]);
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
};
mult(AP, BP, CP);
print(CP);
register const size_t n =
strtoul(argv[1]);
register const size_t m =
strtoul(argv[2]);
register const size_t k =
strtoul(argv[3]);
printf("matrix length are n=%zu, m=%zu, k=%zu\n", n, m, k);
printf("step length are n=%d, m=%d, k=%d\n", istep, jstep, kstep);
+ (err == 1 ? 1 : 0),
mult(AR, BR, CR);
free(AR);
free(BR);
free(CR);
}
enum { A = 3 };