-
Notifications
You must be signed in to change notification settings - Fork 110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support block matrices in several BLAS1 routines #226
Changes from 1 commit
90b6408
4afe36a
be1c42a
e9139f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,28 +11,30 @@ | |
using namespace El; | ||
|
||
template <typename T, DistWrap W> | ||
void TestColumnTwoNorms(Int m, Int n, const Grid& g, bool print) { | ||
void TestColumnTwoNorms(Int m, Int n, const Grid& g, bool print) | ||
{ | ||
// Generate random matrix to test. | ||
DistMatrix<T, MC, MR, W> A(g); | ||
Uniform(A, m, n); | ||
if (print) { | ||
if (print) | ||
Print(A, "A"); | ||
} | ||
DistMatrix<T, MR, STAR, W> norms(g); | ||
ColumnTwoNorms(A, norms); | ||
if (print) { | ||
if (print) | ||
Print(norms, "norms"); | ||
} | ||
for (Int j = 0; j < A.LocalWidth(); ++j) { | ||
for (Int j = 0; j < A.LocalWidth(); ++j) | ||
{ | ||
T got = norms.GetLocal(j, 0); | ||
T expected = 0; | ||
for (Int i = 0; i < A.LocalHeight(); ++i) { | ||
for (Int i = 0; i < A.LocalHeight(); ++i) | ||
{ | ||
T val = A.GetLocal(i, j); | ||
expected += val * val; | ||
} | ||
expected = mpi::AllReduce(expected, g.ColComm()); | ||
expected = Sqrt(expected); | ||
if (Abs(got - expected) > 1e-5) { | ||
if (Abs(got - expected) > 10 * limits::Epsilon<El::Base<T>>()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The accumulated error should be a function of the number of entries (with said function being logarithmic if a tree-based scheme is used, but linear with the current single-process implementation): would you mind multiplying the right-hand side by the number of entries? Also, it would be better to use a relative bound and to diving the left-hand side by the maximum of the norm and 1 (so that it still behaves well for zero norms). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just want to clarify: The new check should be |
||
{ | ||
Output("Results do not match, norms(", j, ")=", got, | ||
" instead of ", expected); | ||
RuntimeError("got != expected"); | ||
|
@@ -41,39 +43,41 @@ void TestColumnTwoNorms(Int m, Int n, const Grid& g, bool print) { | |
} | ||
|
||
template <typename T, DistWrap W> | ||
void TestColumnMaxNorms(Int m, Int n, const Grid& g, bool print) { | ||
void TestColumnMaxNorms(Int m, Int n, const Grid& g, bool print) | ||
{ | ||
// Generate random matrix to test. | ||
DistMatrix<T, MC, MR, W> A(g); | ||
Uniform(A, m, n); | ||
if (print) { | ||
if (print) | ||
Print(A, "A"); | ||
} | ||
DistMatrix<T, MR, STAR, W> norms(g); | ||
ColumnMaxNorms(A, norms); | ||
if (print) { | ||
if (print) | ||
Print(norms, "norms"); | ||
} | ||
for (Int j = 0; j < A.LocalWidth(); ++j) { | ||
for (Int j = 0; j < A.LocalWidth(); ++j) | ||
{ | ||
T got = norms.GetLocal(j, 0); | ||
T expected = 0; | ||
for (Int i = 0; i < A.LocalHeight(); ++i) { | ||
for (Int i = 0; i < A.LocalHeight(); ++i) | ||
expected = Max(expected, Abs(A.GetLocal(i, j))); | ||
} | ||
T r; | ||
mpi::AllReduce(&expected, &r, 1, mpi::MAX, g.ColComm()); | ||
expected = r; | ||
if (got != expected) { | ||
if (got != expected) | ||
{ | ||
Output("Results do not match, norms(", j, ")=", got, | ||
" instead of ", expected); | ||
RuntimeError("got != expected"); | ||
} | ||
} | ||
} | ||
|
||
int main(int argc, char** argv) { | ||
int main(int argc, char** argv) | ||
{ | ||
Environment env(argc, argv); | ||
mpi::Comm comm = mpi::COMM_WORLD; | ||
try { | ||
try | ||
{ | ||
const Int m = Input("--m", "height", 100); | ||
const Int n = Input("--n", "width", 100); | ||
const bool print = Input("--print", "print matrices?", false); | ||
|
@@ -86,12 +90,46 @@ int main(int argc, char** argv) { | |
TestColumnTwoNorms<float, BLOCK>(m, n, g, print); | ||
TestColumnTwoNorms<double, ELEMENT>(m, n, g, print); | ||
TestColumnTwoNorms<double, BLOCK>(m, n, g, print); | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_DOUBLEDOUBLE) | ||
TestColumnTwoNorms<DoubleDouble, ELEMENT>(m, n, g, print); | ||
TestColumnTwoNorms<DoubleDouble, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_QUADDOUBLE) | ||
TestColumnTwoNorms<QuadDouble, ELEMENT>(m, n, g, print); | ||
TestColumnTwoNorms<QuadDouble, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QUAD) && defined(EL_ENABLE_QUAD) | ||
TestColumnTwoNorms<Quad, ELEMENT>(m, n, g, print); | ||
TestColumnTwoNorms<Quad, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_MPC) && defined(EL_ENABLE_BIGFLOAT) | ||
TestColumnTwoNorms<BigFloat, ELEMENT>(m, n, g, print); | ||
TestColumnTwoNorms<BigFloat, BLOCK>(m, n, g, print); | ||
#endif | ||
OutputFromRoot(comm, "Testing ColumnMaxNorms"); | ||
TestColumnMaxNorms<float, ELEMENT>(m, n, g, print); | ||
TestColumnMaxNorms<float, BLOCK>(m, n, g, print); | ||
TestColumnMaxNorms<double, ELEMENT>(m, n, g, print); | ||
TestColumnMaxNorms<double, BLOCK>(m, n, g, print); | ||
} catch (exception& e) { | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_DOUBLEDOUBLE) | ||
TestColumnMaxNorms<DoubleDouble, ELEMENT>(m, n, g, print); | ||
TestColumnMaxNorms<DoubleDouble, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_QUADDOUBLE) | ||
TestColumnMaxNorms<QuadDouble, ELEMENT>(m, n, g, print); | ||
TestColumnMaxNorms<QuadDouble, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QUAD) && defined(EL_ENABLE_QUAD) | ||
TestColumnMaxNorms<Quad, ELEMENT>(m, n, g, print); | ||
TestColumnMaxNorms<Quad, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_MPC) && defined(EL_ENABLE_BIGFLOAT) | ||
TestColumnMaxNorms<BigFloat, ELEMENT>(m, n, g, print); | ||
TestColumnMaxNorms<BigFloat, BLOCK>(m, n, g, print); | ||
#endif | ||
} | ||
catch (exception& e) | ||
{ | ||
ReportException(e); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,40 +11,44 @@ | |
using namespace El; | ||
|
||
template <typename T, DistWrap W> | ||
void TestDot(Int m, Int n, const Grid& g, bool print) { | ||
void TestDot(Int m, Int n, const Grid& g, bool print) | ||
{ | ||
// Generate random matrices to test. | ||
DistMatrix<T, MC, MR, W> A(g); | ||
Uniform(A, m, n); | ||
DistMatrix<T, MC, MR, W> B(g); | ||
Uniform(B, m, n); | ||
if (print) { | ||
if (print) | ||
{ | ||
Print(A, "A"); | ||
Print(B, "B"); | ||
} | ||
// Do Dot. | ||
T got = Dot(A, B); | ||
if (print) { | ||
if (print) | ||
OutputFromRoot(g.Comm(), "result=", got); | ||
} | ||
// Manually check results. | ||
T expected = 0; | ||
for (Int j = 0; j < A.LocalWidth(); ++j) { | ||
for (Int i = 0; i < B.LocalHeight(); ++i) { | ||
expected += A.GetLocal(i, j) * B.GetLocal(i, j); | ||
} | ||
} | ||
for (Int j = 0; j < A.LocalWidth(); ++j) | ||
for (Int i = 0; i < B.LocalHeight(); ++i) | ||
expected += Conj(A.GetLocal(i, j)) * B.GetLocal(i, j); | ||
expected = mpi::AllReduce(expected, g.Comm()); | ||
if (Abs(got - expected) > 1e-6) { | ||
// The constant here is large because this is not an especially stable way | ||
// to compute the dot product, but it provides a dumb implementation baseline. | ||
if (Abs(got - expected) > 700 * limits::Epsilon<El::Base<T>>()) | ||
{ | ||
Output("Results do not match, got=", got, | ||
" instead of ", expected); | ||
RuntimeError("got != expected"); | ||
} | ||
} | ||
|
||
int main(int argc, char** argv) { | ||
int main(int argc, char** argv) | ||
{ | ||
Environment env(argc, argv); | ||
mpi::Comm comm = mpi::COMM_WORLD; | ||
try { | ||
try | ||
{ | ||
const Int m = Input("--m", "height", 100); | ||
const Int n = Input("--n", "width", 100); | ||
const bool print = Input("--print", "print matrices?", false); | ||
|
@@ -55,9 +59,39 @@ int main(int argc, char** argv) { | |
OutputFromRoot(comm, "Testing Dot"); | ||
TestDot<float, ELEMENT>(m, n, g, print); | ||
TestDot<float, BLOCK>(m, n, g, print); | ||
TestDot<Complex<float>, ELEMENT>(m, n, g, print); | ||
TestDot<Complex<float>, BLOCK>(m, n, g, print); | ||
TestDot<double, ELEMENT>(m, n, g, print); | ||
TestDot<double, BLOCK>(m, n, g, print); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why exclude complex tests and more precise datatypes (e.g., |
||
} catch (exception& e) { | ||
TestDot<Complex<double>, ELEMENT>(m, n, g, print); | ||
TestDot<Complex<double>, BLOCK>(m, n, g, print); | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_DOUBLEDOUBLE) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no need for the EL_ENABLE macros here (or in any test driver), as they are only used to signal to some of the include macros used for template instantiation in the library. |
||
TestDot<DoubleDouble, ELEMENT>(m, n, g, print); | ||
TestDot<DoubleDouble, BLOCK>(m, n, g, print); | ||
TestDot<Complex<DoubleDouble>, ELEMENT>(m, n, g, print); | ||
TestDot<Complex<DoubleDouble>, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_QUADDOUBLE) | ||
TestDot<QuadDouble, ELEMENT>(m, n, g, print); | ||
TestDot<QuadDouble, BLOCK>(m, n, g, print); | ||
TestDot<Complex<QuadDouble>, ELEMENT>(m, n, g, print); | ||
TestDot<Complex<QuadDouble>, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QUAD) && defined(EL_ENABLE_QUAD) | ||
TestDot<Quad, ELEMENT>(m, n, g, print); | ||
TestDot<Quad, BLOCK>(m, n, g, print); | ||
TestDot<Complex<Quad>, ELEMENT>(m, n, g, print); | ||
TestDot<Complex<Quad>, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_MPC) && defined(EL_ENABLE_BIGFLOAT) | ||
TestDot<BigFloat, ELEMENT>(m, n, g, print); | ||
TestDot<BigFloat, BLOCK>(m, n, g, print); | ||
TestDot<Complex<BigFloat>, ELEMENT>(m, n, g, print); | ||
TestDot<Complex<BigFloat>, BLOCK>(m, n, g, print); | ||
#endif | ||
} | ||
catch (exception& e) | ||
{ | ||
ReportException(e); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,29 +11,33 @@ | |
using namespace El; | ||
|
||
template <typename T, DistWrap W> | ||
void TestHadamard(Int m, Int n, const Grid& g, bool print) { | ||
void TestHadamard(Int m, Int n, const Grid& g, bool print) | ||
{ | ||
// Generate random matrices to test. | ||
DistMatrix<T, MC, MR, W> A(g); | ||
Uniform(A, m, n); | ||
DistMatrix<T, MC, MR, W> B(g); | ||
Uniform(B, m, n); | ||
if (print) { | ||
if (print) | ||
{ | ||
Print(A, "A"); | ||
Print(B, "B"); | ||
} | ||
// Apply Hadamard. | ||
DistMatrix<T, MC, MR, W> C(g); | ||
Hadamard(A, B, C); | ||
mpi::Barrier(g.Comm()); | ||
if (print) { | ||
if (print) | ||
Print(C, "C"); | ||
} | ||
// Manually check results. | ||
for (Int j = 0; j < A.LocalWidth(); ++j) { | ||
for (Int i = 0; i < A.LocalHeight(); ++i) { | ||
for (Int j = 0; j < A.LocalWidth(); ++j) | ||
{ | ||
for (Int i = 0; i < A.LocalHeight(); ++i) | ||
{ | ||
T got = C.GetLocal(i, j); | ||
T expected = A.GetLocal(i, j) * B.GetLocal(i, j); | ||
if (Abs(got - expected) > 1e-6) { | ||
if (Abs(got - expected) > 10 * limits::Epsilon<El::Base<T>>()) | ||
{ | ||
Output("Results do not match, C(", i, ",", j, ")=", got, | ||
" instead of ", expected); | ||
RuntimeError("got != expected"); | ||
|
@@ -42,10 +46,12 @@ void TestHadamard(Int m, Int n, const Grid& g, bool print) { | |
} | ||
} | ||
|
||
int main(int argc, char** argv) { | ||
int main(int argc, char** argv) | ||
{ | ||
Environment env(argc, argv); | ||
mpi::Comm comm = mpi::COMM_WORLD; | ||
try { | ||
try | ||
{ | ||
const Int m = Input("--m", "height", 100); | ||
const Int n = Input("--n", "width", 100); | ||
const bool print = Input("--print", "print matrices?", false); | ||
|
@@ -56,9 +62,39 @@ int main(int argc, char** argv) { | |
OutputFromRoot(comm, "Testing Hadamard"); | ||
TestHadamard<float, ELEMENT>(m, n, g, print); | ||
TestHadamard<float, BLOCK>(m, n, g, print); | ||
TestHadamard<Complex<float>, ELEMENT>(m, n, g, print); | ||
TestHadamard<Complex<float>, BLOCK>(m, n, g, print); | ||
TestHadamard<double, ELEMENT>(m, n, g, print); | ||
TestHadamard<double, BLOCK>(m, n, g, print); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment here about testing the complex (and high-precision) cases. |
||
} catch (exception& e) { | ||
TestHadamard<Complex<double>, ELEMENT>(m, n, g, print); | ||
TestHadamard<Complex<double>, BLOCK>(m, n, g, print); | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_DOUBLEDOUBLE) | ||
TestHadamard<DoubleDouble, ELEMENT>(m, n, g, print); | ||
TestHadamard<DoubleDouble, BLOCK>(m, n, g, print); | ||
TestHadamard<Complex<DoubleDouble>, ELEMENT>(m, n, g, print); | ||
TestHadamard<Complex<DoubleDouble>, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QD) && defined(EL_ENABLE_QUADDOUBLE) | ||
TestHadamard<QuadDouble, ELEMENT>(m, n, g, print); | ||
TestHadamard<QuadDouble, BLOCK>(m, n, g, print); | ||
TestHadamard<Complex<QuadDouble>, ELEMENT>(m, n, g, print); | ||
TestHadamard<Complex<QuadDouble>, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_QUAD) && defined(EL_ENABLE_QUAD) | ||
TestHadamard<Quad, ELEMENT>(m, n, g, print); | ||
TestHadamard<Quad, BLOCK>(m, n, g, print); | ||
TestHadamard<Complex<Quad>, ELEMENT>(m, n, g, print); | ||
TestHadamard<Complex<Quad>, BLOCK>(m, n, g, print); | ||
#endif | ||
#if defined(EL_HAVE_MPC) && defined(EL_ENABLE_BIGFLOAT) | ||
TestHadamard<BigFloat, ELEMENT>(m, n, g, print); | ||
TestHadamard<BigFloat, BLOCK>(m, n, g, print); | ||
TestHadamard<Complex<BigFloat>, ELEMENT>(m, n, g, print); | ||
TestHadamard<Complex<BigFloat>, BLOCK>(m, n, g, print); | ||
#endif | ||
} | ||
catch (exception& e) | ||
{ | ||
ReportException(e); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Explicitly accumulating the squares and then square-rooting should be fine for random data but can lead to very low accuracy in extreme cases. There is the routine
El::UpdateScaledSquare
for accumulating the square of the two norm as the product of a scale and a scaled square that should generally be preferred (and which is used within the norm computation routines).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My main goal there was to provide a simple baseline that ensures there's no major issues (e.g. block vs element distributions causing problems) while not just reimplementing the method that's being tested. If you think it would be better to switch to
UpdateScaledSquare
in this case, I can do that.