MyraMath
ptrmm2


Source: tests/pdense/ptrmm2.cpp

1 // ========================================================================= //
2 // This file is part of MyraMath, copyright (c) 2014-2019 by Ryan A Chilton //
3 // and distributed by MyraCore, LLC. See LICENSE.txt for license terms. //
4 // ========================================================================= //
5 
11 // Containers.
13 #include <myramath/dense/Matrix.h>
17 
18 // Serial algorithms.
21 #include <myramath/dense/tril.h>
22 #include <myramath/dense/triu.h>
23 #include <myramath/dense/trmm.h>
24 
25 // Parallel algorithms.
26 #include <myramath/pdense/ptrmm.h>
27 #include <myramath/pdense/pgemm.h>
29 
30 // Reporting.
31 #include <tests/myratest.h>
32 
33 using namespace myra;
34 typedef pdense::Options Options;
35 
36 namespace {
37 
38 template<class Number> void test(int I, int J, typename ReflectPrecision<Number>::type tolerance)
39  {
40  typedef typename ReflectPrecision<Number>::type Precision;
41  myra::out() << typestring<Number>() << std::endl;
42  // Make random L
43  auto L = LowerMatrix<Number>::random(I);
44  Number one(1);
45  auto alpha = random<Number>();
46  // Initialize options.
47  auto options = Options::create().set_nthreads(4).set_blocksize(64);
48  // Try all variations of side/op.
49  std::vector<char> sides;
50  sides.push_back('L');
51  sides.push_back('R');
52  std::vector<char> ops;
53  ops.push_back('N');
54  ops.push_back('T');
55  ops.push_back('H');
56  ops.push_back('C');
57  for (int s = 0; s < sides.size(); ++s)
58  for (int o = 0; o < ops.size(); ++o)
59  {
60  char side = sides[s];
61  char op = ops[o];
62  // Generate random X/B
63  auto X = (side == 'L') ? Matrix<Number>::random(I,J) : Matrix<Number>::random(J,I);
64  auto B = (side == 'L') ? alpha*pgemm(L.make_Matrix(),op,X) : alpha*pgemm(X,L.make_Matrix(),op);
65  // Multiply using both trmm() and ptrmm()
66  auto X_serial = X;
67  auto X_parallel = X;
68  trmm_inplace(side,op,L,X_serial,'N',alpha);
69  ptrmm_inplace(side,op,L,X_parallel,'N',alpha,options);
70  // Compare errors.
71  Precision e_serial = frobenius(X_serial-B) / frobenius(B);
72  Precision e_parallel = frobenius(X_parallel-B) / frobenius(B);
73  if (side == 'L')
74  {
75  myra::out() << " |L^" << op << "*B-X| serial = " << e_serial << std::endl;
76  myra::out() << " |L^" << op << "*B-X| parallel = " << e_parallel << std::endl;
77  }
78  else if (side == 'R')
79  {
80  myra::out() << " |B*L^" << op << "-X| serial = " << e_serial << std::endl;
81  myra::out() << " |B*L^" << op << "-X| parallel = " << e_parallel << std::endl;
82  }
83  REQUIRE(e_serial < tolerance);
84  REQUIRE(e_parallel < tolerance);
85  }
86  }
87 
88 } // namespace
89 
90 ADD_TEST("ptrmm2","[pdense][parallel]")
91  {
92  int I = 256;
93  int J = 128;
94  test<NumberS>(I,J,1.0e-4f);
95  test<NumberD>(I,J,1.0e-8);
96  test<NumberC>(I,J,1.0e-4f);
97  test<NumberZ>(I,J,1.0e-8);
98  }
99 
Interface class for representing subranges of dense Matrix&#39;s.
Tabulates an IxJ matrix. Allows random access, has column major layout to be compatible with BLAS/LAP...
Definition: bdsqr.h:20
Routines for computing Frobenius norms of various algebraic containers.
static Matrix< Number > random(int I, int J)
Generates a random Matrix of specified size.
Definition: Matrix.cpp:353
Options pack for routines in /pdense.
Definition: Options.h:24
Range construct for a lower triangular matrix stored in rectangular packed format.
Definition: syntax.dox:1
Routines for multiplying by a triangular Matrix or LowerMatrix.
Specialized container for a lower triangular matrix, O(N^2/2) storage. Used by symmetry exploiting ma...
Returns the lower triangle of a dense Matrix.
Returns the upper triangle of a dense Matrix.
Various utility functions/classes related to scalar Number types.
General purpose dense matrix container, O(i*j) storage.
Options pack for routines in /pdense.
Reflects Precision trait for a Number, scalar Number types should specialize it.
Definition: Number.h:33
Thread-parallel version of dense/trmm.h, triangular Matrix * dense Matrix multiplication.
Simplistic random number functions.
Thread-parallel version of dense/gemm.h, Matrix*Matrix multiplication.
static LowerMatrix< Number > random(int N)
Generates a random LowerMatrix of specified size.
Definition: LowerMatrix.cpp:249


Results: [PASS]

float
|L^N*B-X| serial = 2.53095e-07
|L^N*B-X| parallel = 2.18184e-07
|L^T*B-X| serial = 2.56949e-07
|L^T*B-X| parallel = 2.02036e-07
|L^H*B-X| serial = 2.5948e-07
|L^H*B-X| parallel = 2.02457e-07
|L^C*B-X| serial = 2.57776e-07
|L^C*B-X| parallel = 2.19654e-07
|B*L^N-X| serial = 2.6558e-07
|B*L^N-X| parallel = 1.98404e-07
|B*L^T-X| serial = 2.65203e-07
|B*L^T-X| parallel = 2.19372e-07
|B*L^H-X| serial = 2.70392e-07
|B*L^H-X| parallel = 2.21607e-07
|B*L^C-X| serial = 2.66407e-07
|B*L^C-X| parallel = 1.97841e-07
double
|L^N*B-X| serial = 3.87107e-16
|L^N*B-X| parallel = 2.92475e-16
|L^T*B-X| serial = 3.81256e-16
|L^T*B-X| parallel = 2.81406e-16
|L^H*B-X| serial = 3.85273e-16
|L^H*B-X| parallel = 2.82781e-16
|L^C*B-X| serial = 3.84984e-16
|L^C*B-X| parallel = 2.9615e-16
|B*L^N-X| serial = 4.04396e-16
|B*L^N-X| parallel = 2.66533e-16
|B*L^T-X| serial = 4.07734e-16
|B*L^T-X| parallel = 2.91352e-16
|B*L^H-X| serial = 4.02137e-16
|B*L^H-X| parallel = 2.87477e-16
|B*L^C-X| serial = 4.00793e-16
|B*L^C-X| parallel = 2.63748e-16
std::complex<float>
|L^N*B-X| serial = 2.6978e-07
|L^N*B-X| parallel = 2.2849e-07
|L^T*B-X| serial = 2.66903e-07
|L^T*B-X| parallel = 2.15101e-07
|L^H*B-X| serial = 2.70349e-07
|L^H*B-X| parallel = 2.15684e-07
|L^C*B-X| serial = 2.70909e-07
|L^C*B-X| parallel = 2.29597e-07
|B*L^N-X| serial = 2.74094e-07
|B*L^N-X| parallel = 2.18348e-07
|B*L^T-X| serial = 2.76707e-07
|B*L^T-X| parallel = 2.30205e-07
|B*L^H-X| serial = 2.74062e-07
|B*L^H-X| parallel = 2.28124e-07
|B*L^C-X| serial = 2.72584e-07
|B*L^C-X| parallel = 2.17341e-07
std::complex<double>
|L^N*B-X| serial = 4.1294e-16
|L^N*B-X| parallel = 3.16149e-16
|L^T*B-X| serial = 4.09071e-16
|L^T*B-X| parallel = 3.10439e-16
|L^H*B-X| serial = 4.11319e-16
|L^H*B-X| parallel = 3.10745e-16
|L^C*B-X| serial = 4.12567e-16
|L^C*B-X| parallel = 3.15023e-16
|B*L^N-X| serial = 4.23415e-16
|B*L^N-X| parallel = 3.17058e-16
|B*L^T-X| serial = 4.20723e-16
|B*L^T-X| parallel = 3.19332e-16
|B*L^H-X| serial = 4.2498e-16
|B*L^H-X| parallel = 3.21324e-16
|B*L^C-X| serial = 4.17804e-16
|B*L^C-X| parallel = 3.16839e-16


Go back to Summary of /test programs.