MyraMath
pgetrf_tile


Source: tests/pdense/pgetrf_tile.cpp

1 // ========================================================================= //
2 // This file is part of MyraMath, copyright (c) 2014-2019 by Ryan A Chilton //
3 // and distributed by MyraCore, LLC. See LICENSE.txt for license terms. //
4 // ========================================================================= //
5 
11 // Containers.
13 #include <myramath/dense/Matrix.h>
16 
17 // Serial algorithms.
18 #include <myramath/dense/getrf.h>
19 #include <myramath/dense/trsm.h>
20 #include <myramath/dense/gemm.h>
22 #include <myramath/dense/swaps.h>
23 
24 // Parallel algorithms.
25 #include <myramath/pdense/pgemm.h>
26 #include <myramath/pdense/pgetrf.h>
27 #include <myramath/pdense/ptrsm.h>
29 
30 // Reporting.
31 #include <tests/myratest.h>
32 
33 using namespace myra;
34 typedef pdense::Options Options;
35 
36 namespace {
37 
38 template<class Number> void test(int I, int J, typename ReflectPrecision<Number>::type tolerance)
39  {
40  typedef typename ReflectPrecision<Number>::type Precision;
41  myra::out() << typestring<Number>() << std::endl;
42  // Construct random matrix and right hand side.
43  auto A = Matrix<Number>::random(I,I);
44  auto B = Matrix<Number>::random(I,J);
45  Number one(1);
46  // Initialize options.
47  auto options = Options::create().set_nthreads(4).set_blocksize(128);
48  // Measure factor/solve times for serial routines.
49  auto A_serial = A;
50  auto X_serial = B;
51  auto P_serial = getrf_inplace(A_serial);
52  swap_rows(P_serial,X_serial); // solve by P
53  trsm_inplace('L', 'L', 'N', A_serial, X_serial, 'U', one); // solve by L
54  trsm_inplace('L', 'U', 'N', A_serial, X_serial, 'N', one); // solve by U
55  Precision e_serial = frobenius(gemm(A,X_serial)-B) / frobenius(B);
56  // Measure factor/solve times for parallel routines.
57  auto A_parallel = A;
58  auto X_parallel = B;
59  auto P_parallel = pgetrf_tile(A_parallel,options);
60  swap_rows(P_parallel,X_parallel); // solve by P
61  ptrsm_inplace('L', 'L', 'N', A_parallel, X_parallel, 'U', one, options); // solve by L
62  ptrsm_inplace('L', 'U', 'N', A_parallel, X_parallel, 'N', one, options); // solve by U
63  Precision e_parallel = frobenius(gemm(A,X_parallel)-B) / frobenius(B);
64  // Report accuracy and time.
65  myra::out() << " |U\\(L\\B)-X|, serial = " << e_serial << std::endl;
66  myra::out() << " |U\\(L\\B)-X|, parallel = " << e_parallel << std::endl;
67  REQUIRE(e_serial < tolerance);
68  REQUIRE(e_parallel < tolerance);
69  }
70 
71 } // namespace
72 
73 ADD_TEST("pgetrf_tile","[pdense][parallel]")
74  {
75  int I = 512;
76  int J = 256;
77  test<NumberD> (I,J,1.0e-8);
78  test<NumberZ> (I,J,1.0e-8);
79  }
Interface class for representing subranges of dense Matrix&#39;s.
Routines for computing Frobenius norms of various algebraic containers.
static Matrix< Number > random(int I, int J)
Generates a random Matrix of specified size.
Definition: Matrix.cpp:353
Options pack for routines in /pdense.
Definition: Options.h:24
Thread-parallel version of dense/trsm.h, triangular Matrix \ dense Matrix backsolution.
Definition: syntax.dox:1
Routines for backsolving by a triangular Matrix or LowerMatrix.
Various utility functions/classes related to scalar Number types.
Routines related to swap sequences, often used during pivoting.
General purpose dense matrix container, O(i*j) storage.
Options pack for routines in /pdense.
Reflects Precision trait for a Number, scalar Number types should specialize it.
Definition: Number.h:33
Thread-parallel version of dense/gemm.h, Matrix*Matrix multiplication.
Variety of routines all for dense Matrix*Matrix multiplies. Delegates to the BLAS.
General purpose A = P&#39;*L*U decomposition for square Matrix&#39;s.
Thread-parallel versions of dense/getrf.h, LU decomposition of a Matrix.
std::vector< int > pgetrf_tile(const MatrixRange< NumberS > &A, pdense::Options options=pdense::Options::create())
Overwrites A with its P&#39;*L*U factorization, returns pivoting data P_swaps.
Definition: pgetrf_tile.cpp:213
Interface class for representing subranges of contiguous int&#39;s.


Results: [PASS]

double
|U\(L\B)-X|, serial = 4.00395e-13
|U\(L\B)-X|, parallel = 2.49123e-12
std::complex<double>
|U\(L\B)-X|, serial = 3.58178e-13
|U\(L\B)-X|, parallel = 3.91325e-13


Go back to Summary of /test programs.