The Kokkos assembly example.
This example depends on simple-solver, poisson-solver, three-pt-stencil-solver, .
Introduction
This example solves a 1D Poisson equation:
using a finite difference method on an equidistant grid with K
discretization points (K
can be controlled with a command line parameter).
The resulting CSR matrix is assembled using Kokkos kernels. This example show how to use Ginkgo data with Kokkos kernels.
Notes
If this example is built as part of Ginkgo, it is advised to configure Ginkgo with -DGINKGO_WITH_CCACHE=OFF
to prevent incompabilities with Kokkos' compiler wrapper for nvcc
.
The commented program
* of arrays.
*
* @tparam ValueType_c The value type of the matrix elements, might have
* other cv qualifiers than ValueType
* @tparam IndexType_c The index type of the matrix elements, might have
* other cv qualifiers than IndexType
* /
template <typename ValueType_c, typename IndexType_c>
struct type {
using index_array = typename index_mapper::template type<IndexType_c>;
using value_array = typename value_mapper::template type<ValueType_c>;
/ **
* Constructor based on size and raw pointers
*
* @param size The number of stored elements
* @param row_idxs Pointer to the row indices
* @param col_idxs Pointer to the column indices
* @param values Pointer to the values
*
* @
return An
object which has each
gko::array of the
* device_matrix_data mapped to a Kokkos view
* /
static type map(size_type size, IndexType_c* row_idxs,
IndexType_c* col_idxs, ValueType_c* values)
{
return {index_mapper::map(row_idxs, size),
index_mapper::map(col_idxs, size),
value_mapper::map(values, size)};
}
index_array row_idxs;
index_array col_idxs;
value_array values;
};
static type<ValueType, IndexType> map(
device_matrix_data<ValueType, IndexType>& md)
{
assert_compatibility<MemorySpace>(md);
return type<ValueType, IndexType>::map(
md.get_num_stored_elements(), md.get_row_idxs(), md.get_col_idxs(),
md.get_values());
}
static type<const ValueType, const IndexType> map(
const device_matrix_data<ValueType, IndexType>& md)
{
assert_compatibility<MemorySpace>(md);
return type<const ValueType, const IndexType>::map(
md.get_num_stored_elements(), md.get_const_row_idxs(),
md.get_const_col_idxs(), md.get_const_values());
}
};
}
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition logger.hpp:25
Creates a stencil matrix in CSR format for the given number of discretization points.
template <typename ValueType, typename IndexType>
{
const auto discretization_points = matrix->
get_size ()[0];
const dim< 2 > & get_size() const noexcept
Returns the size of the operator.
Definition lin_op.hpp:211
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition polymorphic_object.hpp:235
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition sparsity_csr.hpp:22
Over-allocate storage for the matrix elements. Each row has 3 entries, except for the first and last one. To handle each row uniformly, we allocate space for 3x the number of rows.
discretization_points * 3);
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
Create Kokkos views on Ginkgo data.
auto k_md = gko::ext::kokkos::map_data(md);
Create the matrix entries. This also creates zero entries for the first and second row to handle all rows uniformly.
Kokkos::parallel_for(
"generate_stencil_matrix" , md.get_num_stored_elements(),
KOKKOS_LAMBDA(int i) {
const ValueType coefs[] = {-1, 2, -1};
auto ofs = static_cast< IndexType> ((i % 3) - 1);
auto row = static_cast< IndexType> (i / 3);
auto col = row + ofs;
To prevent branching, a mask is used to set the entry to zero, if the column is out-of-bounds
auto mask =
static_cast< IndexType> (0 <= col && col < discretization_points);
k_md.row_idxs[i] = mask * row;
k_md.col_idxs[i] = mask * col;
k_md.values[i] = mask * coefs[ofs + 1];
});
Add up duplicate (zero) entries.
Build Csr matrix.
matrix->
read (std::move(md));
}
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.
Generates the RHS vector given f
and the boundary conditions.
template <typename Closure, typename ValueType>
void generate_rhs(Closure&& f, ValueType u0, ValueType u1,
{
const auto discretization_points = rhs->get_size()[0];
auto k_rhs = gko::ext::kokkos::map_data(rhs);
Kokkos::parallel_for(
"generate_rhs" , discretization_points, KOKKOS_LAMBDA(int i) {
const ValueType h = 1.0 / (discretization_points + 1);
const ValueType xi = ValueType(i + 1) * h;
k_rhs(i, 0) = -f(xi) * h * h;
if (i == 0) {
k_rhs(i, 0) += u0;
}
if (i == discretization_points - 1) {
k_rhs(i, 0) += u1;
}
});
}
Dense is a matrix format which explicitly stores all values of the matrix.
Definition sparsity_csr.hpp:26
Computes the 1-norm of the error given the computed u
and the correct solution function correct_u
.
template <typename Closure, typename ValueType>
double calculate_error(int discretization_points,
Closure&& correct_u)
{
auto k_u = gko::ext::kokkos::map_data(u);
auto error = 0.0;
Kokkos::parallel_reduce(
"calculate_error" , discretization_points,
KOKKOS_LAMBDA(int i, double & lsum) {
const auto h = 1.0 / (discretization_points + 1);
const auto xi = (i + 1) * h;
lsum += Kokkos::abs((k_u(i, 0) - correct_u(xi)) /
Kokkos::abs(correct_u(xi)));
},
error);
return error;
}
int main(int argc, char * argv[])
{
Some shortcuts
using ValueType = double;
using IndexType = int;
A block-Jacobi preconditioner is a block-diagonal linear operator, obtained by inverting the diagonal...
Definition jacobi.hpp:190
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition cg.hpp:51
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:326
Print help message. For details on the kokkos-options see https://kokkos.github.io/kokkos-core-wiki/ProgrammingGuide/Initialization.html#initialization-by-command-line-arguments
if (argc == 2 && (std::string(argv[1]) == "--help" )) {
std::cerr << "Usage: " << argv[0]
<< " [discretization_points] [kokkos-options]" << std::endl;
Kokkos::ScopeGuard kokkos(argc, argv);
std::exit(1);
}
Kokkos::ScopeGuard kokkos(argc, argv);
const auto discretization_points =
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:86
chooses the executor that corresponds to the Kokkos DefaultExecutionSpace
auto exec = gko::ext::kokkos::create_default_executor();
problem:
auto correct_u = [] KOKKOS_FUNCTION(ValueType x) { return x * x * x; };
auto f = [] KOKKOS_FUNCTION(ValueType x) { return ValueType{6} * x; };
auto u0 = correct_u(0);
auto u1 = correct_u(1);
initialize vectors
auto rhs = vec::create(exec,
gko::dim<2> (discretization_points, 1));
generate_rhs(f, u0, u1, rhs.get());
auto u = vec::create(exec,
gko::dim<2> (discretization_points, 1));
void fill(const ValueType value)
Fill the dense matrix with a given value.
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:27
initialize the stencil matrix
auto A = share(mtx::create(
exec,
gko::dim<2> {discretization_points, discretization_points}));
generate_stencil_matrix(A.get());
const RealValueType reduction_factor{1e-7};
Generate solver and solve the system
cg::build()
.with_criteria(
gko::stop::Iteration::build().with_max_iters(discretization_points),
reduction_factor))
.with_preconditioner(bj::build())
.on(exec)
->generate(A)
->apply(rhs, u);
std::cout << "\nSolve complete."
<< "\nThe average relative error is "
<< calculate_error(discretization_points, u.get(), correct_u) /
discretization_points
<< std::endl;
}
The ResidualNorm class is a stopping criterion which stops the iteration process when the actual resi...
Definition residual_norm.hpp:110
Results
Example output:
> ./kokkos-assembly
Solve complete.
The average relative error is 1.05488e-11
The actual error depends on the used hardware.
The plain program
#include <iostream>
#include <string>
#include <Kokkos_Core.hpp>
#include <ginkgo/extensions/kokkos.hpp>
#include <ginkgo/ginkgo.hpp>
namespace gko::ext::kokkos::detail {
template <typename ValueType, typename IndexType, typename MemorySpace>
struct mapper<device_matrix_data<ValueType, IndexType>, MemorySpace> {
using index_mapper = mapper<array<IndexType>, MemorySpace>;
using value_mapper = mapper<array<ValueType>, MemorySpace>;
template <typename ValueType_c, typename IndexType_c>
struct type {
using index_array = typename index_mapper::template type<IndexType_c>;
using value_array = typename value_mapper::template type<ValueType_c>;
static type map(size_type size, IndexType_c* row_idxs,
IndexType_c* col_idxs, ValueType_c* values)
{
return {index_mapper::map(row_idxs, size),
index_mapper::map(col_idxs, size),
value_mapper::map(values, size)};
}
index_array row_idxs;
index_array col_idxs;
value_array values;
};
static type<ValueType, IndexType> map(
device_matrix_data<ValueType, IndexType>& md)
{
assert_compatibility<MemorySpace>(md);
return type<ValueType, IndexType>::map(
md.get_num_stored_elements(), md.get_row_idxs(), md.get_col_idxs(),
md.get_values());
}
static type<const ValueType, const IndexType> map(
const device_matrix_data<ValueType, IndexType>& md)
{
assert_compatibility<MemorySpace>(md);
return type<const ValueType, const IndexType>::map(
md.get_num_stored_elements(), md.get_const_row_idxs(),
md.get_const_col_idxs(), md.get_const_values());
}
};
}
template <typename ValueType, typename IndexType>
{
const auto discretization_points = matrix->
get_size ()[0];
discretization_points * 3);
auto k_md = gko::ext::kokkos::map_data(md);
Kokkos::parallel_for(
"generate_stencil_matrix" , md.get_num_stored_elements(),
KOKKOS_LAMBDA(int i) {
const ValueType coefs[] = {-1, 2, -1};
auto ofs = static_cast< IndexType> ((i % 3) - 1);
auto row = static_cast< IndexType> (i / 3);
auto col = row + ofs;
auto mask =
static_cast< IndexType> (0 <= col && col < discretization_points);
k_md.row_idxs[i] = mask * row;
k_md.col_idxs[i] = mask * col;
k_md.values[i] = mask * coefs[ofs + 1];
});
md.sum_duplicates();
matrix->
read (std::move(md));
}
template <typename Closure, typename ValueType>
void generate_rhs(Closure&& f, ValueType u0, ValueType u1,
{
const auto discretization_points =
rhs ->get_size()[0];
auto k_rhs = gko::ext::kokkos::map_data(rhs);
Kokkos::parallel_for(
"generate_rhs" , discretization_points, KOKKOS_LAMBDA(int i) {
const ValueType h = 1.0 / (discretization_points + 1);
const ValueType xi = ValueType(i + 1) * h;
k_rhs(i, 0) = -f(xi) * h * h;
if (i == 0) {
k_rhs(i, 0) += u0;
}
if (i == discretization_points - 1) {
k_rhs(i, 0) += u1;
}
});
}
template <typename Closure, typename ValueType>
double calculate_error(int discretization_points,
Closure&& correct_u)
{
auto k_u = gko::ext::kokkos::map_data(u);
auto error = 0.0;
Kokkos::parallel_reduce(
"calculate_error" , discretization_points,
KOKKOS_LAMBDA(int i, double & lsum) {
const auto h = 1.0 / (discretization_points + 1);
const auto xi = (i + 1) * h;
lsum += Kokkos::abs((k_u(i, 0) - correct_u(xi)) /
Kokkos::abs(correct_u(xi)));
},
error);
return error;
}
int main(int argc, char * argv[])
{
using ValueType = double;
using IndexType = int;
if (argc == 2 && (std::string(argv[1]) == "--help" )) {
std::cerr << "Usage: " << argv[0]
<< " [discretization_points] [kokkos-options]" << std::endl;
Kokkos::ScopeGuard kokkos(argc, argv);
std::exit(1);
}
Kokkos::ScopeGuard kokkos(argc, argv);
const auto discretization_points =
auto exec = gko::ext::kokkos::create_default_executor();
auto correct_u = [] KOKKOS_FUNCTION(ValueType x) { return x * x * x; };
auto f = [] KOKKOS_FUNCTION(ValueType x) { return ValueType{6} * x; };
auto u0 = correct_u(0);
auto u1 = correct_u(1);
generate_rhs(f, u0, u1,
rhs .get());
auto u = vec::create(exec,
gko::dim<2> (discretization_points, 1));
auto A =
share (mtx::create(
exec,
gko::dim<2> {discretization_points, discretization_points}));
generate_stencil_matrix(A.get());
const RealValueType reduction_factor{1e-7};
cg::build()
.with_criteria(
gko::stop::Iteration::build().with_max_iters(discretization_points),
reduction_factor))
.with_preconditioner(bj::build())
.on(exec)
->generate(A)
->apply(rhs, u);
std::cout << "\nSolve complete."
<< "\nThe average relative error is "
<< calculate_error(discretization_points, u.get(), correct_u) /
discretization_points
<< std::endl;
}
@ rhs
the input is right hand side
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:226