Ginkgo Generated from branch based on master. Ginkgo version 1.8.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
The mixed-multigrid-preconditioned-solver program

The preconditioned solver example.

This example depends on multigrid-preconditioned-solver, mixed-multigrid-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

This example shows how to use the mixed-precision multigrid preconditioner.

In this example, we first read in a matrix from a file. The preconditioned CG solver is enhanced with a mixed-precision multigrid preconditioner. The example features the generating time and runtime of the CG solver.

The commented program

Parallel graph match (Pgm) is the aggregate method introduced in the paper M.
Definition pgm.hpp:53

Print version information

std::cout << gko::version_info::get() << std::endl;
const auto executor_string = argc >= 2 ? argv[1] : "reference";
static const version_info & get()
Returns an instance of version_info.
Definition version.hpp:140

Figure out where to run the code

std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
0, gko::ReferenceExecutor::create());
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1345

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)(); // throws if not valid
const int mixed_int = argc >= 3 ? std::atoi(argv[2]) : 1;
const bool use_mixed = mixed_int != 0; // nonzero uses mixed
std::cout << "Using mixed precision? " << use_mixed << std::endl;

Read data

auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:160

Create RHS as 1 and initial guess as 0

gko::size_type size = A->get_size()[0];
auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));
auto host_b = vec::create(exec->get_master(), gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 0.;
host_b->at(i, 0) = 1.;
}
auto x = vec::create(exec);
auto b = vec::create(exec);
x->copy_from(host_x);
b->copy_from(host_b);
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:86
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:27

Calculate initial residual by overwriting b

auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition dense.hpp:1540

copy b again

b->copy_from(host_b);

Prepare the stopping criteria

const gko::remove_complex<ValueType> tolerance = 1e-8;
auto iter_stop =
gko::share(gko::stop::Iteration::build().with_max_iters(100u).on(exec));
.with_baseline(gko::stop::mode::absolute)
.with_reduction_factor(tolerance)
.on(exec));
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
iter_stop->add_logger(logger);
tol_stop->add_logger(logger);
static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)
Creates a convergence logger.
Definition convergence.hpp:74
The ResidualNorm class is a stopping criterion which stops the iteration process when the actual resi...
Definition residual_norm.hpp:110
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:326
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:226

Create smoother factory (ir with bj)

auto inner_solver_gen =
gko::share(bj::build().with_max_block_size(1u).on(exec));
auto inner_solver_gen_f =
gko::share(bj_f::build().with_max_block_size(1u).on(exec));
auto smoother_gen = gko::share(
ir::build()
.with_solver(inner_solver_gen)
.with_relaxation_factor(static_cast<ValueType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec));
auto smoother_gen_f = gko::share(
ir_f::build()
.with_solver(inner_solver_gen_f)
.with_relaxation_factor(static_cast<MixedType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec));

Create MultigridLevel factory

auto mg_level_gen =
gko::share(pgm::build().with_deterministic(true).on(exec));
auto mg_level_gen_f =
gko::share(pgm_f::build().with_deterministic(true).on(exec));

Create CoarsestSolver factory

auto coarsest_gen = gko::share(
ir::build()
.with_solver(inner_solver_gen)
.with_relaxation_factor(static_cast<ValueType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
.on(exec));
auto coarsest_gen_f = gko::share(
ir_f::build()
.with_solver(inner_solver_gen_f)
.with_relaxation_factor(static_cast<MixedType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
.on(exec));

Create multigrid factory

std::shared_ptr<gko::LinOpFactory> multigrid_gen;
if (use_mixed) {
multigrid_gen =
mg::build()
.with_max_levels(10u)
.with_min_coarse_rows(2u)
.with_pre_smoother(smoother_gen, smoother_gen_f)
.with_post_uses_pre(true)
.with_mg_level(mg_level_gen, mg_level_gen_f)
.with_level_selector([](const gko::size_type level,
Definition lin_op.hpp:118

The first (index 0) level will use the first mg_level_gen, smoother_gen which are the factories with ValueType. The rest of levels (>= 1) will use the second (index 1) mg_level_gen2 and smoother_gen2 which use the MixedType. The rest of levels will use different type than the normal multigrid.

return level >= 1 ? 1 : 0;
})
.with_coarsest_solver(coarsest_gen_f)
.with_default_initial_guess(
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec);
} else {
multigrid_gen =
mg::build()
.with_max_levels(10u)
.with_min_coarse_rows(2u)
.with_pre_smoother(smoother_gen)
.with_post_uses_pre(true)
.with_mg_level(mg_level_gen)
.with_coarsest_solver(coarsest_gen)
.with_default_initial_guess(
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec);
}

Create solver factory

auto solver_gen = cg::build()
.with_criteria(iter_stop, tol_stop)
.with_preconditioner(multigrid_gen)
.on(exec);

Create solver

std::chrono::nanoseconds gen_time(0);
auto gen_tic = std::chrono::steady_clock::now();
auto solver = solver_gen->generate(A);
exec->synchronize();
auto gen_toc = std::chrono::steady_clock::now();
gen_time +=
std::chrono::duration_cast<std::chrono::nanoseconds>(gen_toc - gen_tic);

Solve system

exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
exec->synchronize();
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);

Calculate residual

auto res = gko::initialize<vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(res);
std::cout << "Initial residual norm sqrt(r^T r): \n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r): \n";
write(std::cout, res);

Print solver statistics

std::cout << "CG iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "CG generation time [ms]: "
<< static_cast<double>(gen_time.count()) / 1000000.0 << std::endl;
std::cout << "CG execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
std::cout << "CG execution time per iteration[ms]: "
<< static_cast<double>(time.count()) / 1000000.0 /
logger->get_num_iterations()
<< std::endl;
}

Results

This is the expected output:

Initial residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
4.3589
Final residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
1.69858e-09
CG iteration count: 39
CG generation time [ms]: 2.04293
CG execution time [ms]: 22.3874
CG execution time per iteration[ms]: 0.574036

Comments about programming and debugging

The plain program

#include <ginkgo/ginkgo.hpp>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
int main(int argc, char* argv[])
{
using ValueType = double;
using MixedType = float;
using IndexType = int;
std::cout << gko::version_info::get() << std::endl;
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
0, gko::ReferenceExecutor::create());
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
const int mixed_int = argc >= 3 ? std::atoi(argv[2]) : 1;
const bool use_mixed = mixed_int != 0; // nonzero uses mixed
std::cout << "Using mixed precision? " << use_mixed << std::endl;
auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
gko::size_type size = A->get_size()[0];
auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));
auto host_b = vec::create(exec->get_master(), gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 0.;
host_b->at(i, 0) = 1.;
}
auto x = vec::create(exec);
auto b = vec::create(exec);
x->copy_from(host_x);
b->copy_from(host_b);
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
b->copy_from(host_b);
const gko::remove_complex<ValueType> tolerance = 1e-8;
auto iter_stop =
gko::share(gko::stop::Iteration::build().with_max_iters(100u).on(exec));
.with_baseline(gko::stop::mode::absolute)
.with_reduction_factor(tolerance)
.on(exec));
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
iter_stop->add_logger(logger);
tol_stop->add_logger(logger);
auto inner_solver_gen =
gko::share(bj::build().with_max_block_size(1u).on(exec));
auto inner_solver_gen_f =
gko::share(bj_f::build().with_max_block_size(1u).on(exec));
auto smoother_gen = gko::share(
ir::build()
.with_solver(inner_solver_gen)
.with_relaxation_factor(static_cast<ValueType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec));
auto smoother_gen_f = gko::share(
ir_f::build()
.with_solver(inner_solver_gen_f)
.with_relaxation_factor(static_cast<MixedType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec));
auto mg_level_gen =
gko::share(pgm::build().with_deterministic(true).on(exec));
auto mg_level_gen_f =
gko::share(pgm_f::build().with_deterministic(true).on(exec));
auto coarsest_gen = gko::share(
ir::build()
.with_solver(inner_solver_gen)
.with_relaxation_factor(static_cast<ValueType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
.on(exec));
auto coarsest_gen_f = gko::share(
ir_f::build()
.with_solver(inner_solver_gen_f)
.with_relaxation_factor(static_cast<MixedType>(0.9))
.with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
.on(exec));
std::shared_ptr<gko::LinOpFactory> multigrid_gen;
if (use_mixed) {
multigrid_gen =
mg::build()
.with_max_levels(10u)
.with_min_coarse_rows(2u)
.with_pre_smoother(smoother_gen, smoother_gen_f)
.with_post_uses_pre(true)
.with_mg_level(mg_level_gen, mg_level_gen_f)
.with_level_selector([](const gko::size_type level,
return level >= 1 ? 1 : 0;
})
.with_coarsest_solver(coarsest_gen_f)
.with_default_initial_guess(
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec);
} else {
multigrid_gen =
mg::build()
.with_max_levels(10u)
.with_min_coarse_rows(2u)
.with_pre_smoother(smoother_gen)
.with_post_uses_pre(true)
.with_mg_level(mg_level_gen)
.with_coarsest_solver(coarsest_gen)
.with_default_initial_guess(
.with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
.on(exec);
}
auto solver_gen = cg::build()
.with_criteria(iter_stop, tol_stop)
.with_preconditioner(multigrid_gen)
.on(exec);
std::chrono::nanoseconds gen_time(0);
auto gen_tic = std::chrono::steady_clock::now();
auto solver = solver_gen->generate(A);
exec->synchronize();
auto gen_toc = std::chrono::steady_clock::now();
gen_time +=
std::chrono::duration_cast<std::chrono::nanoseconds>(gen_toc - gen_tic);
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
exec->synchronize();
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
auto res = gko::initialize<vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(res);
std::cout << "Initial residual norm sqrt(r^T r): \n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r): \n";
write(std::cout, res);
std::cout << "CG iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "CG generation time [ms]: "
<< static_cast<double>(gen_time.count()) / 1000000.0 << std::endl;
std::cout << "CG execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
std::cout << "CG execution time per iteration[ms]: "
<< static_cast<double>(time.count()) / 1000000.0 /
logger->get_num_iterations()
<< std::endl;
}
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition sparsity_csr.hpp:22
Dense is a matrix format which explicitly stores all values of the matrix.
Definition sparsity_csr.hpp:26
A block-Jacobi preconditioner is a block-diagonal linear operator, obtained by inverting the diagonal...
Definition jacobi.hpp:190
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition cg.hpp:51
FCG or the flexible conjugate gradient method is an iterative type Krylov subspace method which is su...
Definition fcg.hpp:56
Iterative refinement (IR) is an iterative method that uses another coarse method to approximate the e...
Definition ir.hpp:86
Multigrid methods have a hierarchy of many levels, whose corase level is a subset of the fine level,...
Definition multigrid.hpp:111
constexpr T one()
Returns the multiplicative identity for T.
Definition math.hpp:775
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:296