The Kokkos assembly example.
This example depends on simple-solver, poisson-solver, three-pt-stencil-solver, .
 
 Introduction 
This example solves a 1D Poisson equation:
using a finite difference method on an equidistant grid with K  discretization points (K  can be controlled with a command line parameter).
The resulting CSR matrix is assembled using Kokkos kernels. This example show how to use Ginkgo data with Kokkos kernels.
Notes  
If this example is built as part of Ginkgo, it is advised to configure Ginkgo with -DGINKGO_WITH_CCACHE=OFF  to prevent incompabilities with Kokkos' compiler wrapper for nvcc . 
The commented program 
     *
     * @tparam ValueType_c  The value type of the matrix elements, might have
     *                      other cv qualifiers than ValueType
     * @tparam IndexType_c  The index type of the matrix elements, might have
     *                      other cv qualifiers than IndexType
     * /
    template  <typename  ValueType_c, typename  IndexType_c>
    struct  type {
        using  index_array = typename  index_mapper::template type<IndexType_c>;
        using  value_array = typename  value_mapper::template type<ValueType_c>;
 
        / **
         * Constructor based on size and raw pointers
         *
         * @param size  The number of stored elements
         * @param row_idxs  Pointer to the row indices
         * @param col_idxs  Pointer to the column indices
         * @param values  Pointer to the values
         *
         * @
return   An 
object  which has each 
gko::array  of the
         *          device_matrix_data mapped to a Kokkos view
         * /
        static  type map(size_type size, IndexType_c* row_idxs,
                        IndexType_c* col_idxs, ValueType_c* values)
        {
            return  {index_mapper::map(row_idxs, size),
                    index_mapper::map(col_idxs, size),
                    value_mapper::map(values, size)};
        }
 
        index_array row_idxs;
        index_array col_idxs;
        value_array values;
    };
 
    static  type<ValueType, IndexType> map(
        device_matrix_data<ValueType, IndexType>& md)
    {
        assert_compatibility<MemorySpace>(md);
        return  type<ValueType, IndexType>::map(
            md.get_num_stored_elements(), md.get_row_idxs(), md.get_col_idxs(),
            md.get_values());
    }
 
    static  type<const ValueType, const IndexType> map(
        const  device_matrix_data<ValueType, IndexType>& md)
    {
        assert_compatibility<MemorySpace>(md);
        return  type<const ValueType, const IndexType>::map(
            md.get_num_stored_elements(), md.get_const_row_idxs(),
            md.get_const_col_idxs(), md.get_const_values());
    }
};
 
 
}  
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition  array.hpp:166
Creates a stencil matrix in CSR format for the given number of discretization points.
template  <typename  ValueType, typename  IndexType>
{
    const  auto  discretization_points = matrix->
get_size ()[0];
 
const dim< 2 > & get_size() const noexcept
Returns the size of the operator.
Definition  lin_op.hpp:210
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition  polymorphic_object.hpp:243
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition  csr.hpp:126
Over-allocate storage for the matrix elements. Each row has 3 entries, except for the first and last one. To handle each row uniformly, we allocate space for 3x the number of rows.
                                                 discretization_points * 3);
This type is a device-side equivalent to matrix_data.
Definition  device_matrix_data.hpp:36
 Create Kokkos views on Ginkgo data.
auto  k_md = gko::ext::kokkos::map_data(md);
Create the matrix entries. This also creates zero entries for the first and second row to handle all rows uniformly.
Kokkos::parallel_for(
    "generate_stencil_matrix" , md.get_num_stored_elements(),
    KOKKOS_LAMBDA(int  i) {
        const  ValueType coefs[] = {-1, 2, -1};
        auto  ofs = static_cast< IndexType> ((i % 3) - 1);
        auto  row = static_cast< IndexType> (i / 3);
        auto  col = row + ofs;
To prevent branching, a mask is used to set the entry to zero, if the column is out-of-bounds
    auto  mask =
        static_cast< IndexType> (0 <= col && col < discretization_points);
 
    k_md.row_idxs[i] = mask * row;
    k_md.col_idxs[i] = mask * col;
    k_md.values[i] = mask * coefs[ofs + 1];
});
Add up duplicate (zero) entries.
Build Csr matrix.
    matrix->read(std::move(md));
}
Generates the RHS vector given f  and the boundary conditions.
template  <typename  Closure, typename  ValueType>
void  generate_rhs(Closure&& f, ValueType u0, ValueType u1,
{
    const  auto  discretization_points = rhs->get_size()[0];
    auto  k_rhs = gko::ext::kokkos::map_data(rhs);
    Kokkos::parallel_for(
        "generate_rhs" , discretization_points, KOKKOS_LAMBDA(int  i) {
            const  ValueType h = 1.0 / (discretization_points + 1);
            const  ValueType xi = ValueType(i + 1) * h;
            k_rhs(i, 0) = -f(xi) * h * h;
            if  (i == 0) {
                k_rhs(i, 0) += u0;
            }
            if  (i == discretization_points - 1) {
                k_rhs(i, 0) += u1;
            }
        });
}
Dense is a matrix format which explicitly stores all values of the matrix.
Definition  dense.hpp:120
Computes the 1-norm of the error given the computed u  and the correct solution function correct_u .
template  <typename  Closure, typename  ValueType>
double  calculate_error(int  discretization_points,
                       Closure&& correct_u)
{
    auto  k_u = gko::ext::kokkos::map_data(u);
    auto  error = 0.0;
    Kokkos::parallel_reduce(
        "calculate_error" , discretization_points,
        KOKKOS_LAMBDA(int  i, double & lsum) {
            const  auto  h = 1.0 / (discretization_points + 1);
            const  auto  xi = (i + 1) * h;
            lsum += Kokkos::abs((k_u(i, 0) - correct_u(xi)) /
                                Kokkos::abs(correct_u(xi)));
        },
        error);
    return  error;
}
 
 
int  main(int  argc, char * argv[])
{
Some shortcuts
using  ValueType = double;
using  IndexType = int;
 
A block-Jacobi preconditioner is a block-diagonal linear operator, obtained by inverting the diagonal...
Definition  jacobi.hpp:190
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition  cg.hpp:50
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition  math.hpp:264
Print help message. For details on the kokkos-options see https://kokkos.github.io/kokkos-core-wiki/ProgrammingGuide/Initialization.html#initialization-by-command-line-arguments 
if  (argc == 2 && (std::string(argv[1]) == "--help" )) {
    std::cerr << "Usage: "  << argv[0]
              << " [discretization_points] [kokkos-options]"  << std::endl;
    Kokkos::ScopeGuard kokkos(argc, argv);  
    std::exit(1);
}
 
Kokkos::ScopeGuard kokkos(argc, argv);
 
const  auto  discretization_points =
std::size_t size_type
Integral type used for allocation quantities.
Definition  types.hpp:90
chooses the executor that corresponds to the Kokkos DefaultExecutionSpace
auto  exec = gko::ext::kokkos::create_default_executor();
problem:
auto  correct_u = [] KOKKOS_FUNCTION(ValueType x) { return  x * x * x; };
auto  f = [] KOKKOS_FUNCTION(ValueType x) { return  ValueType{6} * x; };
auto  u0 = correct_u(0);
auto  u1 = correct_u(1);
initialize vectors
auto  rhs = vec::create(exec, 
gko::dim<2> (discretization_points, 1));
generate_rhs(f, u0, u1, rhs.get());
auto  u = vec::create(exec, 
gko::dim<2> (discretization_points, 1));
void fill(const ValueType value)
Fill the dense matrix with a given value.
A type representing the dimensions of a multidimensional object.
Definition  dim.hpp:26
initialize the stencil matrix
auto  A = share(mtx::create(
    exec, 
gko::dim<2> {discretization_points, discretization_points}));
generate_stencil_matrix(A.get());
 
const  RealValueType reduction_factor{1e-7};
Generate solver and solve the system
    cg::build()
        .with_criteria(
            gko::stop::Iteration::build().with_max_iters(discretization_points),
            gko::stop::ResidualNorm<ValueType>::build().with_reduction_factor(
                reduction_factor))
        .with_preconditioner(bj::build())
        .on(exec)
        ->generate(A)
        ->apply(rhs, u);
 
    std::cout << "\nSolve complete." 
              << "\nThe average relative error is " 
              << calculate_error(discretization_points, u.get(), correct_u) /
                     discretization_points
              << std::endl;
}
 
Results 
Example output: 
> ./kokkos-assembly
 
Solve complete.
The average relative error is 1.05488e-11
The actual error depends on the used hardware. 
The plain program 
 
#include <iostream> 
#include <string> 
 
#include <Kokkos_Core.hpp> 
 
#include <ginkgo/ginkgo.hpp> 
 
#include <ginkgo/extensions/kokkos.hpp> 
 
 
namespace  gko::ext::kokkos::detail {
 
template  <typename  ValueType, typename  IndexType, typename  MemorySpace>
struct  mapper<device_matrix_data<ValueType, IndexType>, MemorySpace> {
    using  index_mapper = mapper<array<IndexType>, MemorySpace>;
    using  value_mapper = mapper<array<ValueType>, MemorySpace>;
    template  <typename  ValueType_c, typename  IndexType_c>
    struct  type {
        using  index_array = typename  index_mapper::template type<IndexType_c>;
        using  value_array = typename  value_mapper::template type<ValueType_c>;
        static  type map(size_type size, IndexType_c* row_idxs,
                        IndexType_c* col_idxs, ValueType_c* values)
        {
            return  {index_mapper::map(row_idxs, size),
                    index_mapper::map(col_idxs, size),
                    value_mapper::map(values, size)};
        }
 
        index_array row_idxs;
        index_array col_idxs;
        value_array values;
    };
 
    static  type<ValueType, IndexType> map(
        device_matrix_data<ValueType, IndexType>& md)
    {
        assert_compatibility<MemorySpace>(md);
        return  type<ValueType, IndexType>::map(
            md.get_num_stored_elements(), md.get_row_idxs(), md.get_col_idxs(),
            md.get_values());
    }
 
    static  type<const ValueType, const IndexType> map(
        const  device_matrix_data<ValueType, IndexType>& md)
    {
        assert_compatibility<MemorySpace>(md);
        return  type<const ValueType, const IndexType>::map(
            md.get_num_stored_elements(), md.get_const_row_idxs(),
            md.get_const_col_idxs(), md.get_const_values());
    }
};
 
 
}  
 
 
template  <typename  ValueType, typename  IndexType>
{
    const  auto  discretization_points = matrix->
get_size ()[0];
 
 
                                                     discretization_points * 3);
 
    auto  k_md = gko::ext::kokkos::map_data(md);
 
    Kokkos::parallel_for(
        "generate_stencil_matrix" , md.get_num_stored_elements(),
        KOKKOS_LAMBDA(int  i) {
            const  ValueType coefs[] = {-1, 2, -1};
            auto  ofs = static_cast< IndexType> ((i % 3) - 1);
            auto  row = static_cast< IndexType> (i / 3);
            auto  col = row + ofs;
 
            auto  mask =
                static_cast< IndexType> (0 <= col && col < discretization_points);
 
            k_md.row_idxs[i] = mask * row;
            k_md.col_idxs[i] = mask * col;
            k_md.values[i] = mask * coefs[ofs + 1];
        });
 
    md.sum_duplicates();
 
    matrix->read(std::move(md));
}
 
 
template  <typename  Closure, typename  ValueType>
void  generate_rhs(Closure&& f, ValueType u0, ValueType u1,
{
    const  auto  discretization_points = 
rhs ->get_size()[0];
 
    auto  k_rhs = gko::ext::kokkos::map_data(rhs);
    Kokkos::parallel_for(
        "generate_rhs" , discretization_points, KOKKOS_LAMBDA(int  i) {
            const  ValueType h = 1.0 / (discretization_points + 1);
            const  ValueType xi = ValueType(i + 1) * h;
            k_rhs(i, 0) = -f(xi) * h * h;
            if  (i == 0) {
                k_rhs(i, 0) += u0;
            }
            if  (i == discretization_points - 1) {
                k_rhs(i, 0) += u1;
            }
        });
}
 
 
template  <typename  Closure, typename  ValueType>
double  calculate_error(int  discretization_points,
                       Closure&& correct_u)
{
    auto  k_u = gko::ext::kokkos::map_data(u);
    auto  error = 0.0;
    Kokkos::parallel_reduce(
        "calculate_error" , discretization_points,
        KOKKOS_LAMBDA(int  i, double & lsum) {
            const  auto  h = 1.0 / (discretization_points + 1);
            const  auto  xi = (i + 1) * h;
            lsum += Kokkos::abs((k_u(i, 0) - correct_u(xi)) /
                                Kokkos::abs(correct_u(xi)));
        },
        error);
    return  error;
}
 
 
int  main(int  argc, char * argv[])
{
    using  ValueType = double;
    using  IndexType = int;
 
 
    if  (argc == 2 && (std::string(argv[1]) == "--help" )) {
        std::cerr << "Usage: "  << argv[0]
                  << " [discretization_points] [kokkos-options]"  << std::endl;
        Kokkos::ScopeGuard kokkos(argc, argv);  
        std::exit(1);
    }
 
    Kokkos::ScopeGuard kokkos(argc, argv);
 
    const  auto  discretization_points =
 
    auto  exec = gko::ext::kokkos::create_default_executor();
 
    auto  correct_u = [] KOKKOS_FUNCTION(ValueType x) { return  x * x * x; };
    auto  f = [] KOKKOS_FUNCTION(ValueType x) { return  ValueType{6} * x; };
    auto  u0 = correct_u(0);
    auto  u1 = correct_u(1);
 
    generate_rhs(f, u0, u1, 
rhs .get());
    auto  u = vec::create(exec, 
gko::dim<2> (discretization_points, 1));
 
 
    auto  A = 
share (mtx::create(
 
        exec, 
gko::dim<2> {discretization_points, discretization_points}));
    generate_stencil_matrix(A.get());
 
    const  RealValueType reduction_factor{1e-7};
    cg::build()
        .with_criteria(
            gko::stop::Iteration::build().with_max_iters(discretization_points),
            gko::stop::ResidualNorm<ValueType>::build().with_reduction_factor(
                reduction_factor))
        .with_preconditioner(bj::build())
        .on(exec)
        ->generate(A)
        ->apply(rhs, u);
 
    std::cout << "\nSolve complete." 
              << "\nThe average relative error is " 
              << calculate_error(discretization_points, u.get(), correct_u) /
                     discretization_points
              << std::endl;
}
@ rhs
the input is right hand side
Definition  solver_base.hpp:41
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition  utils_helper.hpp:224