1 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_TEMPLATE_BASE_ 
    2 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_TEMPLATE_BASE_ 
   44 namespace device_specific
 
   89       return result_type(
new T(utils::numeric_type_to_string(numeric_type(statement,root_idx)), binder_.get(NULL), mapped_object::node_info(mapping, statement, root_idx)));
 
   92     template<
class NumericT>
 
   93     result_type operator()(
NumericT const & )
 const 
   95       return result_type(
new mapped_host_scalar(utils::type_to_string<NumericT>::value(), binder_.get(NULL)));
 
   99     template<
class NumericT>
 
  100     result_type operator()(scalar<NumericT> 
const & scal)
 const 
  102       return result_type(
new mapped_scalar(utils::type_to_string<NumericT>::value(), binder_.get(&
viennacl::traits::handle(scal))));
 
  106     template<
class NumericT>
 
  107     result_type operator()(vector_base<NumericT> 
const & vec)
 const 
  109       return result_type(
new mapped_vector(utils::type_to_string<NumericT>::value(), binder_.get(&
viennacl::traits::handle(vec))));
 
  113     template<
class NumericT>
 
  114     result_type operator()(implicit_vector_base<NumericT> 
const & )
 const 
  116       return result_type(
new mapped_implicit_vector(utils::type_to_string<NumericT>::value(), binder_.get(NULL)));
 
  120     template<
class NumericT>
 
  121     result_type operator()(matrix_base<NumericT> 
const & mat)
 const 
  123       return result_type(
new mapped_matrix(utils::type_to_string<NumericT>::value(), binder_.get(&
viennacl::traits::handle(mat)),
 
  128     template<
class NumericT>
 
  129     result_type operator()(implicit_matrix_base<NumericT> 
const & )
 const 
  131       return result_type(
new mapped_implicit_matrix(utils::type_to_string<NumericT>::value(), binder_.get(NULL)));
 
  136       mapping_type::key_type key(root_idx, leaf_t);
 
  137       scheduler::statement_node 
const & root_node = statement.array()[root_idx];
 
  140         mapping_.insert(mapping_type::value_type(key, utils::call_on_element(root_node.lhs, *
this)));
 
  142         mapping_.insert(mapping_type::value_type(key,  utils::call_on_element(root_node.rhs, *
this)));
 
  146           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_vector_diag>(&statement, root_idx, &mapping_)));
 
  148           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_diag>(&statement, root_idx, &mapping_)));
 
  150           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_row>(&statement, root_idx, &mapping_)));
 
  152           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&statement, root_idx, &mapping_)));
 
  154           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_reduction>(&statement, root_idx, &mapping_)));
 
  156           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_row_wise_reduction>(&statement, root_idx, &mapping_)));
 
  158           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&statement, root_idx, &mapping_)));
 
  160           mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_trans>(&statement, root_idx, &mapping_)));
 
  165     symbolic_binder & binder_;
 
  170   class prototype_generation_traversal : 
public tree_parsing::traversal_functor
 
  173     std::set<std::string> & already_generated_;
 
  176     std::map<std::string, unsigned int> 
const & widths_;
 
  178     prototype_generation_traversal(std::set<std::string> & already_generated, std::string & str, 
mapping_type const & mapping, std::map<std::string, unsigned int> 
const & widths) :
 
  179       already_generated_(already_generated), str_(str),  mapping_(mapping), widths_(widths){ }
 
  181     void operator()(scheduler::statement 
const & statement, 
vcl_size_t root_idx, leaf_t leaf)
 const 
  183       scheduler::statement_node 
const & root_node = statement.array()[root_idx];
 
  187         mapped_object * obj = 
at(mapping_, std::make_pair(root_idx,leaf)).get();
 
  188         if(widths_.find(obj->name())!=widths_.end())
 
  189           obj->append_kernel_arguments(already_generated_, str_, 
at(widths_, obj->name()));
 
  191           obj->append_kernel_arguments(already_generated_, str_, 1);
 
  199   class set_arguments_functor : 
public tree_parsing::traversal_functor
 
  202     typedef void result_type;
 
  204     set_arguments_functor(symbolic_binder & binder, 
unsigned int & current_arg, 
viennacl::ocl::kernel & kernel) : binder_(binder), current_arg_(current_arg), kernel_(kernel){ }
 
  206     template<
class NumericT>
 
  207     result_type operator()(
NumericT const & scal)
 const {
 
  209       kernel_.arg(current_arg_++, cl_scalartype(scal));
 
  213     template<
class NumericT>
 
  214     result_type operator()(scalar<NumericT> 
const & scal)
 const {
 
  216         kernel_.arg(current_arg_++, scal.handle().opencl_handle());
 
  220     template<
class NumericT>
 
  221     result_type operator()(vector_base<NumericT> 
const & vec)
 const {
 
  224         kernel_.arg(current_arg_++, vec.handle().opencl_handle());
 
  231     template<
class NumericT>
 
  232     result_type operator()(implicit_vector_base<NumericT> 
const & vec)
 const 
  235       kernel_.arg(current_arg_++, cl_scalartype(vec.value()));
 
  237         kernel_.arg(current_arg_++, cl_uint(vec.index()));
 
  241     template<
class NumericT>
 
  242     result_type operator()(matrix_base<NumericT> 
const & mat)
 const 
  246         kernel_.arg(current_arg_++, mat.handle().opencl_handle());
 
  266     template<
class NumericT>
 
  267     result_type operator()(implicit_matrix_base<NumericT> 
const & mat)
 const 
  273     void operator()(scheduler::statement 
const & statement, 
vcl_size_t root_idx, leaf_t leaf_t)
 const 
  275       scheduler::statement_node 
const & root_node = statement.array()[root_idx];
 
  277         utils::call_on_element(root_node.lhs, *
this);
 
  279         utils::call_on_element(root_node.rhs, *
this);
 
  283     symbolic_binder & binder_;
 
  284     unsigned int & current_arg_;
 
  291                                  std::map<std::string, unsigned int> 
const & widths)
 
  293     statements_container::data_type::const_iterator sit;
 
  294     std::vector<mapping_type>::const_iterator mit;
 
  295     std::set<std::string> already_generated;
 
  297     std::string arguments = first_arguments;
 
  298     for (mit = mappings.begin(), sit = statements.
data().begin(); sit != statements.
data().end(); ++sit, ++mit)
 
  299       tree_parsing::traverse(*sit, sit->root(), prototype_generation_traversal(already_generated, arguments, *mit, widths), 
true);
 
  300     arguments.erase(arguments.size()-1); 
 
  301     stream << 
"__kernel " << 
"void " << name << 
"(" << arguments << 
")" << std::endl;
 
  306     generate_prototype(stream, name, first_arguments, mappings, statements, std::map<std::string, unsigned int>());
 
  312     for (statements_container::data_type::const_iterator itt = statements.
data().begin(); itt != statements.
data().end(); ++itt)
 
  321       message_(
"ViennaCL: Internal error: The generator cannot apply the given template to the given statement: " + message + 
"\n" 
  322                "If you are using a builtin template, please report on viennacl-support@lists.sourceforge.net! We will provide a fix as soon as possible\n" 
  323                "If you are using your own template, please try using other parameters") {}
 
  324     virtual const char* 
what() 
const throw() { 
return message_.c_str(); }
 
  327     std::string message_;
 
  340       std::string chunk_size = 
"chunk_size";
 
  341       std::string chunk_start = 
"chunk_start";
 
  342       std::string chunk_end = 
"chunk_end";
 
  344       stream << 
"unsigned int " << chunk_size << 
" = (" << bound << 
"+" << domain_size << 
"-1)/" << domain_size << 
";" << std::endl;
 
  345       stream << 
"unsigned int " << chunk_start << 
" =" << domain_id << 
"*" << chunk_size << 
";" << std::endl;
 
  346       stream << 
"unsigned int " << chunk_end << 
" = min(" << chunk_start << 
"+" << chunk_size << 
", " << bound << 
");" << std::endl;
 
  348       upper_bound = chunk_end;
 
  366       node = &array[(node->*ptr).node_index];
 
  376     static char suffixes[] = {
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
 
  377                              'a',
'b',
'c',
'd',
'e',
'f'};
 
  390     for (statements_container::data_type::const_iterator it = statements.
data().begin(); it != statements.
data().end(); ++it)
 
  393       std::vector<scheduler::lhs_rhs_element> vectors;
 
  395       for (std::vector<scheduler::lhs_rhs_element>::iterator itt = vectors.begin(); itt != vectors.end(); ++itt)
 
  400       std::vector<scheduler::lhs_rhs_element> matrices;
 
  402       for (std::vector<scheduler::lhs_rhs_element>::iterator itt = matrices.begin(); itt != matrices.end(); ++itt)
 
  406       std::vector<vcl_size_t> striding_operators;
 
  408       if(striding_operators.size() > 0)
 
  416     using namespace scheduler;
 
  417     using namespace utils;
 
  420       vcl_size_t size1 = up_to_internal_size?call_on_matrix(node.
lhs, internal_size1_fun()):call_on_matrix(node.
lhs, size1_fun());
 
  421       vcl_size_t size2 = up_to_internal_size?call_on_matrix(node.
lhs, internal_size2_fun()):call_on_matrix(node.
lhs, size2_fun());
 
  425       return up_to_internal_size?call_on_matrix(node.
lhs, internal_size2_fun()):call_on_matrix(node.
lhs, size2_fun());
 
  427       return up_to_internal_size?call_on_matrix(node.
lhs, internal_size1_fun()):call_on_matrix(node.
lhs, size1_fun());
 
  429       return up_to_internal_size?call_on_vector(node.
lhs, internal_size_fun()):call_on_vector(node.
lhs, size_fun());
 
  440                                    fetching_policy_type fetch, 
unsigned int simd_width, std::string 
const & i, std::string 
const & bound, std::string 
const & domain_id, std::string 
const & domain_size)
 
  443     std::string boundround = bound + 
"/" + strwidth;
 
  445     std::string init, upper_bound, inc;
 
  446     fetching_loop_info(fetch, boundround, stream, init, upper_bound, inc, domain_id, domain_size);
 
  447     stream << 
"for(unsigned int " << i << 
" = " << init << 
"; " << i << 
" < " << upper_bound << 
"; " << i << 
" += " << inc << 
")" << std::endl;
 
  448     stream << 
"{" << std::endl;
 
  450     loop_body(stream, simd_width);
 
  452     stream << 
"}" << std::endl;
 
  456       stream << 
"for(unsigned int " << i << 
" = " << boundround << 
"*" << strwidth << 
" + " << domain_id << 
"; " << i << 
" < " << bound << 
"; " << i << 
" += " + domain_size + 
")" << std::endl;
 
  457       stream << 
"{" << std::endl;
 
  459       loop_body(stream, 1);
 
  461       stream << 
"}" << std::endl;
 
  465   static std::string 
vstore(
unsigned int simd_width, std::string 
const & value, std::string 
const & offset, std::string 
const & ptr)
 
  468       return "(" + ptr + 
")[" + offset + 
"] = " + value;
 
  470       return utils::append_width(
"vstore", simd_width) + 
"(" + value + 
", " + offset + 
", " + ptr + 
")";
 
  473   static std::string 
vload(
unsigned int simd_width, std::string 
const & offset, std::string 
const & ptr)
 
  476       return "(" + ptr + 
")[" + offset + 
"]";
 
  483   virtual std::vector<std::string> generate_impl(std::string 
const & kernel_prefix, 
statements_container const & statements, std::vector<mapping_type> 
const & mapping) 
const = 0;
 
  492     statements_container::data_type::const_iterator sit;
 
  493     std::vector<mapping_type>::iterator mit;
 
  499     std::vector<mapping_type> mappings(statements.
data().size());
 
  501     for (mit = mappings.begin(), sit = statements.
data().begin(); sit != statements.
data().end(); ++sit, ++mit)
 
  504     return generate_impl(kernel_prefix, statements, mappings);
 
  510   virtual void enqueue(std::string 
const & kernel_prefix, std::vector<lazy_program_compiler> & programs, 
statements_container const & statements) = 0;
 
  518 template<
class TemplateType, 
class ParametersType>
 
  524   virtual unsigned int n_lmem_elements()
 const { 
return 0; }
 
  545     using namespace viennacl::tools;
 
  548     unsigned int scalartype_size = 
utils::size_of(
lhs_most(statement.array(), statement.root()).lhs.numeric_type);
 
  552     vcl_size_t lmem_usage = scalartype_size*n_lmem_elements();
 
  553     if (lmem_usage>lmem_available)
 
  554       return TEMPLATE_LOCAL_MEMORY_OVERFLOW;
 
  559     if (p_.local_size_0*p_.local_size_1 > max_workgroup_size)
 
  560       return TEMPLATE_WORK_GROUP_SIZE_OVERFLOW;
 
  561     if (p_.local_size_0 > max_work_item_sizes[0])
 
  562       return TEMPLATE_LOCAL_SIZE_0_OVERFLOW;
 
  564     if (p_.local_size_1 > max_work_item_sizes[1])
 
  565       return TEMPLATE_LOCAL_SIZE_1_OVERFLOW;
 
  568     unsigned int warp_size = 8;
 
  569     if (device.
type()==CL_DEVICE_TYPE_GPU)
 
  577     if (((p_.local_size_0*p_.local_size_1)%warp_size)>0)
 
  578       return TEMPLATE_LOCAL_SIZE_NOT_WARP_MULTIPLE;
 
  581     if (p_.simd_width!=1 && p_.simd_width!=2 &&
 
  582         p_.simd_width!=4 && p_.simd_width!=8 &&
 
  584       return TEMPLATE_INVALID_SIMD_WIDTH;
 
  586     return check_invalid_impl(device);
 
unsigned int local_size_0
tools::shared_ptr< template_base > clone() const 
virtual int check_invalid(statements_container const &statements, viennacl::ocl::device const &device) const =0
returns whether or not the profile has undefined behavior on particular device 
virtual ~loop_body_base()
Represents an OpenCL device within ViennaCL. 
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Exception for the case the generator is unable to deal with the operation. 
void set_arguments(statements_container const &statements, viennacl::ocl::kernel &kernel, unsigned int ¤t_arg)
Represents an OpenCL kernel within ViennaCL. 
invalid_template_exception()
static std::string vstore(unsigned int simd_width, std::string const &value, std::string const &offset, std::string const &ptr)
int check_invalid(statements_container const &statements, viennacl::ocl::device const &device) const 
returns whether or not the profile has undefined behavior on particular device 
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.) 
Some helper routines for reading/writing/printing scheduler expressions. 
void traverse(scheduler::statement const &statement, vcl_size_t root_idx, Fun const &fun, bool inspect)
Recursively execute a functor on a statement. 
static bool is_node_trans(scheduler::statement::container_type const &array, vcl_size_t root_idx, leaf_t leaf_type)
parameters_type const & parameters() const 
static bool is_striding_operator(scheduler::statement_node const &node)
A class representing a compute device (e.g. a GPU) 
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
result_of::size_type< T >::type start1(T const &obj)
cl_ulong local_mem_size() const 
Size of local memory arena in bytes. The minimum value is 32 KB. 
cl_device_type type() const 
The OpenCL device type. 
A class representing the 'data' for the LHS or RHS operand of the respective node. 
container_type const & array() const 
cl_uint vendor_id() const 
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
statement_node_numeric_type
Encodes the type of a node in the statement tree. 
std::list< scheduler::statement > const & data() const 
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
static std::string append_simd_suffix(std::string const &str, unsigned int i)
virtual tools::shared_ptr< template_base > clone() const =0
scheduler::statement_node const & lhs_most(scheduler::statement::container_type const &array, vcl_size_t root)
bool is_scalar_reduction(scheduler::statement_node const &node)
std::vector< value_type > container_type
result_of::size_type< T >::type start2(T const &obj)
vcl_size_t ld(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
unsigned int local_size_1
Map ViennaCL objects to generator wrappers. 
static bool has_strided_access(statements_container const &statements)
static void fetching_loop_info(fetching_policy_type policy, std::string const &bound, utils::kernel_generation_stream &stream, std::string &init, std::string &upper_bound, std::string &inc, std::string const &domain_id, std::string const &domain_size)
Helper for compiling a program lazily. 
statement_node_numeric_type numeric_type
result_of::size_type< T >::type start(T const &obj)
Various utility implementations for dispatching with respect to the different devices available on th...
invalid_template_exception(std::string message)
virtual ~invalid_template_exception()
base functor class for traversing a statement 
Provides the datastructures for dealing with a single statement such as 'x = y + z;'. 
static vcl_size_t vector_size(scheduler::statement_node const &node, bool up_to_internal_size)
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements, std::map< std::string, unsigned int > const &widths)
virtual const char * what() const 
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements)
std::map< mapping_key, tools::shared_ptr< mapped_object > > mapping_type
virtual void enqueue(std::string const &kernel_prefix, std::vector< lazy_program_compiler > &programs, statements_container const &statements)=0
Code for parsing the expression trees. 
std::vector< std::string > generate(std::string const &kernel_prefix, statements_container const &statements, viennacl::ocl::device const &device)
Representation of an OpenCL kernel in ViennaCL. 
template_base_impl(parameters_type const ¶meters, binding_policy_t binding_policy)
The constructor. 
bool row_major(T const &)
static void element_wise_loop_1D(utils::kernel_generation_stream &stream, loop_body_base const &loop_body, fetching_policy_type fetch, unsigned int simd_width, std::string const &i, std::string const &bound, std::string const &domain_id, std::string const &domain_size)
static std::string vload(unsigned int simd_width, std::string const &offset, std::string const &ptr)
The main class for representing a statement such as x = inner_prod(y,z); at runtime. 
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version. 
bool is_vector_reduction(scheduler::statement_node const &node)
ValueT const & at(std::map< KeyT, ValueT > const &map, KeyT const &key)
Emulation of C++11's .at() member for std::map<>, const-version. 
template_base(binding_policy_t binding_policy)
Main datastructure for an node in the statement tree. 
size_t max_work_group_size() const 
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
unsigned int size_of(scheduler::statement_node_numeric_type type)
tools::shared_ptr< symbolic_binder > make_binder(binding_policy_t policy)
parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels)
std::vector< size_t > max_work_item_sizes() const 
Maximum number of work-items that can be specified in each dimension of the work-group. 
std::string append_width(std::string const &str, unsigned int width)
ParametersType parameters_type