1 #ifndef VIENNACL_BACKEND_MEMORY_HPP 
    2 #define VIENNACL_BACKEND_MEMORY_HPP 
   36 #ifdef VIENNACL_WITH_OPENCL 
   41 #ifdef VIENNACL_WITH_CUDA 
   56 #ifdef VIENNACL_WITH_CUDA 
   57     cudaDeviceSynchronize();
 
   59 #ifdef VIENNACL_WITH_OPENCL 
   89     if (size_in_bytes > 0)
 
  100 #ifdef VIENNACL_WITH_OPENCL 
  102         handle.opencl_handle().context(ctx.opencl_context());
 
  103         handle.opencl_handle() = 
opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr);
 
  107 #ifdef VIENNACL_WITH_CUDA 
  148     if (bytes_to_copy > 0)
 
  155 #ifdef VIENNACL_WITH_OPENCL 
  157         opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy);
 
  160 #ifdef VIENNACL_WITH_CUDA 
  162         cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy);
 
  189 #ifdef VIENNACL_WITH_OPENCL 
  192       dst_buffer.opencl_handle() = src_buffer.opencl_handle();
 
  196 #ifdef VIENNACL_WITH_CUDA 
  199       dst_buffer.cuda_handle() = src_buffer.cuda_handle();
 
  226     if (bytes_to_write > 0)
 
  233 #ifdef VIENNACL_WITH_OPENCL 
  238 #ifdef VIENNACL_WITH_CUDA 
  269     if (bytes_to_read > 0)
 
  276 #ifdef VIENNACL_WITH_OPENCL 
  281 #ifdef VIENNACL_WITH_CUDA 
  283         cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async);
 
  307                                             #ifdef VIENNACL_WITH_OPENCL 
  312 #ifdef VIENNACL_WITH_OPENCL 
  314         return sizeof(cl_ulong);
 
  316       return sizeof(
unsigned long);
 
  321                                    #ifdef VIENNACL_WITH_OPENCL 
  326 #ifdef VIENNACL_WITH_OPENCL 
  328         return sizeof(cl_long);
 
  336                                            #ifdef VIENNACL_WITH_OPENCL 
  341 #ifdef VIENNACL_WITH_OPENCL 
  343         return sizeof(cl_uint);
 
  345       return sizeof(
unsigned int);
 
  350                                   #ifdef VIENNACL_WITH_OPENCL 
  355 #ifdef VIENNACL_WITH_OPENCL 
  357         return sizeof(cl_int);
 
  367   template<
typename DataType>
 
  376 #ifdef VIENNACL_WITH_OPENCL 
  378         handle.opencl_handle().context(new_ctx.opencl_context());
 
  386     if (size_dst != size_src)  
 
  388       throw memory_exception(
"Heterogeneous data element sizes not yet supported!");
 
  396 #ifdef VIENNACL_WITH_OPENCL 
  398           handle.opencl_handle().context(new_ctx.opencl_context());
 
  402 #ifdef VIENNACL_WITH_CUDA 
  412 #ifdef VIENNACL_WITH_OPENCL 
  415         std::vector<DataType> buffer;
 
  423 #ifdef VIENNACL_WITH_CUDA 
  425           buffer.resize(handle.
raw_size() / 
sizeof(DataType));
 
  435 #ifdef VIENNACL_WITH_CUDA 
  438         std::vector<DataType> buffer;
 
  447 #ifdef VIENNACL_WITH_OPENCL 
  449           buffer.resize(handle.
raw_size() / 
sizeof(DataType));
 
  469   template<
typename DataType>
 
  478     if (element_size_src != element_size_dst)
 
  488       DataType 
const * src_data;
 
  492         src_data = 
reinterpret_cast<DataType 
const *
>(handle_src.
ram_handle().
get());
 
  493         for (
vcl_size_t i=0; i<buffer_dst.size(); ++i)
 
  494           buffer_dst.
set(i, src_data[i]);
 
  497 #ifdef VIENNACL_WITH_OPENCL 
  499         buffer_src.
resize(handle_src, handle_src.
raw_size() / element_size_src);
 
  501         for (
vcl_size_t i=0; i<buffer_dst.size(); ++i)
 
  502           buffer_dst.set(i, buffer_src[i]);
 
  505 #ifdef VIENNACL_WITH_CUDA 
  507         buffer_src.
resize(handle_src, handle_src.
raw_size() / element_size_src);
 
  509         for (
vcl_size_t i=0; i<buffer_dst.size(); ++i)
 
  510           buffer_dst.set(i, buffer_src[i]);
 
  521       if (handle_dst.
raw_size() == buffer_dst.raw_size())
 
  569           buffer.
resize(handle_src, handle_src.
raw_size() / element_size_src);
 
  591           buffer.
resize(handle_src, handle_src.
raw_size() / element_size_src);
 
  624   obj.switch_memory_context(new_ctx);
 
void typesafe_memory_copy(mem_handle const &handle_src, mem_handle &handle_dst)
Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the...
Helper class implementing an array on the host. Default case: No conversion necessary. 
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool)
Reads data from a buffer back to main RAM. 
cl_mem memory_create(viennacl::ocl::context const &ctx, vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size in the current OpenCL context. If the second argument is provi...
void memory_write(mem_handle &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'. 
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer. 
void finish() const 
Waits until all kernels in the queue have finished their execution. 
Exception class in case of memory errors. 
void memory_write(viennacl::ocl::handle< cl_mem > &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the OpenCL buffer identified by 'dst_buffer'. 
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'. 
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. 
vcl_size_t element_size< unsigned long >(memory_types)
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory star...
This file provides the forward declarations for the main types used within ViennaCL. 
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM. 
void memory_read(viennacl::ocl::handle< cl_mem > const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from an OpenCL buffer back to main RAM. 
void resize(mem_handle const &handle, vcl_size_t num)
Resize including initialization of new memory (cf. std::vector<>) 
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool)
Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'. 
Implementation of an OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API. 
vcl_size_t element_size(memory_types)
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
vcl_size_t element_size< unsigned int >(memory_types)
vcl_size_t element_size< long >(memory_types)
vcl_size_t element_size< int >(memory_types)
void memory_copy(viennacl::ocl::handle< cl_mem > const &src_buffer, viennacl::ocl::handle< cl_mem > &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory s...
viennacl::ocl::command_queue & get_queue()
Convenience function for getting the default queue for the currently active device in the active cont...
Implementations for the OpenCL backend functionality. 
Extracts the underlying context from objects. 
Implements the multi-memory-domain handle. 
void switch_memory_context(mem_handle &handle, viennacl::context new_ctx)
Switches the active memory domain within a memory handle. Data is copied if the new active domain dif...
Implementations for the CUDA backend functionality. 
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
Implementation of the OpenCL backend, in which all contexts are stored. 
memory_types default_memory_type()
Returns the default memory type for the given configuration. 
viennacl::memory_types memory_type() const 
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size on the CUDA device. If the second argument is provided...
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
viennacl::context context(T const &t)
Returns the context describing the currently active memory domain of an object. 
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from a CUDA buffer back to main RAM. 
void set(vcl_size_t index, U value)
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
vcl_size_t raw_size() const 
Returns the number of bytes of the currently active buffer. 
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
Implementations for the OpenCL backend functionality. 
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version. 
vcl_size_t raw_size() const 
void memory_shallow_copy(mem_handle const &src_buffer, mem_handle &dst_buffer)
A 'shallow' copy operation from an initialized buffer to an uninitialized buffer. The uninitialized b...
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
memory_types get_active_handle_id() const 
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Helper functionality for working with different memory domains. 
void switch_memory_context(T &obj, viennacl::context new_ctx)
Generic convenience routine for migrating data of an object to a new memory domain.