18 #ifndef __DRIVER_CUDA_H__
19 #define __DRIVER_CUDA_H__
23 #include <common/config.h>
/*
 * Declared ahead of the STARPU_USE_CUDA conditional that follows, so this
 * prototype is visible whether or not CUDA support is configured.
 * Takes no arguments and returns nothing.
 * NOTE(review): by its name this presumably performs early, pre-init setup
 * of the CUDA driver -- confirm against the definition.
 */
25 void _starpu_cuda_preinit(
void);
27 #ifdef STARPU_USE_CUDA
29 #include <cuda_runtime_api.h>
30 #ifdef STARPU_HAVE_LIBNVIDIA_ML
36 #include <core/workers.h>
39 #pragma GCC visibility push(hidden)
/*
 * Declaration only; the storage for this int lives in another translation
 * unit.
 * NOTE(review): the name suggests "number of workers per CUDA device" --
 * confirm where it is assigned and what range it may take.
 */
44 extern int _starpu_nworker_per_cuda;
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably the CUDA driver initialisation entry point;
 * its ordering relative to the other init routines in this header is not
 * visible here -- confirm against the definition.
 */
46 void _starpu_cuda_init(
void);
/*
 * Takes no arguments and returns an unsigned value.
 * NOTE(review): by its name, the number of CUDA devices -- TODO confirm
 * whether this is the detected count or the count actually in use.
 */
47 unsigned _starpu_get_cuda_device_count(
void);
48 #ifdef STARPU_HAVE_HWLOC
/*
 * Follows the STARPU_HAVE_HWLOC conditional, i.e. only declared when hwloc
 * support is configured.
 * Takes an hwloc topology and an integer device id and returns an
 * hwloc_obj_t.
 * NOTE(review): presumably looks up the hwloc object associated with CUDA
 * device `devid`; what is returned when no object matches is not visible
 * here -- confirm against the definition.
 */
50 hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology,
int devid);
54 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
/*
 * Follows the `#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)`
 * line, so it is meant to be available for simulated builds as well.
 * Takes no arguments and returns nothing.
 * NOTE(review): how this differs from _starpu_cuda_init() cannot be told
 * from the prototypes alone -- confirm against the definitions.
 */
59 void _starpu_init_cuda(
void);
/*
 * cuBLAS v2 set-up / tear-down hooks. All four take no arguments and
 * return nothing, so any failure reporting must happen inside them.
 * NOTE(review): the names pair up as init/shutdown, in a "_func" flavour
 * and a plain flavour; which callers use which, and on which thread, is
 * not visible in this header -- confirm against the definitions.
 */
60 void _starpu_init_cublas_v2_func(
void);
61 void _starpu_shutdown_cublas_v2_func(
void);
62 void _starpu_cublas_v2_init(
void);
63 void _starpu_cublas_v2_shutdown(
void);
/*
 * Takes one untyped pointer and returns an untyped pointer.
 * NOTE(review): the void *(void *) shape matches a thread start routine,
 * and the name suggests the CUDA worker main loop; the expected type of
 * the argument is not visible here -- confirm against the definition.
 */
64 void *_starpu_cuda_worker(
void *);
65 #ifdef STARPU_HAVE_LIBNVIDIA_ML
/*
 * Follows the STARPU_HAVE_LIBNVIDIA_ML conditional, i.e. only declared
 * when NVML support is configured.
 * Takes a pointer to a CUDA device property structure and returns an
 * nvmlDevice_t.
 * NOTE(review): presumably maps the CUDA device described by `props` to
 * its NVML handle; which fields of `props` are used for the match, and
 * the result when no NVML device matches, are not visible here.
 */
66 nvmlDevice_t _starpu_cuda_get_nvmldev(
struct cudaDeviceProp *props);
/*
 * Stub form of _starpu_cuda_discover_devices(): expands to a void cast of
 * its argument, so a call compiles to nothing while still "using"
 * `config` (avoids unused-variable warnings at call sites).
 * NOTE(review): the preprocessor branch selecting this stub is not
 * visible here. Also, `config` is not parenthesised in the expansion, so
 * the cast binds only to the first operand of a compound argument --
 * pass a plain identifier, or consider `((void) (config))`.
 */
69 # define _starpu_cuda_discover_devices(config) ((void) config)
72 #ifdef STARPU_USE_CUDA
73 #ifdef STARPU_USE_CUDA_MAP
/*
 * Memory-mapping entry points; they follow the STARPU_USE_CUDA and
 * STARPU_USE_CUDA_MAP conditionals.
 *
 * _starpu_cuda_map_ram: takes a source address, offset and node, a
 * destination node and a size, plus an int pointer `ret`; returns a
 * uintptr_t.
 * NOTE(review): presumably returns the address at which the source range
 * is visible from dst_node and writes a status code through `ret` --
 * confirm, including the value returned on failure.
 */
74 uintptr_t _starpu_cuda_map_ram(uintptr_t src_ptr,
size_t src_offset,
unsigned src_node,
unsigned dst_node,
size_t size,
int *ret);
/*
 * Takes the same source description plus the destination address
 * (`dst_ptr`) and node; returns an int.
 * NOTE(review): presumably undoes a mapping created by
 * _starpu_cuda_map_ram(); the meaning of the return value is not visible
 * here -- confirm.
 */
75 int _starpu_cuda_unmap_ram(uintptr_t src_ptr,
size_t src_offset,
unsigned src_node, uintptr_t dst_ptr,
unsigned dst_node,
size_t size);
/*
 * Takes address/offset/node triples for both source and destination and
 * a size; returns an int.
 * NOTE(review): presumably refreshes an existing mapping; whether any
 * data is actually copied, and the return-value convention, must be
 * confirmed against the definition.
 */
76 int _starpu_cuda_update_map(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size);
/*
 * Interface-level copy routines, one per direction (CPU->CUDA,
 * CUDA->CUDA, CUDA->CPU). All three share one signature: a data handle,
 * untyped source and destination interface pointers with their node
 * numbers, and a data request pointer; each returns an int.
 * NOTE(review): the concrete type behind src_interface/dst_interface,
 * whether `req` may be NULL, and the return-value convention are not
 * visible in this header -- confirm against the definitions.
 */
83 int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle,
void *src_interface,
unsigned src_node,
void *dst_interface,
unsigned dst_node,
struct _starpu_data_request *req);
84 int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle,
void *src_interface,
unsigned src_node,
void *dst_interface,
unsigned dst_node,
struct _starpu_data_request *req);
85 int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle,
void *src_interface,
unsigned src_node,
void *dst_interface,
unsigned dst_node,
struct _starpu_data_request *req);
/*
 * Raw-buffer copy routines, one per direction (CUDA->CUDA, CUDA->CPU,
 * CPU->CUDA). All three share one signature: an address/offset/node
 * triple for the source, the same for the destination, a size, and an
 * async channel pointer; each returns an int.
 * NOTE(review): presumably `size` bytes are copied from src+src_offset
 * to dst+dst_offset. Whether a NULL async_channel selects a synchronous
 * copy, and the return-value convention, are not visible here -- confirm
 * against the definitions.
 */
87 int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size,
struct _starpu_async_channel *async_channel);
88 int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size,
struct _starpu_async_channel *async_channel);
89 int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size,
struct _starpu_async_channel *async_channel);
/*
 * 2D variants of the raw-buffer copy routines, one per direction
 * (CUDA->CUDA, CUDA->CPU, CPU->CUDA). Compared with the 1D signatures,
 * the single size is replaced by blocksize, numblocks, ld_src and ld_dst;
 * each returns an int.
 * NOTE(review): presumably `numblocks` contiguous blocks of `blocksize`
 * are copied, with consecutive blocks spaced ld_src apart in the source
 * and ld_dst apart in the destination. The units of these four
 * parameters (bytes vs. elements) and the return-value convention are
 * not visible here -- confirm against the definitions.
 */
91 int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks,
size_t ld_src,
size_t ld_dst,
struct _starpu_async_channel *async_channel);
92 int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks,
size_t ld_src,
size_t ld_dst,
struct _starpu_async_channel *async_channel);
93 int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks,
size_t ld_src,
size_t ld_dst,
struct _starpu_async_channel *async_channel);
/*
 * Takes two node numbers and returns an int.
 * NOTE(review): by its name this is a predicate (non-zero when
 * `handling_node` can access memory of `node` directly) -- confirm the
 * exact return convention and which argument is the accessor.
 */
95 int _starpu_cuda_is_direct_access_supported(
unsigned node,
unsigned handling_node);
/*
 * Takes a node number, a size and an int flags word; returns a
 * uintptr_t.
 * NOTE(review): presumably the address of a fresh allocation of `size`
 * on dst_node; the accepted flag values and the value returned on
 * failure are not visible here -- confirm against the definition.
 */
96 uintptr_t _starpu_cuda_malloc_on_node(
unsigned dst_node,
size_t size,
int flags);
/*
 * Takes the same node, size and flags plus the address to release;
 * returns nothing.
 * NOTE(review): presumably `addr`, `size` and `flags` must match the
 * corresponding _starpu_cuda_malloc_on_node() call -- confirm.
 */
97 void _starpu_cuda_free_on_node(
unsigned dst_node, uintptr_t addr,
size_t size,
int flags);
99 #pragma GCC visibility pop
Definition: copy_driver.h:71
#define STARPU_MAXNUMANODES
Definition: starpu_config.h:251
Definition: workers.h:441
Definition: workers.h:352
int devid[STARPU_NARCH][STARPU_NMAXDEVS]
Definition: workers.h:404
Definition: node_ops.h:92
Definition: workers.h:155