3#include "tool/cudalib.h"
4#include "tool/gpucard.h"
/**
 * Launches kernel `k` on stream `st` with a one-dimensional grid sized to
 * cover `np` work items using blocks of `bs` threads.
 *
 * The grid size is ceil(np / bs); `sh` is passed as the third launch
 * parameter and the remaining arguments are perfectly forwarded to the kernel.
 * No launch-error check is performed here; callers that need one must query
 * the CUDA runtime themselves.
 *
 * \param st  CUDA stream the kernel is launched on.
 * \param sh  Value passed as the dynamic shared memory launch parameter.
 * \param bs  Number of threads per block; must be positive.
 * \param np  Number of work items to cover; nothing is launched if `np <= 0`.
 * \param k   Kernel to launch.
 * \param a   Arguments forwarded to the kernel.
 */
template <class K, class... Ts>
void launch_k3s(cudaStream_t st, size_t sh, int bs, int np, K k, Ts&&... a)
{
   // A grid with zero (or a negative number of) blocks is an invalid launch
   // configuration and would only leave a pending CUDA error behind, so skip
   // the launch when there is nothing to process.
   if (np <= 0)
      return;

   // Ceiling division written as (np - 1) / bs + 1: for np > 0 and bs > 0 it
   // equals (np + bs - 1) / bs but cannot overflow int for large np.
   int gs = (np - 1) / bs + 1;
   k<<<gs, bs, sh, st>>>(std::forward<Ts>(a)...);
}
/**
 * Launches kernel `k` on stream `st` with a one-dimensional grid sized to
 * cover `np` work items using blocks of `bs` threads.
 *
 * The grid size is ceil(np / bs); `sh` is passed as the third launch
 * parameter and the remaining arguments are perfectly forwarded to the kernel.
 * No launch-error check is performed here; callers that need one must query
 * the CUDA runtime themselves.
 *
 * \param st  CUDA stream the kernel is launched on.
 * \param sh  Value passed as the dynamic shared memory launch parameter.
 * \param bs  Number of threads per block; must be positive.
 * \param np  Number of work items to cover; nothing is launched if `np <= 0`.
 * \param k   Kernel to launch.
 * \param a   Arguments forwarded to the kernel.
 */
template <class K, class... Ts>
void launch_k3b(cudaStream_t st, size_t sh, int bs, int np, K k, Ts&&... a)
{
   // A grid with zero (or a negative number of) blocks is an invalid launch
   // configuration and would only leave a pending CUDA error behind, so skip
   // the launch when there is nothing to process.
   if (np <= 0)
      return;

   // Ceiling division written as (np - 1) / bs + 1: for np > 0 and bs > 0 it
   // equals (np + bs - 1) / bs but cannot overflow int for large np.
   int gs = (np - 1) / bs + 1;
   k<<<gs, bs, sh, st>>>(std::forward<Ts>(a)...);
}
// launch_k2s: forwards the stream, block size, item count, kernel, and the
// perfectly-forwarded kernel arguments to launch_k3s, supplying `sh` as the
// shared-memory argument.
// NOTE(review): this listing skips original lines 53-54, so the braces and the
// declaration of `sh` are not visible here. Presumably `sh` is 0 (no dynamic
// shared memory) -- confirm against the real launch.h.
51template <
class K,
class... Ts>
52void launch_k2s(cudaStream_t st,
int bs,
int np, K k, Ts&&... a)
55 launch_k3s(st, sh, bs, np, k, std::forward<Ts>(a)...);
// launch_k2b: forwards the stream, block size, item count, kernel, and the
// perfectly-forwarded kernel arguments to launch_k3b, supplying `sh` as the
// shared-memory argument.
// NOTE(review): this listing skips original lines 64-65, so the braces and the
// declaration of `sh` are not visible here. Presumably `sh` is 0 (no dynamic
// shared memory) -- confirm against the real launch.h.
62template <
class K,
class... Ts>
63void launch_k2b(cudaStream_t st,
int bs,
int np, K k, Ts&&... a)
66 launch_k3b(st, sh, bs, np, k, std::forward<Ts>(a)...);
// Template header and body of a launcher that forwards to launch_k2s with the
// stream, a block size `bs`, the item count, the kernel, and the
// perfectly-forwarded kernel arguments.
// NOTE(review): original lines 74-76 (the function signature, opening brace,
// and the declaration of `bs`) are missing from this listing. The
// cross-reference entries further down suggest the signature is
// launch_k1s(cudaStream_t st, int np, K k, Ts&&... a); `bs` is presumably a
// default block size (possibly BLOCK_DIM) -- confirm against the real launch.h.
73template <
class K,
class... Ts>
77 launch_k2s(st, bs, np, k, std::forward<Ts>(a)...);
// Template header and body of a launcher that forwards to launch_k2b with the
// stream, a block size `bs`, the item count, the kernel, and the
// perfectly-forwarded kernel arguments.
// NOTE(review): original lines 86-88 (the function signature, opening brace,
// and the declaration of `bs`) are missing from this listing. The
// cross-reference entries further down suggest the signature is
// launch_k1b(cudaStream_t st, int np, K k, Ts&&... a); `bs` is presumably a
// default block size (possibly BLOCK_DIM) -- confirm against the real launch.h.
85template <
class K,
class... Ts>
89 launch_k2b(st, bs, np, k, std::forward<Ts>(a)...);
// Flat index of the calling thread along x: threadIdx.x + blockIdx.x * blockDim.x.
// Parenthesized so the macro expands as a single operand inside larger
// expressions (e.g. `2 * ITHREAD`, `ITHREAD % n`).
#define ITHREAD (threadIdx.x + blockIdx.x * blockDim.x)
// Product of the block and grid extents along x: blockDim.x * gridDim.x.
// Parenthesized so that `n / STRIDE` or `n % STRIDE` divide by the whole
// product rather than by blockDim.x alone.
#define STRIDE (blockDim.x * gridDim.x)
void launch_k1b(cudaStream_t st, int np, K k, Ts &&... a)
Definition: launch.h:86
void launch_k3s(cudaStream_t st, size_t sh, int bs, int np, K k, Ts &&... a)
Definition: launch.h:22
void launch_k3b(cudaStream_t st, size_t sh, int bs, int np, K k, Ts &&... a)
Definition: launch.h:41
void launch_k2b(cudaStream_t st, int bs, int np, K k, Ts &&... a)
Definition: launch.h:63
void launch_k1s(cudaStream_t st, int np, K k, Ts &&... a)
Definition: launch.h:74
void launch_k2s(cudaStream_t st, int bs, int np, K k, Ts &&... a)
Definition: launch.h:52
constexpr unsigned BLOCK_DIM
Default dimension of thread blocks.
Definition: gpucard.h:95