Skip to content

Commit

Permalink
Bugfix: gpu::reduce was not running on the GPU when using HIP
Browse files — browse the repository at this point in the history
  • Loading branch information
xavierandrade committed Oct 11, 2024
1 parent 2405385 commit 3d2ac47
Showing 1 changed file with 11 additions and 14 deletions.
25 changes: 11 additions & 14 deletions external_libs/gpurun/include/gpu/reduce.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,14 +11,11 @@

#include <inq_config.h>

-#ifdef ENABLE_CUDA
-#include <cuda.h>
-#endif

#include <cassert>

#include <gpu/run.hpp>
#include <gpu/array.hpp>
#include <gpu/host.hpp>

namespace gpu {

Expand All @@ -30,7 +27,7 @@ struct reduce {
};


-#ifdef ENABLE_CUDA
+#ifdef ENABLE_GPU
template <class kernel_type, class array_type>
__global__ void reduce_kernel_r(long size, kernel_type kernel, array_type odata) {

Expand Down Expand Up @@ -84,7 +81,7 @@ auto run(reduce const & red, kernel_type kernel) -> decltype(kernel(0)) {

using type = decltype(kernel(0));

-#ifndef ENABLE_CUDA
+#ifndef ENABLE_GPU

type accumulator(0.0);
for(long ii = 0; ii < size; ii++){
Expand Down Expand Up @@ -112,7 +109,7 @@ auto run(reduce const & red, kernel_type kernel) -> decltype(kernel(0)) {
#endif
}

-#ifdef ENABLE_CUDA
+#ifdef ENABLE_GPU
template <class kernel_type, class array_type>
__global__ void reduce_kernel_rr(long sizex, long sizey, kernel_type kernel, array_type odata) {

Expand Down Expand Up @@ -154,7 +151,7 @@ auto run(reduce const & redx, reduce const & redy, kernel_type kernel) -> declty

using type = decltype(kernel(0, 0));

-#ifndef ENABLE_CUDA
+#ifndef ENABLE_GPU

type accumulator(0.0);
for(long iy = 0; iy < sizey; iy++){
Expand Down Expand Up @@ -187,7 +184,7 @@ auto run(reduce const & redx, reduce const & redy, kernel_type kernel) -> declty
#endif
}

-#ifdef ENABLE_CUDA
+#ifdef ENABLE_GPU
template <class kernel_type, class array_type>
__global__ void reduce_kernel_rrr(long sizex, long sizey, long sizez, kernel_type kernel, array_type odata) {

Expand Down Expand Up @@ -233,7 +230,7 @@ auto run(reduce const & redx, reduce const & redy, reduce const & redz, kernel_t

if(sizex == 0 or sizey == 0 or sizez == 0) return initial_value;

-#ifndef ENABLE_CUDA
+#ifndef ENABLE_GPU

type accumulator = initial_value;
for(long iy = 0; iy < sizey; iy++){
Expand Down Expand Up @@ -273,7 +270,7 @@ auto run(reduce const & redx, reduce const & redy, reduce const & redz, kernel_t
#endif
}

-#ifdef ENABLE_CUDA
+#ifdef ENABLE_GPU
template <class kernel_type, class array_type>
__global__ void reduce_kernel_vr(long sizex, long sizey, kernel_type kernel, array_type odata) {

Expand Down Expand Up @@ -317,7 +314,7 @@ auto run(long sizex, reduce const & redy, kernel_type kernel) -> gpu::array<decl

using type = decltype(kernel(0, 0));

-#ifndef ENABLE_CUDA
+#ifndef ENABLE_GPU

gpu::array<type, 1> accumulator(sizex, 0.0);

Expand Down Expand Up @@ -373,7 +370,7 @@ auto run(long sizex, reduce const & redy, kernel_type kernel) -> gpu::array<decl

}

-#ifdef ENABLE_CUDA
+#ifdef ENABLE_GPU
template <class kernel_type, class array_type>
__global__ void reduce_kernel_vrr(long sizex, long sizey,long sizez, kernel_type kernel, array_type odata) {

Expand Down Expand Up @@ -417,7 +414,7 @@ auto run(long sizex, reduce const & redy, reduce const & redz, kernel_type kerne

using type = decltype(kernel(0, 0, 0));

-#ifndef ENABLE_CUDA
+#ifndef ENABLE_GPU

gpu::array<type, 1> accumulator(sizex, 0.0);

Expand Down

0 comments on commit 3d2ac47

Please sign in to comment.