Commit 62e542b5 authored by root
Browse files

Scaled transpose test compiling but failing

parent bdaf6a0c
......@@ -8,6 +8,7 @@ set(PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES
set(psrdada_cpp_effelsberg_edd_src
src/Unpacker.cu
src/DetectorAccumulator.cu
src/ScaledTransposeTFtoTFT.cu
)
cuda_add_library(${CMAKE_PROJECT_NAME}_effelsberg_edd ${psrdada_cpp_effelsberg_edd_src})
......
......@@ -17,7 +17,7 @@ void tf_to_tft_transpose(
const float scale,
const float offset)
{
extern __shared__ char2* temp; //nbytes = sizeof(char2) * nsamps_per_load * nchans;
extern __shared__ char2 temp[]; //nbytes = sizeof(char2) * nsamps_per_load * nchans;
const int load_offset = nsamps_per_packet * blockIdx.x * nchans;
for (int sub_samp_load_idx = 0;
sub_samp_load_idx < nsamps_per_packet/nsamps_per_load;
......@@ -41,17 +41,16 @@ void tf_to_tft_transpose(
}
}
__syncthreads();
int store_offset = load_offset + nsamps_per_load * sub_samp_load_idx;
for (int chan_store_idx = threadIdx.y;
chan_store_idx < nchans;
chan_store_idx += blockDim.y)
{
for (int samp_store_idx = threadIdx.x;
samp_store_idx < nsamps_per_load;
samp_store_idx += blockDix.x)
samp_store_idx += blockIdx.x)
{
int store_idx = (load_offset + chan_store_idx * nsamps_per_packet
+ samps_per_load * sub_samp_load_idx + samp_store_idx);
+ nsamps_per_load * sub_samp_load_idx + samp_store_idx);
output[store_idx] = temp[samp_store_idx * nsamps_per_load + chan_store_idx];
}
}
......@@ -88,7 +87,7 @@ void ScaledTransposeTFtoTFT::transpose(InputType const& input, OutputType& outpu
assert(input.size() % (_nchans * _nsamps_per_packet) == 0 /* Input is not a multiple of _nchans * _nsamps_per_packet*/);
output.resize(input.size());
const int nsamps_per_load = 16;
assert(_nsamps_per_packet % nsamps_per_load) == 0;
assert((_nsamps_per_packet % nsamps_per_load) == 0);
const int nsamps = input.size() / _nchans;
int shared_mem_bytes = sizeof(OutputType::value_type) * _nchans * nsamps_per_load;
int nblocks = nsamps / _nsamps_per_packet;
......
......@@ -7,7 +7,7 @@ set(
src/DetectorAccumulatorTester.cu
src/FftSpectrometerTester.cu
src/UnpackerTester.cu
src/ScaledTransposeTFtoTFT.cu
src/ScaledTransposeTFtoTFTTester.cu
)
cuda_add_executable(gtest_edd ${gtest_edd_src} )
target_link_libraries(gtest_edd ${PSRDADA_CPP_EFFELSBERG_EDD_LIBRARIES} ${CUDA_CUFFT_LIBRARIES})
......
......@@ -37,11 +37,11 @@ void ScaledTransposeTFtoTFTTester::transpose_c_reference(
const int nsamps,
const int nsamps_per_packet,
const float scale,
const float offset);
const float offset)
{
int nsamples = input.size() / nchans;
int outer_t_dim = nsamps / nsamps_per_packet;
output.size(input.size());
output.resize(input.size());
for (int outer_t_idx = 0; outer_t_idx < outer_t_dim; ++outer_t_idx)
{
for (int chan_idx = 0; chan_idx < nchans; ++chan_idx)
......@@ -68,7 +68,8 @@ void ScaledTransposeTFtoTFTTester::compare_against_host(
ASSERT_EQ(host_output.size(), copy_from_gpu.size());
for (std::size_t ii = 0; ii < host_output.size(); ++ii)
{
ASSERT_EQ(host_output[ii], copy_from_gpu[ii]);
ASSERT_EQ(host_output[ii].x, copy_from_gpu[ii].x);
ASSERT_EQ(host_output[ii].y, copy_from_gpu[ii].y);
}
}
......@@ -78,7 +79,7 @@ TEST_F(ScaledTransposeTFtoTFTTester, counter_test)
int nsamps_per_packet = 8192/nchans;
float stdev = 64.0f;
float scale = 4.0f;
int nsamps = nsamps_per_packet * 1024
int nsamps = nsamps_per_packet * 1024;
int n = nchans * nsamps;
std::default_random_engine generator;
std::normal_distribution<float> distribution(0.0, stdev);
......@@ -91,9 +92,9 @@ TEST_F(ScaledTransposeTFtoTFTTester, counter_test)
ScaledTransposeTFtoTFT::InputType gpu_input = host_input;
ScaledTransposeTFtoTFT::OutputType gpu_output;
OutputType host_output;
ScaledTransposeTFtoTFT transposer(nchans, nsamps_per_packet, nscale, 0.0, _stream);
ScaledTransposeTFtoTFT transposer(nchans, nsamps_per_packet, scale, 0.0, _stream);
transposer.transpose(gpu_input, gpu_output);
detect_c_reference(host_input, host_output, nchans, nsamps, nsamps_per_packet, scale, 0.0);
transpose_c_reference(host_input, host_output, nchans, nsamps, nsamps_per_packet, scale, 0.0);
compare_against_host(gpu_output, host_output);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment