Skip to content
Snippets Groups Projects
Commit fe18fc23 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Added missing PackerTest and refactored packer

parent a5489844
Branches
Tags
No related merge requests found
......@@ -11,6 +11,44 @@ namespace edd {
namespace kernels {
// Quantise one float sample to an unsigned code of at most input_bit_depth
// bits by counting how many decision thresholds it exceeds.
//
// The thresholds are minV + k * level for k = 1 .. 2^input_bit_depth - 1, so
// the result is always within [0, 2^input_bit_depth - 1]; clipping therefore
// falls out of the counting and no explicit clamp is required. The comparison
// is strict: a sample lying exactly on a threshold does not count it.
//
//   inp    sample to convert
//   maxV   not read by this generic version (kept so that all convert32
//          variants share one signature)
//   minV   offset added to every threshold
//   level  spacing between adjacent thresholds
//
// According to the original author, for bit depths of 2 and 4 this unrolled
// loop is faster than clamping with fmin / fmax followed by a cast.
template <unsigned int input_bit_depth>
__device__ __forceinline__ uint32_t convert32(float inp, float maxV, float minV, float level)
{
    const int n_codes = (1 << input_bit_depth);
    uint32_t code = 0;
#pragma unroll
    for (int step = 1; step < n_codes; ++step) {
        // Each exceeded threshold raises the output code by one.
        if (inp > ((step * level) + minV)) {
            code += 1;
        }
    }
    return code;
}
// 8-bit specialisation of convert32: scale, clamp and truncate.
//
// The sample is expressed in units of `level` above `minV`, saturated to the
// range [0, 255] and then truncated to an unsigned integer.
//
//   inp    sample to convert
//   maxV   not read by this specialisation
//   minV   value that maps to code 0
//   level  width of one quantisation step
//
// NOTE(review): per the CUDA math API fminf returns the non-NaN operand, so a
// NaN sample presumably ends up as the maximum code - confirm this is intended.
template <>
__device__ __forceinline__ uint32_t convert32<8>(float inp, float maxV, float minV, float level)
{
    const float max_code = ((1 << 8) - 1);
    const float scaled = (inp - minV) / level;
    // Upper bound first, then lower bound, exactly as a two-sided clamp.
    const float clamped = fmaxf(fminf(scaled, max_code), 0);
    return uint32_t(clamped);
}
// 16-bit specialisation of convert32: scale, clamp and truncate.
//
// The sample is expressed in units of `level` above `minV`, saturated to the
// range [0, 65535] and then truncated to an unsigned integer.
//
//   inp    sample to convert
//   maxV   not read by this specialisation
//   minV   value that maps to code 0
//   level  width of one quantisation step
//
// NOTE(review): per the CUDA math API fminf returns the non-NaN operand, so a
// NaN sample presumably ends up as the maximum code - confirm this is intended.
template <>
__device__ __forceinline__ uint32_t convert32<16>(float inp, float maxV, float minV, float level)
{
    const float max_code = ((1 << 16) - 1);
    const float scaled = (inp - minV) / level;
    // Upper bound first, then lower bound, exactly as a two-sided clamp.
    const float clamped = fmaxf(fminf(scaled, max_code), 0);
    return uint32_t(clamped);
}
// pack float to 2,4,8,16 bit integers with linear scaling
template <unsigned int input_bit_depth>
__global__ void packNbit(const float *__restrict__ input,
......@@ -32,12 +70,7 @@ __global__ void packNbit(const float *__restrict__ input,
// Load new input value, clip and convert to Nbit integer
const float inp = input[i + j * blockDim.x];
uint32_t p = 0;
#pragma unroll
for (int k = 1; k < (1 << input_bit_depth); k++) {
p += (inp > ((k * l) + minV));
} // this is more efficient than fmin, fmax for clamp and cast.
uint32_t p = convert32<input_bit_depth>(inp, maxV, minV, l);
// store in shared memory with linear access
tmp[threadIdx.x] += p << (input_bit_depth * j);
}
......@@ -64,6 +97,7 @@ __global__ void packNbit(const float *__restrict__ input,
__syncthreads();
}
}
} // namespace kernels
......
......@@ -128,7 +128,7 @@ int main(int argc, char **argv) {
vdifHeader.setThreadId(thread_id);
vdifHeader.setStationId(station_id);
BOOST_LOG_TRIVIAL(warning) << "SETTING FIXED REFERENCE EPOCH AND SECONDS FROM EPOCH!! Should be read from data stream!!";
vdifHeader.setReferenceEpoch(123);
vdifHeader.setReferenceEpoch(0);
vdifHeader.setSecondsFromReferenceEpoch(42); // for first block
......
#include <stdint.h>
#include <stdlib.h>
#include <time.h>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

#include "psrdada_cpp/effelsberg/edd/Packer.cuh"
#include "gtest/gtest.h"
//TEST(PackerTest, check2bit)
//{
// std::size_t n = 1024;
// thrust::device_vector<float> input(n);
// thrust::device_vector<uint32_t> output(n);
//
// {
// float minV = -2;
// float maxV = 2;
//
// srand (time(NULL));
// for (int i =0; i < input.size(); i++)
// {
// input[i] = ((float(rand()) / RAND_MAX) - 0.5) * 2.5 * (maxV-minV) + maxV + minV;
// }
//
// thrust::fill(output.begin(), output.end(), 5);
// cudaStream_t stream;
// cudaStreamCreate(&stream);
// psrdada_cpp::effelsberg::edd::pack<2>(input, output, minV, maxV, stream);
// EXPECT_EQ(output.size(), n / 16);
//
// float step = (maxV - minV) / 3;
// float L2 = minV + step;
// float L3 = minV + 2 * step;
// float L4 = minV + 3 * step;
//
// const size_t nbp = 16; // 16 samples per output value
// for(int i = 0; i < input.size() / nbp; i++)
// {
// uint32_t of = output[i];
// for (size_t j =0; j< nbp; j++)
// {
// uint32_t a = ((of >> (j *2)) & 3);
// int k = i * nbp + j;
// if (input[k] >= L4)
// EXPECT_EQ(a, 3);
// else if (input[k] >= L3)
// EXPECT_EQ(a, 2);
// else if (input[k] >= L2)
// EXPECT_EQ(a, 1);
// else
// EXPECT_EQ(a, 0);
// }
// }
// }
//}
/**
 * Fixture shared by the N-bit packer tests.
 *
 * SetUp() uploads 1024 random float samples to the device and creates a CUDA
 * stream. The samples are drawn from a range of width 2.5 * (maxV - minV), so
 * values below minV and above maxV occur and the clipping paths are exercised.
 * The check* helpers compare the packed device output against a host-side
 * reference quantisation.
 */
class PackerTest: public ::testing::Test
{
protected:
    thrust::device_vector<float> input;      // random samples, filled in SetUp()
    thrust::device_vector<uint32_t> output;  // packed words produced by the code under test
    float minV;                              // lower clipping bound handed to pack<>()
    float maxV;                              // upper clipping bound handed to pack<>()
    cudaStream_t stream;                     // stream handed to pack<>()

    void SetUp() override
    {
        // gtest still runs TearDown() when an ASSERT aborts SetUp(), so the
        // handle must hold a recognisable value before creation is attempted.
        stream = nullptr;
        minV = -2;
        maxV = 2;

        // NOTE(review): time-based seed, so a failing run is not reproducible
        // from the seed alone; the failure messages print the offending sample.
        srand (time(NULL));

        // Generate on the host and upload once; indexing a device_vector
        // element-by-element from host code costs one transfer per element.
        thrust::host_vector<float> host_input(1024);
        for (size_t i = 0; i < host_input.size(); i++)
        {
            host_input[i] = ((float(rand()) / RAND_MAX) - 0.5) * 2.5 * (maxV-minV) + maxV + minV;
        }
        input = host_input;

        ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream));
    }

    void TearDown() override
    {
        // Only destroy a stream that was actually created.
        if (stream != nullptr)
        {
            EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream));
            stream = nullptr;
        }
    }

    // Expect one 32-bit output word per (32 / bit_depth) input samples.
    void checkOutputSize(unsigned int bit_depth)
    {
        ASSERT_GT(bit_depth, 0u) << "bit depth must be positive";
        EXPECT_EQ(output.size(), input.size() / (32 / bit_depth));
    }

    // Unpack every output word and compare each bit_depth-wide field with the
    // reference: 0 for samples <= minV, the maximum code for samples >= maxV,
    // and the truncated (sample - minV) / step in between.
    void checkOutputValues(unsigned int bit_depth)
    {
        ASSERT_TRUE(bit_depth > 0 && bit_depth < 32) << "unsupported bit depth " << bit_depth;

        const uint32_t max_code = (1u << bit_depth) - 1;
        const float step = (maxV - minV) / max_code;
        const size_t nbp = 32 / bit_depth;  // samples packed into one output word

        // Wait for the work queued on the test stream before reading results;
        // this also surfaces errors raised asynchronously by that work.
        ASSERT_EQ(cudaSuccess, cudaStreamSynchronize(stream));

        // One bulk device-to-host copy per buffer instead of one per access.
        const thrust::host_vector<float> host_input(input);
        const thrust::host_vector<uint32_t> host_output(output);

        // checkOutputSize() only records a non-fatal failure, so make sure the
        // loop below cannot index past the end of the output.
        const size_t n_words = host_input.size() / nbp;
        ASSERT_GE(host_output.size(), n_words) << "output too short to verify";

        for (size_t i = 0; i < n_words; i++)
        {
            const uint32_t of = host_output[i];
            for (size_t j = 0; j < nbp; j++)
            {
                // Field j occupies bits [j * bit_depth, (j + 1) * bit_depth).
                const uint32_t a = ((of >> (j * bit_depth)) & max_code);
                const size_t k = i * nbp + j;
                const float sample = host_input[k];
                if (sample <= minV)
                    EXPECT_EQ(0, int (a)) << "input[ " << k << "] = " << sample;
                else if (sample >= maxV)
                    EXPECT_EQ(int (max_code), int (a)) << "input[ " << k << "] = " << sample;
                else
                    EXPECT_EQ(int((sample - minV) / step), int(a)) << "input[ " << k << "] = " << sample;
            }
        }
    }
};
// Pack the fixture's samples to 2 bits each, then verify the size of the
// output and every packed value.
TEST_F(PackerTest, 2bit)
{
    const unsigned int bit_depth = 2;
    psrdada_cpp::effelsberg::edd::pack<bit_depth>(input, output, minV, maxV, stream);

    checkOutputSize(bit_depth);
    checkOutputValues(bit_depth);
}
// Pack the fixture's samples to 4 bits each, then verify the size of the
// output and every packed value.
TEST_F(PackerTest, 4bit)
{
    const unsigned int bit_depth = 4;
    psrdada_cpp::effelsberg::edd::pack<bit_depth>(input, output, minV, maxV, stream);

    checkOutputSize(bit_depth);
    checkOutputValues(bit_depth);
}
// Pack the fixture's samples to 8 bits each, then verify the size of the
// output and every packed value.
TEST_F(PackerTest, 8bit)
{
    const unsigned int bit_depth = 8;
    psrdada_cpp::effelsberg::edd::pack<bit_depth>(input, output, minV, maxV, stream);

    checkOutputSize(bit_depth);
    checkOutputValues(bit_depth);
}
// Pack the fixture's samples to 16 bits each, then verify the size of the
// output and every packed value.
TEST_F(PackerTest, 16bit)
{
    const unsigned int bit_depth = 16;
    psrdada_cpp::effelsberg::edd::pack<bit_depth>(input, output, minV, maxV, stream);

    checkOutputSize(bit_depth);
    checkOutputValues(bit_depth);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment