diff --git a/psrdada_cpp/effelsberg/edd/Packer.cuh b/psrdada_cpp/effelsberg/edd/Packer.cuh index f5607de63662f7318311b14601c8febafeaeaad6..e0fb321511db5790367b49426938f6b07faadc9a 100644 --- a/psrdada_cpp/effelsberg/edd/Packer.cuh +++ b/psrdada_cpp/effelsberg/edd/Packer.cuh @@ -11,6 +11,44 @@ namespace edd { namespace kernels { +// convert a float to an int32 clipped to minv, maxv and with a maxium +// bit_depth. For an input_bit_depth of 2 and 4 the loop is faster than fmin, +// fmax +template <unsigned int input_bit_depth> +__device__ __forceinline__ uint32_t convert32(float inp, float maxV, float minV, float level) +{ + uint32_t p = 0; + #pragma unroll + for (int k = 1; k < (1 << input_bit_depth); k++) { + p += (inp > ((k * level) + minV)); + } // this is more efficient than fmin, fmax for clamp and cast. + return p; +} + +template <> +__device__ __forceinline__ uint32_t convert32<8>(float inp, float maxV, float minV, float level) +{ + inp -= minV; + inp /= level; + inp = fminf(inp, ((1 << 8)- 1)); + inp = fmaxf(inp, 0); + uint32_t p = uint32_t (inp); + return p; +} + +template <> +__device__ __forceinline__ uint32_t convert32<16>(float inp, float maxV, float minV, float level) +{ + inp -= minV; + inp /= level; + inp = fminf(inp, ((1 << 16)- 1)); + inp = fmaxf(inp, 0); + uint32_t p = uint32_t (inp); + return p; +} + + + // pack float to 2,4,8,16 bit integers with linear scaling template <unsigned int input_bit_depth> __global__ void packNbit(const float *__restrict__ input, @@ -32,12 +70,7 @@ __global__ void packNbit(const float *__restrict__ input, // Load new input value, clip and convert to Nbit integer const float inp = input[i + j * blockDim.x]; - uint32_t p = 0; - #pragma unroll - for (int k = 1; k < (1 << input_bit_depth); k++) { - p += (inp > ((k * l) + minV)); - } // this is more efficient than fmin, fmax for clamp and cast. - + uint32_t p = convert32<input_bit_depth>(inp, maxV, minV, l); // store in shared memory with linear access tmp[threadIdx.x] += p << (input_bit_depth * j); } @@ -64,6 +97,7 @@ __global__ void packNbit(const float *__restrict__ input, __syncthreads(); } } + } // namespace kernels diff --git a/psrdada_cpp/effelsberg/edd/src/VLBI_cli.cu b/psrdada_cpp/effelsberg/edd/src/VLBI_cli.cu index 8c92df1365f002ebb0e21db79a4062d996377db7..dfe82c39371e0df23d5a09bc4d4b82e15f063c08 100644 --- a/psrdada_cpp/effelsberg/edd/src/VLBI_cli.cu +++ b/psrdada_cpp/effelsberg/edd/src/VLBI_cli.cu @@ -128,7 +128,7 @@ int main(int argc, char **argv) { vdifHeader.setThreadId(thread_id); vdifHeader.setStationId(station_id); BOOST_LOG_TRIVIAL(warning) << "SETTING FIXED REFERENCE EPOCH AND SECONDS FROM EPOCH!! Should be read from data stream!!"; - vdifHeader.setReferenceEpoch(123); + vdifHeader.setReferenceEpoch(0); vdifHeader.setSecondsFromReferenceEpoch(42); // for first block diff --git a/psrdada_cpp/effelsberg/edd/test/src/PackerTest.cu b/psrdada_cpp/effelsberg/edd/test/src/PackerTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..cc5eac9f76dfb49d9ef1e4432981ec8843808b95 --- /dev/null +++ b/psrdada_cpp/effelsberg/edd/test/src/PackerTest.cu @@ -0,0 +1,142 @@ +#include <time.h> +#include <stdlib.h> +#include "psrdada_cpp/effelsberg/edd/Packer.cuh" + +#include "gtest/gtest.h" + +//TEST(PackerTest, check2bit) +//{ +// std::size_t n = 1024; +// thrust::device_vector<float> input(n); +// thrust::device_vector<uint32_t> output(n); +// +// { +// float minV = -2; +// float maxV = 2; +// +// srand (time(NULL)); +// for (int i =0; i < input.size(); i++) +// { +// input[i] = ((float(rand()) / RAND_MAX) - 0.5) * 2.5 * (maxV-minV) + maxV + minV; +// } +// +// thrust::fill(output.begin(), output.end(), 5); +// cudaStream_t stream; +// cudaStreamCreate(&stream); +// psrdada_cpp::effelsberg::edd::pack<2>(input, output, minV, maxV, stream); +// EXPECT_EQ(output.size(), n / 16); +// +// float step = (maxV - minV) / 3; +// float L2 = minV + step; +// float L3 = minV + 2 * step; +// float L4 = minV + 3 * step; +// +// const size_t nbp = 16; // 16 samples per output value +// for(int i = 0; i < input.size() / nbp; i++) +// { +// uint32_t of = output[i]; +// for (size_t j =0; j< nbp; j++) +// { +// uint32_t a = ((of >> (j *2)) & 3); +// int k = i * nbp + j; +// if (input[k] >= L4) +// EXPECT_EQ(a, 3); +// else if (input[k] >= L3) +// EXPECT_EQ(a, 2); +// else if (input[k] >= L2) +// EXPECT_EQ(a, 1); +// else +// EXPECT_EQ(a, 0); +// } +// } +// } +//} + +class PackerTest: public ::testing::Test +{ + protected: + thrust::device_vector<float> input; + thrust::device_vector<uint32_t> output; + float minV; + float maxV; + cudaStream_t stream; + + void SetUp() override { + input.resize(1024); + minV = -2; + maxV = 2; + + srand (time(NULL)); + for (int i =0; i < input.size(); i++) + { + input[i] = ((float(rand()) / RAND_MAX) - 0.5) * 2.5 * (maxV-minV) + maxV + minV; + } + + cudaStreamCreate(&stream); + } + + void TearDown() + { + cudaStreamDestroy(stream); + } + + void checkOutputSize(unsigned int bit_depth) + { + //SCOPED_TRACE("Input Bitdepth: " << bit_depth ); + EXPECT_EQ(output.size(), input.size() / (32 / bit_depth)); + } + + + void checkOutputValues(unsigned int bit_depth) + { + + float step = (maxV - minV) / ((1 << bit_depth) - 1); + + const size_t nbp = 32 / bit_depth; + for(int i = 0; i < input.size() / nbp; i++) + { + uint32_t of = output[i]; + for (size_t j =0; j< nbp; j++) + { + uint32_t a = ((of >> (j * bit_depth)) & ((1 << bit_depth) - 1)); + int k = i * nbp + j; + + if (input[k] <= minV) + EXPECT_EQ(0, int (a)) << "input[ " << k << "] = " << input[k]; + else if (input[k] >= maxV) + EXPECT_EQ(((1 << bit_depth) - 1), int (a)) << "input[ " << k << "] = " << input[k]; + else + EXPECT_EQ(int((input[k] - minV) / step), int(a)) << "input[ " << k << "] = " << input[k]; + } + } + } + }; + +TEST_F(PackerTest, 2bit) +{ + psrdada_cpp::effelsberg::edd::pack<2>(input, output, minV, maxV, stream); + checkOutputSize(2); + checkOutputValues(2); +} + + +TEST_F(PackerTest, 4bit) +{ + psrdada_cpp::effelsberg::edd::pack<4>(input, output, minV, maxV, stream); + checkOutputSize(4); + checkOutputValues(4); +} + +TEST_F(PackerTest, 8bit) +{ + psrdada_cpp::effelsberg::edd::pack<8>(input, output, minV, maxV, stream); + checkOutputSize(8); + checkOutputValues(8); +} + +TEST_F(PackerTest, 16bit) +{ + psrdada_cpp::effelsberg::edd::pack<16>(input, output, minV, maxV, stream); + checkOutputSize(16); + checkOutputValues(16); +}