Review notes (free text ahead of the first file header):
  * merger.cpp: the operand order inside interleave() is swapped and the arguments at its
    call site in PolnMerger10to8::process() are swapped as well. For that call site the two
    swaps cancel: the packed result is [S0 byte, S1 byte, S0 byte, S1 byte, ...] both before
    and after the patch.
  * merger.cpp: the blank line and the "// namespace ..." comments on the two closing braces
    at the end of the hunk are dropped.
  * merge_tester.cpp: adds a 10-bit packing helper, a hex-dump helper and two GoogleTest
    cases that drive PolnMerger10to8.
  NOTE(review): line breaks, blank context lines and indentation were restored from a
  whitespace-collapsed copy of this patch; confirm the context lines against the target
  tree before applying.

diff --git a/psrdada_cpp/merger/src/merger.cpp b/psrdada_cpp/merger/src/merger.cpp
index 7d844abb96108775d82d882b8a823c39160c421e..6c184dbd18305795dba4e08783e9464e595cf0f4 100644
--- a/psrdada_cpp/merger/src/merger.cpp
+++ b/psrdada_cpp/merger/src/merger.cpp
@@ -14,7 +14,7 @@ namespace {
 uint64_t interleave(uint32_t x, uint32_t y) {
     __m128i xvec = _mm_cvtsi32_si128(x);
     __m128i yvec = _mm_cvtsi32_si128(y);
-    __m128i interleaved = _mm_unpacklo_epi8(yvec, xvec);
+    __m128i interleaved = _mm_unpacklo_epi8(xvec, yvec);
     return _mm_cvtsi128_si64(interleaved);
 }
 
@@ -228,11 +228,10 @@ void PolnMerger10to8::process(char* idata, char* odata, std::size_t size)
                 uint32_t* S1 = reinterpret_cast<uint32_t*>(S1_8bit);
                 for (std::size_t ii = 0; ii < 8; ++ii)
                 {
-                    *D++ = interleave(S1[ii], S0[ii]);
+                    *D++ = interleave(S0[ii], S1[ii]);
                 }
             }
         }
     }
-
-} // namespace merger
-} // namespace psrdada_cpp
+}
+}
diff --git a/psrdada_cpp/merger/test/src/merge_tester.cpp b/psrdada_cpp/merger/test/src/merge_tester.cpp
index ff69cb26d504b284c96db36abcd6a475aaabd99c..edc0336ea51a37e154090fc5b95cff043d403940 100644
--- a/psrdada_cpp/merger/test/src/merge_tester.cpp
+++ b/psrdada_cpp/merger/test/src/merge_tester.cpp
@@ -1,5 +1,54 @@
 #include "psrdada_cpp/merger/test/merge_tester.hpp"
+#include "psrdada_cpp/merger/merger.hpp"
+#include <gtest/gtest.h>
+#include <vector>
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <iomanip>
+#include <sstream>
+namespace {
+
+// Builds one 40-byte block that holds the 10-bit value X repeated 32 times.
+// Fields are written back to back with no padding (32 x 10 bits = 320 bits = 40 bytes),
+// each field most-significant bit first, filling every byte from bit 7 down to bit 0.
+static std::array<uint8_t, 40> pack_constant_block(uint16_t X) {
+    std::array<uint8_t, 40> block = {0};
+    int bitPos = 0;
+    for (int i = 0; i < 32; i++) {
+        for (int b = 0; b < 10; b++) {
+            int bitVal = (X >> (9 - b)) & 1;
+            int pos = bitPos + b;
+            int byteIndex = pos / 8;
+            int bitInByte = 7 - (pos % 8);
+            if (bitVal)
+                block[byteIndex] |= (1 << bitInByte);
+        }
+        bitPos += 10;
+    }
+    return block;
+}
+
+// Debug aid: dumps vec to std::cout in hex, 32 bytes per row, each row led by its start index; no call site appears in this patch.
+static void printVectorHex(const std::vector<char>& vec, const std::string &label) {
+    std::cout << label << " (" << vec.size() << " bytes):\n";
+    const size_t bytesPerLine = 32;
+    for (size_t i = 0; i < vec.size(); i++) {
+        if (i % bytesPerLine == 0) {
+            std::cout << std::setw(4) << std::setfill('0') << i << ": ";
+        }
+        std::cout << std::hex << std::setw(2) << std::setfill('0')
+                  << (static_cast<unsigned int>(static_cast<unsigned char>(vec[i]))) << " ";
+        if ((i + 1) % bytesPerLine == 0)
+            std::cout << std::dec << "\n";
+    }
+    if (vec.size() % bytesPerLine != 0)
+        std::cout << std::dec << "\n";
+}
+
+} // end anonymous namespace
 
 namespace psrdada_cpp {
 namespace merger {
 
@@ -135,8 +184,123 @@ INSTANTIATE_TEST_SUITE_P(ParameterizedTest, PolnMergeTester, ::testing::Values(
 ));
 
 
-} // namespace test
-} // namespace merger
-} // namespace psrdada_cpp
+//---------------------------------------------------
+// Test driver for PolnMerger10to8 (plain class; its methods are called from the TEST cases below)
+//---------------------------------------------------
+class PolnMerge10to8Tester {
+public:
+    // Constant-input case.
+    // Every pol0 block carries the 10-bit value 0 and every pol1 block the 10-bit value 4;
+    // the expected 8-bit output is 0 for pol0 and 1 for pol1 (4 >> 2).
+    void test_constant_input() {
+        const size_t npol = 2;
+        const size_t nsamps = 4096; // samples per polarization.
+        const size_t in_per_pol = 128 * 40; // 128 blocks x 40 bytes = 5120 bytes per polarization.
+        size_t total_input_size = npol * in_per_pol; // 10240 bytes total.
+
+        // Input layout: all 128 pol0 blocks first, then all 128 pol1 blocks.
+        std::vector<char> input(total_input_size, 0);
+        auto block0 = pack_constant_block(0); // For pol0: constant 0.
+        auto block1 = pack_constant_block(4); // For pol1: constant 4.
+        for (size_t rep = 0; rep < 128; rep++) {
+            memcpy(input.data() + rep * 40, block0.data(), 40);
+            memcpy(input.data() + in_per_pol + rep * 40, block1.data(), 40);
+        }
+
+        // Expected output holds npol * nsamps = 8192 bytes,
+        // with the two polarizations alternating byte by byte: [pol0, pol1, pol0, pol1, ...]
+        std::vector<char> expected(npol * nsamps, 0);
+        for (size_t s = 0; s < nsamps; s++) {
+            expected[s * 2] = 0; // from pol0.
+            expected[s * 2 + 1] = 1; // from pol1 (4 >> 2 equals 1).
+        }
+
+        // Run the merger under test.
+        std::vector<char> output(npol * nsamps, 0);
+        PolnMerger10to8 merger(npol, 2); // Second argument is presumably the thread count - TODO confirm against the constructor.
+        merger.process(input.data(), output.data(), total_input_size);
+
+        // Byte-for-byte comparison against the expected buffer.
+        ASSERT_EQ(output.size(), expected.size());
+        for (size_t i = 0; i < output.size(); i++) {
+            ASSERT_EQ(static_cast<unsigned char>(output[i]),
+                      static_cast<unsigned char>(expected[i]))
+                << "Mismatch at index " << i;
+        }
+    }
+
+    // Varied-input case (two heaps).
+    // Block b of every heap carries
+    //   pol0: v0 = (b * 3) mod 128
+    //   pol1: v1 = (b * 3 + 7) mod 128
+    // packed as the 10-bit value (v << 2), so the expected 8-bit output is v itself.
+    void test_varied_input() {
+        const size_t npol = 2;
+        const size_t nsamps = 4096; // 4096 samples per polarization per heap.
+        const size_t blocks = 128; // 128 blocks per polarization per heap.
+        const size_t in_per_pol = blocks * 40; // 5120 bytes per polarization per heap.
+        size_t heaps = 2;
+        size_t total_input_size = heaps * npol * in_per_pol; // For 2 heaps.
+
+        // Input and expected-output buffers; each heap yields npol * nsamps = 8192 output bytes.
+        std::vector<char> input(total_input_size, 0);
+        std::vector<char> expected(heaps * 8192, 0);
+
+        for (size_t h = 0; h < heaps; h++) {
+            size_t heapBase = h * (npol * in_per_pol);
+            size_t outBase = h * 8192; // 8192 bytes per heap.
+            for (size_t b = 0; b < blocks; b++) {
+                // Per-block test values (both stay below 128).
+                uint8_t v0 = (b * 3) % 128;
+                uint8_t v1 = ((b * 3 + 7) % 128);
+                // Pack (v << 2) into each block so that a right shift by 2 gives back v.
+                std::array<uint8_t, 40> block0 = pack_constant_block(v0 << 2);
+                std::array<uint8_t, 40> block1 = pack_constant_block(v1 << 2);
+                // Within a heap: the pol0 blocks come first, then the pol1 blocks.
+                memcpy(input.data() + heapBase + b * 40, block0.data(), 40);
+                memcpy(input.data() + heapBase + in_per_pol + b * 40, block1.data(), 40);
+
+                // Each block index maps to 64 expected bytes: 32 repetitions of [v0, v1].
+                for (size_t i = 0; i < 32; i++) {
+                    size_t outIdx = outBase + b * 64 + 2 * i;
+                    expected[outIdx] = v0;
+                    expected[outIdx + 1] = v1;
+                }
+            }
+        }
+
+        // Output buffer sized for all heaps.
+        size_t total_output_size = heaps * npol * nsamps;
+        std::vector<char> output(total_output_size, 0);
+
+        // Run the merger under test over the whole input.
+        PolnMerger10to8 merger(npol, 2);
+        merger.process(input.data(), output.data(), input.size());
+
+        // Byte-for-byte comparison against the expected buffer.
+        ASSERT_EQ(output.size(), expected.size());
+        for (size_t i = 0; i < output.size(); i++) {
+            ASSERT_EQ(static_cast<unsigned char>(output[i]),
+                      static_cast<unsigned char>(expected[i]))
+                << "Mismatch at index " << i;
+        }
+    }
+};
+
+//---------------------------------------------------------------------
+// GoogleTest entry points for the PolnMerger10to8 cases
+//---------------------------------------------------------------------
+TEST(PolnMerge10to8Tester, ConstantInputDebug) {
+    PolnMerge10to8Tester tester;
+    tester.test_constant_input();
+}
+
+TEST(PolnMerge10to8Tester, VariedInputDebug) {
+    PolnMerge10to8Tester tester;
+    tester.test_varied_input();
+}
+// The earlier PFBMerger and PolnMerger tests in merge_tester.cpp are kept as they were.
+
+}}} // namespace psrdada_cpp::merger::test
 
 