diff --git a/psrdada_cpp/merger/src/merger.cpp b/psrdada_cpp/merger/src/merger.cpp
index 7d844abb96108775d82d882b8a823c39160c421e..6c184dbd18305795dba4e08783e9464e595cf0f4 100644
--- a/psrdada_cpp/merger/src/merger.cpp
+++ b/psrdada_cpp/merger/src/merger.cpp
@@ -14,7 +14,7 @@ namespace {
     uint64_t interleave(uint32_t x, uint32_t y) {
         __m128i xvec = _mm_cvtsi32_si128(x);
         __m128i yvec = _mm_cvtsi32_si128(y);
-        __m128i interleaved = _mm_unpacklo_epi8(yvec, xvec);
+        __m128i interleaved = _mm_unpacklo_epi8(xvec, yvec);  // byte-wise zip: x0,y0,x1,y1,x2,y2,x3,y3 in the low 64 bits
         return _mm_cvtsi128_si64(interleaved);
     }
 
@@ -228,11 +228,10 @@ void PolnMerger10to8::process(char* idata, char* odata, std::size_t size)
             uint32_t* S1 = reinterpret_cast<uint32_t*>(S1_8bit);
             for (std::size_t ii = 0; ii < 8; ++ii)
             {
-                *D++ = interleave(S1[ii], S0[ii]);
+                *D++ = interleave(S0[ii], S1[ii]);
             }
         }
     }
 }
-
-} // namespace merger
-} // namespace psrdada_cpp
+} // namespace merger
+} // namespace psrdada_cpp
diff --git a/psrdada_cpp/merger/test/src/merge_tester.cpp b/psrdada_cpp/merger/test/src/merge_tester.cpp
index ff69cb26d504b284c96db36abcd6a475aaabd99c..edc0336ea51a37e154090fc5b95cff043d403940 100644
--- a/psrdada_cpp/merger/test/src/merge_tester.cpp
+++ b/psrdada_cpp/merger/test/src/merge_tester.cpp
@@ -1,5 +1,54 @@
 #include "psrdada_cpp/merger/test/merge_tester.hpp"
+#include "psrdada_cpp/merger/merger.hpp"
+#include <gtest/gtest.h>
+#include <vector>
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <iomanip>
+#include <sstream>
 
+namespace {
+
+// Builds a 40-byte block holding 32 back-to-back copies of the low 10 bits of X.
+// Fields are written MSB-first: bit 9 of the first field lands in bit 7 of byte 0,
+// and every following bit takes the next lower bit position, spilling into the next byte.
+static std::array<uint8_t, 40> pack_constant_block(uint16_t X) {
+    constexpr int kFieldBits = 10;
+    constexpr int kFields = 32;
+    std::array<uint8_t, 40> packed{};
+    for (int pos = 0; pos < kFields * kFieldBits; ++pos) {
+        // Bit offset inside the current field, counted from the field's MSB.
+        const int offsetInField = pos % kFieldBits;
+        const bool isSet = ((X >> (kFieldBits - 1 - offsetInField)) & 1) != 0;
+        if (!isSet) {
+            continue;
+        }
+        // Bytes are filled from their most-significant bit downwards.
+        packed[pos / 8] = static_cast<uint8_t>(packed[pos / 8] | (0x80u >> (pos % 8)));
+    }
+    return packed;
+}
+
+// Debug aid: prints `label` and the byte count, then `vec` as hex, 32 bytes per row behind a decimal row offset; std::cout formatting is restored on exit.
+static void printVectorHex(const std::vector<char>& vec, const std::string &label) {
+    const std::ios_base::fmtflags savedFlags = std::cout.flags();  // hex/dec selection is sticky on the stream
+    const char savedFill = std::cout.fill();                       // ... and so is the fill character
+    std::cout << label << " (" << vec.size() << " bytes):\n";
+    const size_t bytesPerLine = 32;
+    for (size_t i = 0; i < vec.size(); i++) {
+        if (i % bytesPerLine == 0)  // row start: zero-padded decimal offset; also sets the '0' fill used by the byte fields
+            std::cout << std::dec << std::setw(4) << std::setfill('0') << i << ": ";
+        std::cout << std::hex << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(vec[i])) << " ";
+        if ((i + 1) % bytesPerLine == 0 || i + 1 == vec.size())  // end of a full row, or of the data
+            std::cout << "\n";
+    }
+    // Undo the sticky manipulators so later std::cout output is not zero-padded or left in hex.
+    std::cout.flags(savedFlags);
+    std::cout.fill(savedFill);
+}
+
+} // end anonymous namespace
 
 namespace psrdada_cpp {
 namespace merger {
@@ -135,8 +184,123 @@ INSTANTIATE_TEST_SUITE_P(ParameterizedTest, PolnMergeTester, ::testing::Values(
 ));
 
 
-} // namespace test
-} // namespace merger
-} // namespace psrdada_cpp
+//---------------------------------------------------
+// Tester for PolnMerger10to8: feeds packed 10-bit blocks and checks the interleaved 8-bit output
+//---------------------------------------------------
+class PolnMerge10to8Tester {
+public:
+    // Constant-input case: a single heap laid out as [pol0 | pol1].
+    // Every pol0 block carries the 10-bit value 0 and every pol1 block the 10-bit value 4;
+    // the expected output alternates 0 (pol0) and 1 (pol1, i.e. 4 >> 2).
+    void test_constant_input() {
+        const size_t npol = 2;
+        const size_t nsamps = 4096;                        // samples per polarization.
+        const size_t in_per_pol = 128 * 40;                // 128 blocks of 40 bytes = 5120 bytes per polarization.
+        const size_t total_input_size = npol * in_per_pol; // 10240 bytes total.
+
+        // Build input vector with layout: [pol0 | pol1]
+        std::vector<char> input(total_input_size, 0);
+        const auto block0 = pack_constant_block(0);        // For pol0: constant 0.
+        const auto block1 = pack_constant_block(4);        // For pol1: constant 4.
+        for (size_t rep = 0; rep < 128; rep++) {
+            std::memcpy(input.data() + rep * 40, block0.data(), block0.size());
+            std::memcpy(input.data() + in_per_pol + rep * 40, block1.data(), block1.size());
+        }
+
+        // Expected output: npol * nsamps = 8192 bytes.
+        // Output ordering is interleaved: [pol0, pol1, pol0, pol1, ...]
+        std::vector<char> expected(npol * nsamps, 0);
+        for (size_t s = 0; s < nsamps; s++) {
+            expected[s * 2]     = 0;  // from pol0.
+            expected[s * 2 + 1] = 1;  // from pol1 (4 >> 2 equals 1).
+        }
+
+        // Process using PolnMerger10to8.
+        std::vector<char> output(npol * nsamps, 0);
+        PolnMerger10to8 merger(npol, 2);  // Using 2 threads.
+        merger.process(input.data(), output.data(), total_input_size);
+
+        // Compare results.
+        ASSERT_EQ(output.size(), expected.size());
+        for (size_t i = 0; i < output.size(); i++) {
+            ASSERT_EQ(static_cast<unsigned char>(output[i]),
+                      static_cast<unsigned char>(expected[i]))
+                << "Mismatch at index " << i;
+        }
+    }
+
+    // Varied-input case over two heaps, each laid out as [pol0 | pol1].
+    // For block b of every heap:
+    //   pol0 carries v0 = (b * 3) mod 128,
+    //   pol1 carries v1 = (b * 3 + 7) mod 128.
+    // Blocks are packed with (v << 2) so that the expected 8-bit output value is v again.
+    void test_varied_input() {
+        const size_t npol = 2;
+        const size_t nsamps = 4096;               // 4096 samples per polarization per heap.
+        const size_t blocks = 128;                // 128 blocks per heap.
+        const size_t in_per_pol = blocks * 40;    // 5120 bytes per polarization per heap.
+        const size_t heaps = 2;
+        const size_t total_input_size = heaps * npol * in_per_pol;  // For 2 heaps.
+
+        // Expected output holds npol * nsamps = 8192 bytes per heap (same sizing as the output buffer below).
+        std::vector<char> input(total_input_size, 0);
+        std::vector<char> expected(heaps * npol * nsamps, 0);
+
+        for (size_t h = 0; h < heaps; h++) {
+            const size_t heapBase = h * (npol * in_per_pol);
+            const size_t outBase  = h * (npol * nsamps); // 8192 bytes per heap.
+            for (size_t b = 0; b < blocks; b++) {
+                // Compute varying values (always < 128, so the narrowing casts are lossless).
+                const uint8_t v0 = static_cast<uint8_t>((b * 3) % 128);
+                const uint8_t v1 = static_cast<uint8_t>((b * 3 + 7) % 128);
+                // Pack each block with (v << 2) so that (>>2) recovers v.
+                const std::array<uint8_t, 40> block0 = pack_constant_block(static_cast<uint16_t>(v0 << 2));
+                const std::array<uint8_t, 40> block1 = pack_constant_block(static_cast<uint16_t>(v1 << 2));
+                // Copy into input vector.
+                std::memcpy(input.data() + heapBase + b * 40, block0.data(), block0.size());
+                std::memcpy(input.data() + heapBase + in_per_pol + b * 40, block1.data(), block1.size());
+
+                // Each block produces 64 bytes: interleaved 32 pairs [v0, v1].
+                for (size_t i = 0; i < 32; i++) {
+                    const size_t outIdx = outBase + b * 64 + 2 * i;
+                    expected[outIdx]     = static_cast<char>(v0);
+                    expected[outIdx + 1] = static_cast<char>(v1);
+                }
+            }
+        }
+
+        // Allocate output buffer.
+        const size_t total_output_size = heaps * npol * nsamps;
+        std::vector<char> output(total_output_size, 0);
+
+        // Process the input.
+        PolnMerger10to8 merger(npol, 2);
+        merger.process(input.data(), output.data(), input.size());
+
+        // Compare outputs.
+        ASSERT_EQ(output.size(), expected.size());
+        for (size_t i = 0; i < output.size(); i++) {
+            ASSERT_EQ(static_cast<unsigned char>(output[i]),
+                      static_cast<unsigned char>(expected[i]))
+                << "Mismatch at index " << i;
+        }
+    }
+};
+
+//---------------------------------------------------------------------
+// GoogleTest entry points for the PolnMerger10to8 checks
+//---------------------------------------------------------------------
+TEST(PolnMerge10to8Tester, ConstantInputDebug) {
+    // Runs the constant-input check on a fresh tester instance.
+    PolnMerge10to8Tester().test_constant_input();
+}
+
+TEST(PolnMerge10to8Tester, VariedInputDebug) {
+    // Runs the varied-input check on a fresh tester instance.
+    PolnMerge10to8Tester().test_varied_input();
+}
 
+// The pre-existing PFBMerger and PolnMerger tests remain unchanged in this file (merge_tester.cpp).
+  
+}}} // namespace psrdada_cpp::merger::test