byte-swapping - is this efficient?

I'm looking to implement host-to-network byte swapping (and vice-versa) which can be used seamlessly for any types.

Does anything already exist?

Is this following code efficient? Is there a better way to implement this?

At the moment it supports:
int8_t, uint8_t
int16_t, uint16_t
int32_t, uint32_t
int64_t, uint64_t
float, double

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <boost/type_traits.hpp>
#include <boost/static_assert.hpp>
#include <boost/detail/endian.hpp>
#include <stdexcept>

// Byte orders understood by byte_swap.
//
// network_endian is an alias for big_endian. host_endian is an alias for
// whichever order the build target uses. It is taken from Boost's
// BOOST_LITTLE_ENDIAN / BOOST_BIG_ENDIAN macros when they are defined, and
// otherwise from the __BYTE_ORDER__ family of macros that GCC and Clang
// predefine, so the header is also usable when the Boost macros are absent.
// Compilation stops if neither source identifies the byte order.
enum endianness
{
    little_endian,
    big_endian,
    network_endian = big_endian,

    #if defined(BOOST_LITTLE_ENDIAN)
        host_endian = little_endian
    #elif defined(BOOST_BIG_ENDIAN)
        host_endian = big_endian
    #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
        host_endian = little_endian
    #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
        host_endian = big_endian
    #else
        #error "unable to determine system endianness."
    #endif
};

namespace detail {

// Returns a copy of val whose bytes are in the reverse order.
//
// The object representation is read and written through unsigned char, which
// is allowed to alias any object, so no strict-aliasing rule is broken. Used
// for the floating-point specialisations, where shifts and masks do not apply.
template<typename T>
inline T reverse_object_bytes(T val)
{
    T out;
    const unsigned char* src = reinterpret_cast<const unsigned char*>(&val);
    unsigned char* dst = reinterpret_cast<unsigned char*>(&out);
    for (size_t i = 0; i < sizeof(T); ++i)
        dst[i] = src[sizeof(T) - 1 - i];
    return out;
}

// Functor that reverses the byte order of a value of type T occupying sz bytes.
//
// Primary template: reached only for sizes without a specialisation below;
// calling it throws std::out_of_range("data size").
template<typename T, size_t sz>
struct swap_bytes
{
    inline T operator()(T val)
    {
        throw std::out_of_range("data size");
    }
};

// 16-bit values: exchange the two bytes.
template<typename T>
struct swap_bytes<T, 2>
{
    inline T operator()(T val)
    {
        // work on an unsigned copy so the sign bit of T never takes part in a shift
        const unsigned bits = static_cast<unsigned>(val) & 0xffffu;
        return static_cast<T>((bits >> 8) | ((bits & 0xffu) << 8));
    }
};

// 32-bit values: mirror the four bytes.
template<typename T>
struct swap_bytes<T, 4>
{
    inline T operator()(T val)
    {
        // unsigned copy: the original shifted (val & 0xff) << 24 in signed int
        // for signed T, which moves a set bit into the sign position
        const uint32_t bits = static_cast<uint32_t>(val);
        return static_cast<T>(( bits >> 24)                |
                              ((bits >>  8) & 0x0000ff00u) |
                              ((bits <<  8) & 0x00ff0000u) |
                              ( bits << 24));
    }
};

// float: reverse the bytes of the object representation.
// The result carries swapped bits and is only meaningful once swapped back.
// NOTE(review): presumably the bit pattern survives being returned as a float
// on the targets of interest - confirm for platforms that alter NaN payloads.
template<>
struct swap_bytes<float, 4>
{
    inline float operator()(float val)
    {
        return reverse_object_bytes(val);
    }
};

// 64-bit values: mirror the eight bytes.
template<typename T>
struct swap_bytes<T, 8>
{
    inline T operator()(T val)
    {
        const uint64_t bits = static_cast<uint64_t>(val);
        return static_cast<T>(( bits >> 56)                        |
                              ((bits >> 40) & 0x000000000000ff00ull) |
                              ((bits >> 24) & 0x0000000000ff0000ull) |
                              ((bits >>  8) & 0x00000000ff000000ull) |
                              ((bits <<  8) & 0x000000ff00000000ull) |
                              ((bits << 24) & 0x0000ff0000000000ull) |
                              ((bits << 40) & 0x00ff000000000000ull) |
                              ( bits << 56));
    }
};

// double: reverse the bytes of the object representation (see the float
// specialisation for the caveats).
template<>
struct swap_bytes<double, 8>
{
    inline double operator()(double val)
    {
        return reverse_object_bytes(val);
    }
};

} // namespace detail

template<endianness from, endianness to, class T>
inline T byte_swap(T value)
{
    // ensure the data is only 2, 4 or 8 bytes
    BOOST_STATIC_ASSERT(sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
    // ensure we're only swapping arithmetic types
    BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);

    return (from == to) ? value : detail::swap_bytes<T, sizeof(T)>()(value);
}

// Signed 8-bit overload: a single byte has no byte order, so value is
// returned as-is for every combination of `from` and `to`.
template<endianness from, endianness to>
inline int8_t byte_swap(int8_t value) { return value; }

// Unsigned 8-bit overload: a single byte has no byte order, so value is
// returned as-is for every combination of `from` and `to`.
template<endianness from, endianness to>
inline uint8_t byte_swap(uint8_t value) { return value; }
Well, you're breaking the rules by including one of the detail/ headers directly...

But that aside, you should be able to optimize more by performing the from == to check at compile
time.

Maybe you can try to use FOX Toolkit's FXStream class?

(See http://www.fox-toolkit.org/serialization.html for more details)
Thanks (again) jsmith! :)

I can't find a non-detail place where endian.hpp is included

I've done a grep for endian.hpp through my boost source tree

./spirit/home/qi/binary/binary.hpp:14:#include <boost/spirit/home/support/detail/integer/endian.hpp>
./spirit/home/qi/binary/meta_grammar.hpp:16:#include <boost/spirit/home/support/detail/integer/endian.hpp>
./spirit/home/support/detail/integer/endian.hpp:1:// Boost endian.hpp header file (proposed) ----------------------------------//
./spirit/home/support/detail/integer/endian.hpp:22:#include <boost/detail/endian.hpp>
./spirit/home/support/detail/math/detail/fp_traits.hpp:20:#include <boost/detail/endian.hpp>
./spirit/home/karma/binary/binary.hpp:14:#include <boost/spirit/home/support/detail/integer/endian.hpp>
./spirit/home/karma/binary/meta_grammar.hpp:16:#include <boost/spirit/home/support/detail/integer/endian.hpp>
./detail/limits.hpp:49:#include <boost/detail/endian.hpp>
./detail/endian.hpp:69:# error The file boost/detail/endian.hpp needs to be set up for your CPU type.
./math/special_functions/detail/fp_traits.hpp:27:#include <boost/detail/endian.hpp>


Result line no 3: // Boost endian.hpp header file (proposed)

Should I take this to mean that endian.hpp has not yet been released?

Should I avoid using it?

I specialised on equal template parameters for the endianness to make that a compile-time check as you suggested - thanks.

~Steve
Your code is bugged.

You should always mask AFTER you shift, not before. Otherwise you have problems with shifting the sign bit.

Consider the following:

1
2
3
4
5
6
7
8
9
10
11
// Quoted 32-bit specialisation under discussion: each byte of val is masked
// out first and then shifted to its mirrored position.
// With a 32-bit int the literal 0xff000000 does not fit in int and so has an
// unsigned type; the & on the marked line is therefore done in unsigned
// arithmetic and the >> 24 that follows does not replicate a sign bit.
template<typename T>
struct swap_bytes<T, 4>
{
    inline T operator()(T val)
    {
        return ((((val) & 0xff000000) >> 24) |  // <- This line
                (((val) & 0x00ff0000) >>  8) |
                (((val) & 0x0000ff00) <<  8) |
                (((val) & 0x000000ff) << 24));
    }
};


Say for example you do the following:

1
2
3
4
// Example meant to expose a sign-extension problem: only the top (sign) bit of
// foo is set before the swap. xxx and yyy are placeholders for the two
// endianness template arguments.
int32_t foo = 0x80000000;
foo = byte_swap<xxx,yyy>( foo );

// NOTE(review): the test run posted later in this thread prints 80 for this
// input, not ffffff80.
cout << hex << foo; // SURPRISE, prints "ffffff80" 
Last edited on
Are you sure?

1
2
3
4
5
6
7
8
9
10
    // Round-trip check for a value with only the sign bit set: print foo, swap
    // network -> host, print again, swap host -> network, print once more.
    // print_buf presumably dumps the raw bytes of foo - it is defined outside
    // this snippet.
    int32_t foo = 0x80000000;

    logging::cout << "swapping foo " << std::hex << foo << std::endl;
    cmt::utils::print_buf((char*)&foo, sizeof(foo));
    foo = byte_swap<network_endian, host_endian>(foo);
    logging::cout << "swapped foo network to host " << std::hex << foo << std::endl;
    cmt::utils::print_buf((char*)&foo, sizeof(foo));
    // swapping back must restore the original value
    foo = byte_swap<host_endian, network_endian>(foo);
    logging::cout << "swapped foo host to network " << std::hex << foo << std::endl;
    cmt::utils::print_buf((char*)&foo, sizeof(foo));



swapping foo 80000000
0 0000 0080 ....
swapped foo network to host 80
0 8000 0000 ....
swapped foo host to network 80000000
0 0000 0080 ....
hrm....

I just tried it out and it looks like I was mistaken.

1
2
3
4
5
// Shows why masking before shifting avoided the sign problem here (the
// commented results assume a 32-bit int).
int food = 0xFF000000;

// The literal 0xFF000000 does not fit in int, so it has an unsigned type; food
// is converted to unsigned for the & and the shift brings in zero bits.
int a = (food & 0xFF000000) >> 24;    // 0x000000FF
// however...
// Here food stays a negative signed int, so the shift copies the sign bit.
int b = food >> 24;  // 0xFFFFFFFF 


I guess the 0xFF000000 literal has an unsigned type, so the & (and the shift that follows it) is carried out in unsigned arithmetic, even though food is signed.

So I guess it's not bugged. I'd still be wary though, but maybe I'm just paranoid.
Last edited on
Final code - tests show it to be pretty efficient...

I also made use of the builtin swap if using gcc - my tests show a marginal improvement in efficiency (like REALLY marginal, over 10 000 000 iterations)

Thanks for the help folks

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include <boost/type_traits.hpp>
#include <boost/static_assert.hpp>
#include <boost/detail/endian.hpp>
#include <stdexcept>

// Little-endian operating systems:
//---------------------------------
// Linux on x86, x64, Alpha and Itanium
// Mac OS on x86, x64
// Solaris on x86, x64, PowerPC
// Tru64 on Alpha
// Windows on x86, x64 and Itanium

// Big-endian operating systems:
//------------------------------
// AIX on POWER
// AmigaOS on PowerPC and 680x0
// HP-UX on Itanium and PA-RISC
// Linux on MIPS, SPARC, PA-RISC, POWER, PowerPC, and 680x0
// Mac OS on PowerPC and 680x0
// Solaris on SPARC

// Byte orders understood by byte_swap.
//
// network_endian is an alias for big_endian. host_endian is an alias for
// whichever order the build target uses. It is taken from Boost's
// BOOST_LITTLE_ENDIAN / BOOST_BIG_ENDIAN macros when they are defined, and
// otherwise from the __BYTE_ORDER__ family of macros that GCC and Clang
// predefine, so the header is also usable when the Boost macros are absent.
// Compilation stops if neither source identifies the byte order.
enum endianness
{
    little_endian,
    big_endian,
    network_endian = big_endian,

    #if defined(BOOST_LITTLE_ENDIAN)
        host_endian = little_endian
    #elif defined(BOOST_BIG_ENDIAN)
        host_endian = big_endian
    #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
        host_endian = little_endian
    #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
        host_endian = big_endian
    #else
        #error "unable to determine system endianness"
    #endif
};

namespace detail {

// Returns a copy of val whose bytes are in the reverse order.
//
// The object representation is read and written through unsigned char, which
// is allowed to alias any object, so no strict-aliasing rule is broken. Used
// for the floating-point specialisations, where shifts and masks do not apply.
template<typename T>
inline T reverse_object_bytes(T val)
{
    T out;
    const unsigned char* src = reinterpret_cast<const unsigned char*>(&val);
    unsigned char* dst = reinterpret_cast<unsigned char*>(&out);
    for (size_t i = 0; i < sizeof(T); ++i)
        dst[i] = src[sizeof(T) - 1 - i];
    return out;
}

// Functor that reverses the byte order of a value of type T occupying sz bytes.
//
// Primary template: reached only for sizes without a specialisation below;
// calling it throws std::out_of_range("data size").
template<typename T, size_t sz>
struct swap_bytes
{
    inline T operator()(T val)
    {
        throw std::out_of_range("data size");
    }
};

// single byte: nothing to reorder
template<typename T>
struct swap_bytes<T, 1>
{
    inline T operator()(T val)
    {
        return val;
    }
};

template<typename T>
struct swap_bytes<T, 2> // for 16 bit
{
    inline T operator()(T val)
    {
        // work on an unsigned copy so the sign bit of T never takes part in a shift
        const unsigned bits = static_cast<unsigned>(val) & 0xffffu;
        return static_cast<T>((bits >> 8) | ((bits & 0xffu) << 8));
    }
};

template<typename T>
struct swap_bytes<T, 4> // for 32 bit
{
    inline T operator()(T val)
    {
        // unsigned copy: the original shifted (val & 0xff) << 24 in signed int
        // for signed T, which moves a set bit into the sign position
        const uint32_t bits = static_cast<uint32_t>(val);

        // The builtin is compiled in only when _USE_BUILTIN_BSWAPS is defined
        // before this code is seen. Clang is accepted explicitly because it
        // reports itself as GCC 4.2 and would otherwise fail the version test.
        #if defined(_USE_BUILTIN_BSWAPS) && (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4)))
            return static_cast<T>(__builtin_bswap32(bits));
        #else
            return static_cast<T>(( bits >> 24)                |
                                  ((bits >>  8) & 0x0000ff00u) |
                                  ((bits <<  8) & 0x00ff0000u) |
                                  ( bits << 24));
        #endif
    }
};

// float: reverse the bytes of the object representation.
// The result carries swapped bits and is only meaningful once swapped back.
// NOTE(review): presumably the bit pattern survives being returned as a float
// on the targets of interest - confirm for platforms that alter NaN payloads.
template<>
struct swap_bytes<float, 4>
{
    inline float operator()(float val)
    {
        return reverse_object_bytes(val);
    }
};

template<typename T>
struct swap_bytes<T, 8> // for 64 bit
{
    inline T operator()(T val)
    {
        const uint64_t bits = static_cast<uint64_t>(val);

        // same opt-in and compiler test as the 32 bit case
        #if defined(_USE_BUILTIN_BSWAPS) && (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4)))
            return static_cast<T>(__builtin_bswap64(bits));
        #else
            return static_cast<T>(( bits >> 56)                        |
                                  ((bits >> 40) & 0x000000000000ff00ull) |
                                  ((bits >> 24) & 0x0000000000ff0000ull) |
                                  ((bits >>  8) & 0x00000000ff000000ull) |
                                  ((bits <<  8) & 0x000000ff00000000ull) |
                                  ((bits << 24) & 0x0000ff0000000000ull) |
                                  ((bits << 40) & 0x00ff000000000000ull) |
                                  ( bits << 56));
        #endif
    }
};

// double: reverse the bytes of the object representation (see the float
// specialisation for the caveats).
template<>
struct swap_bytes<double, 8>
{
    inline double operator()(double val)
    {
        return reverse_object_bytes(val);
    }
};

// Decides at compile time whether converting a T from byte order `from` to
// byte order `to` needs the bytes reversed. The general case hands the value
// to swap_bytes for the size of T.
template<endianness from, endianness to, class T>
struct do_byte_swap
{
    inline T operator()(T value)
    {
        swap_bytes<T, sizeof(T)> reverse;
        return reverse(value);
    }
};

// specialisations for identical source and destination endianness:
// the value is handed back untouched

template<class T>
struct do_byte_swap<little_endian, little_endian, T>
{
    inline T operator()(T value)
    {
        return value;
    }
};

template<class T>
struct do_byte_swap<big_endian, big_endian, T>
{
    inline T operator()(T value)
    {
        return value;
    }
};

} // namespace detail

template<endianness from, endianness to, class T>
inline T byte_swap(T value)
{
    // ensure the data is only 1, 2, 4 or 8 bytes
    BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
    // ensure we're only swapping arithmetic types
    BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);

    return detail::do_byte_swap<from, to, T>()(value);
}

Are you sure the builtin swap is actually used? My tests show that it is much faster. To be precise, with the following testing method each loop iteration takes exactly two CPU cycles with the builtin bswap. With the manual swap it takes six CPU cycles per iteration. So, by my estimate, bswap is at least five times faster (at least on my Phenom II X4).

1
2
3
  // Micro-benchmark from the thread: byte-swaps every i in [0, 2000000000) and
  // adds the results up; returning sum presumably keeps the calls from being
  // optimised away.
  // NOTE(review): sum is a plain int, so the accumulation presumably overflows
  // (undefined behaviour for signed int) - confirm before trusting the timings.
  int sum=0;
  for (int i=0;i<2000000000;i++)sum+=byte_swap<little_endian,big_endian>(i);
  return sum;
Topic archived. No new replies allowed.