is my custom transformation iterator ok?

Hi all, here's another really weird question for you all.

I'm playing around with Unicode encodings (UTF-8, UTF-16, and UTF-32), and I have written some simple routines to encode/decode using iterators. For example:
1
2
3
4
5
6
    template <typename InputWordIterator>
    uchar decode_utf16(
      const InputWordIterator& begin, InputWordIterator end
      ) {
      ...
      }

I did this with the idea in mind that I can create a custom iterator class that will handle endianness transformation from a given bytestream iterator.

However, now that I'm trying to write the custom iterator class, I think I'm over my head.

I'd like for it to have the following characteristics:

»
Have the same iterator_category as the reference iterator
»
Have a uchar (a 32-bit Unicode character type) value_type
»
Properly pointer/reference said type to affect the reference iterator data
»
Properly interface with the STL and std::iterator_traits <>

I admit I don't understand the inner workings of iterators that well. This is what I have so far:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
    //------------------------------------------------------------------------
    // This class handles 'uchar' references for the endianness transformation
    // iterator class that follows.
    //
    template <bool IsBigEndian, unsigned Size, typename Iterator>
    struct uchar_reference_t
      {
      Iterator& iter;

      //......................................................................
      uchar_reference_t( Iterator& iter ):
        iter( iter )
        { }

      //......................................................................
      operator uchar() const
        {
        Iterator* p_iter = &iter;
        uchar     result = 0;
        if (IsBigEndian)
          for (unsigned n = 0; n < Size; n++)
            result = (result << 8) | **p_iter++;
        else
          for (unsigned n = 0; n < Size; n++)
            result = ((**p_iter++) << (n * 8)) | result;
        return result;
        }

      //......................................................................
      uchar_reference_t& operator = ( uchar value )
        {
        Iterator* p_iter = &iter;
        if (IsBigEndian)
          for (unsigned n = 0; n < Size; n++)
            **p_iter++ = (value >> ((Size - 1 - n) * 8)) & 0xFF;
        else
          for (unsigned n = 0; n < Size; n++)
            **p_iter++ = (value >> (n * 8)) & 0xFF;
        return *this;
        }
      };

    //------------------------------------------------------------------------
    // This class transforms a bytestream iterator into a word or doubleword
    // iterator. The bytestream has a known endianness (big or little). The
    // resulting iterator returns whole values (in host endian order, of
    // course).
    //
    template <bool IsBigEndian, unsigned Size, typename Iterator>
    class endian_iterator:
      public std::iterator <
        typename Iterator::iterator_category,
        uchar,
        typename Iterator::difference_type,
        uchar_reference_t <IsBigEndian, Size, Iterator> *,
        uchar_reference_t <IsBigEndian, Size, Iterator> &
        >
      {
      public:
        typedef uchar_reference_t <IsBigEndian, Size, Iterator> uchar_ref_type;
        typedef endian_iterator   <IsBigEndian, Size, Iterator> this_iterator_type;
        typedef Iterator                                        reference_iterator_type;

        typedef typename Iterator::iterator_category            iterator_category;
        typedef uchar                                           value_type;
        typedef typename Iterator::difference_type              difference_type;
        typedef uchar_ref_type*                                 pointer;
        typedef uchar_ref_type&                                 reference;

        endian_iterator( Iterator& iter = Iterator() ):
          uref( iter )
          { }

        endian_iterator( const this_iterator_type& iter ):
          uref( iter.uref )
          { }

        this_iterator_type& operator = ( const this_iterator_type& iter )
          {
          uref = iter.uref;
          return *this;
          }

        this_iterator_type& operator ++ () // ++prefix
          {
          for (unsigned n = 0; n < Size; n++) ++uref.iter;
          return *this;
          }
        this_iterator_type operator ++ (int) // postfix++
          {
          this_iterator_type result( *this );
          for (unsigned n = 0; n < Size; n++) uref.iter++;
          return result;
          }

        this_iterator_type& operator -- () // --prefix
          {
          for (unsigned n = 0; n < Size; n++) --uref.iter;
          return *this;
          }
        this_iterator_type operator -- (int) // postfix--
          {
          this_iterator_type result( *this );
          for (unsigned n = 0; n < Size; n++) uref.iter--;
          return result;
          }

        uchar_ref_type& operator *  () { return  uref; }
        uchar_ref_type* operator -> () { return &uref; }

        this_iterator_type  operator +  ( difference_type n ) { return endian_iterator( uref.iter + (n * Size) ); }
        this_iterator_type  operator -  ( difference_type n ) { return endian_iterator( uref.iter - (n * Size) ); }
        this_iterator_type& operator += ( difference_type n ) { uref.iter += n * Size; }
        this_iterator_type& operator -= ( difference_type n ) { uref.iter -= n * Size; }

        bool operator == ( const this_iterator_type& iter ) { return uref.iter == iter.uref.iter; }
        bool operator != ( const this_iterator_type& iter ) { return uref.iter != iter.uref.iter; }
        bool operator <  ( const this_iterator_type& iter ) { return uref.iter < iter.uref.iter; }
        bool operator >  ( const this_iterator_type& iter ) { return uref.iter > iter.uref.iter; }
        bool operator <= ( const this_iterator_type& iter ) { return !(operator > ( iter )); }
        bool operator >= ( const this_iterator_type& iter ) { return !(operator < ( iter )); }

        uchar_ref_type& operator [] ( difference_type n ) { return uchar_ref_type( uref.iter + (n * Size) ); }

      private:
        uchar_ref_type uref;
      };

    #define ADDOP( op ) \
      template <bool IsBigEndian, unsigned Size, typename Iterator> \
      endian_iterator <IsBigEndian, Size, Iterator> \
      operator op ( \
        typename Iterator::difference_type                   n, \
        const endian_iterator <IsBigEndian, Size, Iterator>& iter \
        ) { \
        return iter op n; \
        }
    ADDOP( + )
    ADDOP( - )
    #undef ADDOP

    #define ITER( name, isbe, size ) \
      template <typename Iterator> \
      struct name ## _iterator: public endian_iterator <isbe, size, Iterator>     \
        {                                                                         \
        typedef uchar_reference_t <isbe, size, Iterator> uchar_ref_type;          \
        typedef endian_iterator   <isbe, size, Iterator> this_iterator_type;      \
        typedef Iterator                                 reference_iterator_type; \
                                                                                  \
        typedef typename Iterator::iterator_category  iterator_category;          \
        typedef uchar                                 value_type;                 \
        typedef typename Iterator::difference_type    difference_type;            \
        typedef uchar_ref_type*                       pointer;                    \
        typedef uchar_ref_type&                       reference;                  \
        };
    ITER( utf16be, true,  2 )
    ITER( utf16le, false, 2 )
    ITER( utf32be, true,  4 )
    ITER( utf32le, false, 4 )
    #undef ITER 

I'm not entirely certain about line 109.

Also, I'm not sure if I got everything const-correct. (It compiles fine with "g++ -Wall -ansi -pedantic".)

Any insights on how to improve the code or any errors I don't see?

Thanks!
comparsion operators
bool operator == ( const this_iterator_type& iter ) { return uref.iter == iter.uref.iter; }
etc..., and
1
2
this_iterator_type  operator +  ( difference_type n ) { return endian_iterator( uref.iter + (n * Size) ); }
this_iterator_type  operator -  ( difference_type n ) { return endian_iterator( uref.iter - (n * Size) ); }

should be const.

I think you should also have a const pair for
1
2
3
4
5
uchar_ref_type& operator *  () { return  uref; }
uchar_ref_type* operator -> () { return &uref; }

const uchar_ref_type& operator *  () const { return  uref; }
const uchar_ref_type* operator -> () const { return &uref; }

and for
1
2
uchar_ref_type& operator [] ( difference_type n ) { return uchar_ref_type( uref.iter + (n * Size) ); }
const uchar_ref_type& operator [] ( difference_type n ) const { return uchar_ref_type( uref.iter + (n * Size) ); }
Last edited on
Yoinks! You're right! I'll fix those above here:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
    //------------------------------------------------------------------------
    // This class handles 'uchar' references for the endianness transformation
    // iterator class that follows.
    //
    template <bool IsBigEndian, unsigned Size, typename Iterator>
    struct uchar_reference_t
      {
      Iterator& iter;

      //......................................................................
      uchar_reference_t( Iterator& iter ):
        iter( iter )
        { }

      //......................................................................
      operator uchar() const
        {
        Iterator* p_iter = &iter;
        uchar     result = 0;
        if (IsBigEndian)
          for (unsigned n = 0; n < Size; n++)
            result = (result << 8) | **p_iter++;
        else
          for (unsigned n = 0; n < Size; n++)
            result = ((**p_iter++) << (n * 8)) | result;
        return result;
        }

      //......................................................................
      uchar_reference_t& operator = ( uchar value )
        {
        Iterator* p_iter = &iter;
        if (IsBigEndian)
          for (unsigned n = 0; n < Size; n++)
            **p_iter++ = (value >> ((Size - 1 - n) * 8)) & 0xFF;
        else
          for (unsigned n = 0; n < Size; n++)
            **p_iter++ = (value >> (n * 8)) & 0xFF;
        return *this;
        }
      };

    //------------------------------------------------------------------------
    // This class transforms a bytestream iterator into a word or doubleword
    // iterator. The bytestream has a known endianness (big or little). The
    // resulting iterator returns whole values (in host endian order, of
    // course).
    //
    template <bool IsBigEndian, unsigned Size, typename Iterator>
    class endian_iterator:
      public std::iterator <
        typename Iterator::iterator_category,
        uchar,
        typename Iterator::difference_type,
        uchar_reference_t <IsBigEndian, Size, Iterator> *,
        uchar_reference_t <IsBigEndian, Size, Iterator> &
        >
      {
      public:
        typedef uchar_reference_t <IsBigEndian, Size, Iterator> uchar_ref_type;
        typedef endian_iterator   <IsBigEndian, Size, Iterator> this_iterator_type;
        typedef Iterator                                        reference_iterator_type;

        typedef typename Iterator::iterator_category            iterator_category;
        typedef uchar                                           value_type;
        typedef typename Iterator::difference_type              difference_type;
        typedef uchar_ref_type*                                 pointer;
        typedef uchar_ref_type&                                 reference;

        endian_iterator( Iterator& iter = Iterator() ):
          uref( iter )
          { }

        endian_iterator( const this_iterator_type& iter ):
          uref( iter.uref )
          { }

        this_iterator_type& operator = ( const this_iterator_type& iter )
          {
          uref = iter.uref;
          return *this;
          }

        this_iterator_type& operator ++ () // ++prefix
          {
          for (unsigned n = 0; n < Size; n++) ++uref.iter;
          return *this;
          }
        this_iterator_type operator ++ (int) // postfix++
          {
          this_iterator_type result( *this );
          for (unsigned n = 0; n < Size; n++) uref.iter++;
          return result;
          }

        this_iterator_type& operator -- () // --prefix
          {
          for (unsigned n = 0; n < Size; n++) --uref.iter;
          return *this;
          }
        this_iterator_type operator -- (int) // postfix--
          {
          this_iterator_type result( *this );
          for (unsigned n = 0; n < Size; n++) uref.iter--;
          return result;
          }

        uchar_ref_type& operator *  () { return  uref; }
        uchar_ref_type* operator -> () { return &uref; }

        const uchar_ref_type& operator *  () const { return  uref; }
        const uchar_ref_type* operator -> () const { return &uref; }

        this_iterator_type  operator +  ( difference_type n ) const { return endian_iterator( uref.iter + (n * Size) ); }
        this_iterator_type  operator -  ( difference_type n ) const { return endian_iterator( uref.iter - (n * Size) ); }
        this_iterator_type& operator += ( difference_type n )       { uref.iter += n * Size; }
        this_iterator_type& operator -= ( difference_type n )       { uref.iter -= n * Size; }

        bool operator == ( const this_iterator_type& iter ) const { return uref.iter == iter.uref.iter; }
        bool operator != ( const this_iterator_type& iter ) const { return uref.iter != iter.uref.iter; }
        bool operator <  ( const this_iterator_type& iter ) const { return uref.iter < iter.uref.iter; }
        bool operator >  ( const this_iterator_type& iter ) const { return uref.iter > iter.uref.iter; }
        bool operator <= ( const this_iterator_type& iter ) const { return !(operator > ( iter )); }
        bool operator >= ( const this_iterator_type& iter ) const { return !(operator < ( iter )); }

        uchar_ref_type&       operator [] ( difference_type n )       { return uchar_ref_type( uref.iter + (n * Size) ); }
        const uchar_ref_type& operator [] ( difference_type n ) const { return uchar_ref_type( uref.iter + (n * Size) ); }

      private:
        uchar_ref_type uref;
      };

    #define ADDOP( op ) \
      template <bool IsBigEndian, unsigned Size, typename Iterator> \
      endian_iterator <IsBigEndian, Size, Iterator> \
      operator op ( \
        typename Iterator::difference_type                   n, \
        const endian_iterator <IsBigEndian, Size, Iterator>& iter \
        ) { \
        return iter op n; \
        }
    ADDOP( + )
    ADDOP( - )
    #undef ADDOP

    #define ITER( name, isbe, size ) \
      template <typename Iterator> \
      struct name ## _iterator: public endian_iterator <isbe, size, Iterator>     \
        {                                                                         \
        typedef uchar_reference_t <isbe, size, Iterator> uchar_ref_type;          \
        typedef endian_iterator   <isbe, size, Iterator> this_iterator_type;      \
        typedef Iterator                                 reference_iterator_type; \
                                                                                  \
        typedef typename Iterator::iterator_category  iterator_category;          \
        typedef uchar                                 value_type;                 \
        typedef typename Iterator::difference_type    difference_type;            \
        typedef uchar_ref_type*                       pointer;                    \
        typedef uchar_ref_type&                       reference;                  \
        };
    ITER( utf16be, true,  2 )
    ITER( utf16le, false, 2 )
    ITER( utf32be, true,  4 )
    ITER( utf32le, false, 4 )
    #undef ITER 
Last edited on
I noticed another error which I've just corrected (after staring at mind-bogglingly long GCC template error messages for a while)

1
2
3
4
5
6
    template <typename InputWordIterator>
    uchar decode_utf16(
      /*not const*/ InputWordIterator& begin, InputWordIterator end
      ) {
      ...
      }


Those who are still looking, does the structure of my class seem simple and correct? Is it right to have that uchar_reference_t like that? Is there a simpler way to do this I am too dense to see right now?
I've also learned something about std::iterator_traits. For example, line 52 is changed from:

52 typename Iterator::iterator_category,

to:

52 typename std::iterator_traits <Iterator> ::iterator_category,

Now it will work with Iterator = char* and the like...
Topic archived. No new replies allowed.