u5e
Unicode Text C++ Library
basic_encodedstring.hpp
1 #ifndef INCLUDED_U5E_BASIC_ENCODEDSTRING
2 #define INCLUDED_U5E_BASIC_ENCODEDSTRING
3 
4 #include <iterator>
5 #include <u5e/codepoint.hpp>
6 #include <u5e/codepoint_traits.hpp>
7 #include <u5e/basic_grapheme_iterator.hpp>
8 #include <u5e/utf32ne.hpp>
9 
10 namespace u5e {
11  template <typename T> class basic_grapheme_iterator;
12 
13  /**
14  * \brief basic encoding support over string-like objects.
15  *
16  * u5e::basic_encodedstring implements encoding support on top of a
17  * string-like object, it is implemented by simply wrapping the
18  * native string type in order to provide a customized iterator
19  * that offers codepoint-by-codepoint access instead of iterating
20  * over the native type.
21  *
22  * \tparam Encoding Text is always represented in a specific
23  * encoding, there is no such thing as a "natural", or "native"
24  * representation of text, for that reason, the encoding is a part
25  * of the type.
26  *
27  * \tparam NativeString In order to re-use the string support,
28  * this will always be implemented as a wrapper around a
29  * native string-like type. The idea is that the C++ string
30  * libraries operate on unencoded memory, while the u5e types
31  * offer a layer on top of that for the purposes of implementing
32  * unicode in a type-safe way. Note that this applies to any
33  * 'string-like' object, such as string or string_view.
34  */
35  template <typename Encoding,
36  typename NativeString>
38  public:
39  //@{
40  /**
41  * Offer an interface such that the size of the thing you're
42  * iterating over is a codepoint, regardless of the native
43  * type.
44  */
50  typedef const value_type& const_reference;
51  typedef typename NativeString::pointer pointer;
52  typedef typename NativeString::const_pointer const_pointer;
53  //@}
54 
55  //@{
56  /**
57  * The Encoding template argument must provide iterator and
58  * const_iterator member types. Those should iterate over
59  * codepoints, regardless of the encoding and the native type.
60  *
61  * The iterator and const_iterator member types must be themselves
62  * templates that take the NativeString type as a template
63  * argument.
64  */
65  typedef typename Encoding::template iterator<NativeString>
67  typedef typename Encoding::template const_iterator<NativeString>
69  //@}
70 
71  //@{
72  /**
73  * Delegated to std::reverse_iterator
74  */
75  typedef std::reverse_iterator<iterator> reverse_iterator;
76  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
77  //@}
78 
79  /**
80  * \brief Raw buffer as specified by the native type.
81  *
82  * This means that this class is exactly as expensive as whichever
83  * native type is being used, it also means this class delegates
84  * all memory management to that native type.
85  *
86  * This member is public because you should be able to completely
87  * manage the native object if you need to.
88  */
89  NativeString native_string;
90 
91  /**
92  * Default constructor, delegated to the native type.
93  */
94  basic_encodedstring() = default;
95 
96  /**
97  * Implicit conversion from the native type.
98  */
99  basic_encodedstring(const NativeString& s)
100  : native_string(s) { };
101 
102  /**
103  * Assignment operator, assigns the native type.
104  */
106  operator= (const basic_encodedstring &other) {
107  native_string = other;
108  }
109 
110  //@{
111  /**
112  * Get begin and end native iterators.
113  */
114  inline typename NativeString::iterator native_begin() {
115  return native_string.begin();
116  }
117  inline typename NativeString::iterator native_end() {
118  return native_string.end();
119  }
120  inline typename NativeString::const_iterator native_cbegin() {
121  return native_string.cbegin();
122  }
123  inline typename NativeString::const_iterator native_cend() {
124  return native_string.cend();
125  }
126  //@}
127 
128  //@{
129  /**
130  * Get begin and end codepoint iterators.
131  */
133  return iterator(native_string.begin());
134  }
136  return iterator(native_string.end());
137  }
139  return const_iterator(native_string.cbegin());
140  }
142  return const_iterator(native_string.cend());
143  }
144  //@}
145 
146  //@{
147  /**
148  * Get begin and end grapheme iterators.
149  * Graphemes are always built from the const iterators, since graphemes
150  * are always immutable.
151  */
152  inline basic_grapheme_iterator<basic_encodedstring> grapheme_begin() {
153  basic_grapheme_iterator<basic_encodedstring> i(codepoint_cbegin(),
155  return i;
156  }
157  inline basic_grapheme_iterator<basic_encodedstring> grapheme_end() {
158  basic_grapheme_iterator<basic_encodedstring> i(codepoint_cbegin(),
161  return i;
162  }
163  //@}
164 
165  //@{
166  /**
167  * Append from input iterators.
168  *
169  * Note that this is only possible from iterators of the same
170  * encoding. This will not perform any conversion.
171  */
172  template <typename StorageType>
174  (
175  typename basic_encodedstring<Encoding, StorageType>::const_iterator first,
176  typename basic_encodedstring<Encoding, StorageType>::const_iterator last
177  ) {
178  native_string.append
179  (Encoding::template native_const_iterator<StorageType>(first),
180  Encoding::template native_const_iterator<StorageType>(last)
181  );
182  return *this;
183  }
184 
187  ) {
188  return append<NativeString>(first,last);
189  }
190 
191  template <typename StorageType>
193  (basic_grapheme_iterator<basic_encodedstring<Encoding, StorageType>>& first,
194  basic_grapheme_iterator<basic_encodedstring<Encoding, StorageType>>& last)
195  {
196  native_string.append((*first).codepoint_begin(),
197  (*last).codepoint_begin());
198  return *this;
199  }
200 
202  (basic_grapheme_iterator<basic_encodedstring>& first,
203  basic_grapheme_iterator<basic_encodedstring>& last) {
204  return append<NativeString>(first, last);
205  }
206 
207  template <typename StorageType>
209  (
210  typename basic_encodedstring<utf32ne, StorageType>::const_iterator first,
211  typename basic_encodedstring<utf32ne, StorageType>::const_iterator last
212  ) {
213  Encoding::append_from_utf32ne
214  (utf32ne::template native_const_iterator<StorageType>(first),
215  utf32ne::template native_const_iterator<StorageType>(last),
216  native_string);
217  return *this;
218  }
219  //@}
220 
221  };
222 
223 }
224 
225 #endif
NativeString::const_iterator native_cbegin()
Encoding::template const_iterator< NativeString > const_iterator
NativeString native_string
Raw buffer as specified by the native type.
main u5e namespace
NativeString::pointer pointer
Native representation of a codepoint.
Definition: codepoint.hpp:15
u5e::codepoint_traits::pos_type size_type
NativeString::const_pointer const_pointer
const value_type & const_reference
NativeString::iterator native_end()
basic_encodedstring & append(basic_grapheme_iterator< basic_encodedstring< Encoding, StorageType >> &first, basic_grapheme_iterator< basic_encodedstring< Encoding, StorageType >> &last)
basic_encodedstring & operator=(const basic_encodedstring &other)
u5e::codepoint_traits traits_type
basic_encodedstring(const NativeString &s)
NativeString::const_iterator native_cend()
Architecture-specific type to interface UTF32BE or UTF32LE.
Definition: utf32ne.hpp:20
basic_encodedstring & append(basic_grapheme_iterator< basic_encodedstring > &first, basic_grapheme_iterator< basic_encodedstring > &last)
basic_grapheme_iterator< basic_encodedstring > grapheme_end()
basic encoding support over string-like objects.
Encoding::template iterator< NativeString > iterator
basic_encodedstring & append_from_utf32ne(typename basic_encodedstring< utf32ne, StorageType >::const_iterator first, typename basic_encodedstring< utf32ne, StorageType >::const_iterator last)
std::reverse_iterator< const_iterator > const_reverse_iterator
basic_encodedstring & append(const_iterator first, const_iterator last)
basic_encodedstring & append(typename basic_encodedstring< Encoding, StorageType >::const_iterator first, typename basic_encodedstring< Encoding, StorageType >::const_iterator last)
NativeString::iterator native_begin()
basic_grapheme_iterator< basic_encodedstring > grapheme_begin()
Type information for codepoint.
u5e::codepoint_traits::off_type difference_type