u5e
Unicode Text C++ Library
utf8.hpp
1 #ifndef INCLUDED_U5E_UTF8
2 #define INCLUDED_U5E_UTF8
3 
4 #include <iterator>
5 #include <cstring>
6 #include <type_traits>
7 #include <u5e/codepoint.hpp>
8 #include <u5e/encoding_assertion.hpp>
9 #include <u5e/utf8_iterator.hpp>
10 
11 
12 namespace u5e {
13  /**
14  * \brief Encoding type for UTF8 text.
15  * Unlike UTF16 and UTF32, UTF8 is endian independent.
16  */
17  class utf8 {
18  public:
19 
20  /**
21  * Delegated to utf8_iterator of the native type.
22  * \tparam NativeString the native string type with utf8 data
23  */
24  template <typename NativeString>
25  using iterator =
26  utf8_iterator<typename NativeString::iterator>;
27 
28  /**
29  * Delegated to utf8_const_iterator of the native type
30  * \tparam NativeString the native string type with utf8 data
31  */
32  template <typename NativeString>
33  using const_iterator =
34  utf8_const_iterator<typename NativeString::const_iterator>;
35 
36  /**
37  * Get access to the native const_iterator with the native data.
38  */
39  template <typename NativeString>
40  static typename NativeString::const_iterator
42  (utf8_const_iterator<typename NativeString::const_iterator> it) {
43  it.rewind_to_start_of_codepoint(*(it.raw_iterator_));
44  return it.raw_iterator_;
45  }
46 
47  template <typename InputNativeIterator, typename OutputNativeString>
48  static void append_from_utf32ne
49  (InputNativeIterator first, InputNativeIterator last,
50  OutputNativeString& output) {
51  while (first != last) {
52  codepoint c = *first;
53  char buf[6] = {}; // utf8 codepoint is never bigger than 6 chars
54  utf8_iterator<char*> o_begin(buf);
55  utf8_iterator<char*> o_i = o_begin;
56  *o_i = c;
57  ++o_i;
58  output.append(o_begin.raw_iterator_, o_i.raw_iterator_);
59  ++first;
60  }
61  }
62 
63  };
64 }
65 
66 #endif
proxyobject & operator=(const codepoint c)
utf8_iterator(const NativeIterator raw_iterator)
main u5e namespace
Native representation of a codepoint.
Definition: codepoint.hpp:15
mutable utf8 iterator
const iterator for utf8 encoded strings.
proxyobject operator*()
utf8_iterator & operator++()
Encoding type for UTF8 text. Unlike UTF16 and UTF32, UTF8 is endian independent.
Definition: utf8.hpp:17
static NativeString::const_iterator native_const_iterator(utf8_const_iterator< typename NativeString::const_iterator > it)
Definition: utf8.hpp:42