1 #ifndef INCLUDED_U5E_UTF8_ITERATOR 2 #define INCLUDED_U5E_UTF8_ITERATOR 6 #include <u5e/codepoint.hpp> 7 #include <u5e/iterator_assertion.hpp> 8 #include <u5e/utf8_util.hpp> 16 template <
typename NativeIterator>
94 unsigned char mask_first_octet = ~(0xFF<<(7-size));
95 int value = (first_octet & mask_first_octet);
97 value = value<<6 | (*(++copy_) & 0b00111111);
109 template <
typename NativeIterator>
135 this->forward_one_codepoint();
151 this->rewind_one_codepoint();
169 c = *(copy.raw_iterator_);
170 copy.rewind_to_start_of_codepoint(c);
171 c = *(copy.raw_iterator_);
174 if (copy.raw_iterator_ == rhs.raw_iterator_) {
177 ++(copy.raw_iterator_);
184 return !(*
this == rhs);
192 return this->current_codepoint();
206 template <
typename NativeIterator>
232 this->forward_one_codepoint();
248 this->rewind_one_codepoint();
266 c = *(copy.raw_iterator_);
267 copy.rewind_to_start_of_codepoint(c);
268 c = *(copy.raw_iterator_);
271 if (copy.raw_iterator_ == rhs.raw_iterator_) {
274 ++(copy.raw_iterator_);
281 return !(*
this == rhs);
306 value = copy.current_codepoint().value;
320 *(ref.raw_iterator_) = (value & 0xFF);
322 unsigned char first_octet = (0xFF<<(8-size));
323 first_octet |= ((value>>((size-1)*6)) & 0xFF);
324 *(ref.raw_iterator_) = first_octet;
326 unsigned char octet = 0b10000000;
327 octet |= ((value>>((size-1)*6)) & 0b00111111);
329 *(ref.raw_iterator_) = octet;
NativeIterator raw_iterator_
bool operator==(const utf8_iterator &rhs) const
proxyobject & operator=(const codepoint c)
utf8_const_iterator(const utf8_const_iterator &tocopy)
utf8_iterator(const NativeIterator raw_iterator)
utf8_iterator(const utf8_iterator &tocopy)
std::bidirectional_iterator_tag iterator_category
Native representation of a codepoint.
utf8_const_iterator operator++(int junk)
Const iterator for UTF-8 encoded strings.
Defines the basic inner workings of the UTF-8 iterator.
bool rewind_to_start_of_codepoint(const char current_octet)
utf8_const_iterator(const NativeIterator raw_iterator)
static int encoded_size(int value)
iterator_assertion< NativeIterator, char > _assertions
utf8_const_iterator operator--(int junk)
utf8_iterator_base(const NativeIterator raw_iterator)
utf8_iterator & operator++()
void rewind_one_codepoint()
bool operator==(const utf8_const_iterator &rhs) const
utf8_const_iterator & operator++()
utf8_iterator operator++(int junk)
utf8_const_iterator pointer
Offers write access to the iterator at a given position.
static bool is_7bit_character(const char octet)
void forward_one_codepoint()
static bool is_codepoint_continuation(const char octet)
static int codepoint_size(const char first_octet)
const codepoint current_codepoint()
Basic operations necessary for implementing utf8.
bool operator!=(const utf8_const_iterator &rhs) const
proxyobject(utf8_iterator< NativeIterator > &refin)
utf8_iterator operator--(int junk)
utf8_iterator & operator--()
bool operator!=(const utf8_iterator &rhs) const
Asserts the iterator is consistently defined.
utf8_const_iterator & operator--()
const codepoint operator*()
codepoint_traits::int_type value
const codepoint & reference