u5e
Unicode Text C++ Library
utf8_util.hpp
1 #ifndef INCLUDED_U5E_UTF8_UTIL
2 #define INCLUDED_U5E_UTF8_UTIL
3 
4 #include <cmath>
5 #include <algorithm>
6 
7 namespace u5e {
8  /**
9  * \brief Basic operations necessary for implementing utf8
10  */
class utf8_util {
public:

  /**
   * Check whether or not this is a 7bit character
   *
   * \param octet the octet to inspect
   * \return true when the most significant bit of the octet is clear
   */
  inline static bool is_7bit_character(const char octet) {
    return (octet & 0b10000000) == 0;
  }

  /**
   * Check whether or not this octet is a codepoint continuation
   *
   * \param octet the octet to inspect
   * \return true when the two most significant bits are 10
   */
  inline static bool is_codepoint_continuation(const char octet) {
    return (octet & 0b11000000) == 0b10000000;
  }

  /**
   * Check whether or not this is a first octet in a sequence
   *
   * Only the two most significant bits are tested, so every octet
   * of the form 11xxxxxx is reported as a start octet.
   *
   * \param octet the octet to inspect
   * \return true when the two most significant bits are 11
   */
  inline static bool is_codepoint_start(const char octet) {
    return (octet & 0b11000000) == 0b11000000;
  }

  /**
   * Find the codepoint size given the first utf8 octet
   *
   * The size is the number of leading 1 bits in the octet, with a
   * minimum of 1. An octet with no leading 1 bit (7bit character)
   * or with a single one (continuation octet) therefore yields 1.
   *
   * \param first_octet the first octet of the encoded sequence
   * \return the number of leading 1 bits, or 1 if there are fewer
   */
  inline static int codepoint_size(const char first_octet) {
    // Go through unsigned char and unsigned int before shifting:
    // shifting the promoted (possibly negative) char directly would
    // left-shift a negative signed int or overflow int.
    // This relies on unsigned int being 32 bits wide, like the
    // original shift-by-24 did.
    const unsigned int top_aligned =
      static_cast<unsigned int>(static_cast<unsigned char>(first_octet)) << 24;
    // Count leading zeros on the bitwise negated value, i.e. the
    // leading ones of the octet. The low 24 bits are all set after
    // the negation, so __builtin_clz never receives zero. For
    // single-octet codepoints the count is 0, hence the std::max.
    return std::max(__builtin_clz(~top_aligned), 1);
  }

  /**
   * How many octets will this codepoint take
   *
   * Uses the payload capacity of each utf8 sequence length (7, 11,
   * 16, 21, 26 and 31 bits). Sequences longer than 4 octets come
   * from the original utf8 definition and are not valid in current
   * Unicode, but are still reported so that the result is defined
   * for every input. The value is interpreted as unsigned, so
   * negative inputs report 6.
   *
   * \param value the codepoint value to be encoded
   * \return the number of octets, between 1 and 6
   */
  inline static int encoded_size(int value) {
    const unsigned int codepoint = static_cast<unsigned int>(value);
    if (codepoint < 0x80u) {
      return 1;
    }
    if (codepoint < 0x800u) {
      return 2;
    }
    if (codepoint < 0x10000u) {
      return 3;
    }
    if (codepoint < 0x200000u) {
      return 4;
    }
    if (codepoint < 0x4000000u) {
      return 5;
    }
    return 6;
  }

};
65 }
66 
67 #endif
static bool is_codepoint_start(const char octet)
Definition: utf8_util.hpp:39
main u5e namespace
static int encoded_size(int value)
Definition: utf8_util.hpp:60
static bool is_7bit_character(const char octet)
Definition: utf8_util.hpp:17
static bool is_codepoint_continuation(const char octet)
Definition: utf8_util.hpp:28
static int codepoint_size(const char first_octet)
Definition: utf8_util.hpp:50
Basic operations necessary for implementing utf8.
Definition: utf8_util.hpp:11