u5e
Unicode Text C++ Library
codepoint_decomposition.hpp
1 #ifndef INCLUDED_U5E_CODEPOINT_DECOMPOSITION
2 #define INCLUDED_U5E_CODEPOINT_DECOMPOSITION
3 
4 #include <iterator>
5 #include <algorithm>
6 #include <experimental/string_view>
7 
8 #include <u5e/codepoint.hpp>
9 #include <u5e/props/compatibility_and_canonical_decomposition_mapping.hpp>
10 #include <u5e/utf32ne_string_view.hpp>
11 
12 namespace u5e {
13 
14  /**
15  * \brief Perform codepoint by codepoint decomposition
16  *
17  * This is one step of the normalization process, you probably want
18  * to use that instead.
19  *
20  * This implements only the logic of dealing with the resolved data,
21  * the actual database resolution is a template parameter.
22  *
23  * This is meant to be used as an operation for u5e::filter.
24  *
25  * \tparam PropResolver the function that resolves the input
26  * codepoint into a sequence of decomposed codepoints.
27  *
28  * \tparam OutputStringType the output string type to be used.
29  * Because this reads data from the database, the returned data is
30  * utf32ne, so you need an OutputStringType that is compatible with
31  * that.
32  *
33  */
34  template <typename PropResolver, typename OutputStringType>
35  inline int codepoint_decomposition
36  (const codepoint input,
37  OutputStringType& output,
38  PropResolver& resolver) {
39  int const * mapping = resolver(input);
40  int const * begin;
41  int const * end;
42  int count = 0;
43  if (mapping == NULL) {
44  begin = &(input.value);
45  end = begin;
46  end++;
47  count = 1;
48  } else {
49  begin = mapping;
50  end = begin;
51  while (*end != 0) {
52  end++;
53  count++;
54  }
55  }
56  utf32ne_string_view from_database
57  (std::experimental::basic_string_view<int>(begin, count));
58  output.template append<utf32ne_string_view>
59  (from_database.codepoint_cbegin(),
60  from_database.codepoint_cend());
61  return count;
62  }
63 
64 }
65 
66 #endif
main u5e namespace
Native representation of a codepoint.
Definition: codepoint.hpp:15
basic_encodedstring(const NativeString &s)
int codepoint_decomposition(const codepoint input, OutputStringType &output, PropResolver &resolver)
Perform codepoint by codepoint decomposition.
basic_encodedstring< u5e::utf32ne, std::experimental::basic_string_view< int > > utf32ne_string_view
A basic_encodedstring of utf32ne and basic_string_view<int>
codepoint_traits::int_type value
Definition: codepoint.hpp:20