u5e
Unicode Text C++ Library
normalization_form_d.hpp
1 #ifndef INCLUDED_U5E_NORMALIZATION_FORM_D
2 #define INCLUDED_U5E_NORMALIZATION_FORM_D
3 
4 #include <algorithm>
5 #include <u5e/basic_grapheme.hpp>
6 #include <u5e/utf32ne_string.hpp>
7 #include <u5e/filter.hpp>
8 #include <u5e/canonical_decomposition.hpp>
9 #include <u5e/canonical_combining_order.hpp>
10 
11 namespace u5e {
12  /**
13  * \brief u5e::filter algorithm for normalizing graphemes
14  *
15  * This will work by reading an input grapheme iterator and,
16  * grapheme by grapheme normalize them in form D.
17  *
18  * This will use the unicode database to search for equivalent
19  * codepoint sequences.
20  */
21  template <typename InputStorageType,
22  typename OutputStorageType = InputStorageType>
23  inline int normalization_form_d(basic_grapheme<InputStorageType> grapheme,
24  OutputStorageType& output) {
25 
26  // first step is to decompose the grapheme
27  utf32ne_string decomposed;
28  int count = u5e::filter(grapheme.codepoint_begin(),
29  grapheme.codepoint_end(),
30  decomposed,
31  canonical_decomposition<utf32ne_string>);
32 
33  // then sort based on canonical combining class
34  std::sort(decomposed.codepoint_begin(), decomposed.codepoint_end(),
36 
37  // finally append the output
38  output.template append_from_utf32ne<utf32ne_string>
39  (decomposed.codepoint_begin(),
40  decomposed.codepoint_end());
41 
42  // we re-use the counter from the decomposition filter.
43  return count;
44  }
45 }
46 
47 #endif
main u5e namespace
basic_encodedstring< utf32ne, std::basic_string< int > > utf32ne_string
A basic_encodedstring of utf32ne and std::basic_string<int>
int normalization_form_d(basic_grapheme< InputStorageType > grapheme, OutputStorageType &output)
u5e::filter algorithm for normalizing graphemes
bool canonical_combining_order(int a, int b)
compare codepoints according to the canonical combining order
Represents a single grapheme cluster.