u5e
Unicode Text C++ Library
truncate_on_grapheme.cpp
1 
2 /**
3  * \page truncate_on_grapheme Example: Truncate a string on grapheme boundary
4  *
5  * The challenge of truncating a string is that if you operate on the
6  * native level, you risk breaking a codepoint in half. And even when
7  * you take the codepoint boundaries in consideration, you still risk
8  * breaking a grapheme in half.
9  *
10  * The u5e library provides an easy way to truncate a text, starting
11  * from a target native size and finding the correct boundary.
12  *
13  * \code
14  */// Example on how to truncate on correct grapheme boundaries
15 #include <algorithm>
16 #include <iostream>
17 #include <string>
18 #include <u5e/utf8_string.hpp>
19 #include <u5e/utf8_string_grapheme_iterator.hpp>
20 using std::string;
21 using u5e::utf8_string;
22 using u5e::utf8_string_grapheme_iterator;
23 int main(int argc, char** argv) {
24  // the original string
25  string str("Ola\xCC\x81!");
26 
27  // we find the point on the native string where we want to truncate.
28  // Count 4 in octets leaves us in the middle of the second codepoint
29  // of a grapheme.
30  string::const_iterator stri = str.cbegin();
31  std::advance(stri, 4);
32 
33  // We need to promote the iterators to utf8 iterators.
34  utf8_string::const_iterator u8b(str.begin());
35  utf8_string::const_iterator u8e(str.end());
36  utf8_string::const_iterator u8i(stri);
37 
38  // In order to work with graphemes, we need to obtain grapheme
39  // iterators.
40  utf8_string_grapheme_iterator gb(u8b, u8e);
41  utf8_string_grapheme_iterator gi(u8b, u8e, u8i);
42 
43  // now we produce the output string truncated at where we want.
44  // note that this assumes that the text was already in utf8. this is
45  // not a conversion operator, it just makes sure the append happens
46  // while respecting the codepoint and grapheme boundaries in the
47  // utf8 text.
48  utf8_string output("");
49  output.append(gb, gi);
50 
51  std::cout << output.native_string << std::endl;
52 }
53 /**
54  * \endcode
55  */
basic_grapheme_iterator(const_codepoint_iterator b, const_codepoint_iterator e, const_codepoint_iterator w)
start at a specific point find the start and the end of the grapheme
basic_grapheme_iterator(const_codepoint_iterator b, const_codepoint_iterator e)
start at the beginning of the text
main u5e namespace
utf8_const_iterator(const NativeIterator raw_iterator)
basic_encodedstring(const NativeString &s)
basic_grapheme_iterator< utf8_string > utf8_string_grapheme_iterator
A basic_grapheme_iterator of utf8_string.
basic_encodedstring & append(basic_grapheme_iterator< basic_encodedstring > &first, basic_grapheme_iterator< basic_encodedstring > &last)
basic_encodedstring< utf8, std::string > utf8_string
A basic_encodedstring of utf8 and std::string.
Definition: utf8_string.hpp:19