main u5e namespace More...

Namespaces
	props
	codepoint property handling

	version
	introspection over the version of the library both at compile time and at runtime.

Classes
class	basic_encodedstring
	basic encoding support over string-like objects. More...

class	basic_grapheme
	Represents a single grapheme cluster. More...

class	basic_grapheme_iterator
	Iterator that describes full graphemes. More...

class	codepoint
	Native representation of a codepoint. More...

class	codepoint_traits
	Type information for codepoint. More...

class	encoding_assertion
	Assert the encoding matches the native type. More...

class	iterator_assertion
	Asserts the iterator is consistently defined. More...

class	utf32ne
	Architecture-specific type to interface UTF32BE or UTF32LE. More...

class	utf32ne_string
	Typedef: basic_encodedstring of utf32ne and std::basic_string<int> More...

class	utf32ne_string_grapheme
	Typedef: basic_grapheme of utf32ne_string. More...

class	utf32ne_string_grapheme_iterator
	Typedef: basic_grapheme_iterator of utf32ne_string. More...

class	utf32ne_string_view
	Typedef: basic_encodedstring of utf32ne and basic_string_view<int> More...

class	utf32ne_string_view_grapheme
	Typedef: basic_grapheme of utf32ne_string_view. More...

class	utf32ne_string_view_grapheme_iterator
	Typedef: basic_grapheme_iterator of utf32ne_string_view. More...

class	utf8
	Encoding type for UTF8 text. Unlike UTF16 and UTF32, UTF8 is endian independent. More...

class	utf8_bounds
	Check and enforce bounds of utf8 text. More...

class	utf8_const_iterator
	const iterator for utf8 encoded strings. More...

class	utf8_iterator
	mutable utf8 iterator More...

class	utf8_iterator_base
	Defines the basic inner workings of utf8 iterator. More...

class	utf8_string
	Typedef: basic_encodedstring of utf8 and std::string. More...

class	utf8_string_grapheme
	Typedef: basic_grapheme of utf8_string. More...

class	utf8_string_grapheme_iterator
	Typedef: basic_grapheme_iterator of utf8_string. More...

class	utf8_string_view
	Typedef: basic_encodedstring of utf8 and string_view. More...

class	utf8_string_view_grapheme
	Typedef: basic_grapheme of utf8_string_view. More...

class	utf8_string_view_grapheme_iterator
	Typedef: basic_grapheme_iterator of utf8_string_view. More...

class	utf8_util
	Basic operations necessary for implementing utf8. More...

Typedefs
typedef basic_encodedstring< utf32ne, std::basic_string< int > >	utf32ne_string
	A basic_encodedstring of utf32ne and std::basic_string<int>

typedef basic_grapheme< utf32ne_string >	utf32ne_string_grapheme
	A basic_grapheme of utf32ne_string.

typedef basic_grapheme_iterator< utf32ne_string >	utf32ne_string_grapheme_iterator
	A basic_grapheme_iterator of utf32ne_string.

typedef basic_encodedstring< u5e::utf32ne, std::experimental::basic_string_view< int > >	utf32ne_string_view
	A basic_encodedstring of utf32ne and basic_string_view<int>

typedef basic_grapheme< utf32ne_string_view >	utf32ne_string_view_grapheme
	A basic_grapheme of utf32ne_string_view.

typedef basic_grapheme_iterator< utf32ne_string_view >	utf32ne_string_view_grapheme_iterator
	A basic_grapheme_iterator of utf32ne_string_view.

typedef basic_encodedstring< utf8, std::string >	utf8_string
	A basic_encodedstring of utf8 and std::string.

typedef basic_grapheme< utf8_string >	utf8_string_grapheme
	A basic_grapheme of utf8_string.

typedef basic_grapheme_iterator< utf8_string >	utf8_string_grapheme_iterator
	A basic_grapheme_iterator of utf8_string.

typedef basic_encodedstring< utf8, std::experimental::string_view >	utf8_string_view
	A basic_encodedstring of utf8 and string_view.

typedef basic_grapheme< utf8_string_view >	utf8_string_view_grapheme
	A basic_grapheme of utf8_string_view.

typedef basic_grapheme_iterator< utf8_string_view >	utf8_string_view_grapheme_iterator
	A basic_grapheme_iterator of utf8_string_view.

Functions
bool	canonical_combining_order (int a, int b)
	compare codepoints according to the canonical combining order More...

template<typename StorageType >
StorageType::iterator	canonical_composition (StorageType &data, int *count)
	performs in-place canonical composition. More...

template<typename OutputStringType >
int	canonical_decomposition (const codepoint input, OutputStringType &output)
	Perform codepoint-by-codepoint canonical decomposition. More...

constexpr bool	operator== (const codepoint &a, const codepoint &b)

template<typename PropResolver , typename OutputStringType >
int	codepoint_decomposition (const codepoint input, OutputStringType &output, PropResolver &resolver)
	Perform codepoint by codepoint decomposition. More...

template<typename OutputStringType >
int	compatibility_and_canonical_decomposition (const codepoint input, OutputStringType &output)
	Perform compatibility and canonical decomposition. More...

template<typename InputIteratorType , typename OutputType , typename Functor >
int	filter (InputIteratorType input_from, InputIteratorType input_to, OutputType &output, Functor operation)
	Walks an input iterator through a filter. More...

template<typename InputStorageType , typename OutputStorageType = InputStorageType>
int	normalization_form_c (basic_grapheme< InputStorageType > grapheme, OutputStorageType &output)
	u5e::filter algorithm for normalizing graphemes More...

template<typename InputStorageType , typename OutputStorageType = InputStorageType>
int	normalization_form_d (basic_grapheme< InputStorageType > grapheme, OutputStorageType &output)
	u5e::filter algorithm for normalizing graphemes More...

template<typename InputStorageType , typename OutputStorageType = InputStorageType>
int	normalization_form_kc (basic_grapheme< InputStorageType > grapheme, OutputStorageType &output)
	u5e::filter algorithm for normalizing graphemes More...

template<typename InputStorageType , typename OutputStorageType = InputStorageType>
int	normalization_form_kd (basic_grapheme< InputStorageType > grapheme, OutputStorageType &output)
	u5e::filter algorithm for normalizing graphemes More...


constexpr bool	operator== (const codepoint_traits::int_type a, const codepoint &b)

constexpr bool	operator== (const codepoint &a, const codepoint_traits::int_type b)

Detailed Description

main u5e namespace

Function Documentation

bool u5e::canonical_combining_order	(	int	a,
		int	b
	)

inline

compare codepoints according to the canonical combining order

This is intended to be used with std::sort on a utf32ne string type.

Definition at line 13 of file canonical_combining_order.hpp.

template<typename StorageType >

StorageType::iterator u5e::canonical_composition	(	StorageType &	data,
		int *	count
	)

inline

performs in-place canonical composition.

This will return the iterator in the end position after the composition.

Template Parameters

StorageType the storage type where to apply it.

Must support codepoint_begin, codepoint_cbegin, codepoint_end, codepoint_cend, as well as the member types iterator and const_iterator. It must also be possible to write to it while reading from it, which means that this must only be used with utf32 iterators; otherwise the output may race ahead of the input.

Parameters

data	the object where the canonical composition will be performed.
count	return pointer for how many compositions were performed

Definition at line 29 of file canonical_composition.hpp.

template<typename OutputStringType >

int u5e::canonical_decomposition	(	const codepoint	input,
		OutputStringType &	output
	)

inline

Perform codepoint-by-codepoint canonical decomposition.

This is one step of the normalization process; you probably want to use that instead.

This is meant to be used as an operation for u5e::filter.

Template Parameters

OutputStringType the output string type to be used. Because this reads data from the database, the returned data is utf32ne, so you need an OutputStringType that is compatible with that.

Definition at line 31 of file canonical_decomposition.hpp.

template<typename PropResolver , typename OutputStringType >

int u5e::codepoint_decomposition	(	const codepoint	input,
		OutputStringType &	output,
		PropResolver &	resolver
	)

inline

Perform codepoint by codepoint decomposition.

This is one step of the normalization process; you probably want to use that instead.

This implements only the logic of dealing with the resolved data, the actual database resolution is a template parameter.

This is meant to be used as an operation for u5e::filter.

Template Parameters

PropResolver	the function that resolves the input codepoint into a sequence of decomposed codepoints.
OutputStringType	the output string type to be used. Because this reads data from the database, the returned data is utf32ne, so you need an OutputStringType that is compatible with that.

Definition at line 36 of file codepoint_decomposition.hpp.

template<typename OutputStringType >

int u5e::compatibility_and_canonical_decomposition	(	const codepoint	input,
		OutputStringType &	output
	)

inline

Perform compatibility and canonical decomposition.

This is one step of the normalization process; you probably want to use that instead.

This is meant to be used as an operation for u5e::filter.

Template Parameters

OutputStringType the output string type to be used. Because this reads data from the database, the returned data is utf32ne, so you need an OutputStringType that is compatible with that.

Definition at line 31 of file compatibility_and_canonical_decomposition.hpp.

template<typename InputIteratorType , typename OutputType , typename Functor >

int u5e::filter	(	InputIteratorType	input_from,
		InputIteratorType	input_to,
		OutputType &	output,
		Functor	operation
	)

inline

Walks an input iterator through a filter.

This will go from the begin to the end of the input iterator and will execute the filter function once for every input element.

Unlike std::transform, the filter function does not return the output element, but it receives the output object and will do whatever makes sense with the output object.

That means that the type of filter will define what type of object can be used as output. The filter function itself will not touch the output object, but simply forward it to the operator function.

The operator function returns an int that is meant to indicate how much output was produced. The filter function will accumulate those values and return the sum.

The filter is not required to produce a constant number of outputs for each input. The function can produce many outputs or even none at all during the processing of each element.

The value type for input and output is not required to be the same. The input type is resolved by the value_type member type of the input iterator type.

Template Parameters

InputIteratorType	the type of the input iterator
OutputType	the type of the output object
Functor	the callback function type called for each element

Parameters

input_from	starting position for the input iterator
input_to	end position for the input iterator
output	output container sent to the operator function
operation	function that takes the element, the output container and returns the number of outputted elements

Definition at line 46 of file filter.hpp.

template<typename InputStorageType , typename OutputStorageType = InputStorageType>

int u5e::normalization_form_c	(	basic_grapheme< InputStorageType >	grapheme,
		OutputStorageType &	output
	)

inline

u5e::filter algorithm for normalizing graphemes

This will work by reading an input grapheme iterator and, grapheme by grapheme, normalizing them in form C.

This will use the Unicode database to search for equivalent codepoint sequences.

Definition at line 24 of file normalization_form_c.hpp.

template<typename InputStorageType , typename OutputStorageType = InputStorageType>

int u5e::normalization_form_d	(	basic_grapheme< InputStorageType >	grapheme,
		OutputStorageType &	output
	)

inline

u5e::filter algorithm for normalizing graphemes

This will work by reading an input grapheme iterator and, grapheme by grapheme, normalizing them in form D.

This will use the Unicode database to search for equivalent codepoint sequences.

Definition at line 23 of file normalization_form_d.hpp.

template<typename InputStorageType , typename OutputStorageType = InputStorageType>

int u5e::normalization_form_kc	(	basic_grapheme< InputStorageType >	grapheme,
		OutputStorageType &	output
	)

inline

u5e::filter algorithm for normalizing graphemes

This will work by reading an input grapheme iterator and, grapheme by grapheme, normalizing them in form KC.

This will use the Unicode database to search for equivalent codepoint sequences.

Definition at line 24 of file normalization_form_kc.hpp.

template<typename InputStorageType , typename OutputStorageType = InputStorageType>

int u5e::normalization_form_kd	(	basic_grapheme< InputStorageType >	grapheme,
		OutputStorageType &	output
	)

inline

u5e::filter algorithm for normalizing graphemes

This will work by reading an input grapheme iterator and, grapheme by grapheme, normalizing them in form KD.

This will use the Unicode database to search for equivalent codepoint sequences.

Definition at line 23 of file normalization_form_kd.hpp.

constexpr bool u5e::operator==	(	const codepoint &	a,
		const codepoint &	b
	)

Compare two codepoints by comparing their values.

Definition at line 56 of file codepoint.hpp.

constexpr bool u5e::operator==	(	const codepoint_traits::int_type	a,
		const codepoint &	b
	)

Compare an int to a codepoint by comparing the codepoint's value with the integer.

Definition at line 63 of file codepoint.hpp.

constexpr bool u5e::operator==	(	const codepoint &	a,
		const codepoint_traits::int_type	b
	)

Compare a codepoint to an int by comparing the codepoint's value with the integer.

Definition at line 64 of file codepoint.hpp.

Namespaces

Classes

Typedefs

Functions

Detailed Description

Function Documentation