#include <StrU.h>

Public Types
enum	UTFLead_TYPE { UTFLead_0 = 0xefU , UTFLead_1 = 0xbbU , UTFLead_2 = 0xbfU , UTFLead_X = 0xbeU }

Static Public Member Functions
static bool __stdcall	IsUTFLead (const void *pvU)

static StrLen_t __stdcall	UTF8Size (wchar_t wChar, int &riStartBits)

static StrLen_t __stdcall	UTF8Size1 (unsigned char chFirst, int &riStartBits)

static StrLen_t __stdcall	UTF8Size (const char *pInp, StrLen_t iSizeInpBytes, int &riStartBits)

static StrLen_t __stdcall	UTF8toUNICODE (wchar_t &wChar, const char *pInp, StrLen_t iSizeInpBytes)

static StrLen_t __stdcall	UNICODEtoUTF8 (char *pOut, StrLen_t iSizeOutMaxBytes, wchar_t wChar)

static StrLen_t __stdcall	UTF8toUNICODELen (const char *pInp, StrLen_t iSizeInpBytes=k_StrLen_UNK)

static StrLen_t __stdcall	UNICODEtoUTF8Size (const wchar_t *pInp, StrLen_t iSizeInpChars=k_StrLen_UNK)

static StrLen_t __stdcall	UTF8toUNICODE (OUT wchar_t *pOut, StrLen_t iSizeOutMaxChars, const char *pInp, StrLen_t iSizeInpBytes=k_StrLen_UNK)

static StrLen_t __stdcall	UNICODEtoUTF8 (OUT char *pOut, StrLen_t iSizeOutMaxBytes, const wchar_t *pInp, StrLen_t iSizeInpChars=k_StrLen_UNK)

Static Public Attributes
static const StrLen_t	k_UTF8_SIZE_MAX = 4
	Max of 4 BYTEs to encode any UNICODE char. More...

Friends
class	StrUTests

Detailed Description

A bunch of functions for UNICODE strings and UTF8. Might be named StrW ? Opposite of StrA.

Member Enumeration Documentation

◆ UTFLead_TYPE

enum Gray::StrU::UTFLead_TYPE

See http://www.unicode.org/faq/utf_bom.html. Invalid UTF8 sequences are used for special meaning by M$; they are placed at the start of a text file to indicate encoding: ef bb bf (M$ "lead bytes"), ef bf be, ef bf bf.

Enumerator
UTFLead_0
UTFLead_1
UTFLead_2
UTFLead_X

Member Function Documentation

◆ IsUTFLead()

bool GRAYCALL Gray::StrU::IsUTFLead ( const void * pvU )

static

Skip the stupid Microsoft UTF-8 Byte order marks that are put at the start of a file.

◆ UNICODEtoUTF8() [1/2]

StrLen_t GRAYCALL Gray::StrU::UNICODEtoUTF8	(	char *	pOut,
		StrLen_t	iSizeOutMaxBytes,
		wchar_t	wChar
	)

static

Convert a single UNICODE char to UTF8 encoded char (maybe using multi chars).

Returns: The length < iSizeOutMaxBytes, 0=FAILED

Bytes / bits / representation:
1 byte, 7 bits: 0bbbbbbb
2 bytes, 11 bits: 110bbbbb 10bbbbbb
3 bytes, 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
4 bytes, 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb

◆ UNICODEtoUTF8() [2/2]

StrLen_t GRAYCALL Gray::StrU::UNICODEtoUTF8	(	OUT char *	pOut,
		StrLen_t	iSizeOutMaxBytes,
		const wchar_t *	pInp,
		StrLen_t	iSizeInpChars = `k_StrLen_UNK`
	)

static

Copy UNICODE to CODEPAGE_t CP_UTF8. Similar to _WIN32 ::WideCharToMultiByte().

iSizeInpChars = limit on incoming UNICODE chars. -1 = go to null.
iSizeOutMaxBytes = max output size in bytes (MUST HAVE ROOM FOR '\0')
Returns
Number of bytes. (not including null)

Note
This need not be a properly terminated string.

◆ UNICODEtoUTF8Size()

StrLen_t GRAYCALL Gray::StrU::UNICODEtoUTF8Size	(	const wchar_t *	pInp,
		StrLen_t	iSizeInpChars = `k_StrLen_UNK`
	)

static

How many UTF8 bytes to store this UNICODE string ?

Note: if return size is same as input size then no multi char encoding is needed. (isANSI)

Returns: Number of bytes. (not including null)

◆ UTF8Size() [1/2]

StrLen_t GRAYCALL Gray::StrU::UTF8Size	(	const char *	pInp,
		StrLen_t	iSizeInpBytes,
		int &	riStartBits
	)

static

How much UTF8 data do i need to make the UNICODE char?

riStartBits = BIT_ENUM_t
Returns
The length in bytes i need (from pInp) to make the UNICODE char, 0=FAILED

◆ UTF8Size() [2/2]

StrLen_t GRAYCALL Gray::StrU::UTF8Size	(	wchar_t	wChar,
		int &	riStartBits
	)

static

How big would this UNICODE char be as UTF8?

wChar = int not wchar_t just to allow any overflow to be detected.
Returns
The length in bytes I need to store the UTF8, 0=FAILED. See RFC 3629: http://www.ietf.org/rfc/rfc3629.txt

◆ UTF8Size1()

StrLen_t GRAYCALL Gray::StrU::UTF8Size1	(	unsigned char	chFirst,
		int &	riStartBits
	)

static

How many more bytes in this UTF8 sequence? estimated from the first byte of a UTF sequence.

chFirst = the first char of the UTF8 sequence.
Returns
<= StrU::k_UTF8_SIZE_MAX

◆ UTF8toUNICODE() [1/2]

StrLen_t GRAYCALL Gray::StrU::UTF8toUNICODE	(	OUT wchar_t *	pOut,
		StrLen_t	iSizeOutMaxChars,
		const char *	pInp,
		StrLen_t	iSizeInpBytes = `k_StrLen_UNK`
	)

static

Convert the CODEPAGE_t CP_UTF8 default text format to UNICODE. May be network byte order! Adds null. Similar to _WIN32 ::MultiByteToWideChar().

iSizeOutMaxChars = max output size in chars (not bytes) (MUST HAVE ROOM FOR '\0')
iSizeInpBytes = size of the input string. -1 = '\0' terminated.
Returns
Number of wide chars copied. not including '\0'.

◆ UTF8toUNICODE() [2/2]

StrLen_t GRAYCALL Gray::StrU::UTF8toUNICODE	(	wchar_t &	wChar,
		const char *	pInp,
		StrLen_t	iSizeInpBytes
	)

static

Convert a single UTF8 encoded character (multi chars) to a single UNICODE char.

Returns: The length used from the input string (< iSizeInpBytes), 0=FAILED. Multi byte chars can be up to 4 bytes long! See StrU::k_UTF8_SIZE_MAX.

Bytes / bits / representation:
1 byte, 7 bits: 0bbbbbbb
2 bytes, 11 bits: 110bbbbb 10bbbbbb
3 bytes, 16 bits: 1110bbbb 10bbbbbb 10bbbbbb
4 bytes, 21 bits: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb

◆ UTF8toUNICODELen()

StrLen_t GRAYCALL Gray::StrU::UTF8toUNICODELen	(	const char *	pInp,
		StrLen_t	iSizeInpBytes = `k_StrLen_UNK`
	)

static

How many UNICODE chars to store this UTF8 string ?

Note: if return size is same as input size then no multi char encoding was used. (isANSI)

Returns: Number of wide chars. not including null.

Friends And Related Function Documentation

◆ StrUTests

friend class StrUTests

friend

Member Data Documentation

◆ k_UTF8_SIZE_MAX

const StrLen_t Gray::StrU::k_UTF8_SIZE_MAX = 4

static

Max of 4 BYTEs to encode any UNICODE char.

The documentation for this struct was generated from the following files:

c:/Dennis/Source/Gray/GrayCore/include/StrU.h
c:/Dennis/Source/Gray/GrayCore/src/StrU.cpp

Public Types

Static Public Member Functions

Static Public Attributes

Friends

Detailed Description

Member Enumeration Documentation

◆ UTFLead_TYPE

Member Function Documentation

◆ IsUTFLead()

◆ UNICODEtoUTF8() [1/2]

◆ UNICODEtoUTF8() [2/2]

◆ UNICODEtoUTF8Size()

◆ UTF8Size() [1/2]

◆ UTF8Size() [2/2]

◆ UTF8Size1()

◆ UTF8toUNICODE() [1/2]

◆ UTF8toUNICODE() [2/2]

◆ UTF8toUNICODELen()

Friends And Related Function Documentation

◆ StrUTests

Member Data Documentation

◆ k_UTF8_SIZE_MAX