Gray C++ Libraries  0.0.2
A set of C++ libraries for MSVC, GNU on Windows, WinCE, Linux
GrayLib::cXmlReader Class Reference

#include <cXmlReader.h>

Inheritance diagram for GrayLib::cXmlReader:
Gray::cTextReader GrayLib::cXml Gray::cTextPos

Public Member Functions

 cXmlReader (const char *pszStart="", StrLen_t nLenMax=StrT::k_LEN_MAX, StrLen_t iTabSize=cStrConst::k_TabSize, CXML_ENCODING_TYPE eEncoding=CXML_ENCODING_UNKNOWN, bool bCondenseWhitespace=true)
 
 ~cXmlReader ()
 
HRESULT ReturnError (HRESULT hRes) const
 
void put_CondenseWhitespace (bool bCondense) noexcept
 
bool isCondensedWhitespace () const noexcept
 
HRESULT Parse (cXmlReadVisitor *pVisitor, cXmlString sElementParent)
 
HRESULT Parse (cXmlReadVisitor *pVisitor)
 
HRESULT ParseText (const char *pszText, cXmlReadVisitor *pVisitor)
 
HRESULT ParseStream (cStreamInput &rInp, cXmlReadVisitor *pVisitor)
 
HRESULT ParseFile (const FILECHAR_t *pszXMLFile, cXmlReadVisitor *pVisitor)
 
- Public Member Functions inherited from Gray::cTextReader
 cTextReader (const char *pszStart, StrLen_t nLenMax=StrT::k_LEN_MAX, StrLen_t nTabSize=cStrConst::k_TabSize)
 
StrLen_t get_LenMax () const noexcept
 
StrLen_t get_LenRemaining () const noexcept
 
bool isValidIndex () const noexcept
 
bool isValidPos () const noexcept
 
const char * get_CursorPtr () const noexcept
 
char get_CursorChar () const noexcept
 
void IncToks (StrLen_t nLen=1)
 
void IncTab (StrLen_t nLenChar=1)
 
StrLen_t IncLineCR (StrLen_t nLenChar=1)
 
bool isEOF () const
 
void SetStartPtr (const char *pszStart, StrLen_t nLenMax=StrT::k_LEN_MAX)
 
- Public Member Functions inherited from Gray::cTextPos
 cTextPos (STREAM_POS_t lOffset=(STREAM_POS_t) k_ITERATE_BAD, ITERATE_t iLineNum=k_ITERATE_BAD, StrLen_t iColNum=k_StrLen_UNK) noexcept
 
void InitTop () noexcept
 
bool isTopLine () const noexcept
 
bool isValidPos () const noexcept
 
STREAM_POS_t get_Offset () const noexcept
 
ITERATE_t get_LineNum () const noexcept
 < Get 0 based line. More...
 
ITERATE_t get_Line1 () const noexcept
 < Get 1 based line. More...
 
StrLen_t get_Column1 () const noexcept
 < Get 1 based column. More...
 
void IncOffset (StrLen_t nLenOffsetSrc) noexcept
 
void IncOffset (StrLen_t nLenOffsetSrc, StrLen_t nLenCol) noexcept
 
void IncChar (StrLen_t nLenChar=1) noexcept
 
void IncLine (StrLen_t nLenChar=1) noexcept
 
StrLen_t GetStr2 (OUT char *pszOut, StrLen_t nLenOut) const
 

Static Public Member Functions

static CXML_ENCODING_TYPE GRAYCALL GetEncodingEnum (cXmlString sEncoding)
 
static StrLen_t GRAYCALL FixNewLines (char *pszBuffer, StrLen_t nLenInp)
 
- Static Public Member Functions inherited from GrayLib::cXml
static StrLen_t GRAYCALL ParseMicrosoftBOM (const BYTE *pszData)
 
static bool GRAYCALL IsXML (const IniChar_t *pszStr, StrLen_t iLenMax=StrT::k_LEN_MAX)
 
static cXmlString GRAYCALL GetAttributeStr (const IniChar_t *pszName, const cXmlString &sValue)
 
static void AddPathElement (cXmlString &sPath, const cXmlString &sElement)
 
static void RemovePathElement (cXmlString &sPath, const cXmlString &sElement)
 

Public Attributes

cTextPos m_PosNode
 The cursor at the start of current XMLNODE_TYPE m_Pos. as opposed to cTextReader is the current cursor. More...
 
CXML_ENCODING_TYPE m_eEncoding
 current encoding. More...
 
bool m_bCondenseWhitespace
 try to preserve Whitespace or not? More...
 
int m_iDepth
 Indent depth of reader. More...
 
- Public Attributes inherited from Gray::cTextReader
const StrLen_t m_iTabSize
 for proper tracking of the column number on errors. and m_CursorPos. 0 = not used/don't care. More...
 

Protected Member Functions

bool IsAlpha (wchar_t anyByte) const
 
bool IsAlphaNum (wchar_t anyByte) const
 
wchar_t ToLower (wchar_t v) const
 
bool IsStringEqual (const char *pszData, const char *pszEndTag, bool bIgnoreCase=false) const
 
bool ReadWhitespace ()
 
HRESULT ParseName (OUT cXmlString &rsName)
 
HRESULT ParseStringTo (OUT cXmlString &rsText, const char *pszEndTag, bool bIgnoreWhitespace)
 
HRESULT ParseAttrib (OUT cXmlString &sName, OUT cXmlString &sValue)
 
HRESULT ParseAttribSet (OUT cXmlAttributeSet &Attribs)
 
HRESULT ParseDeclaration (OUT cXmlAttributeSet &Attribs)
 
HRESULT ParseUnknown (OUT cXmlString &sValue)
 
HRESULT ParseComment (OUT cXmlString &sValue)
 
HRESULT ParseText (OUT cXmlString &sValue, bool bCDATAExpected)
 
HRESULT ParseNodeType ()
 
bool ParseMicrosoftBOM ()
 
HRESULT ParseElement (OUT cXmlString &sElement, OUT cXmlAttributeSet &Attribs)
 
HRESULT ParseVisitType (cXmlReadVisitor *pVisitor, XMLNODE_TYPE eNodeType, bool bDoc, bool bCDATA)
 

Friends

class cXmlNode
 

Additional Inherited Members

- Public Types inherited from GrayLib::cXml
enum  XMLNODE_TYPE { XMLNODE_QTY }
 
- Static Public Attributes inherited from Gray::cTextPos
static const cTextPos k_Invalid
 Set to invalid values. More...
 
static const cTextPos k_Zero
 Top of file. More...
 
- Static Public Attributes inherited from GrayLib::cXml
static const char k_xmlHeader [6] = "<?xml"
 "<?xml" // not case sensitive ! More...
 
static const char k_xmlEnd [3] = "?>"
 "?>" More...
 
static const char k_commentHeader [5] = "<!--"
 "<!--" // comments are not reentrant! More...
 
static const char k_commentEnd [4] = "-->"
 "-->" More...
 
static const char k_cdataHeader [10] = "<![CDATA["
 "<![CDATA[" More...
 
static const char k_cdataEnd [4] = "]]>"
 "]]>" More...
 
static const char k_dtdHeader [3] = "<!"
 "<!" More...
 
- Protected Attributes inherited from Gray::cTextReader
const char * m_pszStart
 starting read position in the data parsing stream/buffer. cTextPos cursor = m_pszStart + m_lOffset. More...
 
StrLen_t m_nLenMax
 don't advance cTextPos::m_lOffset past this. More...
 
- Protected Attributes inherited from Gray::cTextPos
STREAM_POS_t m_lOffset
 byte offset into the file. 0 based More...
 
ITERATE_t m_iLineNum
 0 based row/line, for debug purposes if there is an error. More...
 
StrLen_t m_iColNum
 0 based column number. if used. # of characters, not bytes. UTF can have multi bytes per char. More...
 

Detailed Description

The state of XML reading/parsing at a given time. eXPat like generic XML loading. Similar to .Net XmlReader. Sort of similar to the JavaScript DOMParser Acts like cStreamInput ?

Constructor & Destructor Documentation

◆ cXmlReader()

GrayLib::cXmlReader::cXmlReader ( const char *  pszStart = "",
StrLen_t  nLenMax = StrT::k_LEN_MAX,
StrLen_t  iTabSize = cStrConst::k_TabSize,
CXML_ENCODING_TYPE  eEncoding = CXML_ENCODING_UNKNOWN,
bool  bCondenseWhitespace = true 
)

◆ ~cXmlReader()

GrayLib::cXmlReader::~cXmlReader ( )

Member Function Documentation

◆ FixNewLines()

StrLen_t GRAYCALL GrayLib::cXmlReader::FixNewLines ( char *  pszBuffer,
StrLen_t  nLenInp 
)
static

Convert any "\r\n" or "\r" to just "\n" used by LoadFile.

◆ GetEncodingEnum()

CXML_ENCODING_TYPE GRAYCALL GrayLib::cXmlReader::GetEncodingEnum ( cXmlString  sEncoding)
static

◆ IsAlpha()

bool GrayLib::cXmlReader::IsAlpha ( wchar_t  anyByte) const
protected

None of these methods are reliable for any language except English. Good for approximation, not great for accuracy.

This will only work for low-ASCII, everything else is assumed to be a valid letter. I'm not sure this is the best approach, but it is quite tricky trying to figure out alphabetical vs. not across encoding. So take a very conservative approach.

◆ IsAlphaNum()

bool GrayLib::cXmlReader::IsAlphaNum ( wchar_t  anyByte) const
protected

This will only work for low-ASCII, everything else is assumed to be a valid letter. I'm not sure this is the best approach, but it is quite tricky trying to figure out alphabetical vs. not across encoding. So take a very conservative approach.

◆ isCondensedWhitespace()

bool GrayLib::cXmlReader::isCondensedWhitespace ( ) const
inlinenoexcept

Return the current white space setting.

◆ IsStringEqual()

bool GrayLib::cXmlReader::IsStringEqual ( const char *  pszData,
const char *  pszEndTag,
bool  bIgnoreCase = false 
) const
protected

Return true if the next characters in the stream are any of the endTag sequences. Ignore case only works for English, and should only be relied on when comparing to English words: IsStringEqual( pszData, "version", true ) is fine.

  • bIgnoreCase = false = default.

◆ Parse() [1/2]

HRESULT GrayLib::cXmlReader::Parse ( cXmlReadVisitor pVisitor)

Parse a buffer/document into pVisitor

◆ Parse() [2/2]

HRESULT GrayLib::cXmlReader::Parse ( cXmlReadVisitor pVisitor,
cXmlString  sElementParent 
)

Parse a element node.

◆ ParseAttrib()

HRESULT GrayLib::cXmlReader::ParseAttrib ( OUT cXmlString sName,
OUT cXmlString sValue 
)
protected

Attribute parsing starts: first letter of the name

Returns
the next char after the value end quote

Parse a single attribute. e.g. name="value".

◆ ParseAttribSet()

HRESULT GrayLib::cXmlReader::ParseAttribSet ( OUT cXmlAttributeSet Attribs)
protected

Parse a set of attributes.

◆ ParseComment()

HRESULT GrayLib::cXmlReader::ParseComment ( OUT cXmlString sValue)
protected

from the XML spec: [Definition: Comments may appear anywhere in a document outside other markup; in addition, they may appear within the document type declaration at places allowed by the grammar. They are not part of the document's character data; an XML processor MAY, but need not, make it possible for an application to retrieve the text of comments. For compatibility, the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity references MUST NOT be recognized within comments.

An example of a comment:

//! <!-- declarations for <head> & <body> -->
//! 

◆ ParseDeclaration()

HRESULT GrayLib::cXmlReader::ParseDeclaration ( OUT cXmlAttributeSet Attribs)
protected

◆ ParseElement()

HRESULT GrayLib::cXmlReader::ParseElement ( OUT cXmlString sElement,
OUT cXmlAttributeSet Attribs 
)
protected

Parse an "<element>" or "<element />"

Returns
S_FALSE = Closed "<element />" S_OK = open tag must look for "</element>" close.

◆ ParseFile()

HRESULT GrayLib::cXmlReader::ParseFile ( const FILECHAR_t pszXMLFile,
cXmlReadVisitor pVisitor 
)

◆ ParseMicrosoftBOM()

bool GrayLib::cXmlReader::ParseMicrosoftBOM ( )
protected

Check for the Microsoft UTF-8 lead bytes.

◆ ParseName()

HRESULT GrayLib::cXmlReader::ParseName ( OUT cXmlString rsName)
protected

Reads an XML name into the string provided. Returns a pointer just past the last character of the name, or 0 if the function has an error.

Names start with letters or underscores. Of course, in unicode, cXml has no idea what a letter is. The algorithm is generous.

After that, they can be letters, underscores, numbers, hyphens, or colons. (Colons are valid only for namespaces, but cXml can't tell namespaces from names.)

Returns
pointer to pszData after name.

◆ ParseNodeType()

HRESULT GrayLib::cXmlReader::ParseNodeType ( )
protected

Determine the XMLNODE_Type on the next data.

Returns
XMLNODE_TYPE

Elements start with a letter or underscore, but XML is reserved. Comments:

◆ ParseStream()

HRESULT GrayLib::cXmlReader::ParseStream ( cStreamInput rInp,
cXmlReadVisitor pVisitor 
)

◆ ParseStringTo()

HRESULT GrayLib::cXmlReader::ParseStringTo ( OUT cXmlString rsText,
const char *  pszEndTag,
bool  bIgnoreWhitespace 
)
protected

Reads text. Returns a pointer past the given end tag. Wickedly complex options, but it keeps the (sensitive) code in one place.

Read text up to pszEndTag. substitute entities. But DONT read pszEndTag.

  • rsText = the string read
  • pszEndTag = read up to this. ignore case in the end tag
  • bTrimWhitespace = whether to keep the white space. certain tags always keep Whitespace

◆ ParseText() [1/2]

HRESULT GrayLib::cXmlReader::ParseText ( const char *  pszText,
cXmlReadVisitor pVisitor 
)

◆ ParseText() [2/2]

HRESULT GrayLib::cXmlReader::ParseText ( OUT cXmlString sValue,
bool  bCDATAExpected 
)
protected

Read in text chunk. Might use CDATA style.

Returns
S_FALSE = CDATA used.

◆ ParseUnknown()

HRESULT GrayLib::cXmlReader::ParseUnknown ( OUT cXmlString sValue)
protected

read some unknown block enclosed in "<unknowntag>"

◆ ParseVisitType()

HRESULT GrayLib::cXmlReader::ParseVisitType ( cXmlReadVisitor pVisitor,
XMLNODE_TYPE  eNodeType,
bool  bDoc,
bool  bCDATA 
)
protected

◆ put_CondenseWhitespace()

void GrayLib::cXmlReader::put_CondenseWhitespace ( bool  bCondense)
inlinenoexcept

The world does not agree on whether white space should be kept or not. In order to make everyone happy, these global, static functions are provided to set whether or not cXml will condense all white space into a single space or not. The default is to condense.

◆ ReadWhitespace()

bool GrayLib::cXmlReader::ReadWhitespace ( )
protected

skip whitespace.

Returns
false = no Whitespace found.

◆ ReturnError()

HRESULT GrayLib::cXmlReader::ReturnError ( HRESULT  hRes) const

◆ ToLower()

wchar_t GrayLib::cXmlReader::ToLower ( wchar_t  v) const
protected

Friends And Related Function Documentation

◆ cXmlNode

friend class cXmlNode
friend

Member Data Documentation

◆ m_bCondenseWhitespace

bool GrayLib::cXmlReader::m_bCondenseWhitespace

try to preserve Whitespace or not?

◆ m_eEncoding

CXML_ENCODING_TYPE GrayLib::cXmlReader::m_eEncoding

current encoding.

◆ m_iDepth

int GrayLib::cXmlReader::m_iDepth

Indent depth of reader.

◆ m_PosNode

cTextPos GrayLib::cXmlReader::m_PosNode

The cursor at the start of current XMLNODE_TYPE m_Pos. as opposed to cTextReader is the current cursor.


The documentation for this class was generated from the following files: