![]() |
Gray C++ Libraries
0.0.2
A set of C++ libraries for MSVC, GNU on Windows, WinCE, Linux
|
#include <cXmlReader.h>
Public Member Functions | |
cXmlReader (const char *pszStart="", StrLen_t nLenMax=StrT::k_LEN_MAX, StrLen_t iTabSize=cStrConst::k_TabSize, CXML_ENCODING_TYPE eEncoding=CXML_ENCODING_UNKNOWN, bool bCondenseWhitespace=true) | |
~cXmlReader () | |
HRESULT | ReturnError (HRESULT hRes) const |
void | put_CondenseWhitespace (bool bCondense) noexcept |
bool | isCondensedWhitespace () const noexcept |
HRESULT | Parse (cXmlReadVisitor *pVisitor, cXmlString sElementParent) |
HRESULT | Parse (cXmlReadVisitor *pVisitor) |
HRESULT | ParseText (const char *pszText, cXmlReadVisitor *pVisitor) |
HRESULT | ParseStream (cStreamInput &rInp, cXmlReadVisitor *pVisitor) |
HRESULT | ParseFile (const FILECHAR_t *pszXMLFile, cXmlReadVisitor *pVisitor) |
![]() | |
cTextReader (const char *pszStart, StrLen_t nLenMax=StrT::k_LEN_MAX, StrLen_t nTabSize=cStrConst::k_TabSize) | |
StrLen_t | get_LenMax () const noexcept |
StrLen_t | get_LenRemaining () const noexcept |
bool | isValidIndex () const noexcept |
bool | isValidPos () const noexcept |
const char * | get_CursorPtr () const noexcept |
char | get_CursorChar () const noexcept |
void | IncToks (StrLen_t nLen=1) |
void | IncTab (StrLen_t nLenChar=1) |
StrLen_t | IncLineCR (StrLen_t nLenChar=1) |
bool | isEOF () const |
void | SetStartPtr (const char *pszStart, StrLen_t nLenMax=StrT::k_LEN_MAX) |
![]() | |
cTextPos (STREAM_POS_t lOffset=(STREAM_POS_t) k_ITERATE_BAD, ITERATE_t iLineNum=k_ITERATE_BAD, StrLen_t iColNum=k_StrLen_UNK) noexcept | |
void | InitTop () noexcept |
bool | isTopLine () const noexcept |
bool | isValidPos () const noexcept |
STREAM_POS_t | get_Offset () const noexcept |
ITERATE_t | get_LineNum () const noexcept |
< Get 0 based line. More... | |
ITERATE_t | get_Line1 () const noexcept |
< Get 1 based line. More... | |
StrLen_t | get_Column1 () const noexcept |
< Get 1 based column. More... | |
void | IncOffset (StrLen_t nLenOffsetSrc) noexcept |
void | IncOffset (StrLen_t nLenOffsetSrc, StrLen_t nLenCol) noexcept |
void | IncChar (StrLen_t nLenChar=1) noexcept |
void | IncLine (StrLen_t nLenChar=1) noexcept |
StrLen_t | GetStr2 (OUT char *pszOut, StrLen_t nLenOut) const |
Static Public Member Functions | |
static CXML_ENCODING_TYPE GRAYCALL | GetEncodingEnum (cXmlString sEncoding) |
static StrLen_t GRAYCALL | FixNewLines (char *pszBuffer, StrLen_t nLenInp) |
![]() | |
static StrLen_t GRAYCALL | ParseMicrosoftBOM (const BYTE *pszData) |
static bool GRAYCALL | IsXML (const IniChar_t *pszStr, StrLen_t iLenMax=StrT::k_LEN_MAX) |
static cXmlString GRAYCALL | GetAttributeStr (const IniChar_t *pszName, const cXmlString &sValue) |
static void | AddPathElement (cXmlString &sPath, const cXmlString &sElement) |
static void | RemovePathElement (cXmlString &sPath, const cXmlString &sElement) |
Public Attributes | |
cTextPos | m_PosNode |
The cursor at the start of current XMLNODE_TYPE m_Pos. as opposed to cTextReader is the current cursor. More... | |
CXML_ENCODING_TYPE | m_eEncoding |
current encoding. More... | |
bool | m_bCondenseWhitespace |
try to preserve Whitespace or not? More... | |
int | m_iDepth |
Indent depth of reader. More... | |
![]() | |
const StrLen_t | m_iTabSize |
for proper tracking of the column number on errors. and m_CursorPos. 0 = not used/don't care. More... | |
Protected Member Functions | |
bool | IsAlpha (wchar_t anyByte) const |
bool | IsAlphaNum (wchar_t anyByte) const |
wchar_t | ToLower (wchar_t v) const |
bool | IsStringEqual (const char *pszData, const char *pszEndTag, bool bIgnoreCase=false) const |
bool | ReadWhitespace () |
HRESULT | ParseName (OUT cXmlString &rsName) |
HRESULT | ParseStringTo (OUT cXmlString &rsText, const char *pszEndTag, bool bIgnoreWhitespace) |
HRESULT | ParseAttrib (OUT cXmlString &sName, OUT cXmlString &sValue) |
HRESULT | ParseAttribSet (OUT cXmlAttributeSet &Attribs) |
HRESULT | ParseDeclaration (OUT cXmlAttributeSet &Attribs) |
HRESULT | ParseUnknown (OUT cXmlString &sValue) |
HRESULT | ParseComment (OUT cXmlString &sValue) |
HRESULT | ParseText (OUT cXmlString &sValue, bool bCDATAExpected) |
HRESULT | ParseNodeType () |
bool | ParseMicrosoftBOM () |
HRESULT | ParseElement (OUT cXmlString &sElement, OUT cXmlAttributeSet &Attribs) |
HRESULT | ParseVisitType (cXmlReadVisitor *pVisitor, XMLNODE_TYPE eNodeType, bool bDoc, bool bCDATA) |
Friends | |
class | cXmlNode |
Additional Inherited Members | |
![]() | |
enum | XMLNODE_TYPE { XMLNODE_QTY } |
![]() | |
static const cTextPos | k_Invalid |
Set to invalid values. More... | |
static const cTextPos | k_Zero |
Top of file. More... | |
![]() | |
static const char | k_xmlHeader [6] = "<?xml" |
"<?xml" // not case sensitive ! More... | |
static const char | k_xmlEnd [3] = "?>" |
"?>" More... | |
static const char | k_commentHeader [5] = "<!--" |
"<!--" // comments are not reentrant! More... | |
static const char | k_commentEnd [4] = "-->" |
"-->" More... | |
static const char | k_cdataHeader [10] = "<![CDATA[" |
"<![CDATA[" More... | |
static const char | k_cdataEnd [4] = "]]>" |
"]]>" More... | |
static const char | k_dtdHeader [3] = "<!" |
"<!" More... | |
![]() | |
const char * | m_pszStart |
starting read position in the data parsing stream/buffer. cTextPos cursor = m_pszStart + m_lOffset. More... | |
StrLen_t | m_nLenMax |
don't advance cTextPos::m_lOffset past this. More... | |
![]() | |
STREAM_POS_t | m_lOffset |
byte offset into the file. 0 based More... | |
ITERATE_t | m_iLineNum |
0 based row/line, for debug purposes if there is an error. More... | |
StrLen_t | m_iColNum |
0 based column number. if used. # of characters, not bytes. UTF can have multi bytes per char. More... | |
The state of XML reading/parsing at a given time. eXPat like generic XML loading. Similar to .Net XmlReader. Sort of similar to the JavaScript DOMParser Acts like cStreamInput ?
GrayLib::cXmlReader::cXmlReader | ( | const char * | pszStart = "" , |
StrLen_t | nLenMax = StrT::k_LEN_MAX , |
||
StrLen_t | iTabSize = cStrConst::k_TabSize , |
||
CXML_ENCODING_TYPE | eEncoding = CXML_ENCODING_UNKNOWN , |
||
bool | bCondenseWhitespace = true |
||
) |
GrayLib::cXmlReader::~cXmlReader | ( | ) |
Convert any "\r\n" or "\r" to just "\n" used by LoadFile.
|
static |
|
protected |
None of these methods are reliable for any language except English. Good for approximation, not great for accuracy.
This will only work for low-ASCII, everything else is assumed to be a valid letter. I'm not sure this is the best approach, but it is quite tricky trying to figure out alphabetical vs. not across encoding. So take a very conservative approach.
|
protected |
This will only work for low-ASCII, everything else is assumed to be a valid letter. I'm not sure this is the best approach, but it is quite tricky trying to figure out alphabetical vs. not across encoding. So take a very conservative approach.
|
inlinenoexcept |
Return the current white space setting.
|
protected |
Return true if the next characters in the stream are any of the endTag sequences. Ignore case only works for English, and should only be relied on when comparing to English words: IsStringEqual( pszData, "version", true ) is fine.
HRESULT GrayLib::cXmlReader::Parse | ( | cXmlReadVisitor * | pVisitor | ) |
Parse a buffer/document into pVisitor
HRESULT GrayLib::cXmlReader::Parse | ( | cXmlReadVisitor * | pVisitor, |
cXmlString | sElementParent | ||
) |
Parse a element node.
|
protected |
Attribute parsing starts: first letter of the name
Parse a single attribute. e.g. name="value".
|
protected |
Parse a set of attributes.
|
protected |
from the XML spec: [Definition: Comments may appear anywhere in a document outside other markup; in addition, they may appear within the document type declaration at places allowed by the grammar. They are not part of the document's character data; an XML processor MAY, but need not, make it possible for an application to retrieve the text of comments. For compatibility, the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity references MUST NOT be recognized within comments.
An example of a comment:
//! <!-- declarations for <head> & <body> --> //!
|
protected |
|
protected |
Parse an "<element>" or "<element />"
HRESULT GrayLib::cXmlReader::ParseFile | ( | const FILECHAR_t * | pszXMLFile, |
cXmlReadVisitor * | pVisitor | ||
) |
|
protected |
Check for the Microsoft UTF-8 lead bytes.
|
protected |
Reads an XML name into the string provided. Returns a pointer just past the last character of the name, or 0 if the function has an error.
Names start with letters or underscores. Of course, in unicode, cXml has no idea what a letter is. The algorithm is generous.
After that, they can be letters, underscores, numbers, hyphens, or colons. (Colons are valid only for namespaces, but cXml can't tell namespaces from names.)
|
protected |
Determine the XMLNODE_Type on the next data.
Elements start with a letter or underscore, but XML is reserved. Comments:
HRESULT GrayLib::cXmlReader::ParseStream | ( | cStreamInput & | rInp, |
cXmlReadVisitor * | pVisitor | ||
) |
|
protected |
Reads text. Returns a pointer past the given end tag. Wickedly complex options, but it keeps the (sensitive) code in one place.
Read text up to pszEndTag. substitute entities. But DONT read pszEndTag.
HRESULT GrayLib::cXmlReader::ParseText | ( | const char * | pszText, |
cXmlReadVisitor * | pVisitor | ||
) |
|
protected |
Read in text chunk. Might use CDATA style.
|
protected |
read some unknown block enclosed in "<unknowntag>"
|
protected |
|
inlinenoexcept |
The world does not agree on whether white space should be kept or not. In order to make everyone happy, these global, static functions are provided to set whether or not cXml will condense all white space into a single space or not. The default is to condense.
|
protected |
skip whitespace.
|
protected |
|
friend |
bool GrayLib::cXmlReader::m_bCondenseWhitespace |
try to preserve Whitespace or not?
CXML_ENCODING_TYPE GrayLib::cXmlReader::m_eEncoding |
current encoding.
int GrayLib::cXmlReader::m_iDepth |
Indent depth of reader.
cTextPos GrayLib::cXmlReader::m_PosNode |
The cursor at the start of current XMLNODE_TYPE m_Pos. as opposed to cTextReader is the current cursor.