KSquare Utilities
XmlTokenizer.h
Go to the documentation of this file.
1 /* XmlTokenizer.h -- Class to Manage Token Parsing
2  * Copyright (C) 1994-2014 Kurt Kramer
3  * For conditions of distribution and use, see copyright notice in KKB.h
4  */
5 
6 #ifndef _XMLTOKENIZER_
7 #define _XMLTOKENIZER_
8 /**
9  *@class KKB::XmlTokenizer
10  *@brief Manages the breakdown of a stream into a set of logical tokens compatible with the XML format.
11  *@author Kurt Kramer
12  *@details Breaks up a source KKStr, text file, or [[TokenBuffer]] into logical tokens compatible with the XML format. XmlStream
13  * utilizes this object to parse streams.
14  *@ref XmlStream
15  */
16 
17 
18 #include <deque>
19 #include <fstream>
20 #include <vector>
21 #include "TokenBuffer.h"
22 
23 namespace KKB
24 {
26  {
27  public:
28  /**
29  *@brief Constructs an XmlTokenizer using the provided [[TokenBuffer]] _in as the data stream source.
30  *@details Does NOT take ownership of _in; next token extracted will be from current position in _in.
31  *@param _in Tokens will be retrieved starting from its current position; does not take ownership.
32  */
34 
35  /** @brief Manages the extraction of XML tokens from a KKStr instance; accomplishes this by building a [[TokenBufferStr]] around _str. */
36  XmlTokenizer (const KKStr& _str);
37 
  /**
   *@brief Constructs an XmlTokenizer from a file name.
   *@details NOTE(review): presumably tokenizes the contents of the file named by _fileName and reports through
   * _fileOpened whether that file could be opened -- confirm against the implementation.
   *@param _fileName Name of the file; presumably the source of the tokens -- TODO confirm.
   *@param[out] _fileOpened Passed by non-const reference; presumably set to true when the file was opened -- TODO confirm.
   */
38  XmlTokenizer (const KKStr& _fileName,
39  bool& _fileOpened
40  );
41 
42  ~XmlTokenizer ();
43 
44 
45  /** Indicates whether there are any more tokens that can be extracted. */
46  bool EndOfFile ();
47 
48 
49  /**
50  *@brief Will retrieve the next token in the stream which will be either a tag token or up to
51  * one line of the content part of an element. If it is a content token it may end with a '\n'
52  * character. The idea is that when reading content we will never return more than one line of
53  * text at a time.
54  */
55  KKStrPtr GetNextToken ();
56 
57 
58  /**
59  *@brief Returns a list of tokens up to and including the first occurrence of 'delToken'.
60  *@details Caller will take ownership of the returned tokens, and be responsible for
61  * deleting them.
62  */
63  KKStrListPtr GetNextTokens (const KKStr& delToken);
64 
65  /**@brief Allows you to look at future tokens in the stream; index of 0 would be the next token to be extracted. */
66  KKStrConstPtr Peek (kkuint32 idx);
67 
68 
69  /**@brief Places token at current position such that it will be the next token extracted from the stream. */
70  void PushTokenOnFront (KKStrPtr t);
71 
72 
73  KKStrConstPtr operator[](kkuint32 idx); /**< Returns pointers to following Tokens in the stream where idx==0 indicates the next token. */
74 
75 
76  private:
77  KKStrPtr GetNextTokenRaw ();
78  char GetNextChar ();
79 
80  void Initialize ();
81  KKStrPtr ProcessTagToken ();
82  KKStrPtr ProcessBodyToken ();
83  void ProcessAmpersand ();
84 
85 
86  void ReadInNextLogicalToken (); /**< Will retrieve the next token in the stream which will be either a tag token
87  * or up to one line of the content part of an element.
88  */
89 
90  bool WhiteSpaceChar (char c) const;
91 
  /** NOTE(review): presumably returns the replacement character registered in 'entityMap' for 'entityName' -- confirm against the implementation. */
92  char LookUpEntity (const KKStr& entityName) const;
93 
94  bool atEndOfFile;
95  TokenBufferPtr in;
96 
97  kkuint32 tokenListLen;
98  std::deque<KKStrPtr> tokenList; /**< @brief Will contain a fixed list of future tokens to read.
99  * As the end of the stream is approached it will fill with end of file
100  * Tokens as a flag.
101  */
102 
103  bool weOwnTokenBuffer; /**< @brief When true, indicates that we need to call the destructor on the TokenBuffer 'in' that we are processing. */
104 
  /* NOTE(review): the role of 'firstChar' is not documented in this header -- confirm against the implementation. */
105  char firstChar;
106 
107  std::map<KKStr,char> entityMap; /**< @brief Used to maintain a list of valid entities and their respective replacement characters. These are
108  * the names of the XML escape characters, ex: "quot" = '"', "lt" = '<'. These are the escape sequences that
109  * start with an ampersand(&) and end with a semicolon.
  * NOTE(review): this header includes <deque>, <fstream> and <vector> but not <map>; std::map presumably
  * arrives transitively through "TokenBuffer.h" -- confirm, or include <map> directly.
110  */
111 
  /* NOTE(review): the purpose of 'logger1' and 'logger2' is not documented in this header; presumably diagnostic output streams -- confirm. */
112  std::ofstream logger1;
113  std::ofstream logger2;
114  }; /* XmlTokenizer */
115 
116 
118 }
119 
120 #endif
void PushTokenOnFront(KKStrPtr t)
Places token at current position such that it will be the next token extracted from the stream...
Manages the breakdown of a stream into a set of logical tokens compatible with the XML format...
Definition: XmlTokenizer.h:25
XmlTokenizer(const KKStr &_fileName, bool &_fileOpened)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
XmlTokenizer(TokenBufferPtr _in)
Constructs an XmlTokenizer using the provided [[TokenBuffer]] _in as the data stream source...
KKTHread * KKTHreadPtr
XmlTokenizer * XmlTokenizerPtr
Definition: XmlTokenizer.h:117
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
XmlTokenizer(const KKStr &_str)
Manages the extraction of xml tokens from a KKStr instance; accomplishes this by building a [[TokenBu...
KKStrConstPtr Peek(kkuint32 idx)
Allows you to look at future tokens in the stream; index of 0 would be the next token to be extracted...
TokenBuffer * TokenBufferPtr
Definition: TokenBuffer.h:31
KKStrConstPtr operator[](kkuint32 idx)
KKStrListPtr GetNextTokens(const KKStr &delToken)
Returns a list of tokens up to and including the first occurrence of 'delToken'.
KKStrPtr GetNextToken()
Will retrieve the next token in the stream which will be either a tag token or up to one line of the ...