KSquare Utilities
Tokenizer.cpp
Go to the documentation of this file.
1 /* Tokenizer.cpp -- Class to Manage Token Parsing
2  * Copyright (C) 1994-2014 Kurt Kramer
3  * For conditions of distribution and use, see copyright notice in KKB.h
4  */
5 #include "FirstIncludes.h"
6 #include <iostream>
7 #include <fstream>
8 #include <map>
9 #include <vector>
10 #include <string.h>
11 #include "MemoryDebug.h"
12 using namespace std;
13 
14 #include "KKBaseTypes.h"
15 #include "Tokenizer.h"
16 #include "KKStr.h"
17 #include "TokenBuffer.h"
18 using namespace KKB;
19 
20 
21 
23 
24  atEndOfFile (false),
25  in (_in),
26  secondCharAtEndOfFile (false),
27  operatorChars (NULL),
28  tokenList (true),
29  weOwnTokenBuffer (false)
30 {
31  Initialize ();
32 }
33 
34 
35 
36 Tokenizer::Tokenizer (const KKStr& _str):
37 
38  atEndOfFile (false),
39  in (NULL),
40  secondCharAtEndOfFile (false),
41  operatorChars (NULL),
42  tokenList (true),
43  weOwnTokenBuffer (false)
44 {
45  in = new TokenBufferStr (_str);
46  weOwnTokenBuffer = true;
47  Initialize ();
48 }
49 
50 
51 
52 Tokenizer::Tokenizer (const KKStr& _fileName,
53  bool& _fileOpened
54  ):
55 
56  atEndOfFile (false),
57  in (NULL),
58  secondCharAtEndOfFile (false),
59  operatorChars (NULL),
60  tokenList (true),
61  weOwnTokenBuffer (false)
62 {
63  in = new TokenBufferStream (_fileName);
64  _fileOpened = (in->Valid ());
65  if (_fileOpened)
66  {
67  weOwnTokenBuffer = true;
68  Initialize ();
69  }
70 }
71 
72 
73 
74 
76 {
77  if (weOwnTokenBuffer)
78  {
79  delete in;
80  in = NULL;
81  }
82  delete operatorChars;
83  operatorChars = NULL;
84 }
85 
86 
87 
88 
89 
90 
91 
92 void Tokenizer::Initialize ()
93 {
94  DefineOperatorChars (",+-*/^=%[]{}()<>");
95 
96  GetNextChar ();
97  GetNextChar ();
98 
99  tokenListLen = 10;
100 
101  while (tokenList.QueueSize () < tokenListLen)
102  {
103  ReadInNextLogicalToken ();
104  }
105 } /* Initialize */
106 
107 
108 
109 void Tokenizer::DefineOperatorChars (char* const _operatorChars)
110 {
111  delete operatorChars ;
112  operatorChars = KKB::STRDUP (_operatorChars);
113 }
114 
115 
117 {
118  while (tokenList.QueueSize () < 1)
119  ReadInNextLogicalToken ();
120 
121  KKStrPtr t = tokenList.PopFromFront ();
122  return t;
123 } /* GetNextToken */
124 
125 
126 
127 /**
128  @brief Will return a list of tokens up to and including the first occurrence if 'delToken'.
129  */
130 KKStrListPtr Tokenizer::GetNextTokens (const KKStr& delToken)
131 {
132  if (delToken.Empty ())
133  return NULL;
134 
135  KKStrListPtr tokens = new KKStrList (true);
136  KKStrPtr t = GetNextToken ();
137  if (t == NULL)
138  return NULL;
139 
140  while ((t != NULL) && (*t != delToken))
141  {
142  tokens->PushOnBack (t);
143  t = GetNextToken ();
144  }
145 
146  if (t)
147  tokens->PushOnBack (t);
148 
149  return tokens;
150 } /* GetNextTokens */
151 
152 
153 
154 void Tokenizer::PushTokenOnFront (KKStrPtr t)
155 {
156  tokenList.PushOnFront (t);
157 }
158 
159 
160 
161 
162 KKStrConstPtr Tokenizer::Peek (kkuint32 idx)
163 {
164  while ((tokenList.QueueSize () < (kkint32)(idx + 1)) && !atEndOfFile)
165  ReadInNextLogicalToken ();
166 
167  if (idx >= tokenList.size ())
168  return NULL;
169 
170  return tokenList.IdxToPtr ((kkint32)idx);
171 } /* Peek */
172 
173 
174 
176 {
177 // if (tokenList.QueueSize () == 0)
178 // return true;
179  while ((tokenList.QueueSize () < 1) && (!atEndOfFile))
180  ReadInNextLogicalToken ();
181 
182  return (tokenList.QueueSize () < 1);
183 } /* EndOfFile */
184 
185 
186 
187 char Tokenizer::GetNextChar ()
188 {
189  if (atEndOfFile)
190  {
191  firstChar = 0;
192  secondChar = 0;
193  }
194 
195  else if (secondCharAtEndOfFile)
196  {
197  firstChar = 0;
198  secondChar = 0;
199  atEndOfFile = true;
200  }
201 
202  else
203  {
204  firstChar = secondChar;
205  if (in->EndOfFile ())
206  {
207  secondChar = 0;
208  secondCharAtEndOfFile = true;
209  }
210  else
211  {
212  secondChar = in->GetNextChar ();
213  }
214  }
215 
216  return firstChar;
217 } /* GetNextChar */
218 
219 
220 
221 
222 void Tokenizer::ReadInNextLogicalToken ()
223 {
224  KKStrPtr t = GetNextTokenRaw ();
225  if (t == NULL)
226  {
227  //tokenList.PushOnBack (new Token (tokEndOfFile, "EndOfFile"));
228  }
229  else
230  {
231  tokenList.PushOnBack (t);
232  }
233 } /* ReadInNextLogicalToken */
234 
235 
236 
237 bool Tokenizer::WhiteSpaceChar (char c) const
238 {
239  if (strchr (" ", c) == NULL)
240  return false;
241  else
242  return true;
243 } /* WhiteSpaceChar */
244 
245 
246 
247 
248 bool Tokenizer::DelimiterChar (char c) const
249 {
250  return (strchr ("\n\r\t", c) != NULL);
251 }
252 
253 
254 bool Tokenizer::OperatorChar (char c) const
255 {
256  return (strchr (operatorChars, c) != NULL);
257 }
258 
259 
260 
261 KKStrPtr Tokenizer::GetNextTokenRaw ()
262 {
263  if (atEndOfFile)
264  return NULL;
265 
266  // Lets skip whitespace
267  while ((firstChar == ' ') && (!atEndOfFile))
268  GetNextChar ();
269 
270  if (firstChar == '\n')
271  {
272  if (secondChar == '\r')
273  GetNextChar ();
274  }
275  else if (firstChar == '\r')
276  {
277  if (secondChar == '\n')
278  GetNextChar ();
279  }
280 
281  if (atEndOfFile)
282  {
283  return NULL;
284  }
285 
286  KKStrPtr nextRawToken = NULL;
287 
288  if ((firstChar == '"') || (firstChar == '\''))
289  {
290  // We are at the start of a string
291  nextRawToken = ProcessStringToken (firstChar);
292  }
293 
294  else if (OperatorChar (firstChar))
295  {
296  nextRawToken = ProcessOperatorToken ();
297  }
298 
299  else
300  {
301  nextRawToken = ProcessFieldToken ();
302  }
303 
304  return nextRawToken;
305 } /* Get Next Token */
306 
307 
308 
309 
310 
311 
312 KKStrPtr Tokenizer::ProcessStringToken (char strDelChar)
313 {
314  if (firstChar == strDelChar)
315  GetNextChar ();
316 
317  KKStr str (20);
318 
319  // Scan until we hit another '"' character, or end of KKStr.
320  while (!atEndOfFile)
321  {
322  if (firstChar == strDelChar)
323  {
324  // We reached the end of the string
325  GetNextChar ();
326  break;
327  }
328 
329  else if (firstChar == '\\')
330  {
331  GetNextChar ();
332  // We have a escape character.
333  switch (firstChar)
334  {
335  case '\'': str.Append ('\''); break;
336  case '"': str.Append ('"'); break;
337  case 't': str.Append ('\t'); break;
338  case 'n': str.Append ('\n'); break;
339  case 'r': str.Append ('\r'); break;
340  default: str.Append (firstChar); break;
341  }
342  }
343 
344  else
345  {
346  str.Append (firstChar);
347  }
348 
349  GetNextChar ();
350  }
351  return new KKStr (str);
352 
353 } /* ProcessStringToken */
354 
355 
356 
357 
358 KKStrPtr Tokenizer::ProcessOperatorToken ()
359 {
360  KKStrPtr field = new KKStr (3);
361  field->Append (firstChar);
362 
363  if ((firstChar == '+') && (secondChar == '+'))
364  {
365  field->Append (secondChar);
366  GetNextChar ();
367  }
368 
369  else if ((firstChar == '-') && (secondChar == '-'))
370  {
371  field->Append (secondChar);
372  GetNextChar ();
373  }
374 
375  else if (firstChar == '=')
376  {
377  if (strchr ("=<>+-*/^", secondChar) != NULL)
378  {
379  field->Append (secondChar);
380  GetNextChar ();
381  }
382  }
383 
384  else if (strchr ("+-*/^<>", firstChar) != NULL)
385  {
386  if (secondChar == '=')
387  {
388  field->Append (secondChar);
389  GetNextChar ();
390  }
391  }
392 
393  return field;
394 } /* ProcessFieldToken */
395 
396 
397 
398 KKStrPtr Tokenizer::ProcessFieldToken ()
399 {
400  // We have a token that we don't recognize. We will create a token
401  // of type tokNULL and place all characters up till the next whitespace
402  // or delimiter character.
403  KKStrPtr field = new KKStr (10);
404  while ((!WhiteSpaceChar (firstChar)) &&
405  (!DelimiterChar (firstChar)) &&
406  (!atEndOfFile)
407  )
408  {
409  field->Append (firstChar);
410  GetNextChar ();
411  }
412 
413  return field;
414 } /* ProcessFieldToken */
415 
416 
417 
418 
419 KKStrConstPtr Tokenizer::operator[](kkuint32 idx)
420 {
421  return Peek (idx);
422 } /* operator[] */
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
bool EndOfFile()
Definition: Tokenizer.cpp:175
char * STRDUP(const char *src)
Definition: KKStr.cpp:62
TokenBufferStr(const KKStr &_buff)
Definition: TokenBuffer.cpp:34
virtual bool Valid()=0
TokenBufferStream(const KKStr &_fileName)
KKStrConstPtr operator[](kkuint32 idx)
Definition: Tokenizer.cpp:419
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
Tokenizer(const KKStr &_fileName, bool &_fileOpened)
Definition: Tokenizer.cpp:52
virtual bool EndOfFile()=0
KKTHread * KKTHreadPtr
void Append(char ch)
Definition: KKStr.cpp:1863
KKStr(const KKStr &str)
Copy Constructor.
Definition: KKStr.cpp:561
bool Empty() const
Definition: KKStr.h:241
void PushTokenOnFront(KKStrPtr t)
Definition: Tokenizer.cpp:154
KKStrPtr GetNextToken()
Definition: Tokenizer.cpp:116
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
KKStrConstPtr Peek(kkuint32 idx)
Definition: Tokenizer.cpp:162
Tokenizer(TokenBufferPtr _in)
Definition: Tokenizer.cpp:22
bool operator!=(const KKStr &right) const
Definition: KKStr.cpp:1558
TokenBuffer * TokenBufferPtr
Definition: TokenBuffer.h:31
Class is meant to break down a stream into a set of logical tokens.
Definition: Tokenizer.h:23
KKStrList(bool owner)
Definition: KKStr.cpp:4485
Tokenizer(const KKStr &_str)
Definition: Tokenizer.cpp:36
KKStrListPtr GetNextTokens(const KKStr &delToken)
Returns a list of tokens up to and including the first occurrence of 'delToken'.
Definition: Tokenizer.cpp:130
void DefineOperatorChars(char *const _operatorChars)
Definition: Tokenizer.cpp:109
virtual char GetNextChar()=0