amath  1.8.5
Simple command line calculator
Lexer Class Reference

Encapsulates a lexical analyzer. Provides tokens for the parser. More...

#include <lexer.h>

Collaboration diagram for Lexer:

Public Member Functions

 Lexer (const char *input)
 
 ~Lexer ()
 
void Tokenize ()
 
Token * GetFirstToken () const
 
char * GetInput () const
 

Static Public Member Functions

static char * FindKeyword (Symbol symbol)
 

Private Member Functions

void GetNextToken ()
 
bool GetOperator ()
 
bool GetQuotedIdent ()
 
bool GetLiteral ()
 
bool GetDigitValue ()
 

Static Private Member Functions

static bool ShouldSkip (char character)
 
static Symbol FindKeyword (const char *ident)
 

Private Attributes

char * input
 
char * str
 
unsigned int pos
 
Token * first
 
Token * current
 

Detailed Description

Encapsulates a lexical analyzer. Provides tokens for the parser.

More info on lexical analysis is available at Wikipedia: https://wikipedia.org/wiki/Lexical_analysis

Definition at line 48 of file lexer.h.

Constructor & Destructor Documentation

◆ Lexer()

Lexer::Lexer ( const char *  input)
explicit

Definition at line 38 of file lexer.cpp.

References AllocAndCopy(), current, first, input, pos, and str.

Referenced by Parser::Parser().

39 {
40  AllocAndCopy(&this->input, input);
41  pos = 0;
42  str = this->input;
43  first = nullptr;
44  current = nullptr;
45 }
Token * current
Definition: lexer.h:64
unsigned int AllocAndCopy(char **destination, const char *source)
Allocate memory and copy a string into the array.
Definition: alloccpy.c:40
char * input
Definition: lexer.h:60
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
Token * first
Definition: lexer.h:63
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ~Lexer()

Lexer::~Lexer ( )

Definition at line 47 of file lexer.cpp.

References first, and input.

48 {
49  delete [] input;
50 
51  if (first != nullptr)
52  {
53  delete first;
54  }
55 }
char * input
Definition: lexer.h:60
Token * first
Definition: lexer.h:63

Member Function Documentation

◆ FindKeyword() [1/2]

char * Lexer::FindKeyword ( Symbol  symbol)
static

Definition at line 248 of file lexer.cpp.

References operatordef::chr, keywords, keyworddef::name, operators, operatordef::symbol, and keyworddef::symbol.

Referenced by PreferencesBase::GetDescription().

249 {
250  static const unsigned int kwcount = sizeof(keywords) / sizeof(keyworddef);
251  for (unsigned int i = 0; i < kwcount; i++)
252  {
253  if (keywords[i].symbol == symbol)
254  {
255  return const_cast<char*>(keywords[i].name);
256  }
257  }
258 
259  static const unsigned int ocount = sizeof(operators) / sizeof(operatordef);
260  for (unsigned int i = 0; i < ocount; i++)
261  {
262  if (operators[i].symbol == symbol)
263  {
264  return const_cast<char*>(&(operators[i].chr));
265  }
266  }
267 
268  return nullptr;
269 }
const char * name
Definition: kword.h:52
Character representation of keyword tied with its symbol.
Definition: kword.h:48
static const keyworddef keywords[]
Definition: kword.h:55
Character definition of operators.
Definition: operatordefs.h:45
static const operatordef operators[]
Character representation of operators tied with their symbols.
Definition: operatordefs.h:54
Here is the caller graph for this function:

◆ FindKeyword() [2/2]

Symbol Lexer::FindKeyword ( const char *  ident)
static private

Definition at line 243 of file lexer.cpp.

References Language::FindKeyword().

Referenced by GetLiteral().

244 {
245  return Program->Language->FindKeyword(ident);
246 }
Master control class.
Definition: program.h:55
Symbol FindKeyword(const char *ident) const
Definition: language.cpp:118
class Language * Language
Definition: program.h:71
Here is the call graph for this function:
Here is the caller graph for this function:

◆ GetDigitValue()

bool Lexer::GetDigitValue ( )
private

Definition at line 216 of file lexer.cpp.

References current, Program::Input, MemCopy(), NumeralSystem::Parse(), pos, str, symnumber, and Token::Token().

Referenced by GetNextToken().

217 {
218  unsigned int len;
219  char* end;
220 
221  // Only the numeral parser can determine if next token is a value
222  Number* number = Program->Input->Parse(str, &len, &end);
223 
224  if (str == end)
225  {
226  delete number;
227  return false;
228  }
229 
230  char* text = new char[len + 1];
231  MemCopy(text, str, len);
232  *(text + len) = '\0';
233 
234  current = new Token(current, symnumber, text, pos);
235  delete [] text;
236  delete number;
237 
238  pos += len;
239  str = end;
240  return true;
241 }
Master control class.
Definition: program.h:55
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provide an intermediate state for input consumed by t...
Definition: token.h:46
Definition: numb.h:66
virtual Number * Parse(const char *text, unsigned int *length, char **end)=0
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
void MemCopy(void *destination, const void *source, unsigned int length)
Copy a block of memory, handling overlap.
Definition: memcpy.c:75
class NumeralSystem * Input
Definition: program.h:75
Here is the call graph for this function:
Here is the caller graph for this function:

◆ GetFirstToken()

Token * Lexer::GetFirstToken ( ) const

Definition at line 62 of file lexer.cpp.

References first.

Referenced by Parser::GetToken(), and Parser::Peek().

63 {
64  return first;
65 }
Token * first
Definition: lexer.h:63
Here is the caller graph for this function:

◆ GetInput()

char * Lexer::GetInput ( ) const

Definition at line 57 of file lexer.cpp.

References input.

Referenced by Parser::Parse(), Parser::ParseDigistStatement(), Parser::ParseFileStatement(), Parser::ParseFunctionDef(), Parser::ParseIdent(), Parser::ParseNumeralStatement(), and Parser::TryParseStatement().

58 {
59  return input;
60 }
char * input
Definition: lexer.h:60
Here is the caller graph for this function:

◆ GetLiteral()

bool Lexer::GetLiteral ( )
private

Definition at line 166 of file lexer.cpp.

References Language::CharIsAlpha(), Language::CharIsDigit(), current, FindKeyword(), MemCopy(), pos, str, symident, and Token::Token().

Referenced by GetNextToken().

167 {
168  const char* start = str;
169  const unsigned int startPos = pos;
170  int unsigned len = 0;
171  bool found = true;
172  Symbol ksymbol;
173 
174  while (found)
175  {
176  if (Program->Language->CharIsAlpha(*str))
177  {
178  str++;
179  len++;
180  }
181  else if (len != 0 && Program->Language->CharIsDigit(*str))
182  {
183  str++;
184  len++;
185  }
186  else
187  {
188  found = false;
189  }
190  }
191 
192  if (len == 0)
193  {
194  return false;
195  }
196 
197  char* ident = new char[len + 1];
198  MemCopy(ident, start, len);
199  ident[len] = 0;
200 
201  if ((ksymbol = FindKeyword(ident)))
202  {
203  current = new Token(current, ksymbol, startPos);
204  }
205  else
206  {
207  current = new Token(current, symident, ident, startPos);
208  }
209 
210  pos += len;
211 
212  delete [] ident;
213  return true;
214 }
Master control class.
Definition: program.h:55
virtual bool CharIsAlpha(unsigned long character)=0
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provide an intermediate state for input consumed by t...
Definition: token.h:46
static char * FindKeyword(Symbol symbol)
Definition: lexer.cpp:248
class Language * Language
Definition: program.h:71
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
virtual bool CharIsDigit(unsigned long character)=0
Symbol
Symbols generated by the Lexer.
Definition: symbol.h:41
void MemCopy(void *destination, const void *source, unsigned int length)
Copy a block of memory, handling overlap.
Definition: memcpy.c:75
Here is the call graph for this function:
Here is the caller graph for this function:

◆ GetNextToken()

void Lexer::GetNextToken ( )
private

Definition at line 86 of file lexer.cpp.

References Language::CharIsSpace(), current, GetDigitValue(), GetLiteral(), GetOperator(), GetQuotedIdent(), pos, ShouldSkip(), str, symend, symunknown, and Token::Token().

Referenced by Tokenize().

87 {
88  // Skip spaces and non visible characters
89  while (*str != 0 && ShouldSkip(*str))
90  {
91  str++;
93  {
94  pos++;
95  }
96  }
97 
98  if (*str == 0)
99  {
100  current = new Token(current, symend, pos);
101  return;
102  }
103 
104  if (GetOperator() || GetQuotedIdent() || GetDigitValue() || GetLiteral())
105  return;
106 
107  str++;
108  pos++;
109  current = new Token(current, symunknown, pos - 1);
110 }
virtual bool CharIsSpace(unsigned long character)=0
Master control class.
Definition: program.h:55
bool GetOperator()
Definition: lexer.cpp:112
static bool ShouldSkip(char character)
Definition: lexer.cpp:271
bool GetQuotedIdent()
Definition: lexer.cpp:129
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provide an intermediate state for input consumed by t...
Definition: token.h:46
class Language * Language
Definition: program.h:71
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
Definition: symbol.h:81
bool GetDigitValue()
Definition: lexer.cpp:216
bool GetLiteral()
Definition: lexer.cpp:166
Here is the call graph for this function:
Here is the caller graph for this function:

◆ GetOperator()

bool Lexer::GetOperator ( )
private

Definition at line 112 of file lexer.cpp.

References operatordef::chr, current, operators, pos, str, operatordef::symbol, and Token::Token().

Referenced by GetNextToken().

113 {
114  static const unsigned int count = sizeof(operators) / sizeof(operatordef);
115  for (unsigned int i = 0; i < count; i++)
116  {
117  if (operators[i].chr == *str)
118  {
119  current = new Token(current, operators[i].symbol, pos);
120  str++;
121  pos++;
122  return true;
123  }
124  }
125 
126  return false;
127 }
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provide an intermediate state for input consumed by t...
Definition: token.h:46
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
Character definition of operators.
Definition: operatordefs.h:45
static const operatordef operators[]
Character representation of operators tied with their symbols.
Definition: operatordefs.h:54
Here is the call graph for this function:
Here is the caller graph for this function:

◆ GetQuotedIdent()

bool Lexer::GetQuotedIdent ( )
private

Definition at line 129 of file lexer.cpp.

References Language::CharIsCntrl(), current, MemCopy(), pos, str, symqident, and Token::Token().

Referenced by GetNextToken().

130 {
131  if (*str != '"')
132  {
133  return false;
134  }
135 
136  char* start = str;
137  const unsigned int startPos = pos;
138  int unsigned len = 0;
139  str++;
140 
141  while (*str != 0 && *str != '"' && !Program->Language->CharIsCntrl(*str))
142  {
143  str++;
144  len++;
145  }
146 
147  if (len == 0 || *str != '"')
148  {
149  str = start;
150  return false;
151  }
152 
153  char* ident = new char[len + 1];
154  MemCopy(ident, start + 1, len);
155  ident[len] = 0;
156 
157  current = new Token(current, symqident, ident, startPos);
158 
159  str++;
160  pos += len + 1;
161 
162  delete [] ident;
163  return true;
164 }
Master control class.
Definition: program.h:55
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provide an intermediate state for input consumed by t...
Definition: token.h:46
class Language * Language
Definition: program.h:71
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
virtual bool CharIsCntrl(unsigned long character)=0
void MemCopy(void *destination, const void *source, unsigned int length)
Copy a block of memory, handling overlap.
Definition: memcpy.c:75
Here is the call graph for this function:
Here is the caller graph for this function:

◆ ShouldSkip()

bool Lexer::ShouldSkip ( char  character)
static private

Definition at line 271 of file lexer.cpp.

References Language::CharIsCntrl(), and Language::CharIsSpace().

Referenced by GetNextToken().

272 {
273  if (character == '\n')
274  {
275  return false;
276  }
277 
278  if (Program->Language->CharIsCntrl(character))
279  {
280  return true;
281  }
282 
283  if (Program->Language->CharIsSpace(character))
284  {
285  return true;
286  }
287 
288  return false;
289 }
virtual bool CharIsSpace(unsigned long character)=0
Master control class.
Definition: program.h:55
class Language * Language
Definition: program.h:71
virtual bool CharIsCntrl(unsigned long character)=0
Here is the call graph for this function:
Here is the caller graph for this function:

◆ Tokenize()

void Lexer::Tokenize ( )

Definition at line 67 of file lexer.cpp.

References current, first, GetNextToken(), input, Token::next, pos, str, Token::symbol, and symend.

Referenced by Parser::Parse().

68 {
69  pos = 0;
70  str = input;
71  first = nullptr;
72  current = nullptr;
73 
74  GetNextToken();
75  first = current;
76 
77  do
78  {
79  Token* last = current;
80  GetNextToken();
81  last->next = current;
82  }
83  while (current->symbol != symend);
84 }
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provide an intermediate state for input consumed by t...
Definition: token.h:46
Symbol symbol
Definition: token.h:53
char * input
Definition: lexer.h:60
void GetNextToken()
Definition: lexer.cpp:86
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
Definition: symbol.h:81
Token * first
Definition: lexer.h:63
Token * next
Definition: token.h:63
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ current

Token* Lexer::current
private

◆ first

Token* Lexer::first
private

Definition at line 63 of file lexer.h.

Referenced by GetFirstToken(), Lexer(), Tokenize(), and ~Lexer().

◆ input

char* Lexer::input
private

Definition at line 60 of file lexer.h.

Referenced by GetInput(), Lexer(), Tokenize(), and ~Lexer().

◆ pos

unsigned int Lexer::pos
private

◆ str

char* Lexer::str
private

The documentation for this class was generated from the following files: