amath  1.8.5
Simple command line calculator
lexer.cpp
Go to the documentation of this file.
1 /*-
2  * Copyright (c) 2014-2018 Carsten Sonne Larsen <cs@innolan.net>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  * Project homepage:
26  * https://amath.innolan.net
27  *
28  */
29 
30 #include "amathc.h"
31 #include "lexer.h"
32 #include "token.h"
33 #include "symbol.h"
34 #include "loc/kword.h"
35 #include "operatordefs.h"
36 #include "system/program.h"
37 
38 Lexer::Lexer(const char* input)
39 {
40  AllocAndCopy(&this->input, input);
41  pos = 0;
42  str = this->input;
43  first = nullptr;
44  current = nullptr;
45 }
46 
48 {
49  delete [] input;
50 
51  if (first != nullptr)
52  {
53  delete first;
54  }
55 }
56 
57 char* Lexer::GetInput() const
58 {
59  return input;
60 }
61 
63 {
64  return first;
65 }
66 
// Builds the token list for the stored input.
// The cursor (str/pos) and the token pointers are reset, then the loop runs until the
// current token's symbol is symend, linking each previous token to the new current one
// through 'next'.
// NOTE(review): listing lines 74 and 80 are missing from this extract; presumably each
// one produces the next token into 'current' - confirm against the original file.
// NOTE(review): 'first' is reset without being deleted; if Tokenize() can run more than
// once on the same Lexer the earlier list may be leaked - confirm.
67 void Lexer::Tokenize()
68 {
69  pos = 0;
70  str = input;
71  first = nullptr;
72  current = nullptr;
73 
 // (listing line 74 missing here)
75  first = current;
76 
77  do
78  {
 // Remember the token that was current before the next one is produced.
79  Token* last = current;
 // (listing line 80 missing here)
81  last->next = current;
82  }
83  while (current->symbol != symend);
84 }
85 
// Body of the routine that reads the next token at the cursor: it first moves past
// skippable characters, then handles end of input, then falls through to consuming a
// single character.
// NOTE(review): the signature line is missing from this extract (the listing's symbol
// index names 'void GetNextToken()' at lexer.cpp:86), and so are listing lines 100, 104
// and 109, so the token-creating statements are not visible here - consult the original
// file before editing.
87 {
88  // Skip spaces and non visible characters
89  while (*str != 0 && ShouldSkip(*str))
90  {
91  str++;
 // NOTE(review): this test runs after str++, so it inspects the character following
 // the one just skipped; pos is therefore not advanced once per skipped character -
 // confirm this is intended.
92  if (Program->Language->CharIsSpace(*str))
93  {
94  pos++;
95  }
96  }
97 
 // End of input reached.
98  if (*str == 0)
99  {
 // (listing line 100 missing here)
101  return;
102  }
103 
 // (listing line 104 missing here; presumably the condition guarding the return below)
105  return;
106 
 // Reached only when nothing above returned: consume exactly one character.
107  str++;
108  pos++;
 // (listing line 109 missing here)
110 }
111 
// Body of the operator matcher: compares the character at the cursor with the 'chr' of
// every entry in operators[]. On the first match the cursor and pos advance by one and
// true is returned; if no entry matches, false is returned and the cursor is untouched.
// NOTE(review): the signature line is missing from this extract (the listing's symbol
// index names 'bool GetOperator()' at lexer.cpp:112), and so is listing line 119 inside
// the match branch - confirm against the original file.
113 {
 // Number of entries in the operators[] table.
114  static const unsigned int count = sizeof(operators) / sizeof(operatordef);
115  for (unsigned int i = 0; i < count; i++)
116  {
117  if (operators[i].chr == *str)
118  {
 // (listing line 119 missing here)
120  str++;
121  pos++;
122  return true;
123  }
124  }
125 
126  return false;
127 }
128 
130 {
131  if (*str != '"')
132  {
133  return false;
134  }
135 
136  char* start = str;
137  const unsigned int startPos = pos;
138  int unsigned len = 0;
139  str++;
140 
141  while (*str != 0 && *str != '"' && !Program->Language->CharIsCntrl(*str))
142  {
143  str++;
144  len++;
145  }
146 
147  if (len == 0 || *str != '"')
148  {
149  str = start;
150  return false;
151  }
152 
153  char* ident = new char[len + 1];
154  MemCopy(ident, start + 1, len);
155  ident[len] = 0;
156 
157  current = new Token(current, symqident, ident, startPos);
158 
159  str++;
160  pos += len + 1;
161 
162  delete [] ident;
163  return true;
164 }
165 
167 {
168  const char* start = str;
169  const unsigned int startPos = pos;
170  int unsigned len = 0;
171  bool found = true;
172  Symbol ksymbol;
173 
174  while (found)
175  {
176  if (Program->Language->CharIsAlpha(*str))
177  {
178  str++;
179  len++;
180  }
181  else if (len != 0 && Program->Language->CharIsDigit(*str))
182  {
183  str++;
184  len++;
185  }
186  else
187  {
188  found = false;
189  }
190  }
191 
192  if (len == 0)
193  {
194  return false;
195  }
196 
197  char* ident = new char[len + 1];
198  MemCopy(ident, start, len);
199  ident[len] = 0;
200 
201  if ((ksymbol = FindKeyword(ident)))
202  {
203  current = new Token(current, ksymbol, startPos);
204  }
205  else
206  {
207  current = new Token(current, symident, ident, startPos);
208  }
209 
210  pos += len;
211 
212  delete [] ident;
213  return true;
214 }
215 
// Body of the numeric-value reader: hands the text at the cursor to
// Program->Input->Parse(). If 'end' comes back equal to str, nothing was consumed and
// false is returned. Otherwise the first 'len' characters are copied into a temporary
// NUL-terminated buffer, pos advances by 'len', str moves to 'end' and true is returned.
// The Number returned by Parse() is deleted on both paths.
// NOTE(review): the signature line is missing from this extract (the listing's symbol
// index names 'bool GetDigitValue()' at lexer.cpp:216), and so is listing line 234,
// presumably where the token is created from 'text' - confirm against the original file.
217 {
218  unsigned int len;
219  char* end;
220 
221  // Only the numeral parser can determine if next token is a value
222  Number* number = Program->Input->Parse(str, &len, &end);
223 
 // Parse() left 'end' at the cursor: no value here.
224  if (str == end)
225  {
226  delete number;
227  return false;
228  }
229 
 // Temporary NUL-terminated copy of the parsed text.
230  char* text = new char[len + 1];
231  MemCopy(text, str, len);
232  *(text + len) = '\0';
233 
 // (listing line 234 missing here)
235  delete [] text;
236  delete number;
237 
238  pos += len;
239  str = end;
240  return true;
241 }
242 
243 Symbol Lexer::FindKeyword(const char* ident)
244 {
245  return Program->Language->FindKeyword(ident);
246 }
247 
248 char* Lexer::FindKeyword(Symbol symbol)
249 {
250  static const unsigned int kwcount = sizeof(keywords) / sizeof(keyworddef);
251  for (unsigned int i = 0; i < kwcount; i++)
252  {
253  if (keywords[i].symbol == symbol)
254  {
255  return const_cast<char*>(keywords[i].name);
256  }
257  }
258 
259  static const unsigned int ocount = sizeof(operators) / sizeof(operatordef);
260  for (unsigned int i = 0; i < ocount; i++)
261  {
262  if (operators[i].symbol == symbol)
263  {
264  return const_cast<char*>(&(operators[i].chr));
265  }
266  }
267 
268  return nullptr;
269 }
270 
271 bool Lexer::ShouldSkip(char character)
272 {
273  if (character == '\n')
274  {
275  return false;
276  }
277 
278  if (Program->Language->CharIsCntrl(character))
279  {
280  return true;
281  }
282 
283  if (Program->Language->CharIsSpace(character))
284  {
285  return true;
286  }
287 
288  return false;
289 }
virtual bool CharIsSpace(unsigned long character)=0
bool GetOperator()
Definition: lexer.cpp:112
const char * name
Definition: kword.h:52
static bool ShouldSkip(char character)
Definition: lexer.cpp:271
Token(Token *last, Symbol symbol, int pos)
Definition: token.cpp:37
Symbol FindKeyword(const char *ident) const
Definition: language.cpp:118
virtual bool CharIsAlpha(unsigned long character)=0
Encapsulates an lexical analyzer. Provides token for the parser.
Definition: lexer.h:48
bool GetQuotedIdent()
Definition: lexer.cpp:129
Character representation of keyword tied with its symbol.
Definition: kword.h:48
Symbol symbol
Definition: operatordefs.h:48
Token * current
Definition: lexer.h:64
Tokens are created by the Lexical Analyzer and provides an intermediate state for input consumed by t...
Definition: token.h:46
static const keyworddef keywords[]
Definition: kword.h:55
static Symbol FindKeyword(const char *ident)
Definition: lexer.cpp:243
Definition: numb.h:66
Symbol symbol
Definition: token.h:53
static char * FindKeyword(Symbol symbol)
Definition: lexer.cpp:248
~Lexer()
Definition: lexer.cpp:47
virtual Number * Parse(const char *text, unsigned int *length, char **end)=0
Symbol symbol
Definition: kword.h:51
char * input
Definition: lexer.h:60
Token * GetFirstToken() const
Definition: lexer.cpp:62
void GetNextToken()
Definition: lexer.cpp:86
Lexer(const char *input)
Definition: lexer.cpp:38
char * str
Definition: lexer.h:61
unsigned int pos
Definition: lexer.h:62
Definition: symbol.h:81
virtual bool CharIsDigit(unsigned long character)=0
Character definition of operators.
Definition: operatordefs.h:45
virtual bool CharIsCntrl(unsigned long character)=0
char * GetInput() const
Definition: lexer.cpp:57
bool GetDigitValue()
Definition: lexer.cpp:216
bool GetLiteral()
Definition: lexer.cpp:166
Token * first
Definition: lexer.h:63
void Tokenize()
Definition: lexer.cpp:67
Token * next
Definition: token.h:63
unsigned int AllocAndCopy(char **destination, const char *source)
Allocate memory and copy a string into the array.
Definition: alloccpy.c:40
class NumeralSystem * Input
Definition: program.h:75
static const operatordef operators[]
Character representation of operators tied with their symbols.
Definition: operatordefs.h:54
void MemCopy(void *destination, const void *source, unsigned int length)
Copy a block of memory, handling overlap.
Definition: memcpy.c:75
Token(Token *last, Symbol symbol, const char *text, int pos)
Definition: token.cpp:46