2019-05-18 18:35:51 +00:00
|
|
|
#include <cmath>
#include <limits>
#include <stdexcept>
#include <string>
#include <utility>

#include "Lexer.hpp"
|
|
|
|
|
|
|
|
// Builds a lexer over `script`. The text is taken by value and moved into
// the lexer's Script member; reading starts at offset 0.
Lexer::Lexer(string script) {
    Script = std::move(script);
    Position = 0;
}
|
|
|
|
|
|
|
|
// Lexes the script from the current position and returns every token
// produced, in order. The EndOfFile token is included as the last element.
// Tokens are allocated with `new` by LexNext; nothing in this file frees them.
vector<IToken*> Lexer::Lex() {
    vector<IToken*> result;
    IToken* current = nullptr;
    do {
        // Consume one character and let LexNext turn it (plus any lookahead
        // it needs) into a token.
        current = LexNext(Next());
        result.push_back(current);
    } while (current->GetKind() != TokenKind::EndOfFile);
    return result;
}
|
|
|
|
|
|
|
|
char Lexer::Peek(){
|
|
|
|
if (Lexer::Position > Lexer::Script.length())
|
|
|
|
return '\0';
|
|
|
|
return Lexer::Script[Lexer::Position];
|
|
|
|
}
|
|
|
|
|
|
|
|
char Lexer::Next(){
|
|
|
|
char next = Peek();
|
|
|
|
Lexer::Position++;
|
|
|
|
return next;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Produces the token that starts with the already-consumed character `c`.
//
//   '\0'                 -> SimpleToken(EndOfFile)
//   ' ', '\t', '\n', '\r' -> SimpleToken(WhiteSpace), one token per character
//   '0'..'9'             -> delegated to LexNumber, which may consume more input
//
// The returned token is allocated with `new`; nothing in this file frees it,
// so ownership presumably passes to the caller.
//
// Throws std::runtime_error for any other character. (A bare `throw;` with
// no exception in flight would call std::terminate instead of reporting an
// error the caller can handle.)
IToken* Lexer::LexNext(char c){
    switch (c) {
        case '\0':
            return new SimpleToken(TokenKind::EndOfFile);
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return new SimpleToken(TokenKind::WhiteSpace);
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return LexNumber(c);
        default:
            throw std::runtime_error(
                std::string("Lexer: unexpected character '") + c + "'");
    }
}
|
|
|
|
|
|
|
|
// Converts a decimal digit character to its numeric value.
// Returns 0..9 for '0'..'9' and -1 for every other character.
int CharToInt(char c){
    // The C++ standard guarantees that '0'..'9' are contiguous and ascending,
    // so a range check plus subtraction covers all ten digits.
    if (c >= '0' && c <= '9')
        return c - '0';
    return -1;
}
|
|
|
|
|
|
|
|
// Lexes a numeric literal whose first digit `c` has already been consumed.
//
// Further characters are read with Peek()/Next():
//   - digits extend the literal;
//   - '_' is consumed and ignored (digit separator);
//   - the first '.' switches to accumulating a fractional part;
//   - anything else (including a second '.') ends the literal and is left
//     unconsumed.
//
// Returns a heap-allocated IntegerToken for literals without a '.'.
//
// Throws std::overflow_error if an integer literal does not fit in `long`,
// and std::runtime_error for literals containing a '.', because no
// floating-point token is produced by this file yet. (A bare `throw;` with
// no exception in flight would call std::terminate.)
IToken* Lexer::LexNumber(char c){
    long int_value = CharToInt(c);
    double float_value = 0;
    // 10^k after k fractional digits; float_value gains digit / divisor.
    double divisor = 1;
    bool has_point = false;

    for (;;) {
        const char next = Peek();

        if (next == '_') {
            Next();
            continue;
        }

        if (next == '.') {
            // A second '.' cannot belong to this literal; stop here rather
            // than discarding the fraction accumulated so far.
            if (has_point)
                break;
            Next();
            has_point = true;
            float_value = static_cast<double>(int_value);
            continue;
        }

        const int digit = CharToInt(next);
        if (digit == -1)
            break;
        Next();

        if (has_point) {
            divisor *= 10;
            float_value += digit / divisor;
        } else {
            // Reject the digit before it can overflow: signed overflow is
            // undefined behaviour.
            if (int_value > (std::numeric_limits<long>::max() - digit) / 10)
                throw std::overflow_error("Lexer: integer literal is too large");
            int_value = int_value * 10 + digit;
        }
    }

    if (has_point) {
        throw std::runtime_error(
            "Lexer: floating-point literals are not supported yet (parsed value "
            + std::to_string(float_value) + ")");
    }
    return new IntegerToken(int_value);
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef TESTS_BUILD
|
|
|
|
#include <catch2/catch.hpp>
|
|
|
|
|
|
|
|
// Peeking into an empty script must yield the '\0' terminator.
TEST_CASE( "When at end of script return terminator", "[lexer]" ) {
    Lexer lexer("");
    const char peeked = lexer.Peek();
    REQUIRE(peeked == '\0');
}
|
|
|
|
|
|
|
|
// Repeated Peek() calls must keep returning the same first character.
TEST_CASE( "Peek doesn't advance", "[lexer]" ) {
    Lexer lexer("5 + 5");
    for (int attempt = 0; attempt < 3; ++attempt) {
        REQUIRE(lexer.Peek() == '5');
    }
}
|
|
|
|
|
|
|
|
// Next() must walk the script one character at a time and then report the
// '\0' terminator once the script is exhausted.
TEST_CASE( "Next does advance", "[lexer]" ) {
    Lexer lexer("5 + 5");
    const char expected[] = {'5', ' ', '+', ' ', '5', '\0'};
    for (const char want : expected) {
        REQUIRE(lexer.Next() == want);
    }
}
|
|
|
|
|
|
|
|
// The '\0' character must lex to an EndOfFile token.
TEST_CASE( "Lex Null Terminator as EOF", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('\0');
    REQUIRE(token->GetKind() == TokenKind::EndOfFile);
}
|
|
|
|
|
|
|
|
// Space, tab, newline and carriage return must each lex to a WhiteSpace token.
TEST_CASE( "Lex Whitespace", "[lexer]" ) {
    Lexer lexer("");
    const char whitespace[] = {' ', '\t', '\n', '\r'};
    for (const char ws : whitespace) {
        CHECK(lexer.LexNext(ws)->GetKind() == TokenKind::WhiteSpace);
    }
}
|
|
|
|
|
|
|
|
// Every single decimal digit must lex to an Integer token. The script is
// empty, so LexNumber sees the terminator immediately after the digit.
TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
    Lexer lexer("");
    const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
    for (const char digit : digits) {
        CHECK(lexer.LexNext(digit)->GetKind() == TokenKind::Integer);
    }
}
|
|
|
|
|
|
|
|
// Multi-digit integers must lex to exactly two tokens (the Integer and the
// trailing EndOfFile), and the Integer token must carry the original value.
TEST_CASE( "Lex Longer Integers", "[lexer]" ) {
    const long integers[] {0,1,5,9,10,50,100,1000,99999,6484,62163,48862};
    // Iterate as `long` to match the array's element type instead of
    // narrowing each value to `int`.
    for (const long integer : integers){
        Lexer lexer(std::to_string(integer));
        const auto tokens = lexer.Lex();
        REQUIRE(tokens.size() == 2);

        IToken* firstToken = tokens[0];
        REQUIRE(firstToken->GetKind() == TokenKind::Integer);

        // The kind was checked above, so the downcast is known to be valid.
        auto* integerToken = static_cast<IntegerToken*>(firstToken);
        CHECK(integerToken->Value == integer);
    }
}
|
|
|
|
|
|
|
|
#endif
|