PorygonLang/src/Parser/Lexer.cpp

234 lines
6.9 KiB
C++
Raw Normal View History

2019-05-18 18:35:51 +00:00
#include <cmath>
#include <stdexcept>
#include <string>
#include <utility>
#include "Lexer.hpp"
/// Creates a lexer over `script`, with the read position at the first character.
/// @param script Source text to tokenize; taken by value and moved into the lexer.
Lexer::Lexer(string script) {
    this->Position = 0;
    this->Script = std::move(script);
}
/// Tokenizes the script from the current position up to and including the
/// end-of-file token.
/// @return The tokens in source order; the last element is always the EndOfFile token.
///         The pointers are the raw pointers handed back by LexNext; this function
///         does not free them.
vector<IToken*> Lexer::Lex() {
    vector<IToken*> result;
    IToken* current = nullptr;
    do {
        // Consume one character and let LexNext build the token that starts with it.
        current = Lexer::LexNext(Lexer::Next());
        result.push_back(current);
    } while (current->GetKind() != TokenKind::EndOfFile);
    return result;
}
/// Returns the character at the current position without consuming it.
/// @return The current character, or '\0' once the position is past the script.
char Lexer::Peek(){
    // When Position == length() the index below reads the string's terminator,
    // which std::string guarantees to be '\0', so only positions strictly beyond
    // that need the explicit guard.
    const bool past_end = Lexer::Position > Lexer::Script.length();
    return past_end ? '\0' : Lexer::Script[Lexer::Position];
}
/// Returns the character at the current position and advances past it.
/// The position is advanced unconditionally, even when Peek() already reports '\0'.
/// @return The character that was current before advancing.
char Lexer::Next(){
    const char current = Lexer::Peek();
    ++Lexer::Position;
    return current;
}
/// Builds the token that starts with the already-consumed character `c`.
///
/// Multi-character tokens ("==" and numeric literals) consume their remaining
/// characters from the script via Peek()/Next().
///
/// @param c The first character of the token; '\0' produces the EndOfFile token.
/// @return A token allocated with `new`; this function does not retain or free it.
/// @throws std::runtime_error if `c` does not start any known token. (A bare `throw;`
///         was used here before, which with no exception in flight calls
///         std::terminate instead of reporting an error the caller can handle.)
IToken* Lexer::LexNext(char c){
    switch (c) {
        case '\0':
            return new SimpleToken(TokenKind::EndOfFile);
        case ' ': case '\t': case '\n': case '\r':
            return new SimpleToken(TokenKind::WhiteSpace);
        case '+':
            return new SimpleToken(TokenKind::PlusToken);
        case '-':
            return new SimpleToken(TokenKind::MinusToken);
        case '/':
            return new SimpleToken(TokenKind::SlashToken);
        case '*':
            return new SimpleToken(TokenKind::StarToken);
        case '=':
            // "==" is equality; a lone '=' is assignment.
            if (Lexer::Peek() == '='){
                Lexer::Next();
                return new SimpleToken(TokenKind::EqualityToken);
            }
            return new SimpleToken(TokenKind::AssignmentToken);
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return LexNumber(c);
        default:
            throw std::runtime_error(std::string("Lexer: unexpected character '") + c + "'");
    }
}
/// Converts a decimal digit character to its numeric value.
/// @param c The character to convert.
/// @return 0-9 for '0'-'9', or -1 if `c` is not a decimal digit.
int CharToInt(char c){
    // The digit characters '0'..'9' are guaranteed to be contiguous and ascending.
    if (c >= '0' && c <= '9'){
        return c - '0';
    }
    return -1;
}
/// Lexes a numeric literal whose first digit `c` has already been consumed.
///
/// Digits are accumulated into an integer until a '.' is seen; from then on they are
/// added as decimal places of a floating point value. Underscores are consumed and
/// ignored. Lexing stops, without consuming, at the first character that cannot
/// continue the literal. A '.' with no digits after it still produces a float.
///
/// A second '.' ends the literal instead of being consumed. Previously it reset the
/// value to the integer part and silently dropped the fraction read so far
/// (e.g. "1.5.3" was lexed as 1.3).
///
/// @param c The first character of the literal, expected to be '0'-'9'.
/// @return A FloatToken if a '.' was seen, otherwise an IntegerToken; allocated with `new`.
///
/// NOTE: the integer part is accumulated in a `long` with no overflow check.
IToken* Lexer::LexNumber(char c){
    long int_value = CharToInt(c);
    double float_value = 0;
    // Power of ten matching the decimal place of the most recently read fraction digit.
    double divisor = 1;
    bool has_point = false;

    while (true){
        const char next = Lexer::Peek();
        if (next == '_'){
            Lexer::Next();
            continue;
        }
        if (next == '.'){
            if (has_point){
                // A literal has at most one decimal point; leave this one unconsumed.
                break;
            }
            Lexer::Next();
            has_point = true;
            float_value = int_value;
            continue;
        }
        const int next_val = CharToInt(next);
        if (next_val == -1){
            break;
        }
        Lexer::Next();
        if (has_point){
            divisor *= 10;
            float_value += next_val / divisor;
        }
        else{
            int_value = int_value * 10 + next_val;
        }
    }

    if (has_point){
        return new FloatToken(float_value);
    }
    return new IntegerToken(int_value);
}
#ifdef TESTS_BUILD
#include <catch2/catch.hpp>
// An empty script has no characters, so the first Peek() must already report '\0'.
TEST_CASE( "When at end of script return terminator", "[lexer]" ) {
    Lexer emptyLexer("");
    REQUIRE(emptyLexer.Peek() == '\0');
}
// Repeated Peek() calls must keep returning the same first character.
TEST_CASE( "Peek doesn't advance", "[lexer]" ) {
    Lexer lexer("5 + 5");
    for (int attempt = 0; attempt < 3; ++attempt) {
        REQUIRE(lexer.Peek() == '5');
    }
}
// Next() must walk the script one character at a time and then report '\0'.
TEST_CASE( "Next does advance", "[lexer]" ) {
    Lexer lexer("5 + 5");
    const char expected[] = {'5', ' ', '+', ' ', '5', '\0'};
    for (char want : expected) {
        REQUIRE(lexer.Next() == want);
    }
}
// Feeding '\0' directly to LexNext must yield the end-of-file token.
TEST_CASE( "Lex Null Terminator as EOF", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('\0');
    REQUIRE(token -> GetKind() == TokenKind::EndOfFile);
}
2019-05-19 10:49:26 +00:00
// '+' lexes to a PlusToken.
TEST_CASE( "Lex Plus Token", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('+');
    REQUIRE(token -> GetKind() == TokenKind::PlusToken);
}
// '-' lexes to a MinusToken.
TEST_CASE( "Lex Minus Token", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('-');
    REQUIRE(token -> GetKind() == TokenKind::MinusToken);
}
// '/' lexes to a SlashToken.
TEST_CASE( "Lex Slash Token", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('/');
    REQUIRE(token -> GetKind() == TokenKind::SlashToken);
}
// '*' lexes to a StarToken.
TEST_CASE( "Lex Star Token", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('*');
    REQUIRE(token -> GetKind() == TokenKind::StarToken);
}
// With nothing following it in the script, '=' lexes to an AssignmentToken.
TEST_CASE( "Lex Assignment Token", "[lexer]" ) {
    Lexer lexer("");
    IToken* token = lexer.LexNext('=');
    REQUIRE(token -> GetKind() == TokenKind::AssignmentToken);
}
// "==" must lex as a single EqualityToken followed by the end-of-file token.
TEST_CASE( "Lex Equality Token", "[lexer]" ) {
    Lexer lexer("==");
    auto lexed = lexer.Lex();
    REQUIRE(lexed.size() == 2);
    IToken* head = lexed[0];
    REQUIRE(head -> GetKind() == TokenKind::EqualityToken);
}
2019-05-18 18:35:51 +00:00
// Space, tab, newline and carriage return all lex to a WhiteSpace token.
TEST_CASE( "Lex Whitespace", "[lexer]" ) {
    Lexer lexer("");
    const char blanks[] = {' ', '\t', '\n', '\r'};
    for (char blank : blanks) {
        CHECK(lexer.LexNext(blank) -> GetKind() == TokenKind::WhiteSpace);
    }
}
// Every single decimal digit, with nothing following it, lexes to an Integer token.
TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
    Lexer lexer("");
    for (char digit = '0'; digit <= '9'; ++digit) {
        CHECK(lexer.LexNext(digit) -> GetKind() == TokenKind::Integer);
    }
}
// Multi-digit integers must lex to one Integer token carrying the exact value,
// followed by the end-of-file token.
TEST_CASE( "Lex Longer Integers", "[lexer]" ) {
    const long integers[] {0,1,5,9,10,50,100,1000,99999,6484,62163,48862};
    // Iterate as `long` to match the array's element type; an `int` loop variable
    // would narrow each value implicitly.
    for (const long integer : integers){
        Lexer lexer = Lexer(std::to_string(integer));
        auto tokens = lexer.Lex();
        REQUIRE(tokens.size() == 2);
        IToken* firstToken = tokens[0];
        REQUIRE(firstToken -> GetKind() == TokenKind::Integer);
        // The kind was checked above, so the downcast to the concrete token type is safe.
        auto* integerToken = static_cast<IntegerToken*>(firstToken);
        CHECK(integerToken -> Value == integer);
    }
}
2019-05-19 10:49:26 +00:00
// Decimal literals must lex to one Float token with approximately the source value,
// followed by the end-of-file token. std::to_string renders each double in fixed
// notation, which is the text the lexer is given.
TEST_CASE( "Lex Floats", "[lexer]" ) {
    const double floats[] {0.5, 0.8, 100.7, 52.3548, 8461354.1324886};
    for (const double f : floats){
        Lexer lexer = Lexer(std::to_string(f));
        auto tokens = lexer.Lex();
        REQUIRE(tokens.size() == 2);
        IToken* firstToken = tokens[0];
        REQUIRE(firstToken -> GetKind() == TokenKind::Float);
        // The kind was checked above, so the downcast to the concrete token type is safe.
        auto* floatToken = static_cast<FloatToken*>(firstToken);
        CHECK(floatToken -> Value == Approx(f));
    }
}
2019-05-18 18:35:51 +00:00
#endif