Added lexing support for identifiers and keywords
@@ -53,7 +53,12 @@ IToken* Lexer::LexNext(char c){
        return new SimpleToken(TokenKind::AssignmentToken);
    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
        return LexNumber(c);
    case '_':
        return LexIdentifierOrKeyword(c);
    default:
        if (isalpha(c)){
            return LexIdentifierOrKeyword(c);
        }
        throw;
    }
}
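Before this change, any character not matched by an earlier case fell through to the bare throw in the default branch, so letters and underscores were hard errors; the new '_' case and the isalpha branch route them into LexIdentifierOrKeyword instead. A quick Catch2 check in the style of the existing tests could pin that down (a hypothetical sketch, not part of this commit):

TEST_CASE( "Lex letters and underscore without throwing", "[lexer]" ) {
    Lexer lexer = Lexer("");
    // Both characters should now be consumed by LexIdentifierOrKeyword and
    // produce a token rather than reaching the default throw.
    REQUIRE_NOTHROW(lexer.LexNext('x'));
    REQUIRE_NOTHROW(lexer.LexNext('_'));
}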
@@ -119,116 +124,46 @@ IToken* Lexer::LexNumber(char c){
    }
}

unsigned constexpr const_hash(char const *input) {
    return *input ?
        static_cast<unsigned int>(*input) + 33 * const_hash(input + 1) :
        5381;
}

IToken* Lexer::LexIdentifierOrKeyword(char c){
    vector<char> charVec(1, c);
    while (true){
        char next = Lexer::Peek();
        if (next == '\0') break;
        if (isalpha(next) || next == '_'){
            Lexer::Next();
            charVec.push_back(next);
        }
        else{
            break;
        }
    }
    string s = string(charVec.begin(), charVec.end());
    switch (const_hash(s.c_str())){
        case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword);
        case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword);
        case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword);
        case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword);
        case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword);
        case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword);
        case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword);
        case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword);
        case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword);
        case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword);
        case const_hash("in"): return new SimpleToken(TokenKind::InKeyword);
        case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword);
        case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword);
        case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword);
        case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword);
        case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword);
        case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword);
        case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword);
        case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword);
        default: return new IdentifierToken(s);
    }
}

#ifdef TESTS_BUILD
#include <catch2/catch.hpp>

TEST_CASE( "When at end of script return terminator", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.Peek() == '\0');
}

TEST_CASE( "Peek doesn't advance", "[lexer]" ) {
    Lexer lexer = Lexer("5 + 5");
    REQUIRE(lexer.Peek() == '5');
    REQUIRE(lexer.Peek() == '5');
    REQUIRE(lexer.Peek() == '5');
}

TEST_CASE( "Next does advance", "[lexer]" ) {
    Lexer lexer = Lexer("5 + 5");
    REQUIRE(lexer.Next() == '5');
    REQUIRE(lexer.Next() == ' ');
    REQUIRE(lexer.Next() == '+');
    REQUIRE(lexer.Next() == ' ');
    REQUIRE(lexer.Next() == '5');
    REQUIRE(lexer.Next() == '\0');
}

TEST_CASE( "Lex Null Terminator as EOF", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.LexNext('\0') -> GetKind() == TokenKind::EndOfFile);
}

TEST_CASE( "Lex Plus Token", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.LexNext('+') -> GetKind() == TokenKind::PlusToken);
}

TEST_CASE( "Lex Minus Token", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.LexNext('-') -> GetKind() == TokenKind::MinusToken);
}

TEST_CASE( "Lex Slash Token", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.LexNext('/') -> GetKind() == TokenKind::SlashToken);
}

TEST_CASE( "Lex Star Token", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.LexNext('*') -> GetKind() == TokenKind::StarToken);
}

TEST_CASE( "Lex Assignment Token", "[lexer]" ) {
    Lexer lexer = Lexer("");
    REQUIRE(lexer.LexNext('=') -> GetKind() == TokenKind::AssignmentToken);
}

TEST_CASE( "Lex Equality Token", "[lexer]" ) {
    Lexer lexer = Lexer("==");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 2);
    IToken* firstToken = tokens[0];
    REQUIRE(firstToken -> GetKind() == TokenKind::EqualityToken);
}

TEST_CASE( "Lex Whitespace", "[lexer]" ) {
    Lexer lexer = Lexer("");
    CHECK(lexer.LexNext(' ') -> GetKind() == TokenKind::WhiteSpace);
    CHECK(lexer.LexNext('\t') -> GetKind() == TokenKind::WhiteSpace);
    CHECK(lexer.LexNext('\n') -> GetKind() == TokenKind::WhiteSpace);
    CHECK(lexer.LexNext('\r') -> GetKind() == TokenKind::WhiteSpace);
}

TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
    Lexer lexer = Lexer("");
    CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('2') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('3') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('4') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('5') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('6') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('7') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('8') -> GetKind() == TokenKind::Integer);
    CHECK(lexer.LexNext('9') -> GetKind() == TokenKind::Integer);
}

TEST_CASE( "Lex Longer Integers", "[lexer]" ) {
    long integers[] {0,1,5,9,10,50,100,1000,99999,6484,62163,48862};
    for (int integer : integers){
        Lexer lexer = Lexer(std::to_string(integer));
        auto tokens = lexer.Lex();
        REQUIRE(tokens.size() == 2);
        IToken* firstToken = tokens[0];
        REQUIRE(firstToken -> GetKind() == TokenKind::Integer);
        auto* integerToken = (IntegerToken *)firstToken;
        CHECK(integerToken -> Value == integer);
    }
}

TEST_CASE( "Lex Floats", "[lexer]" ) {
    double floats[] {0.5, 0.8, 100.7, 52.3548, 8461354.1324886};
    for (double f : floats){
        Lexer lexer = Lexer(std::to_string(f));
        auto tokens = lexer.Lex();
        REQUIRE(tokens.size() == 2);
        IToken* firstToken = tokens[0];
        REQUIRE(firstToken -> GetKind() == TokenKind::Float);
        auto* floatToken = (FloatToken *)firstToken;
        CHECK(floatToken -> Value == Approx(f));
    }
}

#endif
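Because const_hash is constexpr, each case label in LexIdentifierOrKeyword collapses to a compile-time constant, so keyword recognition is a single switch on the hash of the scanned text, with IdentifierToken as the fallback for everything else. An end-to-end sketch of the expected behavior, assuming Lex() yields one token plus the end-of-file token as in the integer and float tests above (hypothetical, not part of this commit):

TEST_CASE( "Lex keyword and identifier", "[lexer]" ) {
    Lexer keywordLexer = Lexer("while");
    auto keywordTokens = keywordLexer.Lex();
    REQUIRE(keywordTokens.size() == 2);
    REQUIRE(keywordTokens[0] -> GetKind() == TokenKind::WhileKeyword);

    Lexer identifierLexer = Lexer("whilst");
    auto identifierTokens = identifierLexer.Lex();
    REQUIRE(identifierTokens.size() == 2);
    // "whilst" is not in the keyword switch, so it should come back as an
    // IdentifierToken. The diff does not show which TokenKind identifiers use,
    // so the concrete token type is checked instead of a kind name.
    CHECK(dynamic_cast<IdentifierToken *>(identifierTokens[0]) != nullptr);
}

One trade-off to note: the switch compares hashes only, so an identifier that happens to collide with a keyword's hash would be lexed as that keyword; comparing the spelled-out string inside each case would close that gap.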