Added lexing support for identifiers and keywords

Deukhoofd 2019-05-19 14:26:21 +02:00
parent 8285811fb7
commit 06fe0e7c4c
GPG Key ID: B4C087AC81641654
9 changed files with 343 additions and 140 deletions

CMakeLists.txt

@@ -3,11 +3,12 @@ project(PorygonLang)
 set(CMAKE_CXX_STANDARD 17)
+add_subdirectory(extern/Catch2)
+add_library(PorygonLang src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp)
+add_executable(PorygonLangTests src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/LexerTests.cpp)
-add_library(PorygonLang library.cpp library.h src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp)
-add_executable(PorygonLangTests library.cpp library.h src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp)
-add_subdirectory(extern/Catch2)
 target_link_libraries(PorygonLangTests Catch2::Catch2)
 target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD)
 include(CTest)

library.cpp (deleted file)

@@ -1,18 +0,0 @@
#include "library.h"
#include <iostream>
#define CATCH_CONFIG_MAIN
unsigned int Factorial( unsigned int number ) {
return number <= 1 ? number : Factorial(number-1)*number;
}
#ifdef TESTS_BUILD
#include <catch2/catch.hpp>
TEST_CASE( "Factorials are computed", "[factorial]" ) {
REQUIRE( Factorial(1) == 1 );
REQUIRE( Factorial(2) == 2 );
REQUIRE( Factorial(3) == 6 );
REQUIRE( Factorial(10) == 3628800 );
}
#endif

library.h (deleted file)

@@ -1,6 +0,0 @@
#ifndef PORYGONLANG_LIBRARY_H
#define PORYGONLANG_LIBRARY_H
int theAnswer();
#endif //PORYGONLANG_LIBRARY_H

src/Parser/Lexer.cpp

@@ -53,7 +53,12 @@ IToken* Lexer::LexNext(char c){
             return new SimpleToken(TokenKind::AssignmentToken);
         case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
             return LexNumber(c);
+        case '_':
+            return LexIdentifierOrKeyword(c);
         default:
+            if (isalpha(c)){
+                return LexIdentifierOrKeyword(c);
+            }
             throw;
     }
 }
@@ -119,116 +124,46 @@ IToken* Lexer::LexNumber(char c){
     }
 }

(Removed from the end of Lexer.cpp: the #ifdef TESTS_BUILD block with the inline Catch2 TEST_CASEs for the lexer. This commit moves those tests, essentially unchanged, into the new src/Parser/LexerTests.cpp shown below, alongside new keyword and identifier cases.)

+unsigned constexpr const_hash(char const *input) {
+    return *input ?
+           static_cast<unsigned int>(*input) + 33 * const_hash(input + 1) :
+           5381;
+}
+
+IToken* Lexer::LexIdentifierOrKeyword(char c){
+    vector<char> charVec(1, c);
+    while (true){
+        char next = Lexer::Peek();
+        if (next == '\0') break;
+        if (isalpha(next) || next == '_'){
+            Lexer::Next();
+            charVec.push_back(next);
+        }
+        else{
+            break;
+        }
+    }
+    string s = string(charVec.begin(), charVec.end());
+    switch (const_hash(s.c_str())){
+        case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword);
+        case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword);
+        case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword);
+        case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword);
+        case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword);
+        case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword);
+        case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword);
+        case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword);
+        case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword);
+        case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword);
+        case const_hash("in"): return new SimpleToken(TokenKind::InKeyword);
+        case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword);
+        case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword);
+        case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword);
+        case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword);
+        case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword);
+        case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword);
+        case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword);
+        case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword);
+        default: return new IdentifierToken(s);
+    }
+}
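
The keyword dispatch above works because const_hash is constexpr: each case label is the compile-time hash of a keyword, so classifying a lexed word costs one hash plus an integer switch instead of a chain of string comparisons. The trade-off (not addressed in this commit) is that two different strings could in principle hash to the same value and be misread as a keyword. A minimal sketch of a collision-safe variant, using hypothetical hash_str/classify/Word names rather than the commit's actual functions:

#include <string>

// Same DJB-style recursion as const_hash above, shown standalone.
constexpr unsigned hash_str(char const* input) {
    return *input ? static_cast<unsigned>(*input) + 33 * hash_str(input + 1)
                  : 5381;
}

enum class Word { AndKeyword, WhileKeyword, LocalKeyword, Identifier };

// Hypothetical classifier (not the commit's LexIdentifierOrKeyword): the
// string comparison after each hash match guards against collisions.
Word classify(const std::string& s) {
    switch (hash_str(s.c_str())) {
        case hash_str("and"):   if (s == "and")   return Word::AndKeyword;   break;
        case hash_str("while"): if (s == "while") return Word::WhileKeyword; break;
        case hash_str("local"): if (s == "local") return Word::LocalKeyword; break;
    }
    return Word::Identifier;
}

The extra string comparison only runs on a hash match, so the common identifier path stays as cheap as the original switch.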

src/Parser/Lexer.hpp

@@ -16,10 +16,10 @@ public:
     char Next();
     IToken* LexNext(char c);
     IToken* LexNumber(char c);
+    IToken *LexIdentifierOrKeyword(char c);
 public:
     vector<IToken*> Lex();
     explicit Lexer(string script);
 };

src/Parser/LexerTests.cpp (new file, 256 lines added)

@@ -0,0 +1,256 @@
#ifdef TESTS_BUILD
#include <catch2/catch.hpp>
#include "Lexer.hpp"
TEST_CASE( "When at end of script return terminator", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.Peek() == '\0');
}
TEST_CASE( "Peek doesn't advance", "[lexer]" ) {
Lexer lexer = Lexer("5 + 5");
REQUIRE(lexer.Peek() == '5');
REQUIRE(lexer.Peek() == '5');
REQUIRE(lexer.Peek() == '5');
}
TEST_CASE( "Next does advance", "[lexer]" ) {
Lexer lexer = Lexer("5 + 5");
REQUIRE(lexer.Next() == '5');
REQUIRE(lexer.Next() == ' ');
REQUIRE(lexer.Next() == '+');
REQUIRE(lexer.Next() == ' ');
REQUIRE(lexer.Next() == '5');
REQUIRE(lexer.Next() == '\0');
}
TEST_CASE( "Lex Null Terminator as EOF", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.LexNext('\0') -> GetKind() == TokenKind::EndOfFile);
}
TEST_CASE( "Lex Plus Token", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.LexNext('+') -> GetKind() == TokenKind::PlusToken);
}
TEST_CASE( "Lex Minus Token", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.LexNext('-') -> GetKind() == TokenKind::MinusToken);
}
TEST_CASE( "Lex Slash Token", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.LexNext('/') -> GetKind() == TokenKind::SlashToken);
}
TEST_CASE( "Lex Star Token", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.LexNext('*') -> GetKind() == TokenKind::StarToken);
}
TEST_CASE( "Lex Assignment Token", "[lexer]" ) {
Lexer lexer = Lexer("");
REQUIRE(lexer.LexNext('=') -> GetKind() == TokenKind::AssignmentToken);
}
TEST_CASE( "Lex Equality Token", "[lexer]" ) {
Lexer lexer = Lexer("==");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::EqualityToken);
}
TEST_CASE( "Lex Whitespace", "[lexer]" ) {
Lexer lexer = Lexer("");
CHECK(lexer.LexNext(' ') -> GetKind() == TokenKind::WhiteSpace);
CHECK(lexer.LexNext('\t') -> GetKind() == TokenKind::WhiteSpace);
CHECK(lexer.LexNext('\n') -> GetKind() == TokenKind::WhiteSpace);
CHECK(lexer.LexNext('\r') -> GetKind() == TokenKind::WhiteSpace);
}
TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
Lexer lexer = Lexer("");
CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('2') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('3') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('4') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('5') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('6') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('7') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('8') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('9') -> GetKind() == TokenKind::Integer);
}
TEST_CASE( "Lex Longer Integers", "[lexer]" ) {
long integers[] {0,1,5,9,10,50,100,1000,99999,6484,62163,48862};
for (long integer : integers){
Lexer lexer = Lexer(std::to_string(integer));
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::Integer);
auto* integerToken = (IntegerToken *)firstToken;
CHECK(integerToken -> Value == integer);
}
}
TEST_CASE( "Lex Floats", "[lexer]" ) {
double floats[] {0.5, 0.8, 100.7, 52.3548, 8461354.1324886};
for (double f : floats){
Lexer lexer = Lexer(std::to_string(f));
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::Float);
auto* floatToken = (FloatToken *)firstToken;
CHECK(floatToken -> Value == Approx(f));
}
}
TEST_CASE( "Lex And Keyword", "[lexer]" ) {
Lexer lexer = Lexer("and");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::AndKeyword);
}
TEST_CASE( "Lex Break Keyword", "[lexer]" ) {
Lexer lexer = Lexer("break");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::BreakKeyword);
}
TEST_CASE( "Lex Do Keyword", "[lexer]" ) {
Lexer lexer = Lexer("do");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::DoKeyword);
}
TEST_CASE( "Lex else Keyword", "[lexer]" ) {
Lexer lexer = Lexer("else");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::ElseKeyword);
}
TEST_CASE( "Lex else if Keyword", "[lexer]" ) {
Lexer lexer = Lexer("elseif");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::ElseIfKeyword);
}
TEST_CASE( "Lex end Keyword", "[lexer]" ) {
Lexer lexer = Lexer("end");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::EndKeyword);
}
TEST_CASE( "Lex false Keyword", "[lexer]" ) {
Lexer lexer = Lexer("false");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::FalseKeyword);
}
TEST_CASE( "Lex for Keyword", "[lexer]" ) {
Lexer lexer = Lexer("for");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::ForKeyword);
}
TEST_CASE( "Lex function Keyword", "[lexer]" ) {
Lexer lexer = Lexer("function");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::FunctionKeyword);
}
TEST_CASE( "Lex if Keyword", "[lexer]" ) {
Lexer lexer = Lexer("if");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::IfKeyword);
}
TEST_CASE( "Lex in Keyword", "[lexer]" ) {
Lexer lexer = Lexer("in");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::InKeyword);
}
TEST_CASE( "Lex local Keyword", "[lexer]" ) {
Lexer lexer = Lexer("local");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::LocalKeyword);
}
TEST_CASE( "Lex nil Keyword", "[lexer]" ) {
Lexer lexer = Lexer("nil");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::NilKeyword);
}
TEST_CASE( "Lex not Keyword", "[lexer]" ) {
Lexer lexer = Lexer("not");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::NotKeyword);
}
TEST_CASE( "Lex or Keyword", "[lexer]" ) {
Lexer lexer = Lexer("or");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::OrKeyword);
}
TEST_CASE( "Lex return Keyword", "[lexer]" ) {
Lexer lexer = Lexer("return");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::ReturnKeyword);
}
TEST_CASE( "Lex then Keyword", "[lexer]" ) {
Lexer lexer = Lexer("then");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::ThenKeyword);
}
TEST_CASE( "Lex true Keyword", "[lexer]" ) {
Lexer lexer = Lexer("true");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::TrueKeyword);
}
TEST_CASE( "Lex while Keyword", "[lexer]" ) {
Lexer lexer = Lexer("while");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::WhileKeyword);
}
TEST_CASE( "Lex identifier", "[lexer]" ) {
Lexer lexer = Lexer("foo");
auto tokens = lexer.Lex();
REQUIRE(tokens.size() == 2);
IToken* firstToken = tokens[0];
REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo");
}
#endif
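
Two more cases the new test file could cover (sketches only, not part of this commit, and assuming these words do not happen to collide with a keyword hash): a word that merely starts with a keyword, and an identifier with a leading underscore, should both lex as TokenKind::Identifier.

#ifdef TESTS_BUILD
#include <catch2/catch.hpp>
#include "Lexer.hpp"
TEST_CASE( "Lex identifier with keyword prefix", "[lexer]" ) {
    Lexer lexer = Lexer("android");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 2);
    IToken* firstToken = tokens[0];
    REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
    REQUIRE(((IdentifierToken*)firstToken) -> Value == "android");
}
TEST_CASE( "Lex identifier starting with underscore", "[lexer]" ) {
    Lexer lexer = Lexer("_foo");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 2);
    IToken* firstToken = tokens[0];
    REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
    REQUIRE(((IdentifierToken*)firstToken) -> Value == "_foo");
}
#endif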

src/Parser/Token.hpp

@@ -1,7 +1,10 @@
 #ifndef PORYGONLANG_TOKEN_HPP
 #define PORYGONLANG_TOKEN_HPP
+#include <utility>
+#include <string>
 #include "TokenKind.hpp"
 using namespace std;
 class IToken{
 public:
@@ -47,4 +50,17 @@ public:
     }
 };
+
+class IdentifierToken : public IToken{
+public:
+    string Value;
+
+    explicit IdentifierToken(string value){
+        Value = std::move(value);
+    }
+
+    TokenKind GetKind() override{
+        return TokenKind::Identifier;
+    }
+};
 #endif //PORYGONLANG_TOKEN_HPP
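
Since IdentifierToken exposes the lexed name through Value, callers downcast after checking GetKind(), as the tests above do. A small hypothetical helper (not part of this commit) showing that pattern:

#include <string>
#include "Token.hpp"

// Hypothetical helper: returns the identifier's name, or an empty string
// when the token is not an identifier.
std::string GetIdentifierName(IToken* token){
    if (token -> GetKind() == TokenKind::Identifier){
        return ((IdentifierToken*)token) -> Value;
    }
    return "";
}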

src/Parser/TokenKind.hpp

@@ -12,8 +12,30 @@ enum TokenKind{
     AssignmentToken,
     EqualityToken,
+    Identifier,
     Integer,
     Float,
+    AndKeyword,
+    BreakKeyword,
+    DoKeyword,
+    ElseKeyword,
+    ElseIfKeyword,
+    EndKeyword,
+    FalseKeyword,
+    ForKeyword,
+    FunctionKeyword,
+    IfKeyword,
+    InKeyword,
+    LocalKeyword,
+    NilKeyword,
+    NotKeyword,
+    OrKeyword,
+    ReturnKeyword,
+    ThenKeyword,
+    TrueKeyword,
+    WhileKeyword,
 };

src/main.h++

@@ -2,9 +2,6 @@
#define PORYGONLANG_MAIN_H
class main {
};
#endif //PORYGONLANG_MAIN_H