Implements expression parsing

This commit is contained in:
Deukhoofd 2019-05-20 17:45:03 +02:00
parent 7edd51d5e3
commit 37e770f1cb
No known key found for this signature in database
GPG Key ID: B4C087AC81641654
10 changed files with 519 additions and 4 deletions

View File

@ -2,6 +2,5 @@
<project version="4"> <project version="4">
<component name="VcsDirectoryMappings"> <component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" /> <mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/extern/Catch2" vcs="Git" />
</component> </component>
</project> </project>

View File

@ -5,8 +5,8 @@ set(CMAKE_CXX_STANDARD 17)
#add_subdirectory(extern) #add_subdirectory(extern)
include_directories(extern) include_directories(extern)
add_library(PorygonLang src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp) add_library(PorygonLang src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/Parser.cpp src/Parser/Parser.hpp src/Parser/ParsedStatements/ParsedStatement.hpp src/Parser/ParsedExpressions/ParsedExpression.hpp src/Parser/BinaryOperatorKind.hpp)
add_executable(PorygonLangTests src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/LexerTests.cpp) add_executable(PorygonLangTests src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/LexerTests.cpp src/Parser/Parser.cpp src/Parser/Parser.hpp src/Parser/ParsedStatements/ParsedStatement.hpp src/Parser/ParsedExpressions/ParsedExpression.hpp src/Parser/BinaryOperatorKind.hpp)
target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD) target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD)

View File

@ -0,0 +1,13 @@
#ifndef PORYGONLANG_BINARYOPERATORKIND_HPP
#define PORYGONLANG_BINARYOPERATORKIND_HPP
/// Operator carried by a BinaryExpression node.
/// The parser derives the value from the operator token via GetBinaryOperatorKind().
enum class BinaryOperatorKind {
    Addition,        ///< Produced for TokenKind::PlusToken.
    Subtraction,     ///< Produced for TokenKind::MinusToken.
    Multiplication,  ///< Produced for TokenKind::StarToken.
    Division,        ///< Produced for TokenKind::SlashToken.
    Equality,        ///< Produced for TokenKind::EqualityToken.
    LogicalAnd,      ///< Produced for TokenKind::AndKeyword.
    LogicalOr        ///< Produced for TokenKind::OrKeyword.
};
#endif //PORYGONLANG_BINARYOPERATORKIND_HPP

View File

@ -0,0 +1,142 @@
#ifndef PORYGONLANG_PARSEDEXPRESSION_HPP
#define PORYGONLANG_PARSEDEXPRESSION_HPP
#include "../Token.hpp"
#include "../UnaryOperatorKind.hpp"
#include "../BinaryOperatorKind.hpp"
/// Discriminator reported by ParsedExpression::GetKind(); callers use it to
/// decide which concrete expression class a base pointer may be cast to.
enum class ParsedExpressionKind{
    LiteralInteger,
    LiteralFloat,
    LiteralString,   // NOTE(review): no expression class for this kind is visible in this header.
    LiteralBool,
    Unary,
    Binary,
};

/// Abstract base of every parsed expression node.
///
/// Stores the node's start position and length; the end position is derived
/// from the two as `start + length - 1`.
class ParsedExpression {
    unsigned int _position;
    unsigned int _length;
public:
    /// @param position Start position of the expression.
    /// @param length   Length of the expression.
    ParsedExpression(unsigned int position, unsigned int length)
        : _position(position), _length(length) {
    }

    /// Nodes are allocated with `new` and handled through ParsedExpression*,
    /// so the destructor must be virtual for `delete` through the base
    /// pointer to run the derived destructor.
    virtual ~ParsedExpression() = default;

    /// @return The concrete kind of this node.
    virtual ParsedExpressionKind GetKind() = 0;

    /// @return The position passed to the constructor.
    unsigned int GetStartPosition() const {
        return _position;
    }

    /// @return `start + length - 1`. Unsigned arithmetic: a zero-length node
    ///         starting at 0 wraps around.
    unsigned int GetEndPosition() const {
        return _position + _length - 1;
    }

    /// @return The length passed to the constructor.
    unsigned int GetLength() const {
        return _length;
    }
};
/// Expression node for an integer literal; the value is copied from the token.
class LiteralIntegerExpression : public ParsedExpression{
    long _value;
public:
    /// Takes position, length and value from the given integer token.
    explicit LiteralIntegerExpression(IntegerToken* token)
        : ParsedExpression(token->GetStartPosition(), token->GetLength()),
          _value(token->Value) {
    }

    ParsedExpressionKind GetKind() final { return ParsedExpressionKind::LiteralInteger; }

    /// @return The literal's integer value.
    long GetValue() { return _value; }
};
/// Expression node for a floating-point literal; the value is copied from the token.
class LiteralFloatExpression : public ParsedExpression{
    double _value;
public:
    /// Takes position, length and value from the given float token.
    explicit LiteralFloatExpression(FloatToken* token)
        : ParsedExpression(token->GetStartPosition(), token->GetLength()),
          _value(token->Value) {
    }

    ParsedExpressionKind GetKind() final { return ParsedExpressionKind::LiteralFloat; }

    /// @return The literal's floating-point value.
    double GetValue() { return _value; }
};
/// Expression node for a boolean literal.
class LiteralBoolExpression : public ParsedExpression{
    bool _value;
public:
    /// The value is true exactly when the token's kind is TrueKeyword;
    /// every other token kind yields false.
    explicit LiteralBoolExpression(IToken* token)
        : ParsedExpression(token->GetStartPosition(), token->GetLength()),
          _value(token->GetKind() == TokenKind::TrueKeyword) {
    }

    ParsedExpressionKind GetKind() final { return ParsedExpressionKind::LiteralBool; }

    /// @return The literal's boolean value.
    bool GetValue() { return _value; }
};
/// Expression node applying a unary operator to one operand expression.
/// The operand pointer is stored as given; this class never deletes it.
class UnaryExpression : public ParsedExpression{
    UnaryOperatorKind _kind;
    ParsedExpression* _operand;
public:
    /// @param kind    Operator applied to the operand.
    /// @param operand Expression the operator applies to.
    /// @param start   Start position forwarded to ParsedExpression.
    /// @param length  Length forwarded to ParsedExpression.
    UnaryExpression(UnaryOperatorKind kind, ParsedExpression* operand, unsigned int start, unsigned int length)
        : ParsedExpression(start, length), _kind(kind), _operand(operand) {
    }

    ParsedExpressionKind GetKind() final { return ParsedExpressionKind::Unary; }

    /// @return The operator of this node.
    UnaryOperatorKind GetOperatorKind() { return _kind; }

    /// @return The operand expression.
    ParsedExpression* GetOperand() { return _operand; }
};
/// Expression node combining a left and a right operand with a binary operator.
/// Both operand pointers are stored as given; this class never deletes them.
class BinaryExpression : public ParsedExpression{
    BinaryOperatorKind _kind;
    ParsedExpression* _left;
    ParsedExpression* _right;
public:
    /// @param kind   Operator joining the two operands.
    /// @param left   Left-hand operand.
    /// @param right  Right-hand operand.
    /// @param start  Start position forwarded to ParsedExpression.
    /// @param length Length forwarded to ParsedExpression.
    BinaryExpression(BinaryOperatorKind kind, ParsedExpression* left, ParsedExpression* right,
                     unsigned int start, unsigned int length)
        : ParsedExpression(start, length), _kind(kind), _left(left), _right(right) {
    }

    ParsedExpressionKind GetKind() final { return ParsedExpressionKind::Binary; }

    /// @return The operator of this node.
    BinaryOperatorKind GetOperatorKind() { return _kind; }

    /// @return The left-hand operand.
    ParsedExpression* GetLeft() { return _left; }

    /// @return The right-hand operand.
    ParsedExpression* GetRight() { return _right; }
};
#endif //PORYGONLANG_PARSEDEXPRESSION_HPP

View File

@ -0,0 +1,77 @@
#include <utility>
#ifndef PORYGONLANG_PARSEDSTATEMENT_HPP
#define PORYGONLANG_PARSEDSTATEMENT_HPP
#include <vector>
#include "../ParsedExpressions/ParsedExpression.hpp"
/// Discriminator reported by ParsedStatement::GetKind().
enum class ParsedStatementKind{
    Script,
    Block,
    Expression,
};

/// Abstract base of every parsed statement node.
///
/// Stores the statement's start position and length; the end position is
/// derived from the two as `start + length - 1`.
class ParsedStatement {
    unsigned int _start;
    unsigned int _length;
public:
    /// @param start  Start position of the statement.
    /// @param length Length of the statement.
    ParsedStatement(unsigned int start, unsigned int length)
        : _start(start), _length(length) {
    }

    /// Statements are allocated with `new` and handled through
    /// ParsedStatement*, so the destructor must be virtual for `delete`
    /// through the base pointer to run the derived destructor.
    virtual ~ParsedStatement() = default;

    /// @return The concrete kind of this statement.
    virtual ParsedStatementKind GetKind() = 0;

    /// @return The start position passed to the constructor.
    unsigned int GetStartPosition() const {
        return _start;
    }

    /// @return `start + length - 1`. Unsigned arithmetic: a zero-length
    ///         statement starting at 0 wraps around.
    unsigned int GetEndPosition() const {
        return _start + _length - 1;
    }

    /// @return The length passed to the constructor (mirrors
    ///         ParsedExpression::GetLength()).
    unsigned int GetLength() const {
        return _length;
    }
};
class ParsedBlockStatement : public ParsedStatement{
std::vector<ParsedStatement*> _statements;
public:
explicit ParsedBlockStatement(std::vector<ParsedStatement*> statements)
: ParsedStatement(statements.front()->GetStartPosition(), statements.back()->GetEndPosition() - statements.front()->GetStartPosition()){
_statements = std::move(statements);
}
ParsedStatementKind GetKind() override{
return ParsedStatementKind ::Block;
}
std::vector<ParsedStatement*> GetStatements(){
return _statements;
}
};
/// Root statement produced by Parser::Parse(): a block whose kind is Script.
class ParsedScriptStatement : public ParsedBlockStatement{
public:
    /// @param statements Top-level statements of the script. The parameter is
    ///        already a by-value copy, so it is moved into the base class
    ///        rather than copied a second time.
    explicit ParsedScriptStatement(std::vector<ParsedStatement*> statements)
        : ParsedBlockStatement(std::move(statements)){}

    ParsedStatementKind GetKind() final{
        return ParsedStatementKind::Script;
    }
};
/// Statement consisting of a single expression; it takes its start position
/// and length from that expression.
class ParsedExpressionStatement : public ParsedStatement{
    ParsedExpression* _expression;
public:
    /// @param expression The wrapped expression; stored as given and
    ///        dereferenced here to read its position and length.
    explicit ParsedExpressionStatement(ParsedExpression* expression)
        : ParsedStatement(expression->GetStartPosition(), expression->GetLength()),
          _expression(expression) {
    }

    ParsedStatementKind GetKind() final { return ParsedStatementKind::Expression; }

    /// @return The wrapped expression.
    ParsedExpression* GetExpression() { return _expression; }
};
#endif //PORYGONLANG_PARSEDSTATEMENT_HPP

232
src/Parser/Parser.cpp Normal file
View File

@ -0,0 +1,232 @@
#include "Parser.hpp"

#include <stdexcept>

#include "UnaryOperatorKind.hpp"
#include "BinaryOperatorKind.hpp"
/// Consumes tokens until an EndOfFile token is read and returns the parsed
/// statements wrapped in a newly allocated ParsedScriptStatement.
/// WhiteSpace tokens between statements are skipped.
ParsedScriptStatement* Parser::Parse() {
    vector<ParsedStatement*> collected;
    for (;;) {
        IToken* token = Next();
        switch (token->GetKind()) {
            case TokenKind::EndOfFile:
                // End of input: hand everything gathered so far to the script node.
                return new ParsedScriptStatement(collected);
            case TokenKind::WhiteSpace:
                // Nothing to parse; move on to the next token.
                break;
            default:
                collected.push_back(ParseStatement(token));
                break;
        }
    }
}
/// Parses one statement starting at `current`. Every statement is parsed as
/// an expression and wrapped in a newly allocated ParsedExpressionStatement.
ParsedStatement* Parser::ParseStatement(IToken* current){
    ParsedExpression* expression = ParseExpression(current);
    return new ParsedExpressionStatement(expression);
}
/// Parses a full expression starting at `current` by entering the binary
/// expression parser with the lowest precedence (OperatorPrecedence::No).
ParsedExpression* Parser::ParseExpression(IToken* current){
    const OperatorPrecedence lowest = OperatorPrecedence::No;
    return ParseBinaryExpression(current, lowest);
}
/// Returns OperatorPrecedence::Unary when `kind` is a token that can act as a
/// unary operator (plus, minus, `not`), and OperatorPrecedence::No otherwise.
OperatorPrecedence GetUnaryPrecedence(TokenKind kind){
    const bool isUnaryOperator = kind == TokenKind::PlusToken
                              || kind == TokenKind::MinusToken
                              || kind == TokenKind::NotKeyword;
    if (isUnaryOperator){
        return OperatorPrecedence::Unary;
    }
    return OperatorPrecedence::No;
}
/// Maps a unary operator token to its UnaryOperatorKind.
///
/// @param kind Token kind; expected to be one for which GetUnaryPrecedence()
///             does not return OperatorPrecedence::No.
/// @throws std::logic_error if `kind` is not a unary operator token. The
///         previous bare `throw;` had no exception to rethrow here and so
///         called std::terminate instead of reporting an error.
UnaryOperatorKind GetUnaryOperatorKind(TokenKind kind){
    switch (kind){
        case TokenKind::PlusToken: return UnaryOperatorKind::Identity;
        case TokenKind::MinusToken: return UnaryOperatorKind::Negation;
        case TokenKind::NotKeyword: return UnaryOperatorKind::LogicalNegation;
        default:
            throw std::logic_error("GetUnaryOperatorKind: token is not a unary operator");
    }
}
/// Maps a binary operator token to its BinaryOperatorKind.
///
/// @param kind Token kind; expected to be one for which GetBinaryPrecedence()
///             does not return OperatorPrecedence::No.
/// @throws std::logic_error if `kind` is not a binary operator token. The
///         previous bare `throw;` had no exception to rethrow here and so
///         called std::terminate instead of reporting an error.
BinaryOperatorKind GetBinaryOperatorKind(TokenKind kind){
    switch (kind){
        case TokenKind::PlusToken: return BinaryOperatorKind::Addition;
        case TokenKind::MinusToken: return BinaryOperatorKind::Subtraction;
        case TokenKind::StarToken: return BinaryOperatorKind::Multiplication;
        case TokenKind::SlashToken: return BinaryOperatorKind::Division;
        case TokenKind::EqualityToken: return BinaryOperatorKind::Equality;
        case TokenKind::AndKeyword: return BinaryOperatorKind::LogicalAnd;
        case TokenKind::OrKeyword: return BinaryOperatorKind::LogicalOr;
        default:
            throw std::logic_error("GetBinaryOperatorKind: token is not a binary operator");
    }
}
/// Returns the binding precedence of `kind` when used as a binary operator,
/// or OperatorPrecedence::No when the token is not a binary operator.
OperatorPrecedence GetBinaryPrecedence(TokenKind kind){
    switch (kind){
        case TokenKind::OrKeyword:
            return OperatorPrecedence::LogicalOr;
        case TokenKind::AndKeyword:
            return OperatorPrecedence::LogicalAnd;
        case TokenKind::EqualityToken:
            return OperatorPrecedence::Equality;
        // + and - share one precedence level.
        case TokenKind::PlusToken:
        case TokenKind::MinusToken:
            return OperatorPrecedence::Additive;
        // * and / share one precedence level.
        case TokenKind::StarToken:
        case TokenKind::SlashToken:
            return OperatorPrecedence::Multiplication;
        default:
            return OperatorPrecedence::No;
    }
}
/// Parses a unary/binary expression starting at `current`.
///
/// A leading unary operator is applied first when its precedence is at least
/// `parentPrecedence`; otherwise `current` is parsed as a primary expression.
/// Binary operators are then folded in for as long as the next operator
/// binds strictly tighter than `parentPrecedence`; an operator of equal
/// precedence is left to the caller, which makes equal-precedence chains
/// group to the left.
///
/// @param current          First token of the expression (already consumed).
/// @param parentPrecedence Precedence of the operator this expression is an
///                         operand of, or OperatorPrecedence::No at top level.
/// @return Newly allocated expression tree.
ParsedExpression* Parser::ParseBinaryExpression(IToken* current, OperatorPrecedence parentPrecedence){
    OperatorPrecedence unaryPrecedence = GetUnaryPrecedence(current -> GetKind());
    ParsedExpression* left;
    if (unaryPrecedence != OperatorPrecedence::No && unaryPrecedence >= parentPrecedence){
        UnaryOperatorKind operatorKind = GetUnaryOperatorKind(current -> GetKind());
        auto next = Parser::Next();
        auto operand = Parser::ParseBinaryExpression(next, unaryPrecedence);
        auto startPos = current -> GetStartPosition();
        // End positions are inclusive (end = start + length - 1), so the
        // length of the span [startPos, end] is end - startPos + 1.
        left = new UnaryExpression(operatorKind, operand, startPos, operand -> GetEndPosition() - startPos + 1);
    } else{
        left = Parser::ParsePrimaryExpression(current);
    }
    while (true){
        // Look at the next token without consuming it; it is only consumed
        // once it is known to be an operator this level should handle.
        auto next = Parser::Peek();
        OperatorPrecedence binaryPrecedence = GetBinaryPrecedence(next -> GetKind());
        if (binaryPrecedence == OperatorPrecedence::No || binaryPrecedence <= parentPrecedence){
            break;
        }
        auto operatorKind = GetBinaryOperatorKind(next -> GetKind());
        Parser::Next(); // consume the operator token that was peeked
        auto right = ParseBinaryExpression(Parser::Next(), binaryPrecedence);
        auto startPos = left -> GetStartPosition();
        // Same inclusive-end convention as above: length = end - start + 1.
        left = new BinaryExpression(operatorKind, left, right, startPos, right -> GetEndPosition() - startPos + 1);
    }
    return left;
}
/// Parses a primary expression from a single token: an integer, float or
/// boolean literal.
///
/// @param current Token to convert into a literal expression.
/// @return Newly allocated literal expression node.
/// @throws std::runtime_error if the token cannot start a primary
///         expression. The previous bare `throw;` had no exception to
///         rethrow here and so called std::terminate.
ParsedExpression *Parser::ParsePrimaryExpression(IToken *current) {
    switch (current -> GetKind()){
        // The token kind identifies the concrete token class, so the
        // downcasts below rely on that kind check.
        case TokenKind::Integer: return new LiteralIntegerExpression(static_cast<IntegerToken*>(current));
        case TokenKind::Float: return new LiteralFloatExpression(static_cast<FloatToken*>(current));
        case TokenKind::TrueKeyword: return new LiteralBoolExpression(current);
        case TokenKind::FalseKeyword: return new LiteralBoolExpression(current);
        default:
            throw std::runtime_error("ParsePrimaryExpression: unexpected token");
    }
}
/// Returns the token at the current position without consuming it.
///
/// @throws std::out_of_range if the position is past the last token (for
///         example when the token list is not terminated by EndOfFile),
///         instead of reading outside the vector.
IToken *Parser::Peek() {
    return _tokens.at(_position);
}
/// Returns the token at the current position and advances past it.
///
/// @throws std::out_of_range if the position is past the last token; the
///         position is left unchanged in that case.
IToken *Parser::Next() {
    // Bounds-checked read first, so a failed read does not move the cursor.
    IToken* token = _tokens.at(_position);
    ++_position;
    return token;
}
#ifdef TESTS_BUILD
#include <catch.hpp>
// A lone `true` token must parse to one expression statement holding a
// boolean literal with value true.
TEST_CASE( "Parse single true keyword", "[parser]" ) {
    vector<IToken*> tokens {
        new SimpleToken(TokenKind::TrueKeyword,0,0),
        new SimpleToken(TokenKind::EndOfFile,0,0)
    };
    Parser parser(tokens);
    auto statements = parser.Parse() -> GetStatements();
    REQUIRE(statements.size() == 1);

    auto statement = statements[0];
    REQUIRE(statement -> GetKind() == ParsedStatementKind::Expression);

    auto expression = static_cast<ParsedExpressionStatement*>(statement) -> GetExpression();
    REQUIRE(expression -> GetKind() == ParsedExpressionKind::LiteralBool);

    auto literal = static_cast<LiteralBoolExpression*>(expression);
    REQUIRE(literal -> GetValue() == true);
}
// A lone `false` token must parse to one expression statement holding a
// boolean literal with value false.
TEST_CASE( "Parse single false keyword", "[parser]" ) {
    vector<IToken*> tokens {
        new SimpleToken(TokenKind::FalseKeyword,0,0),
        new SimpleToken(TokenKind::EndOfFile,0,0)
    };
    Parser parser(tokens);
    auto statements = parser.Parse() -> GetStatements();
    REQUIRE(statements.size() == 1);

    auto statement = statements[0];
    REQUIRE(statement -> GetKind() == ParsedStatementKind::Expression);

    auto expression = static_cast<ParsedExpressionStatement*>(statement) -> GetExpression();
    REQUIRE(expression -> GetKind() == ParsedExpressionKind::LiteralBool);

    auto literal = static_cast<LiteralBoolExpression*>(expression);
    REQUIRE(literal -> GetValue() == false);
}
// `5 + 10` must parse to a single Addition node with integer literals 5 and
// 10 as its left and right operands.
TEST_CASE( "Parse simple addition", "[parser]" ) {
    vector<IToken*> tokens {
        new IntegerToken(5, 0, 0),
        new SimpleToken(TokenKind::PlusToken,0,0),
        new IntegerToken(10, 0, 0),
        new SimpleToken(TokenKind::EndOfFile,0,0)
    };
    Parser parser(tokens);
    auto statements = parser.Parse() -> GetStatements();
    REQUIRE(statements.size() == 1);

    auto statement = statements[0];
    REQUIRE(statement -> GetKind() == ParsedStatementKind::Expression);

    auto expression = static_cast<ParsedExpressionStatement*>(statement) -> GetExpression();
    REQUIRE(expression -> GetKind() == ParsedExpressionKind::Binary);

    auto addition = static_cast<BinaryExpression*>(expression);
    CHECK(addition -> GetOperatorKind() == BinaryOperatorKind::Addition);

    auto lhs = addition -> GetLeft();
    auto rhs = addition -> GetRight();
    REQUIRE(lhs -> GetKind() == ParsedExpressionKind::LiteralInteger);
    REQUIRE(rhs -> GetKind() == ParsedExpressionKind::LiteralInteger);
    CHECK(static_cast<LiteralIntegerExpression*>(lhs) -> GetValue() == 5);
    CHECK(static_cast<LiteralIntegerExpression*>(rhs) -> GetValue() == 10);
}
// `-10` must parse to a single Negation node whose operand is the integer
// literal 10.
TEST_CASE( "Parse simple negation", "[parser]" ) {
    vector<IToken*> tokens {
        new SimpleToken(TokenKind::MinusToken,0,0),
        new IntegerToken(10, 0, 0),
        new SimpleToken(TokenKind::EndOfFile,0,0)
    };
    Parser parser(tokens);
    auto statements = parser.Parse() -> GetStatements();
    REQUIRE(statements.size() == 1);

    auto statement = statements[0];
    REQUIRE(statement -> GetKind() == ParsedStatementKind::Expression);

    auto expression = static_cast<ParsedExpressionStatement*>(statement) -> GetExpression();
    REQUIRE(expression -> GetKind() == ParsedExpressionKind::Unary);

    auto negation = static_cast<UnaryExpression*>(expression);
    CHECK(negation -> GetOperatorKind() == UnaryOperatorKind::Negation);

    auto inner = negation -> GetOperand();
    REQUIRE(inner -> GetKind() == ParsedExpressionKind::LiteralInteger);
    CHECK(static_cast<LiteralIntegerExpression*>(inner) -> GetValue() == 10);
}
// `5 + 10 * 6` must parse as 5 + (10 * 6): the root is an Addition whose
// right operand is a Multiplication of the literals 10 and 6.
TEST_CASE( "Assert binary precedence", "[parser]" ) {
    vector<IToken*> v {
        new IntegerToken(5, 0, 0),
        new SimpleToken(TokenKind::PlusToken,0,0),
        new IntegerToken(10, 0, 0),
        new SimpleToken(TokenKind::StarToken,0,0),
        new IntegerToken(6, 0, 0),
        new SimpleToken(TokenKind::EndOfFile,0,0)
    };
    Parser parser = Parser(v);
    auto parsedStatements = parser.Parse() -> GetStatements();
    REQUIRE(parsedStatements.size() == 1);
    auto firstStatement = parsedStatements[0];
    REQUIRE(firstStatement -> GetKind() == ParsedStatementKind::Expression);
    auto expression = static_cast<ParsedExpressionStatement*>(firstStatement)->GetExpression();
    REQUIRE(expression -> GetKind() == ParsedExpressionKind::Binary);
    auto binary = static_cast<BinaryExpression*>(expression);
    CHECK(binary -> GetOperatorKind() == BinaryOperatorKind::Addition);
    auto left = binary->GetLeft();
    auto right = binary->GetRight();
    REQUIRE(left->GetKind() == ParsedExpressionKind::LiteralInteger);
    REQUIRE(right->GetKind() == ParsedExpressionKind::Binary);
    CHECK(static_cast<LiteralIntegerExpression*>(left)->GetValue() == 5);

    // Verify the nested node itself: without these checks the test would
    // pass for any nested operator, and the casts below would be unchecked.
    auto nested = static_cast<BinaryExpression*>(right);
    CHECK(nested -> GetOperatorKind() == BinaryOperatorKind::Multiplication);
    auto nestedLeft = nested->GetLeft();
    auto nestedRight = nested->GetRight();
    REQUIRE(nestedLeft->GetKind() == ParsedExpressionKind::LiteralInteger);
    REQUIRE(nestedRight->GetKind() == ParsedExpressionKind::LiteralInteger);
    CHECK(static_cast<LiteralIntegerExpression*>(nestedLeft)->GetValue() == 10);
    CHECK(static_cast<LiteralIntegerExpression*>(nestedRight)->GetValue() == 6);
}
#endif

38
src/Parser/Parser.hpp Normal file
View File

@ -0,0 +1,38 @@
#include <utility>
#ifndef PORYGONLANG_PARSER_HPP
#define PORYGONLANG_PARSER_HPP
#include "ParsedStatements/ParsedStatement.hpp"
/// Operator binding strength, declared from loosest to tightest.
/// The parser compares these values with relational operators, so the
/// declaration order is significant.
enum class OperatorPrecedence {
    No,              ///< Not an operator; also the starting precedence of ParseExpression().
    LogicalOr,       ///< `or`
    LogicalAnd,      ///< `and`
    Equality,        ///< equality comparison
    Additive,        ///< binary `+` and `-`
    Multiplication,  ///< `*` and `/`
    Unary            ///< unary `+`, `-` and `not`
};
/// Turns a token list into a tree of parsed statements and expressions.
class Parser {
    vector<IToken*> _tokens;   // tokens to parse, in order
    unsigned int _position;    // index of the next token to hand out

    /// Returns the token at the current position without advancing.
    IToken* Peek();
    /// Returns the token at the current position and advances past it.
    IToken* Next();

    ParsedStatement* ParseStatement(IToken* current);
    ParsedExpression* ParseExpression(IToken* current);
    ParsedExpression* ParseBinaryExpression(IToken* current, OperatorPrecedence parentPrecedence);
    ParsedExpression* ParsePrimaryExpression(IToken* current);
public:
    /// Takes over the given token list and starts at its first token.
    explicit Parser(vector<IToken*> tokens)
        : _tokens(std::move(tokens)), _position(0) {
    }

    /// Parses tokens until an EndOfFile token is read and returns the
    /// resulting script statement.
    ParsedScriptStatement* Parse();
};
#endif //PORYGONLANG_PARSER_HPP

View File

@ -24,6 +24,10 @@ public:
unsigned int GetEndPosition(){ unsigned int GetEndPosition(){
return Position + Length - 1; return Position + Length - 1;
} }
unsigned int GetLength(){
return Length;
}
}; };
class SimpleToken : public IToken{ class SimpleToken : public IToken{

View File

@ -1,7 +1,7 @@
#ifndef PORYGONLANG_TOKENKIND_HPP #ifndef PORYGONLANG_TOKENKIND_HPP
#define PORYGONLANG_TOKENKIND_HPP #define PORYGONLANG_TOKENKIND_HPP
enum TokenKind{ enum class TokenKind{
EndOfFile, EndOfFile,
WhiteSpace, WhiteSpace,

View File

@ -0,0 +1,10 @@
#ifndef PORYGONLANG_UNARYOPERATORKIND_HPP
#define PORYGONLANG_UNARYOPERATORKIND_HPP
/// Operator carried by a UnaryExpression node.
/// The parser derives the value from the operator token via GetUnaryOperatorKind().
enum class UnaryOperatorKind {
    Identity,        ///< Produced for TokenKind::PlusToken.
    Negation,        ///< Produced for TokenKind::MinusToken.
    LogicalNegation  ///< Produced for TokenKind::NotKeyword.
};
#endif //PORYGONLANG_UNARYOPERATORKIND_HPP